path: root/usr/src/uts/intel/ml
author	Richard Lowe <richlowe@richlowe.net>	2021-06-04 15:15:12 -0500
committer	Richard Lowe <richlowe@richlowe.net>	2021-08-16 12:46:39 -0500
commit	f0089e391b2bc4be2755f1a1b51fb4cd9b8f3988 (patch)
tree	c4ac2f5e703ed459d50bcee7ddb38a993d961520 /usr/src/uts/intel/ml
parent	d083fed0c91296a88878f7a468910ad5b5c888ea (diff)
download	illumos-gate-f0089e391b2bc4be2755f1a1b51fb4cd9b8f3988.tar.gz
13941 intel code and headers should not look ia32 specific
Reviewed by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Patrick Mooney <pmooney@pfmooney.com>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src/uts/intel/ml')
-rw-r--r--	usr/src/uts/intel/ml/copy.s	1908
-rw-r--r--	usr/src/uts/intel/ml/ddi_i86_asm.s	522
-rw-r--r--	usr/src/uts/intel/ml/desctbls_asm.s	118
-rw-r--r--	usr/src/uts/intel/ml/exception.s	917
-rw-r--r--	usr/src/uts/intel/ml/float.s	347
-rw-r--r--	usr/src/uts/intel/ml/hypersubr.s	164
-rw-r--r--	usr/src/uts/intel/ml/i86_subr.s	1629
-rw-r--r--	usr/src/uts/intel/ml/lock_prim.s	714
-rw-r--r--	usr/src/uts/intel/ml/modstubs.s	1320
-rw-r--r--	usr/src/uts/intel/ml/ovbcopy.s	92
-rw-r--r--	usr/src/uts/intel/ml/retpoline.s	211
-rw-r--r--	usr/src/uts/intel/ml/sseblk.s	280
-rw-r--r--	usr/src/uts/intel/ml/swtch.s	509
13 files changed, 8731 insertions, 0 deletions
diff --git a/usr/src/uts/intel/ml/copy.s b/usr/src/uts/intel/ml/copy.s
new file mode 100644
index 0000000000..5e5f822518
--- /dev/null
+++ b/usr/src/uts/intel/ml/copy.s
@@ -0,0 +1,1908 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2009, Intel Corporation
+ * All rights reserved.
+ */
+
+/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
+/* All Rights Reserved */
+
+/* Copyright (c) 1987, 1988 Microsoft Corporation */
+/* All Rights Reserved */
+
+/*
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#include <sys/errno.h>
+#include <sys/asm_linkage.h>
+
+#include "assym.h"
+
+#define KCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
+#define XCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
+/*
+ * Non-temporal access (NTA) alignment requirement
+ */
+#define NTA_ALIGN_SIZE 4 /* Must be at least 4-byte aligned */
+#define NTA_ALIGN_MASK _CONST(NTA_ALIGN_SIZE-1)
+#define COUNT_ALIGN_SIZE 16 /* Must be at least 16-byte aligned */
+#define COUNT_ALIGN_MASK _CONST(COUNT_ALIGN_SIZE-1)
+
+/*
+ * With the introduction of Broadwell, Intel has introduced supervisor mode
+ * access protection -- SMAP. SMAP forces the kernel to set certain bits to
+ * enable access of user pages (AC in rflags, defined as PS_ACHK in
+ * <sys/psw.h>). One of the challenges is that the implementations of many of the
+ * userland copy routines directly use the kernel ones. For example, copyin and
+ * copyout simply go and jump to the do_copy_fault label and traditionally let
+ * those deal with the return for them. In fact, changing that is a can of frame
+ * pointers.
+ *
+ * Rules and Constraints:
+ *
+ * 1. For anything that's not in copy.s, we have it do explicit smap_disable()
+ * or smap_enable() calls. This is restricted to the following three places:
+ * DTrace, resume() in swtch.s and on_fault/no_fault. If you want to add it
+ * somewhere else, we should be thinking twice.
+ *
+ * 2. We try to toggle this at the smallest window possible. This means that if
+ * we take a fault, need to try to use a copyop in copyin() or copyout(), or any
+ * other function, we will always leave with SMAP enabled (the kernel cannot
+ * access user pages).
+ *
+ * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are
+ * explicitly only allowed to be called while in an on_fault()/no_fault() handler,
+ * which already takes care of ensuring that SMAP is enabled and disabled. Note
+ * this means that when under an on_fault()/no_fault() handler, one must not
+ * call the non-*_noerr() routines.
+ *
+ * 4. The first thing we should do after coming out of an lofault handler is to
+ * make sure that we call smap_enable() again to ensure that we are safely
+ * protected, as more often than not, we will have disabled smap to get there.
+ *
+ * 5. smap_enable() and smap_disable() don't exist: calls to these functions
+ * generate runtime relocations that are then processed into the necessary
+ * clac/stac via the krtld hotinlines mechanism and hotinline_smap().
+ *
+ * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
+ * SMAP_DISABLE_INSTR macro should be used. If the number of these is changed,
+ * you must update the constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
+ *
+ * 7. Generally this .s file is processed by a K&R style cpp. This means that it
+ * really has a lot of feelings about whitespace. In particular, if you have a
+ * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'.
+ *
+ * 8. In general, the kernel has its own value for rflags that gets used. This
+ * is maintained in a few different places which vary based on how the thread
+ * comes into existence and whether it's a user thread. In general, when the
+ * kernel takes a trap, it will always set rflags to a known set of flags,
+ * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that
+ * PS_ACHK is cleared for us. In addition, when using the sysenter instruction,
+ * we mask PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for
+ * where that gets masked off.
+ */
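For orientation, rule 5 amounts to a boot-time byte patch: each three-byte nop placeholder emitted by SMAP_DISABLE_INSTR/SMAP_ENABLE_INSTR is overwritten with the three-byte STAC or CLAC encoding. The C sketch below is illustrative only; the helper name is hypothetical and the real work is done by krtld's hot-inline machinery, but the opcode bytes and the direction of the patch follow the rules above.

	#include <stdint.h>
	#include <string.h>

	/*
	 * Hypothetical sketch of the hot-inline patch described in rule 5.
	 * stac sets AC (PS_ACHK), letting the kernel touch user pages, so it
	 * replaces the _smap_disable_patch_N sites; clac clears AC and
	 * replaces the _smap_enable_patch_N sites.
	 */
	static const uint8_t stac_bytes[3] = { 0x0f, 0x01, 0xcb };	/* stac */
	static const uint8_t clac_bytes[3] = { 0x0f, 0x01, 0xca };	/* clac */

	static void
	patch_smap_site(uint8_t *site, int disable_smap)
	{
		/* Overwrite the "nop; nop; nop" placeholder in place. */
		memcpy(site, disable_smap ? stac_bytes : clac_bytes, 3);
	}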
+
+/*
+ * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
+ * "rep smovq" for large sizes. Performance data shows that many calls to
+ * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance at
+ * these small sizes, unrolled code is used. For medium sizes, loops writing
+ * 64 bytes per iteration are used. Transition points were determined
+ * experimentally.
+ */
+#define BZERO_USE_REP (1024)
+#define BCOPY_DFLT_REP (128)
+#define BCOPY_NHM_REP (768)
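The dispatch those constants drive can be pictured roughly as the C sketch below. It is only an illustration of the size tiers described above: the small and large arms are stand-ins (the real per-size code is the hand-written assembly that follows), and the boot-time patch of the 128-byte threshold up to 768 on Nehalem-class parts is left out.

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	static void
	bcopy_sketch(const void *from, void *to, size_t count)
	{
		const uint8_t *src = from;
		uint8_t *dst = to;

		if (count < 0x50) {
			memcpy(dst, src, count);	/* jump table of unrolled moves */
			return;
		}
		if (count >= 128) {
			memcpy(dst, src, count);	/* "rep smovq" plus unrolled tail */
			return;
		}
		while (count >= 64) {			/* medium: 64 bytes per loop */
			memcpy(dst, src, 64);
			src += 64;
			dst += 64;
			count -= 64;
		}
		memcpy(dst, src, count);		/* 0-63 byte remainder */
	}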
+
+/*
+ * Copy a block of storage, returning an error code if `from' or
+ * `to' takes a kernel pagefault which cannot be resolved.
+ * Returns errno value on pagefault error, 0 if all ok
+ */
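A hedged usage sketch of that contract: kcopy() returns 0 on success and an errno (typically EFAULT) if either address takes an unresolvable kernel pagefault. The wrapper name below is hypothetical.

	#include <sys/systm.h>
	#include <sys/errno.h>

	/* Hypothetical caller relying on the 0-or-errno contract of kcopy(). */
	static int
	copy_mapped_block(const void *from, void *to, size_t count)
	{
		int err = kcopy(from, to, count);

		if (err != 0)
			return (err);	/* e.g. EFAULT if the mapping went away */
		return (0);
	}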
+
+/*
+ * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
+ * additional call instructions.
+ */
+#define SMAP_DISABLE_COUNT 16
+#define SMAP_ENABLE_COUNT 26
+
+#define SMAP_DISABLE_INSTR(ITER) \
+ .globl _smap_disable_patch_/**/ITER; \
+ _smap_disable_patch_/**/ITER/**/:; \
+ nop; nop; nop;
+
+#define SMAP_ENABLE_INSTR(ITER) \
+ .globl _smap_enable_patch_/**/ITER; \
+ _smap_enable_patch_/**/ITER/**/:; \
+ nop; nop; nop;
+
+ .globl kernelbase
+ .globl postbootkernelbase
+
+ ENTRY(kcopy)
+ pushq %rbp
+ movq %rsp, %rbp
+#ifdef DEBUG
+ cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
+ jb 0f
+ cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
+ jnb 1f
+0: leaq .kcopy_panic_msg(%rip), %rdi
+ xorl %eax, %eax
+ call panic
+1:
+#endif
+ /*
+ * pass lofault value as 4th argument to do_copy_fault
+ */
+ leaq _kcopy_copyerr(%rip), %rcx
+ movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
+
+do_copy_fault:
+ movq T_LOFAULT(%r9), %r11 /* save the current lofault */
+ movq %rcx, T_LOFAULT(%r9) /* new lofault */
+ call bcopy_altentry
+ xorl %eax, %eax /* return 0 (success) */
+ SMAP_ENABLE_INSTR(0)
+
+ /*
+ * A fault during do_copy_fault is indicated through an errno value
+ * in %rax and we iretq from the trap handler to here.
+ */
+_kcopy_copyerr:
+ movq %r11, T_LOFAULT(%r9) /* restore original lofault */
+ leave
+ ret
+ SET_SIZE(kcopy)
+
+#undef ARG_FROM
+#undef ARG_TO
+#undef ARG_COUNT
+
+#define COPY_LOOP_INIT(src, dst, cnt) \
+ addq cnt, src; \
+ addq cnt, dst; \
+ shrq $3, cnt; \
+ neg cnt
+
+ /* Copy 16 bytes per loop. Uses %rax and %r8 */
+#define COPY_LOOP_BODY(src, dst, cnt) \
+ prefetchnta 0x100(src, cnt, 8); \
+ movq (src, cnt, 8), %rax; \
+ movq 0x8(src, cnt, 8), %r8; \
+ movnti %rax, (dst, cnt, 8); \
+ movnti %r8, 0x8(dst, cnt, 8); \
+ addq $2, cnt
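For readers more comfortable with intrinsics, the loop formed by COPY_LOOP_INIT/COPY_LOOP_BODY behaves roughly like the sketch below: prefetch well ahead with a non-temporal hint, stream two 8-byte stores per iteration, and fence before returning. This is an illustration only, not the kernel code, and it assumes count is a multiple of 16 as the alignment checks below require.

	#include <stddef.h>
	#include <immintrin.h>

	static void
	nta_copy_sketch(const void *from, void *to, size_t count)
	{
		const long long *src = from;
		long long *dst = to;
		size_t i, words = count / 8;

		for (i = 0; i < words; i += 2) {
			/* prefetch 256 bytes ahead, non-temporal hint */
			_mm_prefetch((const char *)&src[i] + 0x100, _MM_HINT_NTA);
			_mm_stream_si64(&dst[i], src[i]);	/* movnti */
			_mm_stream_si64(&dst[i + 1], src[i + 1]);
		}
		_mm_sfence();		/* the assembly uses mfence */
	}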
+
+ ENTRY(kcopy_nta)
+ pushq %rbp
+ movq %rsp, %rbp
+#ifdef DEBUG
+ cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
+ jb 0f
+ cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
+ jnb 1f
+0: leaq .kcopy_panic_msg(%rip), %rdi
+ xorl %eax, %eax
+ call panic
+1:
+#endif
+
+ movq %gs:CPU_THREAD, %r9
+ cmpq $0, %rcx /* No non-temporal access? */
+ /*
+ * pass lofault value as 4th argument to do_copy_fault
+ */
+ leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */
+ jnz do_copy_fault /* use regular access */
+ /*
+ * Make sure cnt is >= KCOPY_MIN_SIZE
+ */
+ cmpq $KCOPY_MIN_SIZE, %rdx
+ jb do_copy_fault
+
+ /*
+ * Make sure src and dst are NTA_ALIGN_SIZE aligned,
+ * count is COUNT_ALIGN_SIZE aligned.
+ */
+ movq %rdi, %r10
+ orq %rsi, %r10
+ andq $NTA_ALIGN_MASK, %r10
+ orq %rdx, %r10
+ andq $COUNT_ALIGN_MASK, %r10
+ jnz do_copy_fault
+
+ ALTENTRY(do_copy_fault_nta)
+ movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
+ movq T_LOFAULT(%r9), %r11 /* save the current lofault */
+ movq %rcx, T_LOFAULT(%r9) /* new lofault */
+
+ /*
+ * COPY_LOOP_BODY uses %rax and %r8
+ */
+ COPY_LOOP_INIT(%rdi, %rsi, %rdx)
+2: COPY_LOOP_BODY(%rdi, %rsi, %rdx)
+ jnz 2b
+
+ mfence
+ xorl %eax, %eax /* return 0 (success) */
+ SMAP_ENABLE_INSTR(1)
+
+_kcopy_nta_copyerr:
+ movq %r11, T_LOFAULT(%r9) /* restore original lofault */
+ leave
+ ret
+ SET_SIZE(do_copy_fault_nta)
+ SET_SIZE(kcopy_nta)
+
+ ENTRY(bcopy)
+#ifdef DEBUG
+ orq %rdx, %rdx /* %rdx = count */
+ jz 1f
+ cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */
+ jb 0f
+ cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */
+ jnb 1f
+0: leaq .bcopy_panic_msg(%rip), %rdi
+ jmp call_panic /* setup stack and call panic */
+1:
+#endif
+ /*
+ * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
+ * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
+ * uses these registers in future they must be saved and restored.
+ */
+ ALTENTRY(bcopy_altentry)
+do_copy:
+#define L(s) .bcopy/**/s
+ cmpq $0x50, %rdx /* 80 */
+ jae bcopy_ck_size
+
+ /*
+	 * Performance data shows many callers copy small buffers. So for
+ * best perf for these sizes unrolled code is used. Store data without
+ * worrying about alignment.
+ */
+ leaq L(fwdPxQx)(%rip), %r10
+ addq %rdx, %rdi
+ addq %rdx, %rsi
+ movslq (%r10,%rdx,4), %rcx
+ leaq (%rcx,%r10,1), %r10
+ INDIRECT_JMP_REG(r10)
+
+ .p2align 4
+L(fwdPxQx):
+ .int L(P0Q0)-L(fwdPxQx) /* 0 */
+ .int L(P1Q0)-L(fwdPxQx)
+ .int L(P2Q0)-L(fwdPxQx)
+ .int L(P3Q0)-L(fwdPxQx)
+ .int L(P4Q0)-L(fwdPxQx)
+ .int L(P5Q0)-L(fwdPxQx)
+ .int L(P6Q0)-L(fwdPxQx)
+ .int L(P7Q0)-L(fwdPxQx)
+
+ .int L(P0Q1)-L(fwdPxQx) /* 8 */
+ .int L(P1Q1)-L(fwdPxQx)
+ .int L(P2Q1)-L(fwdPxQx)
+ .int L(P3Q1)-L(fwdPxQx)
+ .int L(P4Q1)-L(fwdPxQx)
+ .int L(P5Q1)-L(fwdPxQx)
+ .int L(P6Q1)-L(fwdPxQx)
+ .int L(P7Q1)-L(fwdPxQx)
+
+ .int L(P0Q2)-L(fwdPxQx) /* 16 */
+ .int L(P1Q2)-L(fwdPxQx)
+ .int L(P2Q2)-L(fwdPxQx)
+ .int L(P3Q2)-L(fwdPxQx)
+ .int L(P4Q2)-L(fwdPxQx)
+ .int L(P5Q2)-L(fwdPxQx)
+ .int L(P6Q2)-L(fwdPxQx)
+ .int L(P7Q2)-L(fwdPxQx)
+
+ .int L(P0Q3)-L(fwdPxQx) /* 24 */
+ .int L(P1Q3)-L(fwdPxQx)
+ .int L(P2Q3)-L(fwdPxQx)
+ .int L(P3Q3)-L(fwdPxQx)
+ .int L(P4Q3)-L(fwdPxQx)
+ .int L(P5Q3)-L(fwdPxQx)
+ .int L(P6Q3)-L(fwdPxQx)
+ .int L(P7Q3)-L(fwdPxQx)
+
+ .int L(P0Q4)-L(fwdPxQx) /* 32 */
+ .int L(P1Q4)-L(fwdPxQx)
+ .int L(P2Q4)-L(fwdPxQx)
+ .int L(P3Q4)-L(fwdPxQx)
+ .int L(P4Q4)-L(fwdPxQx)
+ .int L(P5Q4)-L(fwdPxQx)
+ .int L(P6Q4)-L(fwdPxQx)
+ .int L(P7Q4)-L(fwdPxQx)
+
+ .int L(P0Q5)-L(fwdPxQx) /* 40 */
+ .int L(P1Q5)-L(fwdPxQx)
+ .int L(P2Q5)-L(fwdPxQx)
+ .int L(P3Q5)-L(fwdPxQx)
+ .int L(P4Q5)-L(fwdPxQx)
+ .int L(P5Q5)-L(fwdPxQx)
+ .int L(P6Q5)-L(fwdPxQx)
+ .int L(P7Q5)-L(fwdPxQx)
+
+ .int L(P0Q6)-L(fwdPxQx) /* 48 */
+ .int L(P1Q6)-L(fwdPxQx)
+ .int L(P2Q6)-L(fwdPxQx)
+ .int L(P3Q6)-L(fwdPxQx)
+ .int L(P4Q6)-L(fwdPxQx)
+ .int L(P5Q6)-L(fwdPxQx)
+ .int L(P6Q6)-L(fwdPxQx)
+ .int L(P7Q6)-L(fwdPxQx)
+
+ .int L(P0Q7)-L(fwdPxQx) /* 56 */
+ .int L(P1Q7)-L(fwdPxQx)
+ .int L(P2Q7)-L(fwdPxQx)
+ .int L(P3Q7)-L(fwdPxQx)
+ .int L(P4Q7)-L(fwdPxQx)
+ .int L(P5Q7)-L(fwdPxQx)
+ .int L(P6Q7)-L(fwdPxQx)
+ .int L(P7Q7)-L(fwdPxQx)
+
+ .int L(P0Q8)-L(fwdPxQx) /* 64 */
+ .int L(P1Q8)-L(fwdPxQx)
+ .int L(P2Q8)-L(fwdPxQx)
+ .int L(P3Q8)-L(fwdPxQx)
+ .int L(P4Q8)-L(fwdPxQx)
+ .int L(P5Q8)-L(fwdPxQx)
+ .int L(P6Q8)-L(fwdPxQx)
+ .int L(P7Q8)-L(fwdPxQx)
+
+ .int L(P0Q9)-L(fwdPxQx) /* 72 */
+ .int L(P1Q9)-L(fwdPxQx)
+ .int L(P2Q9)-L(fwdPxQx)
+ .int L(P3Q9)-L(fwdPxQx)
+ .int L(P4Q9)-L(fwdPxQx)
+ .int L(P5Q9)-L(fwdPxQx)
+ .int L(P6Q9)-L(fwdPxQx)
+ .int L(P7Q9)-L(fwdPxQx) /* 79 */
+
+ .p2align 4
+L(P0Q9):
+ mov -0x48(%rdi), %rcx
+ mov %rcx, -0x48(%rsi)
+L(P0Q8):
+ mov -0x40(%rdi), %r10
+ mov %r10, -0x40(%rsi)
+L(P0Q7):
+ mov -0x38(%rdi), %r8
+ mov %r8, -0x38(%rsi)
+L(P0Q6):
+ mov -0x30(%rdi), %rcx
+ mov %rcx, -0x30(%rsi)
+L(P0Q5):
+ mov -0x28(%rdi), %r10
+ mov %r10, -0x28(%rsi)
+L(P0Q4):
+ mov -0x20(%rdi), %r8
+ mov %r8, -0x20(%rsi)
+L(P0Q3):
+ mov -0x18(%rdi), %rcx
+ mov %rcx, -0x18(%rsi)
+L(P0Q2):
+ mov -0x10(%rdi), %r10
+ mov %r10, -0x10(%rsi)
+L(P0Q1):
+ mov -0x8(%rdi), %r8
+ mov %r8, -0x8(%rsi)
+L(P0Q0):
+ ret
+
+ .p2align 4
+L(P1Q9):
+ mov -0x49(%rdi), %r8
+ mov %r8, -0x49(%rsi)
+L(P1Q8):
+ mov -0x41(%rdi), %rcx
+ mov %rcx, -0x41(%rsi)
+L(P1Q7):
+ mov -0x39(%rdi), %r10
+ mov %r10, -0x39(%rsi)
+L(P1Q6):
+ mov -0x31(%rdi), %r8
+ mov %r8, -0x31(%rsi)
+L(P1Q5):
+ mov -0x29(%rdi), %rcx
+ mov %rcx, -0x29(%rsi)
+L(P1Q4):
+ mov -0x21(%rdi), %r10
+ mov %r10, -0x21(%rsi)
+L(P1Q3):
+ mov -0x19(%rdi), %r8
+ mov %r8, -0x19(%rsi)
+L(P1Q2):
+ mov -0x11(%rdi), %rcx
+ mov %rcx, -0x11(%rsi)
+L(P1Q1):
+ mov -0x9(%rdi), %r10
+ mov %r10, -0x9(%rsi)
+L(P1Q0):
+ movzbq -0x1(%rdi), %r8
+ mov %r8b, -0x1(%rsi)
+ ret
+
+ .p2align 4
+L(P2Q9):
+ mov -0x4a(%rdi), %r8
+ mov %r8, -0x4a(%rsi)
+L(P2Q8):
+ mov -0x42(%rdi), %rcx
+ mov %rcx, -0x42(%rsi)
+L(P2Q7):
+ mov -0x3a(%rdi), %r10
+ mov %r10, -0x3a(%rsi)
+L(P2Q6):
+ mov -0x32(%rdi), %r8
+ mov %r8, -0x32(%rsi)
+L(P2Q5):
+ mov -0x2a(%rdi), %rcx
+ mov %rcx, -0x2a(%rsi)
+L(P2Q4):
+ mov -0x22(%rdi), %r10
+ mov %r10, -0x22(%rsi)
+L(P2Q3):
+ mov -0x1a(%rdi), %r8
+ mov %r8, -0x1a(%rsi)
+L(P2Q2):
+ mov -0x12(%rdi), %rcx
+ mov %rcx, -0x12(%rsi)
+L(P2Q1):
+ mov -0xa(%rdi), %r10
+ mov %r10, -0xa(%rsi)
+L(P2Q0):
+ movzwq -0x2(%rdi), %r8
+ mov %r8w, -0x2(%rsi)
+ ret
+
+ .p2align 4
+L(P3Q9):
+ mov -0x4b(%rdi), %r8
+ mov %r8, -0x4b(%rsi)
+L(P3Q8):
+ mov -0x43(%rdi), %rcx
+ mov %rcx, -0x43(%rsi)
+L(P3Q7):
+ mov -0x3b(%rdi), %r10
+ mov %r10, -0x3b(%rsi)
+L(P3Q6):
+ mov -0x33(%rdi), %r8
+ mov %r8, -0x33(%rsi)
+L(P3Q5):
+ mov -0x2b(%rdi), %rcx
+ mov %rcx, -0x2b(%rsi)
+L(P3Q4):
+ mov -0x23(%rdi), %r10
+ mov %r10, -0x23(%rsi)
+L(P3Q3):
+ mov -0x1b(%rdi), %r8
+ mov %r8, -0x1b(%rsi)
+L(P3Q2):
+ mov -0x13(%rdi), %rcx
+ mov %rcx, -0x13(%rsi)
+L(P3Q1):
+ mov -0xb(%rdi), %r10
+ mov %r10, -0xb(%rsi)
+ /*
+ * These trailing loads/stores have to do all their loads 1st,
+ * then do the stores.
+ */
+L(P3Q0):
+ movzwq -0x3(%rdi), %r8
+ movzbq -0x1(%rdi), %r10
+ mov %r8w, -0x3(%rsi)
+ mov %r10b, -0x1(%rsi)
+ ret
+
+ .p2align 4
+L(P4Q9):
+ mov -0x4c(%rdi), %r8
+ mov %r8, -0x4c(%rsi)
+L(P4Q8):
+ mov -0x44(%rdi), %rcx
+ mov %rcx, -0x44(%rsi)
+L(P4Q7):
+ mov -0x3c(%rdi), %r10
+ mov %r10, -0x3c(%rsi)
+L(P4Q6):
+ mov -0x34(%rdi), %r8
+ mov %r8, -0x34(%rsi)
+L(P4Q5):
+ mov -0x2c(%rdi), %rcx
+ mov %rcx, -0x2c(%rsi)
+L(P4Q4):
+ mov -0x24(%rdi), %r10
+ mov %r10, -0x24(%rsi)
+L(P4Q3):
+ mov -0x1c(%rdi), %r8
+ mov %r8, -0x1c(%rsi)
+L(P4Q2):
+ mov -0x14(%rdi), %rcx
+ mov %rcx, -0x14(%rsi)
+L(P4Q1):
+ mov -0xc(%rdi), %r10
+ mov %r10, -0xc(%rsi)
+L(P4Q0):
+ mov -0x4(%rdi), %r8d
+ mov %r8d, -0x4(%rsi)
+ ret
+
+ .p2align 4
+L(P5Q9):
+ mov -0x4d(%rdi), %r8
+ mov %r8, -0x4d(%rsi)
+L(P5Q8):
+ mov -0x45(%rdi), %rcx
+ mov %rcx, -0x45(%rsi)
+L(P5Q7):
+ mov -0x3d(%rdi), %r10
+ mov %r10, -0x3d(%rsi)
+L(P5Q6):
+ mov -0x35(%rdi), %r8
+ mov %r8, -0x35(%rsi)
+L(P5Q5):
+ mov -0x2d(%rdi), %rcx
+ mov %rcx, -0x2d(%rsi)
+L(P5Q4):
+ mov -0x25(%rdi), %r10
+ mov %r10, -0x25(%rsi)
+L(P5Q3):
+ mov -0x1d(%rdi), %r8
+ mov %r8, -0x1d(%rsi)
+L(P5Q2):
+ mov -0x15(%rdi), %rcx
+ mov %rcx, -0x15(%rsi)
+L(P5Q1):
+ mov -0xd(%rdi), %r10
+ mov %r10, -0xd(%rsi)
+L(P5Q0):
+ mov -0x5(%rdi), %r8d
+ movzbq -0x1(%rdi), %r10
+ mov %r8d, -0x5(%rsi)
+ mov %r10b, -0x1(%rsi)
+ ret
+
+ .p2align 4
+L(P6Q9):
+ mov -0x4e(%rdi), %r8
+ mov %r8, -0x4e(%rsi)
+L(P6Q8):
+ mov -0x46(%rdi), %rcx
+ mov %rcx, -0x46(%rsi)
+L(P6Q7):
+ mov -0x3e(%rdi), %r10
+ mov %r10, -0x3e(%rsi)
+L(P6Q6):
+ mov -0x36(%rdi), %r8
+ mov %r8, -0x36(%rsi)
+L(P6Q5):
+ mov -0x2e(%rdi), %rcx
+ mov %rcx, -0x2e(%rsi)
+L(P6Q4):
+ mov -0x26(%rdi), %r10
+ mov %r10, -0x26(%rsi)
+L(P6Q3):
+ mov -0x1e(%rdi), %r8
+ mov %r8, -0x1e(%rsi)
+L(P6Q2):
+ mov -0x16(%rdi), %rcx
+ mov %rcx, -0x16(%rsi)
+L(P6Q1):
+ mov -0xe(%rdi), %r10
+ mov %r10, -0xe(%rsi)
+L(P6Q0):
+ mov -0x6(%rdi), %r8d
+ movzwq -0x2(%rdi), %r10
+ mov %r8d, -0x6(%rsi)
+ mov %r10w, -0x2(%rsi)
+ ret
+
+ .p2align 4
+L(P7Q9):
+ mov -0x4f(%rdi), %r8
+ mov %r8, -0x4f(%rsi)
+L(P7Q8):
+ mov -0x47(%rdi), %rcx
+ mov %rcx, -0x47(%rsi)
+L(P7Q7):
+ mov -0x3f(%rdi), %r10
+ mov %r10, -0x3f(%rsi)
+L(P7Q6):
+ mov -0x37(%rdi), %r8
+ mov %r8, -0x37(%rsi)
+L(P7Q5):
+ mov -0x2f(%rdi), %rcx
+ mov %rcx, -0x2f(%rsi)
+L(P7Q4):
+ mov -0x27(%rdi), %r10
+ mov %r10, -0x27(%rsi)
+L(P7Q3):
+ mov -0x1f(%rdi), %r8
+ mov %r8, -0x1f(%rsi)
+L(P7Q2):
+ mov -0x17(%rdi), %rcx
+ mov %rcx, -0x17(%rsi)
+L(P7Q1):
+ mov -0xf(%rdi), %r10
+ mov %r10, -0xf(%rsi)
+L(P7Q0):
+ mov -0x7(%rdi), %r8d
+ movzwq -0x3(%rdi), %r10
+ movzbq -0x1(%rdi), %rcx
+ mov %r8d, -0x7(%rsi)
+ mov %r10w, -0x3(%rsi)
+ mov %cl, -0x1(%rsi)
+ ret
+
+ /*
+ * For large sizes rep smovq is fastest.
+ * Transition point determined experimentally as measured on
+ * Intel Xeon processors (incl. Nehalem and previous generations) and
+ * AMD Opteron. The transition value is patched at boot time to avoid
+ * memory reference hit.
+ */
+ .globl bcopy_patch_start
+bcopy_patch_start:
+ cmpq $BCOPY_NHM_REP, %rdx
+ .globl bcopy_patch_end
+bcopy_patch_end:
+
+ .p2align 4
+ ALTENTRY(bcopy_ck_size)
+
+ cmpq $BCOPY_DFLT_REP, %rdx
+ jae L(use_rep)
+
+ /*
+	 * Align to an 8-byte boundary. Avoids penalties from unaligned stores
+ * as well as from stores spanning cachelines.
+ */
+ test $0x7, %rsi
+ jz L(aligned_loop)
+ test $0x1, %rsi
+ jz 2f
+ movzbq (%rdi), %r8
+ dec %rdx
+ inc %rdi
+ mov %r8b, (%rsi)
+ inc %rsi
+2:
+ test $0x2, %rsi
+ jz 4f
+ movzwq (%rdi), %r8
+ sub $0x2, %rdx
+ add $0x2, %rdi
+ mov %r8w, (%rsi)
+ add $0x2, %rsi
+4:
+ test $0x4, %rsi
+ jz L(aligned_loop)
+ mov (%rdi), %r8d
+ sub $0x4, %rdx
+ add $0x4, %rdi
+ mov %r8d, (%rsi)
+ add $0x4, %rsi
+
+ /*
+ * Copy 64-bytes per loop
+ */
+ .p2align 4
+L(aligned_loop):
+ mov (%rdi), %r8
+ mov 0x8(%rdi), %r10
+ lea -0x40(%rdx), %rdx
+ mov %r8, (%rsi)
+ mov %r10, 0x8(%rsi)
+ mov 0x10(%rdi), %rcx
+ mov 0x18(%rdi), %r8
+ mov %rcx, 0x10(%rsi)
+ mov %r8, 0x18(%rsi)
+
+ cmp $0x40, %rdx
+ mov 0x20(%rdi), %r10
+ mov 0x28(%rdi), %rcx
+ mov %r10, 0x20(%rsi)
+ mov %rcx, 0x28(%rsi)
+ mov 0x30(%rdi), %r8
+ mov 0x38(%rdi), %r10
+ lea 0x40(%rdi), %rdi
+ mov %r8, 0x30(%rsi)
+ mov %r10, 0x38(%rsi)
+ lea 0x40(%rsi), %rsi
+ jae L(aligned_loop)
+
+ /*
+ * Copy remaining bytes (0-63)
+ */
+L(do_remainder):
+ leaq L(fwdPxQx)(%rip), %r10
+ addq %rdx, %rdi
+ addq %rdx, %rsi
+ movslq (%r10,%rdx,4), %rcx
+ leaq (%rcx,%r10,1), %r10
+ INDIRECT_JMP_REG(r10)
+
+ /*
+ * Use rep smovq. Clear remainder via unrolled code
+ */
+ .p2align 4
+L(use_rep):
+ xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */
+ movq %rdx, %rcx /* %rcx = count */
+ shrq $3, %rcx /* 8-byte word count */
+ rep
+ smovq
+
+ xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */
+ andq $7, %rdx /* remainder */
+ jnz L(do_remainder)
+ ret
+#undef L
+ SET_SIZE(bcopy_ck_size)
+
+#ifdef DEBUG
+ /*
+ * Setup frame on the run-time stack. The end of the input argument
+	 * area must be aligned on a 16 byte boundary. The stack pointer %rsp
+	 * always points to the end of the latest allocated stack frame.
+	 * panic(const char *format, ...) is a varargs function. When a
+	 * function taking variable arguments is called, %rax must be set
+	 * to the total number of floating point parameters passed to the
+	 * function in SSE registers.
+ */
+call_panic:
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+ xorl %eax, %eax /* no variable arguments */
+ call panic /* %rdi = format string */
+#endif
+ SET_SIZE(bcopy_altentry)
+ SET_SIZE(bcopy)
+
+
+/*
+ * Zero a block of storage, returning an error code if we
+ * take a kernel pagefault which cannot be resolved.
+ * Returns errno value on pagefault error, 0 if all ok
+ */
+
+ ENTRY(kzero)
+#ifdef DEBUG
+ cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
+ jnb 0f
+ leaq .kzero_panic_msg(%rip), %rdi
+ jmp call_panic /* setup stack and call panic */
+0:
+#endif
+ /*
+ * pass lofault value as 3rd argument for fault return
+ */
+ leaq _kzeroerr(%rip), %rdx
+
+ movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
+ movq T_LOFAULT(%r9), %r11 /* save the current lofault */
+ movq %rdx, T_LOFAULT(%r9) /* new lofault */
+ call bzero_altentry
+ xorl %eax, %eax
+ movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
+ ret
+ /*
+ * A fault during bzero is indicated through an errno value
+ * in %rax when we iretq to here.
+ */
+_kzeroerr:
+ addq $8, %rsp /* pop bzero_altentry call ret addr */
+ movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
+ ret
+ SET_SIZE(kzero)
+
+/*
+ * Zero a block of storage.
+ */
+
+ ENTRY(bzero)
+#ifdef DEBUG
+ cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */
+ jnb 0f
+ leaq .bzero_panic_msg(%rip), %rdi
+ jmp call_panic /* setup stack and call panic */
+0:
+#endif
+ ALTENTRY(bzero_altentry)
+do_zero:
+#define L(s) .bzero/**/s
+ xorl %eax, %eax
+
+ cmpq $0x50, %rsi /* 80 */
+ jae L(ck_align)
+
+ /*
+	 * Performance data shows many callers are zeroing small buffers. So
+ * for best perf for these sizes unrolled code is used. Store zeros
+ * without worrying about alignment.
+ */
+ leaq L(setPxQx)(%rip), %r10
+ addq %rsi, %rdi
+ movslq (%r10,%rsi,4), %rcx
+ leaq (%rcx,%r10,1), %r10
+ INDIRECT_JMP_REG(r10)
+
+ .p2align 4
+L(setPxQx):
+ .int L(P0Q0)-L(setPxQx) /* 0 */
+ .int L(P1Q0)-L(setPxQx)
+ .int L(P2Q0)-L(setPxQx)
+ .int L(P3Q0)-L(setPxQx)
+ .int L(P4Q0)-L(setPxQx)
+ .int L(P5Q0)-L(setPxQx)
+ .int L(P6Q0)-L(setPxQx)
+ .int L(P7Q0)-L(setPxQx)
+
+ .int L(P0Q1)-L(setPxQx) /* 8 */
+ .int L(P1Q1)-L(setPxQx)
+ .int L(P2Q1)-L(setPxQx)
+ .int L(P3Q1)-L(setPxQx)
+ .int L(P4Q1)-L(setPxQx)
+ .int L(P5Q1)-L(setPxQx)
+ .int L(P6Q1)-L(setPxQx)
+ .int L(P7Q1)-L(setPxQx)
+
+ .int L(P0Q2)-L(setPxQx) /* 16 */
+ .int L(P1Q2)-L(setPxQx)
+ .int L(P2Q2)-L(setPxQx)
+ .int L(P3Q2)-L(setPxQx)
+ .int L(P4Q2)-L(setPxQx)
+ .int L(P5Q2)-L(setPxQx)
+ .int L(P6Q2)-L(setPxQx)
+ .int L(P7Q2)-L(setPxQx)
+
+ .int L(P0Q3)-L(setPxQx) /* 24 */
+ .int L(P1Q3)-L(setPxQx)
+ .int L(P2Q3)-L(setPxQx)
+ .int L(P3Q3)-L(setPxQx)
+ .int L(P4Q3)-L(setPxQx)
+ .int L(P5Q3)-L(setPxQx)
+ .int L(P6Q3)-L(setPxQx)
+ .int L(P7Q3)-L(setPxQx)
+
+ .int L(P0Q4)-L(setPxQx) /* 32 */
+ .int L(P1Q4)-L(setPxQx)
+ .int L(P2Q4)-L(setPxQx)
+ .int L(P3Q4)-L(setPxQx)
+ .int L(P4Q4)-L(setPxQx)
+ .int L(P5Q4)-L(setPxQx)
+ .int L(P6Q4)-L(setPxQx)
+ .int L(P7Q4)-L(setPxQx)
+
+ .int L(P0Q5)-L(setPxQx) /* 40 */
+ .int L(P1Q5)-L(setPxQx)
+ .int L(P2Q5)-L(setPxQx)
+ .int L(P3Q5)-L(setPxQx)
+ .int L(P4Q5)-L(setPxQx)
+ .int L(P5Q5)-L(setPxQx)
+ .int L(P6Q5)-L(setPxQx)
+ .int L(P7Q5)-L(setPxQx)
+
+ .int L(P0Q6)-L(setPxQx) /* 48 */
+ .int L(P1Q6)-L(setPxQx)
+ .int L(P2Q6)-L(setPxQx)
+ .int L(P3Q6)-L(setPxQx)
+ .int L(P4Q6)-L(setPxQx)
+ .int L(P5Q6)-L(setPxQx)
+ .int L(P6Q6)-L(setPxQx)
+ .int L(P7Q6)-L(setPxQx)
+
+ .int L(P0Q7)-L(setPxQx) /* 56 */
+ .int L(P1Q7)-L(setPxQx)
+ .int L(P2Q7)-L(setPxQx)
+ .int L(P3Q7)-L(setPxQx)
+ .int L(P4Q7)-L(setPxQx)
+ .int L(P5Q7)-L(setPxQx)
+ .int L(P6Q7)-L(setPxQx)
+ .int L(P7Q7)-L(setPxQx)
+
+ .int L(P0Q8)-L(setPxQx) /* 64 */
+ .int L(P1Q8)-L(setPxQx)
+ .int L(P2Q8)-L(setPxQx)
+ .int L(P3Q8)-L(setPxQx)
+ .int L(P4Q8)-L(setPxQx)
+ .int L(P5Q8)-L(setPxQx)
+ .int L(P6Q8)-L(setPxQx)
+ .int L(P7Q8)-L(setPxQx)
+
+ .int L(P0Q9)-L(setPxQx) /* 72 */
+ .int L(P1Q9)-L(setPxQx)
+ .int L(P2Q9)-L(setPxQx)
+ .int L(P3Q9)-L(setPxQx)
+ .int L(P4Q9)-L(setPxQx)
+ .int L(P5Q9)-L(setPxQx)
+ .int L(P6Q9)-L(setPxQx)
+ .int L(P7Q9)-L(setPxQx) /* 79 */
+
+ .p2align 4
+L(P0Q9): mov %rax, -0x48(%rdi)
+L(P0Q8): mov %rax, -0x40(%rdi)
+L(P0Q7): mov %rax, -0x38(%rdi)
+L(P0Q6): mov %rax, -0x30(%rdi)
+L(P0Q5): mov %rax, -0x28(%rdi)
+L(P0Q4): mov %rax, -0x20(%rdi)
+L(P0Q3): mov %rax, -0x18(%rdi)
+L(P0Q2): mov %rax, -0x10(%rdi)
+L(P0Q1): mov %rax, -0x8(%rdi)
+L(P0Q0):
+ ret
+
+ .p2align 4
+L(P1Q9): mov %rax, -0x49(%rdi)
+L(P1Q8): mov %rax, -0x41(%rdi)
+L(P1Q7): mov %rax, -0x39(%rdi)
+L(P1Q6): mov %rax, -0x31(%rdi)
+L(P1Q5): mov %rax, -0x29(%rdi)
+L(P1Q4): mov %rax, -0x21(%rdi)
+L(P1Q3): mov %rax, -0x19(%rdi)
+L(P1Q2): mov %rax, -0x11(%rdi)
+L(P1Q1): mov %rax, -0x9(%rdi)
+L(P1Q0): mov %al, -0x1(%rdi)
+ ret
+
+ .p2align 4
+L(P2Q9): mov %rax, -0x4a(%rdi)
+L(P2Q8): mov %rax, -0x42(%rdi)
+L(P2Q7): mov %rax, -0x3a(%rdi)
+L(P2Q6): mov %rax, -0x32(%rdi)
+L(P2Q5): mov %rax, -0x2a(%rdi)
+L(P2Q4): mov %rax, -0x22(%rdi)
+L(P2Q3): mov %rax, -0x1a(%rdi)
+L(P2Q2): mov %rax, -0x12(%rdi)
+L(P2Q1): mov %rax, -0xa(%rdi)
+L(P2Q0): mov %ax, -0x2(%rdi)
+ ret
+
+ .p2align 4
+L(P3Q9): mov %rax, -0x4b(%rdi)
+L(P3Q8): mov %rax, -0x43(%rdi)
+L(P3Q7): mov %rax, -0x3b(%rdi)
+L(P3Q6): mov %rax, -0x33(%rdi)
+L(P3Q5): mov %rax, -0x2b(%rdi)
+L(P3Q4): mov %rax, -0x23(%rdi)
+L(P3Q3): mov %rax, -0x1b(%rdi)
+L(P3Q2): mov %rax, -0x13(%rdi)
+L(P3Q1): mov %rax, -0xb(%rdi)
+L(P3Q0): mov %ax, -0x3(%rdi)
+ mov %al, -0x1(%rdi)
+ ret
+
+ .p2align 4
+L(P4Q9): mov %rax, -0x4c(%rdi)
+L(P4Q8): mov %rax, -0x44(%rdi)
+L(P4Q7): mov %rax, -0x3c(%rdi)
+L(P4Q6): mov %rax, -0x34(%rdi)
+L(P4Q5): mov %rax, -0x2c(%rdi)
+L(P4Q4): mov %rax, -0x24(%rdi)
+L(P4Q3): mov %rax, -0x1c(%rdi)
+L(P4Q2): mov %rax, -0x14(%rdi)
+L(P4Q1): mov %rax, -0xc(%rdi)
+L(P4Q0): mov %eax, -0x4(%rdi)
+ ret
+
+ .p2align 4
+L(P5Q9): mov %rax, -0x4d(%rdi)
+L(P5Q8): mov %rax, -0x45(%rdi)
+L(P5Q7): mov %rax, -0x3d(%rdi)
+L(P5Q6): mov %rax, -0x35(%rdi)
+L(P5Q5): mov %rax, -0x2d(%rdi)
+L(P5Q4): mov %rax, -0x25(%rdi)
+L(P5Q3): mov %rax, -0x1d(%rdi)
+L(P5Q2): mov %rax, -0x15(%rdi)
+L(P5Q1): mov %rax, -0xd(%rdi)
+L(P5Q0): mov %eax, -0x5(%rdi)
+ mov %al, -0x1(%rdi)
+ ret
+
+ .p2align 4
+L(P6Q9): mov %rax, -0x4e(%rdi)
+L(P6Q8): mov %rax, -0x46(%rdi)
+L(P6Q7): mov %rax, -0x3e(%rdi)
+L(P6Q6): mov %rax, -0x36(%rdi)
+L(P6Q5): mov %rax, -0x2e(%rdi)
+L(P6Q4): mov %rax, -0x26(%rdi)
+L(P6Q3): mov %rax, -0x1e(%rdi)
+L(P6Q2): mov %rax, -0x16(%rdi)
+L(P6Q1): mov %rax, -0xe(%rdi)
+L(P6Q0): mov %eax, -0x6(%rdi)
+ mov %ax, -0x2(%rdi)
+ ret
+
+ .p2align 4
+L(P7Q9): mov %rax, -0x4f(%rdi)
+L(P7Q8): mov %rax, -0x47(%rdi)
+L(P7Q7): mov %rax, -0x3f(%rdi)
+L(P7Q6): mov %rax, -0x37(%rdi)
+L(P7Q5): mov %rax, -0x2f(%rdi)
+L(P7Q4): mov %rax, -0x27(%rdi)
+L(P7Q3): mov %rax, -0x1f(%rdi)
+L(P7Q2): mov %rax, -0x17(%rdi)
+L(P7Q1): mov %rax, -0xf(%rdi)
+L(P7Q0): mov %eax, -0x7(%rdi)
+ mov %ax, -0x3(%rdi)
+ mov %al, -0x1(%rdi)
+ ret
+
+ /*
+ * Align to a 16-byte boundary. Avoids penalties from unaligned stores
+	 * as well as from stores spanning cachelines. Note that 16-byte alignment
+	 * is better in the case where rep sstoq is used.
+ */
+ .p2align 4
+L(ck_align):
+ test $0xf, %rdi
+ jz L(aligned_now)
+ test $1, %rdi
+ jz 2f
+ mov %al, (%rdi)
+ dec %rsi
+ lea 1(%rdi),%rdi
+2:
+ test $2, %rdi
+ jz 4f
+ mov %ax, (%rdi)
+ sub $2, %rsi
+ lea 2(%rdi),%rdi
+4:
+ test $4, %rdi
+ jz 8f
+ mov %eax, (%rdi)
+ sub $4, %rsi
+ lea 4(%rdi),%rdi
+8:
+ test $8, %rdi
+ jz L(aligned_now)
+ mov %rax, (%rdi)
+ sub $8, %rsi
+ lea 8(%rdi),%rdi
+
+ /*
+ * For large sizes rep sstoq is fastest.
+ * Transition point determined experimentally as measured on
+ * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
+ */
+L(aligned_now):
+ cmp $BZERO_USE_REP, %rsi
+ ja L(use_rep)
+
+ /*
+ * zero 64-bytes per loop
+ */
+ .p2align 4
+L(bzero_loop):
+ leaq -0x40(%rsi), %rsi
+ cmpq $0x40, %rsi
+ movq %rax, (%rdi)
+ movq %rax, 0x8(%rdi)
+ movq %rax, 0x10(%rdi)
+ movq %rax, 0x18(%rdi)
+ movq %rax, 0x20(%rdi)
+ movq %rax, 0x28(%rdi)
+ movq %rax, 0x30(%rdi)
+ movq %rax, 0x38(%rdi)
+ leaq 0x40(%rdi), %rdi
+ jae L(bzero_loop)
+
+ /*
+	 * Clear any remaining bytes.
+ */
+9:
+ leaq L(setPxQx)(%rip), %r10
+ addq %rsi, %rdi
+ movslq (%r10,%rsi,4), %rcx
+ leaq (%rcx,%r10,1), %r10
+ INDIRECT_JMP_REG(r10)
+
+ /*
+ * Use rep sstoq. Clear any remainder via unrolled code
+ */
+ .p2align 4
+L(use_rep):
+ movq %rsi, %rcx /* get size in bytes */
+ shrq $3, %rcx /* count of 8-byte words to zero */
+ rep
+ sstoq /* %rcx = words to clear (%rax=0) */
+ andq $7, %rsi /* remaining bytes */
+ jnz 9b
+ ret
+#undef L
+ SET_SIZE(bzero_altentry)
+ SET_SIZE(bzero)
+
+/*
+ * Transfer data to and from user space -
+ * Note that these routines can cause faults
+ * It is assumed that the kernel has nothing at
+ * less than KERNELBASE in the virtual address space.
+ *
+ * Note that copyin(9F) and copyout(9F) are part of the
+ * DDI/DKI which specifies that they return '-1' on "errors."
+ *
+ * Sigh.
+ *
+ * So there are two extremely similar routines - xcopyin_nta() and
+ * xcopyout_nta() which return the errno that we've faithfully computed.
+ * This allows other callers (e.g. uiomove(9F)) to work correctly.
+ * Given that these are used pretty heavily, we expand the calling
+ * sequences inline for all flavours (rather than making wrappers).
+ */
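A hedged illustration of the two conventions just described, as a driver might see them: copyin(9F)/copyout(9F) report failure as -1, which callers typically fold into EFAULT themselves, while the xcopy*_nta flavours return the errno directly for consumers such as uiomove(9F). The wrapper below is hypothetical.

	#include <sys/systm.h>
	#include <sys/errno.h>

	static int
	fetch_user_args(const void *uaddr, void *kbuf, size_t len)
	{
		if (copyin(uaddr, kbuf, len) != 0)
			return (EFAULT);	/* DDI convention: -1, not an errno */
		return (0);
	}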
+
+/*
+ * Copy user data to kernel space.
+ */
+
+ ENTRY(copyin)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $24, %rsp
+
+ /*
+ * save args in case we trap and need to rerun as a copyop
+ */
+ movq %rdi, (%rsp)
+ movq %rsi, 0x8(%rsp)
+ movq %rdx, 0x10(%rsp)
+
+ movq kernelbase(%rip), %rax
+#ifdef DEBUG
+ cmpq %rax, %rsi /* %rsi = kaddr */
+ jnb 1f
+ leaq .copyin_panic_msg(%rip), %rdi
+ xorl %eax, %eax
+ call panic
+1:
+#endif
+ /*
+ * pass lofault value as 4th argument to do_copy_fault
+ */
+ leaq _copyin_err(%rip), %rcx
+
+ movq %gs:CPU_THREAD, %r9
+ cmpq %rax, %rdi /* test uaddr < kernelbase */
+ jae 3f /* take copyop if uaddr > kernelbase */
+ SMAP_DISABLE_INSTR(0)
+ jmp do_copy_fault /* Takes care of leave for us */
+
+_copyin_err:
+ SMAP_ENABLE_INSTR(2)
+ movq %r11, T_LOFAULT(%r9) /* restore original lofault */
+ addq $8, %rsp /* pop bcopy_altentry call ret addr */
+3:
+ movq T_COPYOPS(%r9), %rax
+ cmpq $0, %rax
+ jz 2f
+ /*
+ * reload args for the copyop
+ */
+ movq (%rsp), %rdi
+ movq 0x8(%rsp), %rsi
+ movq 0x10(%rsp), %rdx
+ leave
+ movq CP_COPYIN(%rax), %rax
+ INDIRECT_JMP_REG(rax)
+
+2: movl $-1, %eax
+ leave
+ ret
+ SET_SIZE(copyin)
+
+ ENTRY(xcopyin_nta)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $24, %rsp
+
+ /*
+ * save args in case we trap and need to rerun as a copyop
+ * %rcx is consumed in this routine so we don't need to save
+ * it.
+ */
+ movq %rdi, (%rsp)
+ movq %rsi, 0x8(%rsp)
+ movq %rdx, 0x10(%rsp)
+
+ movq kernelbase(%rip), %rax
+#ifdef DEBUG
+ cmpq %rax, %rsi /* %rsi = kaddr */
+ jnb 1f
+ leaq .xcopyin_panic_msg(%rip), %rdi
+ xorl %eax, %eax
+ call panic
+1:
+#endif
+ movq %gs:CPU_THREAD, %r9
+ cmpq %rax, %rdi /* test uaddr < kernelbase */
+ jae 4f
+ cmpq $0, %rcx /* No non-temporal access? */
+ /*
+ * pass lofault value as 4th argument to do_copy_fault
+ */
+ leaq _xcopyin_err(%rip), %rcx /* doesn't set rflags */
+ jnz 6f /* use regular access */
+ /*
+ * Make sure cnt is >= XCOPY_MIN_SIZE bytes
+ */
+ cmpq $XCOPY_MIN_SIZE, %rdx
+ jae 5f
+6:
+ SMAP_DISABLE_INSTR(1)
+ jmp do_copy_fault
+
+ /*
+ * Make sure src and dst are NTA_ALIGN_SIZE aligned,
+ * count is COUNT_ALIGN_SIZE aligned.
+ */
+5:
+ movq %rdi, %r10
+ orq %rsi, %r10
+ andq $NTA_ALIGN_MASK, %r10
+ orq %rdx, %r10
+ andq $COUNT_ALIGN_MASK, %r10
+ jnz 6b
+ leaq _xcopyin_nta_err(%rip), %rcx /* doesn't set rflags */
+ SMAP_DISABLE_INSTR(2)
+ jmp do_copy_fault_nta /* use non-temporal access */
+
+4:
+ movl $EFAULT, %eax
+ jmp 3f
+
+ /*
+ * A fault during do_copy_fault or do_copy_fault_nta is
+ * indicated through an errno value in %rax and we iret from the
+ * trap handler to here.
+ */
+_xcopyin_err:
+ addq $8, %rsp /* pop bcopy_altentry call ret addr */
+_xcopyin_nta_err:
+ SMAP_ENABLE_INSTR(3)
+ movq %r11, T_LOFAULT(%r9) /* restore original lofault */
+3:
+ movq T_COPYOPS(%r9), %r8
+ cmpq $0, %r8
+ jz 2f
+
+ /*
+ * reload args for the copyop
+ */
+ movq (%rsp), %rdi
+ movq 0x8(%rsp), %rsi
+ movq 0x10(%rsp), %rdx
+ leave
+ movq CP_XCOPYIN(%r8), %r8
+ INDIRECT_JMP_REG(r8)
+
+2: leave
+ ret
+ SET_SIZE(xcopyin_nta)
+
+/*
+ * Copy kernel data to user space.
+ */
+
+ ENTRY(copyout)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $24, %rsp
+
+ /*
+ * save args in case we trap and need to rerun as a copyop
+ */
+ movq %rdi, (%rsp)
+ movq %rsi, 0x8(%rsp)
+ movq %rdx, 0x10(%rsp)
+
+ movq kernelbase(%rip), %rax
+#ifdef DEBUG
+ cmpq %rax, %rdi /* %rdi = kaddr */
+ jnb 1f
+ leaq .copyout_panic_msg(%rip), %rdi
+ xorl %eax, %eax
+ call panic
+1:
+#endif
+ /*
+ * pass lofault value as 4th argument to do_copy_fault
+ */
+ leaq _copyout_err(%rip), %rcx
+
+ movq %gs:CPU_THREAD, %r9
+ cmpq %rax, %rsi /* test uaddr < kernelbase */
+ jae 3f /* take copyop if uaddr > kernelbase */
+ SMAP_DISABLE_INSTR(3)
+ jmp do_copy_fault /* Calls leave for us */
+
+_copyout_err:
+ SMAP_ENABLE_INSTR(4)
+ movq %r11, T_LOFAULT(%r9) /* restore original lofault */
+ addq $8, %rsp /* pop bcopy_altentry call ret addr */
+3:
+ movq T_COPYOPS(%r9), %rax
+ cmpq $0, %rax
+ jz 2f
+
+ /*
+ * reload args for the copyop
+ */
+ movq (%rsp), %rdi
+ movq 0x8(%rsp), %rsi
+ movq 0x10(%rsp), %rdx
+ leave
+ movq CP_COPYOUT(%rax), %rax
+ INDIRECT_JMP_REG(rax)
+
+2: movl $-1, %eax
+ leave
+ ret
+ SET_SIZE(copyout)
+
+ ENTRY(xcopyout_nta)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $24, %rsp
+
+ /*
+ * save args in case we trap and need to rerun as a copyop
+ */
+ movq %rdi, (%rsp)
+ movq %rsi, 0x8(%rsp)
+ movq %rdx, 0x10(%rsp)
+
+ movq kernelbase(%rip), %rax
+#ifdef DEBUG
+ cmpq %rax, %rdi /* %rdi = kaddr */
+ jnb 1f
+ leaq .xcopyout_panic_msg(%rip), %rdi
+ xorl %eax, %eax
+ call panic
+1:
+#endif
+ movq %gs:CPU_THREAD, %r9
+ cmpq %rax, %rsi /* test uaddr < kernelbase */
+ jae 4f
+
+ cmpq $0, %rcx /* No non-temporal access? */
+ /*
+ * pass lofault value as 4th argument to do_copy_fault
+ */
+ leaq _xcopyout_err(%rip), %rcx
+ jnz 6f
+ /*
+ * Make sure cnt is >= XCOPY_MIN_SIZE bytes
+ */
+ cmpq $XCOPY_MIN_SIZE, %rdx
+ jae 5f
+6:
+ SMAP_DISABLE_INSTR(4)
+ jmp do_copy_fault
+
+ /*
+ * Make sure src and dst are NTA_ALIGN_SIZE aligned,
+ * count is COUNT_ALIGN_SIZE aligned.
+ */
+5:
+ movq %rdi, %r10
+ orq %rsi, %r10
+ andq $NTA_ALIGN_MASK, %r10
+ orq %rdx, %r10
+ andq $COUNT_ALIGN_MASK, %r10
+ jnz 6b
+ leaq _xcopyout_nta_err(%rip), %rcx
+ SMAP_DISABLE_INSTR(5)
+ call do_copy_fault_nta
+ SMAP_ENABLE_INSTR(5)
+ ret
+
+4:
+ movl $EFAULT, %eax
+ jmp 3f
+
+ /*
+ * A fault during do_copy_fault or do_copy_fault_nta is
+ * indicated through an errno value in %rax and we iret from the
+ * trap handler to here.
+ */
+_xcopyout_err:
+ addq $8, %rsp /* pop bcopy_altentry call ret addr */
+_xcopyout_nta_err:
+ SMAP_ENABLE_INSTR(6)
+ movq %r11, T_LOFAULT(%r9) /* restore original lofault */
+3:
+ movq T_COPYOPS(%r9), %r8
+ cmpq $0, %r8
+ jz 2f
+
+ /*
+ * reload args for the copyop
+ */
+ movq (%rsp), %rdi
+ movq 0x8(%rsp), %rsi
+ movq 0x10(%rsp), %rdx
+ leave
+ movq CP_XCOPYOUT(%r8), %r8
+ INDIRECT_JMP_REG(r8)
+
+2: leave
+ ret
+ SET_SIZE(xcopyout_nta)
+
+/*
+ * Copy a null terminated string from one point to another in
+ * the kernel address space.
+ */
+
+ ENTRY(copystr)
+ pushq %rbp
+ movq %rsp, %rbp
+#ifdef DEBUG
+ movq kernelbase(%rip), %rax
+ cmpq %rax, %rdi /* %rdi = from */
+ jb 0f
+ cmpq %rax, %rsi /* %rsi = to */
+ jnb 1f
+0: leaq .copystr_panic_msg(%rip), %rdi
+ xorl %eax, %eax
+ call panic
+1:
+#endif
+ movq %gs:CPU_THREAD, %r9
+ movq T_LOFAULT(%r9), %r8 /* pass current lofault value as */
+ /* 5th argument to do_copystr */
+ xorl %r10d,%r10d /* pass smap restore need in %r10d */
+ /* as a non-ABI 6th arg */
+do_copystr:
+ movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */
+ movq T_LOFAULT(%r9), %r11 /* save the current lofault */
+ movq %r8, T_LOFAULT(%r9) /* new lofault */
+
+ movq %rdx, %r8 /* save maxlength */
+
+ cmpq $0, %rdx /* %rdx = maxlength */
+ je copystr_enametoolong /* maxlength == 0 */
+
+copystr_loop:
+ decq %r8
+ movb (%rdi), %al
+ incq %rdi
+ movb %al, (%rsi)
+ incq %rsi
+ cmpb $0, %al
+ je copystr_null /* null char */
+ cmpq $0, %r8
+ jne copystr_loop
+
+copystr_enametoolong:
+ movl $ENAMETOOLONG, %eax
+ jmp copystr_out
+
+copystr_null:
+ xorl %eax, %eax /* no error */
+
+copystr_out:
+ cmpq $0, %rcx /* want length? */
+ je copystr_smap /* no */
+ subq %r8, %rdx /* compute length and store it */
+ movq %rdx, (%rcx)
+
+copystr_smap:
+ cmpl $0, %r10d
+ jz copystr_done
+ SMAP_ENABLE_INSTR(7)
+
+copystr_done:
+ movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
+ leave
+ ret
+ SET_SIZE(copystr)
+
+/*
+ * Copy a null terminated string from the user address space into
+ * the kernel address space.
+ */
+
+ ENTRY(copyinstr)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $32, %rsp
+
+ /*
+ * save args in case we trap and need to rerun as a copyop
+ */
+ movq %rdi, (%rsp)
+ movq %rsi, 0x8(%rsp)
+ movq %rdx, 0x10(%rsp)
+ movq %rcx, 0x18(%rsp)
+
+ movq kernelbase(%rip), %rax
+#ifdef DEBUG
+ cmpq %rax, %rsi /* %rsi = kaddr */
+ jnb 1f
+ leaq .copyinstr_panic_msg(%rip), %rdi
+ xorl %eax, %eax
+ call panic
+1:
+#endif
+ /*
+ * pass lofault value as 5th argument to do_copystr
+ * do_copystr expects whether or not we need smap in %r10d
+ */
+ leaq _copyinstr_error(%rip), %r8
+ movl $1, %r10d
+
+ cmpq %rax, %rdi /* test uaddr < kernelbase */
+ jae 4f
+ SMAP_DISABLE_INSTR(6)
+ jmp do_copystr
+4:
+ movq %gs:CPU_THREAD, %r9
+ jmp 3f
+
+_copyinstr_error:
+ SMAP_ENABLE_INSTR(8)
+ movq %r11, T_LOFAULT(%r9) /* restore original lofault */
+3:
+ movq T_COPYOPS(%r9), %rax
+ cmpq $0, %rax
+ jz 2f
+
+ /*
+ * reload args for the copyop
+ */
+ movq (%rsp), %rdi
+ movq 0x8(%rsp), %rsi
+ movq 0x10(%rsp), %rdx
+ movq 0x18(%rsp), %rcx
+ leave
+ movq CP_COPYINSTR(%rax), %rax
+ INDIRECT_JMP_REG(rax)
+
+2: movl $EFAULT, %eax /* return EFAULT */
+ leave
+ ret
+ SET_SIZE(copyinstr)
+
+/*
+ * Copy a null terminated string from the kernel
+ * address space to the user address space.
+ */
+
+ ENTRY(copyoutstr)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $32, %rsp
+
+ /*
+ * save args in case we trap and need to rerun as a copyop
+ */
+ movq %rdi, (%rsp)
+ movq %rsi, 0x8(%rsp)
+ movq %rdx, 0x10(%rsp)
+ movq %rcx, 0x18(%rsp)
+
+ movq kernelbase(%rip), %rax
+#ifdef DEBUG
+ cmpq %rax, %rdi /* %rdi = kaddr */
+ jnb 1f
+ leaq .copyoutstr_panic_msg(%rip), %rdi
+ jmp call_panic /* setup stack and call panic */
+1:
+#endif
+ /*
+ * pass lofault value as 5th argument to do_copystr
+ * pass one as 6th argument to do_copystr in %r10d
+ */
+ leaq _copyoutstr_error(%rip), %r8
+ movl $1, %r10d
+
+ cmpq %rax, %rsi /* test uaddr < kernelbase */
+ jae 4f
+ SMAP_DISABLE_INSTR(7)
+ jmp do_copystr
+4:
+ movq %gs:CPU_THREAD, %r9
+ jmp 3f
+
+_copyoutstr_error:
+ SMAP_ENABLE_INSTR(9)
+ movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
+3:
+ movq T_COPYOPS(%r9), %rax
+ cmpq $0, %rax
+ jz 2f
+
+ /*
+ * reload args for the copyop
+ */
+ movq (%rsp), %rdi
+ movq 0x8(%rsp), %rsi
+ movq 0x10(%rsp), %rdx
+ movq 0x18(%rsp), %rcx
+ leave
+ movq CP_COPYOUTSTR(%rax), %rax
+ INDIRECT_JMP_REG(rax)
+
+2: movl $EFAULT, %eax /* return EFAULT */
+ leave
+ ret
+ SET_SIZE(copyoutstr)
+
+/*
+ * Since all of the fuword() variants are so similar, we have a macro to spit
+ * them out. This allows us to create DTrace-unobservable functions easily.
+ */
+
+/*
+ * Note that we don't save and reload the arguments here
+ * because their values are not altered in the copy path.
+ * Additionally, when successful, the smap_enable jmp will
+ * actually return us to our original caller.
+ */
+
+#define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
+ ENTRY(NAME) \
+ movq %gs:CPU_THREAD, %r9; \
+ cmpq kernelbase(%rip), %rdi; \
+ jae 1f; \
+ leaq _flt_/**/NAME, %rdx; \
+ movq %rdx, T_LOFAULT(%r9); \
+ SMAP_DISABLE_INSTR(DISNUM) \
+ INSTR (%rdi), REG; \
+ movq $0, T_LOFAULT(%r9); \
+ INSTR REG, (%rsi); \
+ xorl %eax, %eax; \
+ SMAP_ENABLE_INSTR(EN1) \
+ ret; \
+_flt_/**/NAME: \
+ SMAP_ENABLE_INSTR(EN2) \
+ movq $0, T_LOFAULT(%r9); \
+1: \
+ movq T_COPYOPS(%r9), %rax; \
+ cmpq $0, %rax; \
+ jz 2f; \
+ movq COPYOP(%rax), %rax; \
+ INDIRECT_JMP_REG(rax); \
+2: \
+ movl $-1, %eax; \
+ ret; \
+ SET_SIZE(NAME)
+
+ FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
+ FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
+ FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
+ FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
+
+#undef FUWORD
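The generated fuword*() routines are used like the sketch below: fetch a naturally-sized word from a user address into a kernel location, getting 0 on success and -1 on fault (possibly after falling back to the thread's copyops). The caller name is hypothetical.

	#include <sys/types.h>
	#include <sys/systm.h>

	static int
	peek_user_word(const void *uaddr, uint32_t *valp)
	{
		return (fuword32(uaddr, valp));	/* 0 = ok, -1 = fault */
	}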
+
+/*
+ * Set user word.
+ */
+
+/*
+ * Note that we don't save and reload the arguments here
+ * because their values are not altered in the copy path.
+ */
+
+#define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
+ ENTRY(NAME) \
+ movq %gs:CPU_THREAD, %r9; \
+ cmpq kernelbase(%rip), %rdi; \
+ jae 1f; \
+ leaq _flt_/**/NAME, %rdx; \
+ SMAP_DISABLE_INSTR(DISNUM) \
+ movq %rdx, T_LOFAULT(%r9); \
+ INSTR REG, (%rdi); \
+ movq $0, T_LOFAULT(%r9); \
+ xorl %eax, %eax; \
+ SMAP_ENABLE_INSTR(EN1) \
+ ret; \
+_flt_/**/NAME: \
+ SMAP_ENABLE_INSTR(EN2) \
+ movq $0, T_LOFAULT(%r9); \
+1: \
+ movq T_COPYOPS(%r9), %rax; \
+ cmpq $0, %rax; \
+ jz 3f; \
+ movq COPYOP(%rax), %rax; \
+ INDIRECT_JMP_REG(rax); \
+3: \
+ movl $-1, %eax; \
+ ret; \
+ SET_SIZE(NAME)
+
+ SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
+ SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
+ SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
+ SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
+
+#undef SUWORD
+
+#define FUWORD_NOERR(NAME, INSTR, REG) \
+ ENTRY(NAME) \
+ cmpq kernelbase(%rip), %rdi; \
+ cmovnbq kernelbase(%rip), %rdi; \
+ INSTR (%rdi), REG; \
+ INSTR REG, (%rsi); \
+ ret; \
+ SET_SIZE(NAME)
+
+ FUWORD_NOERR(fuword64_noerr, movq, %rax)
+ FUWORD_NOERR(fuword32_noerr, movl, %eax)
+ FUWORD_NOERR(fuword16_noerr, movw, %ax)
+ FUWORD_NOERR(fuword8_noerr, movb, %al)
+
+#undef FUWORD_NOERR
+
+#define SUWORD_NOERR(NAME, INSTR, REG) \
+ ENTRY(NAME) \
+ cmpq kernelbase(%rip), %rdi; \
+ cmovnbq kernelbase(%rip), %rdi; \
+ INSTR REG, (%rdi); \
+ ret; \
+ SET_SIZE(NAME)
+
+ SUWORD_NOERR(suword64_noerr, movq, %rsi)
+ SUWORD_NOERR(suword32_noerr, movl, %esi)
+ SUWORD_NOERR(suword16_noerr, movw, %si)
+ SUWORD_NOERR(suword8_noerr, movb, %sil)
+
+#undef SUWORD_NOERR
+
+
+ .weak subyte
+ subyte=suword8
+ .weak subyte_noerr
+ subyte_noerr=suword8_noerr
+
+ .weak fulword
+ fulword=fuword64
+ .weak fulword_noerr
+ fulword_noerr=fuword64_noerr
+ .weak sulword
+ sulword=suword64
+ .weak sulword_noerr
+ sulword_noerr=suword64_noerr
+
+ ENTRY(copyin_noerr)
+ movq kernelbase(%rip), %rax
+#ifdef DEBUG
+ cmpq %rax, %rsi /* %rsi = kto */
+ jae 1f
+ leaq .cpyin_ne_pmsg(%rip), %rdi
+ jmp call_panic /* setup stack and call panic */
+1:
+#endif
+ cmpq %rax, %rdi /* ufrom < kernelbase */
+ jb do_copy
+ movq %rax, %rdi /* force fault at kernelbase */
+ jmp do_copy
+ SET_SIZE(copyin_noerr)
+
+ ENTRY(copyout_noerr)
+ movq kernelbase(%rip), %rax
+#ifdef DEBUG
+ cmpq %rax, %rdi /* %rdi = kfrom */
+ jae 1f
+ leaq .cpyout_ne_pmsg(%rip), %rdi
+ jmp call_panic /* setup stack and call panic */
+1:
+#endif
+ cmpq %rax, %rsi /* uto < kernelbase */
+ jb do_copy
+ movq %rax, %rsi /* force fault at kernelbase */
+ jmp do_copy
+ SET_SIZE(copyout_noerr)
+
+ ENTRY(uzero)
+ movq kernelbase(%rip), %rax
+ cmpq %rax, %rdi
+ jb do_zero
+ movq %rax, %rdi /* force fault at kernelbase */
+ jmp do_zero
+ SET_SIZE(uzero)
+
+ ENTRY(ucopy)
+ movq kernelbase(%rip), %rax
+ cmpq %rax, %rdi
+ cmovaeq %rax, %rdi /* force fault at kernelbase */
+ cmpq %rax, %rsi
+ cmovaeq %rax, %rsi /* force fault at kernelbase */
+ jmp do_copy
+ SET_SIZE(ucopy)
+
+ /*
+	 * Note, the frame pointer is required here because do_copystr expects
+ * to be able to pop it off!
+ */
+ ENTRY(ucopystr)
+ pushq %rbp
+ movq %rsp, %rbp
+ movq kernelbase(%rip), %rax
+ cmpq %rax, %rdi
+ cmovaeq %rax, %rdi /* force fault at kernelbase */
+ cmpq %rax, %rsi
+ cmovaeq %rax, %rsi /* force fault at kernelbase */
+ /* do_copystr expects lofault address in %r8 */
+ /* do_copystr expects whether or not we need smap in %r10 */
+ xorl %r10d, %r10d
+ movq %gs:CPU_THREAD, %r8
+ movq T_LOFAULT(%r8), %r8
+ jmp do_copystr
+ SET_SIZE(ucopystr)
+
+#ifdef DEBUG
+ .data
+.kcopy_panic_msg:
+ .string "kcopy: arguments below kernelbase"
+.bcopy_panic_msg:
+ .string "bcopy: arguments below kernelbase"
+.kzero_panic_msg:
+ .string "kzero: arguments below kernelbase"
+.bzero_panic_msg:
+ .string "bzero: arguments below kernelbase"
+.copyin_panic_msg:
+ .string "copyin: kaddr argument below kernelbase"
+.xcopyin_panic_msg:
+ .string "xcopyin: kaddr argument below kernelbase"
+.copyout_panic_msg:
+ .string "copyout: kaddr argument below kernelbase"
+.xcopyout_panic_msg:
+ .string "xcopyout: kaddr argument below kernelbase"
+.copystr_panic_msg:
+ .string "copystr: arguments in user space"
+.copyinstr_panic_msg:
+ .string "copyinstr: kaddr argument not in kernel address space"
+.copyoutstr_panic_msg:
+ .string "copyoutstr: kaddr argument not in kernel address space"
+.cpyin_ne_pmsg:
+ .string "copyin_noerr: argument not in kernel address space"
+.cpyout_ne_pmsg:
+ .string "copyout_noerr: argument not in kernel address space"
+#endif
+
+.data
+.align 4
+.globl _smap_enable_patch_count
+.type _smap_enable_patch_count,@object
+.size _smap_enable_patch_count, 4
+_smap_enable_patch_count:
+ .long SMAP_ENABLE_COUNT
+
+.globl _smap_disable_patch_count
+.type _smap_disable_patch_count,@object
+.size _smap_disable_patch_count, 4
+_smap_disable_patch_count:
+ .long SMAP_DISABLE_COUNT
diff --git a/usr/src/uts/intel/ml/ddi_i86_asm.s b/usr/src/uts/intel/ml/ddi_i86_asm.s
new file mode 100644
index 0000000000..2fa9bd75e9
--- /dev/null
+++ b/usr/src/uts/intel/ml/ddi_i86_asm.s
@@ -0,0 +1,522 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/asm_misc.h>
+#include "assym.h"
+
+ ENTRY(ddi_get8)
+ ALTENTRY(ddi_mem_get8)
+ ALTENTRY(ddi_io_get8)
+ movl ACC_ATTR(%rdi), %edx
+ cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %edx
+ jne 1f
+ movq %rsi, %rdx
+ xorq %rax, %rax
+ inb (%dx)
+ ret
+1:
+ cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %edx
+ jne 2f
+ movzbq (%rsi), %rax
+ ret
+2:
+ movq ACC_GETB(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_get8)
+ SET_SIZE(ddi_mem_get8)
+ SET_SIZE(ddi_io_get8)
+
+
+ ENTRY(ddi_get16)
+ ALTENTRY(ddi_mem_get16)
+ ALTENTRY(ddi_io_get16)
+ movl ACC_ATTR(%rdi), %edx
+ cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %edx
+ jne 3f
+ movq %rsi, %rdx
+ xorq %rax, %rax
+ inw (%dx)
+ ret
+3:
+ cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %edx
+ jne 4f
+ movzwq (%rsi), %rax
+ ret
+4:
+ movq ACC_GETW(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_get16)
+ SET_SIZE(ddi_mem_get16)
+ SET_SIZE(ddi_io_get16)
+
+
+ ENTRY(ddi_get32)
+ ALTENTRY(ddi_mem_get32)
+ ALTENTRY(ddi_io_get32)
+ movl ACC_ATTR(%rdi), %edx
+ cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %edx
+ jne 5f
+ movq %rsi, %rdx
+ inl (%dx)
+ ret
+5:
+ cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %edx
+ jne 6f
+ movl (%rsi), %eax
+ ret
+6:
+ movq ACC_GETL(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_get32)
+ SET_SIZE(ddi_mem_get32)
+ SET_SIZE(ddi_io_get32)
+
+
+ ENTRY(ddi_get64)
+ ALTENTRY(ddi_mem_get64)
+ movq ACC_GETLL(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_get64)
+ SET_SIZE(ddi_mem_get64)
+
+
+ ENTRY(ddi_put8)
+ ALTENTRY(ddi_mem_put8)
+ ALTENTRY(ddi_io_put8)
+ movl ACC_ATTR(%rdi), %ecx
+ cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %ecx
+ jne 7f
+ movq %rdx, %rax
+ movq %rsi, %rdx
+ outb (%dx)
+ ret
+7:
+ cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %ecx
+ jne 8f
+ movb %dl, (%rsi)
+ ret
+8:
+ movq ACC_PUTB(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_put8)
+ SET_SIZE(ddi_mem_put8)
+ SET_SIZE(ddi_io_put8)
+
+
+ ENTRY(ddi_put16)
+ ALTENTRY(ddi_mem_put16)
+ ALTENTRY(ddi_io_put16)
+ movl ACC_ATTR(%rdi), %ecx
+ cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %ecx
+ jne 8f
+ movq %rdx, %rax
+ movq %rsi, %rdx
+ outw (%dx)
+ ret
+8:
+ cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %ecx
+ jne 9f
+ movw %dx, (%rsi)
+ ret
+9:
+ movq ACC_PUTW(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_put16)
+ SET_SIZE(ddi_mem_put16)
+ SET_SIZE(ddi_io_put16)
+
+
+ ENTRY(ddi_put32)
+ ALTENTRY(ddi_mem_put32)
+ ALTENTRY(ddi_io_put32)
+ movl ACC_ATTR(%rdi), %ecx
+ cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %ecx
+ jne 8f
+ movq %rdx, %rax
+ movq %rsi, %rdx
+ outl (%dx)
+ ret
+8:
+ cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %ecx
+ jne 9f
+ movl %edx, (%rsi)
+ ret
+9:
+ movq ACC_PUTL(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_put32)
+ SET_SIZE(ddi_mem_put32)
+ SET_SIZE(ddi_io_put32)
+
+
+ ENTRY(ddi_put64)
+ ALTENTRY(ddi_mem_put64)
+ movq ACC_PUTLL(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_put64)
+ SET_SIZE(ddi_mem_put64)
+
+
+ ENTRY(ddi_rep_get8)
+ ALTENTRY(ddi_mem_rep_get8)
+ movq ACC_REP_GETB(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_rep_get8)
+ SET_SIZE(ddi_mem_rep_get8)
+
+
+ ENTRY(ddi_rep_get16)
+ ALTENTRY(ddi_mem_rep_get16)
+ movq ACC_REP_GETW(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_rep_get16)
+ SET_SIZE(ddi_mem_rep_get16)
+
+
+ ENTRY(ddi_rep_get32)
+ ALTENTRY(ddi_mem_rep_get32)
+ movq ACC_REP_GETL(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_rep_get32)
+ SET_SIZE(ddi_mem_rep_get32)
+
+
+ ENTRY(ddi_rep_get64)
+ ALTENTRY(ddi_mem_rep_get64)
+ movq ACC_REP_GETLL(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_rep_get64)
+ SET_SIZE(ddi_mem_rep_get64)
+
+
+ ENTRY(ddi_rep_put8)
+ ALTENTRY(ddi_mem_rep_put8)
+ movq ACC_REP_PUTB(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_rep_put8)
+ SET_SIZE(ddi_mem_rep_put8)
+
+
+ ENTRY(ddi_rep_put16)
+ ALTENTRY(ddi_mem_rep_put16)
+ movq ACC_REP_PUTW(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_rep_put16)
+ SET_SIZE(ddi_mem_rep_put16)
+
+
+ ENTRY(ddi_rep_put32)
+ ALTENTRY(ddi_mem_rep_put32)
+ movq ACC_REP_PUTL(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_rep_put32)
+ SET_SIZE(ddi_mem_rep_put32)
+
+
+ ENTRY(ddi_rep_put64)
+ ALTENTRY(ddi_mem_rep_put64)
+ movq ACC_REP_PUTLL(%rdi), %rax
+ INDIRECT_JMP_REG(rax)
+ SET_SIZE(ddi_rep_put64)
+ SET_SIZE(ddi_mem_rep_put64)
+
+ ENTRY(i_ddi_vaddr_get8)
+ movzbq (%rsi), %rax
+ ret
+ SET_SIZE(i_ddi_vaddr_get8)
+
+ ENTRY(i_ddi_vaddr_get16)
+ movzwq (%rsi), %rax
+ ret
+ SET_SIZE(i_ddi_vaddr_get16)
+
+
+ ENTRY(i_ddi_vaddr_get32)
+ movl (%rsi), %eax
+ ret
+ SET_SIZE(i_ddi_vaddr_get32)
+
+
+ ENTRY(i_ddi_vaddr_get64)
+ movq (%rsi), %rax
+ ret
+ SET_SIZE(i_ddi_vaddr_get64)
+
+
+ ENTRY(i_ddi_io_get8)
+ movq %rsi, %rdx
+ inb (%dx)
+ movzbq %al, %rax
+ ret
+ SET_SIZE(i_ddi_io_get8)
+
+
+ ENTRY(i_ddi_io_get16)
+ movq %rsi, %rdx
+ inw (%dx)
+ movzwq %ax, %rax
+ ret
+ SET_SIZE(i_ddi_io_get16)
+
+
+ ENTRY(i_ddi_io_get32)
+ movq %rsi, %rdx
+ inl (%dx)
+ ret
+ SET_SIZE(i_ddi_io_get32)
+
+ ENTRY(i_ddi_vaddr_put8)
+ movb %dl, (%rsi)
+ ret
+ SET_SIZE(i_ddi_vaddr_put8)
+
+
+ ENTRY(i_ddi_vaddr_put16)
+ movw %dx, (%rsi)
+ ret
+ SET_SIZE(i_ddi_vaddr_put16)
+
+
+ ENTRY(i_ddi_vaddr_put32)
+ movl %edx, (%rsi)
+ ret
+ SET_SIZE(i_ddi_vaddr_put32)
+
+
+ ENTRY(i_ddi_vaddr_put64)
+ movq %rdx, (%rsi)
+ ret
+ SET_SIZE(i_ddi_vaddr_put64)
+
+ ENTRY(i_ddi_io_put8)
+ movq %rdx, %rax
+ movq %rsi, %rdx
+ outb (%dx)
+ ret
+ SET_SIZE(i_ddi_io_put8)
+
+
+ ENTRY(i_ddi_io_put16)
+ movq %rdx, %rax
+ movq %rsi, %rdx
+ outw (%dx)
+ ret
+ SET_SIZE(i_ddi_io_put16)
+
+
+ ENTRY(i_ddi_io_put32)
+ movq %rdx, %rax
+ movq %rsi, %rdx
+ outl (%dx)
+ ret
+ SET_SIZE(i_ddi_io_put32)
+
+ /*
+ * Incoming arguments
+ *
+ * %rdi : hdlp
+ * %rsi : host_addr
+ * %rdx : dev_addr
+ * %rcx : repcount
+ * %r8 : flags
+ *
+ * This routine will destroy values in %rdx, %rsi, %rcx.
+ */
+ ENTRY(i_ddi_io_rep_get8)
+
+ cmpq $DDI_DEV_AUTOINCR, %r8
+ je gb_ioadv
+ movq %rsi, %rdi
+ rep
+ insb
+ ret
+
+gb_ioadv:
+ andq %rcx, %rcx
+ jz gb_ioadv_done
+gb_ioadv2:
+ inb (%dx)
+ movb %al, (%rsi)
+ incq %rdx
+ incq %rsi
+ decq %rcx
+ jg gb_ioadv2
+
+gb_ioadv_done:
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+
+ SET_SIZE(i_ddi_io_rep_get8)
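At the DDI level this backs ddi_rep_get8(9F) for I/O-space mappings; the hedged sketch below shows the usual distinction between DDI_DEV_AUTOINCR (walk the device addresses) and DDI_DEV_NO_AUTOINCR (reread the same register, e.g. a FIFO data port). The handle and register pointer are hypothetical.

	#include <sys/ddi.h>
	#include <sys/sunddi.h>

	static void
	drain_fifo(ddi_acc_handle_t hdl, uint8_t *fifo_reg, uint8_t *buf, size_t n)
	{
		/* Re-read one FIFO data register n times into buf. */
		ddi_rep_get8(hdl, buf, fifo_reg, n, DDI_DEV_NO_AUTOINCR);
	}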
+
+
+ ENTRY(i_ddi_io_rep_get16)
+
+ cmpq $DDI_DEV_AUTOINCR, %r8
+ je gw_ioadv
+
+ movq %rsi, %rdi
+ rep
+ insw
+ ret
+
+gw_ioadv:
+ andq %rcx, %rcx
+ jz gw_ioadv_done
+gw_ioadv2:
+ inw (%dx)
+ movw %ax,(%rsi)
+ addq $2, %rsi
+ addq $2, %rdx
+ decq %rcx
+ jg gw_ioadv2
+
+gw_ioadv_done:
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(i_ddi_io_rep_get16)
+
+
+ ENTRY(i_ddi_io_rep_get32)
+
+ cmpq $DDI_DEV_AUTOINCR, %r8
+ je gl_ioadv
+
+ movq %rsi, %rdi
+ rep
+ insl
+ ret
+
+gl_ioadv:
+ andq %rcx, %rcx
+ jz gl_ioadv_done
+gl_ioadv2:
+ inl (%dx)
+ movl %eax,(%rsi)
+ addq $4, %rsi
+ addq $4, %rdx
+ decq %rcx
+ jg gl_ioadv2
+
+gl_ioadv_done:
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+
+ SET_SIZE(i_ddi_io_rep_get32)
+
+ /*
+ * Incoming arguments
+ *
+ * %rdi : hdlp
+ * %rsi : host_addr
+ * %rdx : dev_addr
+ * %rcx : repcount
+ * %r8 : flags
+ *
+ * This routine will destroy values in %rdx, %rsi, %rcx.
+ */
+ ENTRY(i_ddi_io_rep_put8)
+
+ cmpq $DDI_DEV_AUTOINCR, %r8
+ je pb_ioadv
+
+ movq %rsi, %rdi
+ rep
+ outsb
+ ret
+
+pb_ioadv:
+ andq %rcx, %rcx
+ jz pb_ioadv_done
+pb_ioadv2:
+ movb (%rsi), %al
+ outb (%dx)
+ incq %rsi
+ incq %rdx
+ decq %rcx
+ jg pb_ioadv2
+
+pb_ioadv_done:
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(i_ddi_io_rep_put8)
+
+ ENTRY(i_ddi_io_rep_put16)
+
+ cmpq $DDI_DEV_AUTOINCR, %r8
+ je pw_ioadv
+
+ movq %rsi, %rdi
+ rep
+ outsw
+ ret
+
+pw_ioadv:
+ andq %rcx, %rcx
+ jz pw_ioadv_done
+pw_ioadv2:
+ movw (%rsi), %ax
+ outw (%dx)
+ addq $2, %rsi
+ addq $2, %rdx
+ decq %rcx
+ jg pw_ioadv2
+
+pw_ioadv_done:
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(i_ddi_io_rep_put16)
+
+
+ ENTRY(i_ddi_io_rep_put32)
+
+ cmpq $DDI_DEV_AUTOINCR, %r8
+ je pl_ioadv
+
+ movq %rsi, %rdi
+ rep
+ outsl
+ ret
+
+pl_ioadv:
+ andq %rcx, %rcx
+ jz pl_ioadv_done
+pl_ioadv2:
+ movl (%rsi), %eax
+ outl (%dx)
+ addq $4, %rsi
+ addq $4, %rdx
+ decq %rcx
+ jg pl_ioadv2
+
+pl_ioadv_done:
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(i_ddi_io_rep_put32)
diff --git a/usr/src/uts/intel/ml/desctbls_asm.s b/usr/src/uts/intel/ml/desctbls_asm.s
new file mode 100644
index 0000000000..4528bc07ad
--- /dev/null
+++ b/usr/src/uts/intel/ml/desctbls_asm.s
@@ -0,0 +1,118 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/asm_misc.h>
+#include <sys/regset.h>
+#include <sys/panic.h>
+#include <sys/ontrap.h>
+#include <sys/privregs.h>
+#include <sys/segments.h>
+#include <sys/trap.h>
+
+#include "assym.h"
+
+ ENTRY_NP(rd_idtr)
+ sidt (%rdi)
+ ret
+ SET_SIZE(rd_idtr)
+
+ ENTRY_NP(wr_idtr)
+ lidt (%rdi)
+ ret
+ SET_SIZE(wr_idtr)
+
+ ENTRY_NP(rd_gdtr)
+ pushq %rbp
+ movq %rsp, %rbp
+ sgdt (%rdi)
+ leave
+ ret
+ SET_SIZE(rd_gdtr)
+
+ ENTRY_NP(wr_gdtr)
+ pushq %rbp
+ movq %rsp, %rbp
+ lgdt (%rdi)
+ jmp 1f
+ nop
+1:
+ leave
+ ret
+ SET_SIZE(wr_gdtr)
+
+ /*
+ * Load the segment registers: %cs is reloaded via the far return below,
+ * %fs, %gs and %ss come from the arguments, and %ds/%es are simply
+ * zeroed since they are ignored in 64-bit mode.
+ */
+ ENTRY_NP(load_segment_registers)
+ pushq %rbp
+ movq %rsp, %rbp
+ pushq %rdi
+ pushq $.newcs
+ lretq
+.newcs:
+ /*
+ * zero %ds and %es - they're ignored anyway
+ */
+ xorl %eax, %eax
+ movw %ax, %ds
+ movw %ax, %es
+ movl %esi, %eax
+ movw %ax, %fs
+ movl %edx, %eax
+ movw %ax, %gs
+ movl %ecx, %eax
+ movw %ax, %ss
+ leave
+ ret
+ SET_SIZE(load_segment_registers)
+
+ ENTRY_NP(get_cs_register)
+ movq %cs, %rax
+ ret
+ SET_SIZE(get_cs_register)
+
+ ENTRY_NP(wr_ldtr)
+ movq %rdi, %rax
+ lldt %ax
+ ret
+ SET_SIZE(wr_ldtr)
+
+ ENTRY_NP(rd_ldtr)
+ xorl %eax, %eax
+ sldt %ax
+ ret
+ SET_SIZE(rd_ldtr)
+
+ ENTRY_NP(wr_tsr)
+ movq %rdi, %rax
+ ltr %ax
+ ret
+ SET_SIZE(wr_tsr)
+
diff --git a/usr/src/uts/intel/ml/exception.s b/usr/src/uts/intel/ml/exception.s
new file mode 100644
index 0000000000..92c410adc0
--- /dev/null
+++ b/usr/src/uts/intel/ml/exception.s
@@ -0,0 +1,917 @@
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * Copyright (c) 1989, 1990 William F. Jolitz.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/amd64/amd64/exception.S,v 1.113 2003/10/15 02:04:52 peter Exp $
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/asm_misc.h>
+#include <sys/trap.h>
+#include <sys/psw.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+#include <sys/dtrace.h>
+#include <sys/x86_archext.h>
+#include <sys/traptrace.h>
+#include <sys/machparam.h>
+
+#include "assym.h"
+
+/*
+ * push $0 on stack for traps that do not
+ * generate an error code. This is so the rest
+ * of the kernel can expect a consistent stack
+ * from from any exception.
+ *
+ * Note that for all exceptions for amd64
+ * %r11 and %rcx are on the stack. Just pop
+ * them back into their appropriate registers and let
+ * it get saved as is running native.
+ */
+
+#if defined(__xpv)
+
+#define NPTRAP_NOERR(trapno) \
+ pushq $0; \
+ pushq $trapno
+
+#define TRAP_NOERR(trapno) \
+ XPV_TRAP_POP; \
+ NPTRAP_NOERR(trapno)
+
+/*
+ * error code already pushed by hw
+ * onto stack.
+ */
+#define TRAP_ERR(trapno) \
+ XPV_TRAP_POP; \
+ pushq $trapno
+
+#else /* __xpv */
+
+#define TRAP_NOERR(trapno) \
+ push $0; \
+ push $trapno
+
+#define NPTRAP_NOERR(trapno) TRAP_NOERR(trapno)
+
+/*
+ * error code already pushed by hw
+ * onto stack.
+ */
+#define TRAP_ERR(trapno) \
+ push $trapno
+
+#endif /* __xpv */
+
+ /*
+ * These are the stacks used on cpu0 for taking double faults,
+ * NMIs and MCEs.
+ *
+ * We define them here instead of in a C file so that we can page-align
+ * them (gcc won't do that in a .c file).
+ */
+ .data
+ DGDEF3(dblfault_stack0, DEFAULTSTKSZ, MMU_PAGESIZE)
+ .fill DEFAULTSTKSZ, 1, 0
+ DGDEF3(nmi_stack0, DEFAULTSTKSZ, MMU_PAGESIZE)
+ .fill DEFAULTSTKSZ, 1, 0
+ DGDEF3(mce_stack0, DEFAULTSTKSZ, MMU_PAGESIZE)
+ .fill DEFAULTSTKSZ, 1, 0
+
+ /*
+ * #DE
+ */
+ ENTRY_NP(div0trap)
+ TRAP_NOERR(T_ZERODIV) /* $0 */
+ jmp cmntrap
+ SET_SIZE(div0trap)
+
+ /*
+ * #DB
+ *
+ * Fetch %dr6 and clear it, handing off the value to the
+ * cmntrap code in %r15/%esi
+ */
+ ENTRY_NP(dbgtrap)
+ TRAP_NOERR(T_SGLSTP) /* $1 */
+
+#if !defined(__xpv) /* no sysenter support yet */
+ /*
+ * If we get here as a result of single-stepping a sysenter
+ * instruction, we suddenly find ourselves taking a #db
+ * in kernel mode -before- we've swapgs'ed. So before we can
+ * take the trap, we do the swapgs here, and fix the return
+ * %rip in trap() so that we return immediately after the
+ * swapgs in the sysenter handler to avoid doing the swapgs again.
+ *
+ * Nobody said that the design of sysenter was particularly
+ * elegant, did they?
+ */
+
+ pushq %r11
+
+ /*
+ * At this point the stack looks like this:
+ *
+ * (high address) r_ss
+ * r_rsp
+ * r_rfl
+ * r_cs
+ * r_rip <-- %rsp + 24
+ * r_err <-- %rsp + 16
+ * r_trapno <-- %rsp + 8
+ * (low address) %r11 <-- %rsp
+ */
+ leaq sys_sysenter(%rip), %r11
+ cmpq %r11, 24(%rsp) /* Compare to saved r_rip on the stack */
+ je 1f
+ leaq brand_sys_sysenter(%rip), %r11
+ cmpq %r11, 24(%rsp) /* Compare to saved r_rip on the stack */
+ je 1f
+ leaq tr_sys_sysenter(%rip), %r11
+ cmpq %r11, 24(%rsp)
+ je 1f
+ leaq tr_brand_sys_sysenter(%rip), %r11
+ cmpq %r11, 24(%rsp)
+ jne 2f
+1: swapgs
+2: lfence /* swapgs mitigation */
+ popq %r11
+#endif /* !__xpv */
+
+ INTR_PUSH
+#if defined(__xpv)
+ movl $6, %edi
+ call kdi_dreg_get
+ movq %rax, %r15 /* %db6 -> %r15 */
+ movl $6, %edi
+ movl $0, %esi
+ call kdi_dreg_set /* 0 -> %db6 */
+#else
+ movq %db6, %r15
+ xorl %eax, %eax
+ movq %rax, %db6
+#endif
+
+ jmp cmntrap_pushed
+ SET_SIZE(dbgtrap)
+
+#if !defined(__xpv)
+
+/*
+ * Macro to set the gsbase or kgsbase to the address of the struct cpu
+ * for this processor. If we came from userland, set kgsbase else
+ * set gsbase. We find the proper cpu struct by looping through
+ * the cpu structs for all processors till we find a match for the gdt
+ * of the trapping processor. The stack is expected to be pointing at
+ * the standard regs pushed by hardware on a trap (plus error code and trapno).
+ *
+ * It's ok for us to clobber gsbase here (and possibly end up with both gsbase
+ * and kgsbase set to the same value) because we're not going back the normal
+ * way out of here (via IRET). Where we're going, we don't need no user %gs.
+ */
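+/*
+ * A rough C rendering of the search below, for illustration only (the real
+ * work must stay in assembly because no %gs base can be trusted yet;
+ * "gdt_base" stands for the base address read via sgdt and "from_kernel"
+ * for the REGOFF_CS == KCS_SEL test):
+ *
+ *	for (i = 0; i < NCPU; i++) {
+ *		if (cpu[i] != NULL && cpu[i]->cpu_gdt == gdt_base) {
+ *			wrmsr(from_kernel ? MSR_AMD_GSBASE : MSR_AMD_KGSBASE,
+ *			    (uint64_t)cpu[i]);
+ *			break;
+ *		}
+ *	}
+ */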
+#define SET_CPU_GSBASE \
+ subq $REGOFF_TRAPNO, %rsp; /* save regs */ \
+ movq %rax, REGOFF_RAX(%rsp); \
+ movq %rbx, REGOFF_RBX(%rsp); \
+ movq %rcx, REGOFF_RCX(%rsp); \
+ movq %rdx, REGOFF_RDX(%rsp); \
+ movq %rbp, REGOFF_RBP(%rsp); \
+ movq %rsp, %rbp; \
+ subq $16, %rsp; /* space for gdt */ \
+ sgdt 6(%rsp); \
+ movq 8(%rsp), %rcx; /* %rcx has gdt to match */ \
+ xorl %ebx, %ebx; /* loop index */ \
+ leaq cpu(%rip), %rdx; /* cpu pointer array */ \
+1: \
+ movq (%rdx, %rbx, CLONGSIZE), %rax; /* get cpu[i] */ \
+ cmpq $0x0, %rax; /* cpu[i] == NULL ? */ \
+ je 2f; /* yes, continue */ \
+ cmpq %rcx, CPU_GDT(%rax); /* gdt == cpu[i]->cpu_gdt ? */ \
+ je 3f; /* yes, go set gsbase */ \
+2: \
+ incl %ebx; /* i++ */ \
+ cmpl $NCPU, %ebx; /* i < NCPU ? */ \
+ jb 1b; /* yes, loop */ \
+/* XXX BIG trouble if we fall thru here. We didn't find a gdt match */ \
+3: \
+ movl $MSR_AMD_KGSBASE, %ecx; \
+ cmpw $KCS_SEL, REGOFF_CS(%rbp); /* trap from kernel? */ \
+ jne 4f; /* no, go set KGSBASE */ \
+ movl $MSR_AMD_GSBASE, %ecx; /* yes, set GSBASE */ \
+ mfence; /* OPTERON_ERRATUM_88 */ \
+4: \
+ movq %rax, %rdx; /* write base register */ \
+ shrq $32, %rdx; \
+ wrmsr; \
+ movq REGOFF_RDX(%rbp), %rdx; /* restore regs */ \
+ movq REGOFF_RCX(%rbp), %rcx; \
+ movq REGOFF_RBX(%rbp), %rbx; \
+ movq REGOFF_RAX(%rbp), %rax; \
+ movq %rbp, %rsp; \
+ movq REGOFF_RBP(%rsp), %rbp; \
+ addq $REGOFF_TRAPNO, %rsp /* pop stack */
+
+#else /* __xpv */
+
+#define SET_CPU_GSBASE /* noop on the hypervisor */
+
+#endif /* __xpv */
+
+
+ /*
+ * #NMI
+ *
+ * XXPV: See 6532669.
+ */
+ ENTRY_NP(nmiint)
+ TRAP_NOERR(T_NMIFLT) /* $2 */
+
+ SET_CPU_GSBASE
+
+ /*
+ * Save all registers and setup segment registers
+ * with kernel selectors.
+ */
+ INTR_PUSH
+ INTGATE_INIT_KERNEL_FLAGS
+
+ TRACE_PTR(%r12, %rax, %eax, %rdx, $TT_TRAP)
+ TRACE_REGS(%r12, %rsp, %rax, %rbx)
+ TRACE_STAMP(%r12)
+
+ movq %rsp, %rbp
+
+ movq %rbp, %rdi
+ call av_dispatch_nmivect
+
+ INTR_POP
+ call x86_md_clear
+ jmp tr_iret_auto
+ /*NOTREACHED*/
+ SET_SIZE(nmiint)
+
+ /*
+ * #BP
+ */
+ ENTRY_NP(brktrap)
+ XPV_TRAP_POP
+ cmpw $KCS_SEL, 8(%rsp)
+ jne bp_user
+
+ /*
+ * This is a breakpoint in the kernel -- it is very likely that this
+ * is DTrace-induced. To unify DTrace handling, we spoof this as an
+ * invalid opcode (#UD) fault. Note that #BP is a trap, not a fault --
+ * we must decrement the trapping %rip to make it appear as a fault.
+ * We then push a non-zero error code to indicate that this is coming
+ * from #BP.
+ */
+ decq (%rsp)
+ push $1 /* error code -- non-zero for #BP */
+ jmp ud_kernel
+
+bp_user:
+
+ NPTRAP_NOERR(T_BPTFLT) /* $3 */
+ jmp dtrace_trap
+
+ SET_SIZE(brktrap)
+
+ /*
+ * #OF
+ */
+ ENTRY_NP(ovflotrap)
+ TRAP_NOERR(T_OVFLW) /* $4 */
+ jmp cmntrap
+ SET_SIZE(ovflotrap)
+
+ /*
+ * #BR
+ */
+ ENTRY_NP(boundstrap)
+ TRAP_NOERR(T_BOUNDFLT) /* $5 */
+ jmp cmntrap
+ SET_SIZE(boundstrap)
+
+ ENTRY_NP(invoptrap)
+
+ XPV_TRAP_POP
+
+ cmpw $KCS_SEL, 8(%rsp)
+ jne ud_user
+
+#if defined(__xpv)
+ movb $0, 12(%rsp) /* clear saved upcall_mask from %cs */
+#endif
+ push $0 /* error code -- zero for #UD */
+ud_kernel:
+ push $0xdddd /* a dummy trap number */
+ INTR_PUSH
+ movq REGOFF_RIP(%rsp), %rdi
+ movq REGOFF_RSP(%rsp), %rsi
+ movq REGOFF_RAX(%rsp), %rdx
+ pushq (%rsi)
+ movq %rsp, %rsi
+ subq $8, %rsp
+ call dtrace_invop
+ ALTENTRY(dtrace_invop_callsite)
+ addq $16, %rsp
+ cmpl $DTRACE_INVOP_PUSHL_EBP, %eax
+ je ud_push
+ cmpl $DTRACE_INVOP_LEAVE, %eax
+ je ud_leave
+ cmpl $DTRACE_INVOP_NOP, %eax
+ je ud_nop
+ cmpl $DTRACE_INVOP_RET, %eax
+ je ud_ret
+ jmp ud_trap
+
+ud_push:
+ /*
+ * We must emulate a "pushq %rbp". To do this, we pull the stack
+ * down 8 bytes, and then store the base pointer.
+ */
+ INTR_POP
+ subq $16, %rsp /* make room for %rbp */
+ pushq %rax /* push temp */
+ movq 24(%rsp), %rax /* load calling RIP */
+ addq $1, %rax /* increment over trapping instr */
+ movq %rax, 8(%rsp) /* store calling RIP */
+ movq 32(%rsp), %rax /* load calling CS */
+ movq %rax, 16(%rsp) /* store calling CS */
+ movq 40(%rsp), %rax /* load calling RFLAGS */
+ movq %rax, 24(%rsp) /* store calling RFLAGS */
+ movq 48(%rsp), %rax /* load calling RSP */
+ subq $8, %rax /* make room for %rbp */
+ movq %rax, 32(%rsp) /* store calling RSP */
+ movq 56(%rsp), %rax /* load calling SS */
+ movq %rax, 40(%rsp) /* store calling SS */
+ movq 32(%rsp), %rax /* reload calling RSP */
+ movq %rbp, (%rax) /* store %rbp there */
+ popq %rax /* pop off temp */
+ jmp tr_iret_kernel /* return from interrupt */
+ /*NOTREACHED*/
+
+ud_leave:
+ /*
+ * We must emulate a "leave", which is the same as a "movq %rbp,
+ * %rsp" followed by a "popq %rbp". We can exploit the fact
+ * that the %rsp is explicitly saved to effect the pop without
+ * having to reshuffle the other data pushed for the trap.
+ */
+
+ INTR_POP
+ pushq %rax /* push temp */
+ movq 8(%rsp), %rax /* load calling RIP */
+ addq $1, %rax /* increment over trapping instr */
+ movq %rax, 8(%rsp) /* store calling RIP */
+ movq (%rbp), %rax /* get new %rbp */
+ addq $8, %rbp /* adjust new %rsp */
+ movq %rbp, 32(%rsp) /* store new %rsp */
+ movq %rax, %rbp /* set new %rbp */
+ popq %rax /* pop off temp */
+ jmp tr_iret_kernel /* return from interrupt */
+ /*NOTREACHED*/
+
+ud_nop:
+ /*
+ * We must emulate a "nop". This is obviously not hard: we need only
+ * advance the %rip by one.
+ */
+ INTR_POP
+ incq (%rsp)
+ jmp tr_iret_kernel
+ /*NOTREACHED*/
+
+ud_ret:
+ INTR_POP
+ pushq %rax /* push temp */
+ movq 32(%rsp), %rax /* load %rsp */
+ movq (%rax), %rax /* load calling RIP */
+ movq %rax, 8(%rsp) /* store calling RIP */
+ addq $8, 32(%rsp) /* adjust new %rsp */
+ popq %rax /* pop off temp */
+ jmp tr_iret_kernel /* return from interrupt */
+ /*NOTREACHED*/
+
+ud_trap:
+ /*
+ * We're going to let the kernel handle this as a normal #UD. If,
+ * however, we came through #BP and are spoofing #UD (in this case,
+ * the stored error value will be non-zero), we need to de-spoof
+ * the trap by incrementing %rip and pushing T_BPTFLT.
+ */
+ cmpq $0, REGOFF_ERR(%rsp)
+ je ud_ud
+ incq REGOFF_RIP(%rsp)
+ addq $REGOFF_RIP, %rsp
+ NPTRAP_NOERR(T_BPTFLT) /* $3 */
+ jmp cmntrap
+
+ud_ud:
+ addq $REGOFF_RIP, %rsp
+ud_user:
+ NPTRAP_NOERR(T_ILLINST)
+ jmp cmntrap
+ SET_SIZE(invoptrap)
+
+ /*
+ * #NM
+ */
+
+ ENTRY_NP(ndptrap)
+ TRAP_NOERR(T_NOEXTFLT) /* $0 */
+ SET_CPU_GSBASE
+ jmp cmntrap
+ SET_SIZE(ndptrap)
+
+#if !defined(__xpv)
+
+ /*
+ * #DF
+ */
+ ENTRY_NP(syserrtrap)
+ pushq $T_DBLFLT
+ SET_CPU_GSBASE
+
+ /*
+ * We share this handler with kmdb (if kmdb is loaded). As such, we
+ * may have reached this point after encountering a #df in kmdb. If
+ * that happens, we'll still be on kmdb's IDT. We need to switch back
+ * to this CPU's IDT before proceeding. Furthermore, if we did arrive
+ * here from kmdb, kmdb is probably in a very sickly state, and
+ * shouldn't be entered from the panic flow. We'll suppress that
+ * entry by setting nopanicdebug.
+ */
+ pushq %rax
+ subq $DESCTBR_SIZE, %rsp
+ sidt (%rsp)
+ movq %gs:CPU_IDT, %rax
+ cmpq %rax, DTR_BASE(%rsp)
+ je 1f
+
+ movq %rax, DTR_BASE(%rsp)
+ movw $_MUL(NIDT, GATE_DESC_SIZE), DTR_LIMIT(%rsp)
+ lidt (%rsp)
+
+ movl $1, nopanicdebug
+
+1: addq $DESCTBR_SIZE, %rsp
+ popq %rax
+
+ DFTRAP_PUSH
+
+ /*
+ * freeze trap trace.
+ */
+#ifdef TRAPTRACE
+ leaq trap_trace_freeze(%rip), %r11
+ incl (%r11)
+#endif
+
+ ENABLE_INTR_FLAGS
+
+ movq %rsp, %rdi /* &regs */
+ xorl %esi, %esi /* clear address */
+ xorl %edx, %edx /* cpuid = 0 */
+ call trap
+
+ SET_SIZE(syserrtrap)
+
+#endif /* !__xpv */
+
+ /*
+ * #TS
+ */
+ ENTRY_NP(invtsstrap)
+ TRAP_ERR(T_TSSFLT) /* $10 already have error code on stack */
+ jmp cmntrap
+ SET_SIZE(invtsstrap)
+
+ /*
+ * #NP
+ */
+ ENTRY_NP(segnptrap)
+ TRAP_ERR(T_SEGFLT) /* $11 already have error code on stack */
+ SET_CPU_GSBASE
+ jmp cmntrap
+ SET_SIZE(segnptrap)
+
+ /*
+ * #SS
+ */
+ ENTRY_NP(stktrap)
+ TRAP_ERR(T_STKFLT) /* $12 already have error code on stack */
+ SET_CPU_GSBASE
+ jmp cmntrap
+ SET_SIZE(stktrap)
+
+ /*
+ * #GP
+ */
+ ENTRY_NP(gptrap)
+ TRAP_ERR(T_GPFLT) /* $13 already have error code on stack */
+ SET_CPU_GSBASE
+ jmp cmntrap
+ SET_SIZE(gptrap)
+
+ /*
+ * #PF
+ */
+ ENTRY_NP(pftrap)
+ TRAP_ERR(T_PGFLT) /* $14 already have error code on stack */
+ INTR_PUSH
+#if defined(__xpv)
+
+ movq %gs:CPU_VCPU_INFO, %r15
+ movq VCPU_INFO_ARCH_CR2(%r15), %r15 /* vcpu[].arch.cr2 */
+
+#else /* __xpv */
+
+ movq %cr2, %r15
+
+#endif /* __xpv */
+ jmp cmntrap_pushed
+ SET_SIZE(pftrap)
+
+ ENTRY_NP(resvtrap)
+ TRAP_NOERR(T_RESVTRAP) /* (reserved) */
+ jmp cmntrap
+ SET_SIZE(resvtrap)
+
+ /*
+ * #MF
+ */
+ ENTRY_NP(ndperr)
+ TRAP_NOERR(T_EXTERRFLT) /* $16 */
+ jmp cmninttrap
+ SET_SIZE(ndperr)
+
+ /*
+ * #AC
+ */
+ ENTRY_NP(achktrap)
+ TRAP_ERR(T_ALIGNMENT) /* $17 */
+ jmp cmntrap
+ SET_SIZE(achktrap)
+
+ /*
+ * #MC
+ */
+ .globl cmi_mca_trap /* see uts/i86pc/os/cmi.c */
+
+ ENTRY_NP(mcetrap)
+ TRAP_NOERR(T_MCE) /* $18 */
+
+ SET_CPU_GSBASE
+
+ INTR_PUSH
+ INTGATE_INIT_KERNEL_FLAGS
+
+ TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP)
+ TRACE_REGS(%rdi, %rsp, %rbx, %rcx)
+ TRACE_STAMP(%rdi)
+
+ movq %rsp, %rbp
+
+ movq %rsp, %rdi /* arg0 = struct regs *rp */
+ call cmi_mca_trap /* cmi_mca_trap(rp); */
+
+ jmp _sys_rtt
+ SET_SIZE(mcetrap)
+
+ /*
+ * #XF
+ */
+ ENTRY_NP(xmtrap)
+ TRAP_NOERR(T_SIMDFPE) /* $19 */
+ jmp cmninttrap
+ SET_SIZE(xmtrap)
+
+ ENTRY_NP(invaltrap)
+ TRAP_NOERR(T_INVALTRAP) /* very invalid */
+ jmp cmntrap
+ SET_SIZE(invaltrap)
+
+ .globl fasttable
+
+ ENTRY_NP(fasttrap)
+ cmpl $T_LASTFAST, %eax
+ ja 1f
+ orl %eax, %eax /* (zero extend top 32-bits) */
+ leaq fasttable(%rip), %r11
+ leaq (%r11, %rax, CLONGSIZE), %r11
+ movq (%r11), %r11
+ INDIRECT_JMP_REG(r11)
+1:
+ /*
+ * Fast syscall number was illegal. Make it look
+ * as if the INT failed. Modify %rip to point before the
+ * INT, push the expected error code and fake a GP fault.
+ *
+ * XXX Why make the error code be offset into idt + 1?
+ * Instead we should push a real (soft?) error code
+ * on the stack and #gp handler could know about fasttraps?
+ */
+ XPV_TRAP_POP
+
+ subq $2, (%rsp) /* XXX int insn 2-bytes */
+ pushq $_CONST(_MUL(T_FASTTRAP, GATE_DESC_SIZE) + 2)
+
+#if defined(__xpv)
+ pushq %r11
+ pushq %rcx
+#endif
+ jmp gptrap
+ SET_SIZE(fasttrap)
+
+ ENTRY_NP(dtrace_ret)
+ TRAP_NOERR(T_DTRACE_RET)
+ jmp dtrace_trap
+ SET_SIZE(dtrace_ret)
+
+ /*
+ * RFLAGS 24 bytes up the stack from %rsp.
+ * XXX a constant would be nicer.
+ */
+ ENTRY_NP(fast_null)
+ XPV_TRAP_POP
+ orq $PS_C, 24(%rsp) /* set carry bit in user flags */
+ call x86_md_clear
+ jmp tr_iret_auto
+ /*NOTREACHED*/
+ SET_SIZE(fast_null)
+
+ /*
+ * Interrupts start at 32
+ */
+#define MKIVCT(n) \
+ ENTRY_NP(ivct/**/n) \
+ push $0; \
+ push $n - 0x20; \
+ jmp cmnint; \
+ SET_SIZE(ivct/**/n)
+
+ MKIVCT(32)
+ MKIVCT(33)
+ MKIVCT(34)
+ MKIVCT(35)
+ MKIVCT(36)
+ MKIVCT(37)
+ MKIVCT(38)
+ MKIVCT(39)
+ MKIVCT(40)
+ MKIVCT(41)
+ MKIVCT(42)
+ MKIVCT(43)
+ MKIVCT(44)
+ MKIVCT(45)
+ MKIVCT(46)
+ MKIVCT(47)
+ MKIVCT(48)
+ MKIVCT(49)
+ MKIVCT(50)
+ MKIVCT(51)
+ MKIVCT(52)
+ MKIVCT(53)
+ MKIVCT(54)
+ MKIVCT(55)
+ MKIVCT(56)
+ MKIVCT(57)
+ MKIVCT(58)
+ MKIVCT(59)
+ MKIVCT(60)
+ MKIVCT(61)
+ MKIVCT(62)
+ MKIVCT(63)
+ MKIVCT(64)
+ MKIVCT(65)
+ MKIVCT(66)
+ MKIVCT(67)
+ MKIVCT(68)
+ MKIVCT(69)
+ MKIVCT(70)
+ MKIVCT(71)
+ MKIVCT(72)
+ MKIVCT(73)
+ MKIVCT(74)
+ MKIVCT(75)
+ MKIVCT(76)
+ MKIVCT(77)
+ MKIVCT(78)
+ MKIVCT(79)
+ MKIVCT(80)
+ MKIVCT(81)
+ MKIVCT(82)
+ MKIVCT(83)
+ MKIVCT(84)
+ MKIVCT(85)
+ MKIVCT(86)
+ MKIVCT(87)
+ MKIVCT(88)
+ MKIVCT(89)
+ MKIVCT(90)
+ MKIVCT(91)
+ MKIVCT(92)
+ MKIVCT(93)
+ MKIVCT(94)
+ MKIVCT(95)
+ MKIVCT(96)
+ MKIVCT(97)
+ MKIVCT(98)
+ MKIVCT(99)
+ MKIVCT(100)
+ MKIVCT(101)
+ MKIVCT(102)
+ MKIVCT(103)
+ MKIVCT(104)
+ MKIVCT(105)
+ MKIVCT(106)
+ MKIVCT(107)
+ MKIVCT(108)
+ MKIVCT(109)
+ MKIVCT(110)
+ MKIVCT(111)
+ MKIVCT(112)
+ MKIVCT(113)
+ MKIVCT(114)
+ MKIVCT(115)
+ MKIVCT(116)
+ MKIVCT(117)
+ MKIVCT(118)
+ MKIVCT(119)
+ MKIVCT(120)
+ MKIVCT(121)
+ MKIVCT(122)
+ MKIVCT(123)
+ MKIVCT(124)
+ MKIVCT(125)
+ MKIVCT(126)
+ MKIVCT(127)
+ MKIVCT(128)
+ MKIVCT(129)
+ MKIVCT(130)
+ MKIVCT(131)
+ MKIVCT(132)
+ MKIVCT(133)
+ MKIVCT(134)
+ MKIVCT(135)
+ MKIVCT(136)
+ MKIVCT(137)
+ MKIVCT(138)
+ MKIVCT(139)
+ MKIVCT(140)
+ MKIVCT(141)
+ MKIVCT(142)
+ MKIVCT(143)
+ MKIVCT(144)
+ MKIVCT(145)
+ MKIVCT(146)
+ MKIVCT(147)
+ MKIVCT(148)
+ MKIVCT(149)
+ MKIVCT(150)
+ MKIVCT(151)
+ MKIVCT(152)
+ MKIVCT(153)
+ MKIVCT(154)
+ MKIVCT(155)
+ MKIVCT(156)
+ MKIVCT(157)
+ MKIVCT(158)
+ MKIVCT(159)
+ MKIVCT(160)
+ MKIVCT(161)
+ MKIVCT(162)
+ MKIVCT(163)
+ MKIVCT(164)
+ MKIVCT(165)
+ MKIVCT(166)
+ MKIVCT(167)
+ MKIVCT(168)
+ MKIVCT(169)
+ MKIVCT(170)
+ MKIVCT(171)
+ MKIVCT(172)
+ MKIVCT(173)
+ MKIVCT(174)
+ MKIVCT(175)
+ MKIVCT(176)
+ MKIVCT(177)
+ MKIVCT(178)
+ MKIVCT(179)
+ MKIVCT(180)
+ MKIVCT(181)
+ MKIVCT(182)
+ MKIVCT(183)
+ MKIVCT(184)
+ MKIVCT(185)
+ MKIVCT(186)
+ MKIVCT(187)
+ MKIVCT(188)
+ MKIVCT(189)
+ MKIVCT(190)
+ MKIVCT(191)
+ MKIVCT(192)
+ MKIVCT(193)
+ MKIVCT(194)
+ MKIVCT(195)
+ MKIVCT(196)
+ MKIVCT(197)
+ MKIVCT(198)
+ MKIVCT(199)
+ MKIVCT(200)
+ MKIVCT(201)
+ MKIVCT(202)
+ MKIVCT(203)
+ MKIVCT(204)
+ MKIVCT(205)
+ MKIVCT(206)
+ MKIVCT(207)
+ MKIVCT(208)
+ MKIVCT(209)
+ MKIVCT(210)
+ MKIVCT(211)
+ MKIVCT(212)
+ MKIVCT(213)
+ MKIVCT(214)
+ MKIVCT(215)
+ MKIVCT(216)
+ MKIVCT(217)
+ MKIVCT(218)
+ MKIVCT(219)
+ MKIVCT(220)
+ MKIVCT(221)
+ MKIVCT(222)
+ MKIVCT(223)
+ MKIVCT(224)
+ MKIVCT(225)
+ MKIVCT(226)
+ MKIVCT(227)
+ MKIVCT(228)
+ MKIVCT(229)
+ MKIVCT(230)
+ MKIVCT(231)
+ MKIVCT(232)
+ MKIVCT(233)
+ MKIVCT(234)
+ MKIVCT(235)
+ MKIVCT(236)
+ MKIVCT(237)
+ MKIVCT(238)
+ MKIVCT(239)
+ MKIVCT(240)
+ MKIVCT(241)
+ MKIVCT(242)
+ MKIVCT(243)
+ MKIVCT(244)
+ MKIVCT(245)
+ MKIVCT(246)
+ MKIVCT(247)
+ MKIVCT(248)
+ MKIVCT(249)
+ MKIVCT(250)
+ MKIVCT(251)
+ MKIVCT(252)
+ MKIVCT(253)
+ MKIVCT(254)
+ MKIVCT(255)
+
diff --git a/usr/src/uts/intel/ml/float.s b/usr/src/uts/intel/ml/float.s
new file mode 100644
index 0000000000..807647f553
--- /dev/null
+++ b/usr/src/uts/intel/ml/float.s
@@ -0,0 +1,347 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+ */
+
+/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
+/* All Rights Reserved */
+
+/* Copyright (c) 1987, 1988 Microsoft Corporation */
+/* All Rights Reserved */
+
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/asm_misc.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+#include <sys/x86_archext.h>
+
+#include "assym.h"
+
+ /*
+ * Returns zero if x87 "chip" is present(!)
+ */
+ ENTRY_NP(fpu_initial_probe)
+ CLTS
+ fninit
+ fnstsw %ax
+ movzbl %al, %eax
+ ret
+ SET_SIZE(fpu_initial_probe)
+
+ ENTRY_NP(fxsave_insn)
+ fxsaveq (%rdi)
+ ret
+ SET_SIZE(fxsave_insn)
+
+/*
+ * One of these routines is called from any lwp with floating
+ * point context as part of the prolog of a context switch.
+ */
+
+/*
+ * These three functions define the Intel "xsave" handling for CPUs with
+ * different features. Newer AMD CPUs can also use these functions. See the
+ * 'exception pointers' comment below.
+ */
+ ENTRY_NP(fpxsave_ctxt) /* %rdi is a struct fpu_ctx */
+ cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
+ jne 1f
+ movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+ movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
+ fxsaveq (%rdi)
+ STTS(%rsi) /* trap on next fpu touch */
+1: rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(fpxsave_ctxt)
+
+ ENTRY_NP(xsave_ctxt)
+ cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
+ jne 1f
+ movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+ movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
+ movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+ movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
+ xsave (%rsi)
+ STTS(%rsi) /* trap on next fpu touch */
+1: ret
+ SET_SIZE(xsave_ctxt)
+
+ ENTRY_NP(xsaveopt_ctxt)
+ cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
+ jne 1f
+ movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+ movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
+ movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+ movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
+ xsaveopt (%rsi)
+ STTS(%rsi) /* trap on next fpu touch */
+1: ret
+ SET_SIZE(xsaveopt_ctxt)
+
+/*
+ * On certain AMD processors, the "exception pointers" (i.e. the last
+ * instruction pointer, last data pointer, and last opcode) are saved by the
+ * fxsave, xsave or xsaveopt instruction ONLY if the exception summary bit is
+ * set.
+ *
+ * On newer CPUs, AMD has changed their behavior to mirror the Intel behavior.
+ * We can detect this via an AMD specific cpuid feature bit
+ * (CPUID_AMD_EBX_ERR_PTR_ZERO) and use the simpler Intel-oriented functions.
+ * Otherwise we use these more complex functions on AMD CPUs. All three follow
+ * the same logic after the xsave* instruction.
+ */
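+/*
+ * The choice between the two families is made on the C side of the FPU
+ * support.  A minimal sketch, assuming a hypothetical flag that is set
+ * once the CPU is known to always zero the exception pointers (the flag
+ * name is illustrative, not the real variable):
+ *
+ *	extern boolean_t fpu_exc_ptrs_zeroed;
+ *	void (*fpsave_fn)(struct fpu_ctx *);
+ *
+ *	fpsave_fn = fpu_exc_ptrs_zeroed ?
+ *	    fpxsave_ctxt : fpxsave_excp_clr_ctxt;
+ */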
+ ENTRY_NP(fpxsave_excp_clr_ctxt) /* %rdi is a struct fpu_ctx */
+ cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
+ jne 1f
+ movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+ movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
+ fxsaveq (%rdi)
+ /*
+ * To ensure that we don't leak these values into the next context
+ * on the cpu, we could just issue an fninit here, but that's
+ * rather slow and so we issue an instruction sequence that
+ * clears them more quickly, if a little obscurely.
+ */
+ btw $7, FXSAVE_STATE_FSW(%rdi) /* Test saved ES bit */
+ jnc 0f /* jump if ES = 0 */
+ fnclex /* clear pending x87 exceptions */
+0: ffree %st(7) /* clear tag bit to remove possible stack overflow */
+ fildl .fpzero_const(%rip)
+ /* dummy load changes all exception pointers */
+ STTS(%rsi) /* trap on next fpu touch */
+1: rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(fpxsave_excp_clr_ctxt)
+
+ ENTRY_NP(xsave_excp_clr_ctxt)
+ cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
+ jne 1f
+ movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+ movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
+ movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+ movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
+ xsave (%rsi)
+ btw $7, FXSAVE_STATE_FSW(%rsi) /* Test saved ES bit */
+ jnc 0f /* jump if ES = 0 */
+ fnclex /* clear pending x87 exceptions */
+0: ffree %st(7) /* clear tag bit to remove possible stack overflow */
+ fildl .fpzero_const(%rip) /* dummy load changes all excp. pointers */
+ STTS(%rsi) /* trap on next fpu touch */
+1: ret
+ SET_SIZE(xsave_excp_clr_ctxt)
+
+ ENTRY_NP(xsaveopt_excp_clr_ctxt)
+ cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
+ jne 1f
+ movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+ movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
+ movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+ movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
+ xsaveopt (%rsi)
+ btw $7, FXSAVE_STATE_FSW(%rsi) /* Test saved ES bit */
+ jnc 0f /* jump if ES = 0 */
+ fnclex /* clear pending x87 exceptions */
+0: ffree %st(7) /* clear tag bit to remove possible stack overflow */
+ fildl .fpzero_const(%rip) /* dummy load changes all excp. pointers */
+ STTS(%rsi) /* trap on next fpu touch */
+1: ret
+ SET_SIZE(xsaveopt_excp_clr_ctxt)
+
+ .align 8
+.fpzero_const:
+ .4byte 0x0
+ .4byte 0x0
+
+
+ ENTRY_NP(fpxsave)
+ CLTS
+ fxsaveq (%rdi)
+ fninit /* clear exceptions, init x87 tags */
+ STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */
+ ret
+ SET_SIZE(fpxsave)
+
+ ENTRY_NP(xsave)
+ CLTS
+ movl %esi, %eax /* bv mask */
+ movq %rsi, %rdx
+ shrq $32, %rdx
+ xsave (%rdi)
+
+ fninit /* clear exceptions, init x87 tags */
+ STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */
+ ret
+ SET_SIZE(xsave)
+
+ ENTRY_NP(xsaveopt)
+ CLTS
+ movl %esi, %eax /* bv mask */
+ movq %rsi, %rdx
+ shrq $32, %rdx
+ xsaveopt (%rdi)
+
+ fninit /* clear exceptions, init x87 tags */
+ STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */
+ ret
+ SET_SIZE(xsaveopt)
+
+/*
+ * These functions are used when restoring the FPU as part of the epilogue of a
+ * context switch.
+ */
+
+ ENTRY(fpxrestore_ctxt)
+ cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
+ jne 1f
+ movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+ movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
+ CLTS
+ fxrstorq (%rdi)
+1:
+ ret
+ SET_SIZE(fpxrestore_ctxt)
+
+ ENTRY(xrestore_ctxt)
+ cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
+ jne 1f
+ movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+ movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
+ movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+ movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_xs ptr */
+ CLTS
+ xrstor (%rdi)
+1:
+ ret
+ SET_SIZE(xrestore_ctxt)
+
+
+ ENTRY_NP(fpxrestore)
+ CLTS
+ fxrstorq (%rdi)
+ ret
+ SET_SIZE(fpxrestore)
+
+ ENTRY_NP(xrestore)
+ CLTS
+ movl %esi, %eax /* bv mask */
+ movq %rsi, %rdx
+ shrq $32, %rdx
+ xrstor (%rdi)
+ ret
+ SET_SIZE(xrestore)
+
+/*
+ * Disable the floating point unit.
+ */
+
+ ENTRY_NP(fpdisable)
+ STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */
+ ret
+ SET_SIZE(fpdisable)
+
+/*
+ * Initialize the fpu hardware.
+ */
+
+ ENTRY_NP(fpinit)
+ CLTS
+ cmpl $FP_XSAVE, fp_save_mech
+ je 1f
+
+ /* fxsave */
+ leaq sse_initial(%rip), %rax
+ fxrstorq (%rax) /* load clean initial state */
+ ret
+
+1: /* xsave */
+ leaq avx_initial(%rip), %rcx
+ xorl %edx, %edx
+ movl $XFEATURE_AVX, %eax
+ btl $X86FSET_AVX, x86_featureset
+ cmovael %edx, %eax
+ orl $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
+ xrstor (%rcx)
+ ret
+ SET_SIZE(fpinit)
+
+/*
+ * Clears FPU exception state.
+ * Returns the FP status word.
+ */
+
+ ENTRY_NP(fperr_reset)
+ CLTS
+ xorl %eax, %eax
+ fnstsw %ax
+ fnclex
+ ret
+ SET_SIZE(fperr_reset)
+
+ ENTRY_NP(fpxerr_reset)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $0x10, %rsp /* make some temporary space */
+ CLTS
+ stmxcsr (%rsp)
+ movl (%rsp), %eax
+ andl $_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
+ ldmxcsr (%rsp) /* clear processor exceptions */
+ leave
+ ret
+ SET_SIZE(fpxerr_reset)
+
+ ENTRY_NP(fpgetcwsw)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $0x10, %rsp /* make some temporary space */
+ CLTS
+ fnstsw (%rsp) /* store the status word */
+ fnstcw 2(%rsp) /* store the control word */
+ movl (%rsp), %eax /* put both in %eax */
+ leave
+ ret
+ SET_SIZE(fpgetcwsw)
+
+/*
+ * Returns the MXCSR register.
+ */
+
+ ENTRY_NP(fpgetmxcsr)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $0x10, %rsp /* make some temporary space */
+ CLTS
+ stmxcsr (%rsp)
+ movl (%rsp), %eax
+ leave
+ ret
+ SET_SIZE(fpgetmxcsr)
+
diff --git a/usr/src/uts/intel/ml/hypersubr.s b/usr/src/uts/intel/ml/hypersubr.s
new file mode 100644
index 0000000000..e6378d8518
--- /dev/null
+++ b/usr/src/uts/intel/ml/hypersubr.s
@@ -0,0 +1,164 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
+#ifndef __xpv
+#include <sys/xpv_support.h>
+#endif
+#include <sys/hypervisor.h>
+
+/*
+ * Hypervisor "system calls"
+ *
+ * amd64
+ * %rax == call number
+ * args in registers (%rdi, %rsi, %rdx, %r10, %r8, %r9)
+ *
+ * Note that we use %r10 instead of %rcx for passing 4th argument as in
+ * C calling convention since the "syscall" instruction clobbers %rcx.
+ *
+ * (These calls can be done more efficiently as gcc-style inlines, but
+ * for simplicity and help with initial debugging, we use these primitives
+ * to build the hypervisor calls up from C wrappers.)
+ */
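+/*
+ * Example of how a C wrapper is expected to be built on these primitives.
+ * This is a sketch only; the particular hypercall and the casts are
+ * illustrative:
+ *
+ *	long
+ *	HYPERVISOR_sched_op(int cmd, void *arg)
+ *	{
+ *		return (__hypercall2(__HYPERVISOR_sched_op,
+ *		    (ulong_t)cmd, (ulong_t)arg));
+ *	}
+ */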
+
+/*
+ * XXPV grr - assembler can't deal with an instruction in a quoted string
+ */
+#undef TRAP_INSTR /* cause it's currently "int $0x82" */
+
+/*
+ * The method for issuing a hypercall (i.e. a system call to the
+ * hypervisor) varies from platform to platform. In 32-bit PV domains, an
+ * 'int 82' triggers the call. In 64-bit PV domains, a 'syscall' does the
+ * trick.
+ *
+ * HVM domains are more complicated. In all cases, we want to issue a
+ * VMEXIT instruction, but AMD and Intel use different opcodes to represent
+ * that instruction. Rather than build CPU-specific modules with the
+ * different opcodes, we use the 'hypercall page' provided by Xen. This
+ * page contains a collection of code stubs that do nothing except issue
+ * hypercalls using the proper instructions for this machine. To keep the
+ * wrapper code as simple and efficient as possible, we preallocate that
+ * page below. When the module is loaded, we ask Xen to remap the
+ * underlying PFN to that of the hypercall page.
+ *
+ * Note: this same mechanism could be used in PV domains, but using the
+ * hypercall page requires a call and several more instructions than simply
+ * issuing the proper trap.
+ */
+#if !defined(__xpv)
+
+#define HYPERCALL_PAGESIZE 0x1000
+#define HYPERCALL_SHINFO_PAGESIZE 0x1000
+
+ .data
+ .align HYPERCALL_SHINFO_PAGESIZE
+ .globl hypercall_shared_info_page
+ .type hypercall_shared_info_page, @object
+ .size hypercall_shared_info_page, HYPERCALL_SHINFO_PAGESIZE
+hypercall_shared_info_page:
+ .skip HYPERCALL_SHINFO_PAGESIZE
+
+ .text
+ .align HYPERCALL_PAGESIZE
+ .globl hypercall_page
+ .type hypercall_page, @function
+hypercall_page:
+ .skip HYPERCALL_PAGESIZE
+ .size hypercall_page, HYPERCALL_PAGESIZE
+#define TRAP_INSTR \
+ shll $5, %eax; \
+ addq $hypercall_page, %rax; \
+ INDIRECT_JMP_REG(rax);
+
+#else /* !_xpv */
+
+#define TRAP_INSTR syscall
+#endif /* !__xpv */
+
+
+ ENTRY_NP(__hypercall0)
+ ALTENTRY(__hypercall0_int)
+ movl %edi, %eax
+ TRAP_INSTR
+ ret
+ SET_SIZE(__hypercall0)
+
+ ENTRY_NP(__hypercall1)
+ ALTENTRY(__hypercall1_int)
+ movl %edi, %eax
+ movq %rsi, %rdi /* arg 1 */
+ TRAP_INSTR
+ ret
+ SET_SIZE(__hypercall1)
+
+ ENTRY_NP(__hypercall2)
+ ALTENTRY(__hypercall2_int)
+ movl %edi, %eax
+ movq %rsi, %rdi /* arg 1 */
+ movq %rdx, %rsi /* arg 2 */
+ TRAP_INSTR
+ ret
+ SET_SIZE(__hypercall2)
+
+ ENTRY_NP(__hypercall3)
+ ALTENTRY(__hypercall3_int)
+ movl %edi, %eax
+ movq %rsi, %rdi /* arg 1 */
+ movq %rdx, %rsi /* arg 2 */
+ movq %rcx, %rdx /* arg 3 */
+ TRAP_INSTR
+ ret
+ SET_SIZE(__hypercall3)
+
+ ENTRY_NP(__hypercall4)
+ ALTENTRY(__hypercall4_int)
+ movl %edi, %eax
+ movq %rsi, %rdi /* arg 1 */
+ movq %rdx, %rsi /* arg 2 */
+ movq %rcx, %rdx /* arg 3 */
+ movq %r8, %r10 /* r10 = 4th arg */
+ TRAP_INSTR
+ ret
+ SET_SIZE(__hypercall4)
+
+ ENTRY_NP(__hypercall5)
+ ALTENTRY(__hypercall5_int)
+ movl %edi, %eax
+ movq %rsi, %rdi /* arg 1 */
+ movq %rdx, %rsi /* arg 2 */
+ movq %rcx, %rdx /* arg 3 */
+ movq %r8, %r10 /* r10 = 4th arg */
+ movq %r9, %r8 /* arg 5 */
+ TRAP_INSTR
+ ret
+ SET_SIZE(__hypercall5)
+
diff --git a/usr/src/uts/intel/ml/i86_subr.s b/usr/src/uts/intel/ml/i86_subr.s
new file mode 100644
index 0000000000..2a1a183026
--- /dev/null
+++ b/usr/src/uts/intel/ml/i86_subr.s
@@ -0,0 +1,1629 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
+ * Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
+ * All Rights Reserved
+ */
+
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
+
+/*
+ * General assembly language routines.
+ * It is the intent of this file to contain routines that are
+ * independent of the specific kernel architecture, and those that are
+ * common across kernel architectures.
+ * As architectures diverge, and implementations of specific
+ * architecture-dependent routines change, the routines should be moved
+ * from this file into the respective ../`arch -k`/subr.s file.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/asm_misc.h>
+#include <sys/panic.h>
+#include <sys/ontrap.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+#include <sys/reboot.h>
+#include <sys/psw.h>
+#include <sys/x86_archext.h>
+
+#include "assym.h"
+#include <sys/dditypes.h>
+
+/*
+ * on_fault()
+ *
+ * Catch lofault faults. Like setjmp except it returns one
+ * if code following causes uncorrectable fault. Turned off
+ * by calling no_fault(). Note that while under on_fault(),
+ * SMAP is disabled. For more information see
+ * uts/intel/ml/copy.s.
+ */
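+/*
+ * Typical usage from C, as a sketch (the single-byte peek stands in for
+ * any access that may fault; after a fault, catch_fault has already
+ * cleared the handler, so only the success path calls no_fault()):
+ *
+ *	label_t ljb;
+ *	uint8_t val;
+ *
+ *	if (on_fault(&ljb))
+ *		return (DDI_FAILURE);
+ *	val = *(volatile uint8_t *)addr;
+ *	no_fault();
+ *	return (DDI_SUCCESS);
+ */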
+
+ ENTRY(on_fault)
+ movq %gs:CPU_THREAD, %rsi
+ leaq catch_fault(%rip), %rdx
+ movq %rdi, T_ONFAULT(%rsi) /* jumpbuf in t_onfault */
+ movq %rdx, T_LOFAULT(%rsi) /* catch_fault in t_lofault */
+ call smap_disable /* allow user accesses */
+ jmp setjmp /* let setjmp do the rest */
+
+catch_fault:
+ movq %gs:CPU_THREAD, %rsi
+ movq T_ONFAULT(%rsi), %rdi /* address of save area */
+ xorl %eax, %eax
+ movq %rax, T_ONFAULT(%rsi) /* turn off onfault */
+ movq %rax, T_LOFAULT(%rsi) /* turn off lofault */
+ call smap_enable /* disallow user accesses */
+ jmp longjmp /* let longjmp do the rest */
+ SET_SIZE(on_fault)
+
+ ENTRY(no_fault)
+ movq %gs:CPU_THREAD, %rsi
+ xorl %eax, %eax
+ movq %rax, T_ONFAULT(%rsi) /* turn off onfault */
+ movq %rax, T_LOFAULT(%rsi) /* turn off lofault */
+ call smap_enable /* disallow user accesses */
+ ret
+ SET_SIZE(no_fault)
+
+/*
+ * Default trampoline code for on_trap() (see <sys/ontrap.h>). We just
+ * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
+ */
+
+ ENTRY(on_trap_trampoline)
+ movq %gs:CPU_THREAD, %rsi
+ movq T_ONTRAP(%rsi), %rdi
+ addq $OT_JMPBUF, %rdi
+ jmp longjmp
+ SET_SIZE(on_trap_trampoline)
+
+/*
+ * Push a new element on to the t_ontrap stack. Refer to <sys/ontrap.h> for
+ * more information about the on_trap() mechanism. If the on_trap_data is the
+ * same as the topmost stack element, we just modify that element.
+ */
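+/*
+ * Typical usage from C, as a sketch (mirrors the peek/poke style callers;
+ * note that no_trap() is called on both paths, since the element stays on
+ * the t_ontrap stack until it is popped):
+ *
+ *	on_trap_data_t otd;
+ *	int err;
+ *
+ *	if (!on_trap(&otd, OT_DATA_ACCESS)) {
+ *		val = *addr;
+ *		err = DDI_SUCCESS;
+ *	} else {
+ *		err = DDI_FAILURE;
+ *	}
+ *	no_trap();
+ */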
+
+ ENTRY(on_trap)
+ movw %si, OT_PROT(%rdi) /* ot_prot = prot */
+ movw $0, OT_TRAP(%rdi) /* ot_trap = 0 */
+ leaq on_trap_trampoline(%rip), %rdx /* rdx = &on_trap_trampoline */
+ movq %rdx, OT_TRAMPOLINE(%rdi) /* ot_trampoline = rdx */
+ xorl %ecx, %ecx
+ movq %rcx, OT_HANDLE(%rdi) /* ot_handle = NULL */
+ movq %rcx, OT_PAD1(%rdi) /* ot_pad1 = NULL */
+ movq %gs:CPU_THREAD, %rdx /* rdx = curthread */
+ movq T_ONTRAP(%rdx), %rcx /* rcx = curthread->t_ontrap */
+ cmpq %rdi, %rcx /* if (otp == %rcx) */
+ je 0f /* don't modify t_ontrap */
+
+ movq %rcx, OT_PREV(%rdi) /* ot_prev = t_ontrap */
+ movq %rdi, T_ONTRAP(%rdx) /* curthread->t_ontrap = otp */
+
+0: addq $OT_JMPBUF, %rdi /* &ot_jmpbuf */
+ jmp setjmp
+ SET_SIZE(on_trap)
+
+/*
+ * Setjmp and longjmp implement non-local gotos using state vectors
+ * of type label_t.
+ */
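+/*
+ * Usage sketch: setjmp() returns zero on the initial call and one when
+ * control comes back via longjmp():
+ *
+ *	label_t jb;
+ *
+ *	if (setjmp(&jb) == 0) {
+ *		... normal path; something may longjmp(&jb) later ...
+ *	} else {
+ *		... resumed here after the longjmp ...
+ *	}
+ */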
+
+#if LABEL_PC != 0
+#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
+#endif /* LABEL_PC != 0 */
+
+ ENTRY(setjmp)
+ movq %rsp, LABEL_SP(%rdi)
+ movq %rbp, LABEL_RBP(%rdi)
+ movq %rbx, LABEL_RBX(%rdi)
+ movq %r12, LABEL_R12(%rdi)
+ movq %r13, LABEL_R13(%rdi)
+ movq %r14, LABEL_R14(%rdi)
+ movq %r15, LABEL_R15(%rdi)
+ movq (%rsp), %rdx /* return address */
+ movq %rdx, (%rdi) /* LABEL_PC is 0 */
+ xorl %eax, %eax /* return 0 */
+ ret
+ SET_SIZE(setjmp)
+
+ ENTRY(longjmp)
+ movq LABEL_SP(%rdi), %rsp
+ movq LABEL_RBP(%rdi), %rbp
+ movq LABEL_RBX(%rdi), %rbx
+ movq LABEL_R12(%rdi), %r12
+ movq LABEL_R13(%rdi), %r13
+ movq LABEL_R14(%rdi), %r14
+ movq LABEL_R15(%rdi), %r15
+ movq (%rdi), %rdx /* return address; LABEL_PC is 0 */
+ movq %rdx, (%rsp)
+ xorl %eax, %eax
+ incl %eax /* return 1 */
+ ret
+ SET_SIZE(longjmp)
+
+/*
+ * if a() calls b() calls caller(),
+ * caller() returns return address in a().
+ * (Note: We assume a() and b() are C routines which do the normal entry/exit
+ * sequence.)
+ */
+
+ ENTRY(caller)
+ movq 8(%rbp), %rax /* b()'s return pc, in a() */
+ ret
+ SET_SIZE(caller)
+
+/*
+ * if a() calls callee(), callee() returns the
+ * return address in a();
+ */
+
+ ENTRY(callee)
+ movq (%rsp), %rax /* callee()'s return pc, in a() */
+ ret
+ SET_SIZE(callee)
+
+/*
+ * return the current frame pointer
+ */
+
+ ENTRY(getfp)
+ movq %rbp, %rax
+ ret
+ SET_SIZE(getfp)
+
+/*
+ * Invalidate a single page table entry in the TLB
+ */
+
+ ENTRY(mmu_invlpg)
+ invlpg (%rdi)
+ ret
+ SET_SIZE(mmu_invlpg)
+
+
+/*
+ * Get/Set the value of various control registers
+ */
+
+ ENTRY(getcr0)
+ movq %cr0, %rax
+ ret
+ SET_SIZE(getcr0)
+
+ ENTRY(setcr0)
+ movq %rdi, %cr0
+ ret
+ SET_SIZE(setcr0)
+
+ ENTRY(getcr2)
+#if defined(__xpv)
+ movq %gs:CPU_VCPU_INFO, %rax
+ movq VCPU_INFO_ARCH_CR2(%rax), %rax
+#else
+ movq %cr2, %rax
+#endif
+ ret
+ SET_SIZE(getcr2)
+
+ ENTRY(getcr3)
+ movq %cr3, %rax
+ ret
+ SET_SIZE(getcr3)
+
+#if !defined(__xpv)
+
+ ENTRY(setcr3)
+ movq %rdi, %cr3
+ ret
+ SET_SIZE(setcr3)
+
+ ENTRY(reload_cr3)
+ movq %cr3, %rdi
+ movq %rdi, %cr3
+ ret
+ SET_SIZE(reload_cr3)
+
+#endif /* __xpv */
+
+ ENTRY(getcr4)
+ movq %cr4, %rax
+ ret
+ SET_SIZE(getcr4)
+
+ ENTRY(setcr4)
+ movq %rdi, %cr4
+ ret
+ SET_SIZE(setcr4)
+
+ ENTRY(getcr8)
+ movq %cr8, %rax
+ ret
+ SET_SIZE(getcr8)
+
+ ENTRY(setcr8)
+ movq %rdi, %cr8
+ ret
+ SET_SIZE(setcr8)
+
+ ENTRY(__cpuid_insn)
+ movq %rbx, %r8
+ movq %rcx, %r9
+ movq %rdx, %r11
+ movl (%rdi), %eax /* %eax = regs->cp_eax */
+ movl 0x4(%rdi), %ebx /* %ebx = regs->cp_ebx */
+ movl 0x8(%rdi), %ecx /* %ecx = regs->cp_ecx */
+ movl 0xc(%rdi), %edx /* %edx = regs->cp_edx */
+ cpuid
+ movl %eax, (%rdi) /* regs->cp_eax = %eax */
+ movl %ebx, 0x4(%rdi) /* regs->cp_ebx = %ebx */
+ movl %ecx, 0x8(%rdi) /* regs->cp_ecx = %ecx */
+ movl %edx, 0xc(%rdi) /* regs->cp_edx = %edx */
+ movq %r8, %rbx
+ movq %r9, %rcx
+ movq %r11, %rdx
+ ret
+ SET_SIZE(__cpuid_insn)
+
+ ENTRY_NP(i86_monitor)
+ pushq %rbp
+ movq %rsp, %rbp
+ movq %rdi, %rax /* addr */
+ movq %rsi, %rcx /* extensions */
+ /* rdx contains input arg3: hints */
+ clflush (%rax)
+ .byte 0x0f, 0x01, 0xc8 /* monitor */
+ leave
+ ret
+ SET_SIZE(i86_monitor)
+
+ ENTRY_NP(i86_mwait)
+ pushq %rbp
+ call x86_md_clear
+ movq %rsp, %rbp
+ movq %rdi, %rax /* data */
+ movq %rsi, %rcx /* extensions */
+ .byte 0x0f, 0x01, 0xc9 /* mwait */
+ leave
+ ret
+ SET_SIZE(i86_mwait)
+
+#if defined(__xpv)
+ /*
+ * Defined in C
+ */
+#else
+
+ ENTRY_NP(tsc_read)
+ movq %rbx, %r11
+ movl $0, %eax
+ cpuid
+ rdtsc
+ movq %r11, %rbx
+ shlq $32, %rdx
+ orq %rdx, %rax
+ ret
+ .globl _tsc_mfence_start
+_tsc_mfence_start:
+ mfence
+ rdtsc
+ shlq $32, %rdx
+ orq %rdx, %rax
+ ret
+ .globl _tsc_mfence_end
+_tsc_mfence_end:
+ .globl _tscp_start
+_tscp_start:
+ .byte 0x0f, 0x01, 0xf9 /* rdtscp instruction */
+ shlq $32, %rdx
+ orq %rdx, %rax
+ ret
+ .globl _tscp_end
+_tscp_end:
+ .globl _no_rdtsc_start
+_no_rdtsc_start:
+ xorl %edx, %edx
+ xorl %eax, %eax
+ ret
+ .globl _no_rdtsc_end
+_no_rdtsc_end:
+ .globl _tsc_lfence_start
+_tsc_lfence_start:
+ lfence
+ rdtsc
+ shlq $32, %rdx
+ orq %rdx, %rax
+ ret
+ .globl _tsc_lfence_end
+_tsc_lfence_end:
+ SET_SIZE(tsc_read)
+
+
+#endif /* __xpv */
+
+ ENTRY_NP(randtick)
+ rdtsc
+ shlq $32, %rdx
+ orq %rdx, %rax
+ ret
+ SET_SIZE(randtick)
+/*
+ * Insert entryp after predp in a doubly linked list.
+ */
+
+ ENTRY(_insque)
+ movq (%rsi), %rax /* predp->forw */
+ movq %rsi, CPTRSIZE(%rdi) /* entryp->back = predp */
+ movq %rax, (%rdi) /* entryp->forw = predp->forw */
+ movq %rdi, (%rsi) /* predp->forw = entryp */
+ movq %rdi, CPTRSIZE(%rax) /* predp->forw->back = entryp */
+ ret
+ SET_SIZE(_insque)
+
+/*
+ * Remove entryp from a doubly linked list
+ */
+
+ ENTRY(_remque)
+ movq (%rdi), %rax /* entry->forw */
+ movq CPTRSIZE(%rdi), %rdx /* entry->back */
+ movq %rax, (%rdx) /* entry->back->forw = entry->forw */
+ movq %rdx, CPTRSIZE(%rax) /* entry->forw->back = entry->back */
+ ret
+ SET_SIZE(_remque)
+
+/*
+ * Returns the number of
+ * non-NULL bytes in string argument.
+ */
+
+/*
+ * This is close to a simple transliteration of a C version of this
+ * routine. We should either just -make- this be a C version, or
+ * justify having it in assembler by making it significantly faster.
+ *
+ * size_t
+ * strlen(const char *s)
+ * {
+ * const char *s0;
+ * #if defined(DEBUG)
+ * if ((uintptr_t)s < KERNELBASE)
+ * panic(.str_panic_msg);
+ * #endif
+ * for (s0 = s; *s; s++)
+ * ;
+ * return (s - s0);
+ * }
+ */
+
+ ENTRY(strlen)
+#ifdef DEBUG
+ movq postbootkernelbase(%rip), %rax
+ cmpq %rax, %rdi
+ jae str_valid
+ pushq %rbp
+ movq %rsp, %rbp
+ leaq .str_panic_msg(%rip), %rdi
+ xorl %eax, %eax
+ call panic
+#endif /* DEBUG */
+str_valid:
+ cmpb $0, (%rdi)
+ movq %rdi, %rax
+ je .null_found
+ .align 4
+.strlen_loop:
+ incq %rdi
+ cmpb $0, (%rdi)
+ jne .strlen_loop
+.null_found:
+ subq %rax, %rdi
+ movq %rdi, %rax
+ ret
+ SET_SIZE(strlen)
+
+#ifdef DEBUG
+ .text
+.str_panic_msg:
+ .string "strlen: argument below kernelbase"
+#endif /* DEBUG */
+
+ /*
+ * Berkeley 4.3 introduced symbolically named interrupt levels
+ * as a way to deal with priority in a machine-independent fashion.
+ * Numbered priorities are machine specific, and should be
+ * discouraged where possible.
+ *
+ * Note, for the machine specific priorities there are
+ * examples listed for devices that use a particular priority.
+ * It should not be construed that all devices of that
+ * type should be at that priority. It is simply where
+ * the current devices fit into the priority scheme based
+ * upon time criticality.
+ *
+ * The underlying assumption of these assignments is that
+ * IPL 10 is the highest level from which a device
+ * routine can call wakeup. Devices that interrupt from higher
+ * levels are restricted in what they can do. If they need
+ * kernel services they should schedule a routine at a lower
+ * level (via software interrupt) to do the required
+ * processing.
+ *
+ * Examples of this higher usage:
+ * Level Usage
+ * 14 Profiling clock (and PROM uart polling clock)
+ * 12 Serial ports
+ *
+ * The serial ports request lower level processing on level 6.
+ *
+ * Also, almost all splN routines (where N is a number or a
+ * mnemonic) will do a RAISE(), on the assumption that they are
+ * never used to lower our priority.
+ * The exceptions are:
+ * spl8() Because you can't be above 15 to begin with!
+ * splzs() Because this is used at boot time to lower our
+ * priority, to allow the PROM to poll the uart.
+ * spl0() Used to lower priority to 0.
+ */
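+/*
+ * Usage sketch: splhi() and friends return the previous PIL, which is
+ * later handed back to splx() to restore it:
+ *
+ *	int s;
+ *
+ *	s = splhi();
+ *	... protected section ...
+ *	splx(s);
+ */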
+
+#define SETPRI(level) \
+ movl $/**/level, %edi; /* new priority */ \
+ jmp do_splx /* redirect to do_splx */
+
+#define RAISE(level) \
+ movl $/**/level, %edi; /* new priority */ \
+ jmp splr /* redirect to splr */
+
+ /* locks out all interrupts, including memory errors */
+ ENTRY(spl8)
+ SETPRI(15)
+ SET_SIZE(spl8)
+
+ /* just below the level that profiling runs */
+ ENTRY(spl7)
+ RAISE(13)
+ SET_SIZE(spl7)
+
+ /* sun specific - highest priority onboard serial i/o asy ports */
+ ENTRY(splzs)
+ SETPRI(12) /* Can't be a RAISE, as it's used to lower us */
+ SET_SIZE(splzs)
+
+ ENTRY(splhi)
+ ALTENTRY(splhigh)
+ ALTENTRY(spl6)
+ ALTENTRY(i_ddi_splhigh)
+
+ RAISE(DISP_LEVEL)
+
+ SET_SIZE(i_ddi_splhigh)
+ SET_SIZE(spl6)
+ SET_SIZE(splhigh)
+ SET_SIZE(splhi)
+
+ /* allow all interrupts */
+ ENTRY(spl0)
+ SETPRI(0)
+ SET_SIZE(spl0)
+
+
+ /* splx implementation */
+ ENTRY(splx)
+ jmp do_splx /* redirect to common splx code */
+ SET_SIZE(splx)
+
+ ENTRY(wait_500ms)
+ pushq %rbx
+ movl $50000, %ebx
+1:
+ call tenmicrosec
+ decl %ebx
+ jnz 1b
+ popq %rbx
+ ret
+ SET_SIZE(wait_500ms)
+
+#define RESET_METHOD_KBC 1
+#define RESET_METHOD_PORT92 2
+#define RESET_METHOD_PCI 4
+
+ DGDEF3(pc_reset_methods, 4, 8)
+ .long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;
+
+ ENTRY(pc_reset)
+
+ testl $RESET_METHOD_KBC, pc_reset_methods(%rip)
+ jz 1f
+
+ /
+ / Try the classic keyboard controller-triggered reset.
+ /
+ movw $0x64, %dx
+ movb $0xfe, %al
+ outb (%dx)
+
+ / Wait up to 500 milliseconds here for the keyboard controller
+ / to pull the reset line. On some systems where the keyboard
+ / controller is slow to pull the reset line, the next reset method
+ / may be executed (which may be bad if those systems hang when the
+ / next reset method is used, e.g. Ferrari 3400 (doesn't like port 92),
+ / and Ferrari 4000 (doesn't like the cf9 reset method))
+
+ call wait_500ms
+
+1:
+ testl $RESET_METHOD_PORT92, pc_reset_methods(%rip)
+ jz 3f
+
+ /
+ / Try port 0x92 fast reset
+ /
+ movw $0x92, %dx
+ inb (%dx)
+ cmpb $0xff, %al / If port's not there, we should get back 0xFF
+ je 1f
+ testb $1, %al / If bit 0
+ jz 2f / is clear, jump to perform the reset
+ andb $0xfe, %al / otherwise,
+ outb (%dx) / clear bit 0 first, then
+2:
+ orb $1, %al / Set bit 0
+ outb (%dx) / and reset the system
+1:
+
+ call wait_500ms
+
+3:
+ testl $RESET_METHOD_PCI, pc_reset_methods(%rip)
+ jz 4f
+
+ / Try the PCI (soft) reset vector (should work on all modern systems,
+ / but has been shown to cause problems on 450NX systems, and some newer
+ / systems (e.g. ATI IXP400-equipped systems))
+ / When resetting via this method, 2 writes are required. The first
+ / targets bit 1 (0=hard reset without power cycle, 1=hard reset with
+ / power cycle).
+ / The reset occurs on the second write, during bit 2's transition from
+ / 0->1.
+ movw $0xcf9, %dx
+ movb $0x2, %al / Reset mode = hard, no power cycle
+ outb (%dx)
+ movb $0x6, %al
+ outb (%dx)
+
+ call wait_500ms
+
+4:
+ /
+ / port 0xcf9 failed also. Last-ditch effort is to
+ / triple-fault the CPU.
+ / Also, use triple fault for EFI firmware
+ /
+ ENTRY(efi_reset)
+ pushq $0x0
+ pushq $0x0 / IDT base of 0, limit of 0 + 2 unused bytes
+ lidt (%rsp)
+ int $0x0 / Trigger interrupt, generate triple-fault
+
+ cli
+ hlt / Wait forever
+ /*NOTREACHED*/
+ SET_SIZE(efi_reset)
+ SET_SIZE(pc_reset)
+
+/*
+ * C callable in and out routines
+ */
+
+ ENTRY(outl)
+ movw %di, %dx
+ movl %esi, %eax
+ outl (%dx)
+ ret
+ SET_SIZE(outl)
+
+ ENTRY(outw)
+ movw %di, %dx
+ movw %si, %ax
+ D16 outl (%dx) /* XX64 why not outw? */
+ ret
+ SET_SIZE(outw)
+
+ ENTRY(outb)
+ movw %di, %dx
+ movb %sil, %al
+ outb (%dx)
+ ret
+ SET_SIZE(outb)
+
+ ENTRY(inl)
+ xorl %eax, %eax
+ movw %di, %dx
+ inl (%dx)
+ ret
+ SET_SIZE(inl)
+
+ ENTRY(inw)
+ xorl %eax, %eax
+ movw %di, %dx
+ D16 inl (%dx)
+ ret
+ SET_SIZE(inw)
+
+
+ ENTRY(inb)
+ xorl %eax, %eax
+ movw %di, %dx
+ inb (%dx)
+ ret
+ SET_SIZE(inb)
+
+/*
+ * void int3(void)
+ * void int18(void)
+ * void int20(void)
+ * void int_cmci(void)
+ */
+
+ ENTRY(int3)
+ int $T_BPTFLT
+ ret
+ SET_SIZE(int3)
+
+ ENTRY(int18)
+ int $T_MCE
+ ret
+ SET_SIZE(int18)
+
+ ENTRY(int20)
+ movl boothowto, %eax
+ andl $RB_DEBUG, %eax
+ jz 1f
+
+ int $T_DBGENTR
+1:
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(int20)
+
+ ENTRY(int_cmci)
+ int $T_ENOEXTFLT
+ ret
+ SET_SIZE(int_cmci)
+
+ ENTRY(scanc)
+ /* rdi == size */
+ /* rsi == cp */
+ /* rdx == table */
+ /* rcx == mask */
+ addq %rsi, %rdi /* end = &cp[size] */
+.scanloop:
+ cmpq %rdi, %rsi /* while (cp < end */
+ jnb .scandone
+ movzbq (%rsi), %r8 /* %r8 = *cp */
+ incq %rsi /* cp++ */
+ testb %cl, (%r8, %rdx)
+ jz .scanloop /* && (table[*cp] & mask) == 0) */
+ decq %rsi /* (fix post-increment) */
+.scandone:
+ movl %edi, %eax
+ subl %esi, %eax /* return (end - cp) */
+ ret
+ SET_SIZE(scanc)
+
+/*
+ * Replacement functions for ones that are normally inlined.
+ * In addition to the copy in i86.il, they are defined here just in case.
+ */
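+/*
+ * Usage sketch for the pair below: intr_clear() returns the prior flags
+ * (with PS_IE synthesized from the event mask under Xen), and
+ * intr_restore() re-enables interrupts only if they were enabled before:
+ *
+ *	ulong_t flags;
+ *
+ *	flags = intr_clear();
+ *	... brief section with interrupts disabled ...
+ *	intr_restore(flags);
+ */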
+
+ ENTRY(intr_clear)
+ ENTRY(clear_int_flag)
+ pushfq
+ popq %rax
+#if defined(__xpv)
+ leaq xpv_panicking, %rdi
+ movl (%rdi), %edi
+ cmpl $0, %edi
+ jne 2f
+ CLIRET(%rdi, %dl) /* returns event mask in %dl */
+ /*
+ * Synthesize the PS_IE bit from the event mask bit
+ */
+ andq $_BITNOT(PS_IE), %rax
+ testb $1, %dl
+ jnz 1f
+ orq $PS_IE, %rax
+1:
+ ret
+2:
+#endif
+ CLI(%rdi)
+ ret
+ SET_SIZE(clear_int_flag)
+ SET_SIZE(intr_clear)
+
+ ENTRY(curcpup)
+ movq %gs:CPU_SELF, %rax
+ ret
+ SET_SIZE(curcpup)
+
+/* htonll(), ntohll(), htonl(), ntohl(), htons(), ntohs()
+ * These functions reverse the byte order of the input parameter and return
+ * the result. This is to convert the byte order from host byte order
+ * (little endian) to network byte order (big endian), or vice versa.
+ */
+
+ ENTRY(htonll)
+ ALTENTRY(ntohll)
+ movq %rdi, %rax
+ bswapq %rax
+ ret
+ SET_SIZE(ntohll)
+ SET_SIZE(htonll)
+
+ /* XX64 there must be shorter sequences for this */
+ ENTRY(htonl)
+ ALTENTRY(ntohl)
+ movl %edi, %eax
+ bswap %eax
+ ret
+ SET_SIZE(ntohl)
+ SET_SIZE(htonl)
+
+ /* XX64 there must be better sequences for this */
+ ENTRY(htons)
+ ALTENTRY(ntohs)
+ movl %edi, %eax
+ bswap %eax
+ shrl $16, %eax
+ ret
+ SET_SIZE(ntohs)
+ SET_SIZE(htons)
+
+
+ ENTRY(intr_restore)
+ ENTRY(restore_int_flag)
+ testq $PS_IE, %rdi
+ jz 1f
+#if defined(__xpv)
+ leaq xpv_panicking, %rsi
+ movl (%rsi), %esi
+ cmpl $0, %esi
+ jne 1f
+ /*
+ * Since we're -really- running unprivileged, our attempt
+ * to change the state of the IF bit will be ignored.
+ * The virtual IF bit is tweaked by CLI and STI.
+ */
+ IE_TO_EVENT_MASK(%rsi, %rdi)
+#else
+ sti
+#endif
+1:
+ ret
+ SET_SIZE(restore_int_flag)
+ SET_SIZE(intr_restore)
+
+ ENTRY(sti)
+ STI
+ ret
+ SET_SIZE(sti)
+
+ ENTRY(cli)
+ CLI(%rax)
+ ret
+ SET_SIZE(cli)
+
+ ENTRY(dtrace_interrupt_disable)
+ pushfq
+ popq %rax
+#if defined(__xpv)
+ leaq xpv_panicking, %rdi
+ movl (%rdi), %edi
+ cmpl $0, %edi
+ jne .dtrace_interrupt_disable_done
+ CLIRET(%rdi, %dl) /* returns event mask in %dl */
+ /*
+ * Synthesize the PS_IE bit from the event mask bit
+ */
+ andq $_BITNOT(PS_IE), %rax
+ testb $1, %dl
+ jnz .dtrace_interrupt_disable_done
+ orq $PS_IE, %rax
+#else
+ CLI(%rdx)
+#endif
+.dtrace_interrupt_disable_done:
+ ret
+ SET_SIZE(dtrace_interrupt_disable)
+
+ ENTRY(dtrace_interrupt_enable)
+ pushq %rdi
+ popfq
+#if defined(__xpv)
+ leaq xpv_panicking, %rdx
+ movl (%rdx), %edx
+ cmpl $0, %edx
+ jne .dtrace_interrupt_enable_done
+ /*
+ * Since we're -really- running unprivileged, our attempt
+ * to change the state of the IF bit will be ignored. The
+ * virtual IF bit is tweaked by CLI and STI.
+ */
+ IE_TO_EVENT_MASK(%rdx, %rdi)
+#endif
+.dtrace_interrupt_enable_done:
+ ret
+ SET_SIZE(dtrace_interrupt_enable)
+
+
+ ENTRY(dtrace_membar_producer)
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(dtrace_membar_producer)
+
+ ENTRY(dtrace_membar_consumer)
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(dtrace_membar_consumer)
+
+ ENTRY(threadp)
+ movq %gs:CPU_THREAD, %rax
+ ret
+ SET_SIZE(threadp)
+
+/*
+ * Checksum routine for Internet Protocol Headers
+ */
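+
+/*
+ * The unrolled loop and jump table below compute a standard one's-complement
+ * sum.  As a rough C sketch (the names and types here are illustrative, not
+ * the kernel's), the same folded 16-bit result can be produced by:
+ *
+ *	#include <stddef.h>
+ *	#include <stdint.h>
+ *
+ *	static unsigned int
+ *	ocsum_sketch(const uint16_t *addr, size_t halfwords, uint32_t sum)
+ *	{
+ *		while (halfwords-- != 0) {
+ *			sum += *addr++;			// add 16-bit word
+ *			sum = (sum & 0xffff) + (sum >> 16);	// fold carry
+ *		}
+ *		return (sum & 0xffff);
+ *	}
+ */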
+
+ ENTRY(ip_ocsum)
+ pushq %rbp
+ movq %rsp, %rbp
+#ifdef DEBUG
+ movq postbootkernelbase(%rip), %rax
+ cmpq %rax, %rdi
+ jnb 1f
+ xorl %eax, %eax
+ movq %rdi, %rsi
+ leaq .ip_ocsum_panic_msg(%rip), %rdi
+ call panic
+ /*NOTREACHED*/
+.ip_ocsum_panic_msg:
+ .string "ip_ocsum: address 0x%p below kernelbase\n"
+1:
+#endif
+ movl %esi, %ecx /* halfword_count */
+ movq %rdi, %rsi /* address */
+ /* partial sum in %edx */
+ xorl %eax, %eax
+ testl %ecx, %ecx
+ jz .ip_ocsum_done
+ testq $3, %rsi
+ jnz .ip_csum_notaligned
+.ip_csum_aligned: /* XX64 opportunities for 8-byte operations? */
+.next_iter:
+ /* XX64 opportunities for prefetch? */
+ /* XX64 compute csum with 64 bit quantities? */
+ subl $32, %ecx
+ jl .less_than_32
+
+ addl 0(%rsi), %edx
+.only60:
+ adcl 4(%rsi), %eax
+.only56:
+ adcl 8(%rsi), %edx
+.only52:
+ adcl 12(%rsi), %eax
+.only48:
+ adcl 16(%rsi), %edx
+.only44:
+ adcl 20(%rsi), %eax
+.only40:
+ adcl 24(%rsi), %edx
+.only36:
+ adcl 28(%rsi), %eax
+.only32:
+ adcl 32(%rsi), %edx
+.only28:
+ adcl 36(%rsi), %eax
+.only24:
+ adcl 40(%rsi), %edx
+.only20:
+ adcl 44(%rsi), %eax
+.only16:
+ adcl 48(%rsi), %edx
+.only12:
+ adcl 52(%rsi), %eax
+.only8:
+ adcl 56(%rsi), %edx
+.only4:
+ adcl 60(%rsi), %eax /* could be adding -1 and -1 with a carry */
+.only0:
+ adcl $0, %eax /* could be adding -1 in eax with a carry */
+ adcl $0, %eax
+
+ addq $64, %rsi
+ testl %ecx, %ecx
+ jnz .next_iter
+
+.ip_ocsum_done:
+ addl %eax, %edx
+ adcl $0, %edx
+ movl %edx, %eax /* form a 16 bit checksum by */
+ shrl $16, %eax /* adding two halves of 32 bit checksum */
+ addw %dx, %ax
+ adcw $0, %ax
+ andl $0xffff, %eax
+ leave
+ ret
+
+.ip_csum_notaligned:
+ xorl %edi, %edi
+ movw (%rsi), %di
+ addl %edi, %edx
+ adcl $0, %edx
+ addq $2, %rsi
+ decl %ecx
+ jmp .ip_csum_aligned
+
+.less_than_32:
+ addl $32, %ecx
+ testl $1, %ecx
+ jz .size_aligned
+ andl $0xfe, %ecx
+ movzwl (%rsi, %rcx, 2), %edi
+ addl %edi, %edx
+ adcl $0, %edx
+.size_aligned:
+ movl %ecx, %edi
+ shrl $1, %ecx
+ shl $1, %edi
+ subq $64, %rdi
+ addq %rdi, %rsi
+ leaq .ip_ocsum_jmptbl(%rip), %rdi
+ leaq (%rdi, %rcx, 8), %rdi
+ xorl %ecx, %ecx
+ clc
+ movq (%rdi), %rdi
+ INDIRECT_JMP_REG(rdi)
+
+ .align 8
+.ip_ocsum_jmptbl:
+ .quad .only0, .only4, .only8, .only12, .only16, .only20
+ .quad .only24, .only28, .only32, .only36, .only40, .only44
+ .quad .only48, .only52, .only56, .only60
+ SET_SIZE(ip_ocsum)
+
+/*
+ * Multiply two unsigned 32-bit numbers and yield a 64-bit (u_longlong_t)
+ * result, callable from C.  Provided to manipulate hrtime_t values.
+ */
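+
+/*
+ * In C terms this is simply a widening multiply (a sketch only; the name
+ * mul32_sketch() is illustrative):
+ *
+ *	#include <stdint.h>
+ *
+ *	static uint64_t
+ *	mul32_sketch(uint32_t a, uint32_t b)
+ *	{
+ *		return ((uint64_t)a * (uint64_t)b);	// mull widens to 64 bits
+ *	}
+ */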
+
+ ENTRY(mul32)
+ xorl %edx, %edx /* XX64 joe, paranoia? */
+ movl %edi, %eax
+ mull %esi
+ shlq $32, %rdx
+ orq %rdx, %rax
+ ret
+ SET_SIZE(mul32)
+
+ ENTRY(scan_memory)
+ shrq $3, %rsi /* convert %rsi from byte to quadword count */
+ jz .scanm_done
+ movq %rsi, %rcx /* move count into rep control register */
+ movq %rdi, %rsi /* move addr into lodsq control reg. */
+ rep lodsq /* scan the memory range */
+.scanm_done:
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(scan_memory)
+
+
+ ENTRY(lowbit)
+ movl $-1, %eax
+ bsfq %rdi, %rdi
+ cmovnz %edi, %eax
+ incl %eax
+ ret
+ SET_SIZE(lowbit)
+
+ ENTRY(highbit)
+ ALTENTRY(highbit64)
+ movl $-1, %eax
+ bsrq %rdi, %rdi
+ cmovnz %edi, %eax
+ incl %eax
+ ret
+ SET_SIZE(highbit64)
+ SET_SIZE(highbit)
+
+#define XMSR_ACCESS_VAL $0x9c5a203a
+
+ ENTRY(rdmsr)
+ movl %edi, %ecx
+ rdmsr
+ shlq $32, %rdx
+ orq %rdx, %rax
+ ret
+ SET_SIZE(rdmsr)
+
+ ENTRY(wrmsr)
+ movq %rsi, %rdx
+ shrq $32, %rdx
+ movl %esi, %eax
+ movl %edi, %ecx
+ wrmsr
+ ret
+ SET_SIZE(wrmsr)
+
+ ENTRY(xrdmsr)
+ pushq %rbp
+ movq %rsp, %rbp
+ movl %edi, %ecx
+ movl XMSR_ACCESS_VAL, %edi /* this value is needed to access MSR */
+ rdmsr
+ shlq $32, %rdx
+ orq %rdx, %rax
+ leave
+ ret
+ SET_SIZE(xrdmsr)
+
+ ENTRY(xwrmsr)
+ pushq %rbp
+ movq %rsp, %rbp
+ movl %edi, %ecx
+ movl XMSR_ACCESS_VAL, %edi /* this value is needed to access MSR */
+ movq %rsi, %rdx
+ shrq $32, %rdx
+ movl %esi, %eax
+ wrmsr
+ leave
+ ret
+ SET_SIZE(xwrmsr)
+
+ ENTRY(get_xcr)
+ movl %edi, %ecx
+ #xgetbv
+ .byte 0x0f,0x01,0xd0
+ shlq $32, %rdx
+ orq %rdx, %rax
+ ret
+ SET_SIZE(get_xcr)
+
+ ENTRY(set_xcr)
+ movq %rsi, %rdx
+ shrq $32, %rdx
+ movl %esi, %eax
+ movl %edi, %ecx
+ #xsetbv
+ .byte 0x0f,0x01,0xd1
+ ret
+ SET_SIZE(set_xcr)
+
+ ENTRY(invalidate_cache)
+ wbinvd
+ ret
+ SET_SIZE(invalidate_cache)
+
+ ENTRY_NP(getcregs)
+#if defined(__xpv)
+ /*
+ * Only a few of the hardware control registers or descriptor tables
+ * are directly accessible to us, so just zero the structure.
+ *
+ * XXPV Perhaps it would be helpful for the hypervisor to return
+ * virtualized versions of these for post-mortem use.
+ * (Need to reevaluate - perhaps it already does!)
+ */
+ pushq %rdi /* save *crp */
+ movq $CREGSZ, %rsi
+ call bzero
+ popq %rdi
+
+ /*
+ * Dump what limited information we can
+ */
+ movq %cr0, %rax
+ movq %rax, CREG_CR0(%rdi) /* cr0 */
+ movq %cr2, %rax
+ movq %rax, CREG_CR2(%rdi) /* cr2 */
+ movq %cr3, %rax
+ movq %rax, CREG_CR3(%rdi) /* cr3 */
+ movq %cr4, %rax
+ movq %rax, CREG_CR4(%rdi) /* cr4 */
+
+#else /* __xpv */
+
+#define GETMSR(r, off, d) \
+ movl $r, %ecx; \
+ rdmsr; \
+ movl %eax, off(d); \
+ movl %edx, off+4(d)
+
+ xorl %eax, %eax
+ movq %rax, CREG_GDT+8(%rdi)
+ sgdt CREG_GDT(%rdi) /* 10 bytes */
+ movq %rax, CREG_IDT+8(%rdi)
+ sidt CREG_IDT(%rdi) /* 10 bytes */
+ movq %rax, CREG_LDT(%rdi)
+ sldt CREG_LDT(%rdi) /* 2 bytes */
+ movq %rax, CREG_TASKR(%rdi)
+ str CREG_TASKR(%rdi) /* 2 bytes */
+ movq %cr0, %rax
+ movq %rax, CREG_CR0(%rdi) /* cr0 */
+ movq %cr2, %rax
+ movq %rax, CREG_CR2(%rdi) /* cr2 */
+ movq %cr3, %rax
+ movq %rax, CREG_CR3(%rdi) /* cr3 */
+ movq %cr4, %rax
+ movq %rax, CREG_CR4(%rdi) /* cr4 */
+ movq %cr8, %rax
+ movq %rax, CREG_CR8(%rdi) /* cr8 */
+ GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
+ GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
+#endif /* __xpv */
+ ret
+ SET_SIZE(getcregs)
+
+#undef GETMSR
+
+
+/*
+ * A panic trigger is a word which is updated atomically and can only be set
+ * once. We atomically store 0xDEFACEDD and load the old value. If the
+ * previous value was 0, we succeed and return 1; otherwise return 0.
+ * This allows a partially corrupt trigger to still trigger correctly. DTrace
+ * has its own version of this function to allow it to panic correctly from
+ * probe context.
+ */
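+
+/*
+ * A minimal C sketch of the trigger logic, assuming GCC/clang __atomic
+ * builtins (the function name is illustrative):
+ *
+ *	#include <stdint.h>
+ *
+ *	static int
+ *	panic_trigger_sketch(volatile uint32_t *tp)
+ *	{
+ *		// swap in the pattern; only the first caller sees 0
+ *		uint32_t old = __atomic_exchange_n(tp, 0xdefaceddU,
+ *		    __ATOMIC_SEQ_CST);
+ *		return (old == 0);	// 1 if we won the race
+ *	}
+ */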
+
+ ENTRY_NP(panic_trigger)
+ xorl %eax, %eax
+ movl $0xdefacedd, %edx
+ lock
+ xchgl %edx, (%rdi)
+ cmpl $0, %edx
+ je 0f
+ movl $0, %eax
+ ret
+0: movl $1, %eax
+ ret
+ SET_SIZE(panic_trigger)
+
+ ENTRY_NP(dtrace_panic_trigger)
+ xorl %eax, %eax
+ movl $0xdefacedd, %edx
+ lock
+ xchgl %edx, (%rdi)
+ cmpl $0, %edx
+ je 0f
+ movl $0, %eax
+ ret
+0: movl $1, %eax
+ ret
+ SET_SIZE(dtrace_panic_trigger)
+
+/*
+ * The panic() and cmn_err() functions invoke vpanic() as a common entry point
+ * into the panic code implemented in panicsys(). vpanic() is responsible
+ * for passing through the format string and arguments, and constructing a
+ * regs structure on the stack into which it saves the current register
+ * values. If we are not dying due to a fatal trap, these registers will
+ * then be preserved in panicbuf as the current processor state. Before
+ * invoking panicsys(), vpanic() activates the first panic trigger (see
+ * common/os/panic.c) and switches to the panic_stack if successful. Note that
+ * DTrace takes a slightly different panic path if it must panic from probe
+ * context. Instead of calling panic, it calls into dtrace_vpanic(), which
+ * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
+ * branches back into vpanic().
+ */
+
+ ENTRY_NP(vpanic) /* Initial stack layout: */
+
+ pushq %rbp /* | %rip | 0x60 */
+ movq %rsp, %rbp /* | %rbp | 0x58 */
+ pushfq /* | rfl | 0x50 */
+ pushq %r11 /* | %r11 | 0x48 */
+ pushq %r10 /* | %r10 | 0x40 */
+ pushq %rbx /* | %rbx | 0x38 */
+ pushq %rax /* | %rax | 0x30 */
+ pushq %r9 /* | %r9 | 0x28 */
+ pushq %r8 /* | %r8 | 0x20 */
+ pushq %rcx /* | %rcx | 0x18 */
+ pushq %rdx /* | %rdx | 0x10 */
+ pushq %rsi /* | %rsi | 0x8 alist */
+ pushq %rdi /* | %rdi | 0x0 format */
+
+ movq %rsp, %rbx /* %rbx = current %rsp */
+
+ leaq panic_quiesce(%rip), %rdi /* %rdi = &panic_quiesce */
+ call panic_trigger /* %eax = panic_trigger() */
+
+vpanic_common:
+ /*
+	 * The panic_trigger result is in %eax from the call above, and
+	 * dtrace_vpanic() places it in %eax before branching here.  We
+	 * stash the panic_trigger result in %r11d so that it survives
+	 * until it is passed to panicsys() below as on_panic_stack.
+ */
+ movl %eax, %r11d
+ cmpl $0, %r11d
+ je 0f
+
+ /*
+ * If panic_trigger() was successful, we are the first to initiate a
+ * panic: we now switch to the reserved panic_stack before continuing.
+ */
+ leaq panic_stack(%rip), %rsp
+ addq $PANICSTKSIZE, %rsp
+0: subq $REGSIZE, %rsp
+ /*
+ * Now that we've got everything set up, store the register values as
+ * they were when we entered vpanic() to the designated location in
+ * the regs structure we allocated on the stack.
+ */
+ movq 0x0(%rbx), %rcx
+ movq %rcx, REGOFF_RDI(%rsp)
+ movq 0x8(%rbx), %rcx
+ movq %rcx, REGOFF_RSI(%rsp)
+ movq 0x10(%rbx), %rcx
+ movq %rcx, REGOFF_RDX(%rsp)
+ movq 0x18(%rbx), %rcx
+ movq %rcx, REGOFF_RCX(%rsp)
+ movq 0x20(%rbx), %rcx
+
+ movq %rcx, REGOFF_R8(%rsp)
+ movq 0x28(%rbx), %rcx
+ movq %rcx, REGOFF_R9(%rsp)
+ movq 0x30(%rbx), %rcx
+ movq %rcx, REGOFF_RAX(%rsp)
+ movq 0x38(%rbx), %rcx
+ movq %rcx, REGOFF_RBX(%rsp)
+ movq 0x58(%rbx), %rcx
+
+ movq %rcx, REGOFF_RBP(%rsp)
+ movq 0x40(%rbx), %rcx
+ movq %rcx, REGOFF_R10(%rsp)
+ movq 0x48(%rbx), %rcx
+ movq %rcx, REGOFF_R11(%rsp)
+ movq %r12, REGOFF_R12(%rsp)
+
+ movq %r13, REGOFF_R13(%rsp)
+ movq %r14, REGOFF_R14(%rsp)
+ movq %r15, REGOFF_R15(%rsp)
+
+ xorl %ecx, %ecx
+ movw %ds, %cx
+ movq %rcx, REGOFF_DS(%rsp)
+ movw %es, %cx
+ movq %rcx, REGOFF_ES(%rsp)
+ movw %fs, %cx
+ movq %rcx, REGOFF_FS(%rsp)
+ movw %gs, %cx
+ movq %rcx, REGOFF_GS(%rsp)
+
+ movq $0, REGOFF_TRAPNO(%rsp)
+
+ movq $0, REGOFF_ERR(%rsp)
+ leaq vpanic(%rip), %rcx
+ movq %rcx, REGOFF_RIP(%rsp)
+ movw %cs, %cx
+ movzwq %cx, %rcx
+ movq %rcx, REGOFF_CS(%rsp)
+ movq 0x50(%rbx), %rcx
+ movq %rcx, REGOFF_RFL(%rsp)
+ movq %rbx, %rcx
+ addq $0x60, %rcx
+ movq %rcx, REGOFF_RSP(%rsp)
+ movw %ss, %cx
+ movzwq %cx, %rcx
+ movq %rcx, REGOFF_SS(%rsp)
+
+ /*
+ * panicsys(format, alist, rp, on_panic_stack)
+ */
+ movq REGOFF_RDI(%rsp), %rdi /* format */
+ movq REGOFF_RSI(%rsp), %rsi /* alist */
+ movq %rsp, %rdx /* struct regs */
+ movl %r11d, %ecx /* on_panic_stack */
+ call panicsys
+ addq $REGSIZE, %rsp
+ popq %rdi
+ popq %rsi
+ popq %rdx
+ popq %rcx
+ popq %r8
+ popq %r9
+ popq %rax
+ popq %rbx
+ popq %r10
+ popq %r11
+ popfq
+ leave
+ ret
+ SET_SIZE(vpanic)
+
+ ENTRY_NP(dtrace_vpanic) /* Initial stack layout: */
+
+ pushq %rbp /* | %rip | 0x60 */
+ movq %rsp, %rbp /* | %rbp | 0x58 */
+ pushfq /* | rfl | 0x50 */
+ pushq %r11 /* | %r11 | 0x48 */
+ pushq %r10 /* | %r10 | 0x40 */
+ pushq %rbx /* | %rbx | 0x38 */
+ pushq %rax /* | %rax | 0x30 */
+ pushq %r9 /* | %r9 | 0x28 */
+ pushq %r8 /* | %r8 | 0x20 */
+ pushq %rcx /* | %rcx | 0x18 */
+ pushq %rdx /* | %rdx | 0x10 */
+ pushq %rsi /* | %rsi | 0x8 alist */
+ pushq %rdi /* | %rdi | 0x0 format */
+
+ movq %rsp, %rbx /* %rbx = current %rsp */
+
+ leaq panic_quiesce(%rip), %rdi /* %rdi = &panic_quiesce */
+ call dtrace_panic_trigger /* %eax = dtrace_panic_trigger() */
+ jmp vpanic_common
+
+ SET_SIZE(dtrace_vpanic)
+
+ DGDEF3(timedelta, 8, 8)
+ .long 0, 0
+
+ /*
+ * initialized to a non zero value to make pc_gethrtime()
+ * work correctly even before clock is initialized
+ */
+ DGDEF3(hrtime_base, 8, 8)
+ .long _MUL(NSEC_PER_CLOCK_TICK, 6), 0
+
+ DGDEF3(adj_shift, 4, 4)
+ .long ADJ_SHIFT
+
+ ENTRY_NP(hres_tick)
+ pushq %rbp
+ movq %rsp, %rbp
+
+ /*
+ * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
+ * hres_last_tick can only be modified while holding CLOCK_LOCK).
+ * At worst, performing this now instead of under CLOCK_LOCK may
+ * introduce some jitter in pc_gethrestime().
+ */
+ movq gethrtimef(%rip), %rsi
+ INDIRECT_CALL_REG(rsi)
+ movq %rax, %r8
+
+ leaq hres_lock(%rip), %rax
+ movb $-1, %dl
+.CL1:
+ xchgb %dl, (%rax)
+ testb %dl, %dl
+ jz .CL3 /* got it */
+.CL2:
+ cmpb $0, (%rax) /* possible to get lock? */
+ pause
+ jne .CL2
+ jmp .CL1 /* yes, try again */
+.CL3:
+ /*
+ * compute the interval since last time hres_tick was called
+ * and adjust hrtime_base and hrestime accordingly
+ * hrtime_base is an 8 byte value (in nsec), hrestime is
+ * a timestruc_t (sec, nsec)
+ */
+ leaq hres_last_tick(%rip), %rax
+ movq %r8, %r11
+ subq (%rax), %r8
+ addq %r8, hrtime_base(%rip) /* add interval to hrtime_base */
+ addq %r8, hrestime+8(%rip) /* add interval to hrestime.tv_nsec */
+ /*
+ * Now that we have CLOCK_LOCK, we can update hres_last_tick
+ */
+ movq %r11, (%rax)
+
+ call __adj_hrestime
+
+ /*
+ * release the hres_lock
+ */
+ incl hres_lock(%rip)
+ leave
+ ret
+ SET_SIZE(hres_tick)
+
+/*
+ * void prefetch_smap_w(void *)
+ *
+ * Prefetch ahead within a linear list of smap structures.
+ * Not implemented on x86; this stub exists for compatibility.
+ */
+
+ ENTRY(prefetch_smap_w)
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(prefetch_smap_w)
+
+/*
+ * prefetch_page_r(page_t *)
+ * issue prefetch instructions for a page_t
+ */
+
+ ENTRY(prefetch_page_r)
+ rep; ret /* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(prefetch_page_r)
+
+ ENTRY(bcmp)
+ pushq %rbp
+ movq %rsp, %rbp
+#ifdef DEBUG
+ testq %rdx,%rdx
+ je 1f
+ movq postbootkernelbase(%rip), %r11
+ cmpq %r11, %rdi
+ jb 0f
+ cmpq %r11, %rsi
+ jnb 1f
+0: leaq .bcmp_panic_msg(%rip), %rdi
+ xorl %eax, %eax
+ call panic
+1:
+#endif /* DEBUG */
+ call memcmp
+ testl %eax, %eax
+ setne %dl
+ leave
+ movzbl %dl, %eax
+ ret
+ SET_SIZE(bcmp)
+
+#ifdef DEBUG
+ .text
+.bcmp_panic_msg:
+ .string "bcmp: arguments below kernelbase"
+#endif /* DEBUG */
+
+ ENTRY_NP(bsrw_insn)
+ xorl %eax, %eax
+ bsrw %di, %ax
+ ret
+ SET_SIZE(bsrw_insn)
+
+ ENTRY_NP(switch_sp_and_call)
+ pushq %rbp
+ movq %rsp, %rbp /* set up stack frame */
+ movq %rdi, %rsp /* switch stack pointer */
+ movq %rdx, %rdi /* pass func arg 1 */
+ movq %rsi, %r11 /* save function to call */
+ movq %rcx, %rsi /* pass func arg 2 */
+ INDIRECT_CALL_REG(r11) /* call function */
+ leave /* restore stack */
+ ret
+ SET_SIZE(switch_sp_and_call)
+
+ ENTRY_NP(kmdb_enter)
+ pushq %rbp
+ movq %rsp, %rbp
+
+ /*
+ * Save flags, do a 'cli' then return the saved flags
+ */
+ call intr_clear
+
+ int $T_DBGENTR
+
+ /*
+ * Restore the saved flags
+ */
+ movq %rax, %rdi
+ call intr_restore
+
+ leave
+ ret
+ SET_SIZE(kmdb_enter)
+
+ ENTRY_NP(return_instr)
+	rep;	ret	/* use 2 byte return instruction when branch target */
+ /* AMD Software Optimization Guide - Section 6.2 */
+ SET_SIZE(return_instr)
+
+ ENTRY(getflags)
+ pushfq
+ popq %rax
+#if defined(__xpv)
+ CURTHREAD(%rdi)
+ KPREEMPT_DISABLE(%rdi)
+ /*
+ * Synthesize the PS_IE bit from the event mask bit
+ */
+ CURVCPU(%r11)
+ andq $_BITNOT(PS_IE), %rax
+ XEN_TEST_UPCALL_MASK(%r11)
+ jnz 1f
+ orq $PS_IE, %rax
+1:
+ KPREEMPT_ENABLE_NOKP(%rdi)
+#endif
+ ret
+ SET_SIZE(getflags)
+
+ ENTRY(ftrace_interrupt_disable)
+ pushfq
+ popq %rax
+ CLI(%rdx)
+ ret
+ SET_SIZE(ftrace_interrupt_disable)
+
+ ENTRY(ftrace_interrupt_enable)
+ pushq %rdi
+ popfq
+ ret
+ SET_SIZE(ftrace_interrupt_enable)
+
+ ENTRY(clflush_insn)
+ clflush (%rdi)
+ ret
+ SET_SIZE(clflush_insn)
+
+ ENTRY(mfence_insn)
+ mfence
+ ret
+ SET_SIZE(mfence_insn)
+
+/*
+ * VMware implements an I/O port that programs can query to detect if software
+ * is running in a VMware hypervisor. This hypervisor port behaves differently
+ * depending on magic values in certain registers and modifies some registers
+ * as a side effect.
+ *
+ * References: http://kb.vmware.com/kb/1009458
+ */
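+
+/*
+ * Hypothetical usage from C, assuming the usual prototype of this routine;
+ * the four output slots receive the %eax, %ebx, %ecx and %edx values left
+ * behind by the port access:
+ *
+ *	#include <stdint.h>
+ *
+ *	extern void vmware_port(int cmd, uint32_t *regs);
+ *
+ *	static void
+ *	vmware_port_example(int cmd)
+ *	{
+ *		uint32_t regs[4];
+ *
+ *		vmware_port(cmd, regs);
+ *		// interpretation of regs[] depends on the command issued
+ *	}
+ */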
+
+ ENTRY(vmware_port)
+ pushq %rbx
+ movl $VMWARE_HVMAGIC, %eax
+ movl $0xffffffff, %ebx
+ movl %edi, %ecx
+ movl $VMWARE_HVPORT, %edx
+ inl (%dx)
+ movl %eax, (%rsi)
+ movl %ebx, 4(%rsi)
+ movl %ecx, 8(%rsi)
+ movl %edx, 12(%rsi)
+ popq %rbx
+ ret
+ SET_SIZE(vmware_port)
diff --git a/usr/src/uts/intel/ml/lock_prim.s b/usr/src/uts/intel/ml/lock_prim.s
new file mode 100644
index 0000000000..4267561bf7
--- /dev/null
+++ b/usr/src/uts/intel/ml/lock_prim.s
@@ -0,0 +1,714 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#include "assym.h"
+
+#include <sys/mutex_impl.h>
+#include <sys/asm_linkage.h>
+#include <sys/asm_misc.h>
+#include <sys/regset.h>
+#include <sys/rwlock_impl.h>
+#include <sys/lockstat.h>
+
+/*
+ * lock_try(lp), ulock_try(lp)
+ * - returns non-zero on success.
+ * - doesn't block interrupts so don't use this to spin on a lock.
+ *
+ * ulock_try() is for a lock in the user address space.
+ */
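+
+/*
+ * A C sketch of the fast path below, assuming GCC/clang __atomic builtins
+ * (the name lock_try_sketch() is illustrative):
+ *
+ *	#include <stdint.h>
+ *
+ *	static int
+ *	lock_try_sketch(volatile uint8_t *lp)
+ *	{
+ *		// set the byte; succeed only if it was previously clear
+ *		return (__atomic_exchange_n(lp, 0xff, __ATOMIC_ACQUIRE) == 0);
+ *	}
+ */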
+
+ .globl kernelbase
+
+ ENTRY(lock_try)
+ movb $-1, %dl
+ movzbq %dl, %rax
+ xchgb %dl, (%rdi)
+ xorb %dl, %al
+.lock_try_lockstat_patch_point:
+ ret
+ testb %al, %al
+ jnz 0f
+ ret
+0:
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
+ movq %rdi, %rsi /* rsi = lock addr */
+ movl $LS_LOCK_TRY_ACQUIRE, %edi /* edi = event */
+ jmp lockstat_wrapper
+ SET_SIZE(lock_try)
+
+ ENTRY(lock_spin_try)
+ movb $-1, %dl
+ movzbq %dl, %rax
+ xchgb %dl, (%rdi)
+ xorb %dl, %al
+ ret
+ SET_SIZE(lock_spin_try)
+
+ ENTRY(ulock_try)
+#ifdef DEBUG
+ movq kernelbase(%rip), %rax
+ cmpq %rax, %rdi /* test uaddr < kernelbase */
+ jb ulock_pass /* uaddr < kernelbase, proceed */
+
+ movq %rdi, %r12 /* preserve lock ptr for debugging */
+ leaq .ulock_panic_msg(%rip), %rdi
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+ xorl %eax, %eax /* clear for varargs */
+ call panic
+
+#endif /* DEBUG */
+
+ulock_pass:
+ movl $1, %eax
+ xchgb %al, (%rdi)
+ xorb $1, %al
+ ret
+ SET_SIZE(ulock_try)
+
+#ifdef DEBUG
+ .data
+.ulock_panic_msg:
+ .string "ulock_try: Argument is above kernelbase"
+ .text
+#endif /* DEBUG */
+
+/*
+ * lock_clear(lp)
+ * - unlock lock without changing interrupt priority level.
+ */
+
+ ENTRY(lock_clear)
+ movb $0, (%rdi)
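+
+/*
+ * The loop above is the assembly rendering of the classic scanc() loop.
+ * In C (a sketch; parameter names and types are illustrative):
+ *
+ *	#include <stddef.h>
+ *
+ *	static size_t
+ *	scanc_sketch(size_t size, const unsigned char *cp,
+ *	    const unsigned char *table, unsigned char mask)
+ *	{
+ *		const unsigned char *end = cp + size;
+ *
+ *		while (cp < end && (table[*cp] & mask) == 0)
+ *			cp++;			// advance past non-matches
+ *		return ((size_t)(end - cp));	// bytes left, incl. match
+ *	}
+ */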
+.lock_clear_lockstat_patch_point:
+ ret
+ movq %rdi, %rsi /* rsi = lock addr */
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
+ movl $LS_LOCK_CLEAR_RELEASE, %edi /* edi = event */
+ jmp lockstat_wrapper
+ SET_SIZE(lock_clear)
+
+ ENTRY(ulock_clear)
+#ifdef DEBUG
+ movq kernelbase(%rip), %rcx
+ cmpq %rcx, %rdi /* test uaddr < kernelbase */
+ jb ulock_clr /* uaddr < kernelbase, proceed */
+
+ leaq .ulock_clear_msg(%rip), %rdi
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+ xorl %eax, %eax /* clear for varargs */
+ call panic
+#endif
+
+ulock_clr:
+ movb $0, (%rdi)
+ ret
+ SET_SIZE(ulock_clear)
+
+#ifdef DEBUG
+ .data
+.ulock_clear_msg:
+ .string "ulock_clear: Argument is above kernelbase"
+ .text
+#endif /* DEBUG */
+
+
+/*
+ * lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil)
+ * Raises pil to new_pil, grabs lp, and stores the old pil in *old_pil.
+ */
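+
+/*
+ * A C sketch of the fast path, assuming the usual splr() prototype and
+ * GCC/clang __atomic builtins (the _sketch names are illustrative):
+ *
+ *	#include <stdint.h>
+ *
+ *	extern int splr(int new_pil);	// raises pil, returns previous pil
+ *
+ *	static void
+ *	lock_set_spl_sketch(volatile uint8_t *lp, int new_pil,
+ *	    uint16_t *old_pil)
+ *	{
+ *		int s = splr(new_pil);	// raise priority before touching lp
+ *
+ *		if (__atomic_exchange_n(lp, 0xff, __ATOMIC_ACQUIRE) == 0) {
+ *			*old_pil = (uint16_t)s;	// got the lock
+ *			return;
+ *		}
+ *		// contended: the code below branches to lock_set_spl_spin()
+ *	}
+ */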
+
+ ENTRY(lock_set_spl)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $32, %rsp
+ movl %esi, 8(%rsp) /* save priority level */
+ movq %rdx, 16(%rsp) /* save old pil ptr */
+ movq %rdi, 24(%rsp) /* save lock pointer */
+ movl %esi, %edi /* pass priority level */
+ call splr /* raise priority level */
+ movq 24(%rsp), %rdi /* rdi = lock addr */
+ movb $-1, %dl
+ xchgb %dl, (%rdi) /* try to set lock */
+ testb %dl, %dl /* did we get the lock? ... */
+ jnz .lss_miss /* ... no, go to C for the hard case */
+ movq 16(%rsp), %rdx /* rdx = old pil addr */
+ movw %ax, (%rdx) /* store old pil */
+ leave
+.lock_set_spl_lockstat_patch_point:
+ ret
+ movq %rdi, %rsi /* rsi = lock addr */
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
+ movl $LS_LOCK_SET_SPL_ACQUIRE, %edi
+ jmp lockstat_wrapper
+.lss_miss:
+ movl 8(%rsp), %esi /* new_pil */
+ movq 16(%rsp), %rdx /* old_pil_addr */
+ movl %eax, %ecx /* original pil */
+ leave /* unwind stack */
+ jmp lock_set_spl_spin
+ SET_SIZE(lock_set_spl)
+
+/*
+ * void
+ * lock_init(lp)
+ */
+
+ ENTRY(lock_init)
+ movb $0, (%rdi)
+ ret
+ SET_SIZE(lock_init)
+
+/*
+ * void
+ * lock_set(lp)
+ */
+
+ ENTRY(lock_set)
+ movb $-1, %dl
+ xchgb %dl, (%rdi) /* try to set lock */
+ testb %dl, %dl /* did we get it? */
+ jnz lock_set_spin /* no, go to C for the hard case */
+.lock_set_lockstat_patch_point:
+ ret
+ movq %rdi, %rsi /* rsi = lock addr */
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
+ movl $LS_LOCK_SET_ACQUIRE, %edi
+ jmp lockstat_wrapper
+ SET_SIZE(lock_set)
+
+/*
+ * lock_clear_splx(lp, s)
+ */
+
+ ENTRY(lock_clear_splx)
+ movb $0, (%rdi) /* clear lock */
+.lock_clear_splx_lockstat_patch_point:
+ jmp 0f
+0:
+ movl %esi, %edi /* arg for splx */
+ jmp splx /* let splx do its thing */
+.lock_clear_splx_lockstat:
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+ subq $16, %rsp /* space to save args across splx */
+ movq %rdi, 8(%rsp) /* save lock ptr across splx call */
+ movl %esi, %edi /* arg for splx */
+ call splx /* lower the priority */
+ movq 8(%rsp), %rsi /* rsi = lock ptr */
+ leave /* unwind stack */
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
+ movl $LS_LOCK_CLEAR_SPLX_RELEASE, %edi
+ jmp lockstat_wrapper
+ SET_SIZE(lock_clear_splx)
+
+#if defined(__GNUC_AS__)
+#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \
+ (.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2)
+
+#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \
+ (.lock_clear_splx_lockstat_patch_point + 1)
+#else
+#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \
+ [.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2]
+
+#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \
+ [.lock_clear_splx_lockstat_patch_point + 1]
+#endif
+
+/*
+ * mutex_enter() and mutex_exit().
+ *
+ * These routines handle the simple cases of mutex_enter() (adaptive
+ * lock, not held) and mutex_exit() (adaptive lock, held, no waiters).
+ * If anything complicated is going on we punt to mutex_vector_enter().
+ *
+ * mutex_tryenter() is similar to mutex_enter() but returns zero if
+ * the lock cannot be acquired, nonzero on success.
+ *
+ * If mutex_exit() gets preempted in the window between checking waiters
+ * and clearing the lock, we can miss wakeups. Disabling preemption
+ * in the mutex code is prohibitively expensive, so instead we detect
+ * mutex preemption by examining the trapped PC in the interrupt path.
+ * If we interrupt a thread in mutex_exit() that has not yet cleared
+ * the lock, cmnint() resets its PC back to the beginning of
+ * mutex_exit() so it will check again for waiters when it resumes.
+ *
+ * The lockstat code below is activated when the lockstat driver
+ * calls lockstat_hot_patch() to hot-patch the kernel mutex code.
+ * Note that we don't need to test lockstat_event_mask here -- we won't
+ * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats.
+ */
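+
+/*
+ * The adaptive fast path below amounts to a single compare-and-swap of the
+ * owner word.  A C sketch, assuming GCC/clang __atomic builtins (the name
+ * and the int return convention are illustrative):
+ *
+ *	#include <stdint.h>
+ *
+ *	static int
+ *	mutex_enter_fast(volatile uintptr_t *owner, uintptr_t curthread)
+ *	{
+ *		uintptr_t unheld = 0;
+ *
+ *		// install curthread iff the owner word is 0; a zero return
+ *		// means the caller must fall back to mutex_vector_enter()
+ *		return (__atomic_compare_exchange_n(owner, &unheld,
+ *		    curthread, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
+ *	}
+ */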
+
+ ENTRY_NP(mutex_enter)
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */
+ xorl %eax, %eax /* rax = 0 (unheld adaptive) */
+ lock
+ cmpxchgq %rdx, (%rdi)
+ jnz mutex_vector_enter
+.mutex_enter_lockstat_patch_point:
+#if defined(OPTERON_WORKAROUND_6323525)
+.mutex_enter_6323525_patch_point:
+ ret /* nop space for lfence */
+ nop
+ nop
+.mutex_enter_lockstat_6323525_patch_point: /* new patch point if lfence */
+ nop
+#else /* OPTERON_WORKAROUND_6323525 */
+ ret
+#endif /* OPTERON_WORKAROUND_6323525 */
+ movq %rdi, %rsi
+ movl $LS_MUTEX_ENTER_ACQUIRE, %edi
+/*
+ * expects %rdx=thread, %rsi=lock, %edi=lockstat event
+ */
+ ALTENTRY(lockstat_wrapper)
+ incb T_LOCKSTAT(%rdx) /* curthread->t_lockstat++ */
+ leaq lockstat_probemap(%rip), %rax
+ movl (%rax, %rdi, DTRACE_IDSIZE), %eax
+ testl %eax, %eax /* check for non-zero probe */
+ jz 1f
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+ movl %eax, %edi
+ movq lockstat_probe, %rax
+ INDIRECT_CALL_REG(rax)
+ leave /* unwind stack */
+1:
+ movq %gs:CPU_THREAD, %rdx /* reload thread ptr */
+ decb T_LOCKSTAT(%rdx) /* curthread->t_lockstat-- */
+ movl $1, %eax /* return success if tryenter */
+ ret
+ SET_SIZE(lockstat_wrapper)
+ SET_SIZE(mutex_enter)
+
+/*
+ * expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event
+ */
+ ENTRY(lockstat_wrapper_arg)
+ incb T_LOCKSTAT(%rcx) /* curthread->t_lockstat++ */
+ leaq lockstat_probemap(%rip), %rax
+ movl (%rax, %rdi, DTRACE_IDSIZE), %eax
+ testl %eax, %eax /* check for non-zero probe */
+ jz 1f
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+ movl %eax, %edi
+ movq lockstat_probe, %rax
+ INDIRECT_CALL_REG(rax)
+ leave /* unwind stack */
+1:
+ movq %gs:CPU_THREAD, %rdx /* reload thread ptr */
+ decb T_LOCKSTAT(%rdx) /* curthread->t_lockstat-- */
+ movl $1, %eax /* return success if tryenter */
+ ret
+ SET_SIZE(lockstat_wrapper_arg)
+
+
+ ENTRY(mutex_tryenter)
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */
+ xorl %eax, %eax /* rax = 0 (unheld adaptive) */
+ lock
+ cmpxchgq %rdx, (%rdi)
+ jnz mutex_vector_tryenter
+ not %eax /* return success (nonzero) */
+#if defined(OPTERON_WORKAROUND_6323525)
+.mutex_tryenter_lockstat_patch_point:
+.mutex_tryenter_6323525_patch_point:
+ ret /* nop space for lfence */
+ nop
+ nop
+.mutex_tryenter_lockstat_6323525_patch_point: /* new patch point if lfence */
+ nop
+#else /* OPTERON_WORKAROUND_6323525 */
+.mutex_tryenter_lockstat_patch_point:
+ ret
+#endif /* OPTERON_WORKAROUND_6323525 */
+ movq %rdi, %rsi
+ movl $LS_MUTEX_ENTER_ACQUIRE, %edi
+ jmp lockstat_wrapper
+ SET_SIZE(mutex_tryenter)
+
+ ENTRY(mutex_adaptive_tryenter)
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */
+ xorl %eax, %eax /* rax = 0 (unheld adaptive) */
+ lock
+ cmpxchgq %rdx, (%rdi)
+ jnz 0f
+ not %eax /* return success (nonzero) */
+#if defined(OPTERON_WORKAROUND_6323525)
+.mutex_atryenter_6323525_patch_point:
+ ret /* nop space for lfence */
+ nop
+ nop
+ nop
+#else /* OPTERON_WORKAROUND_6323525 */
+ ret
+#endif /* OPTERON_WORKAROUND_6323525 */
+0:
+ xorl %eax, %eax /* return failure */
+ ret
+ SET_SIZE(mutex_adaptive_tryenter)
+
+ .globl mutex_owner_running_critical_start
+
+ ENTRY(mutex_owner_running)
+mutex_owner_running_critical_start:
+ movq (%rdi), %r11 /* get owner field */
+ andq $MUTEX_THREAD, %r11 /* remove waiters bit */
+ cmpq $0, %r11 /* if free, skip */
+ je 1f /* go return 0 */
+ movq T_CPU(%r11), %r8 /* get owner->t_cpu */
+ movq CPU_THREAD(%r8), %r9 /* get t_cpu->cpu_thread */
+.mutex_owner_running_critical_end:
+ cmpq %r11, %r9 /* owner == running thread? */
+ je 2f /* yes, go return cpu */
+1:
+ xorq %rax, %rax /* return 0 */
+ ret
+2:
+ movq %r8, %rax /* return cpu */
+ ret
+ SET_SIZE(mutex_owner_running)
+
+ .globl mutex_owner_running_critical_size
+ .type mutex_owner_running_critical_size, @object
+ .align CPTRSIZE
+mutex_owner_running_critical_size:
+ .quad .mutex_owner_running_critical_end - mutex_owner_running_critical_start
+ SET_SIZE(mutex_owner_running_critical_size)
+
+ .globl mutex_exit_critical_start
+
+ ENTRY(mutex_exit)
+mutex_exit_critical_start: /* If interrupted, restart here */
+ movq %gs:CPU_THREAD, %rdx
+ cmpq %rdx, (%rdi)
+ jne mutex_vector_exit /* wrong type or wrong owner */
+ movq $0, (%rdi) /* clear owner AND lock */
+.mutex_exit_critical_end:
+.mutex_exit_lockstat_patch_point:
+ ret
+ movq %rdi, %rsi
+ movl $LS_MUTEX_EXIT_RELEASE, %edi
+ jmp lockstat_wrapper
+ SET_SIZE(mutex_exit)
+
+ .globl mutex_exit_critical_size
+ .type mutex_exit_critical_size, @object
+ .align CPTRSIZE
+mutex_exit_critical_size:
+ .quad .mutex_exit_critical_end - mutex_exit_critical_start
+ SET_SIZE(mutex_exit_critical_size)
+
+/*
+ * rw_enter() and rw_exit().
+ *
+ * These routines handle the simple cases of rw_enter (write-locking an unheld
+ * lock or read-locking a lock that's neither write-locked nor write-wanted)
+ * and rw_exit (no waiters or not the last reader). If anything complicated
+ * is going on we punt to rw_enter_sleep() and rw_exit_wakeup(), respectively.
+ */
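+
+/*
+ * A C sketch of the read-enter fast path, assuming GCC/clang __atomic
+ * builtins; the _SK constants stand in for the real rwlock bit layout and
+ * are illustrative only:
+ *
+ *	#include <stdint.h>
+ *
+ *	#define	RW_WRITE_LOCKED_SK	0x1UL
+ *	#define	RW_WRITE_WANTED_SK	0x2UL
+ *	#define	RW_READ_LOCK_SK		0x10UL	// one reader's worth of count
+ *
+ *	static int
+ *	rw_read_enter_fast(volatile uintptr_t *wwwh)
+ *	{
+ *		uintptr_t old = *wwwh;
+ *		uintptr_t nval;
+ *
+ *		if (old & (RW_WRITE_LOCKED_SK | RW_WRITE_WANTED_SK))
+ *			return (0);	// caller goes to rw_enter_sleep()
+ *		nval = old + RW_READ_LOCK_SK;	// bump the hold count
+ *		return (__atomic_compare_exchange_n(wwwh, &old, nval, 0,
+ *		    __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
+ *	}
+ */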
+
+ ENTRY(rw_enter)
+ cmpl $RW_WRITER, %esi
+ je .rw_write_enter
+ movq (%rdi), %rax /* rax = old rw_wwwh value */
+ testl $RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax
+ jnz rw_enter_sleep
+ leaq RW_READ_LOCK(%rax), %rdx /* rdx = new rw_wwwh value */
+ lock
+ cmpxchgq %rdx, (%rdi) /* try to grab read lock */
+ jnz rw_enter_sleep
+.rw_read_enter_lockstat_patch_point:
+ ret
+ movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
+ movq %rdi, %rsi /* rsi = lock ptr */
+ movl $LS_RW_ENTER_ACQUIRE, %edi
+ movl $RW_READER, %edx
+ jmp lockstat_wrapper_arg
+.rw_write_enter:
+ movq %gs:CPU_THREAD, %rdx
+ orq $RW_WRITE_LOCKED, %rdx /* rdx = write-locked value */
+ xorl %eax, %eax /* rax = unheld value */
+ lock
+ cmpxchgq %rdx, (%rdi) /* try to grab write lock */
+ jnz rw_enter_sleep
+
+#if defined(OPTERON_WORKAROUND_6323525)
+.rw_write_enter_lockstat_patch_point:
+.rw_write_enter_6323525_patch_point:
+ ret
+ nop
+ nop
+.rw_write_enter_lockstat_6323525_patch_point:
+ nop
+#else /* OPTERON_WORKAROUND_6323525 */
+.rw_write_enter_lockstat_patch_point:
+ ret
+#endif /* OPTERON_WORKAROUND_6323525 */
+
+ movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
+ movq %rdi, %rsi /* rsi = lock ptr */
+ movl $LS_RW_ENTER_ACQUIRE, %edi
+ movl $RW_WRITER, %edx
+ jmp lockstat_wrapper_arg
+ SET_SIZE(rw_enter)
+
+ ENTRY(rw_exit)
+ movq (%rdi), %rax /* rax = old rw_wwwh value */
+ cmpl $RW_READ_LOCK, %eax /* single-reader, no waiters? */
+ jne .rw_not_single_reader
+ xorl %edx, %edx /* rdx = new value (unheld) */
+.rw_read_exit:
+ lock
+ cmpxchgq %rdx, (%rdi) /* try to drop read lock */
+ jnz rw_exit_wakeup
+.rw_read_exit_lockstat_patch_point:
+ ret
+ movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
+ movq %rdi, %rsi /* rsi = lock ptr */
+ movl $LS_RW_EXIT_RELEASE, %edi
+ movl $RW_READER, %edx
+ jmp lockstat_wrapper_arg
+.rw_not_single_reader:
+ testl $RW_WRITE_LOCKED, %eax /* write-locked or write-wanted? */
+ jnz .rw_write_exit
+ leaq -RW_READ_LOCK(%rax), %rdx /* rdx = new value */
+ cmpl $RW_READ_LOCK, %edx
+ jge .rw_read_exit /* not last reader, safe to drop */
+ jmp rw_exit_wakeup /* last reader with waiters */
+.rw_write_exit:
+ movq %gs:CPU_THREAD, %rax /* rax = thread ptr */
+ xorl %edx, %edx /* rdx = new value (unheld) */
+	orq	$RW_WRITE_LOCKED, %rax	/* rax = write-locked value */
+ lock
+	cmpxchgq %rdx, (%rdi)		/* try to drop write lock */
+ jnz rw_exit_wakeup
+.rw_write_exit_lockstat_patch_point:
+ ret
+ movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
+	movq	%rdi, %rsi		/* rsi = lock ptr */
+ movl $LS_RW_EXIT_RELEASE, %edi
+ movl $RW_WRITER, %edx
+ jmp lockstat_wrapper_arg
+ SET_SIZE(rw_exit)
+
+#if defined(OPTERON_WORKAROUND_6323525)
+
+/*
+ * If it is necessary to patch the lock enter routines with the lfence
+ * workaround, workaround_6323525_patched is set to a non-zero value so that
+ * the lockstat_hot_patch routine can patch to the new location of the 'ret'
+ * instruction.
+ */
+ DGDEF3(workaround_6323525_patched, 4, 4)
+ .long 0
+
+#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size) \
+ movq $size, %rbx; \
+ movq $dstaddr, %r13; \
+ addq %rbx, %r13; \
+ movq $srcaddr, %r12; \
+ addq %rbx, %r12; \
+0: \
+ decq %r13; \
+ decq %r12; \
+ movzbl (%r12), %esi; \
+ movq $1, %rdx; \
+ movq %r13, %rdi; \
+ call hot_patch_kernel_text; \
+ decq %rbx; \
+ testq %rbx, %rbx; \
+ jg 0b;
+
+/*
+ * patch_workaround_6323525: provide workaround for 6323525
+ *
+ * The workaround is to place a fencing instruction (lfence) between the
+ * mutex operation and the subsequent read-modify-write instruction.
+ *
+ * This routine hot patches the lfence instruction on top of the space
+ * reserved by nops in the lock enter routines.
+ */
+ ENTRY_NP(patch_workaround_6323525)
+ pushq %rbp
+ movq %rsp, %rbp
+ pushq %r12
+ pushq %r13
+ pushq %rbx
+
+ /*
+	 * Tell lockstat_hot_patch() to use the alternate lockstat workaround
+ * 6323525 patch points (points past the lfence instruction to the
+ * new ret) when workaround_6323525_patched is set.
+ */
+ movl $1, workaround_6323525_patched
+
+ /*
+ * patch ret/nop/nop/nop to lfence/ret at the end of the lock enter
+ * routines. The 4 bytes are patched in reverse order so that the
+ * existing ret is overwritten last.  This provides lock enter
+ * sanity during the intermediate patching stages.
+ */
+ HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
+ HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
+ HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
+ HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)
+
+ popq %rbx
+ popq %r13
+ popq %r12
+ movq %rbp, %rsp
+ popq %rbp
+ ret
+_lfence_insn:
+ lfence
+ ret
+ SET_SIZE(patch_workaround_6323525)
+
+
+#endif /* OPTERON_WORKAROUND_6323525 */
+
+
+#define HOT_PATCH(addr, event, active_instr, normal_instr, len) \
+ movq $normal_instr, %rsi; \
+ movq $active_instr, %rdi; \
+ leaq lockstat_probemap(%rip), %rax; \
+ movl _MUL(event, DTRACE_IDSIZE)(%rax), %eax; \
+ testl %eax, %eax; \
+ jz 9f; \
+ movq %rdi, %rsi; \
+9: \
+ movq $len, %rdx; \
+ movq $addr, %rdi; \
+ call hot_patch_kernel_text
+
+ ENTRY(lockstat_hot_patch)
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+
+#if defined(OPTERON_WORKAROUND_6323525)
+ cmpl $0, workaround_6323525_patched
+ je 1f
+ HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point,
+ LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ jmp 2f
+1:
+ HOT_PATCH(.mutex_enter_lockstat_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_write_enter_lockstat_patch_point,
+ LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+2:
+#else /* OPTERON_WORKAROUND_6323525 */
+ HOT_PATCH(.mutex_enter_lockstat_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_write_enter_lockstat_patch_point,
+ LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+#endif /* !OPTERON_WORKAROUND_6323525 */
+ HOT_PATCH(.mutex_exit_lockstat_patch_point,
+ LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_read_enter_lockstat_patch_point,
+ LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_write_exit_lockstat_patch_point,
+ LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_read_exit_lockstat_patch_point,
+ LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.lock_set_lockstat_patch_point,
+ LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.lock_try_lockstat_patch_point,
+ LS_LOCK_TRY_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.lock_clear_lockstat_patch_point,
+ LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.lock_set_spl_lockstat_patch_point,
+ LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+
+ HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT,
+ LS_LOCK_CLEAR_SPLX_RELEASE,
+ LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1);
+ leave /* unwind stack */
+ ret
+ SET_SIZE(lockstat_hot_patch)
+
+ ENTRY(membar_enter)
+ ALTENTRY(membar_exit)
+ ALTENTRY(membar_sync)
+ mfence /* lighter weight than lock; xorq $0,(%rsp) */
+ ret
+ SET_SIZE(membar_sync)
+ SET_SIZE(membar_exit)
+ SET_SIZE(membar_enter)
+
+ ENTRY(membar_producer)
+ sfence
+ ret
+ SET_SIZE(membar_producer)
+
+ ENTRY(membar_consumer)
+ lfence
+ ret
+ SET_SIZE(membar_consumer)
+
+/*
+ * thread_onproc()
+ * Set thread in onproc state for the specified CPU.
+ * Also set the thread lock pointer to the CPU's onproc lock.
+ * Since the new lock isn't held, the store ordering is important.
+ * If not done in assembler, the compiler could reorder the stores.
+ */
+
+ ENTRY(thread_onproc)
+ addq $CPU_THREAD_LOCK, %rsi /* pointer to disp_lock while running */
+ movl $ONPROC_THREAD, T_STATE(%rdi) /* set state to TS_ONPROC */
+ movq %rsi, T_LOCKP(%rdi) /* store new lock pointer */
+ ret
+ SET_SIZE(thread_onproc)
+
+/*
+ * mutex_delay_default(void)
+ * Spins for a few hundred processor cycles and returns to the caller.
+ */
+
+ ENTRY(mutex_delay_default)
+ movq $92,%r11
+0: decq %r11
+ jg 0b
+ ret
+ SET_SIZE(mutex_delay_default)
+
diff --git a/usr/src/uts/intel/ml/modstubs.s b/usr/src/uts/intel/ml/modstubs.s
new file mode 100644
index 0000000000..4143c181a3
--- /dev/null
+++ b/usr/src/uts/intel/ml/modstubs.s
@@ -0,0 +1,1320 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
+ * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
+ */
+
+#include <sys/asm_linkage.h>
+
+#include "assym.h"
+
+/*
+ * !!!!!!!! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! !!!!!!!!
+ *
+ * For functions which are either STUBs or WSTUBs, the actual function
+ * needs to be called using a 'call' instruction because of the preamble
+ * and postamble (i.e., mod_hold_stub and mod_release_stub) around the
+ * function call.  Because of this we need to copy the arguments for the
+ * real function.  On Intel we can't tell how many arguments are on the
+ * stack, so we either have to copy everything between esp and ebp or
+ * copy only a fixed number (MAXNARG - defined here) for all the stub
+ * functions.  Currently we are using MAXNARG (it is a kludge but
+ * worth it?!).
+ *
+ * NOTE: Use NO_UNLOAD_STUBs if the module is NOT unloadable once it is
+ * loaded.
+ */
+#define MAXNARG 10
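+
+/*
+ * Conceptually, stubs_common_code below behaves like the following C sketch
+ * (the _sk names and prototypes are illustrative stand-ins, not the real
+ * modctl interfaces):
+ *
+ *	typedef struct stub_info_sk {
+ *		long (*mods_func)(long, long, long, long, long,
+ *		    long, long, long, long, long);	// the real function
+ *		long (*mods_retfcn)(void);		// fallback on failure
+ *	} stub_info_sk_t;
+ *
+ *	extern int mod_hold_stub_sk(stub_info_sk_t *);	// -1 on failure
+ *	extern void mod_release_stub_sk(stub_info_sk_t *);
+ *
+ *	static long
+ *	stub_dispatch_sk(stub_info_sk_t *sip, long a0, long a1, long a2,
+ *	    long a3, long a4, long a5, long a6, long a7, long a8, long a9)
+ *	{
+ *		long rv;
+ *
+ *		if (mod_hold_stub_sk(sip) == -1)
+ *			return (sip->mods_retfcn());	// could not load
+ *		rv = sip->mods_func(a0, a1, a2, a3, a4, a5,
+ *		    a6, a7, a8, a9);	// forward all MAXNARG arguments
+ *		mod_release_stub_sk(sip);	// drop the hold taken above
+ *		return (rv);
+ *	}
+ */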
+
+/*
+ * WARNING: there is no check for forgetting to write END_MODULE,
+ * and if you do, the kernel will most likely crash. Be careful
+ *
+ * This file assumes that all of the contributions to the data segment
+ * will be contiguous in the output file, even though they are separated
+ * by pieces of text. This is safe for all assemblers I know of now...
+ */
+
+/*
+ * This file uses ansi preprocessor features:
+ *
+ * 1. #define mac(a) extra_ ## a --> mac(x) expands to extra_a
+ * The old version of this is
+ * #define mac(a) extra_/.*.*./a
+ * but this fails if the argument has spaces "mac ( x )"
+ * (Ignore the dots above, I had to put them in to keep this a comment.)
+ *
+ * 2. #define mac(a) #a --> mac(x) expands to "x"
+ * The old version is
+ * #define mac(a) "a"
+ *
+ * For some reason, the 5.0 preprocessor isn't happy with the above usage.
+ * For now, we're not using these ansi features.
+ *
+ * The reason is that "the 5.0 ANSI preprocessor" is built into the compiler
+ * and is a tokenizing preprocessor. This means, when confronted by something
+ * other than C token generation rules, strange things occur. In this case,
+ * when confronted by an assembly file, it would turn the token ".globl" into
+ * two tokens "." and "globl". For this reason, the traditional, non-ANSI
+ * preprocessor is used on assembly files.
+ *
+ * It would be desirable to have a non-tokenizing cpp (accp?) to use for this.
+ */
+
+/*
+ * This file contains the stubs routines for modules which can be autoloaded.
+ */
+
+/*
+ * See the 'struct mod_modinfo' definition to see what this declaration
+ * is trying to achieve here.
+ */
+#define MODULE(module,namespace) \
+ .data; \
+module/**/_modname: \
+ .string "namespace/module"; \
+ SET_SIZE(module/**/_modname); \
+ .align CPTRSIZE; \
+ .globl module/**/_modinfo; \
+ .type module/**/_modinfo, @object; \
+module/**/_modinfo: \
+ .quad module/**/_modname; \
+ .quad 0 /* storage for modctl pointer */
+
+ /* then mod_stub_info structures follow until a mods_func_adr is 0 */
+
+/* this puts a 0 where the next mods_func_adr would be */
+#define END_MODULE(module) \
+ .data; \
+ .align CPTRSIZE; \
+ .quad 0; \
+ SET_SIZE(module/**/_modinfo)
+
+/*
+ * The data section in the stub_common macro is the
+ * mod_stub_info structure for the stub function
+ */
+
+#define STUB_COMMON(module, fcnname, install_fcn, retfcn, weak) \
+ ENTRY(fcnname); \
+ leaq fcnname/**/_info(%rip), %rax; \
+ cmpl $0, MODS_FLAG(%rax); /* weak? */ \
+ je stubs_common_code; /* not weak */ \
+ testb $MODS_INSTALLED, MODS_FLAG(%rax); /* installed? */ \
+ jne stubs_common_code; /* yes, do the mod_hold */ \
+ movq MODS_RETFCN(%rax), %rax; /* no, load retfcn */ \
+ INDIRECT_JMP_REG(rax); /* no, jump to retfcn */ \
+ SET_SIZE(fcnname); \
+ .data; \
+ .align CPTRSIZE; \
+ .type fcnname/**/_info, @object; \
+fcnname/**/_info: \
+ .quad install_fcn; /* 0 */ \
+ .quad module/**/_modinfo; /* 0x8 */ \
+ .quad fcnname; /* 0x10 */ \
+ .quad retfcn; /* 0x18 */ \
+ .long weak; /* 0x20 */ \
+ SET_SIZE(fcnname/**/_info)
+
+#define STUB_NO_UNLOADABLE(module, fcnname, install_fcn, retfcn, weak) \
+ ENTRY(fcnname); \
+ leaq fcnname/**/_info(%rip), %rax; \
+ testb $MODS_INSTALLED, MODS_FLAG(%rax); /* installed? */ \
+ je 5f; /* no */ \
+ movq MODS_INSTFCN(%rax), %rax; /* yes, load install_fcn */ \
+ INDIRECT_JMP_REG(rax); /* yes, jump to install_fcn */ \
+5: testb $MODS_WEAK, MODS_FLAG(%rax); /* weak? */ \
+ je stubs_common_code; /* no, do mod load */ \
+ movq MODS_RETFCN(%rax), %rax; /* yes, load retfcn */ \
+ INDIRECT_JMP_REG(rax); /* yes, jump to retfcn */ \
+ SET_SIZE(fcnname); \
+ .data; \
+ .align CPTRSIZE; \
+ .type fcnname/**/_info, @object; \
+fcnname/**/_info: \
+ .quad install_fcn; /* 0 */ \
+ .quad module/**/_modinfo; /* 0x8 */ \
+ .quad fcnname; /* 0x10 */ \
+ .quad retfcn; /* 0x18 */ \
+ .long weak; /* 0x20 */ \
+ SET_SIZE(fcnname/**/_info)
+
+/*
+ * We branch here with the fcnname_info pointer in %rax
+ */
+ ENTRY_NP(stubs_common_code)
+ .globl mod_hold_stub
+ .globl mod_release_stub
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $0x10, %rsp
+ movq %r15, (%rsp) /* (caller saved) */
+ movq %rax, %r15 /* stash the fcnname_info pointer */
+ /*
+ * save incoming register arguments
+ */
+ pushq %rdi
+ pushq %rsi
+ pushq %rdx
+ pushq %rcx
+ pushq %r8
+ pushq %r9
+ /* (next 4 args, if any, are already on the stack above %rbp) */
+ movq %r15, %rdi
+ call mod_hold_stub /* mod_hold_stub(mod_stub_info *) */
+ cmpl $-1, %eax /* error? */
+ jne .L1
+ movq 0x18(%r15), %rax
+ INDIRECT_CALL_REG(rax)
+ addq $0x30, %rsp
+ jmp .L2
+.L1:
+ /*
+ * copy MAXNARG == 10 incoming arguments
+ */
+ popq %r9
+ popq %r8
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ /*
+ * stack:
+ * arg9 0x38(%rsp)
+ * arg8 0x30(%rsp)
+ * arg7 0x28(%rsp)
+ * arg6 0x20(%rsp)
+ * saved %rip 0x18(%rsp)
+ * saved %rbp 0x10(%rsp)
+ * <pad> 0x8(%rsp)
+ * saved %r15 0x0(%rsp)
+ */
+ movl $MAXNARG - 6 + 3, %r11d
+ pushq (%rsp, %r11, 8)
+ pushq (%rsp, %r11, 8)
+ pushq (%rsp, %r11, 8)
+ pushq (%rsp, %r11, 8)
+ movq (%r15), %rax
+ INDIRECT_CALL_REG(rax) /* call the stub fn(arg, ..) */
+ addq $0x20, %rsp /* pop off last 4 args */
+ pushq %rax /* save any return values */
+ pushq %rdx
+ movq %r15, %rdi
+ call mod_release_stub /* release hold on module */
+ popq %rdx /* restore return values */
+ popq %rax
+.L2:
+ popq %r15
+ leave
+ ret
+ SET_SIZE(stubs_common_code)
+
+#define STUB(module, fcnname, retfcn) \
+ STUB_COMMON(module, fcnname, mod_hold_stub, retfcn, 0)
+
+/*
+ * "weak stub", don't load on account of this call
+ */
+#define WSTUB(module, fcnname, retfcn) \
+ STUB_COMMON(module, fcnname, retfcn, retfcn, MODS_WEAK)
+
+/*
+ * "non-unloadable stub", don't bother 'holding' module if it's already loaded
+ * since the module cannot be unloaded.
+ *
+ * User *MUST* guarantee the module is not unloadable (no _fini routine).
+ */
+#define NO_UNLOAD_STUB(module, fcnname, retfcn) \
+ STUB_NO_UNLOADABLE(module, fcnname, retfcn, retfcn, MODS_NOUNLOAD)
+
+/*
+ * "weak stub" for non-unloadable module, don't load on account of this call
+ */
+#define NO_UNLOAD_WSTUB(module, fcnname, retfcn) \
+ STUB_NO_UNLOADABLE(module, fcnname, retfcn, retfcn, MODS_NOUNLOAD|MODS_WEAK)
+
+/*
+ * this is just a marker for the beginning area of text that contains stubs
+ */
+ ENTRY_NP(stubs_base)
+ nop
+
+/*
+ * WARNING WARNING WARNING!!!!!!
+ *
+ * On the MODULE macro you MUST NOT use any spaces!!! They are
+ * significant to the preprocessor. With ansi c there is a way around this
+ * but for some reason (yet to be investigated) ansi didn't work for other
+ * reasons!
+ *
+ * When zero is used as the return function, the system will call
+ * panic if the stub can't be resolved.
+ */
+
+/*
+ * Stubs for devfs. A non-unloadable module.
+ */
+
+#ifndef DEVFS_MODULE
+ MODULE(devfs,fs);
+ NO_UNLOAD_STUB(devfs, devfs_clean, nomod_minus_one);
+ NO_UNLOAD_STUB(devfs, devfs_lookupname, nomod_minus_one);
+ NO_UNLOAD_STUB(devfs, devfs_walk, nomod_minus_one);
+ NO_UNLOAD_STUB(devfs, devfs_devpolicy, nomod_minus_one);
+ NO_UNLOAD_STUB(devfs, devfs_reset_perm, nomod_minus_one);
+ NO_UNLOAD_STUB(devfs, devfs_remdrv_cleanup, nomod_minus_one);
+ END_MODULE(devfs);
+#endif
+
+#ifndef DEV_MODULE
+ MODULE(dev,fs);
+ NO_UNLOAD_STUB(dev, sdev_modctl_readdir, nomod_minus_one);
+ NO_UNLOAD_STUB(dev, sdev_modctl_readdir_free, nomod_minus_one);
+ NO_UNLOAD_STUB(dev, devname_filename_register, nomod_minus_one);
+ NO_UNLOAD_STUB(dev, sdev_modctl_devexists, nomod_minus_one);
+ NO_UNLOAD_STUB(dev, devname_profile_update, nomod_minus_one);
+ NO_UNLOAD_STUB(dev, sdev_devstate_change, nomod_minus_one);
+ NO_UNLOAD_STUB(dev, devvt_getvnodeops, nomod_minus_one);
+ NO_UNLOAD_STUB(dev, devpts_getvnodeops, nomod_zero);
+ END_MODULE(dev);
+#endif
+
+/*
+ * Stubs for specfs. A non-unloadable module.
+ */
+
+#ifndef SPEC_MODULE
+ MODULE(specfs,fs);
+ NO_UNLOAD_STUB(specfs, common_specvp, nomod_zero);
+ NO_UNLOAD_STUB(specfs, makectty, nomod_zero);
+ NO_UNLOAD_STUB(specfs, makespecvp, nomod_zero);
+ NO_UNLOAD_STUB(specfs, smark, nomod_zero);
+ NO_UNLOAD_STUB(specfs, spec_segmap, nomod_einval);
+ NO_UNLOAD_STUB(specfs, specfind, nomod_zero);
+ NO_UNLOAD_STUB(specfs, specvp, nomod_zero);
+ NO_UNLOAD_STUB(specfs, devi_stillreferenced, nomod_zero);
+ NO_UNLOAD_STUB(specfs, spec_getvnodeops, nomod_zero);
+ NO_UNLOAD_STUB(specfs, spec_char_map, nomod_zero);
+ NO_UNLOAD_STUB(specfs, specvp_devfs, nomod_zero);
+ NO_UNLOAD_STUB(specfs, spec_assoc_vp_with_devi, nomod_void);
+ NO_UNLOAD_STUB(specfs, spec_hold_devi_by_vp, nomod_zero);
+ NO_UNLOAD_STUB(specfs, spec_snode_walk, nomod_void);
+ NO_UNLOAD_STUB(specfs, spec_devi_open_count, nomod_minus_one);
+ NO_UNLOAD_STUB(specfs, spec_is_clone, nomod_zero);
+ NO_UNLOAD_STUB(specfs, spec_is_selfclone, nomod_zero);
+ NO_UNLOAD_STUB(specfs, spec_fence_snode, nomod_minus_one);
+ NO_UNLOAD_STUB(specfs, spec_unfence_snode, nomod_minus_one);
+ END_MODULE(specfs);
+#endif
+
+
+/*
+ * Stubs for sockfs. A non-unloadable module.
+ */
+#ifndef SOCK_MODULE
+ MODULE(sockfs,fs);
+ NO_UNLOAD_STUB(sockfs, so_socket, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, so_socketpair, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, bind, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, listen, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, accept, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, connect, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, shutdown, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, recv, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, recvfrom, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, recvmsg, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, send, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sendmsg, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sendto, nomod_zero);
+#ifdef _SYSCALL32_IMPL
+ NO_UNLOAD_STUB(sockfs, recv32, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, recvfrom32, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, send32, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sendto32, nomod_zero);
+#endif /* _SYSCALL32_IMPL */
+ NO_UNLOAD_STUB(sockfs, getpeername, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, getsockname, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, getsockopt, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, setsockopt, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sockconfig, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sock_getmsg, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sock_putmsg, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sosendfile64, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, snf_segmap, nomod_einval);
+ NO_UNLOAD_STUB(sockfs, sock_getfasync, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, nl7c_sendfilev, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, sotpi_sototpi, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, socket_sendmblk, nomod_zero);
+ NO_UNLOAD_STUB(sockfs, socket_setsockopt, nomod_zero);
+ END_MODULE(sockfs);
+#endif
+
+/*
+ * IPsec stubs.
+ */
+
+#ifndef IPSECAH_MODULE
+ MODULE(ipsecah,drv);
+ WSTUB(ipsecah, ipsec_construct_inverse_acquire, nomod_zero);
+ WSTUB(ipsecah, sadb_acquire, nomod_zero);
+ WSTUB(ipsecah, ipsecah_algs_changed, nomod_zero);
+ WSTUB(ipsecah, sadb_alg_update, nomod_zero);
+ WSTUB(ipsecah, sadb_unlinkassoc, nomod_zero);
+ WSTUB(ipsecah, sadb_insertassoc, nomod_zero);
+ WSTUB(ipsecah, ipsecah_in_assocfailure, nomod_zero);
+ WSTUB(ipsecah, sadb_set_lpkt, nomod_zero);
+ WSTUB(ipsecah, ipsecah_icmp_error, nomod_zero);
+ END_MODULE(ipsecah);
+#endif
+
+#ifndef IPSECESP_MODULE
+ MODULE(ipsecesp,drv);
+ WSTUB(ipsecesp, ipsecesp_fill_defs, nomod_zero);
+ WSTUB(ipsecesp, ipsecesp_algs_changed, nomod_zero);
+ WSTUB(ipsecesp, ipsecesp_in_assocfailure, nomod_zero);
+ WSTUB(ipsecesp, ipsecesp_init_funcs, nomod_zero);
+ WSTUB(ipsecesp, ipsecesp_icmp_error, nomod_zero);
+ WSTUB(ipsecesp, ipsecesp_send_keepalive, nomod_zero);
+ END_MODULE(ipsecesp);
+#endif
+
+#ifndef KEYSOCK_MODULE
+ MODULE(keysock, drv);
+ WSTUB(keysock, keysock_spdsock_wput_iocdata, nomod_void);
+ WSTUB(keysock, keysock_plumb_ipsec, nomod_zero);
+ WSTUB(keysock, keysock_extended_reg, nomod_zero);
+ WSTUB(keysock, keysock_next_seq, nomod_zero);
+ END_MODULE(keysock);
+#endif
+
+#ifndef SPDSOCK_MODULE
+ MODULE(spdsock,drv);
+ WSTUB(spdsock, spdsock_update_pending_algs, nomod_zero);
+ END_MODULE(spdsock);
+#endif
+
+/*
+ * Stubs for nfs common code.
+ * XXX nfs_getvnodeops should go away with removal of kludge in vnode.c
+ */
+#ifndef NFS_MODULE
+ MODULE(nfs,fs);
+ WSTUB(nfs, nfs_getvnodeops, nomod_zero);
+ WSTUB(nfs, nfs_perror, nomod_zero);
+ WSTUB(nfs, nfs_cmn_err, nomod_zero);
+ WSTUB(nfs, clcleanup_zone, nomod_zero);
+ WSTUB(nfs, clcleanup4_zone, nomod_zero);
+ END_MODULE(nfs);
+#endif
+
+
+/*
+ * Stubs for nfs_dlboot (diskless booting).
+ */
+#ifndef NFS_DLBOOT_MODULE
+ MODULE(nfs_dlboot,misc);
+ STUB(nfs_dlboot, mount_root, nomod_minus_one);
+ STUB(nfs_dlboot, dhcpinit, nomod_minus_one);
+ END_MODULE(nfs_dlboot);
+#endif
+
+/*
+ * Stubs for nfs server-only code.
+ */
+#ifndef NFSSRV_MODULE
+ MODULE(nfssrv,misc);
+ STUB(nfssrv, exportfs, nomod_minus_one);
+ STUB(nfssrv, nfs_getfh, nomod_minus_one);
+ STUB(nfssrv, nfsl_flush, nomod_minus_one);
+ STUB(nfssrv, rfs4_check_delegated, nomod_zero);
+ STUB(nfssrv, mountd_args, nomod_minus_one);
+ NO_UNLOAD_STUB(nfssrv, rdma_start, nomod_zero);
+ NO_UNLOAD_STUB(nfssrv, nfs_svc, nomod_zero);
+ END_MODULE(nfssrv);
+#endif
+
+/*
+ * Stubs for kernel lock manager.
+ */
+#ifndef KLM_MODULE
+ MODULE(klmmod,misc);
+ NO_UNLOAD_STUB(klmmod, lm_svc, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_shutdown, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_unexport, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_cprresume, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_cprsuspend, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_safelock, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_safemap, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_has_sleep, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_free_config, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_vp_active, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_get_sysid, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_rel_sysid, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_alloc_sysidt, nomod_minus_one);
+ NO_UNLOAD_STUB(klmmod, lm_free_sysidt, nomod_zero);
+ NO_UNLOAD_STUB(klmmod, lm_sysidt, nomod_minus_one);
+ END_MODULE(klmmod);
+#endif
+
+#ifndef KLMOPS_MODULE
+ MODULE(klmops,misc);
+ NO_UNLOAD_STUB(klmops, lm_frlock, nomod_zero);
+ NO_UNLOAD_STUB(klmops, lm4_frlock, nomod_zero);
+ NO_UNLOAD_STUB(klmops, lm_shrlock, nomod_zero);
+ NO_UNLOAD_STUB(klmops, lm4_shrlock, nomod_zero);
+ NO_UNLOAD_STUB(klmops, lm_nlm_dispatch, nomod_zero);
+ NO_UNLOAD_STUB(klmops, lm_nlm4_dispatch, nomod_zero);
+ NO_UNLOAD_STUB(klmops, lm_nlm_reclaim, nomod_zero);
+ NO_UNLOAD_STUB(klmops, lm_nlm4_reclaim, nomod_zero);
+ NO_UNLOAD_STUB(klmops, lm_register_lock_locally, nomod_zero);
+ END_MODULE(klmops);
+#endif
+
+/*
+ * Stubs for kernel TLI module
+ * XXX currently we never allow this to unload
+ */
+#ifndef TLI_MODULE
+ MODULE(tlimod,misc);
+ NO_UNLOAD_STUB(tlimod, t_kopen, nomod_minus_one);
+ NO_UNLOAD_STUB(tlimod, t_kunbind, nomod_zero);
+ NO_UNLOAD_STUB(tlimod, t_kadvise, nomod_zero);
+ NO_UNLOAD_STUB(tlimod, t_krcvudata, nomod_zero);
+ NO_UNLOAD_STUB(tlimod, t_ksndudata, nomod_zero);
+ NO_UNLOAD_STUB(tlimod, t_kalloc, nomod_zero);
+ NO_UNLOAD_STUB(tlimod, t_kbind, nomod_zero);
+ NO_UNLOAD_STUB(tlimod, t_kclose, nomod_zero);
+ NO_UNLOAD_STUB(tlimod, t_kspoll, nomod_zero);
+ NO_UNLOAD_STUB(tlimod, t_kfree, nomod_zero);
+ NO_UNLOAD_STUB(tlimod, t_koptmgmt, nomod_zero);
+ END_MODULE(tlimod);
+#endif
+
+/*
+ * Stubs for kernel RPC module
+ * XXX currently we never allow this to unload
+ */
+#ifndef RPC_MODULE
+ MODULE(rpcmod,strmod);
+ NO_UNLOAD_STUB(rpcmod, clnt_tli_kcreate, nomod_minus_one);
+ NO_UNLOAD_STUB(rpcmod, svc_tli_kcreate, nomod_minus_one);
+ NO_UNLOAD_STUB(rpcmod, bindresvport, nomod_minus_one);
+ NO_UNLOAD_STUB(rpcmod, rdma_register_mod, nomod_minus_one);
+ NO_UNLOAD_STUB(rpcmod, rdma_unregister_mod, nomod_minus_one);
+ NO_UNLOAD_STUB(rpcmod, svc_queuereq, nomod_minus_one);
+ NO_UNLOAD_STUB(rpcmod, clist_add, nomod_minus_one);
+ END_MODULE(rpcmod);
+#endif
+
+/*
+ * Stubs for des
+ */
+#ifndef DES_MODULE
+ MODULE(des,misc);
+ STUB(des, cbc_crypt, nomod_zero);
+ STUB(des, ecb_crypt, nomod_zero);
+ STUB(des, _des_crypt, nomod_zero);
+ END_MODULE(des);
+#endif
+
+/*
+ * Stubs for procfs. A non-unloadable module.
+ */
+#ifndef PROC_MODULE
+ MODULE(procfs,fs);
+ NO_UNLOAD_STUB(procfs, prfree, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prexit, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prlwpfree, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prlwpexit, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prinvalidate, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prnsegs, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prgetcred, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prgetpriv, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prgetprivsize, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prgetsecflags, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prgetstatus, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prgetlwpstatus, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prgetpsinfo, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prgetlwpsinfo, nomod_zero);
+ NO_UNLOAD_STUB(procfs, oprgetstatus, nomod_zero);
+ NO_UNLOAD_STUB(procfs, oprgetpsinfo, nomod_zero);
+#ifdef _SYSCALL32_IMPL
+ NO_UNLOAD_STUB(procfs, prgetstatus32, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prgetlwpstatus32, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prgetpsinfo32, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prgetlwpsinfo32, nomod_zero);
+ NO_UNLOAD_STUB(procfs, oprgetstatus32, nomod_zero);
+ NO_UNLOAD_STUB(procfs, oprgetpsinfo32, nomod_zero);
+ NO_UNLOAD_STUB(procfs, psinfo_kto32, nomod_zero);
+ NO_UNLOAD_STUB(procfs, lwpsinfo_kto32, nomod_zero);
+#endif /* _SYSCALL32_IMPL */
+ NO_UNLOAD_STUB(procfs, prnotify, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prexecstart, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prexecend, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prrelvm, nomod_zero);
+ NO_UNLOAD_STUB(procfs, prbarrier, nomod_zero);
+ NO_UNLOAD_STUB(procfs, estimate_msacct, nomod_zero);
+ NO_UNLOAD_STUB(procfs, pr_getprot, nomod_zero);
+ NO_UNLOAD_STUB(procfs, pr_getprot_done, nomod_zero);
+ NO_UNLOAD_STUB(procfs, pr_getsegsize, nomod_zero);
+ NO_UNLOAD_STUB(procfs, pr_isobject, nomod_zero);
+ NO_UNLOAD_STUB(procfs, pr_isself, nomod_zero);
+ NO_UNLOAD_STUB(procfs, pr_allstopped, nomod_zero);
+ NO_UNLOAD_STUB(procfs, pr_free_watched_pages, nomod_zero);
+ END_MODULE(procfs);
+#endif
+
+/*
+ * Stubs for fifofs
+ */
+#ifndef FIFO_MODULE
+ MODULE(fifofs,fs);
+ NO_UNLOAD_STUB(fifofs, fifovp, nomod_zero);
+ NO_UNLOAD_STUB(fifofs, fifo_getinfo, nomod_zero);
+ NO_UNLOAD_STUB(fifofs, fifo_vfastoff, nomod_zero);
+ END_MODULE(fifofs);
+#endif
+
+/*
+ * Stubs for ufs
+ *
+ * This is needed to support the old quotactl system call.
+ * When the old sysent stuff goes away, this will need to be revisited.
+ */
+#ifndef UFS_MODULE
+ MODULE(ufs,fs);
+ STUB(ufs, quotactl, nomod_minus_one);
+ END_MODULE(ufs);
+#endif
+
+/*
+ * Stubs for zfs
+ */
+#ifndef ZFS_MODULE
+ MODULE(zfs,fs);
+ STUB(zfs, dsl_prop_get, nomod_minus_one);
+ STUB(zfs, spa_boot_init, nomod_minus_one);
+ STUB(zfs, zfs_prop_to_name, nomod_zero);
+ END_MODULE(zfs);
+#endif
+
+/*
+ * Stubs for dcfs
+ */
+#ifndef DCFS_MODULE
+ MODULE(dcfs,fs);
+ STUB(dcfs, decompvp, 0);
+ END_MODULE(dcfs);
+#endif
+
+/*
+ * Stubs for namefs
+ */
+#ifndef NAMEFS_MODULE
+ MODULE(namefs,fs);
+ STUB(namefs, nm_unmountall, 0);
+ END_MODULE(namefs);
+#endif
+
+/*
+ * Stubs for sysdc
+ */
+#ifndef SDC_MODULE
+ MODULE(SDC,sched);
+ NO_UNLOAD_STUB(SDC, sysdc_thread_enter, nomod_zero);
+ END_MODULE(SDC);
+#endif
+
+/*
+ * Stubs for ts_dptbl
+ */
+#ifndef TS_DPTBL_MODULE
+ MODULE(TS_DPTBL,sched);
+ STUB(TS_DPTBL, ts_getdptbl, 0);
+ STUB(TS_DPTBL, ts_getkmdpris, 0);
+ STUB(TS_DPTBL, ts_getmaxumdpri, 0);
+ END_MODULE(TS_DPTBL);
+#endif
+
+/*
+ * Stubs for rt_dptbl
+ */
+#ifndef RT_DPTBL_MODULE
+ MODULE(RT_DPTBL,sched);
+ STUB(RT_DPTBL, rt_getdptbl, 0);
+ END_MODULE(RT_DPTBL);
+#endif
+
+/*
+ * Stubs for ia_dptbl
+ */
+#ifndef IA_DPTBL_MODULE
+ MODULE(IA_DPTBL,sched);
+ STUB(IA_DPTBL, ia_getdptbl, nomod_zero);
+ STUB(IA_DPTBL, ia_getkmdpris, nomod_zero);
+ STUB(IA_DPTBL, ia_getmaxumdpri, nomod_zero);
+ END_MODULE(IA_DPTBL);
+#endif
+
+/*
+ * Stubs for FSS scheduler
+ */
+#ifndef FSS_MODULE
+ MODULE(FSS,sched);
+ WSTUB(FSS, fss_allocbuf, nomod_zero);
+ WSTUB(FSS, fss_freebuf, nomod_zero);
+ WSTUB(FSS, fss_changeproj, nomod_zero);
+ WSTUB(FSS, fss_changepset, nomod_zero);
+ END_MODULE(FSS);
+#endif
+
+/*
+ * Stubs for fx_dptbl
+ */
+#ifndef FX_DPTBL_MODULE
+ MODULE(FX_DPTBL,sched);
+ STUB(FX_DPTBL, fx_getdptbl, 0);
+ STUB(FX_DPTBL, fx_getmaxumdpri, 0);
+ END_MODULE(FX_DPTBL);
+#endif
+
+/*
+ * Stubs for bootdev
+ */
+#ifndef BOOTDEV_MODULE
+ MODULE(bootdev,misc);
+ STUB(bootdev, i_promname_to_devname, 0);
+ STUB(bootdev, i_convert_boot_device_name, 0);
+ END_MODULE(bootdev);
+#endif
+
+/*
+ * Stubs for strplumb...
+ */
+#ifndef STRPLUMB_MODULE
+ MODULE(strplumb,misc);
+ STUB(strplumb, strplumb, 0);
+ STUB(strplumb, strplumb_load, 0);
+ STUB(strplumb, strplumb_get_netdev_path, 0);
+ END_MODULE(strplumb);
+#endif
+
+/*
+ * Stubs for console configuration module
+ */
+#ifndef CONSCONFIG_MODULE
+ MODULE(consconfig,misc);
+ STUB(consconfig, consconfig, 0);
+ STUB(consconfig, consconfig_get_usb_kb_path, 0);
+ STUB(consconfig, consconfig_get_usb_ms_path, 0);
+ STUB(consconfig, consconfig_get_plat_fbpath, 0);
+ STUB(consconfig, consconfig_console_is_ready, 0);
+ END_MODULE(consconfig);
+#endif
+
+/*
+ * Stubs for accounting.
+ */
+#ifndef SYSACCT_MODULE
+ MODULE(sysacct,sys);
+ NO_UNLOAD_WSTUB(sysacct, acct, nomod_zero);
+ NO_UNLOAD_WSTUB(sysacct, acct_fs_in_use, nomod_zero);
+ END_MODULE(sysacct);
+#endif
+
+/*
+ * Stubs for semaphore routines. sem.c
+ */
+#ifndef SEMSYS_MODULE
+ MODULE(semsys,sys);
+ NO_UNLOAD_WSTUB(semsys, semexit, nomod_zero);
+ END_MODULE(semsys);
+#endif
+
+/*
+ * Stubs for shmem routines. shm.c
+ */
+#ifndef SHMSYS_MODULE
+ MODULE(shmsys,sys);
+ NO_UNLOAD_WSTUB(shmsys, shmexit, nomod_zero);
+ NO_UNLOAD_WSTUB(shmsys, shmfork, nomod_zero);
+ NO_UNLOAD_WSTUB(shmsys, shmgetid, nomod_minus_one);
+ END_MODULE(shmsys);
+#endif
+
+/*
+ * Stubs for doors
+ */
+#ifndef DOOR_MODULE
+ MODULE(doorfs,sys);
+ NO_UNLOAD_WSTUB(doorfs, door_slam, nomod_zero);
+ NO_UNLOAD_WSTUB(doorfs, door_exit, nomod_zero);
+ NO_UNLOAD_WSTUB(doorfs, door_revoke_all, nomod_zero);
+ NO_UNLOAD_WSTUB(doorfs, door_fork, nomod_zero);
+ NO_UNLOAD_STUB(doorfs, door_upcall, nomod_einval);
+ NO_UNLOAD_STUB(doorfs, door_ki_create, nomod_einval);
+ NO_UNLOAD_STUB(doorfs, door_ki_open, nomod_einval);
+ NO_UNLOAD_STUB(doorfs, door_ki_lookup, nomod_zero);
+ NO_UNLOAD_WSTUB(doorfs, door_ki_upcall, nomod_einval);
+ NO_UNLOAD_WSTUB(doorfs, door_ki_upcall_limited, nomod_einval);
+ NO_UNLOAD_WSTUB(doorfs, door_ki_hold, nomod_zero);
+ NO_UNLOAD_WSTUB(doorfs, door_ki_rele, nomod_zero);
+ NO_UNLOAD_WSTUB(doorfs, door_ki_info, nomod_einval);
+ END_MODULE(doorfs);
+#endif
+
+/*
+ * Stubs for MD5
+ */
+#ifndef MD5_MODULE
+ MODULE(md5,misc);
+ WSTUB(md5, MD5Init, nomod_zero);
+ WSTUB(md5, MD5Update, nomod_zero);
+ WSTUB(md5, MD5Final, nomod_zero);
+ END_MODULE(md5);
+#endif
+
+/*
+ * Stubs for idmap
+ */
+#ifndef IDMAP_MODULE
+ MODULE(idmap,misc);
+ STUB(idmap, kidmap_batch_getgidbysid, nomod_zero);
+ STUB(idmap, kidmap_batch_getpidbysid, nomod_zero);
+ STUB(idmap, kidmap_batch_getsidbygid, nomod_zero);
+ STUB(idmap, kidmap_batch_getsidbyuid, nomod_zero);
+ STUB(idmap, kidmap_batch_getuidbysid, nomod_zero);
+ STUB(idmap, kidmap_get_create, nomod_zero);
+ STUB(idmap, kidmap_get_destroy, nomod_zero);
+ STUB(idmap, kidmap_get_mappings, nomod_zero);
+ STUB(idmap, kidmap_getgidbysid, nomod_zero);
+ STUB(idmap, kidmap_getpidbysid, nomod_zero);
+ STUB(idmap, kidmap_getsidbygid, nomod_zero);
+ STUB(idmap, kidmap_getsidbyuid, nomod_zero);
+ STUB(idmap, kidmap_getuidbysid, nomod_zero);
+ STUB(idmap, idmap_get_door, nomod_einval);
+ STUB(idmap, idmap_unreg_dh, nomod_einval);
+ STUB(idmap, idmap_reg_dh, nomod_einval);
+ STUB(idmap, idmap_purge_cache, nomod_einval);
+ END_MODULE(idmap);
+#endif
+
+/*
+ * Stubs for auditing.
+ */
+#ifndef C2AUDIT_MODULE
+ MODULE(c2audit,sys);
+ NO_UNLOAD_STUB(c2audit, audit_init_module, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_start, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_finish, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, auditdoor, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_closef, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_core_start, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_core_finish, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_strputmsg, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_savepath, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_anchorpath, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_exit, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_exec, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_symlink, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_symlink_create, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_vncreate_start, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_vncreate_finish, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_enterprom, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_exitprom, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_chdirec, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_setf, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_sock, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_strgetmsg, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_ipc, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_ipcget, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_fdsend, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_fdrecv, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_priv, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_setppriv, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_psecflags, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_devpolicy, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_setfsat_path, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_cryptoadm, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_kssl, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, audit_pf_policy, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, au_doormsg, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, au_uwrite, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, au_to_arg32, nomod_zero);
+ NO_UNLOAD_STUB(c2audit, au_free_rec, nomod_zero);
+ END_MODULE(c2audit);
+#endif
+
+/*
+ * Stubs for kernel rpc security service module
+ */
+#ifndef RPCSEC_MODULE
+ MODULE(rpcsec,misc);
+ NO_UNLOAD_STUB(rpcsec, sec_clnt_revoke, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec, authkern_create, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec, sec_svc_msg, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec, sec_svc_control, nomod_zero);
+ END_MODULE(rpcsec);
+#endif
+
+/*
+ * Stubs for rpc RPCSEC_GSS security service module
+ */
+#ifndef RPCSEC_GSS_MODULE
+ MODULE(rpcsec_gss,misc);
+ NO_UNLOAD_STUB(rpcsec_gss, __svcrpcsec_gss, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_getcred, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_set_callback, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_secget, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_secfree, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_seccreate, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_set_defaults, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_revauth, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_secpurge, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_cleanup, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_get_versions, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_max_data_length, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_svc_max_data_length, nomod_zero);
+ NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_get_service_type, nomod_zero);
+ END_MODULE(rpcsec_gss);
+#endif
+
+/*
+ * Stubs for PCI configurator module (misc/pcicfg).
+ */
+#ifndef PCICFG_MODULE
+ MODULE(pcicfg,misc);
+ STUB(pcicfg, pcicfg_configure, 0);
+ STUB(pcicfg, pcicfg_unconfigure, 0);
+ END_MODULE(pcicfg);
+#endif
+
+/*
+ * Stubs for pcieb nexus driver.
+ */
+#ifndef PCIEB_MODULE
+ MODULE(pcieb,drv);
+ STUB(pcieb, pcieb_intel_error_workaround, 0);
+ END_MODULE(pcieb);
+#endif
+
+#ifndef IWSCN_MODULE
+ MODULE(iwscn,drv);
+ STUB(iwscn, srpop, 0);
+ END_MODULE(iwscn);
+#endif
+
+/*
+ * Stubs for checkpoint-resume module
+ */
+#ifndef CPR_MODULE
+ MODULE(cpr,misc);
+ STUB(cpr, cpr, 0);
+ END_MODULE(cpr);
+#endif
+
+/*
+ * Stubs for kernel probes (tnf module). Not unloadable.
+ */
+#ifndef TNF_MODULE
+ MODULE(tnf,drv);
+ NO_UNLOAD_STUB(tnf, tnf_ref32_1, nomod_zero);
+ NO_UNLOAD_STUB(tnf, tnf_string_1, nomod_zero);
+ NO_UNLOAD_STUB(tnf, tnf_opaque_array_1, nomod_zero);
+ NO_UNLOAD_STUB(tnf, tnf_struct_tag_1, nomod_zero);
+ NO_UNLOAD_STUB(tnf, tnf_allocate, nomod_zero);
+ END_MODULE(tnf);
+#endif
+
+/*
+ * Stubs for i86hvm bootstrapping
+ */
+#ifndef HVM_BOOTSTRAP
+ MODULE(hvm_bootstrap,misc);
+ NO_UNLOAD_STUB(hvm_bootstrap, hvmboot_rootconf, nomod_zero);
+ END_MODULE(hvm_bootstrap);
+#endif
+
+/*
+ * Clustering: stubs for bootstrapping.
+ */
+#ifndef CL_BOOTSTRAP
+ MODULE(cl_bootstrap,misc);
+ NO_UNLOAD_WSTUB(cl_bootstrap, clboot_modload, nomod_minus_one);
+ NO_UNLOAD_WSTUB(cl_bootstrap, clboot_loadrootmodules, nomod_zero);
+ NO_UNLOAD_WSTUB(cl_bootstrap, clboot_rootconf, nomod_zero);
+ NO_UNLOAD_WSTUB(cl_bootstrap, clboot_mountroot, nomod_zero);
+ NO_UNLOAD_WSTUB(cl_bootstrap, clconf_init, nomod_zero);
+ NO_UNLOAD_WSTUB(cl_bootstrap, clconf_get_nodeid, nomod_zero);
+ NO_UNLOAD_WSTUB(cl_bootstrap, clconf_maximum_nodeid, nomod_zero);
+ NO_UNLOAD_WSTUB(cl_bootstrap, cluster, nomod_zero);
+ END_MODULE(cl_bootstrap);
+#endif
+
+/*
+ * Clustering: stubs for cluster infrastructure.
+ */
+#ifndef CL_COMM_MODULE
+ MODULE(cl_comm,misc);
+ NO_UNLOAD_STUB(cl_comm, cladmin, nomod_minus_one);
+ END_MODULE(cl_comm);
+#endif
+
+/*
+ * Clustering: stubs for global file system operations.
+ */
+#ifndef PXFS_MODULE
+ MODULE(pxfs,fs);
+ NO_UNLOAD_WSTUB(pxfs, clpxfs_aio_read, nomod_zero);
+ NO_UNLOAD_WSTUB(pxfs, clpxfs_aio_write, nomod_zero);
+ NO_UNLOAD_WSTUB(pxfs, cl_flk_state_transition_notify, nomod_zero);
+ END_MODULE(pxfs);
+#endif
+
+/*
+ * Stubs for kernel cryptographic framework module (misc/kcf).
+ */
+#ifndef KCF_MODULE
+ MODULE(kcf,misc);
+ NO_UNLOAD_STUB(kcf, crypto_mech2id, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_register_provider, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_unregister_provider, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_provider_notification, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_op_notification, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_kmflag, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_digest, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_digest_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_digest_init, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_digest_init_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_digest_update, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_digest_final, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_digest_key_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt_init, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt_init_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt_update, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt_final, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_decrypt, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_decrypt_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_decrypt_init, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_decrypt_init_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_decrypt_update, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_decrypt_final, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_get_all_mech_info, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_key_check, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_key_check_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_key_derive, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_key_generate, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_key_generate_pair, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_key_unwrap, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_key_wrap, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_verify, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_verify_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_init, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_init_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_update, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_final, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_decrypt, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_verify_decrypt, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_verify_decrypt_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_init, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_init_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_update, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_final, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_object_copy, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_object_create, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_object_destroy, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_object_find_final, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_object_find_init, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_object_find, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_object_get_attribute_value, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_object_get_size, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_object_set_attribute_value, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_session_close, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_session_login, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_session_logout, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_session_open, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt_mac, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_init, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_init_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_update, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_final, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_create_ctx_template, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_destroy_ctx_template, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_get_mech_list, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_free_mech_list, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_cancel_req, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_cancel_ctx, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_bufcall_alloc, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_bufcall_free, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_bufcall, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_unbufcall, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_notify_events, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_unnotify_events, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_get_provider, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_get_provinfo, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_release_provider, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_sign, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_sign_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_sign_init, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_sign_init_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_sign_update, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_sign_final, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_sign_recover, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_sign_recover_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_sign_recover_init_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_verify, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_verify_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_verify_init, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_verify_init_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_verify_update, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_verify_final, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_verify_recover, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_verify_recover_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, crypto_verify_recover_init_prov, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, random_add_entropy, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, random_add_pseudo_entropy, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, random_get_blocking_bytes, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, random_get_bytes, nomod_minus_one);
+ NO_UNLOAD_STUB(kcf, random_get_pseudo_bytes, nomod_minus_one);
+ END_MODULE(kcf);
+#endif
+
+/*
+ * Stubs for sha1. A non-unloadable module.
+ */
+#ifndef SHA1_MODULE
+ MODULE(sha1,crypto);
+ NO_UNLOAD_STUB(sha1, SHA1Init, nomod_void);
+ NO_UNLOAD_STUB(sha1, SHA1Update, nomod_void);
+ NO_UNLOAD_STUB(sha1, SHA1Final, nomod_void);
+ END_MODULE(sha1);
+#endif
+
+/*
+ * The following stubs are used by the mac module.
+ * Since dld already depends on mac, these
+ * stubs are needed to avoid circular dependencies.
+ */
+#ifndef DLD_MODULE
+ MODULE(dld,drv);
+ STUB(dld, dld_init_ops, nomod_void);
+ STUB(dld, dld_fini_ops, nomod_void);
+ STUB(dld, dld_devt_to_instance, nomod_minus_one);
+ STUB(dld, dld_autopush, nomod_minus_one);
+ STUB(dld, dld_ioc_register, nomod_einval);
+ STUB(dld, dld_ioc_unregister, nomod_void);
+ END_MODULE(dld);
+#endif
+
+/*
+ * The following stubs are used by the mac module.
+ * Since dls already depends on mac, these
+ * stubs are needed to avoid circular dependencies.
+ */
+#ifndef DLS_MODULE
+ MODULE(dls,misc);
+ STUB(dls, dls_devnet_mac, nomod_zero);
+ STUB(dls, dls_devnet_hold_tmp, nomod_einval);
+ STUB(dls, dls_devnet_rele_tmp, nomod_void);
+ STUB(dls, dls_devnet_hold_link, nomod_einval);
+ STUB(dls, dls_devnet_rele_link, nomod_void);
+ STUB(dls, dls_devnet_prop_task_wait, nomod_void);
+ STUB(dls, dls_mgmt_get_linkid, nomod_einval);
+ STUB(dls, dls_devnet_macname2linkid, nomod_einval);
+ STUB(dls, dls_mgmt_get_linkinfo, nomod_einval);
+ END_MODULE(dls);
+#endif
+
+#ifndef SOFTMAC_MODULE
+ MODULE(softmac,drv);
+ STUB(softmac, softmac_hold_device, nomod_einval);
+ STUB(softmac, softmac_rele_device, nomod_void);
+ STUB(softmac, softmac_recreate, nomod_void);
+ END_MODULE(softmac);
+#endif
+
+#ifndef IPTUN_MODULE
+ MODULE(iptun,drv);
+ STUB(iptun, iptun_create, nomod_einval);
+ STUB(iptun, iptun_delete, nomod_einval);
+ STUB(iptun, iptun_set_policy, nomod_void) ;
+ END_MODULE(iptun);
+#endif
+
+/*
+ * Stubs for dcopy, for Intel IOAT KAPIs
+ */
+#ifndef DCOPY_MODULE
+ MODULE(dcopy,misc);
+ NO_UNLOAD_STUB(dcopy, dcopy_query, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_query_channel, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_alloc, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_free, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_alloc, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_free, nomod_void);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_post, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_poll, nomod_minus_one);
+ END_MODULE(dcopy);
+#endif
+
+/*
+ * Stubs for acpica
+ */
+#ifndef ACPICA_MODULE
+ MODULE(acpica,misc);
+ NO_UNLOAD_STUB(acpica, AcpiOsReadPort, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, AcpiOsWritePort, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, AcpiInstallNotifyHandler, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, AcpiRemoveNotifyHandler, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, AcpiEvaluateObject, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, AcpiEvaluateObjectTyped, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, AcpiWriteBitRegister, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, AcpiReadBitRegister, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, AcpiOsFree, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, acpica_get_handle_cpu, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, acpica_get_global_FADT, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, acpica_write_cpupm_capabilities,
+ nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, __acpi_wbinvd, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpica, acpi_reset_system, nomod_minus_one) ;
+ END_MODULE(acpica);
+#endif
+
+/*
+ * Stubs for acpidev
+ */
+#ifndef ACPIDEV_MODULE
+ MODULE(acpidev,misc);
+ NO_UNLOAD_STUB(acpidev, acpidev_dr_get_cpu_numa_info, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpidev, acpidev_dr_free_cpu_numa_info,
+ nomod_minus_one) ;
+ END_MODULE(acpidev);
+#endif
+
+#ifndef IPNET_MODULE
+ MODULE(ipnet,drv);
+ STUB(ipnet, ipnet_if_getdev, nomod_zero);
+ STUB(ipnet, ipnet_walk_if, nomod_zero);
+ END_MODULE(ipnet);
+#endif
+
+#ifndef IOMMULIB_MODULE
+ MODULE(iommulib,misc);
+ STUB(iommulib, iommulib_nex_close, nomod_void);
+ END_MODULE(iommulib);
+#endif
+
+/*
+ * Stubs for rootnex nexus driver.
+ */
+#ifndef ROOTNEX_MODULE
+ MODULE(rootnex,drv);
+ STUB(rootnex, immu_init, 0);
+ STUB(rootnex, immu_startup, 0);
+ STUB(rootnex, immu_physmem_update, 0);
+ END_MODULE(rootnex);
+#endif
+
+/*
+ * Stubs for kernel socket, for iscsi
+ */
+#ifndef KSOCKET_MODULE
+ MODULE(ksocket, misc);
+ NO_UNLOAD_STUB(ksocket, ksocket_setsockopt, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_getsockopt, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_getpeername, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_getsockname, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_socket, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_bind, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_listen, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_accept, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_connect, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_recv, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_recvfrom, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_recvmsg, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_send, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_sendto, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_sendmsg, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_ioctl, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_setcallbacks, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_hold, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_rele, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_shutdown, nomod_minus_one);
+ NO_UNLOAD_STUB(ksocket, ksocket_close, nomod_minus_one);
+ END_MODULE(ksocket);
+#endif
+
+/*
+ * Stubs for elfexec
+ */
+#ifndef ELFEXEC_MODULE
+ MODULE(elfexec,exec);
+ STUB(elfexec, elfexec, nomod_einval);
+ STUB(elfexec, mapexec_brand, nomod_einval);
+ STUB(elfexec, elf32exec, nomod_einval);
+ STUB(elfexec, mapexec32_brand, nomod_einval);
+ END_MODULE(elfexec);
+#endif
+
+/*
+ * Stub(s) for APIX module.
+ */
+#ifndef APIX_MODULE
+ MODULE(apix,mach);
+ WSTUB(apix, apix_loaded, nomod_zero);
+ END_MODULE(apix);
+#endif
+
+/*
+ * Stubs for ppt module (bhyve PCI passthrough driver)
+ */
+#ifndef PPT_MODULE
+ MODULE(ppt,drv);
+ WSTUB(ppt, ppt_unassign_all, nomod_zero);
+ WSTUB(ppt, ppt_map_mmio, nomod_einval);
+ WSTUB(ppt, ppt_unmap_mmio, nomod_einval);
+ WSTUB(ppt, ppt_setup_msi, nomod_einval);
+ WSTUB(ppt, ppt_setup_msix, nomod_einval);
+ WSTUB(ppt, ppt_disable_msix, nomod_einval);
+ WSTUB(ppt, ppt_assigned_devices, nomod_zero);
+ WSTUB(ppt, ppt_is_mmio, nomod_zero);
+ WSTUB(ppt, ppt_assign_device, nomod_einval);
+ WSTUB(ppt, ppt_unassign_device, nomod_einval);
+ WSTUB(ppt, ppt_get_limits, nomod_einval);
+ END_MODULE(ppt);
+#endif
+
+/*
+ * this is just a marker for the area of text that contains stubs
+ */
+ ENTRY_NP(stubs_end)
+ nop
+
diff --git a/usr/src/uts/intel/ml/ovbcopy.s b/usr/src/uts/intel/ml/ovbcopy.s
new file mode 100644
index 0000000000..0687e67e4b
--- /dev/null
+++ b/usr/src/uts/intel/ml/ovbcopy.s
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*-
+ * Copyright (c) 1993 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/amd64/amd64/support.S,v 1.102 2003/10/02 05:08:13 alc Exp $
+ */
+
+#include <sys/asm_linkage.h>
+
+/*
+ * Adapted from fbsd bcopy().
+ *
+ * bcopy(src, dst, cnt)
+ * rdi, rsi, rdx
+ * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+ */
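+
+/*
+ * For reference, a rough and purely illustrative C equivalent of the
+ * direction choice made below (informal prototype, not the kernel's
+ * actual declaration):
+ *
+ *	void
+ *	ovbcopy(const char *src, char *dst, size_t cnt)
+ *	{
+ *		if ((uintptr_t)dst - (uintptr_t)src < cnt) {
+ *			// dst overlaps the tail of src: copy backwards
+ *			while (cnt-- != 0)
+ *				dst[cnt] = src[cnt];
+ *		} else {
+ *			// otherwise a forward copy is safe
+ *			while (cnt-- != 0)
+ *				*dst++ = *src++;
+ *		}
+ *	}
+ */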
+
+ ENTRY(ovbcopy)
+ xchgq %rsi,%rdi
+ movq %rdx,%rcx
+
+ movq %rdi,%rax
+ subq %rsi,%rax
+ cmpq %rcx,%rax /* overlapping && src < dst? */
+ jb reverse
+
+ shrq $3,%rcx /* copy by 64-bit words */
+ cld /* nope, copy forwards */
+ rep
+ movsq
+ movq %rdx,%rcx
+ andq $7,%rcx /* any bytes left? */
+ rep
+ movsb
+ ret
+
+reverse:
+ addq %rcx,%rdi /* copy backwards */
+ addq %rcx,%rsi
+ decq %rdi
+ decq %rsi
+ andq $7,%rcx /* any fractional bytes? */
+ std
+ rep
+ movsb
+	movq	%rdx,%rcx			/* copy remainder by 64-bit words */
+ shrq $3,%rcx
+ subq $7,%rsi
+ subq $7,%rdi
+ rep
+ movsq
+ cld
+ ret
+ SET_SIZE(ovbcopy)
+
diff --git a/usr/src/uts/intel/ml/retpoline.s b/usr/src/uts/intel/ml/retpoline.s
new file mode 100644
index 0000000000..a68d9504c1
--- /dev/null
+++ b/usr/src/uts/intel/ml/retpoline.s
@@ -0,0 +1,211 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+ .file "retpoline.s"
+
+/*
+ * This file implements the various hooks that are needed for retpolines and
+ * return stack buffer (RSB) stuffing. For more information, please see the
+ * 'Speculative Execution CPU Side Channel Security' section of the
+ * uts/i86pc/os/cpuid.c big theory statement.
+ */
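+
+/*
+ * As a purely illustrative example of what is being protected: C code that
+ * makes an indirect call through a function pointer, e.g.
+ *
+ *	int (*fp)(void) = lookup_handler();	// hypothetical function
+ *	int ret = fp();
+ *
+ * would normally compile to an indirect "call *%reg".  With retpolines
+ * enabled, the compiler instead emits a call to the matching
+ * __x86_indirect_thunk_<reg> entry point defined below, which the kernel
+ * may later patch with the lfence or plain-jmp variants depending on the
+ * mitigations the CPU requires.
+ */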
+
+#include <sys/asm_linkage.h>
+#include <sys/x86_archext.h>
+
+#if defined(__amd64)
+
+/*
+ * This macro generates the default retpoline entry point that the compiler
+ * expects. It implements the expected retpoline form.
+ */
+#define RETPOLINE_MKTHUNK(reg) \
+ ENTRY(__x86_indirect_thunk_/**/reg) \
+ call 2f; \
+1: \
+ pause; \
+ lfence; \
+ jmp 1b; \
+2: \
+ movq %/**/reg, (%rsp); \
+ ret; \
+ SET_SIZE(__x86_indirect_thunk_/**/reg)
+
+/*
+ * This macro generates the default retpoline form. It exists in addition to the
+ * thunk so that, if we ever need to restore the default retpoline behavior
+ * to the thunk, we can.
+ */
+#define RETPOLINE_MKGENERIC(reg) \
+ ENTRY(__x86_indirect_thunk_gen_/**/reg) \
+ call 2f; \
+1: \
+ pause; \
+ lfence; \
+ jmp 1b; \
+2: \
+ movq %/**/reg, (%rsp); \
+ ret; \
+ SET_SIZE(__x86_indirect_thunk_gen_/**/reg)
+
+/*
+ * This macro generates the AMD optimized form of a retpoline which will be used
+ * on systems where the lfence dispatch serializing behavior has been changed.
+ */
+#define RETPOLINE_MKLFENCE(reg) \
+ ENTRY(__x86_indirect_thunk_amd_/**/reg) \
+ lfence; \
+ jmp *%/**/reg; \
+ SET_SIZE(__x86_indirect_thunk_amd_/**/reg)
+
+
+/*
+ * This macro generates the no-op form of the retpoline which will be used if we
+ * either need to disable retpolines because we have enhanced IBRS or because we
+ * have been asked to disable mitigations.
+ */
+#define RETPOLINE_MKJUMP(reg) \
+ ENTRY(__x86_indirect_thunk_jmp_/**/reg) \
+ jmp *%/**/reg; \
+ SET_SIZE(__x86_indirect_thunk_jmp_/**/reg)
+
+ RETPOLINE_MKTHUNK(rax)
+ RETPOLINE_MKTHUNK(rbx)
+ RETPOLINE_MKTHUNK(rcx)
+ RETPOLINE_MKTHUNK(rdx)
+ RETPOLINE_MKTHUNK(rdi)
+ RETPOLINE_MKTHUNK(rsi)
+ RETPOLINE_MKTHUNK(rbp)
+ RETPOLINE_MKTHUNK(r8)
+ RETPOLINE_MKTHUNK(r9)
+ RETPOLINE_MKTHUNK(r10)
+ RETPOLINE_MKTHUNK(r11)
+ RETPOLINE_MKTHUNK(r12)
+ RETPOLINE_MKTHUNK(r13)
+ RETPOLINE_MKTHUNK(r14)
+ RETPOLINE_MKTHUNK(r15)
+
+ RETPOLINE_MKGENERIC(rax)
+ RETPOLINE_MKGENERIC(rbx)
+ RETPOLINE_MKGENERIC(rcx)
+ RETPOLINE_MKGENERIC(rdx)
+ RETPOLINE_MKGENERIC(rdi)
+ RETPOLINE_MKGENERIC(rsi)
+ RETPOLINE_MKGENERIC(rbp)
+ RETPOLINE_MKGENERIC(r8)
+ RETPOLINE_MKGENERIC(r9)
+ RETPOLINE_MKGENERIC(r10)
+ RETPOLINE_MKGENERIC(r11)
+ RETPOLINE_MKGENERIC(r12)
+ RETPOLINE_MKGENERIC(r13)
+ RETPOLINE_MKGENERIC(r14)
+ RETPOLINE_MKGENERIC(r15)
+
+ RETPOLINE_MKLFENCE(rax)
+ RETPOLINE_MKLFENCE(rbx)
+ RETPOLINE_MKLFENCE(rcx)
+ RETPOLINE_MKLFENCE(rdx)
+ RETPOLINE_MKLFENCE(rdi)
+ RETPOLINE_MKLFENCE(rsi)
+ RETPOLINE_MKLFENCE(rbp)
+ RETPOLINE_MKLFENCE(r8)
+ RETPOLINE_MKLFENCE(r9)
+ RETPOLINE_MKLFENCE(r10)
+ RETPOLINE_MKLFENCE(r11)
+ RETPOLINE_MKLFENCE(r12)
+ RETPOLINE_MKLFENCE(r13)
+ RETPOLINE_MKLFENCE(r14)
+ RETPOLINE_MKLFENCE(r15)
+
+ RETPOLINE_MKJUMP(rax)
+ RETPOLINE_MKJUMP(rbx)
+ RETPOLINE_MKJUMP(rcx)
+ RETPOLINE_MKJUMP(rdx)
+ RETPOLINE_MKJUMP(rdi)
+ RETPOLINE_MKJUMP(rsi)
+ RETPOLINE_MKJUMP(rbp)
+ RETPOLINE_MKJUMP(r8)
+ RETPOLINE_MKJUMP(r9)
+ RETPOLINE_MKJUMP(r10)
+ RETPOLINE_MKJUMP(r11)
+ RETPOLINE_MKJUMP(r12)
+ RETPOLINE_MKJUMP(r13)
+ RETPOLINE_MKJUMP(r14)
+ RETPOLINE_MKJUMP(r15)
+
+ /*
+ * The x86_rsb_stuff function is called from pretty arbitrary
+ * contexts. It's much easier for us to save and restore all the
+ * registers we touch rather than clobber them for callers. You must
+ * preserve this property or the system will panic at best.
+ */
+ ENTRY(x86_rsb_stuff)
+ /*
+ * These nops are present so we can patch a ret instruction if we need
+ * to disable RSB stuffing because enhanced IBRS is present or we're
+ * disabling mitigations.
+ */
+ nop
+ nop
+ pushq %rdi
+ pushq %rax
+ movl $16, %edi
+ movq %rsp, %rax
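+	/*
+	 * Each pass of the loop below executes two call instructions whose
+	 * return addresses are never consumed by a ret, so every pass
+	 * deposits two fresh entries in the RSB; 16 passes thus overwrite
+	 * 32 entries.  The "pause; call 1b" bodies are only ever reached
+	 * speculatively.  %rax holds the original %rsp so that the pushed
+	 * return addresses can be discarded when the loop is done.
+	 */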
+rsb_loop:
+ call 2f
+1:
+ pause
+ call 1b
+2:
+ call 2f
+1:
+ pause
+ call 1b
+2:
+ subl $1, %edi
+ jnz rsb_loop
+ movq %rax, %rsp
+ popq %rax
+ popq %rdi
+ ret
+ SET_SIZE(x86_rsb_stuff)
+
+#elif defined(__i386)
+
+/*
+ * While the kernel is 64-bit only, dboot is still 32-bit, so there are a
+ * limited number of variants that are used for 32-bit. However as dboot is
+ * short lived and uses them sparingly, we only do the full variant and do not
+ * have an AMD specific version.
+ */
+
+#define RETPOLINE_MKTHUNK(reg) \
+ ENTRY(__x86_indirect_thunk_/**/reg) \
+ call 2f; \
+1: \
+ pause; \
+ lfence; \
+ jmp 1b; \
+2: \
+ movl %/**/reg, (%esp); \
+ ret; \
+ SET_SIZE(__x86_indirect_thunk_/**/reg)
+
+ RETPOLINE_MKTHUNK(edi)
+ RETPOLINE_MKTHUNK(eax)
+
+#else
+#error "Your architecture is in another castle."
+#endif
diff --git a/usr/src/uts/intel/ml/sseblk.s b/usr/src/uts/intel/ml/sseblk.s
new file mode 100644
index 0000000000..836b6b6c97
--- /dev/null
+++ b/usr/src/uts/intel/ml/sseblk.s
@@ -0,0 +1,280 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+
+#include "assym.h"
+
+/*
+ * Do block operations using Streaming SIMD extensions
+ */
+
+#if defined(DEBUG)
+#define ASSERT_KPREEMPT_DISABLED(t, r32, msg) \
+ movq %gs:CPU_THREAD, t; \
+ movsbl T_PREEMPT(t), r32; \
+ testl r32, r32; \
+ jne 5f; \
+ pushq %rbp; \
+ movq %rsp, %rbp; \
+ leaq msg(%rip), %rdi; \
+ xorl %eax, %eax; \
+ call panic; \
+5:
+#else /* DEBUG */
+#define ASSERT_KPREEMPT_DISABLED(t, r32, msg)
+#endif /* DEBUG */
+
+#define BLOCKSHIFT 6
+#define BLOCKSIZE 64 /* (1 << BLOCKSHIFT) */
+#define BLOCKMASK 63 /* (BLOCKSIZE - 1) */
+
+#if (1 << BLOCKSHIFT) != BLOCKSIZE || BLOCKMASK != (BLOCKSIZE - 1)
+#error "mucked up constants"
+#endif
+
+#define SAVE_XMM0(r) \
+ SAVE_XMM_PROLOG(r, 1); \
+ movdqa %xmm0, (r)
+
+#define ZERO_LOOP_INIT_XMM(dst) \
+ pxor %xmm0, %xmm0
+
+#define ZERO_LOOP_BODY_XMM(dst, cnt) \
+ movntdq %xmm0, (dst); \
+ movntdq %xmm0, 0x10(dst); \
+ movntdq %xmm0, 0x20(dst); \
+ movntdq %xmm0, 0x30(dst); \
+ addq $BLOCKSIZE, dst; \
+ subq $1, cnt
+
+#define ZERO_LOOP_FINI_XMM(dst) \
+ mfence
+
+#define RSTOR_XMM0(r) \
+ movdqa 0x0(r), %xmm0; \
+ RSTOR_XMM_EPILOG(r, 1)
+
+ /*
+ * %rdi dst
+ * %rsi size
+ * %rax saved %cr0 (#if DEBUG then %eax is t->t_preempt)
+ * %r8 pointer to %xmm register save area
+ */
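+
+	/*
+	 * Illustrative only -- a rough C-level sketch of the entry checks
+	 * below (informal prototype, not the kernel's actual declaration):
+	 *
+	 *	void
+	 *	hwblkclr(void *addr, size_t size)
+	 *	{
+	 *		if (((uintptr_t)addr & BLOCKMASK) != 0 ||
+	 *		    size < BLOCKSIZE || (size & BLOCKMASK) != 0) {
+	 *			bzero(addr, size);	// not block-shaped
+	 *			return;
+	 *		}
+	 *		// zero BLOCKSIZE bytes per pass with movntdq stores
+	 *	}
+	 */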
+ ENTRY(hwblkclr)
+ pushq %rbp
+ movq %rsp, %rbp
+ testl $BLOCKMASK, %edi /* address must be BLOCKSIZE aligned */
+ jne .dobzero
+ cmpq $BLOCKSIZE, %rsi /* size must be at least BLOCKSIZE */
+ jl .dobzero
+ testq $BLOCKMASK, %rsi /* .. and be a multiple of BLOCKSIZE */
+ jne .dobzero
+ shrq $BLOCKSHIFT, %rsi
+
+ ASSERT_KPREEMPT_DISABLED(%r11, %eax, .not_disabled)
+ movq %cr0, %rax
+ clts
+ testl $CR0_TS, %eax
+ jnz 1f
+
+ SAVE_XMM0(%r8)
+1: ZERO_LOOP_INIT_XMM(%rdi)
+9: ZERO_LOOP_BODY_XMM(%rdi, %rsi)
+ jnz 9b
+ ZERO_LOOP_FINI_XMM(%rdi)
+
+ testl $CR0_TS, %eax
+ jnz 2f
+ RSTOR_XMM0(%r8)
+2: movq %rax, %cr0
+ leave
+ ret
+.dobzero:
+ leave
+ jmp bzero
+ SET_SIZE(hwblkclr)
+
+
+#define PREFETCH_START(src) \
+ prefetchnta 0x0(src); \
+ prefetchnta 0x40(src)
+
+#define SAVE_XMMS(r) \
+ SAVE_XMM_PROLOG(r, 8); \
+ movdqa %xmm0, (r); \
+ movdqa %xmm1, 0x10(r); \
+ movdqa %xmm2, 0x20(r); \
+ movdqa %xmm3, 0x30(r); \
+ movdqa %xmm4, 0x40(r); \
+ movdqa %xmm5, 0x50(r); \
+ movdqa %xmm6, 0x60(r); \
+ movdqa %xmm7, 0x70(r)
+
+#define COPY_LOOP_INIT_XMM(src) \
+ prefetchnta 0x80(src); \
+ prefetchnta 0xc0(src); \
+ movdqa 0x0(src), %xmm0; \
+ movdqa 0x10(src), %xmm1; \
+ movdqa 0x20(src), %xmm2; \
+ movdqa 0x30(src), %xmm3; \
+ movdqa 0x40(src), %xmm4; \
+ movdqa 0x50(src), %xmm5; \
+ movdqa 0x60(src), %xmm6; \
+ movdqa 0x70(src), %xmm7; \
+ addq $0x80, src
+
+#define COPY_LOOP_BODY_XMM(src, dst, cnt) \
+ prefetchnta 0x80(src); \
+ prefetchnta 0xc0(src); \
+ prefetchnta 0x100(src); \
+ prefetchnta 0x140(src); \
+ movntdq %xmm0, (dst); \
+ movntdq %xmm1, 0x10(dst); \
+ movntdq %xmm2, 0x20(dst); \
+ movntdq %xmm3, 0x30(dst); \
+ movdqa 0x0(src), %xmm0; \
+ movdqa 0x10(src), %xmm1; \
+ movntdq %xmm4, 0x40(dst); \
+ movntdq %xmm5, 0x50(dst); \
+ movdqa 0x20(src), %xmm2; \
+ movdqa 0x30(src), %xmm3; \
+ movntdq %xmm6, 0x60(dst); \
+ movntdq %xmm7, 0x70(dst); \
+ movdqa 0x40(src), %xmm4; \
+ movdqa 0x50(src), %xmm5; \
+ addq $0x80, dst; \
+ movdqa 0x60(src), %xmm6; \
+ movdqa 0x70(src), %xmm7; \
+ addq $0x80, src; \
+ subl $1, cnt
+
+#define COPY_LOOP_FINI_XMM(dst) \
+ movntdq %xmm0, 0x0(dst); \
+ movntdq %xmm1, 0x10(dst); \
+ movntdq %xmm2, 0x20(dst); \
+ movntdq %xmm3, 0x30(dst); \
+ movntdq %xmm4, 0x40(dst); \
+ movntdq %xmm5, 0x50(dst); \
+ movntdq %xmm6, 0x60(dst); \
+ movntdq %xmm7, 0x70(dst)
+
+#define RSTOR_XMMS(r) \
+ movdqa 0x0(r), %xmm0; \
+ movdqa 0x10(r), %xmm1; \
+ movdqa 0x20(r), %xmm2; \
+ movdqa 0x30(r), %xmm3; \
+ movdqa 0x40(r), %xmm4; \
+ movdqa 0x50(r), %xmm5; \
+ movdqa 0x60(r), %xmm6; \
+ movdqa 0x70(r), %xmm7; \
+ RSTOR_XMM_EPILOG(r, 8)
+
+ /*
+ * %rdi src
+ * %rsi dst
+ * %rdx #if DEBUG then curthread
+ * %ecx loop count
+	 * %rax	saved %cr0 (#if DEBUG then %eax is t->t_preempt)
+ * %r8 pointer to %xmm register save area
+ */
+ ENTRY(hwblkpagecopy)
+ pushq %rbp
+ movq %rsp, %rbp
+ PREFETCH_START(%rdi)
+ /*
+ * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
+ * load and final store save us on loop count
+ */
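+	/*
+	 * That is: 4096 / 128 = 32 blocks per page.  COPY_LOOP_INIT_XMM
+	 * performs the first block's loads and COPY_LOOP_FINI_XMM the last
+	 * block's stores, so the loop body runs only 32 - 1 = 31 times,
+	 * each pass storing one block while loading the next.
+	 */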
+ movl $_CONST(32 - 1), %ecx
+ ASSERT_KPREEMPT_DISABLED(%rdx, %eax, .not_disabled)
+ movq %cr0, %rax
+ clts
+ testl $CR0_TS, %eax
+ jnz 3f
+ SAVE_XMMS(%r8)
+3: COPY_LOOP_INIT_XMM(%rdi)
+4: COPY_LOOP_BODY_XMM(%rdi, %rsi, %ecx)
+ jnz 4b
+ COPY_LOOP_FINI_XMM(%rsi)
+ testl $CR0_TS, %eax
+ jnz 5f
+ RSTOR_XMMS(%r8)
+5: movq %rax, %cr0
+ mfence
+ leave
+ ret
+ SET_SIZE(hwblkpagecopy)
+
+ ENTRY(block_zero_no_xmm)
+ pushq %rbp
+ movq %rsp, %rbp
+ xorl %eax, %eax
+ addq %rsi, %rdi
+ negq %rsi
+1:
+ movnti %rax, (%rdi, %rsi)
+ movnti %rax, 8(%rdi, %rsi)
+ movnti %rax, 16(%rdi, %rsi)
+ movnti %rax, 24(%rdi, %rsi)
+ addq $32, %rsi
+ jnz 1b
+ mfence
+ leave
+ ret
+ SET_SIZE(block_zero_no_xmm)
+
+
+ ENTRY(page_copy_no_xmm)
+ movq $MMU_STD_PAGESIZE, %rcx
+ addq %rcx, %rdi
+ addq %rcx, %rsi
+ negq %rcx
+1:
+ movq (%rsi, %rcx), %rax
+ movnti %rax, (%rdi, %rcx)
+ movq 8(%rsi, %rcx), %rax
+ movnti %rax, 8(%rdi, %rcx)
+ movq 16(%rsi, %rcx), %rax
+ movnti %rax, 16(%rdi, %rcx)
+ movq 24(%rsi, %rcx), %rax
+ movnti %rax, 24(%rdi, %rcx)
+ addq $32, %rcx
+ jnz 1b
+ mfence
+ ret
+ SET_SIZE(page_copy_no_xmm)
+
+#if defined(DEBUG)
+ .text
+.not_disabled:
+ .string "sseblk: preemption not disabled!"
+#endif
diff --git a/usr/src/uts/intel/ml/swtch.s b/usr/src/uts/intel/ml/swtch.s
new file mode 100644
index 0000000000..c6c606b11e
--- /dev/null
+++ b/usr/src/uts/intel/ml/swtch.s
@@ -0,0 +1,509 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2020 Joyent, Inc.
+ */
+
+/*
+ * Process switching routines.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/asm_misc.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+#include <sys/stack.h>
+#include <sys/segments.h>
+#include <sys/psw.h>
+
+#include "assym.h"
+
+/*
+ * resume(thread_id_t t);
+ *
+ * a thread can only run on one processor at a time. there
+ * exists a window on MPs where the current thread on one
+ * processor is capable of being dispatched by another processor.
+ * some overlap between outgoing and incoming threads can happen
+ * when they are the same thread. in this case where the threads
+ * are the same, resume() on one processor will spin on the incoming
+ * thread until resume() on the other processor has finished with
+ * the outgoing thread.
+ *
+ * The MMU context changes when the resuming thread resides in a different
+ * process. Kernel threads are known by resume to reside in process 0.
+ * The MMU context, therefore, only changes when resuming a thread in
+ * a process different from curproc.
+ *
+ * resume_from_intr() is called when the thread being resumed was not
+ * passivated by resume (e.g. was interrupted). This means that the
+ * resume lock is already held and that a restore context is not needed.
+ * Also, the MMU context is not changed on the resume in this case.
+ *
+ * resume_from_zombie() is the same as resume except the calling thread
+ * is a zombie and must be put on the deathrow list after the CPU is
+ * off the stack.
+ */
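+
+/*
+ * A very rough, purely illustrative C-level outline of resume(); the real
+ * work must be done in assembly because it switches stacks mid-stream, and
+ * the helpers marked "hypothetical" do not exist as C functions:
+ *
+ *	void
+ *	resume(kthread_t *t)
+ *	{
+ *		kthread_t *old = curthread;
+ *
+ *		save_nonvolatile_regs(old);	// hypothetical; see SAVE_REGS
+ *		if (old->t_ctx != NULL)
+ *			savectx(old);
+ *		switch_to_idle_stack();		// hypothetical
+ *		hat_switch(hat_of(t));		// hat_of() is hypothetical
+ *		old->t_lock = 0;		// old thread may now migrate
+ *		spin_until_dispatchable(t);	// hypothetical; see t_lock spin
+ *		CPU->cpu_thread = t;
+ *		switch_to_thread_stack(t);	// hypothetical; restores regs
+ *	}
+ */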
+
+#if LWP_PCB_FPU != 0
+#error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work
+#endif /* LWP_PCB_FPU != 0 */
+
+/*
+ * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
+ *
+ * The stack frame must be created before the save of %rsp so that tracebacks
+ * of swtch()ed-out processes show the process as having last called swtch().
+ */
+#define SAVE_REGS(thread_t, retaddr) \
+ movq %rbp, T_RBP(thread_t); \
+ movq %rbx, T_RBX(thread_t); \
+ movq %r12, T_R12(thread_t); \
+ movq %r13, T_R13(thread_t); \
+ movq %r14, T_R14(thread_t); \
+ movq %r15, T_R15(thread_t); \
+ pushq %rbp; \
+ movq %rsp, %rbp; \
+ movq %rsp, T_SP(thread_t); \
+ movq retaddr, T_PC(thread_t); \
+ movq %rdi, %r12; \
+ call __dtrace_probe___sched_off__cpu
+
+/*
+ * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15)
+ *
+ * We load up %rsp from the label_t as part of the context switch, so
+ * we don't repeat that here.
+ *
+ * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t
+ * already has the effect of putting the stack back the way it was when
+ * we came in.
+ */
+#define RESTORE_REGS(scratch_reg) \
+ movq %gs:CPU_THREAD, scratch_reg; \
+ movq T_RBP(scratch_reg), %rbp; \
+ movq T_RBX(scratch_reg), %rbx; \
+ movq T_R12(scratch_reg), %r12; \
+ movq T_R13(scratch_reg), %r13; \
+ movq T_R14(scratch_reg), %r14; \
+ movq T_R15(scratch_reg), %r15
+
+/*
+ * Get pointer to a thread's hat structure
+ */
+#define GET_THREAD_HATP(hatp, thread_t, scratch_reg) \
+ movq T_PROCP(thread_t), hatp; \
+ movq P_AS(hatp), scratch_reg; \
+ movq A_HAT(scratch_reg), hatp
+
+#define TSC_READ() \
+ call tsc_read; \
+ movq %rax, %r14;
+
+/*
+ * If we are resuming an interrupt thread, store a timestamp in the thread
+ * structure. If an interrupt occurs between tsc_read() and its subsequent
+ * store, the timestamp will be stale by the time it is stored. We can detect
+ * this by doing a compare-and-swap on the thread's timestamp, since any
+ * interrupt occurring in this window will put a new timestamp in the thread's
+ * t_intr_start field.
+ */
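+/*
+ * Roughly equivalent C, for illustration only (atomic_cas_64() is the
+ * <sys/atomic.h> compare-and-swap primitive):
+ *
+ *	if (t->t_flags & T_INTR_THREAD) {
+ *		hrtime_t new, old;
+ *		do {
+ *			new = tsc_read();
+ *			old = t->t_intr_start;
+ *		} while (atomic_cas_64((uint64_t *)&t->t_intr_start,
+ *		    old, new) != (uint64_t)old);
+ *	}
+ */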
+#define STORE_INTR_START(thread_t) \
+ testw $T_INTR_THREAD, T_FLAGS(thread_t); \
+ jz 1f; \
+0: \
+ TSC_READ(); \
+ movq T_INTR_START(thread_t), %rax; \
+ cmpxchgq %r14, T_INTR_START(thread_t); \
+ jnz 0b; \
+1:
+
+ .global kpti_enable
+
+ ENTRY(resume)
+ movq %gs:CPU_THREAD, %rax
+ leaq resume_return(%rip), %r11
+
+ /*
+ * Deal with SMAP here. A thread may be switched out at any point while
+ * it is executing. The thread could be under on_fault() or it could be
+ * pre-empted while performing a copy interruption. If this happens and
+	 * pre-empted in the middle of a copy operation. If this happens and
+ * saving and restoring rflags correctly, we may lose our SMAP related
+ * state.
+ *
+ * To handle this, as part of being switched out, we first save whether
+ * or not userland access is allowed ($PS_ACHK in rflags) and store that
+ * in t_useracc on the kthread_t and unconditionally enable SMAP to
+ * protect the system.
+ *
+ * Later, when the thread finishes resuming, we potentially disable smap
+ * if PS_ACHK was present in rflags. See uts/intel/ml/copy.s for
+ * more information on rflags and SMAP.
+ */
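+	/*
+	 * In rough, illustrative C terms (read_rflags() is only a stand-in
+	 * for the pushfq/popq pair below, not an actual kernel function):
+	 *
+	 *	curthread->t_useracc = read_rflags() & PS_ACHK;
+	 *	smap_enable();		// always protect while switched out
+	 *
+	 * and later, once the incoming thread is back on CPU (see .nosmap):
+	 *
+	 *	if (t->t_useracc != 0) {
+	 *		t->t_useracc = 0;	// clear out of paranoia
+	 *		smap_disable();		// restore userland access
+	 *	}
+	 */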
+ pushfq
+ popq %rsi
+ andq $PS_ACHK, %rsi
+ movq %rsi, T_USERACC(%rax)
+ call smap_enable
+
+ /*
+ * Take a moment to potentially clear the RSB buffer. This is done to
+ * prevent various Spectre variant 2 and SpectreRSB attacks. This may
+ * not be sufficient. Please see uts/intel/ml/retpoline.s for more
+ * information about this.
+ */
+ call x86_rsb_stuff
+
+ /*
+ * Save non-volatile registers, and set return address for current
+ * thread to resume_return.
+ *
+ * %r12 = t (new thread) when done
+ */
+ SAVE_REGS(%rax, %r11)
+
+
+ LOADCPU(%r15) /* %r15 = CPU */
+ movq CPU_THREAD(%r15), %r13 /* %r13 = curthread */
+
+ /*
+ * Call savectx if thread has installed context ops.
+ *
+ * Note that if we have floating point context, the save op
+ * (either fpsave_begin or fpxsave_begin) will issue the
+ * async save instruction (fnsave or fxsave respectively)
+ * that we fwait for below.
+ */
+ cmpq $0, T_CTX(%r13) /* should current thread savectx? */
+ je .nosavectx /* skip call when zero */
+
+ movq %r13, %rdi /* arg = thread pointer */
+ call savectx /* call ctx ops */
+.nosavectx:
+
+ /*
+ * Check that the curthread is not using the FPU while in the kernel.
+ */
+ call kernel_fpu_no_swtch
+
+ /*
+ * Call savepctx if process has installed context ops.
+ */
+ movq T_PROCP(%r13), %r14 /* %r14 = proc */
+ cmpq $0, P_PCTX(%r14) /* should current thread savepctx? */
+ je .nosavepctx /* skip call when zero */
+
+ movq %r14, %rdi /* arg = proc pointer */
+ call savepctx /* call ctx ops */
+.nosavepctx:
+
+ /*
+ * Temporarily switch to the idle thread's stack
+ */
+ movq CPU_IDLE_THREAD(%r15), %rax /* idle thread pointer */
+
+ /*
+ * Set the idle thread as the current thread
+ */
+ movq T_SP(%rax), %rsp /* It is safe to set rsp */
+ movq %rax, CPU_THREAD(%r15)
+
+ /*
+ * Switch in the hat context for the new thread
+ *
+ */
+ GET_THREAD_HATP(%rdi, %r12, %r11)
+ call hat_switch
+
+ /*
+ * Clear and unlock previous thread's t_lock
+ * to allow it to be dispatched by another processor.
+ */
+ movb $0, T_LOCK(%r13)
+
+ /*
+ * IMPORTANT: Registers at this point must be:
+ * %r12 = new thread
+ *
+ * Here we are in the idle thread, have dropped the old thread.
+ */
+ ALTENTRY(_resume_from_idle)
+ /*
+ * spin until dispatched thread's mutex has
+ * been unlocked. this mutex is unlocked when
+ * it becomes safe for the thread to run.
+ */
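+	/*
+	 * Illustratively, this is a classic test-and-test-and-set loop; in
+	 * C (try_set_lock_bit() is a hypothetical stand-in for "lock btsl"):
+	 *
+	 *	while (!try_set_lock_bit(&t->t_lock)) {
+	 *		while (t->t_lock != 0)
+	 *			cpu_pause();	// hypothetical pause hint
+	 *	}
+	 */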
+.lock_thread_mutex:
+ lock
+ btsl $0, T_LOCK(%r12) /* attempt to lock new thread's mutex */
+ jnc .thread_mutex_locked /* got it */
+
+.spin_thread_mutex:
+ pause
+ cmpb $0, T_LOCK(%r12) /* check mutex status */
+ jz .lock_thread_mutex /* clear, retry lock */
+ jmp .spin_thread_mutex /* still locked, spin... */
+
+.thread_mutex_locked:
+ /*
+ * Fix CPU structure to indicate new running thread.
+ * Set pointer in new thread to the CPU structure.
+ */
+ LOADCPU(%r13) /* load current CPU pointer */
+ cmpq %r13, T_CPU(%r12)
+ je .setup_cpu
+
+ /* cp->cpu_stats.sys.cpumigrate++ */
+ incq CPU_STATS_SYS_CPUMIGRATE(%r13)
+ movq %r13, T_CPU(%r12) /* set new thread's CPU pointer */
+
+.setup_cpu:
+ /*
+ * Setup rsp0 (kernel stack) in TSS to curthread's saved regs
+ * structure. If this thread doesn't have a regs structure above
+ * the stack -- that is, if lwp_stk_init() was never called for the
+ * thread -- this will set rsp0 to the wrong value, but it's harmless
+ * as it's a kernel thread, and it won't actually attempt to implicitly
+ * use the rsp0 via a privilege change.
+ *
+ * Note that when we have KPTI enabled on amd64, we never use this
+ * value at all (since all the interrupts have an IST set).
+ */
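+	/*
+	 * In effect (illustrative only; the KPTI field path shown here is
+	 * hypothetical shorthand for CPU_KPTI_TR_RSP):
+	 *
+	 *	tss->tss_rsp0 = (kpti_enable == 1) ?
+	 *	    (uint64_t)&cpu->kpti_trampoline_rsp :	// hypothetical
+	 *	    (uint64_t)t->t_stk + REGSIZE + MINFRAME;
+	 */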
+ movq CPU_TSS(%r13), %r14
+#if !defined(__xpv)
+ cmpq $1, kpti_enable
+ jne 1f
+ leaq CPU_KPTI_TR_RSP(%r13), %rax
+ jmp 2f
+1:
+ movq T_STACK(%r12), %rax
+ addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */
+2:
+ movq %rax, TSS_RSP0(%r14)
+#else
+ movq T_STACK(%r12), %rax
+ addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */
+ movl $KDS_SEL, %edi
+ movq %rax, %rsi
+ call HYPERVISOR_stack_switch
+#endif /* __xpv */
+
+ movq %r12, CPU_THREAD(%r13) /* set CPU's thread pointer */
+ mfence /* synchronize with mutex_exit() */
+ xorl %ebp, %ebp /* make $<threadlist behave better */
+ movq T_LWP(%r12), %rax /* set associated lwp to */
+ movq %rax, CPU_LWP(%r13) /* CPU's lwp ptr */
+
+ movq T_SP(%r12), %rsp /* switch to outgoing thread's stack */
+ movq T_PC(%r12), %r13 /* saved return addr */
+
+ /*
+ * Call restorectx if context ops have been installed.
+ */
+ cmpq $0, T_CTX(%r12) /* should resumed thread restorectx? */
+ jz .norestorectx /* skip call when zero */
+ movq %r12, %rdi /* arg = thread pointer */
+ call restorectx /* call ctx ops */
+.norestorectx:
+
+ /*
+ * Call restorepctx if context ops have been installed for the proc.
+ */
+ movq T_PROCP(%r12), %rcx
+ cmpq $0, P_PCTX(%rcx)
+ jz .norestorepctx
+ movq %rcx, %rdi
+ call restorepctx
+.norestorepctx:
+
+ STORE_INTR_START(%r12)
+
+ /*
+ * If we came into swtch with the ability to access userland pages, go
+ * ahead and restore that fact by disabling SMAP. Clear the indicator
+ * flag out of paranoia.
+ */
+ movq T_USERACC(%r12), %rax /* should we disable smap? */
+ cmpq $0, %rax /* skip call when zero */
+ jz .nosmap
+ xorq %rax, %rax
+ movq %rax, T_USERACC(%r12)
+ call smap_disable
+.nosmap:
+
+ call smt_mark
+
+ /*
+ * Restore non-volatile registers, then have spl0 return to the
+ * resuming thread's PC after first setting the priority as low as
+ * possible and blocking all interrupt threads that may be active.
+ */
+ movq %r13, %rax /* save return address */
+ RESTORE_REGS(%r11)
+ pushq %rax /* push return address for spl0() */
+ call __dtrace_probe___sched_on__cpu
+ jmp spl0
+
+resume_return:
+ /*
+ * Remove stack frame created in SAVE_REGS()
+ */
+ addq $CLONGSIZE, %rsp
+ ret
+ SET_SIZE(_resume_from_idle)
+ SET_SIZE(resume)
+
+ ENTRY(resume_from_zombie)
+ movq %gs:CPU_THREAD, %rax
+ leaq resume_from_zombie_return(%rip), %r11
+
+ /*
+ * Save non-volatile registers, and set return address for current
+ * thread to resume_from_zombie_return.
+ *
+ * %r12 = t (new thread) when done
+ */
+ SAVE_REGS(%rax, %r11)
+
+ movq %gs:CPU_THREAD, %r13 /* %r13 = curthread */
+
+ /* clean up the fp unit. It might be left enabled */
+
+#if defined(__xpv) /* XXPV XXtclayton */
+ /*
+ * Remove this after bringup.
+ * (Too many #gp's for an instrumented hypervisor.)
+ */
+ STTS(%rax)
+#else
+ movq %cr0, %rax
+ testq $CR0_TS, %rax
+ jnz .zfpu_disabled /* if TS already set, nothing to do */
+ fninit /* init fpu & discard pending error */
+ orq $CR0_TS, %rax
+ movq %rax, %cr0
+.zfpu_disabled:
+
+#endif /* __xpv */
+
+ /*
+ * Temporarily switch to the idle thread's stack so that the zombie
+ * thread's stack can be reclaimed by the reaper.
+ */
+ movq %gs:CPU_IDLE_THREAD, %rax /* idle thread pointer */
+ movq T_SP(%rax), %rsp /* get onto idle thread stack */
+
+ /*
+ * Sigh. If the idle thread has never run thread_start()
+ * then t_sp is mis-aligned by thread_load().
+ */
+ andq $_BITNOT(STACK_ALIGN-1), %rsp
+
+ /*
+ * Set the idle thread as the current thread.
+ */
+ movq %rax, %gs:CPU_THREAD
+
+ /* switch in the hat context for the new thread */
+ GET_THREAD_HATP(%rdi, %r12, %r11)
+ call hat_switch
+
+ /*
+ * Put the zombie on death-row.
+ */
+ movq %r13, %rdi
+ call reapq_add
+
+ jmp _resume_from_idle /* finish job of resume */
+
+resume_from_zombie_return:
+ RESTORE_REGS(%r11) /* restore non-volatile registers */
+ call __dtrace_probe___sched_on__cpu
+
+ /*
+ * Remove stack frame created in SAVE_REGS()
+ */
+ addq $CLONGSIZE, %rsp
+ ret
+ SET_SIZE(resume_from_zombie)
+
+ ENTRY(resume_from_intr)
+ movq %gs:CPU_THREAD, %rax
+ leaq resume_from_intr_return(%rip), %r11
+
+ /*
+ * Save non-volatile registers, and set return address for current
+ * thread to resume_from_intr_return.
+ *
+ * %r12 = t (new thread) when done
+ */
+ SAVE_REGS(%rax, %r11)
+
+ movq %gs:CPU_THREAD, %r13 /* %r13 = curthread */
+ movq %r12, %gs:CPU_THREAD /* set CPU's thread pointer */
+ mfence /* synchronize with mutex_exit() */
+ movq T_SP(%r12), %rsp /* restore resuming thread's sp */
+ xorl %ebp, %ebp /* make $<threadlist behave better */
+
+ /*
+	 * Unlock the outgoing thread's mutex so that it can be dispatched
+	 * by another processor.
+ */
+ xorl %eax, %eax
+ xchgb %al, T_LOCK(%r13)
+
+ STORE_INTR_START(%r12)
+
+ call smt_mark
+
+ /*
+ * Restore non-volatile registers, then have spl0 return to the
+ * resuming thread's PC after first setting the priority as low as
+ * possible and blocking all interrupt threads that may be active.
+ */
+ movq T_PC(%r12), %rax /* saved return addr */
+ RESTORE_REGS(%r11);
+ pushq %rax /* push return address for spl0() */
+ call __dtrace_probe___sched_on__cpu
+ jmp spl0
+
+resume_from_intr_return:
+ /*
+ * Remove stack frame created in SAVE_REGS()
+ */
+ addq $CLONGSIZE, %rsp
+ ret
+ SET_SIZE(resume_from_intr)
+
+ ENTRY(thread_start)
+ popq %rax /* start() */
+ popq %rdi /* arg */
+ popq %rsi /* len */
+ movq %rsp, %rbp
+ INDIRECT_CALL_REG(rax)
+ call thread_exit /* destroy thread if it returns. */
+ /*NOTREACHED*/
+ SET_SIZE(thread_start)