author | Richard Lowe <richlowe@richlowe.net> | 2021-06-04 15:15:12 -0500
---|---|---
committer | Richard Lowe <richlowe@richlowe.net> | 2021-08-16 12:46:39 -0500
commit | f0089e391b2bc4be2755f1a1b51fb4cd9b8f3988 (patch)
tree | c4ac2f5e703ed459d50bcee7ddb38a993d961520 /usr/src/uts/intel/ml
parent | d083fed0c91296a88878f7a468910ad5b5c888ea (diff)
download | illumos-gate-f0089e391b2bc4be2755f1a1b51fb4cd9b8f3988.tar.gz
13941 intel code and headers should not look ia32 specific
Reviewed by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Patrick Mooney <pmooney@pfmooney.com>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src/uts/intel/ml')
-rw-r--r-- | usr/src/uts/intel/ml/copy.s | 1908
-rw-r--r-- | usr/src/uts/intel/ml/ddi_i86_asm.s | 522
-rw-r--r-- | usr/src/uts/intel/ml/desctbls_asm.s | 118
-rw-r--r-- | usr/src/uts/intel/ml/exception.s | 917
-rw-r--r-- | usr/src/uts/intel/ml/float.s | 347
-rw-r--r-- | usr/src/uts/intel/ml/hypersubr.s | 164
-rw-r--r-- | usr/src/uts/intel/ml/i86_subr.s | 1629
-rw-r--r-- | usr/src/uts/intel/ml/lock_prim.s | 714
-rw-r--r-- | usr/src/uts/intel/ml/modstubs.s | 1320
-rw-r--r-- | usr/src/uts/intel/ml/ovbcopy.s | 92
-rw-r--r-- | usr/src/uts/intel/ml/retpoline.s | 211
-rw-r--r-- | usr/src/uts/intel/ml/sseblk.s | 280
-rw-r--r-- | usr/src/uts/intel/ml/swtch.s | 509
13 files changed, 8731 insertions, 0 deletions
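
Before the diff itself, one note on the mechanism described in the copy.s header comment (first file below): smap_enable() and smap_disable() are not real functions — calls to them generate runtime relocations that krtld's hotinlines pass (hotinline_smap(), per that comment) turns into the needed clac/stac — and inline users instead drop SMAP_ENABLE_INSTR()/SMAP_DISABLE_INSTR() sites. Each site is a named three-byte window of nops, the same size as the stac (0x0f 0x01 0xcb) and clac (0x0f 0x01 0xca) instructions that replace it. The sketch below shows what one such pair expands to and what the boot-time patcher presumably writes over it; this is editorial illustration only, not part of the commit.

```asm
	/*
	 * Sketch only: the expansion of SMAP_DISABLE_INSTR(0) and
	 * SMAP_ENABLE_INSTR(0) from copy.s, and what the hotinlines
	 * patcher is expected to turn each window into at boot.
	 */
	.globl	_smap_disable_patch_0
_smap_disable_patch_0:
	nop; nop; nop	/* patched to: stac -- set RFLAGS.AC, allow kernel access to user pages */

	.globl	_smap_enable_patch_0
_smap_enable_patch_0:
	nop; nop; nop	/* patched to: clac -- clear RFLAGS.AC, SMAP protection back in force */
```

Because the replacement instruction fills the window exactly, no surrounding code moves when the patch is applied; the _smap_enable_patch_count and _smap_disable_patch_count objects emitted at the end of copy.s record how many such sites exist, which is why the file insists that SMAP_ENABLE_COUNT/SMAP_DISABLE_COUNT be updated whenever a site is added or removed.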
diff --git a/usr/src/uts/intel/ml/copy.s b/usr/src/uts/intel/ml/copy.s new file mode 100644 index 0000000000..5e5f822518 --- /dev/null +++ b/usr/src/uts/intel/ml/copy.s @@ -0,0 +1,1908 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2009, Intel Corporation + * All rights reserved. + */ + +/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ +/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ +/* All Rights Reserved */ + +/* Copyright (c) 1987, 1988 Microsoft Corporation */ +/* All Rights Reserved */ + +/* + * Copyright 2020 Joyent, Inc. + */ + +#include <sys/errno.h> +#include <sys/asm_linkage.h> + +#include "assym.h" + +#define KCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */ +#define XCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */ +/* + * Non-temopral access (NTA) alignment requirement + */ +#define NTA_ALIGN_SIZE 4 /* Must be at least 4-byte aligned */ +#define NTA_ALIGN_MASK _CONST(NTA_ALIGN_SIZE-1) +#define COUNT_ALIGN_SIZE 16 /* Must be at least 16-byte aligned */ +#define COUNT_ALIGN_MASK _CONST(COUNT_ALIGN_SIZE-1) + +/* + * With the introduction of Broadwell, Intel has introduced supervisor mode + * access protection -- SMAP. SMAP forces the kernel to set certain bits to + * enable access of user pages (AC in rflags, defines as PS_ACHK in + * <sys/psw.h>). One of the challenges is that the implementation of many of the + * userland copy routines directly use the kernel ones. For example, copyin and + * copyout simply go and jump to the do_copy_fault label and traditionally let + * those deal with the return for them. In fact, changing that is a can of frame + * pointers. + * + * Rules and Constraints: + * + * 1. For anything that's not in copy.s, we have it do explicit smap_disable() + * or smap_enable() calls. This is restricted to the following three places: + * DTrace, resume() in swtch.s and on_fault/no_fault. If you want to add it + * somewhere else, we should be thinking twice. + * + * 2. We try to toggle this at the smallest window possible. This means that if + * we take a fault, need to try to use a copyop in copyin() or copyout(), or any + * other function, we will always leave with SMAP enabled (the kernel cannot + * access user pages). + * + * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are + * explicitly only allowed to be called while in an on_fault()/no_fault() handler, + * which already takes care of ensuring that SMAP is enabled and disabled. Note + * this means that when under an on_fault()/no_fault() handler, one must not + * call the non-*_noerr() routines. + * + * 4. 
The first thing we should do after coming out of an lofault handler is to + * make sure that we call smap_enable() again to ensure that we are safely + * protected, as more often than not, we will have disabled smap to get there. + * + * 5. smap_enable() and smap_disable() don't exist: calls to these functions + * generate runtime relocations, that are then processed into the necessary + * clac/stac, via the krtld hotinlines mechanism and hotinline_smap(). + * + * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and + * SMAP_DISABLE_INSTR macro should be used. If the number of these is changed, + * you must update the constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below. + * + * 7. Generally this .s file is processed by a K&R style cpp. This means that it + * really has a lot of feelings about whitespace. In particular, if you have a + * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'. + * + * 8. In general, the kernel has its own value for rflags that gets used. This + * is maintained in a few different places which vary based on how the thread + * comes into existence and whether it's a user thread. In general, when the + * kernel takes a trap, it always will set ourselves to a known set of flags, + * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that + * PS_ACHK is cleared for us. In addition, when using the sysenter instruction, + * we mask off PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for + * where that gets masked off. + */ + +/* + * The optimal 64-bit bcopy and kcopy for modern x86 processors uses + * "rep smovq" for large sizes. Performance data shows that many calls to + * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance for + * these small sizes unrolled code is used. For medium sizes loops writing + * 64-bytes per loop are used. Transition points were determined experimentally. + */ +#define BZERO_USE_REP (1024) +#define BCOPY_DFLT_REP (128) +#define BCOPY_NHM_REP (768) + +/* + * Copy a block of storage, returning an error code if `from' or + * `to' takes a kernel pagefault which cannot be resolved. + * Returns errno value on pagefault error, 0 if all ok + */ + +/* + * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to + * additional call instructions. + */ +#define SMAP_DISABLE_COUNT 16 +#define SMAP_ENABLE_COUNT 26 + +#define SMAP_DISABLE_INSTR(ITER) \ + .globl _smap_disable_patch_/**/ITER; \ + _smap_disable_patch_/**/ITER/**/:; \ + nop; nop; nop; + +#define SMAP_ENABLE_INSTR(ITER) \ + .globl _smap_enable_patch_/**/ITER; \ + _smap_enable_patch_/**/ITER/**/:; \ + nop; nop; nop; + + .globl kernelbase + .globl postbootkernelbase + + ENTRY(kcopy) + pushq %rbp + movq %rsp, %rbp +#ifdef DEBUG + cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ + jb 0f + cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ + jnb 1f +0: leaq .kcopy_panic_msg(%rip), %rdi + xorl %eax, %eax + call panic +1: +#endif + /* + * pass lofault value as 4th argument to do_copy_fault + */ + leaq _kcopy_copyerr(%rip), %rcx + movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ + +do_copy_fault: + movq T_LOFAULT(%r9), %r11 /* save the current lofault */ + movq %rcx, T_LOFAULT(%r9) /* new lofault */ + call bcopy_altentry + xorl %eax, %eax /* return 0 (success) */ + SMAP_ENABLE_INSTR(0) + + /* + * A fault during do_copy_fault is indicated through an errno value + * in %rax and we iretq from the trap handler to here. 
+ */ +_kcopy_copyerr: + movq %r11, T_LOFAULT(%r9) /* restore original lofault */ + leave + ret + SET_SIZE(kcopy) + +#undef ARG_FROM +#undef ARG_TO +#undef ARG_COUNT + +#define COPY_LOOP_INIT(src, dst, cnt) \ + addq cnt, src; \ + addq cnt, dst; \ + shrq $3, cnt; \ + neg cnt + + /* Copy 16 bytes per loop. Uses %rax and %r8 */ +#define COPY_LOOP_BODY(src, dst, cnt) \ + prefetchnta 0x100(src, cnt, 8); \ + movq (src, cnt, 8), %rax; \ + movq 0x8(src, cnt, 8), %r8; \ + movnti %rax, (dst, cnt, 8); \ + movnti %r8, 0x8(dst, cnt, 8); \ + addq $2, cnt + + ENTRY(kcopy_nta) + pushq %rbp + movq %rsp, %rbp +#ifdef DEBUG + cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ + jb 0f + cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ + jnb 1f +0: leaq .kcopy_panic_msg(%rip), %rdi + xorl %eax, %eax + call panic +1: +#endif + + movq %gs:CPU_THREAD, %r9 + cmpq $0, %rcx /* No non-temporal access? */ + /* + * pass lofault value as 4th argument to do_copy_fault + */ + leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */ + jnz do_copy_fault /* use regular access */ + /* + * Make sure cnt is >= KCOPY_MIN_SIZE + */ + cmpq $KCOPY_MIN_SIZE, %rdx + jb do_copy_fault + + /* + * Make sure src and dst are NTA_ALIGN_SIZE aligned, + * count is COUNT_ALIGN_SIZE aligned. + */ + movq %rdi, %r10 + orq %rsi, %r10 + andq $NTA_ALIGN_MASK, %r10 + orq %rdx, %r10 + andq $COUNT_ALIGN_MASK, %r10 + jnz do_copy_fault + + ALTENTRY(do_copy_fault_nta) + movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ + movq T_LOFAULT(%r9), %r11 /* save the current lofault */ + movq %rcx, T_LOFAULT(%r9) /* new lofault */ + + /* + * COPY_LOOP_BODY uses %rax and %r8 + */ + COPY_LOOP_INIT(%rdi, %rsi, %rdx) +2: COPY_LOOP_BODY(%rdi, %rsi, %rdx) + jnz 2b + + mfence + xorl %eax, %eax /* return 0 (success) */ + SMAP_ENABLE_INSTR(1) + +_kcopy_nta_copyerr: + movq %r11, T_LOFAULT(%r9) /* restore original lofault */ + leave + ret + SET_SIZE(do_copy_fault_nta) + SET_SIZE(kcopy_nta) + + ENTRY(bcopy) +#ifdef DEBUG + orq %rdx, %rdx /* %rdx = count */ + jz 1f + cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ + jb 0f + cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ + jnb 1f +0: leaq .bcopy_panic_msg(%rip), %rdi + jmp call_panic /* setup stack and call panic */ +1: +#endif + /* + * bcopy_altentry() is called from kcopy, i.e., do_copy_fault. + * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy + * uses these registers in future they must be saved and restored. + */ + ALTENTRY(bcopy_altentry) +do_copy: +#define L(s) .bcopy/**/s + cmpq $0x50, %rdx /* 80 */ + jae bcopy_ck_size + + /* + * Performance data shows many caller's copy small buffers. So for + * best perf for these sizes unrolled code is used. Store data without + * worrying about alignment. 
+ */ + leaq L(fwdPxQx)(%rip), %r10 + addq %rdx, %rdi + addq %rdx, %rsi + movslq (%r10,%rdx,4), %rcx + leaq (%rcx,%r10,1), %r10 + INDIRECT_JMP_REG(r10) + + .p2align 4 +L(fwdPxQx): + .int L(P0Q0)-L(fwdPxQx) /* 0 */ + .int L(P1Q0)-L(fwdPxQx) + .int L(P2Q0)-L(fwdPxQx) + .int L(P3Q0)-L(fwdPxQx) + .int L(P4Q0)-L(fwdPxQx) + .int L(P5Q0)-L(fwdPxQx) + .int L(P6Q0)-L(fwdPxQx) + .int L(P7Q0)-L(fwdPxQx) + + .int L(P0Q1)-L(fwdPxQx) /* 8 */ + .int L(P1Q1)-L(fwdPxQx) + .int L(P2Q1)-L(fwdPxQx) + .int L(P3Q1)-L(fwdPxQx) + .int L(P4Q1)-L(fwdPxQx) + .int L(P5Q1)-L(fwdPxQx) + .int L(P6Q1)-L(fwdPxQx) + .int L(P7Q1)-L(fwdPxQx) + + .int L(P0Q2)-L(fwdPxQx) /* 16 */ + .int L(P1Q2)-L(fwdPxQx) + .int L(P2Q2)-L(fwdPxQx) + .int L(P3Q2)-L(fwdPxQx) + .int L(P4Q2)-L(fwdPxQx) + .int L(P5Q2)-L(fwdPxQx) + .int L(P6Q2)-L(fwdPxQx) + .int L(P7Q2)-L(fwdPxQx) + + .int L(P0Q3)-L(fwdPxQx) /* 24 */ + .int L(P1Q3)-L(fwdPxQx) + .int L(P2Q3)-L(fwdPxQx) + .int L(P3Q3)-L(fwdPxQx) + .int L(P4Q3)-L(fwdPxQx) + .int L(P5Q3)-L(fwdPxQx) + .int L(P6Q3)-L(fwdPxQx) + .int L(P7Q3)-L(fwdPxQx) + + .int L(P0Q4)-L(fwdPxQx) /* 32 */ + .int L(P1Q4)-L(fwdPxQx) + .int L(P2Q4)-L(fwdPxQx) + .int L(P3Q4)-L(fwdPxQx) + .int L(P4Q4)-L(fwdPxQx) + .int L(P5Q4)-L(fwdPxQx) + .int L(P6Q4)-L(fwdPxQx) + .int L(P7Q4)-L(fwdPxQx) + + .int L(P0Q5)-L(fwdPxQx) /* 40 */ + .int L(P1Q5)-L(fwdPxQx) + .int L(P2Q5)-L(fwdPxQx) + .int L(P3Q5)-L(fwdPxQx) + .int L(P4Q5)-L(fwdPxQx) + .int L(P5Q5)-L(fwdPxQx) + .int L(P6Q5)-L(fwdPxQx) + .int L(P7Q5)-L(fwdPxQx) + + .int L(P0Q6)-L(fwdPxQx) /* 48 */ + .int L(P1Q6)-L(fwdPxQx) + .int L(P2Q6)-L(fwdPxQx) + .int L(P3Q6)-L(fwdPxQx) + .int L(P4Q6)-L(fwdPxQx) + .int L(P5Q6)-L(fwdPxQx) + .int L(P6Q6)-L(fwdPxQx) + .int L(P7Q6)-L(fwdPxQx) + + .int L(P0Q7)-L(fwdPxQx) /* 56 */ + .int L(P1Q7)-L(fwdPxQx) + .int L(P2Q7)-L(fwdPxQx) + .int L(P3Q7)-L(fwdPxQx) + .int L(P4Q7)-L(fwdPxQx) + .int L(P5Q7)-L(fwdPxQx) + .int L(P6Q7)-L(fwdPxQx) + .int L(P7Q7)-L(fwdPxQx) + + .int L(P0Q8)-L(fwdPxQx) /* 64 */ + .int L(P1Q8)-L(fwdPxQx) + .int L(P2Q8)-L(fwdPxQx) + .int L(P3Q8)-L(fwdPxQx) + .int L(P4Q8)-L(fwdPxQx) + .int L(P5Q8)-L(fwdPxQx) + .int L(P6Q8)-L(fwdPxQx) + .int L(P7Q8)-L(fwdPxQx) + + .int L(P0Q9)-L(fwdPxQx) /* 72 */ + .int L(P1Q9)-L(fwdPxQx) + .int L(P2Q9)-L(fwdPxQx) + .int L(P3Q9)-L(fwdPxQx) + .int L(P4Q9)-L(fwdPxQx) + .int L(P5Q9)-L(fwdPxQx) + .int L(P6Q9)-L(fwdPxQx) + .int L(P7Q9)-L(fwdPxQx) /* 79 */ + + .p2align 4 +L(P0Q9): + mov -0x48(%rdi), %rcx + mov %rcx, -0x48(%rsi) +L(P0Q8): + mov -0x40(%rdi), %r10 + mov %r10, -0x40(%rsi) +L(P0Q7): + mov -0x38(%rdi), %r8 + mov %r8, -0x38(%rsi) +L(P0Q6): + mov -0x30(%rdi), %rcx + mov %rcx, -0x30(%rsi) +L(P0Q5): + mov -0x28(%rdi), %r10 + mov %r10, -0x28(%rsi) +L(P0Q4): + mov -0x20(%rdi), %r8 + mov %r8, -0x20(%rsi) +L(P0Q3): + mov -0x18(%rdi), %rcx + mov %rcx, -0x18(%rsi) +L(P0Q2): + mov -0x10(%rdi), %r10 + mov %r10, -0x10(%rsi) +L(P0Q1): + mov -0x8(%rdi), %r8 + mov %r8, -0x8(%rsi) +L(P0Q0): + ret + + .p2align 4 +L(P1Q9): + mov -0x49(%rdi), %r8 + mov %r8, -0x49(%rsi) +L(P1Q8): + mov -0x41(%rdi), %rcx + mov %rcx, -0x41(%rsi) +L(P1Q7): + mov -0x39(%rdi), %r10 + mov %r10, -0x39(%rsi) +L(P1Q6): + mov -0x31(%rdi), %r8 + mov %r8, -0x31(%rsi) +L(P1Q5): + mov -0x29(%rdi), %rcx + mov %rcx, -0x29(%rsi) +L(P1Q4): + mov -0x21(%rdi), %r10 + mov %r10, -0x21(%rsi) +L(P1Q3): + mov -0x19(%rdi), %r8 + mov %r8, -0x19(%rsi) +L(P1Q2): + mov -0x11(%rdi), %rcx + mov %rcx, -0x11(%rsi) +L(P1Q1): + mov -0x9(%rdi), %r10 + mov %r10, -0x9(%rsi) +L(P1Q0): + movzbq -0x1(%rdi), %r8 + mov %r8b, -0x1(%rsi) + ret + + .p2align 4 +L(P2Q9): + mov 
-0x4a(%rdi), %r8 + mov %r8, -0x4a(%rsi) +L(P2Q8): + mov -0x42(%rdi), %rcx + mov %rcx, -0x42(%rsi) +L(P2Q7): + mov -0x3a(%rdi), %r10 + mov %r10, -0x3a(%rsi) +L(P2Q6): + mov -0x32(%rdi), %r8 + mov %r8, -0x32(%rsi) +L(P2Q5): + mov -0x2a(%rdi), %rcx + mov %rcx, -0x2a(%rsi) +L(P2Q4): + mov -0x22(%rdi), %r10 + mov %r10, -0x22(%rsi) +L(P2Q3): + mov -0x1a(%rdi), %r8 + mov %r8, -0x1a(%rsi) +L(P2Q2): + mov -0x12(%rdi), %rcx + mov %rcx, -0x12(%rsi) +L(P2Q1): + mov -0xa(%rdi), %r10 + mov %r10, -0xa(%rsi) +L(P2Q0): + movzwq -0x2(%rdi), %r8 + mov %r8w, -0x2(%rsi) + ret + + .p2align 4 +L(P3Q9): + mov -0x4b(%rdi), %r8 + mov %r8, -0x4b(%rsi) +L(P3Q8): + mov -0x43(%rdi), %rcx + mov %rcx, -0x43(%rsi) +L(P3Q7): + mov -0x3b(%rdi), %r10 + mov %r10, -0x3b(%rsi) +L(P3Q6): + mov -0x33(%rdi), %r8 + mov %r8, -0x33(%rsi) +L(P3Q5): + mov -0x2b(%rdi), %rcx + mov %rcx, -0x2b(%rsi) +L(P3Q4): + mov -0x23(%rdi), %r10 + mov %r10, -0x23(%rsi) +L(P3Q3): + mov -0x1b(%rdi), %r8 + mov %r8, -0x1b(%rsi) +L(P3Q2): + mov -0x13(%rdi), %rcx + mov %rcx, -0x13(%rsi) +L(P3Q1): + mov -0xb(%rdi), %r10 + mov %r10, -0xb(%rsi) + /* + * These trailing loads/stores have to do all their loads 1st, + * then do the stores. + */ +L(P3Q0): + movzwq -0x3(%rdi), %r8 + movzbq -0x1(%rdi), %r10 + mov %r8w, -0x3(%rsi) + mov %r10b, -0x1(%rsi) + ret + + .p2align 4 +L(P4Q9): + mov -0x4c(%rdi), %r8 + mov %r8, -0x4c(%rsi) +L(P4Q8): + mov -0x44(%rdi), %rcx + mov %rcx, -0x44(%rsi) +L(P4Q7): + mov -0x3c(%rdi), %r10 + mov %r10, -0x3c(%rsi) +L(P4Q6): + mov -0x34(%rdi), %r8 + mov %r8, -0x34(%rsi) +L(P4Q5): + mov -0x2c(%rdi), %rcx + mov %rcx, -0x2c(%rsi) +L(P4Q4): + mov -0x24(%rdi), %r10 + mov %r10, -0x24(%rsi) +L(P4Q3): + mov -0x1c(%rdi), %r8 + mov %r8, -0x1c(%rsi) +L(P4Q2): + mov -0x14(%rdi), %rcx + mov %rcx, -0x14(%rsi) +L(P4Q1): + mov -0xc(%rdi), %r10 + mov %r10, -0xc(%rsi) +L(P4Q0): + mov -0x4(%rdi), %r8d + mov %r8d, -0x4(%rsi) + ret + + .p2align 4 +L(P5Q9): + mov -0x4d(%rdi), %r8 + mov %r8, -0x4d(%rsi) +L(P5Q8): + mov -0x45(%rdi), %rcx + mov %rcx, -0x45(%rsi) +L(P5Q7): + mov -0x3d(%rdi), %r10 + mov %r10, -0x3d(%rsi) +L(P5Q6): + mov -0x35(%rdi), %r8 + mov %r8, -0x35(%rsi) +L(P5Q5): + mov -0x2d(%rdi), %rcx + mov %rcx, -0x2d(%rsi) +L(P5Q4): + mov -0x25(%rdi), %r10 + mov %r10, -0x25(%rsi) +L(P5Q3): + mov -0x1d(%rdi), %r8 + mov %r8, -0x1d(%rsi) +L(P5Q2): + mov -0x15(%rdi), %rcx + mov %rcx, -0x15(%rsi) +L(P5Q1): + mov -0xd(%rdi), %r10 + mov %r10, -0xd(%rsi) +L(P5Q0): + mov -0x5(%rdi), %r8d + movzbq -0x1(%rdi), %r10 + mov %r8d, -0x5(%rsi) + mov %r10b, -0x1(%rsi) + ret + + .p2align 4 +L(P6Q9): + mov -0x4e(%rdi), %r8 + mov %r8, -0x4e(%rsi) +L(P6Q8): + mov -0x46(%rdi), %rcx + mov %rcx, -0x46(%rsi) +L(P6Q7): + mov -0x3e(%rdi), %r10 + mov %r10, -0x3e(%rsi) +L(P6Q6): + mov -0x36(%rdi), %r8 + mov %r8, -0x36(%rsi) +L(P6Q5): + mov -0x2e(%rdi), %rcx + mov %rcx, -0x2e(%rsi) +L(P6Q4): + mov -0x26(%rdi), %r10 + mov %r10, -0x26(%rsi) +L(P6Q3): + mov -0x1e(%rdi), %r8 + mov %r8, -0x1e(%rsi) +L(P6Q2): + mov -0x16(%rdi), %rcx + mov %rcx, -0x16(%rsi) +L(P6Q1): + mov -0xe(%rdi), %r10 + mov %r10, -0xe(%rsi) +L(P6Q0): + mov -0x6(%rdi), %r8d + movzwq -0x2(%rdi), %r10 + mov %r8d, -0x6(%rsi) + mov %r10w, -0x2(%rsi) + ret + + .p2align 4 +L(P7Q9): + mov -0x4f(%rdi), %r8 + mov %r8, -0x4f(%rsi) +L(P7Q8): + mov -0x47(%rdi), %rcx + mov %rcx, -0x47(%rsi) +L(P7Q7): + mov -0x3f(%rdi), %r10 + mov %r10, -0x3f(%rsi) +L(P7Q6): + mov -0x37(%rdi), %r8 + mov %r8, -0x37(%rsi) +L(P7Q5): + mov -0x2f(%rdi), %rcx + mov %rcx, -0x2f(%rsi) +L(P7Q4): + mov -0x27(%rdi), %r10 + mov %r10, -0x27(%rsi) +L(P7Q3): + mov 
-0x1f(%rdi), %r8 + mov %r8, -0x1f(%rsi) +L(P7Q2): + mov -0x17(%rdi), %rcx + mov %rcx, -0x17(%rsi) +L(P7Q1): + mov -0xf(%rdi), %r10 + mov %r10, -0xf(%rsi) +L(P7Q0): + mov -0x7(%rdi), %r8d + movzwq -0x3(%rdi), %r10 + movzbq -0x1(%rdi), %rcx + mov %r8d, -0x7(%rsi) + mov %r10w, -0x3(%rsi) + mov %cl, -0x1(%rsi) + ret + + /* + * For large sizes rep smovq is fastest. + * Transition point determined experimentally as measured on + * Intel Xeon processors (incl. Nehalem and previous generations) and + * AMD Opteron. The transition value is patched at boot time to avoid + * memory reference hit. + */ + .globl bcopy_patch_start +bcopy_patch_start: + cmpq $BCOPY_NHM_REP, %rdx + .globl bcopy_patch_end +bcopy_patch_end: + + .p2align 4 + ALTENTRY(bcopy_ck_size) + + cmpq $BCOPY_DFLT_REP, %rdx + jae L(use_rep) + + /* + * Align to a 8-byte boundary. Avoids penalties from unaligned stores + * as well as from stores spanning cachelines. + */ + test $0x7, %rsi + jz L(aligned_loop) + test $0x1, %rsi + jz 2f + movzbq (%rdi), %r8 + dec %rdx + inc %rdi + mov %r8b, (%rsi) + inc %rsi +2: + test $0x2, %rsi + jz 4f + movzwq (%rdi), %r8 + sub $0x2, %rdx + add $0x2, %rdi + mov %r8w, (%rsi) + add $0x2, %rsi +4: + test $0x4, %rsi + jz L(aligned_loop) + mov (%rdi), %r8d + sub $0x4, %rdx + add $0x4, %rdi + mov %r8d, (%rsi) + add $0x4, %rsi + + /* + * Copy 64-bytes per loop + */ + .p2align 4 +L(aligned_loop): + mov (%rdi), %r8 + mov 0x8(%rdi), %r10 + lea -0x40(%rdx), %rdx + mov %r8, (%rsi) + mov %r10, 0x8(%rsi) + mov 0x10(%rdi), %rcx + mov 0x18(%rdi), %r8 + mov %rcx, 0x10(%rsi) + mov %r8, 0x18(%rsi) + + cmp $0x40, %rdx + mov 0x20(%rdi), %r10 + mov 0x28(%rdi), %rcx + mov %r10, 0x20(%rsi) + mov %rcx, 0x28(%rsi) + mov 0x30(%rdi), %r8 + mov 0x38(%rdi), %r10 + lea 0x40(%rdi), %rdi + mov %r8, 0x30(%rsi) + mov %r10, 0x38(%rsi) + lea 0x40(%rsi), %rsi + jae L(aligned_loop) + + /* + * Copy remaining bytes (0-63) + */ +L(do_remainder): + leaq L(fwdPxQx)(%rip), %r10 + addq %rdx, %rdi + addq %rdx, %rsi + movslq (%r10,%rdx,4), %rcx + leaq (%rcx,%r10,1), %r10 + INDIRECT_JMP_REG(r10) + + /* + * Use rep smovq. Clear remainder via unrolled code + */ + .p2align 4 +L(use_rep): + xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */ + movq %rdx, %rcx /* %rcx = count */ + shrq $3, %rcx /* 8-byte word count */ + rep + smovq + + xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */ + andq $7, %rdx /* remainder */ + jnz L(do_remainder) + ret +#undef L + SET_SIZE(bcopy_ck_size) + +#ifdef DEBUG + /* + * Setup frame on the run-time stack. The end of the input argument + * area must be aligned on a 16 byte boundary. The stack pointer %rsp, + * always points to the end of the latest allocated stack frame. + * panic(const char *format, ...) is a varargs function. When a + * function taking variable arguments is called, %rax must be set + * to eight times the number of floating point parameters passed + * to the function in SSE registers. + */ +call_panic: + pushq %rbp /* align stack properly */ + movq %rsp, %rbp + xorl %eax, %eax /* no variable arguments */ + call panic /* %rdi = format string */ +#endif + SET_SIZE(bcopy_altentry) + SET_SIZE(bcopy) + + +/* + * Zero a block of storage, returning an error code if we + * take a kernel pagefault which cannot be resolved. 
+ * Returns errno value on pagefault error, 0 if all ok + */ + + ENTRY(kzero) +#ifdef DEBUG + cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ + jnb 0f + leaq .kzero_panic_msg(%rip), %rdi + jmp call_panic /* setup stack and call panic */ +0: +#endif + /* + * pass lofault value as 3rd argument for fault return + */ + leaq _kzeroerr(%rip), %rdx + + movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ + movq T_LOFAULT(%r9), %r11 /* save the current lofault */ + movq %rdx, T_LOFAULT(%r9) /* new lofault */ + call bzero_altentry + xorl %eax, %eax + movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ + ret + /* + * A fault during bzero is indicated through an errno value + * in %rax when we iretq to here. + */ +_kzeroerr: + addq $8, %rsp /* pop bzero_altentry call ret addr */ + movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ + ret + SET_SIZE(kzero) + +/* + * Zero a block of storage. + */ + + ENTRY(bzero) +#ifdef DEBUG + cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ + jnb 0f + leaq .bzero_panic_msg(%rip), %rdi + jmp call_panic /* setup stack and call panic */ +0: +#endif + ALTENTRY(bzero_altentry) +do_zero: +#define L(s) .bzero/**/s + xorl %eax, %eax + + cmpq $0x50, %rsi /* 80 */ + jae L(ck_align) + + /* + * Performance data shows many caller's are zeroing small buffers. So + * for best perf for these sizes unrolled code is used. Store zeros + * without worrying about alignment. + */ + leaq L(setPxQx)(%rip), %r10 + addq %rsi, %rdi + movslq (%r10,%rsi,4), %rcx + leaq (%rcx,%r10,1), %r10 + INDIRECT_JMP_REG(r10) + + .p2align 4 +L(setPxQx): + .int L(P0Q0)-L(setPxQx) /* 0 */ + .int L(P1Q0)-L(setPxQx) + .int L(P2Q0)-L(setPxQx) + .int L(P3Q0)-L(setPxQx) + .int L(P4Q0)-L(setPxQx) + .int L(P5Q0)-L(setPxQx) + .int L(P6Q0)-L(setPxQx) + .int L(P7Q0)-L(setPxQx) + + .int L(P0Q1)-L(setPxQx) /* 8 */ + .int L(P1Q1)-L(setPxQx) + .int L(P2Q1)-L(setPxQx) + .int L(P3Q1)-L(setPxQx) + .int L(P4Q1)-L(setPxQx) + .int L(P5Q1)-L(setPxQx) + .int L(P6Q1)-L(setPxQx) + .int L(P7Q1)-L(setPxQx) + + .int L(P0Q2)-L(setPxQx) /* 16 */ + .int L(P1Q2)-L(setPxQx) + .int L(P2Q2)-L(setPxQx) + .int L(P3Q2)-L(setPxQx) + .int L(P4Q2)-L(setPxQx) + .int L(P5Q2)-L(setPxQx) + .int L(P6Q2)-L(setPxQx) + .int L(P7Q2)-L(setPxQx) + + .int L(P0Q3)-L(setPxQx) /* 24 */ + .int L(P1Q3)-L(setPxQx) + .int L(P2Q3)-L(setPxQx) + .int L(P3Q3)-L(setPxQx) + .int L(P4Q3)-L(setPxQx) + .int L(P5Q3)-L(setPxQx) + .int L(P6Q3)-L(setPxQx) + .int L(P7Q3)-L(setPxQx) + + .int L(P0Q4)-L(setPxQx) /* 32 */ + .int L(P1Q4)-L(setPxQx) + .int L(P2Q4)-L(setPxQx) + .int L(P3Q4)-L(setPxQx) + .int L(P4Q4)-L(setPxQx) + .int L(P5Q4)-L(setPxQx) + .int L(P6Q4)-L(setPxQx) + .int L(P7Q4)-L(setPxQx) + + .int L(P0Q5)-L(setPxQx) /* 40 */ + .int L(P1Q5)-L(setPxQx) + .int L(P2Q5)-L(setPxQx) + .int L(P3Q5)-L(setPxQx) + .int L(P4Q5)-L(setPxQx) + .int L(P5Q5)-L(setPxQx) + .int L(P6Q5)-L(setPxQx) + .int L(P7Q5)-L(setPxQx) + + .int L(P0Q6)-L(setPxQx) /* 48 */ + .int L(P1Q6)-L(setPxQx) + .int L(P2Q6)-L(setPxQx) + .int L(P3Q6)-L(setPxQx) + .int L(P4Q6)-L(setPxQx) + .int L(P5Q6)-L(setPxQx) + .int L(P6Q6)-L(setPxQx) + .int L(P7Q6)-L(setPxQx) + + .int L(P0Q7)-L(setPxQx) /* 56 */ + .int L(P1Q7)-L(setPxQx) + .int L(P2Q7)-L(setPxQx) + .int L(P3Q7)-L(setPxQx) + .int L(P4Q7)-L(setPxQx) + .int L(P5Q7)-L(setPxQx) + .int L(P6Q7)-L(setPxQx) + .int L(P7Q7)-L(setPxQx) + + .int L(P0Q8)-L(setPxQx) /* 64 */ + .int L(P1Q8)-L(setPxQx) + .int L(P2Q8)-L(setPxQx) + .int L(P3Q8)-L(setPxQx) + .int L(P4Q8)-L(setPxQx) + .int L(P5Q8)-L(setPxQx) + .int L(P6Q8)-L(setPxQx) + .int L(P7Q8)-L(setPxQx) + 
+ .int L(P0Q9)-L(setPxQx) /* 72 */ + .int L(P1Q9)-L(setPxQx) + .int L(P2Q9)-L(setPxQx) + .int L(P3Q9)-L(setPxQx) + .int L(P4Q9)-L(setPxQx) + .int L(P5Q9)-L(setPxQx) + .int L(P6Q9)-L(setPxQx) + .int L(P7Q9)-L(setPxQx) /* 79 */ + + .p2align 4 +L(P0Q9): mov %rax, -0x48(%rdi) +L(P0Q8): mov %rax, -0x40(%rdi) +L(P0Q7): mov %rax, -0x38(%rdi) +L(P0Q6): mov %rax, -0x30(%rdi) +L(P0Q5): mov %rax, -0x28(%rdi) +L(P0Q4): mov %rax, -0x20(%rdi) +L(P0Q3): mov %rax, -0x18(%rdi) +L(P0Q2): mov %rax, -0x10(%rdi) +L(P0Q1): mov %rax, -0x8(%rdi) +L(P0Q0): + ret + + .p2align 4 +L(P1Q9): mov %rax, -0x49(%rdi) +L(P1Q8): mov %rax, -0x41(%rdi) +L(P1Q7): mov %rax, -0x39(%rdi) +L(P1Q6): mov %rax, -0x31(%rdi) +L(P1Q5): mov %rax, -0x29(%rdi) +L(P1Q4): mov %rax, -0x21(%rdi) +L(P1Q3): mov %rax, -0x19(%rdi) +L(P1Q2): mov %rax, -0x11(%rdi) +L(P1Q1): mov %rax, -0x9(%rdi) +L(P1Q0): mov %al, -0x1(%rdi) + ret + + .p2align 4 +L(P2Q9): mov %rax, -0x4a(%rdi) +L(P2Q8): mov %rax, -0x42(%rdi) +L(P2Q7): mov %rax, -0x3a(%rdi) +L(P2Q6): mov %rax, -0x32(%rdi) +L(P2Q5): mov %rax, -0x2a(%rdi) +L(P2Q4): mov %rax, -0x22(%rdi) +L(P2Q3): mov %rax, -0x1a(%rdi) +L(P2Q2): mov %rax, -0x12(%rdi) +L(P2Q1): mov %rax, -0xa(%rdi) +L(P2Q0): mov %ax, -0x2(%rdi) + ret + + .p2align 4 +L(P3Q9): mov %rax, -0x4b(%rdi) +L(P3Q8): mov %rax, -0x43(%rdi) +L(P3Q7): mov %rax, -0x3b(%rdi) +L(P3Q6): mov %rax, -0x33(%rdi) +L(P3Q5): mov %rax, -0x2b(%rdi) +L(P3Q4): mov %rax, -0x23(%rdi) +L(P3Q3): mov %rax, -0x1b(%rdi) +L(P3Q2): mov %rax, -0x13(%rdi) +L(P3Q1): mov %rax, -0xb(%rdi) +L(P3Q0): mov %ax, -0x3(%rdi) + mov %al, -0x1(%rdi) + ret + + .p2align 4 +L(P4Q9): mov %rax, -0x4c(%rdi) +L(P4Q8): mov %rax, -0x44(%rdi) +L(P4Q7): mov %rax, -0x3c(%rdi) +L(P4Q6): mov %rax, -0x34(%rdi) +L(P4Q5): mov %rax, -0x2c(%rdi) +L(P4Q4): mov %rax, -0x24(%rdi) +L(P4Q3): mov %rax, -0x1c(%rdi) +L(P4Q2): mov %rax, -0x14(%rdi) +L(P4Q1): mov %rax, -0xc(%rdi) +L(P4Q0): mov %eax, -0x4(%rdi) + ret + + .p2align 4 +L(P5Q9): mov %rax, -0x4d(%rdi) +L(P5Q8): mov %rax, -0x45(%rdi) +L(P5Q7): mov %rax, -0x3d(%rdi) +L(P5Q6): mov %rax, -0x35(%rdi) +L(P5Q5): mov %rax, -0x2d(%rdi) +L(P5Q4): mov %rax, -0x25(%rdi) +L(P5Q3): mov %rax, -0x1d(%rdi) +L(P5Q2): mov %rax, -0x15(%rdi) +L(P5Q1): mov %rax, -0xd(%rdi) +L(P5Q0): mov %eax, -0x5(%rdi) + mov %al, -0x1(%rdi) + ret + + .p2align 4 +L(P6Q9): mov %rax, -0x4e(%rdi) +L(P6Q8): mov %rax, -0x46(%rdi) +L(P6Q7): mov %rax, -0x3e(%rdi) +L(P6Q6): mov %rax, -0x36(%rdi) +L(P6Q5): mov %rax, -0x2e(%rdi) +L(P6Q4): mov %rax, -0x26(%rdi) +L(P6Q3): mov %rax, -0x1e(%rdi) +L(P6Q2): mov %rax, -0x16(%rdi) +L(P6Q1): mov %rax, -0xe(%rdi) +L(P6Q0): mov %eax, -0x6(%rdi) + mov %ax, -0x2(%rdi) + ret + + .p2align 4 +L(P7Q9): mov %rax, -0x4f(%rdi) +L(P7Q8): mov %rax, -0x47(%rdi) +L(P7Q7): mov %rax, -0x3f(%rdi) +L(P7Q6): mov %rax, -0x37(%rdi) +L(P7Q5): mov %rax, -0x2f(%rdi) +L(P7Q4): mov %rax, -0x27(%rdi) +L(P7Q3): mov %rax, -0x1f(%rdi) +L(P7Q2): mov %rax, -0x17(%rdi) +L(P7Q1): mov %rax, -0xf(%rdi) +L(P7Q0): mov %eax, -0x7(%rdi) + mov %ax, -0x3(%rdi) + mov %al, -0x1(%rdi) + ret + + /* + * Align to a 16-byte boundary. Avoids penalties from unaligned stores + * as well as from stores spanning cachelines. Note 16-byte alignment + * is better in case where rep sstosq is used. 
+ */ + .p2align 4 +L(ck_align): + test $0xf, %rdi + jz L(aligned_now) + test $1, %rdi + jz 2f + mov %al, (%rdi) + dec %rsi + lea 1(%rdi),%rdi +2: + test $2, %rdi + jz 4f + mov %ax, (%rdi) + sub $2, %rsi + lea 2(%rdi),%rdi +4: + test $4, %rdi + jz 8f + mov %eax, (%rdi) + sub $4, %rsi + lea 4(%rdi),%rdi +8: + test $8, %rdi + jz L(aligned_now) + mov %rax, (%rdi) + sub $8, %rsi + lea 8(%rdi),%rdi + + /* + * For large sizes rep sstoq is fastest. + * Transition point determined experimentally as measured on + * Intel Xeon processors (incl. Nehalem) and AMD Opteron. + */ +L(aligned_now): + cmp $BZERO_USE_REP, %rsi + ja L(use_rep) + + /* + * zero 64-bytes per loop + */ + .p2align 4 +L(bzero_loop): + leaq -0x40(%rsi), %rsi + cmpq $0x40, %rsi + movq %rax, (%rdi) + movq %rax, 0x8(%rdi) + movq %rax, 0x10(%rdi) + movq %rax, 0x18(%rdi) + movq %rax, 0x20(%rdi) + movq %rax, 0x28(%rdi) + movq %rax, 0x30(%rdi) + movq %rax, 0x38(%rdi) + leaq 0x40(%rdi), %rdi + jae L(bzero_loop) + + /* + * Clear any remaining bytes.. + */ +9: + leaq L(setPxQx)(%rip), %r10 + addq %rsi, %rdi + movslq (%r10,%rsi,4), %rcx + leaq (%rcx,%r10,1), %r10 + INDIRECT_JMP_REG(r10) + + /* + * Use rep sstoq. Clear any remainder via unrolled code + */ + .p2align 4 +L(use_rep): + movq %rsi, %rcx /* get size in bytes */ + shrq $3, %rcx /* count of 8-byte words to zero */ + rep + sstoq /* %rcx = words to clear (%rax=0) */ + andq $7, %rsi /* remaining bytes */ + jnz 9b + ret +#undef L + SET_SIZE(bzero_altentry) + SET_SIZE(bzero) + +/* + * Transfer data to and from user space - + * Note that these routines can cause faults + * It is assumed that the kernel has nothing at + * less than KERNELBASE in the virtual address space. + * + * Note that copyin(9F) and copyout(9F) are part of the + * DDI/DKI which specifies that they return '-1' on "errors." + * + * Sigh. + * + * So there's two extremely similar routines - xcopyin_nta() and + * xcopyout_nta() which return the errno that we've faithfully computed. + * This allows other callers (e.g. uiomove(9F)) to work correctly. + * Given that these are used pretty heavily, we expand the calling + * sequences inline for all flavours (rather than making wrappers). + */ + +/* + * Copy user data to kernel space. + */ + + ENTRY(copyin) + pushq %rbp + movq %rsp, %rbp + subq $24, %rsp + + /* + * save args in case we trap and need to rerun as a copyop + */ + movq %rdi, (%rsp) + movq %rsi, 0x8(%rsp) + movq %rdx, 0x10(%rsp) + + movq kernelbase(%rip), %rax +#ifdef DEBUG + cmpq %rax, %rsi /* %rsi = kaddr */ + jnb 1f + leaq .copyin_panic_msg(%rip), %rdi + xorl %eax, %eax + call panic +1: +#endif + /* + * pass lofault value as 4th argument to do_copy_fault + */ + leaq _copyin_err(%rip), %rcx + + movq %gs:CPU_THREAD, %r9 + cmpq %rax, %rdi /* test uaddr < kernelbase */ + jae 3f /* take copyop if uaddr > kernelbase */ + SMAP_DISABLE_INSTR(0) + jmp do_copy_fault /* Takes care of leave for us */ + +_copyin_err: + SMAP_ENABLE_INSTR(2) + movq %r11, T_LOFAULT(%r9) /* restore original lofault */ + addq $8, %rsp /* pop bcopy_altentry call ret addr */ +3: + movq T_COPYOPS(%r9), %rax + cmpq $0, %rax + jz 2f + /* + * reload args for the copyop + */ + movq (%rsp), %rdi + movq 0x8(%rsp), %rsi + movq 0x10(%rsp), %rdx + leave + movq CP_COPYIN(%rax), %rax + INDIRECT_JMP_REG(rax) + +2: movl $-1, %eax + leave + ret + SET_SIZE(copyin) + + ENTRY(xcopyin_nta) + pushq %rbp + movq %rsp, %rbp + subq $24, %rsp + + /* + * save args in case we trap and need to rerun as a copyop + * %rcx is consumed in this routine so we don't need to save + * it. 
+ */ + movq %rdi, (%rsp) + movq %rsi, 0x8(%rsp) + movq %rdx, 0x10(%rsp) + + movq kernelbase(%rip), %rax +#ifdef DEBUG + cmpq %rax, %rsi /* %rsi = kaddr */ + jnb 1f + leaq .xcopyin_panic_msg(%rip), %rdi + xorl %eax, %eax + call panic +1: +#endif + movq %gs:CPU_THREAD, %r9 + cmpq %rax, %rdi /* test uaddr < kernelbase */ + jae 4f + cmpq $0, %rcx /* No non-temporal access? */ + /* + * pass lofault value as 4th argument to do_copy_fault + */ + leaq _xcopyin_err(%rip), %rcx /* doesn't set rflags */ + jnz 6f /* use regular access */ + /* + * Make sure cnt is >= XCOPY_MIN_SIZE bytes + */ + cmpq $XCOPY_MIN_SIZE, %rdx + jae 5f +6: + SMAP_DISABLE_INSTR(1) + jmp do_copy_fault + + /* + * Make sure src and dst are NTA_ALIGN_SIZE aligned, + * count is COUNT_ALIGN_SIZE aligned. + */ +5: + movq %rdi, %r10 + orq %rsi, %r10 + andq $NTA_ALIGN_MASK, %r10 + orq %rdx, %r10 + andq $COUNT_ALIGN_MASK, %r10 + jnz 6b + leaq _xcopyin_nta_err(%rip), %rcx /* doesn't set rflags */ + SMAP_DISABLE_INSTR(2) + jmp do_copy_fault_nta /* use non-temporal access */ + +4: + movl $EFAULT, %eax + jmp 3f + + /* + * A fault during do_copy_fault or do_copy_fault_nta is + * indicated through an errno value in %rax and we iret from the + * trap handler to here. + */ +_xcopyin_err: + addq $8, %rsp /* pop bcopy_altentry call ret addr */ +_xcopyin_nta_err: + SMAP_ENABLE_INSTR(3) + movq %r11, T_LOFAULT(%r9) /* restore original lofault */ +3: + movq T_COPYOPS(%r9), %r8 + cmpq $0, %r8 + jz 2f + + /* + * reload args for the copyop + */ + movq (%rsp), %rdi + movq 0x8(%rsp), %rsi + movq 0x10(%rsp), %rdx + leave + movq CP_XCOPYIN(%r8), %r8 + INDIRECT_JMP_REG(r8) + +2: leave + ret + SET_SIZE(xcopyin_nta) + +/* + * Copy kernel data to user space. + */ + + ENTRY(copyout) + pushq %rbp + movq %rsp, %rbp + subq $24, %rsp + + /* + * save args in case we trap and need to rerun as a copyop + */ + movq %rdi, (%rsp) + movq %rsi, 0x8(%rsp) + movq %rdx, 0x10(%rsp) + + movq kernelbase(%rip), %rax +#ifdef DEBUG + cmpq %rax, %rdi /* %rdi = kaddr */ + jnb 1f + leaq .copyout_panic_msg(%rip), %rdi + xorl %eax, %eax + call panic +1: +#endif + /* + * pass lofault value as 4th argument to do_copy_fault + */ + leaq _copyout_err(%rip), %rcx + + movq %gs:CPU_THREAD, %r9 + cmpq %rax, %rsi /* test uaddr < kernelbase */ + jae 3f /* take copyop if uaddr > kernelbase */ + SMAP_DISABLE_INSTR(3) + jmp do_copy_fault /* Calls leave for us */ + +_copyout_err: + SMAP_ENABLE_INSTR(4) + movq %r11, T_LOFAULT(%r9) /* restore original lofault */ + addq $8, %rsp /* pop bcopy_altentry call ret addr */ +3: + movq T_COPYOPS(%r9), %rax + cmpq $0, %rax + jz 2f + + /* + * reload args for the copyop + */ + movq (%rsp), %rdi + movq 0x8(%rsp), %rsi + movq 0x10(%rsp), %rdx + leave + movq CP_COPYOUT(%rax), %rax + INDIRECT_JMP_REG(rax) + +2: movl $-1, %eax + leave + ret + SET_SIZE(copyout) + + ENTRY(xcopyout_nta) + pushq %rbp + movq %rsp, %rbp + subq $24, %rsp + + /* + * save args in case we trap and need to rerun as a copyop + */ + movq %rdi, (%rsp) + movq %rsi, 0x8(%rsp) + movq %rdx, 0x10(%rsp) + + movq kernelbase(%rip), %rax +#ifdef DEBUG + cmpq %rax, %rdi /* %rdi = kaddr */ + jnb 1f + leaq .xcopyout_panic_msg(%rip), %rdi + xorl %eax, %eax + call panic +1: +#endif + movq %gs:CPU_THREAD, %r9 + cmpq %rax, %rsi /* test uaddr < kernelbase */ + jae 4f + + cmpq $0, %rcx /* No non-temporal access? 
*/ + /* + * pass lofault value as 4th argument to do_copy_fault + */ + leaq _xcopyout_err(%rip), %rcx + jnz 6f + /* + * Make sure cnt is >= XCOPY_MIN_SIZE bytes + */ + cmpq $XCOPY_MIN_SIZE, %rdx + jae 5f +6: + SMAP_DISABLE_INSTR(4) + jmp do_copy_fault + + /* + * Make sure src and dst are NTA_ALIGN_SIZE aligned, + * count is COUNT_ALIGN_SIZE aligned. + */ +5: + movq %rdi, %r10 + orq %rsi, %r10 + andq $NTA_ALIGN_MASK, %r10 + orq %rdx, %r10 + andq $COUNT_ALIGN_MASK, %r10 + jnz 6b + leaq _xcopyout_nta_err(%rip), %rcx + SMAP_DISABLE_INSTR(5) + call do_copy_fault_nta + SMAP_ENABLE_INSTR(5) + ret + +4: + movl $EFAULT, %eax + jmp 3f + + /* + * A fault during do_copy_fault or do_copy_fault_nta is + * indicated through an errno value in %rax and we iret from the + * trap handler to here. + */ +_xcopyout_err: + addq $8, %rsp /* pop bcopy_altentry call ret addr */ +_xcopyout_nta_err: + SMAP_ENABLE_INSTR(6) + movq %r11, T_LOFAULT(%r9) /* restore original lofault */ +3: + movq T_COPYOPS(%r9), %r8 + cmpq $0, %r8 + jz 2f + + /* + * reload args for the copyop + */ + movq (%rsp), %rdi + movq 0x8(%rsp), %rsi + movq 0x10(%rsp), %rdx + leave + movq CP_XCOPYOUT(%r8), %r8 + INDIRECT_JMP_REG(r8) + +2: leave + ret + SET_SIZE(xcopyout_nta) + +/* + * Copy a null terminated string from one point to another in + * the kernel address space. + */ + + ENTRY(copystr) + pushq %rbp + movq %rsp, %rbp +#ifdef DEBUG + movq kernelbase(%rip), %rax + cmpq %rax, %rdi /* %rdi = from */ + jb 0f + cmpq %rax, %rsi /* %rsi = to */ + jnb 1f +0: leaq .copystr_panic_msg(%rip), %rdi + xorl %eax, %eax + call panic +1: +#endif + movq %gs:CPU_THREAD, %r9 + movq T_LOFAULT(%r9), %r8 /* pass current lofault value as */ + /* 5th argument to do_copystr */ + xorl %r10d,%r10d /* pass smap restore need in %r10d */ + /* as a non-ABI 6th arg */ +do_copystr: + movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ + movq T_LOFAULT(%r9), %r11 /* save the current lofault */ + movq %r8, T_LOFAULT(%r9) /* new lofault */ + + movq %rdx, %r8 /* save maxlength */ + + cmpq $0, %rdx /* %rdx = maxlength */ + je copystr_enametoolong /* maxlength == 0 */ + +copystr_loop: + decq %r8 + movb (%rdi), %al + incq %rdi + movb %al, (%rsi) + incq %rsi + cmpb $0, %al + je copystr_null /* null char */ + cmpq $0, %r8 + jne copystr_loop + +copystr_enametoolong: + movl $ENAMETOOLONG, %eax + jmp copystr_out + +copystr_null: + xorl %eax, %eax /* no error */ + +copystr_out: + cmpq $0, %rcx /* want length? */ + je copystr_smap /* no */ + subq %r8, %rdx /* compute length and store it */ + movq %rdx, (%rcx) + +copystr_smap: + cmpl $0, %r10d + jz copystr_done + SMAP_ENABLE_INSTR(7) + +copystr_done: + movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ + leave + ret + SET_SIZE(copystr) + +/* + * Copy a null terminated string from the user address space into + * the kernel address space. 
+ */ + + ENTRY(copyinstr) + pushq %rbp + movq %rsp, %rbp + subq $32, %rsp + + /* + * save args in case we trap and need to rerun as a copyop + */ + movq %rdi, (%rsp) + movq %rsi, 0x8(%rsp) + movq %rdx, 0x10(%rsp) + movq %rcx, 0x18(%rsp) + + movq kernelbase(%rip), %rax +#ifdef DEBUG + cmpq %rax, %rsi /* %rsi = kaddr */ + jnb 1f + leaq .copyinstr_panic_msg(%rip), %rdi + xorl %eax, %eax + call panic +1: +#endif + /* + * pass lofault value as 5th argument to do_copystr + * do_copystr expects whether or not we need smap in %r10d + */ + leaq _copyinstr_error(%rip), %r8 + movl $1, %r10d + + cmpq %rax, %rdi /* test uaddr < kernelbase */ + jae 4f + SMAP_DISABLE_INSTR(6) + jmp do_copystr +4: + movq %gs:CPU_THREAD, %r9 + jmp 3f + +_copyinstr_error: + SMAP_ENABLE_INSTR(8) + movq %r11, T_LOFAULT(%r9) /* restore original lofault */ +3: + movq T_COPYOPS(%r9), %rax + cmpq $0, %rax + jz 2f + + /* + * reload args for the copyop + */ + movq (%rsp), %rdi + movq 0x8(%rsp), %rsi + movq 0x10(%rsp), %rdx + movq 0x18(%rsp), %rcx + leave + movq CP_COPYINSTR(%rax), %rax + INDIRECT_JMP_REG(rax) + +2: movl $EFAULT, %eax /* return EFAULT */ + leave + ret + SET_SIZE(copyinstr) + +/* + * Copy a null terminated string from the kernel + * address space to the user address space. + */ + + ENTRY(copyoutstr) + pushq %rbp + movq %rsp, %rbp + subq $32, %rsp + + /* + * save args in case we trap and need to rerun as a copyop + */ + movq %rdi, (%rsp) + movq %rsi, 0x8(%rsp) + movq %rdx, 0x10(%rsp) + movq %rcx, 0x18(%rsp) + + movq kernelbase(%rip), %rax +#ifdef DEBUG + cmpq %rax, %rdi /* %rdi = kaddr */ + jnb 1f + leaq .copyoutstr_panic_msg(%rip), %rdi + jmp call_panic /* setup stack and call panic */ +1: +#endif + /* + * pass lofault value as 5th argument to do_copystr + * pass one as 6th argument to do_copystr in %r10d + */ + leaq _copyoutstr_error(%rip), %r8 + movl $1, %r10d + + cmpq %rax, %rsi /* test uaddr < kernelbase */ + jae 4f + SMAP_DISABLE_INSTR(7) + jmp do_copystr +4: + movq %gs:CPU_THREAD, %r9 + jmp 3f + +_copyoutstr_error: + SMAP_ENABLE_INSTR(9) + movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ +3: + movq T_COPYOPS(%r9), %rax + cmpq $0, %rax + jz 2f + + /* + * reload args for the copyop + */ + movq (%rsp), %rdi + movq 0x8(%rsp), %rsi + movq 0x10(%rsp), %rdx + movq 0x18(%rsp), %rcx + leave + movq CP_COPYOUTSTR(%rax), %rax + INDIRECT_JMP_REG(rax) + +2: movl $EFAULT, %eax /* return EFAULT */ + leave + ret + SET_SIZE(copyoutstr) + +/* + * Since all of the fuword() variants are so similar, we have a macro to spit + * them out. This allows us to create DTrace-unobservable functions easily. + */ + +/* + * Note that we don't save and reload the arguments here + * because their values are not altered in the copy path. + * Additionally, when successful, the smap_enable jmp will + * actually return us to our original caller. 
+ */ + +#define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \ + ENTRY(NAME) \ + movq %gs:CPU_THREAD, %r9; \ + cmpq kernelbase(%rip), %rdi; \ + jae 1f; \ + leaq _flt_/**/NAME, %rdx; \ + movq %rdx, T_LOFAULT(%r9); \ + SMAP_DISABLE_INSTR(DISNUM) \ + INSTR (%rdi), REG; \ + movq $0, T_LOFAULT(%r9); \ + INSTR REG, (%rsi); \ + xorl %eax, %eax; \ + SMAP_ENABLE_INSTR(EN1) \ + ret; \ +_flt_/**/NAME: \ + SMAP_ENABLE_INSTR(EN2) \ + movq $0, T_LOFAULT(%r9); \ +1: \ + movq T_COPYOPS(%r9), %rax; \ + cmpq $0, %rax; \ + jz 2f; \ + movq COPYOP(%rax), %rax; \ + INDIRECT_JMP_REG(rax); \ +2: \ + movl $-1, %eax; \ + ret; \ + SET_SIZE(NAME) + + FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11) + FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13) + FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15) + FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17) + +#undef FUWORD + +/* + * Set user word. + */ + +/* + * Note that we don't save and reload the arguments here + * because their values are not altered in the copy path. + */ + +#define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \ + ENTRY(NAME) \ + movq %gs:CPU_THREAD, %r9; \ + cmpq kernelbase(%rip), %rdi; \ + jae 1f; \ + leaq _flt_/**/NAME, %rdx; \ + SMAP_DISABLE_INSTR(DISNUM) \ + movq %rdx, T_LOFAULT(%r9); \ + INSTR REG, (%rdi); \ + movq $0, T_LOFAULT(%r9); \ + xorl %eax, %eax; \ + SMAP_ENABLE_INSTR(EN1) \ + ret; \ +_flt_/**/NAME: \ + SMAP_ENABLE_INSTR(EN2) \ + movq $0, T_LOFAULT(%r9); \ +1: \ + movq T_COPYOPS(%r9), %rax; \ + cmpq $0, %rax; \ + jz 3f; \ + movq COPYOP(%rax), %rax; \ + INDIRECT_JMP_REG(rax); \ +3: \ + movl $-1, %eax; \ + ret; \ + SET_SIZE(NAME) + + SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19) + SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21) + SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23) + SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25) + +#undef SUWORD + +#define FUWORD_NOERR(NAME, INSTR, REG) \ + ENTRY(NAME) \ + cmpq kernelbase(%rip), %rdi; \ + cmovnbq kernelbase(%rip), %rdi; \ + INSTR (%rdi), REG; \ + INSTR REG, (%rsi); \ + ret; \ + SET_SIZE(NAME) + + FUWORD_NOERR(fuword64_noerr, movq, %rax) + FUWORD_NOERR(fuword32_noerr, movl, %eax) + FUWORD_NOERR(fuword16_noerr, movw, %ax) + FUWORD_NOERR(fuword8_noerr, movb, %al) + +#undef FUWORD_NOERR + +#define SUWORD_NOERR(NAME, INSTR, REG) \ + ENTRY(NAME) \ + cmpq kernelbase(%rip), %rdi; \ + cmovnbq kernelbase(%rip), %rdi; \ + INSTR REG, (%rdi); \ + ret; \ + SET_SIZE(NAME) + + SUWORD_NOERR(suword64_noerr, movq, %rsi) + SUWORD_NOERR(suword32_noerr, movl, %esi) + SUWORD_NOERR(suword16_noerr, movw, %si) + SUWORD_NOERR(suword8_noerr, movb, %sil) + +#undef SUWORD_NOERR + + + .weak subyte + subyte=suword8 + .weak subyte_noerr + subyte_noerr=suword8_noerr + + .weak fulword + fulword=fuword64 + .weak fulword_noerr + fulword_noerr=fuword64_noerr + .weak sulword + sulword=suword64 + .weak sulword_noerr + sulword_noerr=suword64_noerr + + ENTRY(copyin_noerr) + movq kernelbase(%rip), %rax +#ifdef DEBUG + cmpq %rax, %rsi /* %rsi = kto */ + jae 1f + leaq .cpyin_ne_pmsg(%rip), %rdi + jmp call_panic /* setup stack and call panic */ +1: +#endif + cmpq %rax, %rdi /* ufrom < kernelbase */ + jb do_copy + movq %rax, %rdi /* force fault at kernelbase */ + jmp do_copy + SET_SIZE(copyin_noerr) + + ENTRY(copyout_noerr) + movq kernelbase(%rip), %rax +#ifdef DEBUG + cmpq %rax, %rdi /* %rdi = kfrom */ + jae 1f + leaq .cpyout_ne_pmsg(%rip), %rdi + jmp call_panic /* setup stack and call panic */ +1: +#endif + cmpq %rax, %rsi /* uto < kernelbase */ + jb do_copy + movq %rax, %rsi /* force fault at kernelbase */ + 
jmp do_copy + SET_SIZE(copyout_noerr) + + ENTRY(uzero) + movq kernelbase(%rip), %rax + cmpq %rax, %rdi + jb do_zero + movq %rax, %rdi /* force fault at kernelbase */ + jmp do_zero + SET_SIZE(uzero) + + ENTRY(ucopy) + movq kernelbase(%rip), %rax + cmpq %rax, %rdi + cmovaeq %rax, %rdi /* force fault at kernelbase */ + cmpq %rax, %rsi + cmovaeq %rax, %rsi /* force fault at kernelbase */ + jmp do_copy + SET_SIZE(ucopy) + + /* + * Note, the frame pointer is required here becuase do_copystr expects + * to be able to pop it off! + */ + ENTRY(ucopystr) + pushq %rbp + movq %rsp, %rbp + movq kernelbase(%rip), %rax + cmpq %rax, %rdi + cmovaeq %rax, %rdi /* force fault at kernelbase */ + cmpq %rax, %rsi + cmovaeq %rax, %rsi /* force fault at kernelbase */ + /* do_copystr expects lofault address in %r8 */ + /* do_copystr expects whether or not we need smap in %r10 */ + xorl %r10d, %r10d + movq %gs:CPU_THREAD, %r8 + movq T_LOFAULT(%r8), %r8 + jmp do_copystr + SET_SIZE(ucopystr) + +#ifdef DEBUG + .data +.kcopy_panic_msg: + .string "kcopy: arguments below kernelbase" +.bcopy_panic_msg: + .string "bcopy: arguments below kernelbase" +.kzero_panic_msg: + .string "kzero: arguments below kernelbase" +.bzero_panic_msg: + .string "bzero: arguments below kernelbase" +.copyin_panic_msg: + .string "copyin: kaddr argument below kernelbase" +.xcopyin_panic_msg: + .string "xcopyin: kaddr argument below kernelbase" +.copyout_panic_msg: + .string "copyout: kaddr argument below kernelbase" +.xcopyout_panic_msg: + .string "xcopyout: kaddr argument below kernelbase" +.copystr_panic_msg: + .string "copystr: arguments in user space" +.copyinstr_panic_msg: + .string "copyinstr: kaddr argument not in kernel address space" +.copyoutstr_panic_msg: + .string "copyoutstr: kaddr argument not in kernel address space" +.cpyin_ne_pmsg: + .string "copyin_noerr: argument not in kernel address space" +.cpyout_ne_pmsg: + .string "copyout_noerr: argument not in kernel address space" +#endif + +.data +.align 4 +.globl _smap_enable_patch_count +.type _smap_enable_patch_count,@object +.size _smap_enable_patch_count, 4 +_smap_enable_patch_count: + .long SMAP_ENABLE_COUNT + +.globl _smap_disable_patch_count +.type _smap_disable_patch_count,@object +.size _smap_disable_patch_count, 4 +_smap_disable_patch_count: + .long SMAP_DISABLE_COUNT diff --git a/usr/src/uts/intel/ml/ddi_i86_asm.s b/usr/src/uts/intel/ml/ddi_i86_asm.s new file mode 100644 index 0000000000..2fa9bd75e9 --- /dev/null +++ b/usr/src/uts/intel/ml/ddi_i86_asm.s @@ -0,0 +1,522 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2019 Joyent, Inc. 
+ */ + +#include <sys/asm_linkage.h> +#include <sys/asm_misc.h> +#include "assym.h" + + ENTRY(ddi_get8) + ALTENTRY(ddi_mem_get8) + ALTENTRY(ddi_io_get8) + movl ACC_ATTR(%rdi), %edx + cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %edx + jne 1f + movq %rsi, %rdx + xorq %rax, %rax + inb (%dx) + ret +1: + cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %edx + jne 2f + movzbq (%rsi), %rax + ret +2: + movq ACC_GETB(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_get8) + SET_SIZE(ddi_mem_get8) + SET_SIZE(ddi_io_get8) + + + ENTRY(ddi_get16) + ALTENTRY(ddi_mem_get16) + ALTENTRY(ddi_io_get16) + movl ACC_ATTR(%rdi), %edx + cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %edx + jne 3f + movq %rsi, %rdx + xorq %rax, %rax + inw (%dx) + ret +3: + cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %edx + jne 4f + movzwq (%rsi), %rax + ret +4: + movq ACC_GETW(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_get16) + SET_SIZE(ddi_mem_get16) + SET_SIZE(ddi_io_get16) + + + ENTRY(ddi_get32) + ALTENTRY(ddi_mem_get32) + ALTENTRY(ddi_io_get32) + movl ACC_ATTR(%rdi), %edx + cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %edx + jne 5f + movq %rsi, %rdx + inl (%dx) + ret +5: + cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %edx + jne 6f + movl (%rsi), %eax + ret +6: + movq ACC_GETL(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_get32) + SET_SIZE(ddi_mem_get32) + SET_SIZE(ddi_io_get32) + + + ENTRY(ddi_get64) + ALTENTRY(ddi_mem_get64) + movq ACC_GETLL(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_get64) + SET_SIZE(ddi_mem_get64) + + + ENTRY(ddi_put8) + ALTENTRY(ddi_mem_put8) + ALTENTRY(ddi_io_put8) + movl ACC_ATTR(%rdi), %ecx + cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %ecx + jne 7f + movq %rdx, %rax + movq %rsi, %rdx + outb (%dx) + ret +7: + cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %ecx + jne 8f + movb %dl, (%rsi) + ret +8: + movq ACC_PUTB(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_put8) + SET_SIZE(ddi_mem_put8) + SET_SIZE(ddi_io_put8) + + + ENTRY(ddi_put16) + ALTENTRY(ddi_mem_put16) + ALTENTRY(ddi_io_put16) + movl ACC_ATTR(%rdi), %ecx + cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %ecx + jne 8f + movq %rdx, %rax + movq %rsi, %rdx + outw (%dx) + ret +8: + cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %ecx + jne 9f + movw %dx, (%rsi) + ret +9: + movq ACC_PUTW(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_put16) + SET_SIZE(ddi_mem_put16) + SET_SIZE(ddi_io_put16) + + + ENTRY(ddi_put32) + ALTENTRY(ddi_mem_put32) + ALTENTRY(ddi_io_put32) + movl ACC_ATTR(%rdi), %ecx + cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %ecx + jne 8f + movq %rdx, %rax + movq %rsi, %rdx + outl (%dx) + ret +8: + cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %ecx + jne 9f + movl %edx, (%rsi) + ret +9: + movq ACC_PUTL(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_put32) + SET_SIZE(ddi_mem_put32) + SET_SIZE(ddi_io_put32) + + + ENTRY(ddi_put64) + ALTENTRY(ddi_mem_put64) + movq ACC_PUTLL(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_put64) + SET_SIZE(ddi_mem_put64) + + + ENTRY(ddi_rep_get8) + ALTENTRY(ddi_mem_rep_get8) + movq ACC_REP_GETB(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_rep_get8) + SET_SIZE(ddi_mem_rep_get8) + + + ENTRY(ddi_rep_get16) + ALTENTRY(ddi_mem_rep_get16) + movq ACC_REP_GETW(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_rep_get16) + SET_SIZE(ddi_mem_rep_get16) + + + ENTRY(ddi_rep_get32) + ALTENTRY(ddi_mem_rep_get32) + movq ACC_REP_GETL(%rdi), %rax + INDIRECT_JMP_REG(rax) + 
SET_SIZE(ddi_rep_get32) + SET_SIZE(ddi_mem_rep_get32) + + + ENTRY(ddi_rep_get64) + ALTENTRY(ddi_mem_rep_get64) + movq ACC_REP_GETLL(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_rep_get64) + SET_SIZE(ddi_mem_rep_get64) + + + ENTRY(ddi_rep_put8) + ALTENTRY(ddi_mem_rep_put8) + movq ACC_REP_PUTB(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_rep_put8) + SET_SIZE(ddi_mem_rep_put8) + + + ENTRY(ddi_rep_put16) + ALTENTRY(ddi_mem_rep_put16) + movq ACC_REP_PUTW(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_rep_put16) + SET_SIZE(ddi_mem_rep_put16) + + + ENTRY(ddi_rep_put32) + ALTENTRY(ddi_mem_rep_put32) + movq ACC_REP_PUTL(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_rep_put32) + SET_SIZE(ddi_mem_rep_put32) + + + ENTRY(ddi_rep_put64) + ALTENTRY(ddi_mem_rep_put64) + movq ACC_REP_PUTLL(%rdi), %rax + INDIRECT_JMP_REG(rax) + SET_SIZE(ddi_rep_put64) + SET_SIZE(ddi_mem_rep_put64) + + ENTRY(i_ddi_vaddr_get8) + movzbq (%rsi), %rax + ret + SET_SIZE(i_ddi_vaddr_get8) + + ENTRY(i_ddi_vaddr_get16) + movzwq (%rsi), %rax + ret + SET_SIZE(i_ddi_vaddr_get16) + + + ENTRY(i_ddi_vaddr_get32) + movl (%rsi), %eax + ret + SET_SIZE(i_ddi_vaddr_get32) + + + ENTRY(i_ddi_vaddr_get64) + movq (%rsi), %rax + ret + SET_SIZE(i_ddi_vaddr_get64) + + + ENTRY(i_ddi_io_get8) + movq %rsi, %rdx + inb (%dx) + movzbq %al, %rax + ret + SET_SIZE(i_ddi_io_get8) + + + ENTRY(i_ddi_io_get16) + movq %rsi, %rdx + inw (%dx) + movzwq %ax, %rax + ret + SET_SIZE(i_ddi_io_get16) + + + ENTRY(i_ddi_io_get32) + movq %rsi, %rdx + inl (%dx) + ret + SET_SIZE(i_ddi_io_get32) + + ENTRY(i_ddi_vaddr_put8) + movb %dl, (%rsi) + ret + SET_SIZE(i_ddi_vaddr_put8) + + + ENTRY(i_ddi_vaddr_put16) + movw %dx, (%rsi) + ret + SET_SIZE(i_ddi_vaddr_put16) + + + ENTRY(i_ddi_vaddr_put32) + movl %edx, (%rsi) + ret + SET_SIZE(i_ddi_vaddr_put32) + + + ENTRY(i_ddi_vaddr_put64) + movq %rdx, (%rsi) + ret + SET_SIZE(i_ddi_vaddr_put64) + + ENTRY(i_ddi_io_put8) + movq %rdx, %rax + movq %rsi, %rdx + outb (%dx) + ret + SET_SIZE(i_ddi_io_put8) + + + ENTRY(i_ddi_io_put16) + movq %rdx, %rax + movq %rsi, %rdx + outw (%dx) + ret + SET_SIZE(i_ddi_io_put16) + + + ENTRY(i_ddi_io_put32) + movq %rdx, %rax + movq %rsi, %rdx + outl (%dx) + ret + SET_SIZE(i_ddi_io_put32) + + /* + * Incoming arguments + * + * %rdi : hdlp + * %rsi : host_addr + * %rdx : dev_addr + * %rcx : repcount + * %r8 : flags + * + * This routine will destroy values in %rdx, %rsi, %rcx. 
+ */ + ENTRY(i_ddi_io_rep_get8) + + cmpq $DDI_DEV_AUTOINCR, %r8 + je gb_ioadv + movq %rsi, %rdi + rep + insb + ret + +gb_ioadv: + andq %rcx, %rcx + jz gb_ioadv_done +gb_ioadv2: + inb (%dx) + movb %al, (%rsi) + incq %rdx + incq %rsi + decq %rcx + jg gb_ioadv2 + +gb_ioadv_done: + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + + SET_SIZE(i_ddi_io_rep_get8) + + + ENTRY(i_ddi_io_rep_get16) + + cmpq $DDI_DEV_AUTOINCR, %r8 + je gw_ioadv + + movq %rsi, %rdi + rep + insw + ret + +gw_ioadv: + andq %rcx, %rcx + jz gw_ioadv_done +gw_ioadv2: + inw (%dx) + movw %ax,(%rsi) + addq $2, %rsi + addq $2, %rdx + decq %rcx + jg gw_ioadv2 + +gw_ioadv_done: + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(i_ddi_io_rep_get16) + + + ENTRY(i_ddi_io_rep_get32) + + cmpq $DDI_DEV_AUTOINCR, %r8 + je gl_ioadv + + movq %rsi, %rdi + rep + insl + ret + +gl_ioadv: + andq %rcx, %rcx + jz gl_ioadv_done +gl_ioadv2: + inl (%dx) + movl %eax,(%rsi) + addq $4, %rsi + addq $4, %rdx + decq %rcx + jg gl_ioadv2 + +gl_ioadv_done: + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + + SET_SIZE(i_ddi_io_rep_get32) + + /* + * Incoming arguments + * + * %rdi : hdlp + * %rsi : host_addr + * %rdx : dev_addr + * %rcx : repcount + * %r8 : flags + * + * This routine will destroy values in %rdx, %rsi, %rcx. + */ + ENTRY(i_ddi_io_rep_put8) + + cmpq $DDI_DEV_AUTOINCR, %r8 + je pb_ioadv + + movq %rsi, %rdi + rep + outsb + ret + +pb_ioadv: + andq %rcx, %rcx + jz pb_ioadv_done +pb_ioadv2: + movb (%rsi), %al + outb (%dx) + incq %rsi + incq %rdx + decq %rcx + jg pb_ioadv2 + +pb_ioadv_done: + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(i_ddi_io_rep_put8) + + ENTRY(i_ddi_io_rep_put16) + + cmpq $DDI_DEV_AUTOINCR, %r8 + je pw_ioadv + + movq %rsi, %rdi + rep + outsw + ret + +pw_ioadv: + andq %rcx, %rcx + jz pw_ioadv_done +pw_ioadv2: + movw (%rsi), %ax + outw (%dx) + addq $2, %rsi + addq $2, %rdx + decq %rcx + jg pw_ioadv2 + +pw_ioadv_done: + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(i_ddi_io_rep_put16) + + + ENTRY(i_ddi_io_rep_put32) + + cmpq $DDI_DEV_AUTOINCR, %r8 + je pl_ioadv + + movq %rsi, %rdi + rep + outsl + ret + +pl_ioadv: + andq %rcx, %rcx + jz pl_ioadv_done +pl_ioadv2: + movl (%rsi), %eax + outl (%dx) + addq $4, %rsi + addq $4, %rdx + decq %rcx + jg pl_ioadv2 + +pl_ioadv_done: + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(i_ddi_io_rep_put32) diff --git a/usr/src/uts/intel/ml/desctbls_asm.s b/usr/src/uts/intel/ml/desctbls_asm.s new file mode 100644 index 0000000000..4528bc07ad --- /dev/null +++ b/usr/src/uts/intel/ml/desctbls_asm.s @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2019 Joyent, Inc. + */ + +#include <sys/asm_linkage.h> +#include <sys/asm_misc.h> +#include <sys/regset.h> +#include <sys/panic.h> +#include <sys/ontrap.h> +#include <sys/privregs.h> +#include <sys/segments.h> +#include <sys/trap.h> + +#include "assym.h" + + ENTRY_NP(rd_idtr) + sidt (%rdi) + ret + SET_SIZE(rd_idtr) + + ENTRY_NP(wr_idtr) + lidt (%rdi) + ret + SET_SIZE(wr_idtr) + + ENTRY_NP(rd_gdtr) + pushq %rbp + movq %rsp, %rbp + sgdt (%rdi) + leave + ret + SET_SIZE(rd_gdtr) + + ENTRY_NP(wr_gdtr) + pushq %rbp + movq %rsp, %rbp + lgdt (%rdi) + jmp 1f + nop +1: + leave + ret + SET_SIZE(wr_gdtr) + + /* + * loads zero selector for ds and es. + */ + ENTRY_NP(load_segment_registers) + pushq %rbp + movq %rsp, %rbp + pushq %rdi + pushq $.newcs + lretq +.newcs: + /* + * zero %ds and %es - they're ignored anyway + */ + xorl %eax, %eax + movw %ax, %ds + movw %ax, %es + movl %esi, %eax + movw %ax, %fs + movl %edx, %eax + movw %ax, %gs + movl %ecx, %eax + movw %ax, %ss + leave + ret + SET_SIZE(load_segment_registers) + + ENTRY_NP(get_cs_register) + movq %cs, %rax + ret + SET_SIZE(get_cs_register) + + ENTRY_NP(wr_ldtr) + movq %rdi, %rax + lldt %ax + ret + SET_SIZE(wr_ldtr) + + ENTRY_NP(rd_ldtr) + xorl %eax, %eax + sldt %ax + ret + SET_SIZE(rd_ldtr) + + ENTRY_NP(wr_tsr) + movq %rdi, %rax + ltr %ax + ret + SET_SIZE(wr_tsr) + diff --git a/usr/src/uts/intel/ml/exception.s b/usr/src/uts/intel/ml/exception.s new file mode 100644 index 0000000000..92c410adc0 --- /dev/null +++ b/usr/src/uts/intel/ml/exception.s @@ -0,0 +1,917 @@ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014 by Delphix. All rights reserved. + * Copyright 2019 Joyent, Inc. + */ + +/* + * Copyright (c) 1989, 1990 William F. Jolitz. + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/amd64/amd64/exception.S,v 1.113 2003/10/15 02:04:52 peter Exp $ + */ + +#include <sys/asm_linkage.h> +#include <sys/asm_misc.h> +#include <sys/trap.h> +#include <sys/psw.h> +#include <sys/regset.h> +#include <sys/privregs.h> +#include <sys/dtrace.h> +#include <sys/x86_archext.h> +#include <sys/traptrace.h> +#include <sys/machparam.h> + +#include "assym.h" + +/* + * push $0 on stack for traps that do not + * generate an error code. This is so the rest + * of the kernel can expect a consistent stack + * from from any exception. + * + * Note that for all exceptions for amd64 + * %r11 and %rcx are on the stack. Just pop + * them back into their appropriate registers and let + * it get saved as is running native. + */ + +#if defined(__xpv) + +#define NPTRAP_NOERR(trapno) \ + pushq $0; \ + pushq $trapno + +#define TRAP_NOERR(trapno) \ + XPV_TRAP_POP; \ + NPTRAP_NOERR(trapno) + +/* + * error code already pushed by hw + * onto stack. + */ +#define TRAP_ERR(trapno) \ + XPV_TRAP_POP; \ + pushq $trapno + +#else /* __xpv */ + +#define TRAP_NOERR(trapno) \ + push $0; \ + push $trapno + +#define NPTRAP_NOERR(trapno) TRAP_NOERR(trapno) + +/* + * error code already pushed by hw + * onto stack. + */ +#define TRAP_ERR(trapno) \ + push $trapno + +#endif /* __xpv */ + + /* + * These are the stacks used on cpu0 for taking double faults, + * NMIs and MCEs. + * + * We define them here instead of in a C file so that we can page-align + * them (gcc won't do that in a .c file). + */ + .data + DGDEF3(dblfault_stack0, DEFAULTSTKSZ, MMU_PAGESIZE) + .fill DEFAULTSTKSZ, 1, 0 + DGDEF3(nmi_stack0, DEFAULTSTKSZ, MMU_PAGESIZE) + .fill DEFAULTSTKSZ, 1, 0 + DGDEF3(mce_stack0, DEFAULTSTKSZ, MMU_PAGESIZE) + .fill DEFAULTSTKSZ, 1, 0 + + /* + * #DE + */ + ENTRY_NP(div0trap) + TRAP_NOERR(T_ZERODIV) /* $0 */ + jmp cmntrap + SET_SIZE(div0trap) + + /* + * #DB + * + * Fetch %dr6 and clear it, handing off the value to the + * cmntrap code in %r15/%esi + */ + ENTRY_NP(dbgtrap) + TRAP_NOERR(T_SGLSTP) /* $1 */ + +#if !defined(__xpv) /* no sysenter support yet */ + /* + * If we get here as a result of single-stepping a sysenter + * instruction, we suddenly find ourselves taking a #db + * in kernel mode -before- we've swapgs'ed. So before we can + * take the trap, we do the swapgs here, and fix the return + * %rip in trap() so that we return immediately after the + * swapgs in the sysenter handler to avoid doing the swapgs again. + * + * Nobody said that the design of sysenter was particularly + * elegant, did they? 
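+ *
+ * In outline, the check below is simply (pseudocode, for reference):
+ *
+ *	if (saved_rip == sys_sysenter || saved_rip == brand_sys_sysenter ||
+ *	    saved_rip == tr_sys_sysenter || saved_rip == tr_brand_sys_sysenter)
+ *		do the swapgs now;
+ *
+ * followed unconditionally by an lfence as the swapgs mitigation.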
+ */ + + pushq %r11 + + /* + * At this point the stack looks like this: + * + * (high address) r_ss + * r_rsp + * r_rfl + * r_cs + * r_rip <-- %rsp + 24 + * r_err <-- %rsp + 16 + * r_trapno <-- %rsp + 8 + * (low address) %r11 <-- %rsp + */ + leaq sys_sysenter(%rip), %r11 + cmpq %r11, 24(%rsp) /* Compare to saved r_rip on the stack */ + je 1f + leaq brand_sys_sysenter(%rip), %r11 + cmpq %r11, 24(%rsp) /* Compare to saved r_rip on the stack */ + je 1f + leaq tr_sys_sysenter(%rip), %r11 + cmpq %r11, 24(%rsp) + je 1f + leaq tr_brand_sys_sysenter(%rip), %r11 + cmpq %r11, 24(%rsp) + jne 2f +1: swapgs +2: lfence /* swapgs mitigation */ + popq %r11 +#endif /* !__xpv */ + + INTR_PUSH +#if defined(__xpv) + movl $6, %edi + call kdi_dreg_get + movq %rax, %r15 /* %db6 -> %r15 */ + movl $6, %edi + movl $0, %esi + call kdi_dreg_set /* 0 -> %db6 */ +#else + movq %db6, %r15 + xorl %eax, %eax + movq %rax, %db6 +#endif + + jmp cmntrap_pushed + SET_SIZE(dbgtrap) + +#if !defined(__xpv) + +/* + * Macro to set the gsbase or kgsbase to the address of the struct cpu + * for this processor. If we came from userland, set kgsbase else + * set gsbase. We find the proper cpu struct by looping through + * the cpu structs for all processors till we find a match for the gdt + * of the trapping processor. The stack is expected to be pointing at + * the standard regs pushed by hardware on a trap (plus error code and trapno). + * + * It's ok for us to clobber gsbase here (and possibly end up with both gsbase + * and kgsbase set to the same value) because we're not going back the normal + * way out of here (via IRET). Where we're going, we don't need no user %gs. + */ +#define SET_CPU_GSBASE \ + subq $REGOFF_TRAPNO, %rsp; /* save regs */ \ + movq %rax, REGOFF_RAX(%rsp); \ + movq %rbx, REGOFF_RBX(%rsp); \ + movq %rcx, REGOFF_RCX(%rsp); \ + movq %rdx, REGOFF_RDX(%rsp); \ + movq %rbp, REGOFF_RBP(%rsp); \ + movq %rsp, %rbp; \ + subq $16, %rsp; /* space for gdt */ \ + sgdt 6(%rsp); \ + movq 8(%rsp), %rcx; /* %rcx has gdt to match */ \ + xorl %ebx, %ebx; /* loop index */ \ + leaq cpu(%rip), %rdx; /* cpu pointer array */ \ +1: \ + movq (%rdx, %rbx, CLONGSIZE), %rax; /* get cpu[i] */ \ + cmpq $0x0, %rax; /* cpu[i] == NULL ? */ \ + je 2f; /* yes, continue */ \ + cmpq %rcx, CPU_GDT(%rax); /* gdt == cpu[i]->cpu_gdt ? */ \ + je 3f; /* yes, go set gsbase */ \ +2: \ + incl %ebx; /* i++ */ \ + cmpl $NCPU, %ebx; /* i < NCPU ? */ \ + jb 1b; /* yes, loop */ \ +/* XXX BIG trouble if we fall thru here. We didn't find a gdt match */ \ +3: \ + movl $MSR_AMD_KGSBASE, %ecx; \ + cmpw $KCS_SEL, REGOFF_CS(%rbp); /* trap from kernel? */ \ + jne 4f; /* no, go set KGSBASE */ \ + movl $MSR_AMD_GSBASE, %ecx; /* yes, set GSBASE */ \ + mfence; /* OPTERON_ERRATUM_88 */ \ +4: \ + movq %rax, %rdx; /* write base register */ \ + shrq $32, %rdx; \ + wrmsr; \ + movq REGOFF_RDX(%rbp), %rdx; /* restore regs */ \ + movq REGOFF_RCX(%rbp), %rcx; \ + movq REGOFF_RBX(%rbp), %rbx; \ + movq REGOFF_RAX(%rbp), %rax; \ + movq %rbp, %rsp; \ + movq REGOFF_RBP(%rsp), %rbp; \ + addq $REGOFF_TRAPNO, %rsp /* pop stack */ + +#else /* __xpv */ + +#define SET_CPU_GSBASE /* noop on the hypervisor */ + +#endif /* __xpv */ + + + /* + * #NMI + * + * XXPV: See 6532669. + */ + ENTRY_NP(nmiint) + TRAP_NOERR(T_NMIFLT) /* $2 */ + + SET_CPU_GSBASE + + /* + * Save all registers and setup segment registers + * with kernel selectors. 
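+ * We then hand a pointer to the saved register set to
+ * av_dispatch_nmivect(), and return via tr_iret_auto after
+ * x86_md_clear has scrubbed microarchitectural state.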
+ */ + INTR_PUSH + INTGATE_INIT_KERNEL_FLAGS + + TRACE_PTR(%r12, %rax, %eax, %rdx, $TT_TRAP) + TRACE_REGS(%r12, %rsp, %rax, %rbx) + TRACE_STAMP(%r12) + + movq %rsp, %rbp + + movq %rbp, %rdi + call av_dispatch_nmivect + + INTR_POP + call x86_md_clear + jmp tr_iret_auto + /*NOTREACHED*/ + SET_SIZE(nmiint) + + /* + * #BP + */ + ENTRY_NP(brktrap) + XPV_TRAP_POP + cmpw $KCS_SEL, 8(%rsp) + jne bp_user + + /* + * This is a breakpoint in the kernel -- it is very likely that this + * is DTrace-induced. To unify DTrace handling, we spoof this as an + * invalid opcode (#UD) fault. Note that #BP is a trap, not a fault -- + * we must decrement the trapping %rip to make it appear as a fault. + * We then push a non-zero error code to indicate that this is coming + * from #BP. + */ + decq (%rsp) + push $1 /* error code -- non-zero for #BP */ + jmp ud_kernel + +bp_user: + + NPTRAP_NOERR(T_BPTFLT) /* $3 */ + jmp dtrace_trap + + SET_SIZE(brktrap) + + /* + * #OF + */ + ENTRY_NP(ovflotrap) + TRAP_NOERR(T_OVFLW) /* $4 */ + jmp cmntrap + SET_SIZE(ovflotrap) + + /* + * #BR + */ + ENTRY_NP(boundstrap) + TRAP_NOERR(T_BOUNDFLT) /* $5 */ + jmp cmntrap + SET_SIZE(boundstrap) + + ENTRY_NP(invoptrap) + + XPV_TRAP_POP + + cmpw $KCS_SEL, 8(%rsp) + jne ud_user + +#if defined(__xpv) + movb $0, 12(%rsp) /* clear saved upcall_mask from %cs */ +#endif + push $0 /* error code -- zero for #UD */ +ud_kernel: + push $0xdddd /* a dummy trap number */ + INTR_PUSH + movq REGOFF_RIP(%rsp), %rdi + movq REGOFF_RSP(%rsp), %rsi + movq REGOFF_RAX(%rsp), %rdx + pushq (%rsi) + movq %rsp, %rsi + subq $8, %rsp + call dtrace_invop + ALTENTRY(dtrace_invop_callsite) + addq $16, %rsp + cmpl $DTRACE_INVOP_PUSHL_EBP, %eax + je ud_push + cmpl $DTRACE_INVOP_LEAVE, %eax + je ud_leave + cmpl $DTRACE_INVOP_NOP, %eax + je ud_nop + cmpl $DTRACE_INVOP_RET, %eax + je ud_ret + jmp ud_trap + +ud_push: + /* + * We must emulate a "pushq %rbp". To do this, we pull the stack + * down 8 bytes, and then store the base pointer. + */ + INTR_POP + subq $16, %rsp /* make room for %rbp */ + pushq %rax /* push temp */ + movq 24(%rsp), %rax /* load calling RIP */ + addq $1, %rax /* increment over trapping instr */ + movq %rax, 8(%rsp) /* store calling RIP */ + movq 32(%rsp), %rax /* load calling CS */ + movq %rax, 16(%rsp) /* store calling CS */ + movq 40(%rsp), %rax /* load calling RFLAGS */ + movq %rax, 24(%rsp) /* store calling RFLAGS */ + movq 48(%rsp), %rax /* load calling RSP */ + subq $8, %rax /* make room for %rbp */ + movq %rax, 32(%rsp) /* store calling RSP */ + movq 56(%rsp), %rax /* load calling SS */ + movq %rax, 40(%rsp) /* store calling SS */ + movq 32(%rsp), %rax /* reload calling RSP */ + movq %rbp, (%rax) /* store %rbp there */ + popq %rax /* pop off temp */ + jmp tr_iret_kernel /* return from interrupt */ + /*NOTREACHED*/ + +ud_leave: + /* + * We must emulate a "leave", which is the same as a "movq %rbp, + * %rsp" followed by a "popq %rbp". We can exploit the fact + * that the %rsp is explicitly saved to effect the pop without + * having to reshuffle the other data pushed for the trap. 
+ */ + + INTR_POP + pushq %rax /* push temp */ + movq 8(%rsp), %rax /* load calling RIP */ + addq $1, %rax /* increment over trapping instr */ + movq %rax, 8(%rsp) /* store calling RIP */ + movq (%rbp), %rax /* get new %rbp */ + addq $8, %rbp /* adjust new %rsp */ + movq %rbp, 32(%rsp) /* store new %rsp */ + movq %rax, %rbp /* set new %rbp */ + popq %rax /* pop off temp */ + jmp tr_iret_kernel /* return from interrupt */ + /*NOTREACHED*/ + +ud_nop: + /* + * We must emulate a "nop". This is obviously not hard: we need only + * advance the %rip by one. + */ + INTR_POP + incq (%rsp) + jmp tr_iret_kernel + /*NOTREACHED*/ + +ud_ret: + INTR_POP + pushq %rax /* push temp */ + movq 32(%rsp), %rax /* load %rsp */ + movq (%rax), %rax /* load calling RIP */ + movq %rax, 8(%rsp) /* store calling RIP */ + addq $8, 32(%rsp) /* adjust new %rsp */ + popq %rax /* pop off temp */ + jmp tr_iret_kernel /* return from interrupt */ + /*NOTREACHED*/ + +ud_trap: + /* + * We're going to let the kernel handle this as a normal #UD. If, + * however, we came through #BP and are spoofing #UD (in this case, + * the stored error value will be non-zero), we need to de-spoof + * the trap by incrementing %rip and pushing T_BPTFLT. + */ + cmpq $0, REGOFF_ERR(%rsp) + je ud_ud + incq REGOFF_RIP(%rsp) + addq $REGOFF_RIP, %rsp + NPTRAP_NOERR(T_BPTFLT) /* $3 */ + jmp cmntrap + +ud_ud: + addq $REGOFF_RIP, %rsp +ud_user: + NPTRAP_NOERR(T_ILLINST) + jmp cmntrap + SET_SIZE(invoptrap) + + /* + * #NM + */ + + ENTRY_NP(ndptrap) + TRAP_NOERR(T_NOEXTFLT) /* $0 */ + SET_CPU_GSBASE + jmp cmntrap + SET_SIZE(ndptrap) + +#if !defined(__xpv) + + /* + * #DF + */ + ENTRY_NP(syserrtrap) + pushq $T_DBLFLT + SET_CPU_GSBASE + + /* + * We share this handler with kmdb (if kmdb is loaded). As such, we + * may have reached this point after encountering a #df in kmdb. If + * that happens, we'll still be on kmdb's IDT. We need to switch back + * to this CPU's IDT before proceeding. Furthermore, if we did arrive + * here from kmdb, kmdb is probably in a very sickly state, and + * shouldn't be entered from the panic flow. We'll suppress that + * entry by setting nopanicdebug. + */ + pushq %rax + subq $DESCTBR_SIZE, %rsp + sidt (%rsp) + movq %gs:CPU_IDT, %rax + cmpq %rax, DTR_BASE(%rsp) + je 1f + + movq %rax, DTR_BASE(%rsp) + movw $_MUL(NIDT, GATE_DESC_SIZE), DTR_LIMIT(%rsp) + lidt (%rsp) + + movl $1, nopanicdebug + +1: addq $DESCTBR_SIZE, %rsp + popq %rax + + DFTRAP_PUSH + + /* + * freeze trap trace. 
+ */ +#ifdef TRAPTRACE + leaq trap_trace_freeze(%rip), %r11 + incl (%r11) +#endif + + ENABLE_INTR_FLAGS + + movq %rsp, %rdi /* ®s */ + xorl %esi, %esi /* clear address */ + xorl %edx, %edx /* cpuid = 0 */ + call trap + + SET_SIZE(syserrtrap) + +#endif /* !__xpv */ + + /* + * #TS + */ + ENTRY_NP(invtsstrap) + TRAP_ERR(T_TSSFLT) /* $10 already have error code on stack */ + jmp cmntrap + SET_SIZE(invtsstrap) + + /* + * #NP + */ + ENTRY_NP(segnptrap) + TRAP_ERR(T_SEGFLT) /* $11 already have error code on stack */ + SET_CPU_GSBASE + jmp cmntrap + SET_SIZE(segnptrap) + + /* + * #SS + */ + ENTRY_NP(stktrap) + TRAP_ERR(T_STKFLT) /* $12 already have error code on stack */ + SET_CPU_GSBASE + jmp cmntrap + SET_SIZE(stktrap) + + /* + * #GP + */ + ENTRY_NP(gptrap) + TRAP_ERR(T_GPFLT) /* $13 already have error code on stack */ + SET_CPU_GSBASE + jmp cmntrap + SET_SIZE(gptrap) + + /* + * #PF + */ + ENTRY_NP(pftrap) + TRAP_ERR(T_PGFLT) /* $14 already have error code on stack */ + INTR_PUSH +#if defined(__xpv) + + movq %gs:CPU_VCPU_INFO, %r15 + movq VCPU_INFO_ARCH_CR2(%r15), %r15 /* vcpu[].arch.cr2 */ + +#else /* __xpv */ + + movq %cr2, %r15 + +#endif /* __xpv */ + jmp cmntrap_pushed + SET_SIZE(pftrap) + + ENTRY_NP(resvtrap) + TRAP_NOERR(T_RESVTRAP) /* (reserved) */ + jmp cmntrap + SET_SIZE(resvtrap) + + /* + * #MF + */ + ENTRY_NP(ndperr) + TRAP_NOERR(T_EXTERRFLT) /* $16 */ + jmp cmninttrap + SET_SIZE(ndperr) + + /* + * #AC + */ + ENTRY_NP(achktrap) + TRAP_ERR(T_ALIGNMENT) /* $17 */ + jmp cmntrap + SET_SIZE(achktrap) + + /* + * #MC + */ + .globl cmi_mca_trap /* see uts/i86pc/os/cmi.c */ + + ENTRY_NP(mcetrap) + TRAP_NOERR(T_MCE) /* $18 */ + + SET_CPU_GSBASE + + INTR_PUSH + INTGATE_INIT_KERNEL_FLAGS + + TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) + TRACE_REGS(%rdi, %rsp, %rbx, %rcx) + TRACE_STAMP(%rdi) + + movq %rsp, %rbp + + movq %rsp, %rdi /* arg0 = struct regs *rp */ + call cmi_mca_trap /* cmi_mca_trap(rp); */ + + jmp _sys_rtt + SET_SIZE(mcetrap) + + /* + * #XF + */ + ENTRY_NP(xmtrap) + TRAP_NOERR(T_SIMDFPE) /* $19 */ + jmp cmninttrap + SET_SIZE(xmtrap) + + ENTRY_NP(invaltrap) + TRAP_NOERR(T_INVALTRAP) /* very invalid */ + jmp cmntrap + SET_SIZE(invaltrap) + + .globl fasttable + + ENTRY_NP(fasttrap) + cmpl $T_LASTFAST, %eax + ja 1f + orl %eax, %eax /* (zero extend top 32-bits) */ + leaq fasttable(%rip), %r11 + leaq (%r11, %rax, CLONGSIZE), %r11 + movq (%r11), %r11 + INDIRECT_JMP_REG(r11) +1: + /* + * Fast syscall number was illegal. Make it look + * as if the INT failed. Modify %rip to point before the + * INT, push the expected error code and fake a GP fault. + * + * XXX Why make the error code be offset into idt + 1? + * Instead we should push a real (soft?) error code + * on the stack and #gp handler could know about fasttraps? + */ + XPV_TRAP_POP + + subq $2, (%rsp) /* XXX int insn 2-bytes */ + pushq $_CONST(_MUL(T_FASTTRAP, GATE_DESC_SIZE) + 2) + +#if defined(__xpv) + pushq %r11 + pushq %rcx +#endif + jmp gptrap + SET_SIZE(fasttrap) + + ENTRY_NP(dtrace_ret) + TRAP_NOERR(T_DTRACE_RET) + jmp dtrace_trap + SET_SIZE(dtrace_ret) + + /* + * RFLAGS 24 bytes up the stack from %rsp. + * XXX a constant would be nicer. 
+ */ + ENTRY_NP(fast_null) + XPV_TRAP_POP + orq $PS_C, 24(%rsp) /* set carry bit in user flags */ + call x86_md_clear + jmp tr_iret_auto + /*NOTREACHED*/ + SET_SIZE(fast_null) + + /* + * Interrupts start at 32 + */ +#define MKIVCT(n) \ + ENTRY_NP(ivct/**/n) \ + push $0; \ + push $n - 0x20; \ + jmp cmnint; \ + SET_SIZE(ivct/**/n) + + MKIVCT(32) + MKIVCT(33) + MKIVCT(34) + MKIVCT(35) + MKIVCT(36) + MKIVCT(37) + MKIVCT(38) + MKIVCT(39) + MKIVCT(40) + MKIVCT(41) + MKIVCT(42) + MKIVCT(43) + MKIVCT(44) + MKIVCT(45) + MKIVCT(46) + MKIVCT(47) + MKIVCT(48) + MKIVCT(49) + MKIVCT(50) + MKIVCT(51) + MKIVCT(52) + MKIVCT(53) + MKIVCT(54) + MKIVCT(55) + MKIVCT(56) + MKIVCT(57) + MKIVCT(58) + MKIVCT(59) + MKIVCT(60) + MKIVCT(61) + MKIVCT(62) + MKIVCT(63) + MKIVCT(64) + MKIVCT(65) + MKIVCT(66) + MKIVCT(67) + MKIVCT(68) + MKIVCT(69) + MKIVCT(70) + MKIVCT(71) + MKIVCT(72) + MKIVCT(73) + MKIVCT(74) + MKIVCT(75) + MKIVCT(76) + MKIVCT(77) + MKIVCT(78) + MKIVCT(79) + MKIVCT(80) + MKIVCT(81) + MKIVCT(82) + MKIVCT(83) + MKIVCT(84) + MKIVCT(85) + MKIVCT(86) + MKIVCT(87) + MKIVCT(88) + MKIVCT(89) + MKIVCT(90) + MKIVCT(91) + MKIVCT(92) + MKIVCT(93) + MKIVCT(94) + MKIVCT(95) + MKIVCT(96) + MKIVCT(97) + MKIVCT(98) + MKIVCT(99) + MKIVCT(100) + MKIVCT(101) + MKIVCT(102) + MKIVCT(103) + MKIVCT(104) + MKIVCT(105) + MKIVCT(106) + MKIVCT(107) + MKIVCT(108) + MKIVCT(109) + MKIVCT(110) + MKIVCT(111) + MKIVCT(112) + MKIVCT(113) + MKIVCT(114) + MKIVCT(115) + MKIVCT(116) + MKIVCT(117) + MKIVCT(118) + MKIVCT(119) + MKIVCT(120) + MKIVCT(121) + MKIVCT(122) + MKIVCT(123) + MKIVCT(124) + MKIVCT(125) + MKIVCT(126) + MKIVCT(127) + MKIVCT(128) + MKIVCT(129) + MKIVCT(130) + MKIVCT(131) + MKIVCT(132) + MKIVCT(133) + MKIVCT(134) + MKIVCT(135) + MKIVCT(136) + MKIVCT(137) + MKIVCT(138) + MKIVCT(139) + MKIVCT(140) + MKIVCT(141) + MKIVCT(142) + MKIVCT(143) + MKIVCT(144) + MKIVCT(145) + MKIVCT(146) + MKIVCT(147) + MKIVCT(148) + MKIVCT(149) + MKIVCT(150) + MKIVCT(151) + MKIVCT(152) + MKIVCT(153) + MKIVCT(154) + MKIVCT(155) + MKIVCT(156) + MKIVCT(157) + MKIVCT(158) + MKIVCT(159) + MKIVCT(160) + MKIVCT(161) + MKIVCT(162) + MKIVCT(163) + MKIVCT(164) + MKIVCT(165) + MKIVCT(166) + MKIVCT(167) + MKIVCT(168) + MKIVCT(169) + MKIVCT(170) + MKIVCT(171) + MKIVCT(172) + MKIVCT(173) + MKIVCT(174) + MKIVCT(175) + MKIVCT(176) + MKIVCT(177) + MKIVCT(178) + MKIVCT(179) + MKIVCT(180) + MKIVCT(181) + MKIVCT(182) + MKIVCT(183) + MKIVCT(184) + MKIVCT(185) + MKIVCT(186) + MKIVCT(187) + MKIVCT(188) + MKIVCT(189) + MKIVCT(190) + MKIVCT(191) + MKIVCT(192) + MKIVCT(193) + MKIVCT(194) + MKIVCT(195) + MKIVCT(196) + MKIVCT(197) + MKIVCT(198) + MKIVCT(199) + MKIVCT(200) + MKIVCT(201) + MKIVCT(202) + MKIVCT(203) + MKIVCT(204) + MKIVCT(205) + MKIVCT(206) + MKIVCT(207) + MKIVCT(208) + MKIVCT(209) + MKIVCT(210) + MKIVCT(211) + MKIVCT(212) + MKIVCT(213) + MKIVCT(214) + MKIVCT(215) + MKIVCT(216) + MKIVCT(217) + MKIVCT(218) + MKIVCT(219) + MKIVCT(220) + MKIVCT(221) + MKIVCT(222) + MKIVCT(223) + MKIVCT(224) + MKIVCT(225) + MKIVCT(226) + MKIVCT(227) + MKIVCT(228) + MKIVCT(229) + MKIVCT(230) + MKIVCT(231) + MKIVCT(232) + MKIVCT(233) + MKIVCT(234) + MKIVCT(235) + MKIVCT(236) + MKIVCT(237) + MKIVCT(238) + MKIVCT(239) + MKIVCT(240) + MKIVCT(241) + MKIVCT(242) + MKIVCT(243) + MKIVCT(244) + MKIVCT(245) + MKIVCT(246) + MKIVCT(247) + MKIVCT(248) + MKIVCT(249) + MKIVCT(250) + MKIVCT(251) + MKIVCT(252) + MKIVCT(253) + MKIVCT(254) + MKIVCT(255) + diff --git a/usr/src/uts/intel/ml/float.s b/usr/src/uts/intel/ml/float.s new file mode 100644 index 0000000000..807647f553 --- /dev/null +++ 
b/usr/src/uts/intel/ml/float.s @@ -0,0 +1,347 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. + */ + +/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ +/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ +/* All Rights Reserved */ + +/* Copyright (c) 1987, 1988 Microsoft Corporation */ +/* All Rights Reserved */ + +/* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ + +#include <sys/asm_linkage.h> +#include <sys/asm_misc.h> +#include <sys/regset.h> +#include <sys/privregs.h> +#include <sys/x86_archext.h> + +#include "assym.h" + + /* + * Returns zero if x87 "chip" is present(!) + */ + ENTRY_NP(fpu_initial_probe) + CLTS + fninit + fnstsw %ax + movzbl %al, %eax + ret + SET_SIZE(fpu_initial_probe) + + ENTRY_NP(fxsave_insn) + fxsaveq (%rdi) + ret + SET_SIZE(fxsave_insn) + +/* + * One of these routines is called from any lwp with floating + * point context as part of the prolog of a context switch. + */ + +/* + * These three functions define the Intel "xsave" handling for CPUs with + * different features. Newer AMD CPUs can also use these functions. See the + * 'exception pointers' comment below. + */ + ENTRY_NP(fpxsave_ctxt) /* %rdi is a struct fpu_ctx */ + cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ + fxsaveq (%rdi) + STTS(%rsi) /* trap on next fpu touch */ +1: rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(fpxsave_ctxt) + + ENTRY_NP(xsave_ctxt) + cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */ + movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx + movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */ + xsave (%rsi) + STTS(%rsi) /* trap on next fpu touch */ +1: ret + SET_SIZE(xsave_ctxt) + + ENTRY_NP(xsaveopt_ctxt) + cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */ + movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx + movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */ + xsaveopt (%rsi) + STTS(%rsi) /* trap on next fpu touch */ +1: ret + SET_SIZE(xsaveopt_ctxt) + +/* + * On certain AMD processors, the "exception pointers" (i.e. 
the last + * instruction pointer, last data pointer, and last opcode) are saved by the + * fxsave, xsave or xsaveopt instruction ONLY if the exception summary bit is + * set. + * + * On newer CPUs, AMD has changed their behavior to mirror the Intel behavior. + * We can detect this via an AMD specific cpuid feature bit + * (CPUID_AMD_EBX_ERR_PTR_ZERO) and use the simpler Intel-oriented functions. + * Otherwise we use these more complex functions on AMD CPUs. All three follow + * the same logic after the xsave* instruction. + */ + ENTRY_NP(fpxsave_excp_clr_ctxt) /* %rdi is a struct fpu_ctx */ + cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ + fxsaveq (%rdi) + /* + * To ensure that we don't leak these values into the next context + * on the cpu, we could just issue an fninit here, but that's + * rather slow and so we issue an instruction sequence that + * clears them more quickly, if a little obscurely. + */ + btw $7, FXSAVE_STATE_FSW(%rdi) /* Test saved ES bit */ + jnc 0f /* jump if ES = 0 */ + fnclex /* clear pending x87 exceptions */ +0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ + fildl .fpzero_const(%rip) + /* dummy load changes all exception pointers */ + STTS(%rsi) /* trap on next fpu touch */ +1: rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(fpxsave_excp_clr_ctxt) + + ENTRY_NP(xsave_excp_clr_ctxt) + cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax + movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx + movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */ + xsave (%rsi) + btw $7, FXSAVE_STATE_FSW(%rsi) /* Test saved ES bit */ + jnc 0f /* jump if ES = 0 */ + fnclex /* clear pending x87 exceptions */ +0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ + fildl .fpzero_const(%rip) /* dummy load changes all excp. pointers */ + STTS(%rsi) /* trap on next fpu touch */ +1: ret + SET_SIZE(xsave_excp_clr_ctxt) + + ENTRY_NP(xsaveopt_excp_clr_ctxt) + cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax + movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx + movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */ + xsaveopt (%rsi) + btw $7, FXSAVE_STATE_FSW(%rsi) /* Test saved ES bit */ + jnc 0f /* jump if ES = 0 */ + fnclex /* clear pending x87 exceptions */ +0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ + fildl .fpzero_const(%rip) /* dummy load changes all excp. 
pointers */ + STTS(%rsi) /* trap on next fpu touch */ +1: ret + SET_SIZE(xsaveopt_excp_clr_ctxt) + + .align 8 +.fpzero_const: + .4byte 0x0 + .4byte 0x0 + + + ENTRY_NP(fpxsave) + CLTS + fxsaveq (%rdi) + fninit /* clear exceptions, init x87 tags */ + STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ + ret + SET_SIZE(fpxsave) + + ENTRY_NP(xsave) + CLTS + movl %esi, %eax /* bv mask */ + movq %rsi, %rdx + shrq $32, %rdx + xsave (%rdi) + + fninit /* clear exceptions, init x87 tags */ + STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ + ret + SET_SIZE(xsave) + + ENTRY_NP(xsaveopt) + CLTS + movl %esi, %eax /* bv mask */ + movq %rsi, %rdx + shrq $32, %rdx + xsaveopt (%rdi) + + fninit /* clear exceptions, init x87 tags */ + STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ + ret + SET_SIZE(xsaveopt) + +/* + * These functions are used when restoring the FPU as part of the epilogue of a + * context switch. + */ + + ENTRY(fpxrestore_ctxt) + cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ + CLTS + fxrstorq (%rdi) +1: + ret + SET_SIZE(fpxrestore_ctxt) + + ENTRY(xrestore_ctxt) + cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */ + movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_xs ptr */ + CLTS + xrstor (%rdi) +1: + ret + SET_SIZE(xrestore_ctxt) + + + ENTRY_NP(fpxrestore) + CLTS + fxrstorq (%rdi) + ret + SET_SIZE(fpxrestore) + + ENTRY_NP(xrestore) + CLTS + movl %esi, %eax /* bv mask */ + movq %rsi, %rdx + shrq $32, %rdx + xrstor (%rdi) + ret + SET_SIZE(xrestore) + +/* + * Disable the floating point unit. + */ + + ENTRY_NP(fpdisable) + STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ + ret + SET_SIZE(fpdisable) + +/* + * Initialize the fpu hardware. + */ + + ENTRY_NP(fpinit) + CLTS + cmpl $FP_XSAVE, fp_save_mech + je 1f + + /* fxsave */ + leaq sse_initial(%rip), %rax + fxrstorq (%rax) /* load clean initial state */ + ret + +1: /* xsave */ + leaq avx_initial(%rip), %rcx + xorl %edx, %edx + movl $XFEATURE_AVX, %eax + btl $X86FSET_AVX, x86_featureset + cmovael %edx, %eax + orl $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax + xrstor (%rcx) + ret + SET_SIZE(fpinit) + +/* + * Clears FPU exception state. + * Returns the FP status word. + */ + + ENTRY_NP(fperr_reset) + CLTS + xorl %eax, %eax + fnstsw %ax + fnclex + ret + SET_SIZE(fperr_reset) + + ENTRY_NP(fpxerr_reset) + pushq %rbp + movq %rsp, %rbp + subq $0x10, %rsp /* make some temporary space */ + CLTS + stmxcsr (%rsp) + movl (%rsp), %eax + andl $_BITNOT(SSE_MXCSR_EFLAGS), (%rsp) + ldmxcsr (%rsp) /* clear processor exceptions */ + leave + ret + SET_SIZE(fpxerr_reset) + + ENTRY_NP(fpgetcwsw) + pushq %rbp + movq %rsp, %rbp + subq $0x10, %rsp /* make some temporary space */ + CLTS + fnstsw (%rsp) /* store the status word */ + fnstcw 2(%rsp) /* store the control word */ + movl (%rsp), %eax /* put both in %eax */ + leave + ret + SET_SIZE(fpgetcwsw) + +/* + * Returns the MXCSR register. 
+ */ + + ENTRY_NP(fpgetmxcsr) + pushq %rbp + movq %rsp, %rbp + subq $0x10, %rsp /* make some temporary space */ + CLTS + stmxcsr (%rsp) + movl (%rsp), %eax + leave + ret + SET_SIZE(fpgetmxcsr) + diff --git a/usr/src/uts/intel/ml/hypersubr.s b/usr/src/uts/intel/ml/hypersubr.s new file mode 100644 index 0000000000..e6378d8518 --- /dev/null +++ b/usr/src/uts/intel/ml/hypersubr.s @@ -0,0 +1,164 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2019 Joyent, Inc. + */ + +#include <sys/asm_linkage.h> +#ifndef __xpv +#include <sys/xpv_support.h> +#endif +#include <sys/hypervisor.h> + +/* + * Hypervisor "system calls" + * + * amd64 + * %rax == call number + * args in registers (%rdi, %rsi, %rdx, %r10, %r8, %r9) + * + * Note that we use %r10 instead of %rcx for passing 4th argument as in + * C calling convention since the "syscall" instruction clobbers %rcx. + * + * (These calls can be done more efficiently as gcc-style inlines, but + * for simplicity and help with initial debugging, we use these primitives + * to build the hypervisor calls up from C wrappers.) + */ + +/* + * XXPV grr - assembler can't deal with an instruction in a quoted string + */ +#undef TRAP_INSTR /* cause it's currently "int $0x82" */ + +/* + * The method for issuing a hypercall (i.e. a system call to the + * hypervisor) varies from platform to platform. In 32-bit PV domains, an + * 'int 82' triggers the call. In 64-bit PV domains, a 'syscall' does the + * trick. + * + * HVM domains are more complicated. In all cases, we want to issue a + * VMEXIT instruction, but AMD and Intel use different opcodes to represent + * that instruction. Rather than build CPU-specific modules with the + * different opcodes, we use the 'hypercall page' provided by Xen. This + * page contains a collection of code stubs that do nothing except issue + * hypercalls using the proper instructions for this machine. To keep the + * wrapper code as simple and efficient as possible, we preallocate that + * page below. When the module is loaded, we ask Xen to remap the + * underlying PFN to that of the hypercall page. + * + * Note: this same mechanism could be used in PV domains, but using + * hypercall page requires a call and several more instructions than simply + * issuing the proper trap. 
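+ *
+ * For illustration only (a sketch, not part of this change): the C-level
+ * hypercall wrappers built on top of these primitives look roughly like
+ *
+ *	long
+ *	HYPERVISOR_xen_version(int cmd, void *arg)
+ *	{
+ *		return (__hypercall2(__HYPERVISOR_xen_version,
+ *		    (ulong_t)cmd, (ulong_t)arg));
+ *	}
+ *
+ * i.e. each wrapper just marshals its arguments into the register
+ * convention described above and lets __hypercallN issue TRAP_INSTR.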
+ */ +#if !defined(__xpv) + +#define HYPERCALL_PAGESIZE 0x1000 +#define HYPERCALL_SHINFO_PAGESIZE 0x1000 + + .data + .align HYPERCALL_SHINFO_PAGESIZE + .globl hypercall_shared_info_page + .type hypercall_shared_info_page, @object + .size hypercall_shared_info_page, HYPERCALL_SHINFO_PAGESIZE +hypercall_shared_info_page: + .skip HYPERCALL_SHINFO_PAGESIZE + + .text + .align HYPERCALL_PAGESIZE + .globl hypercall_page + .type hypercall_page, @function +hypercall_page: + .skip HYPERCALL_PAGESIZE + .size hypercall_page, HYPERCALL_PAGESIZE +#define TRAP_INSTR \ + shll $5, %eax; \ + addq $hypercall_page, %rax; \ + INDIRECT_JMP_REG(rax); + +#else /* !_xpv */ + +#define TRAP_INSTR syscall +#endif /* !__xpv */ + + + ENTRY_NP(__hypercall0) + ALTENTRY(__hypercall0_int) + movl %edi, %eax + TRAP_INSTR + ret + SET_SIZE(__hypercall0) + + ENTRY_NP(__hypercall1) + ALTENTRY(__hypercall1_int) + movl %edi, %eax + movq %rsi, %rdi /* arg 1 */ + TRAP_INSTR + ret + SET_SIZE(__hypercall1) + + ENTRY_NP(__hypercall2) + ALTENTRY(__hypercall2_int) + movl %edi, %eax + movq %rsi, %rdi /* arg 1 */ + movq %rdx, %rsi /* arg 2 */ + TRAP_INSTR + ret + SET_SIZE(__hypercall2) + + ENTRY_NP(__hypercall3) + ALTENTRY(__hypercall3_int) + movl %edi, %eax + movq %rsi, %rdi /* arg 1 */ + movq %rdx, %rsi /* arg 2 */ + movq %rcx, %rdx /* arg 3 */ + TRAP_INSTR + ret + SET_SIZE(__hypercall3) + + ENTRY_NP(__hypercall4) + ALTENTRY(__hypercall4_int) + movl %edi, %eax + movq %rsi, %rdi /* arg 1 */ + movq %rdx, %rsi /* arg 2 */ + movq %rcx, %rdx /* arg 3 */ + movq %r8, %r10 /* r10 = 4th arg */ + TRAP_INSTR + ret + SET_SIZE(__hypercall4) + + ENTRY_NP(__hypercall5) + ALTENTRY(__hypercall5_int) + movl %edi, %eax + movq %rsi, %rdi /* arg 1 */ + movq %rdx, %rsi /* arg 2 */ + movq %rcx, %rdx /* arg 3 */ + movq %r8, %r10 /* r10 = 4th arg */ + movq %r9, %r8 /* arg 5 */ + TRAP_INSTR + ret + SET_SIZE(__hypercall5) + diff --git a/usr/src/uts/intel/ml/i86_subr.s b/usr/src/uts/intel/ml/i86_subr.s new file mode 100644 index 0000000000..2a1a183026 --- /dev/null +++ b/usr/src/uts/intel/ml/i86_subr.s @@ -0,0 +1,1629 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2014 by Delphix. All rights reserved. + * Copyright 2019 Joyent, Inc. + */ + +/* + * Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. + * Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T + * All Rights Reserved + */ + +/* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ + +/* + * General assembly language routines. 
+ * It is the intent of this file to contain routines that are + * independent of the specific kernel architecture, and those that are + * common across kernel architectures. + * As architectures diverge, and implementations of specific + * architecture-dependent routines change, the routines should be moved + * from this file into the respective ../`arch -k`/subr.s file. + */ + +#include <sys/asm_linkage.h> +#include <sys/asm_misc.h> +#include <sys/panic.h> +#include <sys/ontrap.h> +#include <sys/regset.h> +#include <sys/privregs.h> +#include <sys/reboot.h> +#include <sys/psw.h> +#include <sys/x86_archext.h> + +#include "assym.h" +#include <sys/dditypes.h> + +/* + * on_fault() + * + * Catch lofault faults. Like setjmp except it returns one + * if code following causes uncorrectable fault. Turned off + * by calling no_fault(). Note that while under on_fault(), + * SMAP is disabled. For more information see + * uts/intel/ml/copy.s. + */ + + ENTRY(on_fault) + movq %gs:CPU_THREAD, %rsi + leaq catch_fault(%rip), %rdx + movq %rdi, T_ONFAULT(%rsi) /* jumpbuf in t_onfault */ + movq %rdx, T_LOFAULT(%rsi) /* catch_fault in t_lofault */ + call smap_disable /* allow user accesses */ + jmp setjmp /* let setjmp do the rest */ + +catch_fault: + movq %gs:CPU_THREAD, %rsi + movq T_ONFAULT(%rsi), %rdi /* address of save area */ + xorl %eax, %eax + movq %rax, T_ONFAULT(%rsi) /* turn off onfault */ + movq %rax, T_LOFAULT(%rsi) /* turn off lofault */ + call smap_enable /* disallow user accesses */ + jmp longjmp /* let longjmp do the rest */ + SET_SIZE(on_fault) + + ENTRY(no_fault) + movq %gs:CPU_THREAD, %rsi + xorl %eax, %eax + movq %rax, T_ONFAULT(%rsi) /* turn off onfault */ + movq %rax, T_LOFAULT(%rsi) /* turn off lofault */ + call smap_enable /* disallow user accesses */ + ret + SET_SIZE(no_fault) + +/* + * Default trampoline code for on_trap() (see <sys/ontrap.h>). We just + * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called. + */ + + ENTRY(on_trap_trampoline) + movq %gs:CPU_THREAD, %rsi + movq T_ONTRAP(%rsi), %rdi + addq $OT_JMPBUF, %rdi + jmp longjmp + SET_SIZE(on_trap_trampoline) + +/* + * Push a new element on to the t_ontrap stack. Refer to <sys/ontrap.h> for + * more information about the on_trap() mechanism. If the on_trap_data is the + * same as the topmost stack element, we just modify that element. + */ + + ENTRY(on_trap) + movw %si, OT_PROT(%rdi) /* ot_prot = prot */ + movw $0, OT_TRAP(%rdi) /* ot_trap = 0 */ + leaq on_trap_trampoline(%rip), %rdx /* rdx = &on_trap_trampoline */ + movq %rdx, OT_TRAMPOLINE(%rdi) /* ot_trampoline = rdx */ + xorl %ecx, %ecx + movq %rcx, OT_HANDLE(%rdi) /* ot_handle = NULL */ + movq %rcx, OT_PAD1(%rdi) /* ot_pad1 = NULL */ + movq %gs:CPU_THREAD, %rdx /* rdx = curthread */ + movq T_ONTRAP(%rdx), %rcx /* rcx = curthread->t_ontrap */ + cmpq %rdi, %rcx /* if (otp == %rcx) */ + je 0f /* don't modify t_ontrap */ + + movq %rcx, OT_PREV(%rdi) /* ot_prev = t_ontrap */ + movq %rdi, T_ONTRAP(%rdx) /* curthread->t_ontrap = otp */ + +0: addq $OT_JMPBUF, %rdi /* &ot_jmpbuf */ + jmp setjmp + SET_SIZE(on_trap) + +/* + * Setjmp and longjmp implement non-local gotos using state vectors + * type label_t. 
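+ *
+ * A minimal usage sketch (the kernel's label_t-based setjmp/longjmp,
+ * not the libc ones):
+ *
+ *	label_t jb;
+ *
+ *	if (setjmp(&jb) == 0) {
+ *		... normal path; may eventually call longjmp(&jb) ...
+ *	} else {
+ *		... control resumes here, setjmp appearing to return 1 ...
+ *	}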
+ */ + +#if LABEL_PC != 0 +#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded +#endif /* LABEL_PC != 0 */ + + ENTRY(setjmp) + movq %rsp, LABEL_SP(%rdi) + movq %rbp, LABEL_RBP(%rdi) + movq %rbx, LABEL_RBX(%rdi) + movq %r12, LABEL_R12(%rdi) + movq %r13, LABEL_R13(%rdi) + movq %r14, LABEL_R14(%rdi) + movq %r15, LABEL_R15(%rdi) + movq (%rsp), %rdx /* return address */ + movq %rdx, (%rdi) /* LABEL_PC is 0 */ + xorl %eax, %eax /* return 0 */ + ret + SET_SIZE(setjmp) + + ENTRY(longjmp) + movq LABEL_SP(%rdi), %rsp + movq LABEL_RBP(%rdi), %rbp + movq LABEL_RBX(%rdi), %rbx + movq LABEL_R12(%rdi), %r12 + movq LABEL_R13(%rdi), %r13 + movq LABEL_R14(%rdi), %r14 + movq LABEL_R15(%rdi), %r15 + movq (%rdi), %rdx /* return address; LABEL_PC is 0 */ + movq %rdx, (%rsp) + xorl %eax, %eax + incl %eax /* return 1 */ + ret + SET_SIZE(longjmp) + +/* + * if a() calls b() calls caller(), + * caller() returns return address in a(). + * (Note: We assume a() and b() are C routines which do the normal entry/exit + * sequence.) + */ + + ENTRY(caller) + movq 8(%rbp), %rax /* b()'s return pc, in a() */ + ret + SET_SIZE(caller) + +/* + * if a() calls callee(), callee() returns the + * return address in a(); + */ + + ENTRY(callee) + movq (%rsp), %rax /* callee()'s return pc, in a() */ + ret + SET_SIZE(callee) + +/* + * return the current frame pointer + */ + + ENTRY(getfp) + movq %rbp, %rax + ret + SET_SIZE(getfp) + +/* + * Invalidate a single page table entry in the TLB + */ + + ENTRY(mmu_invlpg) + invlpg (%rdi) + ret + SET_SIZE(mmu_invlpg) + + +/* + * Get/Set the value of various control registers + */ + + ENTRY(getcr0) + movq %cr0, %rax + ret + SET_SIZE(getcr0) + + ENTRY(setcr0) + movq %rdi, %cr0 + ret + SET_SIZE(setcr0) + + ENTRY(getcr2) +#if defined(__xpv) + movq %gs:CPU_VCPU_INFO, %rax + movq VCPU_INFO_ARCH_CR2(%rax), %rax +#else + movq %cr2, %rax +#endif + ret + SET_SIZE(getcr2) + + ENTRY(getcr3) + movq %cr3, %rax + ret + SET_SIZE(getcr3) + +#if !defined(__xpv) + + ENTRY(setcr3) + movq %rdi, %cr3 + ret + SET_SIZE(setcr3) + + ENTRY(reload_cr3) + movq %cr3, %rdi + movq %rdi, %cr3 + ret + SET_SIZE(reload_cr3) + +#endif /* __xpv */ + + ENTRY(getcr4) + movq %cr4, %rax + ret + SET_SIZE(getcr4) + + ENTRY(setcr4) + movq %rdi, %cr4 + ret + SET_SIZE(setcr4) + + ENTRY(getcr8) + movq %cr8, %rax + ret + SET_SIZE(getcr8) + + ENTRY(setcr8) + movq %rdi, %cr8 + ret + SET_SIZE(setcr8) + + ENTRY(__cpuid_insn) + movq %rbx, %r8 + movq %rcx, %r9 + movq %rdx, %r11 + movl (%rdi), %eax /* %eax = regs->cp_eax */ + movl 0x4(%rdi), %ebx /* %ebx = regs->cp_ebx */ + movl 0x8(%rdi), %ecx /* %ecx = regs->cp_ecx */ + movl 0xc(%rdi), %edx /* %edx = regs->cp_edx */ + cpuid + movl %eax, (%rdi) /* regs->cp_eax = %eax */ + movl %ebx, 0x4(%rdi) /* regs->cp_ebx = %ebx */ + movl %ecx, 0x8(%rdi) /* regs->cp_ecx = %ecx */ + movl %edx, 0xc(%rdi) /* regs->cp_edx = %edx */ + movq %r8, %rbx + movq %r9, %rcx + movq %r11, %rdx + ret + SET_SIZE(__cpuid_insn) + + ENTRY_NP(i86_monitor) + pushq %rbp + movq %rsp, %rbp + movq %rdi, %rax /* addr */ + movq %rsi, %rcx /* extensions */ + /* rdx contains input arg3: hints */ + clflush (%rax) + .byte 0x0f, 0x01, 0xc8 /* monitor */ + leave + ret + SET_SIZE(i86_monitor) + + ENTRY_NP(i86_mwait) + pushq %rbp + call x86_md_clear + movq %rsp, %rbp + movq %rdi, %rax /* data */ + movq %rsi, %rcx /* extensions */ + .byte 0x0f, 0x01, 0xc9 /* mwait */ + leave + ret + SET_SIZE(i86_mwait) + +#if defined(__xpv) + /* + * Defined in C + */ +#else + + ENTRY_NP(tsc_read) + movq %rbx, %r11 + movl $0, %eax + cpuid + rdtsc + 
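+	/*
+	 * The cpuid (leaf 0) above serves only as a serializing
+	 * instruction so that rdtsc is not executed ahead of prior
+	 * work; the shlq/orq below combine %edx:%eax into %rax.
+	 */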
movq %r11, %rbx + shlq $32, %rdx + orq %rdx, %rax + ret + .globl _tsc_mfence_start +_tsc_mfence_start: + mfence + rdtsc + shlq $32, %rdx + orq %rdx, %rax + ret + .globl _tsc_mfence_end +_tsc_mfence_end: + .globl _tscp_start +_tscp_start: + .byte 0x0f, 0x01, 0xf9 /* rdtscp instruction */ + shlq $32, %rdx + orq %rdx, %rax + ret + .globl _tscp_end +_tscp_end: + .globl _no_rdtsc_start +_no_rdtsc_start: + xorl %edx, %edx + xorl %eax, %eax + ret + .globl _no_rdtsc_end +_no_rdtsc_end: + .globl _tsc_lfence_start +_tsc_lfence_start: + lfence + rdtsc + shlq $32, %rdx + orq %rdx, %rax + ret + .globl _tsc_lfence_end +_tsc_lfence_end: + SET_SIZE(tsc_read) + + +#endif /* __xpv */ + + ENTRY_NP(randtick) + rdtsc + shlq $32, %rdx + orq %rdx, %rax + ret + SET_SIZE(randtick) +/* + * Insert entryp after predp in a doubly linked list. + */ + + ENTRY(_insque) + movq (%rsi), %rax /* predp->forw */ + movq %rsi, CPTRSIZE(%rdi) /* entryp->back = predp */ + movq %rax, (%rdi) /* entryp->forw = predp->forw */ + movq %rdi, (%rsi) /* predp->forw = entryp */ + movq %rdi, CPTRSIZE(%rax) /* predp->forw->back = entryp */ + ret + SET_SIZE(_insque) + +/* + * Remove entryp from a doubly linked list + */ + + ENTRY(_remque) + movq (%rdi), %rax /* entry->forw */ + movq CPTRSIZE(%rdi), %rdx /* entry->back */ + movq %rax, (%rdx) /* entry->back->forw = entry->forw */ + movq %rdx, CPTRSIZE(%rax) /* entry->forw->back = entry->back */ + ret + SET_SIZE(_remque) + +/* + * Returns the number of + * non-NULL bytes in string argument. + */ + +/* + * This is close to a simple transliteration of a C version of this + * routine. We should either just -make- this be a C version, or + * justify having it in assembler by making it significantly faster. + * + * size_t + * strlen(const char *s) + * { + * const char *s0; + * #if defined(DEBUG) + * if ((uintptr_t)s < KERNELBASE) + * panic(.str_panic_msg); + * #endif + * for (s0 = s; *s; s++) + * ; + * return (s - s0); + * } + */ + + ENTRY(strlen) +#ifdef DEBUG + movq postbootkernelbase(%rip), %rax + cmpq %rax, %rdi + jae str_valid + pushq %rbp + movq %rsp, %rbp + leaq .str_panic_msg(%rip), %rdi + xorl %eax, %eax + call panic +#endif /* DEBUG */ +str_valid: + cmpb $0, (%rdi) + movq %rdi, %rax + je .null_found + .align 4 +.strlen_loop: + incq %rdi + cmpb $0, (%rdi) + jne .strlen_loop +.null_found: + subq %rax, %rdi + movq %rdi, %rax + ret + SET_SIZE(strlen) + +#ifdef DEBUG + .text +.str_panic_msg: + .string "strlen: argument below kernelbase" +#endif /* DEBUG */ + + /* + * Berkeley 4.3 introduced symbolically named interrupt levels + * as a way deal with priority in a machine independent fashion. + * Numbered priorities are machine specific, and should be + * discouraged where possible. + * + * Note, for the machine specific priorities there are + * examples listed for devices that use a particular priority. + * It should not be construed that all devices of that + * type should be at that priority. It is currently were + * the current devices fit into the priority scheme based + * upon time criticalness. + * + * The underlying assumption of these assignments is that + * IPL 10 is the highest level from which a device + * routine can call wakeup. Devices that interrupt from higher + * levels are restricted in what they can do. If they need + * kernels services they should schedule a routine at a lower + * level (via software interrupt) to do the required + * processing. 
+ * + * Examples of this higher usage: + * Level Usage + * 14 Profiling clock (and PROM uart polling clock) + * 12 Serial ports + * + * The serial ports request lower level processing on level 6. + * + * Also, almost all splN routines (where N is a number or a + * mnemonic) will do a RAISE(), on the assumption that they are + * never used to lower our priority. + * The exceptions are: + * spl8() Because you can't be above 15 to begin with! + * splzs() Because this is used at boot time to lower our + * priority, to allow the PROM to poll the uart. + * spl0() Used to lower priority to 0. + */ + +#define SETPRI(level) \ + movl $/**/level, %edi; /* new priority */ \ + jmp do_splx /* redirect to do_splx */ + +#define RAISE(level) \ + movl $/**/level, %edi; /* new priority */ \ + jmp splr /* redirect to splr */ + + /* locks out all interrupts, including memory errors */ + ENTRY(spl8) + SETPRI(15) + SET_SIZE(spl8) + + /* just below the level that profiling runs */ + ENTRY(spl7) + RAISE(13) + SET_SIZE(spl7) + + /* sun specific - highest priority onboard serial i/o asy ports */ + ENTRY(splzs) + SETPRI(12) /* Can't be a RAISE, as it's used to lower us */ + SET_SIZE(splzs) + + ENTRY(splhi) + ALTENTRY(splhigh) + ALTENTRY(spl6) + ALTENTRY(i_ddi_splhigh) + + RAISE(DISP_LEVEL) + + SET_SIZE(i_ddi_splhigh) + SET_SIZE(spl6) + SET_SIZE(splhigh) + SET_SIZE(splhi) + + /* allow all interrupts */ + ENTRY(spl0) + SETPRI(0) + SET_SIZE(spl0) + + + /* splx implementation */ + ENTRY(splx) + jmp do_splx /* redirect to common splx code */ + SET_SIZE(splx) + + ENTRY(wait_500ms) + pushq %rbx + movl $50000, %ebx +1: + call tenmicrosec + decl %ebx + jnz 1b + popq %rbx + ret + SET_SIZE(wait_500ms) + +#define RESET_METHOD_KBC 1 +#define RESET_METHOD_PORT92 2 +#define RESET_METHOD_PCI 4 + + DGDEF3(pc_reset_methods, 4, 8) + .long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI; + + ENTRY(pc_reset) + + testl $RESET_METHOD_KBC, pc_reset_methods(%rip) + jz 1f + + / + / Try the classic keyboard controller-triggered reset. + / + movw $0x64, %dx + movb $0xfe, %al + outb (%dx) + + / Wait up to 500 milliseconds here for the keyboard controller + / to pull the reset line. On some systems where the keyboard + / controller is slow to pull the reset line, the next reset method + / may be executed (which may be bad if those systems hang when the + / next reset method is used, e.g. Ferrari 3400 (doesn't like port 92), + / and Ferrari 4000 (doesn't like the cf9 reset method)) + + call wait_500ms + +1: + testl $RESET_METHOD_PORT92, pc_reset_methods(%rip) + jz 3f + + / + / Try port 0x92 fast reset + / + movw $0x92, %dx + inb (%dx) + cmpb $0xff, %al / If port's not there, we should get back 0xFF + je 1f + testb $1, %al / If bit 0 + jz 2f / is clear, jump to perform the reset + andb $0xfe, %al / otherwise, + outb (%dx) / clear bit 0 first, then +2: + orb $1, %al / Set bit 0 + outb (%dx) / and reset the system +1: + + call wait_500ms + +3: + testl $RESET_METHOD_PCI, pc_reset_methods(%rip) + jz 4f + + / Try the PCI (soft) reset vector (should work on all modern systems, + / but has been shown to cause problems on 450NX systems, and some newer + / systems (e.g. ATI IXP400-equipped systems)) + / When resetting via this method, 2 writes are required. The first + / targets bit 1 (0=hard reset without power cycle, 1=hard reset with + / power cycle). + / The reset occurs on the second write, during bit 2's transition from + / 0->1. 
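+	/ In C terms the two writes below are (illustrative only):
+	/	outb(0xcf9, 0x2);	- reset mode: hard, no power cycle
+	/	outb(0xcf9, 0x6);	- bit 2 goes 0->1, triggering reset
+	/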
+ movw $0xcf9, %dx + movb $0x2, %al / Reset mode = hard, no power cycle + outb (%dx) + movb $0x6, %al + outb (%dx) + + call wait_500ms + +4: + / + / port 0xcf9 failed also. Last-ditch effort is to + / triple-fault the CPU. + / Also, use triple fault for EFI firmware + / + ENTRY(efi_reset) + pushq $0x0 + pushq $0x0 / IDT base of 0, limit of 0 + 2 unused bytes + lidt (%rsp) + int $0x0 / Trigger interrupt, generate triple-fault + + cli + hlt / Wait forever + /*NOTREACHED*/ + SET_SIZE(efi_reset) + SET_SIZE(pc_reset) + +/* + * C callable in and out routines + */ + + ENTRY(outl) + movw %di, %dx + movl %esi, %eax + outl (%dx) + ret + SET_SIZE(outl) + + ENTRY(outw) + movw %di, %dx + movw %si, %ax + D16 outl (%dx) /* XX64 why not outw? */ + ret + SET_SIZE(outw) + + ENTRY(outb) + movw %di, %dx + movb %sil, %al + outb (%dx) + ret + SET_SIZE(outb) + + ENTRY(inl) + xorl %eax, %eax + movw %di, %dx + inl (%dx) + ret + SET_SIZE(inl) + + ENTRY(inw) + xorl %eax, %eax + movw %di, %dx + D16 inl (%dx) + ret + SET_SIZE(inw) + + + ENTRY(inb) + xorl %eax, %eax + movw %di, %dx + inb (%dx) + ret + SET_SIZE(inb) + +/* + * void int3(void) + * void int18(void) + * void int20(void) + * void int_cmci(void) + */ + + ENTRY(int3) + int $T_BPTFLT + ret + SET_SIZE(int3) + + ENTRY(int18) + int $T_MCE + ret + SET_SIZE(int18) + + ENTRY(int20) + movl boothowto, %eax + andl $RB_DEBUG, %eax + jz 1f + + int $T_DBGENTR +1: + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(int20) + + ENTRY(int_cmci) + int $T_ENOEXTFLT + ret + SET_SIZE(int_cmci) + + ENTRY(scanc) + /* rdi == size */ + /* rsi == cp */ + /* rdx == table */ + /* rcx == mask */ + addq %rsi, %rdi /* end = &cp[size] */ +.scanloop: + cmpq %rdi, %rsi /* while (cp < end */ + jnb .scandone + movzbq (%rsi), %r8 /* %r8 = *cp */ + incq %rsi /* cp++ */ + testb %cl, (%r8, %rdx) + jz .scanloop /* && (table[*cp] & mask) == 0) */ + decq %rsi /* (fix post-increment) */ +.scandone: + movl %edi, %eax + subl %esi, %eax /* return (end - cp) */ + ret + SET_SIZE(scanc) + +/* + * Replacement functions for ones that are normally inlined. + * In addition to the copy in i86.il, they are defined here just in case. + */ + + ENTRY(intr_clear) + ENTRY(clear_int_flag) + pushfq + popq %rax +#if defined(__xpv) + leaq xpv_panicking, %rdi + movl (%rdi), %edi + cmpl $0, %edi + jne 2f + CLIRET(%rdi, %dl) /* returns event mask in %dl */ + /* + * Synthesize the PS_IE bit from the event mask bit + */ + andq $_BITNOT(PS_IE), %rax + testb $1, %dl + jnz 1f + orq $PS_IE, %rax +1: + ret +2: +#endif + CLI(%rdi) + ret + SET_SIZE(clear_int_flag) + SET_SIZE(intr_clear) + + ENTRY(curcpup) + movq %gs:CPU_SELF, %rax + ret + SET_SIZE(curcpup) + +/* htonll(), ntohll(), htonl(), ntohl(), htons(), ntohs() + * These functions reverse the byte order of the input parameter and returns + * the result. This is to convert the byte order from host byte order + * (little endian) to network byte order (big endian), or vice versa. 
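+ *
+ * For example, on x86 (little endian) htonl(0x12345678) returns
+ * 0x78563412, and htons(0x1234) returns 0x3412; applying ntohl()/ntohs()
+ * to those results gives back the original values.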
+ */ + + ENTRY(htonll) + ALTENTRY(ntohll) + movq %rdi, %rax + bswapq %rax + ret + SET_SIZE(ntohll) + SET_SIZE(htonll) + + /* XX64 there must be shorter sequences for this */ + ENTRY(htonl) + ALTENTRY(ntohl) + movl %edi, %eax + bswap %eax + ret + SET_SIZE(ntohl) + SET_SIZE(htonl) + + /* XX64 there must be better sequences for this */ + ENTRY(htons) + ALTENTRY(ntohs) + movl %edi, %eax + bswap %eax + shrl $16, %eax + ret + SET_SIZE(ntohs) + SET_SIZE(htons) + + + ENTRY(intr_restore) + ENTRY(restore_int_flag) + testq $PS_IE, %rdi + jz 1f +#if defined(__xpv) + leaq xpv_panicking, %rsi + movl (%rsi), %esi + cmpl $0, %esi + jne 1f + /* + * Since we're -really- running unprivileged, our attempt + * to change the state of the IF bit will be ignored. + * The virtual IF bit is tweaked by CLI and STI. + */ + IE_TO_EVENT_MASK(%rsi, %rdi) +#else + sti +#endif +1: + ret + SET_SIZE(restore_int_flag) + SET_SIZE(intr_restore) + + ENTRY(sti) + STI + ret + SET_SIZE(sti) + + ENTRY(cli) + CLI(%rax) + ret + SET_SIZE(cli) + + ENTRY(dtrace_interrupt_disable) + pushfq + popq %rax +#if defined(__xpv) + leaq xpv_panicking, %rdi + movl (%rdi), %edi + cmpl $0, %edi + jne .dtrace_interrupt_disable_done + CLIRET(%rdi, %dl) /* returns event mask in %dl */ + /* + * Synthesize the PS_IE bit from the event mask bit + */ + andq $_BITNOT(PS_IE), %rax + testb $1, %dl + jnz .dtrace_interrupt_disable_done + orq $PS_IE, %rax +#else + CLI(%rdx) +#endif +.dtrace_interrupt_disable_done: + ret + SET_SIZE(dtrace_interrupt_disable) + + ENTRY(dtrace_interrupt_enable) + pushq %rdi + popfq +#if defined(__xpv) + leaq xpv_panicking, %rdx + movl (%rdx), %edx + cmpl $0, %edx + jne .dtrace_interrupt_enable_done + /* + * Since we're -really- running unprivileged, our attempt + * to change the state of the IF bit will be ignored. The + * virtual IF bit is tweaked by CLI and STI. + */ + IE_TO_EVENT_MASK(%rdx, %rdi) +#endif +.dtrace_interrupt_enable_done: + ret + SET_SIZE(dtrace_interrupt_enable) + + + ENTRY(dtrace_membar_producer) + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(dtrace_membar_producer) + + ENTRY(dtrace_membar_consumer) + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(dtrace_membar_consumer) + + ENTRY(threadp) + movq %gs:CPU_THREAD, %rax + ret + SET_SIZE(threadp) + +/* + * Checksum routine for Internet Protocol Headers + */ + + ENTRY(ip_ocsum) + pushq %rbp + movq %rsp, %rbp +#ifdef DEBUG + movq postbootkernelbase(%rip), %rax + cmpq %rax, %rdi + jnb 1f + xorl %eax, %eax + movq %rdi, %rsi + leaq .ip_ocsum_panic_msg(%rip), %rdi + call panic + /*NOTREACHED*/ +.ip_ocsum_panic_msg: + .string "ip_ocsum: address 0x%p below kernelbase\n" +1: +#endif + movl %esi, %ecx /* halfword_count */ + movq %rdi, %rsi /* address */ + /* partial sum in %edx */ + xorl %eax, %eax + testl %ecx, %ecx + jz .ip_ocsum_done + testq $3, %rsi + jnz .ip_csum_notaligned +.ip_csum_aligned: /* XX64 opportunities for 8-byte operations? */ +.next_iter: + /* XX64 opportunities for prefetch? */ + /* XX64 compute csum with 64 bit quantities? 
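+	 *
+	 * One way (a sketch only, not what the code below does) would be
+	 * to add the 16 dwords of each 64-byte block into a uint64_t and
+	 * fold the carries back in afterwards:
+	 *
+	 *	uint64_t sum = partial_sum;
+	 *	for (i = 0; i < 16; i++)
+	 *		sum += ((const uint32_t *)addr)[i];
+	 *	while (sum >> 16)
+	 *		sum = (sum & 0xffff) + (sum >> 16);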
*/ + subl $32, %ecx + jl .less_than_32 + + addl 0(%rsi), %edx +.only60: + adcl 4(%rsi), %eax +.only56: + adcl 8(%rsi), %edx +.only52: + adcl 12(%rsi), %eax +.only48: + adcl 16(%rsi), %edx +.only44: + adcl 20(%rsi), %eax +.only40: + adcl 24(%rsi), %edx +.only36: + adcl 28(%rsi), %eax +.only32: + adcl 32(%rsi), %edx +.only28: + adcl 36(%rsi), %eax +.only24: + adcl 40(%rsi), %edx +.only20: + adcl 44(%rsi), %eax +.only16: + adcl 48(%rsi), %edx +.only12: + adcl 52(%rsi), %eax +.only8: + adcl 56(%rsi), %edx +.only4: + adcl 60(%rsi), %eax /* could be adding -1 and -1 with a carry */ +.only0: + adcl $0, %eax /* could be adding -1 in eax with a carry */ + adcl $0, %eax + + addq $64, %rsi + testl %ecx, %ecx + jnz .next_iter + +.ip_ocsum_done: + addl %eax, %edx + adcl $0, %edx + movl %edx, %eax /* form a 16 bit checksum by */ + shrl $16, %eax /* adding two halves of 32 bit checksum */ + addw %dx, %ax + adcw $0, %ax + andl $0xffff, %eax + leave + ret + +.ip_csum_notaligned: + xorl %edi, %edi + movw (%rsi), %di + addl %edi, %edx + adcl $0, %edx + addq $2, %rsi + decl %ecx + jmp .ip_csum_aligned + +.less_than_32: + addl $32, %ecx + testl $1, %ecx + jz .size_aligned + andl $0xfe, %ecx + movzwl (%rsi, %rcx, 2), %edi + addl %edi, %edx + adcl $0, %edx +.size_aligned: + movl %ecx, %edi + shrl $1, %ecx + shl $1, %edi + subq $64, %rdi + addq %rdi, %rsi + leaq .ip_ocsum_jmptbl(%rip), %rdi + leaq (%rdi, %rcx, 8), %rdi + xorl %ecx, %ecx + clc + movq (%rdi), %rdi + INDIRECT_JMP_REG(rdi) + + .align 8 +.ip_ocsum_jmptbl: + .quad .only0, .only4, .only8, .only12, .only16, .only20 + .quad .only24, .only28, .only32, .only36, .only40, .only44 + .quad .only48, .only52, .only56, .only60 + SET_SIZE(ip_ocsum) + +/* + * multiply two long numbers and yield a u_longlong_t result, callable from C. + * Provided to manipulate hrtime_t values. + */ + + ENTRY(mul32) + xorl %edx, %edx /* XX64 joe, paranoia? */ + movl %edi, %eax + mull %esi + shlq $32, %rdx + orq %rdx, %rax + ret + SET_SIZE(mul32) + + ENTRY(scan_memory) + shrq $3, %rsi /* convert %rsi from byte to quadword count */ + jz .scanm_done + movq %rsi, %rcx /* move count into rep control register */ + movq %rdi, %rsi /* move addr into lodsq control reg. 
*/ + rep lodsq /* scan the memory range */ +.scanm_done: + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(scan_memory) + + + ENTRY(lowbit) + movl $-1, %eax + bsfq %rdi, %rdi + cmovnz %edi, %eax + incl %eax + ret + SET_SIZE(lowbit) + + ENTRY(highbit) + ALTENTRY(highbit64) + movl $-1, %eax + bsrq %rdi, %rdi + cmovnz %edi, %eax + incl %eax + ret + SET_SIZE(highbit64) + SET_SIZE(highbit) + +#define XMSR_ACCESS_VAL $0x9c5a203a + + ENTRY(rdmsr) + movl %edi, %ecx + rdmsr + shlq $32, %rdx + orq %rdx, %rax + ret + SET_SIZE(rdmsr) + + ENTRY(wrmsr) + movq %rsi, %rdx + shrq $32, %rdx + movl %esi, %eax + movl %edi, %ecx + wrmsr + ret + SET_SIZE(wrmsr) + + ENTRY(xrdmsr) + pushq %rbp + movq %rsp, %rbp + movl %edi, %ecx + movl XMSR_ACCESS_VAL, %edi /* this value is needed to access MSR */ + rdmsr + shlq $32, %rdx + orq %rdx, %rax + leave + ret + SET_SIZE(xrdmsr) + + ENTRY(xwrmsr) + pushq %rbp + movq %rsp, %rbp + movl %edi, %ecx + movl XMSR_ACCESS_VAL, %edi /* this value is needed to access MSR */ + movq %rsi, %rdx + shrq $32, %rdx + movl %esi, %eax + wrmsr + leave + ret + SET_SIZE(xwrmsr) + + ENTRY(get_xcr) + movl %edi, %ecx + #xgetbv + .byte 0x0f,0x01,0xd0 + shlq $32, %rdx + orq %rdx, %rax + ret + SET_SIZE(get_xcr) + + ENTRY(set_xcr) + movq %rsi, %rdx + shrq $32, %rdx + movl %esi, %eax + movl %edi, %ecx + #xsetbv + .byte 0x0f,0x01,0xd1 + ret + SET_SIZE(set_xcr) + + ENTRY(invalidate_cache) + wbinvd + ret + SET_SIZE(invalidate_cache) + + ENTRY_NP(getcregs) +#if defined(__xpv) + /* + * Only a few of the hardware control registers or descriptor tables + * are directly accessible to us, so just zero the structure. + * + * XXPV Perhaps it would be helpful for the hypervisor to return + * virtualized versions of these for post-mortem use. + * (Need to reevaluate - perhaps it already does!) + */ + pushq %rdi /* save *crp */ + movq $CREGSZ, %rsi + call bzero + popq %rdi + + /* + * Dump what limited information we can + */ + movq %cr0, %rax + movq %rax, CREG_CR0(%rdi) /* cr0 */ + movq %cr2, %rax + movq %rax, CREG_CR2(%rdi) /* cr2 */ + movq %cr3, %rax + movq %rax, CREG_CR3(%rdi) /* cr3 */ + movq %cr4, %rax + movq %rax, CREG_CR4(%rdi) /* cr4 */ + +#else /* __xpv */ + +#define GETMSR(r, off, d) \ + movl $r, %ecx; \ + rdmsr; \ + movl %eax, off(d); \ + movl %edx, off+4(d) + + xorl %eax, %eax + movq %rax, CREG_GDT+8(%rdi) + sgdt CREG_GDT(%rdi) /* 10 bytes */ + movq %rax, CREG_IDT+8(%rdi) + sidt CREG_IDT(%rdi) /* 10 bytes */ + movq %rax, CREG_LDT(%rdi) + sldt CREG_LDT(%rdi) /* 2 bytes */ + movq %rax, CREG_TASKR(%rdi) + str CREG_TASKR(%rdi) /* 2 bytes */ + movq %cr0, %rax + movq %rax, CREG_CR0(%rdi) /* cr0 */ + movq %cr2, %rax + movq %rax, CREG_CR2(%rdi) /* cr2 */ + movq %cr3, %rax + movq %rax, CREG_CR3(%rdi) /* cr3 */ + movq %cr4, %rax + movq %rax, CREG_CR4(%rdi) /* cr4 */ + movq %cr8, %rax + movq %rax, CREG_CR8(%rdi) /* cr8 */ + GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi) + GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi) +#endif /* __xpv */ + ret + SET_SIZE(getcregs) + +#undef GETMSR + + +/* + * A panic trigger is a word which is updated atomically and can only be set + * once. We atomically store 0xDEFACEDD and load the old value. If the + * previous value was 0, we succeed and return 1; otherwise return 0. + * This allows a partially corrupt trigger to still trigger correctly. DTrace + * has its own version of this function to allow it to panic correctly from + * probe context. 
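+ *
+ * In C terms the fast path below is roughly the following sketch (the
+ * function name is illustrative; the real code uses a locked xchgl
+ * directly):
+ *
+ *	static int
+ *	sketch_panic_trigger(volatile uint32_t *tp)
+ *	{
+ *		uint32_t old = __atomic_exchange_n(tp, 0xdefacedd,
+ *		    __ATOMIC_SEQ_CST);	// atomic swap, like lock xchgl
+ *		return (old == 0);	// only the first caller sees 0
+ *	}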
+ */ + + ENTRY_NP(panic_trigger) + xorl %eax, %eax + movl $0xdefacedd, %edx + lock + xchgl %edx, (%rdi) + cmpl $0, %edx + je 0f + movl $0, %eax + ret +0: movl $1, %eax + ret + SET_SIZE(panic_trigger) + + ENTRY_NP(dtrace_panic_trigger) + xorl %eax, %eax + movl $0xdefacedd, %edx + lock + xchgl %edx, (%rdi) + cmpl $0, %edx + je 0f + movl $0, %eax + ret +0: movl $1, %eax + ret + SET_SIZE(dtrace_panic_trigger) + +/* + * The panic() and cmn_err() functions invoke vpanic() as a common entry point + * into the panic code implemented in panicsys(). vpanic() is responsible + * for passing through the format string and arguments, and constructing a + * regs structure on the stack into which it saves the current register + * values. If we are not dying due to a fatal trap, these registers will + * then be preserved in panicbuf as the current processor state. Before + * invoking panicsys(), vpanic() activates the first panic trigger (see + * common/os/panic.c) and switches to the panic_stack if successful. Note that + * DTrace takes a slightly different panic path if it must panic from probe + * context. Instead of calling panic, it calls into dtrace_vpanic(), which + * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and + * branches back into vpanic(). + */ + + ENTRY_NP(vpanic) /* Initial stack layout: */ + + pushq %rbp /* | %rip | 0x60 */ + movq %rsp, %rbp /* | %rbp | 0x58 */ + pushfq /* | rfl | 0x50 */ + pushq %r11 /* | %r11 | 0x48 */ + pushq %r10 /* | %r10 | 0x40 */ + pushq %rbx /* | %rbx | 0x38 */ + pushq %rax /* | %rax | 0x30 */ + pushq %r9 /* | %r9 | 0x28 */ + pushq %r8 /* | %r8 | 0x20 */ + pushq %rcx /* | %rcx | 0x18 */ + pushq %rdx /* | %rdx | 0x10 */ + pushq %rsi /* | %rsi | 0x8 alist */ + pushq %rdi /* | %rdi | 0x0 format */ + + movq %rsp, %rbx /* %rbx = current %rsp */ + + leaq panic_quiesce(%rip), %rdi /* %rdi = &panic_quiesce */ + call panic_trigger /* %eax = panic_trigger() */ + +vpanic_common: + /* + * The panic_trigger result is in %eax from the call above, and + * dtrace_panic places it in %eax before branching here. + * The rdmsr instructions that follow below will clobber %eax so + * we stash the panic_trigger result in %r11d. + */ + movl %eax, %r11d + cmpl $0, %r11d + je 0f + + /* + * If panic_trigger() was successful, we are the first to initiate a + * panic: we now switch to the reserved panic_stack before continuing. + */ + leaq panic_stack(%rip), %rsp + addq $PANICSTKSIZE, %rsp +0: subq $REGSIZE, %rsp + /* + * Now that we've got everything set up, store the register values as + * they were when we entered vpanic() to the designated location in + * the regs structure we allocated on the stack. 
+ */ + movq 0x0(%rbx), %rcx + movq %rcx, REGOFF_RDI(%rsp) + movq 0x8(%rbx), %rcx + movq %rcx, REGOFF_RSI(%rsp) + movq 0x10(%rbx), %rcx + movq %rcx, REGOFF_RDX(%rsp) + movq 0x18(%rbx), %rcx + movq %rcx, REGOFF_RCX(%rsp) + movq 0x20(%rbx), %rcx + + movq %rcx, REGOFF_R8(%rsp) + movq 0x28(%rbx), %rcx + movq %rcx, REGOFF_R9(%rsp) + movq 0x30(%rbx), %rcx + movq %rcx, REGOFF_RAX(%rsp) + movq 0x38(%rbx), %rcx + movq %rcx, REGOFF_RBX(%rsp) + movq 0x58(%rbx), %rcx + + movq %rcx, REGOFF_RBP(%rsp) + movq 0x40(%rbx), %rcx + movq %rcx, REGOFF_R10(%rsp) + movq 0x48(%rbx), %rcx + movq %rcx, REGOFF_R11(%rsp) + movq %r12, REGOFF_R12(%rsp) + + movq %r13, REGOFF_R13(%rsp) + movq %r14, REGOFF_R14(%rsp) + movq %r15, REGOFF_R15(%rsp) + + xorl %ecx, %ecx + movw %ds, %cx + movq %rcx, REGOFF_DS(%rsp) + movw %es, %cx + movq %rcx, REGOFF_ES(%rsp) + movw %fs, %cx + movq %rcx, REGOFF_FS(%rsp) + movw %gs, %cx + movq %rcx, REGOFF_GS(%rsp) + + movq $0, REGOFF_TRAPNO(%rsp) + + movq $0, REGOFF_ERR(%rsp) + leaq vpanic(%rip), %rcx + movq %rcx, REGOFF_RIP(%rsp) + movw %cs, %cx + movzwq %cx, %rcx + movq %rcx, REGOFF_CS(%rsp) + movq 0x50(%rbx), %rcx + movq %rcx, REGOFF_RFL(%rsp) + movq %rbx, %rcx + addq $0x60, %rcx + movq %rcx, REGOFF_RSP(%rsp) + movw %ss, %cx + movzwq %cx, %rcx + movq %rcx, REGOFF_SS(%rsp) + + /* + * panicsys(format, alist, rp, on_panic_stack) + */ + movq REGOFF_RDI(%rsp), %rdi /* format */ + movq REGOFF_RSI(%rsp), %rsi /* alist */ + movq %rsp, %rdx /* struct regs */ + movl %r11d, %ecx /* on_panic_stack */ + call panicsys + addq $REGSIZE, %rsp + popq %rdi + popq %rsi + popq %rdx + popq %rcx + popq %r8 + popq %r9 + popq %rax + popq %rbx + popq %r10 + popq %r11 + popfq + leave + ret + SET_SIZE(vpanic) + + ENTRY_NP(dtrace_vpanic) /* Initial stack layout: */ + + pushq %rbp /* | %rip | 0x60 */ + movq %rsp, %rbp /* | %rbp | 0x58 */ + pushfq /* | rfl | 0x50 */ + pushq %r11 /* | %r11 | 0x48 */ + pushq %r10 /* | %r10 | 0x40 */ + pushq %rbx /* | %rbx | 0x38 */ + pushq %rax /* | %rax | 0x30 */ + pushq %r9 /* | %r9 | 0x28 */ + pushq %r8 /* | %r8 | 0x20 */ + pushq %rcx /* | %rcx | 0x18 */ + pushq %rdx /* | %rdx | 0x10 */ + pushq %rsi /* | %rsi | 0x8 alist */ + pushq %rdi /* | %rdi | 0x0 format */ + + movq %rsp, %rbx /* %rbx = current %rsp */ + + leaq panic_quiesce(%rip), %rdi /* %rdi = &panic_quiesce */ + call dtrace_panic_trigger /* %eax = dtrace_panic_trigger() */ + jmp vpanic_common + + SET_SIZE(dtrace_vpanic) + + DGDEF3(timedelta, 8, 8) + .long 0, 0 + + /* + * initialized to a non zero value to make pc_gethrtime() + * work correctly even before clock is initialized + */ + DGDEF3(hrtime_base, 8, 8) + .long _MUL(NSEC_PER_CLOCK_TICK, 6), 0 + + DGDEF3(adj_shift, 4, 4) + .long ADJ_SHIFT + + ENTRY_NP(hres_tick) + pushq %rbp + movq %rsp, %rbp + + /* + * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously, + * hres_last_tick can only be modified while holding CLOCK_LOCK). + * At worst, performing this now instead of under CLOCK_LOCK may + * introduce some jitter in pc_gethrestime(). + */ + movq gethrtimef(%rip), %rsi + INDIRECT_CALL_REG(rsi) + movq %rax, %r8 + + leaq hres_lock(%rip), %rax + movb $-1, %dl +.CL1: + xchgb %dl, (%rax) + testb %dl, %dl + jz .CL3 /* got it */ +.CL2: + cmpb $0, (%rax) /* possible to get lock? 
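+ *
+ * (This .CL1/.CL2 loop is a test-and-test-and-set spin: grab the byte
+ * lock with xchgb, and on failure spin on plain loads -- with pause --
+ * until it looks free before retrying the atomic.  A hedged C sketch of
+ * the same pattern, using GCC/clang builtins purely for illustration:
+ *
+ *	while (__atomic_exchange_n(lp, 0xff, __ATOMIC_ACQUIRE) != 0) {
+ *		while (*lp != 0)
+ *			__builtin_ia32_pause();	// like the pause insn below
+ *	}
+ *
+ * where lp is a volatile uint8_t pointer to hres_lock.)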
*/ + pause + jne .CL2 + jmp .CL1 /* yes, try again */ +.CL3: + /* + * compute the interval since last time hres_tick was called + * and adjust hrtime_base and hrestime accordingly + * hrtime_base is an 8 byte value (in nsec), hrestime is + * a timestruc_t (sec, nsec) + */ + leaq hres_last_tick(%rip), %rax + movq %r8, %r11 + subq (%rax), %r8 + addq %r8, hrtime_base(%rip) /* add interval to hrtime_base */ + addq %r8, hrestime+8(%rip) /* add interval to hrestime.tv_nsec */ + /* + * Now that we have CLOCK_LOCK, we can update hres_last_tick + */ + movq %r11, (%rax) + + call __adj_hrestime + + /* + * release the hres_lock + */ + incl hres_lock(%rip) + leave + ret + SET_SIZE(hres_tick) + +/* + * void prefetch_smap_w(void *) + * + * Prefetch ahead within a linear list of smap structures. + * Not implemented for ia32. Stub for compatibility. + */ + + ENTRY(prefetch_smap_w) + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(prefetch_smap_w) + +/* + * prefetch_page_r(page_t *) + * issue prefetch instructions for a page_t + */ + + ENTRY(prefetch_page_r) + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(prefetch_page_r) + + ENTRY(bcmp) + pushq %rbp + movq %rsp, %rbp +#ifdef DEBUG + testq %rdx,%rdx + je 1f + movq postbootkernelbase(%rip), %r11 + cmpq %r11, %rdi + jb 0f + cmpq %r11, %rsi + jnb 1f +0: leaq .bcmp_panic_msg(%rip), %rdi + xorl %eax, %eax + call panic +1: +#endif /* DEBUG */ + call memcmp + testl %eax, %eax + setne %dl + leave + movzbl %dl, %eax + ret + SET_SIZE(bcmp) + +#ifdef DEBUG + .text +.bcmp_panic_msg: + .string "bcmp: arguments below kernelbase" +#endif /* DEBUG */ + + ENTRY_NP(bsrw_insn) + xorl %eax, %eax + bsrw %di, %ax + ret + SET_SIZE(bsrw_insn) + + ENTRY_NP(switch_sp_and_call) + pushq %rbp + movq %rsp, %rbp /* set up stack frame */ + movq %rdi, %rsp /* switch stack pointer */ + movq %rdx, %rdi /* pass func arg 1 */ + movq %rsi, %r11 /* save function to call */ + movq %rcx, %rsi /* pass func arg 2 */ + INDIRECT_CALL_REG(r11) /* call function */ + leave /* restore stack */ + ret + SET_SIZE(switch_sp_and_call) + + ENTRY_NP(kmdb_enter) + pushq %rbp + movq %rsp, %rbp + + /* + * Save flags, do a 'cli' then return the saved flags + */ + call intr_clear + + int $T_DBGENTR + + /* + * Restore the saved flags + */ + movq %rax, %rdi + call intr_restore + + leave + ret + SET_SIZE(kmdb_enter) + + ENTRY_NP(return_instr) + rep; ret /* use 2 byte instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + SET_SIZE(return_instr) + + ENTRY(getflags) + pushfq + popq %rax +#if defined(__xpv) + CURTHREAD(%rdi) + KPREEMPT_DISABLE(%rdi) + /* + * Synthesize the PS_IE bit from the event mask bit + */ + CURVCPU(%r11) + andq $_BITNOT(PS_IE), %rax + XEN_TEST_UPCALL_MASK(%r11) + jnz 1f + orq $PS_IE, %rax +1: + KPREEMPT_ENABLE_NOKP(%rdi) +#endif + ret + SET_SIZE(getflags) + + ENTRY(ftrace_interrupt_disable) + pushfq + popq %rax + CLI(%rdx) + ret + SET_SIZE(ftrace_interrupt_disable) + + ENTRY(ftrace_interrupt_enable) + pushq %rdi + popfq + ret + SET_SIZE(ftrace_interrupt_enable) + + ENTRY(clflush_insn) + clflush (%rdi) + ret + SET_SIZE(clflush_insn) + + ENTRY(mfence_insn) + mfence + ret + SET_SIZE(mfence_insn) + +/* + * VMware implements an I/O port that programs can query to detect if software + * is running in a VMware hypervisor. 
This hypervisor port behaves differently + * depending on magic values in certain registers and modifies some registers + * as a side effect. + * + * References: http://kb.vmware.com/kb/1009458 + */ + + ENTRY(vmware_port) + pushq %rbx + movl $VMWARE_HVMAGIC, %eax + movl $0xffffffff, %ebx + movl %edi, %ecx + movl $VMWARE_HVPORT, %edx + inl (%dx) + movl %eax, (%rsi) + movl %ebx, 4(%rsi) + movl %ecx, 8(%rsi) + movl %edx, 12(%rsi) + popq %rbx + ret + SET_SIZE(vmware_port) diff --git a/usr/src/uts/intel/ml/lock_prim.s b/usr/src/uts/intel/ml/lock_prim.s new file mode 100644 index 0000000000..4267561bf7 --- /dev/null +++ b/usr/src/uts/intel/ml/lock_prim.s @@ -0,0 +1,714 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2019 Joyent, Inc. + */ + +#include "assym.h" + +#include <sys/mutex_impl.h> +#include <sys/asm_linkage.h> +#include <sys/asm_misc.h> +#include <sys/regset.h> +#include <sys/rwlock_impl.h> +#include <sys/lockstat.h> + +/* + * lock_try(lp), ulock_try(lp) + * - returns non-zero on success. + * - doesn't block interrupts so don't use this to spin on a lock. + * + * ulock_try() is for a lock in the user address space. + */ + + .globl kernelbase + + ENTRY(lock_try) + movb $-1, %dl + movzbq %dl, %rax + xchgb %dl, (%rdi) + xorb %dl, %al +.lock_try_lockstat_patch_point: + ret + testb %al, %al + jnz 0f + ret +0: + movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ + movq %rdi, %rsi /* rsi = lock addr */ + movl $LS_LOCK_TRY_ACQUIRE, %edi /* edi = event */ + jmp lockstat_wrapper + SET_SIZE(lock_try) + + ENTRY(lock_spin_try) + movb $-1, %dl + movzbq %dl, %rax + xchgb %dl, (%rdi) + xorb %dl, %al + ret + SET_SIZE(lock_spin_try) + + ENTRY(ulock_try) +#ifdef DEBUG + movq kernelbase(%rip), %rax + cmpq %rax, %rdi /* test uaddr < kernelbase */ + jb ulock_pass /* uaddr < kernelbase, proceed */ + + movq %rdi, %r12 /* preserve lock ptr for debugging */ + leaq .ulock_panic_msg(%rip), %rdi + pushq %rbp /* align stack properly */ + movq %rsp, %rbp + xorl %eax, %eax /* clear for varargs */ + call panic + +#endif /* DEBUG */ + +ulock_pass: + movl $1, %eax + xchgb %al, (%rdi) + xorb $1, %al + ret + SET_SIZE(ulock_try) + +#ifdef DEBUG + .data +.ulock_panic_msg: + .string "ulock_try: Argument is above kernelbase" + .text +#endif /* DEBUG */ + +/* + * lock_clear(lp) + * - unlock lock without changing interrupt priority level. 
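+ *
+ * Taken together with lock_try() above, the uncontended fast paths are
+ * roughly this C sketch (names are illustrative; the real routines also
+ * feed the lockstat patch points):
+ *
+ *	static int
+ *	sketch_lock_try(volatile uint8_t *lp)
+ *	{
+ *		// atomic test-and-set, like xchgb; nonzero means we got it
+ *		return (__atomic_exchange_n(lp, 0xff, __ATOMIC_ACQUIRE) == 0);
+ *	}
+ *
+ *	static void
+ *	sketch_lock_clear(volatile uint8_t *lp)
+ *	{
+ *		// an ordinary byte store releases the lock on x86
+ *		__atomic_store_n(lp, 0, __ATOMIC_RELEASE);
+ *	}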
+ */ + + ENTRY(lock_clear) + movb $0, (%rdi) +.lock_clear_lockstat_patch_point: + ret + movq %rdi, %rsi /* rsi = lock addr */ + movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ + movl $LS_LOCK_CLEAR_RELEASE, %edi /* edi = event */ + jmp lockstat_wrapper + SET_SIZE(lock_clear) + + ENTRY(ulock_clear) +#ifdef DEBUG + movq kernelbase(%rip), %rcx + cmpq %rcx, %rdi /* test uaddr < kernelbase */ + jb ulock_clr /* uaddr < kernelbase, proceed */ + + leaq .ulock_clear_msg(%rip), %rdi + pushq %rbp /* align stack properly */ + movq %rsp, %rbp + xorl %eax, %eax /* clear for varargs */ + call panic +#endif + +ulock_clr: + movb $0, (%rdi) + ret + SET_SIZE(ulock_clear) + +#ifdef DEBUG + .data +.ulock_clear_msg: + .string "ulock_clear: Argument is above kernelbase" + .text +#endif /* DEBUG */ + + +/* + * lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil) + * Drops lp, sets pil to new_pil, stores old pil in *old_pil. + */ + + ENTRY(lock_set_spl) + pushq %rbp + movq %rsp, %rbp + subq $32, %rsp + movl %esi, 8(%rsp) /* save priority level */ + movq %rdx, 16(%rsp) /* save old pil ptr */ + movq %rdi, 24(%rsp) /* save lock pointer */ + movl %esi, %edi /* pass priority level */ + call splr /* raise priority level */ + movq 24(%rsp), %rdi /* rdi = lock addr */ + movb $-1, %dl + xchgb %dl, (%rdi) /* try to set lock */ + testb %dl, %dl /* did we get the lock? ... */ + jnz .lss_miss /* ... no, go to C for the hard case */ + movq 16(%rsp), %rdx /* rdx = old pil addr */ + movw %ax, (%rdx) /* store old pil */ + leave +.lock_set_spl_lockstat_patch_point: + ret + movq %rdi, %rsi /* rsi = lock addr */ + movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ + movl $LS_LOCK_SET_SPL_ACQUIRE, %edi + jmp lockstat_wrapper +.lss_miss: + movl 8(%rsp), %esi /* new_pil */ + movq 16(%rsp), %rdx /* old_pil_addr */ + movl %eax, %ecx /* original pil */ + leave /* unwind stack */ + jmp lock_set_spl_spin + SET_SIZE(lock_set_spl) + +/* + * void + * lock_init(lp) + */ + + ENTRY(lock_init) + movb $0, (%rdi) + ret + SET_SIZE(lock_init) + +/* + * void + * lock_set(lp) + */ + + ENTRY(lock_set) + movb $-1, %dl + xchgb %dl, (%rdi) /* try to set lock */ + testb %dl, %dl /* did we get it? 
*/ + jnz lock_set_spin /* no, go to C for the hard case */ +.lock_set_lockstat_patch_point: + ret + movq %rdi, %rsi /* rsi = lock addr */ + movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ + movl $LS_LOCK_SET_ACQUIRE, %edi + jmp lockstat_wrapper + SET_SIZE(lock_set) + +/* + * lock_clear_splx(lp, s) + */ + + ENTRY(lock_clear_splx) + movb $0, (%rdi) /* clear lock */ +.lock_clear_splx_lockstat_patch_point: + jmp 0f +0: + movl %esi, %edi /* arg for splx */ + jmp splx /* let splx do its thing */ +.lock_clear_splx_lockstat: + pushq %rbp /* align stack properly */ + movq %rsp, %rbp + subq $16, %rsp /* space to save args across splx */ + movq %rdi, 8(%rsp) /* save lock ptr across splx call */ + movl %esi, %edi /* arg for splx */ + call splx /* lower the priority */ + movq 8(%rsp), %rsi /* rsi = lock ptr */ + leave /* unwind stack */ + movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ + movl $LS_LOCK_CLEAR_SPLX_RELEASE, %edi + jmp lockstat_wrapper + SET_SIZE(lock_clear_splx) + +#if defined(__GNUC_AS__) +#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \ + (.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2) + +#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \ + (.lock_clear_splx_lockstat_patch_point + 1) +#else +#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \ + [.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2] + +#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \ + [.lock_clear_splx_lockstat_patch_point + 1] +#endif + +/* + * mutex_enter() and mutex_exit(). + * + * These routines handle the simple cases of mutex_enter() (adaptive + * lock, not held) and mutex_exit() (adaptive lock, held, no waiters). + * If anything complicated is going on we punt to mutex_vector_enter(). + * + * mutex_tryenter() is similar to mutex_enter() but returns zero if + * the lock cannot be acquired, nonzero on success. + * + * If mutex_exit() gets preempted in the window between checking waiters + * and clearing the lock, we can miss wakeups. Disabling preemption + * in the mutex code is prohibitively expensive, so instead we detect + * mutex preemption by examining the trapped PC in the interrupt path. + * If we interrupt a thread in mutex_exit() that has not yet cleared + * the lock, cmnint() resets its PC back to the beginning of + * mutex_exit() so it will check again for waiters when it resumes. + * + * The lockstat code below is activated when the lockstat driver + * calls lockstat_hot_patch() to hot-patch the kernel mutex code. + * Note that we don't need to test lockstat_event_mask here -- we won't + * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats. 
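+ *
+ * For orientation, the uncontended fast paths implemented below amount to
+ * this C sketch (names are illustrative; anything complicated goes to
+ * mutex_vector_enter() / mutex_vector_exit() as described above):
+ *
+ *	static int
+ *	sketch_mutex_enter(volatile uintptr_t *lp, uintptr_t curthread)
+ *	{
+ *		uintptr_t exp = 0;	// 0 == unheld adaptive mutex
+ *		// lock cmpxchgq: install curthread as owner iff unheld
+ *		return (__atomic_compare_exchange_n(lp, &exp, curthread,
+ *		    0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
+ *	}
+ *
+ *	static void
+ *	sketch_mutex_exit(volatile uintptr_t *lp, uintptr_t curthread)
+ *	{
+ *		if (*lp == curthread)	// held by us, no waiters recorded
+ *			*lp = 0;	// clear owner and lock together
+ *		// else: punt to the slow path
+ *	}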
+ */ + + ENTRY_NP(mutex_enter) + movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */ + xorl %eax, %eax /* rax = 0 (unheld adaptive) */ + lock + cmpxchgq %rdx, (%rdi) + jnz mutex_vector_enter +.mutex_enter_lockstat_patch_point: +#if defined(OPTERON_WORKAROUND_6323525) +.mutex_enter_6323525_patch_point: + ret /* nop space for lfence */ + nop + nop +.mutex_enter_lockstat_6323525_patch_point: /* new patch point if lfence */ + nop +#else /* OPTERON_WORKAROUND_6323525 */ + ret +#endif /* OPTERON_WORKAROUND_6323525 */ + movq %rdi, %rsi + movl $LS_MUTEX_ENTER_ACQUIRE, %edi +/* + * expects %rdx=thread, %rsi=lock, %edi=lockstat event + */ + ALTENTRY(lockstat_wrapper) + incb T_LOCKSTAT(%rdx) /* curthread->t_lockstat++ */ + leaq lockstat_probemap(%rip), %rax + movl (%rax, %rdi, DTRACE_IDSIZE), %eax + testl %eax, %eax /* check for non-zero probe */ + jz 1f + pushq %rbp /* align stack properly */ + movq %rsp, %rbp + movl %eax, %edi + movq lockstat_probe, %rax + INDIRECT_CALL_REG(rax) + leave /* unwind stack */ +1: + movq %gs:CPU_THREAD, %rdx /* reload thread ptr */ + decb T_LOCKSTAT(%rdx) /* curthread->t_lockstat-- */ + movl $1, %eax /* return success if tryenter */ + ret + SET_SIZE(lockstat_wrapper) + SET_SIZE(mutex_enter) + +/* + * expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event + */ + ENTRY(lockstat_wrapper_arg) + incb T_LOCKSTAT(%rcx) /* curthread->t_lockstat++ */ + leaq lockstat_probemap(%rip), %rax + movl (%rax, %rdi, DTRACE_IDSIZE), %eax + testl %eax, %eax /* check for non-zero probe */ + jz 1f + pushq %rbp /* align stack properly */ + movq %rsp, %rbp + movl %eax, %edi + movq lockstat_probe, %rax + INDIRECT_CALL_REG(rax) + leave /* unwind stack */ +1: + movq %gs:CPU_THREAD, %rdx /* reload thread ptr */ + decb T_LOCKSTAT(%rdx) /* curthread->t_lockstat-- */ + movl $1, %eax /* return success if tryenter */ + ret + SET_SIZE(lockstat_wrapper_arg) + + + ENTRY(mutex_tryenter) + movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */ + xorl %eax, %eax /* rax = 0 (unheld adaptive) */ + lock + cmpxchgq %rdx, (%rdi) + jnz mutex_vector_tryenter + not %eax /* return success (nonzero) */ +#if defined(OPTERON_WORKAROUND_6323525) +.mutex_tryenter_lockstat_patch_point: +.mutex_tryenter_6323525_patch_point: + ret /* nop space for lfence */ + nop + nop +.mutex_tryenter_lockstat_6323525_patch_point: /* new patch point if lfence */ + nop +#else /* OPTERON_WORKAROUND_6323525 */ +.mutex_tryenter_lockstat_patch_point: + ret +#endif /* OPTERON_WORKAROUND_6323525 */ + movq %rdi, %rsi + movl $LS_MUTEX_ENTER_ACQUIRE, %edi + jmp lockstat_wrapper + SET_SIZE(mutex_tryenter) + + ENTRY(mutex_adaptive_tryenter) + movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */ + xorl %eax, %eax /* rax = 0 (unheld adaptive) */ + lock + cmpxchgq %rdx, (%rdi) + jnz 0f + not %eax /* return success (nonzero) */ +#if defined(OPTERON_WORKAROUND_6323525) +.mutex_atryenter_6323525_patch_point: + ret /* nop space for lfence */ + nop + nop + nop +#else /* OPTERON_WORKAROUND_6323525 */ + ret +#endif /* OPTERON_WORKAROUND_6323525 */ +0: + xorl %eax, %eax /* return failure */ + ret + SET_SIZE(mutex_adaptive_tryenter) + + .globl mutex_owner_running_critical_start + + ENTRY(mutex_owner_running) +mutex_owner_running_critical_start: + movq (%rdi), %r11 /* get owner field */ + andq $MUTEX_THREAD, %r11 /* remove waiters bit */ + cmpq $0, %r11 /* if free, skip */ + je 1f /* go return 0 */ + movq T_CPU(%r11), %r8 /* get owner->t_cpu */ + movq CPU_THREAD(%r8), %r9 /* get t_cpu->cpu_thread */ +.mutex_owner_running_critical_end: + cmpq %r11, %r9 /* owner == running 
thread? */ + je 2f /* yes, go return cpu */ +1: + xorq %rax, %rax /* return 0 */ + ret +2: + movq %r8, %rax /* return cpu */ + ret + SET_SIZE(mutex_owner_running) + + .globl mutex_owner_running_critical_size + .type mutex_owner_running_critical_size, @object + .align CPTRSIZE +mutex_owner_running_critical_size: + .quad .mutex_owner_running_critical_end - mutex_owner_running_critical_start + SET_SIZE(mutex_owner_running_critical_size) + + .globl mutex_exit_critical_start + + ENTRY(mutex_exit) +mutex_exit_critical_start: /* If interrupted, restart here */ + movq %gs:CPU_THREAD, %rdx + cmpq %rdx, (%rdi) + jne mutex_vector_exit /* wrong type or wrong owner */ + movq $0, (%rdi) /* clear owner AND lock */ +.mutex_exit_critical_end: +.mutex_exit_lockstat_patch_point: + ret + movq %rdi, %rsi + movl $LS_MUTEX_EXIT_RELEASE, %edi + jmp lockstat_wrapper + SET_SIZE(mutex_exit) + + .globl mutex_exit_critical_size + .type mutex_exit_critical_size, @object + .align CPTRSIZE +mutex_exit_critical_size: + .quad .mutex_exit_critical_end - mutex_exit_critical_start + SET_SIZE(mutex_exit_critical_size) + +/* + * rw_enter() and rw_exit(). + * + * These routines handle the simple cases of rw_enter (write-locking an unheld + * lock or read-locking a lock that's neither write-locked nor write-wanted) + * and rw_exit (no waiters or not the last reader). If anything complicated + * is going on we punt to rw_enter_sleep() and rw_exit_wakeup(), respectively. + */ + + ENTRY(rw_enter) + cmpl $RW_WRITER, %esi + je .rw_write_enter + movq (%rdi), %rax /* rax = old rw_wwwh value */ + testl $RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax + jnz rw_enter_sleep + leaq RW_READ_LOCK(%rax), %rdx /* rdx = new rw_wwwh value */ + lock + cmpxchgq %rdx, (%rdi) /* try to grab read lock */ + jnz rw_enter_sleep +.rw_read_enter_lockstat_patch_point: + ret + movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ + movq %rdi, %rsi /* rsi = lock ptr */ + movl $LS_RW_ENTER_ACQUIRE, %edi + movl $RW_READER, %edx + jmp lockstat_wrapper_arg +.rw_write_enter: + movq %gs:CPU_THREAD, %rdx + orq $RW_WRITE_LOCKED, %rdx /* rdx = write-locked value */ + xorl %eax, %eax /* rax = unheld value */ + lock + cmpxchgq %rdx, (%rdi) /* try to grab write lock */ + jnz rw_enter_sleep + +#if defined(OPTERON_WORKAROUND_6323525) +.rw_write_enter_lockstat_patch_point: +.rw_write_enter_6323525_patch_point: + ret + nop + nop +.rw_write_enter_lockstat_6323525_patch_point: + nop +#else /* OPTERON_WORKAROUND_6323525 */ +.rw_write_enter_lockstat_patch_point: + ret +#endif /* OPTERON_WORKAROUND_6323525 */ + + movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ + movq %rdi, %rsi /* rsi = lock ptr */ + movl $LS_RW_ENTER_ACQUIRE, %edi + movl $RW_WRITER, %edx + jmp lockstat_wrapper_arg + SET_SIZE(rw_enter) + + ENTRY(rw_exit) + movq (%rdi), %rax /* rax = old rw_wwwh value */ + cmpl $RW_READ_LOCK, %eax /* single-reader, no waiters? */ + jne .rw_not_single_reader + xorl %edx, %edx /* rdx = new value (unheld) */ +.rw_read_exit: + lock + cmpxchgq %rdx, (%rdi) /* try to drop read lock */ + jnz rw_exit_wakeup +.rw_read_exit_lockstat_patch_point: + ret + movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ + movq %rdi, %rsi /* rsi = lock ptr */ + movl $LS_RW_EXIT_RELEASE, %edi + movl $RW_READER, %edx + jmp lockstat_wrapper_arg +.rw_not_single_reader: + testl $RW_WRITE_LOCKED, %eax /* write-locked or write-wanted? 
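+ *
+ * (Recall the rw_wwwh encoding: writer and waiter state live in the low
+ * flag bits -- RW_WRITE_LOCKED, RW_WRITE_WANTED, RW_HAS_WAITERS -- and
+ * readers are counted above them in units of RW_READ_LOCK, which is why
+ * the single-reader test above compares against RW_READ_LOCK and a read
+ * release subtracts it.  The read-enter fast path earlier is, in rough C,
+ * purely for illustration:
+ *
+ *	static int
+ *	sketch_rw_read_enter(volatile uintptr_t *lp)
+ *	{
+ *		uintptr_t old = *lp;
+ *		if (old & (RW_WRITE_LOCKED | RW_WRITE_WANTED))
+ *			return (0);	// caller falls back to rw_enter_sleep
+ *		return (__atomic_compare_exchange_n(lp, &old,
+ *		    old + RW_READ_LOCK, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
+ *	}
+ * )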
*/ + jnz .rw_write_exit + leaq -RW_READ_LOCK(%rax), %rdx /* rdx = new value */ + cmpl $RW_READ_LOCK, %edx + jge .rw_read_exit /* not last reader, safe to drop */ + jmp rw_exit_wakeup /* last reader with waiters */ +.rw_write_exit: + movq %gs:CPU_THREAD, %rax /* rax = thread ptr */ + xorl %edx, %edx /* rdx = new value (unheld) */ + orq $RW_WRITE_LOCKED, %rax /* eax = write-locked value */ + lock + cmpxchgq %rdx, (%rdi) /* try to drop read lock */ + jnz rw_exit_wakeup +.rw_write_exit_lockstat_patch_point: + ret + movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ + movq %rdi, %rsi /* rsi - lock ptr */ + movl $LS_RW_EXIT_RELEASE, %edi + movl $RW_WRITER, %edx + jmp lockstat_wrapper_arg + SET_SIZE(rw_exit) + +#if defined(OPTERON_WORKAROUND_6323525) + +/* + * If it is necessary to patch the lock enter routines with the lfence + * workaround, workaround_6323525_patched is set to a non-zero value so that + * the lockstat_hat_patch routine can patch to the new location of the 'ret' + * instruction. + */ + DGDEF3(workaround_6323525_patched, 4, 4) + .long 0 + +#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size) \ + movq $size, %rbx; \ + movq $dstaddr, %r13; \ + addq %rbx, %r13; \ + movq $srcaddr, %r12; \ + addq %rbx, %r12; \ +0: \ + decq %r13; \ + decq %r12; \ + movzbl (%r12), %esi; \ + movq $1, %rdx; \ + movq %r13, %rdi; \ + call hot_patch_kernel_text; \ + decq %rbx; \ + testq %rbx, %rbx; \ + jg 0b; + +/* + * patch_workaround_6323525: provide workaround for 6323525 + * + * The workaround is to place a fencing instruction (lfence) between the + * mutex operation and the subsequent read-modify-write instruction. + * + * This routine hot patches the lfence instruction on top of the space + * reserved by nops in the lock enter routines. + */ + ENTRY_NP(patch_workaround_6323525) + pushq %rbp + movq %rsp, %rbp + pushq %r12 + pushq %r13 + pushq %rbx + + /* + * lockstat_hot_patch() to use the alternate lockstat workaround + * 6323525 patch points (points past the lfence instruction to the + * new ret) when workaround_6323525_patched is set. + */ + movl $1, workaround_6323525_patched + + /* + * patch ret/nop/nop/nop to lfence/ret at the end of the lock enter + * routines. The 4 bytes are patched in reverse order so that the + * the existing ret is overwritten last. This provides lock enter + * sanity during the intermediate patching stages. 
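+	 *
+	 * In C terms, each HOT_MUTEX_PATCH() invocation below behaves
+	 * roughly like this sketch (hot_patch_kernel_text() is the real
+	 * primitive; the wrapper name and types here are illustrative):
+	 *
+	 *	static void
+	 *	sketch_hot_mutex_patch(caddr_t dst, const uchar_t *src,
+	 *	    size_t size)
+	 *	{
+	 *		// copy src over dst one byte at a time, last byte
+	 *		// first, so the original ret is overwritten last
+	 *		for (ssize_t i = (ssize_t)size - 1; i >= 0; i--)
+	 *			hot_patch_kernel_text(dst + i, src[i], 1);
+	 *	}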
+ */ + HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4) + HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4) + HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4) + HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4) + + popq %rbx + popq %r13 + popq %r12 + movq %rbp, %rsp + popq %rbp + ret +_lfence_insn: + lfence + ret + SET_SIZE(patch_workaround_6323525) + + +#endif /* OPTERON_WORKAROUND_6323525 */ + + +#define HOT_PATCH(addr, event, active_instr, normal_instr, len) \ + movq $normal_instr, %rsi; \ + movq $active_instr, %rdi; \ + leaq lockstat_probemap(%rip), %rax; \ + movl _MUL(event, DTRACE_IDSIZE)(%rax), %eax; \ + testl %eax, %eax; \ + jz 9f; \ + movq %rdi, %rsi; \ +9: \ + movq $len, %rdx; \ + movq $addr, %rdi; \ + call hot_patch_kernel_text + + ENTRY(lockstat_hot_patch) + pushq %rbp /* align stack properly */ + movq %rsp, %rbp + +#if defined(OPTERON_WORKAROUND_6323525) + cmpl $0, workaround_6323525_patched + je 1f + HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point, + LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point, + LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point, + LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) + jmp 2f +1: + HOT_PATCH(.mutex_enter_lockstat_patch_point, + LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.mutex_tryenter_lockstat_patch_point, + LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.rw_write_enter_lockstat_patch_point, + LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) +2: +#else /* OPTERON_WORKAROUND_6323525 */ + HOT_PATCH(.mutex_enter_lockstat_patch_point, + LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.mutex_tryenter_lockstat_patch_point, + LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.rw_write_enter_lockstat_patch_point, + LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) +#endif /* !OPTERON_WORKAROUND_6323525 */ + HOT_PATCH(.mutex_exit_lockstat_patch_point, + LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.rw_read_enter_lockstat_patch_point, + LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.rw_write_exit_lockstat_patch_point, + LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.rw_read_exit_lockstat_patch_point, + LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.lock_set_lockstat_patch_point, + LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.lock_try_lockstat_patch_point, + LS_LOCK_TRY_ACQUIRE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.lock_clear_lockstat_patch_point, + LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1) + HOT_PATCH(.lock_set_spl_lockstat_patch_point, + LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1) + + HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT, + LS_LOCK_CLEAR_SPLX_RELEASE, + LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1); + leave /* unwind stack */ + ret + SET_SIZE(lockstat_hot_patch) + + ENTRY(membar_enter) + ALTENTRY(membar_exit) + ALTENTRY(membar_sync) + mfence /* lighter weight than lock; xorq $0,(%rsp) */ + ret + SET_SIZE(membar_sync) + SET_SIZE(membar_exit) + SET_SIZE(membar_enter) + + ENTRY(membar_producer) + sfence + ret + SET_SIZE(membar_producer) + + ENTRY(membar_consumer) + lfence + ret + SET_SIZE(membar_consumer) + +/* + * thread_onproc() + * Set thread in onproc state for the specified CPU. + * Also set the thread lock pointer to the CPU's onproc lock. + * Since the new lock isn't held, the store ordering is important. 
+ * If not done in assembler, the compiler could reorder the stores. + */ + + ENTRY(thread_onproc) + addq $CPU_THREAD_LOCK, %rsi /* pointer to disp_lock while running */ + movl $ONPROC_THREAD, T_STATE(%rdi) /* set state to TS_ONPROC */ + movq %rsi, T_LOCKP(%rdi) /* store new lock pointer */ + ret + SET_SIZE(thread_onproc) + +/* + * mutex_delay_default(void) + * Spins for approx a few hundred processor cycles and returns to caller. + */ + + ENTRY(mutex_delay_default) + movq $92,%r11 +0: decq %r11 + jg 0b + ret + SET_SIZE(mutex_delay_default) + diff --git a/usr/src/uts/intel/ml/modstubs.s b/usr/src/uts/intel/ml/modstubs.s new file mode 100644 index 0000000000..4143c181a3 --- /dev/null +++ b/usr/src/uts/intel/ml/modstubs.s @@ -0,0 +1,1320 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2019 Joyent, Inc. + * Copyright 2021 OmniOS Community Edition (OmniOSce) Association. + */ + +#include <sys/asm_linkage.h> + +#include "assym.h" + +/* + * !!!!!!!! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! !!!!!!!! + * + * For functions which are either STUBs or WSTUBs the actual function + * need to be called using 'call' instruction because of preamble and + * postamble (i.e mod_hold_stub and mod_release_stub) around the + * function call. Due to this we need to copy arguments for the + * real function. On Intel we can't tell how many arguments are there + * on the stack so we have to either copy everything between esp and + * ebp or copy only a fixed number (MAXNARG - defined here) for + * all the stub functions. Currently we are using MAXNARG (it is a kludge + * but worth it?!). + * + * NOTE: Use NO_UNLOAD_STUBs if the module is NOT unloadable once it is + * loaded. + */ +#define MAXNARG 10 + +/* + * WARNING: there is no check for forgetting to write END_MODULE, + * and if you do, the kernel will most likely crash. Be careful + * + * This file assumes that all of the contributions to the data segment + * will be contiguous in the output file, even though they are separated + * by pieces of text. This is safe for all assemblers I know of now... + */ + +/* + * This file uses ansi preprocessor features: + * + * 1. #define mac(a) extra_ ## a --> mac(x) expands to extra_a + * The old version of this is + * #define mac(a) extra_/.*.*./a + * but this fails if the argument has spaces "mac ( x )" + * (Ignore the dots above, I had to put them in to keep this a comment.) + * + * 2. #define mac(a) #a --> mac(x) expands to "x" + * The old version is + * #define mac(a) "a" + * + * For some reason, the 5.0 preprocessor isn't happy with the above usage. + * For now, we're not using these ansi features. 
+ * + * The reason is that "the 5.0 ANSI preprocessor" is built into the compiler + * and is a tokenizing preprocessor. This means, when confronted by something + * other than C token generation rules, strange things occur. In this case, + * when confronted by an assembly file, it would turn the token ".globl" into + * two tokens "." and "globl". For this reason, the traditional, non-ANSI + * preprocessor is used on assembly files. + * + * It would be desirable to have a non-tokenizing cpp (accp?) to use for this. + */ + +/* + * This file contains the stubs routines for modules which can be autoloaded. + */ + +/* + * See the 'struct mod_modinfo' definition to see what this declaration + * is trying to achieve here. + */ +#define MODULE(module,namespace) \ + .data; \ +module/**/_modname: \ + .string "namespace/module"; \ + SET_SIZE(module/**/_modname); \ + .align CPTRSIZE; \ + .globl module/**/_modinfo; \ + .type module/**/_modinfo, @object; \ +module/**/_modinfo: \ + .quad module/**/_modname; \ + .quad 0 /* storage for modctl pointer */ + + /* then mod_stub_info structures follow until a mods_func_adr is 0 */ + +/* this puts a 0 where the next mods_func_adr would be */ +#define END_MODULE(module) \ + .data; \ + .align CPTRSIZE; \ + .quad 0; \ + SET_SIZE(module/**/_modinfo) + +/* + * The data section in the stub_common macro is the + * mod_stub_info structure for the stub function + */ + +#define STUB_COMMON(module, fcnname, install_fcn, retfcn, weak) \ + ENTRY(fcnname); \ + leaq fcnname/**/_info(%rip), %rax; \ + cmpl $0, MODS_FLAG(%rax); /* weak? */ \ + je stubs_common_code; /* not weak */ \ + testb $MODS_INSTALLED, MODS_FLAG(%rax); /* installed? */ \ + jne stubs_common_code; /* yes, do the mod_hold */ \ + movq MODS_RETFCN(%rax), %rax; /* no, load retfcn */ \ + INDIRECT_JMP_REG(rax); /* no, jump to retfcn */ \ + SET_SIZE(fcnname); \ + .data; \ + .align CPTRSIZE; \ + .type fcnname/**/_info, @object; \ +fcnname/**/_info: \ + .quad install_fcn; /* 0 */ \ + .quad module/**/_modinfo; /* 0x8 */ \ + .quad fcnname; /* 0x10 */ \ + .quad retfcn; /* 0x18 */ \ + .long weak; /* 0x20 */ \ + SET_SIZE(fcnname/**/_info) + +#define STUB_NO_UNLOADABLE(module, fcnname, install_fcn, retfcn, weak) \ + ENTRY(fcnname); \ + leaq fcnname/**/_info(%rip), %rax; \ + testb $MODS_INSTALLED, MODS_FLAG(%rax); /* installed? */ \ + je 5f; /* no */ \ + movq MODS_INSTFCN(%rax), %rax; /* yes, load install_fcn */ \ + INDIRECT_JMP_REG(rax); /* yes, jump to install_fcn */ \ +5: testb $MODS_WEAK, MODS_FLAG(%rax); /* weak? 
*/ \ + je stubs_common_code; /* no, do mod load */ \ + movq MODS_RETFCN(%rax), %rax; /* yes, load retfcn */ \ + INDIRECT_JMP_REG(rax); /* yes, jump to retfcn */ \ + SET_SIZE(fcnname); \ + .data; \ + .align CPTRSIZE; \ + .type fcnname/**/_info, @object; \ +fcnname/**/_info: \ + .quad install_fcn; /* 0 */ \ + .quad module/**/_modinfo; /* 0x8 */ \ + .quad fcnname; /* 0x10 */ \ + .quad retfcn; /* 0x18 */ \ + .long weak; /* 0x20 */ \ + SET_SIZE(fcnname/**/_info) + +/* + * We branch here with the fcnname_info pointer in %rax + */ + ENTRY_NP(stubs_common_code) + .globl mod_hold_stub + .globl mod_release_stub + pushq %rbp + movq %rsp, %rbp + subq $0x10, %rsp + movq %r15, (%rsp) /* (caller saved) */ + movq %rax, %r15 /* stash the fcnname_info pointer */ + /* + * save incoming register arguments + */ + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %r8 + pushq %r9 + /* (next 4 args, if any, are already on the stack above %rbp) */ + movq %r15, %rdi + call mod_hold_stub /* mod_hold_stub(mod_stub_info *) */ + cmpl $-1, %eax /* error? */ + jne .L1 + movq 0x18(%r15), %rax + INDIRECT_CALL_REG(rax) + addq $0x30, %rsp + jmp .L2 +.L1: + /* + * copy MAXNARG == 10 incoming arguments + */ + popq %r9 + popq %r8 + popq %rcx + popq %rdx + popq %rsi + popq %rdi + /* + * stack: + * arg9 0x38(%rsp) + * arg8 0x30(%rsp) + * arg7 0x28(%rsp) + * arg6 0x20(%rsp) + * saved %rip 0x18(%rsp) + * saved %rbp 0x10(%rsp) + * <pad> 0x8(%rsp) + * saved %r15 0x0(%rsp) + */ + movl $MAXNARG - 6 + 3, %r11d + pushq (%rsp, %r11, 8) + pushq (%rsp, %r11, 8) + pushq (%rsp, %r11, 8) + pushq (%rsp, %r11, 8) + movq (%r15), %rax + INDIRECT_CALL_REG(rax) /* call the stub fn(arg, ..) */ + addq $0x20, %rsp /* pop off last 4 args */ + pushq %rax /* save any return values */ + pushq %rdx + movq %r15, %rdi + call mod_release_stub /* release hold on module */ + popq %rdx /* restore return values */ + popq %rax +.L2: + popq %r15 + leave + ret + SET_SIZE(stubs_common_code) + +#define STUB(module, fcnname, retfcn) \ + STUB_COMMON(module, fcnname, mod_hold_stub, retfcn, 0) + +/* + * "weak stub", don't load on account of this call + */ +#define WSTUB(module, fcnname, retfcn) \ + STUB_COMMON(module, fcnname, retfcn, retfcn, MODS_WEAK) + +/* + * "non-unloadable stub", don't bother 'holding' module if it's already loaded + * since the module cannot be unloaded. + * + * User *MUST* guarantee the module is not unloadable (no _fini routine). + */ +#define NO_UNLOAD_STUB(module, fcnname, retfcn) \ + STUB_NO_UNLOADABLE(module, fcnname, retfcn, retfcn, MODS_NOUNLOAD) + +/* + * "weak stub" for non-unloadable module, don't load on account of this call + */ +#define NO_UNLOAD_WSTUB(module, fcnname, retfcn) \ + STUB_NO_UNLOADABLE(module, fcnname, retfcn, retfcn, MODS_NOUNLOAD|MODS_WEAK) + +/* + * this is just a marker for the beginning area of text that contains stubs + */ + ENTRY_NP(stubs_base) + nop + +/* + * WARNING WARNING WARNING!!!!!! + * + * On the MODULE macro you MUST NOT use any spaces!!! They are + * significant to the preprocessor. With ansi c there is a way around this + * but for some reason (yet to be investigated) ansi didn't work for other + * reasons! + * + * When zero is used as the return function, the system will call + * panic if the stub can't be resolved. + */ + +/* + * Stubs for devfs. A non-unloadable module. 
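+ *
+ * (As a reminder of the machinery above: every stub generated by these
+ * macros funnels through stubs_common_code, whose control flow is roughly
+ * this C sketch.  Argument copying is glossed over, and call_retfcn() /
+ * call_instfcn() are illustrative stand-ins for indirect calls through
+ * the stub's MODS_RETFCN and MODS_INSTFCN slots:
+ *
+ *	uintptr_t
+ *	sketch_stub_dispatch(struct mod_stub_info *stub)
+ *	{
+ *		uintptr_t rv;
+ *
+ *		if (mod_hold_stub(stub) == -1)	// module could not be loaded
+ *			return (call_retfcn(stub));
+ *		rv = call_instfcn(stub);	// real function, resolved now
+ *		mod_release_stub(stub);
+ *		return (rv);
+ *	}
+ * )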
+ */ + +#ifndef DEVFS_MODULE + MODULE(devfs,fs); + NO_UNLOAD_STUB(devfs, devfs_clean, nomod_minus_one); + NO_UNLOAD_STUB(devfs, devfs_lookupname, nomod_minus_one); + NO_UNLOAD_STUB(devfs, devfs_walk, nomod_minus_one); + NO_UNLOAD_STUB(devfs, devfs_devpolicy, nomod_minus_one); + NO_UNLOAD_STUB(devfs, devfs_reset_perm, nomod_minus_one); + NO_UNLOAD_STUB(devfs, devfs_remdrv_cleanup, nomod_minus_one); + END_MODULE(devfs); +#endif + +#ifndef DEV_MODULE + MODULE(dev,fs); + NO_UNLOAD_STUB(dev, sdev_modctl_readdir, nomod_minus_one); + NO_UNLOAD_STUB(dev, sdev_modctl_readdir_free, nomod_minus_one); + NO_UNLOAD_STUB(dev, devname_filename_register, nomod_minus_one); + NO_UNLOAD_STUB(dev, sdev_modctl_devexists, nomod_minus_one); + NO_UNLOAD_STUB(dev, devname_profile_update, nomod_minus_one); + NO_UNLOAD_STUB(dev, sdev_devstate_change, nomod_minus_one); + NO_UNLOAD_STUB(dev, devvt_getvnodeops, nomod_minus_one); + NO_UNLOAD_STUB(dev, devpts_getvnodeops, nomod_zero); + END_MODULE(dev); +#endif + +/* + * Stubs for specfs. A non-unloadable module. + */ + +#ifndef SPEC_MODULE + MODULE(specfs,fs); + NO_UNLOAD_STUB(specfs, common_specvp, nomod_zero); + NO_UNLOAD_STUB(specfs, makectty, nomod_zero); + NO_UNLOAD_STUB(specfs, makespecvp, nomod_zero); + NO_UNLOAD_STUB(specfs, smark, nomod_zero); + NO_UNLOAD_STUB(specfs, spec_segmap, nomod_einval); + NO_UNLOAD_STUB(specfs, specfind, nomod_zero); + NO_UNLOAD_STUB(specfs, specvp, nomod_zero); + NO_UNLOAD_STUB(specfs, devi_stillreferenced, nomod_zero); + NO_UNLOAD_STUB(specfs, spec_getvnodeops, nomod_zero); + NO_UNLOAD_STUB(specfs, spec_char_map, nomod_zero); + NO_UNLOAD_STUB(specfs, specvp_devfs, nomod_zero); + NO_UNLOAD_STUB(specfs, spec_assoc_vp_with_devi, nomod_void); + NO_UNLOAD_STUB(specfs, spec_hold_devi_by_vp, nomod_zero); + NO_UNLOAD_STUB(specfs, spec_snode_walk, nomod_void); + NO_UNLOAD_STUB(specfs, spec_devi_open_count, nomod_minus_one); + NO_UNLOAD_STUB(specfs, spec_is_clone, nomod_zero); + NO_UNLOAD_STUB(specfs, spec_is_selfclone, nomod_zero); + NO_UNLOAD_STUB(specfs, spec_fence_snode, nomod_minus_one); + NO_UNLOAD_STUB(specfs, spec_unfence_snode, nomod_minus_one); + END_MODULE(specfs); +#endif + + +/* + * Stubs for sockfs. A non-unloadable module. 
+ */ +#ifndef SOCK_MODULE + MODULE(sockfs,fs); + NO_UNLOAD_STUB(sockfs, so_socket, nomod_zero); + NO_UNLOAD_STUB(sockfs, so_socketpair, nomod_zero); + NO_UNLOAD_STUB(sockfs, bind, nomod_zero); + NO_UNLOAD_STUB(sockfs, listen, nomod_zero); + NO_UNLOAD_STUB(sockfs, accept, nomod_zero); + NO_UNLOAD_STUB(sockfs, connect, nomod_zero); + NO_UNLOAD_STUB(sockfs, shutdown, nomod_zero); + NO_UNLOAD_STUB(sockfs, recv, nomod_zero); + NO_UNLOAD_STUB(sockfs, recvfrom, nomod_zero); + NO_UNLOAD_STUB(sockfs, recvmsg, nomod_zero); + NO_UNLOAD_STUB(sockfs, send, nomod_zero); + NO_UNLOAD_STUB(sockfs, sendmsg, nomod_zero); + NO_UNLOAD_STUB(sockfs, sendto, nomod_zero); +#ifdef _SYSCALL32_IMPL + NO_UNLOAD_STUB(sockfs, recv32, nomod_zero); + NO_UNLOAD_STUB(sockfs, recvfrom32, nomod_zero); + NO_UNLOAD_STUB(sockfs, send32, nomod_zero); + NO_UNLOAD_STUB(sockfs, sendto32, nomod_zero); +#endif /* _SYSCALL32_IMPL */ + NO_UNLOAD_STUB(sockfs, getpeername, nomod_zero); + NO_UNLOAD_STUB(sockfs, getsockname, nomod_zero); + NO_UNLOAD_STUB(sockfs, getsockopt, nomod_zero); + NO_UNLOAD_STUB(sockfs, setsockopt, nomod_zero); + NO_UNLOAD_STUB(sockfs, sockconfig, nomod_zero); + NO_UNLOAD_STUB(sockfs, sock_getmsg, nomod_zero); + NO_UNLOAD_STUB(sockfs, sock_putmsg, nomod_zero); + NO_UNLOAD_STUB(sockfs, sosendfile64, nomod_zero); + NO_UNLOAD_STUB(sockfs, snf_segmap, nomod_einval); + NO_UNLOAD_STUB(sockfs, sock_getfasync, nomod_zero); + NO_UNLOAD_STUB(sockfs, nl7c_sendfilev, nomod_zero); + NO_UNLOAD_STUB(sockfs, sotpi_sototpi, nomod_zero); + NO_UNLOAD_STUB(sockfs, socket_sendmblk, nomod_zero); + NO_UNLOAD_STUB(sockfs, socket_setsockopt, nomod_zero); + END_MODULE(sockfs); +#endif + +/* + * IPsec stubs. + */ + +#ifndef IPSECAH_MODULE + MODULE(ipsecah,drv); + WSTUB(ipsecah, ipsec_construct_inverse_acquire, nomod_zero); + WSTUB(ipsecah, sadb_acquire, nomod_zero); + WSTUB(ipsecah, ipsecah_algs_changed, nomod_zero); + WSTUB(ipsecah, sadb_alg_update, nomod_zero); + WSTUB(ipsecah, sadb_unlinkassoc, nomod_zero); + WSTUB(ipsecah, sadb_insertassoc, nomod_zero); + WSTUB(ipsecah, ipsecah_in_assocfailure, nomod_zero); + WSTUB(ipsecah, sadb_set_lpkt, nomod_zero); + WSTUB(ipsecah, ipsecah_icmp_error, nomod_zero); + END_MODULE(ipsecah); +#endif + +#ifndef IPSECESP_MODULE + MODULE(ipsecesp,drv); + WSTUB(ipsecesp, ipsecesp_fill_defs, nomod_zero); + WSTUB(ipsecesp, ipsecesp_algs_changed, nomod_zero); + WSTUB(ipsecesp, ipsecesp_in_assocfailure, nomod_zero); + WSTUB(ipsecesp, ipsecesp_init_funcs, nomod_zero); + WSTUB(ipsecesp, ipsecesp_icmp_error, nomod_zero); + WSTUB(ipsecesp, ipsecesp_send_keepalive, nomod_zero); + END_MODULE(ipsecesp); +#endif + +#ifndef KEYSOCK_MODULE + MODULE(keysock, drv); + WSTUB(keysock, keysock_spdsock_wput_iocdata, nomod_void); + WSTUB(keysock, keysock_plumb_ipsec, nomod_zero); + WSTUB(keysock, keysock_extended_reg, nomod_zero); + WSTUB(keysock, keysock_next_seq, nomod_zero); + END_MODULE(keysock); +#endif + +#ifndef SPDSOCK_MODULE + MODULE(spdsock,drv); + WSTUB(spdsock, spdsock_update_pending_algs, nomod_zero); + END_MODULE(spdsock); +#endif + +/* + * Stubs for nfs common code. + * XXX nfs_getvnodeops should go away with removal of kludge in vnode.c + */ +#ifndef NFS_MODULE + MODULE(nfs,fs); + WSTUB(nfs, nfs_getvnodeops, nomod_zero); + WSTUB(nfs, nfs_perror, nomod_zero); + WSTUB(nfs, nfs_cmn_err, nomod_zero); + WSTUB(nfs, clcleanup_zone, nomod_zero); + WSTUB(nfs, clcleanup4_zone, nomod_zero); + END_MODULE(nfs); +#endif + + +/* + * Stubs for nfs_dlboot (diskless booting). 
+ */ +#ifndef NFS_DLBOOT_MODULE + MODULE(nfs_dlboot,misc); + STUB(nfs_dlboot, mount_root, nomod_minus_one); + STUB(nfs_dlboot, dhcpinit, nomod_minus_one); + END_MODULE(nfs_dlboot); +#endif + +/* + * Stubs for nfs server-only code. + */ +#ifndef NFSSRV_MODULE + MODULE(nfssrv,misc); + STUB(nfssrv, exportfs, nomod_minus_one); + STUB(nfssrv, nfs_getfh, nomod_minus_one); + STUB(nfssrv, nfsl_flush, nomod_minus_one); + STUB(nfssrv, rfs4_check_delegated, nomod_zero); + STUB(nfssrv, mountd_args, nomod_minus_one); + NO_UNLOAD_STUB(nfssrv, rdma_start, nomod_zero); + NO_UNLOAD_STUB(nfssrv, nfs_svc, nomod_zero); + END_MODULE(nfssrv); +#endif + +/* + * Stubs for kernel lock manager. + */ +#ifndef KLM_MODULE + MODULE(klmmod,misc); + NO_UNLOAD_STUB(klmmod, lm_svc, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_shutdown, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_unexport, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_cprresume, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_cprsuspend, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_safelock, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_safemap, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_has_sleep, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_free_config, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_vp_active, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_get_sysid, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_rel_sysid, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_alloc_sysidt, nomod_minus_one); + NO_UNLOAD_STUB(klmmod, lm_free_sysidt, nomod_zero); + NO_UNLOAD_STUB(klmmod, lm_sysidt, nomod_minus_one); + END_MODULE(klmmod); +#endif + +#ifndef KLMOPS_MODULE + MODULE(klmops,misc); + NO_UNLOAD_STUB(klmops, lm_frlock, nomod_zero); + NO_UNLOAD_STUB(klmops, lm4_frlock, nomod_zero); + NO_UNLOAD_STUB(klmops, lm_shrlock, nomod_zero); + NO_UNLOAD_STUB(klmops, lm4_shrlock, nomod_zero); + NO_UNLOAD_STUB(klmops, lm_nlm_dispatch, nomod_zero); + NO_UNLOAD_STUB(klmops, lm_nlm4_dispatch, nomod_zero); + NO_UNLOAD_STUB(klmops, lm_nlm_reclaim, nomod_zero); + NO_UNLOAD_STUB(klmops, lm_nlm4_reclaim, nomod_zero); + NO_UNLOAD_STUB(klmops, lm_register_lock_locally, nomod_zero); + END_MODULE(klmops); +#endif + +/* + * Stubs for kernel TLI module + * XXX currently we never allow this to unload + */ +#ifndef TLI_MODULE + MODULE(tlimod,misc); + NO_UNLOAD_STUB(tlimod, t_kopen, nomod_minus_one); + NO_UNLOAD_STUB(tlimod, t_kunbind, nomod_zero); + NO_UNLOAD_STUB(tlimod, t_kadvise, nomod_zero); + NO_UNLOAD_STUB(tlimod, t_krcvudata, nomod_zero); + NO_UNLOAD_STUB(tlimod, t_ksndudata, nomod_zero); + NO_UNLOAD_STUB(tlimod, t_kalloc, nomod_zero); + NO_UNLOAD_STUB(tlimod, t_kbind, nomod_zero); + NO_UNLOAD_STUB(tlimod, t_kclose, nomod_zero); + NO_UNLOAD_STUB(tlimod, t_kspoll, nomod_zero); + NO_UNLOAD_STUB(tlimod, t_kfree, nomod_zero); + NO_UNLOAD_STUB(tlimod, t_koptmgmt, nomod_zero); + END_MODULE(tlimod); +#endif + +/* + * Stubs for kernel RPC module + * XXX currently we never allow this to unload + */ +#ifndef RPC_MODULE + MODULE(rpcmod,strmod); + NO_UNLOAD_STUB(rpcmod, clnt_tli_kcreate, nomod_minus_one); + NO_UNLOAD_STUB(rpcmod, svc_tli_kcreate, nomod_minus_one); + NO_UNLOAD_STUB(rpcmod, bindresvport, nomod_minus_one); + NO_UNLOAD_STUB(rpcmod, rdma_register_mod, nomod_minus_one); + NO_UNLOAD_STUB(rpcmod, rdma_unregister_mod, nomod_minus_one); + NO_UNLOAD_STUB(rpcmod, svc_queuereq, nomod_minus_one); + NO_UNLOAD_STUB(rpcmod, clist_add, nomod_minus_one); + END_MODULE(rpcmod); +#endif + +/* + * Stubs for des + */ +#ifndef DES_MODULE + MODULE(des,misc); + STUB(des, cbc_crypt, nomod_zero); + STUB(des, ecb_crypt, nomod_zero); + STUB(des, _des_crypt, 
nomod_zero); + END_MODULE(des); +#endif + +/* + * Stubs for procfs. A non-unloadable module. + */ +#ifndef PROC_MODULE + MODULE(procfs,fs); + NO_UNLOAD_STUB(procfs, prfree, nomod_zero); + NO_UNLOAD_STUB(procfs, prexit, nomod_zero); + NO_UNLOAD_STUB(procfs, prlwpfree, nomod_zero); + NO_UNLOAD_STUB(procfs, prlwpexit, nomod_zero); + NO_UNLOAD_STUB(procfs, prinvalidate, nomod_zero); + NO_UNLOAD_STUB(procfs, prnsegs, nomod_zero); + NO_UNLOAD_STUB(procfs, prgetcred, nomod_zero); + NO_UNLOAD_STUB(procfs, prgetpriv, nomod_zero); + NO_UNLOAD_STUB(procfs, prgetprivsize, nomod_zero); + NO_UNLOAD_STUB(procfs, prgetsecflags, nomod_zero); + NO_UNLOAD_STUB(procfs, prgetstatus, nomod_zero); + NO_UNLOAD_STUB(procfs, prgetlwpstatus, nomod_zero); + NO_UNLOAD_STUB(procfs, prgetpsinfo, nomod_zero); + NO_UNLOAD_STUB(procfs, prgetlwpsinfo, nomod_zero); + NO_UNLOAD_STUB(procfs, oprgetstatus, nomod_zero); + NO_UNLOAD_STUB(procfs, oprgetpsinfo, nomod_zero); +#ifdef _SYSCALL32_IMPL + NO_UNLOAD_STUB(procfs, prgetstatus32, nomod_zero); + NO_UNLOAD_STUB(procfs, prgetlwpstatus32, nomod_zero); + NO_UNLOAD_STUB(procfs, prgetpsinfo32, nomod_zero); + NO_UNLOAD_STUB(procfs, prgetlwpsinfo32, nomod_zero); + NO_UNLOAD_STUB(procfs, oprgetstatus32, nomod_zero); + NO_UNLOAD_STUB(procfs, oprgetpsinfo32, nomod_zero); + NO_UNLOAD_STUB(procfs, psinfo_kto32, nomod_zero); + NO_UNLOAD_STUB(procfs, lwpsinfo_kto32, nomod_zero); +#endif /* _SYSCALL32_IMPL */ + NO_UNLOAD_STUB(procfs, prnotify, nomod_zero); + NO_UNLOAD_STUB(procfs, prexecstart, nomod_zero); + NO_UNLOAD_STUB(procfs, prexecend, nomod_zero); + NO_UNLOAD_STUB(procfs, prrelvm, nomod_zero); + NO_UNLOAD_STUB(procfs, prbarrier, nomod_zero); + NO_UNLOAD_STUB(procfs, estimate_msacct, nomod_zero); + NO_UNLOAD_STUB(procfs, pr_getprot, nomod_zero); + NO_UNLOAD_STUB(procfs, pr_getprot_done, nomod_zero); + NO_UNLOAD_STUB(procfs, pr_getsegsize, nomod_zero); + NO_UNLOAD_STUB(procfs, pr_isobject, nomod_zero); + NO_UNLOAD_STUB(procfs, pr_isself, nomod_zero); + NO_UNLOAD_STUB(procfs, pr_allstopped, nomod_zero); + NO_UNLOAD_STUB(procfs, pr_free_watched_pages, nomod_zero); + END_MODULE(procfs); +#endif + +/* + * Stubs for fifofs + */ +#ifndef FIFO_MODULE + MODULE(fifofs,fs); + NO_UNLOAD_STUB(fifofs, fifovp, nomod_zero); + NO_UNLOAD_STUB(fifofs, fifo_getinfo, nomod_zero); + NO_UNLOAD_STUB(fifofs, fifo_vfastoff, nomod_zero); + END_MODULE(fifofs); +#endif + +/* + * Stubs for ufs + * + * This is needed to support the old quotactl system call. + * When the old sysent stuff goes away, this will need to be revisited. 
+ */ +#ifndef UFS_MODULE + MODULE(ufs,fs); + STUB(ufs, quotactl, nomod_minus_one); + END_MODULE(ufs); +#endif + +/* + * Stubs for zfs + */ +#ifndef ZFS_MODULE + MODULE(zfs,fs); + STUB(zfs, dsl_prop_get, nomod_minus_one); + STUB(zfs, spa_boot_init, nomod_minus_one); + STUB(zfs, zfs_prop_to_name, nomod_zero); + END_MODULE(zfs); +#endif + +/* + * Stubs for dcfs + */ +#ifndef DCFS_MODULE + MODULE(dcfs,fs); + STUB(dcfs, decompvp, 0); + END_MODULE(dcfs); +#endif + +/* + * Stubs for namefs + */ +#ifndef NAMEFS_MODULE + MODULE(namefs,fs); + STUB(namefs, nm_unmountall, 0); + END_MODULE(namefs); +#endif + +/* + * Stubs for sysdc + */ +#ifndef SDC_MODULE + MODULE(SDC,sched); + NO_UNLOAD_STUB(SDC, sysdc_thread_enter, nomod_zero); + END_MODULE(SDC); +#endif + +/* + * Stubs for ts_dptbl + */ +#ifndef TS_DPTBL_MODULE + MODULE(TS_DPTBL,sched); + STUB(TS_DPTBL, ts_getdptbl, 0); + STUB(TS_DPTBL, ts_getkmdpris, 0); + STUB(TS_DPTBL, ts_getmaxumdpri, 0); + END_MODULE(TS_DPTBL); +#endif + +/* + * Stubs for rt_dptbl + */ +#ifndef RT_DPTBL_MODULE + MODULE(RT_DPTBL,sched); + STUB(RT_DPTBL, rt_getdptbl, 0); + END_MODULE(RT_DPTBL); +#endif + +/* + * Stubs for ia_dptbl + */ +#ifndef IA_DPTBL_MODULE + MODULE(IA_DPTBL,sched); + STUB(IA_DPTBL, ia_getdptbl, nomod_zero); + STUB(IA_DPTBL, ia_getkmdpris, nomod_zero); + STUB(IA_DPTBL, ia_getmaxumdpri, nomod_zero); + END_MODULE(IA_DPTBL); +#endif + +/* + * Stubs for FSS scheduler + */ +#ifndef FSS_MODULE + MODULE(FSS,sched); + WSTUB(FSS, fss_allocbuf, nomod_zero); + WSTUB(FSS, fss_freebuf, nomod_zero); + WSTUB(FSS, fss_changeproj, nomod_zero); + WSTUB(FSS, fss_changepset, nomod_zero); + END_MODULE(FSS); +#endif + +/* + * Stubs for fx_dptbl + */ +#ifndef FX_DPTBL_MODULE + MODULE(FX_DPTBL,sched); + STUB(FX_DPTBL, fx_getdptbl, 0); + STUB(FX_DPTBL, fx_getmaxumdpri, 0); + END_MODULE(FX_DPTBL); +#endif + +/* + * Stubs for bootdev + */ +#ifndef BOOTDEV_MODULE + MODULE(bootdev,misc); + STUB(bootdev, i_promname_to_devname, 0); + STUB(bootdev, i_convert_boot_device_name, 0); + END_MODULE(bootdev); +#endif + +/* + * stubs for strplumb... + */ +#ifndef STRPLUMB_MODULE + MODULE(strplumb,misc); + STUB(strplumb, strplumb, 0); + STUB(strplumb, strplumb_load, 0); + STUB(strplumb, strplumb_get_netdev_path, 0); + END_MODULE(strplumb); +#endif + +/* + * Stubs for console configuration module + */ +#ifndef CONSCONFIG_MODULE + MODULE(consconfig,misc); + STUB(consconfig, consconfig, 0); + STUB(consconfig, consconfig_get_usb_kb_path, 0); + STUB(consconfig, consconfig_get_usb_ms_path, 0); + STUB(consconfig, consconfig_get_plat_fbpath, 0); + STUB(consconfig, consconfig_console_is_ready, 0); + END_MODULE(consconfig); +#endif + +/* + * Stubs for accounting. + */ +#ifndef SYSACCT_MODULE + MODULE(sysacct,sys); + NO_UNLOAD_WSTUB(sysacct, acct, nomod_zero); + NO_UNLOAD_WSTUB(sysacct, acct_fs_in_use, nomod_zero); + END_MODULE(sysacct); +#endif + +/* + * Stubs for semaphore routines. sem.c + */ +#ifndef SEMSYS_MODULE + MODULE(semsys,sys); + NO_UNLOAD_WSTUB(semsys, semexit, nomod_zero); + END_MODULE(semsys); +#endif + +/* + * Stubs for shmem routines. 
shm.c + */ +#ifndef SHMSYS_MODULE + MODULE(shmsys,sys); + NO_UNLOAD_WSTUB(shmsys, shmexit, nomod_zero); + NO_UNLOAD_WSTUB(shmsys, shmfork, nomod_zero); + NO_UNLOAD_WSTUB(shmsys, shmgetid, nomod_minus_one); + END_MODULE(shmsys); +#endif + +/* + * Stubs for doors + */ +#ifndef DOOR_MODULE + MODULE(doorfs,sys); + NO_UNLOAD_WSTUB(doorfs, door_slam, nomod_zero); + NO_UNLOAD_WSTUB(doorfs, door_exit, nomod_zero); + NO_UNLOAD_WSTUB(doorfs, door_revoke_all, nomod_zero); + NO_UNLOAD_WSTUB(doorfs, door_fork, nomod_zero); + NO_UNLOAD_STUB(doorfs, door_upcall, nomod_einval); + NO_UNLOAD_STUB(doorfs, door_ki_create, nomod_einval); + NO_UNLOAD_STUB(doorfs, door_ki_open, nomod_einval); + NO_UNLOAD_STUB(doorfs, door_ki_lookup, nomod_zero); + NO_UNLOAD_WSTUB(doorfs, door_ki_upcall, nomod_einval); + NO_UNLOAD_WSTUB(doorfs, door_ki_upcall_limited, nomod_einval); + NO_UNLOAD_WSTUB(doorfs, door_ki_hold, nomod_zero); + NO_UNLOAD_WSTUB(doorfs, door_ki_rele, nomod_zero); + NO_UNLOAD_WSTUB(doorfs, door_ki_info, nomod_einval); + END_MODULE(doorfs); +#endif + +/* + * Stubs for MD5 + */ +#ifndef MD5_MODULE + MODULE(md5,misc); + WSTUB(md5, MD5Init, nomod_zero); + WSTUB(md5, MD5Update, nomod_zero); + WSTUB(md5, MD5Final, nomod_zero); + END_MODULE(md5); +#endif + +/* + * Stubs for idmap + */ +#ifndef IDMAP_MODULE + MODULE(idmap,misc); + STUB(idmap, kidmap_batch_getgidbysid, nomod_zero); + STUB(idmap, kidmap_batch_getpidbysid, nomod_zero); + STUB(idmap, kidmap_batch_getsidbygid, nomod_zero); + STUB(idmap, kidmap_batch_getsidbyuid, nomod_zero); + STUB(idmap, kidmap_batch_getuidbysid, nomod_zero); + STUB(idmap, kidmap_get_create, nomod_zero); + STUB(idmap, kidmap_get_destroy, nomod_zero); + STUB(idmap, kidmap_get_mappings, nomod_zero); + STUB(idmap, kidmap_getgidbysid, nomod_zero); + STUB(idmap, kidmap_getpidbysid, nomod_zero); + STUB(idmap, kidmap_getsidbygid, nomod_zero); + STUB(idmap, kidmap_getsidbyuid, nomod_zero); + STUB(idmap, kidmap_getuidbysid, nomod_zero); + STUB(idmap, idmap_get_door, nomod_einval); + STUB(idmap, idmap_unreg_dh, nomod_einval); + STUB(idmap, idmap_reg_dh, nomod_einval); + STUB(idmap, idmap_purge_cache, nomod_einval); + END_MODULE(idmap); +#endif + +/* + * Stubs for auditing. 
+ */ +#ifndef C2AUDIT_MODULE + MODULE(c2audit,sys); + NO_UNLOAD_STUB(c2audit, audit_init_module, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_start, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_finish, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit, nomod_zero); + NO_UNLOAD_STUB(c2audit, auditdoor, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_closef, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_core_start, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_core_finish, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_strputmsg, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_savepath, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_anchorpath, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_exit, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_exec, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_symlink, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_symlink_create, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_vncreate_start, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_vncreate_finish, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_enterprom, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_exitprom, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_chdirec, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_setf, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_sock, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_strgetmsg, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_ipc, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_ipcget, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_fdsend, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_fdrecv, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_priv, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_setppriv, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_psecflags, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_devpolicy, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_setfsat_path, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_cryptoadm, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_kssl, nomod_zero); + NO_UNLOAD_STUB(c2audit, audit_pf_policy, nomod_zero); + NO_UNLOAD_STUB(c2audit, au_doormsg, nomod_zero); + NO_UNLOAD_STUB(c2audit, au_uwrite, nomod_zero); + NO_UNLOAD_STUB(c2audit, au_to_arg32, nomod_zero); + NO_UNLOAD_STUB(c2audit, au_free_rec, nomod_zero); + END_MODULE(c2audit); +#endif + +/* + * Stubs for kernel rpc security service module + */ +#ifndef RPCSEC_MODULE + MODULE(rpcsec,misc); + NO_UNLOAD_STUB(rpcsec, sec_clnt_revoke, nomod_zero); + NO_UNLOAD_STUB(rpcsec, authkern_create, nomod_zero); + NO_UNLOAD_STUB(rpcsec, sec_svc_msg, nomod_zero); + NO_UNLOAD_STUB(rpcsec, sec_svc_control, nomod_zero); + END_MODULE(rpcsec); +#endif + +/* + * Stubs for rpc RPCSEC_GSS security service module + */ +#ifndef RPCSEC_GSS_MODULE + MODULE(rpcsec_gss,misc); + NO_UNLOAD_STUB(rpcsec_gss, __svcrpcsec_gss, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_getcred, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_set_callback, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_secget, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_secfree, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_seccreate, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_set_defaults, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_revauth, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_secpurge, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_cleanup, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_get_versions, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_max_data_length, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_svc_max_data_length, nomod_zero); + NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_get_service_type, nomod_zero); + 
END_MODULE(rpcsec_gss); +#endif + +/* + * Stubs for PCI configurator module (misc/pcicfg). + */ +#ifndef PCICFG_MODULE + MODULE(pcicfg,misc); + STUB(pcicfg, pcicfg_configure, 0); + STUB(pcicfg, pcicfg_unconfigure, 0); + END_MODULE(pcicfg); +#endif + +/* + * Stubs for pcieb nexus driver. + */ +#ifndef PCIEB_MODULE + MODULE(pcieb,drv); + STUB(pcieb, pcieb_intel_error_workaround, 0); + END_MODULE(pcieb); +#endif + +#ifndef IWSCN_MODULE + MODULE(iwscn,drv); + STUB(iwscn, srpop, 0); + END_MODULE(iwscn); +#endif + +/* + * Stubs for checkpoint-resume module + */ +#ifndef CPR_MODULE + MODULE(cpr,misc); + STUB(cpr, cpr, 0); + END_MODULE(cpr); +#endif + +/* + * Stubs for kernel probes (tnf module). Not unloadable. + */ +#ifndef TNF_MODULE + MODULE(tnf,drv); + NO_UNLOAD_STUB(tnf, tnf_ref32_1, nomod_zero); + NO_UNLOAD_STUB(tnf, tnf_string_1, nomod_zero); + NO_UNLOAD_STUB(tnf, tnf_opaque_array_1, nomod_zero); + NO_UNLOAD_STUB(tnf, tnf_struct_tag_1, nomod_zero); + NO_UNLOAD_STUB(tnf, tnf_allocate, nomod_zero); + END_MODULE(tnf); +#endif + +/* + * Stubs for i86hvm bootstraping + */ +#ifndef HVM_BOOTSTRAP + MODULE(hvm_bootstrap,misc); + NO_UNLOAD_STUB(hvm_bootstrap, hvmboot_rootconf, nomod_zero); + END_MODULE(hvm_bootstrap); +#endif + +/* + * Clustering: stubs for bootstrapping. + */ +#ifndef CL_BOOTSTRAP + MODULE(cl_bootstrap,misc); + NO_UNLOAD_WSTUB(cl_bootstrap, clboot_modload, nomod_minus_one); + NO_UNLOAD_WSTUB(cl_bootstrap, clboot_loadrootmodules, nomod_zero); + NO_UNLOAD_WSTUB(cl_bootstrap, clboot_rootconf, nomod_zero); + NO_UNLOAD_WSTUB(cl_bootstrap, clboot_mountroot, nomod_zero); + NO_UNLOAD_WSTUB(cl_bootstrap, clconf_init, nomod_zero); + NO_UNLOAD_WSTUB(cl_bootstrap, clconf_get_nodeid, nomod_zero); + NO_UNLOAD_WSTUB(cl_bootstrap, clconf_maximum_nodeid, nomod_zero); + NO_UNLOAD_WSTUB(cl_bootstrap, cluster, nomod_zero); + END_MODULE(cl_bootstrap); +#endif + +/* + * Clustering: stubs for cluster infrastructure. + */ +#ifndef CL_COMM_MODULE + MODULE(cl_comm,misc); + NO_UNLOAD_STUB(cl_comm, cladmin, nomod_minus_one); + END_MODULE(cl_comm); +#endif + +/* + * Clustering: stubs for global file system operations. + */ +#ifndef PXFS_MODULE + MODULE(pxfs,fs); + NO_UNLOAD_WSTUB(pxfs, clpxfs_aio_read, nomod_zero); + NO_UNLOAD_WSTUB(pxfs, clpxfs_aio_write, nomod_zero); + NO_UNLOAD_WSTUB(pxfs, cl_flk_state_transition_notify, nomod_zero); + END_MODULE(pxfs); +#endif + +/* + * Stubs for kernel cryptographic framework module (misc/kcf). 
+ */ +#ifndef KCF_MODULE + MODULE(kcf,misc); + NO_UNLOAD_STUB(kcf, crypto_mech2id, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_register_provider, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_unregister_provider, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_provider_notification, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_op_notification, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_kmflag, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_digest, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_digest_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_digest_init, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_digest_init_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_digest_update, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_digest_final, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_digest_key_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt_init, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt_init_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt_update, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt_final, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_decrypt, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_decrypt_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_decrypt_init, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_decrypt_init_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_decrypt_update, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_decrypt_final, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_get_all_mech_info, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_key_check, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_key_check_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_key_derive, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_key_generate, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_key_generate_pair, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_key_unwrap, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_key_wrap, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_verify, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_verify_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_init, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_init_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_update, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_final, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_decrypt, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_verify_decrypt, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_verify_decrypt_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_init, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_init_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_update, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_final, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_object_copy, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_object_create, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_object_destroy, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_object_find_final, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_object_find_init, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_object_find, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_object_get_attribute_value, nomod_minus_one); + NO_UNLOAD_STUB(kcf, 
crypto_object_get_size, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_object_set_attribute_value, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_session_close, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_session_login, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_session_logout, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_session_open, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt_mac, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_init, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_init_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_update, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_final, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_create_ctx_template, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_destroy_ctx_template, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_get_mech_list, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_free_mech_list, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_cancel_req, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_cancel_ctx, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_bufcall_alloc, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_bufcall_free, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_bufcall, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_unbufcall, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_notify_events, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_unnotify_events, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_get_provider, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_get_provinfo, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_release_provider, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_sign, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_sign_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_sign_init, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_sign_init_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_sign_update, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_sign_final, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_sign_recover, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_sign_recover_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_sign_recover_init_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_verify, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_verify_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_verify_init, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_verify_init_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_verify_update, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_verify_final, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_verify_recover, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_verify_recover_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, crypto_verify_recover_init_prov, nomod_minus_one); + NO_UNLOAD_STUB(kcf, random_add_entropy, nomod_minus_one); + NO_UNLOAD_STUB(kcf, random_add_pseudo_entropy, nomod_minus_one); + NO_UNLOAD_STUB(kcf, random_get_blocking_bytes, nomod_minus_one); + NO_UNLOAD_STUB(kcf, random_get_bytes, nomod_minus_one); + NO_UNLOAD_STUB(kcf, random_get_pseudo_bytes, nomod_minus_one); + END_MODULE(kcf); +#endif + +/* + * Stubs for sha1. A non-unloadable module. + */ +#ifndef SHA1_MODULE + MODULE(sha1,crypto); + NO_UNLOAD_STUB(sha1, SHA1Init, nomod_void); + NO_UNLOAD_STUB(sha1, SHA1Update, nomod_void); + NO_UNLOAD_STUB(sha1, SHA1Final, nomod_void); + END_MODULE(sha1); +#endif + +/* + * The following stubs are used by the mac module. 
+ * Since dld already depends on mac, these + * stubs are needed to avoid circular dependencies. + */ +#ifndef DLD_MODULE + MODULE(dld,drv); + STUB(dld, dld_init_ops, nomod_void); + STUB(dld, dld_fini_ops, nomod_void); + STUB(dld, dld_devt_to_instance, nomod_minus_one); + STUB(dld, dld_autopush, nomod_minus_one); + STUB(dld, dld_ioc_register, nomod_einval); + STUB(dld, dld_ioc_unregister, nomod_void); + END_MODULE(dld); +#endif + +/* + * The following stubs are used by the mac module. + * Since dls already depends on mac, these + * stubs are needed to avoid circular dependencies. + */ +#ifndef DLS_MODULE + MODULE(dls,misc); + STUB(dls, dls_devnet_mac, nomod_zero); + STUB(dls, dls_devnet_hold_tmp, nomod_einval); + STUB(dls, dls_devnet_rele_tmp, nomod_void); + STUB(dls, dls_devnet_hold_link, nomod_einval); + STUB(dls, dls_devnet_rele_link, nomod_void); + STUB(dls, dls_devnet_prop_task_wait, nomod_void); + STUB(dls, dls_mgmt_get_linkid, nomod_einval); + STUB(dls, dls_devnet_macname2linkid, nomod_einval); + STUB(dls, dls_mgmt_get_linkinfo, nomod_einval); + END_MODULE(dls); +#endif + +#ifndef SOFTMAC_MODULE + MODULE(softmac,drv); + STUB(softmac, softmac_hold_device, nomod_einval); + STUB(softmac, softmac_rele_device, nomod_void); + STUB(softmac, softmac_recreate, nomod_void); + END_MODULE(softmac); +#endif + +#ifndef IPTUN_MODULE + MODULE(iptun,drv); + STUB(iptun, iptun_create, nomod_einval); + STUB(iptun, iptun_delete, nomod_einval); + STUB(iptun, iptun_set_policy, nomod_void) ; + END_MODULE(iptun); +#endif + +/* + * Stubs for dcopy, for Intel IOAT KAPIs + */ +#ifndef DCOPY_MODULE + MODULE(dcopy,misc); + NO_UNLOAD_STUB(dcopy, dcopy_query, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_query_channel, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_alloc, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_free, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_alloc, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_free, nomod_void); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_post, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_poll, nomod_minus_one); + END_MODULE(dcopy); +#endif + +/* + * Stubs for acpica + */ +#ifndef ACPICA_MODULE + MODULE(acpica,misc); + NO_UNLOAD_STUB(acpica, AcpiOsReadPort, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, AcpiOsWritePort, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, AcpiInstallNotifyHandler, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, AcpiRemoveNotifyHandler, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, AcpiEvaluateObject, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, AcpiEvaluateObjectTyped, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, AcpiWriteBitRegister, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, AcpiReadBitRegister, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, AcpiOsFree, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, acpica_get_handle_cpu, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, acpica_get_global_FADT, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, acpica_write_cpupm_capabilities, + nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, __acpi_wbinvd, nomod_minus_one) ; + NO_UNLOAD_STUB(acpica, acpi_reset_system, nomod_minus_one) ; + END_MODULE(acpica); +#endif + +/* + * Stubs for acpidev + */ +#ifndef ACPIDEV_MODULE + MODULE(acpidev,misc); + NO_UNLOAD_STUB(acpidev, acpidev_dr_get_cpu_numa_info, nomod_minus_one) ; + NO_UNLOAD_STUB(acpidev, acpidev_dr_free_cpu_numa_info, + nomod_minus_one) ; + END_MODULE(acpidev); +#endif + +#ifndef IPNET_MODULE + MODULE(ipnet,drv); + STUB(ipnet, ipnet_if_getdev, nomod_zero); + STUB(ipnet, ipnet_walk_if, nomod_zero); + END_MODULE(ipnet); 
+#endif + +#ifndef IOMMULIB_MODULE + MODULE(iommulib,misc); + STUB(iommulib, iommulib_nex_close, nomod_void); + END_MODULE(iommulib); +#endif + +/* + * Stubs for rootnex nexus driver. + */ +#ifndef ROOTNEX_MODULE + MODULE(rootnex,drv); + STUB(rootnex, immu_init, 0); + STUB(rootnex, immu_startup, 0); + STUB(rootnex, immu_physmem_update, 0); + END_MODULE(rootnex); +#endif + +/* + * Stubs for kernel socket, for iscsi + */ +#ifndef KSOCKET_MODULE + MODULE(ksocket, misc); + NO_UNLOAD_STUB(ksocket, ksocket_setsockopt, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_getsockopt, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_getpeername, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_getsockname, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_socket, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_bind, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_listen, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_accept, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_connect, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_recv, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_recvfrom, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_recvmsg, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_send, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_sendto, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_sendmsg, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_ioctl, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_setcallbacks, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_hold, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_rele, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_shutdown, nomod_minus_one); + NO_UNLOAD_STUB(ksocket, ksocket_close, nomod_minus_one); + END_MODULE(ksocket); +#endif + +/* + * Stubs for elfexec + */ +#ifndef ELFEXEC_MODULE + MODULE(elfexec,exec); + STUB(elfexec, elfexec, nomod_einval); + STUB(elfexec, mapexec_brand, nomod_einval); + STUB(elfexec, elf32exec, nomod_einval); + STUB(elfexec, mapexec32_brand, nomod_einval); + END_MODULE(elfexec); +#endif + +/* + * Stub(s) for APIX module. + */ +#ifndef APIX_MODULE + MODULE(apix,mach); + WSTUB(apix, apix_loaded, nomod_zero); + END_MODULE(apix); +#endif + +/* + * Stubs for ppt module (bhyve PCI passthrough driver) + */ +#ifndef PPT_MODULE + MODULE(ppt,drv); + WSTUB(ppt, ppt_unassign_all, nomod_zero); + WSTUB(ppt, ppt_map_mmio, nomod_einval); + WSTUB(ppt, ppt_unmap_mmio, nomod_einval); + WSTUB(ppt, ppt_setup_msi, nomod_einval); + WSTUB(ppt, ppt_setup_msix, nomod_einval); + WSTUB(ppt, ppt_disable_msix, nomod_einval); + WSTUB(ppt, ppt_assigned_devices, nomod_zero); + WSTUB(ppt, ppt_is_mmio, nomod_zero); + WSTUB(ppt, ppt_assign_device, nomod_einval); + WSTUB(ppt, ppt_unassign_device, nomod_einval); + WSTUB(ppt, ppt_get_limits, nomod_einval); + END_MODULE(ppt); +#endif + +/* + * this is just a marker for the area of text that contains stubs + */ + ENTRY_NP(stubs_end) + nop + diff --git a/usr/src/uts/intel/ml/ovbcopy.s b/usr/src/uts/intel/ml/ovbcopy.s new file mode 100644 index 0000000000..0687e67e4b --- /dev/null +++ b/usr/src/uts/intel/ml/ovbcopy.s @@ -0,0 +1,92 @@ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2019 Joyent, Inc. + */ + +/*- + * Copyright (c) 1993 The Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/amd64/amd64/support.S,v 1.102 2003/10/02 05:08:13 alc Exp $ + */ + +#include <sys/asm_linkage.h> + +/* + * Adapted from fbsd bcopy(). + * + * bcopy(src, dst, cnt) + * rdi, rsi, rdx + * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 + */ + + ENTRY(ovbcopy) + xchgq %rsi,%rdi + movq %rdx,%rcx + + movq %rdi,%rax + subq %rsi,%rax + cmpq %rcx,%rax /* overlapping && src < dst? */ + jb reverse + + shrq $3,%rcx /* copy by 64-bit words */ + cld /* nope, copy forwards */ + rep + movsq + movq %rdx,%rcx + andq $7,%rcx /* any bytes left? */ + rep + movsb + ret + +reverse: + addq %rcx,%rdi /* copy backwards */ + addq %rcx,%rsi + decq %rdi + decq %rsi + andq $7,%rcx /* any fractional bytes? */ + std + rep + movsb + movq %rdx,%rcx /* copy remainder by 32-bit words */ + shrq $3,%rcx + subq $7,%rsi + subq $7,%rdi + rep + movsq + cld + ret + SET_SIZE(ovbcopy) + diff --git a/usr/src/uts/intel/ml/retpoline.s b/usr/src/uts/intel/ml/retpoline.s new file mode 100644 index 0000000000..a68d9504c1 --- /dev/null +++ b/usr/src/uts/intel/ml/retpoline.s @@ -0,0 +1,211 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + */ + + .file "retpoline.s" + +/* + * This file implements the various hooks that are needed for retpolines and + * return stack buffer (RSB) stuffing. 
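+ * As a brief sketch of the idea (the macros below are the actual
+ * definitions): an indirect transfer through a register, e.g.
+ *
+ *	call	*%rax
+ *
+ * is redirected by the compiler to __x86_indirect_thunk_rax.  The thunk
+ * performs an internal call that pushes a return address pointing at a
+ * pause/lfence loop, overwrites that address on the stack with %rax, and
+ * then executes ret.  Architecturally this transfers control to the
+ * intended target; speculatively, the ret can only land in the harmless
+ * loop rather than an attacker-trained target.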
For more information, please see the + * 'Speculative Execution CPU Side Channel Security' section of the + * uts/i86pc/os/cpuid.c big theory statement. + */ + +#include <sys/asm_linkage.h> +#include <sys/x86_archext.h> + +#if defined(__amd64) + +/* + * This macro generates the default retpoline entry point that the compiler + * expects. It implements the expected retpoline form. + */ +#define RETPOLINE_MKTHUNK(reg) \ + ENTRY(__x86_indirect_thunk_/**/reg) \ + call 2f; \ +1: \ + pause; \ + lfence; \ + jmp 1b; \ +2: \ + movq %/**/reg, (%rsp); \ + ret; \ + SET_SIZE(__x86_indirect_thunk_/**/reg) + +/* + * This macro generates the default retpoline form. It exists in addition to the + * thunk so if we need to restore the default retpoline behavior to the thunk + * we can. + */ +#define RETPOLINE_MKGENERIC(reg) \ + ENTRY(__x86_indirect_thunk_gen_/**/reg) \ + call 2f; \ +1: \ + pause; \ + lfence; \ + jmp 1b; \ +2: \ + movq %/**/reg, (%rsp); \ + ret; \ + SET_SIZE(__x86_indirect_thunk_gen_/**/reg) + +/* + * This macro generates the AMD optimized form of a retpoline which will be used + * on systems where the lfence dispatch serializing behavior has been changed. + */ +#define RETPOLINE_MKLFENCE(reg) \ + ENTRY(__x86_indirect_thunk_amd_/**/reg) \ + lfence; \ + jmp *%/**/reg; \ + SET_SIZE(__x86_indirect_thunk_amd_/**/reg) + + +/* + * This macro generates the no-op form of the retpoline which will be used if we + * either need to disable retpolines because we have enhanced IBRS or because we + * have been asked to disable mitigations. + */ +#define RETPOLINE_MKJUMP(reg) \ + ENTRY(__x86_indirect_thunk_jmp_/**/reg) \ + jmp *%/**/reg; \ + SET_SIZE(__x86_indirect_thunk_jmp_/**/reg) + + RETPOLINE_MKTHUNK(rax) + RETPOLINE_MKTHUNK(rbx) + RETPOLINE_MKTHUNK(rcx) + RETPOLINE_MKTHUNK(rdx) + RETPOLINE_MKTHUNK(rdi) + RETPOLINE_MKTHUNK(rsi) + RETPOLINE_MKTHUNK(rbp) + RETPOLINE_MKTHUNK(r8) + RETPOLINE_MKTHUNK(r9) + RETPOLINE_MKTHUNK(r10) + RETPOLINE_MKTHUNK(r11) + RETPOLINE_MKTHUNK(r12) + RETPOLINE_MKTHUNK(r13) + RETPOLINE_MKTHUNK(r14) + RETPOLINE_MKTHUNK(r15) + + RETPOLINE_MKGENERIC(rax) + RETPOLINE_MKGENERIC(rbx) + RETPOLINE_MKGENERIC(rcx) + RETPOLINE_MKGENERIC(rdx) + RETPOLINE_MKGENERIC(rdi) + RETPOLINE_MKGENERIC(rsi) + RETPOLINE_MKGENERIC(rbp) + RETPOLINE_MKGENERIC(r8) + RETPOLINE_MKGENERIC(r9) + RETPOLINE_MKGENERIC(r10) + RETPOLINE_MKGENERIC(r11) + RETPOLINE_MKGENERIC(r12) + RETPOLINE_MKGENERIC(r13) + RETPOLINE_MKGENERIC(r14) + RETPOLINE_MKGENERIC(r15) + + RETPOLINE_MKLFENCE(rax) + RETPOLINE_MKLFENCE(rbx) + RETPOLINE_MKLFENCE(rcx) + RETPOLINE_MKLFENCE(rdx) + RETPOLINE_MKLFENCE(rdi) + RETPOLINE_MKLFENCE(rsi) + RETPOLINE_MKLFENCE(rbp) + RETPOLINE_MKLFENCE(r8) + RETPOLINE_MKLFENCE(r9) + RETPOLINE_MKLFENCE(r10) + RETPOLINE_MKLFENCE(r11) + RETPOLINE_MKLFENCE(r12) + RETPOLINE_MKLFENCE(r13) + RETPOLINE_MKLFENCE(r14) + RETPOLINE_MKLFENCE(r15) + + RETPOLINE_MKJUMP(rax) + RETPOLINE_MKJUMP(rbx) + RETPOLINE_MKJUMP(rcx) + RETPOLINE_MKJUMP(rdx) + RETPOLINE_MKJUMP(rdi) + RETPOLINE_MKJUMP(rsi) + RETPOLINE_MKJUMP(rbp) + RETPOLINE_MKJUMP(r8) + RETPOLINE_MKJUMP(r9) + RETPOLINE_MKJUMP(r10) + RETPOLINE_MKJUMP(r11) + RETPOLINE_MKJUMP(r12) + RETPOLINE_MKJUMP(r13) + RETPOLINE_MKJUMP(r14) + RETPOLINE_MKJUMP(r15) + + /* + * The x86_rsb_stuff function is called from pretty arbitrary + * contexts. It's much easier for us to save and restore all the + * registers we touch rather than clobber them for callers. You must + * preserve this property or the system will panic at best. 
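+ *
+ * As a rough sketch of what the body does: after saving %rdi, %rax and
+ * the current stack pointer, it runs 16 iterations of a loop containing
+ * two forward calls each, pushing 32 return addresses in total.  Every
+ * one of those return addresses points at a benign pause loop that is
+ * never architecturally executed, so a later speculative ret that
+ * consumes a stale RSB entry can only land there.  Restoring the saved
+ * %rsp at the end discards the bogus return addresses in a single step.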
+ */ + ENTRY(x86_rsb_stuff) + /* + * These nops are present so we can patch a ret instruction if we need + * to disable RSB stuffing because enhanced IBRS is present or we're + * disabling mitigations. + */ + nop + nop + pushq %rdi + pushq %rax + movl $16, %edi + movq %rsp, %rax +rsb_loop: + call 2f +1: + pause + call 1b +2: + call 2f +1: + pause + call 1b +2: + subl $1, %edi + jnz rsb_loop + movq %rax, %rsp + popq %rax + popq %rdi + ret + SET_SIZE(x86_rsb_stuff) + +#elif defined(__i386) + +/* + * While the kernel is 64-bit only, dboot is still 32-bit, so there are a + * limited number of variants that are used for 32-bit. However as dboot is + * short lived and uses them sparingly, we only do the full variant and do not + * have an AMD specific version. + */ + +#define RETPOLINE_MKTHUNK(reg) \ + ENTRY(__x86_indirect_thunk_/**/reg) \ + call 2f; \ +1: \ + pause; \ + lfence; \ + jmp 1b; \ +2: \ + movl %/**/reg, (%esp); \ + ret; \ + SET_SIZE(__x86_indirect_thunk_/**/reg) + + RETPOLINE_MKTHUNK(edi) + RETPOLINE_MKTHUNK(eax) + +#else +#error "Your architecture is in another castle." +#endif diff --git a/usr/src/uts/intel/ml/sseblk.s b/usr/src/uts/intel/ml/sseblk.s new file mode 100644 index 0000000000..836b6b6c97 --- /dev/null +++ b/usr/src/uts/intel/ml/sseblk.s @@ -0,0 +1,280 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2019 Joyent, Inc. 
+ */ + +#include <sys/asm_linkage.h> +#include <sys/regset.h> +#include <sys/privregs.h> + +#include "assym.h" + +/* + * Do block operations using Streaming SIMD extensions + */ + +#if defined(DEBUG) +#define ASSERT_KPREEMPT_DISABLED(t, r32, msg) \ + movq %gs:CPU_THREAD, t; \ + movsbl T_PREEMPT(t), r32; \ + testl r32, r32; \ + jne 5f; \ + pushq %rbp; \ + movq %rsp, %rbp; \ + leaq msg(%rip), %rdi; \ + xorl %eax, %eax; \ + call panic; \ +5: +#else /* DEBUG */ +#define ASSERT_KPREEMPT_DISABLED(t, r32, msg) +#endif /* DEBUG */ + +#define BLOCKSHIFT 6 +#define BLOCKSIZE 64 /* (1 << BLOCKSHIFT) */ +#define BLOCKMASK 63 /* (BLOCKSIZE - 1) */ + +#if (1 << BLOCKSHIFT) != BLOCKSIZE || BLOCKMASK != (BLOCKSIZE - 1) +#error "mucked up constants" +#endif + +#define SAVE_XMM0(r) \ + SAVE_XMM_PROLOG(r, 1); \ + movdqa %xmm0, (r) + +#define ZERO_LOOP_INIT_XMM(dst) \ + pxor %xmm0, %xmm0 + +#define ZERO_LOOP_BODY_XMM(dst, cnt) \ + movntdq %xmm0, (dst); \ + movntdq %xmm0, 0x10(dst); \ + movntdq %xmm0, 0x20(dst); \ + movntdq %xmm0, 0x30(dst); \ + addq $BLOCKSIZE, dst; \ + subq $1, cnt + +#define ZERO_LOOP_FINI_XMM(dst) \ + mfence + +#define RSTOR_XMM0(r) \ + movdqa 0x0(r), %xmm0; \ + RSTOR_XMM_EPILOG(r, 1) + + /* + * %rdi dst + * %rsi size + * %rax saved %cr0 (#if DEBUG then %eax is t->t_preempt) + * %r8 pointer to %xmm register save area + */ + ENTRY(hwblkclr) + pushq %rbp + movq %rsp, %rbp + testl $BLOCKMASK, %edi /* address must be BLOCKSIZE aligned */ + jne .dobzero + cmpq $BLOCKSIZE, %rsi /* size must be at least BLOCKSIZE */ + jl .dobzero + testq $BLOCKMASK, %rsi /* .. and be a multiple of BLOCKSIZE */ + jne .dobzero + shrq $BLOCKSHIFT, %rsi + + ASSERT_KPREEMPT_DISABLED(%r11, %eax, .not_disabled) + movq %cr0, %rax + clts + testl $CR0_TS, %eax + jnz 1f + + SAVE_XMM0(%r8) +1: ZERO_LOOP_INIT_XMM(%rdi) +9: ZERO_LOOP_BODY_XMM(%rdi, %rsi) + jnz 9b + ZERO_LOOP_FINI_XMM(%rdi) + + testl $CR0_TS, %eax + jnz 2f + RSTOR_XMM0(%r8) +2: movq %rax, %cr0 + leave + ret +.dobzero: + leave + jmp bzero + SET_SIZE(hwblkclr) + + +#define PREFETCH_START(src) \ + prefetchnta 0x0(src); \ + prefetchnta 0x40(src) + +#define SAVE_XMMS(r) \ + SAVE_XMM_PROLOG(r, 8); \ + movdqa %xmm0, (r); \ + movdqa %xmm1, 0x10(r); \ + movdqa %xmm2, 0x20(r); \ + movdqa %xmm3, 0x30(r); \ + movdqa %xmm4, 0x40(r); \ + movdqa %xmm5, 0x50(r); \ + movdqa %xmm6, 0x60(r); \ + movdqa %xmm7, 0x70(r) + +#define COPY_LOOP_INIT_XMM(src) \ + prefetchnta 0x80(src); \ + prefetchnta 0xc0(src); \ + movdqa 0x0(src), %xmm0; \ + movdqa 0x10(src), %xmm1; \ + movdqa 0x20(src), %xmm2; \ + movdqa 0x30(src), %xmm3; \ + movdqa 0x40(src), %xmm4; \ + movdqa 0x50(src), %xmm5; \ + movdqa 0x60(src), %xmm6; \ + movdqa 0x70(src), %xmm7; \ + addq $0x80, src + +#define COPY_LOOP_BODY_XMM(src, dst, cnt) \ + prefetchnta 0x80(src); \ + prefetchnta 0xc0(src); \ + prefetchnta 0x100(src); \ + prefetchnta 0x140(src); \ + movntdq %xmm0, (dst); \ + movntdq %xmm1, 0x10(dst); \ + movntdq %xmm2, 0x20(dst); \ + movntdq %xmm3, 0x30(dst); \ + movdqa 0x0(src), %xmm0; \ + movdqa 0x10(src), %xmm1; \ + movntdq %xmm4, 0x40(dst); \ + movntdq %xmm5, 0x50(dst); \ + movdqa 0x20(src), %xmm2; \ + movdqa 0x30(src), %xmm3; \ + movntdq %xmm6, 0x60(dst); \ + movntdq %xmm7, 0x70(dst); \ + movdqa 0x40(src), %xmm4; \ + movdqa 0x50(src), %xmm5; \ + addq $0x80, dst; \ + movdqa 0x60(src), %xmm6; \ + movdqa 0x70(src), %xmm7; \ + addq $0x80, src; \ + subl $1, cnt + +#define COPY_LOOP_FINI_XMM(dst) \ + movntdq %xmm0, 0x0(dst); \ + movntdq %xmm1, 0x10(dst); \ + movntdq %xmm2, 0x20(dst); \ + movntdq %xmm3, 0x30(dst); \ + 
movntdq %xmm4, 0x40(dst); \ + movntdq %xmm5, 0x50(dst); \ + movntdq %xmm6, 0x60(dst); \ + movntdq %xmm7, 0x70(dst) + +#define RSTOR_XMMS(r) \ + movdqa 0x0(r), %xmm0; \ + movdqa 0x10(r), %xmm1; \ + movdqa 0x20(r), %xmm2; \ + movdqa 0x30(r), %xmm3; \ + movdqa 0x40(r), %xmm4; \ + movdqa 0x50(r), %xmm5; \ + movdqa 0x60(r), %xmm6; \ + movdqa 0x70(r), %xmm7; \ + RSTOR_XMM_EPILOG(r, 8) + + /* + * %rdi src + * %rsi dst + * %rdx #if DEBUG then curthread + * %ecx loop count + * %rax saved %cr0 (#if DEBUG then %eax is t->t_prempt) + * %r8 pointer to %xmm register save area + */ + ENTRY(hwblkpagecopy) + pushq %rbp + movq %rsp, %rbp + PREFETCH_START(%rdi) + /* + * PAGESIZE is 4096, each loop moves 128 bytes, but the initial + * load and final store save us on loop count + */ + movl $_CONST(32 - 1), %ecx + ASSERT_KPREEMPT_DISABLED(%rdx, %eax, .not_disabled) + movq %cr0, %rax + clts + testl $CR0_TS, %eax + jnz 3f + SAVE_XMMS(%r8) +3: COPY_LOOP_INIT_XMM(%rdi) +4: COPY_LOOP_BODY_XMM(%rdi, %rsi, %ecx) + jnz 4b + COPY_LOOP_FINI_XMM(%rsi) + testl $CR0_TS, %eax + jnz 5f + RSTOR_XMMS(%r8) +5: movq %rax, %cr0 + mfence + leave + ret + SET_SIZE(hwblkpagecopy) + + ENTRY(block_zero_no_xmm) + pushq %rbp + movq %rsp, %rbp + xorl %eax, %eax + addq %rsi, %rdi + negq %rsi +1: + movnti %rax, (%rdi, %rsi) + movnti %rax, 8(%rdi, %rsi) + movnti %rax, 16(%rdi, %rsi) + movnti %rax, 24(%rdi, %rsi) + addq $32, %rsi + jnz 1b + mfence + leave + ret + SET_SIZE(block_zero_no_xmm) + + + ENTRY(page_copy_no_xmm) + movq $MMU_STD_PAGESIZE, %rcx + addq %rcx, %rdi + addq %rcx, %rsi + negq %rcx +1: + movq (%rsi, %rcx), %rax + movnti %rax, (%rdi, %rcx) + movq 8(%rsi, %rcx), %rax + movnti %rax, 8(%rdi, %rcx) + movq 16(%rsi, %rcx), %rax + movnti %rax, 16(%rdi, %rcx) + movq 24(%rsi, %rcx), %rax + movnti %rax, 24(%rdi, %rcx) + addq $32, %rcx + jnz 1b + mfence + ret + SET_SIZE(page_copy_no_xmm) + +#if defined(DEBUG) + .text +.not_disabled: + .string "sseblk: preemption not disabled!" +#endif diff --git a/usr/src/uts/intel/ml/swtch.s b/usr/src/uts/intel/ml/swtch.s new file mode 100644 index 0000000000..c6c606b11e --- /dev/null +++ b/usr/src/uts/intel/ml/swtch.s @@ -0,0 +1,509 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright 2020 Joyent, Inc. + */ + +/* + * Process switching routines. + */ + +#include <sys/asm_linkage.h> +#include <sys/asm_misc.h> +#include <sys/regset.h> +#include <sys/privregs.h> +#include <sys/stack.h> +#include <sys/segments.h> +#include <sys/psw.h> + +#include "assym.h" + +/* + * resume(thread_id_t t); + * + * a thread can only run on one processor at a time. 
there + * exists a window on MPs where the current thread on one + * processor is capable of being dispatched by another processor. + * some overlap between outgoing and incoming threads can happen + * when they are the same thread. in this case where the threads + * are the same, resume() on one processor will spin on the incoming + * thread until resume() on the other processor has finished with + * the outgoing thread. + * + * The MMU context changes when the resuming thread resides in a different + * process. Kernel threads are known by resume to reside in process 0. + * The MMU context, therefore, only changes when resuming a thread in + * a process different from curproc. + * + * resume_from_intr() is called when the thread being resumed was not + * passivated by resume (e.g. was interrupted). This means that the + * resume lock is already held and that a restore context is not needed. + * Also, the MMU context is not changed on the resume in this case. + * + * resume_from_zombie() is the same as resume except the calling thread + * is a zombie and must be put on the deathrow list after the CPU is + * off the stack. + */ + +#if LWP_PCB_FPU != 0 +#error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work +#endif /* LWP_PCB_FPU != 0 */ + +/* + * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15) + * + * The stack frame must be created before the save of %rsp so that tracebacks + * of swtch()ed-out processes show the process as having last called swtch(). + */ +#define SAVE_REGS(thread_t, retaddr) \ + movq %rbp, T_RBP(thread_t); \ + movq %rbx, T_RBX(thread_t); \ + movq %r12, T_R12(thread_t); \ + movq %r13, T_R13(thread_t); \ + movq %r14, T_R14(thread_t); \ + movq %r15, T_R15(thread_t); \ + pushq %rbp; \ + movq %rsp, %rbp; \ + movq %rsp, T_SP(thread_t); \ + movq retaddr, T_PC(thread_t); \ + movq %rdi, %r12; \ + call __dtrace_probe___sched_off__cpu + +/* + * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15) + * + * We load up %rsp from the label_t as part of the context switch, so + * we don't repeat that here. + * + * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t + * already has the effect of putting the stack back the way it was when + * we came in. + */ +#define RESTORE_REGS(scratch_reg) \ + movq %gs:CPU_THREAD, scratch_reg; \ + movq T_RBP(scratch_reg), %rbp; \ + movq T_RBX(scratch_reg), %rbx; \ + movq T_R12(scratch_reg), %r12; \ + movq T_R13(scratch_reg), %r13; \ + movq T_R14(scratch_reg), %r14; \ + movq T_R15(scratch_reg), %r15 + +/* + * Get pointer to a thread's hat structure + */ +#define GET_THREAD_HATP(hatp, thread_t, scratch_reg) \ + movq T_PROCP(thread_t), hatp; \ + movq P_AS(hatp), scratch_reg; \ + movq A_HAT(scratch_reg), hatp + +#define TSC_READ() \ + call tsc_read; \ + movq %rax, %r14; + +/* + * If we are resuming an interrupt thread, store a timestamp in the thread + * structure. If an interrupt occurs between tsc_read() and its subsequent + * store, the timestamp will be stale by the time it is stored. We can detect + * this by doing a compare-and-swap on the thread's timestamp, since any + * interrupt occurring in this window will put a new timestamp in the thread's + * t_intr_start field. 
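+ *
+ * In C-like terms the macro below does roughly the following (a sketch
+ * only; names mirror the assym offsets used in the assembly):
+ *
+ *	if (t->t_flag & T_INTR_THREAD) {
+ *		do {
+ *			new = tsc_read();
+ *			old = t->t_intr_start;
+ *		} while (atomic_cas_64(&t->t_intr_start, old, new) != old);
+ *	}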
+ */ +#define STORE_INTR_START(thread_t) \ + testw $T_INTR_THREAD, T_FLAGS(thread_t); \ + jz 1f; \ +0: \ + TSC_READ(); \ + movq T_INTR_START(thread_t), %rax; \ + cmpxchgq %r14, T_INTR_START(thread_t); \ + jnz 0b; \ +1: + + .global kpti_enable + + ENTRY(resume) + movq %gs:CPU_THREAD, %rax + leaq resume_return(%rip), %r11 + + /* + * Deal with SMAP here. A thread may be switched out at any point while + * it is executing. The thread could be under on_fault() or it could be + * pre-empted while performing a copy interruption. If this happens and + * we're not in the context of an interrupt which happens to handle + * saving and restoring rflags correctly, we may lose our SMAP related + * state. + * + * To handle this, as part of being switched out, we first save whether + * or not userland access is allowed ($PS_ACHK in rflags) and store that + * in t_useracc on the kthread_t and unconditionally enable SMAP to + * protect the system. + * + * Later, when the thread finishes resuming, we potentially disable smap + * if PS_ACHK was present in rflags. See uts/intel/ml/copy.s for + * more information on rflags and SMAP. + */ + pushfq + popq %rsi + andq $PS_ACHK, %rsi + movq %rsi, T_USERACC(%rax) + call smap_enable + + /* + * Take a moment to potentially clear the RSB buffer. This is done to + * prevent various Spectre variant 2 and SpectreRSB attacks. This may + * not be sufficient. Please see uts/intel/ml/retpoline.s for more + * information about this. + */ + call x86_rsb_stuff + + /* + * Save non-volatile registers, and set return address for current + * thread to resume_return. + * + * %r12 = t (new thread) when done + */ + SAVE_REGS(%rax, %r11) + + + LOADCPU(%r15) /* %r15 = CPU */ + movq CPU_THREAD(%r15), %r13 /* %r13 = curthread */ + + /* + * Call savectx if thread has installed context ops. + * + * Note that if we have floating point context, the save op + * (either fpsave_begin or fpxsave_begin) will issue the + * async save instruction (fnsave or fxsave respectively) + * that we fwait for below. + */ + cmpq $0, T_CTX(%r13) /* should current thread savectx? */ + je .nosavectx /* skip call when zero */ + + movq %r13, %rdi /* arg = thread pointer */ + call savectx /* call ctx ops */ +.nosavectx: + + /* + * Check that the curthread is not using the FPU while in the kernel. + */ + call kernel_fpu_no_swtch + + /* + * Call savepctx if process has installed context ops. + */ + movq T_PROCP(%r13), %r14 /* %r14 = proc */ + cmpq $0, P_PCTX(%r14) /* should current thread savepctx? */ + je .nosavepctx /* skip call when zero */ + + movq %r14, %rdi /* arg = proc pointer */ + call savepctx /* call ctx ops */ +.nosavepctx: + + /* + * Temporarily switch to the idle thread's stack + */ + movq CPU_IDLE_THREAD(%r15), %rax /* idle thread pointer */ + + /* + * Set the idle thread as the current thread + */ + movq T_SP(%rax), %rsp /* It is safe to set rsp */ + movq %rax, CPU_THREAD(%r15) + + /* + * Switch in the hat context for the new thread + * + */ + GET_THREAD_HATP(%rdi, %r12, %r11) + call hat_switch + + /* + * Clear and unlock previous thread's t_lock + * to allow it to be dispatched by another processor. + */ + movb $0, T_LOCK(%r13) + + /* + * IMPORTANT: Registers at this point must be: + * %r12 = new thread + * + * Here we are in the idle thread, have dropped the old thread. + */ + ALTENTRY(_resume_from_idle) + /* + * spin until dispatched thread's mutex has + * been unlocked. this mutex is unlocked when + * it becomes safe for the thread to run. 
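+ *
+ * the code below is the usual test-and-test-and-set pattern: a locked
+ * btsl atomically tries to set bit 0 of the new thread's t_lock, and if
+ * the bit was already set we spin on plain reads (with pause) until the
+ * byte looks clear before retrying the atomic operation.  roughly, in
+ * pseudo-code:
+ *
+ *	while (test-and-set of t_lock finds it already held)
+ *		while (t->t_lock != 0)
+ *			pause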
+ */ +.lock_thread_mutex: + lock + btsl $0, T_LOCK(%r12) /* attempt to lock new thread's mutex */ + jnc .thread_mutex_locked /* got it */ + +.spin_thread_mutex: + pause + cmpb $0, T_LOCK(%r12) /* check mutex status */ + jz .lock_thread_mutex /* clear, retry lock */ + jmp .spin_thread_mutex /* still locked, spin... */ + +.thread_mutex_locked: + /* + * Fix CPU structure to indicate new running thread. + * Set pointer in new thread to the CPU structure. + */ + LOADCPU(%r13) /* load current CPU pointer */ + cmpq %r13, T_CPU(%r12) + je .setup_cpu + + /* cp->cpu_stats.sys.cpumigrate++ */ + incq CPU_STATS_SYS_CPUMIGRATE(%r13) + movq %r13, T_CPU(%r12) /* set new thread's CPU pointer */ + +.setup_cpu: + /* + * Setup rsp0 (kernel stack) in TSS to curthread's saved regs + * structure. If this thread doesn't have a regs structure above + * the stack -- that is, if lwp_stk_init() was never called for the + * thread -- this will set rsp0 to the wrong value, but it's harmless + * as it's a kernel thread, and it won't actually attempt to implicitly + * use the rsp0 via a privilege change. + * + * Note that when we have KPTI enabled on amd64, we never use this + * value at all (since all the interrupts have an IST set). + */ + movq CPU_TSS(%r13), %r14 +#if !defined(__xpv) + cmpq $1, kpti_enable + jne 1f + leaq CPU_KPTI_TR_RSP(%r13), %rax + jmp 2f +1: + movq T_STACK(%r12), %rax + addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */ +2: + movq %rax, TSS_RSP0(%r14) +#else + movq T_STACK(%r12), %rax + addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */ + movl $KDS_SEL, %edi + movq %rax, %rsi + call HYPERVISOR_stack_switch +#endif /* __xpv */ + + movq %r12, CPU_THREAD(%r13) /* set CPU's thread pointer */ + mfence /* synchronize with mutex_exit() */ + xorl %ebp, %ebp /* make $<threadlist behave better */ + movq T_LWP(%r12), %rax /* set associated lwp to */ + movq %rax, CPU_LWP(%r13) /* CPU's lwp ptr */ + + movq T_SP(%r12), %rsp /* switch to outgoing thread's stack */ + movq T_PC(%r12), %r13 /* saved return addr */ + + /* + * Call restorectx if context ops have been installed. + */ + cmpq $0, T_CTX(%r12) /* should resumed thread restorectx? */ + jz .norestorectx /* skip call when zero */ + movq %r12, %rdi /* arg = thread pointer */ + call restorectx /* call ctx ops */ +.norestorectx: + + /* + * Call restorepctx if context ops have been installed for the proc. + */ + movq T_PROCP(%r12), %rcx + cmpq $0, P_PCTX(%rcx) + jz .norestorepctx + movq %rcx, %rdi + call restorepctx +.norestorepctx: + + STORE_INTR_START(%r12) + + /* + * If we came into swtch with the ability to access userland pages, go + * ahead and restore that fact by disabling SMAP. Clear the indicator + * flag out of paranoia. + */ + movq T_USERACC(%r12), %rax /* should we disable smap? */ + cmpq $0, %rax /* skip call when zero */ + jz .nosmap + xorq %rax, %rax + movq %rax, T_USERACC(%r12) + call smap_disable +.nosmap: + + call smt_mark + + /* + * Restore non-volatile registers, then have spl0 return to the + * resuming thread's PC after first setting the priority as low as + * possible and blocking all interrupt threads that may be active. 
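+ *
+ * Mechanically, the saved PC is pushed onto the resuming thread's stack
+ * and control jumps (rather than calls) to spl0, so when spl0 eventually
+ * executes ret it lands directly at that PC in the resumed thread, with
+ * the non-volatile registers already restored.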
+ */ + movq %r13, %rax /* save return address */ + RESTORE_REGS(%r11) + pushq %rax /* push return address for spl0() */ + call __dtrace_probe___sched_on__cpu + jmp spl0 + +resume_return: + /* + * Remove stack frame created in SAVE_REGS() + */ + addq $CLONGSIZE, %rsp + ret + SET_SIZE(_resume_from_idle) + SET_SIZE(resume) + + ENTRY(resume_from_zombie) + movq %gs:CPU_THREAD, %rax + leaq resume_from_zombie_return(%rip), %r11 + + /* + * Save non-volatile registers, and set return address for current + * thread to resume_from_zombie_return. + * + * %r12 = t (new thread) when done + */ + SAVE_REGS(%rax, %r11) + + movq %gs:CPU_THREAD, %r13 /* %r13 = curthread */ + + /* clean up the fp unit. It might be left enabled */ + +#if defined(__xpv) /* XXPV XXtclayton */ + /* + * Remove this after bringup. + * (Too many #gp's for an instrumented hypervisor.) + */ + STTS(%rax) +#else + movq %cr0, %rax + testq $CR0_TS, %rax + jnz .zfpu_disabled /* if TS already set, nothing to do */ + fninit /* init fpu & discard pending error */ + orq $CR0_TS, %rax + movq %rax, %cr0 +.zfpu_disabled: + +#endif /* __xpv */ + + /* + * Temporarily switch to the idle thread's stack so that the zombie + * thread's stack can be reclaimed by the reaper. + */ + movq %gs:CPU_IDLE_THREAD, %rax /* idle thread pointer */ + movq T_SP(%rax), %rsp /* get onto idle thread stack */ + + /* + * Sigh. If the idle thread has never run thread_start() + * then t_sp is mis-aligned by thread_load(). + */ + andq $_BITNOT(STACK_ALIGN-1), %rsp + + /* + * Set the idle thread as the current thread. + */ + movq %rax, %gs:CPU_THREAD + + /* switch in the hat context for the new thread */ + GET_THREAD_HATP(%rdi, %r12, %r11) + call hat_switch + + /* + * Put the zombie on death-row. + */ + movq %r13, %rdi + call reapq_add + + jmp _resume_from_idle /* finish job of resume */ + +resume_from_zombie_return: + RESTORE_REGS(%r11) /* restore non-volatile registers */ + call __dtrace_probe___sched_on__cpu + + /* + * Remove stack frame created in SAVE_REGS() + */ + addq $CLONGSIZE, %rsp + ret + SET_SIZE(resume_from_zombie) + + ENTRY(resume_from_intr) + movq %gs:CPU_THREAD, %rax + leaq resume_from_intr_return(%rip), %r11 + + /* + * Save non-volatile registers, and set return address for current + * thread to resume_from_intr_return. + * + * %r12 = t (new thread) when done + */ + SAVE_REGS(%rax, %r11) + + movq %gs:CPU_THREAD, %r13 /* %r13 = curthread */ + movq %r12, %gs:CPU_THREAD /* set CPU's thread pointer */ + mfence /* synchronize with mutex_exit() */ + movq T_SP(%r12), %rsp /* restore resuming thread's sp */ + xorl %ebp, %ebp /* make $<threadlist behave better */ + + /* + * Unlock outgoing thread's mutex dispatched by another processor. + */ + xorl %eax, %eax + xchgb %al, T_LOCK(%r13) + + STORE_INTR_START(%r12) + + call smt_mark + + /* + * Restore non-volatile registers, then have spl0 return to the + * resuming thread's PC after first setting the priority as low as + * possible and blocking all interrupt threads that may be active. + */ + movq T_PC(%r12), %rax /* saved return addr */ + RESTORE_REGS(%r11); + pushq %rax /* push return address for spl0() */ + call __dtrace_probe___sched_on__cpu + jmp spl0 + +resume_from_intr_return: + /* + * Remove stack frame created in SAVE_REGS() + */ + addq $CLONGSIZE, %rsp + ret + SET_SIZE(resume_from_intr) + + ENTRY(thread_start) + popq %rax /* start() */ + popq %rdi /* arg */ + popq %rsi /* len */ + movq %rsp, %rbp + INDIRECT_CALL_REG(rax) + call thread_exit /* destroy thread if it returns. 
*/ + /*NOTREACHED*/ + SET_SIZE(thread_start)