author      Richard Lowe <richlowe@richlowe.net>    2021-06-04 15:15:12 -0500
committer   Richard Lowe <richlowe@richlowe.net>    2021-08-16 12:46:39 -0500
commit      f0089e391b2bc4be2755f1a1b51fb4cd9b8f3988 (patch)
tree        c4ac2f5e703ed459d50bcee7ddb38a993d961520 /usr/src/uts/intel/ia32
parent      d083fed0c91296a88878f7a468910ad5b5c888ea (diff)
download    illumos-joyent-f0089e391b2bc4be2755f1a1b51fb4cd9b8f3988.tar.gz
13941 intel code and headers should not look ia32 specific
Reviewed by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Patrick Mooney <pmooney@pfmooney.com>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src/uts/intel/ia32')
42 files changed, 44 insertions, 21182 deletions
diff --git a/usr/src/uts/intel/ia32/README b/usr/src/uts/intel/ia32/README new file mode 100644 index 0000000000..e547c5bd23 --- /dev/null +++ b/usr/src/uts/intel/ia32/README @@ -0,0 +1,22 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# Copyright 2021, Richard Lowe. + +This directory contains ia32 specific code that is nevertheless still used. +This includes + +krtld + the relocation engine shared with the userland link-editor. + +sys + headers still used by various parts of the system, largely describing + the register set of the intel 32bit processor diff --git a/usr/src/uts/intel/ia32/ml/copy.s b/usr/src/uts/intel/ia32/ml/copy.s deleted file mode 100644 index 5e5f822518..0000000000 --- a/usr/src/uts/intel/ia32/ml/copy.s +++ /dev/null @@ -1,1908 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright (c) 2009, Intel Corporation - * All rights reserved. - */ - -/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ -/* All Rights Reserved */ - -/* Copyright (c) 1987, 1988 Microsoft Corporation */ -/* All Rights Reserved */ - -/* - * Copyright 2020 Joyent, Inc. - */ - -#include <sys/errno.h> -#include <sys/asm_linkage.h> - -#include "assym.h" - -#define KCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */ -#define XCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */ -/* - * Non-temopral access (NTA) alignment requirement - */ -#define NTA_ALIGN_SIZE 4 /* Must be at least 4-byte aligned */ -#define NTA_ALIGN_MASK _CONST(NTA_ALIGN_SIZE-1) -#define COUNT_ALIGN_SIZE 16 /* Must be at least 16-byte aligned */ -#define COUNT_ALIGN_MASK _CONST(COUNT_ALIGN_SIZE-1) - -/* - * With the introduction of Broadwell, Intel has introduced supervisor mode - * access protection -- SMAP. SMAP forces the kernel to set certain bits to - * enable access of user pages (AC in rflags, defines as PS_ACHK in - * <sys/psw.h>). One of the challenges is that the implementation of many of the - * userland copy routines directly use the kernel ones. For example, copyin and - * copyout simply go and jump to the do_copy_fault label and traditionally let - * those deal with the return for them. In fact, changing that is a can of frame - * pointers. - * - * Rules and Constraints: - * - * 1. 
For anything that's not in copy.s, we have it do explicit smap_disable() - * or smap_enable() calls. This is restricted to the following three places: - * DTrace, resume() in swtch.s and on_fault/no_fault. If you want to add it - * somewhere else, we should be thinking twice. - * - * 2. We try to toggle this at the smallest window possible. This means that if - * we take a fault, need to try to use a copyop in copyin() or copyout(), or any - * other function, we will always leave with SMAP enabled (the kernel cannot - * access user pages). - * - * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are - * explicitly only allowed to be called while in an on_fault()/no_fault() handler, - * which already takes care of ensuring that SMAP is enabled and disabled. Note - * this means that when under an on_fault()/no_fault() handler, one must not - * call the non-*_noerr() routines. - * - * 4. The first thing we should do after coming out of an lofault handler is to - * make sure that we call smap_enable() again to ensure that we are safely - * protected, as more often than not, we will have disabled smap to get there. - * - * 5. smap_enable() and smap_disable() don't exist: calls to these functions - * generate runtime relocations, that are then processed into the necessary - * clac/stac, via the krtld hotinlines mechanism and hotinline_smap(). - * - * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and - * SMAP_DISABLE_INSTR macro should be used. If the number of these is changed, - * you must update the constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below. - * - * 7. Generally this .s file is processed by a K&R style cpp. This means that it - * really has a lot of feelings about whitespace. In particular, if you have a - * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'. - * - * 8. In general, the kernel has its own value for rflags that gets used. This - * is maintained in a few different places which vary based on how the thread - * comes into existence and whether it's a user thread. In general, when the - * kernel takes a trap, it always will set ourselves to a known set of flags, - * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that - * PS_ACHK is cleared for us. In addition, when using the sysenter instruction, - * we mask off PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for - * where that gets masked off. - */ - -/* - * The optimal 64-bit bcopy and kcopy for modern x86 processors uses - * "rep smovq" for large sizes. Performance data shows that many calls to - * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance for - * these small sizes unrolled code is used. For medium sizes loops writing - * 64-bytes per loop are used. Transition points were determined experimentally. - */ -#define BZERO_USE_REP (1024) -#define BCOPY_DFLT_REP (128) -#define BCOPY_NHM_REP (768) - -/* - * Copy a block of storage, returning an error code if `from' or - * `to' takes a kernel pagefault which cannot be resolved. - * Returns errno value on pagefault error, 0 if all ok - */ - -/* - * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to - * additional call instructions. 
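[Editor's note: as a concrete illustration of rule 3 above, here is a minimal C sketch — not part of this change, and read_user_byte() is a hypothetical caller — of the on_fault()/no_fault() discipline under which the *_noerr() routines must run:]

```c
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/errno.h>

/*
 * Hypothetical caller: the *_noerr() copy routines may only be used
 * under an on_fault()/no_fault() pair, which is also where the SMAP
 * state is managed on their behalf (rule 3 above).
 */
static int
read_user_byte(const void *uaddr, uint8_t *valp)
{
	label_t ljb;

	if (on_fault(&ljb)) {
		/* A fault was taken inside copyin_noerr(); unwind. */
		no_fault();
		return (EFAULT);
	}
	copyin_noerr(uaddr, valp, sizeof (*valp));
	no_fault();
	return (0);
}
```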
- */ -#define SMAP_DISABLE_COUNT 16 -#define SMAP_ENABLE_COUNT 26 - -#define SMAP_DISABLE_INSTR(ITER) \ - .globl _smap_disable_patch_/**/ITER; \ - _smap_disable_patch_/**/ITER/**/:; \ - nop; nop; nop; - -#define SMAP_ENABLE_INSTR(ITER) \ - .globl _smap_enable_patch_/**/ITER; \ - _smap_enable_patch_/**/ITER/**/:; \ - nop; nop; nop; - - .globl kernelbase - .globl postbootkernelbase - - ENTRY(kcopy) - pushq %rbp - movq %rsp, %rbp -#ifdef DEBUG - cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ - jb 0f - cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ - jnb 1f -0: leaq .kcopy_panic_msg(%rip), %rdi - xorl %eax, %eax - call panic -1: -#endif - /* - * pass lofault value as 4th argument to do_copy_fault - */ - leaq _kcopy_copyerr(%rip), %rcx - movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ - -do_copy_fault: - movq T_LOFAULT(%r9), %r11 /* save the current lofault */ - movq %rcx, T_LOFAULT(%r9) /* new lofault */ - call bcopy_altentry - xorl %eax, %eax /* return 0 (success) */ - SMAP_ENABLE_INSTR(0) - - /* - * A fault during do_copy_fault is indicated through an errno value - * in %rax and we iretq from the trap handler to here. - */ -_kcopy_copyerr: - movq %r11, T_LOFAULT(%r9) /* restore original lofault */ - leave - ret - SET_SIZE(kcopy) - -#undef ARG_FROM -#undef ARG_TO -#undef ARG_COUNT - -#define COPY_LOOP_INIT(src, dst, cnt) \ - addq cnt, src; \ - addq cnt, dst; \ - shrq $3, cnt; \ - neg cnt - - /* Copy 16 bytes per loop. Uses %rax and %r8 */ -#define COPY_LOOP_BODY(src, dst, cnt) \ - prefetchnta 0x100(src, cnt, 8); \ - movq (src, cnt, 8), %rax; \ - movq 0x8(src, cnt, 8), %r8; \ - movnti %rax, (dst, cnt, 8); \ - movnti %r8, 0x8(dst, cnt, 8); \ - addq $2, cnt - - ENTRY(kcopy_nta) - pushq %rbp - movq %rsp, %rbp -#ifdef DEBUG - cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ - jb 0f - cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ - jnb 1f -0: leaq .kcopy_panic_msg(%rip), %rdi - xorl %eax, %eax - call panic -1: -#endif - - movq %gs:CPU_THREAD, %r9 - cmpq $0, %rcx /* No non-temporal access? */ - /* - * pass lofault value as 4th argument to do_copy_fault - */ - leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */ - jnz do_copy_fault /* use regular access */ - /* - * Make sure cnt is >= KCOPY_MIN_SIZE - */ - cmpq $KCOPY_MIN_SIZE, %rdx - jb do_copy_fault - - /* - * Make sure src and dst are NTA_ALIGN_SIZE aligned, - * count is COUNT_ALIGN_SIZE aligned. - */ - movq %rdi, %r10 - orq %rsi, %r10 - andq $NTA_ALIGN_MASK, %r10 - orq %rdx, %r10 - andq $COUNT_ALIGN_MASK, %r10 - jnz do_copy_fault - - ALTENTRY(do_copy_fault_nta) - movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ - movq T_LOFAULT(%r9), %r11 /* save the current lofault */ - movq %rcx, T_LOFAULT(%r9) /* new lofault */ - - /* - * COPY_LOOP_BODY uses %rax and %r8 - */ - COPY_LOOP_INIT(%rdi, %rsi, %rdx) -2: COPY_LOOP_BODY(%rdi, %rsi, %rdx) - jnz 2b - - mfence - xorl %eax, %eax /* return 0 (success) */ - SMAP_ENABLE_INSTR(1) - -_kcopy_nta_copyerr: - movq %r11, T_LOFAULT(%r9) /* restore original lofault */ - leave - ret - SET_SIZE(do_copy_fault_nta) - SET_SIZE(kcopy_nta) - - ENTRY(bcopy) -#ifdef DEBUG - orq %rdx, %rdx /* %rdx = count */ - jz 1f - cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ - jb 0f - cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ - jnb 1f -0: leaq .bcopy_panic_msg(%rip), %rdi - jmp call_panic /* setup stack and call panic */ -1: -#endif - /* - * bcopy_altentry() is called from kcopy, i.e., do_copy_fault. - * kcopy assumes that bcopy doesn't touch %r9 and %r11. 
If bcopy - * uses these registers in future they must be saved and restored. - */ - ALTENTRY(bcopy_altentry) -do_copy: -#define L(s) .bcopy/**/s - cmpq $0x50, %rdx /* 80 */ - jae bcopy_ck_size - - /* - * Performance data shows many caller's copy small buffers. So for - * best perf for these sizes unrolled code is used. Store data without - * worrying about alignment. - */ - leaq L(fwdPxQx)(%rip), %r10 - addq %rdx, %rdi - addq %rdx, %rsi - movslq (%r10,%rdx,4), %rcx - leaq (%rcx,%r10,1), %r10 - INDIRECT_JMP_REG(r10) - - .p2align 4 -L(fwdPxQx): - .int L(P0Q0)-L(fwdPxQx) /* 0 */ - .int L(P1Q0)-L(fwdPxQx) - .int L(P2Q0)-L(fwdPxQx) - .int L(P3Q0)-L(fwdPxQx) - .int L(P4Q0)-L(fwdPxQx) - .int L(P5Q0)-L(fwdPxQx) - .int L(P6Q0)-L(fwdPxQx) - .int L(P7Q0)-L(fwdPxQx) - - .int L(P0Q1)-L(fwdPxQx) /* 8 */ - .int L(P1Q1)-L(fwdPxQx) - .int L(P2Q1)-L(fwdPxQx) - .int L(P3Q1)-L(fwdPxQx) - .int L(P4Q1)-L(fwdPxQx) - .int L(P5Q1)-L(fwdPxQx) - .int L(P6Q1)-L(fwdPxQx) - .int L(P7Q1)-L(fwdPxQx) - - .int L(P0Q2)-L(fwdPxQx) /* 16 */ - .int L(P1Q2)-L(fwdPxQx) - .int L(P2Q2)-L(fwdPxQx) - .int L(P3Q2)-L(fwdPxQx) - .int L(P4Q2)-L(fwdPxQx) - .int L(P5Q2)-L(fwdPxQx) - .int L(P6Q2)-L(fwdPxQx) - .int L(P7Q2)-L(fwdPxQx) - - .int L(P0Q3)-L(fwdPxQx) /* 24 */ - .int L(P1Q3)-L(fwdPxQx) - .int L(P2Q3)-L(fwdPxQx) - .int L(P3Q3)-L(fwdPxQx) - .int L(P4Q3)-L(fwdPxQx) - .int L(P5Q3)-L(fwdPxQx) - .int L(P6Q3)-L(fwdPxQx) - .int L(P7Q3)-L(fwdPxQx) - - .int L(P0Q4)-L(fwdPxQx) /* 32 */ - .int L(P1Q4)-L(fwdPxQx) - .int L(P2Q4)-L(fwdPxQx) - .int L(P3Q4)-L(fwdPxQx) - .int L(P4Q4)-L(fwdPxQx) - .int L(P5Q4)-L(fwdPxQx) - .int L(P6Q4)-L(fwdPxQx) - .int L(P7Q4)-L(fwdPxQx) - - .int L(P0Q5)-L(fwdPxQx) /* 40 */ - .int L(P1Q5)-L(fwdPxQx) - .int L(P2Q5)-L(fwdPxQx) - .int L(P3Q5)-L(fwdPxQx) - .int L(P4Q5)-L(fwdPxQx) - .int L(P5Q5)-L(fwdPxQx) - .int L(P6Q5)-L(fwdPxQx) - .int L(P7Q5)-L(fwdPxQx) - - .int L(P0Q6)-L(fwdPxQx) /* 48 */ - .int L(P1Q6)-L(fwdPxQx) - .int L(P2Q6)-L(fwdPxQx) - .int L(P3Q6)-L(fwdPxQx) - .int L(P4Q6)-L(fwdPxQx) - .int L(P5Q6)-L(fwdPxQx) - .int L(P6Q6)-L(fwdPxQx) - .int L(P7Q6)-L(fwdPxQx) - - .int L(P0Q7)-L(fwdPxQx) /* 56 */ - .int L(P1Q7)-L(fwdPxQx) - .int L(P2Q7)-L(fwdPxQx) - .int L(P3Q7)-L(fwdPxQx) - .int L(P4Q7)-L(fwdPxQx) - .int L(P5Q7)-L(fwdPxQx) - .int L(P6Q7)-L(fwdPxQx) - .int L(P7Q7)-L(fwdPxQx) - - .int L(P0Q8)-L(fwdPxQx) /* 64 */ - .int L(P1Q8)-L(fwdPxQx) - .int L(P2Q8)-L(fwdPxQx) - .int L(P3Q8)-L(fwdPxQx) - .int L(P4Q8)-L(fwdPxQx) - .int L(P5Q8)-L(fwdPxQx) - .int L(P6Q8)-L(fwdPxQx) - .int L(P7Q8)-L(fwdPxQx) - - .int L(P0Q9)-L(fwdPxQx) /* 72 */ - .int L(P1Q9)-L(fwdPxQx) - .int L(P2Q9)-L(fwdPxQx) - .int L(P3Q9)-L(fwdPxQx) - .int L(P4Q9)-L(fwdPxQx) - .int L(P5Q9)-L(fwdPxQx) - .int L(P6Q9)-L(fwdPxQx) - .int L(P7Q9)-L(fwdPxQx) /* 79 */ - - .p2align 4 -L(P0Q9): - mov -0x48(%rdi), %rcx - mov %rcx, -0x48(%rsi) -L(P0Q8): - mov -0x40(%rdi), %r10 - mov %r10, -0x40(%rsi) -L(P0Q7): - mov -0x38(%rdi), %r8 - mov %r8, -0x38(%rsi) -L(P0Q6): - mov -0x30(%rdi), %rcx - mov %rcx, -0x30(%rsi) -L(P0Q5): - mov -0x28(%rdi), %r10 - mov %r10, -0x28(%rsi) -L(P0Q4): - mov -0x20(%rdi), %r8 - mov %r8, -0x20(%rsi) -L(P0Q3): - mov -0x18(%rdi), %rcx - mov %rcx, -0x18(%rsi) -L(P0Q2): - mov -0x10(%rdi), %r10 - mov %r10, -0x10(%rsi) -L(P0Q1): - mov -0x8(%rdi), %r8 - mov %r8, -0x8(%rsi) -L(P0Q0): - ret - - .p2align 4 -L(P1Q9): - mov -0x49(%rdi), %r8 - mov %r8, -0x49(%rsi) -L(P1Q8): - mov -0x41(%rdi), %rcx - mov %rcx, -0x41(%rsi) -L(P1Q7): - mov -0x39(%rdi), %r10 - mov %r10, -0x39(%rsi) -L(P1Q6): - mov -0x31(%rdi), %r8 - mov %r8, -0x31(%rsi) -L(P1Q5): - mov 
-0x29(%rdi), %rcx - mov %rcx, -0x29(%rsi) -L(P1Q4): - mov -0x21(%rdi), %r10 - mov %r10, -0x21(%rsi) -L(P1Q3): - mov -0x19(%rdi), %r8 - mov %r8, -0x19(%rsi) -L(P1Q2): - mov -0x11(%rdi), %rcx - mov %rcx, -0x11(%rsi) -L(P1Q1): - mov -0x9(%rdi), %r10 - mov %r10, -0x9(%rsi) -L(P1Q0): - movzbq -0x1(%rdi), %r8 - mov %r8b, -0x1(%rsi) - ret - - .p2align 4 -L(P2Q9): - mov -0x4a(%rdi), %r8 - mov %r8, -0x4a(%rsi) -L(P2Q8): - mov -0x42(%rdi), %rcx - mov %rcx, -0x42(%rsi) -L(P2Q7): - mov -0x3a(%rdi), %r10 - mov %r10, -0x3a(%rsi) -L(P2Q6): - mov -0x32(%rdi), %r8 - mov %r8, -0x32(%rsi) -L(P2Q5): - mov -0x2a(%rdi), %rcx - mov %rcx, -0x2a(%rsi) -L(P2Q4): - mov -0x22(%rdi), %r10 - mov %r10, -0x22(%rsi) -L(P2Q3): - mov -0x1a(%rdi), %r8 - mov %r8, -0x1a(%rsi) -L(P2Q2): - mov -0x12(%rdi), %rcx - mov %rcx, -0x12(%rsi) -L(P2Q1): - mov -0xa(%rdi), %r10 - mov %r10, -0xa(%rsi) -L(P2Q0): - movzwq -0x2(%rdi), %r8 - mov %r8w, -0x2(%rsi) - ret - - .p2align 4 -L(P3Q9): - mov -0x4b(%rdi), %r8 - mov %r8, -0x4b(%rsi) -L(P3Q8): - mov -0x43(%rdi), %rcx - mov %rcx, -0x43(%rsi) -L(P3Q7): - mov -0x3b(%rdi), %r10 - mov %r10, -0x3b(%rsi) -L(P3Q6): - mov -0x33(%rdi), %r8 - mov %r8, -0x33(%rsi) -L(P3Q5): - mov -0x2b(%rdi), %rcx - mov %rcx, -0x2b(%rsi) -L(P3Q4): - mov -0x23(%rdi), %r10 - mov %r10, -0x23(%rsi) -L(P3Q3): - mov -0x1b(%rdi), %r8 - mov %r8, -0x1b(%rsi) -L(P3Q2): - mov -0x13(%rdi), %rcx - mov %rcx, -0x13(%rsi) -L(P3Q1): - mov -0xb(%rdi), %r10 - mov %r10, -0xb(%rsi) - /* - * These trailing loads/stores have to do all their loads 1st, - * then do the stores. - */ -L(P3Q0): - movzwq -0x3(%rdi), %r8 - movzbq -0x1(%rdi), %r10 - mov %r8w, -0x3(%rsi) - mov %r10b, -0x1(%rsi) - ret - - .p2align 4 -L(P4Q9): - mov -0x4c(%rdi), %r8 - mov %r8, -0x4c(%rsi) -L(P4Q8): - mov -0x44(%rdi), %rcx - mov %rcx, -0x44(%rsi) -L(P4Q7): - mov -0x3c(%rdi), %r10 - mov %r10, -0x3c(%rsi) -L(P4Q6): - mov -0x34(%rdi), %r8 - mov %r8, -0x34(%rsi) -L(P4Q5): - mov -0x2c(%rdi), %rcx - mov %rcx, -0x2c(%rsi) -L(P4Q4): - mov -0x24(%rdi), %r10 - mov %r10, -0x24(%rsi) -L(P4Q3): - mov -0x1c(%rdi), %r8 - mov %r8, -0x1c(%rsi) -L(P4Q2): - mov -0x14(%rdi), %rcx - mov %rcx, -0x14(%rsi) -L(P4Q1): - mov -0xc(%rdi), %r10 - mov %r10, -0xc(%rsi) -L(P4Q0): - mov -0x4(%rdi), %r8d - mov %r8d, -0x4(%rsi) - ret - - .p2align 4 -L(P5Q9): - mov -0x4d(%rdi), %r8 - mov %r8, -0x4d(%rsi) -L(P5Q8): - mov -0x45(%rdi), %rcx - mov %rcx, -0x45(%rsi) -L(P5Q7): - mov -0x3d(%rdi), %r10 - mov %r10, -0x3d(%rsi) -L(P5Q6): - mov -0x35(%rdi), %r8 - mov %r8, -0x35(%rsi) -L(P5Q5): - mov -0x2d(%rdi), %rcx - mov %rcx, -0x2d(%rsi) -L(P5Q4): - mov -0x25(%rdi), %r10 - mov %r10, -0x25(%rsi) -L(P5Q3): - mov -0x1d(%rdi), %r8 - mov %r8, -0x1d(%rsi) -L(P5Q2): - mov -0x15(%rdi), %rcx - mov %rcx, -0x15(%rsi) -L(P5Q1): - mov -0xd(%rdi), %r10 - mov %r10, -0xd(%rsi) -L(P5Q0): - mov -0x5(%rdi), %r8d - movzbq -0x1(%rdi), %r10 - mov %r8d, -0x5(%rsi) - mov %r10b, -0x1(%rsi) - ret - - .p2align 4 -L(P6Q9): - mov -0x4e(%rdi), %r8 - mov %r8, -0x4e(%rsi) -L(P6Q8): - mov -0x46(%rdi), %rcx - mov %rcx, -0x46(%rsi) -L(P6Q7): - mov -0x3e(%rdi), %r10 - mov %r10, -0x3e(%rsi) -L(P6Q6): - mov -0x36(%rdi), %r8 - mov %r8, -0x36(%rsi) -L(P6Q5): - mov -0x2e(%rdi), %rcx - mov %rcx, -0x2e(%rsi) -L(P6Q4): - mov -0x26(%rdi), %r10 - mov %r10, -0x26(%rsi) -L(P6Q3): - mov -0x1e(%rdi), %r8 - mov %r8, -0x1e(%rsi) -L(P6Q2): - mov -0x16(%rdi), %rcx - mov %rcx, -0x16(%rsi) -L(P6Q1): - mov -0xe(%rdi), %r10 - mov %r10, -0xe(%rsi) -L(P6Q0): - mov -0x6(%rdi), %r8d - movzwq -0x2(%rdi), %r10 - mov %r8d, -0x6(%rsi) - mov %r10w, -0x2(%rsi) - ret - - .p2align 
4 -L(P7Q9): - mov -0x4f(%rdi), %r8 - mov %r8, -0x4f(%rsi) -L(P7Q8): - mov -0x47(%rdi), %rcx - mov %rcx, -0x47(%rsi) -L(P7Q7): - mov -0x3f(%rdi), %r10 - mov %r10, -0x3f(%rsi) -L(P7Q6): - mov -0x37(%rdi), %r8 - mov %r8, -0x37(%rsi) -L(P7Q5): - mov -0x2f(%rdi), %rcx - mov %rcx, -0x2f(%rsi) -L(P7Q4): - mov -0x27(%rdi), %r10 - mov %r10, -0x27(%rsi) -L(P7Q3): - mov -0x1f(%rdi), %r8 - mov %r8, -0x1f(%rsi) -L(P7Q2): - mov -0x17(%rdi), %rcx - mov %rcx, -0x17(%rsi) -L(P7Q1): - mov -0xf(%rdi), %r10 - mov %r10, -0xf(%rsi) -L(P7Q0): - mov -0x7(%rdi), %r8d - movzwq -0x3(%rdi), %r10 - movzbq -0x1(%rdi), %rcx - mov %r8d, -0x7(%rsi) - mov %r10w, -0x3(%rsi) - mov %cl, -0x1(%rsi) - ret - - /* - * For large sizes rep smovq is fastest. - * Transition point determined experimentally as measured on - * Intel Xeon processors (incl. Nehalem and previous generations) and - * AMD Opteron. The transition value is patched at boot time to avoid - * memory reference hit. - */ - .globl bcopy_patch_start -bcopy_patch_start: - cmpq $BCOPY_NHM_REP, %rdx - .globl bcopy_patch_end -bcopy_patch_end: - - .p2align 4 - ALTENTRY(bcopy_ck_size) - - cmpq $BCOPY_DFLT_REP, %rdx - jae L(use_rep) - - /* - * Align to a 8-byte boundary. Avoids penalties from unaligned stores - * as well as from stores spanning cachelines. - */ - test $0x7, %rsi - jz L(aligned_loop) - test $0x1, %rsi - jz 2f - movzbq (%rdi), %r8 - dec %rdx - inc %rdi - mov %r8b, (%rsi) - inc %rsi -2: - test $0x2, %rsi - jz 4f - movzwq (%rdi), %r8 - sub $0x2, %rdx - add $0x2, %rdi - mov %r8w, (%rsi) - add $0x2, %rsi -4: - test $0x4, %rsi - jz L(aligned_loop) - mov (%rdi), %r8d - sub $0x4, %rdx - add $0x4, %rdi - mov %r8d, (%rsi) - add $0x4, %rsi - - /* - * Copy 64-bytes per loop - */ - .p2align 4 -L(aligned_loop): - mov (%rdi), %r8 - mov 0x8(%rdi), %r10 - lea -0x40(%rdx), %rdx - mov %r8, (%rsi) - mov %r10, 0x8(%rsi) - mov 0x10(%rdi), %rcx - mov 0x18(%rdi), %r8 - mov %rcx, 0x10(%rsi) - mov %r8, 0x18(%rsi) - - cmp $0x40, %rdx - mov 0x20(%rdi), %r10 - mov 0x28(%rdi), %rcx - mov %r10, 0x20(%rsi) - mov %rcx, 0x28(%rsi) - mov 0x30(%rdi), %r8 - mov 0x38(%rdi), %r10 - lea 0x40(%rdi), %rdi - mov %r8, 0x30(%rsi) - mov %r10, 0x38(%rsi) - lea 0x40(%rsi), %rsi - jae L(aligned_loop) - - /* - * Copy remaining bytes (0-63) - */ -L(do_remainder): - leaq L(fwdPxQx)(%rip), %r10 - addq %rdx, %rdi - addq %rdx, %rsi - movslq (%r10,%rdx,4), %rcx - leaq (%rcx,%r10,1), %r10 - INDIRECT_JMP_REG(r10) - - /* - * Use rep smovq. Clear remainder via unrolled code - */ - .p2align 4 -L(use_rep): - xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */ - movq %rdx, %rcx /* %rcx = count */ - shrq $3, %rcx /* 8-byte word count */ - rep - smovq - - xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */ - andq $7, %rdx /* remainder */ - jnz L(do_remainder) - ret -#undef L - SET_SIZE(bcopy_ck_size) - -#ifdef DEBUG - /* - * Setup frame on the run-time stack. The end of the input argument - * area must be aligned on a 16 byte boundary. The stack pointer %rsp, - * always points to the end of the latest allocated stack frame. - * panic(const char *format, ...) is a varargs function. When a - * function taking variable arguments is called, %rax must be set - * to eight times the number of floating point parameters passed - * to the function in SSE registers. 
- */ -call_panic: - pushq %rbp /* align stack properly */ - movq %rsp, %rbp - xorl %eax, %eax /* no variable arguments */ - call panic /* %rdi = format string */ -#endif - SET_SIZE(bcopy_altentry) - SET_SIZE(bcopy) - - -/* - * Zero a block of storage, returning an error code if we - * take a kernel pagefault which cannot be resolved. - * Returns errno value on pagefault error, 0 if all ok - */ - - ENTRY(kzero) -#ifdef DEBUG - cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ - jnb 0f - leaq .kzero_panic_msg(%rip), %rdi - jmp call_panic /* setup stack and call panic */ -0: -#endif - /* - * pass lofault value as 3rd argument for fault return - */ - leaq _kzeroerr(%rip), %rdx - - movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ - movq T_LOFAULT(%r9), %r11 /* save the current lofault */ - movq %rdx, T_LOFAULT(%r9) /* new lofault */ - call bzero_altentry - xorl %eax, %eax - movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ - ret - /* - * A fault during bzero is indicated through an errno value - * in %rax when we iretq to here. - */ -_kzeroerr: - addq $8, %rsp /* pop bzero_altentry call ret addr */ - movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ - ret - SET_SIZE(kzero) - -/* - * Zero a block of storage. - */ - - ENTRY(bzero) -#ifdef DEBUG - cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ - jnb 0f - leaq .bzero_panic_msg(%rip), %rdi - jmp call_panic /* setup stack and call panic */ -0: -#endif - ALTENTRY(bzero_altentry) -do_zero: -#define L(s) .bzero/**/s - xorl %eax, %eax - - cmpq $0x50, %rsi /* 80 */ - jae L(ck_align) - - /* - * Performance data shows many caller's are zeroing small buffers. So - * for best perf for these sizes unrolled code is used. Store zeros - * without worrying about alignment. - */ - leaq L(setPxQx)(%rip), %r10 - addq %rsi, %rdi - movslq (%r10,%rsi,4), %rcx - leaq (%rcx,%r10,1), %r10 - INDIRECT_JMP_REG(r10) - - .p2align 4 -L(setPxQx): - .int L(P0Q0)-L(setPxQx) /* 0 */ - .int L(P1Q0)-L(setPxQx) - .int L(P2Q0)-L(setPxQx) - .int L(P3Q0)-L(setPxQx) - .int L(P4Q0)-L(setPxQx) - .int L(P5Q0)-L(setPxQx) - .int L(P6Q0)-L(setPxQx) - .int L(P7Q0)-L(setPxQx) - - .int L(P0Q1)-L(setPxQx) /* 8 */ - .int L(P1Q1)-L(setPxQx) - .int L(P2Q1)-L(setPxQx) - .int L(P3Q1)-L(setPxQx) - .int L(P4Q1)-L(setPxQx) - .int L(P5Q1)-L(setPxQx) - .int L(P6Q1)-L(setPxQx) - .int L(P7Q1)-L(setPxQx) - - .int L(P0Q2)-L(setPxQx) /* 16 */ - .int L(P1Q2)-L(setPxQx) - .int L(P2Q2)-L(setPxQx) - .int L(P3Q2)-L(setPxQx) - .int L(P4Q2)-L(setPxQx) - .int L(P5Q2)-L(setPxQx) - .int L(P6Q2)-L(setPxQx) - .int L(P7Q2)-L(setPxQx) - - .int L(P0Q3)-L(setPxQx) /* 24 */ - .int L(P1Q3)-L(setPxQx) - .int L(P2Q3)-L(setPxQx) - .int L(P3Q3)-L(setPxQx) - .int L(P4Q3)-L(setPxQx) - .int L(P5Q3)-L(setPxQx) - .int L(P6Q3)-L(setPxQx) - .int L(P7Q3)-L(setPxQx) - - .int L(P0Q4)-L(setPxQx) /* 32 */ - .int L(P1Q4)-L(setPxQx) - .int L(P2Q4)-L(setPxQx) - .int L(P3Q4)-L(setPxQx) - .int L(P4Q4)-L(setPxQx) - .int L(P5Q4)-L(setPxQx) - .int L(P6Q4)-L(setPxQx) - .int L(P7Q4)-L(setPxQx) - - .int L(P0Q5)-L(setPxQx) /* 40 */ - .int L(P1Q5)-L(setPxQx) - .int L(P2Q5)-L(setPxQx) - .int L(P3Q5)-L(setPxQx) - .int L(P4Q5)-L(setPxQx) - .int L(P5Q5)-L(setPxQx) - .int L(P6Q5)-L(setPxQx) - .int L(P7Q5)-L(setPxQx) - - .int L(P0Q6)-L(setPxQx) /* 48 */ - .int L(P1Q6)-L(setPxQx) - .int L(P2Q6)-L(setPxQx) - .int L(P3Q6)-L(setPxQx) - .int L(P4Q6)-L(setPxQx) - .int L(P5Q6)-L(setPxQx) - .int L(P6Q6)-L(setPxQx) - .int L(P7Q6)-L(setPxQx) - - .int L(P0Q7)-L(setPxQx) /* 56 */ - .int L(P1Q7)-L(setPxQx) - .int L(P2Q7)-L(setPxQx) - .int 
L(P3Q7)-L(setPxQx) - .int L(P4Q7)-L(setPxQx) - .int L(P5Q7)-L(setPxQx) - .int L(P6Q7)-L(setPxQx) - .int L(P7Q7)-L(setPxQx) - - .int L(P0Q8)-L(setPxQx) /* 64 */ - .int L(P1Q8)-L(setPxQx) - .int L(P2Q8)-L(setPxQx) - .int L(P3Q8)-L(setPxQx) - .int L(P4Q8)-L(setPxQx) - .int L(P5Q8)-L(setPxQx) - .int L(P6Q8)-L(setPxQx) - .int L(P7Q8)-L(setPxQx) - - .int L(P0Q9)-L(setPxQx) /* 72 */ - .int L(P1Q9)-L(setPxQx) - .int L(P2Q9)-L(setPxQx) - .int L(P3Q9)-L(setPxQx) - .int L(P4Q9)-L(setPxQx) - .int L(P5Q9)-L(setPxQx) - .int L(P6Q9)-L(setPxQx) - .int L(P7Q9)-L(setPxQx) /* 79 */ - - .p2align 4 -L(P0Q9): mov %rax, -0x48(%rdi) -L(P0Q8): mov %rax, -0x40(%rdi) -L(P0Q7): mov %rax, -0x38(%rdi) -L(P0Q6): mov %rax, -0x30(%rdi) -L(P0Q5): mov %rax, -0x28(%rdi) -L(P0Q4): mov %rax, -0x20(%rdi) -L(P0Q3): mov %rax, -0x18(%rdi) -L(P0Q2): mov %rax, -0x10(%rdi) -L(P0Q1): mov %rax, -0x8(%rdi) -L(P0Q0): - ret - - .p2align 4 -L(P1Q9): mov %rax, -0x49(%rdi) -L(P1Q8): mov %rax, -0x41(%rdi) -L(P1Q7): mov %rax, -0x39(%rdi) -L(P1Q6): mov %rax, -0x31(%rdi) -L(P1Q5): mov %rax, -0x29(%rdi) -L(P1Q4): mov %rax, -0x21(%rdi) -L(P1Q3): mov %rax, -0x19(%rdi) -L(P1Q2): mov %rax, -0x11(%rdi) -L(P1Q1): mov %rax, -0x9(%rdi) -L(P1Q0): mov %al, -0x1(%rdi) - ret - - .p2align 4 -L(P2Q9): mov %rax, -0x4a(%rdi) -L(P2Q8): mov %rax, -0x42(%rdi) -L(P2Q7): mov %rax, -0x3a(%rdi) -L(P2Q6): mov %rax, -0x32(%rdi) -L(P2Q5): mov %rax, -0x2a(%rdi) -L(P2Q4): mov %rax, -0x22(%rdi) -L(P2Q3): mov %rax, -0x1a(%rdi) -L(P2Q2): mov %rax, -0x12(%rdi) -L(P2Q1): mov %rax, -0xa(%rdi) -L(P2Q0): mov %ax, -0x2(%rdi) - ret - - .p2align 4 -L(P3Q9): mov %rax, -0x4b(%rdi) -L(P3Q8): mov %rax, -0x43(%rdi) -L(P3Q7): mov %rax, -0x3b(%rdi) -L(P3Q6): mov %rax, -0x33(%rdi) -L(P3Q5): mov %rax, -0x2b(%rdi) -L(P3Q4): mov %rax, -0x23(%rdi) -L(P3Q3): mov %rax, -0x1b(%rdi) -L(P3Q2): mov %rax, -0x13(%rdi) -L(P3Q1): mov %rax, -0xb(%rdi) -L(P3Q0): mov %ax, -0x3(%rdi) - mov %al, -0x1(%rdi) - ret - - .p2align 4 -L(P4Q9): mov %rax, -0x4c(%rdi) -L(P4Q8): mov %rax, -0x44(%rdi) -L(P4Q7): mov %rax, -0x3c(%rdi) -L(P4Q6): mov %rax, -0x34(%rdi) -L(P4Q5): mov %rax, -0x2c(%rdi) -L(P4Q4): mov %rax, -0x24(%rdi) -L(P4Q3): mov %rax, -0x1c(%rdi) -L(P4Q2): mov %rax, -0x14(%rdi) -L(P4Q1): mov %rax, -0xc(%rdi) -L(P4Q0): mov %eax, -0x4(%rdi) - ret - - .p2align 4 -L(P5Q9): mov %rax, -0x4d(%rdi) -L(P5Q8): mov %rax, -0x45(%rdi) -L(P5Q7): mov %rax, -0x3d(%rdi) -L(P5Q6): mov %rax, -0x35(%rdi) -L(P5Q5): mov %rax, -0x2d(%rdi) -L(P5Q4): mov %rax, -0x25(%rdi) -L(P5Q3): mov %rax, -0x1d(%rdi) -L(P5Q2): mov %rax, -0x15(%rdi) -L(P5Q1): mov %rax, -0xd(%rdi) -L(P5Q0): mov %eax, -0x5(%rdi) - mov %al, -0x1(%rdi) - ret - - .p2align 4 -L(P6Q9): mov %rax, -0x4e(%rdi) -L(P6Q8): mov %rax, -0x46(%rdi) -L(P6Q7): mov %rax, -0x3e(%rdi) -L(P6Q6): mov %rax, -0x36(%rdi) -L(P6Q5): mov %rax, -0x2e(%rdi) -L(P6Q4): mov %rax, -0x26(%rdi) -L(P6Q3): mov %rax, -0x1e(%rdi) -L(P6Q2): mov %rax, -0x16(%rdi) -L(P6Q1): mov %rax, -0xe(%rdi) -L(P6Q0): mov %eax, -0x6(%rdi) - mov %ax, -0x2(%rdi) - ret - - .p2align 4 -L(P7Q9): mov %rax, -0x4f(%rdi) -L(P7Q8): mov %rax, -0x47(%rdi) -L(P7Q7): mov %rax, -0x3f(%rdi) -L(P7Q6): mov %rax, -0x37(%rdi) -L(P7Q5): mov %rax, -0x2f(%rdi) -L(P7Q4): mov %rax, -0x27(%rdi) -L(P7Q3): mov %rax, -0x1f(%rdi) -L(P7Q2): mov %rax, -0x17(%rdi) -L(P7Q1): mov %rax, -0xf(%rdi) -L(P7Q0): mov %eax, -0x7(%rdi) - mov %ax, -0x3(%rdi) - mov %al, -0x1(%rdi) - ret - - /* - * Align to a 16-byte boundary. Avoids penalties from unaligned stores - * as well as from stores spanning cachelines. 
Note 16-byte alignment - * is better in case where rep sstosq is used. - */ - .p2align 4 -L(ck_align): - test $0xf, %rdi - jz L(aligned_now) - test $1, %rdi - jz 2f - mov %al, (%rdi) - dec %rsi - lea 1(%rdi),%rdi -2: - test $2, %rdi - jz 4f - mov %ax, (%rdi) - sub $2, %rsi - lea 2(%rdi),%rdi -4: - test $4, %rdi - jz 8f - mov %eax, (%rdi) - sub $4, %rsi - lea 4(%rdi),%rdi -8: - test $8, %rdi - jz L(aligned_now) - mov %rax, (%rdi) - sub $8, %rsi - lea 8(%rdi),%rdi - - /* - * For large sizes rep sstoq is fastest. - * Transition point determined experimentally as measured on - * Intel Xeon processors (incl. Nehalem) and AMD Opteron. - */ -L(aligned_now): - cmp $BZERO_USE_REP, %rsi - ja L(use_rep) - - /* - * zero 64-bytes per loop - */ - .p2align 4 -L(bzero_loop): - leaq -0x40(%rsi), %rsi - cmpq $0x40, %rsi - movq %rax, (%rdi) - movq %rax, 0x8(%rdi) - movq %rax, 0x10(%rdi) - movq %rax, 0x18(%rdi) - movq %rax, 0x20(%rdi) - movq %rax, 0x28(%rdi) - movq %rax, 0x30(%rdi) - movq %rax, 0x38(%rdi) - leaq 0x40(%rdi), %rdi - jae L(bzero_loop) - - /* - * Clear any remaining bytes.. - */ -9: - leaq L(setPxQx)(%rip), %r10 - addq %rsi, %rdi - movslq (%r10,%rsi,4), %rcx - leaq (%rcx,%r10,1), %r10 - INDIRECT_JMP_REG(r10) - - /* - * Use rep sstoq. Clear any remainder via unrolled code - */ - .p2align 4 -L(use_rep): - movq %rsi, %rcx /* get size in bytes */ - shrq $3, %rcx /* count of 8-byte words to zero */ - rep - sstoq /* %rcx = words to clear (%rax=0) */ - andq $7, %rsi /* remaining bytes */ - jnz 9b - ret -#undef L - SET_SIZE(bzero_altentry) - SET_SIZE(bzero) - -/* - * Transfer data to and from user space - - * Note that these routines can cause faults - * It is assumed that the kernel has nothing at - * less than KERNELBASE in the virtual address space. - * - * Note that copyin(9F) and copyout(9F) are part of the - * DDI/DKI which specifies that they return '-1' on "errors." - * - * Sigh. - * - * So there's two extremely similar routines - xcopyin_nta() and - * xcopyout_nta() which return the errno that we've faithfully computed. - * This allows other callers (e.g. uiomove(9F)) to work correctly. - * Given that these are used pretty heavily, we expand the calling - * sequences inline for all flavours (rather than making wrappers). - */ - -/* - * Copy user data to kernel space. 
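[Editor's note: to make the calling contract above concrete, a hedged C sketch — fetch_args() and struct my_args are illustrative, not from this commit — of the DDI-specified -1 return of copyin(9F); the xcopyin_nta() flavor instead returns the errno it computed, which is what callers such as uiomove(9F) rely on:]

```c
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/errno.h>

struct my_args { int cmd; int flags; };	/* illustrative */

static int
fetch_args(const void *uarg, struct my_args *kargs)
{
	/* copyin(9F) only reports success (0) or failure (-1). */
	if (copyin(uarg, kargs, sizeof (*kargs)) != 0)
		return (EFAULT);
	return (0);
}
```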
- */ - - ENTRY(copyin) - pushq %rbp - movq %rsp, %rbp - subq $24, %rsp - - /* - * save args in case we trap and need to rerun as a copyop - */ - movq %rdi, (%rsp) - movq %rsi, 0x8(%rsp) - movq %rdx, 0x10(%rsp) - - movq kernelbase(%rip), %rax -#ifdef DEBUG - cmpq %rax, %rsi /* %rsi = kaddr */ - jnb 1f - leaq .copyin_panic_msg(%rip), %rdi - xorl %eax, %eax - call panic -1: -#endif - /* - * pass lofault value as 4th argument to do_copy_fault - */ - leaq _copyin_err(%rip), %rcx - - movq %gs:CPU_THREAD, %r9 - cmpq %rax, %rdi /* test uaddr < kernelbase */ - jae 3f /* take copyop if uaddr > kernelbase */ - SMAP_DISABLE_INSTR(0) - jmp do_copy_fault /* Takes care of leave for us */ - -_copyin_err: - SMAP_ENABLE_INSTR(2) - movq %r11, T_LOFAULT(%r9) /* restore original lofault */ - addq $8, %rsp /* pop bcopy_altentry call ret addr */ -3: - movq T_COPYOPS(%r9), %rax - cmpq $0, %rax - jz 2f - /* - * reload args for the copyop - */ - movq (%rsp), %rdi - movq 0x8(%rsp), %rsi - movq 0x10(%rsp), %rdx - leave - movq CP_COPYIN(%rax), %rax - INDIRECT_JMP_REG(rax) - -2: movl $-1, %eax - leave - ret - SET_SIZE(copyin) - - ENTRY(xcopyin_nta) - pushq %rbp - movq %rsp, %rbp - subq $24, %rsp - - /* - * save args in case we trap and need to rerun as a copyop - * %rcx is consumed in this routine so we don't need to save - * it. - */ - movq %rdi, (%rsp) - movq %rsi, 0x8(%rsp) - movq %rdx, 0x10(%rsp) - - movq kernelbase(%rip), %rax -#ifdef DEBUG - cmpq %rax, %rsi /* %rsi = kaddr */ - jnb 1f - leaq .xcopyin_panic_msg(%rip), %rdi - xorl %eax, %eax - call panic -1: -#endif - movq %gs:CPU_THREAD, %r9 - cmpq %rax, %rdi /* test uaddr < kernelbase */ - jae 4f - cmpq $0, %rcx /* No non-temporal access? */ - /* - * pass lofault value as 4th argument to do_copy_fault - */ - leaq _xcopyin_err(%rip), %rcx /* doesn't set rflags */ - jnz 6f /* use regular access */ - /* - * Make sure cnt is >= XCOPY_MIN_SIZE bytes - */ - cmpq $XCOPY_MIN_SIZE, %rdx - jae 5f -6: - SMAP_DISABLE_INSTR(1) - jmp do_copy_fault - - /* - * Make sure src and dst are NTA_ALIGN_SIZE aligned, - * count is COUNT_ALIGN_SIZE aligned. - */ -5: - movq %rdi, %r10 - orq %rsi, %r10 - andq $NTA_ALIGN_MASK, %r10 - orq %rdx, %r10 - andq $COUNT_ALIGN_MASK, %r10 - jnz 6b - leaq _xcopyin_nta_err(%rip), %rcx /* doesn't set rflags */ - SMAP_DISABLE_INSTR(2) - jmp do_copy_fault_nta /* use non-temporal access */ - -4: - movl $EFAULT, %eax - jmp 3f - - /* - * A fault during do_copy_fault or do_copy_fault_nta is - * indicated through an errno value in %rax and we iret from the - * trap handler to here. - */ -_xcopyin_err: - addq $8, %rsp /* pop bcopy_altentry call ret addr */ -_xcopyin_nta_err: - SMAP_ENABLE_INSTR(3) - movq %r11, T_LOFAULT(%r9) /* restore original lofault */ -3: - movq T_COPYOPS(%r9), %r8 - cmpq $0, %r8 - jz 2f - - /* - * reload args for the copyop - */ - movq (%rsp), %rdi - movq 0x8(%rsp), %rsi - movq 0x10(%rsp), %rdx - leave - movq CP_XCOPYIN(%r8), %r8 - INDIRECT_JMP_REG(r8) - -2: leave - ret - SET_SIZE(xcopyin_nta) - -/* - * Copy kernel data to user space. 
- */ - - ENTRY(copyout) - pushq %rbp - movq %rsp, %rbp - subq $24, %rsp - - /* - * save args in case we trap and need to rerun as a copyop - */ - movq %rdi, (%rsp) - movq %rsi, 0x8(%rsp) - movq %rdx, 0x10(%rsp) - - movq kernelbase(%rip), %rax -#ifdef DEBUG - cmpq %rax, %rdi /* %rdi = kaddr */ - jnb 1f - leaq .copyout_panic_msg(%rip), %rdi - xorl %eax, %eax - call panic -1: -#endif - /* - * pass lofault value as 4th argument to do_copy_fault - */ - leaq _copyout_err(%rip), %rcx - - movq %gs:CPU_THREAD, %r9 - cmpq %rax, %rsi /* test uaddr < kernelbase */ - jae 3f /* take copyop if uaddr > kernelbase */ - SMAP_DISABLE_INSTR(3) - jmp do_copy_fault /* Calls leave for us */ - -_copyout_err: - SMAP_ENABLE_INSTR(4) - movq %r11, T_LOFAULT(%r9) /* restore original lofault */ - addq $8, %rsp /* pop bcopy_altentry call ret addr */ -3: - movq T_COPYOPS(%r9), %rax - cmpq $0, %rax - jz 2f - - /* - * reload args for the copyop - */ - movq (%rsp), %rdi - movq 0x8(%rsp), %rsi - movq 0x10(%rsp), %rdx - leave - movq CP_COPYOUT(%rax), %rax - INDIRECT_JMP_REG(rax) - -2: movl $-1, %eax - leave - ret - SET_SIZE(copyout) - - ENTRY(xcopyout_nta) - pushq %rbp - movq %rsp, %rbp - subq $24, %rsp - - /* - * save args in case we trap and need to rerun as a copyop - */ - movq %rdi, (%rsp) - movq %rsi, 0x8(%rsp) - movq %rdx, 0x10(%rsp) - - movq kernelbase(%rip), %rax -#ifdef DEBUG - cmpq %rax, %rdi /* %rdi = kaddr */ - jnb 1f - leaq .xcopyout_panic_msg(%rip), %rdi - xorl %eax, %eax - call panic -1: -#endif - movq %gs:CPU_THREAD, %r9 - cmpq %rax, %rsi /* test uaddr < kernelbase */ - jae 4f - - cmpq $0, %rcx /* No non-temporal access? */ - /* - * pass lofault value as 4th argument to do_copy_fault - */ - leaq _xcopyout_err(%rip), %rcx - jnz 6f - /* - * Make sure cnt is >= XCOPY_MIN_SIZE bytes - */ - cmpq $XCOPY_MIN_SIZE, %rdx - jae 5f -6: - SMAP_DISABLE_INSTR(4) - jmp do_copy_fault - - /* - * Make sure src and dst are NTA_ALIGN_SIZE aligned, - * count is COUNT_ALIGN_SIZE aligned. - */ -5: - movq %rdi, %r10 - orq %rsi, %r10 - andq $NTA_ALIGN_MASK, %r10 - orq %rdx, %r10 - andq $COUNT_ALIGN_MASK, %r10 - jnz 6b - leaq _xcopyout_nta_err(%rip), %rcx - SMAP_DISABLE_INSTR(5) - call do_copy_fault_nta - SMAP_ENABLE_INSTR(5) - ret - -4: - movl $EFAULT, %eax - jmp 3f - - /* - * A fault during do_copy_fault or do_copy_fault_nta is - * indicated through an errno value in %rax and we iret from the - * trap handler to here. - */ -_xcopyout_err: - addq $8, %rsp /* pop bcopy_altentry call ret addr */ -_xcopyout_nta_err: - SMAP_ENABLE_INSTR(6) - movq %r11, T_LOFAULT(%r9) /* restore original lofault */ -3: - movq T_COPYOPS(%r9), %r8 - cmpq $0, %r8 - jz 2f - - /* - * reload args for the copyop - */ - movq (%rsp), %rdi - movq 0x8(%rsp), %rsi - movq 0x10(%rsp), %rdx - leave - movq CP_XCOPYOUT(%r8), %r8 - INDIRECT_JMP_REG(r8) - -2: leave - ret - SET_SIZE(xcopyout_nta) - -/* - * Copy a null terminated string from one point to another in - * the kernel address space. 
- */ - - ENTRY(copystr) - pushq %rbp - movq %rsp, %rbp -#ifdef DEBUG - movq kernelbase(%rip), %rax - cmpq %rax, %rdi /* %rdi = from */ - jb 0f - cmpq %rax, %rsi /* %rsi = to */ - jnb 1f -0: leaq .copystr_panic_msg(%rip), %rdi - xorl %eax, %eax - call panic -1: -#endif - movq %gs:CPU_THREAD, %r9 - movq T_LOFAULT(%r9), %r8 /* pass current lofault value as */ - /* 5th argument to do_copystr */ - xorl %r10d,%r10d /* pass smap restore need in %r10d */ - /* as a non-ABI 6th arg */ -do_copystr: - movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ - movq T_LOFAULT(%r9), %r11 /* save the current lofault */ - movq %r8, T_LOFAULT(%r9) /* new lofault */ - - movq %rdx, %r8 /* save maxlength */ - - cmpq $0, %rdx /* %rdx = maxlength */ - je copystr_enametoolong /* maxlength == 0 */ - -copystr_loop: - decq %r8 - movb (%rdi), %al - incq %rdi - movb %al, (%rsi) - incq %rsi - cmpb $0, %al - je copystr_null /* null char */ - cmpq $0, %r8 - jne copystr_loop - -copystr_enametoolong: - movl $ENAMETOOLONG, %eax - jmp copystr_out - -copystr_null: - xorl %eax, %eax /* no error */ - -copystr_out: - cmpq $0, %rcx /* want length? */ - je copystr_smap /* no */ - subq %r8, %rdx /* compute length and store it */ - movq %rdx, (%rcx) - -copystr_smap: - cmpl $0, %r10d - jz copystr_done - SMAP_ENABLE_INSTR(7) - -copystr_done: - movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ - leave - ret - SET_SIZE(copystr) - -/* - * Copy a null terminated string from the user address space into - * the kernel address space. - */ - - ENTRY(copyinstr) - pushq %rbp - movq %rsp, %rbp - subq $32, %rsp - - /* - * save args in case we trap and need to rerun as a copyop - */ - movq %rdi, (%rsp) - movq %rsi, 0x8(%rsp) - movq %rdx, 0x10(%rsp) - movq %rcx, 0x18(%rsp) - - movq kernelbase(%rip), %rax -#ifdef DEBUG - cmpq %rax, %rsi /* %rsi = kaddr */ - jnb 1f - leaq .copyinstr_panic_msg(%rip), %rdi - xorl %eax, %eax - call panic -1: -#endif - /* - * pass lofault value as 5th argument to do_copystr - * do_copystr expects whether or not we need smap in %r10d - */ - leaq _copyinstr_error(%rip), %r8 - movl $1, %r10d - - cmpq %rax, %rdi /* test uaddr < kernelbase */ - jae 4f - SMAP_DISABLE_INSTR(6) - jmp do_copystr -4: - movq %gs:CPU_THREAD, %r9 - jmp 3f - -_copyinstr_error: - SMAP_ENABLE_INSTR(8) - movq %r11, T_LOFAULT(%r9) /* restore original lofault */ -3: - movq T_COPYOPS(%r9), %rax - cmpq $0, %rax - jz 2f - - /* - * reload args for the copyop - */ - movq (%rsp), %rdi - movq 0x8(%rsp), %rsi - movq 0x10(%rsp), %rdx - movq 0x18(%rsp), %rcx - leave - movq CP_COPYINSTR(%rax), %rax - INDIRECT_JMP_REG(rax) - -2: movl $EFAULT, %eax /* return EFAULT */ - leave - ret - SET_SIZE(copyinstr) - -/* - * Copy a null terminated string from the kernel - * address space to the user address space. 
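[Editor's note: the string-copy entry points report errno values directly; a small hedged sketch — fetch_path() is hypothetical — of the copyinstr(9F) contract that the _copyinstr_error and copystr_enametoolong paths above implement:]

```c
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/errno.h>

static int
fetch_path(const char *upath, char *kpath, size_t klen)
{
	size_t copied;

	/*
	 * Returns 0 on success, EFAULT on a user-address fault, or
	 * ENAMETOOLONG when klen is exhausted before the terminating
	 * NUL (the copystr_enametoolong path above).
	 */
	return (copyinstr(upath, kpath, klen, &copied));
}
```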
- */ - - ENTRY(copyoutstr) - pushq %rbp - movq %rsp, %rbp - subq $32, %rsp - - /* - * save args in case we trap and need to rerun as a copyop - */ - movq %rdi, (%rsp) - movq %rsi, 0x8(%rsp) - movq %rdx, 0x10(%rsp) - movq %rcx, 0x18(%rsp) - - movq kernelbase(%rip), %rax -#ifdef DEBUG - cmpq %rax, %rdi /* %rdi = kaddr */ - jnb 1f - leaq .copyoutstr_panic_msg(%rip), %rdi - jmp call_panic /* setup stack and call panic */ -1: -#endif - /* - * pass lofault value as 5th argument to do_copystr - * pass one as 6th argument to do_copystr in %r10d - */ - leaq _copyoutstr_error(%rip), %r8 - movl $1, %r10d - - cmpq %rax, %rsi /* test uaddr < kernelbase */ - jae 4f - SMAP_DISABLE_INSTR(7) - jmp do_copystr -4: - movq %gs:CPU_THREAD, %r9 - jmp 3f - -_copyoutstr_error: - SMAP_ENABLE_INSTR(9) - movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ -3: - movq T_COPYOPS(%r9), %rax - cmpq $0, %rax - jz 2f - - /* - * reload args for the copyop - */ - movq (%rsp), %rdi - movq 0x8(%rsp), %rsi - movq 0x10(%rsp), %rdx - movq 0x18(%rsp), %rcx - leave - movq CP_COPYOUTSTR(%rax), %rax - INDIRECT_JMP_REG(rax) - -2: movl $EFAULT, %eax /* return EFAULT */ - leave - ret - SET_SIZE(copyoutstr) - -/* - * Since all of the fuword() variants are so similar, we have a macro to spit - * them out. This allows us to create DTrace-unobservable functions easily. - */ - -/* - * Note that we don't save and reload the arguments here - * because their values are not altered in the copy path. - * Additionally, when successful, the smap_enable jmp will - * actually return us to our original caller. - */ - -#define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \ - ENTRY(NAME) \ - movq %gs:CPU_THREAD, %r9; \ - cmpq kernelbase(%rip), %rdi; \ - jae 1f; \ - leaq _flt_/**/NAME, %rdx; \ - movq %rdx, T_LOFAULT(%r9); \ - SMAP_DISABLE_INSTR(DISNUM) \ - INSTR (%rdi), REG; \ - movq $0, T_LOFAULT(%r9); \ - INSTR REG, (%rsi); \ - xorl %eax, %eax; \ - SMAP_ENABLE_INSTR(EN1) \ - ret; \ -_flt_/**/NAME: \ - SMAP_ENABLE_INSTR(EN2) \ - movq $0, T_LOFAULT(%r9); \ -1: \ - movq T_COPYOPS(%r9), %rax; \ - cmpq $0, %rax; \ - jz 2f; \ - movq COPYOP(%rax), %rax; \ - INDIRECT_JMP_REG(rax); \ -2: \ - movl $-1, %eax; \ - ret; \ - SET_SIZE(NAME) - - FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11) - FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13) - FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15) - FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17) - -#undef FUWORD - -/* - * Set user word. - */ - -/* - * Note that we don't save and reload the arguments here - * because their values are not altered in the copy path. 
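[Editor's note: for the fetch/set primitives generated by the FUWORD and SUWORD macros, the visible contract is a single naturally aligned word moved to or from user space with a 0 or -1 result. A minimal hedged usage sketch, where bump_user_counter() is hypothetical:]

```c
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/errno.h>

static int
bump_user_counter(uint32_t *uaddr)
{
	uint32_t v;

	if (fuword32(uaddr, &v) == -1)		/* fetch user word */
		return (EFAULT);
	if (suword32(uaddr, v + 1) == -1)	/* store user word */
		return (EFAULT);
	return (0);
}
```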
- */ - -#define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \ - ENTRY(NAME) \ - movq %gs:CPU_THREAD, %r9; \ - cmpq kernelbase(%rip), %rdi; \ - jae 1f; \ - leaq _flt_/**/NAME, %rdx; \ - SMAP_DISABLE_INSTR(DISNUM) \ - movq %rdx, T_LOFAULT(%r9); \ - INSTR REG, (%rdi); \ - movq $0, T_LOFAULT(%r9); \ - xorl %eax, %eax; \ - SMAP_ENABLE_INSTR(EN1) \ - ret; \ -_flt_/**/NAME: \ - SMAP_ENABLE_INSTR(EN2) \ - movq $0, T_LOFAULT(%r9); \ -1: \ - movq T_COPYOPS(%r9), %rax; \ - cmpq $0, %rax; \ - jz 3f; \ - movq COPYOP(%rax), %rax; \ - INDIRECT_JMP_REG(rax); \ -3: \ - movl $-1, %eax; \ - ret; \ - SET_SIZE(NAME) - - SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19) - SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21) - SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23) - SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25) - -#undef SUWORD - -#define FUWORD_NOERR(NAME, INSTR, REG) \ - ENTRY(NAME) \ - cmpq kernelbase(%rip), %rdi; \ - cmovnbq kernelbase(%rip), %rdi; \ - INSTR (%rdi), REG; \ - INSTR REG, (%rsi); \ - ret; \ - SET_SIZE(NAME) - - FUWORD_NOERR(fuword64_noerr, movq, %rax) - FUWORD_NOERR(fuword32_noerr, movl, %eax) - FUWORD_NOERR(fuword16_noerr, movw, %ax) - FUWORD_NOERR(fuword8_noerr, movb, %al) - -#undef FUWORD_NOERR - -#define SUWORD_NOERR(NAME, INSTR, REG) \ - ENTRY(NAME) \ - cmpq kernelbase(%rip), %rdi; \ - cmovnbq kernelbase(%rip), %rdi; \ - INSTR REG, (%rdi); \ - ret; \ - SET_SIZE(NAME) - - SUWORD_NOERR(suword64_noerr, movq, %rsi) - SUWORD_NOERR(suword32_noerr, movl, %esi) - SUWORD_NOERR(suword16_noerr, movw, %si) - SUWORD_NOERR(suword8_noerr, movb, %sil) - -#undef SUWORD_NOERR - - - .weak subyte - subyte=suword8 - .weak subyte_noerr - subyte_noerr=suword8_noerr - - .weak fulword - fulword=fuword64 - .weak fulword_noerr - fulword_noerr=fuword64_noerr - .weak sulword - sulword=suword64 - .weak sulword_noerr - sulword_noerr=suword64_noerr - - ENTRY(copyin_noerr) - movq kernelbase(%rip), %rax -#ifdef DEBUG - cmpq %rax, %rsi /* %rsi = kto */ - jae 1f - leaq .cpyin_ne_pmsg(%rip), %rdi - jmp call_panic /* setup stack and call panic */ -1: -#endif - cmpq %rax, %rdi /* ufrom < kernelbase */ - jb do_copy - movq %rax, %rdi /* force fault at kernelbase */ - jmp do_copy - SET_SIZE(copyin_noerr) - - ENTRY(copyout_noerr) - movq kernelbase(%rip), %rax -#ifdef DEBUG - cmpq %rax, %rdi /* %rdi = kfrom */ - jae 1f - leaq .cpyout_ne_pmsg(%rip), %rdi - jmp call_panic /* setup stack and call panic */ -1: -#endif - cmpq %rax, %rsi /* uto < kernelbase */ - jb do_copy - movq %rax, %rsi /* force fault at kernelbase */ - jmp do_copy - SET_SIZE(copyout_noerr) - - ENTRY(uzero) - movq kernelbase(%rip), %rax - cmpq %rax, %rdi - jb do_zero - movq %rax, %rdi /* force fault at kernelbase */ - jmp do_zero - SET_SIZE(uzero) - - ENTRY(ucopy) - movq kernelbase(%rip), %rax - cmpq %rax, %rdi - cmovaeq %rax, %rdi /* force fault at kernelbase */ - cmpq %rax, %rsi - cmovaeq %rax, %rsi /* force fault at kernelbase */ - jmp do_copy - SET_SIZE(ucopy) - - /* - * Note, the frame pointer is required here becuase do_copystr expects - * to be able to pop it off! 
- */ - ENTRY(ucopystr) - pushq %rbp - movq %rsp, %rbp - movq kernelbase(%rip), %rax - cmpq %rax, %rdi - cmovaeq %rax, %rdi /* force fault at kernelbase */ - cmpq %rax, %rsi - cmovaeq %rax, %rsi /* force fault at kernelbase */ - /* do_copystr expects lofault address in %r8 */ - /* do_copystr expects whether or not we need smap in %r10 */ - xorl %r10d, %r10d - movq %gs:CPU_THREAD, %r8 - movq T_LOFAULT(%r8), %r8 - jmp do_copystr - SET_SIZE(ucopystr) - -#ifdef DEBUG - .data -.kcopy_panic_msg: - .string "kcopy: arguments below kernelbase" -.bcopy_panic_msg: - .string "bcopy: arguments below kernelbase" -.kzero_panic_msg: - .string "kzero: arguments below kernelbase" -.bzero_panic_msg: - .string "bzero: arguments below kernelbase" -.copyin_panic_msg: - .string "copyin: kaddr argument below kernelbase" -.xcopyin_panic_msg: - .string "xcopyin: kaddr argument below kernelbase" -.copyout_panic_msg: - .string "copyout: kaddr argument below kernelbase" -.xcopyout_panic_msg: - .string "xcopyout: kaddr argument below kernelbase" -.copystr_panic_msg: - .string "copystr: arguments in user space" -.copyinstr_panic_msg: - .string "copyinstr: kaddr argument not in kernel address space" -.copyoutstr_panic_msg: - .string "copyoutstr: kaddr argument not in kernel address space" -.cpyin_ne_pmsg: - .string "copyin_noerr: argument not in kernel address space" -.cpyout_ne_pmsg: - .string "copyout_noerr: argument not in kernel address space" -#endif - -.data -.align 4 -.globl _smap_enable_patch_count -.type _smap_enable_patch_count,@object -.size _smap_enable_patch_count, 4 -_smap_enable_patch_count: - .long SMAP_ENABLE_COUNT - -.globl _smap_disable_patch_count -.type _smap_disable_patch_count,@object -.size _smap_disable_patch_count, 4 -_smap_disable_patch_count: - .long SMAP_DISABLE_COUNT diff --git a/usr/src/uts/intel/ia32/ml/ddi_i86_asm.s b/usr/src/uts/intel/ia32/ml/ddi_i86_asm.s deleted file mode 100644 index 2fa9bd75e9..0000000000 --- a/usr/src/uts/intel/ia32/ml/ddi_i86_asm.s +++ /dev/null @@ -1,522 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright 2019 Joyent, Inc. 
- */ - -#include <sys/asm_linkage.h> -#include <sys/asm_misc.h> -#include "assym.h" - - ENTRY(ddi_get8) - ALTENTRY(ddi_mem_get8) - ALTENTRY(ddi_io_get8) - movl ACC_ATTR(%rdi), %edx - cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %edx - jne 1f - movq %rsi, %rdx - xorq %rax, %rax - inb (%dx) - ret -1: - cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %edx - jne 2f - movzbq (%rsi), %rax - ret -2: - movq ACC_GETB(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_get8) - SET_SIZE(ddi_mem_get8) - SET_SIZE(ddi_io_get8) - - - ENTRY(ddi_get16) - ALTENTRY(ddi_mem_get16) - ALTENTRY(ddi_io_get16) - movl ACC_ATTR(%rdi), %edx - cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %edx - jne 3f - movq %rsi, %rdx - xorq %rax, %rax - inw (%dx) - ret -3: - cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %edx - jne 4f - movzwq (%rsi), %rax - ret -4: - movq ACC_GETW(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_get16) - SET_SIZE(ddi_mem_get16) - SET_SIZE(ddi_io_get16) - - - ENTRY(ddi_get32) - ALTENTRY(ddi_mem_get32) - ALTENTRY(ddi_io_get32) - movl ACC_ATTR(%rdi), %edx - cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %edx - jne 5f - movq %rsi, %rdx - inl (%dx) - ret -5: - cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %edx - jne 6f - movl (%rsi), %eax - ret -6: - movq ACC_GETL(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_get32) - SET_SIZE(ddi_mem_get32) - SET_SIZE(ddi_io_get32) - - - ENTRY(ddi_get64) - ALTENTRY(ddi_mem_get64) - movq ACC_GETLL(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_get64) - SET_SIZE(ddi_mem_get64) - - - ENTRY(ddi_put8) - ALTENTRY(ddi_mem_put8) - ALTENTRY(ddi_io_put8) - movl ACC_ATTR(%rdi), %ecx - cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %ecx - jne 7f - movq %rdx, %rax - movq %rsi, %rdx - outb (%dx) - ret -7: - cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %ecx - jne 8f - movb %dl, (%rsi) - ret -8: - movq ACC_PUTB(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_put8) - SET_SIZE(ddi_mem_put8) - SET_SIZE(ddi_io_put8) - - - ENTRY(ddi_put16) - ALTENTRY(ddi_mem_put16) - ALTENTRY(ddi_io_put16) - movl ACC_ATTR(%rdi), %ecx - cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %ecx - jne 8f - movq %rdx, %rax - movq %rsi, %rdx - outw (%dx) - ret -8: - cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %ecx - jne 9f - movw %dx, (%rsi) - ret -9: - movq ACC_PUTW(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_put16) - SET_SIZE(ddi_mem_put16) - SET_SIZE(ddi_io_put16) - - - ENTRY(ddi_put32) - ALTENTRY(ddi_mem_put32) - ALTENTRY(ddi_io_put32) - movl ACC_ATTR(%rdi), %ecx - cmpl $_CONST(DDI_ACCATTR_IO_SPACE|DDI_ACCATTR_DIRECT), %ecx - jne 8f - movq %rdx, %rax - movq %rsi, %rdx - outl (%dx) - ret -8: - cmpl $_CONST(DDI_ACCATTR_CPU_VADDR|DDI_ACCATTR_DIRECT), %ecx - jne 9f - movl %edx, (%rsi) - ret -9: - movq ACC_PUTL(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_put32) - SET_SIZE(ddi_mem_put32) - SET_SIZE(ddi_io_put32) - - - ENTRY(ddi_put64) - ALTENTRY(ddi_mem_put64) - movq ACC_PUTLL(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_put64) - SET_SIZE(ddi_mem_put64) - - - ENTRY(ddi_rep_get8) - ALTENTRY(ddi_mem_rep_get8) - movq ACC_REP_GETB(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_rep_get8) - SET_SIZE(ddi_mem_rep_get8) - - - ENTRY(ddi_rep_get16) - ALTENTRY(ddi_mem_rep_get16) - movq ACC_REP_GETW(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_rep_get16) - SET_SIZE(ddi_mem_rep_get16) - - - ENTRY(ddi_rep_get32) - ALTENTRY(ddi_mem_rep_get32) - movq ACC_REP_GETL(%rdi), %rax - INDIRECT_JMP_REG(rax) - 
SET_SIZE(ddi_rep_get32) - SET_SIZE(ddi_mem_rep_get32) - - - ENTRY(ddi_rep_get64) - ALTENTRY(ddi_mem_rep_get64) - movq ACC_REP_GETLL(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_rep_get64) - SET_SIZE(ddi_mem_rep_get64) - - - ENTRY(ddi_rep_put8) - ALTENTRY(ddi_mem_rep_put8) - movq ACC_REP_PUTB(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_rep_put8) - SET_SIZE(ddi_mem_rep_put8) - - - ENTRY(ddi_rep_put16) - ALTENTRY(ddi_mem_rep_put16) - movq ACC_REP_PUTW(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_rep_put16) - SET_SIZE(ddi_mem_rep_put16) - - - ENTRY(ddi_rep_put32) - ALTENTRY(ddi_mem_rep_put32) - movq ACC_REP_PUTL(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_rep_put32) - SET_SIZE(ddi_mem_rep_put32) - - - ENTRY(ddi_rep_put64) - ALTENTRY(ddi_mem_rep_put64) - movq ACC_REP_PUTLL(%rdi), %rax - INDIRECT_JMP_REG(rax) - SET_SIZE(ddi_rep_put64) - SET_SIZE(ddi_mem_rep_put64) - - ENTRY(i_ddi_vaddr_get8) - movzbq (%rsi), %rax - ret - SET_SIZE(i_ddi_vaddr_get8) - - ENTRY(i_ddi_vaddr_get16) - movzwq (%rsi), %rax - ret - SET_SIZE(i_ddi_vaddr_get16) - - - ENTRY(i_ddi_vaddr_get32) - movl (%rsi), %eax - ret - SET_SIZE(i_ddi_vaddr_get32) - - - ENTRY(i_ddi_vaddr_get64) - movq (%rsi), %rax - ret - SET_SIZE(i_ddi_vaddr_get64) - - - ENTRY(i_ddi_io_get8) - movq %rsi, %rdx - inb (%dx) - movzbq %al, %rax - ret - SET_SIZE(i_ddi_io_get8) - - - ENTRY(i_ddi_io_get16) - movq %rsi, %rdx - inw (%dx) - movzwq %ax, %rax - ret - SET_SIZE(i_ddi_io_get16) - - - ENTRY(i_ddi_io_get32) - movq %rsi, %rdx - inl (%dx) - ret - SET_SIZE(i_ddi_io_get32) - - ENTRY(i_ddi_vaddr_put8) - movb %dl, (%rsi) - ret - SET_SIZE(i_ddi_vaddr_put8) - - - ENTRY(i_ddi_vaddr_put16) - movw %dx, (%rsi) - ret - SET_SIZE(i_ddi_vaddr_put16) - - - ENTRY(i_ddi_vaddr_put32) - movl %edx, (%rsi) - ret - SET_SIZE(i_ddi_vaddr_put32) - - - ENTRY(i_ddi_vaddr_put64) - movq %rdx, (%rsi) - ret - SET_SIZE(i_ddi_vaddr_put64) - - ENTRY(i_ddi_io_put8) - movq %rdx, %rax - movq %rsi, %rdx - outb (%dx) - ret - SET_SIZE(i_ddi_io_put8) - - - ENTRY(i_ddi_io_put16) - movq %rdx, %rax - movq %rsi, %rdx - outw (%dx) - ret - SET_SIZE(i_ddi_io_put16) - - - ENTRY(i_ddi_io_put32) - movq %rdx, %rax - movq %rsi, %rdx - outl (%dx) - ret - SET_SIZE(i_ddi_io_put32) - - /* - * Incoming arguments - * - * %rdi : hdlp - * %rsi : host_addr - * %rdx : dev_addr - * %rcx : repcount - * %r8 : flags - * - * This routine will destroy values in %rdx, %rsi, %rcx. 
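[Editor's note: the register map described above corresponds one-to-one to the ddi_rep_get8(9F) arguments dispatched into this routine. A hedged caller sketch, where drain_fifo() and fifo_reg are illustrative:]

```c
#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

static void
drain_fifo(ddi_acc_handle_t h, uint8_t *buf, uint8_t *fifo_reg, size_t n)
{
	/*
	 * DDI_DEV_NO_AUTOINCR reads the same device address n times
	 * (the "rep insb" fast path above); DDI_DEV_AUTOINCR advances
	 * the device address each iteration (the gb_ioadv loop).
	 */
	ddi_rep_get8(h, buf, fifo_reg, n, DDI_DEV_NO_AUTOINCR);
}
```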
- */ - ENTRY(i_ddi_io_rep_get8) - - cmpq $DDI_DEV_AUTOINCR, %r8 - je gb_ioadv - movq %rsi, %rdi - rep - insb - ret - -gb_ioadv: - andq %rcx, %rcx - jz gb_ioadv_done -gb_ioadv2: - inb (%dx) - movb %al, (%rsi) - incq %rdx - incq %rsi - decq %rcx - jg gb_ioadv2 - -gb_ioadv_done: - rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - - SET_SIZE(i_ddi_io_rep_get8) - - - ENTRY(i_ddi_io_rep_get16) - - cmpq $DDI_DEV_AUTOINCR, %r8 - je gw_ioadv - - movq %rsi, %rdi - rep - insw - ret - -gw_ioadv: - andq %rcx, %rcx - jz gw_ioadv_done -gw_ioadv2: - inw (%dx) - movw %ax,(%rsi) - addq $2, %rsi - addq $2, %rdx - decq %rcx - jg gw_ioadv2 - -gw_ioadv_done: - rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(i_ddi_io_rep_get16) - - - ENTRY(i_ddi_io_rep_get32) - - cmpq $DDI_DEV_AUTOINCR, %r8 - je gl_ioadv - - movq %rsi, %rdi - rep - insl - ret - -gl_ioadv: - andq %rcx, %rcx - jz gl_ioadv_done -gl_ioadv2: - inl (%dx) - movl %eax,(%rsi) - addq $4, %rsi - addq $4, %rdx - decq %rcx - jg gl_ioadv2 - -gl_ioadv_done: - rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - - SET_SIZE(i_ddi_io_rep_get32) - - /* - * Incoming arguments - * - * %rdi : hdlp - * %rsi : host_addr - * %rdx : dev_addr - * %rcx : repcount - * %r8 : flags - * - * This routine will destroy values in %rdx, %rsi, %rcx. - */ - ENTRY(i_ddi_io_rep_put8) - - cmpq $DDI_DEV_AUTOINCR, %r8 - je pb_ioadv - - movq %rsi, %rdi - rep - outsb - ret - -pb_ioadv: - andq %rcx, %rcx - jz pb_ioadv_done -pb_ioadv2: - movb (%rsi), %al - outb (%dx) - incq %rsi - incq %rdx - decq %rcx - jg pb_ioadv2 - -pb_ioadv_done: - rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(i_ddi_io_rep_put8) - - ENTRY(i_ddi_io_rep_put16) - - cmpq $DDI_DEV_AUTOINCR, %r8 - je pw_ioadv - - movq %rsi, %rdi - rep - outsw - ret - -pw_ioadv: - andq %rcx, %rcx - jz pw_ioadv_done -pw_ioadv2: - movw (%rsi), %ax - outw (%dx) - addq $2, %rsi - addq $2, %rdx - decq %rcx - jg pw_ioadv2 - -pw_ioadv_done: - rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(i_ddi_io_rep_put16) - - - ENTRY(i_ddi_io_rep_put32) - - cmpq $DDI_DEV_AUTOINCR, %r8 - je pl_ioadv - - movq %rsi, %rdi - rep - outsl - ret - -pl_ioadv: - andq %rcx, %rcx - jz pl_ioadv_done -pl_ioadv2: - movl (%rsi), %eax - outl (%dx) - addq $4, %rsi - addq $4, %rdx - decq %rcx - jg pl_ioadv2 - -pl_ioadv_done: - rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(i_ddi_io_rep_put32) diff --git a/usr/src/uts/intel/ia32/ml/desctbls_asm.s b/usr/src/uts/intel/ia32/ml/desctbls_asm.s deleted file mode 100644 index 4528bc07ad..0000000000 --- a/usr/src/uts/intel/ia32/ml/desctbls_asm.s +++ /dev/null @@ -1,118 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright 2019 Joyent, Inc. - */ - -#include <sys/asm_linkage.h> -#include <sys/asm_misc.h> -#include <sys/regset.h> -#include <sys/panic.h> -#include <sys/ontrap.h> -#include <sys/privregs.h> -#include <sys/segments.h> -#include <sys/trap.h> - -#include "assym.h" - - ENTRY_NP(rd_idtr) - sidt (%rdi) - ret - SET_SIZE(rd_idtr) - - ENTRY_NP(wr_idtr) - lidt (%rdi) - ret - SET_SIZE(wr_idtr) - - ENTRY_NP(rd_gdtr) - pushq %rbp - movq %rsp, %rbp - sgdt (%rdi) - leave - ret - SET_SIZE(rd_gdtr) - - ENTRY_NP(wr_gdtr) - pushq %rbp - movq %rsp, %rbp - lgdt (%rdi) - jmp 1f - nop -1: - leave - ret - SET_SIZE(wr_gdtr) - - /* - * loads zero selector for ds and es. - */ - ENTRY_NP(load_segment_registers) - pushq %rbp - movq %rsp, %rbp - pushq %rdi - pushq $.newcs - lretq -.newcs: - /* - * zero %ds and %es - they're ignored anyway - */ - xorl %eax, %eax - movw %ax, %ds - movw %ax, %es - movl %esi, %eax - movw %ax, %fs - movl %edx, %eax - movw %ax, %gs - movl %ecx, %eax - movw %ax, %ss - leave - ret - SET_SIZE(load_segment_registers) - - ENTRY_NP(get_cs_register) - movq %cs, %rax - ret - SET_SIZE(get_cs_register) - - ENTRY_NP(wr_ldtr) - movq %rdi, %rax - lldt %ax - ret - SET_SIZE(wr_ldtr) - - ENTRY_NP(rd_ldtr) - xorl %eax, %eax - sldt %ax - ret - SET_SIZE(rd_ldtr) - - ENTRY_NP(wr_tsr) - movq %rdi, %rax - ltr %ax - ret - SET_SIZE(wr_tsr) - diff --git a/usr/src/uts/intel/ia32/ml/exception.s b/usr/src/uts/intel/ia32/ml/exception.s deleted file mode 100644 index 92c410adc0..0000000000 --- a/usr/src/uts/intel/ia32/ml/exception.s +++ /dev/null @@ -1,917 +0,0 @@ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2014 by Delphix. All rights reserved. - * Copyright 2019 Joyent, Inc. - */ - -/* - * Copyright (c) 1989, 1990 William F. Jolitz. - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/amd64/amd64/exception.S,v 1.113 2003/10/15 02:04:52 peter Exp $
- */
-
-#include <sys/asm_linkage.h>
-#include <sys/asm_misc.h>
-#include <sys/trap.h>
-#include <sys/psw.h>
-#include <sys/regset.h>
-#include <sys/privregs.h>
-#include <sys/dtrace.h>
-#include <sys/x86_archext.h>
-#include <sys/traptrace.h>
-#include <sys/machparam.h>
-
-#include "assym.h"
-
-/*
- * push $0 on stack for traps that do not
- * generate an error code. This is so the rest
- * of the kernel can expect a consistent stack
- * from any exception.
- *
- * Note that for all exceptions on amd64
- * %r11 and %rcx are on the stack. Just pop
- * them back into their appropriate registers and let
- * them get saved as if running native.
- */
-
-#if defined(__xpv)
-
-#define	NPTRAP_NOERR(trapno)	\
-	pushq	$0;		\
-	pushq	$trapno
-
-#define	TRAP_NOERR(trapno)	\
-	XPV_TRAP_POP;		\
-	NPTRAP_NOERR(trapno)
-
-/*
- * error code already pushed by hw
- * onto stack.
- */
-#define	TRAP_ERR(trapno)	\
-	XPV_TRAP_POP;		\
-	pushq	$trapno
-
-#else /* __xpv */
-
-#define	TRAP_NOERR(trapno)	\
-	push	$0;		\
-	push	$trapno
-
-#define	NPTRAP_NOERR(trapno) TRAP_NOERR(trapno)
-
-/*
- * error code already pushed by hw
- * onto stack.
- */
-#define	TRAP_ERR(trapno)	\
-	push	$trapno
-
-#endif	/* __xpv */
-
-	/*
-	 * These are the stacks used on cpu0 for taking double faults,
-	 * NMIs and MCEs.
-	 *
-	 * We define them here instead of in a C file so that we can page-align
-	 * them (gcc won't do that in a .c file).
-	 */
-	.data
-	DGDEF3(dblfault_stack0, DEFAULTSTKSZ, MMU_PAGESIZE)
-	.fill	DEFAULTSTKSZ, 1, 0
-	DGDEF3(nmi_stack0, DEFAULTSTKSZ, MMU_PAGESIZE)
-	.fill	DEFAULTSTKSZ, 1, 0
-	DGDEF3(mce_stack0, DEFAULTSTKSZ, MMU_PAGESIZE)
-	.fill	DEFAULTSTKSZ, 1, 0
-
-	/*
-	 * #DE
-	 */
-	ENTRY_NP(div0trap)
-	TRAP_NOERR(T_ZERODIV)	/* $0 */
-	jmp	cmntrap
-	SET_SIZE(div0trap)
-
-	/*
-	 * #DB
-	 *
-	 * Fetch %dr6 and clear it, handing off the value to the
-	 * cmntrap code in %r15/%esi
-	 */
-	ENTRY_NP(dbgtrap)
-	TRAP_NOERR(T_SGLSTP)	/* $1 */
-
-#if !defined(__xpv)		/* no sysenter support yet */
-	/*
-	 * If we get here as a result of single-stepping a sysenter
-	 * instruction, we suddenly find ourselves taking a #db
-	 * in kernel mode -before- we've swapgs'ed. So before we can
-	 * take the trap, we do the swapgs here, and fix the return
-	 * %rip in trap() so that we return immediately after the
-	 * swapgs in the sysenter handler to avoid doing the swapgs again.
-	 *
-	 * Nobody said that the design of sysenter was particularly
-	 * elegant, did they?
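 *
 * As a sketch, the address checks that follow amount to:
 *
 *	if (rip == (uintptr_t)sys_sysenter ||
 *	    rip == (uintptr_t)brand_sys_sysenter ||
 *	    rip == (uintptr_t)tr_sys_sysenter ||
 *	    rip == (uintptr_t)tr_brand_sys_sysenter)
 *		swapgs;
 *
 * where rip is the interrupted %rip saved at 24(%rsp).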
- */ - - pushq %r11 - - /* - * At this point the stack looks like this: - * - * (high address) r_ss - * r_rsp - * r_rfl - * r_cs - * r_rip <-- %rsp + 24 - * r_err <-- %rsp + 16 - * r_trapno <-- %rsp + 8 - * (low address) %r11 <-- %rsp - */ - leaq sys_sysenter(%rip), %r11 - cmpq %r11, 24(%rsp) /* Compare to saved r_rip on the stack */ - je 1f - leaq brand_sys_sysenter(%rip), %r11 - cmpq %r11, 24(%rsp) /* Compare to saved r_rip on the stack */ - je 1f - leaq tr_sys_sysenter(%rip), %r11 - cmpq %r11, 24(%rsp) - je 1f - leaq tr_brand_sys_sysenter(%rip), %r11 - cmpq %r11, 24(%rsp) - jne 2f -1: swapgs -2: lfence /* swapgs mitigation */ - popq %r11 -#endif /* !__xpv */ - - INTR_PUSH -#if defined(__xpv) - movl $6, %edi - call kdi_dreg_get - movq %rax, %r15 /* %db6 -> %r15 */ - movl $6, %edi - movl $0, %esi - call kdi_dreg_set /* 0 -> %db6 */ -#else - movq %db6, %r15 - xorl %eax, %eax - movq %rax, %db6 -#endif - - jmp cmntrap_pushed - SET_SIZE(dbgtrap) - -#if !defined(__xpv) - -/* - * Macro to set the gsbase or kgsbase to the address of the struct cpu - * for this processor. If we came from userland, set kgsbase else - * set gsbase. We find the proper cpu struct by looping through - * the cpu structs for all processors till we find a match for the gdt - * of the trapping processor. The stack is expected to be pointing at - * the standard regs pushed by hardware on a trap (plus error code and trapno). - * - * It's ok for us to clobber gsbase here (and possibly end up with both gsbase - * and kgsbase set to the same value) because we're not going back the normal - * way out of here (via IRET). Where we're going, we don't need no user %gs. - */ -#define SET_CPU_GSBASE \ - subq $REGOFF_TRAPNO, %rsp; /* save regs */ \ - movq %rax, REGOFF_RAX(%rsp); \ - movq %rbx, REGOFF_RBX(%rsp); \ - movq %rcx, REGOFF_RCX(%rsp); \ - movq %rdx, REGOFF_RDX(%rsp); \ - movq %rbp, REGOFF_RBP(%rsp); \ - movq %rsp, %rbp; \ - subq $16, %rsp; /* space for gdt */ \ - sgdt 6(%rsp); \ - movq 8(%rsp), %rcx; /* %rcx has gdt to match */ \ - xorl %ebx, %ebx; /* loop index */ \ - leaq cpu(%rip), %rdx; /* cpu pointer array */ \ -1: \ - movq (%rdx, %rbx, CLONGSIZE), %rax; /* get cpu[i] */ \ - cmpq $0x0, %rax; /* cpu[i] == NULL ? */ \ - je 2f; /* yes, continue */ \ - cmpq %rcx, CPU_GDT(%rax); /* gdt == cpu[i]->cpu_gdt ? */ \ - je 3f; /* yes, go set gsbase */ \ -2: \ - incl %ebx; /* i++ */ \ - cmpl $NCPU, %ebx; /* i < NCPU ? */ \ - jb 1b; /* yes, loop */ \ -/* XXX BIG trouble if we fall thru here. We didn't find a gdt match */ \ -3: \ - movl $MSR_AMD_KGSBASE, %ecx; \ - cmpw $KCS_SEL, REGOFF_CS(%rbp); /* trap from kernel? */ \ - jne 4f; /* no, go set KGSBASE */ \ - movl $MSR_AMD_GSBASE, %ecx; /* yes, set GSBASE */ \ - mfence; /* OPTERON_ERRATUM_88 */ \ -4: \ - movq %rax, %rdx; /* write base register */ \ - shrq $32, %rdx; \ - wrmsr; \ - movq REGOFF_RDX(%rbp), %rdx; /* restore regs */ \ - movq REGOFF_RCX(%rbp), %rcx; \ - movq REGOFF_RBX(%rbp), %rbx; \ - movq REGOFF_RAX(%rbp), %rax; \ - movq %rbp, %rsp; \ - movq REGOFF_RBP(%rsp), %rbp; \ - addq $REGOFF_TRAPNO, %rsp /* pop stack */ - -#else /* __xpv */ - -#define SET_CPU_GSBASE /* noop on the hypervisor */ - -#endif /* __xpv */ - - - /* - * #NMI - * - * XXPV: See 6532669. - */ - ENTRY_NP(nmiint) - TRAP_NOERR(T_NMIFLT) /* $2 */ - - SET_CPU_GSBASE - - /* - * Save all registers and setup segment registers - * with kernel selectors. 
- */ - INTR_PUSH - INTGATE_INIT_KERNEL_FLAGS - - TRACE_PTR(%r12, %rax, %eax, %rdx, $TT_TRAP) - TRACE_REGS(%r12, %rsp, %rax, %rbx) - TRACE_STAMP(%r12) - - movq %rsp, %rbp - - movq %rbp, %rdi - call av_dispatch_nmivect - - INTR_POP - call x86_md_clear - jmp tr_iret_auto - /*NOTREACHED*/ - SET_SIZE(nmiint) - - /* - * #BP - */ - ENTRY_NP(brktrap) - XPV_TRAP_POP - cmpw $KCS_SEL, 8(%rsp) - jne bp_user - - /* - * This is a breakpoint in the kernel -- it is very likely that this - * is DTrace-induced. To unify DTrace handling, we spoof this as an - * invalid opcode (#UD) fault. Note that #BP is a trap, not a fault -- - * we must decrement the trapping %rip to make it appear as a fault. - * We then push a non-zero error code to indicate that this is coming - * from #BP. - */ - decq (%rsp) - push $1 /* error code -- non-zero for #BP */ - jmp ud_kernel - -bp_user: - - NPTRAP_NOERR(T_BPTFLT) /* $3 */ - jmp dtrace_trap - - SET_SIZE(brktrap) - - /* - * #OF - */ - ENTRY_NP(ovflotrap) - TRAP_NOERR(T_OVFLW) /* $4 */ - jmp cmntrap - SET_SIZE(ovflotrap) - - /* - * #BR - */ - ENTRY_NP(boundstrap) - TRAP_NOERR(T_BOUNDFLT) /* $5 */ - jmp cmntrap - SET_SIZE(boundstrap) - - ENTRY_NP(invoptrap) - - XPV_TRAP_POP - - cmpw $KCS_SEL, 8(%rsp) - jne ud_user - -#if defined(__xpv) - movb $0, 12(%rsp) /* clear saved upcall_mask from %cs */ -#endif - push $0 /* error code -- zero for #UD */ -ud_kernel: - push $0xdddd /* a dummy trap number */ - INTR_PUSH - movq REGOFF_RIP(%rsp), %rdi - movq REGOFF_RSP(%rsp), %rsi - movq REGOFF_RAX(%rsp), %rdx - pushq (%rsi) - movq %rsp, %rsi - subq $8, %rsp - call dtrace_invop - ALTENTRY(dtrace_invop_callsite) - addq $16, %rsp - cmpl $DTRACE_INVOP_PUSHL_EBP, %eax - je ud_push - cmpl $DTRACE_INVOP_LEAVE, %eax - je ud_leave - cmpl $DTRACE_INVOP_NOP, %eax - je ud_nop - cmpl $DTRACE_INVOP_RET, %eax - je ud_ret - jmp ud_trap - -ud_push: - /* - * We must emulate a "pushq %rbp". To do this, we pull the stack - * down 8 bytes, and then store the base pointer. - */ - INTR_POP - subq $16, %rsp /* make room for %rbp */ - pushq %rax /* push temp */ - movq 24(%rsp), %rax /* load calling RIP */ - addq $1, %rax /* increment over trapping instr */ - movq %rax, 8(%rsp) /* store calling RIP */ - movq 32(%rsp), %rax /* load calling CS */ - movq %rax, 16(%rsp) /* store calling CS */ - movq 40(%rsp), %rax /* load calling RFLAGS */ - movq %rax, 24(%rsp) /* store calling RFLAGS */ - movq 48(%rsp), %rax /* load calling RSP */ - subq $8, %rax /* make room for %rbp */ - movq %rax, 32(%rsp) /* store calling RSP */ - movq 56(%rsp), %rax /* load calling SS */ - movq %rax, 40(%rsp) /* store calling SS */ - movq 32(%rsp), %rax /* reload calling RSP */ - movq %rbp, (%rax) /* store %rbp there */ - popq %rax /* pop off temp */ - jmp tr_iret_kernel /* return from interrupt */ - /*NOTREACHED*/ - -ud_leave: - /* - * We must emulate a "leave", which is the same as a "movq %rbp, - * %rsp" followed by a "popq %rbp". We can exploit the fact - * that the %rsp is explicitly saved to effect the pop without - * having to reshuffle the other data pushed for the trap. 
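 *
 * Over a saved register set (a hypothetical struct regs *rp, purely
 * illustrative), the net effect is:
 *
 *	rp->r_rip += 1;			skip the trapping byte
 *	rp->r_rsp = rp->r_rbp + 8;	movq %rbp, %rsp; popq %rbp
 *	rp->r_rbp = *(uintptr_t *)rp->r_rbp;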
- */ - - INTR_POP - pushq %rax /* push temp */ - movq 8(%rsp), %rax /* load calling RIP */ - addq $1, %rax /* increment over trapping instr */ - movq %rax, 8(%rsp) /* store calling RIP */ - movq (%rbp), %rax /* get new %rbp */ - addq $8, %rbp /* adjust new %rsp */ - movq %rbp, 32(%rsp) /* store new %rsp */ - movq %rax, %rbp /* set new %rbp */ - popq %rax /* pop off temp */ - jmp tr_iret_kernel /* return from interrupt */ - /*NOTREACHED*/ - -ud_nop: - /* - * We must emulate a "nop". This is obviously not hard: we need only - * advance the %rip by one. - */ - INTR_POP - incq (%rsp) - jmp tr_iret_kernel - /*NOTREACHED*/ - -ud_ret: - INTR_POP - pushq %rax /* push temp */ - movq 32(%rsp), %rax /* load %rsp */ - movq (%rax), %rax /* load calling RIP */ - movq %rax, 8(%rsp) /* store calling RIP */ - addq $8, 32(%rsp) /* adjust new %rsp */ - popq %rax /* pop off temp */ - jmp tr_iret_kernel /* return from interrupt */ - /*NOTREACHED*/ - -ud_trap: - /* - * We're going to let the kernel handle this as a normal #UD. If, - * however, we came through #BP and are spoofing #UD (in this case, - * the stored error value will be non-zero), we need to de-spoof - * the trap by incrementing %rip and pushing T_BPTFLT. - */ - cmpq $0, REGOFF_ERR(%rsp) - je ud_ud - incq REGOFF_RIP(%rsp) - addq $REGOFF_RIP, %rsp - NPTRAP_NOERR(T_BPTFLT) /* $3 */ - jmp cmntrap - -ud_ud: - addq $REGOFF_RIP, %rsp -ud_user: - NPTRAP_NOERR(T_ILLINST) - jmp cmntrap - SET_SIZE(invoptrap) - - /* - * #NM - */ - - ENTRY_NP(ndptrap) - TRAP_NOERR(T_NOEXTFLT) /* $0 */ - SET_CPU_GSBASE - jmp cmntrap - SET_SIZE(ndptrap) - -#if !defined(__xpv) - - /* - * #DF - */ - ENTRY_NP(syserrtrap) - pushq $T_DBLFLT - SET_CPU_GSBASE - - /* - * We share this handler with kmdb (if kmdb is loaded). As such, we - * may have reached this point after encountering a #df in kmdb. If - * that happens, we'll still be on kmdb's IDT. We need to switch back - * to this CPU's IDT before proceeding. Furthermore, if we did arrive - * here from kmdb, kmdb is probably in a very sickly state, and - * shouldn't be entered from the panic flow. We'll suppress that - * entry by setting nopanicdebug. - */ - pushq %rax - subq $DESCTBR_SIZE, %rsp - sidt (%rsp) - movq %gs:CPU_IDT, %rax - cmpq %rax, DTR_BASE(%rsp) - je 1f - - movq %rax, DTR_BASE(%rsp) - movw $_MUL(NIDT, GATE_DESC_SIZE), DTR_LIMIT(%rsp) - lidt (%rsp) - - movl $1, nopanicdebug - -1: addq $DESCTBR_SIZE, %rsp - popq %rax - - DFTRAP_PUSH - - /* - * freeze trap trace. 
- */ -#ifdef TRAPTRACE - leaq trap_trace_freeze(%rip), %r11 - incl (%r11) -#endif - - ENABLE_INTR_FLAGS - - movq %rsp, %rdi /* ®s */ - xorl %esi, %esi /* clear address */ - xorl %edx, %edx /* cpuid = 0 */ - call trap - - SET_SIZE(syserrtrap) - -#endif /* !__xpv */ - - /* - * #TS - */ - ENTRY_NP(invtsstrap) - TRAP_ERR(T_TSSFLT) /* $10 already have error code on stack */ - jmp cmntrap - SET_SIZE(invtsstrap) - - /* - * #NP - */ - ENTRY_NP(segnptrap) - TRAP_ERR(T_SEGFLT) /* $11 already have error code on stack */ - SET_CPU_GSBASE - jmp cmntrap - SET_SIZE(segnptrap) - - /* - * #SS - */ - ENTRY_NP(stktrap) - TRAP_ERR(T_STKFLT) /* $12 already have error code on stack */ - SET_CPU_GSBASE - jmp cmntrap - SET_SIZE(stktrap) - - /* - * #GP - */ - ENTRY_NP(gptrap) - TRAP_ERR(T_GPFLT) /* $13 already have error code on stack */ - SET_CPU_GSBASE - jmp cmntrap - SET_SIZE(gptrap) - - /* - * #PF - */ - ENTRY_NP(pftrap) - TRAP_ERR(T_PGFLT) /* $14 already have error code on stack */ - INTR_PUSH -#if defined(__xpv) - - movq %gs:CPU_VCPU_INFO, %r15 - movq VCPU_INFO_ARCH_CR2(%r15), %r15 /* vcpu[].arch.cr2 */ - -#else /* __xpv */ - - movq %cr2, %r15 - -#endif /* __xpv */ - jmp cmntrap_pushed - SET_SIZE(pftrap) - - ENTRY_NP(resvtrap) - TRAP_NOERR(T_RESVTRAP) /* (reserved) */ - jmp cmntrap - SET_SIZE(resvtrap) - - /* - * #MF - */ - ENTRY_NP(ndperr) - TRAP_NOERR(T_EXTERRFLT) /* $16 */ - jmp cmninttrap - SET_SIZE(ndperr) - - /* - * #AC - */ - ENTRY_NP(achktrap) - TRAP_ERR(T_ALIGNMENT) /* $17 */ - jmp cmntrap - SET_SIZE(achktrap) - - /* - * #MC - */ - .globl cmi_mca_trap /* see uts/i86pc/os/cmi.c */ - - ENTRY_NP(mcetrap) - TRAP_NOERR(T_MCE) /* $18 */ - - SET_CPU_GSBASE - - INTR_PUSH - INTGATE_INIT_KERNEL_FLAGS - - TRACE_PTR(%rdi, %rbx, %ebx, %rcx, $TT_TRAP) - TRACE_REGS(%rdi, %rsp, %rbx, %rcx) - TRACE_STAMP(%rdi) - - movq %rsp, %rbp - - movq %rsp, %rdi /* arg0 = struct regs *rp */ - call cmi_mca_trap /* cmi_mca_trap(rp); */ - - jmp _sys_rtt - SET_SIZE(mcetrap) - - /* - * #XF - */ - ENTRY_NP(xmtrap) - TRAP_NOERR(T_SIMDFPE) /* $19 */ - jmp cmninttrap - SET_SIZE(xmtrap) - - ENTRY_NP(invaltrap) - TRAP_NOERR(T_INVALTRAP) /* very invalid */ - jmp cmntrap - SET_SIZE(invaltrap) - - .globl fasttable - - ENTRY_NP(fasttrap) - cmpl $T_LASTFAST, %eax - ja 1f - orl %eax, %eax /* (zero extend top 32-bits) */ - leaq fasttable(%rip), %r11 - leaq (%r11, %rax, CLONGSIZE), %r11 - movq (%r11), %r11 - INDIRECT_JMP_REG(r11) -1: - /* - * Fast syscall number was illegal. Make it look - * as if the INT failed. Modify %rip to point before the - * INT, push the expected error code and fake a GP fault. - * - * XXX Why make the error code be offset into idt + 1? - * Instead we should push a real (soft?) error code - * on the stack and #gp handler could know about fasttraps? - */ - XPV_TRAP_POP - - subq $2, (%rsp) /* XXX int insn 2-bytes */ - pushq $_CONST(_MUL(T_FASTTRAP, GATE_DESC_SIZE) + 2) - -#if defined(__xpv) - pushq %r11 - pushq %rcx -#endif - jmp gptrap - SET_SIZE(fasttrap) - - ENTRY_NP(dtrace_ret) - TRAP_NOERR(T_DTRACE_RET) - jmp dtrace_trap - SET_SIZE(dtrace_ret) - - /* - * RFLAGS 24 bytes up the stack from %rsp. - * XXX a constant would be nicer. 
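 *
 * In C terms (sketch): rp->r_rfl |= PS_C, so the caller observes the
 * carry flag set on return.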
- */ - ENTRY_NP(fast_null) - XPV_TRAP_POP - orq $PS_C, 24(%rsp) /* set carry bit in user flags */ - call x86_md_clear - jmp tr_iret_auto - /*NOTREACHED*/ - SET_SIZE(fast_null) - - /* - * Interrupts start at 32 - */ -#define MKIVCT(n) \ - ENTRY_NP(ivct/**/n) \ - push $0; \ - push $n - 0x20; \ - jmp cmnint; \ - SET_SIZE(ivct/**/n) - - MKIVCT(32) - MKIVCT(33) - MKIVCT(34) - MKIVCT(35) - MKIVCT(36) - MKIVCT(37) - MKIVCT(38) - MKIVCT(39) - MKIVCT(40) - MKIVCT(41) - MKIVCT(42) - MKIVCT(43) - MKIVCT(44) - MKIVCT(45) - MKIVCT(46) - MKIVCT(47) - MKIVCT(48) - MKIVCT(49) - MKIVCT(50) - MKIVCT(51) - MKIVCT(52) - MKIVCT(53) - MKIVCT(54) - MKIVCT(55) - MKIVCT(56) - MKIVCT(57) - MKIVCT(58) - MKIVCT(59) - MKIVCT(60) - MKIVCT(61) - MKIVCT(62) - MKIVCT(63) - MKIVCT(64) - MKIVCT(65) - MKIVCT(66) - MKIVCT(67) - MKIVCT(68) - MKIVCT(69) - MKIVCT(70) - MKIVCT(71) - MKIVCT(72) - MKIVCT(73) - MKIVCT(74) - MKIVCT(75) - MKIVCT(76) - MKIVCT(77) - MKIVCT(78) - MKIVCT(79) - MKIVCT(80) - MKIVCT(81) - MKIVCT(82) - MKIVCT(83) - MKIVCT(84) - MKIVCT(85) - MKIVCT(86) - MKIVCT(87) - MKIVCT(88) - MKIVCT(89) - MKIVCT(90) - MKIVCT(91) - MKIVCT(92) - MKIVCT(93) - MKIVCT(94) - MKIVCT(95) - MKIVCT(96) - MKIVCT(97) - MKIVCT(98) - MKIVCT(99) - MKIVCT(100) - MKIVCT(101) - MKIVCT(102) - MKIVCT(103) - MKIVCT(104) - MKIVCT(105) - MKIVCT(106) - MKIVCT(107) - MKIVCT(108) - MKIVCT(109) - MKIVCT(110) - MKIVCT(111) - MKIVCT(112) - MKIVCT(113) - MKIVCT(114) - MKIVCT(115) - MKIVCT(116) - MKIVCT(117) - MKIVCT(118) - MKIVCT(119) - MKIVCT(120) - MKIVCT(121) - MKIVCT(122) - MKIVCT(123) - MKIVCT(124) - MKIVCT(125) - MKIVCT(126) - MKIVCT(127) - MKIVCT(128) - MKIVCT(129) - MKIVCT(130) - MKIVCT(131) - MKIVCT(132) - MKIVCT(133) - MKIVCT(134) - MKIVCT(135) - MKIVCT(136) - MKIVCT(137) - MKIVCT(138) - MKIVCT(139) - MKIVCT(140) - MKIVCT(141) - MKIVCT(142) - MKIVCT(143) - MKIVCT(144) - MKIVCT(145) - MKIVCT(146) - MKIVCT(147) - MKIVCT(148) - MKIVCT(149) - MKIVCT(150) - MKIVCT(151) - MKIVCT(152) - MKIVCT(153) - MKIVCT(154) - MKIVCT(155) - MKIVCT(156) - MKIVCT(157) - MKIVCT(158) - MKIVCT(159) - MKIVCT(160) - MKIVCT(161) - MKIVCT(162) - MKIVCT(163) - MKIVCT(164) - MKIVCT(165) - MKIVCT(166) - MKIVCT(167) - MKIVCT(168) - MKIVCT(169) - MKIVCT(170) - MKIVCT(171) - MKIVCT(172) - MKIVCT(173) - MKIVCT(174) - MKIVCT(175) - MKIVCT(176) - MKIVCT(177) - MKIVCT(178) - MKIVCT(179) - MKIVCT(180) - MKIVCT(181) - MKIVCT(182) - MKIVCT(183) - MKIVCT(184) - MKIVCT(185) - MKIVCT(186) - MKIVCT(187) - MKIVCT(188) - MKIVCT(189) - MKIVCT(190) - MKIVCT(191) - MKIVCT(192) - MKIVCT(193) - MKIVCT(194) - MKIVCT(195) - MKIVCT(196) - MKIVCT(197) - MKIVCT(198) - MKIVCT(199) - MKIVCT(200) - MKIVCT(201) - MKIVCT(202) - MKIVCT(203) - MKIVCT(204) - MKIVCT(205) - MKIVCT(206) - MKIVCT(207) - MKIVCT(208) - MKIVCT(209) - MKIVCT(210) - MKIVCT(211) - MKIVCT(212) - MKIVCT(213) - MKIVCT(214) - MKIVCT(215) - MKIVCT(216) - MKIVCT(217) - MKIVCT(218) - MKIVCT(219) - MKIVCT(220) - MKIVCT(221) - MKIVCT(222) - MKIVCT(223) - MKIVCT(224) - MKIVCT(225) - MKIVCT(226) - MKIVCT(227) - MKIVCT(228) - MKIVCT(229) - MKIVCT(230) - MKIVCT(231) - MKIVCT(232) - MKIVCT(233) - MKIVCT(234) - MKIVCT(235) - MKIVCT(236) - MKIVCT(237) - MKIVCT(238) - MKIVCT(239) - MKIVCT(240) - MKIVCT(241) - MKIVCT(242) - MKIVCT(243) - MKIVCT(244) - MKIVCT(245) - MKIVCT(246) - MKIVCT(247) - MKIVCT(248) - MKIVCT(249) - MKIVCT(250) - MKIVCT(251) - MKIVCT(252) - MKIVCT(253) - MKIVCT(254) - MKIVCT(255) - diff --git a/usr/src/uts/intel/ia32/ml/float.s b/usr/src/uts/intel/ia32/ml/float.s deleted file mode 100644 index 807647f553..0000000000 --- 
a/usr/src/uts/intel/ia32/ml/float.s +++ /dev/null @@ -1,347 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, Joyent, Inc. - * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. - */ - -/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ -/* All Rights Reserved */ - -/* Copyright (c) 1987, 1988 Microsoft Corporation */ -/* All Rights Reserved */ - -/* - * Copyright (c) 2009, Intel Corporation. - * All rights reserved. - */ - -#include <sys/asm_linkage.h> -#include <sys/asm_misc.h> -#include <sys/regset.h> -#include <sys/privregs.h> -#include <sys/x86_archext.h> - -#include "assym.h" - - /* - * Returns zero if x87 "chip" is present(!) - */ - ENTRY_NP(fpu_initial_probe) - CLTS - fninit - fnstsw %ax - movzbl %al, %eax - ret - SET_SIZE(fpu_initial_probe) - - ENTRY_NP(fxsave_insn) - fxsaveq (%rdi) - ret - SET_SIZE(fxsave_insn) - -/* - * One of these routines is called from any lwp with floating - * point context as part of the prolog of a context switch. - */ - -/* - * These three functions define the Intel "xsave" handling for CPUs with - * different features. Newer AMD CPUs can also use these functions. See the - * 'exception pointers' comment below. - */ - ENTRY_NP(fpxsave_ctxt) /* %rdi is a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ - fxsaveq (%rdi) - STTS(%rsi) /* trap on next fpu touch */ -1: rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(fpxsave_ctxt) - - ENTRY_NP(xsave_ctxt) - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */ - movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx - movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsave (%rsi) - STTS(%rsi) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsave_ctxt) - - ENTRY_NP(xsaveopt_ctxt) - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */ - movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx - movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsaveopt (%rsi) - STTS(%rsi) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsaveopt_ctxt) - -/* - * On certain AMD processors, the "exception pointers" (i.e. 
the last - * instruction pointer, last data pointer, and last opcode) are saved by the - * fxsave, xsave or xsaveopt instruction ONLY if the exception summary bit is - * set. - * - * On newer CPUs, AMD has changed their behavior to mirror the Intel behavior. - * We can detect this via an AMD specific cpuid feature bit - * (CPUID_AMD_EBX_ERR_PTR_ZERO) and use the simpler Intel-oriented functions. - * Otherwise we use these more complex functions on AMD CPUs. All three follow - * the same logic after the xsave* instruction. - */ - ENTRY_NP(fpxsave_excp_clr_ctxt) /* %rdi is a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ - fxsaveq (%rdi) - /* - * To ensure that we don't leak these values into the next context - * on the cpu, we could just issue an fninit here, but that's - * rather slow and so we issue an instruction sequence that - * clears them more quickly, if a little obscurely. - */ - btw $7, FXSAVE_STATE_FSW(%rdi) /* Test saved ES bit */ - jnc 0f /* jump if ES = 0 */ - fnclex /* clear pending x87 exceptions */ -0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ - fildl .fpzero_const(%rip) - /* dummy load changes all exception pointers */ - STTS(%rsi) /* trap on next fpu touch */ -1: rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(fpxsave_excp_clr_ctxt) - - ENTRY_NP(xsave_excp_clr_ctxt) - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax - movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx - movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsave (%rsi) - btw $7, FXSAVE_STATE_FSW(%rsi) /* Test saved ES bit */ - jnc 0f /* jump if ES = 0 */ - fnclex /* clear pending x87 exceptions */ -0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ - fildl .fpzero_const(%rip) /* dummy load changes all excp. pointers */ - STTS(%rsi) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsave_excp_clr_ctxt) - - ENTRY_NP(xsaveopt_excp_clr_ctxt) - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax - movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx - movq FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsaveopt (%rsi) - btw $7, FXSAVE_STATE_FSW(%rsi) /* Test saved ES bit */ - jnc 0f /* jump if ES = 0 */ - fnclex /* clear pending x87 exceptions */ -0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ - fildl .fpzero_const(%rip) /* dummy load changes all excp. 
pointers */ - STTS(%rsi) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsaveopt_excp_clr_ctxt) - - .align 8 -.fpzero_const: - .4byte 0x0 - .4byte 0x0 - - - ENTRY_NP(fpxsave) - CLTS - fxsaveq (%rdi) - fninit /* clear exceptions, init x87 tags */ - STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ - ret - SET_SIZE(fpxsave) - - ENTRY_NP(xsave) - CLTS - movl %esi, %eax /* bv mask */ - movq %rsi, %rdx - shrq $32, %rdx - xsave (%rdi) - - fninit /* clear exceptions, init x87 tags */ - STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ - ret - SET_SIZE(xsave) - - ENTRY_NP(xsaveopt) - CLTS - movl %esi, %eax /* bv mask */ - movq %rsi, %rdx - shrq $32, %rdx - xsaveopt (%rdi) - - fninit /* clear exceptions, init x87 tags */ - STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ - ret - SET_SIZE(xsaveopt) - -/* - * These functions are used when restoring the FPU as part of the epilogue of a - * context switch. - */ - - ENTRY(fpxrestore_ctxt) - cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi) - jne 1f - movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ - CLTS - fxrstorq (%rdi) -1: - ret - SET_SIZE(fpxrestore_ctxt) - - ENTRY(xrestore_ctxt) - cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi) - jne 1f - movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */ - movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx - movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_xs ptr */ - CLTS - xrstor (%rdi) -1: - ret - SET_SIZE(xrestore_ctxt) - - - ENTRY_NP(fpxrestore) - CLTS - fxrstorq (%rdi) - ret - SET_SIZE(fpxrestore) - - ENTRY_NP(xrestore) - CLTS - movl %esi, %eax /* bv mask */ - movq %rsi, %rdx - shrq $32, %rdx - xrstor (%rdi) - ret - SET_SIZE(xrestore) - -/* - * Disable the floating point unit. - */ - - ENTRY_NP(fpdisable) - STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ - ret - SET_SIZE(fpdisable) - -/* - * Initialize the fpu hardware. - */ - - ENTRY_NP(fpinit) - CLTS - cmpl $FP_XSAVE, fp_save_mech - je 1f - - /* fxsave */ - leaq sse_initial(%rip), %rax - fxrstorq (%rax) /* load clean initial state */ - ret - -1: /* xsave */ - leaq avx_initial(%rip), %rcx - xorl %edx, %edx - movl $XFEATURE_AVX, %eax - btl $X86FSET_AVX, x86_featureset - cmovael %edx, %eax - orl $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax - xrstor (%rcx) - ret - SET_SIZE(fpinit) - -/* - * Clears FPU exception state. - * Returns the FP status word. - */ - - ENTRY_NP(fperr_reset) - CLTS - xorl %eax, %eax - fnstsw %ax - fnclex - ret - SET_SIZE(fperr_reset) - - ENTRY_NP(fpxerr_reset) - pushq %rbp - movq %rsp, %rbp - subq $0x10, %rsp /* make some temporary space */ - CLTS - stmxcsr (%rsp) - movl (%rsp), %eax - andl $_BITNOT(SSE_MXCSR_EFLAGS), (%rsp) - ldmxcsr (%rsp) /* clear processor exceptions */ - leave - ret - SET_SIZE(fpxerr_reset) - - ENTRY_NP(fpgetcwsw) - pushq %rbp - movq %rsp, %rbp - subq $0x10, %rsp /* make some temporary space */ - CLTS - fnstsw (%rsp) /* store the status word */ - fnstcw 2(%rsp) /* store the control word */ - movl (%rsp), %eax /* put both in %eax */ - leave - ret - SET_SIZE(fpgetcwsw) - -/* - * Returns the MXCSR register. 
- */ - - ENTRY_NP(fpgetmxcsr) - pushq %rbp - movq %rsp, %rbp - subq $0x10, %rsp /* make some temporary space */ - CLTS - stmxcsr (%rsp) - movl (%rsp), %eax - leave - ret - SET_SIZE(fpgetmxcsr) - diff --git a/usr/src/uts/intel/ia32/ml/hypersubr.s b/usr/src/uts/intel/ia32/ml/hypersubr.s deleted file mode 100644 index e6378d8518..0000000000 --- a/usr/src/uts/intel/ia32/ml/hypersubr.s +++ /dev/null @@ -1,164 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright 2019 Joyent, Inc. - */ - -#include <sys/asm_linkage.h> -#ifndef __xpv -#include <sys/xpv_support.h> -#endif -#include <sys/hypervisor.h> - -/* - * Hypervisor "system calls" - * - * amd64 - * %rax == call number - * args in registers (%rdi, %rsi, %rdx, %r10, %r8, %r9) - * - * Note that we use %r10 instead of %rcx for passing 4th argument as in - * C calling convention since the "syscall" instruction clobbers %rcx. - * - * (These calls can be done more efficiently as gcc-style inlines, but - * for simplicity and help with initial debugging, we use these primitives - * to build the hypervisor calls up from C wrappers.) - */ - -/* - * XXPV grr - assembler can't deal with an instruction in a quoted string - */ -#undef TRAP_INSTR /* cause it's currently "int $0x82" */ - -/* - * The method for issuing a hypercall (i.e. a system call to the - * hypervisor) varies from platform to platform. In 32-bit PV domains, an - * 'int 82' triggers the call. In 64-bit PV domains, a 'syscall' does the - * trick. - * - * HVM domains are more complicated. In all cases, we want to issue a - * VMEXIT instruction, but AMD and Intel use different opcodes to represent - * that instruction. Rather than build CPU-specific modules with the - * different opcodes, we use the 'hypercall page' provided by Xen. This - * page contains a collection of code stubs that do nothing except issue - * hypercalls using the proper instructions for this machine. To keep the - * wrapper code as simple and efficient as possible, we preallocate that - * page below. When the module is loaded, we ask Xen to remap the - * underlying PFN to that of the hypercall page. - * - * Note: this same mechanism could be used in PV domains, but using - * hypercall page requires a call and several more instructions than simply - * issuing the proper trap. 
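 *
 * A representative C wrapper over these primitives might look like
 * this sketch (the hypercall number constants come from the Xen
 * public headers):
 *
 *	long
 *	HYPERVISOR_memory_op(int cmd, void *arg)
 *	{
 *		return (__hypercall2(__HYPERVISOR_memory_op,
 *		    (ulong_t)cmd, (ulong_t)arg));
 *	}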
- */ -#if !defined(__xpv) - -#define HYPERCALL_PAGESIZE 0x1000 -#define HYPERCALL_SHINFO_PAGESIZE 0x1000 - - .data - .align HYPERCALL_SHINFO_PAGESIZE - .globl hypercall_shared_info_page - .type hypercall_shared_info_page, @object - .size hypercall_shared_info_page, HYPERCALL_SHINFO_PAGESIZE -hypercall_shared_info_page: - .skip HYPERCALL_SHINFO_PAGESIZE - - .text - .align HYPERCALL_PAGESIZE - .globl hypercall_page - .type hypercall_page, @function -hypercall_page: - .skip HYPERCALL_PAGESIZE - .size hypercall_page, HYPERCALL_PAGESIZE -#define TRAP_INSTR \ - shll $5, %eax; \ - addq $hypercall_page, %rax; \ - INDIRECT_JMP_REG(rax); - -#else /* !_xpv */ - -#define TRAP_INSTR syscall -#endif /* !__xpv */ - - - ENTRY_NP(__hypercall0) - ALTENTRY(__hypercall0_int) - movl %edi, %eax - TRAP_INSTR - ret - SET_SIZE(__hypercall0) - - ENTRY_NP(__hypercall1) - ALTENTRY(__hypercall1_int) - movl %edi, %eax - movq %rsi, %rdi /* arg 1 */ - TRAP_INSTR - ret - SET_SIZE(__hypercall1) - - ENTRY_NP(__hypercall2) - ALTENTRY(__hypercall2_int) - movl %edi, %eax - movq %rsi, %rdi /* arg 1 */ - movq %rdx, %rsi /* arg 2 */ - TRAP_INSTR - ret - SET_SIZE(__hypercall2) - - ENTRY_NP(__hypercall3) - ALTENTRY(__hypercall3_int) - movl %edi, %eax - movq %rsi, %rdi /* arg 1 */ - movq %rdx, %rsi /* arg 2 */ - movq %rcx, %rdx /* arg 3 */ - TRAP_INSTR - ret - SET_SIZE(__hypercall3) - - ENTRY_NP(__hypercall4) - ALTENTRY(__hypercall4_int) - movl %edi, %eax - movq %rsi, %rdi /* arg 1 */ - movq %rdx, %rsi /* arg 2 */ - movq %rcx, %rdx /* arg 3 */ - movq %r8, %r10 /* r10 = 4th arg */ - TRAP_INSTR - ret - SET_SIZE(__hypercall4) - - ENTRY_NP(__hypercall5) - ALTENTRY(__hypercall5_int) - movl %edi, %eax - movq %rsi, %rdi /* arg 1 */ - movq %rdx, %rsi /* arg 2 */ - movq %rcx, %rdx /* arg 3 */ - movq %r8, %r10 /* r10 = 4th arg */ - movq %r9, %r8 /* arg 5 */ - TRAP_INSTR - ret - SET_SIZE(__hypercall5) - diff --git a/usr/src/uts/intel/ia32/ml/i86_subr.s b/usr/src/uts/intel/ia32/ml/i86_subr.s deleted file mode 100644 index 1227ac69bf..0000000000 --- a/usr/src/uts/intel/ia32/ml/i86_subr.s +++ /dev/null @@ -1,1629 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2014 by Delphix. All rights reserved. - * Copyright 2019 Joyent, Inc. - */ - -/* - * Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. - * Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T - * All Rights Reserved - */ - -/* - * Copyright (c) 2009, Intel Corporation. - * All rights reserved. - */ - -/* - * General assembly language routines. 
- * It is the intent of this file to contain routines that are - * independent of the specific kernel architecture, and those that are - * common across kernel architectures. - * As architectures diverge, and implementations of specific - * architecture-dependent routines change, the routines should be moved - * from this file into the respective ../`arch -k`/subr.s file. - */ - -#include <sys/asm_linkage.h> -#include <sys/asm_misc.h> -#include <sys/panic.h> -#include <sys/ontrap.h> -#include <sys/regset.h> -#include <sys/privregs.h> -#include <sys/reboot.h> -#include <sys/psw.h> -#include <sys/x86_archext.h> - -#include "assym.h" -#include <sys/dditypes.h> - -/* - * on_fault() - * - * Catch lofault faults. Like setjmp except it returns one - * if code following causes uncorrectable fault. Turned off - * by calling no_fault(). Note that while under on_fault(), - * SMAP is disabled. For more information see - * uts/intel/ia32/ml/copy.s. - */ - - ENTRY(on_fault) - movq %gs:CPU_THREAD, %rsi - leaq catch_fault(%rip), %rdx - movq %rdi, T_ONFAULT(%rsi) /* jumpbuf in t_onfault */ - movq %rdx, T_LOFAULT(%rsi) /* catch_fault in t_lofault */ - call smap_disable /* allow user accesses */ - jmp setjmp /* let setjmp do the rest */ - -catch_fault: - movq %gs:CPU_THREAD, %rsi - movq T_ONFAULT(%rsi), %rdi /* address of save area */ - xorl %eax, %eax - movq %rax, T_ONFAULT(%rsi) /* turn off onfault */ - movq %rax, T_LOFAULT(%rsi) /* turn off lofault */ - call smap_enable /* disallow user accesses */ - jmp longjmp /* let longjmp do the rest */ - SET_SIZE(on_fault) - - ENTRY(no_fault) - movq %gs:CPU_THREAD, %rsi - xorl %eax, %eax - movq %rax, T_ONFAULT(%rsi) /* turn off onfault */ - movq %rax, T_LOFAULT(%rsi) /* turn off lofault */ - call smap_enable /* disallow user accesses */ - ret - SET_SIZE(no_fault) - -/* - * Default trampoline code for on_trap() (see <sys/ontrap.h>). We just - * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called. - */ - - ENTRY(on_trap_trampoline) - movq %gs:CPU_THREAD, %rsi - movq T_ONTRAP(%rsi), %rdi - addq $OT_JMPBUF, %rdi - jmp longjmp - SET_SIZE(on_trap_trampoline) - -/* - * Push a new element on to the t_ontrap stack. Refer to <sys/ontrap.h> for - * more information about the on_trap() mechanism. If the on_trap_data is the - * same as the topmost stack element, we just modify that element. - */ - - ENTRY(on_trap) - movw %si, OT_PROT(%rdi) /* ot_prot = prot */ - movw $0, OT_TRAP(%rdi) /* ot_trap = 0 */ - leaq on_trap_trampoline(%rip), %rdx /* rdx = &on_trap_trampoline */ - movq %rdx, OT_TRAMPOLINE(%rdi) /* ot_trampoline = rdx */ - xorl %ecx, %ecx - movq %rcx, OT_HANDLE(%rdi) /* ot_handle = NULL */ - movq %rcx, OT_PAD1(%rdi) /* ot_pad1 = NULL */ - movq %gs:CPU_THREAD, %rdx /* rdx = curthread */ - movq T_ONTRAP(%rdx), %rcx /* rcx = curthread->t_ontrap */ - cmpq %rdi, %rcx /* if (otp == %rcx) */ - je 0f /* don't modify t_ontrap */ - - movq %rcx, OT_PREV(%rdi) /* ot_prev = t_ontrap */ - movq %rdi, T_ONTRAP(%rdx) /* curthread->t_ontrap = otp */ - -0: addq $OT_JMPBUF, %rdi /* &ot_jmpbuf */ - jmp setjmp - SET_SIZE(on_trap) - -/* - * Setjmp and longjmp implement non-local gotos using state vectors - * type label_t. 
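 *
 * A usage sketch (note the kernel versions take a label_t, not a
 * jmp_buf):
 *
 *	label_t jb;
 *
 *	if (setjmp(&jb) == 0) {
 *		...		direct path; may later longjmp(&jb)
 *	} else {
 *		...		resumed here; setjmp appears to return 1
 *	}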
- */ - -#if LABEL_PC != 0 -#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded -#endif /* LABEL_PC != 0 */ - - ENTRY(setjmp) - movq %rsp, LABEL_SP(%rdi) - movq %rbp, LABEL_RBP(%rdi) - movq %rbx, LABEL_RBX(%rdi) - movq %r12, LABEL_R12(%rdi) - movq %r13, LABEL_R13(%rdi) - movq %r14, LABEL_R14(%rdi) - movq %r15, LABEL_R15(%rdi) - movq (%rsp), %rdx /* return address */ - movq %rdx, (%rdi) /* LABEL_PC is 0 */ - xorl %eax, %eax /* return 0 */ - ret - SET_SIZE(setjmp) - - ENTRY(longjmp) - movq LABEL_SP(%rdi), %rsp - movq LABEL_RBP(%rdi), %rbp - movq LABEL_RBX(%rdi), %rbx - movq LABEL_R12(%rdi), %r12 - movq LABEL_R13(%rdi), %r13 - movq LABEL_R14(%rdi), %r14 - movq LABEL_R15(%rdi), %r15 - movq (%rdi), %rdx /* return address; LABEL_PC is 0 */ - movq %rdx, (%rsp) - xorl %eax, %eax - incl %eax /* return 1 */ - ret - SET_SIZE(longjmp) - -/* - * if a() calls b() calls caller(), - * caller() returns return address in a(). - * (Note: We assume a() and b() are C routines which do the normal entry/exit - * sequence.) - */ - - ENTRY(caller) - movq 8(%rbp), %rax /* b()'s return pc, in a() */ - ret - SET_SIZE(caller) - -/* - * if a() calls callee(), callee() returns the - * return address in a(); - */ - - ENTRY(callee) - movq (%rsp), %rax /* callee()'s return pc, in a() */ - ret - SET_SIZE(callee) - -/* - * return the current frame pointer - */ - - ENTRY(getfp) - movq %rbp, %rax - ret - SET_SIZE(getfp) - -/* - * Invalidate a single page table entry in the TLB - */ - - ENTRY(mmu_invlpg) - invlpg (%rdi) - ret - SET_SIZE(mmu_invlpg) - - -/* - * Get/Set the value of various control registers - */ - - ENTRY(getcr0) - movq %cr0, %rax - ret - SET_SIZE(getcr0) - - ENTRY(setcr0) - movq %rdi, %cr0 - ret - SET_SIZE(setcr0) - - ENTRY(getcr2) -#if defined(__xpv) - movq %gs:CPU_VCPU_INFO, %rax - movq VCPU_INFO_ARCH_CR2(%rax), %rax -#else - movq %cr2, %rax -#endif - ret - SET_SIZE(getcr2) - - ENTRY(getcr3) - movq %cr3, %rax - ret - SET_SIZE(getcr3) - -#if !defined(__xpv) - - ENTRY(setcr3) - movq %rdi, %cr3 - ret - SET_SIZE(setcr3) - - ENTRY(reload_cr3) - movq %cr3, %rdi - movq %rdi, %cr3 - ret - SET_SIZE(reload_cr3) - -#endif /* __xpv */ - - ENTRY(getcr4) - movq %cr4, %rax - ret - SET_SIZE(getcr4) - - ENTRY(setcr4) - movq %rdi, %cr4 - ret - SET_SIZE(setcr4) - - ENTRY(getcr8) - movq %cr8, %rax - ret - SET_SIZE(getcr8) - - ENTRY(setcr8) - movq %rdi, %cr8 - ret - SET_SIZE(setcr8) - - ENTRY(__cpuid_insn) - movq %rbx, %r8 - movq %rcx, %r9 - movq %rdx, %r11 - movl (%rdi), %eax /* %eax = regs->cp_eax */ - movl 0x4(%rdi), %ebx /* %ebx = regs->cp_ebx */ - movl 0x8(%rdi), %ecx /* %ecx = regs->cp_ecx */ - movl 0xc(%rdi), %edx /* %edx = regs->cp_edx */ - cpuid - movl %eax, (%rdi) /* regs->cp_eax = %eax */ - movl %ebx, 0x4(%rdi) /* regs->cp_ebx = %ebx */ - movl %ecx, 0x8(%rdi) /* regs->cp_ecx = %ecx */ - movl %edx, 0xc(%rdi) /* regs->cp_edx = %edx */ - movq %r8, %rbx - movq %r9, %rcx - movq %r11, %rdx - ret - SET_SIZE(__cpuid_insn) - - ENTRY_NP(i86_monitor) - pushq %rbp - movq %rsp, %rbp - movq %rdi, %rax /* addr */ - movq %rsi, %rcx /* extensions */ - /* rdx contains input arg3: hints */ - clflush (%rax) - .byte 0x0f, 0x01, 0xc8 /* monitor */ - leave - ret - SET_SIZE(i86_monitor) - - ENTRY_NP(i86_mwait) - pushq %rbp - call x86_md_clear - movq %rsp, %rbp - movq %rdi, %rax /* data */ - movq %rsi, %rcx /* extensions */ - .byte 0x0f, 0x01, 0xc9 /* mwait */ - leave - ret - SET_SIZE(i86_mwait) - -#if defined(__xpv) - /* - * Defined in C - */ -#else - - ENTRY_NP(tsc_read) - movq %rbx, %r11 - movl $0, %eax - cpuid - rdtsc - 
	movq	%r11, %rbx
-	shlq	$32, %rdx
-	orq	%rdx, %rax
-	ret
-	.globl _tsc_mfence_start
-_tsc_mfence_start:
-	mfence
-	rdtsc
-	shlq	$32, %rdx
-	orq	%rdx, %rax
-	ret
-	.globl _tsc_mfence_end
-_tsc_mfence_end:
-	.globl _tscp_start
-_tscp_start:
-	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
-	shlq	$32, %rdx
-	orq	%rdx, %rax
-	ret
-	.globl _tscp_end
-_tscp_end:
-	.globl _no_rdtsc_start
-_no_rdtsc_start:
-	xorl	%edx, %edx
-	xorl	%eax, %eax
-	ret
-	.globl _no_rdtsc_end
-_no_rdtsc_end:
-	.globl _tsc_lfence_start
-_tsc_lfence_start:
-	lfence
-	rdtsc
-	shlq	$32, %rdx
-	orq	%rdx, %rax
-	ret
-	.globl _tsc_lfence_end
-_tsc_lfence_end:
-	SET_SIZE(tsc_read)
-
-
-#endif	/* __xpv */
-
-	ENTRY_NP(randtick)
-	rdtsc
-	shlq	$32, %rdx
-	orq	%rdx, %rax
-	ret
-	SET_SIZE(randtick)
-/*
- * Insert entryp after predp in a doubly linked list.
- */
-
-	ENTRY(_insque)
-	movq	(%rsi), %rax		/* predp->forw			*/
-	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp		*/
-	movq	%rax, (%rdi)		/* entryp->forw = predp->forw	*/
-	movq	%rdi, (%rsi)		/* predp->forw = entryp		*/
-	movq	%rdi, CPTRSIZE(%rax)	/* predp->forw->back = entryp	*/
-	ret
-	SET_SIZE(_insque)
-
-/*
- * Remove entryp from a doubly linked list
- */
-
-	ENTRY(_remque)
-	movq	(%rdi), %rax		/* entry->forw */
-	movq	CPTRSIZE(%rdi), %rdx	/* entry->back */
-	movq	%rax, (%rdx)		/* entry->back->forw = entry->forw */
-	movq	%rdx, CPTRSIZE(%rax)	/* entry->forw->back = entry->back */
-	ret
-	SET_SIZE(_remque)
-
-/*
- * Returns the number of
- * non-null bytes in string argument.
- */
-
-/*
- * This is close to a simple transliteration of a C version of this
- * routine. We should either just -make- this be a C version, or
- * justify having it in assembler by making it significantly faster.
- *
- * size_t
- * strlen(const char *s)
- * {
- *	const char *s0;
- * #if defined(DEBUG)
- *	if ((uintptr_t)s < KERNELBASE)
- *		panic(.str_panic_msg);
- * #endif
- *	for (s0 = s; *s; s++)
- *		;
- *	return (s - s0);
- * }
- */
-
-	ENTRY(strlen)
-#ifdef DEBUG
-	movq	postbootkernelbase(%rip), %rax
-	cmpq	%rax, %rdi
-	jae	str_valid
-	pushq	%rbp
-	movq	%rsp, %rbp
-	leaq	.str_panic_msg(%rip), %rdi
-	xorl	%eax, %eax
-	call	panic
-#endif	/* DEBUG */
-str_valid:
-	cmpb	$0, (%rdi)
-	movq	%rdi, %rax
-	je	.null_found
-	.align	4
-.strlen_loop:
-	incq	%rdi
-	cmpb	$0, (%rdi)
-	jne	.strlen_loop
-.null_found:
-	subq	%rax, %rdi
-	movq	%rdi, %rax
-	ret
-	SET_SIZE(strlen)
-
-#ifdef DEBUG
-	.text
-.str_panic_msg:
-	.string "strlen: argument below kernelbase"
-#endif	/* DEBUG */
-
-	/*
-	 * Berkeley 4.3 introduced symbolically named interrupt levels
-	 * as a way to deal with priority in a machine independent fashion.
-	 * Numbered priorities are machine specific, and should be
-	 * discouraged where possible.
-	 *
-	 * Note, for the machine specific priorities there are
-	 * examples listed for devices that use a particular priority.
-	 * It should not be construed that all devices of that
-	 * type should be at that priority. It is currently where
-	 * the current devices fit into the priority scheme based
-	 * upon time criticalness.
-	 *
-	 * The underlying assumption of these assignments is that
-	 * IPL 10 is the highest level from which a device
-	 * routine can call wakeup. Devices that interrupt from higher
-	 * levels are restricted in what they can do. If they need
-	 * kernel services they should schedule a routine at a lower
-	 * level (via software interrupt) to do the required
-	 * processing.
- * - * Examples of this higher usage: - * Level Usage - * 14 Profiling clock (and PROM uart polling clock) - * 12 Serial ports - * - * The serial ports request lower level processing on level 6. - * - * Also, almost all splN routines (where N is a number or a - * mnemonic) will do a RAISE(), on the assumption that they are - * never used to lower our priority. - * The exceptions are: - * spl8() Because you can't be above 15 to begin with! - * splzs() Because this is used at boot time to lower our - * priority, to allow the PROM to poll the uart. - * spl0() Used to lower priority to 0. - */ - -#define SETPRI(level) \ - movl $/**/level, %edi; /* new priority */ \ - jmp do_splx /* redirect to do_splx */ - -#define RAISE(level) \ - movl $/**/level, %edi; /* new priority */ \ - jmp splr /* redirect to splr */ - - /* locks out all interrupts, including memory errors */ - ENTRY(spl8) - SETPRI(15) - SET_SIZE(spl8) - - /* just below the level that profiling runs */ - ENTRY(spl7) - RAISE(13) - SET_SIZE(spl7) - - /* sun specific - highest priority onboard serial i/o asy ports */ - ENTRY(splzs) - SETPRI(12) /* Can't be a RAISE, as it's used to lower us */ - SET_SIZE(splzs) - - ENTRY(splhi) - ALTENTRY(splhigh) - ALTENTRY(spl6) - ALTENTRY(i_ddi_splhigh) - - RAISE(DISP_LEVEL) - - SET_SIZE(i_ddi_splhigh) - SET_SIZE(spl6) - SET_SIZE(splhigh) - SET_SIZE(splhi) - - /* allow all interrupts */ - ENTRY(spl0) - SETPRI(0) - SET_SIZE(spl0) - - - /* splx implementation */ - ENTRY(splx) - jmp do_splx /* redirect to common splx code */ - SET_SIZE(splx) - - ENTRY(wait_500ms) - pushq %rbx - movl $50000, %ebx -1: - call tenmicrosec - decl %ebx - jnz 1b - popq %rbx - ret - SET_SIZE(wait_500ms) - -#define RESET_METHOD_KBC 1 -#define RESET_METHOD_PORT92 2 -#define RESET_METHOD_PCI 4 - - DGDEF3(pc_reset_methods, 4, 8) - .long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI; - - ENTRY(pc_reset) - - testl $RESET_METHOD_KBC, pc_reset_methods(%rip) - jz 1f - - / - / Try the classic keyboard controller-triggered reset. - / - movw $0x64, %dx - movb $0xfe, %al - outb (%dx) - - / Wait up to 500 milliseconds here for the keyboard controller - / to pull the reset line. On some systems where the keyboard - / controller is slow to pull the reset line, the next reset method - / may be executed (which may be bad if those systems hang when the - / next reset method is used, e.g. Ferrari 3400 (doesn't like port 92), - / and Ferrari 4000 (doesn't like the cf9 reset method)) - - call wait_500ms - -1: - testl $RESET_METHOD_PORT92, pc_reset_methods(%rip) - jz 3f - - / - / Try port 0x92 fast reset - / - movw $0x92, %dx - inb (%dx) - cmpb $0xff, %al / If port's not there, we should get back 0xFF - je 1f - testb $1, %al / If bit 0 - jz 2f / is clear, jump to perform the reset - andb $0xfe, %al / otherwise, - outb (%dx) / clear bit 0 first, then -2: - orb $1, %al / Set bit 0 - outb (%dx) / and reset the system -1: - - call wait_500ms - -3: - testl $RESET_METHOD_PCI, pc_reset_methods(%rip) - jz 4f - - / Try the PCI (soft) reset vector (should work on all modern systems, - / but has been shown to cause problems on 450NX systems, and some newer - / systems (e.g. ATI IXP400-equipped systems)) - / When resetting via this method, 2 writes are required. The first - / targets bit 1 (0=hard reset without power cycle, 1=hard reset with - / power cycle). - / The reset occurs on the second write, during bit 2's transition from - / 0->1. 
	movw	$0xcf9, %dx
-	movb	$0x2, %al	/ Reset mode = hard, no power cycle
-	outb	(%dx)
-	movb	$0x6, %al
-	outb	(%dx)
-
-	call	wait_500ms
-
-4:
-	/
-	/ port 0xcf9 failed also. Last-ditch effort is to
-	/ triple-fault the CPU.
-	/ Also, use triple fault for EFI firmware
-	/
-	ENTRY(efi_reset)
-	pushq	$0x0
-	pushq	$0x0		/ IDT base of 0, limit of 0 + 2 unused bytes
-	lidt	(%rsp)
-	int	$0x0		/ Trigger interrupt, generate triple-fault
-
-	cli
-	hlt			/ Wait forever
-	/*NOTREACHED*/
-	SET_SIZE(efi_reset)
-	SET_SIZE(pc_reset)
-
-/*
- * C callable in and out routines
- */
-
-	ENTRY(outl)
-	movw	%di, %dx
-	movl	%esi, %eax
-	outl	(%dx)
-	ret
-	SET_SIZE(outl)
-
-	ENTRY(outw)
-	movw	%di, %dx
-	movw	%si, %ax
-	D16 outl (%dx)		/* XX64 why not outw? */
-	ret
-	SET_SIZE(outw)
-
-	ENTRY(outb)
-	movw	%di, %dx
-	movb	%sil, %al
-	outb	(%dx)
-	ret
-	SET_SIZE(outb)
-
-	ENTRY(inl)
-	xorl	%eax, %eax
-	movw	%di, %dx
-	inl	(%dx)
-	ret
-	SET_SIZE(inl)
-
-	ENTRY(inw)
-	xorl	%eax, %eax
-	movw	%di, %dx
-	D16 inl (%dx)
-	ret
-	SET_SIZE(inw)
-
-
-	ENTRY(inb)
-	xorl	%eax, %eax
-	movw	%di, %dx
-	inb	(%dx)
-	ret
-	SET_SIZE(inb)
-
-/*
- * void int3(void)
- * void int18(void)
- * void int20(void)
- * void int_cmci(void)
- */
-
-	ENTRY(int3)
-	int	$T_BPTFLT
-	ret
-	SET_SIZE(int3)
-
-	ENTRY(int18)
-	int	$T_MCE
-	ret
-	SET_SIZE(int18)
-
-	ENTRY(int20)
-	movl	boothowto, %eax
-	andl	$RB_DEBUG, %eax
-	jz	1f
-
-	int	$T_DBGENTR
-1:
-	rep;	ret	/* use 2 byte return instruction when branch target */
-			/* AMD Software Optimization Guide - Section 6.2 */
-	SET_SIZE(int20)
-
-	ENTRY(int_cmci)
-	int	$T_ENOEXTFLT
-	ret
-	SET_SIZE(int_cmci)
-
-	ENTRY(scanc)
-					/* rdi == size */
-					/* rsi == cp */
-					/* rdx == table */
-					/* rcx == mask */
-	addq	%rsi, %rdi		/* end = &cp[size] */
-.scanloop:
-	cmpq	%rdi, %rsi		/* while (cp < end */
-	jnb	.scandone
-	movzbq	(%rsi), %r8		/* %r8 = *cp */
-	incq	%rsi			/* cp++ */
-	testb	%cl, (%r8, %rdx)
-	jz	.scanloop		/*  && (table[*cp] & mask) == 0) */
-	decq	%rsi			/* (fix post-increment) */
-.scandone:
-	movl	%edi, %eax
-	subl	%esi, %eax		/* return (end - cp) */
-	ret
-	SET_SIZE(scanc)
-
-/*
- * Replacement functions for ones that are normally inlined.
- * In addition to the copy in i86.il, they are defined here just in case.
- */
-
-	ENTRY(intr_clear)
-	ENTRY(clear_int_flag)
-	pushfq
-	popq	%rax
-#if defined(__xpv)
-	leaq	xpv_panicking, %rdi
-	movl	(%rdi), %edi
-	cmpl	$0, %edi
-	jne	2f
-	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
-	/*
-	 * Synthesize the PS_IE bit from the event mask bit
-	 */
-	andq	$_BITNOT(PS_IE), %rax
-	testb	$1, %dl
-	jnz	1f
-	orq	$PS_IE, %rax
-1:
-	ret
-2:
-#endif
-	CLI(%rdi)
-	ret
-	SET_SIZE(clear_int_flag)
-	SET_SIZE(intr_clear)
-
-	ENTRY(curcpup)
-	movq	%gs:CPU_SELF, %rax
-	ret
-	SET_SIZE(curcpup)
-
-/* htonll(), ntohll(), htonl(), ntohl(), htons(), ntohs()
- * These functions reverse the byte order of the input parameter and return
- * the result. This is to convert the byte order from host byte order
- * (little endian) to network byte order (big endian), or vice versa.
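 *
 * For example, on this little-endian machine, htonl(0x12345678)
 * returns 0x78563412, so the bytes leave in network order:
 * 0x12, 0x34, 0x56, 0x78.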
- */ - - ENTRY(htonll) - ALTENTRY(ntohll) - movq %rdi, %rax - bswapq %rax - ret - SET_SIZE(ntohll) - SET_SIZE(htonll) - - /* XX64 there must be shorter sequences for this */ - ENTRY(htonl) - ALTENTRY(ntohl) - movl %edi, %eax - bswap %eax - ret - SET_SIZE(ntohl) - SET_SIZE(htonl) - - /* XX64 there must be better sequences for this */ - ENTRY(htons) - ALTENTRY(ntohs) - movl %edi, %eax - bswap %eax - shrl $16, %eax - ret - SET_SIZE(ntohs) - SET_SIZE(htons) - - - ENTRY(intr_restore) - ENTRY(restore_int_flag) - testq $PS_IE, %rdi - jz 1f -#if defined(__xpv) - leaq xpv_panicking, %rsi - movl (%rsi), %esi - cmpl $0, %esi - jne 1f - /* - * Since we're -really- running unprivileged, our attempt - * to change the state of the IF bit will be ignored. - * The virtual IF bit is tweaked by CLI and STI. - */ - IE_TO_EVENT_MASK(%rsi, %rdi) -#else - sti -#endif -1: - ret - SET_SIZE(restore_int_flag) - SET_SIZE(intr_restore) - - ENTRY(sti) - STI - ret - SET_SIZE(sti) - - ENTRY(cli) - CLI(%rax) - ret - SET_SIZE(cli) - - ENTRY(dtrace_interrupt_disable) - pushfq - popq %rax -#if defined(__xpv) - leaq xpv_panicking, %rdi - movl (%rdi), %edi - cmpl $0, %edi - jne .dtrace_interrupt_disable_done - CLIRET(%rdi, %dl) /* returns event mask in %dl */ - /* - * Synthesize the PS_IE bit from the event mask bit - */ - andq $_BITNOT(PS_IE), %rax - testb $1, %dl - jnz .dtrace_interrupt_disable_done - orq $PS_IE, %rax -#else - CLI(%rdx) -#endif -.dtrace_interrupt_disable_done: - ret - SET_SIZE(dtrace_interrupt_disable) - - ENTRY(dtrace_interrupt_enable) - pushq %rdi - popfq -#if defined(__xpv) - leaq xpv_panicking, %rdx - movl (%rdx), %edx - cmpl $0, %edx - jne .dtrace_interrupt_enable_done - /* - * Since we're -really- running unprivileged, our attempt - * to change the state of the IF bit will be ignored. The - * virtual IF bit is tweaked by CLI and STI. - */ - IE_TO_EVENT_MASK(%rdx, %rdi) -#endif -.dtrace_interrupt_enable_done: - ret - SET_SIZE(dtrace_interrupt_enable) - - - ENTRY(dtrace_membar_producer) - rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(dtrace_membar_producer) - - ENTRY(dtrace_membar_consumer) - rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(dtrace_membar_consumer) - - ENTRY(threadp) - movq %gs:CPU_THREAD, %rax - ret - SET_SIZE(threadp) - -/* - * Checksum routine for Internet Protocol Headers - */ - - ENTRY(ip_ocsum) - pushq %rbp - movq %rsp, %rbp -#ifdef DEBUG - movq postbootkernelbase(%rip), %rax - cmpq %rax, %rdi - jnb 1f - xorl %eax, %eax - movq %rdi, %rsi - leaq .ip_ocsum_panic_msg(%rip), %rdi - call panic - /*NOTREACHED*/ -.ip_ocsum_panic_msg: - .string "ip_ocsum: address 0x%p below kernelbase\n" -1: -#endif - movl %esi, %ecx /* halfword_count */ - movq %rdi, %rsi /* address */ - /* partial sum in %edx */ - xorl %eax, %eax - testl %ecx, %ecx - jz .ip_ocsum_done - testq $3, %rsi - jnz .ip_csum_notaligned -.ip_csum_aligned: /* XX64 opportunities for 8-byte operations? */ -.next_iter: - /* XX64 opportunities for prefetch? */ - /* XX64 compute csum with 64 bit quantities? 
*/ - subl $32, %ecx - jl .less_than_32 - - addl 0(%rsi), %edx -.only60: - adcl 4(%rsi), %eax -.only56: - adcl 8(%rsi), %edx -.only52: - adcl 12(%rsi), %eax -.only48: - adcl 16(%rsi), %edx -.only44: - adcl 20(%rsi), %eax -.only40: - adcl 24(%rsi), %edx -.only36: - adcl 28(%rsi), %eax -.only32: - adcl 32(%rsi), %edx -.only28: - adcl 36(%rsi), %eax -.only24: - adcl 40(%rsi), %edx -.only20: - adcl 44(%rsi), %eax -.only16: - adcl 48(%rsi), %edx -.only12: - adcl 52(%rsi), %eax -.only8: - adcl 56(%rsi), %edx -.only4: - adcl 60(%rsi), %eax /* could be adding -1 and -1 with a carry */ -.only0: - adcl $0, %eax /* could be adding -1 in eax with a carry */ - adcl $0, %eax - - addq $64, %rsi - testl %ecx, %ecx - jnz .next_iter - -.ip_ocsum_done: - addl %eax, %edx - adcl $0, %edx - movl %edx, %eax /* form a 16 bit checksum by */ - shrl $16, %eax /* adding two halves of 32 bit checksum */ - addw %dx, %ax - adcw $0, %ax - andl $0xffff, %eax - leave - ret - -.ip_csum_notaligned: - xorl %edi, %edi - movw (%rsi), %di - addl %edi, %edx - adcl $0, %edx - addq $2, %rsi - decl %ecx - jmp .ip_csum_aligned - -.less_than_32: - addl $32, %ecx - testl $1, %ecx - jz .size_aligned - andl $0xfe, %ecx - movzwl (%rsi, %rcx, 2), %edi - addl %edi, %edx - adcl $0, %edx -.size_aligned: - movl %ecx, %edi - shrl $1, %ecx - shl $1, %edi - subq $64, %rdi - addq %rdi, %rsi - leaq .ip_ocsum_jmptbl(%rip), %rdi - leaq (%rdi, %rcx, 8), %rdi - xorl %ecx, %ecx - clc - movq (%rdi), %rdi - INDIRECT_JMP_REG(rdi) - - .align 8 -.ip_ocsum_jmptbl: - .quad .only0, .only4, .only8, .only12, .only16, .only20 - .quad .only24, .only28, .only32, .only36, .only40, .only44 - .quad .only48, .only52, .only56, .only60 - SET_SIZE(ip_ocsum) - -/* - * multiply two long numbers and yield a u_longlong_t result, callable from C. - * Provided to manipulate hrtime_t values. - */ - - ENTRY(mul32) - xorl %edx, %edx /* XX64 joe, paranoia? */ - movl %edi, %eax - mull %esi - shlq $32, %rdx - orq %rdx, %rax - ret - SET_SIZE(mul32) - - ENTRY(scan_memory) - shrq $3, %rsi /* convert %rsi from byte to quadword count */ - jz .scanm_done - movq %rsi, %rcx /* move count into rep control register */ - movq %rdi, %rsi /* move addr into lodsq control reg. 
*/ - rep lodsq /* scan the memory range */ -.scanm_done: - rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(scan_memory) - - - ENTRY(lowbit) - movl $-1, %eax - bsfq %rdi, %rdi - cmovnz %edi, %eax - incl %eax - ret - SET_SIZE(lowbit) - - ENTRY(highbit) - ALTENTRY(highbit64) - movl $-1, %eax - bsrq %rdi, %rdi - cmovnz %edi, %eax - incl %eax - ret - SET_SIZE(highbit64) - SET_SIZE(highbit) - -#define XMSR_ACCESS_VAL $0x9c5a203a - - ENTRY(rdmsr) - movl %edi, %ecx - rdmsr - shlq $32, %rdx - orq %rdx, %rax - ret - SET_SIZE(rdmsr) - - ENTRY(wrmsr) - movq %rsi, %rdx - shrq $32, %rdx - movl %esi, %eax - movl %edi, %ecx - wrmsr - ret - SET_SIZE(wrmsr) - - ENTRY(xrdmsr) - pushq %rbp - movq %rsp, %rbp - movl %edi, %ecx - movl XMSR_ACCESS_VAL, %edi /* this value is needed to access MSR */ - rdmsr - shlq $32, %rdx - orq %rdx, %rax - leave - ret - SET_SIZE(xrdmsr) - - ENTRY(xwrmsr) - pushq %rbp - movq %rsp, %rbp - movl %edi, %ecx - movl XMSR_ACCESS_VAL, %edi /* this value is needed to access MSR */ - movq %rsi, %rdx - shrq $32, %rdx - movl %esi, %eax - wrmsr - leave - ret - SET_SIZE(xwrmsr) - - ENTRY(get_xcr) - movl %edi, %ecx - #xgetbv - .byte 0x0f,0x01,0xd0 - shlq $32, %rdx - orq %rdx, %rax - ret - SET_SIZE(get_xcr) - - ENTRY(set_xcr) - movq %rsi, %rdx - shrq $32, %rdx - movl %esi, %eax - movl %edi, %ecx - #xsetbv - .byte 0x0f,0x01,0xd1 - ret - SET_SIZE(set_xcr) - - ENTRY(invalidate_cache) - wbinvd - ret - SET_SIZE(invalidate_cache) - - ENTRY_NP(getcregs) -#if defined(__xpv) - /* - * Only a few of the hardware control registers or descriptor tables - * are directly accessible to us, so just zero the structure. - * - * XXPV Perhaps it would be helpful for the hypervisor to return - * virtualized versions of these for post-mortem use. - * (Need to reevaluate - perhaps it already does!) - */ - pushq %rdi /* save *crp */ - movq $CREGSZ, %rsi - call bzero - popq %rdi - - /* - * Dump what limited information we can - */ - movq %cr0, %rax - movq %rax, CREG_CR0(%rdi) /* cr0 */ - movq %cr2, %rax - movq %rax, CREG_CR2(%rdi) /* cr2 */ - movq %cr3, %rax - movq %rax, CREG_CR3(%rdi) /* cr3 */ - movq %cr4, %rax - movq %rax, CREG_CR4(%rdi) /* cr4 */ - -#else /* __xpv */ - -#define GETMSR(r, off, d) \ - movl $r, %ecx; \ - rdmsr; \ - movl %eax, off(d); \ - movl %edx, off+4(d) - - xorl %eax, %eax - movq %rax, CREG_GDT+8(%rdi) - sgdt CREG_GDT(%rdi) /* 10 bytes */ - movq %rax, CREG_IDT+8(%rdi) - sidt CREG_IDT(%rdi) /* 10 bytes */ - movq %rax, CREG_LDT(%rdi) - sldt CREG_LDT(%rdi) /* 2 bytes */ - movq %rax, CREG_TASKR(%rdi) - str CREG_TASKR(%rdi) /* 2 bytes */ - movq %cr0, %rax - movq %rax, CREG_CR0(%rdi) /* cr0 */ - movq %cr2, %rax - movq %rax, CREG_CR2(%rdi) /* cr2 */ - movq %cr3, %rax - movq %rax, CREG_CR3(%rdi) /* cr3 */ - movq %cr4, %rax - movq %rax, CREG_CR4(%rdi) /* cr4 */ - movq %cr8, %rax - movq %rax, CREG_CR8(%rdi) /* cr8 */ - GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi) - GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi) -#endif /* __xpv */ - ret - SET_SIZE(getcregs) - -#undef GETMSR - - -/* - * A panic trigger is a word which is updated atomically and can only be set - * once. We atomically store 0xDEFACEDD and load the old value. If the - * previous value was 0, we succeed and return 1; otherwise return 0. - * This allows a partially corrupt trigger to still trigger correctly. DTrace - * has its own version of this function to allow it to panic correctly from - * probe context. 
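
The trigger protocol just described is small enough to render exactly in C11 atomics. A sketch only; the kernel code below uses lock xchgl directly, and the function name is illustrative:

        #include <stdatomic.h>

        /* Atomically store 0xdefacedd; only the first caller sees 0 back,
           so a partially corrupt (nonzero) trigger still reads as taken. */
        static int
        panic_trigger_sketch(_Atomic unsigned int *tp)
        {
                return (atomic_exchange(tp, 0xdefaceddU) == 0);
        }
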
- */ - - ENTRY_NP(panic_trigger) - xorl %eax, %eax - movl $0xdefacedd, %edx - lock - xchgl %edx, (%rdi) - cmpl $0, %edx - je 0f - movl $0, %eax - ret -0: movl $1, %eax - ret - SET_SIZE(panic_trigger) - - ENTRY_NP(dtrace_panic_trigger) - xorl %eax, %eax - movl $0xdefacedd, %edx - lock - xchgl %edx, (%rdi) - cmpl $0, %edx - je 0f - movl $0, %eax - ret -0: movl $1, %eax - ret - SET_SIZE(dtrace_panic_trigger) - -/* - * The panic() and cmn_err() functions invoke vpanic() as a common entry point - * into the panic code implemented in panicsys(). vpanic() is responsible - * for passing through the format string and arguments, and constructing a - * regs structure on the stack into which it saves the current register - * values. If we are not dying due to a fatal trap, these registers will - * then be preserved in panicbuf as the current processor state. Before - * invoking panicsys(), vpanic() activates the first panic trigger (see - * common/os/panic.c) and switches to the panic_stack if successful. Note that - * DTrace takes a slightly different panic path if it must panic from probe - * context. Instead of calling panic, it calls into dtrace_vpanic(), which - * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and - * branches back into vpanic(). - */ - - ENTRY_NP(vpanic) /* Initial stack layout: */ - - pushq %rbp /* | %rip | 0x60 */ - movq %rsp, %rbp /* | %rbp | 0x58 */ - pushfq /* | rfl | 0x50 */ - pushq %r11 /* | %r11 | 0x48 */ - pushq %r10 /* | %r10 | 0x40 */ - pushq %rbx /* | %rbx | 0x38 */ - pushq %rax /* | %rax | 0x30 */ - pushq %r9 /* | %r9 | 0x28 */ - pushq %r8 /* | %r8 | 0x20 */ - pushq %rcx /* | %rcx | 0x18 */ - pushq %rdx /* | %rdx | 0x10 */ - pushq %rsi /* | %rsi | 0x8 alist */ - pushq %rdi /* | %rdi | 0x0 format */ - - movq %rsp, %rbx /* %rbx = current %rsp */ - - leaq panic_quiesce(%rip), %rdi /* %rdi = &panic_quiesce */ - call panic_trigger /* %eax = panic_trigger() */ - -vpanic_common: - /* - * The panic_trigger result is in %eax from the call above, and - * dtrace_panic places it in %eax before branching here. - * The rdmsr instructions that follow below will clobber %eax so - * we stash the panic_trigger result in %r11d. - */ - movl %eax, %r11d - cmpl $0, %r11d - je 0f - - /* - * If panic_trigger() was successful, we are the first to initiate a - * panic: we now switch to the reserved panic_stack before continuing. - */ - leaq panic_stack(%rip), %rsp - addq $PANICSTKSIZE, %rsp -0: subq $REGSIZE, %rsp - /* - * Now that we've got everything set up, store the register values as - * they were when we entered vpanic() to the designated location in - * the regs structure we allocated on the stack. 
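
Stepping back, the overall shape is roughly the following C. This is a hypothetical outline only: the real routine must be assembler precisely because it snapshots its caller's registers before they are disturbed, and panicsys()'s argument list here is taken from the comment further below:

        struct regs;                            /* opaque in this sketch */
        extern int panic_trigger(int *);
        extern void panicsys(const char *, void *, struct regs *, int);
        extern int panic_quiesce;

        /* Hypothetical C outline of vpanic(); rp is built from the saved
           register values in the real code. */
        static void
        vpanic_outline(const char *format, void *alist, struct regs *rp)
        {
                int on_panic_stack = panic_trigger(&panic_quiesce);

                /* if we won the trigger, the real code switches to the
                   reserved panic_stack before continuing */
                panicsys(format, alist, rp, on_panic_stack);
        }
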
- */ - movq 0x0(%rbx), %rcx - movq %rcx, REGOFF_RDI(%rsp) - movq 0x8(%rbx), %rcx - movq %rcx, REGOFF_RSI(%rsp) - movq 0x10(%rbx), %rcx - movq %rcx, REGOFF_RDX(%rsp) - movq 0x18(%rbx), %rcx - movq %rcx, REGOFF_RCX(%rsp) - movq 0x20(%rbx), %rcx - - movq %rcx, REGOFF_R8(%rsp) - movq 0x28(%rbx), %rcx - movq %rcx, REGOFF_R9(%rsp) - movq 0x30(%rbx), %rcx - movq %rcx, REGOFF_RAX(%rsp) - movq 0x38(%rbx), %rcx - movq %rcx, REGOFF_RBX(%rsp) - movq 0x58(%rbx), %rcx - - movq %rcx, REGOFF_RBP(%rsp) - movq 0x40(%rbx), %rcx - movq %rcx, REGOFF_R10(%rsp) - movq 0x48(%rbx), %rcx - movq %rcx, REGOFF_R11(%rsp) - movq %r12, REGOFF_R12(%rsp) - - movq %r13, REGOFF_R13(%rsp) - movq %r14, REGOFF_R14(%rsp) - movq %r15, REGOFF_R15(%rsp) - - xorl %ecx, %ecx - movw %ds, %cx - movq %rcx, REGOFF_DS(%rsp) - movw %es, %cx - movq %rcx, REGOFF_ES(%rsp) - movw %fs, %cx - movq %rcx, REGOFF_FS(%rsp) - movw %gs, %cx - movq %rcx, REGOFF_GS(%rsp) - - movq $0, REGOFF_TRAPNO(%rsp) - - movq $0, REGOFF_ERR(%rsp) - leaq vpanic(%rip), %rcx - movq %rcx, REGOFF_RIP(%rsp) - movw %cs, %cx - movzwq %cx, %rcx - movq %rcx, REGOFF_CS(%rsp) - movq 0x50(%rbx), %rcx - movq %rcx, REGOFF_RFL(%rsp) - movq %rbx, %rcx - addq $0x60, %rcx - movq %rcx, REGOFF_RSP(%rsp) - movw %ss, %cx - movzwq %cx, %rcx - movq %rcx, REGOFF_SS(%rsp) - - /* - * panicsys(format, alist, rp, on_panic_stack) - */ - movq REGOFF_RDI(%rsp), %rdi /* format */ - movq REGOFF_RSI(%rsp), %rsi /* alist */ - movq %rsp, %rdx /* struct regs */ - movl %r11d, %ecx /* on_panic_stack */ - call panicsys - addq $REGSIZE, %rsp - popq %rdi - popq %rsi - popq %rdx - popq %rcx - popq %r8 - popq %r9 - popq %rax - popq %rbx - popq %r10 - popq %r11 - popfq - leave - ret - SET_SIZE(vpanic) - - ENTRY_NP(dtrace_vpanic) /* Initial stack layout: */ - - pushq %rbp /* | %rip | 0x60 */ - movq %rsp, %rbp /* | %rbp | 0x58 */ - pushfq /* | rfl | 0x50 */ - pushq %r11 /* | %r11 | 0x48 */ - pushq %r10 /* | %r10 | 0x40 */ - pushq %rbx /* | %rbx | 0x38 */ - pushq %rax /* | %rax | 0x30 */ - pushq %r9 /* | %r9 | 0x28 */ - pushq %r8 /* | %r8 | 0x20 */ - pushq %rcx /* | %rcx | 0x18 */ - pushq %rdx /* | %rdx | 0x10 */ - pushq %rsi /* | %rsi | 0x8 alist */ - pushq %rdi /* | %rdi | 0x0 format */ - - movq %rsp, %rbx /* %rbx = current %rsp */ - - leaq panic_quiesce(%rip), %rdi /* %rdi = &panic_quiesce */ - call dtrace_panic_trigger /* %eax = dtrace_panic_trigger() */ - jmp vpanic_common - - SET_SIZE(dtrace_vpanic) - - DGDEF3(timedelta, 8, 8) - .long 0, 0 - - /* - * initialized to a non zero value to make pc_gethrtime() - * work correctly even before clock is initialized - */ - DGDEF3(hrtime_base, 8, 8) - .long _MUL(NSEC_PER_CLOCK_TICK, 6), 0 - - DGDEF3(adj_shift, 4, 4) - .long ADJ_SHIFT - - ENTRY_NP(hres_tick) - pushq %rbp - movq %rsp, %rbp - - /* - * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously, - * hres_last_tick can only be modified while holding CLOCK_LOCK). - * At worst, performing this now instead of under CLOCK_LOCK may - * introduce some jitter in pc_gethrestime(). - */ - movq gethrtimef(%rip), %rsi - INDIRECT_CALL_REG(rsi) - movq %rax, %r8 - - leaq hres_lock(%rip), %rax - movb $-1, %dl -.CL1: - xchgb %dl, (%rax) - testb %dl, %dl - jz .CL3 /* got it */ -.CL2: - cmpb $0, (%rax) /* possible to get lock? 
*/ - pause - jne .CL2 - jmp .CL1 /* yes, try again */ -.CL3: - /* - * compute the interval since last time hres_tick was called - * and adjust hrtime_base and hrestime accordingly - * hrtime_base is an 8 byte value (in nsec), hrestime is - * a timestruc_t (sec, nsec) - */ - leaq hres_last_tick(%rip), %rax - movq %r8, %r11 - subq (%rax), %r8 - addq %r8, hrtime_base(%rip) /* add interval to hrtime_base */ - addq %r8, hrestime+8(%rip) /* add interval to hrestime.tv_nsec */ - /* - * Now that we have CLOCK_LOCK, we can update hres_last_tick - */ - movq %r11, (%rax) - - call __adj_hrestime - - /* - * release the hres_lock - */ - incl hres_lock(%rip) - leave - ret - SET_SIZE(hres_tick) - -/* - * void prefetch_smap_w(void *) - * - * Prefetch ahead within a linear list of smap structures. - * Not implemented for ia32. Stub for compatibility. - */ - - ENTRY(prefetch_smap_w) - rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(prefetch_smap_w) - -/* - * prefetch_page_r(page_t *) - * issue prefetch instructions for a page_t - */ - - ENTRY(prefetch_page_r) - rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(prefetch_page_r) - - ENTRY(bcmp) - pushq %rbp - movq %rsp, %rbp -#ifdef DEBUG - testq %rdx,%rdx - je 1f - movq postbootkernelbase(%rip), %r11 - cmpq %r11, %rdi - jb 0f - cmpq %r11, %rsi - jnb 1f -0: leaq .bcmp_panic_msg(%rip), %rdi - xorl %eax, %eax - call panic -1: -#endif /* DEBUG */ - call memcmp - testl %eax, %eax - setne %dl - leave - movzbl %dl, %eax - ret - SET_SIZE(bcmp) - -#ifdef DEBUG - .text -.bcmp_panic_msg: - .string "bcmp: arguments below kernelbase" -#endif /* DEBUG */ - - ENTRY_NP(bsrw_insn) - xorl %eax, %eax - bsrw %di, %ax - ret - SET_SIZE(bsrw_insn) - - ENTRY_NP(switch_sp_and_call) - pushq %rbp - movq %rsp, %rbp /* set up stack frame */ - movq %rdi, %rsp /* switch stack pointer */ - movq %rdx, %rdi /* pass func arg 1 */ - movq %rsi, %r11 /* save function to call */ - movq %rcx, %rsi /* pass func arg 2 */ - INDIRECT_CALL_REG(r11) /* call function */ - leave /* restore stack */ - ret - SET_SIZE(switch_sp_and_call) - - ENTRY_NP(kmdb_enter) - pushq %rbp - movq %rsp, %rbp - - /* - * Save flags, do a 'cli' then return the saved flags - */ - call intr_clear - - int $T_DBGENTR - - /* - * Restore the saved flags - */ - movq %rax, %rdi - call intr_restore - - leave - ret - SET_SIZE(kmdb_enter) - - ENTRY_NP(return_instr) - rep; ret /* use 2 byte instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(return_instr) - - ENTRY(getflags) - pushfq - popq %rax -#if defined(__xpv) - CURTHREAD(%rdi) - KPREEMPT_DISABLE(%rdi) - /* - * Synthesize the PS_IE bit from the event mask bit - */ - CURVCPU(%r11) - andq $_BITNOT(PS_IE), %rax - XEN_TEST_UPCALL_MASK(%r11) - jnz 1f - orq $PS_IE, %rax -1: - KPREEMPT_ENABLE_NOKP(%rdi) -#endif - ret - SET_SIZE(getflags) - - ENTRY(ftrace_interrupt_disable) - pushfq - popq %rax - CLI(%rdx) - ret - SET_SIZE(ftrace_interrupt_disable) - - ENTRY(ftrace_interrupt_enable) - pushq %rdi - popfq - ret - SET_SIZE(ftrace_interrupt_enable) - - ENTRY(clflush_insn) - clflush (%rdi) - ret - SET_SIZE(clflush_insn) - - ENTRY(mfence_insn) - mfence - ret - SET_SIZE(mfence_insn) - -/* - * VMware implements an I/O port that programs can query to detect if software - * is running in a VMware hypervisor. 
This hypervisor port behaves differently - * depending on magic values in certain registers and modifies some registers - * as a side effect. - * - * References: http://kb.vmware.com/kb/1009458 - */ - - ENTRY(vmware_port) - pushq %rbx - movl $VMWARE_HVMAGIC, %eax - movl $0xffffffff, %ebx - movl %edi, %ecx - movl $VMWARE_HVPORT, %edx - inl (%dx) - movl %eax, (%rsi) - movl %ebx, 4(%rsi) - movl %ecx, 8(%rsi) - movl %edx, 12(%rsi) - popq %rbx - ret - SET_SIZE(vmware_port) diff --git a/usr/src/uts/intel/ia32/ml/lock_prim.s b/usr/src/uts/intel/ia32/ml/lock_prim.s deleted file mode 100644 index 4267561bf7..0000000000 --- a/usr/src/uts/intel/ia32/ml/lock_prim.s +++ /dev/null @@ -1,714 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright 2019 Joyent, Inc. - */ - -#include "assym.h" - -#include <sys/mutex_impl.h> -#include <sys/asm_linkage.h> -#include <sys/asm_misc.h> -#include <sys/regset.h> -#include <sys/rwlock_impl.h> -#include <sys/lockstat.h> - -/* - * lock_try(lp), ulock_try(lp) - * - returns non-zero on success. - * - doesn't block interrupts so don't use this to spin on a lock. - * - * ulock_try() is for a lock in the user address space. - */ - - .globl kernelbase - - ENTRY(lock_try) - movb $-1, %dl - movzbq %dl, %rax - xchgb %dl, (%rdi) - xorb %dl, %al -.lock_try_lockstat_patch_point: - ret - testb %al, %al - jnz 0f - ret -0: - movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ - movq %rdi, %rsi /* rsi = lock addr */ - movl $LS_LOCK_TRY_ACQUIRE, %edi /* edi = event */ - jmp lockstat_wrapper - SET_SIZE(lock_try) - - ENTRY(lock_spin_try) - movb $-1, %dl - movzbq %dl, %rax - xchgb %dl, (%rdi) - xorb %dl, %al - ret - SET_SIZE(lock_spin_try) - - ENTRY(ulock_try) -#ifdef DEBUG - movq kernelbase(%rip), %rax - cmpq %rax, %rdi /* test uaddr < kernelbase */ - jb ulock_pass /* uaddr < kernelbase, proceed */ - - movq %rdi, %r12 /* preserve lock ptr for debugging */ - leaq .ulock_panic_msg(%rip), %rdi - pushq %rbp /* align stack properly */ - movq %rsp, %rbp - xorl %eax, %eax /* clear for varargs */ - call panic - -#endif /* DEBUG */ - -ulock_pass: - movl $1, %eax - xchgb %al, (%rdi) - xorb $1, %al - ret - SET_SIZE(ulock_try) - -#ifdef DEBUG - .data -.ulock_panic_msg: - .string "ulock_try: Argument is above kernelbase" - .text -#endif /* DEBUG */ - -/* - * lock_clear(lp) - * - unlock lock without changing interrupt priority level. 
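
Both lock_try() above and lock_clear() below reduce to one atomic byte operation. A C11 sketch of the pair, assuming a byte-sized lock word (names are illustrative; on x86 the xchg carries an implicit lock):

        #include <stdatomic.h>

        /* lock_try(): grab the byte lock; nonzero return means success. */
        static int
        lock_try_sketch(_Atomic unsigned char *lp)
        {
                /* set to 0xff; we got the lock iff the old value was 0 */
                return (atomic_exchange(lp, 0xff) == 0);
        }

        /* lock_clear(): a plain releasing store of zero drops the lock. */
        static void
        lock_clear_sketch(_Atomic unsigned char *lp)
        {
                atomic_store(lp, 0);
        }
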
- */ - - ENTRY(lock_clear) - movb $0, (%rdi) -.lock_clear_lockstat_patch_point: - ret - movq %rdi, %rsi /* rsi = lock addr */ - movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ - movl $LS_LOCK_CLEAR_RELEASE, %edi /* edi = event */ - jmp lockstat_wrapper - SET_SIZE(lock_clear) - - ENTRY(ulock_clear) -#ifdef DEBUG - movq kernelbase(%rip), %rcx - cmpq %rcx, %rdi /* test uaddr < kernelbase */ - jb ulock_clr /* uaddr < kernelbase, proceed */ - - leaq .ulock_clear_msg(%rip), %rdi - pushq %rbp /* align stack properly */ - movq %rsp, %rbp - xorl %eax, %eax /* clear for varargs */ - call panic -#endif - -ulock_clr: - movb $0, (%rdi) - ret - SET_SIZE(ulock_clear) - -#ifdef DEBUG - .data -.ulock_clear_msg: - .string "ulock_clear: Argument is above kernelbase" - .text -#endif /* DEBUG */ - - -/* - * lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil) - * Drops lp, sets pil to new_pil, stores old pil in *old_pil. - */ - - ENTRY(lock_set_spl) - pushq %rbp - movq %rsp, %rbp - subq $32, %rsp - movl %esi, 8(%rsp) /* save priority level */ - movq %rdx, 16(%rsp) /* save old pil ptr */ - movq %rdi, 24(%rsp) /* save lock pointer */ - movl %esi, %edi /* pass priority level */ - call splr /* raise priority level */ - movq 24(%rsp), %rdi /* rdi = lock addr */ - movb $-1, %dl - xchgb %dl, (%rdi) /* try to set lock */ - testb %dl, %dl /* did we get the lock? ... */ - jnz .lss_miss /* ... no, go to C for the hard case */ - movq 16(%rsp), %rdx /* rdx = old pil addr */ - movw %ax, (%rdx) /* store old pil */ - leave -.lock_set_spl_lockstat_patch_point: - ret - movq %rdi, %rsi /* rsi = lock addr */ - movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ - movl $LS_LOCK_SET_SPL_ACQUIRE, %edi - jmp lockstat_wrapper -.lss_miss: - movl 8(%rsp), %esi /* new_pil */ - movq 16(%rsp), %rdx /* old_pil_addr */ - movl %eax, %ecx /* original pil */ - leave /* unwind stack */ - jmp lock_set_spl_spin - SET_SIZE(lock_set_spl) - -/* - * void - * lock_init(lp) - */ - - ENTRY(lock_init) - movb $0, (%rdi) - ret - SET_SIZE(lock_init) - -/* - * void - * lock_set(lp) - */ - - ENTRY(lock_set) - movb $-1, %dl - xchgb %dl, (%rdi) /* try to set lock */ - testb %dl, %dl /* did we get it? 
*/ - jnz lock_set_spin /* no, go to C for the hard case */ -.lock_set_lockstat_patch_point: - ret - movq %rdi, %rsi /* rsi = lock addr */ - movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ - movl $LS_LOCK_SET_ACQUIRE, %edi - jmp lockstat_wrapper - SET_SIZE(lock_set) - -/* - * lock_clear_splx(lp, s) - */ - - ENTRY(lock_clear_splx) - movb $0, (%rdi) /* clear lock */ -.lock_clear_splx_lockstat_patch_point: - jmp 0f -0: - movl %esi, %edi /* arg for splx */ - jmp splx /* let splx do its thing */ -.lock_clear_splx_lockstat: - pushq %rbp /* align stack properly */ - movq %rsp, %rbp - subq $16, %rsp /* space to save args across splx */ - movq %rdi, 8(%rsp) /* save lock ptr across splx call */ - movl %esi, %edi /* arg for splx */ - call splx /* lower the priority */ - movq 8(%rsp), %rsi /* rsi = lock ptr */ - leave /* unwind stack */ - movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ - movl $LS_LOCK_CLEAR_SPLX_RELEASE, %edi - jmp lockstat_wrapper - SET_SIZE(lock_clear_splx) - -#if defined(__GNUC_AS__) -#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \ - (.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2) - -#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \ - (.lock_clear_splx_lockstat_patch_point + 1) -#else -#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \ - [.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2] - -#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \ - [.lock_clear_splx_lockstat_patch_point + 1] -#endif - -/* - * mutex_enter() and mutex_exit(). - * - * These routines handle the simple cases of mutex_enter() (adaptive - * lock, not held) and mutex_exit() (adaptive lock, held, no waiters). - * If anything complicated is going on we punt to mutex_vector_enter(). - * - * mutex_tryenter() is similar to mutex_enter() but returns zero if - * the lock cannot be acquired, nonzero on success. - * - * If mutex_exit() gets preempted in the window between checking waiters - * and clearing the lock, we can miss wakeups. Disabling preemption - * in the mutex code is prohibitively expensive, so instead we detect - * mutex preemption by examining the trapped PC in the interrupt path. - * If we interrupt a thread in mutex_exit() that has not yet cleared - * the lock, cmnint() resets its PC back to the beginning of - * mutex_exit() so it will check again for waiters when it resumes. - * - * The lockstat code below is activated when the lockstat driver - * calls lockstat_hot_patch() to hot-patch the kernel mutex code. - * Note that we don't need to test lockstat_event_mask here -- we won't - * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats. 
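
The fast paths that follow are a single compare-and-swap of curthread into the lock word. In C11 terms, roughly the sketch below, with hypothetical type and function names; note that the exit sketch has exactly the load/store window described above, which the kernel closes by restarting the interrupted critical section rather than by disabling preemption:

        #include <stdatomic.h>
        #include <stdint.h>

        typedef _Atomic uintptr_t mutex_word_t; /* hypothetical lock word */

        extern void mutex_vector_enter(mutex_word_t *);
        extern void mutex_vector_exit(mutex_word_t *);

        static void
        mutex_enter_sketch(mutex_word_t *lp, uintptr_t curthread)
        {
                uintptr_t unheld = 0;

                /* lock cmpxchgq: 0 -> curthread, else punt to hard case */
                if (!atomic_compare_exchange_strong(lp, &unheld, curthread))
                        mutex_vector_enter(lp);
        }

        static void
        mutex_exit_sketch(mutex_word_t *lp, uintptr_t curthread)
        {
                /* simple case: we own it and there are no waiters */
                if (atomic_load(lp) == curthread)
                        atomic_store(lp, 0);    /* clear owner AND lock */
                else
                        mutex_vector_exit(lp);  /* waiters or wrong owner */
        }
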
- */ - - ENTRY_NP(mutex_enter) - movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */ - xorl %eax, %eax /* rax = 0 (unheld adaptive) */ - lock - cmpxchgq %rdx, (%rdi) - jnz mutex_vector_enter -.mutex_enter_lockstat_patch_point: -#if defined(OPTERON_WORKAROUND_6323525) -.mutex_enter_6323525_patch_point: - ret /* nop space for lfence */ - nop - nop -.mutex_enter_lockstat_6323525_patch_point: /* new patch point if lfence */ - nop -#else /* OPTERON_WORKAROUND_6323525 */ - ret -#endif /* OPTERON_WORKAROUND_6323525 */ - movq %rdi, %rsi - movl $LS_MUTEX_ENTER_ACQUIRE, %edi -/* - * expects %rdx=thread, %rsi=lock, %edi=lockstat event - */ - ALTENTRY(lockstat_wrapper) - incb T_LOCKSTAT(%rdx) /* curthread->t_lockstat++ */ - leaq lockstat_probemap(%rip), %rax - movl (%rax, %rdi, DTRACE_IDSIZE), %eax - testl %eax, %eax /* check for non-zero probe */ - jz 1f - pushq %rbp /* align stack properly */ - movq %rsp, %rbp - movl %eax, %edi - movq lockstat_probe, %rax - INDIRECT_CALL_REG(rax) - leave /* unwind stack */ -1: - movq %gs:CPU_THREAD, %rdx /* reload thread ptr */ - decb T_LOCKSTAT(%rdx) /* curthread->t_lockstat-- */ - movl $1, %eax /* return success if tryenter */ - ret - SET_SIZE(lockstat_wrapper) - SET_SIZE(mutex_enter) - -/* - * expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event - */ - ENTRY(lockstat_wrapper_arg) - incb T_LOCKSTAT(%rcx) /* curthread->t_lockstat++ */ - leaq lockstat_probemap(%rip), %rax - movl (%rax, %rdi, DTRACE_IDSIZE), %eax - testl %eax, %eax /* check for non-zero probe */ - jz 1f - pushq %rbp /* align stack properly */ - movq %rsp, %rbp - movl %eax, %edi - movq lockstat_probe, %rax - INDIRECT_CALL_REG(rax) - leave /* unwind stack */ -1: - movq %gs:CPU_THREAD, %rdx /* reload thread ptr */ - decb T_LOCKSTAT(%rdx) /* curthread->t_lockstat-- */ - movl $1, %eax /* return success if tryenter */ - ret - SET_SIZE(lockstat_wrapper_arg) - - - ENTRY(mutex_tryenter) - movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */ - xorl %eax, %eax /* rax = 0 (unheld adaptive) */ - lock - cmpxchgq %rdx, (%rdi) - jnz mutex_vector_tryenter - not %eax /* return success (nonzero) */ -#if defined(OPTERON_WORKAROUND_6323525) -.mutex_tryenter_lockstat_patch_point: -.mutex_tryenter_6323525_patch_point: - ret /* nop space for lfence */ - nop - nop -.mutex_tryenter_lockstat_6323525_patch_point: /* new patch point if lfence */ - nop -#else /* OPTERON_WORKAROUND_6323525 */ -.mutex_tryenter_lockstat_patch_point: - ret -#endif /* OPTERON_WORKAROUND_6323525 */ - movq %rdi, %rsi - movl $LS_MUTEX_ENTER_ACQUIRE, %edi - jmp lockstat_wrapper - SET_SIZE(mutex_tryenter) - - ENTRY(mutex_adaptive_tryenter) - movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */ - xorl %eax, %eax /* rax = 0 (unheld adaptive) */ - lock - cmpxchgq %rdx, (%rdi) - jnz 0f - not %eax /* return success (nonzero) */ -#if defined(OPTERON_WORKAROUND_6323525) -.mutex_atryenter_6323525_patch_point: - ret /* nop space for lfence */ - nop - nop - nop -#else /* OPTERON_WORKAROUND_6323525 */ - ret -#endif /* OPTERON_WORKAROUND_6323525 */ -0: - xorl %eax, %eax /* return failure */ - ret - SET_SIZE(mutex_adaptive_tryenter) - - .globl mutex_owner_running_critical_start - - ENTRY(mutex_owner_running) -mutex_owner_running_critical_start: - movq (%rdi), %r11 /* get owner field */ - andq $MUTEX_THREAD, %r11 /* remove waiters bit */ - cmpq $0, %r11 /* if free, skip */ - je 1f /* go return 0 */ - movq T_CPU(%r11), %r8 /* get owner->t_cpu */ - movq CPU_THREAD(%r8), %r9 /* get t_cpu->cpu_thread */ -.mutex_owner_running_critical_end: - cmpq %r11, %r9 /* owner == running 
thread? */ - je 2f /* yes, go return cpu */ -1: - xorq %rax, %rax /* return 0 */ - ret -2: - movq %r8, %rax /* return cpu */ - ret - SET_SIZE(mutex_owner_running) - - .globl mutex_owner_running_critical_size - .type mutex_owner_running_critical_size, @object - .align CPTRSIZE -mutex_owner_running_critical_size: - .quad .mutex_owner_running_critical_end - mutex_owner_running_critical_start - SET_SIZE(mutex_owner_running_critical_size) - - .globl mutex_exit_critical_start - - ENTRY(mutex_exit) -mutex_exit_critical_start: /* If interrupted, restart here */ - movq %gs:CPU_THREAD, %rdx - cmpq %rdx, (%rdi) - jne mutex_vector_exit /* wrong type or wrong owner */ - movq $0, (%rdi) /* clear owner AND lock */ -.mutex_exit_critical_end: -.mutex_exit_lockstat_patch_point: - ret - movq %rdi, %rsi - movl $LS_MUTEX_EXIT_RELEASE, %edi - jmp lockstat_wrapper - SET_SIZE(mutex_exit) - - .globl mutex_exit_critical_size - .type mutex_exit_critical_size, @object - .align CPTRSIZE -mutex_exit_critical_size: - .quad .mutex_exit_critical_end - mutex_exit_critical_start - SET_SIZE(mutex_exit_critical_size) - -/* - * rw_enter() and rw_exit(). - * - * These routines handle the simple cases of rw_enter (write-locking an unheld - * lock or read-locking a lock that's neither write-locked nor write-wanted) - * and rw_exit (no waiters or not the last reader). If anything complicated - * is going on we punt to rw_enter_sleep() and rw_exit_wakeup(), respectively. - */ - - ENTRY(rw_enter) - cmpl $RW_WRITER, %esi - je .rw_write_enter - movq (%rdi), %rax /* rax = old rw_wwwh value */ - testl $RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax - jnz rw_enter_sleep - leaq RW_READ_LOCK(%rax), %rdx /* rdx = new rw_wwwh value */ - lock - cmpxchgq %rdx, (%rdi) /* try to grab read lock */ - jnz rw_enter_sleep -.rw_read_enter_lockstat_patch_point: - ret - movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ - movq %rdi, %rsi /* rsi = lock ptr */ - movl $LS_RW_ENTER_ACQUIRE, %edi - movl $RW_READER, %edx - jmp lockstat_wrapper_arg -.rw_write_enter: - movq %gs:CPU_THREAD, %rdx - orq $RW_WRITE_LOCKED, %rdx /* rdx = write-locked value */ - xorl %eax, %eax /* rax = unheld value */ - lock - cmpxchgq %rdx, (%rdi) /* try to grab write lock */ - jnz rw_enter_sleep - -#if defined(OPTERON_WORKAROUND_6323525) -.rw_write_enter_lockstat_patch_point: -.rw_write_enter_6323525_patch_point: - ret - nop - nop -.rw_write_enter_lockstat_6323525_patch_point: - nop -#else /* OPTERON_WORKAROUND_6323525 */ -.rw_write_enter_lockstat_patch_point: - ret -#endif /* OPTERON_WORKAROUND_6323525 */ - - movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ - movq %rdi, %rsi /* rsi = lock ptr */ - movl $LS_RW_ENTER_ACQUIRE, %edi - movl $RW_WRITER, %edx - jmp lockstat_wrapper_arg - SET_SIZE(rw_enter) - - ENTRY(rw_exit) - movq (%rdi), %rax /* rax = old rw_wwwh value */ - cmpl $RW_READ_LOCK, %eax /* single-reader, no waiters? */ - jne .rw_not_single_reader - xorl %edx, %edx /* rdx = new value (unheld) */ -.rw_read_exit: - lock - cmpxchgq %rdx, (%rdi) /* try to drop read lock */ - jnz rw_exit_wakeup -.rw_read_exit_lockstat_patch_point: - ret - movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ - movq %rdi, %rsi /* rsi = lock ptr */ - movl $LS_RW_EXIT_RELEASE, %edi - movl $RW_READER, %edx - jmp lockstat_wrapper_arg -.rw_not_single_reader: - testl $RW_WRITE_LOCKED, %eax /* write-locked or write-wanted? 
*/ - jnz .rw_write_exit - leaq -RW_READ_LOCK(%rax), %rdx /* rdx = new value */ - cmpl $RW_READ_LOCK, %edx - jge .rw_read_exit /* not last reader, safe to drop */ - jmp rw_exit_wakeup /* last reader with waiters */ -.rw_write_exit: - movq %gs:CPU_THREAD, %rax /* rax = thread ptr */ - xorl %edx, %edx /* rdx = new value (unheld) */ - orq $RW_WRITE_LOCKED, %rax /* eax = write-locked value */ - lock - cmpxchgq %rdx, (%rdi) /* try to drop read lock */ - jnz rw_exit_wakeup -.rw_write_exit_lockstat_patch_point: - ret - movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ - movq %rdi, %rsi /* rsi - lock ptr */ - movl $LS_RW_EXIT_RELEASE, %edi - movl $RW_WRITER, %edx - jmp lockstat_wrapper_arg - SET_SIZE(rw_exit) - -#if defined(OPTERON_WORKAROUND_6323525) - -/* - * If it is necessary to patch the lock enter routines with the lfence - * workaround, workaround_6323525_patched is set to a non-zero value so that - * the lockstat_hat_patch routine can patch to the new location of the 'ret' - * instruction. - */ - DGDEF3(workaround_6323525_patched, 4, 4) - .long 0 - -#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size) \ - movq $size, %rbx; \ - movq $dstaddr, %r13; \ - addq %rbx, %r13; \ - movq $srcaddr, %r12; \ - addq %rbx, %r12; \ -0: \ - decq %r13; \ - decq %r12; \ - movzbl (%r12), %esi; \ - movq $1, %rdx; \ - movq %r13, %rdi; \ - call hot_patch_kernel_text; \ - decq %rbx; \ - testq %rbx, %rbx; \ - jg 0b; - -/* - * patch_workaround_6323525: provide workaround for 6323525 - * - * The workaround is to place a fencing instruction (lfence) between the - * mutex operation and the subsequent read-modify-write instruction. - * - * This routine hot patches the lfence instruction on top of the space - * reserved by nops in the lock enter routines. - */ - ENTRY_NP(patch_workaround_6323525) - pushq %rbp - movq %rsp, %rbp - pushq %r12 - pushq %r13 - pushq %rbx - - /* - * lockstat_hot_patch() to use the alternate lockstat workaround - * 6323525 patch points (points past the lfence instruction to the - * new ret) when workaround_6323525_patched is set. - */ - movl $1, workaround_6323525_patched - - /* - * patch ret/nop/nop/nop to lfence/ret at the end of the lock enter - * routines. The 4 bytes are patched in reverse order so that the - * the existing ret is overwritten last. This provides lock enter - * sanity during the intermediate patching stages. 
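
The reverse-order copy is easier to see in C. A sketch; hot_patch_kernel_text()'s argument order here is inferred from the register setup in the HOT_MUTEX_PATCH macro above:

        extern void hot_patch_kernel_text(char *dst, unsigned int newval,
            unsigned int size);

        /* Patch size bytes of src over dst, last byte first, so the
           leading ret at dst is only replaced by the final store. */
        static void
        hot_mutex_patch_sketch(const char *src, char *dst, int size)
        {
                int i;

                for (i = size - 1; i >= 0; i--)
                        hot_patch_kernel_text(&dst[i],
                            (unsigned char)src[i], 1);
        }
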
- */ - HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4) - HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4) - HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4) - HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4) - - popq %rbx - popq %r13 - popq %r12 - movq %rbp, %rsp - popq %rbp - ret -_lfence_insn: - lfence - ret - SET_SIZE(patch_workaround_6323525) - - -#endif /* OPTERON_WORKAROUND_6323525 */ - - -#define HOT_PATCH(addr, event, active_instr, normal_instr, len) \ - movq $normal_instr, %rsi; \ - movq $active_instr, %rdi; \ - leaq lockstat_probemap(%rip), %rax; \ - movl _MUL(event, DTRACE_IDSIZE)(%rax), %eax; \ - testl %eax, %eax; \ - jz 9f; \ - movq %rdi, %rsi; \ -9: \ - movq $len, %rdx; \ - movq $addr, %rdi; \ - call hot_patch_kernel_text - - ENTRY(lockstat_hot_patch) - pushq %rbp /* align stack properly */ - movq %rsp, %rbp - -#if defined(OPTERON_WORKAROUND_6323525) - cmpl $0, workaround_6323525_patched - je 1f - HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point, - LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - jmp 2f -1: - HOT_PATCH(.mutex_enter_lockstat_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.mutex_tryenter_lockstat_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_write_enter_lockstat_patch_point, - LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) -2: -#else /* OPTERON_WORKAROUND_6323525 */ - HOT_PATCH(.mutex_enter_lockstat_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.mutex_tryenter_lockstat_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_write_enter_lockstat_patch_point, - LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) -#endif /* !OPTERON_WORKAROUND_6323525 */ - HOT_PATCH(.mutex_exit_lockstat_patch_point, - LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_read_enter_lockstat_patch_point, - LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_write_exit_lockstat_patch_point, - LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_read_exit_lockstat_patch_point, - LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.lock_set_lockstat_patch_point, - LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.lock_try_lockstat_patch_point, - LS_LOCK_TRY_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.lock_clear_lockstat_patch_point, - LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.lock_set_spl_lockstat_patch_point, - LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - - HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT, - LS_LOCK_CLEAR_SPLX_RELEASE, - LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1); - leave /* unwind stack */ - ret - SET_SIZE(lockstat_hot_patch) - - ENTRY(membar_enter) - ALTENTRY(membar_exit) - ALTENTRY(membar_sync) - mfence /* lighter weight than lock; xorq $0,(%rsp) */ - ret - SET_SIZE(membar_sync) - SET_SIZE(membar_exit) - SET_SIZE(membar_enter) - - ENTRY(membar_producer) - sfence - ret - SET_SIZE(membar_producer) - - ENTRY(membar_consumer) - lfence - ret - SET_SIZE(membar_consumer) - -/* - * thread_onproc() - * Set thread in onproc state for the specified CPU. - * Also set the thread lock pointer to the CPU's onproc lock. - * Since the new lock isn't held, the store ordering is important. 
- * If not done in assembler, the compiler could reorder the stores. - */ - - ENTRY(thread_onproc) - addq $CPU_THREAD_LOCK, %rsi /* pointer to disp_lock while running */ - movl $ONPROC_THREAD, T_STATE(%rdi) /* set state to TS_ONPROC */ - movq %rsi, T_LOCKP(%rdi) /* store new lock pointer */ - ret - SET_SIZE(thread_onproc) - -/* - * mutex_delay_default(void) - * Spins for approx a few hundred processor cycles and returns to caller. - */ - - ENTRY(mutex_delay_default) - movq $92,%r11 -0: decq %r11 - jg 0b - ret - SET_SIZE(mutex_delay_default) - diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s deleted file mode 100644 index 4143c181a3..0000000000 --- a/usr/src/uts/intel/ia32/ml/modstubs.s +++ /dev/null @@ -1,1320 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. - * Copyright 2021 OmniOS Community Edition (OmniOSce) Association. - */ - -#include <sys/asm_linkage.h> - -#include "assym.h" - -/* - * !!!!!!!! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! !!!!!!!! - * - * For functions which are either STUBs or WSTUBs the actual function - * need to be called using 'call' instruction because of preamble and - * postamble (i.e mod_hold_stub and mod_release_stub) around the - * function call. Due to this we need to copy arguments for the - * real function. On Intel we can't tell how many arguments are there - * on the stack so we have to either copy everything between esp and - * ebp or copy only a fixed number (MAXNARG - defined here) for - * all the stub functions. Currently we are using MAXNARG (it is a kludge - * but worth it?!). - * - * NOTE: Use NO_UNLOAD_STUBs if the module is NOT unloadable once it is - * loaded. - */ -#define MAXNARG 10 - -/* - * WARNING: there is no check for forgetting to write END_MODULE, - * and if you do, the kernel will most likely crash. Be careful - * - * This file assumes that all of the contributions to the data segment - * will be contiguous in the output file, even though they are separated - * by pieces of text. This is safe for all assemblers I know of now... - */ - -/* - * This file uses ansi preprocessor features: - * - * 1. #define mac(a) extra_ ## a --> mac(x) expands to extra_a - * The old version of this is - * #define mac(a) extra_/.*.*./a - * but this fails if the argument has spaces "mac ( x )" - * (Ignore the dots above, I had to put them in to keep this a comment.) - * - * 2. #define mac(a) #a --> mac(x) expands to "x" - * The old version is - * #define mac(a) "a" - * - * For some reason, the 5.0 preprocessor isn't happy with the above usage. - * For now, we're not using these ansi features. 
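
Concretely, the two spellings contrasted above are (illustration only; this file uses the traditional form, as in the MODULE macro below):

        /* ANSI token pasting: PASTE_A(x) expands to extra_x */
        #define PASTE_A(a)      extra_ ## a

        /* Traditional equivalent, relying on the old cpp deleting the
           empty comment between tokens: PASTE_T(x) also yields extra_x */
        #define PASTE_T(a)      extra_/**/a
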
- * - * The reason is that "the 5.0 ANSI preprocessor" is built into the compiler - * and is a tokenizing preprocessor. This means, when confronted by something - * other than C token generation rules, strange things occur. In this case, - * when confronted by an assembly file, it would turn the token ".globl" into - * two tokens "." and "globl". For this reason, the traditional, non-ANSI - * preprocessor is used on assembly files. - * - * It would be desirable to have a non-tokenizing cpp (accp?) to use for this. - */ - -/* - * This file contains the stubs routines for modules which can be autoloaded. - */ - -/* - * See the 'struct mod_modinfo' definition to see what this declaration - * is trying to achieve here. - */ -#define MODULE(module,namespace) \ - .data; \ -module/**/_modname: \ - .string "namespace/module"; \ - SET_SIZE(module/**/_modname); \ - .align CPTRSIZE; \ - .globl module/**/_modinfo; \ - .type module/**/_modinfo, @object; \ -module/**/_modinfo: \ - .quad module/**/_modname; \ - .quad 0 /* storage for modctl pointer */ - - /* then mod_stub_info structures follow until a mods_func_adr is 0 */ - -/* this puts a 0 where the next mods_func_adr would be */ -#define END_MODULE(module) \ - .data; \ - .align CPTRSIZE; \ - .quad 0; \ - SET_SIZE(module/**/_modinfo) - -/* - * The data section in the stub_common macro is the - * mod_stub_info structure for the stub function - */ - -#define STUB_COMMON(module, fcnname, install_fcn, retfcn, weak) \ - ENTRY(fcnname); \ - leaq fcnname/**/_info(%rip), %rax; \ - cmpl $0, MODS_FLAG(%rax); /* weak? */ \ - je stubs_common_code; /* not weak */ \ - testb $MODS_INSTALLED, MODS_FLAG(%rax); /* installed? */ \ - jne stubs_common_code; /* yes, do the mod_hold */ \ - movq MODS_RETFCN(%rax), %rax; /* no, load retfcn */ \ - INDIRECT_JMP_REG(rax); /* no, jump to retfcn */ \ - SET_SIZE(fcnname); \ - .data; \ - .align CPTRSIZE; \ - .type fcnname/**/_info, @object; \ -fcnname/**/_info: \ - .quad install_fcn; /* 0 */ \ - .quad module/**/_modinfo; /* 0x8 */ \ - .quad fcnname; /* 0x10 */ \ - .quad retfcn; /* 0x18 */ \ - .long weak; /* 0x20 */ \ - SET_SIZE(fcnname/**/_info) - -#define STUB_NO_UNLOADABLE(module, fcnname, install_fcn, retfcn, weak) \ - ENTRY(fcnname); \ - leaq fcnname/**/_info(%rip), %rax; \ - testb $MODS_INSTALLED, MODS_FLAG(%rax); /* installed? */ \ - je 5f; /* no */ \ - movq MODS_INSTFCN(%rax), %rax; /* yes, load install_fcn */ \ - INDIRECT_JMP_REG(rax); /* yes, jump to install_fcn */ \ -5: testb $MODS_WEAK, MODS_FLAG(%rax); /* weak? 
*/ \ - je stubs_common_code; /* no, do mod load */ \ - movq MODS_RETFCN(%rax), %rax; /* yes, load retfcn */ \ - INDIRECT_JMP_REG(rax); /* yes, jump to retfcn */ \ - SET_SIZE(fcnname); \ - .data; \ - .align CPTRSIZE; \ - .type fcnname/**/_info, @object; \ -fcnname/**/_info: \ - .quad install_fcn; /* 0 */ \ - .quad module/**/_modinfo; /* 0x8 */ \ - .quad fcnname; /* 0x10 */ \ - .quad retfcn; /* 0x18 */ \ - .long weak; /* 0x20 */ \ - SET_SIZE(fcnname/**/_info) - -/* - * We branch here with the fcnname_info pointer in %rax - */ - ENTRY_NP(stubs_common_code) - .globl mod_hold_stub - .globl mod_release_stub - pushq %rbp - movq %rsp, %rbp - subq $0x10, %rsp - movq %r15, (%rsp) /* (caller saved) */ - movq %rax, %r15 /* stash the fcnname_info pointer */ - /* - * save incoming register arguments - */ - pushq %rdi - pushq %rsi - pushq %rdx - pushq %rcx - pushq %r8 - pushq %r9 - /* (next 4 args, if any, are already on the stack above %rbp) */ - movq %r15, %rdi - call mod_hold_stub /* mod_hold_stub(mod_stub_info *) */ - cmpl $-1, %eax /* error? */ - jne .L1 - movq 0x18(%r15), %rax - INDIRECT_CALL_REG(rax) - addq $0x30, %rsp - jmp .L2 -.L1: - /* - * copy MAXNARG == 10 incoming arguments - */ - popq %r9 - popq %r8 - popq %rcx - popq %rdx - popq %rsi - popq %rdi - /* - * stack: - * arg9 0x38(%rsp) - * arg8 0x30(%rsp) - * arg7 0x28(%rsp) - * arg6 0x20(%rsp) - * saved %rip 0x18(%rsp) - * saved %rbp 0x10(%rsp) - * <pad> 0x8(%rsp) - * saved %r15 0x0(%rsp) - */ - movl $MAXNARG - 6 + 3, %r11d - pushq (%rsp, %r11, 8) - pushq (%rsp, %r11, 8) - pushq (%rsp, %r11, 8) - pushq (%rsp, %r11, 8) - movq (%r15), %rax - INDIRECT_CALL_REG(rax) /* call the stub fn(arg, ..) */ - addq $0x20, %rsp /* pop off last 4 args */ - pushq %rax /* save any return values */ - pushq %rdx - movq %r15, %rdi - call mod_release_stub /* release hold on module */ - popq %rdx /* restore return values */ - popq %rax -.L2: - popq %r15 - leave - ret - SET_SIZE(stubs_common_code) - -#define STUB(module, fcnname, retfcn) \ - STUB_COMMON(module, fcnname, mod_hold_stub, retfcn, 0) - -/* - * "weak stub", don't load on account of this call - */ -#define WSTUB(module, fcnname, retfcn) \ - STUB_COMMON(module, fcnname, retfcn, retfcn, MODS_WEAK) - -/* - * "non-unloadable stub", don't bother 'holding' module if it's already loaded - * since the module cannot be unloaded. - * - * User *MUST* guarantee the module is not unloadable (no _fini routine). - */ -#define NO_UNLOAD_STUB(module, fcnname, retfcn) \ - STUB_NO_UNLOADABLE(module, fcnname, retfcn, retfcn, MODS_NOUNLOAD) - -/* - * "weak stub" for non-unloadable module, don't load on account of this call - */ -#define NO_UNLOAD_WSTUB(module, fcnname, retfcn) \ - STUB_NO_UNLOADABLE(module, fcnname, retfcn, retfcn, MODS_NOUNLOAD|MODS_WEAK) - -/* - * this is just a marker for the beginning area of text that contains stubs - */ - ENTRY_NP(stubs_base) - nop - -/* - * WARNING WARNING WARNING!!!!!! - * - * On the MODULE macro you MUST NOT use any spaces!!! They are - * significant to the preprocessor. With ansi c there is a way around this - * but for some reason (yet to be investigated) ansi didn't work for other - * reasons! - * - * When zero is used as the return function, the system will call - * panic if the stub can't be resolved. - */ - -/* - * Stubs for devfs. A non-unloadable module. 
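
Before the per-module stanzas, the slow path implemented by stubs_common_code above can be summarized in C. This is a hypothetical rendering: the struct mirrors the fcnname_info records emitted by the macros (offsets 0x0 through 0x20), the names are illustrative, and the fixed 10-slot argument array stands in for the register/stack copying that forces the real code to be assembler:

        typedef long (*stub_fn10_t)(long, long, long, long, long,
            long, long, long, long, long);      /* MAXNARG == 10 */

        struct stub_info_sketch {               /* mirrors fcnname_info */
                stub_fn10_t     mods_func_adr;  /* 0x00: target function */
                void            *mods_modinfo;  /* 0x08: module_modinfo */
                void            *mods_stub_adr; /* 0x10: the stub itself */
                long            (*mods_errfcn)(void);  /* 0x18: retfcn */
                int             mods_flag;      /* 0x20: weak/installed */
        };

        extern int mod_hold_stub(struct stub_info_sketch *);
        extern void mod_release_stub(struct stub_info_sketch *);

        static long
        stub_slowpath_sketch(struct stub_info_sketch *sp, long a[10])
        {
                long rv;

                if (mod_hold_stub(sp) == -1)    /* load failed: nothing */
                        return (sp->mods_errfcn());     /* to release */
                rv = sp->mods_func_adr(a[0], a[1], a[2], a[3], a[4],
                    a[5], a[6], a[7], a[8], a[9]);
                mod_release_stub(sp);           /* drop the module hold */
                return (rv);
        }
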
- */ - -#ifndef DEVFS_MODULE - MODULE(devfs,fs); - NO_UNLOAD_STUB(devfs, devfs_clean, nomod_minus_one); - NO_UNLOAD_STUB(devfs, devfs_lookupname, nomod_minus_one); - NO_UNLOAD_STUB(devfs, devfs_walk, nomod_minus_one); - NO_UNLOAD_STUB(devfs, devfs_devpolicy, nomod_minus_one); - NO_UNLOAD_STUB(devfs, devfs_reset_perm, nomod_minus_one); - NO_UNLOAD_STUB(devfs, devfs_remdrv_cleanup, nomod_minus_one); - END_MODULE(devfs); -#endif - -#ifndef DEV_MODULE - MODULE(dev,fs); - NO_UNLOAD_STUB(dev, sdev_modctl_readdir, nomod_minus_one); - NO_UNLOAD_STUB(dev, sdev_modctl_readdir_free, nomod_minus_one); - NO_UNLOAD_STUB(dev, devname_filename_register, nomod_minus_one); - NO_UNLOAD_STUB(dev, sdev_modctl_devexists, nomod_minus_one); - NO_UNLOAD_STUB(dev, devname_profile_update, nomod_minus_one); - NO_UNLOAD_STUB(dev, sdev_devstate_change, nomod_minus_one); - NO_UNLOAD_STUB(dev, devvt_getvnodeops, nomod_minus_one); - NO_UNLOAD_STUB(dev, devpts_getvnodeops, nomod_zero); - END_MODULE(dev); -#endif - -/* - * Stubs for specfs. A non-unloadable module. - */ - -#ifndef SPEC_MODULE - MODULE(specfs,fs); - NO_UNLOAD_STUB(specfs, common_specvp, nomod_zero); - NO_UNLOAD_STUB(specfs, makectty, nomod_zero); - NO_UNLOAD_STUB(specfs, makespecvp, nomod_zero); - NO_UNLOAD_STUB(specfs, smark, nomod_zero); - NO_UNLOAD_STUB(specfs, spec_segmap, nomod_einval); - NO_UNLOAD_STUB(specfs, specfind, nomod_zero); - NO_UNLOAD_STUB(specfs, specvp, nomod_zero); - NO_UNLOAD_STUB(specfs, devi_stillreferenced, nomod_zero); - NO_UNLOAD_STUB(specfs, spec_getvnodeops, nomod_zero); - NO_UNLOAD_STUB(specfs, spec_char_map, nomod_zero); - NO_UNLOAD_STUB(specfs, specvp_devfs, nomod_zero); - NO_UNLOAD_STUB(specfs, spec_assoc_vp_with_devi, nomod_void); - NO_UNLOAD_STUB(specfs, spec_hold_devi_by_vp, nomod_zero); - NO_UNLOAD_STUB(specfs, spec_snode_walk, nomod_void); - NO_UNLOAD_STUB(specfs, spec_devi_open_count, nomod_minus_one); - NO_UNLOAD_STUB(specfs, spec_is_clone, nomod_zero); - NO_UNLOAD_STUB(specfs, spec_is_selfclone, nomod_zero); - NO_UNLOAD_STUB(specfs, spec_fence_snode, nomod_minus_one); - NO_UNLOAD_STUB(specfs, spec_unfence_snode, nomod_minus_one); - END_MODULE(specfs); -#endif - - -/* - * Stubs for sockfs. A non-unloadable module. 
- */ -#ifndef SOCK_MODULE - MODULE(sockfs,fs); - NO_UNLOAD_STUB(sockfs, so_socket, nomod_zero); - NO_UNLOAD_STUB(sockfs, so_socketpair, nomod_zero); - NO_UNLOAD_STUB(sockfs, bind, nomod_zero); - NO_UNLOAD_STUB(sockfs, listen, nomod_zero); - NO_UNLOAD_STUB(sockfs, accept, nomod_zero); - NO_UNLOAD_STUB(sockfs, connect, nomod_zero); - NO_UNLOAD_STUB(sockfs, shutdown, nomod_zero); - NO_UNLOAD_STUB(sockfs, recv, nomod_zero); - NO_UNLOAD_STUB(sockfs, recvfrom, nomod_zero); - NO_UNLOAD_STUB(sockfs, recvmsg, nomod_zero); - NO_UNLOAD_STUB(sockfs, send, nomod_zero); - NO_UNLOAD_STUB(sockfs, sendmsg, nomod_zero); - NO_UNLOAD_STUB(sockfs, sendto, nomod_zero); -#ifdef _SYSCALL32_IMPL - NO_UNLOAD_STUB(sockfs, recv32, nomod_zero); - NO_UNLOAD_STUB(sockfs, recvfrom32, nomod_zero); - NO_UNLOAD_STUB(sockfs, send32, nomod_zero); - NO_UNLOAD_STUB(sockfs, sendto32, nomod_zero); -#endif /* _SYSCALL32_IMPL */ - NO_UNLOAD_STUB(sockfs, getpeername, nomod_zero); - NO_UNLOAD_STUB(sockfs, getsockname, nomod_zero); - NO_UNLOAD_STUB(sockfs, getsockopt, nomod_zero); - NO_UNLOAD_STUB(sockfs, setsockopt, nomod_zero); - NO_UNLOAD_STUB(sockfs, sockconfig, nomod_zero); - NO_UNLOAD_STUB(sockfs, sock_getmsg, nomod_zero); - NO_UNLOAD_STUB(sockfs, sock_putmsg, nomod_zero); - NO_UNLOAD_STUB(sockfs, sosendfile64, nomod_zero); - NO_UNLOAD_STUB(sockfs, snf_segmap, nomod_einval); - NO_UNLOAD_STUB(sockfs, sock_getfasync, nomod_zero); - NO_UNLOAD_STUB(sockfs, nl7c_sendfilev, nomod_zero); - NO_UNLOAD_STUB(sockfs, sotpi_sototpi, nomod_zero); - NO_UNLOAD_STUB(sockfs, socket_sendmblk, nomod_zero); - NO_UNLOAD_STUB(sockfs, socket_setsockopt, nomod_zero); - END_MODULE(sockfs); -#endif - -/* - * IPsec stubs. - */ - -#ifndef IPSECAH_MODULE - MODULE(ipsecah,drv); - WSTUB(ipsecah, ipsec_construct_inverse_acquire, nomod_zero); - WSTUB(ipsecah, sadb_acquire, nomod_zero); - WSTUB(ipsecah, ipsecah_algs_changed, nomod_zero); - WSTUB(ipsecah, sadb_alg_update, nomod_zero); - WSTUB(ipsecah, sadb_unlinkassoc, nomod_zero); - WSTUB(ipsecah, sadb_insertassoc, nomod_zero); - WSTUB(ipsecah, ipsecah_in_assocfailure, nomod_zero); - WSTUB(ipsecah, sadb_set_lpkt, nomod_zero); - WSTUB(ipsecah, ipsecah_icmp_error, nomod_zero); - END_MODULE(ipsecah); -#endif - -#ifndef IPSECESP_MODULE - MODULE(ipsecesp,drv); - WSTUB(ipsecesp, ipsecesp_fill_defs, nomod_zero); - WSTUB(ipsecesp, ipsecesp_algs_changed, nomod_zero); - WSTUB(ipsecesp, ipsecesp_in_assocfailure, nomod_zero); - WSTUB(ipsecesp, ipsecesp_init_funcs, nomod_zero); - WSTUB(ipsecesp, ipsecesp_icmp_error, nomod_zero); - WSTUB(ipsecesp, ipsecesp_send_keepalive, nomod_zero); - END_MODULE(ipsecesp); -#endif - -#ifndef KEYSOCK_MODULE - MODULE(keysock, drv); - WSTUB(keysock, keysock_spdsock_wput_iocdata, nomod_void); - WSTUB(keysock, keysock_plumb_ipsec, nomod_zero); - WSTUB(keysock, keysock_extended_reg, nomod_zero); - WSTUB(keysock, keysock_next_seq, nomod_zero); - END_MODULE(keysock); -#endif - -#ifndef SPDSOCK_MODULE - MODULE(spdsock,drv); - WSTUB(spdsock, spdsock_update_pending_algs, nomod_zero); - END_MODULE(spdsock); -#endif - -/* - * Stubs for nfs common code. - * XXX nfs_getvnodeops should go away with removal of kludge in vnode.c - */ -#ifndef NFS_MODULE - MODULE(nfs,fs); - WSTUB(nfs, nfs_getvnodeops, nomod_zero); - WSTUB(nfs, nfs_perror, nomod_zero); - WSTUB(nfs, nfs_cmn_err, nomod_zero); - WSTUB(nfs, clcleanup_zone, nomod_zero); - WSTUB(nfs, clcleanup4_zone, nomod_zero); - END_MODULE(nfs); -#endif - - -/* - * Stubs for nfs_dlboot (diskless booting). 
- */ -#ifndef NFS_DLBOOT_MODULE - MODULE(nfs_dlboot,misc); - STUB(nfs_dlboot, mount_root, nomod_minus_one); - STUB(nfs_dlboot, dhcpinit, nomod_minus_one); - END_MODULE(nfs_dlboot); -#endif - -/* - * Stubs for nfs server-only code. - */ -#ifndef NFSSRV_MODULE - MODULE(nfssrv,misc); - STUB(nfssrv, exportfs, nomod_minus_one); - STUB(nfssrv, nfs_getfh, nomod_minus_one); - STUB(nfssrv, nfsl_flush, nomod_minus_one); - STUB(nfssrv, rfs4_check_delegated, nomod_zero); - STUB(nfssrv, mountd_args, nomod_minus_one); - NO_UNLOAD_STUB(nfssrv, rdma_start, nomod_zero); - NO_UNLOAD_STUB(nfssrv, nfs_svc, nomod_zero); - END_MODULE(nfssrv); -#endif - -/* - * Stubs for kernel lock manager. - */ -#ifndef KLM_MODULE - MODULE(klmmod,misc); - NO_UNLOAD_STUB(klmmod, lm_svc, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_shutdown, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_unexport, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_cprresume, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_cprsuspend, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_safelock, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_safemap, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_has_sleep, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_free_config, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_vp_active, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_get_sysid, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_rel_sysid, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_alloc_sysidt, nomod_minus_one); - NO_UNLOAD_STUB(klmmod, lm_free_sysidt, nomod_zero); - NO_UNLOAD_STUB(klmmod, lm_sysidt, nomod_minus_one); - END_MODULE(klmmod); -#endif - -#ifndef KLMOPS_MODULE - MODULE(klmops,misc); - NO_UNLOAD_STUB(klmops, lm_frlock, nomod_zero); - NO_UNLOAD_STUB(klmops, lm4_frlock, nomod_zero); - NO_UNLOAD_STUB(klmops, lm_shrlock, nomod_zero); - NO_UNLOAD_STUB(klmops, lm4_shrlock, nomod_zero); - NO_UNLOAD_STUB(klmops, lm_nlm_dispatch, nomod_zero); - NO_UNLOAD_STUB(klmops, lm_nlm4_dispatch, nomod_zero); - NO_UNLOAD_STUB(klmops, lm_nlm_reclaim, nomod_zero); - NO_UNLOAD_STUB(klmops, lm_nlm4_reclaim, nomod_zero); - NO_UNLOAD_STUB(klmops, lm_register_lock_locally, nomod_zero); - END_MODULE(klmops); -#endif - -/* - * Stubs for kernel TLI module - * XXX currently we never allow this to unload - */ -#ifndef TLI_MODULE - MODULE(tlimod,misc); - NO_UNLOAD_STUB(tlimod, t_kopen, nomod_minus_one); - NO_UNLOAD_STUB(tlimod, t_kunbind, nomod_zero); - NO_UNLOAD_STUB(tlimod, t_kadvise, nomod_zero); - NO_UNLOAD_STUB(tlimod, t_krcvudata, nomod_zero); - NO_UNLOAD_STUB(tlimod, t_ksndudata, nomod_zero); - NO_UNLOAD_STUB(tlimod, t_kalloc, nomod_zero); - NO_UNLOAD_STUB(tlimod, t_kbind, nomod_zero); - NO_UNLOAD_STUB(tlimod, t_kclose, nomod_zero); - NO_UNLOAD_STUB(tlimod, t_kspoll, nomod_zero); - NO_UNLOAD_STUB(tlimod, t_kfree, nomod_zero); - NO_UNLOAD_STUB(tlimod, t_koptmgmt, nomod_zero); - END_MODULE(tlimod); -#endif - -/* - * Stubs for kernel RPC module - * XXX currently we never allow this to unload - */ -#ifndef RPC_MODULE - MODULE(rpcmod,strmod); - NO_UNLOAD_STUB(rpcmod, clnt_tli_kcreate, nomod_minus_one); - NO_UNLOAD_STUB(rpcmod, svc_tli_kcreate, nomod_minus_one); - NO_UNLOAD_STUB(rpcmod, bindresvport, nomod_minus_one); - NO_UNLOAD_STUB(rpcmod, rdma_register_mod, nomod_minus_one); - NO_UNLOAD_STUB(rpcmod, rdma_unregister_mod, nomod_minus_one); - NO_UNLOAD_STUB(rpcmod, svc_queuereq, nomod_minus_one); - NO_UNLOAD_STUB(rpcmod, clist_add, nomod_minus_one); - END_MODULE(rpcmod); -#endif - -/* - * Stubs for des - */ -#ifndef DES_MODULE - MODULE(des,misc); - STUB(des, cbc_crypt, nomod_zero); - STUB(des, ecb_crypt, nomod_zero); - STUB(des, _des_crypt, 
nomod_zero); - END_MODULE(des); -#endif - -/* - * Stubs for procfs. A non-unloadable module. - */ -#ifndef PROC_MODULE - MODULE(procfs,fs); - NO_UNLOAD_STUB(procfs, prfree, nomod_zero); - NO_UNLOAD_STUB(procfs, prexit, nomod_zero); - NO_UNLOAD_STUB(procfs, prlwpfree, nomod_zero); - NO_UNLOAD_STUB(procfs, prlwpexit, nomod_zero); - NO_UNLOAD_STUB(procfs, prinvalidate, nomod_zero); - NO_UNLOAD_STUB(procfs, prnsegs, nomod_zero); - NO_UNLOAD_STUB(procfs, prgetcred, nomod_zero); - NO_UNLOAD_STUB(procfs, prgetpriv, nomod_zero); - NO_UNLOAD_STUB(procfs, prgetprivsize, nomod_zero); - NO_UNLOAD_STUB(procfs, prgetsecflags, nomod_zero); - NO_UNLOAD_STUB(procfs, prgetstatus, nomod_zero); - NO_UNLOAD_STUB(procfs, prgetlwpstatus, nomod_zero); - NO_UNLOAD_STUB(procfs, prgetpsinfo, nomod_zero); - NO_UNLOAD_STUB(procfs, prgetlwpsinfo, nomod_zero); - NO_UNLOAD_STUB(procfs, oprgetstatus, nomod_zero); - NO_UNLOAD_STUB(procfs, oprgetpsinfo, nomod_zero); -#ifdef _SYSCALL32_IMPL - NO_UNLOAD_STUB(procfs, prgetstatus32, nomod_zero); - NO_UNLOAD_STUB(procfs, prgetlwpstatus32, nomod_zero); - NO_UNLOAD_STUB(procfs, prgetpsinfo32, nomod_zero); - NO_UNLOAD_STUB(procfs, prgetlwpsinfo32, nomod_zero); - NO_UNLOAD_STUB(procfs, oprgetstatus32, nomod_zero); - NO_UNLOAD_STUB(procfs, oprgetpsinfo32, nomod_zero); - NO_UNLOAD_STUB(procfs, psinfo_kto32, nomod_zero); - NO_UNLOAD_STUB(procfs, lwpsinfo_kto32, nomod_zero); -#endif /* _SYSCALL32_IMPL */ - NO_UNLOAD_STUB(procfs, prnotify, nomod_zero); - NO_UNLOAD_STUB(procfs, prexecstart, nomod_zero); - NO_UNLOAD_STUB(procfs, prexecend, nomod_zero); - NO_UNLOAD_STUB(procfs, prrelvm, nomod_zero); - NO_UNLOAD_STUB(procfs, prbarrier, nomod_zero); - NO_UNLOAD_STUB(procfs, estimate_msacct, nomod_zero); - NO_UNLOAD_STUB(procfs, pr_getprot, nomod_zero); - NO_UNLOAD_STUB(procfs, pr_getprot_done, nomod_zero); - NO_UNLOAD_STUB(procfs, pr_getsegsize, nomod_zero); - NO_UNLOAD_STUB(procfs, pr_isobject, nomod_zero); - NO_UNLOAD_STUB(procfs, pr_isself, nomod_zero); - NO_UNLOAD_STUB(procfs, pr_allstopped, nomod_zero); - NO_UNLOAD_STUB(procfs, pr_free_watched_pages, nomod_zero); - END_MODULE(procfs); -#endif - -/* - * Stubs for fifofs - */ -#ifndef FIFO_MODULE - MODULE(fifofs,fs); - NO_UNLOAD_STUB(fifofs, fifovp, nomod_zero); - NO_UNLOAD_STUB(fifofs, fifo_getinfo, nomod_zero); - NO_UNLOAD_STUB(fifofs, fifo_vfastoff, nomod_zero); - END_MODULE(fifofs); -#endif - -/* - * Stubs for ufs - * - * This is needed to support the old quotactl system call. - * When the old sysent stuff goes away, this will need to be revisited. 
- */ -#ifndef UFS_MODULE - MODULE(ufs,fs); - STUB(ufs, quotactl, nomod_minus_one); - END_MODULE(ufs); -#endif - -/* - * Stubs for zfs - */ -#ifndef ZFS_MODULE - MODULE(zfs,fs); - STUB(zfs, dsl_prop_get, nomod_minus_one); - STUB(zfs, spa_boot_init, nomod_minus_one); - STUB(zfs, zfs_prop_to_name, nomod_zero); - END_MODULE(zfs); -#endif - -/* - * Stubs for dcfs - */ -#ifndef DCFS_MODULE - MODULE(dcfs,fs); - STUB(dcfs, decompvp, 0); - END_MODULE(dcfs); -#endif - -/* - * Stubs for namefs - */ -#ifndef NAMEFS_MODULE - MODULE(namefs,fs); - STUB(namefs, nm_unmountall, 0); - END_MODULE(namefs); -#endif - -/* - * Stubs for sysdc - */ -#ifndef SDC_MODULE - MODULE(SDC,sched); - NO_UNLOAD_STUB(SDC, sysdc_thread_enter, nomod_zero); - END_MODULE(SDC); -#endif - -/* - * Stubs for ts_dptbl - */ -#ifndef TS_DPTBL_MODULE - MODULE(TS_DPTBL,sched); - STUB(TS_DPTBL, ts_getdptbl, 0); - STUB(TS_DPTBL, ts_getkmdpris, 0); - STUB(TS_DPTBL, ts_getmaxumdpri, 0); - END_MODULE(TS_DPTBL); -#endif - -/* - * Stubs for rt_dptbl - */ -#ifndef RT_DPTBL_MODULE - MODULE(RT_DPTBL,sched); - STUB(RT_DPTBL, rt_getdptbl, 0); - END_MODULE(RT_DPTBL); -#endif - -/* - * Stubs for ia_dptbl - */ -#ifndef IA_DPTBL_MODULE - MODULE(IA_DPTBL,sched); - STUB(IA_DPTBL, ia_getdptbl, nomod_zero); - STUB(IA_DPTBL, ia_getkmdpris, nomod_zero); - STUB(IA_DPTBL, ia_getmaxumdpri, nomod_zero); - END_MODULE(IA_DPTBL); -#endif - -/* - * Stubs for FSS scheduler - */ -#ifndef FSS_MODULE - MODULE(FSS,sched); - WSTUB(FSS, fss_allocbuf, nomod_zero); - WSTUB(FSS, fss_freebuf, nomod_zero); - WSTUB(FSS, fss_changeproj, nomod_zero); - WSTUB(FSS, fss_changepset, nomod_zero); - END_MODULE(FSS); -#endif - -/* - * Stubs for fx_dptbl - */ -#ifndef FX_DPTBL_MODULE - MODULE(FX_DPTBL,sched); - STUB(FX_DPTBL, fx_getdptbl, 0); - STUB(FX_DPTBL, fx_getmaxumdpri, 0); - END_MODULE(FX_DPTBL); -#endif - -/* - * Stubs for bootdev - */ -#ifndef BOOTDEV_MODULE - MODULE(bootdev,misc); - STUB(bootdev, i_promname_to_devname, 0); - STUB(bootdev, i_convert_boot_device_name, 0); - END_MODULE(bootdev); -#endif - -/* - * stubs for strplumb... - */ -#ifndef STRPLUMB_MODULE - MODULE(strplumb,misc); - STUB(strplumb, strplumb, 0); - STUB(strplumb, strplumb_load, 0); - STUB(strplumb, strplumb_get_netdev_path, 0); - END_MODULE(strplumb); -#endif - -/* - * Stubs for console configuration module - */ -#ifndef CONSCONFIG_MODULE - MODULE(consconfig,misc); - STUB(consconfig, consconfig, 0); - STUB(consconfig, consconfig_get_usb_kb_path, 0); - STUB(consconfig, consconfig_get_usb_ms_path, 0); - STUB(consconfig, consconfig_get_plat_fbpath, 0); - STUB(consconfig, consconfig_console_is_ready, 0); - END_MODULE(consconfig); -#endif - -/* - * Stubs for accounting. - */ -#ifndef SYSACCT_MODULE - MODULE(sysacct,sys); - NO_UNLOAD_WSTUB(sysacct, acct, nomod_zero); - NO_UNLOAD_WSTUB(sysacct, acct_fs_in_use, nomod_zero); - END_MODULE(sysacct); -#endif - -/* - * Stubs for semaphore routines. sem.c - */ -#ifndef SEMSYS_MODULE - MODULE(semsys,sys); - NO_UNLOAD_WSTUB(semsys, semexit, nomod_zero); - END_MODULE(semsys); -#endif - -/* - * Stubs for shmem routines. 
shm.c - */ -#ifndef SHMSYS_MODULE - MODULE(shmsys,sys); - NO_UNLOAD_WSTUB(shmsys, shmexit, nomod_zero); - NO_UNLOAD_WSTUB(shmsys, shmfork, nomod_zero); - NO_UNLOAD_WSTUB(shmsys, shmgetid, nomod_minus_one); - END_MODULE(shmsys); -#endif - -/* - * Stubs for doors - */ -#ifndef DOOR_MODULE - MODULE(doorfs,sys); - NO_UNLOAD_WSTUB(doorfs, door_slam, nomod_zero); - NO_UNLOAD_WSTUB(doorfs, door_exit, nomod_zero); - NO_UNLOAD_WSTUB(doorfs, door_revoke_all, nomod_zero); - NO_UNLOAD_WSTUB(doorfs, door_fork, nomod_zero); - NO_UNLOAD_STUB(doorfs, door_upcall, nomod_einval); - NO_UNLOAD_STUB(doorfs, door_ki_create, nomod_einval); - NO_UNLOAD_STUB(doorfs, door_ki_open, nomod_einval); - NO_UNLOAD_STUB(doorfs, door_ki_lookup, nomod_zero); - NO_UNLOAD_WSTUB(doorfs, door_ki_upcall, nomod_einval); - NO_UNLOAD_WSTUB(doorfs, door_ki_upcall_limited, nomod_einval); - NO_UNLOAD_WSTUB(doorfs, door_ki_hold, nomod_zero); - NO_UNLOAD_WSTUB(doorfs, door_ki_rele, nomod_zero); - NO_UNLOAD_WSTUB(doorfs, door_ki_info, nomod_einval); - END_MODULE(doorfs); -#endif - -/* - * Stubs for MD5 - */ -#ifndef MD5_MODULE - MODULE(md5,misc); - WSTUB(md5, MD5Init, nomod_zero); - WSTUB(md5, MD5Update, nomod_zero); - WSTUB(md5, MD5Final, nomod_zero); - END_MODULE(md5); -#endif - -/* - * Stubs for idmap - */ -#ifndef IDMAP_MODULE - MODULE(idmap,misc); - STUB(idmap, kidmap_batch_getgidbysid, nomod_zero); - STUB(idmap, kidmap_batch_getpidbysid, nomod_zero); - STUB(idmap, kidmap_batch_getsidbygid, nomod_zero); - STUB(idmap, kidmap_batch_getsidbyuid, nomod_zero); - STUB(idmap, kidmap_batch_getuidbysid, nomod_zero); - STUB(idmap, kidmap_get_create, nomod_zero); - STUB(idmap, kidmap_get_destroy, nomod_zero); - STUB(idmap, kidmap_get_mappings, nomod_zero); - STUB(idmap, kidmap_getgidbysid, nomod_zero); - STUB(idmap, kidmap_getpidbysid, nomod_zero); - STUB(idmap, kidmap_getsidbygid, nomod_zero); - STUB(idmap, kidmap_getsidbyuid, nomod_zero); - STUB(idmap, kidmap_getuidbysid, nomod_zero); - STUB(idmap, idmap_get_door, nomod_einval); - STUB(idmap, idmap_unreg_dh, nomod_einval); - STUB(idmap, idmap_reg_dh, nomod_einval); - STUB(idmap, idmap_purge_cache, nomod_einval); - END_MODULE(idmap); -#endif - -/* - * Stubs for auditing. 
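- * All of the c2audit hooks below are NO_UNLOAD_STUBs. As a general
- * note on the stub flavors used in this file: a plain STUB may load
- * the target module on first call, a WSTUB is "weak" and simply
- * returns its nomod_ value when the module is not loaded, and the
- * NO_UNLOAD_ variants additionally keep the module from unloading
- * once it is present (see the macro definitions in sys/modstubs.h
- * for the authoritative semantics).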
- */ -#ifndef C2AUDIT_MODULE - MODULE(c2audit,sys); - NO_UNLOAD_STUB(c2audit, audit_init_module, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_start, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_finish, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit, nomod_zero); - NO_UNLOAD_STUB(c2audit, auditdoor, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_closef, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_core_start, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_core_finish, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_strputmsg, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_savepath, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_anchorpath, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_exit, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_exec, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_symlink, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_symlink_create, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_vncreate_start, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_vncreate_finish, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_enterprom, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_exitprom, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_chdirec, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_setf, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_sock, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_strgetmsg, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_ipc, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_ipcget, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_fdsend, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_fdrecv, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_priv, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_setppriv, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_psecflags, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_devpolicy, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_setfsat_path, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_cryptoadm, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_kssl, nomod_zero); - NO_UNLOAD_STUB(c2audit, audit_pf_policy, nomod_zero); - NO_UNLOAD_STUB(c2audit, au_doormsg, nomod_zero); - NO_UNLOAD_STUB(c2audit, au_uwrite, nomod_zero); - NO_UNLOAD_STUB(c2audit, au_to_arg32, nomod_zero); - NO_UNLOAD_STUB(c2audit, au_free_rec, nomod_zero); - END_MODULE(c2audit); -#endif - -/* - * Stubs for kernel rpc security service module - */ -#ifndef RPCSEC_MODULE - MODULE(rpcsec,misc); - NO_UNLOAD_STUB(rpcsec, sec_clnt_revoke, nomod_zero); - NO_UNLOAD_STUB(rpcsec, authkern_create, nomod_zero); - NO_UNLOAD_STUB(rpcsec, sec_svc_msg, nomod_zero); - NO_UNLOAD_STUB(rpcsec, sec_svc_control, nomod_zero); - END_MODULE(rpcsec); -#endif - -/* - * Stubs for rpc RPCSEC_GSS security service module - */ -#ifndef RPCSEC_GSS_MODULE - MODULE(rpcsec_gss,misc); - NO_UNLOAD_STUB(rpcsec_gss, __svcrpcsec_gss, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_getcred, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_set_callback, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_secget, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_secfree, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_seccreate, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_set_defaults, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_revauth, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_secpurge, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_cleanup, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_get_versions, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_max_data_length, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_svc_max_data_length, nomod_zero); - NO_UNLOAD_STUB(rpcsec_gss, rpc_gss_get_service_type, nomod_zero); - 
END_MODULE(rpcsec_gss); -#endif - -/* - * Stubs for PCI configurator module (misc/pcicfg). - */ -#ifndef PCICFG_MODULE - MODULE(pcicfg,misc); - STUB(pcicfg, pcicfg_configure, 0); - STUB(pcicfg, pcicfg_unconfigure, 0); - END_MODULE(pcicfg); -#endif - -/* - * Stubs for pcieb nexus driver. - */ -#ifndef PCIEB_MODULE - MODULE(pcieb,drv); - STUB(pcieb, pcieb_intel_error_workaround, 0); - END_MODULE(pcieb); -#endif - -#ifndef IWSCN_MODULE - MODULE(iwscn,drv); - STUB(iwscn, srpop, 0); - END_MODULE(iwscn); -#endif - -/* - * Stubs for checkpoint-resume module - */ -#ifndef CPR_MODULE - MODULE(cpr,misc); - STUB(cpr, cpr, 0); - END_MODULE(cpr); -#endif - -/* - * Stubs for kernel probes (tnf module). Not unloadable. - */ -#ifndef TNF_MODULE - MODULE(tnf,drv); - NO_UNLOAD_STUB(tnf, tnf_ref32_1, nomod_zero); - NO_UNLOAD_STUB(tnf, tnf_string_1, nomod_zero); - NO_UNLOAD_STUB(tnf, tnf_opaque_array_1, nomod_zero); - NO_UNLOAD_STUB(tnf, tnf_struct_tag_1, nomod_zero); - NO_UNLOAD_STUB(tnf, tnf_allocate, nomod_zero); - END_MODULE(tnf); -#endif - -/* - * Stubs for i86hvm bootstraping - */ -#ifndef HVM_BOOTSTRAP - MODULE(hvm_bootstrap,misc); - NO_UNLOAD_STUB(hvm_bootstrap, hvmboot_rootconf, nomod_zero); - END_MODULE(hvm_bootstrap); -#endif - -/* - * Clustering: stubs for bootstrapping. - */ -#ifndef CL_BOOTSTRAP - MODULE(cl_bootstrap,misc); - NO_UNLOAD_WSTUB(cl_bootstrap, clboot_modload, nomod_minus_one); - NO_UNLOAD_WSTUB(cl_bootstrap, clboot_loadrootmodules, nomod_zero); - NO_UNLOAD_WSTUB(cl_bootstrap, clboot_rootconf, nomod_zero); - NO_UNLOAD_WSTUB(cl_bootstrap, clboot_mountroot, nomod_zero); - NO_UNLOAD_WSTUB(cl_bootstrap, clconf_init, nomod_zero); - NO_UNLOAD_WSTUB(cl_bootstrap, clconf_get_nodeid, nomod_zero); - NO_UNLOAD_WSTUB(cl_bootstrap, clconf_maximum_nodeid, nomod_zero); - NO_UNLOAD_WSTUB(cl_bootstrap, cluster, nomod_zero); - END_MODULE(cl_bootstrap); -#endif - -/* - * Clustering: stubs for cluster infrastructure. - */ -#ifndef CL_COMM_MODULE - MODULE(cl_comm,misc); - NO_UNLOAD_STUB(cl_comm, cladmin, nomod_minus_one); - END_MODULE(cl_comm); -#endif - -/* - * Clustering: stubs for global file system operations. - */ -#ifndef PXFS_MODULE - MODULE(pxfs,fs); - NO_UNLOAD_WSTUB(pxfs, clpxfs_aio_read, nomod_zero); - NO_UNLOAD_WSTUB(pxfs, clpxfs_aio_write, nomod_zero); - NO_UNLOAD_WSTUB(pxfs, cl_flk_state_transition_notify, nomod_zero); - END_MODULE(pxfs); -#endif - -/* - * Stubs for kernel cryptographic framework module (misc/kcf). 
- */ -#ifndef KCF_MODULE - MODULE(kcf,misc); - NO_UNLOAD_STUB(kcf, crypto_mech2id, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_register_provider, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_unregister_provider, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_provider_notification, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_op_notification, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_kmflag, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_digest, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_digest_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_digest_init, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_digest_init_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_digest_update, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_digest_final, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_digest_key_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt_init, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt_init_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt_update, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt_final, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_decrypt, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_decrypt_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_decrypt_init, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_decrypt_init_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_decrypt_update, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_decrypt_final, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_get_all_mech_info, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_key_check, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_key_check_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_key_derive, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_key_generate, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_key_generate_pair, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_key_unwrap, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_key_wrap, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_verify, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_verify_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_init, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_init_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_update, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_final, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_decrypt, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_verify_decrypt, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_verify_decrypt_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_init, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_init_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_update, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_mac_decrypt_final, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_object_copy, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_object_create, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_object_destroy, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_object_find_final, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_object_find_init, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_object_find, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_object_get_attribute_value, nomod_minus_one); - NO_UNLOAD_STUB(kcf, 
crypto_object_get_size, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_object_set_attribute_value, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_session_close, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_session_login, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_session_logout, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_session_open, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt_mac, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_init, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_init_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_update, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_encrypt_mac_final, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_create_ctx_template, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_destroy_ctx_template, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_get_mech_list, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_free_mech_list, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_cancel_req, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_cancel_ctx, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_bufcall_alloc, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_bufcall_free, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_bufcall, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_unbufcall, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_notify_events, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_unnotify_events, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_get_provider, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_get_provinfo, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_release_provider, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_sign, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_sign_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_sign_init, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_sign_init_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_sign_update, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_sign_final, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_sign_recover, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_sign_recover_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_sign_recover_init_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_verify, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_verify_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_verify_init, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_verify_init_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_verify_update, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_verify_final, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_verify_recover, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_verify_recover_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, crypto_verify_recover_init_prov, nomod_minus_one); - NO_UNLOAD_STUB(kcf, random_add_entropy, nomod_minus_one); - NO_UNLOAD_STUB(kcf, random_add_pseudo_entropy, nomod_minus_one); - NO_UNLOAD_STUB(kcf, random_get_blocking_bytes, nomod_minus_one); - NO_UNLOAD_STUB(kcf, random_get_bytes, nomod_minus_one); - NO_UNLOAD_STUB(kcf, random_get_pseudo_bytes, nomod_minus_one); - END_MODULE(kcf); -#endif - -/* - * Stubs for sha1. A non-unloadable module. - */ -#ifndef SHA1_MODULE - MODULE(sha1,crypto); - NO_UNLOAD_STUB(sha1, SHA1Init, nomod_void); - NO_UNLOAD_STUB(sha1, SHA1Update, nomod_void); - NO_UNLOAD_STUB(sha1, SHA1Final, nomod_void); - END_MODULE(sha1); -#endif - -/* - * The following stubs are used by the mac module. 
- * Since dld already depends on mac, these - * stubs are needed to avoid circular dependencies. - */ -#ifndef DLD_MODULE - MODULE(dld,drv); - STUB(dld, dld_init_ops, nomod_void); - STUB(dld, dld_fini_ops, nomod_void); - STUB(dld, dld_devt_to_instance, nomod_minus_one); - STUB(dld, dld_autopush, nomod_minus_one); - STUB(dld, dld_ioc_register, nomod_einval); - STUB(dld, dld_ioc_unregister, nomod_void); - END_MODULE(dld); -#endif - -/* - * The following stubs are used by the mac module. - * Since dls already depends on mac, these - * stubs are needed to avoid circular dependencies. - */ -#ifndef DLS_MODULE - MODULE(dls,misc); - STUB(dls, dls_devnet_mac, nomod_zero); - STUB(dls, dls_devnet_hold_tmp, nomod_einval); - STUB(dls, dls_devnet_rele_tmp, nomod_void); - STUB(dls, dls_devnet_hold_link, nomod_einval); - STUB(dls, dls_devnet_rele_link, nomod_void); - STUB(dls, dls_devnet_prop_task_wait, nomod_void); - STUB(dls, dls_mgmt_get_linkid, nomod_einval); - STUB(dls, dls_devnet_macname2linkid, nomod_einval); - STUB(dls, dls_mgmt_get_linkinfo, nomod_einval); - END_MODULE(dls); -#endif - -#ifndef SOFTMAC_MODULE - MODULE(softmac,drv); - STUB(softmac, softmac_hold_device, nomod_einval); - STUB(softmac, softmac_rele_device, nomod_void); - STUB(softmac, softmac_recreate, nomod_void); - END_MODULE(softmac); -#endif - -#ifndef IPTUN_MODULE - MODULE(iptun,drv); - STUB(iptun, iptun_create, nomod_einval); - STUB(iptun, iptun_delete, nomod_einval); - STUB(iptun, iptun_set_policy, nomod_void) ; - END_MODULE(iptun); -#endif - -/* - * Stubs for dcopy, for Intel IOAT KAPIs - */ -#ifndef DCOPY_MODULE - MODULE(dcopy,misc); - NO_UNLOAD_STUB(dcopy, dcopy_query, nomod_minus_one); - NO_UNLOAD_STUB(dcopy, dcopy_query_channel, nomod_minus_one); - NO_UNLOAD_STUB(dcopy, dcopy_alloc, nomod_minus_one); - NO_UNLOAD_STUB(dcopy, dcopy_free, nomod_minus_one); - NO_UNLOAD_STUB(dcopy, dcopy_cmd_alloc, nomod_minus_one); - NO_UNLOAD_STUB(dcopy, dcopy_cmd_free, nomod_void); - NO_UNLOAD_STUB(dcopy, dcopy_cmd_post, nomod_minus_one); - NO_UNLOAD_STUB(dcopy, dcopy_cmd_poll, nomod_minus_one); - END_MODULE(dcopy); -#endif - -/* - * Stubs for acpica - */ -#ifndef ACPICA_MODULE - MODULE(acpica,misc); - NO_UNLOAD_STUB(acpica, AcpiOsReadPort, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, AcpiOsWritePort, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, AcpiInstallNotifyHandler, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, AcpiRemoveNotifyHandler, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, AcpiEvaluateObject, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, AcpiEvaluateObjectTyped, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, AcpiWriteBitRegister, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, AcpiReadBitRegister, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, AcpiOsFree, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, acpica_get_handle_cpu, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, acpica_get_global_FADT, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, acpica_write_cpupm_capabilities, - nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, __acpi_wbinvd, nomod_minus_one) ; - NO_UNLOAD_STUB(acpica, acpi_reset_system, nomod_minus_one) ; - END_MODULE(acpica); -#endif - -/* - * Stubs for acpidev - */ -#ifndef ACPIDEV_MODULE - MODULE(acpidev,misc); - NO_UNLOAD_STUB(acpidev, acpidev_dr_get_cpu_numa_info, nomod_minus_one) ; - NO_UNLOAD_STUB(acpidev, acpidev_dr_free_cpu_numa_info, - nomod_minus_one) ; - END_MODULE(acpidev); -#endif - -#ifndef IPNET_MODULE - MODULE(ipnet,drv); - STUB(ipnet, ipnet_if_getdev, nomod_zero); - STUB(ipnet, ipnet_walk_if, nomod_zero); - END_MODULE(ipnet); 
-#endif - -#ifndef IOMMULIB_MODULE - MODULE(iommulib,misc); - STUB(iommulib, iommulib_nex_close, nomod_void); - END_MODULE(iommulib); -#endif - -/* - * Stubs for rootnex nexus driver. - */ -#ifndef ROOTNEX_MODULE - MODULE(rootnex,drv); - STUB(rootnex, immu_init, 0); - STUB(rootnex, immu_startup, 0); - STUB(rootnex, immu_physmem_update, 0); - END_MODULE(rootnex); -#endif - -/* - * Stubs for kernel socket, for iscsi - */ -#ifndef KSOCKET_MODULE - MODULE(ksocket, misc); - NO_UNLOAD_STUB(ksocket, ksocket_setsockopt, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_getsockopt, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_getpeername, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_getsockname, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_socket, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_bind, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_listen, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_accept, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_connect, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_recv, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_recvfrom, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_recvmsg, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_send, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_sendto, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_sendmsg, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_ioctl, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_setcallbacks, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_hold, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_rele, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_shutdown, nomod_minus_one); - NO_UNLOAD_STUB(ksocket, ksocket_close, nomod_minus_one); - END_MODULE(ksocket); -#endif - -/* - * Stubs for elfexec - */ -#ifndef ELFEXEC_MODULE - MODULE(elfexec,exec); - STUB(elfexec, elfexec, nomod_einval); - STUB(elfexec, mapexec_brand, nomod_einval); - STUB(elfexec, elf32exec, nomod_einval); - STUB(elfexec, mapexec32_brand, nomod_einval); - END_MODULE(elfexec); -#endif - -/* - * Stub(s) for APIX module. - */ -#ifndef APIX_MODULE - MODULE(apix,mach); - WSTUB(apix, apix_loaded, nomod_zero); - END_MODULE(apix); -#endif - -/* - * Stubs for ppt module (bhyve PCI passthrough driver) - */ -#ifndef PPT_MODULE - MODULE(ppt,drv); - WSTUB(ppt, ppt_unassign_all, nomod_zero); - WSTUB(ppt, ppt_map_mmio, nomod_einval); - WSTUB(ppt, ppt_unmap_mmio, nomod_einval); - WSTUB(ppt, ppt_setup_msi, nomod_einval); - WSTUB(ppt, ppt_setup_msix, nomod_einval); - WSTUB(ppt, ppt_disable_msix, nomod_einval); - WSTUB(ppt, ppt_assigned_devices, nomod_zero); - WSTUB(ppt, ppt_is_mmio, nomod_zero); - WSTUB(ppt, ppt_assign_device, nomod_einval); - WSTUB(ppt, ppt_unassign_device, nomod_einval); - WSTUB(ppt, ppt_get_limits, nomod_einval); - END_MODULE(ppt); -#endif - -/* - * this is just a marker for the area of text that contains stubs - */ - ENTRY_NP(stubs_end) - nop - diff --git a/usr/src/uts/intel/ia32/ml/ovbcopy.s b/usr/src/uts/intel/ia32/ml/ovbcopy.s deleted file mode 100644 index 0687e67e4b..0000000000 --- a/usr/src/uts/intel/ia32/ml/ovbcopy.s +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright 2019 Joyent, Inc. - */ - -/*- - * Copyright (c) 1993 The Regents of the University of California. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/amd64/amd64/support.S,v 1.102 2003/10/02 05:08:13 alc Exp $ - */ - -#include <sys/asm_linkage.h> - -/* - * Adapted from fbsd bcopy(). - * - * bcopy(src, dst, cnt) - * rdi, rsi, rdx - * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 - */ - - ENTRY(ovbcopy) - xchgq %rsi,%rdi - movq %rdx,%rcx - - movq %rdi,%rax - subq %rsi,%rax - cmpq %rcx,%rax /* overlapping && src < dst? */ - jb reverse - - shrq $3,%rcx /* copy by 64-bit words */ - cld /* nope, copy forwards */ - rep - movsq - movq %rdx,%rcx - andq $7,%rcx /* any bytes left? */ - rep - movsb - ret - -reverse: - addq %rcx,%rdi /* copy backwards */ - addq %rcx,%rsi - decq %rdi - decq %rsi - andq $7,%rcx /* any fractional bytes? */ - std - rep - movsb - movq %rdx,%rcx /* copy remainder by 32-bit words */ - shrq $3,%rcx - subq $7,%rsi - subq $7,%rdi - rep - movsq - cld - ret - SET_SIZE(ovbcopy) - diff --git a/usr/src/uts/intel/ia32/ml/retpoline.s b/usr/src/uts/intel/ia32/ml/retpoline.s deleted file mode 100644 index a68d9504c1..0000000000 --- a/usr/src/uts/intel/ia32/ml/retpoline.s +++ /dev/null @@ -1,211 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright 2019 Joyent, Inc. - */ - - .file "retpoline.s" - -/* - * This file implements the various hooks that are needed for retpolines and - * return stack buffer (RSB) stuffing. 
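- * As a hedged, C-level illustration of why these thunks exist: with
- * retpolines enabled in the compiler (e.g. gcc's
- * -mindirect-branch=thunk-extern, an assumption about build flags
- * rather than something this file dictates), an ordinary indirect
- * call such as
- *
- *	int
- *	dispatch(int (*fn)(int), int arg)
- *	{
- *		return (fn(arg));
- *	}
- *
- * is emitted as a call to __x86_indirect_thunk_<reg> instead of a
- * bare indirect call, and those symbols must resolve to the
- * implementations below.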
For more information, please see the - * 'Speculative Execution CPU Side Channel Security' section of the - * uts/i86pc/os/cpuid.c big theory statement. - */ - -#include <sys/asm_linkage.h> -#include <sys/x86_archext.h> - -#if defined(__amd64) - -/* - * This macro generates the default retpoline entry point that the compiler - * expects. It implements the expected retpoline form. - */ -#define RETPOLINE_MKTHUNK(reg) \ - ENTRY(__x86_indirect_thunk_/**/reg) \ - call 2f; \ -1: \ - pause; \ - lfence; \ - jmp 1b; \ -2: \ - movq %/**/reg, (%rsp); \ - ret; \ - SET_SIZE(__x86_indirect_thunk_/**/reg) - -/* - * This macro generates the default retpoline form. It exists in addition to the - * thunk so if we need to restore the default retpoline behavior to the thunk - * we can. - */ -#define RETPOLINE_MKGENERIC(reg) \ - ENTRY(__x86_indirect_thunk_gen_/**/reg) \ - call 2f; \ -1: \ - pause; \ - lfence; \ - jmp 1b; \ -2: \ - movq %/**/reg, (%rsp); \ - ret; \ - SET_SIZE(__x86_indirect_thunk_gen_/**/reg) - -/* - * This macro generates the AMD optimized form of a retpoline which will be used - * on systems where the lfence dispatch serializing behavior has been changed. - */ -#define RETPOLINE_MKLFENCE(reg) \ - ENTRY(__x86_indirect_thunk_amd_/**/reg) \ - lfence; \ - jmp *%/**/reg; \ - SET_SIZE(__x86_indirect_thunk_amd_/**/reg) - - -/* - * This macro generates the no-op form of the retpoline which will be used if we - * either need to disable retpolines because we have enhanced IBRS or because we - * have been asked to disable mitigations. - */ -#define RETPOLINE_MKJUMP(reg) \ - ENTRY(__x86_indirect_thunk_jmp_/**/reg) \ - jmp *%/**/reg; \ - SET_SIZE(__x86_indirect_thunk_jmp_/**/reg) - - RETPOLINE_MKTHUNK(rax) - RETPOLINE_MKTHUNK(rbx) - RETPOLINE_MKTHUNK(rcx) - RETPOLINE_MKTHUNK(rdx) - RETPOLINE_MKTHUNK(rdi) - RETPOLINE_MKTHUNK(rsi) - RETPOLINE_MKTHUNK(rbp) - RETPOLINE_MKTHUNK(r8) - RETPOLINE_MKTHUNK(r9) - RETPOLINE_MKTHUNK(r10) - RETPOLINE_MKTHUNK(r11) - RETPOLINE_MKTHUNK(r12) - RETPOLINE_MKTHUNK(r13) - RETPOLINE_MKTHUNK(r14) - RETPOLINE_MKTHUNK(r15) - - RETPOLINE_MKGENERIC(rax) - RETPOLINE_MKGENERIC(rbx) - RETPOLINE_MKGENERIC(rcx) - RETPOLINE_MKGENERIC(rdx) - RETPOLINE_MKGENERIC(rdi) - RETPOLINE_MKGENERIC(rsi) - RETPOLINE_MKGENERIC(rbp) - RETPOLINE_MKGENERIC(r8) - RETPOLINE_MKGENERIC(r9) - RETPOLINE_MKGENERIC(r10) - RETPOLINE_MKGENERIC(r11) - RETPOLINE_MKGENERIC(r12) - RETPOLINE_MKGENERIC(r13) - RETPOLINE_MKGENERIC(r14) - RETPOLINE_MKGENERIC(r15) - - RETPOLINE_MKLFENCE(rax) - RETPOLINE_MKLFENCE(rbx) - RETPOLINE_MKLFENCE(rcx) - RETPOLINE_MKLFENCE(rdx) - RETPOLINE_MKLFENCE(rdi) - RETPOLINE_MKLFENCE(rsi) - RETPOLINE_MKLFENCE(rbp) - RETPOLINE_MKLFENCE(r8) - RETPOLINE_MKLFENCE(r9) - RETPOLINE_MKLFENCE(r10) - RETPOLINE_MKLFENCE(r11) - RETPOLINE_MKLFENCE(r12) - RETPOLINE_MKLFENCE(r13) - RETPOLINE_MKLFENCE(r14) - RETPOLINE_MKLFENCE(r15) - - RETPOLINE_MKJUMP(rax) - RETPOLINE_MKJUMP(rbx) - RETPOLINE_MKJUMP(rcx) - RETPOLINE_MKJUMP(rdx) - RETPOLINE_MKJUMP(rdi) - RETPOLINE_MKJUMP(rsi) - RETPOLINE_MKJUMP(rbp) - RETPOLINE_MKJUMP(r8) - RETPOLINE_MKJUMP(r9) - RETPOLINE_MKJUMP(r10) - RETPOLINE_MKJUMP(r11) - RETPOLINE_MKJUMP(r12) - RETPOLINE_MKJUMP(r13) - RETPOLINE_MKJUMP(r14) - RETPOLINE_MKJUMP(r15) - - /* - * The x86_rsb_stuff function is called from pretty arbitrary - * contexts. It's much easier for us to save and restore all the - * registers we touch rather than clobber them for callers. You must - * preserve this property or the system will panic at best. 
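- *
- * For a rough sense of scale (a reading of the loop below, not an
- * architectural guarantee): sixteen iterations with two call sites
- * apiece plant on the order of 32 benign return targets in the RSB
- * before the saved stack pointer is restored.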
- */ - ENTRY(x86_rsb_stuff) - /* - * These nops are present so we can patch a ret instruction if we need - * to disable RSB stuffing because enhanced IBRS is present or we're - * disabling mitigations. - */ - nop - nop - pushq %rdi - pushq %rax - movl $16, %edi - movq %rsp, %rax -rsb_loop: - call 2f -1: - pause - call 1b -2: - call 2f -1: - pause - call 1b -2: - subl $1, %edi - jnz rsb_loop - movq %rax, %rsp - popq %rax - popq %rdi - ret - SET_SIZE(x86_rsb_stuff) - -#elif defined(__i386) - -/* - * While the kernel is 64-bit only, dboot is still 32-bit, so there are a - * limited number of variants that are used for 32-bit. However as dboot is - * short lived and uses them sparingly, we only do the full variant and do not - * have an AMD specific version. - */ - -#define RETPOLINE_MKTHUNK(reg) \ - ENTRY(__x86_indirect_thunk_/**/reg) \ - call 2f; \ -1: \ - pause; \ - lfence; \ - jmp 1b; \ -2: \ - movl %/**/reg, (%esp); \ - ret; \ - SET_SIZE(__x86_indirect_thunk_/**/reg) - - RETPOLINE_MKTHUNK(edi) - RETPOLINE_MKTHUNK(eax) - -#else -#error "Your architecture is in another castle." -#endif diff --git a/usr/src/uts/intel/ia32/ml/sseblk.s b/usr/src/uts/intel/ia32/ml/sseblk.s deleted file mode 100644 index 836b6b6c97..0000000000 --- a/usr/src/uts/intel/ia32/ml/sseblk.s +++ /dev/null @@ -1,280 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright 2019 Joyent, Inc. 
- */ - -#include <sys/asm_linkage.h> -#include <sys/regset.h> -#include <sys/privregs.h> - -#include "assym.h" - -/* - * Do block operations using Streaming SIMD extensions - */ - -#if defined(DEBUG) -#define ASSERT_KPREEMPT_DISABLED(t, r32, msg) \ - movq %gs:CPU_THREAD, t; \ - movsbl T_PREEMPT(t), r32; \ - testl r32, r32; \ - jne 5f; \ - pushq %rbp; \ - movq %rsp, %rbp; \ - leaq msg(%rip), %rdi; \ - xorl %eax, %eax; \ - call panic; \ -5: -#else /* DEBUG */ -#define ASSERT_KPREEMPT_DISABLED(t, r32, msg) -#endif /* DEBUG */ - -#define BLOCKSHIFT 6 -#define BLOCKSIZE 64 /* (1 << BLOCKSHIFT) */ -#define BLOCKMASK 63 /* (BLOCKSIZE - 1) */ - -#if (1 << BLOCKSHIFT) != BLOCKSIZE || BLOCKMASK != (BLOCKSIZE - 1) -#error "mucked up constants" -#endif - -#define SAVE_XMM0(r) \ - SAVE_XMM_PROLOG(r, 1); \ - movdqa %xmm0, (r) - -#define ZERO_LOOP_INIT_XMM(dst) \ - pxor %xmm0, %xmm0 - -#define ZERO_LOOP_BODY_XMM(dst, cnt) \ - movntdq %xmm0, (dst); \ - movntdq %xmm0, 0x10(dst); \ - movntdq %xmm0, 0x20(dst); \ - movntdq %xmm0, 0x30(dst); \ - addq $BLOCKSIZE, dst; \ - subq $1, cnt - -#define ZERO_LOOP_FINI_XMM(dst) \ - mfence - -#define RSTOR_XMM0(r) \ - movdqa 0x0(r), %xmm0; \ - RSTOR_XMM_EPILOG(r, 1) - - /* - * %rdi dst - * %rsi size - * %rax saved %cr0 (#if DEBUG then %eax is t->t_preempt) - * %r8 pointer to %xmm register save area - */ - ENTRY(hwblkclr) - pushq %rbp - movq %rsp, %rbp - testl $BLOCKMASK, %edi /* address must be BLOCKSIZE aligned */ - jne .dobzero - cmpq $BLOCKSIZE, %rsi /* size must be at least BLOCKSIZE */ - jl .dobzero - testq $BLOCKMASK, %rsi /* .. and be a multiple of BLOCKSIZE */ - jne .dobzero - shrq $BLOCKSHIFT, %rsi - - ASSERT_KPREEMPT_DISABLED(%r11, %eax, .not_disabled) - movq %cr0, %rax - clts - testl $CR0_TS, %eax - jnz 1f - - SAVE_XMM0(%r8) -1: ZERO_LOOP_INIT_XMM(%rdi) -9: ZERO_LOOP_BODY_XMM(%rdi, %rsi) - jnz 9b - ZERO_LOOP_FINI_XMM(%rdi) - - testl $CR0_TS, %eax - jnz 2f - RSTOR_XMM0(%r8) -2: movq %rax, %cr0 - leave - ret -.dobzero: - leave - jmp bzero - SET_SIZE(hwblkclr) - - -#define PREFETCH_START(src) \ - prefetchnta 0x0(src); \ - prefetchnta 0x40(src) - -#define SAVE_XMMS(r) \ - SAVE_XMM_PROLOG(r, 8); \ - movdqa %xmm0, (r); \ - movdqa %xmm1, 0x10(r); \ - movdqa %xmm2, 0x20(r); \ - movdqa %xmm3, 0x30(r); \ - movdqa %xmm4, 0x40(r); \ - movdqa %xmm5, 0x50(r); \ - movdqa %xmm6, 0x60(r); \ - movdqa %xmm7, 0x70(r) - -#define COPY_LOOP_INIT_XMM(src) \ - prefetchnta 0x80(src); \ - prefetchnta 0xc0(src); \ - movdqa 0x0(src), %xmm0; \ - movdqa 0x10(src), %xmm1; \ - movdqa 0x20(src), %xmm2; \ - movdqa 0x30(src), %xmm3; \ - movdqa 0x40(src), %xmm4; \ - movdqa 0x50(src), %xmm5; \ - movdqa 0x60(src), %xmm6; \ - movdqa 0x70(src), %xmm7; \ - addq $0x80, src - -#define COPY_LOOP_BODY_XMM(src, dst, cnt) \ - prefetchnta 0x80(src); \ - prefetchnta 0xc0(src); \ - prefetchnta 0x100(src); \ - prefetchnta 0x140(src); \ - movntdq %xmm0, (dst); \ - movntdq %xmm1, 0x10(dst); \ - movntdq %xmm2, 0x20(dst); \ - movntdq %xmm3, 0x30(dst); \ - movdqa 0x0(src), %xmm0; \ - movdqa 0x10(src), %xmm1; \ - movntdq %xmm4, 0x40(dst); \ - movntdq %xmm5, 0x50(dst); \ - movdqa 0x20(src), %xmm2; \ - movdqa 0x30(src), %xmm3; \ - movntdq %xmm6, 0x60(dst); \ - movntdq %xmm7, 0x70(dst); \ - movdqa 0x40(src), %xmm4; \ - movdqa 0x50(src), %xmm5; \ - addq $0x80, dst; \ - movdqa 0x60(src), %xmm6; \ - movdqa 0x70(src), %xmm7; \ - addq $0x80, src; \ - subl $1, cnt - -#define COPY_LOOP_FINI_XMM(dst) \ - movntdq %xmm0, 0x0(dst); \ - movntdq %xmm1, 0x10(dst); \ - movntdq %xmm2, 0x20(dst); \ - movntdq %xmm3, 0x30(dst); \ - 
movntdq %xmm4, 0x40(dst); \ - movntdq %xmm5, 0x50(dst); \ - movntdq %xmm6, 0x60(dst); \ - movntdq %xmm7, 0x70(dst) - -#define RSTOR_XMMS(r) \ - movdqa 0x0(r), %xmm0; \ - movdqa 0x10(r), %xmm1; \ - movdqa 0x20(r), %xmm2; \ - movdqa 0x30(r), %xmm3; \ - movdqa 0x40(r), %xmm4; \ - movdqa 0x50(r), %xmm5; \ - movdqa 0x60(r), %xmm6; \ - movdqa 0x70(r), %xmm7; \ - RSTOR_XMM_EPILOG(r, 8) - - /* - * %rdi src - * %rsi dst - * %rdx #if DEBUG then curthread - * %ecx loop count - * %rax saved %cr0 (#if DEBUG then %eax is t->t_prempt) - * %r8 pointer to %xmm register save area - */ - ENTRY(hwblkpagecopy) - pushq %rbp - movq %rsp, %rbp - PREFETCH_START(%rdi) - /* - * PAGESIZE is 4096, each loop moves 128 bytes, but the initial - * load and final store save us on loop count - */ - movl $_CONST(32 - 1), %ecx - ASSERT_KPREEMPT_DISABLED(%rdx, %eax, .not_disabled) - movq %cr0, %rax - clts - testl $CR0_TS, %eax - jnz 3f - SAVE_XMMS(%r8) -3: COPY_LOOP_INIT_XMM(%rdi) -4: COPY_LOOP_BODY_XMM(%rdi, %rsi, %ecx) - jnz 4b - COPY_LOOP_FINI_XMM(%rsi) - testl $CR0_TS, %eax - jnz 5f - RSTOR_XMMS(%r8) -5: movq %rax, %cr0 - mfence - leave - ret - SET_SIZE(hwblkpagecopy) - - ENTRY(block_zero_no_xmm) - pushq %rbp - movq %rsp, %rbp - xorl %eax, %eax - addq %rsi, %rdi - negq %rsi -1: - movnti %rax, (%rdi, %rsi) - movnti %rax, 8(%rdi, %rsi) - movnti %rax, 16(%rdi, %rsi) - movnti %rax, 24(%rdi, %rsi) - addq $32, %rsi - jnz 1b - mfence - leave - ret - SET_SIZE(block_zero_no_xmm) - - - ENTRY(page_copy_no_xmm) - movq $MMU_STD_PAGESIZE, %rcx - addq %rcx, %rdi - addq %rcx, %rsi - negq %rcx -1: - movq (%rsi, %rcx), %rax - movnti %rax, (%rdi, %rcx) - movq 8(%rsi, %rcx), %rax - movnti %rax, 8(%rdi, %rcx) - movq 16(%rsi, %rcx), %rax - movnti %rax, 16(%rdi, %rcx) - movq 24(%rsi, %rcx), %rax - movnti %rax, 24(%rdi, %rcx) - addq $32, %rcx - jnz 1b - mfence - ret - SET_SIZE(page_copy_no_xmm) - -#if defined(DEBUG) - .text -.not_disabled: - .string "sseblk: preemption not disabled!" -#endif diff --git a/usr/src/uts/intel/ia32/ml/swtch.s b/usr/src/uts/intel/ia32/ml/swtch.s deleted file mode 100644 index b1d577a2a4..0000000000 --- a/usr/src/uts/intel/ia32/ml/swtch.s +++ /dev/null @@ -1,509 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright 2020 Joyent, Inc. - */ - -/* - * Process switching routines. - */ - -#include <sys/asm_linkage.h> -#include <sys/asm_misc.h> -#include <sys/regset.h> -#include <sys/privregs.h> -#include <sys/stack.h> -#include <sys/segments.h> -#include <sys/psw.h> - -#include "assym.h" - -/* - * resume(thread_id_t t); - * - * a thread can only run on one processor at a time. 
there - * exists a window on MPs where the current thread on one - * processor is capable of being dispatched by another processor. - * some overlap between outgoing and incoming threads can happen - * when they are the same thread. in this case where the threads - * are the same, resume() on one processor will spin on the incoming - * thread until resume() on the other processor has finished with - * the outgoing thread. - * - * The MMU context changes when the resuming thread resides in a different - * process. Kernel threads are known by resume to reside in process 0. - * The MMU context, therefore, only changes when resuming a thread in - * a process different from curproc. - * - * resume_from_intr() is called when the thread being resumed was not - * passivated by resume (e.g. was interrupted). This means that the - * resume lock is already held and that a restore context is not needed. - * Also, the MMU context is not changed on the resume in this case. - * - * resume_from_zombie() is the same as resume except the calling thread - * is a zombie and must be put on the deathrow list after the CPU is - * off the stack. - */ - -#if LWP_PCB_FPU != 0 -#error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work -#endif /* LWP_PCB_FPU != 0 */ - -/* - * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15) - * - * The stack frame must be created before the save of %rsp so that tracebacks - * of swtch()ed-out processes show the process as having last called swtch(). - */ -#define SAVE_REGS(thread_t, retaddr) \ - movq %rbp, T_RBP(thread_t); \ - movq %rbx, T_RBX(thread_t); \ - movq %r12, T_R12(thread_t); \ - movq %r13, T_R13(thread_t); \ - movq %r14, T_R14(thread_t); \ - movq %r15, T_R15(thread_t); \ - pushq %rbp; \ - movq %rsp, %rbp; \ - movq %rsp, T_SP(thread_t); \ - movq retaddr, T_PC(thread_t); \ - movq %rdi, %r12; \ - call __dtrace_probe___sched_off__cpu - -/* - * Restore non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15) - * - * We load up %rsp from the label_t as part of the context switch, so - * we don't repeat that here. - * - * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t - * already has the effect of putting the stack back the way it was when - * we came in. - */ -#define RESTORE_REGS(scratch_reg) \ - movq %gs:CPU_THREAD, scratch_reg; \ - movq T_RBP(scratch_reg), %rbp; \ - movq T_RBX(scratch_reg), %rbx; \ - movq T_R12(scratch_reg), %r12; \ - movq T_R13(scratch_reg), %r13; \ - movq T_R14(scratch_reg), %r14; \ - movq T_R15(scratch_reg), %r15 - -/* - * Get pointer to a thread's hat structure - */ -#define GET_THREAD_HATP(hatp, thread_t, scratch_reg) \ - movq T_PROCP(thread_t), hatp; \ - movq P_AS(hatp), scratch_reg; \ - movq A_HAT(scratch_reg), hatp - -#define TSC_READ() \ - call tsc_read; \ - movq %rax, %r14; - -/* - * If we are resuming an interrupt thread, store a timestamp in the thread - * structure. If an interrupt occurs between tsc_read() and its subsequent - * store, the timestamp will be stale by the time it is stored. We can detect - * this by doing a compare-and-swap on the thread's timestamp, since any - * interrupt occurring in this window will put a new timestamp in the thread's - * t_intr_start field. 
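- *
- * A rough C analogue of that retry (illustrative only; atomic_cas_64()
- * is the kernel's usual compare-and-swap primitive):
- *
- *	do {
- *		ts = tsc_read();
- *		old = t->t_intr_start;
- *	} while (atomic_cas_64(&t->t_intr_start, old, ts) != old);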
- */ -#define STORE_INTR_START(thread_t) \ - testw $T_INTR_THREAD, T_FLAGS(thread_t); \ - jz 1f; \ -0: \ - TSC_READ(); \ - movq T_INTR_START(thread_t), %rax; \ - cmpxchgq %r14, T_INTR_START(thread_t); \ - jnz 0b; \ -1: - - .global kpti_enable - - ENTRY(resume) - movq %gs:CPU_THREAD, %rax - leaq resume_return(%rip), %r11 - - /* - * Deal with SMAP here. A thread may be switched out at any point while - * it is executing. The thread could be under on_fault() or it could be - * pre-empted while performing a copy interruption. If this happens and - * we're not in the context of an interrupt which happens to handle - * saving and restoring rflags correctly, we may lose our SMAP related - * state. - * - * To handle this, as part of being switched out, we first save whether - * or not userland access is allowed ($PS_ACHK in rflags) and store that - * in t_useracc on the kthread_t and unconditionally enable SMAP to - * protect the system. - * - * Later, when the thread finishes resuming, we potentially disable smap - * if PS_ACHK was present in rflags. See uts/intel/ia32/ml/copy.s for - * more information on rflags and SMAP. - */ - pushfq - popq %rsi - andq $PS_ACHK, %rsi - movq %rsi, T_USERACC(%rax) - call smap_enable - - /* - * Take a moment to potentially clear the RSB buffer. This is done to - * prevent various Spectre variant 2 and SpectreRSB attacks. This may - * not be sufficient. Please see uts/intel/ia32/ml/retpoline.s for more - * information about this. - */ - call x86_rsb_stuff - - /* - * Save non-volatile registers, and set return address for current - * thread to resume_return. - * - * %r12 = t (new thread) when done - */ - SAVE_REGS(%rax, %r11) - - - LOADCPU(%r15) /* %r15 = CPU */ - movq CPU_THREAD(%r15), %r13 /* %r13 = curthread */ - - /* - * Call savectx if thread has installed context ops. - * - * Note that if we have floating point context, the save op - * (either fpsave_begin or fpxsave_begin) will issue the - * async save instruction (fnsave or fxsave respectively) - * that we fwait for below. - */ - cmpq $0, T_CTX(%r13) /* should current thread savectx? */ - je .nosavectx /* skip call when zero */ - - movq %r13, %rdi /* arg = thread pointer */ - call savectx /* call ctx ops */ -.nosavectx: - - /* - * Check that the curthread is not using the FPU while in the kernel. - */ - call kernel_fpu_no_swtch - - /* - * Call savepctx if process has installed context ops. - */ - movq T_PROCP(%r13), %r14 /* %r14 = proc */ - cmpq $0, P_PCTX(%r14) /* should current thread savepctx? */ - je .nosavepctx /* skip call when zero */ - - movq %r14, %rdi /* arg = proc pointer */ - call savepctx /* call ctx ops */ -.nosavepctx: - - /* - * Temporarily switch to the idle thread's stack - */ - movq CPU_IDLE_THREAD(%r15), %rax /* idle thread pointer */ - - /* - * Set the idle thread as the current thread - */ - movq T_SP(%rax), %rsp /* It is safe to set rsp */ - movq %rax, CPU_THREAD(%r15) - - /* - * Switch in the hat context for the new thread - * - */ - GET_THREAD_HATP(%rdi, %r12, %r11) - call hat_switch - - /* - * Clear and unlock previous thread's t_lock - * to allow it to be dispatched by another processor. - */ - movb $0, T_LOCK(%r13) - - /* - * IMPORTANT: Registers at this point must be: - * %r12 = new thread - * - * Here we are in the idle thread, have dropped the old thread. - */ - ALTENTRY(_resume_from_idle) - /* - * spin until dispatched thread's mutex has - * been unlocked. this mutex is unlocked when - * it becomes safe for the thread to run. 
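- *
- * In C terms the acquisition below is roughly
- *
- *	while (!lock_try(&t->t_lock))
- *		while (LOCK_HELD(&t->t_lock))
- *			SMT_PAUSE();
- *
- * with lock_try() and LOCK_HELD() in their sys/machlock.h senses and
- * SMT_PAUSE() standing in for the pause hint; treat this as a sketch,
- * not a drop-in replacement.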
- */ -.lock_thread_mutex: - lock - btsl $0, T_LOCK(%r12) /* attempt to lock new thread's mutex */ - jnc .thread_mutex_locked /* got it */ - -.spin_thread_mutex: - pause - cmpb $0, T_LOCK(%r12) /* check mutex status */ - jz .lock_thread_mutex /* clear, retry lock */ - jmp .spin_thread_mutex /* still locked, spin... */ - -.thread_mutex_locked: - /* - * Fix CPU structure to indicate new running thread. - * Set pointer in new thread to the CPU structure. - */ - LOADCPU(%r13) /* load current CPU pointer */ - cmpq %r13, T_CPU(%r12) - je .setup_cpu - - /* cp->cpu_stats.sys.cpumigrate++ */ - incq CPU_STATS_SYS_CPUMIGRATE(%r13) - movq %r13, T_CPU(%r12) /* set new thread's CPU pointer */ - -.setup_cpu: - /* - * Setup rsp0 (kernel stack) in TSS to curthread's saved regs - * structure. If this thread doesn't have a regs structure above - * the stack -- that is, if lwp_stk_init() was never called for the - * thread -- this will set rsp0 to the wrong value, but it's harmless - * as it's a kernel thread, and it won't actually attempt to implicitly - * use the rsp0 via a privilege change. - * - * Note that when we have KPTI enabled on amd64, we never use this - * value at all (since all the interrupts have an IST set). - */ - movq CPU_TSS(%r13), %r14 -#if !defined(__xpv) - cmpq $1, kpti_enable - jne 1f - leaq CPU_KPTI_TR_RSP(%r13), %rax - jmp 2f -1: - movq T_STACK(%r12), %rax - addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */ -2: - movq %rax, TSS_RSP0(%r14) -#else - movq T_STACK(%r12), %rax - addq $REGSIZE+MINFRAME, %rax /* to the bottom of thread stack */ - movl $KDS_SEL, %edi - movq %rax, %rsi - call HYPERVISOR_stack_switch -#endif /* __xpv */ - - movq %r12, CPU_THREAD(%r13) /* set CPU's thread pointer */ - mfence /* synchronize with mutex_exit() */ - xorl %ebp, %ebp /* make $<threadlist behave better */ - movq T_LWP(%r12), %rax /* set associated lwp to */ - movq %rax, CPU_LWP(%r13) /* CPU's lwp ptr */ - - movq T_SP(%r12), %rsp /* switch to outgoing thread's stack */ - movq T_PC(%r12), %r13 /* saved return addr */ - - /* - * Call restorectx if context ops have been installed. - */ - cmpq $0, T_CTX(%r12) /* should resumed thread restorectx? */ - jz .norestorectx /* skip call when zero */ - movq %r12, %rdi /* arg = thread pointer */ - call restorectx /* call ctx ops */ -.norestorectx: - - /* - * Call restorepctx if context ops have been installed for the proc. - */ - movq T_PROCP(%r12), %rcx - cmpq $0, P_PCTX(%rcx) - jz .norestorepctx - movq %rcx, %rdi - call restorepctx -.norestorepctx: - - STORE_INTR_START(%r12) - - /* - * If we came into swtch with the ability to access userland pages, go - * ahead and restore that fact by disabling SMAP. Clear the indicator - * flag out of paranoia. - */ - movq T_USERACC(%r12), %rax /* should we disable smap? */ - cmpq $0, %rax /* skip call when zero */ - jz .nosmap - xorq %rax, %rax - movq %rax, T_USERACC(%r12) - call smap_disable -.nosmap: - - call smt_mark - - /* - * Restore non-volatile registers, then have spl0 return to the - * resuming thread's PC after first setting the priority as low as - * possible and blocking all interrupt threads that may be active. 
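- *
- * (Mechanically, the code below pushes the saved PC as a return
- * address and then tail-jumps to spl0, so spl0's own ret transfers
- * control straight into the resuming thread, as though that thread
- * had called spl0() itself.)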
- */ - movq %r13, %rax /* save return address */ - RESTORE_REGS(%r11) - pushq %rax /* push return address for spl0() */ - call __dtrace_probe___sched_on__cpu - jmp spl0 - -resume_return: - /* - * Remove stack frame created in SAVE_REGS() - */ - addq $CLONGSIZE, %rsp - ret - SET_SIZE(_resume_from_idle) - SET_SIZE(resume) - - ENTRY(resume_from_zombie) - movq %gs:CPU_THREAD, %rax - leaq resume_from_zombie_return(%rip), %r11 - - /* - * Save non-volatile registers, and set return address for current - * thread to resume_from_zombie_return. - * - * %r12 = t (new thread) when done - */ - SAVE_REGS(%rax, %r11) - - movq %gs:CPU_THREAD, %r13 /* %r13 = curthread */ - - /* clean up the fp unit. It might be left enabled */ - -#if defined(__xpv) /* XXPV XXtclayton */ - /* - * Remove this after bringup. - * (Too many #gp's for an instrumented hypervisor.) - */ - STTS(%rax) -#else - movq %cr0, %rax - testq $CR0_TS, %rax - jnz .zfpu_disabled /* if TS already set, nothing to do */ - fninit /* init fpu & discard pending error */ - orq $CR0_TS, %rax - movq %rax, %cr0 -.zfpu_disabled: - -#endif /* __xpv */ - - /* - * Temporarily switch to the idle thread's stack so that the zombie - * thread's stack can be reclaimed by the reaper. - */ - movq %gs:CPU_IDLE_THREAD, %rax /* idle thread pointer */ - movq T_SP(%rax), %rsp /* get onto idle thread stack */ - - /* - * Sigh. If the idle thread has never run thread_start() - * then t_sp is mis-aligned by thread_load(). - */ - andq $_BITNOT(STACK_ALIGN-1), %rsp - - /* - * Set the idle thread as the current thread. - */ - movq %rax, %gs:CPU_THREAD - - /* switch in the hat context for the new thread */ - GET_THREAD_HATP(%rdi, %r12, %r11) - call hat_switch - - /* - * Put the zombie on death-row. - */ - movq %r13, %rdi - call reapq_add - - jmp _resume_from_idle /* finish job of resume */ - -resume_from_zombie_return: - RESTORE_REGS(%r11) /* restore non-volatile registers */ - call __dtrace_probe___sched_on__cpu - - /* - * Remove stack frame created in SAVE_REGS() - */ - addq $CLONGSIZE, %rsp - ret - SET_SIZE(resume_from_zombie) - - ENTRY(resume_from_intr) - movq %gs:CPU_THREAD, %rax - leaq resume_from_intr_return(%rip), %r11 - - /* - * Save non-volatile registers, and set return address for current - * thread to resume_from_intr_return. - * - * %r12 = t (new thread) when done - */ - SAVE_REGS(%rax, %r11) - - movq %gs:CPU_THREAD, %r13 /* %r13 = curthread */ - movq %r12, %gs:CPU_THREAD /* set CPU's thread pointer */ - mfence /* synchronize with mutex_exit() */ - movq T_SP(%r12), %rsp /* restore resuming thread's sp */ - xorl %ebp, %ebp /* make $<threadlist behave better */ - - /* - * Unlock outgoing thread's mutex dispatched by another processor. - */ - xorl %eax, %eax - xchgb %al, T_LOCK(%r13) - - STORE_INTR_START(%r12) - - call smt_mark - - /* - * Restore non-volatile registers, then have spl0 return to the - * resuming thread's PC after first setting the priority as low as - * possible and blocking all interrupt threads that may be active. - */ - movq T_PC(%r12), %rax /* saved return addr */ - RESTORE_REGS(%r11); - pushq %rax /* push return address for spl0() */ - call __dtrace_probe___sched_on__cpu - jmp spl0 - -resume_from_intr_return: - /* - * Remove stack frame created in SAVE_REGS() - */ - addq $CLONGSIZE, %rsp - ret - SET_SIZE(resume_from_intr) - - ENTRY(thread_start) - popq %rax /* start() */ - popq %rdi /* arg */ - popq %rsi /* len */ - movq %rsp, %rbp - INDIRECT_CALL_REG(rax) - call thread_exit /* destroy thread if it returns. 
*/ - /*NOTREACHED*/ - SET_SIZE(thread_start) diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c deleted file mode 100644 index 14d20bb487..0000000000 --- a/usr/src/uts/intel/ia32/os/archdep.c +++ /dev/null @@ -1,1240 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ -/* - * Copyright (c) 2018, Joyent, Inc. - * Copyright 2012 Nexenta Systems, Inc. All rights reserved. - */ - -#include <sys/param.h> -#include <sys/types.h> -#include <sys/vmparam.h> -#include <sys/systm.h> -#include <sys/signal.h> -#include <sys/stack.h> -#include <sys/regset.h> -#include <sys/privregs.h> -#include <sys/frame.h> -#include <sys/proc.h> -#include <sys/psw.h> -#include <sys/siginfo.h> -#include <sys/cpuvar.h> -#include <sys/asm_linkage.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/bootconf.h> -#include <sys/archsystm.h> -#include <sys/debug.h> -#include <sys/elf.h> -#include <sys/spl.h> -#include <sys/time.h> -#include <sys/atomic.h> -#include <sys/sysmacros.h> -#include <sys/cmn_err.h> -#include <sys/modctl.h> -#include <sys/kobj.h> -#include <sys/panic.h> -#include <sys/reboot.h> -#include <sys/time.h> -#include <sys/fp.h> -#include <sys/x86_archext.h> -#include <sys/auxv.h> -#include <sys/auxv_386.h> -#include <sys/dtrace.h> -#include <sys/brand.h> -#include <sys/machbrand.h> -#include <sys/cmn_err.h> - -/* - * Map an fnsave-formatted save area into an fxsave-formatted save area. - * - * Most fields are the same width, content and semantics. However - * the tag word is compressed. - */ -static void -fnsave_to_fxsave(const struct fnsave_state *fn, struct fxsave_state *fx) -{ - uint_t i, tagbits; - - fx->fx_fcw = fn->f_fcw; - fx->fx_fsw = fn->f_fsw; - - /* - * copy element by element (because of holes) - */ - for (i = 0; i < 8; i++) - bcopy(&fn->f_st[i].fpr_16[0], &fx->fx_st[i].fpr_16[0], - sizeof (fn->f_st[0].fpr_16)); /* 80-bit x87-style floats */ - - /* - * synthesize compressed tag bits - */ - fx->fx_fctw = 0; - for (tagbits = fn->f_ftw, i = 0; i < 8; i++, tagbits >>= 2) - if ((tagbits & 3) != 3) - fx->fx_fctw |= (1 << i); - - fx->fx_fop = fn->f_fop; - - fx->fx_rip = (uint64_t)fn->f_eip; - fx->fx_rdp = (uint64_t)fn->f_dp; -} - -/* - * Map from an fxsave-format save area to an fnsave-format save area. 
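- *
- * A worked example of the tag-word conversion (illustrative): with
- * only %st0 in use, the uncompressed tag word is 0xfffc (two bits
- * per register, 11 meaning empty and 00 meaning valid), which
- * compresses to the fxsave tag byte 0x01 (one bit per register,
- * 1 meaning not empty). Decompression must also reconstitute the
- * valid/zero/special distinction, which is why the code below
- * examines each register's sign, exponent and fraction bits.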
- */ -static void -fxsave_to_fnsave(const struct fxsave_state *fx, struct fnsave_state *fn) -{ - uint_t i, top, tagbits; - - fn->f_fcw = fx->fx_fcw; - fn->__f_ign0 = 0; - fn->f_fsw = fx->fx_fsw; - fn->__f_ign1 = 0; - - top = (fx->fx_fsw & FPS_TOP) >> 11; - - /* - * copy element by element (because of holes) - */ - for (i = 0; i < 8; i++) - bcopy(&fx->fx_st[i].fpr_16[0], &fn->f_st[i].fpr_16[0], - sizeof (fn->f_st[0].fpr_16)); /* 80-bit x87-style floats */ - - /* - * synthesize uncompressed tag bits - */ - fn->f_ftw = 0; - for (tagbits = fx->fx_fctw, i = 0; i < 8; i++, tagbits >>= 1) { - uint_t ibit, expo; - const uint16_t *fpp; - static const uint16_t zero[5] = { 0, 0, 0, 0, 0 }; - - if ((tagbits & 1) == 0) { - fn->f_ftw |= 3 << (i << 1); /* empty */ - continue; - } - - /* - * (tags refer to *physical* registers) - */ - fpp = &fx->fx_st[(i - top + 8) & 7].fpr_16[0]; - ibit = fpp[3] >> 15; - expo = fpp[4] & 0x7fff; - - if (ibit && expo != 0 && expo != 0x7fff) - continue; /* valid fp number */ - - if (bcmp(fpp, &zero, sizeof (zero))) - fn->f_ftw |= 2 << (i << 1); /* NaN */ - else - fn->f_ftw |= 1 << (i << 1); /* fp zero */ - } - - fn->f_fop = fx->fx_fop; - - fn->__f_ign2 = 0; - fn->f_eip = (uint32_t)fx->fx_rip; - fn->f_cs = U32CS_SEL; - fn->f_dp = (uint32_t)fx->fx_rdp; - fn->f_ds = UDS_SEL; - fn->__f_ign3 = 0; -} - -/* - * Map from an fpregset_t into an fxsave-format save area - */ -static void -fpregset_to_fxsave(const fpregset_t *fp, struct fxsave_state *fx) -{ - bcopy(fp, fx, sizeof (*fx)); - /* - * avoid useless #gp exceptions - mask reserved bits - */ - fx->fx_mxcsr &= sse_mxcsr_mask; -} - -/* - * Map from an fxsave-format save area into a fpregset_t - */ -static void -fxsave_to_fpregset(const struct fxsave_state *fx, fpregset_t *fp) -{ - bcopy(fx, fp, sizeof (*fx)); -} - -#if defined(_SYSCALL32_IMPL) -static void -fpregset32_to_fxsave(const fpregset32_t *fp, struct fxsave_state *fx) -{ - const struct fpchip32_state *fc = &fp->fp_reg_set.fpchip_state; - - fnsave_to_fxsave((const struct fnsave_state *)fc, fx); - /* - * avoid useless #gp exceptions - mask reserved bits - */ - fx->fx_mxcsr = sse_mxcsr_mask & fc->mxcsr; - bcopy(&fc->xmm[0], &fx->fx_xmm[0], sizeof (fc->xmm)); -} - -static void -fxsave_to_fpregset32(const struct fxsave_state *fx, fpregset32_t *fp) -{ - struct fpchip32_state *fc = &fp->fp_reg_set.fpchip_state; - - fxsave_to_fnsave(fx, (struct fnsave_state *)fc); - fc->mxcsr = fx->fx_mxcsr; - bcopy(&fx->fx_xmm[0], &fc->xmm[0], sizeof (fc->xmm)); -} - -static void -fpregset_nto32(const fpregset_t *src, fpregset32_t *dst) -{ - fxsave_to_fpregset32((struct fxsave_state *)src, dst); - dst->fp_reg_set.fpchip_state.status = - src->fp_reg_set.fpchip_state.status; - dst->fp_reg_set.fpchip_state.xstatus = - src->fp_reg_set.fpchip_state.xstatus; -} - -static void -fpregset_32ton(const fpregset32_t *src, fpregset_t *dst) -{ - fpregset32_to_fxsave(src, (struct fxsave_state *)dst); - dst->fp_reg_set.fpchip_state.status = - src->fp_reg_set.fpchip_state.status; - dst->fp_reg_set.fpchip_state.xstatus = - src->fp_reg_set.fpchip_state.xstatus; -} -#endif - -/* - * Set floating-point registers from a native fpregset_t. - */ -void -setfpregs(klwp_t *lwp, fpregset_t *fp) -{ - struct fpu_ctx *fpu = &lwp->lwp_pcb.pcb_fpu; - - if (fpu->fpu_flags & FPU_EN) { - if (!(fpu->fpu_flags & FPU_VALID)) { - /* - * FPU context is still active, release the - * ownership. 
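-			 * Dropping ownership here ensures that the state
-			 * installed below, not a later save of the live
-			 * FPU context, is what the lwp resumes with.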
-			 */
-			fp_free(fpu, 0);
-		}
-	}
-	/*
-	 * Else: if we are trying to change the FPU state of a thread which
-	 * hasn't yet initialized floating point, store the state in
-	 * the pcb and indicate that the state is valid.  When the
-	 * thread enables floating point, it will use this state instead
-	 * of the default state.
-	 */
-
-	switch (fp_save_mech) {
-	case FP_FXSAVE:
-		fpregset_to_fxsave(fp, fpu->fpu_regs.kfpu_u.kfpu_fx);
-		fpu->fpu_regs.kfpu_xstatus =
-		    fp->fp_reg_set.fpchip_state.xstatus;
-		break;
-
-	case FP_XSAVE:
-		fpregset_to_fxsave(fp,
-		    &fpu->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave);
-		fpu->fpu_regs.kfpu_xstatus =
-		    fp->fp_reg_set.fpchip_state.xstatus;
-		fpu->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |=
-		    (XFEATURE_LEGACY_FP | XFEATURE_SSE);
-		break;
-	default:
-		panic("Invalid fp_save_mech");
-		/*NOTREACHED*/
-	}
-
-	fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status;
-	fpu->fpu_flags |= FPU_VALID;
-	PCB_SET_UPDATE_FPU(&lwp->lwp_pcb);
-}
-
-/*
- * Get floating-point registers into a native fpregset_t.
- */
-void
-getfpregs(klwp_t *lwp, fpregset_t *fp)
-{
-	struct fpu_ctx *fpu = &lwp->lwp_pcb.pcb_fpu;
-
-	kpreempt_disable();
-	if (fpu->fpu_flags & FPU_EN) {
-		/*
-		 * If we have FPU hw and the thread's pcb doesn't have
-		 * a valid FPU state then get the state from the hw.
-		 */
-		if (fpu_exists && ttolwp(curthread) == lwp &&
-		    !(fpu->fpu_flags & FPU_VALID))
-			fp_save(fpu);	/* get the current FPU state */
-	}
-
-	/*
-	 * There are 3 possible cases we have to be aware of here:
-	 *
-	 * 1. FPU is enabled.  FPU state is stored in the current LWP.
-	 *
-	 * 2. FPU is not enabled, and there have been no intervening /proc
-	 *    modifications.  Return initial FPU state.
-	 *
-	 * 3. FPU is not enabled, but a /proc consumer has modified FPU state.
-	 *    FPU state is stored in the current LWP.
-	 */
-	if ((fpu->fpu_flags & FPU_EN) || (fpu->fpu_flags & FPU_VALID)) {
-		/*
-		 * Cases 1 and 3.
-		 */
-		switch (fp_save_mech) {
-		case FP_FXSAVE:
-			fxsave_to_fpregset(fpu->fpu_regs.kfpu_u.kfpu_fx, fp);
-			fp->fp_reg_set.fpchip_state.xstatus =
-			    fpu->fpu_regs.kfpu_xstatus;
-			break;
-		case FP_XSAVE:
-			fxsave_to_fpregset(
-			    &fpu->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave, fp);
-			fp->fp_reg_set.fpchip_state.xstatus =
-			    fpu->fpu_regs.kfpu_xstatus;
-			break;
-		default:
-			panic("Invalid fp_save_mech");
-			/*NOTREACHED*/
-		}
-		fp->fp_reg_set.fpchip_state.status = fpu->fpu_regs.kfpu_status;
-	} else {
-		/*
-		 * Case 2.
-		 */
-		switch (fp_save_mech) {
-		case FP_FXSAVE:
-		case FP_XSAVE:
-			/*
-			 * For now, we don't have any AVX specific field in ABI.
-			 * If we add any in the future, we need to initialize
-			 * them as well.
-			 */
-			fxsave_to_fpregset(&sse_initial, fp);
-			fp->fp_reg_set.fpchip_state.xstatus =
-			    fpu->fpu_regs.kfpu_xstatus;
-			break;
-		default:
-			panic("Invalid fp_save_mech");
-			/*NOTREACHED*/
-		}
-		fp->fp_reg_set.fpchip_state.status = fpu->fpu_regs.kfpu_status;
-	}
-	kpreempt_enable();
-}
-
-#if defined(_SYSCALL32_IMPL)
-
-/*
- * Set floating-point registers from an fpregset32_t.
- */
-void
-setfpregs32(klwp_t *lwp, fpregset32_t *fp)
-{
-	fpregset_t fpregs;
-
-	fpregset_32ton(fp, &fpregs);
-	setfpregs(lwp, &fpregs);
-}
-
-/*
- * Get floating-point registers into an fpregset32_t.
- */ -void -getfpregs32(klwp_t *lwp, fpregset32_t *fp) -{ - fpregset_t fpregs; - - getfpregs(lwp, &fpregs); - fpregset_nto32(&fpregs, fp); -} - -#endif /* _SYSCALL32_IMPL */ - -/* - * Return the general registers - */ -void -getgregs(klwp_t *lwp, gregset_t grp) -{ - struct regs *rp = lwptoregs(lwp); - struct pcb *pcb = &lwp->lwp_pcb; - int thisthread = lwptot(lwp) == curthread; - - grp[REG_RDI] = rp->r_rdi; - grp[REG_RSI] = rp->r_rsi; - grp[REG_RDX] = rp->r_rdx; - grp[REG_RCX] = rp->r_rcx; - grp[REG_R8] = rp->r_r8; - grp[REG_R9] = rp->r_r9; - grp[REG_RAX] = rp->r_rax; - grp[REG_RBX] = rp->r_rbx; - grp[REG_RBP] = rp->r_rbp; - grp[REG_R10] = rp->r_r10; - grp[REG_R11] = rp->r_r11; - grp[REG_R12] = rp->r_r12; - grp[REG_R13] = rp->r_r13; - grp[REG_R14] = rp->r_r14; - grp[REG_R15] = rp->r_r15; - grp[REG_FSBASE] = pcb->pcb_fsbase; - grp[REG_GSBASE] = pcb->pcb_gsbase; - if (thisthread) - kpreempt_disable(); - if (PCB_NEED_UPDATE_SEGS(pcb)) { - grp[REG_DS] = pcb->pcb_ds; - grp[REG_ES] = pcb->pcb_es; - grp[REG_FS] = pcb->pcb_fs; - grp[REG_GS] = pcb->pcb_gs; - } else { - grp[REG_DS] = rp->r_ds; - grp[REG_ES] = rp->r_es; - grp[REG_FS] = rp->r_fs; - grp[REG_GS] = rp->r_gs; - } - if (thisthread) - kpreempt_enable(); - grp[REG_TRAPNO] = rp->r_trapno; - grp[REG_ERR] = rp->r_err; - grp[REG_RIP] = rp->r_rip; - grp[REG_CS] = rp->r_cs; - grp[REG_SS] = rp->r_ss; - grp[REG_RFL] = rp->r_rfl; - grp[REG_RSP] = rp->r_rsp; -} - -#if defined(_SYSCALL32_IMPL) - -void -getgregs32(klwp_t *lwp, gregset32_t grp) -{ - struct regs *rp = lwptoregs(lwp); - struct pcb *pcb = &lwp->lwp_pcb; - int thisthread = lwptot(lwp) == curthread; - - if (thisthread) - kpreempt_disable(); - if (PCB_NEED_UPDATE_SEGS(pcb)) { - grp[GS] = (uint16_t)pcb->pcb_gs; - grp[FS] = (uint16_t)pcb->pcb_fs; - grp[DS] = (uint16_t)pcb->pcb_ds; - grp[ES] = (uint16_t)pcb->pcb_es; - } else { - grp[GS] = (uint16_t)rp->r_gs; - grp[FS] = (uint16_t)rp->r_fs; - grp[DS] = (uint16_t)rp->r_ds; - grp[ES] = (uint16_t)rp->r_es; - } - if (thisthread) - kpreempt_enable(); - grp[EDI] = (greg32_t)rp->r_rdi; - grp[ESI] = (greg32_t)rp->r_rsi; - grp[EBP] = (greg32_t)rp->r_rbp; - grp[ESP] = 0; - grp[EBX] = (greg32_t)rp->r_rbx; - grp[EDX] = (greg32_t)rp->r_rdx; - grp[ECX] = (greg32_t)rp->r_rcx; - grp[EAX] = (greg32_t)rp->r_rax; - grp[TRAPNO] = (greg32_t)rp->r_trapno; - grp[ERR] = (greg32_t)rp->r_err; - grp[EIP] = (greg32_t)rp->r_rip; - grp[CS] = (uint16_t)rp->r_cs; - grp[EFL] = (greg32_t)rp->r_rfl; - grp[UESP] = (greg32_t)rp->r_rsp; - grp[SS] = (uint16_t)rp->r_ss; -} - -void -ucontext_32ton(const ucontext32_t *src, ucontext_t *dst) -{ - mcontext_t *dmc = &dst->uc_mcontext; - const mcontext32_t *smc = &src->uc_mcontext; - - bzero(dst, sizeof (*dst)); - dst->uc_flags = src->uc_flags; - dst->uc_link = (ucontext_t *)(uintptr_t)src->uc_link; - - bcopy(&src->uc_sigmask, &dst->uc_sigmask, sizeof (dst->uc_sigmask)); - - dst->uc_stack.ss_sp = (void *)(uintptr_t)src->uc_stack.ss_sp; - dst->uc_stack.ss_size = (size_t)src->uc_stack.ss_size; - dst->uc_stack.ss_flags = src->uc_stack.ss_flags; - - dmc->gregs[REG_GS] = (greg_t)(uint32_t)smc->gregs[GS]; - dmc->gregs[REG_FS] = (greg_t)(uint32_t)smc->gregs[FS]; - dmc->gregs[REG_ES] = (greg_t)(uint32_t)smc->gregs[ES]; - dmc->gregs[REG_DS] = (greg_t)(uint32_t)smc->gregs[DS]; - dmc->gregs[REG_RDI] = (greg_t)(uint32_t)smc->gregs[EDI]; - dmc->gregs[REG_RSI] = (greg_t)(uint32_t)smc->gregs[ESI]; - dmc->gregs[REG_RBP] = (greg_t)(uint32_t)smc->gregs[EBP]; - dmc->gregs[REG_RBX] = (greg_t)(uint32_t)smc->gregs[EBX]; - dmc->gregs[REG_RDX] = 
(greg_t)(uint32_t)smc->gregs[EDX];
-	dmc->gregs[REG_RCX] = (greg_t)(uint32_t)smc->gregs[ECX];
-	dmc->gregs[REG_RAX] = (greg_t)(uint32_t)smc->gregs[EAX];
-	dmc->gregs[REG_TRAPNO] = (greg_t)(uint32_t)smc->gregs[TRAPNO];
-	dmc->gregs[REG_ERR] = (greg_t)(uint32_t)smc->gregs[ERR];
-	dmc->gregs[REG_RIP] = (greg_t)(uint32_t)smc->gregs[EIP];
-	dmc->gregs[REG_CS] = (greg_t)(uint32_t)smc->gregs[CS];
-	dmc->gregs[REG_RFL] = (greg_t)(uint32_t)smc->gregs[EFL];
-	dmc->gregs[REG_RSP] = (greg_t)(uint32_t)smc->gregs[UESP];
-	dmc->gregs[REG_SS] = (greg_t)(uint32_t)smc->gregs[SS];
-
-	/*
-	 * A valid fpregs is only copied in if uc.uc_flags has UC_FPU set;
-	 * otherwise there is no guarantee that anything in fpregs is valid.
-	 */
-	if (src->uc_flags & UC_FPU)
-		fpregset_32ton(&src->uc_mcontext.fpregs,
-		    &dst->uc_mcontext.fpregs);
-}
-
-#endif	/* _SYSCALL32_IMPL */
-
-/*
- * Return the user-level PC.
- * If in a system call, return the address of the syscall trap.
- */
-greg_t
-getuserpc()
-{
-	greg_t upc = lwptoregs(ttolwp(curthread))->r_pc;
-	uint32_t insn;
-
-	if (curthread->t_sysnum == 0)
-		return (upc);
-
-	/*
-	 * We might've gotten here from sysenter (0xf 0x34),
-	 * syscall (0xf 0x5) or lcall (0x9a 0 0 0 0 0x27 0).
-	 *
-	 * Go peek at the binary to figure it out..
-	 */
-	if (fuword32((void *)(upc - 2), &insn) != -1 &&
-	    ((insn & 0xffff) == 0x340f || (insn & 0xffff) == 0x050f))
-		return (upc - 2);
-	return (upc - 7);
-}
-
-/*
- * Protect segment registers from non-user privilege levels and GDT selectors
- * other than USER_CS, USER_DS and lwp FS and GS values.  If the segment
- * selector is non-null and not USER_CS/USER_DS, we make sure that the
- * TI bit is set to point into the LDT and that the RPL is set to 3.
- *
- * Since struct regs stores each 16-bit segment register as a 32-bit greg_t, we
- * also explicitly zero the top 16 bits since they may be coming from the
- * user's address space via setcontext(2) or /proc.
- *
- * Note about null selector. When running on the hypervisor if we allow a
- * process to set its %cs to null selector with RPL of 0 the hypervisor will
- * crash the domain. If running on bare metal we would get a #gp fault and
- * be able to kill the process and continue on. Therefore we make sure to
- * force RPL to SEL_UPL even for null selector when setting %cs.
- */
-
-#if defined(IS_CS) || defined(IS_NOT_CS)
-#error	"IS_CS and IS_NOT_CS already defined"
-#endif
-
-#define	IS_CS		1
-#define	IS_NOT_CS	0
-
-/*ARGSUSED*/
-static greg_t
-fix_segreg(greg_t sr, int iscs, model_t datamodel)
-{
-	switch (sr &= 0xffff) {
-
-	case 0:
-		if (iscs == IS_CS)
-			return (0 | SEL_UPL);
-		else
-			return (0);
-
-	/*
-	 * If lwp attempts to switch data model then force their
-	 * code selector to be null selector.
-	 */
-	case U32CS_SEL:
-		if (datamodel == DATAMODEL_NATIVE)
-			return (0 | SEL_UPL);
-		else
-			return (sr);
-
-	case UCS_SEL:
-		if (datamodel == DATAMODEL_ILP32)
-			return (0 | SEL_UPL);
-		/*FALLTHROUGH*/
-	case UDS_SEL:
-	case LWPFS_SEL:
-	case LWPGS_SEL:
-	case SEL_UPL:
-		return (sr);
-	default:
-		break;
-	}
-
-	/*
-	 * Force it into the LDT in ring 3 for 32-bit processes, which by
-	 * default do not have an LDT, so that any attempt to use an invalid
-	 * selector will reference the (non-existent) LDT, and cause a #gp
-	 * fault for the process.
-	 *
-	 * 64-bit processes get the null gdt selector since they
-	 * are not allowed to have a private LDT.
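-	 *
-	 * For example, a stray selector value of 0x10 from a 32-bit lwp
-	 * comes back from this routine as (0x10 | SEL_TI_LDT | SEL_UPL),
-	 * i.e. 0x17, which references the absent LDT and faults on first
-	 * use.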
- */ - if (datamodel == DATAMODEL_ILP32) { - return (sr | SEL_TI_LDT | SEL_UPL); - } else { - if (iscs == IS_CS) - return (0 | SEL_UPL); - else - return (0); - } - -} - -/* - * Set general registers. - */ -void -setgregs(klwp_t *lwp, gregset_t grp) -{ - struct regs *rp = lwptoregs(lwp); - model_t datamodel = lwp_getdatamodel(lwp); - - struct pcb *pcb = &lwp->lwp_pcb; - int thisthread = lwptot(lwp) == curthread; - - if (datamodel == DATAMODEL_NATIVE) { - if (thisthread) - (void) save_syscall_args(); /* copy the args */ - - rp->r_rdi = grp[REG_RDI]; - rp->r_rsi = grp[REG_RSI]; - rp->r_rdx = grp[REG_RDX]; - rp->r_rcx = grp[REG_RCX]; - rp->r_r8 = grp[REG_R8]; - rp->r_r9 = grp[REG_R9]; - rp->r_rax = grp[REG_RAX]; - rp->r_rbx = grp[REG_RBX]; - rp->r_rbp = grp[REG_RBP]; - rp->r_r10 = grp[REG_R10]; - rp->r_r11 = grp[REG_R11]; - rp->r_r12 = grp[REG_R12]; - rp->r_r13 = grp[REG_R13]; - rp->r_r14 = grp[REG_R14]; - rp->r_r15 = grp[REG_R15]; - rp->r_trapno = grp[REG_TRAPNO]; - rp->r_err = grp[REG_ERR]; - rp->r_rip = grp[REG_RIP]; - /* - * Setting %cs or %ss to anything else is quietly but - * quite definitely forbidden! - */ - rp->r_cs = UCS_SEL; - rp->r_ss = UDS_SEL; - rp->r_rsp = grp[REG_RSP]; - - if (thisthread) - kpreempt_disable(); - - pcb->pcb_ds = UDS_SEL; - pcb->pcb_es = UDS_SEL; - - /* - * 64-bit processes -are- allowed to set their fsbase/gsbase - * values directly, but only if they're using the segment - * selectors that allow that semantic. - * - * (32-bit processes must use lwp_set_private().) - */ - pcb->pcb_fsbase = grp[REG_FSBASE]; - pcb->pcb_gsbase = grp[REG_GSBASE]; - pcb->pcb_fs = fix_segreg(grp[REG_FS], IS_NOT_CS, datamodel); - pcb->pcb_gs = fix_segreg(grp[REG_GS], IS_NOT_CS, datamodel); - - /* - * Ensure that we go out via update_sregs - */ - PCB_SET_UPDATE_SEGS(pcb); - lwptot(lwp)->t_post_sys = 1; - if (thisthread) - kpreempt_enable(); -#if defined(_SYSCALL32_IMPL) - } else { - rp->r_rdi = (uint32_t)grp[REG_RDI]; - rp->r_rsi = (uint32_t)grp[REG_RSI]; - rp->r_rdx = (uint32_t)grp[REG_RDX]; - rp->r_rcx = (uint32_t)grp[REG_RCX]; - rp->r_rax = (uint32_t)grp[REG_RAX]; - rp->r_rbx = (uint32_t)grp[REG_RBX]; - rp->r_rbp = (uint32_t)grp[REG_RBP]; - rp->r_trapno = (uint32_t)grp[REG_TRAPNO]; - rp->r_err = (uint32_t)grp[REG_ERR]; - rp->r_rip = (uint32_t)grp[REG_RIP]; - - rp->r_cs = fix_segreg(grp[REG_CS], IS_CS, datamodel); - rp->r_ss = fix_segreg(grp[REG_DS], IS_NOT_CS, datamodel); - - rp->r_rsp = (uint32_t)grp[REG_RSP]; - - if (thisthread) - kpreempt_disable(); - - pcb->pcb_ds = fix_segreg(grp[REG_DS], IS_NOT_CS, datamodel); - pcb->pcb_es = fix_segreg(grp[REG_ES], IS_NOT_CS, datamodel); - - /* - * (See fsbase/gsbase commentary above) - */ - pcb->pcb_fs = fix_segreg(grp[REG_FS], IS_NOT_CS, datamodel); - pcb->pcb_gs = fix_segreg(grp[REG_GS], IS_NOT_CS, datamodel); - - /* - * Ensure that we go out via update_sregs - */ - PCB_SET_UPDATE_SEGS(pcb); - lwptot(lwp)->t_post_sys = 1; - if (thisthread) - kpreempt_enable(); -#endif - } - - /* - * Only certain bits of the flags register can be modified. - */ - rp->r_rfl = (rp->r_rfl & ~PSL_USERMASK) | - (grp[REG_RFL] & PSL_USERMASK); -} - -/* - * Determine whether eip is likely to have an interrupt frame - * on the stack. We do this by comparing the address to the - * range of addresses spanned by several well-known routines. 
- */ -extern void _interrupt(); -extern void _allsyscalls(); -extern void _cmntrap(); -extern void fakesoftint(); - -extern size_t _interrupt_size; -extern size_t _allsyscalls_size; -extern size_t _cmntrap_size; -extern size_t _fakesoftint_size; - -/* - * Get a pc-only stacktrace. Used for kmem_alloc() buffer ownership tracking. - * Returns MIN(current stack depth, pcstack_limit). - */ -int -getpcstack(pc_t *pcstack, int pcstack_limit) -{ - struct frame *fp = (struct frame *)getfp(); - struct frame *nextfp, *minfp, *stacktop; - int depth = 0; - int on_intr; - uintptr_t pc; - - if ((on_intr = CPU_ON_INTR(CPU)) != 0) - stacktop = (struct frame *)(CPU->cpu_intr_stack + SA(MINFRAME)); - else - stacktop = (struct frame *)curthread->t_stk; - minfp = fp; - - pc = ((struct regs *)fp)->r_pc; - - while (depth < pcstack_limit) { - nextfp = (struct frame *)fp->fr_savfp; - pc = fp->fr_savpc; - if (nextfp <= minfp || nextfp >= stacktop) { - if (on_intr) { - /* - * Hop from interrupt stack to thread stack. - */ - stacktop = (struct frame *)curthread->t_stk; - minfp = (struct frame *)curthread->t_stkbase; - on_intr = 0; - continue; - } - break; - } - pcstack[depth++] = (pc_t)pc; - fp = nextfp; - minfp = fp; - } - return (depth); -} - -/* - * The following ELF header fields are defined as processor-specific - * in the V8 ABI: - * - * e_ident[EI_DATA] encoding of the processor-specific - * data in the object file - * e_machine processor identification - * e_flags processor-specific flags associated - * with the file - */ - -/* - * The value of at_flags reflects a platform's cpu module support. - * at_flags is used to check for allowing a binary to execute and - * is passed as the value of the AT_FLAGS auxiliary vector. - */ -int at_flags = 0; - -/* - * Check the processor-specific fields of an ELF header. - * - * returns 1 if the fields are valid, 0 otherwise - */ -/*ARGSUSED2*/ -int -elfheadcheck( - unsigned char e_data, - Elf32_Half e_machine, - Elf32_Word e_flags) -{ - if (e_data != ELFDATA2LSB) - return (0); - if (e_machine == EM_AMD64) - return (1); - return (e_machine == EM_386); -} - -uint_t auxv_hwcap_include = 0; /* patch to enable unrecognized features */ -uint_t auxv_hwcap_include_2 = 0; /* second word */ -uint_t auxv_hwcap_exclude = 0; /* patch for broken cpus, debugging */ -uint_t auxv_hwcap_exclude_2 = 0; /* second word */ -#if defined(_SYSCALL32_IMPL) -uint_t auxv_hwcap32_include = 0; /* ditto for 32-bit apps */ -uint_t auxv_hwcap32_include_2 = 0; /* ditto for 32-bit apps */ -uint_t auxv_hwcap32_exclude = 0; /* ditto for 32-bit apps */ -uint_t auxv_hwcap32_exclude_2 = 0; /* ditto for 32-bit apps */ -#endif - -/* - * Gather information about the processor and place it into auxv_hwcap - * so that it can be exported to the linker via the aux vector. - * - * We use this seemingly complicated mechanism so that we can ensure - * that /etc/system can be used to override what the system can or - * cannot discover for itself. - */ -void -bind_hwcap(void) -{ - uint_t cpu_hwcap_flags[2]; - cpuid_pass4(NULL, cpu_hwcap_flags); - - auxv_hwcap = (auxv_hwcap_include | cpu_hwcap_flags[0]) & - ~auxv_hwcap_exclude; - auxv_hwcap_2 = (auxv_hwcap_include_2 | cpu_hwcap_flags[1]) & - ~auxv_hwcap_exclude_2; - - /* - * On AMD processors, sysenter just doesn't work at all - * when the kernel is in long mode. On IA-32e processors - * it does, but there's no real point in all the alternate - * mechanism when syscall works on both. - * - * Besides, the kernel's sysenter handler is expecting a - * 32-bit lwp ... 
- */
-	auxv_hwcap &= ~AV_386_SEP;
-
-	if (auxv_hwcap_include || auxv_hwcap_exclude || auxv_hwcap_include_2 ||
-	    auxv_hwcap_exclude_2) {
-		/*
-		 * The below assignment is regrettably required to get lint
-		 * to accept the validity of our format string.  The format
-		 * string is in fact valid, but whatever intelligence in lint
-		 * understands the cmn_err()-specific %b appears to have an
-		 * off-by-one error: it (mistakenly) complains about bit
-		 * number 32 (even though this is explicitly permitted).
-		 * Normally, one would will away such warnings with a "LINTED"
-		 * directive, but for reasons unclear and unknown, lint
-		 * refuses to be assuaged in this case.  Fortunately, lint
-		 * doesn't pretend to have solved the Halting Problem --
-		 * and as soon as the format string is programmatic, it
-		 * knows enough to shut up.
-		 */
-		char *fmt = "?user ABI extensions: %b\n";
-		cmn_err(CE_CONT, fmt, auxv_hwcap, FMT_AV_386);
-		fmt = "?user ABI extensions (word 2): %b\n";
-		cmn_err(CE_CONT, fmt, auxv_hwcap_2, FMT_AV_386_2);
-	}
-
-#if defined(_SYSCALL32_IMPL)
-	auxv_hwcap32 = (auxv_hwcap32_include | cpu_hwcap_flags[0]) &
-	    ~auxv_hwcap32_exclude;
-	auxv_hwcap32_2 = (auxv_hwcap32_include_2 | cpu_hwcap_flags[1]) &
-	    ~auxv_hwcap32_exclude_2;
-
-	/*
-	 * If this is an amd64 architecture machine from Intel, then
-	 * syscall -doesn't- work in compatibility mode, only sysenter does.
-	 *
-	 * Sigh.
-	 */
-	if (!cpuid_syscall32_insn(NULL))
-		auxv_hwcap32 &= ~AV_386_AMD_SYSC;
-
-	/*
-	 * 32-bit processes can -always- use the lahf/sahf instructions
-	 */
-	auxv_hwcap32 |= AV_386_AHF;
-
-	/*
-	 * 32-bit processes can -never- use fsgsbase instructions.
-	 */
-	auxv_hwcap32_2 &= ~AV_386_2_FSGSBASE;
-
-	if (auxv_hwcap32_include || auxv_hwcap32_exclude ||
-	    auxv_hwcap32_include_2 || auxv_hwcap32_exclude_2) {
-		/*
-		 * See the block comment in the cmn_err() of auxv_hwcap, above.
-		 */
-		char *fmt = "?32-bit user ABI extensions: %b\n";
-		cmn_err(CE_CONT, fmt, auxv_hwcap32, FMT_AV_386);
-		fmt = "?32-bit user ABI extensions (word 2): %b\n";
-		cmn_err(CE_CONT, fmt, auxv_hwcap32_2, FMT_AV_386_2);
-	}
-#endif
-}
-
-/*
- * sync_icache() - this is called
- * in proc/fs/prusrio.c. x86 has a unified cache and therefore
- * this is a nop.
- */
-/* ARGSUSED */
-void
-sync_icache(caddr_t addr, uint_t len)
-{
-	/* Do nothing for now */
-}
-
-/*ARGSUSED*/
-void
-sync_data_memory(caddr_t va, size_t len)
-{
-	/* Not implemented for this platform */
-}
-
-int
-__ipltospl(int ipl)
-{
-	return (ipltospl(ipl));
-}
-
-/*
- * The panic code invokes panic_saveregs() to record the contents of a
- * regs structure into the specified panic_data structure for debuggers.
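- * Each value is recorded as a name/value pair (see PANICNVADD() below),
- * so post-mortem tools can look the registers up by name in the dump.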
- */
-void
-panic_saveregs(panic_data_t *pdp, struct regs *rp)
-{
-	panic_nv_t *pnv = PANICNVGET(pdp);
-
-	struct cregs	creg;
-
-	getcregs(&creg);
-
-	PANICNVADD(pnv, "rdi", rp->r_rdi);
-	PANICNVADD(pnv, "rsi", rp->r_rsi);
-	PANICNVADD(pnv, "rdx", rp->r_rdx);
-	PANICNVADD(pnv, "rcx", rp->r_rcx);
-	PANICNVADD(pnv, "r8", rp->r_r8);
-	PANICNVADD(pnv, "r9", rp->r_r9);
-	PANICNVADD(pnv, "rax", rp->r_rax);
-	PANICNVADD(pnv, "rbx", rp->r_rbx);
-	PANICNVADD(pnv, "rbp", rp->r_rbp);
-	PANICNVADD(pnv, "r10", rp->r_r10);
-	PANICNVADD(pnv, "r11", rp->r_r11);
-	PANICNVADD(pnv, "r12", rp->r_r12);
-	PANICNVADD(pnv, "r13", rp->r_r13);
-	PANICNVADD(pnv, "r14", rp->r_r14);
-	PANICNVADD(pnv, "r15", rp->r_r15);
-	PANICNVADD(pnv, "fsbase", rdmsr(MSR_AMD_FSBASE));
-	PANICNVADD(pnv, "gsbase", rdmsr(MSR_AMD_GSBASE));
-	PANICNVADD(pnv, "ds", rp->r_ds);
-	PANICNVADD(pnv, "es", rp->r_es);
-	PANICNVADD(pnv, "fs", rp->r_fs);
-	PANICNVADD(pnv, "gs", rp->r_gs);
-	PANICNVADD(pnv, "trapno", rp->r_trapno);
-	PANICNVADD(pnv, "err", rp->r_err);
-	PANICNVADD(pnv, "rip", rp->r_rip);
-	PANICNVADD(pnv, "cs", rp->r_cs);
-	PANICNVADD(pnv, "rflags", rp->r_rfl);
-	PANICNVADD(pnv, "rsp", rp->r_rsp);
-	PANICNVADD(pnv, "ss", rp->r_ss);
-	PANICNVADD(pnv, "gdt_hi", (uint64_t)(creg.cr_gdt._l[3]));
-	PANICNVADD(pnv, "gdt_lo", (uint64_t)(creg.cr_gdt._l[0]));
-	PANICNVADD(pnv, "idt_hi", (uint64_t)(creg.cr_idt._l[3]));
-	PANICNVADD(pnv, "idt_lo", (uint64_t)(creg.cr_idt._l[0]));
-
-	PANICNVADD(pnv, "ldt", creg.cr_ldt);
-	PANICNVADD(pnv, "task", creg.cr_task);
-	PANICNVADD(pnv, "cr0", creg.cr_cr0);
-	PANICNVADD(pnv, "cr2", creg.cr_cr2);
-	PANICNVADD(pnv, "cr3", creg.cr_cr3);
-	if (creg.cr_cr4)
-		PANICNVADD(pnv, "cr4", creg.cr_cr4);
-
-	PANICNVSET(pdp, pnv);
-}
-
-#define	TR_ARG_MAX 6	/* Max args to print, same as SPARC */
-
-
-/*
- * Print a stack backtrace using the specified frame pointer.  We delay two
- * seconds before continuing, unless this is the panic traceback.
- * If we are in the process of panicking, we also attempt to write the
- * stack backtrace to a statically assigned buffer, to allow the panic
- * code to find it and write it into uncompressed pages within the
- * system crash dump.
- * Note that the frame for the starting stack pointer value is omitted because
- * the corresponding %eip is not known.
- */
-
-extern char *dump_stack_scratch;
-
-
-void
-traceback(caddr_t fpreg)
-{
-	struct frame	*fp = (struct frame *)fpreg;
-	struct frame	*nextfp;
-	uintptr_t	pc, nextpc;
-	ulong_t		off;
-	char		args[TR_ARG_MAX * 2 + 16], *sym;
-	uint_t		offset = 0;
-	uint_t		next_offset = 0;
-	char		stack_buffer[1024];
-
-	if (!panicstr)
-		printf("traceback: %%fp = %p\n", (void *)fp);
-
-	if (panicstr && !dump_stack_scratch) {
-		printf("Warning - stack not written to the dump buffer\n");
-	}
-
-	fp = (struct frame *)plat_traceback(fpreg);
-	if ((uintptr_t)fp < KERNELBASE)
-		goto out;
-
-	pc = fp->fr_savpc;
-	fp = (struct frame *)fp->fr_savfp;
-
-	while ((uintptr_t)fp >= KERNELBASE) {
-		/*
-		 * XX64 Until port is complete tolerate 8-byte aligned
-		 * frame pointers but flag with a warning so they can
-		 * be fixed.
-		 */
-		if (((uintptr_t)fp & (STACK_ALIGN - 1)) != 0) {
-			if (((uintptr_t)fp & (8 - 1)) == 0) {
-				printf("  >> warning! 
8-byte" - " aligned %%fp = %p\n", (void *)fp); - } else { - printf( - " >> mis-aligned %%fp = %p\n", (void *)fp); - break; - } - } - - args[0] = '\0'; - nextpc = (uintptr_t)fp->fr_savpc; - nextfp = (struct frame *)fp->fr_savfp; - if ((sym = kobj_getsymname(pc, &off)) != NULL) { - printf("%016lx %s:%s+%lx (%s)\n", (uintptr_t)fp, - mod_containing_pc((caddr_t)pc), sym, off, args); - (void) snprintf(stack_buffer, sizeof (stack_buffer), - "%s:%s+%lx (%s) | ", - mod_containing_pc((caddr_t)pc), sym, off, args); - } else { - printf("%016lx %lx (%s)\n", - (uintptr_t)fp, pc, args); - (void) snprintf(stack_buffer, sizeof (stack_buffer), - "%lx (%s) | ", pc, args); - } - - if (panicstr && dump_stack_scratch) { - next_offset = offset + strlen(stack_buffer); - if (next_offset < STACK_BUF_SIZE) { - bcopy(stack_buffer, dump_stack_scratch + offset, - strlen(stack_buffer)); - offset = next_offset; - } else { - /* - * In attempting to save the panic stack - * to the dumpbuf we have overflowed that area. - * Print a warning and continue to printf the - * stack to the msgbuf - */ - printf("Warning: stack in the dump buffer" - " may be incomplete\n"); - offset = next_offset; - } - } - - pc = nextpc; - fp = nextfp; - } -out: - if (!panicstr) { - printf("end of traceback\n"); - DELAY(2 * MICROSEC); - } else if (dump_stack_scratch) { - dump_stack_scratch[offset] = '\0'; - } -} - - -/* - * Generate a stack backtrace from a saved register set. - */ -void -traceregs(struct regs *rp) -{ - traceback((caddr_t)rp->r_fp); -} - -void -exec_set_sp(size_t stksize) -{ - klwp_t *lwp = ttolwp(curthread); - - lwptoregs(lwp)->r_sp = (uintptr_t)curproc->p_usrstack - stksize; -} - -hrtime_t -gethrtime_waitfree(void) -{ - return (dtrace_gethrtime()); -} - -hrtime_t -gethrtime(void) -{ - return (gethrtimef()); -} - -hrtime_t -gethrtime_unscaled(void) -{ - return (gethrtimeunscaledf()); -} - -void -scalehrtime(hrtime_t *hrt) -{ - scalehrtimef(hrt); -} - -uint64_t -unscalehrtime(hrtime_t nsecs) -{ - return (unscalehrtimef(nsecs)); -} - -void -gethrestime(timespec_t *tp) -{ - gethrestimef(tp); -} - -/* - * Part of the implementation of hres_tick(); this routine is - * easier in C than assembler .. called with the hres_lock held. 
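- *
- * Each call slews hrestime by at most max_hres_adj nanoseconds toward
- * the outstanding timedelta, so large clock corrections are applied
- * gradually rather than in a single step.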
- *
- * XX64	Many of these timekeeping variables need to be extern'ed in a header
- */
-
-#include <sys/time.h>
-#include <sys/machlock.h>
-
-extern int one_sec;
-extern int max_hres_adj;
-
-void
-__adj_hrestime(void)
-{
-	long long adj;
-
-	if (hrestime_adj == 0)
-		adj = 0;
-	else if (hrestime_adj > 0) {
-		if (hrestime_adj < max_hres_adj)
-			adj = hrestime_adj;
-		else
-			adj = max_hres_adj;
-	} else {
-		if (hrestime_adj < -max_hres_adj)
-			adj = -max_hres_adj;
-		else
-			adj = hrestime_adj;
-	}
-
-	timedelta -= adj;
-	hrestime_adj = timedelta;
-	hrestime.tv_nsec += adj;
-
-	while (hrestime.tv_nsec >= NANOSEC) {
-		one_sec++;
-		hrestime.tv_sec++;
-		hrestime.tv_nsec -= NANOSEC;
-	}
-}
-
-/*
- * Wrapper functions to maintain backwards compatibility
- */
-int
-xcopyin(const void *uaddr, void *kaddr, size_t count)
-{
-	return (xcopyin_nta(uaddr, kaddr, count, UIO_COPY_CACHED));
-}
-
-int
-xcopyout(const void *kaddr, void *uaddr, size_t count)
-{
-	return (xcopyout_nta(kaddr, uaddr, count, UIO_COPY_CACHED));
-}
diff --git a/usr/src/uts/intel/ia32/os/bootdev.c b/usr/src/uts/intel/ia32/os/bootdev.c
deleted file mode 100644
index 02f31efd56..0000000000
--- a/usr/src/uts/intel/ia32/os/bootdev.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/modctl.h>
-#include <sys/sunddi.h>
-
-/* internal global data */
-static struct modlmisc modlmisc = {
-	&mod_miscops, "bootdev misc module"
-};
-
-static struct modlinkage modlinkage = {
-	MODREV_1, (void *)&modlmisc, NULL
-};
-
-int
-_init()
-{
-	return (mod_install(&modlinkage));
-}
-
-int
-_fini()
-{
-	return (mod_remove(&modlinkage));
-}
-
-int
-_info(struct modinfo *modinfop)
-{
-	return (mod_info(&modlinkage, modinfop));
-}
-
-/*
- * convert a prom device path to an equivalent path in /devices
- * Does not deal with aliases.  Does deal with pathnames which
- * are not fully qualified.  This routine is generalized
- * to work across several flavors of OBP
- */
-int
-i_promname_to_devname(char *prom_name, char *ret_buf)
-{
-	if (prom_name == NULL || ret_buf == NULL ||
-	    (strlen(prom_name) >= MAXPATHLEN)) {
-		return (EINVAL);
-	}
-	if (i_ddi_prompath_to_devfspath(prom_name, ret_buf) != DDI_SUCCESS)
-		return (EINVAL);
-
-	return (0);
-}
-
-/*
- * If bootstring contains a device path, we need to convert to a format
- * the prom will understand.  To do so, we convert the existing path to
- * a prom-compatible path and return the value of new_path.  If the
- * caller specifies new_path as NULL, we allocate an appropriately
- * sized new_path on behalf of the caller.  
If the caller invokes this - * function with new_path = NULL, they must do so from a context in - * which it is safe to perform a sleeping memory allocation. - * - * NOTE: Intel does not have a real PROM, so the implementation - * simply returns a copy of the string passed in. - */ -char * -i_convert_boot_device_name(char *cur_path, char *new_path, size_t *len) -{ - if (new_path != NULL) { - (void) snprintf(new_path, *len, "%s", cur_path); - return (new_path); - } else { - *len = strlen(cur_path) + 1; - new_path = kmem_alloc(*len, KM_SLEEP); - (void) snprintf(new_path, *len, "%s", cur_path); - return (new_path); - } -} diff --git a/usr/src/uts/intel/ia32/os/comm_page_util.c b/usr/src/uts/intel/ia32/os/comm_page_util.c deleted file mode 100644 index f286bee7f6..0000000000 --- a/usr/src/uts/intel/ia32/os/comm_page_util.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright 2016 Joyent, Inc. - */ - - -#include <sys/types.h> -#include <sys/thread.h> -#include <sys/proc.h> -#include <sys/mman.h> -#include <sys/vmsystm.h> -#include <vm/as.h> -#include <vm/seg_umap.h> - -#if !defined(__xpv) -#include <sys/comm_page.h> -#endif /* !defined(__xpv) */ - -/* - * Map in the comm page. - * - * The contents of the comm page are only defined on non-xpv x86 at this time. - * Furthermore, the data is only valid in userspace (32-bit or 64-bit) when - * mapped from a 64-bit kernel. - * See: "uts/i86pc/sys/comm_page.h" - */ -caddr_t -comm_page_mapin() -{ -#if !defined(__xpv) - proc_t *p = curproc; - caddr_t addr = NULL; - size_t len = COMM_PAGE_SIZE; - uint_t prot = PROT_USER | PROT_READ; - segumap_crargs_t suarg; - - map_addr(&addr, len, (offset_t)0, 1, 0); - if (addr == NULL || valid_usr_range(addr, len, prot, p->p_as, - p->p_as->a_userlimit) != RANGE_OKAY) { - return (NULL); - } - - suarg.kaddr = (caddr_t)&comm_page; - suarg.prot = suarg.maxprot = prot; - if (as_map(p->p_as, addr, len, segumap_create, &suarg) != 0) { - return (NULL); - } - return (addr); -#else /* !defined(__xpv) */ - return (NULL); -#endif /* !defined(__xpv) */ -} diff --git a/usr/src/uts/intel/ia32/os/copy_subr.c b/usr/src/uts/intel/ia32/os/copy_subr.c deleted file mode 100644 index 0df1086260..0000000000 --- a/usr/src/uts/intel/ia32/os/copy_subr.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * Miscellaneous C routines for copying data around without - * descending into assembler. Compilers are pretty good at - * scheduling instructions, and humans are pretty hopeless at - * writing correct assembler. - */ - -#include <sys/types.h> -#include <sys/systm.h> -#include <sys/errno.h> -#include <sys/param.h> - -/* - * copyinstr_noerr and copyoutstr_noerr can be implemented completely - * in C on machines with shared user and kernel context. - */ -static int -copystr_nofault(const char *src, char *dst, size_t maxlength, - size_t *lencopied) -{ - int error = 0; - size_t leftover; - - if ((leftover = maxlength) == 0) - error = ENAMETOOLONG; - else - do { - leftover--; - if ((*dst++ = *src++) == '\0') - break; - if (leftover == 0) { - error = ENAMETOOLONG; - break; - } - /*CONSTCOND*/ - } while (1); - - if (lencopied) - *lencopied = maxlength - leftover; - return (error); -} - - -int -copyinstr_noerr(const char *uaddr, char *kaddr, size_t maxlength, - size_t *lencopied) -{ - char *ua = (char *)uaddr; - - ASSERT((uintptr_t)kaddr > kernelbase); - - if ((uintptr_t)ua > kernelbase) { - /* - * force fault at kernelbase - */ - ua = (char *)kernelbase; - } - return (copystr_nofault(ua, kaddr, maxlength, lencopied)); -} - -int -copyoutstr_noerr(const char *kaddr, char *uaddr, size_t maxlength, - size_t *lencopied) -{ - char *ua = (char *)uaddr; - - ASSERT((uintptr_t)kaddr > kernelbase); - - if ((uintptr_t)ua > kernelbase) { - /* - * force fault at kernelbase - */ - ua = (char *)kernelbase; - } - return (copystr_nofault(kaddr, ua, maxlength, lencopied)); -} diff --git a/usr/src/uts/intel/ia32/os/cpc_subr.c b/usr/src/uts/intel/ia32/os/cpc_subr.c deleted file mode 100644 index 71e1ebaeee..0000000000 --- a/usr/src/uts/intel/ia32/os/cpc_subr.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2021 Joyent, Inc. - */ - -/* - * x86-specific routines used by the CPU Performance counter driver. 
- */ - -#include <sys/types.h> -#include <sys/time.h> -#include <sys/atomic.h> -#include <sys/regset.h> -#include <sys/privregs.h> -#include <sys/x86_archext.h> -#include <sys/cpuvar.h> -#include <sys/machcpuvar.h> -#include <sys/archsystm.h> -#include <sys/cpc_pcbe.h> -#include <sys/cpc_impl.h> -#include <sys/x_call.h> -#include <sys/cmn_err.h> -#include <sys/cmt.h> -#include <sys/spl.h> -#include <sys/apic.h> - -static const uint64_t allstopped = 0; -static kcpc_ctx_t *(*overflow_intr_handler)(caddr_t); - -/* Do threads share performance monitoring hardware? */ -static int strands_perfmon_shared = 0; - -int kcpc_hw_overflow_intr_installed; /* set by APIC code */ -extern kcpc_ctx_t *kcpc_overflow_intr(caddr_t arg, uint64_t bitmap); - -extern int kcpc_counts_include_idle; /* Project Private /etc/system variable */ - -void (*kcpc_hw_enable_cpc_intr)(void); /* set by APIC code */ - -int -kcpc_hw_add_ovf_intr(kcpc_ctx_t *(*handler)(caddr_t)) -{ - if (x86_type != X86_TYPE_P6) - return (0); - overflow_intr_handler = handler; - return (ipltospl(APIC_PCINT_IPL)); -} - -void -kcpc_hw_rem_ovf_intr(void) -{ - overflow_intr_handler = NULL; -} - -/* - * Hook used on P4 systems to catch online/offline events. - */ -/*ARGSUSED*/ -static int -kcpc_cpu_setup(cpu_setup_t what, int cpuid, void *arg) -{ - pg_cmt_t *chip_pg; - int active_cpus_cnt; - - if (what != CPU_ON) - return (0); - - /* - * If any CPU-bound contexts exist, we don't need to invalidate - * anything, as no per-LWP contexts can coexist. - */ - if (kcpc_cpuctx || dtrace_cpc_in_use) - return (0); - - /* - * If this chip now has more than 1 active cpu, we must invalidate all - * contexts in the system. - */ - chip_pg = (pg_cmt_t *)pghw_find_pg(cpu[cpuid], PGHW_CHIP); - if (chip_pg != NULL) { - active_cpus_cnt = GROUP_SIZE(&chip_pg->cmt_cpus_actv); - if (active_cpus_cnt > 1) - kcpc_invalidate_all(); - } - - return (0); -} - -static kmutex_t cpu_setup_lock; /* protects setup_registered */ -static int setup_registered; - - -void -kcpc_hw_init(cpu_t *cp) -{ - kthread_t *t = cp->cpu_idle_thread; - uint32_t versionid; - struct cpuid_regs cpuid; - - strands_perfmon_shared = 0; - if (is_x86_feature(x86_featureset, X86FSET_HTT)) { - if (cpuid_getvendor(cpu[0]) == X86_VENDOR_Intel) { - /* - * Intel processors that support Architectural - * Performance Monitoring Version 3 have per strand - * performance monitoring hardware. - * Hence we can allow use of performance counters on - * multiple strands on the same core simultaneously. - */ - cpuid.cp_eax = 0x0; - (void) __cpuid_insn(&cpuid); - if (cpuid.cp_eax < 0xa) { - strands_perfmon_shared = 1; - } else { - cpuid.cp_eax = 0xa; - (void) __cpuid_insn(&cpuid); - - versionid = cpuid.cp_eax & 0xFF; - if (versionid < 3) { - strands_perfmon_shared = 1; - } - } - } else if (cpuid_getvendor(cpu[0]) == X86_VENDOR_AMD || - cpuid_getvendor(cpu[0]) == X86_VENDOR_HYGON) { - /* - * On AMD systems with HT, all of the performance - * monitors exist on a per-logical CPU basis. 
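-			 * Counters can therefore be used on all strands
-			 * simultaneously, just as in the Intel per-strand
-			 * (version >= 3) case above.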
- */ - strands_perfmon_shared = 0; - } else { - strands_perfmon_shared = 1; - } - } - - if (strands_perfmon_shared) { - mutex_enter(&cpu_setup_lock); - if (setup_registered == 0) { - mutex_enter(&cpu_lock); - register_cpu_setup_func(kcpc_cpu_setup, NULL); - mutex_exit(&cpu_lock); - setup_registered = 1; - } - mutex_exit(&cpu_setup_lock); - } - - mutex_init(&cp->cpu_cpc_ctxlock, "cpu_cpc_ctxlock", MUTEX_DEFAULT, 0); - - if (kcpc_counts_include_idle) - return; - - installctx(t, cp, kcpc_idle_save, kcpc_idle_restore, - NULL, NULL, NULL, NULL, NULL); -} - -void -kcpc_hw_fini(cpu_t *cp) -{ - ASSERT(cp->cpu_idle_thread == NULL); - - mutex_destroy(&cp->cpu_cpc_ctxlock); -} - -#define BITS(v, u, l) \ - (((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1)) - -#define PCBE_NAMELEN 30 /* Enough Room for pcbe.manuf.model.family.stepping */ - -/* - * Examine the processor and load an appropriate PCBE. - */ -int -kcpc_hw_load_pcbe(void) -{ - return (kcpc_pcbe_tryload(cpuid_getvendorstr(CPU), cpuid_getfamily(CPU), - cpuid_getmodel(CPU), cpuid_getstep(CPU))); -} - -/* - * Called by the generic framework to check if it's OK to bind a set to a CPU. - */ -int -kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap) -{ - cpu_t *cpu, *p; - pg_t *chip_pg; - pg_cpu_itr_t itr; - - if (!strands_perfmon_shared) - return (0); - - /* - * Only one logical CPU on each Pentium 4 HT CPU may be bound to at - * once. - * - * This loop is protected by holding cpu_lock, in order to properly - * access the cpu_t of the desired cpu. - */ - mutex_enter(&cpu_lock); - if ((cpu = cpu_get(cpuid)) == NULL) { - mutex_exit(&cpu_lock); - return (-1); - } - - chip_pg = (pg_t *)pghw_find_pg(cpu, PGHW_CHIP); - - PG_CPU_ITR_INIT(chip_pg, itr); - while ((p = pg_cpu_next(&itr)) != NULL) { - if (p == cpu) - continue; - if (BT_TEST(kcpc_cpumap, p->cpu_id)) { - mutex_exit(&cpu_lock); - return (-1); - } - } - - mutex_exit(&cpu_lock); - return (0); -} - -/* - * Called by the generic framework to check if it's OK to bind a set to an LWP. - */ -int -kcpc_hw_lwp_hook(void) -{ - pg_cmt_t *chip; - group_t *chips; - group_iter_t i; - - if (!strands_perfmon_shared) - return (0); - - /* - * Only one CPU per chip may be online. - */ - mutex_enter(&cpu_lock); - - chips = pghw_set_lookup(PGHW_CHIP); - if (chips == NULL) { - mutex_exit(&cpu_lock); - return (0); - } - - group_iter_init(&i); - while ((chip = group_iterate(chips, &i)) != NULL) { - if (GROUP_SIZE(&chip->cmt_cpus_actv) > 1) { - mutex_exit(&cpu_lock); - return (-1); - } - } - - mutex_exit(&cpu_lock); - return (0); -} diff --git a/usr/src/uts/intel/ia32/os/ddi_i86.c b/usr/src/uts/intel/ia32/os/ddi_i86.c deleted file mode 100644 index f135d0673c..0000000000 --- a/usr/src/uts/intel/ia32/os/ddi_i86.c +++ /dev/null @@ -1,1903 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright 2014 Garrett D'Amore <garrett@damore.org>
- */
-
-#include <sys/conf.h>
-#include <sys/kmem.h>
-#include <sys/ddi_impldefs.h>
-#include <sys/ddi.h>
-#include <sys/sunddi.h>
-#include <sys/ddifm.h>
-#include <sys/fm/io/ddi.h>
-#include <sys/fm/protocol.h>
-#include <sys/ontrap.h>
-
-
-/*
- * DDI DMA Engine functions for x86.
- * These functions are more naturally generic, but do not apply to SPARC.
- */
-
-int
-ddi_dmae_alloc(dev_info_t *dip, int chnl, int (*dmae_waitfp)(), caddr_t arg)
-{
-	return (ddi_dma_mctl(dip, dip, 0, DDI_DMA_E_ACQUIRE,
-	    (off_t *)dmae_waitfp, (size_t *)arg,
-	    (caddr_t *)(uintptr_t)chnl, 0));
-}
-
-int
-ddi_dmae_release(dev_info_t *dip, int chnl)
-{
-	return (ddi_dma_mctl(dip, dip, 0, DDI_DMA_E_FREE, 0, 0,
-	    (caddr_t *)(uintptr_t)chnl, 0));
-}
-
-int
-ddi_dmae_getattr(dev_info_t *dip, ddi_dma_attr_t *attrp)
-{
-	return (ddi_dma_mctl(dip, dip, 0, DDI_DMA_E_GETATTR, 0, 0,
-	    (caddr_t *)attrp, 0));
-}
-
-int
-ddi_dmae_1stparty(dev_info_t *dip, int chnl)
-{
-	return (ddi_dma_mctl(dip, dip, 0, DDI_DMA_E_1STPTY, 0, 0,
-	    (caddr_t *)(uintptr_t)chnl, 0));
-}
-
-int
-ddi_dmae_prog(dev_info_t *dip, struct ddi_dmae_req *dmaereqp,
-    ddi_dma_cookie_t *cookiep, int chnl)
-{
-	return (ddi_dma_mctl(dip, dip, 0, DDI_DMA_E_PROG, (off_t *)dmaereqp,
-	    (size_t *)cookiep, (caddr_t *)(uintptr_t)chnl, 0));
-}
-
-int
-ddi_dmae_swsetup(dev_info_t *dip, struct ddi_dmae_req *dmaereqp,
-    ddi_dma_cookie_t *cookiep, int chnl)
-{
-	return (ddi_dma_mctl(dip, dip, 0, DDI_DMA_E_SWSETUP, (off_t *)dmaereqp,
-	    (size_t *)cookiep, (caddr_t *)(uintptr_t)chnl, 0));
-}
-
-int
-ddi_dmae_swstart(dev_info_t *dip, int chnl)
-{
-	return (ddi_dma_mctl(dip, dip, 0, DDI_DMA_E_SWSTART, 0, 0,
-	    (caddr_t *)(uintptr_t)chnl, 0));
-}
-
-int
-ddi_dmae_stop(dev_info_t *dip, int chnl)
-{
-	return (ddi_dma_mctl(dip, dip, 0, DDI_DMA_E_STOP, 0, 0,
-	    (caddr_t *)(uintptr_t)chnl, 0));
-}
-
-int
-ddi_dmae_enable(dev_info_t *dip, int chnl)
-{
-	return (ddi_dma_mctl(dip, dip, 0, DDI_DMA_E_ENABLE, 0, 0,
-	    (caddr_t *)(uintptr_t)chnl, 0));
-}
-
-int
-ddi_dmae_disable(dev_info_t *dip, int chnl)
-{
-	return (ddi_dma_mctl(dip, dip, 0, DDI_DMA_E_DISABLE, 0, 0,
-	    (caddr_t *)(uintptr_t)chnl, 0));
-}
-
-int
-ddi_dmae_getcnt(dev_info_t *dip, int chnl, int *countp)
-{
-	return (ddi_dma_mctl(dip, dip, 0, DDI_DMA_E_GETCNT, 0, (size_t *)countp,
-	    (caddr_t *)(uintptr_t)chnl, 0));
-}
-
-/*
- * implementation specific access handle and routines:
- */
-
-static uintptr_t impl_acc_hdl_id = 0;
-
-/*
- * access handle allocator
- */
-ddi_acc_hdl_t *
-impl_acc_hdl_get(ddi_acc_handle_t hdl)
-{
-	/*
-	 * recast to ddi_acc_hdl_t instead of casting to ddi_acc_impl_t
-	 * and then returning the ah_platform_private
-	 *
-	 * this optimization relies on the ddi_acc_hdl_t being the
-	 * first member of the ddi_acc_impl_t.
-	 */
-	return ((ddi_acc_hdl_t *)hdl);
-}
-
-ddi_acc_handle_t
-impl_acc_hdl_alloc(int (*waitfp)(caddr_t), caddr_t arg)
-{
-	ddi_acc_impl_t *hp;
-	on_trap_data_t *otp;
-	int sleepflag;
-
-	sleepflag = ((waitfp == (int (*)())KM_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
-	/*
-	 * Allocate and initialize the data access handle and error status.
- */ - if ((hp = kmem_zalloc(sizeof (ddi_acc_impl_t), sleepflag)) == NULL) - goto fail; - if ((hp->ahi_err = (ndi_err_t *)kmem_zalloc( - sizeof (ndi_err_t), sleepflag)) == NULL) { - kmem_free(hp, sizeof (ddi_acc_impl_t)); - goto fail; - } - if ((otp = (on_trap_data_t *)kmem_zalloc( - sizeof (on_trap_data_t), sleepflag)) == NULL) { - kmem_free(hp->ahi_err, sizeof (ndi_err_t)); - kmem_free(hp, sizeof (ddi_acc_impl_t)); - goto fail; - } - hp->ahi_err->err_ontrap = otp; - hp->ahi_common.ah_platform_private = (void *)hp; - - return ((ddi_acc_handle_t)hp); -fail: - if ((waitfp != (int (*)())KM_SLEEP) && - (waitfp != (int (*)())KM_NOSLEEP)) - ddi_set_callback(waitfp, arg, &impl_acc_hdl_id); - return (NULL); -} - -void -impl_acc_hdl_free(ddi_acc_handle_t handle) -{ - ddi_acc_impl_t *hp; - - /* - * The supplied (ddi_acc_handle_t) is actually a (ddi_acc_impl_t *), - * because that's what we allocated in impl_acc_hdl_alloc() above. - */ - hp = (ddi_acc_impl_t *)handle; - if (hp) { - kmem_free(hp->ahi_err->err_ontrap, sizeof (on_trap_data_t)); - kmem_free(hp->ahi_err, sizeof (ndi_err_t)); - kmem_free(hp, sizeof (ddi_acc_impl_t)); - if (impl_acc_hdl_id) - ddi_run_callback(&impl_acc_hdl_id); - } -} - -/* - * Function used to check if a given access handle owns the failing address. - * Called by ndi_fmc_error, when we detect a PIO error. - */ -/* ARGSUSED */ -static int -impl_acc_check(dev_info_t *dip, const void *handle, const void *addr, - const void *not_used) -{ - pfn_t pfn, fault_pfn; - ddi_acc_hdl_t *hp; - - hp = impl_acc_hdl_get((ddi_acc_handle_t)handle); - - ASSERT(hp); - - if (addr != NULL) { - pfn = hp->ah_pfn; - fault_pfn = mmu_btop(*(uint64_t *)addr); - if (fault_pfn >= pfn && fault_pfn < (pfn + hp->ah_pnum)) - return (DDI_FM_NONFATAL); - } - return (DDI_FM_UNKNOWN); -} - -void -impl_acc_err_init(ddi_acc_hdl_t *handlep) -{ - int fmcap; - ndi_err_t *errp; - on_trap_data_t *otp; - ddi_acc_impl_t *hp = (ddi_acc_impl_t *)handlep; - - fmcap = ddi_fm_capable(handlep->ah_dip); - - if (handlep->ah_acc.devacc_attr_version < DDI_DEVICE_ATTR_V1 || - !DDI_FM_ACC_ERR_CAP(fmcap)) { - handlep->ah_acc.devacc_attr_access = DDI_DEFAULT_ACC; - } else if (handlep->ah_acc.devacc_attr_access == DDI_FLAGERR_ACC && - hp->ahi_scan == NULL) { - handlep->ah_acc.devacc_attr_access = DDI_DEFAULT_ACC; - } else if (DDI_FM_ACC_ERR_CAP(fmcap)) { - if (handlep->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) { - if (handlep->ah_xfermodes) - return; - i_ddi_drv_ereport_post(handlep->ah_dip, DVR_EFMCAP, - NULL, DDI_NOSLEEP); - } else { - errp = hp->ahi_err; - otp = (on_trap_data_t *)errp->err_ontrap; - otp->ot_handle = (void *)(hp); - otp->ot_prot = OT_DATA_ACCESS; - errp->err_status = DDI_FM_OK; - errp->err_expected = DDI_FM_ERR_UNEXPECTED; - errp->err_cf = impl_acc_check; - } - } -} - -/* ARGSUSED */ -int -impl_dma_check(dev_info_t *dip, const void *handle, const void *pci_hdl, - const void *not_used) -{ - return (DDI_FM_UNKNOWN); -} - -void -impl_acc_hdl_init(ddi_acc_hdl_t *handlep) -{ - ddi_acc_impl_t *hp; - int fmcap; - int devacc_attr_access; - - if (!handlep) - return; - fmcap = ddi_fm_capable(handlep->ah_dip); - if (handlep->ah_acc.devacc_attr_version < DDI_DEVICE_ATTR_V1 || - !DDI_FM_ACC_ERR_CAP(fmcap)) - devacc_attr_access = DDI_DEFAULT_ACC; - else - devacc_attr_access = handlep->ah_acc.devacc_attr_access; - - hp = (ddi_acc_impl_t *)handlep->ah_platform_private; - - /* - * Can only do FLAGERR if scan callback is set up. This should - * also guarantee that the peekpoke_mutex and err_mutex are defined. 
- */
-	if (devacc_attr_access == DDI_FLAGERR_ACC && hp->ahi_scan == NULL)
-		devacc_attr_access = DDI_DEFAULT_ACC;
-
-	switch (devacc_attr_access) {
-	case DDI_CAUTIOUS_ACC:
-		hp->ahi_get8 = i_ddi_caut_get8;
-		hp->ahi_put8 = i_ddi_caut_put8;
-		hp->ahi_rep_get8 = i_ddi_caut_rep_get8;
-		hp->ahi_rep_put8 = i_ddi_caut_rep_put8;
-		hp->ahi_get16 = i_ddi_caut_get16;
-		hp->ahi_get32 = i_ddi_caut_get32;
-		hp->ahi_put16 = i_ddi_caut_put16;
-		hp->ahi_put32 = i_ddi_caut_put32;
-		hp->ahi_rep_get16 = i_ddi_caut_rep_get16;
-		hp->ahi_rep_get32 = i_ddi_caut_rep_get32;
-		hp->ahi_rep_put16 = i_ddi_caut_rep_put16;
-		hp->ahi_rep_put32 = i_ddi_caut_rep_put32;
-		hp->ahi_get64 = i_ddi_caut_get64;
-		hp->ahi_put64 = i_ddi_caut_put64;
-		hp->ahi_rep_get64 = i_ddi_caut_rep_get64;
-		hp->ahi_rep_put64 = i_ddi_caut_rep_put64;
-		break;
-	case DDI_FLAGERR_ACC:
-		if (hp->ahi_acc_attr & DDI_ACCATTR_IO_SPACE) {
-			hp->ahi_get8 = i_ddi_prot_io_get8;
-			hp->ahi_put8 = i_ddi_prot_io_put8;
-			hp->ahi_rep_get8 = i_ddi_prot_io_rep_get8;
-			hp->ahi_rep_put8 = i_ddi_prot_io_rep_put8;
-
-			/* temporarily set these 64 functions to no-ops */
-			hp->ahi_get64 = i_ddi_io_get64;
-			hp->ahi_put64 = i_ddi_io_put64;
-			hp->ahi_rep_get64 = i_ddi_io_rep_get64;
-			hp->ahi_rep_put64 = i_ddi_io_rep_put64;
-
-			/*
-			 * check for BIG endian access
-			 */
-			if (handlep->ah_acc.devacc_attr_endian_flags ==
-			    DDI_STRUCTURE_BE_ACC) {
-				hp->ahi_get16 = i_ddi_prot_io_swap_get16;
-				hp->ahi_get32 = i_ddi_prot_io_swap_get32;
-				hp->ahi_put16 = i_ddi_prot_io_swap_put16;
-				hp->ahi_put32 = i_ddi_prot_io_swap_put32;
-				hp->ahi_rep_get16 =
-				    i_ddi_prot_io_swap_rep_get16;
-				hp->ahi_rep_get32 =
-				    i_ddi_prot_io_swap_rep_get32;
-				hp->ahi_rep_put16 =
-				    i_ddi_prot_io_swap_rep_put16;
-				hp->ahi_rep_put32 =
-				    i_ddi_prot_io_swap_rep_put32;
-			} else {
-				hp->ahi_acc_attr |= DDI_ACCATTR_DIRECT;
-				hp->ahi_get16 = i_ddi_prot_io_get16;
-				hp->ahi_get32 = i_ddi_prot_io_get32;
-				hp->ahi_put16 = i_ddi_prot_io_put16;
-				hp->ahi_put32 = i_ddi_prot_io_put32;
-				hp->ahi_rep_get16 = i_ddi_prot_io_rep_get16;
-				hp->ahi_rep_get32 = i_ddi_prot_io_rep_get32;
-				hp->ahi_rep_put16 = i_ddi_prot_io_rep_put16;
-				hp->ahi_rep_put32 = i_ddi_prot_io_rep_put32;
-			}
-
-		} else if (hp->ahi_acc_attr & DDI_ACCATTR_CPU_VADDR) {
-
-			hp->ahi_get8 = i_ddi_prot_vaddr_get8;
-			hp->ahi_put8 = i_ddi_prot_vaddr_put8;
-			hp->ahi_rep_get8 = i_ddi_prot_vaddr_rep_get8;
-			hp->ahi_rep_put8 = i_ddi_prot_vaddr_rep_put8;
-
-			/*
-			 * check for BIG endian access
-			 */
-			if (handlep->ah_acc.devacc_attr_endian_flags ==
-			    DDI_STRUCTURE_BE_ACC) {
-
-				hp->ahi_get16 = i_ddi_prot_vaddr_swap_get16;
-				hp->ahi_get32 = i_ddi_prot_vaddr_swap_get32;
-				hp->ahi_get64 = i_ddi_prot_vaddr_swap_get64;
-				hp->ahi_put16 = i_ddi_prot_vaddr_swap_put16;
-				hp->ahi_put32 = i_ddi_prot_vaddr_swap_put32;
-				hp->ahi_put64 = i_ddi_prot_vaddr_swap_put64;
-				hp->ahi_rep_get16 =
-				    i_ddi_prot_vaddr_swap_rep_get16;
-				hp->ahi_rep_get32 =
-				    i_ddi_prot_vaddr_swap_rep_get32;
-				hp->ahi_rep_get64 =
-				    i_ddi_prot_vaddr_swap_rep_get64;
-				hp->ahi_rep_put16 =
-				    i_ddi_prot_vaddr_swap_rep_put16;
-				hp->ahi_rep_put32 =
-				    i_ddi_prot_vaddr_swap_rep_put32;
-				hp->ahi_rep_put64 =
-				    i_ddi_prot_vaddr_swap_rep_put64;
-			} else {
-				hp->ahi_acc_attr |= DDI_ACCATTR_DIRECT;
-				hp->ahi_get16 = i_ddi_prot_vaddr_get16;
-				hp->ahi_get32 = i_ddi_prot_vaddr_get32;
-				hp->ahi_get64 = i_ddi_prot_vaddr_get64;
-				hp->ahi_put16 = i_ddi_prot_vaddr_put16;
-				hp->ahi_put32 = i_ddi_prot_vaddr_put32;
-				hp->ahi_put64 = i_ddi_prot_vaddr_put64;
-				hp->ahi_rep_get16 = i_ddi_prot_vaddr_rep_get16;
-				hp->ahi_rep_get32 = 
i_ddi_prot_vaddr_rep_get32;
-				hp->ahi_rep_get64 = i_ddi_prot_vaddr_rep_get64;
-				hp->ahi_rep_put16 = i_ddi_prot_vaddr_rep_put16;
-				hp->ahi_rep_put32 = i_ddi_prot_vaddr_rep_put32;
-				hp->ahi_rep_put64 = i_ddi_prot_vaddr_rep_put64;
-			}
-		}
-		break;
-	case DDI_DEFAULT_ACC:
-		if (hp->ahi_acc_attr & DDI_ACCATTR_IO_SPACE) {
-			hp->ahi_get8 = i_ddi_io_get8;
-			hp->ahi_put8 = i_ddi_io_put8;
-			hp->ahi_rep_get8 = i_ddi_io_rep_get8;
-			hp->ahi_rep_put8 = i_ddi_io_rep_put8;
-
-			/* temporarily set these 64 functions to no-ops */
-			hp->ahi_get64 = i_ddi_io_get64;
-			hp->ahi_put64 = i_ddi_io_put64;
-			hp->ahi_rep_get64 = i_ddi_io_rep_get64;
-			hp->ahi_rep_put64 = i_ddi_io_rep_put64;
-
-			/*
-			 * check for BIG endian access
-			 */
-			if (handlep->ah_acc.devacc_attr_endian_flags ==
-			    DDI_STRUCTURE_BE_ACC) {
-				hp->ahi_get16 = i_ddi_io_swap_get16;
-				hp->ahi_get32 = i_ddi_io_swap_get32;
-				hp->ahi_put16 = i_ddi_io_swap_put16;
-				hp->ahi_put32 = i_ddi_io_swap_put32;
-				hp->ahi_rep_get16 = i_ddi_io_swap_rep_get16;
-				hp->ahi_rep_get32 = i_ddi_io_swap_rep_get32;
-				hp->ahi_rep_put16 = i_ddi_io_swap_rep_put16;
-				hp->ahi_rep_put32 = i_ddi_io_swap_rep_put32;
-			} else {
-				hp->ahi_acc_attr |= DDI_ACCATTR_DIRECT;
-				hp->ahi_get16 = i_ddi_io_get16;
-				hp->ahi_get32 = i_ddi_io_get32;
-				hp->ahi_put16 = i_ddi_io_put16;
-				hp->ahi_put32 = i_ddi_io_put32;
-				hp->ahi_rep_get16 = i_ddi_io_rep_get16;
-				hp->ahi_rep_get32 = i_ddi_io_rep_get32;
-				hp->ahi_rep_put16 = i_ddi_io_rep_put16;
-				hp->ahi_rep_put32 = i_ddi_io_rep_put32;
-			}
-
-		} else if (hp->ahi_acc_attr & DDI_ACCATTR_CPU_VADDR) {
-
-			hp->ahi_get8 = i_ddi_vaddr_get8;
-			hp->ahi_put8 = i_ddi_vaddr_put8;
-			hp->ahi_rep_get8 = i_ddi_vaddr_rep_get8;
-			hp->ahi_rep_put8 = i_ddi_vaddr_rep_put8;
-
-			/*
-			 * check for BIG endian access
-			 */
-			if (handlep->ah_acc.devacc_attr_endian_flags ==
-			    DDI_STRUCTURE_BE_ACC) {
-
-				hp->ahi_get16 = i_ddi_vaddr_swap_get16;
-				hp->ahi_get32 = i_ddi_vaddr_swap_get32;
-				hp->ahi_get64 = i_ddi_vaddr_swap_get64;
-				hp->ahi_put16 = i_ddi_vaddr_swap_put16;
-				hp->ahi_put32 = i_ddi_vaddr_swap_put32;
-				hp->ahi_put64 = i_ddi_vaddr_swap_put64;
-				hp->ahi_rep_get16 = i_ddi_vaddr_swap_rep_get16;
-				hp->ahi_rep_get32 = i_ddi_vaddr_swap_rep_get32;
-				hp->ahi_rep_get64 = i_ddi_vaddr_swap_rep_get64;
-				hp->ahi_rep_put16 = i_ddi_vaddr_swap_rep_put16;
-				hp->ahi_rep_put32 = i_ddi_vaddr_swap_rep_put32;
-				hp->ahi_rep_put64 = i_ddi_vaddr_swap_rep_put64;
-			} else {
-				hp->ahi_acc_attr |= DDI_ACCATTR_DIRECT;
-				hp->ahi_get16 = i_ddi_vaddr_get16;
-				hp->ahi_get32 = i_ddi_vaddr_get32;
-				hp->ahi_get64 = i_ddi_vaddr_get64;
-				hp->ahi_put16 = i_ddi_vaddr_put16;
-				hp->ahi_put32 = i_ddi_vaddr_put32;
-				hp->ahi_put64 = i_ddi_vaddr_put64;
-				hp->ahi_rep_get16 = i_ddi_vaddr_rep_get16;
-				hp->ahi_rep_get32 = i_ddi_vaddr_rep_get32;
-				hp->ahi_rep_get64 = i_ddi_vaddr_rep_get64;
-				hp->ahi_rep_put16 = i_ddi_vaddr_rep_put16;
-				hp->ahi_rep_put32 = i_ddi_vaddr_rep_put32;
-				hp->ahi_rep_put64 = i_ddi_vaddr_rep_put64;
-			}
-		}
-		break;
-	}
-	hp->ahi_fault_check = i_ddi_acc_fault_check;
-	hp->ahi_fault_notify = i_ddi_acc_fault_notify;
-	hp->ahi_fault = 0;
-	impl_acc_err_init(handlep);
-}
-
-/*
- * The following are low-level routines for data access.
- *
- * All of these routines should be implemented in assembly.  
Those - * that have been rewritten can be found in ~ml/ddi_i86_asm.s - */ - -/*ARGSUSED*/ -uint16_t -i_ddi_vaddr_swap_get16(ddi_acc_impl_t *hdlp, uint16_t *addr) -{ - return (ddi_swap16(*addr)); -} - -/*ARGSUSED*/ -uint16_t -i_ddi_io_swap_get16(ddi_acc_impl_t *hdlp, uint16_t *addr) -{ - return (ddi_swap16(inw((uintptr_t)addr))); -} - -/*ARGSUSED*/ -uint32_t -i_ddi_vaddr_swap_get32(ddi_acc_impl_t *hdlp, uint32_t *addr) -{ - return (ddi_swap32(*addr)); -} - -/*ARGSUSED*/ -uint32_t -i_ddi_io_swap_get32(ddi_acc_impl_t *hdlp, uint32_t *addr) -{ - return (ddi_swap32(inl((uintptr_t)addr))); -} - -/*ARGSUSED*/ -uint64_t -i_ddi_vaddr_swap_get64(ddi_acc_impl_t *hdlp, uint64_t *addr) -{ - return (ddi_swap64(*addr)); -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_swap_put16(ddi_acc_impl_t *hdlp, uint16_t *addr, uint16_t value) -{ - *addr = ddi_swap16(value); -} - -/*ARGSUSED*/ -void -i_ddi_io_swap_put16(ddi_acc_impl_t *hdlp, uint16_t *addr, uint16_t value) -{ - outw((uintptr_t)addr, ddi_swap16(value)); -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_swap_put32(ddi_acc_impl_t *hdlp, uint32_t *addr, uint32_t value) -{ - *addr = ddi_swap32(value); -} - -/*ARGSUSED*/ -void -i_ddi_io_swap_put32(ddi_acc_impl_t *hdlp, uint32_t *addr, uint32_t value) -{ - outl((uintptr_t)addr, ddi_swap32(value)); -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_swap_put64(ddi_acc_impl_t *hdlp, uint64_t *addr, uint64_t value) -{ - *addr = ddi_swap64(value); -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_rep_get8(ddi_acc_impl_t *hdlp, uint8_t *host_addr, - uint8_t *dev_addr, size_t repcount, uint_t flags) -{ - uint8_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *h++ = *d++; - else - for (; repcount; repcount--) - *h++ = *d; -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_rep_get16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - uint16_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *h++ = *d++; - else - for (; repcount; repcount--) - *h++ = *d; -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_swap_rep_get16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - uint16_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *h++ = ddi_swap16(*d++); - else - for (; repcount; repcount--) - *h++ = ddi_swap16(*d); -} - -/*ARGSUSED*/ -void -i_ddi_io_swap_rep_get16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - uint16_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--, port += 2) - *h++ = ddi_swap16(inw(port)); - else - for (; repcount; repcount--) - *h++ = ddi_swap16(inw(port)); -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_rep_get32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - uint32_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *h++ = *d++; - else - for (; repcount; repcount--) - *h++ = *d; -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_swap_rep_get32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - uint32_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *h++ = ddi_swap32(*d++); - else - for (; repcount; repcount--) - *h++ = ddi_swap32(*d); -} - 
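/*
 * [Editor's note -- not part of the original file: a minimal sketch of how a
 * driver would normally reach the rep_get routines above, assuming a
 * hypothetical device with a run of big-endian 16-bit registers. The register
 * set number (1), offset (XX_DATA_OFF) and count (XX_NREGS) are invented; the
 * DDI interfaces themselves (ddi_regs_map_setup(9F), ddi_rep_get16(9F),
 * ddi_regs_map_free(9F)) are the standard ones.
 *
 *	ddi_device_acc_attr_t attr;
 *	ddi_acc_handle_t h;
 *	uint16_t buf[XX_NREGS];
 *	caddr_t base;
 *
 *	attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
 *	attr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
 *	attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
 *
 *	if (ddi_regs_map_setup(dip, 1, &base, 0, 0, &attr, &h) ==
 *	    DDI_SUCCESS) {
 *		ddi_rep_get16(h, buf, (uint16_t *)(base + XX_DATA_OFF),
 *		    XX_NREGS, DDI_DEV_AUTOINCR);
 *		ddi_regs_map_free(&h);
 *	}
 *
 * DDI_STRUCTURE_BE_ACC is what routes the handle to the i_ddi_*_swap_*
 * variants above, and DDI_DEV_AUTOINCR (versus DDI_DEV_NO_AUTOINCR) selects
 * the address-advancing loop rather than the re-read-one-location loop.]
 */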
-/*ARGSUSED*/ -void -i_ddi_io_swap_rep_get32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - uint32_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--, port += 4) - *h++ = ddi_swap32(inl(port)); - else - for (; repcount; repcount--) - *h++ = ddi_swap32(inl(port)); -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_rep_get64(ddi_acc_impl_t *hdlp, uint64_t *host_addr, - uint64_t *dev_addr, size_t repcount, uint_t flags) -{ - uint64_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *h++ = *d++; - else - for (; repcount; repcount--) - *h++ = *d; -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_swap_rep_get64(ddi_acc_impl_t *hdlp, uint64_t *host_addr, - uint64_t *dev_addr, size_t repcount, uint_t flags) -{ - uint64_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *h++ = ddi_swap64(*d++); - else - for (; repcount; repcount--) - *h++ = ddi_swap64(*d); -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_rep_put8(ddi_acc_impl_t *hdlp, uint8_t *host_addr, - uint8_t *dev_addr, size_t repcount, uint_t flags) -{ - uint8_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = *h++; - else - for (; repcount; repcount--) - *d = *h++; -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_rep_put16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - uint16_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = *h++; - else - for (; repcount; repcount--) - *d = *h++; -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_swap_rep_put16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - uint16_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = ddi_swap16(*h++); - else - for (; repcount; repcount--) - *d = ddi_swap16(*h++); -} - -/*ARGSUSED*/ -void -i_ddi_io_swap_rep_put16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - uint16_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--, port += 2) - outw(port, ddi_swap16(*h++)); - else - for (; repcount; repcount--) - outw(port, ddi_swap16(*h++)); -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_rep_put32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - uint32_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = *h++; - else - for (; repcount; repcount--) - *d = *h++; -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_swap_rep_put32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - uint32_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = ddi_swap32(*h++); - else - for (; repcount; repcount--) - *d = ddi_swap32(*h++); -} - -/*ARGSUSED*/ -void -i_ddi_io_swap_rep_put32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - uint32_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--, port += 4) - outl(port, 
ddi_swap32(*h++)); - else - for (; repcount; repcount--) - outl(port, ddi_swap32(*h++)); -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_rep_put64(ddi_acc_impl_t *hdlp, uint64_t *host_addr, - uint64_t *dev_addr, size_t repcount, uint_t flags) -{ - uint64_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = *h++; - else - for (; repcount; repcount--) - *d = *h++; -} - -/*ARGSUSED*/ -void -i_ddi_vaddr_swap_rep_put64(ddi_acc_impl_t *hdlp, uint64_t *host_addr, - uint64_t *dev_addr, size_t repcount, uint_t flags) -{ - uint64_t *h, *d; - - h = host_addr; - d = dev_addr; - - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = ddi_swap64(*h++); - else - for (; repcount; repcount--) - *d = ddi_swap64(*h++); -} - -/*ARGSUSED*/ -uint64_t -i_ddi_io_get64(ddi_acc_impl_t *hdlp, uint64_t *addr) -{ - panic("ddi_get64 from i/o space"); - /*NOTREACHED*/ - return (0); -} - -/*ARGSUSED*/ -void -i_ddi_io_put64(ddi_acc_impl_t *hdlp, uint64_t *host_addr, uint64_t value) -{ - panic("ddi_put64 to i/o space"); - /*NOTREACHED*/ -} - -void -do_scan(ddi_acc_impl_t *hdlp) -{ - ddi_fm_error_t de; - ndi_err_t *errp = (ndi_err_t *)hdlp->ahi_err; - - bzero(&de, sizeof (ddi_fm_error_t)); - de.fme_version = DDI_FME_VERSION; - de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); - de.fme_flag = DDI_FM_ERR_UNEXPECTED; - - mutex_enter(hdlp->ahi_err_mutexp); - hdlp->ahi_scan(hdlp->ahi_scan_dip, &de); - if (de.fme_status != DDI_FM_OK) { - errp->err_ena = de.fme_ena; - errp->err_expected = de.fme_flag; - errp->err_status = DDI_FM_NONFATAL; - } - mutex_exit(hdlp->ahi_err_mutexp); -} - -/*ARGSUSED*/ -uint8_t -i_ddi_prot_vaddr_get8(ddi_acc_impl_t *hdlp, uint8_t *addr) -{ - uint8_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = *addr; - if (val == 0xff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - -/*ARGSUSED*/ -uint16_t -i_ddi_prot_vaddr_get16(ddi_acc_impl_t *hdlp, uint16_t *addr) -{ - uint16_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = *addr; - if (val == 0xffff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - -/*ARGSUSED*/ -uint32_t -i_ddi_prot_vaddr_get32(ddi_acc_impl_t *hdlp, uint32_t *addr) -{ - uint32_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = *addr; - if (val == 0xffffffff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - -/*ARGSUSED*/ -uint64_t -i_ddi_prot_vaddr_get64(ddi_acc_impl_t *hdlp, uint64_t *addr) -{ - uint64_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = *addr; - if (val == 0xffffffffffffffff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - -/*ARGSUSED*/ -uint8_t -i_ddi_prot_io_get8(ddi_acc_impl_t *hdlp, uint8_t *addr) -{ - uint8_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = inb((uintptr_t)addr); - if (val == 0xff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - -/*ARGSUSED*/ -uint16_t -i_ddi_prot_io_get16(ddi_acc_impl_t *hdlp, uint16_t *addr) -{ - uint16_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = inw((uintptr_t)addr); - if (val == 0xffff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - -/*ARGSUSED*/ -uint32_t -i_ddi_prot_io_get32(ddi_acc_impl_t *hdlp, uint32_t *addr) -{ - uint32_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = inl((uintptr_t)addr); - if (val == 0xffffffff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - 
-/*ARGSUSED*/ -uint16_t -i_ddi_prot_vaddr_swap_get16(ddi_acc_impl_t *hdlp, uint16_t *addr) -{ - uint16_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = ddi_swap16(*addr); - if (val == 0xffff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - -/*ARGSUSED*/ -uint16_t -i_ddi_prot_io_swap_get16(ddi_acc_impl_t *hdlp, uint16_t *addr) -{ - uint16_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = ddi_swap16(inw((uintptr_t)addr)); - if (val == 0xffff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - -/*ARGSUSED*/ -uint32_t -i_ddi_prot_vaddr_swap_get32(ddi_acc_impl_t *hdlp, uint32_t *addr) -{ - uint32_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = ddi_swap32(*addr); - if (val == 0xffffffff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - -/*ARGSUSED*/ -uint32_t -i_ddi_prot_io_swap_get32(ddi_acc_impl_t *hdlp, uint32_t *addr) -{ - uint32_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = ddi_swap32(inl((uintptr_t)addr)); - if (val == 0xffffffff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - -/*ARGSUSED*/ -uint64_t -i_ddi_prot_vaddr_swap_get64(ddi_acc_impl_t *hdlp, uint64_t *addr) -{ - uint64_t val; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - val = ddi_swap64(*addr); - if (val == 0xffffffffffffffff) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); - - return (val); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_put8(ddi_acc_impl_t *hdlp, uint8_t *addr, uint8_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - *addr = value; - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_put8(ddi_acc_impl_t *hdlp, uint8_t *addr, uint8_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - outb((uintptr_t)addr, value); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_put16(ddi_acc_impl_t *hdlp, uint16_t *addr, uint16_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - *addr = value; - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_put16(ddi_acc_impl_t *hdlp, uint16_t *addr, uint16_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - outw((uintptr_t)addr, value); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_put32(ddi_acc_impl_t *hdlp, uint32_t *addr, - uint32_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - *addr = value; - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_put32(ddi_acc_impl_t *hdlp, uint32_t *addr, uint32_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - outl((uintptr_t)addr, value); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_put64(ddi_acc_impl_t *hdlp, uint64_t *addr, - uint64_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - *addr = value; - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_swap_put16(ddi_acc_impl_t *hdlp, uint16_t *addr, - uint16_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - *addr = ddi_swap16(value); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_swap_put16(ddi_acc_impl_t *hdlp, uint16_t *addr, uint16_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - outw((uintptr_t)addr, ddi_swap16(value)); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_swap_put32(ddi_acc_impl_t *hdlp, uint32_t *addr, - uint32_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - *addr = 
ddi_swap32(value); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_swap_put32(ddi_acc_impl_t *hdlp, uint32_t *addr, uint32_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - outl((uintptr_t)addr, ddi_swap32(value)); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_swap_put64(ddi_acc_impl_t *hdlp, uint64_t *addr, - uint64_t value) -{ - mutex_enter(hdlp->ahi_peekpoke_mutexp); - *addr = ddi_swap64(value); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_rep_get8(ddi_acc_impl_t *hdlp, uint8_t *host_addr, - uint8_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint8_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) { - for (; repcount; repcount--, port++) - if ((*h++ = inb(port)) == 0xff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = inb(port)) == 0xff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_rep_get16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint16_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) { - for (; repcount; repcount--, port += 2) - if ((*h++ = inw(port)) == 0xffff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = inw(port)) == 0xffff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_rep_get32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint32_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) { - for (; repcount; repcount--, port += 4) - if ((*h++ = inl(port)) == 0xffffffff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = inl(port)) == 0xffffffff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_rep_get8(ddi_acc_impl_t *hdlp, uint8_t *host_addr, - uint8_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint8_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) { - for (; repcount; repcount--) - if ((*h++ = *d++) == 0xff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = *d) == 0xff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_rep_get16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint16_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) { - for (; repcount; repcount--) - if ((*h++ = *d++) == 0xffff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = *d) == 0xffff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_swap_rep_get16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint16_t *h, *d; - - h = host_addr; - d = 
dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) { - for (; repcount; repcount--) - if ((*h++ = ddi_swap16(*d++)) == 0xffff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = ddi_swap16(*d)) == 0xffff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_swap_rep_get16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint16_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) { - for (; repcount; repcount--, port += 2) - if ((*h++ = ddi_swap16(inw(port))) == 0xffff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = ddi_swap16(inw(port))) == 0xffff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_rep_get32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint32_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) { - for (; repcount; repcount--) - if ((*h++ = *d++) == 0xffffffff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = *d) == 0xffffffff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_swap_rep_get32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint32_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) { - for (; repcount; repcount--) - if ((*h++ = ddi_swap32(*d++)) == 0xffffffff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = ddi_swap32(*d)) == 0xffffffff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_swap_rep_get32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint32_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) { - for (; repcount; repcount--, port += 4) - if ((*h++ = ddi_swap32(inl(port))) == 0xffffffff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = ddi_swap32(inl(port))) == 0xffffffff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_rep_get64(ddi_acc_impl_t *hdlp, uint64_t *host_addr, - uint64_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint64_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) { - for (; repcount; repcount--) - if ((*h++ = *d++) == 0xffffffffffffffff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = *d) == 0xffffffffffffffff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_swap_rep_get64(ddi_acc_impl_t *hdlp, uint64_t *host_addr, - uint64_t *dev_addr, size_t repcount, uint_t flags) -{ - int fail = 0; - uint64_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == 
DDI_DEV_AUTOINCR) { - for (; repcount; repcount--) - if ((*h++ = ddi_swap64(*d++)) == 0xffffffffffffffff) - fail = 1; - } else { - for (; repcount; repcount--) - if ((*h++ = ddi_swap64(*d)) == 0xffffffffffffffff) - fail = 1; - } - if (fail == 1) - do_scan(hdlp); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_rep_put8(ddi_acc_impl_t *hdlp, uint8_t *host_addr, - uint8_t *dev_addr, size_t repcount, uint_t flags) -{ - uint8_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = *h++; - else - for (; repcount; repcount--) - *d = *h++; - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_rep_put8(ddi_acc_impl_t *hdlp, uint8_t *host_addr, - uint8_t *dev_addr, size_t repcount, uint_t flags) -{ - uint8_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--, port++) - outb(port, *h++); - else - for (; repcount; repcount--) - outb(port, *h++); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_rep_put16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - uint16_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = *h++; - else - for (; repcount; repcount--) - *d = *h++; - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_rep_put16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - uint16_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--, port += 2) - outw(port, *h++); - else - for (; repcount; repcount--) - outw(port, *h++); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_swap_rep_put16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - uint16_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = ddi_swap16(*h++); - else - for (; repcount; repcount--) - *d = ddi_swap16(*h++); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_swap_rep_put16(ddi_acc_impl_t *hdlp, uint16_t *host_addr, - uint16_t *dev_addr, size_t repcount, uint_t flags) -{ - uint16_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--, port += 2) - outw(port, ddi_swap16(*h++)); - else - for (; repcount; repcount--) - outw(port, ddi_swap16(*h++)); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_rep_put32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - uint32_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = *h++; - else - for (; repcount; repcount--) - *d = *h++; - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_rep_put32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t 
*dev_addr, size_t repcount, uint_t flags) -{ - uint32_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--, port += 4) - outl(port, *h++); - else - for (; repcount; repcount--) - outl(port, *h++); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_swap_rep_put32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - uint32_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = ddi_swap32(*h++); - else - for (; repcount; repcount--) - *d = ddi_swap32(*h++); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_io_swap_rep_put32(ddi_acc_impl_t *hdlp, uint32_t *host_addr, - uint32_t *dev_addr, size_t repcount, uint_t flags) -{ - uint32_t *h; - uintptr_t port; - - h = host_addr; - port = (uintptr_t)dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--, port += 4) - outl(port, ddi_swap32(*h++)); - else - for (; repcount; repcount--) - outl(port, ddi_swap32(*h++)); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_rep_put64(ddi_acc_impl_t *hdlp, uint64_t *host_addr, - uint64_t *dev_addr, size_t repcount, uint_t flags) -{ - uint64_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = *h++; - else - for (; repcount; repcount--) - *d = *h++; - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -/*ARGSUSED*/ -void -i_ddi_prot_vaddr_swap_rep_put64(ddi_acc_impl_t *hdlp, uint64_t *host_addr, - uint64_t *dev_addr, size_t repcount, uint_t flags) -{ - uint64_t *h, *d; - - h = host_addr; - d = dev_addr; - - mutex_enter(hdlp->ahi_peekpoke_mutexp); - if (flags == DDI_DEV_AUTOINCR) - for (; repcount; repcount--) - *d++ = ddi_swap64(*h++); - else - for (; repcount; repcount--) - *d = ddi_swap64(*h++); - mutex_exit(hdlp->ahi_peekpoke_mutexp); -} - -void -ddi_io_rep_get8(ddi_acc_handle_t handle, - uint8_t *host_addr, uint8_t *dev_addr, size_t repcount) -{ - (((ddi_acc_impl_t *)handle)->ahi_rep_get8) - ((ddi_acc_impl_t *)handle, host_addr, dev_addr, - repcount, DDI_DEV_NO_AUTOINCR); -} - -void -ddi_io_rep_get16(ddi_acc_handle_t handle, - uint16_t *host_addr, uint16_t *dev_addr, size_t repcount) -{ - (((ddi_acc_impl_t *)handle)->ahi_rep_get16) - ((ddi_acc_impl_t *)handle, host_addr, dev_addr, - repcount, DDI_DEV_NO_AUTOINCR); -} - -void -ddi_io_rep_get32(ddi_acc_handle_t handle, - uint32_t *host_addr, uint32_t *dev_addr, size_t repcount) -{ - (((ddi_acc_impl_t *)handle)->ahi_rep_get32) - ((ddi_acc_impl_t *)handle, host_addr, dev_addr, - repcount, DDI_DEV_NO_AUTOINCR); -} - -/*ARGSUSED*/ -void -i_ddi_io_rep_get64(ddi_acc_impl_t *hdlp, uint64_t *host_addr, - uint64_t *dev_addr, size_t repcount, uint_t flags) -{ - cmn_err(CE_PANIC, "ddi_rep_get64 from i/o space"); -} - -void -ddi_io_rep_put8(ddi_acc_handle_t handle, - uint8_t *host_addr, uint8_t *dev_addr, size_t repcount) -{ - (((ddi_acc_impl_t *)handle)->ahi_rep_put8) - ((ddi_acc_impl_t *)handle, host_addr, dev_addr, - repcount, DDI_DEV_NO_AUTOINCR); -} - -void -ddi_io_rep_put16(ddi_acc_handle_t handle, - uint16_t *host_addr, uint16_t *dev_addr, size_t repcount) -{ - (((ddi_acc_impl_t *)handle)->ahi_rep_put16) - ((ddi_acc_impl_t *)handle, 
host_addr, dev_addr, - repcount, DDI_DEV_NO_AUTOINCR); -} - -void -ddi_io_rep_put32(ddi_acc_handle_t handle, - uint32_t *host_addr, uint32_t *dev_addr, size_t repcount) -{ - (((ddi_acc_impl_t *)handle)->ahi_rep_put32) - ((ddi_acc_impl_t *)handle, host_addr, dev_addr, - repcount, DDI_DEV_NO_AUTOINCR); -} - -/*ARGSUSED*/ -void -i_ddi_io_rep_put64(ddi_acc_impl_t *hdlp, uint64_t *host_addr, - uint64_t *dev_addr, size_t repcount, uint_t flags) -{ - cmn_err(CE_PANIC, "ddi_rep_put64 to i/o space"); -} - -/* - * These next two functions could be translated into assembler someday - */ -int -ddi_check_acc_handle(ddi_acc_handle_t handle) -{ - ddi_acc_impl_t *hdlp = (ddi_acc_impl_t *)handle; - return (((*hdlp->ahi_fault_check)(hdlp) == DDI_SUCCESS) ? DDI_SUCCESS : - DDI_FAILURE); -} - -int -i_ddi_acc_fault_check(ddi_acc_impl_t *hdlp) -{ - /* Default version, just returns flag value */ - return (hdlp->ahi_fault); -} - -/*ARGSUSED*/ -void -i_ddi_acc_fault_notify(ddi_acc_impl_t *hdlp) -{ - /* Default version, does nothing for now */ -} - -void -i_ddi_acc_set_fault(ddi_acc_handle_t handle) -{ - ddi_acc_impl_t *hdlp = (ddi_acc_impl_t *)handle; - - if (!hdlp->ahi_fault) { - hdlp->ahi_fault = 1; - (*hdlp->ahi_fault_notify)(hdlp); - } -} - -void -i_ddi_acc_clr_fault(ddi_acc_handle_t handle) -{ - ddi_acc_impl_t *hdlp = (ddi_acc_impl_t *)handle; - - if (hdlp->ahi_fault) { - hdlp->ahi_fault = 0; - (*hdlp->ahi_fault_notify)(hdlp); - } -} diff --git a/usr/src/uts/intel/ia32/os/desctbls.c b/usr/src/uts/intel/ia32/os/desctbls.c deleted file mode 100644 index 35345c3fe8..0000000000 --- a/usr/src/uts/intel/ia32/os/desctbls.c +++ /dev/null @@ -1,1218 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -/* - * Copyright 2018 Joyent, Inc. All rights reserved. - */ - -/* - * Copyright (c) 1992 Terrence R. Lambert. - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - */ - -#include <sys/types.h> -#include <sys/sysmacros.h> -#include <sys/tss.h> -#include <sys/segments.h> -#include <sys/trap.h> -#include <sys/cpuvar.h> -#include <sys/bootconf.h> -#include <sys/x86_archext.h> -#include <sys/controlregs.h> -#include <sys/archsystm.h> -#include <sys/machsystm.h> -#include <sys/kobj.h> -#include <sys/cmn_err.h> -#include <sys/reboot.h> -#include <sys/kdi.h> -#include <sys/mach_mmu.h> -#include <sys/systm.h> -#include <sys/note.h> - -#ifdef __xpv -#include <sys/hypervisor.h> -#include <vm/as.h> -#endif - -#include <sys/promif.h> -#include <sys/bootinfo.h> -#include <vm/kboot_mmu.h> -#include <vm/hat_pte.h> - -/* - * cpu0 and default tables and structures. - */ -user_desc_t *gdt0; -#if !defined(__xpv) -desctbr_t gdt0_default_r; -#endif - -gate_desc_t *idt0; /* interrupt descriptor table */ - -tss_t *ktss0; /* kernel task state structure */ - - -user_desc_t zero_udesc; /* base zero user desc native procs */ -user_desc_t null_udesc; /* null user descriptor */ -system_desc_t null_sdesc; /* null system descriptor */ - -user_desc_t zero_u32desc; /* 32-bit compatibility procs */ - -user_desc_t ucs_on; -user_desc_t ucs_off; -user_desc_t ucs32_on; -user_desc_t ucs32_off; - -/* - * If the size of this is changed, you must update hat_pcp_setup() and the - * definitions in exception.s - */ -extern char dblfault_stack0[DEFAULTSTKSZ]; -extern char nmi_stack0[DEFAULTSTKSZ]; -extern char mce_stack0[DEFAULTSTKSZ]; - -extern void fast_null(void); -extern hrtime_t get_hrtime(void); -extern hrtime_t gethrvtime(void); -extern hrtime_t get_hrestime(void); -extern uint64_t getlgrp(void); - -void (*(fasttable[]))(void) = { - fast_null, /* T_FNULL routine */ - fast_null, /* T_FGETFP routine (initially null) */ - fast_null, /* T_FSETFP routine (initially null) */ - (void (*)())(uintptr_t)get_hrtime, /* T_GETHRTIME */ - (void (*)())(uintptr_t)gethrvtime, /* T_GETHRVTIME */ - (void (*)())(uintptr_t)get_hrestime, /* T_GETHRESTIME */ - (void (*)())(uintptr_t)getlgrp /* T_GETLGRP */ -}; - -/* - * Structure containing pre-computed descriptors to allow us to temporarily - * interpose on a standard handler. 
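 *
 * [Editor's note -- not in the original source: the intended use, sketched.
 * At init time the default IDT gate is cached next to its interposing
 * replacement, so enabling or disabling a brand is a single reversible
 * descriptor store per handler, roughly:
 *
 *	struct interposing_handler *ih = &brand_tbl[0];
 *	CPU->cpu_idt[ih->ih_inum] = ih->ih_interp_desc;		(enable)
 *	CPU->cpu_idt[ih->ih_inum] = ih->ih_default_desc;	(restore)
 *
 * The real versions of these stores, including the hypervisor case, are in
 * brand_interpositioning_enable() and brand_interpositioning_disable()
 * further below.]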
- */ -struct interposing_handler { - int ih_inum; - gate_desc_t ih_interp_desc; - gate_desc_t ih_default_desc; -}; - -/* - * The brand infrastructure interposes on two handlers, and we use one as a - * NULL signpost. - */ -static struct interposing_handler brand_tbl[2]; - -/* - * software prototypes for default local descriptor table - */ - -/* - * Routines for loading segment descriptors in format the hardware - * can understand. - */ - -/* - * In long mode we have the new L or long mode attribute bit - * for code segments. Only the conforming bit in type is used along - * with descriptor priority and present bits. Default operand size must - * be zero when in long mode. In 32-bit compatibility mode all fields - * are treated as in legacy mode. For data segments while in long mode - * only the present bit is loaded. - */ -void -set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size, - uint_t type, uint_t dpl, uint_t gran, uint_t defopsz) -{ - ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG); - /* This should never be a "system" segment. */ - ASSERT3U(type & SDT_S, !=, 0); - - /* - * 64-bit long mode. - */ - if (lmode == SDP_LONG) - dp->usd_def32 = 0; /* 32-bit operands only */ - else - /* - * 32-bit compatibility mode. - */ - dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32-bit ops */ - - /* - * We should always set the "accessed" bit (SDT_A), otherwise the CPU - * will write to the GDT whenever we change segment registers around. - * With KPTI on, the GDT is read-only in the user page table, which - * causes crashes if we don't set this. - */ - ASSERT3U(type & SDT_A, !=, 0); - - dp->usd_long = lmode; /* 64-bit mode */ - dp->usd_type = type; - dp->usd_dpl = dpl; - dp->usd_p = 1; - dp->usd_gran = gran; /* 0 = bytes, 1 = pages */ - - dp->usd_lobase = (uintptr_t)base; - dp->usd_midbase = (uintptr_t)base >> 16; - dp->usd_hibase = (uintptr_t)base >> (16 + 8); - dp->usd_lolimit = size; - dp->usd_hilimit = (uintptr_t)size >> 16; -} - -/* - * Install system segment descriptor for LDT and TSS segments. - */ - -void -set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type, - uint_t dpl) -{ - dp->ssd_lolimit = size; - dp->ssd_hilimit = (uintptr_t)size >> 16; - - dp->ssd_lobase = (uintptr_t)base; - dp->ssd_midbase = (uintptr_t)base >> 16; - dp->ssd_hibase = (uintptr_t)base >> (16 + 8); - dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8); - - dp->ssd_type = type; - dp->ssd_zero1 = 0; /* must be zero */ - dp->ssd_zero2 = 0; - dp->ssd_dpl = dpl; - dp->ssd_p = 1; - dp->ssd_gran = 0; /* force byte units */ -} - -void * -get_ssd_base(system_desc_t *dp) -{ - uintptr_t base; - - base = (uintptr_t)dp->ssd_lobase | - (uintptr_t)dp->ssd_midbase << 16 | - (uintptr_t)dp->ssd_hibase << (16 + 8) | - (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8); - return ((void *)base); -} - -/* - * Install gate segment descriptor for interrupt, trap, call and task gates. - * - * For 64 bit native if we have KPTI enabled, we use the IST stack mechanism on - * all interrupts. We have different ISTs for each class of exceptions that are - * most likely to occur while handling an existing exception; while many of - * these are just going to panic, it's nice not to trample on the existing - * exception state for debugging purposes. - * - * Normal interrupts are all redirected unconditionally to the KPTI trampoline - * stack space. This unifies the trampoline handling between user and kernel - * space (and avoids the need to touch %gs). 
 - * - * The KDI IDT *all* uses the DBG IST: consider single stepping tr_pftrap, when - * we do a read from KMDB that causes another #PF. Without its own IST, this - * would stomp on the kernel's mcpu_kpti_flt frame. - */ -uint_t -idt_vector_to_ist(uint_t vector) -{ -#if defined(__xpv) - _NOTE(ARGUNUSED(vector)); - return (IST_NONE); -#else - switch (vector) { - /* These should always use IST even without KPTI enabled. */ - case T_DBLFLT: - return (IST_DF); - case T_NMIFLT: - return (IST_NMI); - case T_MCE: - return (IST_MCE); - - case T_BPTFLT: - case T_SGLSTP: - if (kpti_enable == 1) { - return (IST_DBG); - } - return (IST_NONE); - case T_STKFLT: - case T_GPFLT: - case T_PGFLT: - if (kpti_enable == 1) { - return (IST_NESTABLE); - } - return (IST_NONE); - default: - if (kpti_enable == 1) { - return (IST_DEFAULT); - } - return (IST_NONE); - } -#endif -} - -void -set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel, - uint_t type, uint_t dpl, uint_t ist) -{ - dp->sgd_looffset = (uintptr_t)func; - dp->sgd_hioffset = (uintptr_t)func >> 16; - dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16); - dp->sgd_selector = (uint16_t)sel; - dp->sgd_ist = ist; - dp->sgd_type = type; - dp->sgd_dpl = dpl; - dp->sgd_p = 1; -} - -/* - * Updates a single user descriptor in the GDT of the current cpu. - * Caller is responsible for preventing cpu migration. - */ - -void -gdt_update_usegd(uint_t sidx, user_desc_t *udp) -{ -#if defined(DEBUG) - /* This should never be a "system" segment, but it might be null. */ - if (udp->usd_p != 0 || udp->usd_type != 0) { - ASSERT3U(udp->usd_type & SDT_S, !=, 0); - } - /* - * We should always set the "accessed" bit (SDT_A), otherwise the CPU - * will write to the GDT whenever we change segment registers around. - * With KPTI on, the GDT is read-only in the user page table, which - * causes crashes if we don't set this. - */ - if (udp->usd_p != 0 || udp->usd_type != 0) { - ASSERT3U(udp->usd_type & SDT_A, !=, 0); - } -#endif - -#if defined(__xpv) - uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx; - - if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp)) - panic("gdt_update_usegd: HYPERVISOR_update_descriptor"); - -#else /* __xpv */ - CPU->cpu_gdt[sidx] = *udp; -#endif /* __xpv */ -} - -/* - * Writes a single descriptor pointed to by udp into a process's - * LDT entry pointed to by ldp. - */ -int -ldt_update_segd(user_desc_t *ldp, user_desc_t *udp) -{ -#if defined(DEBUG) - /* This should never be a "system" segment, but it might be null. */ - if (udp->usd_p != 0 || udp->usd_type != 0) { - ASSERT3U(udp->usd_type & SDT_S, !=, 0); - } - /* - * We should always set the "accessed" bit (SDT_A), otherwise the CPU - * will write to the LDT whenever we change segment registers around. - * With KPTI on, the LDT is read-only in the user page table, which - * causes crashes if we don't set this. - */ - if (udp->usd_p != 0 || udp->usd_type != 0) { - ASSERT3U(udp->usd_type & SDT_A, !=, 0); - } -#endif - -#if defined(__xpv) - uint64_t dpa; - - dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) | - ((uintptr_t)ldp & PAGEOFFSET); - - /* - * The hypervisor is a little more restrictive about what it - * supports in the LDT. - */ - if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0) - return (EINVAL); - -#else /* __xpv */ - *ldp = *udp; - -#endif /* __xpv */ - return (0); -} - -#if defined(__xpv) - -/* - * Converts a hw format gate descriptor into pseudo-IDT format for the hypervisor. - * Returns true if a valid entry was written. 
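 *
 * [Editor's note -- not in the original source: a worked example under
 * assumed inputs. For the page-fault vector (T_PGFLT, i.e. 14) and an
 * SDT_SYSIGT gate selecting KCS_SEL, the resulting pseudo-IDT entry comes
 * out roughly as:
 *
 *	ti->vector = 14;
 *	TI_SET_DPL(ti, sgd->sgd_dpl);
 *	TI_SET_IF(ti, 1);			(interrupt gate)
 *	ti->cs = KCS_SEL | SEL_KPL;		(forced into ring 3)
 *	ti->address = GATESEG_GETOFFSET(sgd);	(handler entry point)
 *
 * so the hypervisor receives a flat record rather than a raw hardware
 * descriptor.]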
- */ -int -xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg) -{ - trap_info_t *ti = ti_arg; /* XXPV Aargh - segments.h comment */ - - /* - * skip holes in the IDT - */ - if (GATESEG_GETOFFSET(sgd) == 0) - return (0); - - ASSERT(sgd->sgd_type == SDT_SYSIGT); - ti->vector = vec; - TI_SET_DPL(ti, sgd->sgd_dpl); - - /* - * Is this an interrupt gate? - */ - if (sgd->sgd_type == SDT_SYSIGT) { - /* LINTED */ - TI_SET_IF(ti, 1); - } - ti->cs = sgd->sgd_selector; - ti->cs |= SEL_KPL; /* force into ring 3. see KCS_SEL */ - ti->address = GATESEG_GETOFFSET(sgd); - return (1); -} - -/* - * Convert a single hw format gate descriptor and write it into our virtual IDT. - */ -void -xen_idt_write(gate_desc_t *sgd, uint_t vec) -{ - trap_info_t trapinfo[2]; - - bzero(trapinfo, sizeof (trapinfo)); - if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0) - return; - if (xen_set_trap_table(trapinfo) != 0) - panic("xen_idt_write: xen_set_trap_table() failed"); -} - -#endif /* __xpv */ - - -/* - * Build kernel GDT. - */ - -static void -init_gdt_common(user_desc_t *gdt) -{ - int i; - - /* - * 64-bit kernel code segment. - */ - set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL, - SDP_PAGES, SDP_OP32); - - /* - * 64-bit kernel data segment. The limit attribute is ignored in 64-bit - * mode, but we set it here to 0xFFFF so that we can use the SYSRET - * instruction to return from system calls back to 32-bit applications. - * SYSRET doesn't update the base, limit, or attributes of %ss or %ds - * descriptors. We therefore must ensure that the kernel uses something, - * though it will be ignored by hardware, that is compatible with 32-bit - * apps. For the same reason we must set the default op size of this - * descriptor to 32-bit operands. - */ - set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA, - SEL_KPL, SDP_PAGES, SDP_OP32); - gdt[GDT_KDATA].usd_def32 = 1; - - /* - * 64-bit user code segment. - */ - set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL, - SDP_PAGES, SDP_OP32); - - /* - * 32-bit user code segment. - */ - set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA, - SEL_UPL, SDP_PAGES, SDP_OP32); - - /* - * See gdt_ucode32() and gdt_ucode_native(). - */ - ucs_on = ucs_off = gdt[GDT_UCODE]; - ucs_off.usd_p = 0; /* forces #np fault */ - - ucs32_on = ucs32_off = gdt[GDT_U32CODE]; - ucs32_off.usd_p = 0; /* forces #np fault */ - - /* - * 32 and 64 bit data segments can actually share the same descriptor. - * In long mode only the present bit is checked but all other fields - * are loaded. But in compatibility mode all fields are interpreted - * as in legacy mode so they must be set correctly for a 32-bit data - * segment. - */ - set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL, - SDP_PAGES, SDP_OP32); - -#if !defined(__xpv) - - /* - * The 64-bit kernel has no default LDT. By default, the LDT descriptor - * in the GDT is 0. - */ - - /* - * Kernel TSS - */ - set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0, - sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL); - -#endif /* !__xpv */ - - /* - * Initialize fs and gs descriptors for 32 bit processes. - * Only attributes and limits are initialized, the effective - * base address is programmed via fsbase/gsbase. - */ - set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA, - SEL_UPL, SDP_PAGES, SDP_OP32); - set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA, - SEL_UPL, SDP_PAGES, SDP_OP32); - - /* - * Initialize the descriptors set aside for brand usage. 
- * Only attributes and limits are initialized. - */ - for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++) - set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA, - SEL_UPL, SDP_PAGES, SDP_OP32); - - /* - * Initialize convenient zero base user descriptors for clearing - * lwp private %fs and %gs descriptors in GDT. See setregs() for - * an example. - */ - set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL, - SDP_BYTES, SDP_OP32); - set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL, - SDP_PAGES, SDP_OP32); -} - -#if defined(__xpv) - -static user_desc_t * -init_gdt(void) -{ - uint64_t gdtpa; - ulong_t ma[1]; /* XXPV should be a memory_t */ - ulong_t addr; - -#if !defined(__lint) - /* - * Our gdt is never larger than a single page. - */ - ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); -#endif - gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, - PAGESIZE, PAGESIZE); - bzero(gdt0, PAGESIZE); - - init_gdt_common(gdt0); - - /* - * XXX Since we never invoke kmdb until after the kernel takes - * over the descriptor tables why not have it use the kernel's - * selectors? - */ - if (boothowto & RB_DEBUG) { - set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, - SEL_KPL, SDP_PAGES, SDP_OP32); - set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, - SEL_KPL, SDP_PAGES, SDP_OP32); - } - - /* - * Clear write permission for page containing the gdt and install it. - */ - gdtpa = pfn_to_pa(va_to_pfn(gdt0)); - ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT); - kbm_read_only((uintptr_t)gdt0, gdtpa); - xen_set_gdt(ma, NGDT); - - /* - * Reload the segment registers to use the new GDT. - * On 64-bit, fixup KCS_SEL to be in ring 3. - * See KCS_SEL in segments.h. - */ - load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL); - - /* - * setup %gs for kernel - */ - xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]); - - /* - * XX64 We should never dereference off "other gsbase" or - * "fsbase". So, we should arrange to point FSBASE and - * KGSBASE somewhere truly awful e.g. point it at the last - * valid address below the hole so that any attempts to index - * off them cause an exception. - * - * For now, point it at 8G -- at least it should be unmapped - * until some 64-bit processes run. - */ - addr = 0x200000000ul; - xen_set_segment_base(SEGBASE_FS, addr); - xen_set_segment_base(SEGBASE_GS_USER, addr); - xen_set_segment_base(SEGBASE_GS_USER_SEL, 0); - - return (gdt0); -} - -#else /* __xpv */ - -static user_desc_t * -init_gdt(void) -{ - desctbr_t r_bgdt, r_gdt; - user_desc_t *bgdt; - -#if !defined(__lint) - /* - * Our gdt is never larger than a single page. - */ - ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); -#endif - gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, - PAGESIZE, PAGESIZE); - bzero(gdt0, PAGESIZE); - - init_gdt_common(gdt0); - - /* - * Copy in from boot's gdt to our gdt. - * Entry 0 is the null descriptor by definition. 
- */ - rd_gdtr(&r_bgdt); - bgdt = (user_desc_t *)r_bgdt.dtr_base; - if (bgdt == NULL) - panic("null boot gdt"); - - gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA]; - gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE]; - gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE]; - gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA]; - gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE]; - - /* - * Install our new GDT - */ - r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1; - r_gdt.dtr_base = (uintptr_t)gdt0; - wr_gdtr(&r_gdt); - - /* - * Reload the segment registers to use the new GDT - */ - load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL); - - /* - * setup %gs for kernel - */ - wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]); - - /* - * XX64 We should never dereference off "other gsbase" or - * "fsbase". So, we should arrange to point FSBASE and - * KGSBASE somewhere truly awful e.g. point it at the last - * valid address below the hole so that any attempts to index - * off them cause an exception. - * - * For now, point it at 8G -- at least it should be unmapped - * until some 64-bit processes run. - */ - wrmsr(MSR_AMD_FSBASE, 0x200000000ul); - wrmsr(MSR_AMD_KGSBASE, 0x200000000ul); - return (gdt0); -} - -#endif /* __xpv */ - - -/* - * Build kernel IDT. - * - * Note that for amd64 we pretty much require every gate to be an interrupt - * gate which blocks interrupts atomically on entry; that's because of our - * dependency on using 'swapgs' every time we come into the kernel to find - * the cpu structure. If we get interrupted just before doing that, %cs could - * be in kernel mode (so that the trap prolog doesn't do a swapgs), but - * %gsbase is really still pointing at something in userland. Bad things will - * ensue. We also use interrupt gates for i386 as well even though this is not - * required for some traps. - * - * Perhaps they should have invented a trap gate that does an atomic swapgs? - */ -static void -init_idt_common(gate_desc_t *idt) -{ - set_gatesegd(&idt[T_ZERODIV], - (kpti_enable == 1) ? &tr_div0trap : &div0trap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV)); - set_gatesegd(&idt[T_SGLSTP], - (kpti_enable == 1) ? &tr_dbgtrap : &dbgtrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP)); - set_gatesegd(&idt[T_NMIFLT], - (kpti_enable == 1) ? &tr_nmiint : &nmiint, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT)); - set_gatesegd(&idt[T_BPTFLT], - (kpti_enable == 1) ? &tr_brktrap : &brktrap, - KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT)); - set_gatesegd(&idt[T_OVFLW], - (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap, - KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW)); - set_gatesegd(&idt[T_BOUNDFLT], - (kpti_enable == 1) ? &tr_boundstrap : &boundstrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT)); - set_gatesegd(&idt[T_ILLINST], - (kpti_enable == 1) ? &tr_invoptrap : &invoptrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST)); - set_gatesegd(&idt[T_NOEXTFLT], - (kpti_enable == 1) ? &tr_ndptrap : &ndptrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT)); - - /* - * double fault handler. - * - * Note that on the hypervisor a guest does not receive #df faults. - * Instead a failsafe event is injected into the guest if its selectors - * and/or stack is in a broken state. See xen_failsafe_callback. - */ -#if !defined(__xpv) - set_gatesegd(&idt[T_DBLFLT], - (kpti_enable == 1) ? 
&tr_syserrtrap : &syserrtrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT)); -#endif /* !__xpv */ - - /* - * T_EXTOVRFLT coprocessor-segment-overrun not supported. - */ - set_gatesegd(&idt[T_TSSFLT], - (kpti_enable == 1) ? &tr_invtsstrap : &invtsstrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT)); - set_gatesegd(&idt[T_SEGFLT], - (kpti_enable == 1) ? &tr_segnptrap : &segnptrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT)); - set_gatesegd(&idt[T_STKFLT], - (kpti_enable == 1) ? &tr_stktrap : &stktrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT)); - set_gatesegd(&idt[T_GPFLT], - (kpti_enable == 1) ? &tr_gptrap : &gptrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT)); - set_gatesegd(&idt[T_PGFLT], - (kpti_enable == 1) ? &tr_pftrap : &pftrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT)); - set_gatesegd(&idt[T_EXTERRFLT], - (kpti_enable == 1) ? &tr_ndperr : &ndperr, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT)); - set_gatesegd(&idt[T_ALIGNMENT], - (kpti_enable == 1) ? &tr_achktrap : &achktrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT)); - set_gatesegd(&idt[T_MCE], - (kpti_enable == 1) ? &tr_mcetrap : &mcetrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE)); - set_gatesegd(&idt[T_SIMDFPE], - (kpti_enable == 1) ? &tr_xmtrap : &xmtrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE)); - - /* - * install fast trap handler at 210. - */ - set_gatesegd(&idt[T_FASTTRAP], - (kpti_enable == 1) ? &tr_fasttrap : &fasttrap, - KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP)); - - /* - * System call handler. - */ - set_gatesegd(&idt[T_SYSCALLINT], - (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int, - KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT)); - - /* - * Install the DTrace interrupt handler for the pid provider. - */ - set_gatesegd(&idt[T_DTRACE_RET], - (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret, - KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET)); - - /* - * Prepare interposing descriptor for the syscall handler - * and cache copy of the default descriptor. - */ - brand_tbl[0].ih_inum = T_SYSCALLINT; - brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT]; - - set_gatesegd(&(brand_tbl[0].ih_interp_desc), - (kpti_enable == 1) ? &tr_brand_sys_syscall_int : - &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL, - idt_vector_to_ist(T_SYSCALLINT)); - - brand_tbl[1].ih_inum = 0; -} - -#if defined(__xpv) - -static void -init_idt(gate_desc_t *idt) -{ - init_idt_common(idt); -} - -#else /* __xpv */ - -static void -init_idt(gate_desc_t *idt) -{ - char ivctname[80]; - void (*ivctptr)(void); - int i; - - /* - * Initialize entire table with 'reserved' trap and then overwrite - * specific entries. T_EXTOVRFLT (9) is unsupported and reserved - * since it can only be generated on a 386 processor. 15 is also - * unsupported and reserved. - */ -#if !defined(__xpv) - for (i = 0; i < NIDT; i++) { - set_gatesegd(&idt[i], - (kpti_enable == 1) ? &tr_resvtrap : &resvtrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, - idt_vector_to_ist(T_RESVTRAP)); - } -#else - for (i = 0; i < NIDT; i++) { - set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, - IST_NONE); - } -#endif - - /* - * 20-31 reserved - */ -#if !defined(__xpv) - for (i = 20; i < 32; i++) { - set_gatesegd(&idt[i], - (kpti_enable == 1) ? 
&tr_invaltrap : &invaltrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, - idt_vector_to_ist(T_INVALTRAP)); - } -#else - for (i = 20; i < 32; i++) { - set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, - IST_NONE); - } -#endif - - /* - * interrupts 32 - 255 - */ - for (i = 32; i < 256; i++) { -#if !defined(__xpv) - (void) snprintf(ivctname, sizeof (ivctname), - (kpti_enable == 1) ? "tr_ivct%d" : "ivct%d", i); -#else - (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i); -#endif - ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0); - if (ivctptr == NULL) - panic("kobj_getsymvalue(%s) failed", ivctname); - - set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, - idt_vector_to_ist(i)); - } - - /* - * Now install the common ones. Note that it will overlay some - * entries installed above like T_SYSCALLINT, T_FASTTRAP etc. - */ - init_idt_common(idt); -} - -#endif /* __xpv */ - -/* - * The kernel does not deal with LDTs unless a user explicitly creates - * one. Under normal circumstances, the LDTR contains 0. Any process attempting - * to reference the LDT will therefore cause a #gp. System calls made via the - * obsolete lcall mechanism are emulated by the #gp fault handler. - */ -static void -init_ldt(void) -{ -#if defined(__xpv) - xen_set_ldt(NULL, 0); -#else - wr_ldtr(0); -#endif -} - -#if !defined(__xpv) - -static void -init_tss(void) -{ - extern struct cpu cpus[]; - - /* - * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each - * context switch but it'll be overwritten with this same value anyway. - */ - if (kpti_enable == 1) { - ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp; - } - - /* Set up the IST stacks for double fault, NMI, MCE. */ - ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)]; - ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)]; - ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)]; - - /* - * This IST stack is used for #DB,#BP (debug) interrupts (when KPTI is - * enabled), and also for KDI (always). - */ - ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp; - - if (kpti_enable == 1) { - /* This IST stack is used for #GP,#PF,#SS (fault) interrupts. */ - ktss0->tss_ist5 = - (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp; - - /* This IST stack is used for all other intrs (for KPTI). */ - ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp; - } - - /* - * Set I/O bit map offset equal to size of TSS segment limit - * for no I/O permission map. This will force all user I/O - * instructions to generate #gp fault. - */ - ktss0->tss_bitmapbase = sizeof (*ktss0); - - /* - * Point %tr to descriptor for ktss0 in gdt. - */ - wr_tsr(KTSS_SEL); -} - -#endif /* !__xpv */ - -#if defined(__xpv) - -void -init_desctbls(void) -{ - uint_t vec; - user_desc_t *gdt; - - /* - * Setup and install our GDT. - */ - gdt = init_gdt(); - - /* - * Store static pa of gdt to speed up pa_to_ma() translations - * on lwp context switches. - */ - ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE)); - CPU->cpu_gdt = gdt; - CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt)); - - /* - * Setup and install our IDT. 
- */ -#if !defined(__lint) - ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE); -#endif - idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA, - PAGESIZE, PAGESIZE); - bzero(idt0, PAGESIZE); - init_idt(idt0); - for (vec = 0; vec < NIDT; vec++) - xen_idt_write(&idt0[vec], vec); - - CPU->cpu_idt = idt0; - - /* - * set default kernel stack - */ - xen_stack_switch(KDS_SEL, - (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]); - - xen_init_callbacks(); - - init_ldt(); -} - -#else /* __xpv */ - -void -init_desctbls(void) -{ - user_desc_t *gdt; - desctbr_t idtr; - - /* - * Allocate IDT and TSS structures on unique pages for better - * performance in virtual machines. - */ -#if !defined(__lint) - ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE); -#endif - idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA, - PAGESIZE, PAGESIZE); - bzero(idt0, PAGESIZE); -#if !defined(__lint) - ASSERT(sizeof (*ktss0) <= PAGESIZE); -#endif - ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA, - PAGESIZE, PAGESIZE); - bzero(ktss0, PAGESIZE); - - - /* - * Setup and install our GDT. - */ - gdt = init_gdt(); - ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE)); - CPU->cpu_gdt = gdt; - - /* - * Initialize this CPU's LDT. - */ - CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA, - LDT_CPU_SIZE, PAGESIZE); - bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE); - CPU->cpu_m.mcpu_ldt_len = 0; - - /* - * Setup and install our IDT. - */ - init_idt(idt0); - - idtr.dtr_base = (uintptr_t)idt0; - idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1; - wr_idtr(&idtr); - CPU->cpu_idt = idt0; - - - init_tss(); - CPU->cpu_tss = ktss0; - init_ldt(); - - /* Stash this so that the NMI,MCE,#DF and KDI handlers can use it. */ - kpti_safe_cr3 = (uint64_t)getcr3(); -} - -#endif /* __xpv */ - -#ifndef __xpv -/* - * As per Intel Vol 3 27.5.2, the GDTR limit is reset to 64Kb on a VM exit, so - * we have to manually fix it up ourselves. - * - * The caller may still need to make sure that it can't go off-CPU with the - * incorrect limit, before calling this (such as disabling pre-emption). - */ -void -reset_gdtr_limit(void) -{ - ulong_t flags = intr_clear(); - desctbr_t gdtr; - - rd_gdtr(&gdtr); - gdtr.dtr_limit = (sizeof (user_desc_t) * NGDT) - 1; - wr_gdtr(&gdtr); - - intr_restore(flags); -} -#endif /* __xpv */ - -/* - * In the early kernel, we need to set up a simple GDT to run on. - * - * XXPV Can dboot use this too? See dboot_gdt.s - */ -void -init_boot_gdt(user_desc_t *bgdt) -{ - set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL, - SDP_PAGES, SDP_OP32); - set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL, - SDP_PAGES, SDP_OP32); -} - -/* - * Enable interpositioning on the system call path by rewriting the - * sys{call|enter} MSRs and the syscall-related entries in the IDT to use - * the branded entry points. - */ -void -brand_interpositioning_enable(void) -{ - gate_desc_t *idt = CPU->cpu_idt; - int i; - - ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL); - - for (i = 0; brand_tbl[i].ih_inum; i++) { - idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc; -#if defined(__xpv) - xen_idt_write(&idt[brand_tbl[i].ih_inum], - brand_tbl[i].ih_inum); -#endif - } - -#if defined(__xpv) - - /* - * Currently the hypervisor only supports 64-bit syscalls via - * syscall instruction. The 32-bit syscalls are handled by - * interrupt gate above. 
- */ - xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall, - CALLBACKF_mask_events); - -#else - - if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) { - if (kpti_enable == 1) { - wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall); - wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32); - } else { - wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall); - wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32); - } - } - -#endif - - if (is_x86_feature(x86_featureset, X86FSET_SEP)) { - if (kpti_enable == 1) { - wrmsr(MSR_INTC_SEP_EIP, - (uintptr_t)tr_brand_sys_sysenter); - } else { - wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter); - } - } -} - -/* - * Disable interpositioning on the system call path by rewriting the - * sys{call|enter} MSRs and the syscall-related entries in the IDT to use - * the standard entry points, which bypass the interpositioning hooks. - */ -void -brand_interpositioning_disable(void) -{ - gate_desc_t *idt = CPU->cpu_idt; - int i; - - ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL); - - for (i = 0; brand_tbl[i].ih_inum; i++) { - idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc; -#if defined(__xpv) - xen_idt_write(&idt[brand_tbl[i].ih_inum], - brand_tbl[i].ih_inum); -#endif - } - -#if defined(__xpv) - - /* - * See comment above in brand_interpositioning_enable. - */ - xen_set_callback(sys_syscall, CALLBACKTYPE_syscall, - CALLBACKF_mask_events); - -#else - - if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) { - if (kpti_enable == 1) { - wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall); - wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32); - } else { - wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall); - wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32); - } - } - -#endif - - if (is_x86_feature(x86_featureset, X86FSET_SEP)) { - if (kpti_enable == 1) { - wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter); - } else { - wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter); - } - } -} diff --git a/usr/src/uts/intel/ia32/os/fpu.c b/usr/src/uts/intel/ia32/os/fpu.c deleted file mode 100644 index 0037f49f85..0000000000 --- a/usr/src/uts/intel/ia32/os/fpu.c +++ /dev/null @@ -1,1506 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2021 Joyent, Inc. - * Copyright 2021 RackTop Systems, Inc. - */ - -/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ -/* All Rights Reserved */ - -/* Copyright (c) 1987, 1988 Microsoft Corporation */ -/* All Rights Reserved */ - -/* - * Copyright (c) 2009, Intel Corporation. - * All rights reserved. 
- */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/signal.h> -#include <sys/regset.h> -#include <sys/privregs.h> -#include <sys/psw.h> -#include <sys/trap.h> -#include <sys/fault.h> -#include <sys/systm.h> -#include <sys/user.h> -#include <sys/file.h> -#include <sys/proc.h> -#include <sys/pcb.h> -#include <sys/lwp.h> -#include <sys/cpuvar.h> -#include <sys/thread.h> -#include <sys/disp.h> -#include <sys/fp.h> -#include <sys/siginfo.h> -#include <sys/archsystm.h> -#include <sys/kmem.h> -#include <sys/debug.h> -#include <sys/x86_archext.h> -#include <sys/sysmacros.h> -#include <sys/cmn_err.h> -#include <sys/kfpu.h> - -/* - * FPU Management Overview - * ----------------------- - * - * The x86 FPU has evolved substantially since its days as the x87 coprocessor; - * however, many aspects of its life as a coprocessor are still around in x86. - * - * Today, when we refer to the 'FPU', we don't just mean the original x87 FPU. - * While that state still exists, there is much more that is covered by the FPU. - * Today, this includes not just traditional FPU state, but also supervisor only - * state. The following state is currently managed and covered logically by the - * idea of the FPU registers: - * - * o Traditional x87 FPU - * o Vector Registers (%xmm, %ymm, %zmm) - * o Memory Protection Extensions (MPX) Bounds Registers - * o Protected Key Rights Registers (PKRU) - * o Processor Trace data - * - * The rest of this covers how the FPU is managed and controlled, how state is - * saved and restored between threads, interactions with hypervisors, and other - * information exported to user land through aux vectors. A lot of background - * information is here to synthesize major parts of the Intel SDM, but - * unfortunately, it is not a replacement for reading it. - * - * FPU Control Registers - * --------------------- - * - * Because the x87 FPU began its life as a co-processor and the FPU was - * optional there are several bits that show up in %cr0 that we have to - * manipulate when dealing with the FPU. These are: - * - * o CR0.ET The 'extension type' bit. This was used originally to indicate - * that the FPU co-processor was present. Now it is forced on for - * compatibility. This is often used to verify whether or not the - * FPU is present. - * - * o CR0.NE The 'native error' bit. Used to indicate that native error - * mode should be enabled. This indicates that we should take traps - * on FPU errors. The OS enables this early in boot. - * - * o CR0.MP The 'Monitor Coprocessor' bit. Used to control whether or not - * wait/fwait instructions generate a #NM if CR0.TS is set. - * - * o CR0.EM The 'Emulation' bit. This is used to cause floating point - * operations (x87 through SSE4) to trap with a #UD so they can be - * emulated. The system never sets this bit, but makes sure it is - * clear on processor start up. - * - * o CR0.TS The 'Task Switched' bit. When this is turned on, a floating - * point operation will generate a #NM. An fwait will as well, - * depending on the value in CR0.MP. - * - * Our general policy is that CR0.ET, CR0.NE, and CR0.MP are always set by - * the system. Similarly CR0.EM is always unset by the system. CR0.TS has a more - * complicated role. Historically it has been used to allow running systems to - * restore the FPU registers lazily. This will be discussed in greater depth - * later on. - * - * %cr4 is also used as part of the FPU control. 
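- *
- * Concretely, the CR0 policy just described amounts to something like the
- * following (a minimal sketch, assuming the usual bit names from
- * <sys/controlregs.h> and the getcr0()/setcr0() accessors; this is not a
- * quote of the actual boot code):
- *
- *	ulong_t cr0 = getcr0();
- *	cr0 |= CR0_ET | CR0_NE | CR0_MP;	always set by the system
- *	cr0 &= ~CR0_EM;				never set by the system
- *	setcr0(cr0);
- *
- * CR0.TS is deliberately absent from the sketch; it is managed dynamically,
- * as discussed in greater depth below.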
- * Specifically, for %cr4 we need to worry
- * about the following bits in the system:
- *
- * o CR4.OSFXSR	This bit is used to indicate that the OS understands and
- *			supports the execution of the fxsave and fxrstor
- *			instructions. This bit is required to be set to enable
- *			the use of the SSE->SSE4 instructions.
- *
- * o CR4.OSXMMEXCPT	This bit is used to indicate that the OS can understand
- *			and take a SIMD floating point exception (#XM). This bit
- *			is always enabled by the system.
- *
- * o CR4.OSXSAVE	This bit is used to indicate that the OS understands and
- *			supports the execution of the xsave and xrstor family of
- *			instructions. This bit is required to use any of the AVX
- *			and newer feature sets.
- *
- * Because all supported processors are 64-bit, they'll always support the XMM
- * extensions and we will enable both CR4.OSFXSR and CR4.OSXMMEXCPT in boot.
- * CR4.OSXSAVE will be enabled and used whenever xsave is reported in cpuid.
- *
- * %xcr0 is used to manage the behavior of the xsave feature set and is only
- * present on the system if xsave is supported. %xcr0 is read and written via
- * the xgetbv and xsetbv instructions. Each bit in %xcr0 refers to a
- * different component of the xsave state and controls whether or not that
- * information is saved and restored. For newer feature sets like AVX and MPX,
- * it also controls whether or not the corresponding instructions can be
- * executed (much like CR4.OSFXSR does for the SSE feature sets).
- *
- * Everything in %xcr0 concerns features available to users. There is also the
- * IA32_XSS MSR which is used to control supervisor-only features that are
- * still part of the xsave state. Bits that can be set in %xcr0 are reserved in
- * IA32_XSS and vice versa. This is an important property that is particularly
- * relevant to how the xsave instructions operate.
- *
- * Save Mechanisms
- * ---------------
- *
- * When switching between running threads, the FPU state needs to be saved and
- * restored by the OS. If this state was not saved, users would rightfully
- * complain about corrupt state. There are three mechanisms that exist on the
- * processor for saving and restoring these state images:
- *
- * o fsave
- * o fxsave
- * o xsave
- *
- * fsave saves and restores only the x87 FPU and is the oldest of these
- * mechanisms. This mechanism is never used in the kernel today because we are
- * always running on systems that support fxsave.
- *
- * The fxsave and fxrstor mechanism allows the x87 FPU and the SSE register
- * state to be saved and restored to and from a struct fxsave_state. This is
- * the default mechanism that is used to save and restore the FPU on amd64. An
- * important aspect of fxsave that was different from the original i386 fsave
- * mechanism is that the restoring of FPU state with pending exceptions will
- * not generate an exception; it will be deferred to the next use of the FPU.
- *
- * The final and by far the most complex mechanism is that of the xsave set.
- * xsave allows for saving and restoring all of the traditional x86 pieces (x87
- * and SSE), while allowing for extensions that will save the %ymm, %zmm, etc.
- * registers.
- *
- * Data is saved and restored into and out of a struct xsave_state. The first
- * part of the struct xsave_state is equivalent to the struct fxsave_state.
- * After that, there is a header which is used to describe the remaining
- * portions of the state.
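- *
- * Schematically, the layout looks something like this (a simplified sketch,
- * not the actual definition from the FPU headers; the xs_* member names
- * match their use later in this file, while the reserved field is only
- * illustrative):
- *
- *	struct xsave_state {
- *		struct fxsave_state xs_fxsave;	x87 and SSE state (512 bytes)
- *		uint64_t xs_xstate_bv;		header: components present
- *		uint64_t xs_xcomp_bv;		header: bit 63 = compressed
- *		uint8_t xs_reserved[48];	rest of the 64-byte header
- *		... extended components (%ymm, etc.) follow ...
- *	};
- *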
The header is a 64-byte value of which the first two - * uint64_t values are defined and the rest are reserved and must be zero. The - * first uint64_t is the xstate_bv member. This describes which values in the - * xsave_state are actually valid and present. This is updated on a save and - * used on restore. The second member is the xcomp_bv member. Its last bit - * determines whether or not a compressed version of the structure is used. - * - * When the uncompressed structure is used (currently the only format we - * support), then each state component is at a fixed offset in the structure, - * even if it is not being used. For example, if you only saved the AVX related - * state, but did not save the MPX related state, the offset would not change - * for any component. With the compressed format, components that aren't used - * are all elided (though the x87 and SSE state are always there). - * - * Unlike fxsave which saves all state, the xsave family does not always save - * and restore all the state that could be covered by the xsave_state. The - * instructions all take an argument which is a mask of what to consider. This - * is the same mask that will be used in the xstate_bv vector and it is also the - * same values that are present in %xcr0 and IA32_XSS. Though IA32_XSS is only - * considered with the xsaves and xrstors instructions. - * - * When a save or restore is requested, a bitwise and is performed between the - * requested bits and those that have been enabled in %xcr0. Only the bits that - * match that are then saved or restored. Others will be silently ignored by - * the processor. This idea is used often in the OS. We will always request that - * we save and restore all of the state, but only those portions that are - * actually enabled in %xcr0 will be touched. - * - * If a feature has been asked to be restored that is not set in the xstate_bv - * feature vector of the save state, then it will be set to its initial state by - * the processor (usually zeros). Also, when asked to save state, the processor - * may not write out data that is in its initial state as an optimization. This - * optimization only applies to saving data and not to restoring data. - * - * There are a few different variants of the xsave and xrstor instruction. They - * are: - * - * o xsave This is the original save instruction. It will save all of the - * requested data in the xsave state structure. It only saves data - * in the uncompressed (xcomp_bv[63] is zero) format. It may be - * executed at all privilege levels. - * - * o xrstor This is the original restore instruction. It will restore all of - * the requested data. The xrstor function can handle both the - * compressed and uncompressed formats. It may be executed at all - * privilege levels. - * - * o xsaveopt This is a variant of the xsave instruction that employs - * optimizations to try and only write out state that has been - * modified since the last time an xrstor instruction was called. - * The processor tracks a tuple of information about the last - * xrstor and tries to ensure that the same buffer is being used - * when this optimization is being used. However, because of the - * way that it tracks the xrstor buffer based on the address of it, - * it is not suitable for use if that buffer can be easily reused. - * The most common case is trying to save data to the stack in - * rtld. It may be executed at all privilege levels. 
- *
- * o xsavec	This is a variant of the xsave instruction that writes out the
- *		compressed form of the xsave_state. Otherwise it behaves as
- *		xsave. It may be executed at all privilege levels.
- *
- * o xsaves	This is a variant of the xsave instruction. It is similar to
- *		xsavec in that it always writes the compressed form of the
- *		buffer. Unlike all the other forms, this instruction looks at
- *		both the user (%xcr0) and supervisor (IA32_XSS MSR) to determine
- *		what to save and restore. xsaves also implements the same
- *		optimization that xsaveopt does around modified pieces. User
- *		land may not execute the instruction.
- *
- * o xrstors	This is a variant of the xrstor instruction. Similar to xsaves
- *		it can save and restore both the user and privileged states.
- *		Unlike xrstor it can only operate on the compressed form.
- *		User land may not execute the instruction.
- *
- * Based on all of these, the kernel has a precedence for what it will use.
- * Basically, xsaves (which the kernel does not currently support) would be
- * preferred to xsaveopt, which is preferred to xsave. A similar scheme is used
- * when informing rtld (more later) about what it should use. xsavec is
- * preferred to xsave. xsaveopt is not recommended due to the modified
- * optimization not being appropriate for this use.
- *
- * Finally, there is one last gotcha with the xsave state. Importantly, some
- * AMD processors did not always save and restore some of the FPU exception
- * state in some cases like Intel did. In those cases the OS will make up for
- * this fact itself.
- *
- * FPU Initialization
- * ------------------
- *
- * One difference with the FPU registers is that not all threads have FPU state,
- * only those that have an lwp. Generally this means kernel threads, which all
- * share p0 and its lwp, do not have FPU state. Though there are definitely
- * exceptions such as kcfpoold. In the rest of this discussion we'll use thread
- * and lwp interchangeably; just think of 'thread' as meaning a thread that has
- * an lwp.
- *
- * Each lwp has its FPU state allocated in its pcb (process control block). The
- * actual storage comes from the fpsave_cachep kmem cache. This cache is sized
- * dynamically at start up based on the save mechanism that we're using and the
- * amount of memory required for it. This is dynamic because the xsave_state
- * size varies based on the supported feature set.
- *
- * The hardware side of the FPU is initialized early in boot, before we mount
- * the root file system. This is effectively done in fpu_probe(). This is where
- * we make the final decision about which save and restore mechanisms we should
- * use, create the fpsave_cachep kmem cache, and initialize a number of
- * function pointers that implement the save and restore logic.
- *
- * The thread/lwp side is a little more involved. There are two different
- * things that we need to concern ourselves with. The first is how the FPU
- * resources are allocated, and the second is how the FPU state is initialized
- * for a given lwp.
- *
- * We allocate the FPU save state from our kmem cache as part of lwp_fp_init().
- * This is called unconditionally by the system as part of creating an LWP.
- *
- * There are three different initialization paths that we deal with. The first
- * is when we are executing a new process. As part of exec, all of the register
- * state is reset. The exec case is particularly important because init is born
- * like Athena, sprouting from the head of the kernel, without any true parent
- * to fork from.
- * The second is used whenever we fork or create a new lwp. The third is to
- * deal with special lwps like the agent lwp.
- *
- * During exec, we will call fp_exec(), which will initialize and set up the
- * FPU state for the process. That will fill in the initial state for the FPU
- * and also set that state in the FPU itself. As part of fp_exec() we also
- * install a thread context operations vector that takes care of dealing with
- * the saving and restoring of the FPU. These context handlers will also be
- * called whenever an lwp is created or forked. In those cases, to initialize
- * the FPU we will call fp_new_lwp(). Like fp_exec(), fp_new_lwp() will install
- * a context operations vector for the new thread.
- *
- * Next we'll end up in the context operation fp_new_lwp(). This saves the
- * current thread's state, initializes the new thread's state, and copies over
- * the relevant parts of the originating thread's state. It's at this point that
- * we also install the FPU context operations into the new thread, which ensures
- * that all future threads that are descendants of the current one get the
- * thread context operations (unless they call exec).
- *
- * To deal with some things like the agent lwp, we double check the state of the
- * FPU in sys_rtt_common() to make sure that it has been enabled before
- * returning to user land. In general, this path should be rare, but it's useful
- * for the odd lwp here and there.
- *
- * The FPU state will remain valid most of the time. There are times when
- * the state will be rewritten: for example via restorecontext(), due to /proc,
- * or when the lwp calls exec(). Whether the context is being freed or we are
- * resetting the state, we will call fp_free() to disable the FPU and our
- * context.
- *
- * Finally, when the lwp is destroyed, it will actually destroy and free the FPU
- * state by calling fp_lwp_cleanup().
- *
- * Kernel FPU Multiplexing
- * -----------------------
- *
- * Just as the kernel has to maintain all of the general purpose registers when
- * switching between scheduled threads, the same is true of the FPU registers.
- *
- * When a thread has FPU state, it also has a set of context operations
- * installed. These context operations take care of making sure that the FPU is
- * properly saved and restored during a context switch (fpsave_ctxt and
- * fprestore_ctxt respectively). This means that the current implementation of
- * the FPU is 'eager': when a thread is running, the CPU will have its FPU state
- * loaded. While this is always true when executing in userland, there are a few
- * cases where this is not true in the kernel.
- *
- * This was not always the case. Traditionally on x86 a 'lazy' FPU restore was
- * employed. This meant that the FPU would be saved on a context switch and the
- * CR0.TS bit would be set. When a thread next tried to use the FPU, it would
- * then take a #NM trap, at which point we would restore the FPU from the save
- * area and return to user land. Given the frequency of use of the FPU alone by
- * libc, there's no point returning to user land just to trap again.
- *
- * There are a few cases though where the FPU state may need to be changed for a
- * thread on its behalf. The most notable cases are processes using /proc,
- * restorecontext, forking, etc. In all of these cases the kernel will force a
- * thread's FPU state to be saved into the PCB through the fp_save() function.
- * Whenever the FPU is saved, the FPU_VALID flag is set on the pcb.
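- *
- * The hookup of those context operations is the same installctx() call
- * wherever it appears in this file; fp_new_lwp() below, for instance, wires
- * up a newly created thread with:
- *
- *	installctx(ct, cfp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp,
- *	    fp_new_lwp, NULL, fp_free, NULL);
- *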
- * The FPU_VALID flag indicates that the save state holds currently valid
- * data. As a side effect of this, CR0.TS will be set. To make sure that all of
- * the state is updated before returning to user land, in these cases, we set a
- * flag on the PCB that says the FPU needs to be updated. This will make sure
- * that we take the slow path out of a system call to fix things up for the
- * thread. Due to the fact that this is a rather rare case, effectively setting
- * the equivalent of t_postsys is acceptable.
- *
- * CR0.TS will be set after a save occurs and cleared when a restore occurs.
- * Generally this means it will be cleared immediately by the new thread that is
- * running in a context switch. However, this isn't the case for kernel threads.
- * They currently operate with CR0.TS set as no kernel state is restored for
- * them. This means that using the FPU will cause a #NM and panic.
- *
- * The FPU_VALID flag on the currently executing thread's pcb is meant to track
- * what the value of CR0.TS should be. If it is set, then CR0.TS will be set.
- * However, because we eagerly restore, the only time that CR0.TS should be set
- * for a non-kernel thread is during operations where it will be cleared before
- * returning to user land; importantly, the only data in the FPU at that point
- * is the thread's own.
- *
- * Kernel FPU Usage
- * ----------------
- *
- * Traditionally the kernel never used the FPU since it had no need for
- * floating point operations. However, modern FPU hardware supports a variety
- * of SIMD extensions which can speed up code such as parity calculations or
- * encryption.
- *
- * To allow the kernel to take advantage of these features, the
- * kernel_fpu_begin() and kernel_fpu_end() functions should be wrapped
- * around any usage of the FPU by the kernel to ensure that user-level context
- * is properly saved/restored, as well as to properly set up the FPU for use by
- * the kernel. There are a variety of ways this wrapping can be used, as
- * discussed in this section below.
- *
- * When kernel_fpu_begin() and kernel_fpu_end() are used for extended
- * operations, the kernel_fpu_alloc() function should be used to allocate a
- * kfpu_state_t structure that is used to save/restore the thread's kernel FPU
- * state. This structure is not tied to any thread. That is, different threads
- * can reuse the same kfpu_state_t structure, although not concurrently. A
- * kfpu_state_t structure is freed by the kernel_fpu_free() function.
- *
- * In some cases, the kernel may need to use the FPU for a short operation
- * without the overhead of managing a kfpu_state_t structure and without
- * allowing for a context switch off the FPU. In this case the KFPU_NO_STATE
- * bit can be set in the kernel_fpu_begin() and kernel_fpu_end() flags
- * parameter. This indicates that there is no kfpu_state_t. When used this way,
- * kernel preemption should be disabled by the caller (kpreempt_disable) before
- * calling kernel_fpu_begin(), and re-enabled after calling kernel_fpu_end().
- * For this usage, it is important to limit the kernel's FPU use to short
- * operations. The tradeoff between using the FPU without a kfpu_state_t
- * structure vs. the overhead of allowing a context switch while using the FPU
- * should be carefully considered on a case-by-case basis.
- *
- * In other cases, kernel threads have an LWP, but never execute in user space.
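- *
- * Before turning to that case, here is a hedged sketch of the two usage
- * patterns just described (the actual SIMD work, and the handling of a
- * failed allocation, are elided):
- *
- *	kfpu_state_t *kfpu = kernel_fpu_alloc(KM_SLEEP);
- *	kernel_fpu_begin(kfpu, 0);
- *	... extended FPU work; a context switch is allowed ...
- *	kernel_fpu_end(kfpu, 0);
- *	kernel_fpu_free(kfpu);
- *
- * and, for a short operation with no saved state:
- *
- *	kpreempt_disable();
- *	kernel_fpu_begin(NULL, KFPU_NO_STATE);
- *	... short FPU work; no context switch permitted ...
- *	kernel_fpu_end(NULL, KFPU_NO_STATE);
- *	kpreempt_enable();
- *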
- * For such kernel threads, which have an LWP but never run in user space,
- * the LWP's pcb_fpu area can be used to save/restore the kernel's FPU state
- * if the thread is context switched, instead of having to allocate and manage
- * a kfpu_state_t structure. The KFPU_USE_LWP bit in the kernel_fpu_begin()
- * and kernel_fpu_end() flags parameter is used to enable this behavior. It is
- * the caller's responsibility to ensure that this is only used for a kernel
- * thread which never executes in user space.
- *
- * FPU Exceptions
- * --------------
- *
- * Certain operations can cause the kernel to take traps due to FPU activity.
- * Generally these events will cause a user process to receive a SIGFPE, and if
- * the kernel receives one in kernel context, we will die. Traditionally the #NM
- * (Device Not Available / No Math) exception generated by CR0.TS would have
- * caused us to restore the FPU. Now it is a fatal event regardless of whether
- * or not user land causes it.
- *
- * While there are some cases where the kernel uses the FPU, it is up to the
- * kernel to use the FPU in a way such that it cannot receive a trap or to use
- * the appropriate trap protection mechanisms.
- *
- * Hypervisors
- * -----------
- *
- * When providing support for hypervisors, things are a little bit more
- * complicated because the FPU is not virtualized at all. This means that they
- * need to save and restore the FPU and %xcr0 across entry and exit to the
- * guest. To facilitate this, we provide a series of APIs in <sys/hma.h>. These
- * allow us to use the full native state to make sure that we are always saving
- * and restoring the full FPU that the host sees, even when the guest is using a
- * subset.
- *
- * One tricky aspect of this is that the guest may be using a subset of %xcr0
- * and therefore changing our %xcr0 on the fly. It is vital that, when we're
- * saving and restoring the FPU, we always use the largest %xcr0 contents;
- * otherwise we will end up leaving data behind in it.
- *
- * ELF PLT Support
- * ---------------
- *
- * rtld has to preserve a subset of the FPU when it is saving and restoring
- * registers due to the amd64 SYS V ABI. See cmd/sgs/rtld/amd64/boot_elf.s for
- * more information. As a result, we set up an aux vector that contains
- * information about what save and restore mechanisms it should be using and
- * the sizing thereof based on what the kernel supports. This is passed down in
- * a series of aux vectors SUN_AT_FPTYPE and SUN_AT_FPSIZE. This information is
- * initialized in fpu_subr.c.
- */
-
-kmem_cache_t *fpsave_cachep;
-
-/* Legacy fxsave layout + xsave header + ymm */
-#define	AVX_XSAVE_SIZE	(512 + 64 + 256)
-
-/*
- * Various sanity checks.
- */
-CTASSERT(sizeof (struct fxsave_state) == 512);
-CTASSERT(sizeof (struct fnsave_state) == 108);
-CTASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
-CTASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE);
-
-/*
- * This structure is the x86 implementation of the kernel FPU that is defined in
- * uts/common/sys/kfpu.h.
- */
-
-typedef enum kfpu_flags {
-	/*
-	 * This indicates that the save state has initial FPU data.
- */ - KFPU_F_INITIALIZED = 0x01 -} kfpu_flags_t; - -struct kfpu_state { - fpu_ctx_t kfpu_ctx; - kfpu_flags_t kfpu_flags; - kthread_t *kfpu_curthread; -}; - -/* - * Initial kfpu state for SSE/SSE2 used by fpinit() - */ -const struct fxsave_state sse_initial = { - FPU_CW_INIT, /* fx_fcw */ - 0, /* fx_fsw */ - 0, /* fx_fctw */ - 0, /* fx_fop */ - 0, /* fx_rip */ - 0, /* fx_rdp */ - SSE_MXCSR_INIT /* fx_mxcsr */ - /* rest of structure is zero */ -}; - -/* - * Initial kfpu state for AVX used by fpinit() - */ -const struct xsave_state avx_initial = { - /* - * The definition below needs to be identical with sse_initial - * defined above. - */ - { - FPU_CW_INIT, /* fx_fcw */ - 0, /* fx_fsw */ - 0, /* fx_fctw */ - 0, /* fx_fop */ - 0, /* fx_rip */ - 0, /* fx_rdp */ - SSE_MXCSR_INIT /* fx_mxcsr */ - /* rest of structure is zero */ - }, - /* - * bit0 = 1 for XSTATE_BV to indicate that legacy fields are valid, - * and CPU should initialize XMM/YMM. - */ - 1, - 0 /* xs_xcomp_bv */ - /* rest of structure is zero */ -}; - -/* - * mxcsr_mask value (possibly reset in fpu_probe); used to avoid - * the #gp exception caused by setting unsupported bits in the - * MXCSR register - */ -uint32_t sse_mxcsr_mask = SSE_MXCSR_MASK_DEFAULT; - -/* - * Initial kfpu state for x87 used by fpinit() - */ -const struct fnsave_state x87_initial = { - FPU_CW_INIT, /* f_fcw */ - 0, /* __f_ign0 */ - 0, /* f_fsw */ - 0, /* __f_ign1 */ - 0xffff, /* f_ftw */ - /* rest of structure is zero */ -}; - -/* - * This vector is patched to xsave_ctxt() or xsaveopt_ctxt() if we discover we - * have an XSAVE-capable chip in fpu_probe. - */ -void (*fpsave_ctxt)(void *) = fpxsave_ctxt; -void (*fprestore_ctxt)(void *) = fpxrestore_ctxt; - -/* - * This function pointer is changed to xsaveopt if the CPU is xsaveopt capable. - */ -void (*xsavep)(struct xsave_state *, uint64_t) = xsave; - -static int fpe_sicode(uint_t); -static int fpe_simd_sicode(uint_t); - -/* - * Copy the state of parent lwp's floating point context into the new lwp. - * Invoked for both fork() and lwp_create(). - * - * Note that we inherit -only- the control state (e.g. exception masks, - * rounding, precision control, etc.); the FPU registers are otherwise - * reset to their initial state. - */ -static void -fp_new_lwp(kthread_id_t t, kthread_id_t ct) -{ - struct fpu_ctx *fp; /* parent fpu context */ - struct fpu_ctx *cfp; /* new fpu context */ - struct fxsave_state *fx, *cfx; - struct xsave_state *cxs; - - ASSERT(fp_kind != FP_NO); - - fp = &t->t_lwp->lwp_pcb.pcb_fpu; - cfp = &ct->t_lwp->lwp_pcb.pcb_fpu; - - /* - * If the parent FPU state is still in the FPU hw then save it; - * conveniently, fp_save() already does this for us nicely. - */ - fp_save(fp); - - cfp->fpu_flags = FPU_EN | FPU_VALID; - cfp->fpu_regs.kfpu_status = 0; - cfp->fpu_regs.kfpu_xstatus = 0; - - /* - * Make sure that the child's FPU is cleaned up and made ready for user - * land. 
- */ - PCB_SET_UPDATE_FPU(&ct->t_lwp->lwp_pcb); - - switch (fp_save_mech) { - case FP_FXSAVE: - fx = fp->fpu_regs.kfpu_u.kfpu_fx; - cfx = cfp->fpu_regs.kfpu_u.kfpu_fx; - bcopy(&sse_initial, cfx, sizeof (*cfx)); - cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; - cfx->fx_fcw = fx->fx_fcw; - break; - - case FP_XSAVE: - cfp->fpu_xsave_mask = fp->fpu_xsave_mask; - - VERIFY(fp->fpu_regs.kfpu_u.kfpu_xs != NULL); - - fx = &fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave; - cxs = cfp->fpu_regs.kfpu_u.kfpu_xs; - cfx = &cxs->xs_fxsave; - - bcopy(&avx_initial, cxs, sizeof (*cxs)); - cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; - cfx->fx_fcw = fx->fx_fcw; - cxs->xs_xstate_bv |= (get_xcr(XFEATURE_ENABLED_MASK) & - XFEATURE_FP_INITIAL); - break; - default: - panic("Invalid fp_save_mech"); - /*NOTREACHED*/ - } - - /* - * Mark that both the parent and child need to have the FPU cleaned up - * before returning to user land. - */ - - installctx(ct, cfp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp, - fp_new_lwp, NULL, fp_free, NULL); -} - -/* - * Free any state associated with floating point context. - * Fp_free can be called in three cases: - * 1) from reaper -> thread_free -> freectx-> fp_free - * fp context belongs to a thread on deathrow - * nothing to do, thread will never be resumed - * thread calling ctxfree is reaper - * - * 2) from exec -> freectx -> fp_free - * fp context belongs to the current thread - * must disable fpu, thread calling ctxfree is curthread - * - * 3) from restorecontext -> setfpregs -> fp_free - * we have a modified context in the memory (lwp->pcb_fpu) - * disable fpu and release the fp context for the CPU - * - */ -/*ARGSUSED*/ -void -fp_free(struct fpu_ctx *fp, int isexec) -{ - ASSERT(fp_kind != FP_NO); - - if (fp->fpu_flags & FPU_VALID) - return; - - kpreempt_disable(); - /* - * We want to do fpsave rather than fpdisable so that we can - * keep the fpu_flags as FPU_VALID tracking the CR0_TS bit - */ - fp->fpu_flags |= FPU_VALID; - /* If for current thread disable FP to track FPU_VALID */ - if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) { - /* Clear errors if any to prevent frstor from complaining */ - (void) fperr_reset(); - if (fp_kind & __FP_SSE) - (void) fpxerr_reset(); - fpdisable(); - } - kpreempt_enable(); -} - -/* - * Store the floating point state and disable the floating point unit. - */ -void -fp_save(struct fpu_ctx *fp) -{ - ASSERT(fp_kind != FP_NO); - - kpreempt_disable(); - if (!fp || fp->fpu_flags & FPU_VALID || - (fp->fpu_flags & FPU_EN) == 0) { - kpreempt_enable(); - return; - } - ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu); - - switch (fp_save_mech) { - case FP_FXSAVE: - fpxsave(fp->fpu_regs.kfpu_u.kfpu_fx); - break; - - case FP_XSAVE: - xsavep(fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask); - break; - default: - panic("Invalid fp_save_mech"); - /*NOTREACHED*/ - } - - fp->fpu_flags |= FPU_VALID; - - /* - * We save the FPU as part of forking, execing, modifications via /proc, - * restorecontext, etc. As such, we need to make sure that we return to - * userland with valid state in the FPU. If we're context switched out - * before we hit sys_rtt_common() we'll end up having restored the FPU - * as part of the context ops operations. The restore logic always makes - * sure that FPU_VALID is set before doing a restore so we don't restore - * it a second time. - */ - PCB_SET_UPDATE_FPU(&curthread->t_lwp->lwp_pcb); - - kpreempt_enable(); -} - -/* - * Restore the FPU context for the thread: - * The possibilities are: - * 1. 
No active FPU context: Load the new context into the FPU hw
- *	   and enable the FPU.
- */
-void
-fp_restore(struct fpu_ctx *fp)
-{
-	switch (fp_save_mech) {
-	case FP_FXSAVE:
-		fpxrestore(fp->fpu_regs.kfpu_u.kfpu_fx);
-		break;
-
-	case FP_XSAVE:
-		xrestore(fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
-		break;
-	default:
-		panic("Invalid fp_save_mech");
-		/*NOTREACHED*/
-	}
-
-	fp->fpu_flags &= ~FPU_VALID;
-}
-
-/*
- * Reset the FPU such that it is in a valid state for a new thread that is
- * coming out of exec. The FPU will be in a usable state at this point. At this
- * point we know that the FPU state has already been allocated and if this
- * wasn't an init process, then it will have had fp_free() previously called.
- */
-void
-fp_exec(void)
-{
-	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
-	struct ctxop *ctx = installctx_preallocate();
-
-	if (fp_save_mech == FP_XSAVE) {
-		fp->fpu_xsave_mask = XFEATURE_FP_ALL;
-	}
-
-	/*
-	 * Make sure that we're not preempted in the middle of initializing the
-	 * FPU on CPU.
-	 */
-	kpreempt_disable();
-	installctx(curthread, fp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp,
-	    fp_new_lwp, NULL, fp_free, ctx);
-	fpinit();
-	fp->fpu_flags = FPU_EN;
-	kpreempt_enable();
-}
-
-
-/*
- * Seeds the initial state for the current thread. The possibilities are:
- * 1. Another process has modified the FPU state before we have done any
- *    initialization: Load the FPU state from the LWP state.
- * 2. The FPU state has not been externally modified: Load a clean state.
- */
-void
-fp_seed(void)
-{
-	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
-
-	ASSERT(curthread->t_preempt >= 1);
-	ASSERT((fp->fpu_flags & FPU_EN) == 0);
-
-	/*
-	 * Always initialize a new context and initialize the hardware.
-	 */
-	if (fp_save_mech == FP_XSAVE) {
-		fp->fpu_xsave_mask = XFEATURE_FP_ALL;
-	}
-
-	installctx(curthread, fp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp,
-	    fp_new_lwp, NULL, fp_free, NULL);
-	fpinit();
-
-	/*
-	 * If FPU_VALID is set, it means someone has modified registers via
-	 * /proc. In this case, restore the current lwp's state.
-	 */
-	if (fp->fpu_flags & FPU_VALID)
-		fp_restore(fp);
-
-	ASSERT((fp->fpu_flags & FPU_VALID) == 0);
-	fp->fpu_flags = FPU_EN;
-}
-
-/*
- * When using xsave/xrstor, these three functions are used by the lwp code to
- * manage the memory for the xsave area.
- */
-void
-fp_lwp_init(struct _klwp *lwp)
-{
-	struct fpu_ctx *fp = &lwp->lwp_pcb.pcb_fpu;
-
-	/*
-	 * We keep a copy of the pointer in lwp_fpu so that we can restore the
-	 * value in forklwp() after we duplicate the parent's LWP state.
-	 */
-	lwp->lwp_fpu = fp->fpu_regs.kfpu_u.kfpu_generic =
-	    kmem_cache_alloc(fpsave_cachep, KM_SLEEP);
-
-	if (fp_save_mech == FP_XSAVE) {
-		/*
-		 * We bzero since the fpinit() code path will only
-		 * partially initialize the xsave area using avx_initial.
-		 */
-		ASSERT(cpuid_get_xsave_size() >= sizeof (struct xsave_state));
-		bzero(fp->fpu_regs.kfpu_u.kfpu_xs, cpuid_get_xsave_size());
-	}
-}
-
-void
-fp_lwp_cleanup(struct _klwp *lwp)
-{
-	struct fpu_ctx *fp = &lwp->lwp_pcb.pcb_fpu;
-
-	if (fp->fpu_regs.kfpu_u.kfpu_generic != NULL) {
-		kmem_cache_free(fpsave_cachep,
-		    fp->fpu_regs.kfpu_u.kfpu_generic);
-		lwp->lwp_fpu = fp->fpu_regs.kfpu_u.kfpu_generic = NULL;
-	}
-}
-
-/*
- * Called during the process of forklwp(). The kfpu_u pointer will have been
- * overwritten while copying the parent's LWP structure. We have a valid copy
- * stashed in the child's lwp_fpu which we use to restore the correct value.
- */
-void
-fp_lwp_dup(struct _klwp *lwp)
-{
-	void *xp = lwp->lwp_fpu;
-	size_t sz;
-
-	switch (fp_save_mech) {
-	case FP_FXSAVE:
-		sz = sizeof (struct fxsave_state);
-		break;
-	case FP_XSAVE:
-		sz = cpuid_get_xsave_size();
-		break;
-	default:
-		panic("Invalid fp_save_mech");
-		/*NOTREACHED*/
-	}
-
-	/* copy the parent's values into the new lwp's struct */
-	bcopy(lwp->lwp_pcb.pcb_fpu.fpu_regs.kfpu_u.kfpu_generic, xp, sz);
-	/* now restore the pointer */
-	lwp->lwp_pcb.pcb_fpu.fpu_regs.kfpu_u.kfpu_generic = xp;
-}
-
-/*
- * Handle a processor extension error fault.
- * Returns non-zero for error.
- */
-
-/*ARGSUSED*/
-int
-fpexterrflt(struct regs *rp)
-{
-	uint32_t fpcw, fpsw;
-	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
-
-	ASSERT(fp_kind != FP_NO);
-
-	/*
-	 * Now we can enable the interrupts.
-	 * (NOTE: x87 fp exceptions come through an interrupt gate)
-	 */
-	sti();
-
-	if (!fpu_exists)
-		return (FPE_FLTINV);
-
-	/*
-	 * Do an unconditional save of the FP state. If it's dirty (TS=0),
-	 * it'll be saved into the fpu context area passed in (that of the
-	 * current thread). If it's not dirty (it may not be, due to an
-	 * intervening save caused by a context switch between the sti()
-	 * above and here), then it's safe to just use the stored values in
-	 * the context save area to determine the cause of the fault.
-	 */
-	fp_save(fp);
-
-	/* clear exception flags in saved state, as if by fnclex */
-	switch (fp_save_mech) {
-	case FP_FXSAVE:
-		fpsw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw;
-		fpcw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fcw;
-		fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw &= ~FPS_SW_EFLAGS;
-		break;
-
-	case FP_XSAVE:
-		fpsw = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw;
-		fpcw = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fcw;
-		fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw &= ~FPS_SW_EFLAGS;
-		/*
-		 * Always set LEGACY_FP as it may have been cleared by XSAVE
-		 * instruction
-		 */
-		fp->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |= XFEATURE_LEGACY_FP;
-		break;
-	default:
-		panic("Invalid fp_save_mech");
-		/*NOTREACHED*/
-	}
-
-	fp->fpu_regs.kfpu_status = fpsw;
-
-	if ((fpsw & FPS_ES) == 0)
-		return (0);	/* No exception */
-
-	/*
-	 * "and" the exception flags with the complement of the mask
-	 * bits to determine which exception occurred
-	 */
-	return (fpe_sicode(fpsw & ~fpcw & 0x3f));
-}
-
-/*
- * Handle an SSE/SSE2 precise exception.
- * Returns a non-zero sicode for error.
- */
-/*ARGSUSED*/
-int
-fpsimderrflt(struct regs *rp)
-{
-	uint32_t mxcsr, xmask;
-	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
-
-	ASSERT(fp_kind & __FP_SSE);
-
-	/*
-	 * NOTE: Interrupts are disabled during execution of this
-	 * function. They are enabled by the caller in trap.c.
-	 */
-
-	/*
-	 * The only way we could have gotten here, if there is no FP unit,
-	 * is via a user executing an INT $19 instruction, so there is
-	 * no fault in that case.
-	 */
-	if (!fpu_exists)
-		return (0);
-
-	/*
-	 * Do an unconditional save of the FP state. If it's dirty (TS=0),
-	 * it'll be saved into the fpu context area passed in (that of the
-	 * current thread). If it's not dirty, then it's safe to just use
-	 * the stored values in the context save area to determine the
-	 * cause of the fault.
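-	 *
-	 * A note on the classification done below: each exception flag in
-	 * %mxcsr (bits 0-5) has a corresponding mask bit 7 positions higher
-	 * (bits 7-12), so shifting the register right by 7 lines the mask
-	 * bits up with the flag bits:
-	 *
-	 *	xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS;
-	 *
-	 * Exception flags that are masked off can then be discarded before
-	 * mapping the remainder to an si_code.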
- */ - fp_save(fp); /* save the FPU state */ - - if (fp_save_mech == FP_XSAVE) { - mxcsr = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_mxcsr; - fp->fpu_regs.kfpu_status = - fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw; - } else { - mxcsr = fp->fpu_regs.kfpu_u.kfpu_fx->fx_mxcsr; - fp->fpu_regs.kfpu_status = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw; - } - fp->fpu_regs.kfpu_xstatus = mxcsr; - - /* - * compute the mask that determines which conditions can cause - * a #xm exception, and use this to clean the status bits so that - * we can identify the true cause of this one. - */ - xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS; - return (fpe_simd_sicode((mxcsr & SSE_MXCSR_EFLAGS) & ~xmask)); -} - -/* - * In the unlikely event that someone is relying on this subcode being - * FPE_FLTILL for denormalize exceptions, it can always be patched back - * again to restore old behaviour. - */ -int fpe_fltden = FPE_FLTDEN; - -/* - * Map from the FPU status word to the FP exception si_code. - */ -static int -fpe_sicode(uint_t sw) -{ - if (sw & FPS_IE) - return (FPE_FLTINV); - if (sw & FPS_ZE) - return (FPE_FLTDIV); - if (sw & FPS_DE) - return (fpe_fltden); - if (sw & FPS_OE) - return (FPE_FLTOVF); - if (sw & FPS_UE) - return (FPE_FLTUND); - if (sw & FPS_PE) - return (FPE_FLTRES); - return (FPE_FLTINV); /* default si_code for other exceptions */ -} - -/* - * Map from the SSE status word to the FP exception si_code. - */ -static int -fpe_simd_sicode(uint_t sw) -{ - if (sw & SSE_IE) - return (FPE_FLTINV); - if (sw & SSE_ZE) - return (FPE_FLTDIV); - if (sw & SSE_DE) - return (FPE_FLTDEN); - if (sw & SSE_OE) - return (FPE_FLTOVF); - if (sw & SSE_UE) - return (FPE_FLTUND); - if (sw & SSE_PE) - return (FPE_FLTRES); - return (FPE_FLTINV); /* default si_code for other exceptions */ -} - -/* - * This routine is invoked as part of libc's __fpstart implementation - * via sysi86(2). - * - * It may be called -before- any context has been assigned in which case - * we try and avoid touching the hardware. Or it may be invoked well - * after the context has been assigned and fiddled with, in which case - * just tweak it directly. - */ -void -fpsetcw(uint16_t fcw, uint32_t mxcsr) -{ - struct fpu_ctx *fp = &curthread->t_lwp->lwp_pcb.pcb_fpu; - struct fxsave_state *fx; - - if (!fpu_exists || fp_kind == FP_NO) - return; - - if ((fp->fpu_flags & FPU_EN) == 0) { - if (fcw == FPU_CW_INIT && mxcsr == SSE_MXCSR_INIT) { - /* - * Common case. Floating point unit not yet - * enabled, and kernel already intends to initialize - * the hardware the way the caller wants. - */ - return; - } - /* - * Hmm. Userland wants a different default. - * Do a fake "first trap" to establish the context, then - * handle as if we already had a context before we came in. - */ - kpreempt_disable(); - fp_seed(); - kpreempt_enable(); - } - - /* - * Ensure that the current hardware state is flushed back to the - * pcb, then modify that copy. Next use of the fp will - * restore the context. 
- */ - fp_save(fp); - - switch (fp_save_mech) { - case FP_FXSAVE: - fx = fp->fpu_regs.kfpu_u.kfpu_fx; - fx->fx_fcw = fcw; - fx->fx_mxcsr = sse_mxcsr_mask & mxcsr; - break; - - case FP_XSAVE: - fx = &fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave; - fx->fx_fcw = fcw; - fx->fx_mxcsr = sse_mxcsr_mask & mxcsr; - /* - * Always set LEGACY_FP as it may have been cleared by XSAVE - * instruction - */ - fp->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |= XFEATURE_LEGACY_FP; - break; - default: - panic("Invalid fp_save_mech"); - /*NOTREACHED*/ - } -} - -static void -kernel_fpu_fpstate_init(kfpu_state_t *kfpu) -{ - struct xsave_state *xs; - - switch (fp_save_mech) { - case FP_FXSAVE: - bcopy(&sse_initial, kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_fx, - sizeof (struct fxsave_state)); - kfpu->kfpu_ctx.fpu_xsave_mask = 0; - break; - case FP_XSAVE: - xs = kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_xs; - bzero(xs, cpuid_get_xsave_size()); - bcopy(&avx_initial, xs, sizeof (*xs)); - xs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE; - kfpu->kfpu_ctx.fpu_xsave_mask = XFEATURE_FP_ALL; - break; - default: - panic("invalid fp_save_mech"); - } - - /* - * Set the corresponding flags that the system expects on the FPU state - * to indicate that this is our state. The FPU_EN flag is required to - * indicate that FPU usage is allowed. The FPU_KERN flag is explicitly - * not set below as it represents that this state is being suppressed - * by the kernel. - */ - kfpu->kfpu_ctx.fpu_flags = FPU_EN | FPU_VALID; - kfpu->kfpu_flags |= KFPU_F_INITIALIZED; -} - -kfpu_state_t * -kernel_fpu_alloc(int kmflags) -{ - kfpu_state_t *kfpu; - - if ((kfpu = kmem_zalloc(sizeof (kfpu_state_t), kmflags)) == NULL) { - return (NULL); - } - - kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic = - kmem_cache_alloc(fpsave_cachep, kmflags); - if (kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic == NULL) { - kmem_free(kfpu, sizeof (kfpu_state_t)); - return (NULL); - } - - kernel_fpu_fpstate_init(kfpu); - - return (kfpu); -} - -void -kernel_fpu_free(kfpu_state_t *kfpu) -{ - kmem_cache_free(fpsave_cachep, - kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic); - kmem_free(kfpu, sizeof (kfpu_state_t)); -} - -static void -kernel_fpu_ctx_save(void *arg) -{ - kfpu_state_t *kfpu = arg; - fpu_ctx_t *pf; - - if (kfpu == NULL) { - /* - * A NULL kfpu implies this is a kernel thread with an LWP and - * no user-level FPU usage. Use the lwp fpu save area. - */ - pf = &curthread->t_lwp->lwp_pcb.pcb_fpu; - - ASSERT(curthread->t_procp->p_flag & SSYS); - ASSERT3U(pf->fpu_flags & FPU_VALID, ==, 0); - - fp_save(pf); - } else { - pf = &kfpu->kfpu_ctx; - - ASSERT3P(kfpu->kfpu_curthread, ==, curthread); - ASSERT3U(pf->fpu_flags & FPU_VALID, ==, 0); - - /* - * Note, we can't use fp_save because it assumes that we're - * saving to the thread's PCB and not somewhere else. Because - * this is a different FPU context, we instead have to do this - * ourselves. - */ - switch (fp_save_mech) { - case FP_FXSAVE: - fpxsave(pf->fpu_regs.kfpu_u.kfpu_fx); - break; - case FP_XSAVE: - xsavep(pf->fpu_regs.kfpu_u.kfpu_xs, pf->fpu_xsave_mask); - break; - default: - panic("Invalid fp_save_mech"); - } - - /* - * Because we have saved context here, our save state is no - * longer valid and therefore needs to be reinitialized. - */ - kfpu->kfpu_flags &= ~KFPU_F_INITIALIZED; - } - - pf->fpu_flags |= FPU_VALID; - - /* - * Clear KFPU flag. This allows swtch to check for improper kernel - * usage of the FPU (i.e. 
switching to a new thread while the old - * thread was in the kernel and using the FPU, but did not perform a - * context save). - */ - curthread->t_flag &= ~T_KFPU; -} - -static void -kernel_fpu_ctx_restore(void *arg) -{ - kfpu_state_t *kfpu = arg; - fpu_ctx_t *pf; - - if (kfpu == NULL) { - /* - * A NULL kfpu implies this is a kernel thread with an LWP and - * no user-level FPU usage. Use the lwp fpu save area. - */ - pf = &curthread->t_lwp->lwp_pcb.pcb_fpu; - - ASSERT(curthread->t_procp->p_flag & SSYS); - ASSERT3U(pf->fpu_flags & FPU_VALID, !=, 0); - } else { - pf = &kfpu->kfpu_ctx; - - ASSERT3P(kfpu->kfpu_curthread, ==, curthread); - ASSERT3U(pf->fpu_flags & FPU_VALID, !=, 0); - } - - fp_restore(pf); - curthread->t_flag |= T_KFPU; -} - -/* - * Validate that the thread is not switching off-cpu while actively using the - * FPU within the kernel. - */ -void -kernel_fpu_no_swtch(void) -{ - if ((curthread->t_flag & T_KFPU) != 0) { - panic("curthread swtch-ing while the kernel is using the FPU"); - } -} - -void -kernel_fpu_begin(kfpu_state_t *kfpu, uint_t flags) -{ - klwp_t *pl = curthread->t_lwp; - struct ctxop *ctx; - - if ((curthread->t_flag & T_KFPU) != 0) { - panic("curthread attempting to nest kernel FPU states"); - } - - /* KFPU_USE_LWP and KFPU_NO_STATE are mutually exclusive. */ - ASSERT((flags & (KFPU_USE_LWP | KFPU_NO_STATE)) != - (KFPU_USE_LWP | KFPU_NO_STATE)); - - if ((flags & KFPU_NO_STATE) == KFPU_NO_STATE) { - /* - * Since we don't have a kfpu_state or usable lwp pcb_fpu to - * hold our kernel FPU context, we depend on the caller doing - * kpreempt_disable for the duration of our FPU usage. This - * should only be done for very short periods of time. - */ - ASSERT(curthread->t_preempt > 0); - ASSERT(kfpu == NULL); - - if (pl != NULL) { - /* - * We might have already saved once so FPU_VALID could - * be set. This is handled in fp_save. - */ - fp_save(&pl->lwp_pcb.pcb_fpu); - pl->lwp_pcb.pcb_fpu.fpu_flags |= FPU_KERNEL; - } - - curthread->t_flag |= T_KFPU; - - /* Always restore the fpu to the initial state. */ - fpinit(); - - return; - } - - /* - * We either have a kfpu, or are using the LWP pcb_fpu for context ops. - */ - - if ((flags & KFPU_USE_LWP) == 0) { - if (kfpu->kfpu_curthread != NULL) - panic("attempting to reuse kernel FPU state at %p when " - "another thread already is using", kfpu); - - if ((kfpu->kfpu_flags & KFPU_F_INITIALIZED) == 0) - kernel_fpu_fpstate_init(kfpu); - - kfpu->kfpu_curthread = curthread; - } - - /* - * Not all threads may have an active LWP. If they do and we're not - * going to re-use the LWP, then we should go ahead and save the state. - * We must also note that the fpu is now being used by the kernel and - * therefore we do not want to manage the fpu state via the user-level - * thread's context handlers. - * - * We might have already saved once (due to a prior use of the kernel - * FPU or another code path) so FPU_VALID could be set. This is handled - * by fp_save, as is the FPU_EN check. - */ - ctx = installctx_preallocate(); - kpreempt_disable(); - if (pl != NULL) { - if ((flags & KFPU_USE_LWP) == 0) - fp_save(&pl->lwp_pcb.pcb_fpu); - pl->lwp_pcb.pcb_fpu.fpu_flags |= FPU_KERNEL; - } - - /* - * Set the context operations for kernel FPU usage. Note that this is - * done with a preallocated buffer and under kpreempt_disable because - * without a preallocated buffer, installctx does a sleeping - * allocation. 
We haven't finished initializing our kernel FPU state
-	 * yet, and in the rare case that we happen to save/restore just as
-	 * installctx() exits its own kpreempt_enable() internal call, we
-	 * guard against restoring an uninitialized buffer (0xbaddcafe).
-	 */
-	installctx(curthread, kfpu, kernel_fpu_ctx_save, kernel_fpu_ctx_restore,
-	    NULL, NULL, NULL, NULL, ctx);
-
-	curthread->t_flag |= T_KFPU;
-
-	if ((flags & KFPU_USE_LWP) == KFPU_USE_LWP) {
-		/*
-		 * For pure kernel threads with an LWP, we can use the LWP's
-		 * pcb_fpu to save/restore context.
-		 */
-		fpu_ctx_t *pf = &pl->lwp_pcb.pcb_fpu;
-
-		VERIFY(curthread->t_procp->p_flag & SSYS);
-		VERIFY(kfpu == NULL);
-		ASSERT((pf->fpu_flags & FPU_EN) == 0);
-
-		/* Always restore the fpu to the initial state. */
-		if (fp_save_mech == FP_XSAVE)
-			pf->fpu_xsave_mask = XFEATURE_FP_ALL;
-		fpinit();
-		pf->fpu_flags = FPU_EN | FPU_KERNEL;
-	} else {
-		/* initialize the kfpu state */
-		kernel_fpu_ctx_restore(kfpu);
-	}
-	kpreempt_enable();
-}
-
-void
-kernel_fpu_end(kfpu_state_t *kfpu, uint_t flags)
-{
-	ulong_t iflags;
-
-	if ((curthread->t_flag & T_KFPU) == 0) {
-		panic("curthread attempting to clear kernel FPU state "
-		    "without using it");
-	}
-
-	/*
-	 * General comments on why the rest of this function is structured the
-	 * way it is. Be aware that there is a lot of subtlety here.
-	 *
-	 * If a user-level thread ever uses the fpu while in the kernel, then
-	 * we cannot call fpdisable since that does STTS. That will set the
-	 * ts bit in %cr0 which will cause an exception if anything touches the
-	 * fpu. However, the user-level context switch handler (fpsave_ctxt)
-	 * needs to access the fpu to save the registers into the pcb.
-	 * fpsave_ctxt relies on CLTS having been done (in fprestore_ctxt) to
-	 * clear the ts bit when the thread context switched onto the CPU.
-	 *
-	 * Calling fpdisable only affects the current CPU's %cr0 register.
-	 *
-	 * During removectx and kpreempt_enable, we can voluntarily context
-	 * switch, so the CPU we were on when we entered this function might
-	 * not be the same one we're on when we return from removectx or end
-	 * the function. Note there can be user-level context switch handlers
-	 * still installed if this is a user-level thread.
-	 *
-	 * We also must be careful in the unlikely event we're running in an
-	 * interrupt thread, since we can't leave the CPU's %cr0 TS state set
-	 * incorrectly for the "real" thread to resume on this CPU.
-	 */
-
-	if ((flags & KFPU_NO_STATE) == 0) {
-		kpreempt_disable();
-	} else {
-		ASSERT(curthread->t_preempt > 0);
-	}
-
-	curthread->t_flag &= ~T_KFPU;
-
-	/*
-	 * When we are ending things, we explicitly don't save the current
-	 * kernel FPU state back to the temporary state. The kfpu API is not
-	 * intended to be a permanent save location.
-	 *
-	 * If this is a user-level thread and we were to context switch
-	 * before returning to user-land, fpsave_ctxt will be a no-op since we
-	 * already saved the user-level FPU state the first time we ran
-	 * kernel_fpu_begin (i.e. we won't save the bad kernel fpu state over
-	 * the user-level fpu state). The fpsave_ctxt functions only save if
-	 * FPU_VALID is not already set. fp_save also sets PCB_SET_UPDATE_FPU
-	 * so fprestore_ctxt will be done in sys_rtt_common when the thread
-	 * finally returns to user-land.
-	 */
-
-	if ((curthread->t_procp->p_flag & SSYS) != 0 &&
-	    curthread->t_intr == NULL) {
-		/*
-		 * A kernel thread which is not an interrupt thread, so we
-		 * STTS now.
- */ - fpdisable(); - } - - if ((flags & KFPU_NO_STATE) == 0) { - removectx(curthread, kfpu, kernel_fpu_ctx_save, - kernel_fpu_ctx_restore, NULL, NULL, NULL, NULL); - - if (kfpu != NULL) { - if (kfpu->kfpu_curthread != curthread) { - panic("attempting to end kernel FPU state " - "for %p, but active thread is not " - "curthread", kfpu); - } else { - kfpu->kfpu_curthread = NULL; - } - } - - kpreempt_enable(); - } - - if (curthread->t_lwp != NULL) { - uint_t f; - - if (flags & KFPU_USE_LWP) { - f = FPU_EN | FPU_KERNEL; - } else { - f = FPU_KERNEL; - } - curthread->t_lwp->lwp_pcb.pcb_fpu.fpu_flags &= ~f; - } -} diff --git a/usr/src/uts/intel/ia32/os/sendsig.c b/usr/src/uts/intel/ia32/os/sendsig.c deleted file mode 100644 index e3d60eb62b..0000000000 --- a/usr/src/uts/intel/ia32/os/sendsig.c +++ /dev/null @@ -1,589 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ -/* All Rights Reserved */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/sysmacros.h> -#include <sys/signal.h> -#include <sys/systm.h> -#include <sys/user.h> -#include <sys/mman.h> -#include <sys/class.h> -#include <sys/proc.h> -#include <sys/procfs.h> -#include <sys/buf.h> -#include <sys/kmem.h> -#include <sys/cred.h> -#include <sys/archsystm.h> -#include <sys/vmparam.h> -#include <sys/prsystm.h> -#include <sys/reboot.h> -#include <sys/uadmin.h> -#include <sys/vfs.h> -#include <sys/vnode.h> -#include <sys/file.h> -#include <sys/session.h> -#include <sys/ucontext.h> -#include <sys/dnlc.h> -#include <sys/var.h> -#include <sys/cmn_err.h> -#include <sys/debugreg.h> -#include <sys/thread.h> -#include <sys/vtrace.h> -#include <sys/consdev.h> -#include <sys/psw.h> -#include <sys/regset.h> - -#include <sys/privregs.h> - -#include <sys/stack.h> -#include <sys/swap.h> -#include <vm/hat.h> -#include <vm/anon.h> -#include <vm/as.h> -#include <vm/page.h> -#include <vm/seg.h> -#include <vm/seg_kmem.h> -#include <vm/seg_map.h> -#include <vm/seg_vn.h> -#include <sys/exec.h> -#include <sys/acct.h> -#include <sys/core.h> -#include <sys/corectl.h> -#include <sys/modctl.h> -#include <sys/tuneable.h> -#include <c2/audit.h> -#include <sys/bootconf.h> -#include <sys/dumphdr.h> -#include <sys/promif.h> -#include <sys/systeminfo.h> -#include <sys/kdi.h> -#include <sys/contract_impl.h> -#include <sys/x86_archext.h> - -/* - * Construct the execution environment for the user's signal - * handler and arrange for control to be given to it on return - * to userland. 
The library code now calls setcontext() to - * clean up after the signal handler, so sigret() is no longer - * needed. - * - * (The various 'volatile' declarations are need to ensure that values - * are correct on the error return from on_fault().) - */ - - -/* - * An amd64 signal frame looks like this on the stack: - * - * old %rsp: - * <128 bytes of untouched stack space> - * <a siginfo_t [optional]> - * <a ucontext_t> - * <siginfo_t *> - * <signal number> - * new %rsp: <return address (deliberately invalid)> - * - * The signal number and siginfo_t pointer are only pushed onto the stack in - * order to allow stack backtraces. The actual signal handling code expects the - * arguments in registers. - */ - -struct sigframe { - caddr_t retaddr; - long signo; - siginfo_t *sip; -}; - -int -sendsig(int sig, k_siginfo_t *sip, void (*hdlr)()) -{ - volatile int minstacksz; - int newstack; - label_t ljb; - volatile caddr_t sp; - caddr_t fp; - volatile struct regs *rp; - volatile greg_t upc; - volatile proc_t *p = ttoproc(curthread); - struct as *as = p->p_as; - klwp_t *lwp = ttolwp(curthread); - ucontext_t *volatile tuc = NULL; - ucontext_t *uc; - siginfo_t *sip_addr; - volatile int watched; - - /* - * This routine is utterly dependent upon STACK_ALIGN being - * 16 and STACK_ENTRY_ALIGN being 8. Let's just acknowledge - * that and require it. - */ - -#if STACK_ALIGN != 16 || STACK_ENTRY_ALIGN != 8 -#error "sendsig() amd64 did not find the expected stack alignments" -#endif - - rp = lwptoregs(lwp); - upc = rp->r_pc; - - /* - * Since we're setting up to run the signal handler we have to - * arrange that the stack at entry to the handler is (only) - * STACK_ENTRY_ALIGN (i.e. 8) byte aligned so that when the handler - * executes its push of %rbp, the stack realigns to STACK_ALIGN - * (i.e. 16) correctly. - * - * The new sp will point to the sigframe and the ucontext_t. The - * above means that sp (and thus sigframe) will be 8-byte aligned, - * but not 16-byte aligned. ucontext_t, however, contains %xmm regs - * which must be 16-byte aligned. Because of this, for correct - * alignment, sigframe must be a multiple of 8-bytes in length, but - * not 16-bytes. This will place ucontext_t at a nice 16-byte boundary. - */ - - /* LINTED: logical expression always true: op "||" */ - ASSERT((sizeof (struct sigframe) % 16) == 8); - - minstacksz = sizeof (struct sigframe) + SA(sizeof (*uc)); - if (sip != NULL) - minstacksz += SA(sizeof (siginfo_t)); - ASSERT((minstacksz & (STACK_ENTRY_ALIGN - 1ul)) == 0); - - /* - * Figure out whether we will be handling this signal on - * an alternate stack specified by the user. Then allocate - * and validate the stack requirements for the signal handler - * context. on_fault will catch any faults. - */ - newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) && - !(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE)); - - if (newstack) { - fp = (caddr_t)(SA((uintptr_t)lwp->lwp_sigaltstack.ss_sp) + - SA(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN); - } else { - /* - * Drop below the 128-byte reserved region of the stack frame - * we're interrupting. - */ - fp = (caddr_t)rp->r_sp - STACK_RESERVE; - } - - /* - * Force proper stack pointer alignment, even in the face of a - * misaligned stack pointer from user-level before the signal. - */ - fp = (caddr_t)((uintptr_t)fp & ~(STACK_ENTRY_ALIGN - 1ul)); - - /* - * Most of the time during normal execution, the stack pointer - * is aligned on a STACK_ALIGN (i.e. 16 byte) boundary. 
However, - * (for example) just after a call instruction (which pushes - * the return address), the callers stack misaligns until the - * 'push %rbp' happens in the callee prolog. So while we should - * expect the stack pointer to be always at least STACK_ENTRY_ALIGN - * aligned, we should -not- expect it to always be STACK_ALIGN aligned. - * We now adjust to ensure that the new sp is aligned to - * STACK_ENTRY_ALIGN but not to STACK_ALIGN. - */ - sp = fp - minstacksz; - if (((uintptr_t)sp & (STACK_ALIGN - 1ul)) == 0) { - sp -= STACK_ENTRY_ALIGN; - minstacksz = fp - sp; - } - - /* - * Now, make sure the resulting signal frame address is sane - */ - if (sp >= as->a_userlimit || fp >= as->a_userlimit) { -#ifdef DEBUG - printf("sendsig: bad signal stack cmd=%s, pid=%d, sig=%d\n", - PTOU(p)->u_comm, p->p_pid, sig); - printf("sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n", - (void *)sp, (void *)hdlr, (uintptr_t)upc); - printf("sp above USERLIMIT\n"); -#endif - return (0); - } - - watched = watch_disable_addr((caddr_t)sp, minstacksz, S_WRITE); - - if (on_fault(&ljb)) - goto badstack; - - if (sip != NULL) { - zoneid_t zoneid; - - fp -= SA(sizeof (siginfo_t)); - uzero(fp, sizeof (siginfo_t)); - if (SI_FROMUSER(sip) && - (zoneid = p->p_zone->zone_id) != GLOBAL_ZONEID && - zoneid != sip->si_zoneid) { - k_siginfo_t sani_sip = *sip; - - sani_sip.si_pid = p->p_zone->zone_zsched->p_pid; - sani_sip.si_uid = 0; - sani_sip.si_ctid = -1; - sani_sip.si_zoneid = zoneid; - copyout_noerr(&sani_sip, fp, sizeof (sani_sip)); - } else - copyout_noerr(sip, fp, sizeof (*sip)); - sip_addr = (siginfo_t *)fp; - - if (sig == SIGPROF && - curthread->t_rprof != NULL && - curthread->t_rprof->rp_anystate) { - /* - * We stand on our head to deal with - * the real time profiling signal. - * Fill in the stuff that doesn't fit - * in a normal k_siginfo structure. - */ - int i = sip->si_nsysarg; - - while (--i >= 0) - sulword_noerr( - (ulong_t *)&(sip_addr->si_sysarg[i]), - (ulong_t)lwp->lwp_arg[i]); - copyout_noerr(curthread->t_rprof->rp_state, - sip_addr->si_mstate, - sizeof (curthread->t_rprof->rp_state)); - } - } else - sip_addr = NULL; - - /* - * save the current context on the user stack directly after the - * sigframe. Since sigframe is 8-byte-but-not-16-byte aligned, - * and since sizeof (struct sigframe) is 24, this guarantees - * 16-byte alignment for ucontext_t and its %xmm registers. - */ - uc = (ucontext_t *)(sp + sizeof (struct sigframe)); - tuc = kmem_alloc(sizeof (*tuc), KM_SLEEP); - no_fault(); - savecontext(tuc, &lwp->lwp_sigoldmask); - if (on_fault(&ljb)) - goto badstack; - copyout_noerr(tuc, uc, sizeof (*tuc)); - kmem_free(tuc, sizeof (*tuc)); - tuc = NULL; - - lwp->lwp_oldcontext = (uintptr_t)uc; - - if (newstack) { - lwp->lwp_sigaltstack.ss_flags |= SS_ONSTACK; - if (lwp->lwp_ustack) - copyout_noerr(&lwp->lwp_sigaltstack, - (stack_t *)lwp->lwp_ustack, sizeof (stack_t)); - } - - /* - * Set up signal handler return and stack linkage - */ - { - struct sigframe frame; - - /* - * ensure we never return "normally" - */ - frame.retaddr = (caddr_t)(uintptr_t)-1L; - frame.signo = sig; - frame.sip = sip_addr; - copyout_noerr(&frame, sp, sizeof (frame)); - } - - no_fault(); - if (watched) - watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE); - - /* - * Set up user registers for execution of signal handler. 
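The 8-but-not-16 invariant that the preceding arithmetic establishes can be captured as a small standalone check. A sketch, with the 24-byte sigframe taken from the struct definition above and the function name invented for illustration:

#include <assert.h>
#include <stdint.h>

struct sigframe {
        char    *retaddr;
        long    signo;
        void    *sip;           /* 24 bytes total: 8 mod 16 */
};

/* sp is the new stack pointer sendsig() computed. */
static void
check_sigframe_alignment(uintptr_t sp)
{
        /* sigframe is a multiple of 8 but not of 16 bytes in length... */
        assert(sizeof (struct sigframe) % 16 == 8);
        /* ...so an 8-mod-16 sp puts the ucontext_t on a 16-byte boundary. */
        assert((sp & 15) == 8);
        assert(((sp + sizeof (struct sigframe)) & 15) == 0);
}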
- */ - rp->r_sp = (greg_t)sp; - rp->r_pc = (greg_t)hdlr; - rp->r_ps = PSL_USER | (rp->r_ps & PS_IOPL); - - rp->r_rdi = sig; - rp->r_rsi = (uintptr_t)sip_addr; - rp->r_rdx = (uintptr_t)uc; - - if ((rp->r_cs & 0xffff) != UCS_SEL || - (rp->r_ss & 0xffff) != UDS_SEL) { - /* - * Try our best to deliver the signal. - */ - rp->r_cs = UCS_SEL; - rp->r_ss = UDS_SEL; - } - - /* - * Don't set lwp_eosys here. sendsig() is called via psig() after - * lwp_eosys is handled, so setting it here would affect the next - * system call. - */ - return (1); - -badstack: - no_fault(); - if (watched) - watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE); - if (tuc) - kmem_free(tuc, sizeof (*tuc)); -#ifdef DEBUG - printf("sendsig: bad signal stack cmd=%s, pid=%d, sig=%d\n", - PTOU(p)->u_comm, p->p_pid, sig); - printf("on fault, sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n", - (void *)sp, (void *)hdlr, (uintptr_t)upc); -#endif - return (0); -} - -#ifdef _SYSCALL32_IMPL - -/* - * An i386 SVR4/ABI signal frame looks like this on the stack: - * - * old %esp: - * <a siginfo32_t [optional]> - * <a ucontext32_t> - * <pointer to that ucontext32_t> - * <pointer to that siginfo32_t> - * <signo> - * new %esp: <return address (deliberately invalid)> - */ -struct sigframe32 { - caddr32_t retaddr; - uint32_t signo; - caddr32_t sip; - caddr32_t ucp; -}; - -int -sendsig32(int sig, k_siginfo_t *sip, void (*hdlr)()) -{ - volatile int minstacksz; - int newstack; - label_t ljb; - volatile caddr_t sp; - caddr_t fp; - volatile struct regs *rp; - volatile greg_t upc; - volatile proc_t *p = ttoproc(curthread); - klwp_t *lwp = ttolwp(curthread); - ucontext32_t *volatile tuc = NULL; - ucontext32_t *uc; - siginfo32_t *sip_addr; - volatile int watched; - - rp = lwptoregs(lwp); - upc = rp->r_pc; - - minstacksz = SA32(sizeof (struct sigframe32)) + SA32(sizeof (*uc)); - if (sip != NULL) - minstacksz += SA32(sizeof (siginfo32_t)); - ASSERT((minstacksz & (STACK_ALIGN32 - 1)) == 0); - - /* - * Figure out whether we will be handling this signal on - * an alternate stack specified by the user. Then allocate - * and validate the stack requirements for the signal handler - * context. on_fault will catch any faults. - */ - newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) && - !(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE)); - - if (newstack) { - fp = (caddr_t)(SA32((uintptr_t)lwp->lwp_sigaltstack.ss_sp) + - SA32(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN32); - } else if ((rp->r_ss & 0xffff) != UDS_SEL) { - user_desc_t *ldt; - /* - * If the stack segment selector is -not- pointing at - * the UDS_SEL descriptor and we have an LDT entry for - * it instead, add the base address to find the effective va. - */ - if ((ldt = p->p_ldt) != NULL) - fp = (caddr_t)rp->r_sp + - USEGD_GETBASE(&ldt[SELTOIDX(rp->r_ss)]); - else - fp = (caddr_t)rp->r_sp; - } else - fp = (caddr_t)rp->r_sp; - - /* - * Force proper stack pointer alignment, even in the face of a - * misaligned stack pointer from user-level before the signal. - * Don't use the SA32() macro because that rounds up, not down. 
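Round-up versus round-down matters here: an SA32()-style round-up would move fp up into bytes the interrupted frame may still own, while masking moves it down into free space. A standalone illustration, assuming the usual 4-byte STACK_ALIGN32 and a hypothetical stack value:

#include <stdint.h>
#include <stdio.h>

#define STACK_ALIGN32   4       /* assumed i386 stack alignment */
#define SA32(x)         (((x) + (STACK_ALIGN32 - 1)) & ~(STACK_ALIGN32 - 1))

int
main(void)
{
        uintptr_t fp = 0x8047a07;       /* misaligned user %esp */

        /* SA32() rounds up, into the live frame: 0x8047a08 */
        printf("up:   %#lx\n", (unsigned long)SA32(fp));

        /* masking rounds down, away from it: 0x8047a04 */
        printf("down: %#lx\n",
            (unsigned long)(fp & ~(uintptr_t)(STACK_ALIGN32 - 1)));
        return (0);
}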
- */ - fp = (caddr_t)((uintptr_t)fp & ~(STACK_ALIGN32 - 1)); - sp = fp - minstacksz; - - /* - * Make sure lwp hasn't trashed its stack - */ - if (sp >= (caddr_t)(uintptr_t)USERLIMIT32 || - fp >= (caddr_t)(uintptr_t)USERLIMIT32) { -#ifdef DEBUG - printf("sendsig32: bad signal stack cmd=%s, pid=%d, sig=%d\n", - PTOU(p)->u_comm, p->p_pid, sig); - printf("sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n", - (void *)sp, (void *)hdlr, (uintptr_t)upc); - printf("sp above USERLIMIT\n"); -#endif - return (0); - } - - watched = watch_disable_addr((caddr_t)sp, minstacksz, S_WRITE); - - if (on_fault(&ljb)) - goto badstack; - - if (sip != NULL) { - siginfo32_t si32; - zoneid_t zoneid; - - siginfo_kto32(sip, &si32); - if (SI_FROMUSER(sip) && - (zoneid = p->p_zone->zone_id) != GLOBAL_ZONEID && - zoneid != sip->si_zoneid) { - si32.si_pid = p->p_zone->zone_zsched->p_pid; - si32.si_uid = 0; - si32.si_ctid = -1; - si32.si_zoneid = zoneid; - } - fp -= SA32(sizeof (si32)); - uzero(fp, sizeof (si32)); - copyout_noerr(&si32, fp, sizeof (si32)); - sip_addr = (siginfo32_t *)fp; - - if (sig == SIGPROF && - curthread->t_rprof != NULL && - curthread->t_rprof->rp_anystate) { - /* - * We stand on our head to deal with - * the real-time profiling signal. - * Fill in the stuff that doesn't fit - * in a normal k_siginfo structure. - */ - int i = sip->si_nsysarg; - - while (--i >= 0) - suword32_noerr(&(sip_addr->si_sysarg[i]), - (uint32_t)lwp->lwp_arg[i]); - copyout_noerr(curthread->t_rprof->rp_state, - sip_addr->si_mstate, - sizeof (curthread->t_rprof->rp_state)); - } - } else - sip_addr = NULL; - - /* save the current context on the user stack */ - fp -= SA32(sizeof (*tuc)); - uc = (ucontext32_t *)fp; - tuc = kmem_alloc(sizeof (*tuc), KM_SLEEP); - no_fault(); - savecontext32(tuc, &lwp->lwp_sigoldmask); - if (on_fault(&ljb)) - goto badstack; - copyout_noerr(tuc, uc, sizeof (*tuc)); - kmem_free(tuc, sizeof (*tuc)); - tuc = NULL; - - lwp->lwp_oldcontext = (uintptr_t)uc; - - if (newstack) { - lwp->lwp_sigaltstack.ss_flags |= SS_ONSTACK; - if (lwp->lwp_ustack) { - stack32_t stk32; - - stk32.ss_sp = (caddr32_t)(uintptr_t) - lwp->lwp_sigaltstack.ss_sp; - stk32.ss_size = (size32_t) - lwp->lwp_sigaltstack.ss_size; - stk32.ss_flags = (int32_t) - lwp->lwp_sigaltstack.ss_flags; - copyout_noerr(&stk32, - (stack32_t *)lwp->lwp_ustack, sizeof (stk32)); - } - } - - /* - * Set up signal handler arguments - */ - { - struct sigframe32 frame32; - - frame32.sip = (caddr32_t)(uintptr_t)sip_addr; - frame32.ucp = (caddr32_t)(uintptr_t)uc; - frame32.signo = sig; - frame32.retaddr = 0xffffffff; /* never return! */ - copyout_noerr(&frame32, sp, sizeof (frame32)); - } - - no_fault(); - if (watched) - watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE); - - rp->r_sp = (greg_t)(uintptr_t)sp; - rp->r_pc = (greg_t)(uintptr_t)hdlr; - rp->r_ps = PSL_USER | (rp->r_ps & PS_IOPL); - - if ((rp->r_cs & 0xffff) != U32CS_SEL || - (rp->r_ss & 0xffff) != UDS_SEL) { - /* - * Try our best to deliver the signal. - */ - rp->r_cs = U32CS_SEL; - rp->r_ss = UDS_SEL; - } - - /* - * Don't set lwp_eosys here. sendsig() is called via psig() after - * lwp_eosys is handled, so setting it here would affect the next - * system call. 
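Viewed from user level, the frame built above yields an ordinary SVR4 ABI calling sequence; the handler sees the standard prototype:

void handler(int signo, siginfo_t *sip, ucontext_t *ucp);

The deliberately invalid 0xffffffff return address guarantees that a handler which falls off the end faults rather than "returning"; cleanup is the job of libc's setcontext() path noted at the top of this file.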
- */ - return (1); - -badstack: - no_fault(); - if (watched) - watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE); - if (tuc) - kmem_free(tuc, sizeof (*tuc)); -#ifdef DEBUG - printf("sendsig32: bad signal stack cmd=%s pid=%d, sig=%d\n", - PTOU(p)->u_comm, p->p_pid, sig); - printf("on fault, sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n", - (void *)sp, (void *)hdlr, (uintptr_t)upc); -#endif - return (0); -} - -#endif /* _SYSCALL32_IMPL */ diff --git a/usr/src/uts/intel/ia32/os/sundep.c b/usr/src/uts/intel/ia32/os/sundep.c deleted file mode 100644 index 80e149f01b..0000000000 --- a/usr/src/uts/intel/ia32/os/sundep.c +++ /dev/null @@ -1,1012 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2021 Joyent, Inc. - */ - -/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ -/* All Rights Reserved */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/sysmacros.h> -#include <sys/signal.h> -#include <sys/systm.h> -#include <sys/user.h> -#include <sys/mman.h> -#include <sys/class.h> -#include <sys/proc.h> -#include <sys/procfs.h> -#include <sys/buf.h> -#include <sys/kmem.h> -#include <sys/cred.h> -#include <sys/archsystm.h> -#include <sys/vmparam.h> -#include <sys/prsystm.h> -#include <sys/reboot.h> -#include <sys/uadmin.h> -#include <sys/vfs.h> -#include <sys/vnode.h> -#include <sys/file.h> -#include <sys/session.h> -#include <sys/ucontext.h> -#include <sys/dnlc.h> -#include <sys/var.h> -#include <sys/cmn_err.h> -#include <sys/debugreg.h> -#include <sys/thread.h> -#include <sys/vtrace.h> -#include <sys/consdev.h> -#include <sys/psw.h> -#include <sys/regset.h> -#include <sys/privregs.h> -#include <sys/cpu.h> -#include <sys/stack.h> -#include <sys/swap.h> -#include <vm/hat.h> -#include <vm/anon.h> -#include <vm/as.h> -#include <vm/page.h> -#include <vm/seg.h> -#include <vm/seg_kmem.h> -#include <vm/seg_map.h> -#include <vm/seg_vn.h> -#include <sys/exec.h> -#include <sys/acct.h> -#include <sys/core.h> -#include <sys/corectl.h> -#include <sys/modctl.h> -#include <sys/tuneable.h> -#include <c2/audit.h> -#include <sys/bootconf.h> -#include <sys/brand.h> -#include <sys/dumphdr.h> -#include <sys/promif.h> -#include <sys/systeminfo.h> -#include <sys/kdi.h> -#include <sys/contract_impl.h> -#include <sys/x86_archext.h> -#include <sys/segments.h> -#include <sys/ontrap.h> -#include <sys/cpu.h> -#ifdef __xpv -#include <sys/hypervisor.h> -#endif - -/* - * Compare the version of boot that boot says it is against - * the version of boot the kernel expects. 
- */
-int
-check_boot_version(int boots_version)
-{
-        if (boots_version == BO_VERSION)
-                return (0);
-
-        prom_printf("Wrong boot interface - kernel needs v%d found v%d\n",
-            BO_VERSION, boots_version);
-        prom_panic("halting");
-        /*NOTREACHED*/
-}
-
-/*
- * Process the physical installed list for boot.
- * Finds:
- * 1) the pfn of the highest installed physical page,
- * 2) the number of pages installed,
- * 3) the number of distinct contiguous regions these pages fall into
- *    (returned as the count of contiguous memory ranges).
- */
-void
-installed_top_size_ex(
-        struct memlist *list,   /* pointer to start of installed list */
-        pfn_t *high_pfn,        /* return ptr for top value */
-        pgcnt_t *pgcnt,         /* return ptr for sum of installed pages */
-        int *ranges)            /* return ptr for the count of contig. ranges */
-{
-        pfn_t top = 0;
-        pgcnt_t sumpages = 0;
-        pfn_t highp;            /* high page in a chunk */
-        int cnt = 0;
-
-        for (; list; list = list->ml_next) {
-                ++cnt;
-                highp = (list->ml_address + list->ml_size - 1) >> PAGESHIFT;
-                if (top < highp)
-                        top = highp;
-                sumpages += btop(list->ml_size);
-        }
-
-        *high_pfn = top;
-        *pgcnt = sumpages;
-        *ranges = cnt;
-}
-
-void
-installed_top_size(
-        struct memlist *list,   /* pointer to start of installed list */
-        pfn_t *high_pfn,        /* return ptr for top value */
-        pgcnt_t *pgcnt)         /* return ptr for sum of installed pages */
-{
-        int ranges;
-
-        installed_top_size_ex(list, high_pfn, pgcnt, &ranges);
-}
-
-void
-phys_install_has_changed(void)
-{}
-
-/*
- * Copy in a memory list from boot to kernel, with a filter function
- * to remove pages. The filter function can increase the address and/or
- * decrease the size to filter out pages. It will also align addresses and
- * sizes to PAGESIZE.
- */
-void
-copy_memlist_filter(
-        struct memlist *src,
-        struct memlist **dstp,
-        void (*filter)(uint64_t *, uint64_t *))
-{
-        struct memlist *dst, *prev;
-        uint64_t addr;
-        uint64_t size;
-        uint64_t eaddr;
-
-        dst = *dstp;
-        prev = dst;
-
-        /*
-         * Move through the memlist applying a filter against
-         * each range of memory. Note that we may apply the
-         * filter multiple times against each memlist entry.
-         */
-        for (; src; src = src->ml_next) {
-                addr = P2ROUNDUP(src->ml_address, PAGESIZE);
-                eaddr = P2ALIGN(src->ml_address + src->ml_size, PAGESIZE);
-                while (addr < eaddr) {
-                        size = eaddr - addr;
-                        if (filter != NULL)
-                                filter(&addr, &size);
-                        if (size == 0)
-                                break;
-                        dst->ml_address = addr;
-                        dst->ml_size = size;
-                        dst->ml_next = 0;
-                        if (prev == dst) {
-                                dst->ml_prev = 0;
-                                dst++;
-                        } else {
-                                dst->ml_prev = prev;
-                                prev->ml_next = dst;
-                                dst++;
-                                prev++;
-                        }
-                        addr += size;
-                }
-        }
-
-        *dstp = dst;
-}
-
-/*
- * Kernel setup code, called from startup().
- */
-void
-kern_setup1(void)
-{
-        proc_t *pp;
-
-        pp = &p0;
-
-        proc_sched = pp;
-
-        /*
-         * Initialize process 0 data structures
-         */
-        pp->p_stat = SRUN;
-        pp->p_flag = SSYS;
-
-        pp->p_pidp = &pid0;
-        pp->p_pgidp = &pid0;
-        pp->p_sessp = &session0;
-        pp->p_tlist = &t0;
-        pid0.pid_pglink = pp;
-        pid0.pid_pgtail = pp;
-
-        /*
-         * XXX - we assume that the u-area is zeroed out except for
-         * ttolwp(curthread)->lwp_regs.
-         */
-        PTOU(curproc)->u_cmask = (mode_t)CMASK;
-
-        thread_init();          /* init thread_free list */
-        pid_init();             /* initialize pid (proc) table */
-        contract_init();        /* initialize contracts */
-
-        init_pages_pp_maximum();
-}
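A filter for copy_memlist_filter() may only move the start of a range up or shrink it, per the comment above. A minimal example with a hypothetical policy, clipping everything below 1 MiB:

/* Clip any range that dips below 1 MiB (hypothetical policy). */
static void
clip_below_1mb(uint64_t *addr, uint64_t *size)
{
        const uint64_t floor = 0x100000;

        if (*addr < floor) {
                uint64_t skip = floor - *addr;

                *size = (*size > skip) ? *size - skip : 0;
                *addr = floor;  /* size == 0 makes the caller drop it */
        }
}

/* Usage: copy_memlist_filter(bootlist, &dst, clip_below_1mb); */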
-/*
- * Load a procedure into a thread.
- */
-void
-thread_load(kthread_t *t, void (*start)(), caddr_t arg, size_t len)
-{
-        caddr_t sp;
-        size_t framesz;
-        caddr_t argp;
-        long *p;
-        extern void thread_start();
-
-        /*
-         * Push a "c" call frame onto the stack to represent
-         * the caller of "start".
-         */
-        sp = t->t_stk;
-        ASSERT(((uintptr_t)t->t_stk & (STACK_ENTRY_ALIGN - 1)) == 0);
-        if (len != 0) {
-                /*
-                 * the object that arg points at is copied into the
-                 * caller's frame.
-                 */
-                framesz = SA(len);
-                sp -= framesz;
-                ASSERT(sp > t->t_stkbase);
-                argp = sp + SA(MINFRAME);
-                bcopy(arg, argp, len);
-                arg = argp;
-        }
-        /*
-         * Set up arguments (arg and len) on the caller's stack frame.
-         */
-        p = (long *)sp;
-
-        *--p = 0;               /* fake call */
-        *--p = 0;               /* null frame pointer terminates stack trace */
-        *--p = (long)len;
-        *--p = (intptr_t)arg;
-        *--p = (intptr_t)start;
-
-        /*
-         * initialize thread to resume at thread_start() which will
-         * turn around and invoke (*start)(arg, len).
-         */
-        t->t_pc = (uintptr_t)thread_start;
-        t->t_sp = (uintptr_t)p;
-
-        ASSERT((t->t_sp & (STACK_ENTRY_ALIGN - 1)) == 0);
-}
-
-/*
- * load user registers into lwp.
- */
-/*ARGSUSED2*/
-void
-lwp_load(klwp_t *lwp, gregset_t grp, uintptr_t thrptr)
-{
-        struct regs *rp = lwptoregs(lwp);
-
-        setgregs(lwp, grp);
-        rp->r_ps = PSL_USER;
-
-        /*
-         * For 64-bit lwps, we allow one magic %fs selector value, and one
-         * magic %gs selector to point anywhere in the address space using
-         * %fsbase and %gsbase behind the scenes. libc uses %fs to point
-         * at the ulwp_t structure.
-         *
-         * For 32-bit lwps, libc wedges its lwp thread pointer into the
-         * ucontext ESP slot (which is otherwise irrelevant to setting a
-         * ucontext) and LWPGS_SEL value into gregs[REG_GS]. This is so
-         * syslwp_create() can atomically set up %gs.
-         *
-         * See setup_context() in libc.
-         */
-#ifdef _SYSCALL32_IMPL
-        if (lwp_getdatamodel(lwp) == DATAMODEL_ILP32) {
-                if (grp[REG_GS] == LWPGS_SEL)
-                        (void) lwp_setprivate(lwp, _LWP_GSBASE, thrptr);
-        } else {
-                /*
-                 * See lwp_setprivate in kernel and setup_context in libc.
-                 *
-                 * Currently libc constructs a ucontext from whole cloth for
-                 * every new (not main) lwp created. For 64 bit processes
-                 * %fsbase is directly set to point to the current thread
-                 * pointer. In the past (solaris 10) %fs was also set to
-                 * LWPFS_SEL to indicate %fsbase. Now we use the null GDT
-                 * selector for this purpose. LWP[FS|GS]_SEL are only intended
-                 * for 32 bit processes. To ease transition we support older
-                 * libcs in the newer kernel by forcing %fs or %gs selector
-                 * to null by calling lwp_setprivate if LWP[FS|GS]_SEL is
-                 * passed in the ucontext. This should be ripped out at some
-                 * future date. Another fix would be for libc to do a
-                 * getcontext and inherit the null %fs/%gs from the current
-                 * context but that means an extra system call and could hurt
-                 * performance.
-                 */
-                if (grp[REG_FS] == 0x1bb)       /* hard code legacy LWPFS_SEL */
-                        (void) lwp_setprivate(lwp, _LWP_FSBASE,
-                            (uintptr_t)grp[REG_FSBASE]);
-
-                if (grp[REG_GS] == 0x1c3)       /* hard code legacy LWPGS_SEL */
-                        (void) lwp_setprivate(lwp, _LWP_GSBASE,
-                            (uintptr_t)grp[REG_GSBASE]);
-        }
-#else
-        if (grp[GS] == LWPGS_SEL)
-                (void) lwp_setprivate(lwp, _LWP_GSBASE, thrptr);
-#endif
-
-        lwp->lwp_eosys = JUSTRETURN;
-        lwptot(lwp)->t_post_sys = 1;
-}
-
-/*
- * set syscall()'s return values for a lwp.
- */
-void
-lwp_setrval(klwp_t *lwp, int v1, int v2)
-{
-        lwptoregs(lwp)->r_ps &= ~PS_C;
-        lwptoregs(lwp)->r_r0 = v1;
-        lwptoregs(lwp)->r_r1 = v2;
-}
-
-/*
- * set the stack pointer for a lwp.
- */ -void -lwp_setsp(klwp_t *lwp, caddr_t sp) -{ - lwptoregs(lwp)->r_sp = (intptr_t)sp; -} - -/* - * Copy regs from parent to child. - */ -void -lwp_forkregs(klwp_t *lwp, klwp_t *clwp) -{ - struct pcb *pcb = &clwp->lwp_pcb; - struct regs *rp = lwptoregs(lwp); - - if (!PCB_NEED_UPDATE_SEGS(pcb)) { - pcb->pcb_ds = rp->r_ds; - pcb->pcb_es = rp->r_es; - pcb->pcb_fs = rp->r_fs; - pcb->pcb_gs = rp->r_gs; - PCB_SET_UPDATE_SEGS(pcb); - lwptot(clwp)->t_post_sys = 1; - } - ASSERT(lwptot(clwp)->t_post_sys); - - fp_lwp_dup(clwp); - - bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct regs)); -} - -/* - * This function is currently unused on x86. - */ -/*ARGSUSED*/ -void -lwp_freeregs(klwp_t *lwp, int isexec) -{} - -/* - * This function is currently unused on x86. - */ -void -lwp_pcb_exit(void) -{} - -/* - * Lwp context ops for segment registers. - */ - -/* - * Every time we come into the kernel (syscall, interrupt or trap - * but not fast-traps) we capture the current values of the user's - * segment registers into the lwp's reg structure. This includes - * lcall for i386 generic system call support since it is handled - * as a segment-not-present trap. - * - * Here we save the current values from the lwp regs into the pcb - * and or PCB_UPDATE_SEGS (1) in pcb->pcb_rupdate to tell the rest - * of the kernel that the pcb copy of the segment registers is the - * current one. This ensures the lwp's next trip to user land via - * update_sregs. Finally we set t_post_sys to ensure that no - * system call fast-path's its way out of the kernel via sysret. - * - * (This means that we need to have interrupts disabled when we - * test t->t_post_sys in the syscall handlers; if the test fails, - * we need to keep interrupts disabled until we return to userland - * so we can't be switched away.) - * - * As a result of all this, we don't really have to do a whole lot - * if the thread is just mucking about in the kernel, switching on - * and off the cpu for whatever reason it feels like. And yet we - * still preserve fast syscalls, cause if we -don't- get - * descheduled, we never come here either. - */ - -#define VALID_LWP_DESC(udp) ((udp)->usd_type == SDT_MEMRWA && \ - (udp)->usd_p == 1 && (udp)->usd_dpl == SEL_UPL) - -/*ARGSUSED*/ -void -lwp_segregs_save(klwp_t *lwp) -{ - pcb_t *pcb = &lwp->lwp_pcb; - struct regs *rp; - - ASSERT(VALID_LWP_DESC(&pcb->pcb_fsdesc)); - ASSERT(VALID_LWP_DESC(&pcb->pcb_gsdesc)); - - if (!PCB_NEED_UPDATE_SEGS(pcb)) { - rp = lwptoregs(lwp); - - /* - * If there's no update already pending, capture the current - * %ds/%es/%fs/%gs values from lwp's regs in case the user - * changed them; %fsbase and %gsbase are privileged so the - * kernel versions of these registers in pcb_fsbase and - * pcb_gsbase are always up-to-date. - */ - pcb->pcb_ds = rp->r_ds; - pcb->pcb_es = rp->r_es; - pcb->pcb_fs = rp->r_fs; - pcb->pcb_gs = rp->r_gs; - PCB_SET_UPDATE_SEGS(pcb); - lwp->lwp_thread->t_post_sys = 1; - } - -#if !defined(__xpv) /* XXPV not sure if we can re-read gdt? */ - ASSERT(bcmp(&CPU->cpu_gdt[GDT_LWPFS], &lwp->lwp_pcb.pcb_fsdesc, - sizeof (lwp->lwp_pcb.pcb_fsdesc)) == 0); - ASSERT(bcmp(&CPU->cpu_gdt[GDT_LWPGS], &lwp->lwp_pcb.pcb_gsdesc, - sizeof (lwp->lwp_pcb.pcb_gsdesc)) == 0); -#endif -} - -/* - * Update the segment registers with new values from the pcb. - * - * We have to do this carefully, and in the following order, - * in case any of the selectors points at a bogus descriptor. - * If they do, we'll catch trap with on_trap and return 1. - * returns 0 on success. 
- *
- * This is particularly tricky for %gs.
- * This routine must be executed under a cli.
- */
-int
-update_sregs(struct regs *rp, klwp_t *lwp)
-{
-        pcb_t *pcb = &lwp->lwp_pcb;
-        ulong_t kgsbase;
-        on_trap_data_t otd;
-        int rc = 0;
-
-        if (!on_trap(&otd, OT_SEGMENT_ACCESS)) {
-
-#if defined(__xpv)
-                /*
-                 * On the hypervisor this is easy. The hypercall below will
-                 * swapgs and load %gs with the user selector. If the user
-                 * selector is bad the hypervisor will catch the fault and
-                 * load %gs with the null selector instead. Either way the
-                 * kernel's gsbase is not damaged.
-                 */
-                kgsbase = (ulong_t)CPU;
-                if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL,
-                    pcb->pcb_gs) != 0) {
-                        no_trap();
-                        return (1);
-                }
-
-                rp->r_gs = pcb->pcb_gs;
-                ASSERT((cpu_t *)kgsbase == CPU);
-
-#else   /* __xpv */
-
-                /*
-                 * A little more complicated running native.
-                 */
-                kgsbase = (ulong_t)CPU;
-                __set_gs(pcb->pcb_gs);
-
-                /*
-                 * If __set_gs fails it's because the new %gs is a bad %gs,
-                 * we'll be taking a trap but with the original %gs and %gsbase
-                 * undamaged (i.e. pointing at curcpu).
-                 *
-                 * We've just mucked up the kernel's gsbase. Oops. In
-                 * particular we can't take any traps at all. Make the newly
-                 * computed gsbase be the hidden gs via swapgs, and fix
-                 * the kernel's gsbase back again. Later, when we return to
-                 * userland we'll swapgs again restoring gsbase just loaded
-                 * above.
-                 */
-                __asm__ __volatile__("mfence; swapgs");
-
-                rp->r_gs = pcb->pcb_gs;
-
-                /*
-                 * Restore kernel's gsbase. Note that this also serializes any
-                 * attempted speculation from loading the user-controlled
-                 * %gsbase.
-                 */
-                wrmsr(MSR_AMD_GSBASE, kgsbase);
-
-#endif  /* __xpv */
-
-                /*
-                 * Only override the descriptor base address if
-                 * r_gs == LWPGS_SEL or if r_gs == NULL. A note on
-                 * NULL descriptors -- 32-bit programs take faults
-                 * if they dereference NULL descriptors; however,
-                 * when 64-bit programs load them into %fs or %gs,
-                 * they DON'T fault -- only the base address remains
-                 * whatever it was from the last load. Urk.
-                 *
-                 * XXX - note that lwp_setprivate now sets %fs/%gs to the
-                 * null selector for 64 bit processes. Whereas before
-                 * %fs/%gs were set to LWP(FS|GS)_SEL regardless of
-                 * the process's data model. For now we check for both
-                 * values so that the kernel can also support the older
-                 * libc. This should be ripped out at some point in the
-                 * future.
-                 */
-                if (pcb->pcb_gs == LWPGS_SEL || pcb->pcb_gs == 0) {
-#if defined(__xpv)
-                        if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER,
-                            pcb->pcb_gsbase)) {
-                                no_trap();
-                                return (1);
-                        }
-#else
-                        wrmsr(MSR_AMD_KGSBASE, pcb->pcb_gsbase);
-#endif
-                }
-
-                __set_ds(pcb->pcb_ds);
-                rp->r_ds = pcb->pcb_ds;
-
-                __set_es(pcb->pcb_es);
-                rp->r_es = pcb->pcb_es;
-
-                __set_fs(pcb->pcb_fs);
-                rp->r_fs = pcb->pcb_fs;
-
-                /*
-                 * Same as for %gs
-                 */
-                if (pcb->pcb_fs == LWPFS_SEL || pcb->pcb_fs == 0) {
-#if defined(__xpv)
-                        if (HYPERVISOR_set_segment_base(SEGBASE_FS,
-                            pcb->pcb_fsbase)) {
-                                no_trap();
-                                return (1);
-                        }
-#else
-                        wrmsr(MSR_AMD_FSBASE, pcb->pcb_fsbase);
-#endif
-                }
-
-        } else {
-                cli();
-                rc = 1;
-        }
-        no_trap();
-        return (rc);
-}
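The on_trap() idiom that update_sregs() is built around reduces to a small skeleton. A sketch for a single selector load follows; the function name is hypothetical and this is not a drop-in replacement (in particular it must run under the same cli discipline as the real routine):

static int
try_load_fs(pcb_t *pcb, struct regs *rp)
{
        on_trap_data_t otd;
        int rc = 0;

        if (!on_trap(&otd, OT_SEGMENT_ACCESS)) {
                __set_fs(pcb->pcb_fs); /* may #gp/#np on a bogus selector */
                rp->r_fs = pcb->pcb_fs;
        } else {
                cli();          /* as in update_sregs(): re-disable interrupts */
                rc = 1;         /* bad selector: report failure */
        }
        no_trap();
        return (rc);
}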
-
-/*
- * Make sure any stale selectors are cleared from the segment registers
- * by putting KDS_SEL (the kernel's default %ds gdt selector) into them.
- * This is necessary because the kernel itself does not use %es, %fs, nor
- * %ds. (%cs and %ss are necessary, and are set up by the kernel - along with
- * %gs - to point to the current cpu struct.) If we enter kmdb while in the
- * kernel and resume with a stale ldt or brandz selector sitting there in a
- * segment register, kmdb will #gp fault if the stale selector points to,
- * for example, an ldt in the context of another process.
- *
- * WARNING: Intel and AMD chips behave differently when storing
- * the null selector into %fs and %gs while in long mode. On AMD
- * chips fsbase and gsbase are not cleared. But on Intel chips, storing
- * a null selector into %fs or %gs has the side effect of clearing
- * fsbase or gsbase. For that reason we use KDS_SEL, which has
- * consistent behavior between AMD and Intel.
- *
- * Caller responsible for preventing cpu migration.
- */
-void
-reset_sregs(void)
-{
-        ulong_t kgsbase = (ulong_t)CPU;
-
-        ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
-
-        cli();
-        __set_gs(KGS_SEL);
-
-        /*
-         * restore kernel gsbase
-         */
-#if defined(__xpv)
-        xen_set_segment_base(SEGBASE_GS_KERNEL, kgsbase);
-#else
-        wrmsr(MSR_AMD_GSBASE, kgsbase);
-#endif
-
-        sti();
-
-        __set_ds(KDS_SEL);
-        __set_es(0 | SEL_KPL);  /* selector RPL not ring 0 on hypervisor */
-        __set_fs(KFS_SEL);
-}
-
-
-#ifdef _SYSCALL32_IMPL
-
-/*
- * Make it impossible for a process to change its data model.
- * We do this by toggling the present bits for the 32 and
- * 64-bit user code descriptors. That way if a user lwp attempts
- * to change its data model (by using the wrong code descriptor in
- * %cs) it will fault immediately. This also allows us to simplify
- * assertions and checks in the kernel.
- */
-
-static void
-gdt_ucode_model(model_t model)
-{
-        kpreempt_disable();
-        if (model == DATAMODEL_NATIVE) {
-                gdt_update_usegd(GDT_UCODE, &ucs_on);
-                gdt_update_usegd(GDT_U32CODE, &ucs32_off);
-        } else {
-                gdt_update_usegd(GDT_U32CODE, &ucs32_on);
-                gdt_update_usegd(GDT_UCODE, &ucs_off);
-        }
-        kpreempt_enable();
-}
-
-#endif  /* _SYSCALL32_IMPL */
-
-/*
- * Restore lwp private fs and gs segment descriptors
- * on current cpu's GDT.
- */
-static void
-lwp_segregs_restore(klwp_t *lwp)
-{
-        pcb_t *pcb = &lwp->lwp_pcb;
-
-        ASSERT(VALID_LWP_DESC(&pcb->pcb_fsdesc));
-        ASSERT(VALID_LWP_DESC(&pcb->pcb_gsdesc));
-
-#ifdef _SYSCALL32_IMPL
-        gdt_ucode_model(DATAMODEL_NATIVE);
-#endif
-
-        gdt_update_usegd(GDT_LWPFS, &pcb->pcb_fsdesc);
-        gdt_update_usegd(GDT_LWPGS, &pcb->pcb_gsdesc);
-
-}
-
-#ifdef _SYSCALL32_IMPL
-
-static void
-lwp_segregs_restore32(klwp_t *lwp)
-{
-        /*LINTED*/
-        cpu_t *cpu = CPU;
-        pcb_t *pcb = &lwp->lwp_pcb;
-
-        ASSERT(VALID_LWP_DESC(&lwp->lwp_pcb.pcb_fsdesc));
-        ASSERT(VALID_LWP_DESC(&lwp->lwp_pcb.pcb_gsdesc));
-
-        gdt_ucode_model(DATAMODEL_ILP32);
-        gdt_update_usegd(GDT_LWPFS, &pcb->pcb_fsdesc);
-        gdt_update_usegd(GDT_LWPGS, &pcb->pcb_gsdesc);
-}
-
-#endif  /* _SYSCALL32_IMPL */
-
-/*
- * If this is a process in a branded zone, then we want it to use the brand
- * syscall entry points instead of the standard Solaris entry points. This
- * routine must be called when a new lwp is created within a branded zone
- * or when an existing lwp moves into a branded zone via a zone_enter()
- * operation.
- */ -void -lwp_attach_brand_hdlrs(klwp_t *lwp) -{ - kthread_t *t = lwptot(lwp); - - ASSERT(PROC_IS_BRANDED(lwptoproc(lwp))); - - ASSERT(removectx(t, NULL, brand_interpositioning_disable, - brand_interpositioning_enable, NULL, NULL, - brand_interpositioning_disable, NULL) == 0); - installctx(t, NULL, brand_interpositioning_disable, - brand_interpositioning_enable, NULL, NULL, - brand_interpositioning_disable, NULL, NULL); - - if (t == curthread) { - kpreempt_disable(); - brand_interpositioning_enable(); - kpreempt_enable(); - } -} - -/* - * If this is a process in a branded zone, then we want it to disable the - * brand syscall entry points. This routine must be called when the last - * lwp in a process is exiting in proc_exit(). - */ -void -lwp_detach_brand_hdlrs(klwp_t *lwp) -{ - kthread_t *t = lwptot(lwp); - - ASSERT(PROC_IS_BRANDED(lwptoproc(lwp))); - if (t == curthread) - kpreempt_disable(); - - /* Remove the original context handlers */ - VERIFY(removectx(t, NULL, brand_interpositioning_disable, - brand_interpositioning_enable, NULL, NULL, - brand_interpositioning_disable, NULL) != 0); - - if (t == curthread) { - /* Cleanup our MSR and IDT entries. */ - brand_interpositioning_disable(); - kpreempt_enable(); - } -} - -/* - * Add any lwp-associated context handlers to the lwp at the beginning - * of the lwp's useful life. - * - * All paths which create lwp's invoke lwp_create(); lwp_create() - * invokes lwp_stk_init() which initializes the stack, sets up - * lwp_regs, and invokes this routine. - * - * All paths which destroy lwp's invoke lwp_exit() to rip the lwp - * apart and put it on 'lwp_deathrow'; if the lwp is destroyed it - * ends up in thread_free() which invokes freectx(t, 0) before - * invoking lwp_stk_fini(). When the lwp is recycled from death - * row, lwp_stk_fini() is invoked, then thread_free(), and thus - * freectx(t, 0) as before. - * - * In the case of exec, the surviving lwp is thoroughly scrubbed - * clean; exec invokes freectx(t, 1) to destroy associated contexts. - * On the way back to the new image, it invokes setregs() which - * in turn invokes this routine. - */ -void -lwp_installctx(klwp_t *lwp) -{ - kthread_t *t = lwptot(lwp); - int thisthread = t == curthread; -#ifdef _SYSCALL32_IMPL - void (*restop)(klwp_t *) = lwp_getdatamodel(lwp) == DATAMODEL_NATIVE ? - lwp_segregs_restore : lwp_segregs_restore32; -#else - void (*restop)(klwp_t *) = lwp_segregs_restore; -#endif - struct ctxop *ctx; - - /* - * Install the basic lwp context handlers on each lwp. - * - * On the amd64 kernel, the context handlers are responsible for - * virtualizing %ds, %es, %fs, and %gs to the lwp. The register - * values are only ever changed via sys_rtt when the - * PCB_UPDATE_SEGS bit (1) is set in pcb->pcb_rupdate. Only - * sys_rtt gets to clear the bit. - * - * On the i386 kernel, the context handlers are responsible for - * virtualizing %gs/%fs to the lwp by updating the per-cpu GDTs - */ - ASSERT(removectx(t, lwp, lwp_segregs_save, restop, - NULL, NULL, NULL, NULL) == 0); - if (thisthread) { - ctx = installctx_preallocate(); - kpreempt_disable(); - } else { - ctx = NULL; - } - installctx(t, lwp, lwp_segregs_save, restop, - NULL, NULL, NULL, NULL, ctx); - if (thisthread) { - /* - * Since we're the right thread, set the values in the GDT - */ - restop(lwp); - kpreempt_enable(); - } - - /* - * If we have sysenter/sysexit instructions enabled, we need - * to ensure that the hardware mechanism is kept up-to-date with the - * lwp's kernel stack pointer across context switches. 
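Every handler pair in this file follows the same registration discipline. A sketch of the pattern (handler names hypothetical), with installctx() and removectx() given matching arguments so the right entry is found and removed:

static void my_save(void *arg);         /* thread switching off-cpu */
static void my_restore(void *arg);      /* thread switching back on-cpu */

void
example_install(void *arg)
{
        struct ctxop *ctx;

        /* Preallocate so installctx() cannot sleep under kpreempt_disable. */
        ctx = installctx_preallocate();
        kpreempt_disable();
        installctx(curthread, arg, my_save, my_restore,
            NULL, NULL, NULL, NULL, ctx);
        kpreempt_enable();
}

void
example_remove(void *arg)
{
        /* Arguments must match the installctx() call exactly. */
        removectx(curthread, arg, my_save, my_restore,
            NULL, NULL, NULL, NULL);
}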
- * - * sep_save zeros the sysenter stack pointer msr; sep_restore sets - * it to the lwp's kernel stack pointer (kstktop). - */ - if (is_x86_feature(x86_featureset, X86FSET_SEP)) { - caddr_t kstktop = (caddr_t)lwp->lwp_regs; - ASSERT(removectx(t, kstktop, - sep_save, sep_restore, NULL, NULL, NULL, NULL) == 0); - - if (thisthread) { - ctx = installctx_preallocate(); - kpreempt_disable(); - } else { - ctx = NULL; - } - installctx(t, kstktop, - sep_save, sep_restore, NULL, NULL, NULL, NULL, ctx); - if (thisthread) { - /* - * We're the right thread, so set the stack pointer - * for the first sysenter instruction to use - */ - sep_restore(kstktop); - kpreempt_enable(); - } - } - - if (PROC_IS_BRANDED(ttoproc(t))) - lwp_attach_brand_hdlrs(lwp); -} - -/* - * Clear registers on exec(2). - */ -void -setregs(uarg_t *args) -{ - struct regs *rp; - kthread_t *t = curthread; - klwp_t *lwp = ttolwp(t); - pcb_t *pcb = &lwp->lwp_pcb; - greg_t sp; - - /* - * Initialize user registers - */ - (void) save_syscall_args(); /* copy args from registers first */ - rp = lwptoregs(lwp); - sp = rp->r_sp; - bzero(rp, sizeof (*rp)); - - rp->r_ss = UDS_SEL; - rp->r_sp = sp; - rp->r_pc = args->entry; - rp->r_ps = PSL_USER; - - pcb->pcb_fs = pcb->pcb_gs = 0; - pcb->pcb_fsbase = pcb->pcb_gsbase = 0; - - if (ttoproc(t)->p_model == DATAMODEL_NATIVE) { - - rp->r_cs = UCS_SEL; - - /* - * Only allow 64-bit user code descriptor to be present. - */ - gdt_ucode_model(DATAMODEL_NATIVE); - - /* - * Arrange that the virtualized %fs and %gs GDT descriptors - * have a well-defined initial state (present, ring 3 - * and of type data). - */ - pcb->pcb_fsdesc = pcb->pcb_gsdesc = zero_udesc; - - /* - * thrptr is either NULL or a value used by DTrace. - * 64-bit processes use %fs as their "thread" register. - */ - if (args->thrptr) - (void) lwp_setprivate(lwp, _LWP_FSBASE, args->thrptr); - - } else { - - rp->r_cs = U32CS_SEL; - rp->r_ds = rp->r_es = UDS_SEL; - - /* - * only allow 32-bit user code selector to be present. - */ - gdt_ucode_model(DATAMODEL_ILP32); - - pcb->pcb_fsdesc = pcb->pcb_gsdesc = zero_u32desc; - - /* - * thrptr is either NULL or a value used by DTrace. - * 32-bit processes use %gs as their "thread" register. - */ - if (args->thrptr) - (void) lwp_setprivate(lwp, _LWP_GSBASE, args->thrptr); - - } - - pcb->pcb_ds = rp->r_ds; - pcb->pcb_es = rp->r_es; - PCB_SET_UPDATE_SEGS(pcb); - - lwp->lwp_eosys = JUSTRETURN; - t->t_post_sys = 1; - - /* - * Add the lwp context handlers that virtualize segment registers, - * and/or system call stacks etc. - */ - lwp_installctx(lwp); - - /* - * Reset the FPU flags and then initialize the FPU for this lwp. - */ - fp_exec(); -} - -user_desc_t * -cpu_get_gdt(void) -{ - return (CPU->cpu_gdt); -} - - -#if !defined(lwp_getdatamodel) - -/* - * Return the datamodel of the given lwp. - */ -/*ARGSUSED*/ -model_t -lwp_getdatamodel(klwp_t *lwp) -{ - return (lwp->lwp_procp->p_model); -} - -#endif /* !lwp_getdatamodel */ - -#if !defined(get_udatamodel) - -model_t -get_udatamodel(void) -{ - return (curproc->p_model); -} - -#endif /* !get_udatamodel */ diff --git a/usr/src/uts/intel/ia32/os/syscall.c b/usr/src/uts/intel/ia32/os/syscall.c deleted file mode 100644 index 6cf4293ff4..0000000000 --- a/usr/src/uts/intel/ia32/os/syscall.c +++ /dev/null @@ -1,1397 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. - */ - -#include <sys/param.h> -#include <sys/vmparam.h> -#include <sys/types.h> -#include <sys/sysmacros.h> -#include <sys/systm.h> -#include <sys/signal.h> -#include <sys/stack.h> -#include <sys/cred.h> -#include <sys/cmn_err.h> -#include <sys/user.h> -#include <sys/privregs.h> -#include <sys/psw.h> -#include <sys/debug.h> -#include <sys/errno.h> -#include <sys/proc.h> -#include <sys/modctl.h> -#include <sys/var.h> -#include <sys/inline.h> -#include <sys/syscall.h> -#include <sys/ucontext.h> -#include <sys/cpuvar.h> -#include <sys/siginfo.h> -#include <sys/trap.h> -#include <sys/vtrace.h> -#include <sys/sysinfo.h> -#include <sys/procfs.h> -#include <sys/prsystm.h> -#include <c2/audit.h> -#include <sys/modctl.h> -#include <sys/aio_impl.h> -#include <sys/tnf.h> -#include <sys/tnf_probe.h> -#include <sys/copyops.h> -#include <sys/priv.h> -#include <sys/msacct.h> - -int syscalltrace = 0; -#ifdef SYSCALLTRACE -static kmutex_t systrace_lock; /* syscall tracing lock */ -#else -#define syscalltrace 0 -#endif /* SYSCALLTRACE */ - -typedef int64_t (*llfcn_t)(); /* function returning long long */ - -int pre_syscall(void); -void post_syscall(long rval1, long rval2); -static krwlock_t *lock_syscall(struct sysent *, uint_t); -void deferred_singlestep_trap(caddr_t); - -#ifdef _SYSCALL32_IMPL -#define LWP_GETSYSENT(lwp) \ - (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE ? sysent : sysent32) -#else -#define LWP_GETSYSENT(lwp) (sysent) -#endif - -/* - * If watchpoints are active, don't make copying in of - * system call arguments take a read watchpoint trap. - */ -static int -copyin_args(struct regs *rp, long *ap, uint_t nargs) -{ - greg_t *sp = 1 + (greg_t *)rp->r_sp; /* skip ret addr */ - - ASSERT(nargs <= MAXSYSARGS); - - return (copyin_nowatch(sp, ap, nargs * sizeof (*sp))); -} - -#if defined(_SYSCALL32_IMPL) -static int -copyin_args32(struct regs *rp, long *ap, uint_t nargs) -{ - greg32_t *sp = 1 + (greg32_t *)rp->r_sp; /* skip ret addr */ - uint32_t a32[MAXSYSARGS]; - int rc; - - ASSERT(nargs <= MAXSYSARGS); - - if ((rc = copyin_nowatch(sp, a32, nargs * sizeof (*sp))) == 0) { - uint32_t *a32p = &a32[0]; - - while (nargs--) - *ap++ = (ulong_t)*a32p++; - } - return (rc); -} -#define COPYIN_ARGS32 copyin_args32 -#else -#define COPYIN_ARGS32 copyin_args -#endif - -/* - * Error handler for system calls where arg copy gets fault. - */ -static longlong_t -syscall_err() -{ - return (0); -} - -/* - * Corresponding sysent entry to allow syscall_entry caller - * to invoke syscall_err. - */ -static struct sysent sysent_err = { - 0, SE_32RVAL1, NULL, NULL, (llfcn_t)syscall_err -}; - -/* - * Called from syscall() when a non-trivial 32-bit system call occurs. - * Sets up the args and returns a pointer to the handler. 
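The widening in copyin_args32() above is a zero extension, `*ap++ = (ulong_t)*a32p++`, so negative ILP32 arguments arrive with the upper half clear and are recovered by truncation in the individual handlers. A standalone sketch:

#include <stdio.h>

int
main(void)
{
        unsigned int a32 = 0xfffffffeU;         /* -2 from an ILP32 caller */
        long arg = (unsigned long)a32;          /* zero-extended: 0xfffffffe */
        int val = (int)arg;                     /* handler truncates: -2 */

        printf("arg=%#lx val=%d\n", arg, val);
        return (0);
}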
- */ -struct sysent * -syscall_entry(kthread_t *t, long *argp) -{ - klwp_t *lwp = ttolwp(t); - struct regs *rp = lwptoregs(lwp); - unsigned int code; - struct sysent *callp; - struct sysent *se = LWP_GETSYSENT(lwp); - int error = 0; - uint_t nargs; - - ASSERT(t == curthread && curthread->t_schedflag & TS_DONT_SWAP); - - lwp->lwp_ru.sysc++; - lwp->lwp_eosys = NORMALRETURN; /* assume this will be normal */ - - /* - * Set lwp_ap to point to the args, even if none are needed for this - * system call. This is for the loadable-syscall case where the - * number of args won't be known until the system call is loaded, and - * also maintains a non-NULL lwp_ap setup for get_syscall_args(). Note - * that lwp_ap MUST be set to a non-NULL value _BEFORE_ t_sysnum is - * set to non-zero; otherwise get_syscall_args(), seeing a non-zero - * t_sysnum for this thread, will charge ahead and dereference lwp_ap. - */ - lwp->lwp_ap = argp; /* for get_syscall_args */ - - code = rp->r_r0; - t->t_sysnum = (short)code; - callp = code >= NSYSCALL ? &nosys_ent : se + code; - - if ((t->t_pre_sys | syscalltrace) != 0) { - error = pre_syscall(); - - /* - * pre_syscall() has taken care so that lwp_ap is current; - * it either points to syscall-entry-saved amd64 regs, - * or it points to lwp_arg[], which has been re-copied from - * the ia32 ustack, but either way, it's a current copy after - * /proc has possibly mucked with the syscall args. - */ - - if (error) - return (&sysent_err); /* use dummy handler */ - } - - /* - * Fetch the system call arguments to the kernel stack copy used - * for syscall handling. - * Note: for loadable system calls the number of arguments required - * may not be known at this point, and will be zero if the system call - * was never loaded. Once the system call has been loaded, the number - * of args is not allowed to be changed. - */ - if ((nargs = (uint_t)callp->sy_narg) != 0 && - COPYIN_ARGS32(rp, argp, nargs)) { - (void) set_errno(EFAULT); - return (&sysent_err); /* use dummy handler */ - } - - return (callp); /* return sysent entry for caller */ -} - -void -syscall_exit(kthread_t *t, long rval1, long rval2) -{ - /* - * Handle signals and other post-call events if necessary. - */ - if ((t->t_post_sys_ast | syscalltrace) == 0) { - klwp_t *lwp = ttolwp(t); - struct regs *rp = lwptoregs(lwp); - - /* - * Normal return. - * Clear error indication and set return values. - */ - rp->r_ps &= ~PS_C; /* reset carry bit */ - rp->r_r0 = rval1; - rp->r_r1 = rval2; - lwp->lwp_state = LWP_USER; - } else { - post_syscall(rval1, rval2); - } - t->t_sysnum = 0; /* invalidate args */ -} - -/* - * Perform pre-system-call processing, including stopping for tracing, - * auditing, etc. - * - * This routine is called only if the t_pre_sys flag is set. Any condition - * requiring pre-syscall handling must set the t_pre_sys flag. If the - * condition is persistent, this routine will repost t_pre_sys. - */ -int -pre_syscall() -{ - kthread_t *t = curthread; - unsigned code = t->t_sysnum; - klwp_t *lwp = ttolwp(t); - proc_t *p = ttoproc(t); - int repost; - - t->t_pre_sys = repost = 0; /* clear pre-syscall processing flag */ - - ASSERT(t->t_schedflag & TS_DONT_SWAP); - -#if defined(DEBUG) - /* - * On the i386 kernel, lwp_ap points at the piece of the thread - * stack that we copy the users arguments into. - * - * On the amd64 kernel, the syscall arguments in the rdi..r9 - * registers should be pointed at by lwp_ap. 
If the args need to - * be copied so that those registers can be changed without losing - * the ability to get the args for /proc, they can be saved by - * save_syscall_args(), and lwp_ap will be restored by post_syscall(). - */ - if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { -#if defined(_LP64) - ASSERT(lwp->lwp_ap == (long *)&lwptoregs(lwp)->r_rdi); - } else { -#endif - ASSERT((caddr_t)lwp->lwp_ap > t->t_stkbase && - (caddr_t)lwp->lwp_ap < t->t_stk); - } -#endif /* DEBUG */ - - /* - * Make sure the thread is holding the latest credentials for the - * process. The credentials in the process right now apply to this - * thread for the entire system call. - */ - if (t->t_cred != p->p_cred) { - cred_t *oldcred = t->t_cred; - /* - * DTrace accesses t_cred in probe context. t_cred must - * always be either NULL, or point to a valid, allocated cred - * structure. - */ - t->t_cred = crgetcred(); - crfree(oldcred); - } - - /* - * From the proc(4) manual page: - * When entry to a system call is being traced, the traced process - * stops after having begun the call to the system but before the - * system call arguments have been fetched from the process. - */ - if (PTOU(p)->u_systrap) { - if (prismember(&PTOU(p)->u_entrymask, code)) { - mutex_enter(&p->p_lock); - /* - * Recheck stop condition, now that lock is held. - */ - if (PTOU(p)->u_systrap && - prismember(&PTOU(p)->u_entrymask, code)) { - stop(PR_SYSENTRY, code); - - /* - * /proc may have modified syscall args, - * either in regs for amd64 or on ustack - * for ia32. Either way, arrange to - * copy them again, both for the syscall - * handler and for other consumers in - * post_syscall (like audit). Here, we - * only do amd64, and just set lwp_ap - * back to the kernel-entry stack copy; - * the syscall ml code redoes - * move-from-regs to set up for the - * syscall handler after we return. For - * ia32, save_syscall_args() below makes - * an lwp_ap-accessible copy. - */ -#if defined(_LP64) - if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { - lwp->lwp_argsaved = 0; - lwp->lwp_ap = - (long *)&lwptoregs(lwp)->r_rdi; - } -#endif - } - mutex_exit(&p->p_lock); - } - repost = 1; - } - - /* - * ia32 kernel, or ia32 proc on amd64 kernel: keep args in - * lwp_arg for post-syscall processing, regardless of whether - * they might have been changed in /proc above. - */ -#if defined(_LP64) - if (lwp_getdatamodel(lwp) != DATAMODEL_NATIVE) -#endif - (void) save_syscall_args(); - - if (lwp->lwp_sysabort) { - /* - * lwp_sysabort may have been set via /proc while the process - * was stopped on PR_SYSENTRY. If so, abort the system call. - * Override any error from the copyin() of the arguments. 
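The lwp_sysabort path handled just below is driven from /proc: with the lwp stopped at PR_SYSENTRY, a controlling process can ask for the pending call to be aborted. A hypothetical controller sketch (error handling omitted), assuming the PCRUN/PRSABORT interface described in proc(4):

#include <sys/types.h>
#include <unistd.h>
#include <procfs.h>

/* ctlfd: an open /proc/<pid>/ctl; the lwp is stopped at PR_SYSENTRY. */
static void
abort_pending_syscall(int ctlfd)
{
        long ctl[2];

        ctl[0] = PCRUN;
        ctl[1] = PRSABORT;      /* abort the syscall; it fails with EINTR */
        (void) write(ctlfd, ctl, sizeof (ctl));
}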
- */ - lwp->lwp_sysabort = 0; - (void) set_errno(EINTR); /* forces post_sys */ - t->t_pre_sys = 1; /* repost anyway */ - return (1); /* don't do system call, return EINTR */ - } - - /* - * begin auditing for this syscall if the c2audit module is loaded - * and auditing is enabled - */ - if (audit_active == C2AUDIT_LOADED) { - uint32_t auditing = au_zone_getstate(NULL); - - if (auditing & AU_AUDIT_MASK) { - int error; - if (error = audit_start(T_SYSCALL, code, auditing, \ - 0, lwp)) { - t->t_pre_sys = 1; /* repost anyway */ - (void) set_errno(error); - return (1); - } - repost = 1; - } - } - -#ifndef NPROBE - /* Kernel probe */ - if (tnf_tracing_active) { - TNF_PROBE_1(syscall_start, "syscall thread", /* CSTYLED */, - tnf_sysnum, sysnum, t->t_sysnum); - t->t_post_sys = 1; /* make sure post_syscall runs */ - repost = 1; - } -#endif /* NPROBE */ - -#ifdef SYSCALLTRACE - if (syscalltrace) { - int i; - long *ap; - char *cp; - char *sysname; - struct sysent *callp; - - if (code >= NSYSCALL) - callp = &nosys_ent; /* nosys has no args */ - else - callp = LWP_GETSYSENT(lwp) + code; - (void) save_syscall_args(); - mutex_enter(&systrace_lock); - printf("%d: ", p->p_pid); - if (code >= NSYSCALL) { - printf("0x%x", code); - } else { - sysname = mod_getsysname(code); - printf("%s[0x%x/0x%p]", sysname == NULL ? "NULL" : - sysname, code, callp->sy_callc); - } - cp = "("; - for (i = 0, ap = lwp->lwp_ap; i < callp->sy_narg; i++, ap++) { - printf("%s%lx", cp, *ap); - cp = ", "; - } - if (i) - printf(")"); - printf(" %s id=0x%p\n", PTOU(p)->u_comm, curthread); - mutex_exit(&systrace_lock); - } -#endif /* SYSCALLTRACE */ - - /* - * If there was a continuing reason for pre-syscall processing, - * set the t_pre_sys flag for the next system call. - */ - if (repost) - t->t_pre_sys = 1; - lwp->lwp_error = 0; /* for old drivers */ - lwp->lwp_badpriv = PRIV_NONE; - return (0); -} - - -/* - * Post-syscall processing. Perform abnormal system call completion - * actions such as /proc tracing, profiling, signals, preemption, etc. - * - * This routine is called only if t_post_sys, t_sig_check, or t_astflag is set. - * Any condition requiring pre-syscall handling must set one of these. - * If the condition is persistent, this routine will repost t_post_sys. - */ -void -post_syscall(long rval1, long rval2) -{ - kthread_t *t = curthread; - klwp_t *lwp = ttolwp(t); - proc_t *p = ttoproc(t); - struct regs *rp = lwptoregs(lwp); - uint_t error; - uint_t code = t->t_sysnum; - int repost = 0; - int proc_stop = 0; /* non-zero if stopping */ - int sigprof = 0; /* non-zero if sending SIGPROF */ - - t->t_post_sys = 0; - - error = lwp->lwp_errno; - - /* - * Code can be zero if this is a new LWP returning after a forkall(), - * other than the one which matches the one in the parent which called - * forkall(). In these LWPs, skip most of post-syscall activity. - */ - if (code == 0) - goto sig_check; - /* - * If the trace flag is set, mark the lwp to take a single-step trap - * on return to user level (below). The x86 lcall interface and - * sysenter has already done this, and turned off the flag, but - * amd64 syscall interface has not. - */ - if (rp->r_ps & PS_T) { - lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING; - rp->r_ps &= ~PS_T; - aston(curthread); - } - - /* put out audit record for this syscall */ - if (AU_AUDITING()) { - rval_t rval; - - /* XX64 -- truncation of 64-bit return values? 
*/ - rval.r_val1 = (int)rval1; - rval.r_val2 = (int)rval2; - audit_finish(T_SYSCALL, code, error, &rval); - repost = 1; - } - - if (curthread->t_pdmsg != NULL) { - char *m = curthread->t_pdmsg; - - uprintf("%s", m); - kmem_free(m, strlen(m) + 1); - curthread->t_pdmsg = NULL; - } - - /* - * If we're going to stop for /proc tracing, set the flag and - * save the arguments so that the return values don't smash them. - */ - if (PTOU(p)->u_systrap) { - if (prismember(&PTOU(p)->u_exitmask, code)) { - if (lwp_getdatamodel(lwp) == DATAMODEL_LP64) - (void) save_syscall_args(); - proc_stop = 1; - } - repost = 1; - } - - /* - * Similarly check to see if SIGPROF might be sent. - */ - if (curthread->t_rprof != NULL && - curthread->t_rprof->rp_anystate != 0) { - if (lwp_getdatamodel(lwp) == DATAMODEL_LP64) - (void) save_syscall_args(); - sigprof = 1; - } - - if (lwp->lwp_eosys == NORMALRETURN) { - if (error == 0) { -#ifdef SYSCALLTRACE - if (syscalltrace) { - mutex_enter(&systrace_lock); - printf( - "%d: r_val1=0x%lx, r_val2=0x%lx, id 0x%p\n", - p->p_pid, rval1, rval2, curthread); - mutex_exit(&systrace_lock); - } -#endif /* SYSCALLTRACE */ - rp->r_ps &= ~PS_C; - rp->r_r0 = rval1; - rp->r_r1 = rval2; - } else { - int sig; -#ifdef SYSCALLTRACE - if (syscalltrace) { - mutex_enter(&systrace_lock); - printf("%d: error=%d, id 0x%p\n", - p->p_pid, error, curthread); - mutex_exit(&systrace_lock); - } -#endif /* SYSCALLTRACE */ - if (error == EINTR && t->t_activefd.a_stale) - error = EBADF; - if (error == EINTR && - (sig = lwp->lwp_cursig) != 0 && - sigismember(&PTOU(p)->u_sigrestart, sig) && - PTOU(p)->u_signal[sig - 1] != SIG_DFL && - PTOU(p)->u_signal[sig - 1] != SIG_IGN) - error = ERESTART; - rp->r_r0 = error; - rp->r_ps |= PS_C; - } - } - - /* - * From the proc(4) manual page: - * When exit from a system call is being traced, the traced process - * stops on completion of the system call just prior to checking for - * signals and returning to user level. At this point all return - * values have been stored into the traced process's saved registers. - */ - if (proc_stop) { - mutex_enter(&p->p_lock); - if (PTOU(p)->u_systrap && - prismember(&PTOU(p)->u_exitmask, code)) - stop(PR_SYSEXIT, code); - mutex_exit(&p->p_lock); - } - - /* - * If we are the parent returning from a successful - * vfork, wait for the child to exec or exit. - * This code must be here and not in the bowels of the system - * so that /proc can intercept exit from vfork in a timely way. - */ - if (t->t_flag & T_VFPARENT) { - ASSERT(code == SYS_vfork || code == SYS_forksys); - ASSERT(rp->r_r1 == 0 && error == 0); - vfwait((pid_t)rval1); - t->t_flag &= ~T_VFPARENT; - } - - /* - * If profiling is active, bill the current PC in user-land - * and keep reposting until profiling is disabled. - */ - if (p->p_prof.pr_scale) { - if (lwp->lwp_oweupc) - profil_tick(rp->r_pc); - repost = 1; - } - -sig_check: - /* - * Reset flag for next time. - * We must do this after stopping on PR_SYSEXIT - * because /proc uses the information in lwp_eosys. - */ - lwp->lwp_eosys = NORMALRETURN; - clear_stale_fd(); - t->t_flag &= ~T_FORKALL; - - if (t->t_astflag | t->t_sig_check) { - /* - * Turn off the AST flag before checking all the conditions that - * may have caused an AST. This flag is on whenever a signal or - * unusual condition should be handled after the next trap or - * syscall. - */ - astoff(t); - /* - * If a single-step trap occurred on a syscall (see trap()) - * recognize it now. 
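
The NORMALRETURN block above implements the classic x86 syscall return
convention: on success the carry bit (PS_C) is cleared and r_r0/r_r1 carry
rval1 and rval2; on failure carry is set and r_r0 carries the errno value.
A hypothetical libc-style stub (syscall_return is illustrative, not a real
libc entry point) would decode it as:

    #include <errno.h>
    #include <stdio.h>

    static long
    syscall_return(int carry_set, long r0)
    {
            if (carry_set) {        /* error path: r_r0 = errno value */
                    errno = (int)r0;
                    return (-1);
            }
            return (r0);            /* normal path: r_r0 = rval1 */
    }

    int
    main(void)
    {
            (void) syscall_return(1, EINTR);
            printf("errno=%d\n", errno);    /* EINTR, typically 4 */
            return (0);
    }
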
Do this before checking for signals - * because deferred_singlestep_trap() may generate a SIGTRAP to - * the LWP or may otherwise mark the LWP to call issig(FORREAL). - */ - if (lwp->lwp_pcb.pcb_flags & DEBUG_PENDING) - deferred_singlestep_trap((caddr_t)rp->r_pc); - - t->t_sig_check = 0; - - /* - * The following check is legal for the following reasons: - * 1) The thread we are checking, is ourselves, so there is - * no way the proc can go away. - * 2) The only time we need to be protected by the - * lock is if the binding is changed. - * - * Note we will still take the lock and check the binding - * if the condition was true without the lock held. This - * prevents lock contention among threads owned by the - * same proc. - */ - - if (curthread->t_proc_flag & TP_CHANGEBIND) { - mutex_enter(&p->p_lock); - if (curthread->t_proc_flag & TP_CHANGEBIND) { - timer_lwpbind(); - curthread->t_proc_flag &= ~TP_CHANGEBIND; - } - mutex_exit(&p->p_lock); - } - - /* - * for kaio requests on the special kaio poll queue, - * copyout their results to user memory. - */ - if (p->p_aio) - aio_cleanup(0); - /* - * If this LWP was asked to hold, call holdlwp(), which will - * stop. holdlwps() sets this up and calls pokelwps() which - * sets the AST flag. - * - * Also check TP_EXITLWP, since this is used by fresh new LWPs - * through lwp_rtt(). That flag is set if the lwp_create(2) - * syscall failed after creating the LWP. - */ - if (ISHOLD(p) || (t->t_proc_flag & TP_EXITLWP)) - holdlwp(); - - /* - * All code that sets signals and makes ISSIG_PENDING - * evaluate true must set t_sig_check afterwards. - */ - if (ISSIG_PENDING(t, lwp, p)) { - if (issig(FORREAL)) - psig(); - t->t_sig_check = 1; /* recheck next time */ - } - - if (sigprof) { - int nargs = (code > 0 && code < NSYSCALL)? - LWP_GETSYSENT(lwp)[code].sy_narg : 0; - realsigprof(code, nargs, error); - t->t_sig_check = 1; /* recheck next time */ - } - - /* - * If a performance counter overflow interrupt was - * delivered *during* the syscall, then re-enable the - * AST so that we take a trip through trap() to cause - * the SIGEMT to be delivered. - */ - if (lwp->lwp_pcb.pcb_flags & CPC_OVERFLOW) - aston(t); - - /* - * /proc can't enable/disable the trace bit itself - * because that could race with the call gate used by - * system calls via "lcall". If that happened, an - * invalid EFLAGS would result. prstep()/prnostep() - * therefore schedule an AST for the purpose. - */ - if (lwp->lwp_pcb.pcb_flags & REQUEST_STEP) { - lwp->lwp_pcb.pcb_flags &= ~REQUEST_STEP; - rp->r_ps |= PS_T; - } - if (lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP) { - lwp->lwp_pcb.pcb_flags &= ~REQUEST_NOSTEP; - rp->r_ps &= ~PS_T; - } - } - - lwp->lwp_errno = 0; /* clear error for next time */ - -#ifndef NPROBE - /* Kernel probe */ - if (tnf_tracing_active) { - TNF_PROBE_3(syscall_end, "syscall thread", /* CSTYLED */, - tnf_long, rval1, rval1, - tnf_long, rval2, rval2, - tnf_long, errno, (long)error); - repost = 1; - } -#endif /* NPROBE */ - - /* - * Set state to LWP_USER here so preempt won't give us a kernel - * priority if it occurs after this point. Call CL_TRAPRET() to - * restore the user-level priority. - * - * It is important that no locks (other than spinlocks) be entered - * after this point before returning to user mode (unless lwp_state - * is set back to LWP_SYS). - * - * XXX Sampled times past this point are charged to the user. 
- */ - lwp->lwp_state = LWP_USER; - - if (t->t_trapret) { - t->t_trapret = 0; - thread_lock(t); - CL_TRAPRET(t); - thread_unlock(t); - } - if (CPU->cpu_runrun || t->t_schedflag & TS_ANYWAITQ) - preempt(); - prunstop(); - - lwp->lwp_errno = 0; /* clear error for next time */ - - /* - * The thread lock must be held in order to clear sysnum and reset - * lwp_ap atomically with respect to other threads in the system that - * may be looking at the args via lwp_ap from get_syscall_args(). - */ - - thread_lock(t); - t->t_sysnum = 0; /* no longer in a system call */ - - if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { -#if defined(_LP64) - /* - * In case the args were copied to the lwp, reset the - * pointer so the next syscall will have the right - * lwp_ap pointer. - */ - lwp->lwp_ap = (long *)&rp->r_rdi; - } else { -#endif - lwp->lwp_ap = NULL; /* reset on every syscall entry */ - } - thread_unlock(t); - - lwp->lwp_argsaved = 0; - - /* - * If there was a continuing reason for post-syscall processing, - * set the t_post_sys flag for the next system call. - */ - if (repost) - t->t_post_sys = 1; - - /* - * If there is a ustack registered for this lwp, and the stack rlimit - * has been altered, read in the ustack. If the saved stack rlimit - * matches the bounds of the ustack, update the ustack to reflect - * the new rlimit. If the new stack rlimit is RLIM_INFINITY, disable - * stack checking by setting the size to 0. - */ - if (lwp->lwp_ustack != 0 && lwp->lwp_old_stk_ctl != 0) { - rlim64_t new_size; - caddr_t top; - stack_t stk; - struct rlimit64 rl; - - mutex_enter(&p->p_lock); - new_size = p->p_stk_ctl; - top = p->p_usrstack; - (void) rctl_rlimit_get(rctlproc_legacy[RLIMIT_STACK], p, &rl); - mutex_exit(&p->p_lock); - - if (rl.rlim_cur == RLIM64_INFINITY) - new_size = 0; - - if (copyin((stack_t *)lwp->lwp_ustack, &stk, - sizeof (stack_t)) == 0 && - (stk.ss_size == lwp->lwp_old_stk_ctl || - stk.ss_size == 0) && - stk.ss_sp == top - stk.ss_size) { - stk.ss_sp = (void *)((uintptr_t)stk.ss_sp + - stk.ss_size - (uintptr_t)new_size); - stk.ss_size = new_size; - - (void) copyout(&stk, (stack_t *)lwp->lwp_ustack, - sizeof (stack_t)); - } - - lwp->lwp_old_stk_ctl = 0; - } -} - -/* - * Called from post_syscall() when a deferred singlestep is to be taken. - */ -void -deferred_singlestep_trap(caddr_t pc) -{ - proc_t *p = ttoproc(curthread); - klwp_t *lwp = ttolwp(curthread); - pcb_t *pcb = &lwp->lwp_pcb; - uint_t fault = 0; - k_siginfo_t siginfo; - - bzero(&siginfo, sizeof (siginfo)); - - /* - * If both NORMAL_STEP and WATCH_STEP are in - * effect, give precedence to WATCH_STEP. - * If neither is set, user must have set the - * PS_T bit in %efl; treat this as NORMAL_STEP. - */ - if ((fault = undo_watch_step(&siginfo)) == 0 && - ((pcb->pcb_flags & NORMAL_STEP) || - !(pcb->pcb_flags & WATCH_STEP))) { - siginfo.si_signo = SIGTRAP; - siginfo.si_code = TRAP_TRACE; - siginfo.si_addr = pc; - fault = FLTTRACE; - } - pcb->pcb_flags &= ~(DEBUG_PENDING|NORMAL_STEP|WATCH_STEP); - - if (fault) { - /* - * Remember the fault and fault adddress - * for real-time (SIGPROF) profiling. - */ - lwp->lwp_lastfault = fault; - lwp->lwp_lastfaddr = siginfo.si_addr; - /* - * If a debugger has declared this fault to be an - * event of interest, stop the lwp. Otherwise just - * deliver the associated signal. 
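
The ss_sp recomputation in the ustack block above is easier to follow with
numbers (the figures are illustrative; the invariant the code checks for is
ss_sp == top - ss_size):

    new ss_sp = old ss_sp + old ss_size - new ss_size
              = (top - old ss_size) + old ss_size - new ss_size
              = top - new ss_size

    e.g. top = 0x8000000, old ss_size = 0x800000, new limit = 0x400000
         => new ss_sp = 0x8000000 - 0x400000 = 0x7c00000

so the stack window stays anchored at its top while its size tracks the new
RLIMIT_STACK value; a limit of RLIM64_INFINITY sets the size to 0 and
disables stack checking altogether.
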
- */ - if (prismember(&p->p_fltmask, fault) && - stop_on_fault(fault, &siginfo) == 0) - siginfo.si_signo = 0; - } - - if (siginfo.si_signo) - trapsig(&siginfo, 1); -} - -/* - * nonexistent system call-- signal lwp (may want to handle it) - * flag error if lwp won't see signal immediately - */ -int64_t -nosys(void) -{ - tsignal(curthread, SIGSYS); - return (set_errno(ENOSYS)); -} - -int -nosys32(void) -{ - return (nosys()); -} - -/* - * Execute a 32-bit system call on behalf of the current thread. - */ -void -dosyscall(void) -{ - /* - * Need space on the stack to store syscall arguments. - */ - long syscall_args[MAXSYSARGS]; - struct sysent *se; - int64_t ret; - - syscall_mstate(LMS_TRAP, LMS_SYSTEM); - - ASSERT(curproc->p_model == DATAMODEL_ILP32); - - CPU_STATS_ENTER_K(); - CPU_STATS_ADDQ(CPU, sys, syscall, 1); - CPU_STATS_EXIT_K(); - - se = syscall_entry(curthread, syscall_args); - - /* - * syscall_entry() copied all 8 arguments into syscall_args. - */ - ret = se->sy_callc(syscall_args[0], syscall_args[1], syscall_args[2], - syscall_args[3], syscall_args[4], syscall_args[5], syscall_args[6], - syscall_args[7]); - - syscall_exit(curthread, (int)ret & 0xffffffffu, (int)(ret >> 32)); - syscall_mstate(LMS_SYSTEM, LMS_TRAP); -} - -/* - * Get the arguments to the current system call. See comment atop - * save_syscall_args() regarding lwp_ap usage. - */ - -uint_t -get_syscall_args(klwp_t *lwp, long *argp, int *nargsp) -{ - kthread_t *t = lwptot(lwp); - ulong_t mask = 0xfffffffful; - uint_t code; - long *ap; - int nargs; - -#if defined(_LP64) - if (lwp_getdatamodel(lwp) == DATAMODEL_LP64) - mask = 0xfffffffffffffffful; -#endif - - /* - * The thread lock must be held while looking at the arguments to ensure - * they don't go away via post_syscall(). - * get_syscall_args() is the only routine to read them which is callable - * outside the LWP in question and hence the only one that must be - * synchronized in this manner. - */ - thread_lock(t); - - code = t->t_sysnum; - ap = lwp->lwp_ap; - - thread_unlock(t); - - if (code != 0 && code < NSYSCALL) { - nargs = LWP_GETSYSENT(lwp)[code].sy_narg; - - ASSERT(nargs <= MAXSYSARGS); - - *nargsp = nargs; - while (nargs-- > 0) - *argp++ = *ap++ & mask; - } else { - *nargsp = 0; - } - - return (code); -} - -#ifdef _SYSCALL32_IMPL -/* - * Get the arguments to the current 32-bit system call. - */ -uint_t -get_syscall32_args(klwp_t *lwp, int *argp, int *nargsp) -{ - long args[MAXSYSARGS]; - uint_t i, code; - - code = get_syscall_args(lwp, args, nargsp); - - for (i = 0; i != *nargsp; i++) - *argp++ = (int)args[i]; - return (code); -} -#endif - -/* - * Save the system call arguments in a safe place. - * - * On the i386 kernel: - * - * Copy the users args prior to changing the stack or stack pointer. - * This is so /proc will be able to get a valid copy of the - * args from the user stack even after the user stack has been changed. - * Note that the kernel stack copy of the args may also have been - * changed by a system call handler which takes C-style arguments. - * - * Note that this may be called by stop() from trap(). In that case - * t_sysnum will be zero (syscall_exit clears it), so no args will be - * copied. - * - * On the amd64 kernel: - * - * For 64-bit applications, lwp->lwp_ap normally points to %rdi..%r9 - * in the reg structure. If the user is going to change the argument - * registers, rax, or the stack and might want to get the args (for - * /proc tracing), it must copy the args elsewhere via save_syscall_args(). 
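
get_syscall_args() above masks each argument to 32 significant bits for an
ILP32 process, so stale high bits left in the 64-bit registers never reach
consumers such as /proc. A stand-alone illustration, assuming an LP64
compilation environment:

    #include <stdio.h>

    int
    main(void)
    {
            unsigned long mask32 = 0xfffffffful;
            long raw = 0x123456789abcdef0L; /* high bits may be stale */

            printf("ILP32 view: 0x%lx\n", raw & mask32);    /* 0x9abcdef0 */
            printf("LP64 view:  0x%lx\n", (unsigned long)raw);
            return (0);
    }
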
- * - * For 32-bit applications, lwp->lwp_ap normally points to a copy of - * the system call arguments on the kernel stack made from the user - * stack. Copy the args prior to change the stack or stack pointer. - * This is so /proc will be able to get a valid copy of the args - * from the user stack even after that stack has been changed. - * - * This may be called from stop() even when we're not in a system call. - * Since there's no easy way to tell, this must be safe (not panic). - * If the copyins get data faults, return non-zero. - */ -int -save_syscall_args() -{ - kthread_t *t = curthread; - klwp_t *lwp = ttolwp(t); - uint_t code = t->t_sysnum; - uint_t nargs; - - if (lwp->lwp_argsaved || code == 0) - return (0); /* args already saved or not needed */ - - if (code >= NSYSCALL) { - nargs = 0; /* illegal syscall */ - } else { - struct sysent *se = LWP_GETSYSENT(lwp); - struct sysent *callp = se + code; - - nargs = callp->sy_narg; - if (LOADABLE_SYSCALL(callp) && nargs == 0) { - krwlock_t *module_lock; - - /* - * Find out how many arguments the system - * call uses. - * - * We have the property that loaded syscalls - * never change the number of arguments they - * use after they've been loaded once. This - * allows us to stop for /proc tracing without - * holding the module lock. - * /proc is assured that sy_narg is valid. - */ - module_lock = lock_syscall(se, code); - nargs = callp->sy_narg; - rw_exit(module_lock); - } - } - - /* - * Fetch the system call arguments. - */ - if (nargs == 0) - goto out; - - ASSERT(nargs <= MAXSYSARGS); - - if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { -#if defined(_LP64) - struct regs *rp = lwptoregs(lwp); - - lwp->lwp_arg[0] = rp->r_rdi; - lwp->lwp_arg[1] = rp->r_rsi; - lwp->lwp_arg[2] = rp->r_rdx; - lwp->lwp_arg[3] = rp->r_rcx; - lwp->lwp_arg[4] = rp->r_r8; - lwp->lwp_arg[5] = rp->r_r9; - if (nargs > 6 && copyin_args(rp, &lwp->lwp_arg[6], nargs - 6)) - return (-1); - } else { -#endif - if (COPYIN_ARGS32(lwptoregs(lwp), lwp->lwp_arg, nargs)) - return (-1); - } -out: - lwp->lwp_ap = lwp->lwp_arg; - lwp->lwp_argsaved = 1; - t->t_post_sys = 1; /* so lwp_ap will be reset */ - return (0); -} - -void -reset_syscall_args(void) -{ - ttolwp(curthread)->lwp_argsaved = 0; -} - -/* - * Call a system call which takes a pointer to the user args struct and - * a pointer to the return values. This is a bit slower than the standard - * C arg-passing method in some cases. - */ -int64_t -syscall_ap(void) -{ - uint_t error; - struct sysent *callp; - rval_t rval; - kthread_t *t = curthread; - klwp_t *lwp = ttolwp(t); - struct regs *rp = lwptoregs(lwp); - - callp = LWP_GETSYSENT(lwp) + t->t_sysnum; - - /* - * If the arguments don't fit in registers %rdi-%r9, make sure they - * have been copied to the lwp_arg array. - */ - if (callp->sy_narg > 6 && save_syscall_args()) - return ((int64_t)set_errno(EFAULT)); - - rval.r_val1 = 0; - rval.r_val2 = rp->r_r1; - lwp->lwp_error = 0; /* for old drivers */ - error = (*(callp->sy_call))(lwp->lwp_ap, &rval); - if (error) - return ((longlong_t)set_errno(error)); - return (rval.r_vals); -} - -/* - * Load system call module. - * Returns with pointer to held read lock for module. - */ -static krwlock_t * -lock_syscall(struct sysent *table, uint_t code) -{ - krwlock_t *module_lock; - struct modctl *modp; - int id; - struct sysent *callp; - - callp = table + code; - module_lock = callp->sy_lock; - - /* - * Optimization to only call modload if we don't have a loaded - * syscall. 
- */ - rw_enter(module_lock, RW_READER); - if (LOADED_SYSCALL(callp)) - return (module_lock); - rw_exit(module_lock); - - for (;;) { - if ((id = modload("sys", syscallnames[code])) == -1) - break; - - /* - * If we loaded successfully at least once, the modctl - * will still be valid, so we try to grab it by filename. - * If this call fails, it's because the mod_filename - * was changed after the call to modload() (mod_hold_by_name() - * is the likely culprit). We can safely just take - * another lap if this is the case; the modload() will - * change the mod_filename back to one by which we can - * find the modctl. - */ - modp = mod_find_by_filename("sys", syscallnames[code]); - - if (modp == NULL) - continue; - - mutex_enter(&mod_lock); - - if (!modp->mod_installed) { - mutex_exit(&mod_lock); - continue; - } - break; - } - rw_enter(module_lock, RW_READER); - - if (id != -1) - mutex_exit(&mod_lock); - - return (module_lock); -} - -/* - * Loadable syscall support. - * If needed, load the module, then reserve it by holding a read - * lock for the duration of the call. - * Later, if the syscall is not unloadable, it could patch the vector. - */ -/*ARGSUSED*/ -int64_t -loadable_syscall( - long a0, long a1, long a2, long a3, - long a4, long a5, long a6, long a7) -{ - klwp_t *lwp = ttolwp(curthread); - int64_t rval; - struct sysent *callp; - struct sysent *se = LWP_GETSYSENT(lwp); - krwlock_t *module_lock; - int code, error = 0; - - code = curthread->t_sysnum; - callp = se + code; - - /* - * Try to autoload the system call if necessary - */ - module_lock = lock_syscall(se, code); - - /* - * we've locked either the loaded syscall or nosys - */ - - if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { -#if defined(_LP64) - if (callp->sy_flags & SE_ARGC) { - rval = (int64_t)(*callp->sy_call)(a0, a1, a2, a3, - a4, a5); - } else { - rval = syscall_ap(); - } - } else { -#endif - /* - * Now that it's loaded, make sure enough args were copied. - */ - if (COPYIN_ARGS32(lwptoregs(lwp), lwp->lwp_ap, callp->sy_narg)) - error = EFAULT; - if (error) { - rval = set_errno(error); - } else if (callp->sy_flags & SE_ARGC) { - rval = (int64_t)(*callp->sy_call)(lwp->lwp_ap[0], - lwp->lwp_ap[1], lwp->lwp_ap[2], lwp->lwp_ap[3], - lwp->lwp_ap[4], lwp->lwp_ap[5]); - } else { - rval = syscall_ap(); - } - } - - rw_exit(module_lock); - return (rval); -} - -/* - * Indirect syscall handled in libc on x86 architectures - */ -int64_t -indir() -{ - return (nosys()); -} - -/* - * set_errno - set an error return from the current system call. - * This could be a macro. - * This returns the value it is passed, so that the caller can - * use tail-recursion-elimination and do return (set_errno(ERRNO)); - */ -uint_t -set_errno(uint_t error) -{ - ASSERT(error != 0); /* must not be used to clear errno */ - - curthread->t_post_sys = 1; /* have post_syscall do error return */ - return (ttolwp(curthread)->lwp_errno = error); -} - -/* - * set_proc_pre_sys - Set pre-syscall processing for entire process. - */ -void -set_proc_pre_sys(proc_t *p) -{ - kthread_t *t; - kthread_t *first; - - ASSERT(MUTEX_HELD(&p->p_lock)); - - t = first = p->p_tlist; - do { - t->t_pre_sys = 1; - } while ((t = t->t_forw) != first); -} - -/* - * set_proc_post_sys - Set post-syscall processing for entire process. 
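
set_errno() above returns the error it is handed precisely so that handlers
can fail in a single statement, the same way syscall_ap() returns
set_errno(EFAULT). A sketch of the idiom in a hypothetical handler
(my_handler does not exist in the source):

    static int64_t
    my_handler(uintptr_t uaddr)
    {
            if (uaddr == 0)
                    return ((int64_t)set_errno(EINVAL));  /* fail + flag */
            return (0);
    }
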
- */ -void -set_proc_post_sys(proc_t *p) -{ - kthread_t *t; - kthread_t *first; - - ASSERT(MUTEX_HELD(&p->p_lock)); - - t = first = p->p_tlist; - do { - t->t_post_sys = 1; - } while ((t = t->t_forw) != first); -} - -/* - * set_proc_sys - Set pre- and post-syscall processing for entire process. - */ -void -set_proc_sys(proc_t *p) -{ - kthread_t *t; - kthread_t *first; - - ASSERT(MUTEX_HELD(&p->p_lock)); - - t = first = p->p_tlist; - do { - t->t_pre_sys = 1; - t->t_post_sys = 1; - } while ((t = t->t_forw) != first); -} - -/* - * set_all_proc_sys - set pre- and post-syscall processing flags for all - * user processes. - * - * This is needed when auditing, tracing, or other facilities which affect - * all processes are turned on. - */ -void -set_all_proc_sys() -{ - kthread_t *t; - kthread_t *first; - - mutex_enter(&pidlock); - t = first = curthread; - do { - t->t_pre_sys = 1; - t->t_post_sys = 1; - } while ((t = t->t_next) != first); - mutex_exit(&pidlock); -} - -/* - * set_all_zone_usr_proc_sys - set pre- and post-syscall processing flags for - * all user processes running in the zone of the current process - * - * This is needed when auditing, tracing, or other facilities which affect - * all processes are turned on. - */ -void -set_all_zone_usr_proc_sys(zoneid_t zoneid) -{ - proc_t *p; - kthread_t *t; - - mutex_enter(&pidlock); - for (p = practive; p != NULL; p = p->p_next) { - /* skip kernel and incomplete processes */ - if (p->p_exec == NULLVP || p->p_as == &kas || - p->p_stat == SIDL || p->p_stat == SZOMB || - (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) - continue; - /* - * Only processes in the given zone (eventually in - * all zones) are taken into account - */ - if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) { - mutex_enter(&p->p_lock); - if ((t = p->p_tlist) == NULL) { - mutex_exit(&p->p_lock); - continue; - } - /* - * Set pre- and post-syscall processing flags - * for all threads of the process - */ - do { - t->t_pre_sys = 1; - t->t_post_sys = 1; - } while (p->p_tlist != (t = t->t_forw)); - mutex_exit(&p->p_lock); - } - } - mutex_exit(&pidlock); -} - -/* - * set_proc_ast - Set asynchronous service trap (AST) flag for all - * threads in process. - */ -void -set_proc_ast(proc_t *p) -{ - kthread_t *t; - kthread_t *first; - - ASSERT(MUTEX_HELD(&p->p_lock)); - - t = first = p->p_tlist; - do { - aston(t); - } while ((t = t->t_forw) != first); -} diff --git a/usr/src/uts/intel/ia32/os/sysi86.c b/usr/src/uts/intel/ia32/os/sysi86.c deleted file mode 100644 index b107afddfb..0000000000 --- a/usr/src/uts/intel/ia32/os/sysi86.c +++ /dev/null @@ -1,850 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Copyright 2021 Joyent, Inc. - */ - -/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ -/* All Rights Reserved */ - -/* Copyright (c) 1987, 1988 Microsoft Corporation */ -/* All Rights Reserved */ - -#include <sys/param.h> -#include <sys/types.h> -#include <sys/sysmacros.h> -#include <sys/systm.h> -#include <sys/signal.h> -#include <sys/errno.h> -#include <sys/fault.h> -#include <sys/syscall.h> -#include <sys/cpuvar.h> -#include <sys/sysi86.h> -#include <sys/psw.h> -#include <sys/cred.h> -#include <sys/policy.h> -#include <sys/thread.h> -#include <sys/debug.h> -#include <sys/ontrap.h> -#include <sys/privregs.h> -#include <sys/x86_archext.h> -#include <sys/vmem.h> -#include <sys/kmem.h> -#include <sys/mman.h> -#include <sys/archsystm.h> -#include <vm/hat.h> -#include <vm/as.h> -#include <vm/seg.h> -#include <vm/seg_kmem.h> -#include <vm/faultcode.h> -#include <sys/fp.h> -#include <sys/cmn_err.h> -#include <sys/segments.h> -#include <sys/clock.h> -#include <vm/hat_i86.h> -#if defined(__xpv) -#include <sys/hypervisor.h> -#include <sys/note.h> -#endif - -static void ldt_alloc(proc_t *, uint_t); -static void ldt_free(proc_t *); -static void ldt_dup(proc_t *, proc_t *); -static void ldt_grow(proc_t *, uint_t); - -/* - * sysi86 System Call - */ - -/* ARGSUSED */ -int -sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) -{ - struct ssd ssd; - int error = 0; - int c; - proc_t *pp = curproc; - - switch (cmd) { - - /* - * The SI86V86 subsystem call of the SYSI86 system call - * supports only one subcode -- V86SC_IOPL. - */ - case SI86V86: - if (arg1 == V86SC_IOPL) { -#if defined(__xpv) - struct ctxop *ctx; -#endif - struct regs *rp = lwptoregs(ttolwp(curthread)); - greg_t oldpl = rp->r_ps & PS_IOPL; - greg_t newpl = arg2 & PS_IOPL; - - /* - * Must be privileged to run this system call - * if giving more io privilege. - */ - if (newpl > oldpl && (error = - secpolicy_sys_config(CRED(), B_FALSE)) != 0) - return (set_errno(error)); -#if defined(__xpv) - ctx = installctx_preallocate(); - kpreempt_disable(); - installctx(curthread, NULL, xen_disable_user_iopl, - xen_enable_user_iopl, NULL, NULL, - xen_disable_user_iopl, NULL, ctx); - xen_enable_user_iopl(); - kpreempt_enable(); -#else - rp->r_ps ^= oldpl ^ newpl; -#endif - } else - error = EINVAL; - break; - - /* - * Set a segment descriptor - */ - case SI86DSCR: - /* - * There are considerable problems here manipulating - * resources shared by many running lwps. Get everyone - * into a safe state before changing the LDT. - */ - if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK1)) { - error = EINTR; - break; - } - - if (get_udatamodel() == DATAMODEL_LP64) { - error = EINVAL; - break; - } - - if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) { - error = EFAULT; - break; - } - - error = setdscr(&ssd); - - mutex_enter(&pp->p_lock); - if (curthread != pp->p_agenttp) - continuelwps(pp); - mutex_exit(&pp->p_lock); - break; - - case SI86FPHW: - c = fp_kind & 0xff; - if (suword32((void *)arg1, c) == -1) - error = EFAULT; - break; - - case SI86FPSTART: - /* - * arg1 is the address of _fp_hw - * arg2 is the desired x87 FCW value - * arg3 is the desired SSE MXCSR value - * a return value of one means SSE hardware, else none. - */ - c = fp_kind & 0xff; - if (suword32((void *)arg1, c) == -1) { - error = EFAULT; - break; - } - fpsetcw((uint16_t)arg2, (uint32_t)arg3); - return ((fp_kind & __FP_SSE) ? 
1 : 0); - - /* real time clock management commands */ - - case WTODC: - if ((error = secpolicy_settime(CRED())) == 0) { - timestruc_t ts; - mutex_enter(&tod_lock); - gethrestime(&ts); - tod_set(ts); - mutex_exit(&tod_lock); - } - break; - -/* Give some timezone playing room */ -#define ONEWEEK (7 * 24 * 60 * 60) - - case SGMTL: - /* - * Called from 32 bit land, negative values - * are not sign extended, so we do that here - * by casting it to an int and back. We also - * clamp the value to within reason and detect - * when a 64 bit call overflows an int. - */ - if ((error = secpolicy_settime(CRED())) == 0) { - int newlag = (int)arg1; - -#ifdef _SYSCALL32_IMPL - if (get_udatamodel() == DATAMODEL_NATIVE && - (long)newlag != (long)arg1) { - error = EOVERFLOW; - } else -#endif - if (newlag >= -ONEWEEK && newlag <= ONEWEEK) - sgmtl(newlag); - else - error = EOVERFLOW; - } - break; - - case GGMTL: - if (get_udatamodel() == DATAMODEL_NATIVE) { - if (sulword((void *)arg1, ggmtl()) == -1) - error = EFAULT; -#ifdef _SYSCALL32_IMPL - } else { - time_t gmtl; - - if ((gmtl = ggmtl()) > INT32_MAX) { - /* - * Since gmt_lag can at most be - * +/- 12 hours, something is - * *seriously* messed up here. - */ - error = EOVERFLOW; - } else if (suword32((void *)arg1, (int32_t)gmtl) == -1) - error = EFAULT; -#endif - } - break; - - case RTCSYNC: - if ((error = secpolicy_settime(CRED())) == 0) - rtcsync(); - break; - - /* END OF real time clock management commands */ - - default: - error = EINVAL; - break; - } - return (error == 0 ? 0 : set_errno(error)); -} - -void -usd_to_ssd(user_desc_t *usd, struct ssd *ssd, selector_t sel) -{ - ssd->bo = USEGD_GETBASE(usd); - ssd->ls = USEGD_GETLIMIT(usd); - ssd->sel = sel; - - /* - * set type, dpl and present bits. - */ - ssd->acc1 = usd->usd_type; - ssd->acc1 |= usd->usd_dpl << 5; - ssd->acc1 |= usd->usd_p << (5 + 2); - - /* - * set avl, DB and granularity bits. - */ - ssd->acc2 = usd->usd_avl; - - ssd->acc2 |= usd->usd_long << 1; - - ssd->acc2 |= usd->usd_def32 << (1 + 1); - ssd->acc2 |= usd->usd_gran << (1 + 1 + 1); -} - -static void -ssd_to_usd(struct ssd *ssd, user_desc_t *usd) -{ - - ASSERT(bcmp(usd, &null_udesc, sizeof (*usd)) == 0); - - USEGD_SETBASE(usd, ssd->bo); - USEGD_SETLIMIT(usd, ssd->ls); - - /* - * Set type, dpl and present bits. - * - * Force the "accessed" bit to on so that we don't run afoul of - * KPTI. - */ - usd->usd_type = ssd->acc1 | SDT_A; - usd->usd_dpl = ssd->acc1 >> 5; - usd->usd_p = ssd->acc1 >> (5 + 2); - - ASSERT(usd->usd_type >= SDT_MEMRO); - ASSERT(usd->usd_dpl == SEL_UPL); - - /* - * 64-bit code selectors are never allowed in the LDT. - * Reserved bit is always 0 on 32-bit systems. - */ - usd->usd_long = 0; - - /* - * set avl, DB and granularity bits. - */ - usd->usd_avl = ssd->acc2; - usd->usd_def32 = ssd->acc2 >> (1 + 1); - usd->usd_gran = ssd->acc2 >> (1 + 1 + 1); -} - - - -/* - * Load LDT register with the current process's LDT. - */ -static void -ldt_load(void) -{ -#if defined(__xpv) - xen_set_ldt(curproc->p_ldt, curproc->p_ldtlimit + 1); -#else - size_t len; - system_desc_t desc; - - /* - * Before we can use the LDT on this CPU, we must install the LDT in the - * user mapping table. - */ - len = (curproc->p_ldtlimit + 1) * sizeof (user_desc_t); - bcopy(curproc->p_ldt, CPU->cpu_m.mcpu_ldt, len); - CPU->cpu_m.mcpu_ldt_len = len; - set_syssegd(&desc, CPU->cpu_m.mcpu_ldt, len - 1, SDT_SYSLDT, SEL_KPL); - *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = desc; - - wr_ldtr(ULDT_SEL); -#endif -} - -/* - * Store a NULL selector in the LDTR. 
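
usd_to_ssd() above packs the access bits as: segment type in bits 0-4 of
acc1, dpl in bits 5-6, present in bit 7; then avl, long, def32 and
granularity in bits 0-3 of acc2. A runnable sketch of the acc1 decode (the
value is illustrative):

    #include <stdio.h>

    int
    main(void)
    {
            /* type = 0x1f, dpl = 3 (SEL_UPL), present = 1 */
            unsigned acc1 = 0x1f | (3u << 5) | (1u << 7);

            printf("type=%#x dpl=%u present=%u\n",
                acc1 & 0x1f, (acc1 >> 5) & 0x3, (acc1 >> 7) & 0x1);
            return (0);
    }
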
All subsequent illegal references to
- * the LDT will result in a #gp.
- */
-void
-ldt_unload(void)
-{
-#if defined(__xpv)
- xen_set_ldt(NULL, 0);
-#else
- *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = null_sdesc;
- wr_ldtr(0);
-
- bzero(CPU->cpu_m.mcpu_ldt, CPU->cpu_m.mcpu_ldt_len);
- CPU->cpu_m.mcpu_ldt_len = 0;
-#endif
-}
-
-/*ARGSUSED*/
-static void
-ldt_savectx(proc_t *p)
-{
- ASSERT(p->p_ldt != NULL);
- ASSERT(p == curproc);
-
- /*
- * The 64-bit kernel must be sure to clear any stale ldt
- * selectors when context switching away from a process that
- * has a private ldt. Consider the following example:
- *
- * Wine creates an ldt descriptor and points a segment register
- * to it.
- *
- * We then context switch away from the wine lwp to a kernel
- * thread and hit a breakpoint in the kernel with kmdb.
- *
- * When we continue and resume from kmdb we will #gp
- * fault since kmdb will have saved the stale ldt selector
- * from wine and will try to restore it but we are no longer in
- * the context of the wine process and do not have our
- * ldtr register pointing to the private ldt.
- */
- reset_sregs();
-
- ldt_unload();
- cpu_fast_syscall_enable();
-}
-
-static void
-ldt_restorectx(proc_t *p)
-{
- ASSERT(p->p_ldt != NULL);
- ASSERT(p == curproc);
-
- ldt_load();
- cpu_fast_syscall_disable();
-}
-
-/*
- * At exec time, we need to clean up our LDT context and re-enable fast syscalls
- * for the new process image.
- *
- * The same is true for the other case, where we have:
- *
- * proc_exit()
- * ->exitpctx()->ldt_savectx()
- * ->freepctx()->ldt_freectx()
- *
- * Because pre-emption is not prevented between the two callbacks, we could have
- * come off CPU, and brought back LDT context when coming back on CPU via
- * ldt_restorectx().
- */
-/* ARGSUSED */
-static void
-ldt_freectx(proc_t *p, int isexec)
-{
- ASSERT(p->p_ldt != NULL);
- ASSERT(p == curproc);
-
- kpreempt_disable();
- ldt_free(p);
- cpu_fast_syscall_enable();
- kpreempt_enable();
-}
-
-/*
- * Install ctx op that ensures syscall/sysenter are disabled.
- * See comments below.
- *
- * When a thread with a private LDT forks, the new process
- * must have the LDT context ops installed.
- */
-/* ARGSUSED */
-static void
-ldt_installctx(proc_t *p, proc_t *cp)
-{
- proc_t *targ = p;
- kthread_t *t;
-
- /*
- * If this is a fork, operate on the child process.
- */
- if (cp != NULL) {
- targ = cp;
- ldt_dup(p, cp);
- }
-
- /*
- * The process context ops expect the target process as their argument.
- */
- ASSERT(removepctx(targ, targ, ldt_savectx, ldt_restorectx,
- ldt_installctx, ldt_savectx, ldt_freectx) == 0);
-
- installpctx(targ, targ, ldt_savectx, ldt_restorectx,
- ldt_installctx, ldt_savectx, ldt_freectx);
-
- /*
- * We've just disabled fast system call and return instructions; take
- * the slow path out to make sure we don't try to use one to return
- * back to user. We must set t_post_sys for every thread in the
- * process to make sure none of them escape out via fast return.
- */
-
- mutex_enter(&targ->p_lock);
- t = targ->p_tlist;
- do {
- t->t_post_sys = 1;
- } while ((t = t->t_forw) != targ->p_tlist);
- mutex_exit(&targ->p_lock);
-}
-
-int
-setdscr(struct ssd *ssd)
-{
- ushort_t seli; /* selector index */
- user_desc_t *ldp; /* descriptor pointer */
- user_desc_t ndesc; /* new descriptor */
- proc_t *pp = curproc;
- int rc = 0;
-
- /*
- * LDT segments: executable and data at DPL 3 only.
- */
- if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
- return (EINVAL);
-
- /*
- * check the selector index.
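
Taken together, the handlers above give a process with a private LDT this
lifecycle (a summary of the code above; the event ordering shown is
illustrative):

    off-CPU switch:  ldt_savectx()    -> reset_sregs(), ldt_unload(),
                                         cpu_fast_syscall_enable()
    on-CPU switch:   ldt_restorectx() -> ldt_load(),
                                         cpu_fast_syscall_disable()
    fork:            ldt_installctx(p, cp) -> ldt_dup() onto the child,
                                         ops installed on the child too
    exec/exit:       ldt_freectx()    -> ldt_free(),
                                         cpu_fast_syscall_enable()
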
- */
- seli = SELTOIDX(ssd->sel);
- if (seli >= MAXNLDT || seli < LDT_UDBASE)
- return (EINVAL);
-
- ndesc = null_udesc;
- mutex_enter(&pp->p_ldtlock);
-
- /*
- * If this is the first time for this process then set up a
- * private LDT for it.
- */
- if (pp->p_ldt == NULL) {
- ldt_alloc(pp, seli);
-
- /*
- * Now that this process has a private LDT, the use of
- * the syscall/sysret and sysenter/sysexit instructions
- * is forbidden for this process because they destroy
- * the contents of %cs and %ss segment registers.
- *
- * Explicitly disable them here and add a context handler
- * to the process. Note that disabling
- * them here means we can't use sysret or sysexit on
- * the way out of this system call - so we force this
- * thread to take the slow path (which doesn't make use
- * of sysenter or sysexit) back out.
- */
- kpreempt_disable();
- ldt_installctx(pp, NULL);
- cpu_fast_syscall_disable();
- ASSERT(curthread->t_post_sys != 0);
- kpreempt_enable();
-
- } else if (seli > pp->p_ldtlimit) {
- ASSERT(pp->p_pctx != NULL);
-
- /*
- * Increase size of ldt to include seli.
- */
- ldt_grow(pp, seli);
- }
-
- ASSERT(seli <= pp->p_ldtlimit);
- ldp = &pp->p_ldt[seli];
-
- /*
- * On the 64-bit kernel, this is where things get more subtle.
- * Recall that in the 64-bit kernel, when we enter the kernel we
- * deliberately -don't- reload the segment selectors we came in on
- * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
- * and the underlying descriptors are essentially ignored by the
- * hardware in long mode - except for the base that we override with
- * the gsbase MSRs.
- *
- * However, there's one unfortunate issue with this rosy picture --
- * a descriptor that's not marked as 'present' will still generate
- * an #np when loading a segment register.
- *
- * Consider this case. An lwp creates a harmless LDT entry, points
- * one of its segment registers at it, then tells the kernel (here)
- * to delete it. In the 32-bit kernel, the #np will happen on the
- * way back to userland where we reload the segment registers, and be
- * handled in kern_gpfault(). In the 64-bit kernel, the same thing
- * will happen in the normal case too. However, if we're trying to
- * use a debugger that wants to save and restore the segment registers,
- * and the debugger thinks that we have valid segment registers, we
- * have the problem that the debugger will try to restore the
- * segment register that points at the now 'not present' descriptor
- * and will take a #np right there.
- *
- * We should obviously fix the debugger to be paranoid about
- * -not- restoring segment registers that point to bad descriptors;
- * however we can prevent the problem here if we check to see if any
- * of the segment registers are still pointing at the thing we're
- * destroying; if they are, return an error instead. (That also seems
- * a far better failure mode than SIGKILL and a core file
- * from kern_gpfault().)
- */
- if (SI86SSD_PRES(ssd) == 0) {
- kthread_t *t;
- int bad = 0;
-
- /*
- * Look carefully at the segment registers of every lwp
- * in the process (they're all stopped by our caller).
- * If we're about to invalidate a descriptor that's still
- * being referenced by *any* of them, return an error,
- * rather than having them #gp on their way out of the kernel.
- */ - ASSERT(pp->p_lwprcnt == 1); - - mutex_enter(&pp->p_lock); - t = pp->p_tlist; - do { - klwp_t *lwp = ttolwp(t); - struct regs *rp = lwp->lwp_regs; - pcb_t *pcb = &lwp->lwp_pcb; - - if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) { - bad = 1; - break; - } - - if (PCB_NEED_UPDATE_SEGS(pcb)) { - if (ssd->sel == pcb->pcb_ds || - ssd->sel == pcb->pcb_es || - ssd->sel == pcb->pcb_fs || - ssd->sel == pcb->pcb_gs) { - bad = 1; - break; - } - } else { - if (ssd->sel == rp->r_ds || - ssd->sel == rp->r_es || - ssd->sel == rp->r_fs || - ssd->sel == rp->r_gs) { - bad = 1; - break; - } - } - - } while ((t = t->t_forw) != pp->p_tlist); - mutex_exit(&pp->p_lock); - - if (bad) { - mutex_exit(&pp->p_ldtlock); - return (EBUSY); - } - } - - /* - * If acc1 is zero, clear the descriptor (including the 'present' bit). - * Make sure we update the CPU-private copy of the LDT. - */ - if (ssd->acc1 == 0) { - rc = ldt_update_segd(ldp, &null_udesc); - kpreempt_disable(); - ldt_load(); - kpreempt_enable(); - mutex_exit(&pp->p_ldtlock); - return (rc); - } - - /* - * Check segment type, allow segment not present and - * only user DPL (3). - */ - if (SI86SSD_DPL(ssd) != SEL_UPL) { - mutex_exit(&pp->p_ldtlock); - return (EINVAL); - } - - /* - * Do not allow 32-bit applications to create 64-bit mode code - * segments. - */ - if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 && - SI86SSD_ISLONG(ssd)) { - mutex_exit(&pp->p_ldtlock); - return (EINVAL); - } - - /* - * Set up a code or data user segment descriptor, making sure to update - * the CPU-private copy of the LDT. - */ - if (SI86SSD_ISUSEG(ssd)) { - ssd_to_usd(ssd, &ndesc); - rc = ldt_update_segd(ldp, &ndesc); - kpreempt_disable(); - ldt_load(); - kpreempt_enable(); - mutex_exit(&pp->p_ldtlock); - return (rc); - } - - mutex_exit(&pp->p_ldtlock); - return (EINVAL); -} - -/* - * Allocate new LDT for process just large enough to contain seli. Note we - * allocate and grow LDT in PAGESIZE chunks. We do this to simplify the - * implementation and because on the hypervisor it's required, since the LDT - * must live on pages that have PROT_WRITE removed and which are given to the - * hypervisor. - * - * Note that we don't actually load the LDT into the current CPU here: it's done - * later by our caller. - */ -static void -ldt_alloc(proc_t *pp, uint_t seli) -{ - user_desc_t *ldt; - size_t ldtsz; - uint_t nsels; - - ASSERT(MUTEX_HELD(&pp->p_ldtlock)); - ASSERT(pp->p_ldt == NULL); - ASSERT(pp->p_ldtlimit == 0); - - /* - * Allocate new LDT just large enough to contain seli. The LDT must - * always be allocated in units of pages for KPTI. 
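
The page-granular sizing below deserves a worked example. Assuming 4K pages
and 8-byte descriptors, a request for selector index 600 rounds
(601 * 8) = 4808 bytes up to 8192, giving 1024 usable entries and a
p_ldtlimit of 1023; P2ROUNDUP here is the stock illumos sysmacros.h
definition:

    #include <stdio.h>

    #define P2ROUNDUP(x, align)     (-(-(x) & -(align)))

    int
    main(void)
    {
            size_t desc = 8, pagesize = 4096, seli = 600;
            size_t ldtsz = P2ROUNDUP((seli + 1) * desc, pagesize);

            printf("ldtsz=%zu nsels=%zu limit=%zu\n",
                ldtsz, ldtsz / desc, ldtsz / desc - 1); /* 8192 1024 1023 */
            return (0);
    }
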
- */ - ldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE); - nsels = ldtsz / sizeof (user_desc_t); - ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT); - - ldt = kmem_zalloc(ldtsz, KM_SLEEP); - ASSERT(IS_P2ALIGNED(ldt, PAGESIZE)); - -#if defined(__xpv) - if (xen_ldt_setprot(ldt, ldtsz, PROT_READ)) - panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed"); -#endif - - pp->p_ldt = ldt; - pp->p_ldtlimit = nsels - 1; -} - -static void -ldt_free(proc_t *pp) -{ - user_desc_t *ldt; - size_t ldtsz; - - ASSERT(pp->p_ldt != NULL); - - mutex_enter(&pp->p_ldtlock); - ldt = pp->p_ldt; - ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t); - - ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE)); - - pp->p_ldt = NULL; - pp->p_ldtlimit = 0; - mutex_exit(&pp->p_ldtlock); - - if (pp == curproc) { - kpreempt_disable(); - ldt_unload(); - kpreempt_enable(); - } - -#if defined(__xpv) - /* - * We are not allowed to make the ldt writable until after - * we tell the hypervisor to unload it. - */ - if (xen_ldt_setprot(ldt, ldtsz, PROT_READ | PROT_WRITE)) - panic("ldt_free:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed"); -#endif - - kmem_free(ldt, ldtsz); -} - -/* - * On fork copy new ldt for child. - */ -static void -ldt_dup(proc_t *pp, proc_t *cp) -{ - size_t ldtsz; - - ASSERT(pp->p_ldt != NULL); - ASSERT(cp != curproc); - - /* - * I assume the parent's ldt can't increase since we're in a fork. - */ - mutex_enter(&pp->p_ldtlock); - mutex_enter(&cp->p_ldtlock); - - ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t); - - ldt_alloc(cp, pp->p_ldtlimit); - -#if defined(__xpv) - /* - * Make child's ldt writable so it can be copied into from - * parent's ldt. This works since ldt_alloc above did not load - * the ldt since its for the child process. If we tried to make - * an LDT writable that is loaded in hw the setprot operation - * would fail. - */ - if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ | PROT_WRITE)) - panic("ldt_dup:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed"); -#endif - - bcopy(pp->p_ldt, cp->p_ldt, ldtsz); - -#if defined(__xpv) - if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ)) - panic("ldt_dup:xen_ldt_setprot(PROT_READ) failed"); -#endif - mutex_exit(&cp->p_ldtlock); - mutex_exit(&pp->p_ldtlock); - -} - -/* - * Note that we don't actually load the LDT into the current CPU here: it's done - * later by our caller - unless we take an error. This works out because - * ldt_load() does a copy of ->p_ldt instead of directly loading it into the GDT - * (and therefore can't be using the freed old LDT), and by definition if the - * new entry didn't pass validation, then the proc shouldn't be referencing an - * entry in the extended region. - */ -static void -ldt_grow(proc_t *pp, uint_t seli) -{ - user_desc_t *oldt, *nldt; - uint_t nsels; - size_t oldtsz, nldtsz; - - ASSERT(MUTEX_HELD(&pp->p_ldtlock)); - ASSERT(pp->p_ldt != NULL); - ASSERT(pp->p_ldtlimit != 0); - - /* - * Allocate larger LDT just large enough to contain seli. The LDT must - * always be allocated in units of pages for KPTI. - */ - nldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE); - nsels = nldtsz / sizeof (user_desc_t); - ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT); - ASSERT(nsels > pp->p_ldtlimit); - - oldt = pp->p_ldt; - oldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t); - - nldt = kmem_zalloc(nldtsz, KM_SLEEP); - ASSERT(IS_P2ALIGNED(nldt, PAGESIZE)); - - bcopy(oldt, nldt, oldtsz); - - /* - * unload old ldt. 
- */ - kpreempt_disable(); - ldt_unload(); - kpreempt_enable(); - -#if defined(__xpv) - - /* - * Make old ldt writable and new ldt read only. - */ - if (xen_ldt_setprot(oldt, oldtsz, PROT_READ | PROT_WRITE)) - panic("ldt_grow:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed"); - - if (xen_ldt_setprot(nldt, nldtsz, PROT_READ)) - panic("ldt_grow:xen_ldt_setprot(PROT_READ) failed"); -#endif - - pp->p_ldt = nldt; - pp->p_ldtlimit = nsels - 1; - - kmem_free(oldt, oldtsz); -} diff --git a/usr/src/uts/intel/ia32/promif/README b/usr/src/uts/intel/ia32/promif/README deleted file mode 100644 index 6e22aee923..0000000000 --- a/usr/src/uts/intel/ia32/promif/README +++ /dev/null @@ -1,65 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# - -usr/src/uts/i86/promif for x86 (please update as required) - -Merge work (here and stand.i386/boot/promif) is done in July 1994. - -1) The merge is mechanic and textual, no attempt in re-design this interface. - There are other projects that will replace this piece of code. - -2) Three kind of binaries are generated: - #if defined(_KMDB) - for kmdb to use - #if defined(I386BOOT) - for booters (inetboot and ufsboot) to use and linked - together as libprom.a - #if !defined(_KMDB) && !defined(I386BOOT) - for kernel to use in *.o form - - ------------------------------------------------------------------------------ - -9 Jan, 1994: - - The new plan is to do away with most of the prom lib, keeping only those -routines that are truely common to the three prom clients (kernel, kadb, and -second level boot). The modules that remain are: - - prom_printf.c - prom_getchar.c - prom_putchar.c - prom_getverion.c - prom_init.c - -Everything else (which used the 3-way #ifdef's described above), has been -moved off into client-specific modules: - - .../psm/stand/boot/i386/prom.c - .../psm/stand/kadb/i386/prom.c - - -- Reg diff --git a/usr/src/uts/intel/ia32/promif/prom_enter.c b/usr/src/uts/intel/ia32/promif/prom_enter.c deleted file mode 100644 index 12da5a6903..0000000000 --- a/usr/src/uts/intel/ia32/promif/prom_enter.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/promif.h> -#include <sys/promimpl.h> -#include <sys/archsystm.h> -#include <sys/reboot.h> -#include <sys/kdi.h> - -/* - * The Intel cpu does not have an underlying monitor. - * So, we emulate the best we can..... - */ - -void -prom_enter_mon(void) -{ -#if defined(_KMDB) - prom_exit_to_mon(); -#endif - - if (boothowto & RB_DEBUG) - kmdb_enter(); - else { - prom_printf("Press any key to continue."); - (void) prom_getchar(); - } -} diff --git a/usr/src/uts/intel/ia32/promif/prom_exit.c b/usr/src/uts/intel/ia32/promif/prom_exit.c deleted file mode 100644 index e4027448fa..0000000000 --- a/usr/src/uts/intel/ia32/promif/prom_exit.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/promif.h> -#include <sys/consdev.h> -#include <sys/promimpl.h> -#include <sys/archsystm.h> -#include <sys/reboot.h> -#include <sys/kdi.h> - -/* - * The Intel cpu does not have an underlying monitor. - * So, we do the best we can..... - */ - -extern void prom_poll_enter(void); - -extern cons_polledio_t *cons_polledio; - -void -prom_exit_to_mon(void) -{ - -#if !defined(_KMDB) - prom_poll_enter(); -#endif - -#if !defined(_KMDB) - if (boothowto & RB_DEBUG) - kmdb_enter(); -#endif /* !_KMDB */ - prom_reboot_prompt(); - prom_reboot(NULL); -} - -#if !defined(_KMDB) -void -prom_poll_enter(void) -{ - if (cons_polledio != NULL) { - if (cons_polledio->cons_polledio_enter != NULL) { - cons_polledio->cons_polledio_enter( - cons_polledio->cons_polledio_argument); - } - } -} -#endif diff --git a/usr/src/uts/intel/ia32/promif/prom_panic.c b/usr/src/uts/intel/ia32/promif/prom_panic.c deleted file mode 100644 index 2893281fdc..0000000000 --- a/usr/src/uts/intel/ia32/promif/prom_panic.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/promif.h> -#include <sys/promimpl.h> -#include <sys/archsystm.h> -#include <sys/reboot.h> -#include <sys/kdi.h> - -void -prom_panic(char *s) -{ - const char fmt[] = "%s: prom_panic: %s\n"; - - if (s == NULL) - s = "unknown panic"; - -#if defined(_KMDB) - prom_printf(fmt, "kmdb", s); -#elif defined(_KERNEL) - prom_printf(fmt, "kernel", s); - if (boothowto & RB_DEBUG) - kmdb_enter(); -#else -#error "configuration error" -#endif - prom_reboot_prompt(); - prom_reboot(NULL); -} diff --git a/usr/src/uts/intel/ia32/sys/Makefile b/usr/src/uts/intel/ia32/sys/Makefile index 0ef2320b16..5cf1651263 100644 --- a/usr/src/uts/intel/ia32/sys/Makefile +++ b/usr/src/uts/intel/ia32/sys/Makefile @@ -19,22 +19,22 @@ # CDDL HEADER END # # -#pragma ident "%Z%%M% %I% %E% SMI" # # Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# uts/intel/ia32/sys/Makefile -# + # include global definitions include ../../../../Makefile.master -HDRS= \ - asm_linkage.h \ +HDRS= \ kdi_regs.h \ - machtypes.h \ privmregs.h \ - privregs.h \ + privregs.h + +LINKS= \ + asm_linkage.h \ + machtypes.h \ psw.h \ pte.h \ reg.h \ @@ -47,18 +47,28 @@ ROOTDIR= $(ROOTINCISA)/sys ROOTDIRS= $(ROOTDIR) ROOTHDRS= $(HDRS:%=$(ROOTDIR)/%) +ROOTLINKS= $(LINKS:%=$(ROOTDIR)/%) CHECKHDRS= $(HDRS:%.h=%.check) +$(ROOTDIR)/asm_linkage.h := LINK_TARGET = ../../sys/asm_linkage.h +$(ROOTDIR)/machtypes.h := LINK_TARGET = ../../sys/machtypes.h +$(ROOTDIR)/psw.h := LINK_TARGET = ../../sys/psw.h +$(ROOTDIR)/pte.h := LINK_TARGET = ../../sys/pte.h +$(ROOTDIR)/reg.h := LINK_TARGET = ../../sys/reg.h +$(ROOTDIR)/stack.h := LINK_TARGET = ../../sys/stack.h +$(ROOTDIR)/trap.h := LINK_TARGET = ../../sys/trap.h +$(ROOTDIR)/traptrace.h := LINK_TARGET = ../../sys/traptrace.h + # install rules $(ROOTDIR)/%: % $(INS.file) .KEEP_STATE: -.PARALLEL: $(CHECKHDRS) $(ROOTHDRS) +.PARALLEL: $(CHECKHDRS) $(ROOTHDRS) $(ROOTLINKS) -install_h: $(ROOTINCISA) .WAIT $(ROOTDIRS) .WAIT $(ROOTHDRS) +install_h: $(ROOTINCISA) .WAIT $(ROOTDIRS) .WAIT $(ROOTHDRS) $(ROOTLINKS) $(ROOTINCISA): $(INS.dir) @@ -66,4 +76,7 @@ $(ROOTINCISA): $(ROOTDIRS): $(ROOTINCISA) $(INS.dir) +$(ROOTLINKS): + $(RM) $@; $(SYMLINK) $(LINK_TARGET) $@ + check: $(CHECKHDRS) diff --git a/usr/src/uts/intel/ia32/sys/asm_linkage.h b/usr/src/uts/intel/ia32/sys/asm_linkage.h deleted file mode 100644 index 95d4987324..0000000000 --- a/usr/src/uts/intel/ia32/sys/asm_linkage.h +++ /dev/null @@ -1,351 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright 2019 Joyent, Inc. - */ - -#ifndef _IA32_SYS_ASM_LINKAGE_H -#define _IA32_SYS_ASM_LINKAGE_H - -#include <sys/stack.h> -#include <sys/trap.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _ASM /* The remainder of this file is only for assembly files */ - -/* - * make annoying differences in assembler syntax go away - */ - -/* - * D16 and A16 are used to insert instructions prefixes; the - * macros help the assembler code be slightly more portable. - */ -#if !defined(__GNUC_AS__) -/* - * /usr/ccs/bin/as prefixes are parsed as separate instructions - */ -#define D16 data16; -#define A16 addr16; - -/* - * (There are some weird constructs in constant expressions) - */ -#define _CONST(const) [const] -#define _BITNOT(const) -1!_CONST(const) -#define _MUL(a, b) _CONST(a \* b) - -#else -/* - * Why not use the 'data16' and 'addr16' prefixes .. well, the - * assembler doesn't quite believe in real mode, and thus argues with - * us about what we're trying to do. - */ -#define D16 .byte 0x66; -#define A16 .byte 0x67; - -#define _CONST(const) (const) -#define _BITNOT(const) ~_CONST(const) -#define _MUL(a, b) _CONST(a * b) - -#endif - -/* - * C pointers are different sizes between i386 and amd64. - * These constants can be used to compute offsets into pointer arrays. - */ -#if defined(__amd64) -#define CLONGSHIFT 3 -#define CLONGSIZE 8 -#define CLONGMASK 7 -#elif defined(__i386) -#define CLONGSHIFT 2 -#define CLONGSIZE 4 -#define CLONGMASK 3 -#endif - -/* - * Since we know we're either ILP32 or LP64 .. 
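
The CLONG*/CPTR* constants above exist so assembly code can index arrays of
C pointers without hard-coding the data model. The equivalent C arithmetic,
using the LP64 values (a stand-alone sketch):

    #include <stdio.h>

    #define CLONGSHIFT      3       /* LP64 value from the header above */

    int
    main(void)
    {
            long table[4] = { 10, 20, 30, 40 };
            unsigned i = 2;
            /* byte offset of element i, as an assembler would compute it */
            unsigned long off = (unsigned long)i << CLONGSHIFT;

            printf("off=%lu val=%ld\n", off,
                *(long *)((char *)table + off));        /* off=16 val=30 */
            return (0);
    }
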
- */ -#define CPTRSHIFT CLONGSHIFT -#define CPTRSIZE CLONGSIZE -#define CPTRMASK CLONGMASK - -#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT) -#error "inconsistent shift constants" -#endif - -#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1) -#error "inconsistent mask constants" -#endif - -#define ASM_ENTRY_ALIGN 16 - -/* - * SSE register alignment and save areas - */ - -#define XMM_SIZE 16 -#define XMM_ALIGN 16 - -#if defined(__amd64) - -#define SAVE_XMM_PROLOG(sreg, nreg) \ - subq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp; \ - movq %rsp, sreg - -#define RSTOR_XMM_EPILOG(sreg, nreg) \ - addq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp - -#elif defined(__i386) - -#define SAVE_XMM_PROLOG(sreg, nreg) \ - subl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; \ - movl %esp, sreg; \ - addl $XMM_ALIGN, sreg; \ - andl $_BITNOT(XMM_ALIGN-1), sreg - -#define RSTOR_XMM_EPILOG(sreg, nreg) \ - addl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; - -#endif /* __i386 */ - -/* - * profiling causes definitions of the MCOUNT and RTMCOUNT - * particular to the type - */ -#ifdef GPROF - -#define MCOUNT(x) \ - pushl %ebp; \ - movl %esp, %ebp; \ - call _mcount; \ - popl %ebp - -#endif /* GPROF */ - -#ifdef PROF - -#define MCOUNT(x) \ -/* CSTYLED */ \ - .lcomm .L_/**/x/**/1, 4, 4; \ - pushl %ebp; \ - movl %esp, %ebp; \ -/* CSTYLED */ \ - movl $.L_/**/x/**/1, %edx; \ - call _mcount; \ - popl %ebp - -#endif /* PROF */ - -/* - * if we are not profiling, MCOUNT should be defined to nothing - */ -#if !defined(PROF) && !defined(GPROF) -#define MCOUNT(x) -#endif /* !defined(PROF) && !defined(GPROF) */ - -#define RTMCOUNT(x) MCOUNT(x) - -/* - * Macro to define weak symbol aliases. These are similar to the ANSI-C - * #pragma weak _name = name - * except a compiler can determine type. The assembler must be told. Hence, - * the second parameter must be the type of the symbol (i.e.: function,...) - */ -#define ANSI_PRAGMA_WEAK(sym, stype) \ -/* CSTYLED */ \ - .weak _/**/sym; \ -/* CSTYLED */ \ - .type _/**/sym, @stype; \ -/* CSTYLED */ \ -_/**/sym = sym - -/* - * Like ANSI_PRAGMA_WEAK(), but for unrelated names, as in: - * #pragma weak sym1 = sym2 - */ -#define ANSI_PRAGMA_WEAK2(sym1, sym2, stype) \ - .weak sym1; \ - .type sym1, @stype; \ -sym1 = sym2 - -/* - * ENTRY provides the standard procedure entry code and an easy way to - * insert the calls to mcount for profiling. ENTRY_NP is identical, but - * never calls mcount. - */ -#define ENTRY(x) \ - .text; \ - .align ASM_ENTRY_ALIGN; \ - .globl x; \ - .type x, @function; \ -x: MCOUNT(x) - -#define ENTRY_NP(x) \ - .text; \ - .align ASM_ENTRY_ALIGN; \ - .globl x; \ - .type x, @function; \ -x: - -#define RTENTRY(x) \ - .text; \ - .align ASM_ENTRY_ALIGN; \ - .globl x; \ - .type x, @function; \ -x: RTMCOUNT(x) - -/* - * ENTRY2 is identical to ENTRY but provides two labels for the entry point. - */ -#define ENTRY2(x, y) \ - .text; \ - .align ASM_ENTRY_ALIGN; \ - .globl x, y; \ - .type x, @function; \ - .type y, @function; \ -/* CSTYLED */ \ -x: ; \ -y: MCOUNT(x) - -#define ENTRY_NP2(x, y) \ - .text; \ - .align ASM_ENTRY_ALIGN; \ - .globl x, y; \ - .type x, @function; \ - .type y, @function; \ -/* CSTYLED */ \ -x: ; \ -y: - - -/* - * ALTENTRY provides for additional entry points. - */ -#define ALTENTRY(x) \ - .globl x; \ - .type x, @function; \ -x: - -/* - * DGDEF and DGDEF2 provide global data declarations. - * - * DGDEF provides a word aligned word of storage. - * - * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. 
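
The i386 SAVE_XMM_PROLOG above over-reserves by XMM_ALIGN bytes and then
rounds the save-area pointer up with an add-and-mask, which guarantees a
16-byte-aligned area that still fits inside the reservation. The same idiom
in C, with an illustrative stack value:

    #include <stdio.h>
    #include <stdint.h>

    #define XMM_ALIGN       16

    int
    main(void)
    {
            uintptr_t sp = 0x1007;  /* illustrative post-subl %esp value */
            uintptr_t sreg = (sp + XMM_ALIGN) & ~(uintptr_t)(XMM_ALIGN - 1);

            printf("save area at %#lx\n", (unsigned long)sreg); /* 0x1010 */
            return (0);
    }
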
This
- * implies this macro is best used for byte arrays.
- *
- * DGDEF3 allocates "sz" bytes of storage with "algn" alignment.
- */
-#define	DGDEF2(name, sz) \
-	.data; \
-	.globl	name; \
-	.type	name, @object; \
-	.size	name, sz; \
-name:
-
-#define	DGDEF3(name, sz, algn) \
-	.data; \
-	.align	algn; \
-	.globl	name; \
-	.type	name, @object; \
-	.size	name, sz; \
-name:
-
-#define	DGDEF(name)	DGDEF3(name, 4, 4)
-
-/*
- * SET_SIZE trails a function and sets the size for the ELF symbol table.
- */
-#define	SET_SIZE(x) \
-	.size	x, [.-x]
-
-/*
- * NWORD provides native word value.
- */
-#if defined(__amd64)
-
-/*CSTYLED*/
-#define	NWORD	quad
-
-#elif defined(__i386)
-
-#define	NWORD	long
-
-#endif  /* __i386 */
-
-/*
- * These macros should be used when making indirect calls in the kernel. They
- * will perform a jump or call to the corresponding register in a way that knows
- * about retpolines and handles whether such mitigations are enabled or not.
- *
- * INDIRECT_JMP_REG will jump to the named register. INDIRECT_CALL_REG will
- * instead do a call. These macros cannot be used to dereference a register. For
- * example, if you need to do something that looks like the following:
- *
- *	call *24(%rdi)
- *	jmp *(%r15)
- *
- * You must instead first do a movq into the corresponding location. You need to
- * be careful to make sure that the register it's loaded into is safe to
- * use. Often that register may be saved or used elsewhere so it may not be safe
- * to clobber the value. Usually, loading into %rax would be safe. These would
- * turn into something like:
- *
- *	movq 24(%rdi), %rdi; INDIRECT_CALL_REG(rdi)
- *	movq (%r15), %r15; INDIRECT_JMP_REG(r15)
- *
- * If you are trying to call a global function, then use the following pattern
- * (substituting the register in question):
- *
- *	leaq	my_favorite_function(%rip), %rax
- *	INDIRECT_CALL_REG(rax)
- *
- * If you instead have a function pointer (say gethrtimef for example), then you
- * need to do:
- *
- *	movq	my_favorite_function_pointer(%rip), %rax
- *	INDIRECT_CALL_REG(rax)
- */
-
-/* CSTYLED */
-#define	INDIRECT_JMP_REG(reg)	jmp	__x86_indirect_thunk_/**/reg;
-
-/* CSTYLED */
-#define	INDIRECT_CALL_REG(reg)	call	__x86_indirect_thunk_/**/reg;
-
-#endif /* _ASM */
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _IA32_SYS_ASM_LINKAGE_H */
diff --git a/usr/src/uts/intel/ia32/sys/comm_page_util.h b/usr/src/uts/intel/ia32/sys/comm_page_util.h
deleted file mode 100644
index 331162c5ca..0000000000
--- a/usr/src/uts/intel/ia32/sys/comm_page_util.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source.  A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- */
-
-/*
- * Copyright 2016 Joyent, Inc.
- */
-
-#ifndef _COMM_PAGE_UTIL_H
-#define	_COMM_PAGE_UTIL_H
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-#if defined(_KERNEL)
-
-extern caddr_t comm_page_mapin();
-
-#endif /* defined(_KERNEL) */
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif /* _COMM_PAGE_UTIL_H */
diff --git a/usr/src/uts/intel/ia32/sys/machtypes.h b/usr/src/uts/intel/ia32/sys/machtypes.h
deleted file mode 100644
index 232b03028c..0000000000
--- a/usr/src/uts/intel/ia32/sys/machtypes.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _IA32_SYS_MACHTYPES_H
-#define	_IA32_SYS_MACHTYPES_H
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-/*
- * Machine dependent types:
- *
- *	intel ia32 Version
- */
-
-#if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || \
-	defined(__EXTENSIONS__)
-
-#define	REG_LABEL_PC	0
-#define	REG_LABEL_SP	1
-#define	REG_LABEL_BP	2
-#if defined(__amd64)
-#define	REG_LABEL_RBX	3
-#define	REG_LABEL_R12	4
-#define	REG_LABEL_R13	5
-#define	REG_LABEL_R14	6
-#define	REG_LABEL_R15	7
-#define	REG_LABEL_MAX	8
-#else /* __amd64 */
-#define	REG_LABEL_EBX	3
-#define	REG_LABEL_ESI	4
-#define	REG_LABEL_EDI	5
-#define	REG_LABEL_MAX	6
-#endif /* __amd64 */
-
-typedef	struct _label_t { long val[REG_LABEL_MAX]; } label_t;
-
-#endif /* !defined(_POSIX_C_SOURCE)... */
-
-typedef	unsigned char	lock_t;		/* lock word for busy wait */
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif /* _IA32_SYS_MACHTYPES_H */
diff --git a/usr/src/uts/intel/ia32/sys/psw.h b/usr/src/uts/intel/ia32/sys/psw.h
deleted file mode 100644
index 7c63813929..0000000000
--- a/usr/src/uts/intel/ia32/sys/psw.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
-/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
-/*	All Rights Reserved	*/
-
-/*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _IA32_SYS_PSW_H
-#define	_IA32_SYS_PSW_H
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-#ifndef _ASM
-
-/* Flags Register */
-
-typedef struct flags {
-	uint_t	fl_cf	: 1,		/* carry/borrow */
-		: 1,			/* reserved */
-		fl_pf	: 1,		/* parity */
-		: 1,			/* reserved */
-		fl_af	: 1,		/* auxiliary carry */
-		: 1,			/* reserved */
-		fl_zf	: 1,		/* zero */
-		fl_sf	: 1,		/* sign */
-		fl_tf	: 1,		/* trace */
-		fl_if	: 1,		/* interrupt enable */
-		fl_df	: 1,		/* direction */
-		fl_of	: 1,		/* overflow */
-		fl_iopl	: 2,		/* I/O privilege level */
-		fl_nt	: 1,		/* nested task */
-		: 1,			/* reserved */
-		fl_rf	: 1,		/* resume */
-		fl_vm	: 1,		/* virtual 86 mode */
-		fl_res	: 14;		/* reserved */
-} flags_t;
-
-#endif	/* !_ASM */
-
-#define	PS_C		0x0001		/* carry bit */
-#define	PS_MB1		0x0002		/* unused; must be 1. */
-#define	PS_P		0x0004		/* parity bit */
-#define	PS_AC		0x0010		/* auxiliary carry bit */
-#define	PS_Z		0x0040		/* zero bit */
-#define	PS_N		0x0080		/* negative bit */
-#define	PS_T		0x0100		/* trace enable bit */
-#define	PS_IE		0x0200		/* interrupt enable bit */
-#define	PS_D		0x0400		/* direction bit */
-#define	PS_V		0x0800		/* overflow bit */
-#define	PS_IOPL		0x3000		/* I/O privilege level */
-#define	PS_NT		0x4000		/* nested task flag */
-#define	PS_RF		0x10000		/* resume flag */
-#define	PS_VM		0x20000		/* virtual 86 mode flag */
-#define	PS_ACHK		0x40000		/* alignment check enable (486) */
-#define	PS_VINT		0x80000		/* virtual interrupt flag */
-#define	PS_VINTP	0x100000	/* virtual interrupt pending */
-#define	PS_ID		0x200000	/* ID flag */
-
-#define	PS_ICC		(PS_C|PS_AC|PS_Z|PS_N)	/* integer condition codes */
-
-#define	FMT_FLAGS_REG \
-	"\20\26id\25vip\24vif\23ac\22vm\21rf" \
-	"\17nt\14of\13df\12if\11tf\10sf\7zf\5af\3pf\1cf"
-
-#define	PSL_USER	0x202		/* initial user FLAGS */
-
-/* user variable PS bits */
-#define	PSL_USERMASK	(PS_ICC|PS_D|PS_T|PS_V|PS_P|PS_ACHK|PS_NT)
-
-/* PS bits changeable by the sahf instruction */
-#define	PSL_LSAHFMASK	(PS_ICC|PS_P)
-
-/*
- * kernel flags settings
- *
- * Note that the kernel's SMAP protection relies on PS_ACHK not being present in
- * the following two definitions. See uts/intel/ia32/ml/copy.s for more
- * information on SMAP.
- */
-#define	F_OFF		0x2		/* interrupts off */
-#define	F_ON		0x202		/* interrupts on */
-
-#ifndef _ASM
-typedef	int	psw_t;
-#endif
-
-#include <sys/tss.h>
-#include <sys/segments.h>	/* selector definitions */
-
-#define	USERMODE(cs)	((uint16_t)(cs) != KCS_SEL)
-
-#include <sys/spl.h>
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif /* _IA32_SYS_PSW_H */
diff --git a/usr/src/uts/intel/ia32/sys/pte.h b/usr/src/uts/intel/ia32/sys/pte.h
deleted file mode 100644
index 138647347a..0000000000
--- a/usr/src/uts/intel/ia32/sys/pte.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _IA32_SYS_PTE_H -#define _IA32_SYS_PTE_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifndef _ASM -#include <sys/types.h> -#endif /* _ASM */ - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef _ASM - -#ifdef PTE36 /* PTE36 ---------------------------- */ - -typedef uint64_t pteval_t; -typedef pteval_t *pteptr_t; - -#define PRPTEx "llx" - -typedef struct pte32 { - uint32_t Present:1; - uint32_t AccessPermissions:2; - uint32_t WriteThru:1; - uint32_t NonCacheable:1; - uint32_t Referenced:1; - uint32_t Modified:1; - uint32_t MustBeZero:1; - uint32_t GlobalEnable:1; - uint32_t OSReserved:3; - uint32_t PhysicalPageNumber:20; -} pte32_t; - - -typedef struct pte { - uint32_t Present:1; - uint32_t AccessPermissions:2; - uint32_t WriteThru:1; - uint32_t NonCacheable:1; - uint32_t Referenced:1; - uint32_t Modified:1; - uint32_t MustBeZero:1; - uint32_t GlobalEnable:1; - uint32_t OSReserved:3; - uint32_t PhysicalPageNumberL:20; - uint32_t PhysicalPageNumberH; - /* - * An easy way to ensure that - * reserved bits are zero. - */ -} pte_t; - -struct pte64 { - uint32_t pte64_0_31; - uint32_t pte64_32_64; -}; - -#define NPTESHIFT 9 -#define NPTEPERPT 512 /* entries in page table */ -#define PTSIZE (NPTEPERPT * MMU_PAGESIZE) /* bytes mapped */ - - -#else /* PTE36 */ - /* PTE32 ---------------------------- */ - - -typedef uint32_t pteval_t; -typedef pteval_t *pteptr_t; - -#define PRPTEx "x" - -typedef struct pte { - uint_t Present:1; - uint_t AccessPermissions:2; - uint_t WriteThru:1; - uint_t NonCacheable:1; - uint_t Referenced:1; - uint_t Modified:1; - uint_t MustBeZero:1; - uint_t GlobalEnable:1; - uint_t OSReserved:3; - uint_t PhysicalPageNumber:20; -} pte_t; - -#define pte32_t pte_t - -#define NPTESHIFT 10 -#define NPTEPERPT 1024 /* entries in page table */ -#define PTSIZE (NPTEPERPT * MMU_PAGESIZE) /* bytes mapped */ - -#endif /* PTE36 */ - -#define PTE_VALID 0x01 -#define PTE_LARGEPAGE 0x80 -#define PTE_SRWX 0x02 - -#endif /* !_ASM */ - - -#ifdef __cplusplus -} -#endif - -#endif /* !_IA32_SYS_PTE_H */ diff --git a/usr/src/uts/intel/ia32/sys/reg.h b/usr/src/uts/intel/ia32/sys/reg.h deleted file mode 100644 index f33ccbd413..0000000000 --- a/usr/src/uts/intel/ia32/sys/reg.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 
- * Use is subject to license terms. - */ - -#ifndef _IA32_SYS_REG_H -#define _IA32_SYS_REG_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * This file only exists for i386 backwards compatibility. - * Kernel code should not include it. - */ - -#ifdef _KERNEL -#error "kernel include of reg.h" -#else -#include <sys/regset.h> -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _IA32_SYS_REG_H */ diff --git a/usr/src/uts/intel/ia32/sys/stack.h b/usr/src/uts/intel/ia32/sys/stack.h deleted file mode 100644 index 3bee81d27a..0000000000 --- a/usr/src/uts/intel/ia32/sys/stack.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _IA32_SYS_STACK_H -#define _IA32_SYS_STACK_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#if !defined(_ASM) - -#include <sys/types.h> - -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * In the x86 world, a stack frame looks like this: - * - * |--------------------------| - * 4n+8(%ebp) ->| argument word n | - * | ... | (Previous frame) - * 8(%ebp) ->| argument word 0 | - * |--------------------------|-------------------- - * 4(%ebp) ->| return address | - * |--------------------------| - * 0(%ebp) ->| previous %ebp (optional) | - * |--------------------------| - * -4(%ebp) ->| unspecified | (Current frame) - * | ... | - * 0(%esp) ->| variable size | - * |--------------------------| - */ - -/* - * Stack alignment macros. - */ - -#define STACK_ALIGN32 4 -#define STACK_ENTRY_ALIGN32 4 -#define STACK_BIAS32 0 -#define SA32(x) (((x)+(STACK_ALIGN32-1)) & ~(STACK_ALIGN32-1)) -#define STACK_RESERVE32 0 -#define MINFRAME32 0 - -#if defined(__amd64) - -/* - * In the amd64 world, a stack frame looks like this: - * - * |--------------------------| - * 8n+16(%rbp)->| argument word n | - * | ... | (Previous frame) - * 16(%rbp) ->| argument word 0 | - * |--------------------------|-------------------- - * 8(%rbp) ->| return address | - * |--------------------------| - * 0(%rbp) ->| previous %rbp | - * |--------------------------| - * -8(%rbp) ->| unspecified | (Current frame) - * | ... | - * 0(%rsp) ->| variable size | - * |--------------------------| - * -128(%rsp) ->| reserved for function | - * |--------------------------| - * - * The end of the input argument area must be aligned on a 16-byte - * boundary; i.e. (%rsp - 8) % 16 == 0 at function entry. - * - * The 128-byte location beyond %rsp is considered to be reserved for - * functions and is NOT modified by signal handlers. It can be used - * to store temporary data that is not needed across function calls. 
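- *
- * For example, a hypothetical leaf routine (one that makes no further
- * calls) can spill a scratch register into the reserved area without
- * adjusting %rsp:
- *
- *	movq	%r11, -16(%rsp)		/ save within the 128-byte reserve
- *	...
- *	movq	-16(%rsp), %r11		/ reload; no push/pop required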
- */ - -/* - * Stack alignment macros. - */ - -#define STACK_ALIGN64 16 -#define STACK_ENTRY_ALIGN64 8 -#define STACK_BIAS64 0 -#define SA64(x) (((x)+(STACK_ALIGN64-1)) & ~(STACK_ALIGN64-1)) -#define STACK_RESERVE64 128 -#define MINFRAME64 0 - -#define STACK_ALIGN STACK_ALIGN64 -#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN64 -#define STACK_BIAS STACK_BIAS64 -#define SA(x) SA64(x) -#define STACK_RESERVE STACK_RESERVE64 -#define MINFRAME MINFRAME64 - -#elif defined(__i386) - -#define STACK_ALIGN STACK_ALIGN32 -#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN32 -#define STACK_BIAS STACK_BIAS32 -#define SA(x) SA32(x) -#define STACK_RESERVE STACK_RESERVE32 -#define MINFRAME MINFRAME32 - -#endif /* __i386 */ - -#if defined(_KERNEL) && !defined(_ASM) - -#if defined(DEBUG) -#if STACK_ALIGN == 4 -#define ASSERT_STACK_ALIGNED() \ - { \ - uint32_t __tmp; \ - ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \ - } -#elif (STACK_ALIGN == 16) && (_LONG_DOUBLE_ALIGNMENT == 16) -#define ASSERT_STACK_ALIGNED() \ - { \ - long double __tmp; \ - ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \ - } -#endif -#else /* DEBUG */ -#define ASSERT_STACK_ALIGNED() -#endif /* DEBUG */ - -struct regs; - -void traceregs(struct regs *); -void traceback(caddr_t); - -#endif /* defined(_KERNEL) && !defined(_ASM) */ - -#define STACK_GROWTH_DOWN /* stacks grow from high to low addresses */ - -#ifdef __cplusplus -} -#endif - -#endif /* _IA32_SYS_STACK_H */ diff --git a/usr/src/uts/intel/ia32/sys/trap.h b/usr/src/uts/intel/ia32/sys/trap.h deleted file mode 100644 index 4165f1289e..0000000000 --- a/usr/src/uts/intel/ia32/sys/trap.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ -/* All Rights Reserved */ - -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * - * Copyright 2018 Joyent, Inc. 
- */ - -#ifndef _IA32_SYS_TRAP_H -#define _IA32_SYS_TRAP_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Trap type values - */ - -#define T_ZERODIV 0x0 /* #de divide by 0 error */ -#define T_SGLSTP 0x1 /* #db single step */ -#define T_NMIFLT 0x2 /* NMI */ -#define T_BPTFLT 0x3 /* #bp breakpoint fault, INT3 insn */ -#define T_OVFLW 0x4 /* #of INTO overflow fault */ -#define T_BOUNDFLT 0x5 /* #br BOUND insn fault */ -#define T_ILLINST 0x6 /* #ud invalid opcode fault */ -#define T_NOEXTFLT 0x7 /* #nm device not available: x87 */ -#define T_DBLFLT 0x8 /* #df double fault */ -#define T_EXTOVRFLT 0x9 /* [not generated: 386 only] */ -#define T_TSSFLT 0xa /* #ts invalid TSS fault */ -#define T_SEGFLT 0xb /* #np segment not present fault */ -#define T_STKFLT 0xc /* #ss stack fault */ -#define T_GPFLT 0xd /* #gp general protection fault */ -#define T_PGFLT 0xe /* #pf page fault */ -#define T_RESVTRAP 0xf /* reserved */ -#define T_EXTERRFLT 0x10 /* #mf x87 FPU error fault */ -#define T_ALIGNMENT 0x11 /* #ac alignment check error */ -#define T_MCE 0x12 /* #mc machine check exception */ -#define T_SIMDFPE 0x13 /* #xm SSE/SSE exception */ -#define T_DBGENTR 0x14 /* debugger entry */ -#define T_INVALTRAP 0x1e /* invalid */ -#define T_ENDPERR 0x21 /* emulated extension error flt */ -#define T_ENOEXTFLT 0x20 /* emulated ext not present */ -#define T_FASTTRAP 0xd2 /* fast system call */ -#define T_SYSCALLINT 0x91 /* general system call */ -#define T_DTRACE_RET 0x92 /* DTrace pid return */ -#define T_INT80 0x80 /* int80 handler for linux emulation */ -#define T_SOFTINT 0x50fd /* pseudo softint trap type */ - -/* - * Pseudo traps. - */ -#define T_INTERRUPT 0x100 -#define T_FAULT 0x200 -#define T_AST 0x400 -#define T_SYSCALL 0x180 - - -/* - * Values of error code on stack in case of page fault - */ - -#define PF_ERR_MASK 0x01 /* Mask for error bit */ -#define PF_ERR_PAGE 0x00 /* page not present */ -#define PF_ERR_PROT 0x01 /* protection error */ -#define PF_ERR_WRITE 0x02 /* fault caused by write (else read) */ -#define PF_ERR_USER 0x04 /* processor was in user mode */ - /* (else supervisor) */ -#define PF_ERR_EXEC 0x10 /* attempt to execute a No eXec page (AMD) */ - /* or kernel tried to execute a user page */ - /* (Intel SMEP) */ - -/* - * Definitions for fast system call subfunctions - */ -#define T_FNULL 0 /* Null trap for testing */ -#define T_FGETFP 1 /* Get emulated FP context */ -#define T_FSETFP 2 /* Set emulated FP context */ -#define T_GETHRTIME 3 /* Get high resolution time */ -#define T_GETHRVTIME 4 /* Get high resolution virtual time */ -#define T_GETHRESTIME 5 /* Get high resolution time */ -#define T_GETLGRP 6 /* Get home lgrpid */ - -#define T_LASTFAST 6 /* Last valid subfunction */ - -/* - * Offsets for an interrupt/trap frame. - */ -#define T_FRAME_ERR 0 -#define T_FRAME_RIP 8 -#define T_FRAME_CS 16 -#define T_FRAME_RFLAGS 24 -#define T_FRAME_RSP 32 -#define T_FRAME_SS 40 - -#define T_FRAMERET_RIP 0 -#define T_FRAMERET_CS 8 -#define T_FRAMERET_RFLAGS 16 -#define T_FRAMERET_RSP 24 -#define T_FRAMERET_SS 32 - -#ifdef __cplusplus -} -#endif - -#endif /* _IA32_SYS_TRAP_H */ diff --git a/usr/src/uts/intel/ia32/sys/traptrace.h b/usr/src/uts/intel/ia32/sys/traptrace.h deleted file mode 100644 index 038f01715c..0000000000 --- a/usr/src/uts/intel/ia32/sys/traptrace.h +++ /dev/null @@ -1,297 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). 
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#ifndef	_IA32_SYS_TRAPTRACE_H
-#define	_IA32_SYS_TRAPTRACE_H
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-#include <sys/privregs.h>
-
-/*
- * Trap tracing.  If TRAPTRACE is defined, an entry is recorded every time
- * the CPU jumps through the Interrupt Descriptor Table (IDT).  One exception
- * is the Double Fault handler, which does not record a traptrace entry.
- *
- * There are facilities to (conditionally) interleave tracing of related
- * facilities, e.g. x-calls.
- */
-
-/*
- * Note: non-assembler files that include this file must include
- * <sys/systm.h> before it, for the typedef of pc_t to be visible.
- */
-
-#define	TTR_STACK_DEPTH	10
-
-#ifndef	_ASM
-
-#define	TTR_PAD1_SIZE	(sizeof (long) - 1)
-
-typedef struct {
-	uintptr_t	ttc_next;
-	uintptr_t	ttc_first;
-	uintptr_t	ttc_limit;
-	uintptr_t	ttc_current;
-} trap_trace_ctl_t;
-
-typedef struct {
-	struct regs	ttr_regs;
-	greg_t		ttr_cr2;
-	union _ttr_info {
-		struct _idt_entry {
-			int	cpuid;
-			short	vector;
-			uchar_t	ipl;
-			uchar_t	spl;
-			uchar_t	pri;
-		} idt_entry;
-		struct _gate_entry {
-			int	sysnum;
-		} gate_entry;
-	} ttr_info;
-	uintptr_t	ttr_curthread;
-	uchar_t		ttr_pad[TTR_PAD1_SIZE];
-	uchar_t		ttr_marker;
-	hrtime_t	ttr_stamp;
-	int		ttr_sdepth;
-	pc_t		ttr_stack[TTR_STACK_DEPTH];
-} trap_trace_rec_t;
-
-#define	ttr_cpuid	ttr_info.idt_entry.cpuid
-#define	ttr_vector	ttr_info.idt_entry.vector
-#define	ttr_ipl		ttr_info.idt_entry.ipl
-#define	ttr_spl		ttr_info.idt_entry.spl
-#define	ttr_pri		ttr_info.idt_entry.pri
-#define	ttr_sysnum	ttr_info.gate_entry.sysnum
-
-#define	TRAPTR_NENT	128
-
-extern trap_trace_ctl_t	trap_trace_ctl[NCPU];	/* Allocated in locore.s */
-extern size_t		trap_trace_bufsize;
-extern int		trap_trace_freeze;
-extern trap_trace_rec_t	trap_trace_postmort;	/* Entry used after death */
-
-#define	TRAPTRACE_FREEZE	trap_trace_freeze = 1;
-#define	TRAPTRACE_UNFREEZE	trap_trace_freeze = 0;
-
-#else	/* _ASM */
-
-/*
- * ptr  -- will be set to a TRAPTRACE entry.
- * scr1 -- scratch
- * scr1_32 -- 32-bit version of scr1
- * scr2 -- scratch
- * marker -- register containing byte to store in marker field of entry
- *
- * Note that this macro defines labels "8" and "9".
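- *
- * In C terms, TRACE_PTR claims the next record from the per-CPU ring
- * buffer, roughly as follows (a sketch of the logic, not the macro
- * itself):
- *
- *	trap_trace_ctl_t *ttc = &trap_trace_ctl[CPU->cpu_id];
- *	trap_trace_rec_t *rec = (trap_trace_rec_t *)ttc->ttc_next;
- *	uintptr_t next = ttc->ttc_next + sizeof (trap_trace_rec_t);
- *
- *	if (next >= ttc->ttc_limit)
- *		next = ttc->ttc_first;	(wrap to the buffer start)
- *	ttc->ttc_next = next;
- *	rec->ttr_marker = marker;
- *
- * When trap_trace_freeze is set, the macro instead hands back the single
- * trap_trace_postmort record and leaves the ring untouched.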
- */ -#ifdef TRAPTRACE - -#if defined(__amd64) - -#define TRACE_PTR(ptr, scr1, scr1_32, scr2, marker) \ - leaq trap_trace_postmort(%rip), ptr; \ - cmpl $0, trap_trace_freeze(%rip); \ - jne 9f; \ - LOADCPU(ptr); \ - movl CPU_ID(ptr), scr1_32; \ - shlq $TRAPTR_SIZE_SHIFT, scr1; \ - leaq trap_trace_ctl(%rip), scr2; \ - addq scr2, scr1; \ - movq TRAPTR_NEXT(scr1), ptr; \ - leaq TRAP_ENT_SIZE(ptr), scr2; \ - cmpq TRAPTR_LIMIT(scr1), scr2; \ - jl 8f; \ - movq TRAPTR_FIRST(scr1), scr2; \ -8: movq scr2, TRAPTR_NEXT(scr1); \ -9: movb marker, TTR_MARKER(ptr); - -#elif defined(__i386) - -#define TRACE_PTR(ptr, scr1, scr1_32, scr2, marker) \ - movl $trap_trace_postmort, ptr; \ - cmpl $0, trap_trace_freeze; \ - jne 9f; \ - LOADCPU(ptr); \ - movl CPU_ID(ptr), scr1_32; \ - shll $TRAPTR_SIZE_SHIFT, scr1; \ - addl $trap_trace_ctl, scr1; \ - movl TRAPTR_NEXT(scr1), ptr; \ - leal TRAP_ENT_SIZE(ptr), scr2; \ - cmpl TRAPTR_LIMIT(scr1), scr2; \ - jl 8f; \ - movl TRAPTR_FIRST(scr1), scr2; \ -8: movl scr2, TRAPTR_NEXT(scr1); \ -9: movb marker, TTR_MARKER(ptr); - -#endif /* __i386 */ - -/* - * ptr -- pointer to the current TRAPTRACE entry. - * reg -- pointer to the stored registers; must be on the stack - * scr1 -- scratch used as array index - * scr2 -- scratch used as temporary - * - * Note that this macro defines label "9". - * Also captures curthread on exit of loop. - */ -#if defined(__xpv) -#define __GETCR2(_mov, reg) \ - _mov %gs:CPU_VCPU_INFO, reg; \ - _mov VCPU_INFO_ARCH_CR2(reg), reg -#else -#define __GETCR2(_mov, reg) \ - _mov %cr2, reg -#endif - -#if defined(__amd64) - -#define TRACE_REGS(ptr, reg, scr1, scr2) \ - xorq scr1, scr1; \ - /*CSTYLED*/ \ -9: movq (reg, scr1, 1), scr2; \ - movq scr2, (ptr, scr1, 1); \ - addq $CLONGSIZE, scr1; \ - cmpq $REGSIZE, scr1; \ - jl 9b; \ - movq %gs:CPU_THREAD, scr2; \ - movq scr2, TTR_CURTHREAD(ptr); \ - __GETCR2(movq, scr2); \ - movq scr2, TTR_CR2(ptr) - -#elif defined(__i386) - -#define TRACE_REGS(ptr, reg, scr1, scr2) \ - xorl scr1, scr1; \ - /*CSTYLED*/ \ -9: movl (reg, scr1, 1), scr2; \ - movl scr2, (ptr, scr1, 1); \ - addl $CLONGSIZE, scr1; \ - cmpl $REGSIZE, scr1; \ - jl 9b; \ - movl %gs:CPU_THREAD, scr2; \ - movl scr2, TTR_CURTHREAD(ptr); \ - __GETCR2(movl, scr2); \ - movl scr2, TTR_CR2(ptr) - -#endif /* __i386 */ - -/* - * The time stamp macro records a high-resolution time stamp for the - * given TRAPTRACE entry. Note that %eax and %edx are plowed by this - * macro; if they are to be preserved, it's up to the caller of the macro. - */ - -#if defined(__amd64) - -#define TRACE_STAMP(reg) \ - rdtsc; \ - movl %eax, TTR_STAMP(reg); \ - movl %edx, TTR_STAMP+4(reg) - -/* - * %rbp should be set before invoking this macro. 
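- * getpcstack() walks the chain of saved frame pointers, so if %rbp is
- * stale the recorded ttr_stack will be truncated or bogus.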
- */ - -#define TRACE_STACK(tt) \ - pushq %rdi; \ - pushq %rsi; \ - pushq %rdx; \ - pushq %rcx; \ - pushq %r8; \ - pushq %r9; \ - pushq %rax; \ - pushq %r12; \ - movq tt, %r12; \ - leaq TTR_STACK(%r12), %rdi; \ - movl $TTR_STACK_DEPTH, %esi; \ - call getpcstack; \ - movl %eax, TTR_SDEPTH(%r12); \ - popq %r12; \ - popq %rax; \ - popq %r9; \ - popq %r8; \ - popq %rcx; \ - popq %rdx; \ - popq %rsi; \ - popq %rdi - -#elif defined(__i386) - -#define TRACE_STAMP(reg) \ - xorl %eax, %eax; \ - xorl %edx, %edx; \ - btl $X86FSET_TSC, x86_featureset; \ - jnc 9f; \ - rdtsc; \ -9: movl %eax, TTR_STAMP(reg); \ - movl %edx, TTR_STAMP+4(reg) - -#define TRACE_STACK(tt) \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - pushl %ebx; \ - pushl $TTR_STACK_DEPTH; \ - movl tt, %ebx; \ - leal TTR_STACK(%ebx), %eax; \ - pushl %eax; \ - call getpcstack; \ - addl $8, %esp; \ - movl %eax, TTR_SDEPTH(%ebx); \ - popl %ebx; \ - popl %edx; \ - popl %ecx; \ - popl %eax - -#endif /* __i386 */ - -#else - -#define TRACE_PTR(ptr, scr1, scr1_32, scr2, marker) -#define TRACE_REGS(ptr, reg, scr1, scr2) -#define TRACE_STAMP(reg) -#define TRACE_STACK(reg) - -#endif /* TRAPTRACE */ - -#endif /* _ASM */ - -#define TT_SYSCALL 0xaa /* system call via lcall */ -#define TT_SYSENTER 0xab /* system call via sysenter */ -#define TT_SYSC 0xad /* system call via syscall (32-bit) */ -#define TT_SYSC64 0xae /* system call via syscall (64-bit) */ -#define TT_INTERRUPT 0xbb -#define TT_TRAP 0xcc -#define TT_INTTRAP 0xdd -#define TT_EVENT 0xee /* hypervisor event */ - -#ifdef __cplusplus -} -#endif - -#endif /* _IA32_SYS_TRAPTRACE_H */ diff --git a/usr/src/uts/intel/ia32/syscall/getcontext.c b/usr/src/uts/intel/ia32/syscall/getcontext.c deleted file mode 100644 index d5dfd5a9cd..0000000000 --- a/usr/src/uts/intel/ia32/syscall/getcontext.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - -#include <sys/param.h> -#include <sys/types.h> -#include <sys/vmparam.h> -#include <sys/systm.h> -#include <sys/signal.h> -#include <sys/stack.h> -#include <sys/regset.h> -#include <sys/privregs.h> -#include <sys/frame.h> -#include <sys/proc.h> -#include <sys/brand.h> -#include <sys/psw.h> -#include <sys/ucontext.h> -#include <sys/asm_linkage.h> -#include <sys/errno.h> -#include <sys/archsystm.h> -#include <sys/schedctl.h> -#include <sys/debug.h> -#include <sys/sysmacros.h> - -/* - * Save user context. 
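- *
- * This is the kernel end of getcontext(2); a userland caller sees the
- * result as, for example:
- *
- *	ucontext_t uc;
- *	(void) getcontext(&uc);
- *
- * after which uc.uc_mcontext, uc.uc_stack and uc.uc_sigmask reflect the
- * caller's state at the point of the call.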
- */
-void
-savecontext(ucontext_t *ucp, const k_sigset_t *mask)
-{
-	proc_t *p = ttoproc(curthread);
-	klwp_t *lwp = ttolwp(curthread);
-	struct regs *rp = lwptoregs(lwp);
-
-	/*
-	 * We unconditionally assign to every field through the end
-	 * of the gregs, but we need to bzero() everything -after- that
-	 * to avoid having any kernel stack garbage escape to userland.
-	 */
-	bzero(&ucp->uc_mcontext.fpregs, sizeof (ucontext_t) -
-	    offsetof(ucontext_t, uc_mcontext.fpregs));
-
-	ucp->uc_flags = UC_ALL;
-	ucp->uc_link = (struct ucontext *)lwp->lwp_oldcontext;
-
-	/*
-	 * Try to copyin() the ustack if one is registered. If the stack
-	 * has zero size, this indicates that stack bounds checking has
-	 * been disabled for this LWP. If stack bounds checking is disabled
-	 * or the copyin() fails, we fall back to the legacy behavior.
-	 */
-	if (lwp->lwp_ustack == (uintptr_t)NULL ||
-	    copyin((void *)lwp->lwp_ustack, &ucp->uc_stack,
-	    sizeof (ucp->uc_stack)) != 0 ||
-	    ucp->uc_stack.ss_size == 0) {
-
-		if (lwp->lwp_sigaltstack.ss_flags == SS_ONSTACK) {
-			ucp->uc_stack = lwp->lwp_sigaltstack;
-		} else {
-			ucp->uc_stack.ss_sp = p->p_usrstack - p->p_stksize;
-			ucp->uc_stack.ss_size = p->p_stksize;
-			ucp->uc_stack.ss_flags = 0;
-		}
-	}
-
-	/*
-	 * If either the trace flag or REQUEST_STEP is set,
-	 * arrange for single-stepping and turn off the trace flag.
-	 */
-	if ((rp->r_ps & PS_T) || (lwp->lwp_pcb.pcb_flags & REQUEST_STEP)) {
-		/*
-		 * Clear PS_T so that saved user context won't have trace
-		 * flag set.
-		 */
-		rp->r_ps &= ~PS_T;
-
-		if (!(lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP)) {
-			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
-			/*
-			 * trap() always checks DEBUG_PENDING before
-			 * checking for any pending signal. This at times
-			 * can potentially lead to DEBUG_PENDING not being
-			 * honoured. (e.g. the lwp is stopped by
-			 * stop_on_fault() called from trap(), after being
-			 * awakened it might see a pending signal and call
-			 * savecontext(), however on the way back to userland
-			 * there is no place it can be detected). Hence in
-			 * anticipation of such occasions, set AST flag for
-			 * the thread which will make the thread take an
-			 * excursion through trap() where it will be handled
-			 * appropriately.
-			 */
-			aston(curthread);
-		}
-	}
-
-	getgregs(lwp, ucp->uc_mcontext.gregs);
-	if (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN)
-		getfpregs(lwp, &ucp->uc_mcontext.fpregs);
-	else
-		ucp->uc_flags &= ~UC_FPU;
-
-	sigktou(mask, &ucp->uc_sigmask);
-}
-
-/*
- * Restore user context.
- */
-void
-restorecontext(ucontext_t *ucp)
-{
-	kthread_t *t = curthread;
-	klwp_t *lwp = ttolwp(t);
-
-	lwp->lwp_oldcontext = (uintptr_t)ucp->uc_link;
-
-	if (ucp->uc_flags & UC_STACK) {
-		if (ucp->uc_stack.ss_flags == SS_ONSTACK)
-			lwp->lwp_sigaltstack = ucp->uc_stack;
-		else
-			lwp->lwp_sigaltstack.ss_flags &= ~SS_ONSTACK;
-	}
-
-	if (ucp->uc_flags & UC_CPU) {
-		/*
-		 * If the trace flag is set, mark the lwp to take a
-		 * single-step trap on return to user level (below).
-		 * The x86 lcall interface and sysenter have already done this,
-		 * and turned off the flag, but amd64 syscall interface has not.
-		 */
-		if (lwptoregs(lwp)->r_ps & PS_T)
-			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
-		setgregs(lwp, ucp->uc_mcontext.gregs);
-		lwp->lwp_eosys = JUSTRETURN;
-		t->t_post_sys = 1;
-		aston(curthread);
-	}
-
-	if (ucp->uc_flags & UC_FPU)
-		setfpregs(lwp, &ucp->uc_mcontext.fpregs);
-
-	if (ucp->uc_flags & UC_SIGMASK) {
-		/*
-		 * We don't need to acquire p->p_lock here;
-		 * we are manipulating thread-private data.
- */
-		schedctl_finish_sigblock(t);
-		sigutok(&ucp->uc_sigmask, &t->t_hold);
-		if (sigcheck(ttoproc(t), t))
-			t->t_sig_check = 1;
-	}
-}
-
-
-int
-getsetcontext(int flag, void *arg)
-{
-	ucontext_t uc;
-	ucontext_t *ucp;
-	klwp_t *lwp = ttolwp(curthread);
-	stack_t dummy_stk;
-
-	/*
-	 * In future releases, when the ucontext structure grows,
-	 * getcontext should be modified to only return the fields
-	 * specified in the uc_flags.  That way, the structure can grow
-	 * and still be binary compatible with all .o's which will only
-	 * have old fields defined in uc_flags.
-	 */
-
-	switch (flag) {
-	default:
-		return (set_errno(EINVAL));
-
-	case GETCONTEXT:
-		schedctl_finish_sigblock(curthread);
-		savecontext(&uc, &curthread->t_hold);
-		if (uc.uc_flags & UC_SIGMASK)
-			SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
-		if (copyout(&uc, arg, sizeof (uc)))
-			return (set_errno(EFAULT));
-		return (0);
-
-	case SETCONTEXT:
-		ucp = arg;
-		if (ucp == NULL)
-			exit(CLD_EXITED, 0);
-		/*
-		 * Don't copyin filler or floating state unless we need it.
-		 * The ucontext_t struct and fields are specified in the ABI.
-		 */
-		if (copyin(ucp, &uc, sizeof (ucontext_t) -
-		    sizeof (uc.uc_filler) -
-		    sizeof (uc.uc_mcontext.fpregs))) {
-			return (set_errno(EFAULT));
-		}
-		if (uc.uc_flags & UC_SIGMASK)
-			SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask);
-
-		if ((uc.uc_flags & UC_FPU) &&
-		    copyin(&ucp->uc_mcontext.fpregs, &uc.uc_mcontext.fpregs,
-		    sizeof (uc.uc_mcontext.fpregs))) {
-			return (set_errno(EFAULT));
-		}
-
-		restorecontext(&uc);
-
-		if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
-			(void) copyout(&uc.uc_stack, (stack_t *)lwp->lwp_ustack,
-			    sizeof (uc.uc_stack));
-		return (0);
-
-	case GETUSTACK:
-		if (copyout(&lwp->lwp_ustack, arg, sizeof (caddr_t)))
-			return (set_errno(EFAULT));
-		return (0);
-
-	case SETUSTACK:
-		if (copyin(arg, &dummy_stk, sizeof (dummy_stk)))
-			return (set_errno(EFAULT));
-		lwp->lwp_ustack = (uintptr_t)arg;
-		return (0);
-	}
-}
-
-#ifdef _SYSCALL32_IMPL
-
-/*
- * Save user context for 32-bit processes.
- */
-void
-savecontext32(ucontext32_t *ucp, const k_sigset_t *mask)
-{
-	proc_t *p = ttoproc(curthread);
-	klwp_t *lwp = ttolwp(curthread);
-	struct regs *rp = lwptoregs(lwp);
-
-	bzero(&ucp->uc_mcontext.fpregs, sizeof (ucontext32_t) -
-	    offsetof(ucontext32_t, uc_mcontext.fpregs));
-
-	ucp->uc_flags = UC_ALL;
-	ucp->uc_link = (caddr32_t)lwp->lwp_oldcontext;
-
-	if (lwp->lwp_ustack == (uintptr_t)NULL ||
-	    copyin((void *)lwp->lwp_ustack, &ucp->uc_stack,
-	    sizeof (ucp->uc_stack)) != 0 ||
-	    ucp->uc_stack.ss_size == 0) {
-
-		if (lwp->lwp_sigaltstack.ss_flags == SS_ONSTACK) {
-			ucp->uc_stack.ss_sp =
-			    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
-			ucp->uc_stack.ss_size =
-			    (size32_t)lwp->lwp_sigaltstack.ss_size;
-			ucp->uc_stack.ss_flags = SS_ONSTACK;
-		} else {
-			ucp->uc_stack.ss_sp = (caddr32_t)(uintptr_t)
-			    (p->p_usrstack - p->p_stksize);
-			ucp->uc_stack.ss_size = (size32_t)p->p_stksize;
-			ucp->uc_stack.ss_flags = 0;
-		}
-	}
-
-	/*
-	 * If either the trace flag or REQUEST_STEP is set, arrange
-	 * for single-stepping and turn off the trace flag.
-	 */
-	if ((rp->r_ps & PS_T) || (lwp->lwp_pcb.pcb_flags & REQUEST_STEP)) {
-		/*
-		 * Clear PS_T so that saved user context won't have trace
-		 * flag set.
-		 */
-		rp->r_ps &= ~PS_T;
-
-		if (!(lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP)) {
-			lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
-			/*
-			 * See comments in savecontext().
- */ - aston(curthread); - } - } - - getgregs32(lwp, ucp->uc_mcontext.gregs); - if (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN) - getfpregs32(lwp, &ucp->uc_mcontext.fpregs); - else - ucp->uc_flags &= ~UC_FPU; - - sigktou(mask, &ucp->uc_sigmask); -} - -int -getsetcontext32(int flag, void *arg) -{ - ucontext32_t uc; - ucontext_t ucnat; - ucontext32_t *ucp; - klwp_t *lwp = ttolwp(curthread); - caddr32_t ustack32; - stack32_t dummy_stk32; - - switch (flag) { - default: - return (set_errno(EINVAL)); - - case GETCONTEXT: - schedctl_finish_sigblock(curthread); - savecontext32(&uc, &curthread->t_hold); - if (uc.uc_flags & UC_SIGMASK) - SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask); - if (copyout(&uc, arg, sizeof (uc))) - return (set_errno(EFAULT)); - return (0); - - case SETCONTEXT: - ucp = arg; - if (ucp == NULL) - exit(CLD_EXITED, 0); - if (copyin(ucp, &uc, sizeof (uc) - - sizeof (uc.uc_filler) - - sizeof (uc.uc_mcontext.fpregs))) { - return (set_errno(EFAULT)); - } - if (uc.uc_flags & UC_SIGMASK) - SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask); - if ((uc.uc_flags & UC_FPU) && - copyin(&ucp->uc_mcontext.fpregs, &uc.uc_mcontext.fpregs, - sizeof (uc.uc_mcontext.fpregs))) { - return (set_errno(EFAULT)); - } - - ucontext_32ton(&uc, &ucnat); - restorecontext(&ucnat); - - if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0)) - (void) copyout(&uc.uc_stack, - (stack32_t *)lwp->lwp_ustack, sizeof (uc.uc_stack)); - return (0); - - case GETUSTACK: - ustack32 = (caddr32_t)lwp->lwp_ustack; - if (copyout(&ustack32, arg, sizeof (ustack32))) - return (set_errno(EFAULT)); - return (0); - - case SETUSTACK: - if (copyin(arg, &dummy_stk32, sizeof (dummy_stk32))) - return (set_errno(EFAULT)); - lwp->lwp_ustack = (uintptr_t)arg; - return (0); - } -} - -#endif /* _SYSCALL32_IMPL */ diff --git a/usr/src/uts/intel/ia32/syscall/lwp_private.c b/usr/src/uts/intel/ia32/syscall/lwp_private.c deleted file mode 100644 index 50331a2899..0000000000 --- a/usr/src/uts/intel/ia32/syscall/lwp_private.c +++ /dev/null @@ -1,222 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * Copyright (c) 2018, Joyent, Inc. 
- */ - -#include <sys/param.h> -#include <sys/types.h> -#include <sys/disp.h> -#include <sys/sysmacros.h> -#include <sys/cpuvar.h> -#include <sys/systm.h> -#include <sys/thread.h> -#include <sys/lwp.h> -#include <sys/segments.h> -#include <sys/privregs.h> -#include <sys/cmn_err.h> - -int -lwp_setprivate(klwp_t *lwp, int which, uintptr_t base) -{ - pcb_t *pcb = &lwp->lwp_pcb; - struct regs *rp = lwptoregs(lwp); - kthread_t *t = lwptot(lwp); - int thisthread = t == curthread; - int rval; - - if (thisthread) - kpreempt_disable(); - - - /* - * 32-bit compatibility processes point to the per-cpu GDT segment - * descriptors that are virtualized to the lwp. That allows 32-bit - * programs to mess with %fs and %gs; in particular it allows - * things like this: - * - * movw %gs, %ax - * ... - * movw %ax, %gs - * - * to work, which is needed by emulators for legacy application - * environments .. - * - * 64-bit processes may also point to a per-cpu GDT segment descriptor - * virtualized to the lwp. However the descriptor base is forced - * to zero (because we can't express the full 64-bit address range - * in a long mode descriptor), so don't reload segment registers - * in a 64-bit program! 64-bit processes must have selector values - * of zero for %fs and %gs to use the 64-bit fs_base and gs_base - * respectively. - */ - if (!PCB_NEED_UPDATE_SEGS(pcb)) { - pcb->pcb_ds = rp->r_ds; - pcb->pcb_es = rp->r_es; - pcb->pcb_fs = rp->r_fs; - pcb->pcb_gs = rp->r_gs; - PCB_SET_UPDATE_SEGS(pcb); - t->t_post_sys = 1; - } - ASSERT(t->t_post_sys); - - switch (which) { - case _LWP_FSBASE: - if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { - set_usegd(&pcb->pcb_fsdesc, SDP_LONG, 0, 0, - SDT_MEMRWA, SEL_UPL, SDP_BYTES, SDP_OP32); - rval = pcb->pcb_fs = 0; /* null gdt descriptor */ - } else { - set_usegd(&pcb->pcb_fsdesc, SDP_SHORT, (void *)base, -1, - SDT_MEMRWA, SEL_UPL, SDP_PAGES, SDP_OP32); - rval = pcb->pcb_fs = LWPFS_SEL; - } - if (thisthread) - gdt_update_usegd(GDT_LWPFS, &pcb->pcb_fsdesc); - - pcb->pcb_fsbase = base; - break; - case _LWP_GSBASE: - if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { - set_usegd(&pcb->pcb_gsdesc, SDP_LONG, 0, 0, - SDT_MEMRWA, SEL_UPL, SDP_BYTES, SDP_OP32); - rval = pcb->pcb_gs = 0; /* null gdt descriptor */ - } else { - set_usegd(&pcb->pcb_gsdesc, SDP_SHORT, (void *)base, -1, - SDT_MEMRWA, SEL_UPL, SDP_PAGES, SDP_OP32); - rval = pcb->pcb_gs = LWPGS_SEL; - } - if (thisthread) - gdt_update_usegd(GDT_LWPGS, &pcb->pcb_gsdesc); - - pcb->pcb_gsbase = base; - break; - default: - rval = -1; - break; - } - - if (thisthread) - kpreempt_enable(); - return (rval); -} - -static int -lwp_getprivate(klwp_t *lwp, int which, uintptr_t base) -{ - pcb_t *pcb = &lwp->lwp_pcb; - struct regs *rp = lwptoregs(lwp); - uintptr_t sbase; - int error = 0; - - ASSERT(lwptot(lwp) == curthread); - - kpreempt_disable(); - switch (which) { - case _LWP_FSBASE: - if ((sbase = pcb->pcb_fsbase) != 0) { - if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { - if (PCB_NEED_UPDATE_SEGS(pcb)) { - if (pcb->pcb_fs == 0) - break; - } else { - if (rp->r_fs == 0) - break; - } - } else { - if (PCB_NEED_UPDATE_SEGS(pcb)) { - if (pcb->pcb_fs == LWPFS_SEL) - break; - } else { - if (rp->r_fs == LWPFS_SEL) - break; - } - } - } - error = EINVAL; - break; - case _LWP_GSBASE: - if ((sbase = pcb->pcb_gsbase) != 0) { - if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { - if (PCB_NEED_UPDATE_SEGS(pcb)) { - if (pcb->pcb_gs == 0) - break; - } else { - if (rp->r_gs == 0) - break; - } - } else { - if (PCB_NEED_UPDATE_SEGS(pcb)) { - if (pcb->pcb_gs 
== LWPGS_SEL)
-						break;
-				} else {
-					if (rp->r_gs == LWPGS_SEL)
-						break;
-				}
-			}
-		}
-		error = EINVAL;
-		break;
-
-
-	default:
-		error = ENOTSUP;
-		break;
-	}
-	kpreempt_enable();
-
-	if (error != 0)
-		return (error);
-
-	if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
-		if (sulword((void *)base, sbase) == -1)
-			error = EFAULT;
-#if defined(_SYSCALL32_IMPL)
-	} else {
-		if (suword32((void *)base, (uint32_t)sbase) == -1)
-			error = EFAULT;
-#endif
-	}
-	return (error);
-}
-
-/*
- * libc-private syscall for managing per-lwp %gs and %fs segment base values.
- */
-int
-syslwp_private(int cmd, int which, uintptr_t base)
-{
-	klwp_t *lwp = ttolwp(curthread);
-	int res, error;
-
-	switch (cmd) {
-	case _LWP_SETPRIVATE:
-		res = lwp_setprivate(lwp, which, base);
-		return (res < 0 ? set_errno(ENOTSUP) : res);
-	case _LWP_GETPRIVATE:
-		error = lwp_getprivate(lwp, which, base);
-		return (error != 0 ? set_errno(error) : error);
-	default:
-		return (set_errno(ENOTSUP));
-	}
-}
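
For illustration, the context code deleted above is reached from userland
through the public getcontext(2)/setcontext(2) interfaces, which enter the
kernel via getsetcontext() (savecontext() and restorecontext() do the
actual work).  A minimal sketch of a round trip:

	#include <ucontext.h>
	#include <stdio.h>

	int
	main(void)
	{
		volatile int resumed = 0;
		ucontext_t uc;

		if (getcontext(&uc) != 0)	/* kernel: savecontext() */
			return (1);
		if (!resumed) {
			resumed = 1;
			/* does not return; resumes at getcontext() above */
			(void) setcontext(&uc);	/* kernel: restorecontext() */
		}
		(void) printf("resumed via setcontext()\n");
		return (0);
	}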