Diffstat (limited to 'usr/src/uts/i86pc/ml')
-rw-r--r--   usr/src/uts/i86pc/ml/kpti_trampolines.s   |   2
-rw-r--r--   usr/src/uts/i86pc/ml/offsets.in           |   1
-rw-r--r--   usr/src/uts/i86pc/ml/syscall_asm_amd64.s  | 166
3 files changed, 156 insertions(+), 13 deletions(-)
diff --git a/usr/src/uts/i86pc/ml/kpti_trampolines.s b/usr/src/uts/i86pc/ml/kpti_trampolines.s
index 4b5102d547..17249eb747 100644
--- a/usr/src/uts/i86pc/ml/kpti_trampolines.s
+++ b/usr/src/uts/i86pc/ml/kpti_trampolines.s
@@ -667,6 +667,8 @@ tr_intr_ret_end:
 	MK_INTR_TRAMPOLINE_NOERR(invaltrap)
 	MK_INTR_TRAMPOLINE_NOERR(fasttrap)
 	MK_INTR_TRAMPOLINE_NOERR(dtrace_ret)
+	MK_INTR_TRAMPOLINE_NOERR(brand_sys_int80)
+	MK_INTR_TRAMPOLINE_NOERR(sys_int80)
 
 	/*
 	 * These are special because they can interrupt other traps, and
diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in
index 622f7cd2a3..6c1de5c145 100644
--- a/usr/src/uts/i86pc/ml/offsets.in
+++ b/usr/src/uts/i86pc/ml/offsets.in
@@ -144,6 +144,7 @@ _klwp
 	lwp_thread
 	lwp_procp
 	lwp_brand
+	lwp_brand_syscall
 	lwp_eosys
 	lwp_regs
 	lwp_arg
diff --git a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
index 8a68b4bced..8040e35297 100644
--- a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
+++ b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
@@ -515,6 +515,7 @@ noprod_sys_syscall:
 	movq	T_LWP(%r15), %r14
 	ASSERT_NO_RUPDATE_PENDING(%r14)
+
 	ENABLE_INTR_FLAGS
 
 	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
@@ -528,6 +529,37 @@ noprod_sys_syscall:
 
 	incq	%gs:CPU_STATS_SYS_SYSCALL
 
+	/*
+	 * If our LWP has an alternate system call handler, run that instead
+	 * of the regular system call path.
+	 */
+	movq	LWP_BRAND_SYSCALL(%r14), %rdi
+	testq	%rdi, %rdi
+	jz	_syscall_no_brand
+
+	pushq	%rax
+	subq	$8, %rsp		/* align stack for call to C */
+	INDIRECT_CALL_REG(rdi)
+	addq	$8, %rsp
+
+	/*
+	 * If the alternate handler returns non-zero, the normal system call
+	 * processing is resumed.
+	 */
+	testl	%eax, %eax
+	popq	%rax
+	jnz	_syscall_no_brand
+
+	/*
+	 * For branded syscalls which were handled in-kernel, shuffle the
+	 * register state as would be done by the native handler before
+	 * jumping to the post-syscall logic.
+	 */
+	movq	REGOFF_RAX(%rsp), %r12
+	movq	REGOFF_RDX(%rsp), %r13
+	jmp	_syscall_after_brand
+
+_syscall_no_brand:
 	movw	%ax, T_SYSNUM(%r15)
 	movzbl	T_PRE_SYS(%r15), %ebx
 	ORL_SYSCALLTRACE(%ebx)
@@ -563,6 +595,8 @@ _syscall_invoke:
 	shrq	$32, %r13	/* upper 32-bits into %edx */
 	movl	%r12d, %r12d	/* lower 32-bits into %eax */
 5:
+
+_syscall_after_brand:
 	/*
 	 * Optimistically assume that there's no post-syscall
 	 * work to do. (This is to avoid having to call syscall_mstate()
@@ -825,11 +859,46 @@ _syscall32_save:
 	incq	%gs:CPU_STATS_SYS_SYSCALL
 
 	/*
+	 * If our lwp has an alternate system call handler, run that instead
+	 * of the regular system call path.
+	 */
+	movq	LWP_BRAND_SYSCALL(%r14), %rax
+	testq	%rax, %rax
+	jz	_syscall32_no_brand
+
+	movb	$LWP_SYS, LWP_STATE(%r14)
+	INDIRECT_CALL_REG(rax)
+
+	/*
+	 * If the alternate handler returns non-zero, the normal system call
+	 * processing is resumed.
+	 */
+	testl	%eax, %eax
+	jnz	_syscall32_no_brand
+
+	/*
+	 * For branded syscalls which were handled in-kernel, shuffle the
+	 * register state as would be done by the native handler before
+	 * jumping to the post-syscall logic.
+	 */
+	movl	REGOFF_RAX(%rsp), %r12d
+	movl	REGOFF_RDX(%rsp), %r13d
+	jmp	_syscall32_after_brand
+
+_syscall32_no_brand:
+	/*
 	 * Make some space for MAXSYSARGS (currently 8) 32-bit args placed
 	 * into 64-bit (long) arg slots, maintaining 16 byte alignment.  Or
 	 * more succinctly:
 	 *
 	 *	SA(MAXSYSARGS * sizeof (long)) == 64
+	 *
+	 * Note that this space is used both to copy in the arguments from
+	 * userland and as part of the old UNIX style syscall_ap() method.
+	 * syscall_entry expects that we do not change the values in this
+	 * space once we hand it over.  This means that when we switch to the
+	 * more recent model of passing the arguments based on the calling
+	 * conventions, we'll need to save an additional 16 bytes of stack.
 	 */
 #define SYS_DROP 64			/* drop for args */
 	subq	$SYS_DROP, %rsp
@@ -857,12 +926,16 @@ _syscall32_save:
 	 */
 	movq	%rax, %rbx
-	movl	0(%rsp), %edi
-	movl	8(%rsp), %esi
-	movl	0x10(%rsp), %edx
-	movl	0x18(%rsp), %ecx
-	movl	0x20(%rsp), %r8d
-	movl	0x28(%rsp), %r9d
+	movl	0x0(%rsp), %edi		/* arg0 */
+	movl	0x8(%rsp), %esi		/* arg1 */
+	movl	0x10(%rsp), %edx	/* arg2 */
+	movl	0x38(%rsp), %eax	/* arg7 load */
+	movl	0x18(%rsp), %ecx	/* arg3 */
+	pushq	%rax			/* arg7 saved to stack */
+	movl	0x28(%rsp), %r8d	/* arg4 */
+	movl	0x38(%rsp), %eax	/* arg6 load */
+	movl	0x30(%rsp), %r9d	/* arg5 */
+	pushq	%rax			/* arg6 saved to stack */
 	movq	SY_CALLC(%rbx), %rax
 	INDIRECT_CALL_REG(rax)
@@ -881,6 +954,8 @@ _syscall32_save:
 	shrq	$32, %r13	/* upper 32-bits into %edx */
 	movl	%eax, %r12d	/* lower 32-bits into %eax */
 
+_syscall32_after_brand:
+
 	/*
 	 * Optimistically assume that there's no post-syscall
 	 * work to do. (This is to avoid having to call syscall_mstate()
@@ -1133,15 +1208,20 @@ _full_syscall_postsys32:
 	/*
 	 * Fetch the arguments copied onto the kernel stack and put
 	 * them in the right registers to invoke a C-style syscall handler.
-	 * %rax contains the handler address.
+	 * %rax contains the handler address.  The last two arguments are
+	 * pushed onto the stack -- we can't clobber the old arguments.
 	 */
 	movq	%rax, %rbx
-	movl	0(%rsp), %edi
-	movl	8(%rsp), %esi
-	movl	0x10(%rsp), %edx
-	movl	0x18(%rsp), %ecx
-	movl	0x20(%rsp), %r8d
-	movl	0x28(%rsp), %r9d
+	movl	0x0(%rsp), %edi		/* arg0 */
+	movl	0x8(%rsp), %esi		/* arg1 */
+	movl	0x10(%rsp), %edx	/* arg2 */
+	movl	0x38(%rsp), %eax	/* arg7 load */
+	movl	0x18(%rsp), %ecx	/* arg3 */
+	pushq	%rax			/* arg7 saved to stack */
+	movl	0x28(%rsp), %r8d	/* arg4 */
+	movl	0x38(%rsp), %eax	/* arg6 load */
+	movl	0x30(%rsp), %r9d	/* arg5 */
+	pushq	%rax			/* arg6 saved to stack */
 	movq	SY_CALLC(%rbx), %rax
 	INDIRECT_CALL_REG(rax)
@@ -1220,6 +1300,66 @@ _full_syscall_postsys32:
 	SET_SIZE(brand_sys_sysenter)
 
 /*
+ * System call via an int80.  This entry point is only used by the Linux
+ * application environment.  Unlike the other entry points, there is no
+ * default action to take if no callback is registered for this process.
+ */
+
+	ENTRY_NP(brand_sys_int80)
+	SWAPGS				/* kernel gsbase */
+	XPV_TRAP_POP
+	call	smap_enable
+
+	/*
+	 * We first attempt to call the "b_int80" handler from the "struct
+	 * brand_mach_ops" for this brand.  If no handler function is
+	 * installed for this brand, the BRAND_CALLBACK() macro returns here
+	 * and we check the lwp for a "lwp_brand_syscall" handler.
+	 */
+	BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())
+
+	/*
+	 * Check to see if this lwp provides "lwp_brand_syscall".  If so, we
+	 * will route this int80 through the regular system call handling
+	 * path.
+	 */
+	movq	%r15, %gs:CPU_RTMP_R15
+	movq	%gs:CPU_THREAD, %r15
+	movq	T_LWP(%r15), %r15
+	movq	LWP_BRAND_SYSCALL(%r15), %r15
+	testq	%r15, %r15
+	movq	%gs:CPU_RTMP_R15, %r15
+	jnz	nopop_syscall_int
+
+	/*
+	 * The brand provided neither a "b_int80", nor a "lwp_brand_syscall"
+	 * function, and has thus opted out of handling this trap.
+	 */
+	SWAPGS				/* user gsbase */
+	jmp	nopop_int80
+
+	ENTRY_NP(sys_int80)
+	/*
+	 * We hit an int80, but this process isn't of a brand with an int80
+	 * handler.  Bad process!  Make it look as if the INT failed.
+	 * Modify %rip to point before the INT, push the expected error
+	 * code and fake a GP fault.  Note that on the 64-bit hypervisor we
+	 * need to undo the XPV_TRAP_POP and push rcx and r11 back on the
+	 * stack because gptrap will pop them again with its own
+	 * XPV_TRAP_POP.
+	 */
+	XPV_TRAP_POP
+	call	smap_enable
nopop_int80:
+	subq	$2, (%rsp)	/* int insn 2-bytes */
+	pushq	$_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
+#if defined(__xpv)
+	push	%r11
+	push	%rcx
+#endif
+	jmp	gptrap			/* GP fault */
+	SET_SIZE(sys_int80)
+	SET_SIZE(brand_sys_int80)
+
+/*
  * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
  * the generic i386 libc to do system calls.  We do a small amount of setup
  * before jumping into the existing sys_syscall32 path.
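
Editor's note: the branch structure added at noprod_sys_syscall is easier to see
in C. The sketch below is illustrative only -- the real klwp_t layout and the
handler's signature live in the illumos brand/klwp headers, and the names
brand_syscall_fn_t, fake_klwp and dispatch_syscall are invented here. What it
mirrors from the diff: the handler pointer hangs off the lwp (the new
lwp_brand_syscall offset), a zero return means the brand handled the syscall
in-kernel (the assembly then reloads the return value from the saved
REGOFF_RAX/REGOFF_RDX slots), and a non-zero return resumes the normal path.

	#include <stddef.h>

	/* Assumed shape; the in-kernel type may differ. */
	typedef int (*brand_syscall_fn_t)(void);

	struct fake_klwp {			/* stand-in for klwp_t */
		brand_syscall_fn_t lwp_brand_syscall;	/* NULL: no handler */
	};

	/* C rendering of the branch added at noprod_sys_syscall. */
	static int
	dispatch_syscall(struct fake_klwp *lwp)
	{
		if (lwp->lwp_brand_syscall != NULL &&
		    lwp->lwp_brand_syscall() == 0) {
			/* Handled in-kernel by the brand; result in regs. */
			return (0);
		}
		/* Fall through to the native path (_syscall_no_brand). */
		return (1);
	}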
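The rewritten argument-fetch sequences follow from the SysV AMD64 calling
convention: only the first six integer arguments travel in registers (%rdi,
%rsi, %rdx, %rcx, %r8, %r9), so once a handler can take eight arguments, arg6
and arg7 must be passed on the stack. Each pushq also moves %rsp down by 8,
which is why the later loads read 0x28/0x38/0x30 instead of 0x20/0x30/0x28. A
small illustration (demo_handler is a made-up name, not a real handler):

	#include <stdint.h>

	/*
	 * An 8-argument handler of the kind reached through SY_CALLC().
	 * Under the SysV AMD64 ABI, a6 and a7 arrive on the caller's stack;
	 * the two pushq instructions in the diff fill exactly those slots,
	 * with a6 at the top of the stack and a7 just above it.
	 */
	int64_t
	demo_handler(long a0, long a1, long a2, long a3,
	    long a4, long a5, long a6, long a7)
	{
		return (a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7);
	}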
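The error code faked by sys_int80 also deserves a note. When a #GP is raised by
a reference to an IDT entry, the hardware error code is (vector << 3) | 2, with
bit 1 marking "IDT descriptor". Assuming T_INT80 == 0x80 and
GATE_DESC_SIZE == 8 (assumed header values, not read from this diff),
_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2) works out to the architecturally
correct value:

	#include <stdio.h>

	int
	main(void)
	{
		unsigned vec = 0x80;		/* assumed T_INT80 */
		unsigned gate_desc_size = 8;	/* assumed GATE_DESC_SIZE */

		/* (0x80 << 3) | 2 == 0x402: vector 0x80, IDT bit set */
		printf("faked #GP error code: %#x\n",
		    vec * gate_desc_size + 2);
		return (0);
	}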
