diff options
Diffstat (limited to 'usr/src/uts/i86pc/ml/syscall_asm.s')
-rw-r--r-- | usr/src/uts/i86pc/ml/syscall_asm.s | 125 |
1 files changed, 119 insertions, 6 deletions
diff --git a/usr/src/uts/i86pc/ml/syscall_asm.s b/usr/src/uts/i86pc/ml/syscall_asm.s index 9465c864a1..bc9618fcb9 100644 --- a/usr/src/uts/i86pc/ml/syscall_asm.s +++ b/usr/src/uts/i86pc/ml/syscall_asm.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -38,6 +37,7 @@ #include <sys/regset.h> #include <sys/psw.h> #include <sys/x86_archext.h> +#include <sys/machbrand.h> #if defined(__lint) @@ -114,6 +114,52 @@ orl %ecx, %edi; \ ORL_SYSCALLTRACE(%edi) +/* + * When the brand's callback is invoked, the stack will look like this: + * -------------------------------------- + * | 'scratch space' | + * | user's %ebx | + * | user's %gs selector | + * | kernel's %gs selector | + * | | lwp brand data | + * | | proc brand data | + * v | user return address | + * | callback wrapper return addr | + * -------------------------------------- + * + * The lx brand (at least) uses each of these fields. + * If the brand code returns, we assume that we are meant to execute the + * normal system call path. + */ +#define BRAND_CALLBACK(callback_id) \ + subl $4, %esp /* save some scratch space */ ;\ + pushl %ebx /* save %ebx to use for scratch */ ;\ + pushl %gs /* save the user %gs */ ;\ + movl $KGS_SEL, %ebx ;\ + pushl %ebx /* push kernel's %gs */ ;\ + movw %ebx, %gs /* switch to the kernel's %gs */ ;\ + movl %gs:CPU_THREAD, %ebx /* load the thread pointer */ ;\ + movl T_LWP(%ebx), %ebx /* load the lwp pointer */ ;\ + pushl LWP_BRAND(%ebx) /* push the lwp's brand data */ ;\ + movl LWP_PROCP(%ebx), %ebx /* load the proc pointer */ ;\ + pushl P_BRAND_DATA(%ebx) /* push the proc's brand data */ ;\ + movl P_BRAND(%ebx), %ebx /* load the brand pointer */ ;\ + movl B_MACHOPS(%ebx), %ebx /* load the machops pointer */ ;\ + movl _CONST(_MUL(callback_id, CPTRSIZE))(%ebx), %ebx ;\ + cmpl $0, %ebx ;\ + je 1f ;\ + movl %ebx, 20(%esp) /* save callback to scratch */ ;\ + movl 12(%esp), %ebx /* grab the the user %gs */ ;\ + movw %ebx, %gs /* restore the user %gs */ ;\ + movl 16(%esp), %ebx /* restore %ebx */ ;\ + pushl 24(%esp) /* push the return address */ ;\ + call *24(%esp) /* call callback */ ;\ + addl $4, %esp /* get rid of ret addr */ ;\ +1: movl 12(%esp), %ebx /* grab the the user %gs */ ;\ + movw %ebx, %gs /* restore the user %gs */ ;\ + movl 16(%esp), %ebx /* restore user's %ebx */ ;\ + addl $24, %esp /* restore stack ptr */ + #define MSTATE_TRANSITION(from, to) \ pushl $to; \ pushl $from; \ @@ -314,9 +360,12 @@ size_t _allsyscalls_size; #else /* __lint */ - ENTRY_NP2(sys_call, _allsyscalls) + ENTRY_NP2(brand_sys_call, _allsyscalls) + BRAND_CALLBACK(BRAND_CB_SYSCALL) + ALTENTRY(sys_call) / on entry eax = system call number + / set up the stack to look as in reg.h subl $8, %esp / pad the stack with ERRCODE and TRAPNO @@ -401,6 +450,7 @@ _syscall_fault: xorl %edx, %edx jmp _syslcall_done SET_SIZE(sys_call) + SET_SIZE(brand_sys_call) #endif /* __lint */ @@ -460,6 +510,25 @@ _syscall_fault: * * Note that we are unable to return both "rvals" to userland with this * call, as %edx is used by the sysexit instruction. + * + * One final complication in this routine is its interaction with + * single-stepping in a debugger. For most of the system call mechanisms, + * the CPU automatically clears the single-step flag before we enter the + * kernel. The sysenter mechanism does not clear the flag, so a user + * single-stepping through a libc routine may suddenly find him/herself + * single-stepping through the kernel. To detect this, kmdb compares the + * trap %pc to the [brand_]sys_enter addresses on each single-step trap. + * If it finds that we have single-stepped to a sysenter entry point, it + * explicitly clears the flag and executes the sys_sysenter routine. + * + * One final complication in this final complication is the fact that we + * have two different entry points for sysenter: brand_sys_sysenter and + * sys_sysenter. If we enter at brand_sys_sysenter and start single-stepping + * through the kernel with kmdb, we will eventually hit the instruction at + * sys_sysenter. kmdb cannot distinguish between that valid single-step + * and the undesirable one mentioned above. To avoid this situation, we + * simply add a jump over the instruction at sys_sysenter to make it + * impossible to single-step to it. */ #if defined(__lint) @@ -469,7 +538,19 @@ sys_sysenter() #else /* __lint */ - ENTRY_NP(sys_sysenter) + ENTRY_NP(brand_sys_sysenter) + pushl %edx + BRAND_CALLBACK(BRAND_CB_SYSENTER) + popl %edx + /* + * Jump over sys_sysenter to allow single-stepping as described + * above. + */ + ja 1f + + ALTENTRY(sys_sysenter) + nop +1: / / do what the call gate would've done to the stack .. / @@ -544,6 +625,38 @@ _sysenter_done: sti sysexit SET_SIZE(sys_sysenter) + SET_SIZE(brand_sys_sysenter) + +#endif /* __lint */ + +#if defined(__lint) +/* + * System call via an int80. This entry point is only used by the Linux + * application environment. Unlike the sysenter path, there is no default + * action to take if no callback is registered for this process. + */ +void +sys_int80() +{} + +#else /* __lint */ + + ENTRY_NP(brand_sys_int80) + BRAND_CALLBACK(BRAND_CB_INT80) + + ALTENTRY(sys_int80) + /* + * We hit an int80, but this process isn't of a brand with an int80 + * handler. Bad process! Make it look as if the INT failed. + * Modify %eip to point before the INT, push the expected error + * code and fake a GP fault. + * + */ + subl $2, (%esp) /* int insn 2-bytes */ + pushl $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2) + jmp gptrap / GP fault + SET_SIZE(sys_int80) + SET_SIZE(brand_sys_int80) /* * Declare a uintptr_t which covers the entire pc range of syscall |