author     Jerry Jelinek <jerry.jelinek@joyent.com>   2014-10-14 12:24:58 +0000
committer  Jerry Jelinek <jerry.jelinek@joyent.com>   2014-10-14 12:24:58 +0000
commit     d02748937355fe239b4e1f4a7b927795d98d889d (patch)
tree       9f3ff0171182d0b0c08a4f391e21b7234e44e3cc
parent     7940642e2f955b21ae5a813f41db265ddb93bfd0 (diff)
OS-3415 lxbrand mishandles amd64 red zone, original %r15 value clobbered during uucopy
-rw-r--r--  usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s   49
-rw-r--r--  usr/src/uts/intel/brand/lx/lx_brand_asm.s          51
2 files changed, 60 insertions, 40 deletions
diff --git a/usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s b/usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s
index b808c398a2..f449f47be3 100644
--- a/usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s
+++ b/usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s
@@ -37,12 +37,12 @@
#define JMP \
pushq $_CONST(. - lx_handler_table); \
jmp lx_handler; \
- .align 16;
+ .align 16;
#define JMP4 JMP; JMP; JMP; JMP
-#define JMP16 JMP4; JMP4; JMP4; JMP4
-#define JMP64 JMP16; JMP16; JMP16; JMP16
-#define JMP256 JMP64; JMP64; JMP64; JMP64
+#define JMP16 JMP4; JMP4; JMP4; JMP4
+#define JMP64 JMP16; JMP16; JMP16; JMP16
+#define JMP256 JMP64; JMP64; JMP64; JMP64
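[Editorial note] Each JMP entry occupies a fixed 16-byte slot (the trailing .align 16 pads it out), so the constant it pushes is 16 times its index in the table. An illustrative expansion of the first two entries follows; the literal offsets are whatever `. - lx_handler_table` evaluates to at assembly time and are shown here only to make the encoding concrete.

	/* entry 0 of lx_handler_table, at offset 0 */
	pushq	$0
	jmp	lx_handler
	.align	16			/* pad the slot out to 16 bytes */

	/* entry 1, at offset 16 */
	pushq	$16
	jmp	lx_handler
	.align	16

lx_handler later recovers the syscall number from that pushed offset with a shift (see the shrq $4 further down).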
/*
* Alternate jump table that turns on lx_traceflag before proceeding with
@@ -51,14 +51,14 @@
#define TJMP \
pushq $_CONST(. - lx_handler_trace_table); \
jmp lx_handler_trace; \
- .align 16;
+ .align 16;
#define TJMP4 TJMP; TJMP; TJMP; TJMP
-#define TJMP16 TJMP4; TJMP4; TJMP4; TJMP4
-#define TJMP64 TJMP16; TJMP16; TJMP16; TJMP16
-#define TJMP256 TJMP64; TJMP64; TJMP64; TJMP64
+#define TJMP16 TJMP4; TJMP4; TJMP4; TJMP4
+#define TJMP64 TJMP16; TJMP16; TJMP16; TJMP16
+#define TJMP256 TJMP64; TJMP64; TJMP64; TJMP64
+
-
#if defined(lint)
#include <sys/types.h>
@@ -126,10 +126,12 @@ lx_sigreturn_tolibc(uintptr_t sp)
SET_SIZE(lx_handler_table)
ENTRY_NP(lx_handler_trace)
+ subq $128, %rsp /* skip red zone */
pushq %rsi
- movq lx_traceflag@GOTPCREL(%rip), %rsi
+ movq lx_traceflag@GOTPCREL(%rip), %rsi
movq $1, (%rsi)
popq %rsi
+ addq $128, %rsp
/*
* While we could just fall through to lx_handler(), we "tail-call" it
* instead to make ourselves a little more comprehensible to trace
@@ -140,6 +142,13 @@ lx_sigreturn_tolibc(uintptr_t sp)
ALTENTRY(lx_handler)
/*
+ * We are running on the Linux process's stack here so we have to
+ * account for the AMD64 ABI red zone of 128 bytes past the %rsp which
+ * the process can use as scratch space.
+ */
+ subq $128, %rsp
+
+ /*
* %rbp isn't always going to be a frame pointer on Linux, but when
* it is, saving it here lets us have a coherent stack backtrace.
*/
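[Editorial note] To make the hazard concrete, here is a hypothetical Linux leaf routine (not code from this tree): optimized leaf code may keep live values in the 128 bytes below %rsp, and those values must still be there when control comes back from the emulation, which runs on this same stack.

	movq	%rdi, -8(%rsp)		/* scratch kept in the red zone */
	movl	$39, %eax		/* getpid on Linux amd64 */
	syscall				/* enters the lx emulation path */
	movq	-8(%rsp), %rdi		/* expects the slot to be intact */

Without the subq above, the handler's own pushes and the lx_regs_t area would be built directly on top of that scratch slot.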
@@ -154,13 +163,13 @@ lx_sigreturn_tolibc(uintptr_t sp)
* Save %rbp and then fill it with what would be its usual value as
* the frame pointer. The value we save for %rsp needs to be the
* stack pointer at the time of the syscall so we need to skip the
- * saved %rbp and (what will be) the return address.
+ * red zone, saved %rbp and (what will be) the return address.
*/
movq %rbp, LXR_RBP(%rsp)
movq %rsp, %rbp
- addq $_CONST(SIZEOF_LX_REGS_T), %rbp
+ addq $SIZEOF_LX_REGS_T, %rbp
movq %rbp, LXR_RSP(%rsp)
- addq $_CONST(_MUL(CPTRSIZE, 2)), LXR_RSP(%rsp)
+ addq $144, LXR_RSP(%rsp) /* 128 byte red zone + 2 pointers */
movq $0, LXR_GS(%rsp)
movw %gs, LXR_GS(%rsp)
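[Editorial note] Putting the constants together, the handler's frame looks roughly like this; the sketch assumes the prologue pushed %rbp and then reserved SIZEOF_LX_REGS_T, which is what the 136- and 144-byte offsets imply.

	%rbp + 144	Linux %rsp at the time of the syscall (value stored in LXR_RSP)
	%rbp + 136	table offset, exchanged below for the return address
	%rbp + 8	128 untouched bytes covering the red zone
	%rbp + 0	saved Linux %rbp
	%rsp		lx_regs_t   (%rbp == %rsp + SIZEOF_LX_REGS_T)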
@@ -187,7 +196,7 @@ lx_sigreturn_tolibc(uintptr_t sp)
* value on the stack with the return address, and use the value to
* compute the system call number by dividing by the table entry size.
*/
- xchgq CPTRSIZE(%rbp), %rax
+ xchgq 136(%rbp), %rax /* 128 byte red zone + rbp we pushed */
shrq $4, %rax
movq %rax, LXR_RAX(%rsp)
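[Editorial note] A worked example of the recovery, with illustrative numbers: the entry for syscall N pushed 16 * N, so a pushed value of 0x40 identifies syscall 4.

	/* pushed offset = 16 * syscall number (fixed 16-byte entries)	*/
	/* xchgq:   %rax := 0x40, return address stored at 136(%rbp)	*/
	/* shrq $4: %rax := 0x40 >> 4 == 4, saved into LXR_RAX		*/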
@@ -225,10 +234,12 @@ lx_sigreturn_tolibc(uintptr_t sp)
movq LXR_R15(%rsp), %r15
movw LXR_GS(%rsp), %gs
- addq $SIZEOF_LX_REGS_T, %rsp
+ /* addq $SIZEOF_LX_REGS_T, %rsp not needed due to next instr. */
movq %rbp, %rsp
popq %rbp
+
+ addq $128, %rsp /* red zone */
ret
SET_SIZE(lx_handler)
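[Editorial note] Because the prologue left %rbp equal to %rsp + SIZEOF_LX_REGS_T, the movq above already discards the register-save area, which is why the explicit addq is no longer needed. The teardown is equivalent to the following sketch:

	addq	$SIZEOF_LX_REGS_T, %rsp	/* drop the lx_regs_t area...        */
	movq	%rbp, %rsp		/* ...which this does on its own     */
	popq	%rbp			/* restore the Linux %rbp            */
	addq	$128, %rsp		/* step back over the skipped bytes  */
	ret				/* to the address swapped in earlier */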
@@ -236,7 +247,7 @@ lx_sigreturn_tolibc(uintptr_t sp)
* lx_setup_clone(uintptr_t %gs, void *retaddr, void *stack)
* ignore arg0 (%rdi) on 64-bit
* Return to Linux app using arg1 (%rsi) with the Linux stack we got
- * in arg2 (%rdx).
+ * in arg2 (%rdx).
*/
ENTRY_NP(lx_setup_clone)
xorq %rbp, %rbp /* terminating stack */
@@ -281,7 +292,7 @@ lx_sigreturn_tolibc(uintptr_t sp)
* Unlike the 32-bit case, we don't reset %rbp before jumping into the
* Linux handler, since that would mean the handler would clobber our
* data in the stack frame it builds.
- *
+ *
*/
ENTRY_NP(lx_sigdeliver)
pushq %rbp
@@ -320,7 +331,7 @@ lx_sigreturn_tolibc(uintptr_t sp)
* arg1 %rsi is ptr to converted siginfo on stack or NULL
*/
movq -16(%rbp), %rsi
- cmp $0, %rsi
+ cmp $0, %rsi
je 1f
movq 8(%rsp), %rsi
1:
@@ -352,7 +363,7 @@ lx_sigreturn_tolibc(uintptr_t sp)
* lx_sigacthandler(int sig, siginfo_t *s, void *p)
*/
ENTRY_NP(lx_sigacthandler)
- movq libc_sigacthandler@GOTPCREL(%rip), %rax
+ movq libc_sigacthandler@GOTPCREL(%rip), %rax
jmp *(%rax) /* jmp to libc's interposer */
SET_SIZE(lx_sigacthandler)
diff --git a/usr/src/uts/intel/brand/lx/lx_brand_asm.s b/usr/src/uts/intel/brand/lx/lx_brand_asm.s
index fdd80e1ad2..897f3891fc 100644
--- a/usr/src/uts/intel/brand/lx/lx_brand_asm.s
+++ b/usr/src/uts/intel/brand/lx/lx_brand_asm.s
@@ -116,10 +116,15 @@ SET_SIZE(lx_brand_int80_disable)
*
* %rax - syscall number
*
- * When called, all general registers, except for %r15, are as they were when
- * the user process made the system call. %r15 is available to the callback as
- * a scratch register. If the callback returns to the kernel path, %r15 does
- * not have to be restored to the user value.
+ * See uts/i86pc/ml/syscall_asm_amd64.s for what happens before we get into
+ * the following lx brand-specific codepath.
+ *
+ * As the comment on the BRAND_CALLBACK macro describes, when we're called, all
+ * general registers, except for %r15, are as they were when the user process
+ * made the system call. %r15 is available to the callback as a scratch
+ * register. If the callback returns to the kernel path, %r15 does not have to
+ * be restored to the user value since BRAND_CALLBACK does that. If we jump
+ * out to the emulation we need to restore %r15 here.
*
* To 'return' to our user-space handler, we just need to place its address
* into %rcx. The original return address is passed back in %rax.
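[Editorial note] Concretely, and only as a sketch rather than the actual callback code: the syscall instruction left the user's return address in %rcx, and the sysret return path resumes at whatever %rcx holds, so the two values are effectively swapped before heading back to userland.

	movq	%rcx, %rax		/* original return address goes back in %rax    */
	movq	%r15, %rcx		/* hypothetical: %r15 holds the table-entry addr */
	/* the normal sysret return path now resumes at the handler table entry */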
@@ -163,18 +168,18 @@ ENTRY(lx_brand_syscall_callback)
* This block is basically similar to a large assert.
*
* In debug code we do some extra validation of the %fsbase register to
- * validate that we always have the expected Linux thread pointer and not
- * the native value. At this point we know that the lwp brand data should
- * contain the Linux %fsbase (from a Linux arch_prctl syscall) since the
- * native %fsbase check above is non-null. We also know that we are
- * making a Linux syscall from the other check above. We read the %fsbase
- * and compare to the saved Linux %fsbase in the lwp_brand data. If we
- * don't have the expected value, we save the incorrect %fsbase value
- * into the br_lx_fsbase member for later inspection and change the
- * syscall we are making into the Linux pivot_root syscall (an obscure
- * syscall which we don't support and which an app in the zone cannot
- * use). This allows us to see this error downstream via DTrace and
- * see the incorrect %fsbase value we had.
+ * validate that we always have the expected Linux thread pointer and
+ * not the native value. At this point we know that the lwp brand data
+ * should contain the Linux %fsbase (from a Linux arch_prctl syscall)
+ * since the native %fsbase check above is non-null. We also know that
+ * we are making a Linux syscall from the other check above. We read
+ * the %fsbase and compare to the saved Linux %fsbase in the lwp_brand
+ * data. If we don't have the expected value, we save the incorrect
+ * %fsbase value into the br_lx_fsbase member for later inspection and
+ * change the syscall we are making into the Linux pivot_root syscall
+ * (an obscure syscall which we don't support and which an app in the
+ * zone cannot use). This allows us to see this error downstream via
+ * DTrace and see the incorrect %fsbase value we had.
*/
GET_V(SP_REG, 0, V_LWP, %r15); /* get lwp pointer */
movq LWP_BRAND(%r15), %r15 /* grab lx lwp data pointer */
@@ -188,7 +193,8 @@ ENTRY(lx_brand_syscall_callback)
movl $MSR_AMD_FSBASE, %ecx /* fsbase msr */
rdmsr /* get fsbase to edx:eax */
- shlq $32, %rdx /* fix %edx; %eax lo already ok */
+ /* fix %edx; %eax lo already ok */
+ shlq $32, %rdx
or %rdx, %rax /* full value in %rax */
cmp %rax, %r15 /* check if is lx fsbase */
je 4f /* match, ok */
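[Editorial note] For reference, rdmsr returns the 64-bit MSR selected by %ecx split across %edx (high half) and %eax (low half); the shift-and-or rebuilds the full value. A worked trace with an assumed thread-pointer value:

	/* assume the Linux %fsbase is 0x00007f8e12345000		*/
	/* rdmsr:         %edx = 0x00007f8e, %eax = 0x12345000		*/
	/* shlq $32:      %rdx = 0x00007f8e00000000			*/
	/* or %rdx,%rax:  %rax = 0x00007f8e12345000, ready for the cmp	*/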
@@ -198,7 +204,7 @@ ENTRY(lx_brand_syscall_callback)
GET_V(%rdx, 0, V_LWP, %r15); /* get lwp pointer */
movq LWP_BRAND(%r15), %r15 /* grab lx lwp data pointer */
movq %rax, BR_LX_FSBASE(%r15) /* save bad Linux fsbase */
- movq $155, %rax /* fail! use pivot_root syscall */
+ movq $155, %rax /* fail! use pivot_root */
jmp 5f
4:
@@ -236,9 +242,12 @@ ENTRY(lx_brand_syscall_callback)
movq 0x8(%rsp), %rcx
movq 0x10(%rsp), %rdx
addq $24, %rsp
-3:
- /* Linux syscall - validate syscall number */
+3:
+ /*
+ * Linux syscall - validate syscall number.
+ * If necessary, the Linux %fsbase has already been loaded above.
+ */
GET_PROCP(SP_REG, 0, %r15)
movq P_ZONE(%r15), %r15 /* grab the zone pointer */
/* grab the 'max syscall num' for this process from 'zone brand data' */
@@ -304,7 +313,7 @@ ENTRY(lx_brand_int80_callback)
cmpl %ebx, %eax /* is 0 <= syscall <= MAX? */
jbe 0f /* yes, syscall is OK */
- xorl %eax, %eax /* no, zero syscall number */
+ xorl %eax, %eax /* no, zero syscall number */
0:
.lx_brand_int80_patch_point: