diff options
Diffstat (limited to 'src/runtime/linux/amd64')
-rw-r--r-- | src/runtime/linux/amd64/defs.h | 48 | ||||
-rw-r--r-- | src/runtime/linux/amd64/rt0.s | 9 | ||||
-rw-r--r-- | src/runtime/linux/amd64/rt1.c | 486 | ||||
-rw-r--r-- | src/runtime/linux/amd64/sys.s | 201 |
4 files changed, 744 insertions, 0 deletions
diff --git a/src/runtime/linux/amd64/defs.h b/src/runtime/linux/amd64/defs.h new file mode 100644 index 000000000..e4c9ec08c --- /dev/null +++ b/src/runtime/linux/amd64/defs.h @@ -0,0 +1,48 @@ +/* + * System structs for Linux, amd64 + */ + +typedef uint64 dev_t; +typedef uint64 ino_t; +typedef uint32 mode_t; +typedef uint64 nlink_t; +typedef uint32 uid_t; +typedef uint32 gid_t; +typedef int64 off_t; +typedef int64 blksize_t; +typedef int64 blkcnt_t; +typedef int64 time_t; + +struct timespec { + time_t tv_sec; + int64 tv_nsec; +}; + +struct timeval { + time_t tv_sec; + int64 tv_usec; +}; + +struct stat { + dev_t st_dev; /* ID of device containing file */ + ino_t st_ino; /* inode number */ + nlink_t st_nlink; /* number of hard links */ + mode_t st_mode; /* protection */ + uid_t st_uid; /* user ID of owner */ + gid_t st_gid; /* group ID of owner */ + int32 pad0; + dev_t st_rdev; /* device ID (if special file) */ + off_t st_size; /* total size, in bytes */ + blksize_t st_blksize; /* blocksize for filesystem I/O */ + blkcnt_t st_blocks; /* number of blocks allocated */ + struct timespec st_atime; /* time of last access */ + struct timespec st_mtime; /* time of last modification */ + struct timespec st_ctime; /* time of last status change */ +}; + +#define O_CREAT 0100 + +// Linux-specific system calls +int64 futex(uint32*, int32, uint32, struct timespec*, uint32*, uint32); +int64 clone(int32, void*, M*, G*, void(*)(void)); + diff --git a/src/runtime/linux/amd64/rt0.s b/src/runtime/linux/amd64/rt0.s new file mode 100644 index 000000000..55be5bcee --- /dev/null +++ b/src/runtime/linux/amd64/rt0.s @@ -0,0 +1,9 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Darwin and Linux use the same linkage to main + +TEXT _rt0_amd64_linux(SB),7,$-8 + MOVQ $_rt0_amd64(SB), AX + JMP AX diff --git a/src/runtime/linux/amd64/rt1.c b/src/runtime/linux/amd64/rt1.c new file mode 100644 index 000000000..5b3e45809 --- /dev/null +++ b/src/runtime/linux/amd64/rt1.c @@ -0,0 +1,486 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "runtime.h" +#include "amd64_linux.h" +#include "signals_linux.h" + +/* From /usr/include/asm-x86_64/sigcontext.h */ +struct _fpstate { + uint16 cwd; + uint16 swd; + uint16 twd; /* Note this is not the same as the 32bit/x87/FSAVE twd */ + uint16 fop; + uint64 rip; + uint32 rdp; + uint32 mxcsr; + uint32 mxcsr_mask; + uint32 st_space[32]; /* 8*16 bytes for each FP-reg */ + uint32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */ + uint32 reserved2[24]; +}; + +struct sigcontext { + uint64 r8; + uint64 r9; + uint64 r10; + uint64 r11; + uint64 r12; + uint64 r13; + uint64 r14; + uint64 r15; + uint64 rdi; + uint64 rsi; + uint64 rbp; + uint64 rbx; + uint64 rdx; + uint64 rax; + uint64 rcx; + uint64 rsp; + uint64 rip; + uint64 eflags; /* RFLAGS */ + uint16 cs; + uint16 gs; + uint16 fs; + uint16 __pad0; + uint64 err; + uint64 trapno; + uint64 oldmask; + uint64 cr2; + struct _fpstate *fpstate; /* zero when no FPU context */ + uint64 reserved1[8]; +}; + + +/* From /usr/include/asm-x86_64/signal.h */ +typedef struct sigaltstack { + void /*__user*/ *ss_sp; + int32 ss_flags; + uint64 ss_size; +} stack_t; + +typedef uint64 sigset_t; + + +/* From /usr/include/asm-x86_64/ucontext.h */ +struct ucontext { + uint64 uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + + +void +print_sigcontext(struct sigcontext *sc) +{ + prints("\nrax "); sys·printhex(sc->rax); + prints("\nrbx "); sys·printhex(sc->rbx); + prints("\nrcx "); sys·printhex(sc->rcx); + prints("\nrdx "); sys·printhex(sc->rdx); + prints("\nrdi "); sys·printhex(sc->rdi); + prints("\nrsi "); sys·printhex(sc->rsi); + prints("\nrbp "); sys·printhex(sc->rbp); + prints("\nrsp "); sys·printhex(sc->rsp); + prints("\nr8 "); sys·printhex(sc->r8 ); + prints("\nr9 "); sys·printhex(sc->r9 ); + prints("\nr10 "); sys·printhex(sc->r10); + prints("\nr11 "); sys·printhex(sc->r11); + prints("\nr12 "); sys·printhex(sc->r12); + prints("\nr13 "); sys·printhex(sc->r13); + prints("\nr14 "); sys·printhex(sc->r14); + prints("\nr15 "); sys·printhex(sc->r15); + prints("\nrip "); sys·printhex(sc->rip); + prints("\nrflags "); sys·printhex(sc->eflags); + prints("\ncs "); sys·printhex(sc->cs); + prints("\nfs "); sys·printhex(sc->fs); + prints("\ngs "); sys·printhex(sc->gs); + prints("\n"); +} + + +/* + * This assembler routine takes the args from registers, puts them on the stack, + * and calls sighandler(). + */ +extern void sigtramp(void); +extern void sigignore(void); // just returns +extern void sigreturn(void); // calls sigreturn + +/* + * Rudimentary reverse-engineered definition of signal interface. + * You'd think it would be documented. + */ +/* From /usr/include/bits/siginfo.h */ +struct siginfo { + int32 si_signo; /* signal number */ + int32 si_errno; /* errno association */ + int32 si_code; /* signal code */ + int32 si_status; /* exit value */ + void *si_addr; /* faulting address */ + /* more stuff here */ +}; + +// This is a struct sigaction from /usr/include/asm/signal.h +struct sigaction { + void (*sa_handler)(int32, struct siginfo*, void*); + uint64 sa_flags; + void (*sa_restorer)(void); + uint64 sa_mask; +}; + +void +sighandler(int32 sig, struct siginfo* info, void** context) +{ + if(panicking) // traceback already printed + sys_Exit(2); + + struct sigcontext *sc = &(((struct ucontext *)context)->uc_mcontext); + + if(sig < 0 || sig >= NSIG){ + prints("Signal "); + sys·printint(sig); + }else{ + prints(sigtab[sig].name); + } + + prints("\nFaulting address: "); sys·printpointer(info->si_addr); + prints("\npc: "); sys·printhex(sc->rip); + prints("\n\n"); + + if(gotraceback()){ + traceback((void *)sc->rip, (void *)sc->rsp, (void *)sc->r15); + tracebackothers((void*)sc->r15); + print_sigcontext(sc); + } + + sys·Breakpoint(); + sys_Exit(2); +} + +struct stack_t { + void *sp; + int32 flags; + int32 pad; + int64 size; +}; + +void +signalstack(byte *p, int32 n) +{ + struct stack_t st; + + st.sp = p; + st.size = n; + st.pad = 0; + st.flags = 0; + sigaltstack(&st, nil); +} + +void rt_sigaction(int64, void*, void*, uint64); + +enum { + SA_RESTART = 0x10000000, + SA_ONSTACK = 0x08000000, + SA_RESTORER = 0x04000000, + SA_SIGINFO = 0x00000004, +}; + +void +initsig(void) +{ + static struct sigaction sa; + + int32 i; + sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER; + sa.sa_mask = 0xFFFFFFFFFFFFFFFFULL; + sa.sa_restorer = (void*)sigreturn; + for(i = 0; i<NSIG; i++) { + if(sigtab[i].flags) { + if(sigtab[i].flags & SigCatch) + sa.sa_handler = (void*)sigtramp; + else + sa.sa_handler = (void*)sigignore; + if(sigtab[i].flags & SigRestart) + sa.sa_flags |= SA_RESTART; + else + sa.sa_flags &= ~SA_RESTART; + rt_sigaction(i, &sa, nil, 8); + } + } +} + + +// Linux futex. +// +// futexsleep(uint32 *addr, uint32 val) +// futexwakeup(uint32 *addr) +// +// Futexsleep atomically checks if *addr == val and if so, sleeps on addr. +// Futexwakeup wakes up one thread sleeping on addr. +// Futexsleep is allowed to wake up spuriously. + +enum +{ + FUTEX_WAIT = 0, + FUTEX_WAKE = 1, + + EINTR = 4, + EAGAIN = 11, +}; + +// TODO(rsc) I tried using 1<<40 here but futex woke up (-ETIMEDOUT). +// I wonder if the timespec that gets to the kernel +// actually has two 32-bit numbers in it, so tha +// a 64-bit 1<<40 ends up being 0 seconds, +// 1<<8 nanoseconds. +static struct timespec longtime = +{ + 1<<30, // 34 years + 0 +}; + +// Atomically, +// if(*addr == val) sleep +// Might be woken up spuriously; that's allowed. +static void +futexsleep(uint32 *addr, uint32 val) +{ + int64 ret; + + ret = futex(addr, FUTEX_WAIT, val, &longtime, nil, 0); + if(ret >= 0 || ret == -EAGAIN || ret == -EINTR) + return; + + prints("futexsleep addr="); + sys·printpointer(addr); + prints(" val="); + sys·printint(val); + prints(" returned "); + sys·printint(ret); + prints("\n"); + *(int32*)0x1005 = 0x1005; +} + +// If any procs are sleeping on addr, wake up at least one. +static void +futexwakeup(uint32 *addr) +{ + int64 ret; + + ret = futex(addr, FUTEX_WAKE, 1, nil, nil, 0); + + if(ret >= 0) + return; + + // I don't know that futex wakeup can return + // EAGAIN or EINTR, but if it does, it would be + // safe to loop and call futex again. + + prints("futexwakeup addr="); + sys·printpointer(addr); + prints(" returned "); + sys·printint(ret); + prints("\n"); + *(int32*)0x1006 = 0x1006; +} + + +// Lock and unlock. +// +// The lock state is a single 32-bit word that holds +// a 31-bit count of threads waiting for the lock +// and a single bit (the low bit) saying whether the lock is held. +// The uncontended case runs entirely in user space. +// When contention is detected, we defer to the kernel (futex). +// +// A reminder: compare-and-swap cas(addr, old, new) does +// if(*addr == old) { *addr = new; return 1; } +// else return 0; +// but atomically. + +static void +futexlock(Lock *l) +{ + uint32 v; + +again: + v = l->key; + if((v&1) == 0){ + if(cas(&l->key, v, v|1)){ + // Lock wasn't held; we grabbed it. + return; + } + goto again; + } + + // Lock was held; try to add ourselves to the waiter count. + if(!cas(&l->key, v, v+2)) + goto again; + + // We're accounted for, now sleep in the kernel. + // + // We avoid the obvious lock/unlock race because + // the kernel won't put us to sleep if l->key has + // changed underfoot and is no longer v+2. + // + // We only really care that (v&1) == 1 (the lock is held), + // and in fact there is a futex variant that could + // accomodate that check, but let's not get carried away.) + futexsleep(&l->key, v+2); + + // We're awake: remove ourselves from the count. + for(;;){ + v = l->key; + if(v < 2) + throw("bad lock key"); + if(cas(&l->key, v, v-2)) + break; + } + + // Try for the lock again. + goto again; +} + +static void +futexunlock(Lock *l) +{ + uint32 v; + + // Atomically get value and clear lock bit. +again: + v = l->key; + if((v&1) == 0) + throw("unlock of unlocked lock"); + if(!cas(&l->key, v, v&~1)) + goto again; + + // If there were waiters, wake one. + if(v & ~1) + futexwakeup(&l->key); +} + +void +lock(Lock *l) +{ + if(m->locks < 0) + throw("lock count"); + m->locks++; + futexlock(l); +} + +void +unlock(Lock *l) +{ + m->locks--; + if(m->locks < 0) + throw("lock count"); + futexunlock(l); +} + + +// One-time notifications. +// +// Since the lock/unlock implementation already +// takes care of sleeping in the kernel, we just reuse it. +// (But it's a weird use, so it gets its own interface.) +// +// We use a lock to represent the event: +// unlocked == event has happened. +// Thus the lock starts out locked, and to wait for the +// event you try to lock the lock. To signal the event, +// you unlock the lock. + +void +noteclear(Note *n) +{ + n->lock.key = 0; // memset(n, 0, sizeof *n) + futexlock(&n->lock); +} + +void +notewakeup(Note *n) +{ + futexunlock(&n->lock); +} + +void +notesleep(Note *n) +{ + futexlock(&n->lock); + futexunlock(&n->lock); // Let other sleepers find out too. +} + + +// Clone, the Linux rfork. +enum +{ + CLONE_VM = 0x100, + CLONE_FS = 0x200, + CLONE_FILES = 0x400, + CLONE_SIGHAND = 0x800, + CLONE_PTRACE = 0x2000, + CLONE_VFORK = 0x4000, + CLONE_PARENT = 0x8000, + CLONE_THREAD = 0x10000, + CLONE_NEWNS = 0x20000, + CLONE_SYSVSEM = 0x40000, + CLONE_SETTLS = 0x80000, + CLONE_PARENT_SETTID = 0x100000, + CLONE_CHILD_CLEARTID = 0x200000, + CLONE_UNTRACED = 0x800000, + CLONE_CHILD_SETTID = 0x1000000, + CLONE_STOPPED = 0x2000000, + CLONE_NEWUTS = 0x4000000, + CLONE_NEWIPC = 0x8000000, +}; + +void +newosproc(M *m, G *g, void *stk, void (*fn)(void)) +{ + int64 ret; + int32 flags; + + flags = CLONE_PARENT /* getppid doesn't change in child */ + | CLONE_VM /* share memory */ + | CLONE_FS /* share cwd, etc */ + | CLONE_FILES /* share fd table */ + | CLONE_SIGHAND /* share sig handler table */ + | CLONE_PTRACE /* revisit - okay for now */ + | CLONE_THREAD /* revisit - okay for now */ + ; + + if(0){ + prints("newosproc stk="); + sys·printpointer(stk); + prints(" m="); + sys·printpointer(m); + prints(" g="); + sys·printpointer(g); + prints(" fn="); + sys·printpointer(fn); + prints(" clone="); + sys·printpointer(clone); + prints("\n"); + } + + ret = clone(flags, stk, m, g, fn); + if(ret < 0) + *(int32*)123 = 123; +} + +void +osinit(void) +{ +} + +// Called to initialize a new m (including the bootstrap m). +void +minit(void) +{ + // Initialize signal handling. + m->gsignal = malg(32*1024); // OS X wants >=8K, Linux >=2K + signalstack(m->gsignal->stackguard, 32*1024); +} diff --git a/src/runtime/linux/amd64/sys.s b/src/runtime/linux/amd64/sys.s new file mode 100644 index 000000000..49349bb48 --- /dev/null +++ b/src/runtime/linux/amd64/sys.s @@ -0,0 +1,201 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +// System calls and other sys.stuff for AMD64, Linux +// + +TEXT sys·Exit(SB),7,$0-8 + MOVL 8(SP), DI + MOVL $231, AX // exitgroup - force all os threads to exi + SYSCALL + RET + +TEXT exit1(SB),7,$0-8 + MOVL 8(SP), DI + MOVL $60, AX // exit - exit the current os thread + SYSCALL + RET + +TEXT open(SB),7,$0-16 + MOVQ 8(SP), DI + MOVL 16(SP), SI + MOVL 20(SP), DX + MOVL $2, AX // syscall entry + SYSCALL + RET + +TEXT close(SB),7,$0-8 + MOVL 8(SP), DI + MOVL $3, AX // syscall entry + SYSCALL + RET + +TEXT fstat(SB),7,$0-16 + MOVL 8(SP), DI + MOVQ 16(SP), SI + MOVL $5, AX // syscall entry + SYSCALL + RET + +TEXT read(SB),7,$0-24 + MOVL 8(SP), DI + MOVQ 16(SP), SI + MOVL 24(SP), DX + MOVL $0, AX // syscall entry + SYSCALL + RET + +TEXT write(SB),7,$0-24 + MOVL 8(SP), DI + MOVQ 16(SP), SI + MOVL 24(SP), DX + MOVL $1, AX // syscall entry + SYSCALL + RET + +TEXT sys·write(SB),7,$0-24 + MOVL 8(SP), DI + MOVQ 16(SP), SI + MOVL 24(SP), DX + MOVL $1, AX // syscall entry + SYSCALL + RET + +TEXT rt_sigaction(SB),7,$0-32 + MOVL 8(SP), DI + MOVQ 16(SP), SI + MOVQ 24(SP), DX + MOVQ 32(SP), R10 + MOVL $13, AX // syscall entry + SYSCALL + RET + +TEXT sigtramp(SB),7,$24-16 + MOVQ 32(R14), R15 // g = m->gsignal + MOVQ DI,0(SP) + MOVQ SI,8(SP) + MOVQ DX,16(SP) + CALL sighandler(SB) + RET + +TEXT sigignore(SB),7,$0 + RET + +TEXT sigreturn(SB),7,$0 + MOVL $15, AX // rt_sigreturn + SYSCALL + INT $3 // not reached + +TEXT sys·mmap(SB),7,$0-32 + MOVQ 8(SP), DI + MOVQ $0, SI + MOVL 16(SP), SI + MOVL 20(SP), DX + MOVL 24(SP), R10 + MOVL 28(SP), R8 + MOVL 32(SP), R9 + +/* flags arg for ANON is 1000 but sb 20 */ + MOVL CX, AX + ANDL $~0x1000, CX + ANDL $0x1000, AX + SHRL $7, AX + ORL AX, CX + + MOVL CX, R10 + MOVL $9, AX // syscall entry + SYSCALL + CMPQ AX, $0xfffffffffffff001 + JLS 2(PC) + CALL notok(SB) + RET + +TEXT notok(SB),7,$0 + MOVQ $0xf1, BP + MOVQ BP, (BP) + RET + +TEXT sys·memclr(SB),7,$0-16 + MOVQ 8(SP), DI // arg 1 addr + MOVL 16(SP), CX // arg 2 count (cannot be zero) + ADDL $7, CX + SHRL $3, CX + MOVQ $0, AX + CLD + REP + STOSQ + RET + +TEXT sys·getcallerpc+0(SB),7,$0 + MOVQ x+0(FP),AX // addr of first arg + MOVQ -8(AX),AX // get calling pc + RET + +TEXT sys·setcallerpc+0(SB),7,$0 + MOVQ x+0(FP),AX // addr of first arg + MOVQ x+8(FP), BX + MOVQ BX, -8(AX) // set calling pc + RET + +// int64 futex(int32 *uaddr, int32 op, int32 val, +// struct timespec *timeout, int32 *uaddr2, int32 val2); +TEXT futex(SB),7,$0 + MOVQ 8(SP), DI + MOVL 16(SP), SI + MOVL 20(SP), DX + MOVQ 24(SP), R10 + MOVQ 32(SP), R8 + MOVL 40(SP), R9 + MOVL $202, AX + SYSCALL + RET + +// int64 clone(int32 flags, void *stack, M *m, G *g, void (*fn)(void)); +TEXT clone(SB),7,$0 + MOVL flags+8(SP), DI + MOVQ stack+16(SP), SI + + // Copy m, g, fn off parent stack for use by child. + // Careful: Linux system call clobbers CX and R11. + MOVQ m+24(SP), R8 + MOVQ g+32(SP), R9 + MOVQ fn+40(SP), R12 + + MOVL $56, AX + SYSCALL + + // In parent, return. + CMPQ AX, $0 + JEQ 2(PC) + RET + + // In child, set up new stack + MOVQ SI, SP + MOVQ R8, R14 // m + MOVQ R9, R15 // g + + // Initialize m->procid to Linux tid + MOVL $186, AX // gettid + SYSCALL + MOVQ AX, 24(R14) + + // Call fn + CALL R12 + + // It shouldn't return. If it does, exi + MOVL $111, DI + MOVL $60, AX + SYSCALL + JMP -3(PC) // keep exiting + +TEXT sigaltstack(SB),7,$-8 + MOVQ new+8(SP), DI + MOVQ old+16(SP), SI + MOVQ $131, AX + SYSCALL + CMPQ AX, $0xfffffffffffff001 + JLS 2(PC) + CALL notok(SB) + RET |