Diffstat (limited to 'src/pkg/runtime/amd64')
-rw-r--r--  src/pkg/runtime/amd64/arch.h        3
-rw-r--r--  src/pkg/runtime/amd64/asm.s       577
-rw-r--r--  src/pkg/runtime/amd64/atomic.c     19
-rw-r--r--  src/pkg/runtime/amd64/closure.c   123
-rw-r--r--  src/pkg/runtime/amd64/memmove.s    88
-rw-r--r--  src/pkg/runtime/amd64/traceback.c 295
6 files changed, 1105 insertions, 0 deletions
diff --git a/src/pkg/runtime/amd64/arch.h b/src/pkg/runtime/amd64/arch.h
new file mode 100644
index 000000000..fe10fd89f
--- /dev/null
+++ b/src/pkg/runtime/amd64/arch.h
@@ -0,0 +1,3 @@
+enum {
+ thechar = '6'
+};
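
(In the Plan 9-derived Go toolchain each architecture is named by a single
character: '5' for arm, '6' for amd64, '8' for 386; hence the tool names
6a, 6c, 6g, 6l. thechar identifies the target to architecture-independent
runtime code.)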
diff --git a/src/pkg/runtime/amd64/asm.s b/src/pkg/runtime/amd64/asm.s
new file mode 100644
index 000000000..3e3818c10
--- /dev/null
+++ b/src/pkg/runtime/amd64/asm.s
@@ -0,0 +1,577 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "amd64/asm.h"
+
+TEXT _rt0_amd64(SB),7,$-8
+ // copy arguments forward on an even stack
+ MOVQ 0(DI), AX // argc
+ LEAQ 8(DI), BX // argv
+ SUBQ $(4*8+7), SP // 2args 2auto
+ ANDQ $~15, SP
+ MOVQ AX, 16(SP)
+ MOVQ BX, 24(SP)
+
+ // if there is an initcgo, call it.
+ MOVQ initcgo(SB), AX
+ TESTQ AX, AX
+ JZ needtls
+ CALL AX
+ CMPL runtime·iswindows(SB), $0
+ JEQ ok
+
+needtls:
+ LEAQ runtime·tls0(SB), DI
+ CALL runtime·settls(SB)
+
+ // store through it, to make sure it works
+ get_tls(BX)
+ MOVQ $0x123, g(BX)
+ MOVQ runtime·tls0(SB), AX
+ CMPQ AX, $0x123
+ JEQ 2(PC)
+ MOVL AX, 0 // abort
+ok:
+ // set the per-goroutine and per-mach "registers"
+ get_tls(BX)
+ LEAQ runtime·g0(SB), CX
+ MOVQ CX, g(BX)
+ LEAQ runtime·m0(SB), AX
+ MOVQ AX, m(BX)
+
+ // save m->g0 = g0
+ MOVQ CX, m_g0(AX)
+
+ // create istack out of the given (operating system) stack
+ LEAQ (-8192+104)(SP), AX
+ MOVQ AX, g_stackguard(CX)
+ MOVQ SP, g_stackbase(CX)
+
+ CLD // convention is D is always left cleared
+ CALL runtime·check(SB)
+
+ MOVL 16(SP), AX // copy argc
+ MOVL AX, 0(SP)
+ MOVQ 24(SP), AX // copy argv
+ MOVQ AX, 8(SP)
+ CALL runtime·args(SB)
+ CALL runtime·osinit(SB)
+ CALL runtime·schedinit(SB)
+
+ // create a new goroutine to start program
+ PUSHQ $runtime·mainstart(SB) // entry
+ PUSHQ $0 // arg size
+ CALL runtime·newproc(SB)
+ POPQ AX
+ POPQ AX
+
+ // start this M
+ CALL runtime·mstart(SB)
+
+ CALL runtime·notok(SB) // never returns
+ RET
+
+TEXT runtime·mainstart(SB),7,$0
+ CALL main·init(SB)
+ CALL runtime·initdone(SB)
+ CALL main·main(SB)
+ PUSHQ $0
+ CALL runtime·exit(SB)
+ POPQ AX
+ CALL runtime·notok(SB)
+ RET
+
+TEXT runtime·breakpoint(SB),7,$0
+ BYTE $0xcc
+ RET
+
+/*
+ * go-routine
+ */
+
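+// The gobuf_*, g_*, and m_* symbols used below are field offsets
+// (Gobuf.sp, Gobuf.pc, Gobuf.g, and so on) defined in the
+// amd64/asm.h included above.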
+// void gosave(Gobuf*)
+// save state in Gobuf; setjmp
+TEXT runtime·gosave(SB), 7, $0
+ MOVQ 8(SP), AX // gobuf
+ LEAQ 8(SP), BX // caller's SP
+ MOVQ BX, gobuf_sp(AX)
+ MOVQ 0(SP), BX // caller's PC
+ MOVQ BX, gobuf_pc(AX)
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ BX, gobuf_g(AX)
+ RET
+
+// void gogo(Gobuf*, uintptr)
+// restore state from Gobuf; longjmp
+TEXT runtime·gogo(SB), 7, $0
+ MOVQ 16(SP), AX // return 2nd arg
+ MOVQ 8(SP), BX // gobuf
+ MOVQ gobuf_g(BX), DX
+ MOVQ 0(DX), CX // make sure g != nil
+ get_tls(CX)
+ MOVQ DX, g(CX)
+ MOVQ gobuf_sp(BX), SP // restore SP
+ MOVQ gobuf_pc(BX), BX
+ JMP BX
+
+// void gogocall(Gobuf*, void (*fn)(void))
+// restore state from Gobuf but then call fn.
+// (call fn, returning to state in Gobuf)
+TEXT runtime·gogocall(SB), 7, $0
+ MOVQ 16(SP), AX // fn
+ MOVQ 8(SP), BX // gobuf
+ MOVQ gobuf_g(BX), DX
+ get_tls(CX)
+ MOVQ DX, g(CX)
+ MOVQ 0(DX), CX // make sure g != nil
+ MOVQ gobuf_sp(BX), SP // restore SP
+ MOVQ gobuf_pc(BX), BX
+ PUSHQ BX
+ JMP AX
+ POPQ BX // not reached
+
+// void mcall(void (*fn)(G*))
+// Switch to m->g0's stack, call fn(g).
+// Fn must never return. It should gogo(&g->sched)
+// to keep running g.
+TEXT runtime·mcall(SB), 7, $0
+ MOVQ fn+0(FP), DI
+
+ get_tls(CX)
+ MOVQ g(CX), AX // save state in g->gobuf
+ MOVQ 0(SP), BX // caller's PC
+ MOVQ BX, (g_sched+gobuf_pc)(AX)
+ LEAQ 8(SP), BX // caller's SP
+ MOVQ BX, (g_sched+gobuf_sp)(AX)
+ MOVQ AX, (g_sched+gobuf_g)(AX)
+
+ // switch to m->g0 & its stack, call fn
+ MOVQ m(CX), BX
+ MOVQ m_g0(BX), SI
+ CMPQ SI, AX // if g == m->g0 call badmcall
+ JNE 2(PC)
+ CALL runtime·badmcall(SB)
+ MOVQ SI, g(CX) // g = m->g0
+ MOVQ (g_sched+gobuf_sp)(SI), SP // sp = m->g0->gobuf.sp
+ PUSHQ AX
+ CALL DI
+ POPQ AX
+ CALL runtime·badmcall2(SB)
+ RET
+
+/*
+ * support for morestack
+ */
+
+// Called during function prolog when more stack is needed.
+// Caller has already done get_tls(CX); MOVQ m(CX), BX.
+TEXT runtime·morestack(SB),7,$0
+ // Cannot grow scheduler stack (m->g0).
+ MOVQ m_g0(BX), SI
+ CMPQ g(CX), SI
+ JNE 2(PC)
+ INT $3
+
+ // Called from f.
+ // Set m->morebuf to f's caller.
+ MOVQ 8(SP), AX // f's caller's PC
+ MOVQ AX, (m_morebuf+gobuf_pc)(BX)
+ LEAQ 16(SP), AX // f's caller's SP
+ MOVQ AX, (m_morebuf+gobuf_sp)(BX)
+ MOVQ AX, m_moreargp(BX)
+ get_tls(CX)
+ MOVQ g(CX), SI
+ MOVQ SI, (m_morebuf+gobuf_g)(BX)
+
+ // Set m->morepc to f's PC.
+ MOVQ 0(SP), AX
+ MOVQ AX, m_morepc(BX)
+
+ // Call newstack on m->g0's stack.
+ MOVQ m_g0(BX), BP
+ MOVQ BP, g(CX)
+ MOVQ (g_sched+gobuf_sp)(BP), SP
+ CALL runtime·newstack(SB)
+ MOVQ $0, 0x1003 // crash if newstack returns
+ RET
+
+// Called from reflection library. Mimics morestack,
+// reuses stack growth code to create a frame
+// with the desired args running the desired function.
+//
+// func call(fn *byte, arg *byte, argsize uint32).
+TEXT reflect·call(SB), 7, $0
+ get_tls(CX)
+ MOVQ m(CX), BX
+
+ // Save our caller's state as the PC and SP to
+ // restore when returning from f.
+ MOVQ 0(SP), AX // our caller's PC
+ MOVQ AX, (m_morebuf+gobuf_pc)(BX)
+ LEAQ 8(SP), AX // our caller's SP
+ MOVQ AX, (m_morebuf+gobuf_sp)(BX)
+ MOVQ g(CX), AX
+ MOVQ AX, (m_morebuf+gobuf_g)(BX)
+
+ // Set up morestack arguments to call f on a new stack.
+ // We set f's frame size to 1, as a hint to newstack
+ // that this is a call from reflect·call.
+ // If it turns out that f needs a larger frame than
+ // the default stack, f's usual stack growth prolog will
+ // allocate a new segment (and recopy the arguments).
+ MOVQ 8(SP), AX // fn
+ MOVQ 16(SP), DX // arg frame
+ MOVL 24(SP), CX // arg size
+
+ MOVQ AX, m_morepc(BX) // f's PC
+ MOVQ DX, m_moreargp(BX) // argument frame pointer
+ MOVL CX, m_moreargsize(BX) // f's argument size
+ MOVL $1, m_moreframesize(BX) // f's frame size
+
+ // Call newstack on m->g0's stack.
+ MOVQ m_g0(BX), BP
+ get_tls(CX)
+ MOVQ BP, g(CX)
+ MOVQ (g_sched+gobuf_sp)(BP), SP
+ CALL runtime·newstack(SB)
+ MOVQ $0, 0x1103 // crash if newstack returns
+ RET
+
+// Return point when leaving stack.
+TEXT runtime·lessstack(SB), 7, $0
+ // Save return value in m->cret
+ get_tls(CX)
+ MOVQ m(CX), BX
+ MOVQ AX, m_cret(BX)
+
+ // Call oldstack on m->g0's stack.
+ MOVQ m_g0(BX), BP
+ MOVQ BP, g(CX)
+ MOVQ (g_sched+gobuf_sp)(BP), SP
+ CALL runtime·oldstack(SB)
+ MOVQ $0, 0x1004 // crash if oldstack returns
+ RET
+
+// morestack trampolines
+TEXT runtime·morestack00(SB),7,$0
+ get_tls(CX)
+ MOVQ m(CX), BX
+ MOVQ $0, AX
+ MOVQ AX, m_moreframesize(BX)
+ MOVQ $runtime·morestack(SB), AX
+ JMP AX
+
+TEXT runtime·morestack01(SB),7,$0
+ get_tls(CX)
+ MOVQ m(CX), BX
+ SHLQ $32, AX
+ MOVQ AX, m_moreframesize(BX)
+ MOVQ $runtime·morestack(SB), AX
+ JMP AX
+
+TEXT runtime·morestack10(SB),7,$0
+ get_tls(CX)
+ MOVQ m(CX), BX
+ MOVLQZX AX, AX
+ MOVQ AX, m_moreframesize(BX)
+ MOVQ $runtime·morestack(SB), AX
+ JMP AX
+
+TEXT runtime·morestack11(SB),7,$0
+ get_tls(CX)
+ MOVQ m(CX), BX
+ MOVQ AX, m_moreframesize(BX)
+ MOVQ $runtime·morestack(SB), AX
+ JMP AX
+
+// subcases of morestack01
+// with const of 8,16,...48
+TEXT runtime·morestack8(SB),7,$0
+ PUSHQ $1
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack16(SB),7,$0
+ PUSHQ $2
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack24(SB),7,$0
+ PUSHQ $3
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack32(SB),7,$0
+ PUSHQ $4
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack40(SB),7,$0
+ PUSHQ $5
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack48(SB),7,$0
+ PUSHQ $6
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
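+// morestack<> pops the constant back off, and SHLQ $35 both
+// multiplies it by 8 and moves it into the upper 32 bits of the
+// quadword stored over m->moreframesize: frame size 0 plus the
+// argument size, the same encoding morestack01 produces.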
+TEXT morestack<>(SB),7,$0
+ get_tls(CX)
+ MOVQ m(CX), BX
+ POPQ AX
+ SHLQ $35, AX
+ MOVQ AX, m_moreframesize(BX)
+ MOVQ $runtime·morestack(SB), AX
+ JMP AX
+
+// bool cas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT runtime·cas(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVL 16(SP), AX
+ MOVL 20(SP), CX
+ LOCK
+ CMPXCHGL CX, 0(BX)
+ JZ 3(PC)
+ MOVL $0, AX
+ RET
+ MOVL $1, AX
+ RET
+
+// bool casp(void **val, void *old, void *new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT runtime·casp(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVQ 16(SP), AX
+ MOVQ 24(SP), CX
+ LOCK
+ CMPXCHGQ CX, 0(BX)
+ JZ 3(PC)
+ MOVL $0, AX
+ RET
+ MOVL $1, AX
+ RET
+
+// uint32 xadd(uint32 volatile *val, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT runtime·xadd(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVL 16(SP), AX
+ MOVL AX, CX
+ LOCK
+ XADDL AX, 0(BX)
+ ADDL CX, AX
+ RET
+
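+// uint32 xchg(uint32 volatile* val, uint32 new)
+// Atomically:
+// old = *val;
+// *val = new;
+// return old;
+// (XCHGL with a memory operand is implicitly LOCKed.)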
+TEXT runtime·xchg(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVL 16(SP), AX
+ XCHGL AX, 0(BX)
+ RET
+
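+// void procyield(uint32 cycles)
+// Execute PAUSE the given number of times: a hint to the
+// processor that this is a spin-wait loop.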
+TEXT runtime·procyield(SB),7,$0
+ MOVL 8(SP), AX
+again:
+ PAUSE
+ SUBL $1, AX
+ JNZ again
+ RET
+
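+// void atomicstorep(void* volatile* addr, void* v)
+// Atomically: *addr = v.
+// The implicit LOCK on XCHGQ doubles as a memory barrier.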
+TEXT runtime·atomicstorep(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVQ 16(SP), AX
+ XCHGQ AX, 0(BX)
+ RET
+
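+// void atomicstore(uint32 volatile* addr, uint32 v)
+// 32-bit version of atomicstorep.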
+TEXT runtime·atomicstore(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVL 16(SP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+// void jmpdefer(fn, sp);
+// called from deferreturn.
+// 1. pop the caller
+// 2. sub 5 bytes (the length of the CALL instruction) from the caller's return address
+// 3. jmp to the argument
+TEXT runtime·jmpdefer(SB), 7, $0
+ MOVQ 8(SP), AX // fn
+ MOVQ 16(SP), BX // caller sp
+ LEAQ -8(BX), SP // caller sp after CALL
+ SUBQ $5, (SP) // return to CALL again
+ JMP AX // but first run the deferred function
+
+// Dummy function to use in saved gobuf.PC,
+// to match SP pointing at a return address.
+// The gobuf.PC is unused by the contortions here
+// but setting it to return will make the traceback code work.
+TEXT return<>(SB),7,$0
+ RET
+
+// asmcgocall(void(*fn)(void*), void *arg)
+// Call fn(arg) on the scheduler stack,
+// aligned appropriately for the gcc ABI.
+// See cgocall.c for more details.
+TEXT runtime·asmcgocall(SB),7,$0
+ MOVQ fn+0(FP), AX
+ MOVQ arg+8(FP), BX
+ MOVQ SP, DX
+
+ // Figure out if we need to switch to m->g0 stack.
+ // We get called to create new OS threads too, and those
+ // come in on the m->g0 stack already.
+ get_tls(CX)
+ MOVQ m(CX), BP
+ MOVQ m_g0(BP), SI
+ MOVQ g(CX), DI
+ CMPQ SI, DI
+ JEQ 6(PC)
+ MOVQ SP, (g_sched+gobuf_sp)(DI)
+ MOVQ $return<>(SB), (g_sched+gobuf_pc)(DI)
+ MOVQ DI, (g_sched+gobuf_g)(DI)
+ MOVQ SI, g(CX)
+ MOVQ (g_sched+gobuf_sp)(SI), SP
+
+ // Now on a scheduling stack (a pthread-created stack).
+ SUBQ $32, SP
+ ANDQ $~15, SP // alignment for gcc ABI
+ MOVQ DI, 16(SP) // save g
+ MOVQ DX, 8(SP) // save SP
+ MOVQ BX, DI // DI = first argument in AMD64 ABI
+ MOVQ BX, CX // CX = first argument in Win64
+ CALL AX
+
+ // Restore registers, g, stack pointer.
+ get_tls(CX)
+ MOVQ 16(SP), DI
+ MOVQ DI, g(CX)
+ MOVQ 8(SP), SP
+ RET
+
+// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
+// See cgocall.c for more details.
+TEXT runtime·cgocallback(SB),7,$24
+ MOVQ fn+0(FP), AX
+ MOVQ frame+8(FP), BX
+ MOVQ framesize+16(FP), DX
+
+ // Save current m->g0->sched.sp on stack and then set it to SP.
+ get_tls(CX)
+ MOVQ m(CX), BP
+ MOVQ m_g0(BP), SI
+ PUSHQ (g_sched+gobuf_sp)(SI)
+ MOVQ SP, (g_sched+gobuf_sp)(SI)
+
+ // Switch to m->curg stack and call runtime.cgocallback
+ // with the three arguments. Because we are taking over
+ // the execution of m->curg but *not* resuming what had
+ // been running, we need to save that information (m->curg->gobuf)
+ // so that we can restore it when we're done.
+ // We can restore m->curg->gobuf.sp easily, because calling
+ // runtime.cgocallback leaves SP unchanged upon return.
+ // To save m->curg->gobuf.pc, we push it onto the stack.
+ // This has the added benefit that it looks to the traceback
+ // routine like cgocallback is going to return to that
+ // PC (because we defined cgocallback to have
+ // a frame size of 24, the same amount that we use below),
+ // so that the traceback will seamlessly trace back into
+ // the earlier calls.
+ MOVQ m_curg(BP), SI
+ MOVQ SI, g(CX)
+ MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
+
+ // Push gobuf.pc
+ MOVQ (g_sched+gobuf_pc)(SI), BP
+ SUBQ $8, DI
+ MOVQ BP, 0(DI)
+
+ // Push arguments to cgocallbackg.
+ // Frame size here must match the frame size above
+ // to trick traceback routines into doing the right thing.
+ SUBQ $24, DI
+ MOVQ AX, 0(DI)
+ MOVQ BX, 8(DI)
+ MOVQ DX, 16(DI)
+
+ // Switch stack and make the call.
+ MOVQ DI, SP
+ CALL runtime·cgocallbackg(SB)
+
+ // Restore g->gobuf (== m->curg->gobuf) from saved values.
+ get_tls(CX)
+ MOVQ g(CX), SI
+ MOVQ 24(SP), BP
+ MOVQ BP, (g_sched+gobuf_pc)(SI)
+ LEAQ (24+8)(SP), DI
+ MOVQ DI, (g_sched+gobuf_sp)(SI)
+
+ // Switch back to m->g0's stack and restore m->g0->sched.sp.
+ // (Unlike m->curg, the g0 goroutine never uses sched.pc,
+ // so we do not have to restore it.)
+ MOVQ m(CX), BP
+ MOVQ m_g0(BP), SI
+ MOVQ SI, g(CX)
+ MOVQ (g_sched+gobuf_sp)(SI), SP
+ POPQ (g_sched+gobuf_sp)(SI)
+
+ // Done!
+ RET
+
+// check that SP is in range [g->stackguard, g->stackbase)
+TEXT runtime·stackcheck(SB), 7, $0
+ get_tls(CX)
+ MOVQ g(CX), AX
+ CMPQ g_stackbase(AX), SP
+ JHI 2(PC)
+ INT $3
+ CMPQ SP, g_stackguard(AX)
+ JHI 2(PC)
+ INT $3
+ RET
+
+TEXT runtime·memclr(SB),7,$0
+ MOVQ 8(SP), DI // arg 1 addr
+ MOVQ 16(SP), CX // arg 2 count
+ MOVQ CX, BX
+ ANDQ $7, BX
+ SHRQ $3, CX
+ MOVQ $0, AX
+ CLD
+ REP
+ STOSQ
+ MOVQ BX, CX
+ REP
+ STOSB
+ RET
+
+TEXT runtime·getcallerpc(SB),7,$0
+ MOVQ x+0(FP),AX // addr of first arg
+ MOVQ -8(AX),AX // get calling pc
+ RET
+
+TEXT runtime·setcallerpc(SB),7,$0
+ MOVQ x+0(FP),AX // addr of first arg
+ MOVQ x+8(FP), BX
+ MOVQ BX, -8(AX) // set calling pc
+ RET
+
+TEXT runtime·getcallersp(SB),7,$0
+ MOVQ sp+0(FP), AX
+ RET
+
+GLOBL runtime·tls0(SB), $64
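
As a readable cross-check on the atomic routines above, here is a rough C
model of their semantics using GCC's __sync builtins. This is a sketch for
reference only: the real implementations are the LOCK-prefixed instructions
above, and the unqualified names here are placeholders, not runtime API.

    #include <stdint.h>

    // cas: LOCK CMPXCHGL. Returns nonzero if the swap happened.
    int
    cas(uint32_t *val, uint32_t old, uint32_t new)
    {
        return __sync_bool_compare_and_swap(val, old, new);
    }

    // xadd: LOCK XADDL followed by ADDL, so the updated value
    // is returned, not the old one.
    uint32_t
    xadd(uint32_t *val, int32_t delta)
    {
        return __sync_add_and_fetch(val, delta);
    }
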
diff --git a/src/pkg/runtime/amd64/atomic.c b/src/pkg/runtime/amd64/atomic.c
new file mode 100644
index 000000000..a4f2a114f
--- /dev/null
+++ b/src/pkg/runtime/amd64/atomic.c
@@ -0,0 +1,19 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+#pragma textflag 7
+uint32
+runtime·atomicload(uint32 volatile* addr)
+{
+ return *addr;
+}
+
+#pragma textflag 7
+void*
+runtime·atomicloadp(void* volatile* addr)
+{
+ return *addr;
+}
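
These can be plain dereferences because aligned 32- and 64-bit loads are
atomic on amd64, and the processor's strong (TSO) ordering keeps them
consistent with the LOCK-prefixed writes in asm.s; #pragma textflag 7 also
sets the no-stack-split bit, so the functions are safe to call where the
stack must not grow. A portable C11 rendering of the same idea, illustrative
only and not the runtime's code:

    #include <stdatomic.h>
    #include <stdint.h>

    uint32_t
    atomicload(_Atomic uint32_t *addr)
    {
        // On x86 even this compiles to a plain MOV load;
        // the fencing burden falls on the store side.
        return atomic_load(addr);
    }
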
diff --git a/src/pkg/runtime/amd64/closure.c b/src/pkg/runtime/amd64/closure.c
new file mode 100644
index 000000000..481b4a888
--- /dev/null
+++ b/src/pkg/runtime/amd64/closure.c
@@ -0,0 +1,123 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+#pragma textflag 7
+// func closure(siz int32,
+// fn func(arg0, arg1, arg2 *ptr, callerpc uintptr, xxx) yyy,
+// arg0, arg1, arg2 *ptr) (func(xxx) yyy)
+void
+runtime·closure(int32 siz, byte *fn, byte *arg0)
+{
+ byte *p, *q, **ret;
+ int32 i, n;
+ int64 pcrel;
+
+ if(siz < 0 || siz%8 != 0)
+ runtime·throw("bad closure size");
+
+ ret = (byte**)((byte*)&arg0 + siz);
+
+ if(siz > 100) {
+ // TODO(rsc): implement stack growth preamble?
+ runtime·throw("closure too big");
+ }
+
+ // compute size of new fn.
+ // must match code laid out below.
+ n = 7+10+3; // SUBQ MOVQ MOVQ
+ if(siz <= 4*8)
+ n += 2*siz/8; // MOVSQ MOVSQ...
+ else
+ n += 7+3; // MOVQ REP MOVSQ
+ n += 12; // CALL worst case; sometimes only 5
+ n += 7+1; // ADDQ RET
+
+ // store args aligned after code, so gc can find them.
+ n += siz;
+ if(n%8)
+ n += 8 - n%8;
+
+ p = runtime·mal(n);
+ *ret = p;
+ q = p + n - siz;
+
+ if(siz > 0) {
+ runtime·memmove(q, (byte*)&arg0, siz);
+
+ // SUBQ $siz, SP
+ *p++ = 0x48;
+ *p++ = 0x81;
+ *p++ = 0xec;
+ *(uint32*)p = siz;
+ p += 4;
+
+ // MOVQ $q, SI
+ *p++ = 0x48;
+ *p++ = 0xbe;
+ *(byte**)p = q;
+ p += 8;
+
+ // MOVQ SP, DI
+ *p++ = 0x48;
+ *p++ = 0x89;
+ *p++ = 0xe7;
+
+ if(siz <= 4*8) {
+ for(i=0; i<siz; i+=8) {
+ // MOVSQ
+ *p++ = 0x48;
+ *p++ = 0xa5;
+ }
+ } else {
+ // MOVQ $(siz/8), CX [32-bit immediate siz/8]
+ *p++ = 0x48;
+ *p++ = 0xc7;
+ *p++ = 0xc1;
+ *(uint32*)p = siz/8;
+ p += 4;
+
+ // REP; MOVSQ
+ *p++ = 0xf3;
+ *p++ = 0x48;
+ *p++ = 0xa5;
+ }
+ }
+
+ // call fn
+ pcrel = fn - (p+5);
+ if((int32)pcrel == pcrel) {
+ // can use direct call with pc-relative offset
+ // CALL fn
+ *p++ = 0xe8;
+ *(int32*)p = pcrel;
+ p += 4;
+ } else {
+ // MOVQ $fn, CX [64-bit immediate fn]
+ *p++ = 0x48;
+ *p++ = 0xb9;
+ *(byte**)p = fn;
+ p += 8;
+
+ // CALL *CX
+ *p++ = 0xff;
+ *p++ = 0xd1;
+ }
+
+ // ADDQ $siz, SP
+ *p++ = 0x48;
+ *p++ = 0x81;
+ *p++ = 0xc4;
+ *(uint32*)p = siz;
+ p += 4;
+
+ // RET
+ *p++ = 0xc3;
+
+ if(p > q)
+ runtime·throw("bad math in sys.closure");
+}
+
+
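For concreteness, the stub generated above for siz == 16, with q and fn
standing in for the real addresses and assuming the pc-relative CALL form
is chosen, lays out as:

    48 81 ec 10 00 00 00    SUBQ  $16, SP
    48 be <q: 8 bytes>      MOVQ  $q, SI
    48 89 e7                MOVQ  SP, DI
    48 a5                   MOVSQ
    48 a5                   MOVSQ
    e8 <rel32: 4 bytes>     CALL  fn
    48 81 c4 10 00 00 00    ADDQ  $16, SP
    c3                      RET

followed by the 16 saved argument bytes at q, aligned after the code so the
garbage collector can find them.
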
diff --git a/src/pkg/runtime/amd64/memmove.s b/src/pkg/runtime/amd64/memmove.s
new file mode 100644
index 000000000..e78be8145
--- /dev/null
+++ b/src/pkg/runtime/amd64/memmove.s
@@ -0,0 +1,88 @@
+// Derived from Inferno's libkern/memmove-386.s (adapted for amd64)
+// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-386.s
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+TEXT runtime·memmove(SB), 7, $0
+
+ MOVQ to+0(FP), DI
+ MOVQ fr+8(FP), SI
+ MOVLQSX n+16(FP), BX
+
+/*
+ * check and set for backwards
+ */
+ CMPQ SI, DI
+ JLS back
+
+/*
+ * forward copy loop
+ */
+forward:
+ MOVQ BX, CX
+ SHRQ $3, CX
+ ANDQ $7, BX
+
+ REP; MOVSQ
+ MOVQ BX, CX
+ REP; MOVSB
+
+ MOVQ to+0(FP),AX
+ RET
+back:
+/*
+ * check overlap
+ */
+ MOVQ SI, CX
+ ADDQ BX, CX
+ CMPQ CX, DI
+ JLS forward
+
+/*
+ * whole thing backwards has
+ * adjusted addresses
+ */
+ ADDQ BX, DI
+ ADDQ BX, SI
+ STD
+
+/*
+ * copy
+ */
+ MOVQ BX, CX
+ SHRQ $3, CX
+ ANDQ $7, BX
+
+ SUBQ $8, DI
+ SUBQ $8, SI
+ REP; MOVSQ
+
+ ADDQ $7, DI
+ ADDQ $7, SI
+ MOVQ BX, CX
+ REP; MOVSB
+
+ CLD
+ MOVQ to+0(FP),AX
+ RET
+
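The adjustments around the backward copy are the subtle part. After the two
ADDQs, DI and SI point one past the end of each buffer; with the direction
flag set, MOVSQ must start at the last whole quadword (hence SUBQ $8) and
MOVSB at the last remaining byte (hence ADDQ $7, which undoes the final
8-byte step and lands on that byte). For example, with n = 20 (CX = 2
quadwords, BX = 4 bytes), the quadwords at fr+12 and fr+4 are copied first,
then bytes fr+3 down to fr+0, and CLD restores the convention that the
direction flag is left clear.
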
diff --git a/src/pkg/runtime/amd64/traceback.c b/src/pkg/runtime/amd64/traceback.c
new file mode 100644
index 000000000..3e85d36bd
--- /dev/null
+++ b/src/pkg/runtime/amd64/traceback.c
@@ -0,0 +1,295 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "malloc.h"
+
+static uintptr isclosureentry(uintptr);
+void runtime·deferproc(void);
+void runtime·newproc(void);
+void runtime·newstack(void);
+void runtime·morestack(void);
+void runtime·sigpanic(void);
+
+// This code is also used for the 386 tracebacks.
+// Use uintptr for an appropriate word-sized integer.
+
+// Generic traceback. Handles runtime stack prints (pcbuf == nil)
+// as well as the runtime.Callers function (pcbuf != nil).
+// A little clunky to merge the two but avoids duplicating
+// the code and all its subtlety.
+int32
+runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, int32 max)
+{
+ byte *p;
+ int32 i, n, iter, sawnewstack;
+ uintptr pc, lr, tracepc;
+ byte *fp;
+ Stktop *stk;
+ Func *f;
+ bool waspanic;
+
+ USED(lr0);
+ pc = (uintptr)pc0;
+ lr = 0;
+ fp = nil;
+ waspanic = false;
+
+ // If the PC is goexit, the goroutine hasn't started yet.
+ if(pc0 == g->sched.pc && sp == g->sched.sp && pc0 == (byte*)runtime·goexit) {
+ fp = sp;
+ lr = pc;
+ pc = (uintptr)g->entry;
+ }
+
+ // If the PC is zero, it's likely a nil function call.
+ // Start in the caller's frame.
+ if(pc == 0) {
+ pc = *(uintptr*)sp;
+ sp += sizeof(uintptr);
+ }
+
+ n = 0;
+ sawnewstack = 0;
+ stk = (Stktop*)g->stackbase;
+ for(iter = 0; iter < 100 && n < max; iter++) { // iter avoids looping forever
+ // Typically:
+ // pc is the PC of the running function.
+ // sp is the stack pointer at that program counter.
+ // fp is the frame pointer (caller's stack pointer) at that program counter, or nil if unknown.
+ // stk is the stack containing sp.
+ // The caller's program counter is lr, unless lr is zero, in which case it is *(uintptr*)sp.
+
+ if(pc == (uintptr)runtime·lessstack) {
+ // Hit top of stack segment. Unwind to next segment.
+ pc = (uintptr)stk->gobuf.pc;
+ sp = stk->gobuf.sp;
+ lr = 0;
+ fp = nil;
+ if(pcbuf == nil)
+ runtime·printf("----- stack segment boundary -----\n");
+ stk = (Stktop*)stk->stackbase;
+ continue;
+ }
+ if(pc <= 0x1000 || (f = runtime·findfunc(pc)) == nil) {
+ // Dangerous, but worthwhile: see if this is a closure:
+ // ADDQ $wwxxyyzz, SP; RET
+ // [48] 81 c4 zz yy xx ww c3
+ // The 0x48 byte is only on amd64.
+ p = (byte*)pc;
+ // We check p < p+8 to avoid wrapping and faulting if we lose track.
+ if(runtime·mheap.arena_start < p && p < p+8 && p+8 < runtime·mheap.arena_used && // pointer in allocated memory
+ (sizeof(uintptr) != 8 || *p++ == 0x48) && // skip 0x48 byte on amd64
+ p[0] == 0x81 && p[1] == 0xc4 && p[6] == 0xc3) {
+ sp += *(uint32*)(p+2);
+ pc = *(uintptr*)sp;
+ sp += sizeof(uintptr);
+ lr = 0;
+ fp = nil;
+ continue;
+ }
+
+ // Closure at top of stack, not yet started.
+ if(lr == (uintptr)runtime·goexit && (pc = isclosureentry(pc)) != 0) {
+ fp = sp;
+ continue;
+ }
+
+ // Unknown pc: stop.
+ break;
+ }
+
+ // Found an actual function.
+ if(fp == nil) {
+ fp = sp;
+ if(pc > f->entry && f->frame >= sizeof(uintptr))
+ fp += f->frame - sizeof(uintptr);
+ if(lr == 0)
+ lr = *(uintptr*)fp;
+ fp += sizeof(uintptr);
+ } else if(lr == 0)
+ lr = *(uintptr*)fp;
+
+ if(skip > 0)
+ skip--;
+ else if(pcbuf != nil)
+ pcbuf[n++] = pc;
+ else {
+ // Print during crash.
+ // main+0xf /home/rsc/go/src/runtime/x.go:23
+ // main(0x1, 0x2, 0x3)
+ runtime·printf("%S", f->name);
+ if(pc > f->entry)
+ runtime·printf("+%p", (uintptr)(pc - f->entry));
+ tracepc = pc; // back up to CALL instruction for funcline.
+ if(n > 0 && pc > f->entry && !waspanic)
+ tracepc--;
+ runtime·printf(" %S:%d\n", f->src, runtime·funcline(f, tracepc));
+ runtime·printf("\t%S(", f->name);
+ for(i = 0; i < f->args; i++) {
+ if(i != 0)
+ runtime·prints(", ");
+ runtime·printhex(((uintptr*)fp)[i]);
+ if(i >= 4) {
+ runtime·prints(", ...");
+ break;
+ }
+ }
+ runtime·prints(")\n");
+ n++;
+ }
+
+ waspanic = f->entry == (uintptr)runtime·sigpanic;
+
+ if(f->entry == (uintptr)runtime·deferproc || f->entry == (uintptr)runtime·newproc)
+ fp += 2*sizeof(uintptr);
+
+ if(f->entry == (uintptr)runtime·newstack)
+ sawnewstack = 1;
+
+ if(pcbuf == nil && f->entry == (uintptr)runtime·morestack && g == m->g0 && sawnewstack) {
+ // The fact that we saw newstack means that morestack
+ // has managed to record its information in m, so we can
+ // use it to keep unwinding the stack.
+ runtime·printf("----- morestack called from goroutine %d -----\n", m->curg->goid);
+ pc = (uintptr)m->morepc;
+ sp = m->morebuf.sp - sizeof(void*);
+ lr = (uintptr)m->morebuf.pc;
+ fp = m->morebuf.sp;
+ sawnewstack = 0;
+ g = m->curg;
+ stk = (Stktop*)g->stackbase;
+ continue;
+ }
+
+ if(pcbuf == nil && f->entry == (uintptr)runtime·lessstack && g == m->g0) {
+ // Lessstack is running on scheduler stack. Switch to original goroutine.
+ runtime·printf("----- lessstack called from goroutine %d -----\n", m->curg->goid);
+ g = m->curg;
+ stk = (Stktop*)g->stackbase;
+ sp = stk->gobuf.sp;
+ pc = (uintptr)stk->gobuf.pc;
+ fp = nil;
+ lr = 0;
+ continue;
+ }
+
+ // Unwind to next frame.
+ pc = lr;
+ lr = 0;
+ sp = fp;
+ fp = nil;
+ }
+
+ if(pcbuf == nil && (pc = g->gopc) != 0 && (f = runtime·findfunc(pc)) != nil) {
+ runtime·printf("----- goroutine created by -----\n%S", f->name);
+ if(pc > f->entry)
+ runtime·printf("+%p", (uintptr)(pc - f->entry));
+ tracepc = pc; // back up to CALL instruction for funcline.
+ if(n > 0 && pc > f->entry)
+ tracepc--;
+ runtime·printf(" %S:%d\n", f->src, runtime·funcline(f, tracepc));
+ }
+
+ return n;
+}
+
+void
+runtime·traceback(byte *pc0, byte *sp, byte*, G *g)
+{
+ runtime·gentraceback(pc0, sp, nil, g, 0, nil, 100);
+}
+
+int32
+runtime·callers(int32 skip, uintptr *pcbuf, int32 m)
+{
+ byte *pc, *sp;
+
+ // our caller's pc, sp.
+ sp = (byte*)&skip;
+ pc = runtime·getcallerpc(&skip);
+
+ return runtime·gentraceback(pc, sp, nil, g, skip, pcbuf, m);
+}
+
+static uintptr
+isclosureentry(uintptr pc)
+{
+ byte *p;
+ int32 i, siz;
+
+ p = (byte*)pc;
+ if(p < runtime·mheap.arena_start || p+32 > runtime·mheap.arena_used)
+ return 0;
+
+ if(*p == 0xe8) {
+ // CALL fn
+ return pc+5+*(int32*)(p+1);
+ }
+
+ if(sizeof(uintptr) == 8 && p[0] == 0x48 && p[1] == 0xb9 && p[10] == 0xff && p[11] == 0xd1) {
+ // MOVQ $fn, CX; CALL *CX
+ return *(uintptr*)(p+2);
+ }
+
+ // SUBQ $siz, SP
+ if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0x81 || *p++ != 0xec)
+ return 0;
+ siz = *(uint32*)p;
+ p += 4;
+
+ // MOVQ $q, SI
+ if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0xbe)
+ return 0;
+ p += sizeof(uintptr);
+
+ // MOVQ SP, DI
+ if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0x89 || *p++ != 0xe7)
+ return 0;
+
+ // CLD on 32-bit
+ if(sizeof(uintptr) == 4 && *p++ != 0xfc)
+ return 0;
+
+ if(siz <= 4*sizeof(uintptr)) {
+ // MOVSQ...
+ for(i=0; i<siz; i+=sizeof(uintptr))
+ if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0xa5)
+ return 0;
+ } else {
+ // MOVQ $(siz/8), CX [32-bit immediate siz/8]
+ if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0xc7 || *p++ != 0xc1)
+ return 0;
+ p += 4;
+
+ // REP MOVSQ
+ if(*p++ != 0xf3 || (sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0xa5)
+ return 0;
+ }
+
+ // CALL fn
+ if(*p == 0xe8) {
+ p++;
+ return (uintptr)p+4 + *(int32*)p;
+ }
+
+ // MOVQ $fn, CX; CALL *CX
+ if(sizeof(uintptr) != 8 || *p++ != 0x48 || *p++ != 0xb9)
+ return 0;
+
+ pc = *(uintptr*)p;
+ p += 8;
+
+ if(*p++ != 0xff || *p != 0xd1)
+ return 0;
+
+ return pc;
+}
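
Note that isclosureentry recognizes, byte for byte, exactly the sequences
runtime·closure in closure.c emits (SUBQ/MOVQ/MOVQ, then MOVSQ or REP MOVSQ,
then a CALL either pc-relative or through CX); the matcher and the generator
must be kept in sync if either changes.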