diff options
author | Ondřej Surý <ondrej@sury.org> | 2011-04-26 09:55:32 +0200 |
---|---|---|
committer | Ondřej Surý <ondrej@sury.org> | 2011-04-26 09:55:32 +0200 |
commit | 7b15ed9ef455b6b66c6b376898a88aef5d6a9970 (patch) | |
tree | 3ef530baa80cdf29436ba981f5783be6b4d2202b /src/pkg/runtime | |
parent | 50104cc32a498f7517a51c8dc93106c51c7a54b4 (diff) | |
download | golang-7b15ed9ef455b6b66c6b376898a88aef5d6a9970.tar.gz |
Imported Upstream version 2011.04.13 (tag: upstream/2011.04.13)
Diffstat (limited to 'src/pkg/runtime')
54 files changed, 2199 insertions, 773 deletions
diff --git a/src/pkg/runtime/386/asm.s b/src/pkg/runtime/386/asm.s index 74e1df0da..598fc6846 100644 --- a/src/pkg/runtime/386/asm.s +++ b/src/pkg/runtime/386/asm.s @@ -105,7 +105,7 @@ TEXT runtime·breakpoint(SB),7,$0 * go-routine */ -// uintptr gosave(Gobuf*) +// void gosave(Gobuf*) // save state in Gobuf; setjmp TEXT runtime·gosave(SB), 7, $0 MOVL 4(SP), AX // gobuf @@ -116,7 +116,6 @@ TEXT runtime·gosave(SB), 7, $0 get_tls(CX) MOVL g(CX), BX MOVL BX, gobuf_g(AX) - MOVL $0, AX // return 0 RET // void gogo(Gobuf*, uintptr) @@ -148,6 +147,35 @@ TEXT runtime·gogocall(SB), 7, $0 JMP AX POPL BX // not reached +// void mcall(void (*fn)(G*)) +// Switch to m->g0's stack, call fn(g). +// Fn must never return. It should gogo(&g->gobuf) +// to keep running g. +TEXT runtime·mcall(SB), 7, $0 + MOVL fn+0(FP), DI + + get_tls(CX) + MOVL g(CX), AX // save state in g->gobuf + MOVL 0(SP), BX // caller's PC + MOVL BX, (g_sched+gobuf_pc)(AX) + LEAL 4(SP), BX // caller's SP + MOVL BX, (g_sched+gobuf_sp)(AX) + MOVL AX, (g_sched+gobuf_g)(AX) + + // switch to m->g0 & its stack, call fn + MOVL m(CX), BX + MOVL m_g0(BX), SI + CMPL SI, AX // if g == m->g0 call badmcall + JNE 2(PC) + CALL runtime·badmcall(SB) + MOVL SI, g(CX) // g = m->g0 + MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->gobuf.sp + PUSHL AX + CALL DI + POPL AX + CALL runtime·badmcall2(SB) + RET + /* * support for morestack */ @@ -183,10 +211,10 @@ TEXT runtime·morestack(SB),7,$0 MOVL 0(SP), AX MOVL AX, m_morepc(BX) - // Call newstack on m's scheduling stack. + // Call newstack on m->g0's stack. MOVL m_g0(BX), BP MOVL BP, g(CX) - MOVL (m_sched+gobuf_sp)(BX), AX + MOVL (g_sched+gobuf_sp)(BP), AX MOVL -4(AX), BX // fault if CALL would, before smashing SP MOVL AX, SP CALL runtime·newstack(SB) @@ -226,11 +254,11 @@ TEXT reflect·call(SB), 7, $0 MOVL CX, m_moreargsize(BX) // f's argument size MOVL $1, m_moreframesize(BX) // f's frame size - // Call newstack on m's scheduling stack. + // Call newstack on m->g0's stack. 
MOVL m_g0(BX), BP get_tls(CX) MOVL BP, g(CX) - MOVL (m_sched+gobuf_sp)(BX), SP + MOVL (g_sched+gobuf_sp)(BP), SP CALL runtime·newstack(SB) MOVL $0, 0x1103 // crash if newstack returns RET @@ -243,10 +271,10 @@ TEXT runtime·lessstack(SB), 7, $0 MOVL m(CX), BX MOVL AX, m_cret(BX) - // Call oldstack on m's scheduling stack. - MOVL m_g0(BX), DX - MOVL DX, g(CX) - MOVL (m_sched+gobuf_sp)(BX), SP + // Call oldstack on m->g0's stack. + MOVL m_g0(BX), BP + MOVL BP, g(CX) + MOVL (g_sched+gobuf_sp)(BP), SP CALL runtime·oldstack(SB) MOVL $0, 0x1004 // crash if oldstack returns RET @@ -302,6 +330,133 @@ TEXT runtime·jmpdefer(SB), 7, $0 SUBL $5, (SP) // return to CALL again JMP AX // but first run the deferred function +// Dummy function to use in saved gobuf.PC, +// to match SP pointing at a return address. +// The gobuf.PC is unused by the contortions here +// but setting it to return will make the traceback code work. +TEXT return<>(SB),7,$0 + RET + +// asmcgocall(void(*fn)(void*), void *arg) +// Call fn(arg) on the scheduler stack, +// aligned appropriately for the gcc ABI. +// See cgocall.c for more details. +TEXT runtime·asmcgocall(SB),7,$0 + MOVL fn+0(FP), AX + MOVL arg+4(FP), BX + MOVL SP, DX + + // Figure out if we need to switch to m->g0 stack. + // We get called to create new OS threads too, and those + // come in on the m->g0 stack already. + get_tls(CX) + MOVL m(CX), BP + MOVL m_g0(BP), SI + MOVL g(CX), DI + CMPL SI, DI + JEQ 6(PC) + MOVL SP, (g_sched+gobuf_sp)(DI) + MOVL $return<>(SB), (g_sched+gobuf_pc)(DI) + MOVL DI, (g_sched+gobuf_g)(DI) + MOVL SI, g(CX) + MOVL (g_sched+gobuf_sp)(SI), SP + + // Now on a scheduling stack (a pthread-created stack). + SUBL $32, SP + ANDL $~15, SP // alignment, perhaps unnecessary + MOVL DI, 8(SP) // save g + MOVL DX, 4(SP) // save SP + MOVL BX, 0(SP) // first argument in x86-32 ABI + CALL AX + + // Restore registers, g, stack pointer. 
+ get_tls(CX) + MOVL 8(SP), DI + MOVL DI, g(CX) + MOVL 4(SP), SP + RET + +// cgocallback(void (*fn)(void*), void *frame, uintptr framesize) +// See cgocall.c for more details. +TEXT runtime·cgocallback(SB),7,$12 + MOVL fn+0(FP), AX + MOVL frame+4(FP), BX + MOVL framesize+8(FP), DX + + // Save current m->g0->sched.sp on stack and then set it to SP. + get_tls(CX) + MOVL m(CX), BP + MOVL m_g0(BP), SI + PUSHL (g_sched+gobuf_sp)(SI) + MOVL SP, (g_sched+gobuf_sp)(SI) + + // Switch to m->curg stack and call runtime.cgocallback + // with the three arguments. Because we are taking over + // the execution of m->curg but *not* resuming what had + // been running, we need to save that information (m->curg->gobuf) + // so that we can restore it when we're done. + // We can restore m->curg->gobuf.sp easily, because calling + // runtime.cgocallback leaves SP unchanged upon return. + // To save m->curg->gobuf.pc, we push it onto the stack. + // This has the added benefit that it looks to the traceback + // routine like cgocallback is going to return to that + // PC (because we defined cgocallback to have + // a frame size of 12, the same amount that we use below), + // so that the traceback will seamlessly trace back into + // the earlier calls. + MOVL m_curg(BP), SI + MOVL SI, g(CX) + MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI + + // Push gobuf.pc + MOVL (g_sched+gobuf_pc)(SI), BP + SUBL $4, DI + MOVL BP, 0(DI) + + // Push arguments to cgocallbackg. + // Frame size here must match the frame size above + // to trick traceback routines into doing the right thing. + SUBL $12, DI + MOVL AX, 0(DI) + MOVL BX, 4(DI) + MOVL DX, 8(DI) + + // Switch stack and make the call. + MOVL DI, SP + CALL runtime·cgocallbackg(SB) + + // Restore g->gobuf (== m->curg->gobuf) from saved values. 
+ get_tls(CX) + MOVL g(CX), SI + MOVL 12(SP), BP + MOVL BP, (g_sched+gobuf_pc)(SI) + LEAL (12+4)(SP), DI + MOVL DI, (g_sched+gobuf_sp)(SI) + + // Switch back to m->g0's stack and restore m->g0->sched.sp. + // (Unlike m->curg, the g0 goroutine never uses sched.pc, + // so we do not have to restore it.) + MOVL m(CX), BP + MOVL m_g0(BP), SI + MOVL SI, g(CX) + MOVL (g_sched+gobuf_sp)(SI), SP + POPL (g_sched+gobuf_sp)(SI) + + // Done! + RET + +// check that SP is in range [g->stackbase, g->stackguard) +TEXT runtime·stackcheck(SB), 7, $0 + get_tls(CX) + MOVL g(CX), AX + CMPL g_stackbase(AX), SP + JHI 2(PC) + INT $3 + CMPL SP, g_stackguard(AX) + JHI 2(PC) + INT $3 + RET + TEXT runtime·memclr(SB),7,$0 MOVL 4(SP), DI // arg 1 addr MOVL 8(SP), CX // arg 2 count @@ -345,82 +500,4 @@ TEXT runtime·emptyfunc(SB),0,$0 TEXT runtime·abort(SB),7,$0 INT $0x3 -// runcgo(void(*fn)(void*), void *arg) -// Call fn(arg) on the scheduler stack, -// aligned appropriately for the gcc ABI. -TEXT runtime·runcgo(SB),7,$16 - MOVL fn+0(FP), AX - MOVL arg+4(FP), BX - MOVL SP, CX - - // Figure out if we need to switch to m->g0 stack. - get_tls(DI) - MOVL m(DI), DX - MOVL m_g0(DX), SI - CMPL g(DI), SI - JEQ 2(PC) - MOVL (m_sched+gobuf_sp)(DX), SP - - // Now on a scheduling stack (a pthread-created stack). - SUBL $16, SP - ANDL $~15, SP // alignment for gcc ABI - MOVL g(DI), BP - MOVL BP, 8(SP) - MOVL SI, g(DI) - MOVL CX, 4(SP) - MOVL BX, 0(SP) - CALL AX - - // Back; switch to original g and stack, re-establish - // "DF is clear" invariant. - CLD - get_tls(DI) - MOVL 8(SP), SI - MOVL SI, g(DI) - MOVL 4(SP), SP - RET - -// runcgocallback(G *g1, void* sp, void (*fn)(void)) -// Switch to g1 and sp, call fn, switch back. fn's arguments are on -// the new stack. -TEXT runtime·runcgocallback(SB),7,$32 - MOVL g1+0(FP), DX - MOVL sp+4(FP), AX - MOVL fn+8(FP), BX - - // We are running on m's scheduler stack. 
Save current SP - // into m->sched.sp so that a recursive call to runcgo doesn't - // clobber our stack, and also so that we can restore - // the SP when the call finishes. Reusing m->sched.sp - // for this purpose depends on the fact that there is only - // one possible gosave of m->sched. - get_tls(CX) - MOVL DX, g(CX) - MOVL m(CX), CX - MOVL SP, (m_sched+gobuf_sp)(CX) - - // Set new SP, call fn - MOVL AX, SP - CALL BX - - // Restore old g and SP, return - get_tls(CX) - MOVL m(CX), DX - MOVL m_g0(DX), BX - MOVL BX, g(CX) - MOVL (m_sched+gobuf_sp)(DX), SP - RET - -// check that SP is in range [g->stackbase, g->stackguard) -TEXT runtime·stackcheck(SB), 7, $0 - get_tls(CX) - MOVL g(CX), AX - CMPL g_stackbase(AX), SP - JHI 2(PC) - INT $3 - CMPL SP, g_stackguard(AX) - JHI 2(PC) - INT $3 - RET - GLOBL runtime·tls0(SB), $32 diff --git a/src/pkg/runtime/Makefile b/src/pkg/runtime/Makefile index e4cc08175..4da78c5f0 100644 --- a/src/pkg/runtime/Makefile +++ b/src/pkg/runtime/Makefile @@ -22,6 +22,7 @@ GOFILES=\ debug.go\ error.go\ extern.go\ + mem.go\ sig.go\ softfloat64.go\ type.go\ @@ -52,6 +53,7 @@ OFILES=\ cgocall.$O\ chan.$O\ closure.$O\ + cpuprof.$O\ float.$O\ complex.$O\ hashmap.$O\ diff --git a/src/pkg/runtime/amd64/asm.s b/src/pkg/runtime/amd64/asm.s index cc05435f7..a611985c5 100644 --- a/src/pkg/runtime/amd64/asm.s +++ b/src/pkg/runtime/amd64/asm.s @@ -89,7 +89,7 @@ TEXT runtime·breakpoint(SB),7,$0 * go-routine */ -// uintptr gosave(Gobuf*) +// void gosave(Gobuf*) // save state in Gobuf; setjmp TEXT runtime·gosave(SB), 7, $0 MOVQ 8(SP), AX // gobuf @@ -100,7 +100,6 @@ TEXT runtime·gosave(SB), 7, $0 get_tls(CX) MOVQ g(CX), BX MOVQ BX, gobuf_g(AX) - MOVL $0, AX // return 0 RET // void gogo(Gobuf*, uintptr) @@ -132,6 +131,35 @@ TEXT runtime·gogocall(SB), 7, $0 JMP AX POPQ BX // not reached +// void mcall(void (*fn)(G*)) +// Switch to m->g0's stack, call fn(g). +// Fn must never return. It should gogo(&g->gobuf) +// to keep running g. 
+TEXT runtime·mcall(SB), 7, $0 + MOVQ fn+0(FP), DI + + get_tls(CX) + MOVQ g(CX), AX // save state in g->gobuf + MOVQ 0(SP), BX // caller's PC + MOVQ BX, (g_sched+gobuf_pc)(AX) + LEAQ 8(SP), BX // caller's SP + MOVQ BX, (g_sched+gobuf_sp)(AX) + MOVQ AX, (g_sched+gobuf_g)(AX) + + // switch to m->g0 & its stack, call fn + MOVQ m(CX), BX + MOVQ m_g0(BX), SI + CMPQ SI, AX // if g == m->g0 call badmcall + JNE 2(PC) + CALL runtime·badmcall(SB) + MOVQ SI, g(CX) // g = m->g0 + MOVQ (g_sched+gobuf_sp)(SI), SP // sp = m->g0->gobuf.sp + PUSHQ AX + CALL DI + POPQ AX + CALL runtime·badmcall2(SB) + RET + /* * support for morestack */ @@ -160,10 +188,10 @@ TEXT runtime·morestack(SB),7,$0 MOVQ 0(SP), AX MOVQ AX, m_morepc(BX) - // Call newstack on m's scheduling stack. + // Call newstack on m->g0's stack. MOVQ m_g0(BX), BP MOVQ BP, g(CX) - MOVQ (m_sched+gobuf_sp)(BX), SP + MOVQ (g_sched+gobuf_sp)(BP), SP CALL runtime·newstack(SB) MOVQ $0, 0x1003 // crash if newstack returns RET @@ -201,11 +229,11 @@ TEXT reflect·call(SB), 7, $0 MOVL CX, m_moreargsize(BX) // f's argument size MOVL $1, m_moreframesize(BX) // f's frame size - // Call newstack on m's scheduling stack. + // Call newstack on m->g0's stack. MOVQ m_g0(BX), BP get_tls(CX) MOVQ BP, g(CX) - MOVQ (m_sched+gobuf_sp)(BX), SP + MOVQ (g_sched+gobuf_sp)(BP), SP CALL runtime·newstack(SB) MOVQ $0, 0x1103 // crash if newstack returns RET @@ -217,10 +245,10 @@ TEXT runtime·lessstack(SB), 7, $0 MOVQ m(CX), BX MOVQ AX, m_cret(BX) - // Call oldstack on m's scheduling stack. - MOVQ m_g0(BX), DX - MOVQ DX, g(CX) - MOVQ (m_sched+gobuf_sp)(BX), SP + // Call oldstack on m->g0's stack. + MOVQ m_g0(BX), BP + MOVQ BP, g(CX) + MOVQ (g_sched+gobuf_sp)(BP), SP CALL runtime·oldstack(SB) MOVQ $0, 0x1004 // crash if oldstack returns RET @@ -336,7 +364,6 @@ TEXT runtime·casp(SB), 7, $0 MOVL $1, AX RET - // void jmpdefer(fn, sp); // called from deferreturn. // 1. 
pop the caller @@ -349,68 +376,119 @@ TEXT runtime·jmpdefer(SB), 7, $0 SUBQ $5, (SP) // return to CALL again JMP AX // but first run the deferred function -// runcgo(void(*fn)(void*), void *arg) +// Dummy function to use in saved gobuf.PC, +// to match SP pointing at a return address. +// The gobuf.PC is unused by the contortions here +// but setting it to return will make the traceback code work. +TEXT return<>(SB),7,$0 + RET + +// asmcgocall(void(*fn)(void*), void *arg) // Call fn(arg) on the scheduler stack, // aligned appropriately for the gcc ABI. -TEXT runtime·runcgo(SB),7,$32 - MOVQ fn+0(FP), R12 - MOVQ arg+8(FP), R13 - MOVQ SP, CX +// See cgocall.c for more details. +TEXT runtime·asmcgocall(SB),7,$0 + MOVQ fn+0(FP), AX + MOVQ arg+8(FP), BX + MOVQ SP, DX // Figure out if we need to switch to m->g0 stack. - get_tls(DI) - MOVQ m(DI), DX - MOVQ m_g0(DX), SI - CMPQ g(DI), SI - JEQ 2(PC) - MOVQ (m_sched+gobuf_sp)(DX), SP + // We get called to create new OS threads too, and those + // come in on the m->g0 stack already. + get_tls(CX) + MOVQ m(CX), BP + MOVQ m_g0(BP), SI + MOVQ g(CX), DI + CMPQ SI, DI + JEQ 6(PC) + MOVQ SP, (g_sched+gobuf_sp)(DI) + MOVQ $return<>(SB), (g_sched+gobuf_pc)(DI) + MOVQ DI, (g_sched+gobuf_g)(DI) + MOVQ SI, g(CX) + MOVQ (g_sched+gobuf_sp)(SI), SP // Now on a scheduling stack (a pthread-created stack). SUBQ $32, SP ANDQ $~15, SP // alignment for gcc ABI - MOVQ g(DI), BP - MOVQ BP, 16(SP) - MOVQ SI, g(DI) - MOVQ CX, 8(SP) - MOVQ R13, DI // DI = first argument in AMD64 ABI - CALL R12 + MOVQ DI, 16(SP) // save g + MOVQ DX, 8(SP) // save SP + MOVQ BX, DI // DI = first argument in AMD64 ABI + CALL AX // Restore registers, g, stack pointer. - get_tls(DI) - MOVQ 16(SP), SI - MOVQ SI, g(DI) + get_tls(CX) + MOVQ 16(SP), DI + MOVQ DI, g(CX) MOVQ 8(SP), SP RET -// runcgocallback(G *g1, void* sp, void (*fn)(void)) -// Switch to g1 and sp, call fn, switch back. fn's arguments are on -// the new stack. 
-TEXT runtime·runcgocallback(SB),7,$48 - MOVQ g1+0(FP), DX - MOVQ sp+8(FP), AX - MOVQ fp+16(FP), BX - - // We are running on m's scheduler stack. Save current SP - // into m->sched.sp so that a recursive call to runcgo doesn't - // clobber our stack, and also so that we can restore - // the SP when the call finishes. Reusing m->sched.sp - // for this purpose depends on the fact that there is only - // one possible gosave of m->sched. - get_tls(CX) - MOVQ DX, g(CX) - MOVQ m(CX), CX - MOVQ SP, (m_sched+gobuf_sp)(CX) - - // Set new SP, call fn - MOVQ AX, SP - CALL BX +// cgocallback(void (*fn)(void*), void *frame, uintptr framesize) +// See cgocall.c for more details. +TEXT runtime·cgocallback(SB),7,$24 + MOVQ fn+0(FP), AX + MOVQ frame+8(FP), BX + MOVQ framesize+16(FP), DX - // Restore old g and SP, return + // Save current m->g0->sched.sp on stack and then set it to SP. get_tls(CX) - MOVQ m(CX), DX - MOVQ m_g0(DX), BX - MOVQ BX, g(CX) - MOVQ (m_sched+gobuf_sp)(DX), SP + MOVQ m(CX), BP + MOVQ m_g0(BP), SI + PUSHQ (g_sched+gobuf_sp)(SI) + MOVQ SP, (g_sched+gobuf_sp)(SI) + + // Switch to m->curg stack and call runtime.cgocallback + // with the three arguments. Because we are taking over + // the execution of m->curg but *not* resuming what had + // been running, we need to save that information (m->curg->gobuf) + // so that we can restore it when we're done. + // We can restore m->curg->gobuf.sp easily, because calling + // runtime.cgocallback leaves SP unchanged upon return. + // To save m->curg->gobuf.pc, we push it onto the stack. + // This has the added benefit that it looks to the traceback + // routine like cgocallback is going to return to that + // PC (because we defined cgocallback to have + // a frame size of 24, the same amount that we use below), + // so that the traceback will seamlessly trace back into + // the earlier calls. 
+ MOVQ m_curg(BP), SI + MOVQ SI, g(CX) + MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI + + // Push gobuf.pc + MOVQ (g_sched+gobuf_pc)(SI), BP + SUBQ $8, DI + MOVQ BP, 0(DI) + + // Push arguments to cgocallbackg. + // Frame size here must match the frame size above + // to trick traceback routines into doing the right thing. + SUBQ $24, DI + MOVQ AX, 0(DI) + MOVQ BX, 8(DI) + MOVQ DX, 16(DI) + + // Switch stack and make the call. + MOVQ DI, SP + CALL runtime·cgocallbackg(SB) + + // Restore g->gobuf (== m->curg->gobuf) from saved values. + get_tls(CX) + MOVQ g(CX), SI + MOVQ 24(SP), BP + MOVQ BP, (g_sched+gobuf_pc)(SI) + LEAQ (24+8)(SP), DI + MOVQ DI, (g_sched+gobuf_sp)(SI) + + // Switch back to m->g0's stack and restore m->g0->sched.sp. + // (Unlike m->curg, the g0 goroutine never uses sched.pc, + // so we do not have to restore it.) + MOVQ m(CX), BP + MOVQ m_g0(BP), SI + MOVQ SI, g(CX) + MOVQ (g_sched+gobuf_sp)(SI), SP + POPQ (g_sched+gobuf_sp)(SI) + + // Done! RET // check that SP is in range [g->stackbase, g->stackguard) diff --git a/src/pkg/runtime/amd64/traceback.c b/src/pkg/runtime/amd64/traceback.c index 0f6733c36..d422cb692 100644 --- a/src/pkg/runtime/amd64/traceback.c +++ b/src/pkg/runtime/amd64/traceback.c @@ -18,8 +18,8 @@ void runtime·morestack(void); // as well as the runtime.Callers function (pcbuf != nil). // A little clunky to merge the two but avoids duplicating // the code and all its subtlety. 
-static int32 -gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 max) +int32 +runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, int32 max) { byte *p; int32 i, n, iter, sawnewstack; @@ -28,6 +28,7 @@ gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 max) Stktop *stk; Func *f; + USED(lr0); pc = (uintptr)pc0; lr = 0; fp = nil; @@ -199,7 +200,7 @@ gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 max) void runtime·traceback(byte *pc0, byte *sp, byte*, G *g) { - gentraceback(pc0, sp, g, 0, nil, 100); + runtime·gentraceback(pc0, sp, nil, g, 0, nil, 100); } int32 @@ -211,7 +212,7 @@ runtime·callers(int32 skip, uintptr *pcbuf, int32 m) sp = (byte*)&skip; pc = runtime·getcallerpc(&skip); - return gentraceback(pc, sp, g, skip, pcbuf, m); + return runtime·gentraceback(pc, sp, nil, g, skip, pcbuf, m); } static uintptr diff --git a/src/pkg/runtime/arm/asm.s b/src/pkg/runtime/arm/asm.s index f9fe7e628..4d36606a7 100644 --- a/src/pkg/runtime/arm/asm.s +++ b/src/pkg/runtime/arm/asm.s @@ -93,14 +93,13 @@ TEXT runtime·breakpoint(SB),7,$0 * go-routine */ -// uintptr gosave(Gobuf*) +// void gosave(Gobuf*) // save state in Gobuf; setjmp TEXT runtime·gosave(SB), 7, $-4 MOVW 0(FP), R0 // gobuf MOVW SP, gobuf_sp(R0) MOVW LR, gobuf_pc(R0) MOVW g, gobuf_g(R0) - MOVW $0, R0 // return 0 RET // void gogo(Gobuf*, uintptr) @@ -127,6 +126,30 @@ TEXT runtime·gogocall(SB), 7, $-4 MOVW gobuf_pc(R0), LR MOVW R1, PC +// void mcall(void (*fn)(G*)) +// Switch to m->g0's stack, call fn(g). +// Fn must never return. It should gogo(&g->gobuf) +// to keep running g. +TEXT runtime·mcall(SB), 7, $-4 + MOVW fn+0(FP), R0 + + // Save caller state in g->gobuf. + MOVW SP, (g_sched+gobuf_sp)(g) + MOVW LR, (g_sched+gobuf_pc)(g) + MOVW g, (g_sched+gobuf_g)(g) + + // Switch to m->g0 & its stack, call fn. 
+ MOVW g, R1 + MOVW m_g0(m), g + CMP g, R1 + BL.EQ runtime·badmcall(SB) + MOVW (g_sched+gobuf_sp)(g), SP + SUB $8, SP + MOVW R1, 4(SP) + BL (R0) + BL runtime·badmcall2(SB) + RET + /* * support for morestack */ @@ -159,9 +182,9 @@ TEXT runtime·morestack(SB),7,$-4 // Set m->morepc to f's PC. MOVW LR, m_morepc(m) - // Call newstack on m's scheduling stack. + // Call newstack on m->g0's stack. MOVW m_g0(m), g - MOVW (m_sched+gobuf_sp)(m), SP + MOVW (g_sched+gobuf_sp)(g), SP B runtime·newstack(SB) // Called from reflection library. Mimics morestack, @@ -192,9 +215,9 @@ TEXT reflect·call(SB), 7, $-4 MOVW $1, R3 MOVW R3, m_moreframesize(m) // f's frame size - // Call newstack on m's scheduling stack. + // Call newstack on m->g0's stack. MOVW m_g0(m), g - MOVW (m_sched+gobuf_sp)(m), SP + MOVW (g_sched+gobuf_sp)(g), SP B runtime·newstack(SB) // Return point when leaving stack. @@ -203,9 +226,9 @@ TEXT runtime·lessstack(SB), 7, $-4 // Save return value in m->cret MOVW R0, m_cret(m) - // Call oldstack on m's scheduling stack. + // Call oldstack on m->g0's stack. MOVW m_g0(m), g - MOVW (m_sched+gobuf_sp)(m), SP + MOVW (g_sched+gobuf_sp)(g), SP B runtime·oldstack(SB) // void jmpdefer(fn, sp); @@ -221,6 +244,12 @@ TEXT runtime·jmpdefer(SB), 7, $0 MOVW $-4(SP), SP // SP is 4 below argp, due to saved LR B (R0) +TEXT runtime·asmcgocall(SB),7,$0 + B runtime·cgounimpl(SB) + +TEXT runtime·cgocallback(SB),7,$0 + B runtime·cgounimpl(SB) + TEXT runtime·memclr(SB),7,$20 MOVW 0(FP), R0 MOVW $0, R1 // c = 0 @@ -248,22 +277,6 @@ TEXT runtime·getcallersp(SB),7,$-4 MOVW $-4(R0), R0 RET -// runcgo(void(*fn)(void*), void *arg) -// Just call fn(arg), but first align the stack -// appropriately for the gcc ABI. 
-// TODO(kaib): figure out the arm-gcc ABI -TEXT runtime·runcgo(SB),7,$16 - BL runtime·abort(SB) -// MOVL fn+0(FP), AX -// MOVL arg+4(FP), BX -// MOVL SP, CX -// ANDL $~15, SP // alignment for gcc ABI -// MOVL CX, 4(SP) -// MOVL BX, 0(SP) -// CALL AX -// MOVL 4(SP), SP -// RET - TEXT runtime·emptyfunc(SB),0,$0 RET @@ -271,10 +284,6 @@ TEXT runtime·abort(SB),7,$-4 MOVW $0, R0 MOVW (R0), R1 -TEXT runtime·runcgocallback(SB),7,$0 - MOVW $0, R0 - MOVW (R0), R1 - // bool armcas(int32 *val, int32 old, int32 new) // Atomically: // if(*val == old){ diff --git a/src/pkg/runtime/arm/traceback.c b/src/pkg/runtime/arm/traceback.c index ad3096823..c3934c37c 100644 --- a/src/pkg/runtime/arm/traceback.c +++ b/src/pkg/runtime/arm/traceback.c @@ -14,8 +14,8 @@ void _mod(void); void _divu(void); void _modu(void); -static int32 -gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, int32 max) +int32 +runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, int32 max) { int32 i, n, iter; uintptr pc, lr, tracepc, x; @@ -189,11 +189,10 @@ gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, i return n; } - void runtime·traceback(byte *pc0, byte *sp, byte *lr, G *g) { - gentraceback(pc0, sp, lr, g, 0, nil, 100); + runtime·gentraceback(pc0, sp, lr, g, 0, nil, 100); } // func caller(n int) (pc uintptr, file string, line int, ok bool) @@ -205,5 +204,5 @@ runtime·callers(int32 skip, uintptr *pcbuf, int32 m) sp = runtime·getcallersp(&skip); pc = runtime·getcallerpc(&skip); - return gentraceback(pc, sp, 0, g, skip, pcbuf, m); + return runtime·gentraceback(pc, sp, 0, g, skip, pcbuf, m); } diff --git a/src/pkg/runtime/cgocall.c b/src/pkg/runtime/cgocall.c index 741e8f0b8..58f287e90 100644 --- a/src/pkg/runtime/cgocall.c +++ b/src/pkg/runtime/cgocall.c @@ -3,18 +3,97 @@ // license that can be found in the LICENSE file. 
#include "runtime.h" +#include "arch.h" #include "stack.h" #include "cgocall.h" +// Cgo call and callback support. +// +// To call into the C function f from Go, the cgo-generated code calls +// runtime.cgocall(_cgo_Cfunc_f, frame), where _cgo_Cfunc_f is a +// gcc-compiled function written by cgo. +// +// runtime.cgocall (below) locks g to m, calls entersyscall +// so as not to block other goroutines or the garbage collector, +// and then calls runtime.asmcgocall(_cgo_Cfunc_f, frame). +// +// runtime.asmcgocall (in $GOARCH/asm.s) switches to the m->g0 stack +// (assumed to be an operating system-allocated stack, so safe to run +// gcc-compiled code on) and calls _cgo_Cfunc_f(frame). +// +// _cgo_Cfunc_f invokes the actual C function f with arguments +// taken from the frame structure, records the results in the frame, +// and returns to runtime.asmcgocall. +// +// After it regains control, runtime.asmcgocall switches back to the +// original g (m->curg)'s stack and returns to runtime.cgocall. +// +// After it regains control, runtime.cgocall calls exitsyscall, which blocks +// until this m can run Go code without violating the $GOMAXPROCS limit, +// and then unlocks g from m. +// +// The above description skipped over the possibility of the gcc-compiled +// function f calling back into Go. If that happens, we continue down +// the rabbit hole during the execution of f. +// +// To make it possible for gcc-compiled C code to call a Go function p.GoF, +// cgo writes a gcc-compiled function named GoF (not p.GoF, since gcc doesn't +// know about packages). The gcc-compiled C function f calls GoF. +// +// GoF calls crosscall2(_cgoexp_GoF, frame, framesize). Crosscall2 +// (in cgo/$GOOS.S, a gcc-compiled assembly file) is a two-argument +// adapter from the gcc function call ABI to the 6c function call ABI. +// It is called from gcc to call 6c functions. 
In this case it calls +// _cgoexp_GoF(frame, framesize), still running on m->g0's stack +// and outside the $GOMAXPROCS limit. Thus, this code cannot yet +// call arbitrary Go code directly and must be careful not to allocate +// memory or use up m->g0's stack. +// +// _cgoexp_GoF calls runtime.cgocallback(p.GoF, frame, framesize). +// (The reason for having _cgoexp_GoF instead of writing a crosscall3 +// to make this call directly is that _cgoexp_GoF, because it is compiled +// with 6c instead of gcc, can refer to dotted names like +// runtime.cgocallback and p.GoF.) +// +// runtime.cgocallback (in $GOOS/asm.s) switches from m->g0's +// stack to the original g (m->curg)'s stack, on which it calls +// runtime.cgocallbackg(p.GoF, frame, framesize). +// As part of the stack switch, runtime.cgocallback saves the current +// SP as m->g0->sched.sp, so that any use of m->g0's stack during the +// execution of the callback will be done below the existing stack frames. +// Before overwriting m->g0->sched.sp, it pushes the old value on the +// m->g0 stack, so that it can be restored later. +// +// runtime.cgocallbackg (below) is now running on a real goroutine +// stack (not an m->g0 stack). First it calls runtime.exitsyscall, which will +// block until the $GOMAXPROCS limit allows running this goroutine. +// Once exitsyscall has returned, it is safe to do things like call the memory +// allocator or invoke the Go callback function p.GoF. runtime.cgocallback +// first defers a function to unwind m->g0.sched.sp, so that if p.GoF +// panics, m->g0.sched.sp will be restored to its old value: the m->g0 stack +// and the m->curg stack will be unwound in lock step. +// Then it calls p.GoF. Finally it pops but does not execute the deferred +// function, calls runtime.entersyscall, and returns to runtime.cgocallback. 
+// +// After it regains control, runtime.cgocallback switches back to +// m->g0's stack (the pointer is still in m->g0.sched.sp), restores the old +// m->g0.sched.sp value from the stack, and returns to _cgoexp_GoF. +// +// _cgoexp_GoF immediately returns to crosscall2, which restores the +// callee-save registers for gcc and returns to GoF, which returns to f. + void *initcgo; /* filled in by dynamic linker when Cgo is available */ int64 ncgocall; -void runtime·entersyscall(void); -void runtime·exitsyscall(void); + +static void unlockm(void); +static void unwindm(void); + +// Call from Go to C. void runtime·cgocall(void (*fn)(void*), void *arg) { - G *oldlock; + Defer *d; if(!runtime·iscgo) runtime·throw("cgocall unavailable"); @@ -28,61 +107,49 @@ runtime·cgocall(void (*fn)(void*), void *arg) * Lock g to m to ensure we stay on the same stack if we do a * cgo callback. */ - oldlock = m->lockedg; - m->lockedg = g; - g->lockedm = m; + d = nil; + if(m->lockedg == nil) { + m->lockedg = g; + g->lockedm = m; + + // Add entry to defer stack in case of panic. + d = runtime·malloc(sizeof(*d)); + d->fn = (byte*)unlockm; + d->siz = 0; + d->link = g->defer; + d->argp = (void*)-1; // unused because unwindm never recovers + g->defer = d; + } /* * Announce we are entering a system call * so that the scheduler knows to create another * M to run goroutines while we are in the * foreign code. + * + * The call to asmcgocall is guaranteed not to + * split the stack and does not allocate memory, + * so it is safe to call while "in a system call", outside + * the $GOMAXPROCS accounting. 
*/ runtime·entersyscall(); - runtime·runcgo(fn, arg); + runtime·asmcgocall(fn, arg); runtime·exitsyscall(); - m->lockedg = oldlock; - if(oldlock == nil) - g->lockedm = nil; - - return; + if(d != nil) { + if(g->defer != d || d->fn != (byte*)unlockm) + runtime·throw("runtime: bad defer entry in cgocallback"); + g->defer = d->link; + runtime·free(d); + unlockm(); + } } -// When a C function calls back into Go, the wrapper function will -// call this. This switches to a Go stack, copies the arguments -// (arg/argsize) on to the stack, calls the function, copies the -// arguments back where they came from, and finally returns to the old -// stack. -void -runtime·cgocallback(void (*fn)(void), void *arg, int32 argsize) +static void +unlockm(void) { - Gobuf oldsched, oldg1sched; - G *g1; - void *sp; - - if(g != m->g0) - runtime·throw("bad g in cgocallback"); - - g1 = m->curg; - oldsched = m->sched; - oldg1sched = g1->sched; - - runtime·startcgocallback(g1); - - sp = g1->sched.sp - argsize; - if(sp < g1->stackguard - StackGuard - StackSystem + 8) // +8 for return address - runtime·throw("g stack overflow in cgocallback"); - runtime·mcpy(sp, arg, argsize); - - runtime·runcgocallback(g1, sp, fn); - - runtime·mcpy(arg, sp, argsize); - - runtime·endcgocallback(g1); - - m->sched = oldsched; - g1->sched = oldg1sched; + m->lockedg = nil; + g->lockedm = nil; } void @@ -92,6 +159,8 @@ runtime·Cgocalls(int64 ret) FLUSH(&ret); } +// Helper functions for cgo code. + void (*_cgo_malloc)(void*); void (*_cgo_free)(void*); @@ -115,3 +184,63 @@ runtime·cfree(void *p) runtime·cgocall(_cgo_free, p); } +// Call from C back to Go. + +void +runtime·cgocallbackg(void (*fn)(void), void *arg, uintptr argsize) +{ + Defer *d; + + if(g != m->curg) + runtime·throw("runtime: bad g in cgocallback"); + + runtime·exitsyscall(); // coming out of cgo call + + // Add entry to defer stack in case of panic. 
+ d = runtime·malloc(sizeof(*d)); + d->fn = (byte*)unwindm; + d->siz = 0; + d->link = g->defer; + d->argp = (void*)-1; // unused because unwindm never recovers + g->defer = d; + + // Invoke callback. + reflect·call((byte*)fn, arg, argsize); + + // Pop defer. + // Do not unwind m->g0->sched.sp. + // Our caller, cgocallback, will do that. + if(g->defer != d || d->fn != (byte*)unwindm) + runtime·throw("runtime: bad defer entry in cgocallback"); + g->defer = d->link; + runtime·free(d); + + runtime·entersyscall(); // going back to cgo call +} + +static void +unwindm(void) +{ + // Restore sp saved by cgocallback during + // unwind of g's stack (see comment at top of file). + switch(thechar){ + default: + runtime·throw("runtime: unwindm not implemented"); + case '8': + case '6': + m->g0->sched.sp = *(void**)m->g0->sched.sp; + break; + } +} + +void +runtime·badcgocallback(void) // called from assembly +{ + runtime·throw("runtime: misaligned stack in cgocallback"); +} + +void +runtime·cgounimpl(void) // called from (incomplete) assembly +{ + runtime·throw("runtime: cgo not implemented"); +} diff --git a/src/pkg/runtime/cgocall.h b/src/pkg/runtime/cgocall.h index 1ad954eb1..253661a7e 100644 --- a/src/pkg/runtime/cgocall.h +++ b/src/pkg/runtime/cgocall.h @@ -7,6 +7,6 @@ */ void runtime·cgocall(void (*fn)(void*), void*); -void runtime·cgocallback(void (*fn)(void), void*, int32); +void runtime·cgocallback(void (*fn)(void), void*, uintptr); void *runtime·cmalloc(uintptr); void runtime·cfree(void*); diff --git a/src/pkg/runtime/chan.c b/src/pkg/runtime/chan.c index 3177c2295..8c45b076d 100644 --- a/src/pkg/runtime/chan.c +++ b/src/pkg/runtime/chan.c @@ -5,13 +5,9 @@ #include "runtime.h" #include "type.h" -static int32 debug = 0; +#define MAXALIGN 7 -enum -{ - Wclosed = 0x0001, // writer has closed - Rclosed = 0x0002, // reader has seen close -}; +static int32 debug = 0; typedef struct Link Link; typedef struct WaitQ WaitQ; @@ -40,32 +36,47 @@ struct Hchan uint32 qcount; // total 
data in the q uint32 dataqsiz; // size of the circular q uint16 elemsize; - uint16 closed; // Wclosed Rclosed errorcount + bool closed; uint8 elemalign; Alg* elemalg; // interface for element type - Link* senddataq; // pointer for sender - Link* recvdataq; // pointer for receiver + uint32 sendx; // send index + uint32 recvx; // receive index WaitQ recvq; // list of recv waiters WaitQ sendq; // list of send waiters SudoG* free; // freelist Lock; }; +// Buffer follows Hchan immediately in memory. +// chanbuf(c, i) is pointer to the i'th slot in the buffer. +#define chanbuf(c, i) ((byte*)((c)+1)+(uintptr)(c)->elemsize*(i)) + struct Link { Link* link; // asynch queue circular linked list byte elem[8]; // asynch queue data element (+ more) }; +enum +{ + // Scase.kind + CaseRecv, + CaseSend, + CaseDefault, +}; + struct Scase { Hchan* chan; // chan byte* pc; // return pc - uint16 send; // 0-recv 1-send 2-default + uint16 kind; uint16 so; // vararg of selected bool union { - byte elem[8]; // element (send) - byte* elemp; // pointer to element (recv) + byte elem[2*sizeof(void*)]; // element (send) + struct { + byte* elemp; // pointer to element (recv) + bool* receivedp; // pointer to received bool (recv2) + } recv; } u; }; @@ -90,7 +101,8 @@ Hchan* runtime·makechan_c(Type *elem, int64 hint) { Hchan *c; - int32 i; + int32 n; + byte *by; if(hint < 0 || (int32)hint != hint || hint > ((uintptr)-1) / elem->size) runtime·panicstring("makechan: size out of range"); @@ -100,32 +112,22 @@ runtime·makechan_c(Type *elem, int64 hint) runtime·throw("runtime.makechan: unsupported elem type"); } - c = runtime·mal(sizeof(*c)); + // calculate rounded size of Hchan + n = sizeof(*c); + while(n & MAXALIGN) + n++; + + // allocate memory in one call + by = runtime·mal(n + hint*elem->size); + + c = (Hchan*)by; + by += n; runtime·addfinalizer(c, destroychan, 0); c->elemsize = elem->size; c->elemalg = &runtime·algarray[elem->alg]; c->elemalign = elem->align; - - if(hint > 0) { - Link *d, *b, *e; - 
- // make a circular q - b = nil; - e = nil; - for(i=0; i<hint; i++) { - d = runtime·mal(sizeof(*d) + c->elemsize - sizeof(d->elem)); - if(e == nil) - e = d; - d->link = b; - b = d; - } - e->link = b; - c->recvdataq = b; - c->senddataq = b; - c->qcount = 0; - c->dataqsiz = hint; - } + c->dataqsiz = hint; if(debug) runtime·printf("makechan: chan=%p; elemsize=%D; elemalg=%d; elemalign=%d; dataqsiz=%d\n", @@ -183,7 +185,7 @@ runtime·chansend(Hchan *c, byte *ep, bool *pres) runtime·lock(c); loop: - if(c->closed & Wclosed) + if(c->closed) goto closed; if(c->dataqsiz > 0) @@ -228,7 +230,7 @@ loop: return; asynch: - if(c->closed & Wclosed) + if(c->closed) goto closed; if(c->qcount >= c->dataqsiz) { @@ -247,8 +249,9 @@ asynch: goto asynch; } if(ep != nil) - c->elemalg->copy(c->elemsize, c->senddataq->elem, ep); - c->senddataq = c->senddataq->link; + c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), ep); + if(++c->sendx == c->dataqsiz) + c->sendx = 0; c->qcount++; sg = dequeue(&c->recvq, c); @@ -269,7 +272,7 @@ closed: } void -runtime·chanrecv(Hchan* c, byte *ep, bool *pres, bool *closed) +runtime·chanrecv(Hchan* c, byte *ep, bool *selected, bool *received) { SudoG *sg; G *gp; @@ -284,14 +287,12 @@ runtime·chanrecv(Hchan* c, byte *ep, bool *pres, bool *closed) runtime·printf("chanrecv: chan=%p\n", c); runtime·lock(c); - if(closed != nil) - *closed = false; loop: if(c->dataqsiz > 0) goto asynch; - if(c->closed & Wclosed) + if(c->closed) goto closed; sg = dequeue(&c->sendq, c); @@ -305,14 +306,16 @@ loop: runtime·unlock(c); runtime·ready(gp); - if(pres != nil) - *pres = true; + if(selected != nil) + *selected = true; + if(received != nil) + *received = true; return; } - if(pres != nil) { + if(selected != nil) { runtime·unlock(c); - *pres = false; + *selected = false; return; } @@ -331,18 +334,22 @@ loop: if(ep != nil) c->elemalg->copy(c->elemsize, ep, sg->elem); c->elemalg->copy(c->elemsize, sg->elem, nil); + if(received != nil) + *received = true; freesg(c, sg); 
runtime·unlock(c); return; asynch: if(c->qcount <= 0) { - if(c->closed & Wclosed) + if(c->closed) goto closed; - if(pres != nil) { + if(selected != nil) { runtime·unlock(c); - *pres = false; + *selected = false; + if(received != nil) + *received = false; return; } sg = allocsg(c); @@ -355,9 +362,10 @@ asynch: goto asynch; } if(ep != nil) - c->elemalg->copy(c->elemsize, ep, c->recvdataq->elem); - c->elemalg->copy(c->elemsize, c->recvdataq->elem, nil); - c->recvdataq = c->recvdataq->link; + c->elemalg->copy(c->elemsize, ep, chanbuf(c, c->recvx)); + c->elemalg->copy(c->elemsize, chanbuf(c, c->recvx), nil); + if(++c->recvx == c->dataqsiz) + c->recvx = 0; c->qcount--; sg = dequeue(&c->sendq, c); if(sg != nil) { @@ -365,24 +373,22 @@ asynch: freesg(c, sg); runtime·unlock(c); runtime·ready(gp); - if(pres != nil) - *pres = true; - return; - } + } else + runtime·unlock(c); - runtime·unlock(c); - if(pres != nil) - *pres = true; + if(selected != nil) + *selected = true; + if(received != nil) + *received = true; return; closed: - if(closed != nil) - *closed = true; if(ep != nil) c->elemalg->copy(c->elemsize, ep, nil); - c->closed |= Rclosed; - if(pres != nil) - *pres = true; + if(selected != nil) + *selected = true; + if(received != nil) + *received = false; runtime·unlock(c); } @@ -416,16 +422,16 @@ runtime·chanrecv1(Hchan* c, ...) runtime·chanrecv(c, ae, nil, nil); } -// chanrecv3(hchan *chan any) (elem any, closed bool); +// chanrecv2(hchan *chan any) (elem any, received bool); #pragma textflag 7 void -runtime·chanrecv3(Hchan* c, ...) +runtime·chanrecv2(Hchan* c, ...) { int32 o; byte *ae, *ac; if(c == nil) - runtime·panicstring("range over nil channel"); + runtime·panicstring("receive from nil channel"); o = runtime·rnd(sizeof(c), Structrnd); ae = (byte*)&c + o; @@ -490,9 +496,35 @@ runtime·selectnbsend(Hchan *c, ...) 
// #pragma textflag 7 void -runtime·selectnbrecv(byte *v, Hchan *c, bool ok) +runtime·selectnbrecv(byte *v, Hchan *c, bool selected) { - runtime·chanrecv(c, v, &ok, nil); + runtime·chanrecv(c, v, &selected, nil); +} + +// func selectnbrecv2(elem *any, ok *bool, c chan any) bool +// +// compiler implements +// +// select { +// case v, ok = <-c: +// ... foo +// default: +// ... bar +// } +// +// as +// +// if c != nil && selectnbrecv2(&v, &ok, c) { +// ... foo +// } else { +// ... bar +// } +// +#pragma textflag 7 +void +runtime·selectnbrecv2(byte *v, bool *received, Hchan *c, bool selected) +{ + runtime·chanrecv(c, v, &selected, received); } static void newselect(int32, Select**); @@ -530,19 +562,30 @@ newselect(int32 size, Select **selp) runtime·printf("newselect s=%p size=%d\n", sel, size); } +// cut in half to give stack a chance to split +static void selectsend(Select **selp, Hchan *c, void *pc); + // selectsend(sel *byte, hchan *chan any, elem any) (selected bool); #pragma textflag 7 void runtime·selectsend(Select *sel, Hchan *c, ...) { - int32 i, eo; - Scase *cas; - byte *ae; - // nil cases do not compete if(c == nil) return; + + selectsend(&sel, c, runtime·getcallerpc(&sel)); +} +static void +selectsend(Select **selp, Hchan *c, void *pc) +{ + int32 i, eo; + Scase *cas; + byte *ae; + Select *sel; + + sel = *selp; i = sel->ncase; if(i >= sel->tcase) runtime·throw("selectsend: too many cases"); @@ -550,67 +593,88 @@ runtime·selectsend(Select *sel, Hchan *c, ...) 
cas = runtime·mal(sizeof *cas + c->elemsize - sizeof(cas->u.elem)); sel->scase[i] = cas; - cas->pc = runtime·getcallerpc(&sel); + cas->pc = pc; cas->chan = c; eo = runtime·rnd(sizeof(sel), sizeof(c)); eo = runtime·rnd(eo+sizeof(c), c->elemsize); cas->so = runtime·rnd(eo+c->elemsize, Structrnd); - cas->send = 1; + cas->kind = CaseSend; - ae = (byte*)&sel + eo; + ae = (byte*)selp + eo; c->elemalg->copy(c->elemsize, cas->u.elem, ae); if(debug) - runtime·printf("selectsend s=%p pc=%p chan=%p so=%d send=%d\n", - sel, cas->pc, cas->chan, cas->so, cas->send); + runtime·printf("selectsend s=%p pc=%p chan=%p so=%d\n", + sel, cas->pc, cas->chan, cas->so); } +// cut in half to give stack a chance to split +static void selectrecv(Select *sel, Hchan *c, void *pc, void *elem, bool*, int32 so); + // selectrecv(sel *byte, hchan *chan any, elem *any) (selected bool); #pragma textflag 7 void -runtime·selectrecv(Select *sel, Hchan *c, ...) +runtime·selectrecv(Select *sel, Hchan *c, void *elem, bool selected) { - int32 i, eo; - Scase *cas; + // nil cases do not compete + if(c == nil) + return; + + selectrecv(sel, c, runtime·getcallerpc(&sel), elem, nil, (byte*)&selected - (byte*)&sel); +} +// selectrecv2(sel *byte, hchan *chan any, elem *any, received *bool) (selected bool); +#pragma textflag 7 +void +runtime·selectrecv2(Select *sel, Hchan *c, void *elem, bool *received, bool selected) +{ // nil cases do not compete if(c == nil) return; + selectrecv(sel, c, runtime·getcallerpc(&sel), elem, received, (byte*)&selected - (byte*)&sel); +} + +static void +selectrecv(Select *sel, Hchan *c, void *pc, void *elem, bool *received, int32 so) +{ + int32 i; + Scase *cas; + i = sel->ncase; if(i >= sel->tcase) runtime·throw("selectrecv: too many cases"); sel->ncase = i+1; cas = runtime·mal(sizeof *cas); sel->scase[i] = cas; - cas->pc = runtime·getcallerpc(&sel); + cas->pc = pc; cas->chan = c; - eo = runtime·rnd(sizeof(sel), sizeof(c)); - eo = runtime·rnd(eo+sizeof(c), sizeof(byte*)); - cas->so = 
runtime·rnd(eo+sizeof(byte*), Structrnd); - cas->send = 0; - cas->u.elemp = *(byte**)((byte*)&sel + eo); + cas->so = so; + cas->kind = CaseRecv; + cas->u.recv.elemp = elem; + cas->u.recv.receivedp = nil; + cas->u.recv.receivedp = received; if(debug) - runtime·printf("selectrecv s=%p pc=%p chan=%p so=%d send=%d\n", - sel, cas->pc, cas->chan, cas->so, cas->send); + runtime·printf("selectrecv s=%p pc=%p chan=%p so=%d\n", + sel, cas->pc, cas->chan, cas->so); } - -static void selectdefault(Select*, void*); +// cut in half to give stack a chance to split +static void selectdefault(Select*, void*, int32); // selectdefault(sel *byte) (selected bool); #pragma textflag 7 void -runtime·selectdefault(Select *sel, ...) +runtime·selectdefault(Select *sel, bool selected) { - selectdefault(sel, runtime·getcallerpc(&sel)); + selectdefault(sel, runtime·getcallerpc(&sel), (byte*)&selected - (byte*)&sel); } static void -selectdefault(Select *sel, void *callerpc) +selectdefault(Select *sel, void *callerpc, int32 so) { int32 i; Scase *cas; @@ -624,13 +688,12 @@ selectdefault(Select *sel, void *callerpc) cas->pc = callerpc; cas->chan = nil; - cas->so = runtime·rnd(sizeof(sel), Structrnd); - cas->send = 2; - cas->u.elemp = nil; + cas->so = so; + cas->kind = CaseDefault; if(debug) - runtime·printf("selectdefault s=%p pc=%p so=%d send=%d\n", - sel, cas->pc, cas->so, cas->send); + runtime·printf("selectdefault s=%p pc=%p so=%d\n", + sel, cas->pc, cas->so); } static void @@ -747,8 +810,8 @@ loop: cas = sel->scase[o]; c = cas->chan; - switch(cas->send) { - case 0: // recv + switch(cas->kind) { + case CaseRecv: if(c->dataqsiz > 0) { if(c->qcount > 0) goto asyncrecv; @@ -757,12 +820,12 @@ loop: if(sg != nil) goto syncrecv; } - if(c->closed & Wclosed) + if(c->closed) goto rclose; break; - case 1: // send - if(c->closed & Wclosed) + case CaseSend: + if(c->closed) goto sclose; if(c->dataqsiz > 0) { if(c->qcount < c->dataqsiz) @@ -774,7 +837,7 @@ loop: } break; - case 2: // default + case 
CaseDefault: dfl = cas; break; } @@ -794,12 +857,12 @@ loop: sg = allocsg(c); sg->offset = o; - switch(cas->send) { - case 0: // recv + switch(cas->kind) { + case CaseRecv: enqueue(&c->recvq, sg); break; - case 1: // send + case CaseSend: if(c->dataqsiz == 0) c->elemalg->copy(c->elemsize, sg->elem, cas->u.elem); enqueue(&c->sendq, sg); @@ -821,7 +884,7 @@ loop: if(sg == nil || i != sg->offset) { cas = sel->scase[i]; c = cas->chan; - if(cas->send) + if(cas->kind == CaseSend) dequeueg(&c->sendq, c); else dequeueg(&c->recvq, c); @@ -841,12 +904,14 @@ loop: } if(debug) - runtime·printf("wait-return: sel=%p c=%p cas=%p send=%d o=%d\n", - sel, c, cas, cas->send, o); - - if(!cas->send) { - if(cas->u.elemp != nil) - c->elemalg->copy(c->elemsize, cas->u.elemp, sg->elem); + runtime·printf("wait-return: sel=%p c=%p cas=%p kind=%d o=%d\n", + sel, c, cas, cas->kind, o); + + if(cas->kind == CaseRecv) { + if(cas->u.recv.receivedp != nil) + *cas->u.recv.receivedp = true; + if(cas->u.recv.elemp != nil) + c->elemalg->copy(c->elemsize, cas->u.recv.elemp, sg->elem); c->elemalg->copy(c->elemsize, sg->elem, nil); } @@ -855,10 +920,13 @@ loop: asyncrecv: // can receive from buffer - if(cas->u.elemp != nil) - c->elemalg->copy(c->elemsize, cas->u.elemp, c->recvdataq->elem); - c->elemalg->copy(c->elemsize, c->recvdataq->elem, nil); - c->recvdataq = c->recvdataq->link; + if(cas->u.recv.receivedp != nil) + *cas->u.recv.receivedp = true; + if(cas->u.recv.elemp != nil) + c->elemalg->copy(c->elemsize, cas->u.recv.elemp, chanbuf(c, c->recvx)); + c->elemalg->copy(c->elemsize, chanbuf(c, c->recvx), nil); + if(++c->recvx == c->dataqsiz) + c->recvx = 0; c->qcount--; sg = dequeue(&c->sendq, c); if(sg != nil) { @@ -871,8 +939,9 @@ asyncrecv: asyncsend: // can send to buffer if(cas->u.elem != nil) - c->elemalg->copy(c->elemsize, c->senddataq->elem, cas->u.elem); - c->senddataq = c->senddataq->link; + c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), cas->u.elem); + if(++c->sendx == c->dataqsiz) + 
c->sendx = 0; c->qcount++; sg = dequeue(&c->recvq, c); if(sg != nil) { @@ -886,8 +955,10 @@ syncrecv: // can receive from sleeping sender (sg) if(debug) runtime·printf("syncrecv: sel=%p c=%p o=%d\n", sel, c, o); - if(cas->u.elemp != nil) - c->elemalg->copy(c->elemsize, cas->u.elemp, sg->elem); + if(cas->u.recv.receivedp != nil) + *cas->u.recv.receivedp = true; + if(cas->u.recv.elemp != nil) + c->elemalg->copy(c->elemsize, cas->u.recv.elemp, sg->elem); c->elemalg->copy(c->elemsize, sg->elem, nil); gp = sg->g; gp->param = sg; @@ -896,16 +967,17 @@ syncrecv: rclose: // read at end of closed channel - if(cas->u.elemp != nil) - c->elemalg->copy(c->elemsize, cas->u.elemp, nil); - c->closed |= Rclosed; + if(cas->u.recv.receivedp != nil) + *cas->u.recv.receivedp = false; + if(cas->u.recv.elemp != nil) + c->elemalg->copy(c->elemsize, cas->u.recv.elemp, nil); goto retc; syncsend: // can send to sleeping receiver (sg) if(debug) runtime·printf("syncsend: sel=%p c=%p o=%d\n", sel, c, o); - if(c->closed & Wclosed) + if(c->closed) goto sclose; c->elemalg->copy(c->elemsize, sg->elem, cas->u.elem); gp = sg->g; @@ -916,7 +988,6 @@ retc: selunlock(sel); // return to pc corresponding to chosen case - pc = cas->pc; as = (byte*)selp + cas->so; freesel(sel); @@ -941,12 +1012,12 @@ runtime·closechan(Hchan *c) runtime·gosched(); runtime·lock(c); - if(c->closed & Wclosed) { + if(c->closed) { runtime·unlock(c); runtime·panicstring("close of closed channel"); } - c->closed |= Wclosed; + c->closed = true; // release all readers for(;;) { @@ -979,12 +1050,6 @@ runtime·chanclose(Hchan *c) runtime·closechan(c); } -bool -runtime·chanclosed(Hchan *c) -{ - return (c->closed & Rclosed) != 0; -} - int32 runtime·chanlen(Hchan *c) { @@ -997,15 +1062,6 @@ runtime·chancap(Hchan *c) return c->dataqsiz; } - -// closedchan(sel *byte) bool; -void -runtime·closedchan(Hchan *c, bool closed) -{ - closed = runtime·chanclosed(c); - FLUSH(&closed); -} - static SudoG* dequeue(WaitQ *q, Hchan *c) { diff --git 
a/src/pkg/runtime/cpuprof.c b/src/pkg/runtime/cpuprof.c new file mode 100644 index 000000000..6233bcb45 --- /dev/null +++ b/src/pkg/runtime/cpuprof.c @@ -0,0 +1,421 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// CPU profiling. +// Based on algorithms and data structures used in +// http://code.google.com/p/google-perftools/. +// +// The main difference between this code and the google-perftools +// code is that this code is written to allow copying the profile data +// to an arbitrary io.Writer, while the google-perftools code always +// writes to an operating system file. +// +// The signal handler for the profiling clock tick adds a new stack trace +// to a hash table tracking counts for recent traces. Most clock ticks +// hit in the cache. In the event of a cache miss, an entry must be +// evicted from the hash table, copied to a log that will eventually be +// written as profile data. The google-perftools code flushed the +// log itself during the signal handler. This code cannot do that, because +// the io.Writer might block or need system calls or locks that are not +// safe to use from within the signal handler. Instead, we split the log +// into two halves and let the signal handler fill one half while a goroutine +// is writing out the other half. When the signal handler fills its half, it +// offers to swap with the goroutine. If the writer is not done with its half, +// we lose the stack trace for this clock tick (and record that loss). +// The goroutine interacts with the signal handler by calling getprofile() to +// get the next log piece to write, implicitly handing back the last log +// piece it obtained. +// +// The state of this dance between the signal handler and the goroutine +// is encoded in the Profile.handoff field. 
If handoff == 0, then the goroutine +// is not using either log half and is waiting (or will soon be waiting) for +// a new piece by calling notesleep(&p->wait). If the signal handler +// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait) +// to wake the goroutine. The value indicates the number of entries in the +// log half being handed off. The goroutine leaves the non-zero value in +// place until it has finished processing the log half and then flips the number +// back to zero. Setting the high bit in handoff means that the profiling is over, +// and the goroutine is now in charge of flushing the data left in the hash table +// to the log and returning that data. +// +// The handoff field is manipulated using atomic operations. +// For the most part, the manipulation of handoff is orderly: if handoff == 0 +// then the signal handler owns it and can change it to non-zero. +// If handoff != 0 then the goroutine owns it and can change it to zero. +// If that were the end of the story then we would not need to manipulate +// handoff using atomic operations. The operations are needed, however, +// in order to let the log closer set the high bit to indicate "EOF" safely +// in the situation when normally the goroutine "owns" handoff. + +#include "runtime.h" +#include "malloc.h" + +enum +{ + HashSize = 1<<10, + LogSize = 1<<17, + Assoc = 4, + MaxStack = 64, +}; + +typedef struct Profile Profile; +typedef struct Bucket Bucket; +typedef struct Entry Entry; + +struct Entry { + uintptr count; + uintptr depth; + uintptr stack[MaxStack]; +}; + +struct Bucket { + Entry entry[Assoc]; +}; + +struct Profile { + bool on; // profiling is on + Note wait; // goroutine waits here + uintptr count; // tick count + uintptr evicts; // eviction count + uintptr lost; // lost ticks that need to be logged + uintptr totallost; // total lost ticks + + // Active recent stack traces. + Bucket hash[HashSize]; + + // Log of traces evicted from hash. 
+ // Signal handler has filled log[toggle][:nlog]. + // Goroutine is writing log[1-toggle][:handoff]. + uintptr log[2][LogSize/2]; + uintptr nlog; + int32 toggle; + uint32 handoff; + + // Writer state. + // Writer maintains its own toggle to avoid races + // looking at signal handler's toggle. + uint32 wtoggle; + bool wholding; // holding & need to release a log half + bool flushing; // flushing hash table - profile is over +}; + +static Lock lk; +static Profile *prof; + +static void tick(uintptr*, int32); +static void add(Profile*, uintptr*, int32); +static bool evict(Profile*, Entry*); +static bool flushlog(Profile*); + +// LostProfileData is a no-op function used in profiles +// to mark the number of profiling stack traces that were +// discarded due to slow data writers. +static void LostProfileData(void) { +} + +// SetCPUProfileRate sets the CPU profiling rate. +// The user documentation is in debug.go. +void +runtime·SetCPUProfileRate(int32 hz) +{ + uintptr *p; + uintptr n; + + // Clamp hz to something reasonable. + if(hz < 0) + hz = 0; + if(hz > 1000000) + hz = 1000000; + + runtime·lock(&lk); + if(hz > 0) { + if(prof == nil) { + prof = runtime·SysAlloc(sizeof *prof); + if(prof == nil) { + runtime·printf("runtime: cpu profiling cannot allocate memory\n"); + runtime·unlock(&lk); + return; + } + } + if(prof->on || prof->handoff != 0) { + runtime·printf("runtime: cannot set cpu profile rate until previous profile has finished.\n"); + runtime·unlock(&lk); + return; + } + + prof->on = true; + p = prof->log[0]; + // pprof binary header format. 
+ // http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117 + *p++ = 0; // count for header + *p++ = 3; // depth for header + *p++ = 0; // version number + *p++ = 1000000 / hz; // period (microseconds) + *p++ = 0; + prof->nlog = p - prof->log[0]; + prof->toggle = 0; + prof->wholding = false; + prof->wtoggle = 0; + prof->flushing = false; + runtime·noteclear(&prof->wait); + + runtime·setcpuprofilerate(tick, hz); + } else if(prof->on) { + runtime·setcpuprofilerate(nil, 0); + prof->on = false; + + // Now add is not running anymore, and getprofile owns the entire log. + // Set the high bit in prof->handoff to tell getprofile. + for(;;) { + n = prof->handoff; + if(n&0x80000000) + runtime·printf("runtime: setcpuprofile(off) twice"); + if(runtime·cas(&prof->handoff, n, n|0x80000000)) + break; + } + if(n == 0) { + // we did the transition from 0 -> nonzero so we wake getprofile + runtime·notewakeup(&prof->wait); + } + } + runtime·unlock(&lk); +} + +static void +tick(uintptr *pc, int32 n) +{ + add(prof, pc, n); +} + +// add adds the stack trace to the profile. +// It is called from signal handlers and other limited environments +// and cannot allocate memory or acquire locks that might be +// held at the time of the signal, nor can it use substantial amounts +// of stack. It is allowed to call evict. +static void +add(Profile *p, uintptr *pc, int32 n) +{ + int32 i, j; + uintptr h, x; + Bucket *b; + Entry *e; + + if(n > MaxStack) + n = MaxStack; + + // Compute hash. + h = 0; + for(i=0; i<n; i++) { + h = h<<8 | (h>>(8*(sizeof(h)-1))); + x = pc[i]; + h += x*31 + x*7 + x*3; + } + p->count++; + + // Add to entry count if already present in table. + b = &p->hash[h%HashSize]; + for(i=0; i<Assoc; i++) { + e = &b->entry[i]; + if(e->depth != n) + continue; + for(j=0; j<n; j++) + if(e->stack[j] != pc[j]) + goto ContinueAssoc; + e->count++; + return; + ContinueAssoc:; + } + + // Evict entry with smallest count. 
+ e = &b->entry[0]; + for(i=1; i<Assoc; i++) + if(b->entry[i].count < e->count) + e = &b->entry[i]; + if(e->count > 0) { + if(!evict(p, e)) { + // Could not evict entry. Record lost stack. + p->lost++; + p->totallost++; + return; + } + p->evicts++; + } + + // Reuse the newly evicted entry. + e->depth = n; + e->count = 1; + for(i=0; i<n; i++) + e->stack[i] = pc[i]; +} + +// evict copies the given entry's data into the log, so that +// the entry can be reused. evict is called from add, which +// is called from the profiling signal handler, so it must not +// allocate memory or block. It is safe to call flushLog. +// evict returns true if the entry was copied to the log, +// false if there was no room available. +static bool +evict(Profile *p, Entry *e) +{ + int32 i, d, nslot; + uintptr *log, *q; + + d = e->depth; + nslot = d+2; + log = p->log[p->toggle]; + if(p->nlog+nslot > nelem(p->log[0])) { + if(!flushlog(p)) + return false; + log = p->log[p->toggle]; + } + + q = log+p->nlog; + *q++ = e->count; + *q++ = d; + for(i=0; i<d; i++) + *q++ = e->stack[i]; + p->nlog = q - log; + e->count = 0; + return true; +} + +// flushlog tries to flush the current log and switch to the other one. +// flushlog is called from evict, called from add, called from the signal handler, +// so it cannot allocate memory or block. It can try to swap logs with +// the writing goroutine, as explained in the comment at the top of this file. +static bool +flushlog(Profile *p) +{ + uintptr *log, *q; + + if(!runtime·cas(&p->handoff, 0, p->nlog)) + return false; + runtime·notewakeup(&p->wait); + + p->toggle = 1 - p->toggle; + log = p->log[p->toggle]; + q = log; + if(p->lost > 0) { + *q++ = p->lost; + *q++ = 1; + *q++ = (uintptr)LostProfileData; + } + p->nlog = q - log; + return true; +} + +// getprofile blocks until the next block of profiling data is available +// and returns it as a []byte. It is called from the writing goroutine. 
+Slice +getprofile(Profile *p) +{ + uint32 i, j, n; + Slice ret; + Bucket *b; + Entry *e; + + ret.array = nil; + ret.len = 0; + ret.cap = 0; + + if(p == nil) + return ret; + + if(p->wholding) { + // Release previous log to signal handling side. + // Loop because we are racing against setprofile(off). + for(;;) { + n = p->handoff; + if(n == 0) { + runtime·printf("runtime: phase error during cpu profile handoff\n"); + return ret; + } + if(n & 0x80000000) { + p->wtoggle = 1 - p->wtoggle; + p->wholding = false; + p->flushing = true; + goto flush; + } + if(runtime·cas(&p->handoff, n, 0)) + break; + } + p->wtoggle = 1 - p->wtoggle; + p->wholding = false; + } + + if(p->flushing) + goto flush; + + if(!p->on && p->handoff == 0) + return ret; + + // Wait for new log. + runtime·entersyscall(); + runtime·notesleep(&p->wait); + runtime·exitsyscall(); + runtime·noteclear(&p->wait); + + n = p->handoff; + if(n == 0) { + runtime·printf("runtime: phase error during cpu profile wait\n"); + return ret; + } + if(n == 0x80000000) { + p->flushing = true; + goto flush; + } + n &= ~0x80000000; + + // Return new log to caller. + p->wholding = true; + + ret.array = (byte*)p->log[p->wtoggle]; + ret.len = n*sizeof(uintptr); + ret.cap = ret.len; + return ret; + +flush: + // In flush mode. + // Add is no longer being called. We own the log. + // Also, p->handoff is non-zero, so flushlog will return false. + // Evict the hash table into the log and return it. + for(i=0; i<HashSize; i++) { + b = &p->hash[i]; + for(j=0; j<Assoc; j++) { + e = &b->entry[j]; + if(e->count > 0 && !evict(p, e)) { + // Filled the log. Stop the loop and return what we've got. + goto breakflush; + } + } + } +breakflush: + + // Return pending log data. + if(p->nlog > 0) { + // Note that we're using toggle now, not wtoggle, + // because we're working on the log directly. 
+ ret.array = (byte*)p->log[p->toggle]; + ret.len = p->nlog*sizeof(uintptr); + ret.cap = ret.len; + p->nlog = 0; + return ret; + } + + // Made it through the table without finding anything to log. + // Finally done. Clean up and return nil. + p->flushing = false; + if(!runtime·cas(&p->handoff, p->handoff, 0)) + runtime·printf("runtime: profile flush racing with something\n"); + return ret; // set to nil at top of function +} + +// CPUProfile returns the next cpu profile block as a []byte. +// The user documentation is in debug.go. +void +runtime·CPUProfile(Slice ret) +{ + ret = getprofile(prof); + FLUSH(&ret); +} diff --git a/src/pkg/runtime/darwin/386/defs.h b/src/pkg/runtime/darwin/386/defs.h index f9d874d85..bb70207fd 100644 --- a/src/pkg/runtime/darwin/386/defs.h +++ b/src/pkg/runtime/darwin/386/defs.h @@ -89,6 +89,9 @@ enum { BUS_OBJERR = 0x3, SEGV_MAPERR = 0x1, SEGV_ACCERR = 0x2, + ITIMER_REAL = 0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, }; // Types @@ -139,14 +142,14 @@ struct StackT { typedef union Sighandler Sighandler; union Sighandler { - void *__sa_handler; - void *__sa_sigaction; + uint32 __sa_handler; + uint32 __sa_sigaction; }; typedef struct Sigaction Sigaction; struct Sigaction { Sighandler __sigaction_u; - void *sa_tramp; + uint32 sa_tramp; uint32 sa_mask; int32 sa_flags; }; @@ -171,14 +174,26 @@ struct Siginfo { uint32 __pad[7]; }; +typedef struct Timeval Timeval; +struct Timeval { + int32 tv_sec; + int32 tv_usec; +}; + +typedef struct Itimerval Itimerval; +struct Itimerval { + Timeval it_interval; + Timeval it_value; +}; + typedef struct FPControl FPControl; struct FPControl { - byte pad0[2]; + byte pad_godefs_0[2]; }; typedef struct FPStatus FPStatus; struct FPStatus { - byte pad0[2]; + byte pad_godefs_0[2]; }; typedef struct RegMMST RegMMST; @@ -214,7 +229,7 @@ struct Regs { typedef struct FloatState FloatState; struct FloatState { - int32 fpu_reserved[2]; + uint64 fpu_reserved; FPControl fpu_fcw; FPStatus fpu_fsw; uint8 fpu_ftw; @@ 
-267,7 +282,7 @@ struct Ucontext { int32 uc_onstack; uint32 uc_sigmask; StackT uc_stack; - Ucontext *uc_link; + uint32 uc_link; uint32 uc_mcsize; Mcontext *uc_mcontext; }; diff --git a/src/pkg/runtime/darwin/386/signal.c b/src/pkg/runtime/darwin/386/signal.c index aeef5de3f..35bbb178b 100644 --- a/src/pkg/runtime/darwin/386/signal.c +++ b/src/pkg/runtime/darwin/386/signal.c @@ -46,6 +46,11 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) mc = uc->uc_mcontext; r = &mc->ss; + if(sig == SIGPROF) { + runtime·sigprof((uint8*)r->eip, (uint8*)r->esp, nil, gp); + return; + } + if(gp != nil && (runtime·sigtab[sig].flags & SigPanic)) { // Work around Leopard bug that doesn't set FPE_INTDIV. // Look at instruction to see if it is a divide. @@ -126,31 +131,57 @@ runtime·signalstack(byte *p, int32 n) runtime·sigaltstack(&st, nil); } +static void +sigaction(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) +{ + Sigaction sa; + + runtime·memclr((byte*)&sa, sizeof sa); + sa.sa_flags = SA_SIGINFO|SA_ONSTACK; + if(restart) + sa.sa_flags |= SA_RESTART; + sa.sa_mask = ~0U; + sa.sa_tramp = (uintptr)runtime·sigtramp; // runtime·sigtramp's job is to call into real handler + sa.__sigaction_u.__sa_sigaction = (uintptr)fn; + runtime·sigaction(i, &sa, nil); +} + void runtime·initsig(int32 queue) { int32 i; - static Sigaction sa; + void *fn; runtime·siginit(); - sa.sa_flags |= SA_SIGINFO|SA_ONSTACK; - sa.sa_mask = 0xFFFFFFFFU; - sa.sa_tramp = runtime·sigtramp; // runtime·sigtramp's job is to call into real handler for(i = 0; i<NSIG; i++) { if(runtime·sigtab[i].flags) { if((runtime·sigtab[i].flags & SigQueue) != queue) continue; - if(runtime·sigtab[i].flags & (SigCatch | SigQueue)) { - sa.__sigaction_u.__sa_sigaction = runtime·sighandler; - } else { - sa.__sigaction_u.__sa_sigaction = runtime·sigignore; - } - if(runtime·sigtab[i].flags & SigRestart) - sa.sa_flags |= SA_RESTART; + if(runtime·sigtab[i].flags & (SigCatch | SigQueue)) + fn = runtime·sighandler; 
else - sa.sa_flags &= ~SA_RESTART; - runtime·sigaction(i, &sa, nil); + fn = runtime·sigignore; + sigaction(i, fn, (runtime·sigtab[i].flags & SigRestart) != 0); } } } + +void +runtime·resetcpuprofiler(int32 hz) +{ + Itimerval it; + + runtime·memclr((byte*)&it, sizeof it); + if(hz == 0) { + runtime·setitimer(ITIMER_PROF, &it, nil); + sigaction(SIGPROF, SIG_IGN, true); + } else { + sigaction(SIGPROF, runtime·sighandler, true); + it.it_interval.tv_sec = 0; + it.it_interval.tv_usec = 1000000 / hz; + it.it_value = it.it_interval; + runtime·setitimer(ITIMER_PROF, &it, nil); + } + m->profilehz = hz; +} diff --git a/src/pkg/runtime/darwin/386/sys.s b/src/pkg/runtime/darwin/386/sys.s index 9d2caca0a..08eca9d5a 100644 --- a/src/pkg/runtime/darwin/386/sys.s +++ b/src/pkg/runtime/darwin/386/sys.s @@ -45,6 +45,11 @@ TEXT runtime·munmap(SB),7,$0 CALL runtime·notok(SB) RET +TEXT runtime·setitimer(SB),7,$0 + MOVL $83, AX + INT $0x80 + RET + // void gettime(int64 *sec, int32 *usec) TEXT runtime·gettime(SB), 7, $32 LEAL 12(SP), AX // must be non-nil, unused diff --git a/src/pkg/runtime/darwin/amd64/defs.h b/src/pkg/runtime/darwin/amd64/defs.h index 09e595988..90f798e8a 100644 --- a/src/pkg/runtime/darwin/amd64/defs.h +++ b/src/pkg/runtime/darwin/amd64/defs.h @@ -89,6 +89,9 @@ enum { BUS_OBJERR = 0x3, SEGV_MAPERR = 0x1, SEGV_ACCERR = 0x2, + ITIMER_REAL = 0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, }; // Types @@ -135,19 +138,19 @@ struct StackT { void *ss_sp; uint64 ss_size; int32 ss_flags; - byte pad0[4]; + byte pad_godefs_0[4]; }; typedef union Sighandler Sighandler; union Sighandler { - void *__sa_handler; - void *__sa_sigaction; + uint64 __sa_handler; + uint64 __sa_sigaction; }; typedef struct Sigaction Sigaction; struct Sigaction { Sighandler __sigaction_u; - void *sa_tramp; + uint64 sa_tramp; uint32 sa_mask; int32 sa_flags; }; @@ -172,14 +175,27 @@ struct Siginfo { uint64 __pad[7]; }; +typedef struct Timeval Timeval; +struct Timeval { + int64 tv_sec; + int32 tv_usec; + byte 
pad_godefs_0[4]; +}; + +typedef struct Itimerval Itimerval; +struct Itimerval { + Timeval it_interval; + Timeval it_value; +}; + typedef struct FPControl FPControl; struct FPControl { - byte pad0[2]; + byte pad_godefs_0[2]; }; typedef struct FPStatus FPStatus; struct FPStatus { - byte pad0[2]; + byte pad_godefs_0[2]; }; typedef struct RegMMST RegMMST; @@ -220,7 +236,7 @@ struct Regs { typedef struct FloatState FloatState; struct FloatState { - int32 fpu_reserved[2]; + uint64 fpu_reserved; FPControl fpu_fcw; FPStatus fpu_fsw; uint8 fpu_ftw; @@ -274,7 +290,7 @@ struct Mcontext { ExceptionState es; Regs ss; FloatState fs; - byte pad0[4]; + byte pad_godefs_0[4]; }; typedef struct Ucontext Ucontext; @@ -282,7 +298,7 @@ struct Ucontext { int32 uc_onstack; uint32 uc_sigmask; StackT uc_stack; - Ucontext *uc_link; + uint64 uc_link; uint64 uc_mcsize; Mcontext *uc_mcontext; }; diff --git a/src/pkg/runtime/darwin/amd64/signal.c b/src/pkg/runtime/darwin/amd64/signal.c index 402ab33ca..3a99d2308 100644 --- a/src/pkg/runtime/darwin/amd64/signal.c +++ b/src/pkg/runtime/darwin/amd64/signal.c @@ -54,6 +54,11 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) mc = uc->uc_mcontext; r = &mc->ss; + if(sig == SIGPROF) { + runtime·sigprof((uint8*)r->rip, (uint8*)r->rsp, nil, gp); + return; + } + if(gp != nil && (runtime·sigtab[sig].flags & SigPanic)) { // Work around Leopard bug that doesn't set FPE_INTDIV. // Look at instruction to see if it is a divide. 
@@ -136,31 +141,57 @@ runtime·signalstack(byte *p, int32 n) runtime·sigaltstack(&st, nil); } +static void +sigaction(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) +{ + Sigaction sa; + + runtime·memclr((byte*)&sa, sizeof sa); + sa.sa_flags = SA_SIGINFO|SA_ONSTACK; + if(restart) + sa.sa_flags |= SA_RESTART; + sa.sa_mask = ~0ULL; + sa.sa_tramp = (uintptr)runtime·sigtramp; // runtime·sigtramp's job is to call into real handler + sa.__sigaction_u.__sa_sigaction = (uintptr)fn; + runtime·sigaction(i, &sa, nil); +} + void runtime·initsig(int32 queue) { int32 i; - static Sigaction sa; + void *fn; runtime·siginit(); - sa.sa_flags |= SA_SIGINFO|SA_ONSTACK; - sa.sa_mask = 0xFFFFFFFFU; - sa.sa_tramp = runtime·sigtramp; // runtime·sigtramp's job is to call into real handler for(i = 0; i<NSIG; i++) { if(runtime·sigtab[i].flags) { if((runtime·sigtab[i].flags & SigQueue) != queue) continue; - if(runtime·sigtab[i].flags & (SigCatch | SigQueue)) { - sa.__sigaction_u.__sa_sigaction = runtime·sighandler; - } else { - sa.__sigaction_u.__sa_sigaction = runtime·sigignore; - } - if(runtime·sigtab[i].flags & SigRestart) - sa.sa_flags |= SA_RESTART; + if(runtime·sigtab[i].flags & (SigCatch | SigQueue)) + fn = runtime·sighandler; else - sa.sa_flags &= ~SA_RESTART; - runtime·sigaction(i, &sa, nil); + fn = runtime·sigignore; + sigaction(i, fn, (runtime·sigtab[i].flags & SigRestart) != 0); } } } + +void +runtime·resetcpuprofiler(int32 hz) +{ + Itimerval it; + + runtime·memclr((byte*)&it, sizeof it); + if(hz == 0) { + runtime·setitimer(ITIMER_PROF, &it, nil); + sigaction(SIGPROF, SIG_IGN, true); + } else { + sigaction(SIGPROF, runtime·sighandler, true); + it.it_interval.tv_sec = 0; + it.it_interval.tv_usec = 1000000 / hz; + it.it_value = it.it_interval; + runtime·setitimer(ITIMER_PROF, &it, nil); + } + m->profilehz = hz; +} diff --git a/src/pkg/runtime/darwin/amd64/sys.s b/src/pkg/runtime/darwin/amd64/sys.s index 4f9e0d77a..39398e065 100644 --- 
a/src/pkg/runtime/darwin/amd64/sys.s +++ b/src/pkg/runtime/darwin/amd64/sys.s @@ -38,11 +38,19 @@ TEXT runtime·write(SB),7,$0 SYSCALL RET +TEXT runtime·setitimer(SB), 7, $0 + MOVL 8(SP), DI + MOVQ 16(SP), SI + MOVQ 24(SP), DX + MOVL $(0x2000000+83), AX // syscall entry + SYSCALL + RET + // void gettime(int64 *sec, int32 *usec) TEXT runtime·gettime(SB), 7, $32 MOVQ SP, DI // must be non-nil, unused MOVQ $0, SI - MOVQ $(0x2000000+116), AX + MOVL $(0x2000000+116), AX SYSCALL MOVQ sec+0(FP), DI MOVQ AX, (DI) @@ -138,8 +146,7 @@ TEXT runtime·bsdthread_create(SB),7,$0 MOVQ mm+16(SP), SI // "arg" MOVQ stk+8(SP), DX // stack MOVQ gg+24(SP), R10 // "pthread" -// TODO(rsc): why do we get away with 0 flags here but not on 386? - MOVQ $0, R8 // flags + MOVQ $0x01000000, R8 // flags = PTHREAD_START_CUSTOM MOVQ $0, R9 // paranoia MOVQ $(0x2000000+360), AX // bsdthread_create SYSCALL diff --git a/src/pkg/runtime/darwin/defs.c b/src/pkg/runtime/darwin/defs.c index 1a1cdf880..032a6bcbb 100644 --- a/src/pkg/runtime/darwin/defs.c +++ b/src/pkg/runtime/darwin/defs.c @@ -116,6 +116,10 @@ enum { $SEGV_MAPERR = SEGV_MAPERR, $SEGV_ACCERR = SEGV_ACCERR, + + $ITIMER_REAL = ITIMER_REAL, + $ITIMER_VIRTUAL = ITIMER_VIRTUAL, + $ITIMER_PROF = ITIMER_PROF, }; typedef mach_msg_body_t $MachBody; @@ -130,6 +134,8 @@ typedef struct __sigaction $Sigaction; // used in syscalls // typedef struct sigaction $Sigaction; // used by the C library typedef union sigval $Sigval; typedef siginfo_t $Siginfo; +typedef struct timeval $Timeval; +typedef struct itimerval $Itimerval; typedef struct fp_control $FPControl; typedef struct fp_status $FPStatus; diff --git a/src/pkg/runtime/darwin/os.h b/src/pkg/runtime/darwin/os.h index 35ef4e6d9..339768e51 100644 --- a/src/pkg/runtime/darwin/os.h +++ b/src/pkg/runtime/darwin/os.h @@ -2,6 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
+#define SIG_DFL ((void*)0) +#define SIG_IGN ((void*)1) + int32 runtime·bsdthread_create(void*, M*, G*, void(*)(void)); void runtime·bsdthread_register(void); int32 runtime·mach_msg_trap(MachHeader*, int32, uint32, uint32, uint32, uint32, uint32); @@ -23,3 +26,4 @@ struct StackT; void runtime·sigaltstack(struct StackT*, struct StackT*); void runtime·sigtramp(void); void runtime·sigpanic(void); +void runtime·setitimer(int32, Itimerval*, Itimerval*); diff --git a/src/pkg/runtime/debug.go b/src/pkg/runtime/debug.go index 5117e1a55..6370a57d8 100644 --- a/src/pkg/runtime/debug.go +++ b/src/pkg/runtime/debug.go @@ -4,8 +4,6 @@ package runtime -import "unsafe" - // Breakpoint() executes a breakpoint trap. func Breakpoint() @@ -31,65 +29,6 @@ func Cgocalls() int64 // Goroutines returns the number of goroutines that currently exist. func Goroutines() int32 -type MemStatsType struct { - // General statistics. - // Not locked during update; approximate. - Alloc uint64 // bytes allocated and still in use - TotalAlloc uint64 // bytes allocated (even if freed) - Sys uint64 // bytes obtained from system (should be sum of XxxSys below) - Lookups uint64 // number of pointer lookups - Mallocs uint64 // number of mallocs - Frees uint64 // number of frees - - // Main allocation heap statistics. - HeapAlloc uint64 // bytes allocated and still in use - HeapSys uint64 // bytes obtained from system - HeapIdle uint64 // bytes in idle spans - HeapInuse uint64 // bytes in non-idle span - HeapObjects uint64 // total number of allocated objects - - // Low-level fixed-size structure allocator statistics. - // Inuse is bytes used now. - // Sys is bytes obtained from system. - StackInuse uint64 // bootstrap stacks - StackSys uint64 - MSpanInuse uint64 // mspan structures - MSpanSys uint64 - MCacheInuse uint64 // mcache structures - MCacheSys uint64 - BuckHashSys uint64 // profiling bucket hash table - - // Garbage collector statistics. 
- NextGC uint64 - PauseTotalNs uint64 - PauseNs [256]uint64 // most recent GC pause times - NumGC uint32 - EnableGC bool - DebugGC bool - - // Per-size allocation statistics. - // Not locked during update; approximate. - // 61 is NumSizeClasses in the C code. - BySize [61]struct { - Size uint32 - Mallocs uint64 - Frees uint64 - } -} - -var sizeof_C_MStats int // filled in by malloc.goc - -func init() { - if sizeof_C_MStats != unsafe.Sizeof(MemStats) { - println(sizeof_C_MStats, unsafe.Sizeof(MemStats)) - panic("MStats vs MemStatsType size mismatch") - } -} - -// MemStats holds statistics about the memory system. -// The statistics are only approximate, as they are not interlocked on update. -var MemStats MemStatsType - // Alloc allocates a block of the given size. // FOR TESTING AND DEBUGGING ONLY. func Alloc(uintptr) *byte @@ -102,9 +41,6 @@ func Free(*byte) // FOR TESTING AND DEBUGGING ONLY. func Lookup(*byte) (*byte, uintptr) -// GC runs a garbage collection. -func GC() - // MemProfileRate controls the fraction of memory allocations // that are recorded and reported in the memory profile. // The profiler aims to sample an average of @@ -156,4 +92,24 @@ func (r *MemProfileRecord) Stack() []uintptr { // where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes. // These are sites where memory was allocated, but it has all // been released back to the runtime. +// Most clients should use the runtime/pprof package or +// the testing package's -test.memprofile flag instead +// of calling MemProfile directly. func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) + +// CPUProfile returns the next chunk of binary CPU profiling stack trace data, +// blocking until data is available. If profiling is turned off and all the profile +// data accumulated while it was on has been returned, CPUProfile returns nil. +// The caller must save the returned data before calling CPUProfile again. 
+// Most clients should use the runtime/pprof package or +// the testing package's -test.cpuprofile flag instead of calling +// CPUProfile directly. +func CPUProfile() []byte + +// SetCPUProfileRate sets the CPU profiling rate to hz samples per second. +// If hz <= 0, SetCPUProfileRate turns off profiling. +// If the profiler is on, the rate cannot be changed without first turning it off. +// Most clients should use the runtime/pprof package or +// the testing package's -test.cpuprofile flag instead of calling +// SetCPUProfileRate directly. +func SetCPUProfileRate(hz int) diff --git a/src/pkg/runtime/freebsd/386/defs.h b/src/pkg/runtime/freebsd/386/defs.h index 128be9cc9..ae12b2019 100644 --- a/src/pkg/runtime/freebsd/386/defs.h +++ b/src/pkg/runtime/freebsd/386/defs.h @@ -61,6 +61,9 @@ enum { BUS_OBJERR = 0x3, SEGV_MAPERR = 0x1, SEGV_ACCERR = 0x2, + ITIMER_REAL = 0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, }; // Types @@ -154,7 +157,9 @@ struct Mcontext { int32 mc_ownedfp; int32 mc_spare1[1]; int32 mc_fpstate[128]; - int32 mc_spare2[8]; + int32 mc_fsbase; + int32 mc_gsbase; + int32 mc_spare2[6]; }; typedef struct Ucontext Ucontext; @@ -165,6 +170,18 @@ struct Ucontext { StackT uc_stack; int32 uc_flags; int32 __spare__[4]; - byte pad0[12]; + byte pad_godefs_0[12]; +}; + +typedef struct Timeval Timeval; +struct Timeval { + int32 tv_sec; + int32 tv_usec; +}; + +typedef struct Itimerval Itimerval; +struct Itimerval { + Timeval it_interval; + Timeval it_value; }; #pragma pack off diff --git a/src/pkg/runtime/freebsd/386/signal.c b/src/pkg/runtime/freebsd/386/signal.c index 8e9d74256..1ae2554eb 100644 --- a/src/pkg/runtime/freebsd/386/signal.c +++ b/src/pkg/runtime/freebsd/386/signal.c @@ -54,6 +54,11 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) uc = context; r = &uc->uc_mcontext; + if(sig == SIGPROF) { + runtime·sigprof((uint8*)r->mc_eip, (uint8*)r->mc_esp, nil, gp); + return; + } + if(gp != nil && (runtime·sigtab[sig].flags & SigPanic)) { 
// Make it look like a call to the signal func. // Have to pass arguments out of band since @@ -122,32 +127,58 @@ runtime·signalstack(byte *p, int32 n) runtime·sigaltstack(&st, nil); } +static void +sigaction(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) +{ + Sigaction sa; + + runtime·memclr((byte*)&sa, sizeof sa); + sa.sa_flags = SA_SIGINFO|SA_ONSTACK; + if(restart) + sa.sa_flags |= SA_RESTART; + sa.sa_mask = ~0ULL; + if (fn == runtime·sighandler) + fn = (void*)runtime·sigtramp; + sa.__sigaction_u.__sa_sigaction = (void*)fn; + runtime·sigaction(i, &sa, nil); +} + void runtime·initsig(int32 queue) { - static Sigaction sa; + int32 i; + void *fn; runtime·siginit(); - int32 i; - sa.sa_flags |= SA_ONSTACK | SA_SIGINFO; - sa.sa_mask = ~0x0ull; - - for(i = 0; i < NSIG; i++) { + for(i = 0; i<NSIG; i++) { if(runtime·sigtab[i].flags) { if((runtime·sigtab[i].flags & SigQueue) != queue) continue; if(runtime·sigtab[i].flags & (SigCatch | SigQueue)) - sa.__sigaction_u.__sa_sigaction = (void*) runtime·sigtramp; + fn = runtime·sighandler; else - sa.__sigaction_u.__sa_sigaction = (void*) runtime·sigignore; - - if(runtime·sigtab[i].flags & SigRestart) - sa.sa_flags |= SA_RESTART; - else - sa.sa_flags &= ~SA_RESTART; - - runtime·sigaction(i, &sa, nil); + fn = runtime·sigignore; + sigaction(i, fn, (runtime·sigtab[i].flags & SigRestart) != 0); } } } + +void +runtime·resetcpuprofiler(int32 hz) +{ + Itimerval it; + + runtime·memclr((byte*)&it, sizeof it); + if(hz == 0) { + runtime·setitimer(ITIMER_PROF, &it, nil); + sigaction(SIGPROF, SIG_IGN, true); + } else { + sigaction(SIGPROF, runtime·sighandler, true); + it.it_interval.tv_sec = 0; + it.it_interval.tv_usec = 1000000 / hz; + it.it_value = it.it_interval; + runtime·setitimer(ITIMER_PROF, &it, nil); + } + m->profilehz = hz; +} diff --git a/src/pkg/runtime/freebsd/386/sys.s b/src/pkg/runtime/freebsd/386/sys.s index 60c189bf8..c4715b668 100644 --- a/src/pkg/runtime/freebsd/386/sys.s +++ 
b/src/pkg/runtime/freebsd/386/sys.s @@ -87,6 +87,11 @@ TEXT runtime·munmap(SB),7,$-4 CALL runtime·notok(SB) RET +TEXT runtime·setitimer(SB), 7, $-4 + MOVL $83, AX + INT $0x80 + RET + TEXT runtime·gettime(SB), 7, $32 MOVL $116, AX LEAL 12(SP), BX diff --git a/src/pkg/runtime/freebsd/amd64/defs.h b/src/pkg/runtime/freebsd/amd64/defs.h index 2a295a479..b101b1932 100644 --- a/src/pkg/runtime/freebsd/amd64/defs.h +++ b/src/pkg/runtime/freebsd/amd64/defs.h @@ -61,6 +61,9 @@ enum { BUS_OBJERR = 0x3, SEGV_MAPERR = 0x1, SEGV_ACCERR = 0x2, + ITIMER_REAL = 0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, }; // Types @@ -83,7 +86,7 @@ struct ThrParam { int64 *child_tid; int64 *parent_tid; int32 flags; - byte pad0[4]; + byte pad_godefs_0[4]; Rtprio *rtp; void* spare[3]; }; @@ -93,7 +96,7 @@ struct Sigaltstack { int8 *ss_sp; uint64 ss_size; int32 ss_flags; - byte pad0[4]; + byte pad_godefs_0[4]; }; typedef struct Sigset Sigset; @@ -114,7 +117,7 @@ struct StackT { int8 *ss_sp; uint64 ss_size; int32 ss_flags; - byte pad0[4]; + byte pad_godefs_0[4]; }; typedef struct Siginfo Siginfo; @@ -178,6 +181,18 @@ struct Ucontext { StackT uc_stack; int32 uc_flags; int32 __spare__[4]; - byte pad0[12]; + byte pad_godefs_0[12]; +}; + +typedef struct Timeval Timeval; +struct Timeval { + int64 tv_sec; + int64 tv_usec; +}; + +typedef struct Itimerval Itimerval; +struct Itimerval { + Timeval it_interval; + Timeval it_value; }; #pragma pack off diff --git a/src/pkg/runtime/freebsd/amd64/signal.c b/src/pkg/runtime/freebsd/amd64/signal.c index f145371b4..9d8e5e692 100644 --- a/src/pkg/runtime/freebsd/amd64/signal.c +++ b/src/pkg/runtime/freebsd/amd64/signal.c @@ -62,6 +62,11 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) uc = context; r = &uc->uc_mcontext; + if(sig == SIGPROF) { + runtime·sigprof((uint8*)r->mc_rip, (uint8*)r->mc_rsp, nil, gp); + return; + } + if(gp != nil && (runtime·sigtab[sig].flags & SigPanic)) { // Make it look like a call to the signal func. 
// Have to pass arguments out of band since @@ -130,32 +135,58 @@ runtime·signalstack(byte *p, int32 n) runtime·sigaltstack(&st, nil); } +static void +sigaction(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) +{ + Sigaction sa; + + runtime·memclr((byte*)&sa, sizeof sa); + sa.sa_flags = SA_SIGINFO|SA_ONSTACK; + if(restart) + sa.sa_flags |= SA_RESTART; + sa.sa_mask = ~0ULL; + if (fn == runtime·sighandler) + fn = (void*)runtime·sigtramp; + sa.__sigaction_u.__sa_sigaction = (void*)fn; + runtime·sigaction(i, &sa, nil); +} + void runtime·initsig(int32 queue) { - static Sigaction sa; + int32 i; + void *fn; runtime·siginit(); - int32 i; - sa.sa_flags |= SA_ONSTACK | SA_SIGINFO; - sa.sa_mask = ~0x0ull; - - for(i = 0; i < NSIG; i++) { + for(i = 0; i<NSIG; i++) { if(runtime·sigtab[i].flags) { if((runtime·sigtab[i].flags & SigQueue) != queue) continue; if(runtime·sigtab[i].flags & (SigCatch | SigQueue)) - sa.__sigaction_u.__sa_sigaction = (void*) runtime·sigtramp; + fn = runtime·sighandler; else - sa.__sigaction_u.__sa_sigaction = (void*) runtime·sigignore; - - if(runtime·sigtab[i].flags & SigRestart) - sa.sa_flags |= SA_RESTART; - else - sa.sa_flags &= ~SA_RESTART; - - runtime·sigaction(i, &sa, nil); + fn = runtime·sigignore; + sigaction(i, fn, (runtime·sigtab[i].flags & SigRestart) != 0); } } } + +void +runtime·resetcpuprofiler(int32 hz) +{ + Itimerval it; + + runtime·memclr((byte*)&it, sizeof it); + if(hz == 0) { + runtime·setitimer(ITIMER_PROF, &it, nil); + sigaction(SIGPROF, SIG_IGN, true); + } else { + sigaction(SIGPROF, runtime·sighandler, true); + it.it_interval.tv_sec = 0; + it.it_interval.tv_usec = 1000000 / hz; + it.it_value = it.it_interval; + runtime·setitimer(ITIMER_PROF, &it, nil); + } + m->profilehz = hz; +} diff --git a/src/pkg/runtime/freebsd/amd64/sys.s b/src/pkg/runtime/freebsd/amd64/sys.s index d986e9ac0..9a6fdf1ac 100644 --- a/src/pkg/runtime/freebsd/amd64/sys.s +++ b/src/pkg/runtime/freebsd/amd64/sys.s @@ -65,6 +65,14 @@ TEXT 
runtime·write(SB),7,$-8 SYSCALL RET +TEXT runtime·setitimer(SB), 7, $-8 + MOVL 8(SP), DI + MOVQ 16(SP), SI + MOVQ 24(SP), DX + MOVL $83, AX + SYSCALL + RET + TEXT runtime·gettime(SB), 7, $32 MOVL $116, AX LEAQ 8(SP), DI diff --git a/src/pkg/runtime/freebsd/defs.c b/src/pkg/runtime/freebsd/defs.c index 32a80f475..2ce4fdc51 100644 --- a/src/pkg/runtime/freebsd/defs.c +++ b/src/pkg/runtime/freebsd/defs.c @@ -19,6 +19,7 @@ #include <sys/rtprio.h> #include <sys/thr.h> #include <sys/_sigset.h> +#include <sys/unistd.h> enum { $PROT_NONE = PROT_NONE, @@ -86,6 +87,10 @@ enum { $SEGV_MAPERR = SEGV_MAPERR, $SEGV_ACCERR = SEGV_ACCERR, + + $ITIMER_REAL = ITIMER_REAL, + $ITIMER_VIRTUAL = ITIMER_VIRTUAL, + $ITIMER_PROF = ITIMER_PROF, }; typedef struct rtprio $Rtprio; @@ -99,3 +104,5 @@ typedef siginfo_t $Siginfo; typedef mcontext_t $Mcontext; typedef ucontext_t $Ucontext; +typedef struct timeval $Timeval; +typedef struct itimerval $Itimerval; diff --git a/src/pkg/runtime/freebsd/mem.c b/src/pkg/runtime/freebsd/mem.c index f5bbfa6fa..f80439e38 100644 --- a/src/pkg/runtime/freebsd/mem.c +++ b/src/pkg/runtime/freebsd/mem.c @@ -53,7 +53,7 @@ runtime·SysMap(void *v, uintptr n) if(sizeof(void*) == 8) { p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); if(p != v) { - runtime·printf("runtime: address space conflict: map(%v) = %v\n", v, p); + runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p); runtime·throw("runtime: address space conflict"); } return; diff --git a/src/pkg/runtime/freebsd/os.h b/src/pkg/runtime/freebsd/os.h index 455355bc7..13754688b 100644 --- a/src/pkg/runtime/freebsd/os.h +++ b/src/pkg/runtime/freebsd/os.h @@ -1,5 +1,10 @@ +#define SIG_DFL ((void*)0) +#define SIG_IGN ((void*)1) + int32 runtime·thr_new(ThrParam*, int32); void runtime·sigpanic(void); void runtime·sigaltstack(Sigaltstack*, Sigaltstack*); struct sigaction; void runtime·sigaction(int32, struct sigaction*, struct sigaction*); +void 
runtiem·setitimerval(int32, Itimerval*, Itimerval*); +void runtime·setitimer(int32, Itimerval*, Itimerval*); diff --git a/src/pkg/runtime/linux/386/defs.h b/src/pkg/runtime/linux/386/defs.h index c1f58b2a0..6ae1c4e13 100644 --- a/src/pkg/runtime/linux/386/defs.h +++ b/src/pkg/runtime/linux/386/defs.h @@ -58,6 +58,9 @@ enum { BUS_OBJERR = 0x3, SEGV_MAPERR = 0x1, SEGV_ACCERR = 0x2, + ITIMER_REAL = 0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, }; // Types @@ -98,7 +101,8 @@ struct Fpstate { uint32 reserved; Fpxreg _fxsr_st[8]; Xmmreg _xmm[8]; - uint32 padding[56]; + uint32 padding1[44]; + byte Pad_godefs_0[48]; }; typedef struct Timespec Timespec; @@ -176,4 +180,10 @@ struct Ucontext { Sigcontext uc_mcontext; uint32 uc_sigmask; }; + +typedef struct Itimerval Itimerval; +struct Itimerval { + Timeval it_interval; + Timeval it_value; +}; #pragma pack off diff --git a/src/pkg/runtime/linux/386/signal.c b/src/pkg/runtime/linux/386/signal.c index bd918c7ea..9b72ecbae 100644 --- a/src/pkg/runtime/linux/386/signal.c +++ b/src/pkg/runtime/linux/386/signal.c @@ -51,6 +51,11 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) uc = context; r = &uc->uc_mcontext; + if(sig == SIGPROF) { + runtime·sigprof((uint8*)r->eip, (uint8*)r->esp, nil, gp); + return; + } + if(gp != nil && (runtime·sigtab[sig].flags & SigPanic)) { // Make it look like a call to the signal func. 
// Have to pass arguments out of band since @@ -114,30 +119,59 @@ runtime·signalstack(byte *p, int32 n) runtime·sigaltstack(&st, nil); } +static void +sigaction(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) +{ + Sigaction sa; + + runtime·memclr((byte*)&sa, sizeof sa); + sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER; + if(restart) + sa.sa_flags |= SA_RESTART; + sa.sa_mask = ~0ULL; + sa.sa_restorer = (void*)runtime·sigreturn; + if(fn == runtime·sighandler) + fn = (void*)runtime·sigtramp; + sa.k_sa_handler = fn; + runtime·rt_sigaction(i, &sa, nil, 8); +} + void runtime·initsig(int32 queue) { - static Sigaction sa; + int32 i; + void *fn; runtime·siginit(); - int32 i; - sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER; - sa.sa_mask = 0xFFFFFFFFFFFFFFFFULL; - sa.sa_restorer = (void*)runtime·sigreturn; for(i = 0; i<NSIG; i++) { if(runtime·sigtab[i].flags) { if((runtime·sigtab[i].flags & SigQueue) != queue) continue; if(runtime·sigtab[i].flags & (SigCatch | SigQueue)) - sa.k_sa_handler = (void*)runtime·sigtramp; + fn = runtime·sighandler; else - sa.k_sa_handler = (void*)runtime·sigignore; - if(runtime·sigtab[i].flags & SigRestart) - sa.sa_flags |= SA_RESTART; - else - sa.sa_flags &= ~SA_RESTART; - runtime·rt_sigaction(i, &sa, nil, 8); + fn = runtime·sigignore; + sigaction(i, fn, (runtime·sigtab[i].flags & SigRestart) != 0); } } } + +void +runtime·resetcpuprofiler(int32 hz) +{ + Itimerval it; + + runtime·memclr((byte*)&it, sizeof it); + if(hz == 0) { + runtime·setitimer(ITIMER_PROF, &it, nil); + sigaction(SIGPROF, SIG_IGN, true); + } else { + sigaction(SIGPROF, runtime·sighandler, true); + it.it_interval.tv_sec = 0; + it.it_interval.tv_usec = 1000000 / hz; + it.it_value = it.it_interval; + runtime·setitimer(ITIMER_PROF, &it, nil); + } + m->profilehz = hz; +} diff --git a/src/pkg/runtime/linux/386/sys.s b/src/pkg/runtime/linux/386/sys.s index a684371be..c39ce253f 100644 --- a/src/pkg/runtime/linux/386/sys.s +++ b/src/pkg/runtime/linux/386/sys.s @@ 
-30,6 +30,15 @@ TEXT runtime·write(SB),7,$0 INT $0x80 RET + +TEXT runtime·setitimer(SB),7,$0-24 + MOVL $104, AX // syscall - setitimer + MOVL 4(SP), BX + MOVL 8(SP), CX + MOVL 12(SP), DX + INT $0x80 + RET + TEXT runtime·gettime(SB), 7, $32 MOVL $78, AX // syscall - gettimeofday LEAL 8(SP), BX diff --git a/src/pkg/runtime/linux/amd64/defs.h b/src/pkg/runtime/linux/amd64/defs.h index 3e3d32f0d..70d63145c 100644 --- a/src/pkg/runtime/linux/amd64/defs.h +++ b/src/pkg/runtime/linux/amd64/defs.h @@ -58,6 +58,9 @@ enum { BUS_OBJERR = 0x3, SEGV_MAPERR = 0x1, SEGV_ACCERR = 0x2, + ITIMER_REAL = 0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, }; // Types @@ -88,9 +91,15 @@ struct Siginfo { int32 si_signo; int32 si_errno; int32 si_code; - byte pad0[4]; + byte pad_godefs_0[4]; byte _sifields[112]; }; + +typedef struct Itimerval Itimerval; +struct Itimerval { + Timeval it_interval; + Timeval it_value; +}; #pragma pack off // godefs -f -m64 defs1.c @@ -170,7 +179,7 @@ typedef struct Sigaltstack Sigaltstack; struct Sigaltstack { void *ss_sp; int32 ss_flags; - byte pad0[4]; + byte pad_godefs_0[4]; uint64 ss_size; }; diff --git a/src/pkg/runtime/linux/amd64/signal.c b/src/pkg/runtime/linux/amd64/signal.c index ea0932523..1db9c95e5 100644 --- a/src/pkg/runtime/linux/amd64/signal.c +++ b/src/pkg/runtime/linux/amd64/signal.c @@ -61,6 +61,11 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) mc = &uc->uc_mcontext; r = (Sigcontext*)mc; // same layout, more conveient names + if(sig == SIGPROF) { + runtime·sigprof((uint8*)r->rip, (uint8*)r->rsp, nil, gp); + return; + } + if(gp != nil && (runtime·sigtab[sig].flags & SigPanic)) { // Make it look like a call to the signal func. 
// Have to pass arguments out of band since @@ -124,30 +129,59 @@ runtime·signalstack(byte *p, int32 n) runtime·sigaltstack(&st, nil); } +static void +sigaction(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) +{ + Sigaction sa; + + runtime·memclr((byte*)&sa, sizeof sa); + sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER; + if(restart) + sa.sa_flags |= SA_RESTART; + sa.sa_mask = ~0ULL; + sa.sa_restorer = (void*)runtime·sigreturn; + if(fn == runtime·sighandler) + fn = (void*)runtime·sigtramp; + sa.sa_handler = fn; + runtime·rt_sigaction(i, &sa, nil, 8); +} + void runtime·initsig(int32 queue) { - static Sigaction sa; + int32 i; + void *fn; runtime·siginit(); - int32 i; - sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER; - sa.sa_mask = 0xFFFFFFFFFFFFFFFFULL; - sa.sa_restorer = (void*)runtime·sigreturn; for(i = 0; i<NSIG; i++) { if(runtime·sigtab[i].flags) { if((runtime·sigtab[i].flags & SigQueue) != queue) continue; if(runtime·sigtab[i].flags & (SigCatch | SigQueue)) - sa.sa_handler = (void*)runtime·sigtramp; + fn = runtime·sighandler; else - sa.sa_handler = (void*)runtime·sigignore; - if(runtime·sigtab[i].flags & SigRestart) - sa.sa_flags |= SA_RESTART; - else - sa.sa_flags &= ~SA_RESTART; - runtime·rt_sigaction(i, &sa, nil, 8); + fn = runtime·sigignore; + sigaction(i, fn, (runtime·sigtab[i].flags & SigRestart) != 0); } } } + +void +runtime·resetcpuprofiler(int32 hz) +{ + Itimerval it; + + runtime·memclr((byte*)&it, sizeof it); + if(hz == 0) { + runtime·setitimer(ITIMER_PROF, &it, nil); + sigaction(SIGPROF, SIG_IGN, true); + } else { + sigaction(SIGPROF, runtime·sighandler, true); + it.it_interval.tv_sec = 0; + it.it_interval.tv_usec = 1000000 / hz; + it.it_value = it.it_interval; + runtime·setitimer(ITIMER_PROF, &it, nil); + } + m->profilehz = hz; +} diff --git a/src/pkg/runtime/linux/amd64/sys.s b/src/pkg/runtime/linux/amd64/sys.s index 1bf734dc0..11df1f894 100644 --- a/src/pkg/runtime/linux/amd64/sys.s +++ b/src/pkg/runtime/linux/amd64/sys.s @@ 
-36,6 +36,14 @@ TEXT runtime·write(SB),7,$0-24 SYSCALL RET +TEXT runtime·setitimer(SB),7,$0-24 + MOVL 8(SP), DI + MOVQ 16(SP), SI + MOVQ 24(SP), DX + MOVL $38, AX // syscall entry + SYSCALL + RET + TEXT runtime·gettime(SB), 7, $32 LEAQ 8(SP), DI MOVQ $0, SI diff --git a/src/pkg/runtime/linux/arm/defs.h b/src/pkg/runtime/linux/arm/defs.h index ff43d689a..6b2f22c66 100644 --- a/src/pkg/runtime/linux/arm/defs.h +++ b/src/pkg/runtime/linux/arm/defs.h @@ -1,4 +1,4 @@ -// godefs -carm-gcc -f -I/usr/local/google/src/linux-2.6.28/arch/arm/include -f -I/usr/local/google/src/linux-2.6.28/include -f-D__KERNEL__ -f-D__ARCH_SI_UID_T=int defs_arm.c +// godefs -f-I/usr/src/linux-headers-2.6.26-2-versatile/include defs_arm.c // MACHINE GENERATED - DO NOT EDIT. @@ -58,23 +58,15 @@ enum { BUS_OBJERR = 0x3, SEGV_MAPERR = 0x1, SEGV_ACCERR = 0x2, + ITIMER_REAL = 0, + ITIMER_PROF = 0x2, + ITIMER_VIRTUAL = 0x1, }; // Types #pragma pack on -typedef struct Sigset Sigset; -struct Sigset { - uint32 sig[2]; -}; - -typedef struct Sigaction Sigaction; -struct Sigaction { - void *sa_handler; - uint32 sa_flags; - void *sa_restorer; - Sigset sa_mask; -}; +typedef uint32 Sigset; typedef struct Timespec Timespec; struct Timespec { @@ -120,11 +112,23 @@ struct Ucontext { Ucontext *uc_link; Sigaltstack uc_stack; Sigcontext uc_mcontext; - Sigset uc_sigmask; - int32 __unused[30]; + uint32 uc_sigmask; + int32 __unused[31]; uint32 uc_regspace[128]; }; +typedef struct Timeval Timeval; +struct Timeval { + int32 tv_sec; + int32 tv_usec; +}; + +typedef struct Itimerval Itimerval; +struct Itimerval { + Timeval it_interval; + Timeval it_value; +}; + typedef struct Siginfo Siginfo; struct Siginfo { int32 si_signo; @@ -132,4 +136,12 @@ struct Siginfo { int32 si_code; uint8 _sifields[4]; }; + +typedef struct Sigaction Sigaction; +struct Sigaction { + void *sa_handler; + uint32 sa_flags; + void *sa_restorer; + uint32 sa_mask; +}; #pragma pack off diff --git a/src/pkg/runtime/linux/arm/signal.c 
b/src/pkg/runtime/linux/arm/signal.c index 843c40b68..05c6b0261 100644 --- a/src/pkg/runtime/linux/arm/signal.c +++ b/src/pkg/runtime/linux/arm/signal.c @@ -58,6 +58,11 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) uc = context; r = &uc->uc_mcontext; + if(sig == SIGPROF) { + runtime·sigprof((uint8*)r->arm_pc, (uint8*)r->arm_sp, (uint8*)r->arm_lr, gp); + return; + } + if(gp != nil && (runtime·sigtab[sig].flags & SigPanic)) { // Make it look like a call to the signal func. // Have to pass arguments out of band since @@ -119,31 +124,59 @@ runtime·signalstack(byte *p, int32 n) runtime·sigaltstack(&st, nil); } +static void +sigaction(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) +{ + Sigaction sa; + + runtime·memclr((byte*)&sa, sizeof sa); + sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER; + if(restart) + sa.sa_flags |= SA_RESTART; + sa.sa_mask = ~0ULL; + sa.sa_restorer = (void*)runtime·sigreturn; + if(fn == runtime·sighandler) + fn = (void*)runtime·sigtramp; + sa.sa_handler = fn; + runtime·rt_sigaction(i, &sa, nil, 8); +} + void runtime·initsig(int32 queue) { - static Sigaction sa; + int32 i; + void *fn; runtime·siginit(); - int32 i; - sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER; - sa.sa_mask.sig[0] = 0xFFFFFFFF; - sa.sa_mask.sig[1] = 0xFFFFFFFF; - sa.sa_restorer = (void*)runtime·sigreturn; for(i = 0; i<NSIG; i++) { if(runtime·sigtab[i].flags) { if((runtime·sigtab[i].flags & SigQueue) != queue) continue; if(runtime·sigtab[i].flags & (SigCatch | SigQueue)) - sa.sa_handler = (void*)runtime·sigtramp; + fn = runtime·sighandler; else - sa.sa_handler = (void*)runtime·sigignore; - if(runtime·sigtab[i].flags & SigRestart) - sa.sa_flags |= SA_RESTART; - else - sa.sa_flags &= ~SA_RESTART; - runtime·rt_sigaction(i, &sa, nil, 8); + fn = runtime·sigignore; + sigaction(i, fn, (runtime·sigtab[i].flags & SigRestart) != 0); } } } + +void +runtime·resetcpuprofiler(int32 hz) +{ + Itimerval it; + + runtime·memclr((byte*)&it, sizeof it); 
+ if(hz == 0) { + runtime·setitimer(ITIMER_PROF, &it, nil); + sigaction(SIGPROF, SIG_IGN, true); + } else { + sigaction(SIGPROF, runtime·sighandler, true); + it.it_interval.tv_sec = 0; + it.it_interval.tv_usec = 1000000 / hz; + it.it_value = it.it_interval; + runtime·setitimer(ITIMER_PROF, &it, nil); + } + m->profilehz = hz; +} diff --git a/src/pkg/runtime/linux/arm/sys.s b/src/pkg/runtime/linux/arm/sys.s index 9daf9c2e4..b9767a028 100644 --- a/src/pkg/runtime/linux/arm/sys.s +++ b/src/pkg/runtime/linux/arm/sys.s @@ -26,6 +26,7 @@ #define SYS_futex (SYS_BASE + 240) #define SYS_exit_group (SYS_BASE + 248) #define SYS_munmap (SYS_BASE + 91) +#define SYS_setitimer (SYS_BASE + 104) #define ARM_BASE (SYS_BASE + 0x0f0000) #define SYS_ARM_cacheflush (ARM_BASE + 2) @@ -72,6 +73,14 @@ TEXT runtime·munmap(SB),7,$0 SWI $0 RET +TEXT runtime·setitimer(SB),7,$0 + MOVW 0(FP), R0 + MOVW 4(FP), R1 + MOVW 8(FP), R2 + MOVW $SYS_setitimer, R7 + SWI $0 + RET + TEXT runtime·gettime(SB),7,$32 /* dummy version - return 0,0 */ MOVW $0, R1 diff --git a/src/pkg/runtime/linux/defs.c b/src/pkg/runtime/linux/defs.c index 2044fd60c..5dda78789 100644 --- a/src/pkg/runtime/linux/defs.c +++ b/src/pkg/runtime/linux/defs.c @@ -15,6 +15,8 @@ // headers for things like ucontext_t, so that happens in // a separate file, defs1.c. 
+#include <asm/posix_types.h> +#define size_t __kernel_size_t #include <asm/signal.h> #include <asm/siginfo.h> #include <asm/mman.h> @@ -80,9 +82,14 @@ enum { $SEGV_MAPERR = SEGV_MAPERR, $SEGV_ACCERR = SEGV_ACCERR, + + $ITIMER_REAL = ITIMER_REAL, + $ITIMER_VIRTUAL = ITIMER_VIRTUAL, + $ITIMER_PROF = ITIMER_PROF, }; typedef struct timespec $Timespec; typedef struct timeval $Timeval; typedef struct sigaction $Sigaction; typedef siginfo_t $Siginfo; +typedef struct itimerval $Itimerval; diff --git a/src/pkg/runtime/linux/defs2.c b/src/pkg/runtime/linux/defs2.c index 3c0b110fc..ff641fff2 100644 --- a/src/pkg/runtime/linux/defs2.c +++ b/src/pkg/runtime/linux/defs2.c @@ -8,7 +8,7 @@ -f -I/home/rsc/pub/linux-2.6/arch/x86/include \ -f -I/home/rsc/pub/linux-2.6/include \ -f -D_LOOSE_KERNEL_NAMES \ - -f -D__ARCH_SI_UID_T=__kernel_uid32_t \ + -f -D__ARCH_SI_UID_T'='__kernel_uid32_t \ defs2.c >386/defs.h * The asm header tricks we have to use for Linux on amd64 @@ -100,6 +100,10 @@ enum { $SEGV_MAPERR = SEGV_MAPERR, $SEGV_ACCERR = SEGV_ACCERR, + + $ITIMER_REAL = ITIMER_REAL, + $ITIMER_VIRTUAL = ITIMER_VIRTUAL, + $ITIMER_PROF = ITIMER_PROF, }; typedef struct _fpreg $Fpreg; @@ -113,4 +117,4 @@ typedef siginfo_t $Siginfo; typedef struct sigaltstack $Sigaltstack; typedef struct sigcontext $Sigcontext; typedef struct ucontext $Ucontext; - +typedef struct itimerval $Itimerval; diff --git a/src/pkg/runtime/linux/defs_arm.c b/src/pkg/runtime/linux/defs_arm.c index a5897d6d0..1f935046e 100644 --- a/src/pkg/runtime/linux/defs_arm.c +++ b/src/pkg/runtime/linux/defs_arm.c @@ -4,16 +4,18 @@ /* * Input to godefs - godefs -carm-gcc -f -I/usr/local/google/src/linux-2.6.28/arch/arm/include -f -I/usr/local/google/src/linux-2.6.28/include -f-D__KERNEL__ -f-D__ARCH_SI_UID_T=int defs_arm.c >arm/defs.h - - * Another input file for ARM defs.h + * On a Debian Lenny arm linux distribution: + godefs -f-I/usr/src/linux-headers-2.6.26-2-versatile/include defs_arm.c */ +#define __ARCH_SI_UID_T int + 
#include <asm/signal.h> #include <asm/mman.h> #include <asm/sigcontext.h> #include <asm/ucontext.h> #include <asm/siginfo.h> +#include <linux/time.h> /* #include <sys/signal.h> @@ -21,8 +23,6 @@ #include <ucontext.h> */ -#include <time.h> - enum { $PROT_NONE = PROT_NONE, $PROT_READ = PROT_READ, @@ -84,14 +84,19 @@ enum { $SEGV_MAPERR = SEGV_MAPERR & 0xFFFF, $SEGV_ACCERR = SEGV_ACCERR & 0xFFFF, + + $ITIMER_REAL = ITIMER_REAL, + $ITIMER_PROF = ITIMER_PROF, + $ITIMER_VIRTUAL = ITIMER_VIRTUAL, }; typedef sigset_t $Sigset; -typedef struct sigaction $Sigaction; typedef struct timespec $Timespec; typedef struct sigaltstack $Sigaltstack; typedef struct sigcontext $Sigcontext; typedef struct ucontext $Ucontext; +typedef struct timeval $Timeval; +typedef struct itimerval $Itimerval; struct xsiginfo { int si_signo; @@ -101,3 +106,17 @@ struct xsiginfo { }; typedef struct xsiginfo $Siginfo; + +#undef sa_handler +#undef sa_flags +#undef sa_restorer +#undef sa_mask + +struct xsigaction { + void (*sa_handler)(void); + unsigned long sa_flags; + void (*sa_restorer)(void); + unsigned int sa_mask; /* mask last for extensibility */ +}; + +typedef struct xsigaction $Sigaction; diff --git a/src/pkg/runtime/linux/mem.c b/src/pkg/runtime/linux/mem.c index 633ad0c62..d2f6f8204 100644 --- a/src/pkg/runtime/linux/mem.c +++ b/src/pkg/runtime/linux/mem.c @@ -59,7 +59,7 @@ runtime·SysMap(void *v, uintptr n) if(sizeof(void*) == 8) { p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); if(p != v) { - runtime·printf("runtime: address space conflict: map(%v) = %v\n", v, p); + runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p); runtime·throw("runtime: address space conflict"); } return; diff --git a/src/pkg/runtime/linux/os.h b/src/pkg/runtime/linux/os.h index 772ade7da..6ae088977 100644 --- a/src/pkg/runtime/linux/os.h +++ b/src/pkg/runtime/linux/os.h @@ -2,6 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be 
found in the LICENSE file. +#define SIG_DFL ((void*)0) +#define SIG_IGN ((void*)1) + // Linux-specific system calls int32 runtime·futex(uint32*, int32, uint32, Timespec*, uint32*, uint32); int32 runtime·clone(int32, void*, M*, G*, void(*)(void)); @@ -11,3 +14,4 @@ void runtime·rt_sigaction(uintptr, struct Sigaction*, void*, uintptr); void runtime·sigaltstack(Sigaltstack*, Sigaltstack*); void runtime·sigpanic(void); +void runtime·setitimer(int32, Itimerval*, Itimerval*); diff --git a/src/pkg/runtime/linux/signals.h b/src/pkg/runtime/linux/signals.h index 1fc5f8c87..919b80ea2 100644 --- a/src/pkg/runtime/linux/signals.h +++ b/src/pkg/runtime/linux/signals.h @@ -13,7 +13,7 @@ SigTab runtime·sigtab[] = { /* 1 */ Q+R, "SIGHUP: terminal line hangup", /* 2 */ Q+R, "SIGINT: interrupt", /* 3 */ C, "SIGQUIT: quit", - /* 4 */ C, "SIGILL: illegal instruction", + /* 4 */ C+P, "SIGILL: illegal instruction", /* 5 */ C, "SIGTRAP: trace trap", /* 6 */ C, "SIGABRT: abort", /* 7 */ C+P, "SIGBUS: bus error", diff --git a/src/pkg/runtime/mem.go b/src/pkg/runtime/mem.go new file mode 100644 index 000000000..fe505a329 --- /dev/null +++ b/src/pkg/runtime/mem.go @@ -0,0 +1,69 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "unsafe" + +type MemStatsType struct { + // General statistics. + // Not locked during update; approximate. + Alloc uint64 // bytes allocated and still in use + TotalAlloc uint64 // bytes allocated (even if freed) + Sys uint64 // bytes obtained from system (should be sum of XxxSys below) + Lookups uint64 // number of pointer lookups + Mallocs uint64 // number of mallocs + Frees uint64 // number of frees + + // Main allocation heap statistics. 
+ HeapAlloc uint64 // bytes allocated and still in use + HeapSys uint64 // bytes obtained from system + HeapIdle uint64 // bytes in idle spans + HeapInuse uint64 // bytes in non-idle span + HeapObjects uint64 // total number of allocated objects + + // Low-level fixed-size structure allocator statistics. + // Inuse is bytes used now. + // Sys is bytes obtained from system. + StackInuse uint64 // bootstrap stacks + StackSys uint64 + MSpanInuse uint64 // mspan structures + MSpanSys uint64 + MCacheInuse uint64 // mcache structures + MCacheSys uint64 + BuckHashSys uint64 // profiling bucket hash table + + // Garbage collector statistics. + NextGC uint64 + PauseTotalNs uint64 + PauseNs [256]uint64 // most recent GC pause times + NumGC uint32 + EnableGC bool + DebugGC bool + + // Per-size allocation statistics. + // Not locked during update; approximate. + // 61 is NumSizeClasses in the C code. + BySize [61]struct { + Size uint32 + Mallocs uint64 + Frees uint64 + } +} + +var sizeof_C_MStats int // filled in by malloc.goc + +func init() { + if sizeof_C_MStats != unsafe.Sizeof(MemStats) { + println(sizeof_C_MStats, unsafe.Sizeof(MemStats)) + panic("MStats vs MemStatsType size mismatch") + } +} + +// MemStats holds statistics about the memory system. +// The statistics are only approximate, as they are not interlocked on update. +var MemStats MemStatsType + +// GC runs a garbage collection. 
+func GC() diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c index 7c175b308..14d485b71 100644 --- a/src/pkg/runtime/mgc0.c +++ b/src/pkg/runtime/mgc0.c @@ -53,7 +53,6 @@ enum { static uint64 nlookup; static uint64 nsizelookup; static uint64 naddrlookup; -static uint64 nhandoff; static int32 gctrace; typedef struct Workbuf Workbuf; @@ -71,10 +70,8 @@ extern byte end[]; static G *fing; static Finalizer *finq; static int32 fingwait; -static uint32 nfullwait; static void runfinq(void); -static bool bitlookup(void*, uintptr**, uintptr*, int32*); static Workbuf* getempty(Workbuf*); static Workbuf* getfull(Workbuf*); @@ -379,8 +376,6 @@ mark(void) case Gdead: break; case Grunning: - case Grecovery: - case Gstackalloc: if(gp != g) runtime·throw("mark - world not stopped"); scanstack(gp); diff --git a/src/pkg/runtime/plan9/386/signal.c b/src/pkg/runtime/plan9/386/signal.c index 6bde09846..364fd1c41 100644 --- a/src/pkg/runtime/plan9/386/signal.c +++ b/src/pkg/runtime/plan9/386/signal.c @@ -14,3 +14,11 @@ runtime·signame(int32) { return runtime·emptystring; } + +void +runtime·resetcpuprofiler(int32 hz) +{ + // TODO: Enable profiling interrupts. + + m->profilehz = hz; +} diff --git a/src/pkg/runtime/pprof/pprof.go b/src/pkg/runtime/pprof/pprof.go index 9bee51128..fdeceb4e8 100644 --- a/src/pkg/runtime/pprof/pprof.go +++ b/src/pkg/runtime/pprof/pprof.go @@ -14,6 +14,7 @@ import ( "io" "os" "runtime" + "sync" ) // WriteHeapProfile writes a pprof-formatted heap profile to w. @@ -105,3 +106,71 @@ func WriteHeapProfile(w io.Writer) os.Error { } return b.Flush() } + +var cpu struct { + sync.Mutex + profiling bool + done chan bool +} + +// StartCPUProfile enables CPU profiling for the current process. +// While profiling, the profile will be buffered and written to w. +// StartCPUProfile returns an error if profiling is already enabled. 
+func StartCPUProfile(w io.Writer) os.Error { + // The runtime routines allow a variable profiling rate, + // but in practice operating systems cannot trigger signals + // at more than about 500 Hz, and our processing of the + // signal is not cheap (mostly getting the stack trace). + // 100 Hz is a reasonable choice: it is frequent enough to + // produce useful data, rare enough not to bog down the + // system, and a nice round number to make it easy to + // convert sample counts to seconds. Instead of requiring + // each client to specify the frequency, we hard code it. + const hz = 100 + + // Avoid queueing behind StopCPUProfile. + // Could use TryLock instead if we had it. + if cpu.profiling { + return fmt.Errorf("cpu profiling already in use") + } + + cpu.Lock() + defer cpu.Unlock() + if cpu.done == nil { + cpu.done = make(chan bool) + } + // Double-check. + if cpu.profiling { + return fmt.Errorf("cpu profiling already in use") + } + cpu.profiling = true + runtime.SetCPUProfileRate(hz) + go profileWriter(w) + return nil +} + +func profileWriter(w io.Writer) { + for { + data := runtime.CPUProfile() + if data == nil { + break + } + w.Write(data) + } + cpu.done <- true +} + +// StopCPUProfile stops the current CPU profile, if any. +// StopCPUProfile only returns after all the writes for the +// profile have completed. +func StopCPUProfile() { + cpu.Lock() + defer cpu.Unlock() + + if !cpu.profiling { + return + } + cpu.profiling = false + runtime.SetCPUProfileRate(0) + <-cpu.done +} diff --git a/src/pkg/runtime/pprof/pprof_test.go b/src/pkg/runtime/pprof/pprof_test.go new file mode 100644 index 000000000..a060917a2 --- /dev/null +++ b/src/pkg/runtime/pprof/pprof_test.go @@ -0,0 +1,77 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pprof_test + +import ( + "bytes" + "hash/crc32" + "runtime" + . 
"runtime/pprof" + "strings" + "testing" + "unsafe" +) + +func TestCPUProfile(t *testing.T) { + switch runtime.GOOS { + case "darwin": + // see Apple Bug Report #9177434 (copied into change description) + return + case "plan9": + // unimplemented + return + case "windows": + // unimplemented + return + } + + buf := make([]byte, 100000) + var prof bytes.Buffer + if err := StartCPUProfile(&prof); err != nil { + t.Fatal(err) + } + // This loop takes about a quarter second on a 2 GHz laptop. + // We only need to get one 100 Hz clock tick, so we've got + // a 25x safety buffer. + for i := 0; i < 1000; i++ { + crc32.ChecksumIEEE(buf) + } + StopCPUProfile() + + // Convert []byte to []uintptr. + bytes := prof.Bytes() + val := *(*[]uintptr)(unsafe.Pointer(&bytes)) + val = val[:len(bytes)/unsafe.Sizeof(uintptr(0))] + + if len(val) < 10 { + t.Fatalf("profile too short: %#x", val) + } + if val[0] != 0 || val[1] != 3 || val[2] != 0 || val[3] != 1e6/100 || val[4] != 0 { + t.Fatalf("unexpected header %#x", val[:5]) + } + + // Check that profile is well formed and contains ChecksumIEEE. + found := false + val = val[5:] + for len(val) > 0 { + if len(val) < 2 || val[0] < 1 || val[1] < 1 || uintptr(len(val)) < 2+val[1] { + t.Fatalf("malformed profile. 
leftover: %#x", val) + } + for _, pc := range val[2 : 2+val[1]] { + f := runtime.FuncForPC(pc) + if f == nil { + continue + } + if strings.Contains(f.Name(), "ChecksumIEEE") { + found = true + } + } + val = val[2+val[1]:] + } + + if !found { + t.Fatal("did not find ChecksumIEEE in the profile") + } +} diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index db6072b5c..e212c7820 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -12,6 +12,9 @@ bool runtime·iscgo; static void unwindstack(G*, byte*); +static void schedule(G*); +static void acquireproc(void); +static void releaseproc(void); typedef struct Sched Sched; @@ -67,6 +70,7 @@ struct Sched { int32 msyscall; // number of ms in system calls int32 predawn; // running initialization, don't run new gs. + int32 profilehz; // cpu profiling rate Note stopped; // one g can wait here for ms to stop int32 waitstop; // after setting this flag @@ -75,6 +79,13 @@ struct Sched { Sched runtime·sched; int32 gomaxprocs; +// An m that is waiting for notewakeup(&m->havenextg). This may be +// only be accessed while the scheduler lock is held. This is used to +// minimize the number of times we call notewakeup while the scheduler +// lock is held, since the m will normally move quickly to lock the +// scheduler itself, producing lock contention. +static M* mwakeup; + // Scheduling helpers. Sched must be locked. static void gput(G*); // put/get on ghead/gtail static G* gget(void); @@ -86,9 +97,6 @@ static void matchmg(void); // match ms to gs static void readylocked(G*); // ready, but sched is locked static void mnextg(M*, G*); -// Scheduler loop. -static void scheduler(void); - // The bootstrap sequence is: // // call osinit @@ -130,6 +138,26 @@ runtime·schedinit(void) m->nomemprof--; } +// Lock the scheduler. +static void +schedlock(void) +{ + runtime·lock(&runtime·sched); +} + +// Unlock the scheduler. 
+static void +schedunlock(void) +{ + M *m; + + m = mwakeup; + mwakeup = nil; + runtime·unlock(&runtime·sched); + if(m != nil) + runtime·notewakeup(&m->havenextg); +} + // Called after main·init_function; main·main will be called on return. void runtime·initdone(void) @@ -141,9 +169,9 @@ runtime·initdone(void) // If main·init_function started other goroutines, // kick off new ms to handle them, like ready // would have, had it not been pre-dawn. - runtime·lock(&runtime·sched); + schedlock(); matchmg(); - runtime·unlock(&runtime·sched); + schedunlock(); } void @@ -261,9 +289,9 @@ mget(G *g) void runtime·ready(G *g) { - runtime·lock(&runtime·sched); + schedlock(); readylocked(g); - runtime·unlock(&runtime·sched); + schedunlock(); } // Mark g ready to run. Sched is already locked. @@ -280,7 +308,7 @@ readylocked(G *g) } // Mark runnable. - if(g->status == Grunnable || g->status == Grunning || g->status == Grecovery || g->status == Gstackalloc) { + if(g->status == Grunnable || g->status == Grunning) { runtime·printf("goroutine %d has status %d\n", g->goid, g->status); runtime·throw("bad g->status in ready"); } @@ -314,7 +342,9 @@ mnextg(M *m, G *g) m->nextg = g; if(m->waitnextg) { m->waitnextg = 0; - runtime·notewakeup(&m->havenextg); + if(mwakeup != nil) + runtime·notewakeup(&mwakeup->havenextg); + mwakeup = m; } } @@ -335,7 +365,7 @@ nextgandunlock(void) if(m->nextg != nil) { gp = m->nextg; m->nextg = nil; - runtime·unlock(&runtime·sched); + schedunlock(); return gp; } @@ -353,7 +383,7 @@ nextgandunlock(void) continue; } runtime·sched.mcpu++; // this m will run gp - runtime·unlock(&runtime·sched); + schedunlock(); return gp; } // Otherwise, wait on global m queue. 
@@ -368,7 +398,7 @@ nextgandunlock(void) runtime·sched.waitstop = 0; runtime·notewakeup(&runtime·sched.stopped); } - runtime·unlock(&runtime·sched); + schedunlock(); runtime·notesleep(&m->havenextg); if((gp = m->nextg) == nil) @@ -382,7 +412,7 @@ nextgandunlock(void) void runtime·stoptheworld(void) { - runtime·lock(&runtime·sched); + schedlock(); runtime·gcwaiting = 1; runtime·sched.mcpumax = 1; while(runtime·sched.mcpu > 1) { @@ -392,11 +422,11 @@ runtime·stoptheworld(void) // so this is okay. runtime·noteclear(&runtime·sched.stopped); runtime·sched.waitstop = 1; - runtime·unlock(&runtime·sched); + schedunlock(); runtime·notesleep(&runtime·sched.stopped); - runtime·lock(&runtime·sched); + schedlock(); } - runtime·unlock(&runtime·sched); + schedunlock(); } // TODO(rsc): Remove. This is only temporary, @@ -404,11 +434,11 @@ runtime·stoptheworld(void) void runtime·starttheworld(void) { - runtime·lock(&runtime·sched); + schedlock(); runtime·gcwaiting = 0; runtime·sched.mcpumax = runtime·gomaxprocs; matchmg(); - runtime·unlock(&runtime·sched); + schedunlock(); } // Called to start an M. @@ -419,8 +449,15 @@ runtime·mstart(void) runtime·throw("bad runtime·mstart"); if(m->mcache == nil) m->mcache = runtime·allocmcache(); + + // Record top of stack for use by mcall. + // Once we call schedule we're never coming back, + // so other calls can reuse this stack space. + runtime·gosave(&m->g0->sched); + m->g0->sched.pc = (void*)-1; // make sure it is never used + runtime·minit(); - scheduler(); + schedule(nil); } // When running with cgo, we call libcgo_thread_start @@ -454,7 +491,7 @@ matchmg(void) if((m = mget(g)) == nil){ m = runtime·malloc(sizeof(M)); // Add to runtime·allm so garbage collector doesn't free m - // when it is just in a register (R14 on amd64). + // when it is just in a register or thread-local storage. 
m->alllink = runtime·allm; runtime·allm = m; m->id = runtime·sched.mcount++; @@ -469,7 +506,7 @@ matchmg(void) ts.m = m; ts.g = m->g0; ts.fn = runtime·mstart; - runtime·runcgo(libcgo_thread_start, &ts); + runtime·asmcgocall(libcgo_thread_start, &ts); } else { if(Windows) // windows will layout sched stack on os stack @@ -483,58 +520,19 @@ matchmg(void) } } -// Scheduler loop: find g to run, run it, repeat. +// One round of scheduler: find a goroutine and run it. +// The argument is the goroutine that was running before +// schedule was called, or nil if this is the first call. +// Never returns. static void -scheduler(void) +schedule(G *gp) { - G* gp; - - runtime·lock(&runtime·sched); - if(runtime·gosave(&m->sched) != 0){ - gp = m->curg; - if(gp->status == Grecovery) { - // switched to scheduler to get stack unwound. - // don't go through the full scheduling logic. - Defer *d; - - d = gp->defer; - gp->defer = d->link; - - // unwind to the stack frame with d's arguments in it. - unwindstack(gp, d->argp); - - // make the deferproc for this d return again, - // this time returning 1. function will jump to - // standard return epilogue. - // the -2*sizeof(uintptr) makes up for the - // two extra words that are on the stack at - // each call to deferproc. - // (the pc we're returning to does pop pop - // before it tests the return value.) - // on the arm there are 2 saved LRs mixed in too. - if(thechar == '5') - gp->sched.sp = (byte*)d->argp - 4*sizeof(uintptr); - else - gp->sched.sp = (byte*)d->argp - 2*sizeof(uintptr); - gp->sched.pc = d->pc; - gp->status = Grunning; - runtime·free(d); - runtime·gogo(&gp->sched, 1); - } - - if(gp->status == Gstackalloc) { - // switched to scheduler stack to call stackalloc. - gp->param = runtime·stackalloc((uintptr)gp->param); - gp->status = Grunning; - runtime·gogo(&gp->sched, 1); - } - - // Jumped here via runtime·gosave/gogo, so didn't - // execute lock(&runtime·sched) above. 
- runtime·lock(&runtime·sched); + int32 hz; + schedlock(); + if(gp != nil) { if(runtime·sched.predawn) - runtime·throw("init sleeping"); + runtime·throw("init rescheduling"); // Just finished running gp. gp->m = nil; @@ -545,8 +543,6 @@ scheduler(void) switch(gp->status){ case Grunnable: case Gdead: - case Grecovery: - case Gstackalloc: // Shouldn't have been running! runtime·throw("bad gp->status in sched"); case Grunning: @@ -578,10 +574,16 @@ scheduler(void) gp->status = Grunning; m->curg = gp; gp->m = m; + + // Check whether the profiler needs to be turned on or off. + hz = runtime·sched.profilehz; + if(m->profilehz != hz) + runtime·resetcpuprofiler(hz); + if(gp->sched.pc == (byte*)runtime·goexit) { // kickoff runtime·gogocall(&gp->sched, (void(*)(void))gp->entry); } - runtime·gogo(&gp->sched, 1); + runtime·gogo(&gp->sched, 0); } // Enter scheduler. If g->status is Grunning, @@ -595,8 +597,7 @@ runtime·gosched(void) runtime·throw("gosched holding locks"); if(g == m->g0) runtime·throw("gosched of g0"); - if(runtime·gosave(&g->sched) == 0) - runtime·gogo(&m->sched, 1); + runtime·mcall(schedule); } // The goroutine g is about to enter a system call. @@ -605,19 +606,20 @@ runtime·gosched(void) // not from the low-level system calls used by the runtime. // Entersyscall cannot split the stack: the runtime·gosave must // make g->sched refer to the caller's stack pointer. +// It's okay to call matchmg and notewakeup even after +// decrementing mcpu, because we haven't released the +// sched lock yet. #pragma textflag 7 void runtime·entersyscall(void) { - runtime·lock(&runtime·sched); // Leave SP around for gc and traceback. // Do before notewakeup so that gc // never sees Gsyscall with wrong stack. 
runtime·gosave(&g->sched); - if(runtime·sched.predawn) { - runtime·unlock(&runtime·sched); + if(runtime·sched.predawn) return; - } + schedlock(); g->status = Gsyscall; runtime·sched.mcpu--; runtime·sched.msyscall++; @@ -627,7 +629,7 @@ runtime·entersyscall(void) runtime·sched.waitstop = 0; runtime·notewakeup(&runtime·sched.stopped); } - runtime·unlock(&runtime·sched); + schedunlock(); } // The goroutine g exited its system call. @@ -637,17 +639,16 @@ runtime·entersyscall(void) void runtime·exitsyscall(void) { - runtime·lock(&runtime·sched); - if(runtime·sched.predawn) { - runtime·unlock(&runtime·sched); + if(runtime·sched.predawn) return; - } + + schedlock(); runtime·sched.msyscall--; runtime·sched.mcpu++; // Fast path - if there's room for this m, we're done. - if(runtime·sched.mcpu <= runtime·sched.mcpumax) { + if(m->profilehz == runtime·sched.profilehz && runtime·sched.mcpu <= runtime·sched.mcpumax) { g->status = Grunning; - runtime·unlock(&runtime·sched); + schedunlock(); return; } // Tell scheduler to put g back on the run queue: @@ -655,7 +656,7 @@ runtime·exitsyscall(void) // but keeps the garbage collector from thinking // that g is running right now, which it's not. g->readyonstop = 1; - runtime·unlock(&runtime·sched); + schedunlock(); // Slow path - all the cpus are taken. // The scheduler will ready g and put this m to sleep. @@ -664,60 +665,6 @@ runtime·exitsyscall(void) runtime·gosched(); } -// Restore the position of m's scheduler stack if we unwind the stack -// through a cgo callback. -static void -runtime·unwindcgocallback(void **spaddr, void *sp) -{ - *spaddr = sp; -} - -// Start scheduling g1 again for a cgo callback. -void -runtime·startcgocallback(G* g1) -{ - Defer *d; - - runtime·lock(&runtime·sched); - g1->status = Grunning; - runtime·sched.msyscall--; - runtime·sched.mcpu++; - runtime·unlock(&runtime·sched); - - // Add an entry to the defer stack which restores the old - // position of m's scheduler stack. 
This is so that if the - // code we are calling panics, we won't lose the space on the - // scheduler stack. Note that we are locked to this m here. - d = runtime·malloc(sizeof(*d) + 2*sizeof(void*) - sizeof(d->args)); - d->fn = (byte*)runtime·unwindcgocallback; - d->siz = 2 * sizeof(uintptr); - ((void**)d->args)[0] = &m->sched.sp; - ((void**)d->args)[1] = m->sched.sp; - d->link = g1->defer; - g1->defer = d; -} - -// Stop scheduling g1 after a cgo callback. -void -runtime·endcgocallback(G* g1) -{ - Defer *d; - - runtime·lock(&runtime·sched); - g1->status = Gsyscall; - runtime·sched.mcpu--; - runtime·sched.msyscall++; - runtime·unlock(&runtime·sched); - - // Remove the entry on the defer stack added by - // startcgocallback. - d = g1->defer; - if (d == nil || d->fn != (byte*)runtime·unwindcgocallback) - runtime·throw("bad defer entry in endcgocallback"); - g1->defer = d->link; - runtime·free(d); -} - void runtime·oldstack(void) { @@ -767,6 +714,10 @@ runtime·newstack(void) runtime·printf("runtime: split stack overflow: %p < %p\n", m->morebuf.sp, g1->stackguard - StackGuard); runtime·throw("runtime: split stack overflow"); } + if(argsize % sizeof(uintptr) != 0) { + runtime·printf("runtime: stack split with misaligned argsize %d\n", argsize); + runtime·throw("runtime: stack split argsize"); + } reflectcall = framesize==1; if(reflectcall) @@ -831,12 +782,18 @@ runtime·newstack(void) *(int32*)345 = 123; // never return } +static void +mstackalloc(G *gp) +{ + gp->param = runtime·stackalloc((uintptr)gp->param); + runtime·gogo(&gp->sched, 0); +} + G* runtime·malg(int32 stacksize) { G *newg; byte *stk; - int32 oldstatus; newg = runtime·malloc(sizeof(G)); if(stacksize >= 0) { @@ -845,17 +802,10 @@ runtime·malg(int32 stacksize) stk = runtime·stackalloc(StackSystem + stacksize); } else { // have to call stackalloc on scheduler stack. 
- oldstatus = g->status; g->param = (void*)(StackSystem + stacksize); - g->status = Gstackalloc; - // next two lines are runtime·gosched without the check - // of m->locks. we're almost certainly holding a lock, - // but this is not a real rescheduling so it's okay. - if(runtime·gosave(&g->sched) == 0) - runtime·gogo(&m->sched, 1); + runtime·mcall(mstackalloc); stk = g->param; g->param = nil; - g->status = oldstatus; } newg->stack0 = stk; newg->stackguard = stk + StackSystem + StackGuard; @@ -900,7 +850,7 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc) if(siz > 1024) runtime·throw("runtime.newproc: too many args"); - runtime·lock(&runtime·sched); + schedlock(); if((newg = gfget()) != nil){ newg->status = Gwaiting; @@ -933,7 +883,7 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc) newg->goid = runtime·goidgen; newprocreadylocked(newg); - runtime·unlock(&runtime·sched); + schedunlock(); return newg; //printf(" goid=%d\n", newg->goid); @@ -1040,6 +990,8 @@ printpanics(Panic *p) runtime·printf(" [recovered]"); runtime·printf("\n"); } + +static void recovery(G*); void runtime·panic(Eface e) @@ -1070,9 +1022,8 @@ runtime·panic(Eface e) // for scheduler to find. d->link = g->defer; g->defer = d; - g->status = Grecovery; - runtime·gosched(); - runtime·throw("recovery failed"); // gosched should not return + runtime·mcall(recovery); + runtime·throw("recovery failed"); // mcall should not return } runtime·free(d); } @@ -1083,6 +1034,36 @@ runtime·panic(Eface e) runtime·dopanic(0); } +static void +recovery(G *gp) +{ + Defer *d; + + // Rewind gp's stack; we're running on m->g0's stack. + d = gp->defer; + gp->defer = d->link; + + // Unwind to the stack frame with d's arguments in it. + unwindstack(gp, d->argp); + + // Make the deferproc for this d return again, + // this time returning 1. The calling function will + // jump to the standard return epilogue. 
+ // The -2*sizeof(uintptr) makes up for the + // two extra words that are on the stack at + // each call to deferproc. + // (The pc we're returning to does pop pop + // before it tests the return value.) + // On the arm there are 2 saved LRs mixed in too. + if(thechar == '5') + gp->sched.sp = (byte*)d->argp - 4*sizeof(uintptr); + else + gp->sched.sp = (byte*)d->argp - 2*sizeof(uintptr); + gp->sched.pc = d->pc; + runtime·free(d); + runtime·gogo(&gp->sched, 1); +} + #pragma textflag 7 /* no split, or else g->stackguard is not the stack for fp */ void runtime·recover(byte *argp, Eface ret) @@ -1210,7 +1191,7 @@ runtime·gomaxprocsfunc(int32 n) { int32 ret; - runtime·lock(&runtime·sched); + schedlock(); ret = runtime·gomaxprocs; if (n <= 0) n = ret; @@ -1218,7 +1199,7 @@ runtime·gomaxprocsfunc(int32 n) runtime·sched.mcpumax = n; // handle fewer procs? if(runtime·sched.mcpu > runtime·sched.mcpumax) { - runtime·unlock(&runtime·sched); + schedunlock(); // just give up the cpu. // we'll only get rescheduled once the // number has come down. 
@@ -1227,7 +1208,7 @@ runtime·gomaxprocsfunc(int32 n) } // handle more procs matchmg(); - runtime·unlock(&runtime·sched); + schedunlock(); return ret; } @@ -1238,6 +1219,12 @@ runtime·UnlockOSThread(void) g->lockedm = nil; } +bool +runtime·lockedOSThread(void) +{ + return g->lockedm != nil && m->lockedg != nil; +} + // for testing of wire, unwire void runtime·mid(uint32 ret) @@ -1258,3 +1245,69 @@ runtime·mcount(void) { return runtime·sched.mcount; } + +void +runtime·badmcall(void) // called from assembly +{ + runtime·throw("runtime: mcall called on m->g0 stack"); +} + +void +runtime·badmcall2(void) // called from assembly +{ + runtime·throw("runtime: mcall function returned"); +} + +static struct { + Lock; + void (*fn)(uintptr*, int32); + int32 hz; + uintptr pcbuf[100]; +} prof; + +void +runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp) +{ + int32 n; + + if(prof.fn == nil || prof.hz == 0) + return; + + runtime·lock(&prof); + if(prof.fn == nil) { + runtime·unlock(&prof); + return; + } + n = runtime·gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf)); + if(n > 0) + prof.fn(prof.pcbuf, n); + runtime·unlock(&prof); +} + +void +runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) +{ + // Force sane arguments. + if(hz < 0) + hz = 0; + if(hz == 0) + fn = nil; + if(fn == nil) + hz = 0; + + // Stop profiler on this cpu so that it is safe to lock prof. + // if a profiling signal came in while we had prof locked, + // it would deadlock. 
+ runtime·resetcpuprofiler(0); + + runtime·lock(&prof); + prof.fn = fn; + prof.hz = hz; + runtime·unlock(&prof); + runtime·lock(&runtime·sched); + runtime·sched.profilehz = hz; + runtime·unlock(&runtime·sched); + + if(hz != 0) + runtime·resetcpuprofiler(hz); +} diff --git a/src/pkg/runtime/reflect.goc b/src/pkg/runtime/reflect.goc index 71d648266..9bdc48afb 100644 --- a/src/pkg/runtime/reflect.goc +++ b/src/pkg/runtime/reflect.goc @@ -70,22 +70,18 @@ func makechan(typ *byte, size uint32) (ch *byte) { ch = (byte*)runtime·makechan_c(t->elem, size); } -func chansend(ch *byte, val *byte, pres *bool) { - runtime·chansend((Hchan*)ch, val, pres); +func chansend(ch *byte, val *byte, selected *bool) { + runtime·chansend((Hchan*)ch, val, selected); } -func chanrecv(ch *byte, val *byte, pres *bool) { - runtime·chanrecv((Hchan*)ch, val, pres, nil); +func chanrecv(ch *byte, val *byte, selected *bool, received *bool) { + runtime·chanrecv((Hchan*)ch, val, selected, received); } func chanclose(ch *byte) { runtime·chanclose((Hchan*)ch); } -func chanclosed(ch *byte) (r bool) { - r = runtime·chanclosed((Hchan*)ch); -} - func chanlen(ch *byte) (r int32) { r = runtime·chanlen((Hchan*)ch); } diff --git a/src/pkg/runtime/runtime-gdb.py b/src/pkg/runtime/runtime-gdb.py index 68dd8abdc..08772a431 100644 --- a/src/pkg/runtime/runtime-gdb.py +++ b/src/pkg/runtime/runtime-gdb.py @@ -215,6 +215,8 @@ class IfacePrinter: return 'string' def to_string(self): + if self.val['data'] == 0: + return 0x0 try: dtype = iface_dtype(self.val) except: @@ -308,15 +310,11 @@ class GoroutinesCmd(gdb.Command): for ptr in linked_list(gdb.parse_and_eval("'runtime.allg'"), 'alllink'): if ptr['status'] == 6: # 'gdead' continue - m = ptr['m'] s = ' ' - if m: - pc = m['sched']['pc'].cast(vp) - sp = m['sched']['sp'].cast(vp) + if ptr['m']: s = '*' - else: - pc = ptr['sched']['pc'].cast(vp) - sp = ptr['sched']['sp'].cast(vp) + pc = ptr['sched']['pc'].cast(vp) + sp = ptr['sched']['sp'].cast(vp) blk = 
gdb.block_for_pc(long((pc))) print s, ptr['goid'], "%8s" % sts[long((ptr['status']))], blk.function @@ -326,7 +324,7 @@ def find_goroutine(goid): if ptr['status'] == 6: # 'gdead' continue if ptr['goid'] == goid: - return [(ptr['m'] or ptr)['sched'][x].cast(vp) for x in 'pc', 'sp'] + return [ptr['sched'][x].cast(vp) for x in 'pc', 'sp'] return None, None diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h index 85dca54f7..6cf2685fd 100644 --- a/src/pkg/runtime/runtime.h +++ b/src/pkg/runtime/runtime.h @@ -103,8 +103,6 @@ enum Gwaiting, Gmoribund, Gdead, - Grecovery, - Gstackalloc, }; enum { @@ -219,7 +217,6 @@ struct M uint64 procid; // for debuggers, but offset not hard-coded G* gsignal; // signal-handling G uint32 tls[8]; // thread-local storage (for 386 extern register) - Gobuf sched; // scheduling stack G* curg; // current running goroutine int32 id; int32 mallocing; @@ -228,6 +225,7 @@ struct M int32 nomemprof; int32 waitnextg; int32 dying; + int32 profilehz; Note havenextg; G* nextg; M* alllink; // on allm @@ -385,7 +383,7 @@ int32 runtime·charntorune(int32*, uint8*, int32); void runtime·gogo(Gobuf*, uintptr); void runtime·gogocall(Gobuf*, void(*)(void)); -uintptr runtime·gosave(Gobuf*); +void runtime·gosave(Gobuf*); void runtime·lessstack(void); void runtime·goargs(void); void runtime·goenvs(void); @@ -442,25 +440,27 @@ void runtime·walkfintab(void (*fn)(void*)); void runtime·runpanic(Panic*); void* runtime·getcallersp(void*); int32 runtime·mcount(void); +void runtime·mcall(void(*)(G*)); void runtime·exit(int32); void runtime·breakpoint(void); void runtime·gosched(void); void runtime·goexit(void); -void runtime·runcgo(void (*fn)(void*), void*); -void runtime·runcgocallback(G*, void*, void (*fn)()); +void runtime·asmcgocall(void (*fn)(void*), void*); void runtime·entersyscall(void); void runtime·exitsyscall(void); -void runtime·startcgocallback(G*); -void runtime·endcgocallback(G*); G* runtime·newproc1(byte*, byte*, int32, int32, void*); void 
runtime·siginit(void); bool runtime·sigsend(int32 sig); void runtime·gettime(int64*, int32*); int32 runtime·callers(int32, uintptr*, int32); +int32 runtime·gentraceback(byte*, byte*, byte*, G*, int32, uintptr*, int32); int64 runtime·nanotime(void); void runtime·dopanic(int32); void runtime·startpanic(void); +void runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp); +void runtime·resetcpuprofiler(int32); +void runtime·setcpuprofilerate(void(*)(uintptr*, int32), int32); #pragma varargck argpos runtime·printf 1 #pragma varargck type "d" int32 @@ -590,7 +590,6 @@ Hchan* runtime·makechan_c(Type*, int64); void runtime·chansend(Hchan*, void*, bool*); void runtime·chanrecv(Hchan*, void*, bool*, bool*); void runtime·chanclose(Hchan*); -bool runtime·chanclosed(Hchan*); int32 runtime·chanlen(Hchan*); int32 runtime·chancap(Hchan*); diff --git a/src/pkg/runtime/windows/386/signal.c b/src/pkg/runtime/windows/386/signal.c index 08b32a137..cc6a2302f 100644 --- a/src/pkg/runtime/windows/386/signal.c +++ b/src/pkg/runtime/windows/386/signal.c @@ -88,3 +88,11 @@ runtime·sighandler(ExceptionRecord *info, void *frame, Context *r) runtime·exit(2); return 0; } + +void +runtime·resetcpuprofiler(int32 hz) +{ + // TODO: Enable profiling interrupts. + + m->profilehz = hz; +} diff --git a/src/pkg/runtime/windows/386/sys.s b/src/pkg/runtime/windows/386/sys.s index bca48febe..15f7f95b8 100644 --- a/src/pkg/runtime/windows/386/sys.s +++ b/src/pkg/runtime/windows/386/sys.s @@ -20,7 +20,7 @@ TEXT runtime·stdcall_raw(SB),7,$0 CMPL g(DI), SI MOVL SP, BX JEQ 2(PC) - MOVL (m_sched+gobuf_sp)(DX), SP + MOVL (g_sched+gobuf_sp)(SI), SP PUSHL BX PUSHL g(DI) MOVL SI, g(DI) |