author: Michael Stapelberg <stapelberg@debian.org> 2013-03-04 21:27:36 +0100
committer: Michael Stapelberg <michael@stapelberg.de> 2013-03-04 21:27:36 +0100
commit: 04b08da9af0c450d645ab7389d1467308cfc2db8 (patch)
tree: db247935fa4f2f94408edc3acd5d0d4f997aa0d8 /src/pkg/runtime
parent: 917c5fb8ec48e22459d77e3849e6d388f93d3260 (diff)
download: golang-upstream/1.1_hg20130304.tar.gz

Imported Upstream version 1.1~hg20130304 (tag: upstream/1.1_hg20130304)
Diffstat (limited to 'src/pkg/runtime'): 229 files changed, 20551 insertions, 5742 deletions
diff --git a/src/pkg/runtime/alg.c b/src/pkg/runtime/alg.c index 36973eba3..ad85b43ae 100644 --- a/src/pkg/runtime/alg.c +++ b/src/pkg/runtime/alg.c @@ -19,13 +19,13 @@ runtime·memhash(uintptr *h, uintptr s, void *a) uintptr hash; b = a; - hash = M0; + hash = M0 ^ *h; while(s > 0) { hash = (hash ^ *b) * M1; b++; s--; } - *h = (*h ^ hash) * M1; + *h = hash; } void @@ -316,7 +316,7 @@ runtime·strhash(uintptr *h, uintptr s, void *a) void runtime·strequal(bool *eq, uintptr s, void *a, void *b) { - int32 alen; + intgo alen; USED(s); alen = ((String*)a)->len; @@ -324,6 +324,10 @@ runtime·strequal(bool *eq, uintptr s, void *a, void *b) *eq = false; return; } + if(((String*)a)->str == ((String*)b)->str) { + *eq = true; + return; + } runtime·memequal(eq, alen, ((String*)a)->str, ((String*)b)->str); } @@ -351,7 +355,7 @@ void runtime·interhash(uintptr *h, uintptr s, void *a) { USED(s); - *h = (*h ^ runtime·ifacehash(*(Iface*)a)) * M1; + *h = runtime·ifacehash(*(Iface*)a, *h ^ M0) * M1; } void @@ -385,7 +389,7 @@ void runtime·nilinterhash(uintptr *h, uintptr s, void *a) { USED(s); - *h = (*h ^ runtime·efacehash(*(Eface*)a)) * M1; + *h = runtime·efacehash(*(Eface*)a, *h ^ M0) * M1; } void @@ -469,10 +473,11 @@ void runtime·equal(Type *t, ...) { byte *x, *y; - bool *ret; + uintptr ret; x = (byte*)(&t+1); y = x + t->size; - ret = (bool*)(y + t->size); - t->alg->equal(ret, t->size, x, y); + ret = (uintptr)(y + t->size); + ret = ROUND(ret, Structrnd); + t->alg->equal((bool*)ret, t->size, x, y); } diff --git a/src/pkg/runtime/append_test.go b/src/pkg/runtime/append_test.go index b8552224e..36390181e 100644 --- a/src/pkg/runtime/append_test.go +++ b/src/pkg/runtime/append_test.go @@ -19,6 +19,67 @@ func BenchmarkAppend(b *testing.B) { } } +func benchmarkAppendBytes(b *testing.B, length int) { + b.StopTimer() + x := make([]byte, 0, N) + y := make([]byte, length) + b.StartTimer() + for i := 0; i < b.N; i++ { + x = x[0:0] + x = append(x, y...) + } +} + +func BenchmarkAppend1Byte(b *testing.B) { + benchmarkAppendBytes(b, 1) +} + +func BenchmarkAppend4Bytes(b *testing.B) { + benchmarkAppendBytes(b, 4) +} + +func BenchmarkAppend8Bytes(b *testing.B) { + benchmarkAppendBytes(b, 8) +} + +func BenchmarkAppend16Bytes(b *testing.B) { + benchmarkAppendBytes(b, 16) +} + +func BenchmarkAppend32Bytes(b *testing.B) { + benchmarkAppendBytes(b, 32) +} + +func benchmarkAppendStr(b *testing.B, str string) { + b.StopTimer() + x := make([]byte, 0, N) + b.StartTimer() + for i := 0; i < b.N; i++ { + x = x[0:0] + x = append(x, str...) + } +} + +func BenchmarkAppendStr1Byte(b *testing.B) { + benchmarkAppendStr(b, "1") +} + +func BenchmarkAppendStr4Bytes(b *testing.B) { + benchmarkAppendStr(b, "1234") +} + +func BenchmarkAppendStr8Bytes(b *testing.B) { + benchmarkAppendStr(b, "12345678") +} + +func BenchmarkAppendStr16Bytes(b *testing.B) { + benchmarkAppendStr(b, "1234567890123456") +} + +func BenchmarkAppendStr32Bytes(b *testing.B) { + benchmarkAppendStr(b, "12345678901234567890123456789012") +} + func BenchmarkAppendSpecialCase(b *testing.B) { b.StopTimer() x := make([]int, 0, N) @@ -50,3 +111,13 @@ func TestSideEffectOrder(t *testing.T) { t.Error("append failed: ", x[0], x[1]) } } + +func TestAppendOverlap(t *testing.T) { + x := []byte("1234") + x = append(x[1:], x...) // p > q in runtime·appendslice. 
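A note on the alg.c hunk above: it makes two behavioral changes. memhash now folds the caller's running hash into the initial state (`hash = M0 ^ *h` instead of mixing `*h` in at the end), so hashes of composite values chain, and strequal gains a pointer-equality fast path before falling back to memequal. A minimal Go sketch of both, with illustrative multiplier constants (the runtime's real M0/M1 differ, and `unsafe.StringData` is modern Go standing in for the `((String*)a)->str` access):

```go
package main

import (
	"fmt"
	"unsafe"
)

// Illustrative multipliers; the runtime's M0 and M1 are different values.
const (
	m0 uintptr = 2654435761
	m1 uintptr = 16777619
)

// memhash mirrors the seeded form: the caller's running hash h is
// XORed into the initial state rather than mixed in afterwards.
func memhash(h uintptr, b []byte) uintptr {
	hash := m0 ^ h
	for _, c := range b {
		hash = (hash ^ uintptr(c)) * m1
	}
	return hash
}

// strequal mirrors the added fast path: compare lengths, then the
// backing data pointers, and only then the bytes themselves.
func strequal(a, b string) bool {
	if len(a) != len(b) {
		return false
	}
	if unsafe.StringData(a) == unsafe.StringData(b) {
		return true
	}
	return a == b
}

func main() {
	// Different seeds now yield different hashes for the same bytes.
	fmt.Println(memhash(1, []byte("abc")) == memhash(2, []byte("abc"))) // false
	s := "hello"
	fmt.Println(strequal(s, s)) // true via the pointer fast path
}
```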
+ got := string(x) + want := "2341234" + if got != want { + t.Errorf("overlap failed: got %q want %q", got, want) + } +} diff --git a/src/pkg/runtime/arch_386.h b/src/pkg/runtime/arch_386.h index a0798f99e..4df795f71 100644 --- a/src/pkg/runtime/arch_386.h +++ b/src/pkg/runtime/arch_386.h @@ -1,4 +1,6 @@ enum { thechar = '8', - CacheLineSize = 64 + BigEndian = 0, + CacheLineSize = 64, + appendCrossover = 16 }; diff --git a/src/pkg/runtime/arch_amd64.h b/src/pkg/runtime/arch_amd64.h index dd1cfc18d..e83dc9105 100644 --- a/src/pkg/runtime/arch_amd64.h +++ b/src/pkg/runtime/arch_amd64.h @@ -1,4 +1,6 @@ enum { thechar = '6', - CacheLineSize = 64 + BigEndian = 0, + CacheLineSize = 64, + appendCrossover = 16 }; diff --git a/src/pkg/runtime/arch_arm.h b/src/pkg/runtime/arch_arm.h index c1a7a0f37..f6af58514 100644 --- a/src/pkg/runtime/arch_arm.h +++ b/src/pkg/runtime/arch_arm.h @@ -1,4 +1,6 @@ enum { thechar = '5', - CacheLineSize = 32 + BigEndian = 0, + CacheLineSize = 32, + appendCrossover = 16 }; diff --git a/src/pkg/runtime/asm_386.s b/src/pkg/runtime/asm_386.s index 21bd293ab..96f04e0ae 100644 --- a/src/pkg/runtime/asm_386.s +++ b/src/pkg/runtime/asm_386.s @@ -14,22 +14,22 @@ TEXT _rt0_386(SB),7,$0 MOVL BX, 124(SP) // set default stack bounds. - // initcgo may update stackguard. + // _cgo_init may update stackguard. MOVL $runtime·g0(SB), BP LEAL (-64*1024+104)(SP), BX MOVL BX, g_stackguard(BP) MOVL SP, g_stackbase(BP) - // if there is an initcgo, call it to let it + // if there is an _cgo_init, call it to let it // initialize and to set up GS. if not, // we set up GS ourselves. - MOVL initcgo(SB), AX + MOVL _cgo_init(SB), AX TESTL AX, AX JZ needtls PUSHL BP CALL AX POPL BP - // skip runtime·ldt0setup(SB) and tls test after initcgo for non-windows + // skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows CMPL runtime·iswindows(SB), $0 JEQ ok needtls: @@ -75,7 +75,7 @@ ok: CALL runtime·schedinit(SB) // create a new goroutine to start program - PUSHL $runtime·main(SB) // entry + PUSHL $runtime·main·f(SB) // entry PUSHL $0 // arg size CALL runtime·newproc(SB) POPL AX @@ -87,6 +87,9 @@ ok: INT $3 RET +DATA runtime·main·f+0(SB)/4,$runtime·main(SB) +GLOBL runtime·main·f(SB),8,$4 + TEXT runtime·breakpoint(SB),7,$0 INT $3 RET @@ -131,22 +134,40 @@ TEXT runtime·gogo(SB), 7, $0 MOVL gobuf_pc(BX), BX JMP BX -// void gogocall(Gobuf*, void (*fn)(void)) +// void gogocall(Gobuf*, void (*fn)(void), uintptr r0) // restore state from Gobuf but then call fn. // (call fn, returning to state in Gobuf) TEXT runtime·gogocall(SB), 7, $0 + MOVL 12(SP), DX // context MOVL 8(SP), AX // fn MOVL 4(SP), BX // gobuf - MOVL gobuf_g(BX), DX + MOVL gobuf_g(BX), DI get_tls(CX) - MOVL DX, g(CX) - MOVL 0(DX), CX // make sure g != nil + MOVL DI, g(CX) + MOVL 0(DI), CX // make sure g != nil MOVL gobuf_sp(BX), SP // restore SP MOVL gobuf_pc(BX), BX PUSHL BX JMP AX POPL BX // not reached +// void gogocallfn(Gobuf*, FuncVal*) +// restore state from Gobuf but then call fn. +// (call fn, returning to state in Gobuf) +TEXT runtime·gogocallfn(SB), 7, $0 + MOVL 8(SP), DX // fn + MOVL 4(SP), BX // gobuf + MOVL gobuf_g(BX), DI + get_tls(CX) + MOVL DI, g(CX) + MOVL 0(DI), CX // make sure g != nil + MOVL gobuf_sp(BX), SP // restore SP + MOVL gobuf_pc(BX), BX + PUSHL BX + MOVL 0(DX), BX + JMP BX + POPL BX // not reached + // void mcall(void (*fn)(G*)) // Switch to m->g0's stack, call fn(g). // Fn must never return. 
It should gogo(&g->sched) @@ -189,11 +210,13 @@ TEXT runtime·morestack(SB),7,$0 CMPL g(CX), SI JNE 2(PC) INT $3 + + MOVL DX, m_cret(BX) - // frame size in DX + // frame size in DI // arg size in AX // Save in m. - MOVL DX, m_moreframesize(BX) + MOVL DI, m_moreframesize(BX) MOVL AX, m_moreargsize(BX) // Called from f. @@ -299,6 +322,33 @@ TEXT runtime·cas(SB), 7, $0 MOVL $1, AX RET +// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new) +// Atomically: +// if(*val == *old){ +// *val = new; +// return 1; +// } else { +// *old = *val +// return 0; +// } +TEXT runtime·cas64(SB), 7, $0 + MOVL 4(SP), BP + MOVL 8(SP), SI + MOVL 0(SI), AX + MOVL 4(SI), DX + MOVL 12(SP), BX + MOVL 16(SP), CX + LOCK + CMPXCHG8B 0(BP) + JNZ cas64_fail + MOVL $1, AX + RET +cas64_fail: + MOVL AX, 0(SI) + MOVL DX, 4(SI) + MOVL $0, AX + RET + // bool casp(void **p, void *old, void *new) // Atomically: // if(*p == old){ @@ -357,17 +407,49 @@ TEXT runtime·atomicstore(SB), 7, $0 XCHGL AX, 0(BX) RET +// uint64 atomicload64(uint64 volatile* addr); +// so actually +// void atomicload64(uint64 *res, uint64 volatile *addr); +TEXT runtime·atomicload64(SB), 7, $0 + MOVL 4(SP), BX + MOVL 8(SP), AX + // MOVQ (%EAX), %MM0 + BYTE $0x0f; BYTE $0x6f; BYTE $0x00 + // MOVQ %MM0, 0(%EBX) + BYTE $0x0f; BYTE $0x7f; BYTE $0x03 + // EMMS + BYTE $0x0F; BYTE $0x77 + RET + +// void runtime·atomicstore64(uint64 volatile* addr, uint64 v); +TEXT runtime·atomicstore64(SB), 7, $0 + MOVL 4(SP), AX + // MOVQ and EMMS were introduced on the Pentium MMX. + // MOVQ 0x8(%ESP), %MM0 + BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08 + // MOVQ %MM0, (%EAX) + BYTE $0x0f; BYTE $0x7f; BYTE $0x00 + // EMMS + BYTE $0x0F; BYTE $0x77 + // This is essentially a no-op, but it provides required memory fencing. + // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2). + MOVL $0, AX + LOCK + XADDL AX, (SP) + RET + // void jmpdefer(fn, sp); // called from deferreturn. // 1. pop the caller // 2. sub 5 bytes from the callers return // 3. jmp to the argument TEXT runtime·jmpdefer(SB), 7, $0 - MOVL 4(SP), AX // fn + MOVL 4(SP), DX // fn MOVL 8(SP), BX // caller sp LEAL -4(BX), SP // caller sp after CALL SUBL $5, (SP) // return to CALL again - JMP AX // but first run the deferred function + MOVL 0(DX), BX + JMP BX // but first run the deferred function // Dummy function to use in saved gobuf.PC, // to match SP pointing at a return address. @@ -416,23 +498,49 @@ TEXT runtime·asmcgocall(SB),7,$0 RET // cgocallback(void (*fn)(void*), void *frame, uintptr framesize) -// See cgocall.c for more details. +// Turn the fn into a Go func (by taking its address) and call +// cgocallback_gofunc. TEXT runtime·cgocallback(SB),7,$12 - MOVL fn+0(FP), AX - MOVL frame+4(FP), BX - MOVL framesize+8(FP), DX + LEAL fn+0(FP), AX + MOVL AX, 0(SP) + MOVL frame+4(FP), AX + MOVL AX, 4(SP) + MOVL framesize+8(FP), AX + MOVL AX, 8(SP) + MOVL $runtime·cgocallback_gofunc(SB), AX + CALL AX + RET - // Save current m->g0->sched.sp on stack and then set it to SP. +// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize) +// See cgocall.c for more details. +TEXT runtime·cgocallback_gofunc(SB),7,$12 + // If m is nil, Go did not create the current thread. + // Call needm to obtain one for temporary use. + // In this case, we're running on the thread stack, so there's + // lots of space, but the linker doesn't know. Hide the call from + // the linker analysis by using an indirect call through AX. 
get_tls(CX) +#ifdef GOOS_windows + CMPL CX, $0 + JNE 3(PC) + PUSHL $0 + JMP needm +#endif MOVL m(CX), BP - - // If m is nil, it is almost certainly because we have been called - // on a thread that Go did not create. We're going to crash as - // soon as we try to use m; instead, try to print a nice error and exit. + PUSHL BP CMPL BP, $0 - JNE 2(PC) - CALL runtime·badcallback(SB) + JNE havem +needm: + MOVL $runtime·needm(SB), AX + CALL AX + get_tls(CX) + MOVL m(CX), BP +havem: + // Now there's a valid m, and we're running on its m->g0. + // Save current m->g0->sched.sp on stack and then set it to SP. + // Save current sp in m->g0->sched.sp in preparation for + // switch back to m->curg stack. MOVL m_g0(BP), SI PUSHL (g_sched+gobuf_sp)(SI) MOVL SP, (g_sched+gobuf_sp)(SI) @@ -451,6 +559,10 @@ TEXT runtime·cgocallback(SB),7,$12 // a frame size of 12, the same amount that we use below), // so that the traceback will seamlessly trace back into // the earlier calls. + MOVL fn+0(FP), AX + MOVL frame+4(FP), BX + MOVL framesize+8(FP), DX + MOVL m_curg(BP), SI MOVL SI, g(CX) MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI @@ -488,10 +600,38 @@ TEXT runtime·cgocallback(SB),7,$12 MOVL SI, g(CX) MOVL (g_sched+gobuf_sp)(SI), SP POPL (g_sched+gobuf_sp)(SI) + + // If the m on entry was nil, we called needm above to borrow an m + // for the duration of the call. Since the call is over, return it with dropm. + POPL BP + CMPL BP, $0 + JNE 3(PC) + MOVL $runtime·dropm(SB), AX + CALL AX // Done! RET +// void setmg(M*, G*); set m and g. for use by needm. +TEXT runtime·setmg(SB), 7, $0 +#ifdef GOOS_windows + MOVL mm+0(FP), AX + CMPL AX, $0 + JNE settls + MOVL $0, 0x14(FS) + RET +settls: + LEAL m_tls(AX), AX + MOVL AX, 0x14(FS) +#endif + MOVL mm+0(FP), AX + get_tls(CX) + MOVL mm+0(FP), AX + MOVL AX, m(CX) + MOVL gg+4(FP), BX + MOVL BX, g(CX) + RET + // check that SP is in range [g->stackbase, g->stackguard) TEXT runtime·stackcheck(SB), 7, $0 get_tls(CX) diff --git a/src/pkg/runtime/asm_amd64.s b/src/pkg/runtime/asm_amd64.s index d41ab96d0..987958498 100644 --- a/src/pkg/runtime/asm_amd64.s +++ b/src/pkg/runtime/asm_amd64.s @@ -14,14 +14,14 @@ TEXT _rt0_amd64(SB),7,$-8 MOVQ BX, 24(SP) // create istack out of the given (operating system) stack. - // initcgo may update stackguard. + // _cgo_init may update stackguard. MOVQ $runtime·g0(SB), DI LEAQ (-64*1024+104)(SP), BX MOVQ BX, g_stackguard(DI) MOVQ SP, g_stackbase(DI) - // if there is an initcgo, call it. - MOVQ initcgo(SB), AX + // if there is an _cgo_init, call it. + MOVQ _cgo_init(SB), AX TESTQ AX, AX JZ needtls // g0 already in DI @@ -31,6 +31,10 @@ TEXT _rt0_amd64(SB),7,$-8 JEQ ok needtls: + // skip TLS setup on Plan 9 + CMPL runtime·isplan9(SB), $1 + JEQ ok + LEAQ runtime·tls0(SB), DI CALL runtime·settls(SB) @@ -64,7 +68,7 @@ ok: CALL runtime·schedinit(SB) // create a new goroutine to start program - PUSHQ $runtime·main(SB) // entry + PUSHQ $runtime·main·f(SB) // entry PUSHQ $0 // arg size CALL runtime·newproc(SB) POPQ AX @@ -76,6 +80,9 @@ ok: MOVL $0xf1, 0xf1 // crash RET +DATA runtime·main·f+0(SB)/8,$runtime·main(SB) +GLOBL runtime·main·f(SB),8,$8 + TEXT runtime·breakpoint(SB),7,$0 BYTE $0xcc RET @@ -114,22 +121,40 @@ TEXT runtime·gogo(SB), 7, $0 MOVQ gobuf_pc(BX), BX JMP BX -// void gogocall(Gobuf*, void (*fn)(void)) +// void gogocall(Gobuf*, void (*fn)(void), uintptr r0) // restore state from Gobuf but then call fn. 
// (call fn, returning to state in Gobuf) TEXT runtime·gogocall(SB), 7, $0 + MOVQ 24(SP), DX // context MOVQ 16(SP), AX // fn MOVQ 8(SP), BX // gobuf - MOVQ gobuf_g(BX), DX + MOVQ gobuf_g(BX), DI get_tls(CX) - MOVQ DX, g(CX) - MOVQ 0(DX), CX // make sure g != nil + MOVQ DI, g(CX) + MOVQ 0(DI), CX // make sure g != nil MOVQ gobuf_sp(BX), SP // restore SP MOVQ gobuf_pc(BX), BX PUSHQ BX JMP AX POPQ BX // not reached +// void gogocallfn(Gobuf*, FuncVal*) +// restore state from Gobuf but then call fn. +// (call fn, returning to state in Gobuf) +TEXT runtime·gogocallfn(SB), 7, $0 + MOVQ 16(SP), DX // fn + MOVQ 8(SP), BX // gobuf + MOVQ gobuf_g(BX), AX + get_tls(CX) + MOVQ AX, g(CX) + MOVQ 0(AX), CX // make sure g != nil + MOVQ gobuf_sp(BX), SP // restore SP + MOVQ gobuf_pc(BX), BX + PUSHQ BX + MOVQ 0(DX), BX + JMP BX + POPQ BX // not reached + // void mcall(void (*fn)(G*)) // Switch to m->g0's stack, call fn(g). // Fn must never return. It should gogo(&g->sched) @@ -171,6 +196,8 @@ TEXT runtime·morestack(SB),7,$0 CMPQ g(CX), SI JNE 2(PC) INT $3 + + MOVQ DX, m_cret(BX) // Called from f. // Set m->morebuf to f's caller. @@ -344,6 +371,30 @@ TEXT runtime·cas(SB), 7, $0 MOVL $1, AX RET +// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new) +// Atomically: +// if(*val == *old){ +// *val = new; +// return 1; +// } else { +// *old = *val +// return 0; +// } +TEXT runtime·cas64(SB), 7, $0 + MOVQ 8(SP), BX + MOVQ 16(SP), BP + MOVQ 0(BP), AX + MOVQ 24(SP), CX + LOCK + CMPXCHGQ CX, 0(BX) + JNZ cas64_fail + MOVL $1, AX + RET +cas64_fail: + MOVQ AX, 0(BP) + MOVL $0, AX + RET + // bool casp(void **val, void *old, void *new) // Atomically: // if(*val == old){ @@ -376,6 +427,15 @@ TEXT runtime·xadd(SB), 7, $0 ADDL CX, AX RET +TEXT runtime·xadd64(SB), 7, $0 + MOVQ 8(SP), BX + MOVQ 16(SP), AX + MOVQ AX, CX + LOCK + XADDQ AX, 0(BX) + ADDQ CX, AX + RET + TEXT runtime·xchg(SB), 7, $0 MOVQ 8(SP), BX MOVL 16(SP), AX @@ -402,17 +462,24 @@ TEXT runtime·atomicstore(SB), 7, $0 XCHGL AX, 0(BX) RET +TEXT runtime·atomicstore64(SB), 7, $0 + MOVQ 8(SP), BX + MOVQ 16(SP), AX + XCHGQ AX, 0(BX) + RET + // void jmpdefer(fn, sp); // called from deferreturn. // 1. pop the caller // 2. sub 5 bytes from the callers return // 3. jmp to the argument TEXT runtime·jmpdefer(SB), 7, $0 - MOVQ 8(SP), AX // fn + MOVQ 8(SP), DX // fn MOVQ 16(SP), BX // caller sp LEAQ -8(BX), SP // caller sp after CALL SUBQ $5, (SP) // return to CALL again - JMP AX // but first run the deferred function + MOVQ 0(DX), BX + JMP BX // but first run the deferred function // Dummy function to use in saved gobuf.PC, // to match SP pointing at a return address. @@ -446,39 +513,67 @@ TEXT runtime·asmcgocall(SB),7,$0 MOVQ (g_sched+gobuf_sp)(SI), SP // Now on a scheduling stack (a pthread-created stack). - SUBQ $48, SP + // Make sure we have enough room for 4 stack-backed fast-call + // registers as per windows amd64 calling convention. + SUBQ $64, SP ANDQ $~15, SP // alignment for gcc ABI - MOVQ DI, 32(SP) // save g - MOVQ DX, 24(SP) // save SP + MOVQ DI, 48(SP) // save g + MOVQ DX, 40(SP) // save SP MOVQ BX, DI // DI = first argument in AMD64 ABI MOVQ BX, CX // CX = first argument in Win64 CALL AX // Restore registers, g, stack pointer. get_tls(CX) - MOVQ 32(SP), DI + MOVQ 48(SP), DI MOVQ DI, g(CX) - MOVQ 24(SP), SP + MOVQ 40(SP), SP RET // cgocallback(void (*fn)(void*), void *frame, uintptr framesize) -// See cgocall.c for more details. +// Turn the fn into a Go func (by taking its address) and call +// cgocallback_gofunc. 
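The cas64 contract spelled out in the comments above is compare-and-swap with write-back of the observed value on failure. A hedged Go sketch of that contract, plus the CAS retry loop that the 386 fallback in atomic_386.c (later in this diff) builds on top of it; the real implementations are assembly (CMPXCHG8B on 386, CMPXCHGQ on amd64):

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// cas64 atomically installs new if *val still holds *old; otherwise it
// refreshes *old with the current value and reports failure.
func cas64(val *uint64, old *uint64, new uint64) bool {
	if atomic.CompareAndSwapUint64(val, *old, new) {
		return true
	}
	*old = atomic.LoadUint64(val)
	return false
}

// xadd64 mirrors the CAS retry loop in atomic_386.c: keep attempting
// until no other writer intervenes between the load and the swap.
func xadd64(addr *uint64, delta int64) uint64 {
	old := atomic.LoadUint64(addr)
	for !cas64(addr, &old, old+uint64(delta)) {
		// cas64 refreshed old; retry with the new observation.
	}
	return old + uint64(delta)
}

func main() {
	var v uint64 = 40
	fmt.Println(xadd64(&v, 2)) // 42
}
```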
TEXT runtime·cgocallback(SB),7,$24 - MOVQ fn+0(FP), AX - MOVQ frame+8(FP), BX - MOVQ framesize+16(FP), DX + LEAQ fn+0(FP), AX + MOVQ AX, 0(SP) + MOVQ frame+8(FP), AX + MOVQ AX, 8(SP) + MOVQ framesize+16(FP), AX + MOVQ AX, 16(SP) + MOVQ $runtime·cgocallback_gofunc(SB), AX + CALL AX + RET - // Save current m->g0->sched.sp on stack and then set it to SP. +// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize) +// See cgocall.c for more details. +TEXT runtime·cgocallback_gofunc(SB),7,$24 + // If m is nil, Go did not create the current thread. + // Call needm to obtain one for temporary use. + // In this case, we're running on the thread stack, so there's + // lots of space, but the linker doesn't know. Hide the call from + // the linker analysis by using an indirect call through AX. get_tls(CX) +#ifdef GOOS_windows + CMPQ CX, $0 + JNE 3(PC) + PUSHQ $0 + JMP needm +#endif MOVQ m(CX), BP - - // If m is nil, it is almost certainly because we have been called - // on a thread that Go did not create. We're going to crash as - // soon as we try to use m; instead, try to print a nice error and exit. + PUSHQ BP CMPQ BP, $0 - JNE 2(PC) - CALL runtime·badcallback(SB) + JNE havem +needm: + MOVQ $runtime·needm(SB), AX + CALL AX + get_tls(CX) + MOVQ m(CX), BP +havem: + // Now there's a valid m, and we're running on its m->g0. + // Save current m->g0->sched.sp on stack and then set it to SP. + // Save current sp in m->g0->sched.sp in preparation for + // switch back to m->curg stack. MOVQ m_g0(BP), SI PUSHQ (g_sched+gobuf_sp)(SI) MOVQ SP, (g_sched+gobuf_sp)(SI) @@ -497,6 +592,10 @@ TEXT runtime·cgocallback(SB),7,$24 // a frame size of 24, the same amount that we use below), // so that the traceback will seamlessly trace back into // the earlier calls. + MOVQ fn+0(FP), AX + MOVQ frame+8(FP), BX + MOVQ framesize+16(FP), DX + MOVQ m_curg(BP), SI MOVQ SI, g(CX) MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI @@ -534,10 +633,37 @@ TEXT runtime·cgocallback(SB),7,$24 MOVQ SI, g(CX) MOVQ (g_sched+gobuf_sp)(SI), SP POPQ (g_sched+gobuf_sp)(SI) + + // If the m on entry was nil, we called needm above to borrow an m + // for the duration of the call. Since the call is over, return it with dropm. + POPQ BP + CMPQ BP, $0 + JNE 3(PC) + MOVQ $runtime·dropm(SB), AX + CALL AX // Done! RET +// void setmg(M*, G*); set m and g. for use by needm. +TEXT runtime·setmg(SB), 7, $0 + MOVQ mm+0(FP), AX +#ifdef GOOS_windows + CMPQ AX, $0 + JNE settls + MOVQ $0, 0x28(GS) + RET +settls: + LEAQ m_tls(AX), AX + MOVQ AX, 0x28(GS) +#endif + get_tls(CX) + MOVQ mm+0(FP), AX + MOVQ AX, m(CX) + MOVQ gg+8(FP), BX + MOVQ BX, g(CX) + RET + // check that SP is in range [g->stackbase, g->stackguard) TEXT runtime·stackcheck(SB), 7, $0 get_tls(CX) diff --git a/src/pkg/runtime/asm_arm.s b/src/pkg/runtime/asm_arm.s index 423fda7a0..45b53541b 100644 --- a/src/pkg/runtime/asm_arm.s +++ b/src/pkg/runtime/asm_arm.s @@ -31,6 +31,13 @@ TEXT _rt0_arm(SB),7,$-4 MOVW R13, g_stackbase(g) BL runtime·emptyfunc(SB) // fault if stack check is wrong + // if there is an _cgo_init, call it. 
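Stepping back from the assembly: cgocallback_gofunc on both 386 and amd64 pushes the incoming m, borrows one via needm when it is nil, and returns it through dropm on the way out. A schematic Go rendering of that borrow/return discipline (the names mirror the runtime's, but the pool and bookkeeping here are purely illustrative):

```go
package main

import "fmt"

type m struct{ id int }

// A toy pool standing in for the runtime's supply of extra ms.
var extram = []*m{{1}, {2}}

// needm borrows an m for a thread that Go did not create.
func needm() *m {
	mm := extram[len(extram)-1]
	extram = extram[:len(extram)-1]
	return mm
}

// dropm returns the borrowed m once the callback is over.
func dropm(mm *m) {
	extram = append(extram, mm)
}

// cgocallback sketches the PUSH/POP bookkeeping in the assembly: the
// value of m saved on entry decides whether dropm runs on exit.
func cgocallback(cur *m, fn func(*m)) {
	borrowed := cur == nil
	if borrowed {
		cur = needm()
	}
	fn(cur)
	if borrowed {
		dropm(cur)
	}
}

func main() {
	cgocallback(nil, func(mm *m) { fmt.Println("callback on m", mm.id) })
}
```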
+ MOVW _cgo_init(SB), R2 + CMP $0, R2 + MOVW.NE g, R0 // first argument of _cgo_init is g + BL.NE (R2) // will clobber R0-R3 + + BL runtime·checkgoarm(SB) BL runtime·check(SB) // saved argc, argv @@ -43,7 +50,7 @@ TEXT _rt0_arm(SB),7,$-4 BL runtime·schedinit(SB) // create a new goroutine to start program - MOVW $runtime·main(SB), R0 + MOVW $runtime·main·f(SB), R0 MOVW.W R0, -4(R13) MOVW $8, R0 MOVW.W R0, -4(R13) @@ -58,24 +65,25 @@ TEXT _rt0_arm(SB),7,$-4 MOVW $1234, R0 MOVW $1000, R1 MOVW R0, (R1) // fail hard - B runtime·_dep_dummy(SB) // Never reached - -// TODO(kaib): remove these once i actually understand how the linker removes symbols -// pull in dummy dependencies -TEXT runtime·_dep_dummy(SB),7,$0 - BL _div(SB) - BL _divu(SB) - BL _mod(SB) - BL _modu(SB) - BL _modu(SB) - BL _sfloat(SB) + +DATA runtime·main·f+0(SB)/4,$runtime·main(SB) +GLOBL runtime·main·f(SB),8,$4 TEXT runtime·breakpoint(SB),7,$0 - // no breakpoint yet; let program exit + // gdb won't skip this breakpoint instruction automatically, + // so you must manually "set $pc+=4" to skip it and continue. + WORD $0xe1200071 // BKPT 0x0001 RET +GLOBL runtime·goarm(SB), $4 TEXT runtime·asminit(SB),7,$0 - // No per-thread init. + // disable runfast (flush-to-zero) mode of vfp if runtime.goarm > 5 + MOVW runtime·goarm(SB), R11 + CMP $5, R11 + BLE 4(PC) + WORD $0xeef1ba10 // vmrs r11, fpscr + BIC $(1<<24), R11 + WORD $0xeee1ba10 // vmsr fpscr, r11 RET /* @@ -95,26 +103,49 @@ TEXT runtime·gosave(SB), 7, $-4 // restore state from Gobuf; longjmp TEXT runtime·gogo(SB), 7, $-4 MOVW 0(FP), R1 // gobuf - MOVW 4(FP), R0 // return 2nd arg MOVW gobuf_g(R1), g MOVW 0(g), R2 // make sure g != nil + MOVW _cgo_save_gm(SB), R2 + CMP $0, R2 // if in Cgo, we have to save g and m + BL.NE (R2) // this call will clobber R0 + MOVW 4(FP), R0 // return 2nd arg MOVW gobuf_sp(R1), SP // restore SP MOVW gobuf_pc(R1), PC -// void gogocall(Gobuf*, void (*fn)(void)) +// void gogocall(Gobuf*, void (*fn)(void), uintptr r7) // restore state from Gobuf but then call fn. // (call fn, returning to state in Gobuf) // using frame size $-4 means do not save LR on stack. TEXT runtime·gogocall(SB), 7, $-4 - MOVW 0(FP), R0 // gobuf + MOVW 0(FP), R3 // gobuf MOVW 4(FP), R1 // fn - MOVW 8(FP), R2 // fp offset - MOVW gobuf_g(R0), g - MOVW 0(g), R3 // make sure g != nil - MOVW gobuf_sp(R0), SP // restore SP - MOVW gobuf_pc(R0), LR + MOVW gobuf_g(R3), g + MOVW 0(g), R0 // make sure g != nil + MOVW _cgo_save_gm(SB), R0 + CMP $0, R0 // if in Cgo, we have to save g and m + BL.NE (R0) // this call will clobber R0 + MOVW 8(FP), R7 // context + MOVW gobuf_sp(R3), SP // restore SP + MOVW gobuf_pc(R3), LR MOVW R1, PC +// void gogocallfn(Gobuf*, FuncVal*) +// restore state from Gobuf but then call fn. +// (call fn, returning to state in Gobuf) +// using frame size $-4 means do not save LR on stack. +TEXT runtime·gogocallfn(SB), 7, $-4 + MOVW 0(FP), R3 // gobuf + MOVW 4(FP), R1 // fn + MOVW gobuf_g(R3), g + MOVW 0(g), R0 // make sure g != nil + MOVW _cgo_save_gm(SB), R0 + CMP $0, R0 // if in Cgo, we have to save g and m + BL.NE (R0) // this call will clobber R0 + MOVW gobuf_sp(R3), SP // restore SP + MOVW gobuf_pc(R3), LR + MOVW R1, R7 + MOVW 0(R1), PC + // void mcall(void (*fn)(G*)) // Switch to m->g0's stack, call fn(g). // Fn must never return. It should gogo(&g->sched) @@ -157,6 +188,7 @@ TEXT runtime·morestack(SB),7,$-4 BL.EQ runtime·abort(SB) // Save in m. 
+ MOVW R7, m_cret(m) // function context MOVW R1, m_moreframesize(m) MOVW R2, m_moreargsize(m) @@ -228,28 +260,175 @@ TEXT runtime·lessstack(SB), 7, $-4 TEXT runtime·jmpdefer(SB), 7, $0 MOVW 0(SP), LR MOVW $-4(LR), LR // BL deferreturn - MOVW fn+0(FP), R0 + MOVW fn+0(FP), R7 MOVW argp+4(FP), SP MOVW $-4(SP), SP // SP is 4 below argp, due to saved LR - B (R0) + MOVW 0(R7), R1 + B (R1) + +// Dummy function to use in saved gobuf.PC, +// to match SP pointing at a return address. +// The gobuf.PC is unused by the contortions here +// but setting it to return will make the traceback code work. +TEXT return<>(SB),7,$0 + RET +// asmcgocall(void(*fn)(void*), void *arg) +// Call fn(arg) on the scheduler stack, +// aligned appropriately for the gcc ABI. +// See cgocall.c for more details. TEXT runtime·asmcgocall(SB),7,$0 - B runtime·cgounimpl(SB) + MOVW fn+0(FP), R1 + MOVW arg+4(FP), R0 + MOVW R13, R2 + MOVW g, R5 + + // Figure out if we need to switch to m->g0 stack. + // We get called to create new OS threads too, and those + // come in on the m->g0 stack already. + MOVW m_g0(m), R3 + CMP R3, g + BEQ 7(PC) + MOVW R13, (g_sched + gobuf_sp)(g) + MOVW $return<>(SB), R4 + MOVW R4, (g_sched+gobuf_pc)(g) + MOVW g, (g_sched+gobuf_g)(g) + MOVW R3, g + MOVW (g_sched+gobuf_sp)(g), R13 + + // Now on a scheduling stack (a pthread-created stack). + SUB $24, R13 + BIC $0x7, R13 // alignment for gcc ABI + MOVW R5, 20(R13) // save old g + MOVW R2, 16(R13) // save old SP + // R0 already contains the first argument + BL (R1) + + // Restore registers, g, stack pointer. + MOVW 20(R13), g + MOVW 16(R13), R13 + RET -TEXT runtime·cgocallback(SB),7,$0 - B runtime·cgounimpl(SB) +// cgocallback(void (*fn)(void*), void *frame, uintptr framesize) +// Turn the fn into a Go func (by taking its address) and call +// cgocallback_gofunc. +TEXT runtime·cgocallback(SB),7,$12 + MOVW $fn+0(FP), R0 + MOVW R0, 4(R13) + MOVW frame+4(FP), R0 + MOVW R0, 8(R13) + MOVW framesize+8(FP), R0 + MOVW R0, 12(R13) + MOVW $runtime·cgocallback_gofunc(SB), R0 + BL (R0) + RET + +// cgocallback_gofunc(void (*fn)(void*), void *frame, uintptr framesize) +// See cgocall.c for more details. +TEXT runtime·cgocallback_gofunc(SB),7,$16 + // Load m and g from thread-local storage. + MOVW _cgo_load_gm(SB), R0 + CMP $0, R0 + BL.NE (R0) + + // If m is nil, Go did not create the current thread. + // Call needm to obtain one for temporary use. + // In this case, we're running on the thread stack, so there's + // lots of space, but the linker doesn't know. Hide the call from + // the linker analysis by using an indirect call. + MOVW m, savedm-16(SP) + CMP $0, m + B.NE havem + MOVW $runtime·needm(SB), R0 + BL (R0) + +havem: + // Now there's a valid m, and we're running on its m->g0. + // Save current m->g0->sched.sp on stack and then set it to SP. + // Save current sp in m->g0->sched.sp in preparation for + // switch back to m->curg stack. + MOVW fn+0(FP), R0 + MOVW frame+4(FP), R1 + MOVW framesize+8(FP), R2 + + MOVW m_g0(m), R3 + MOVW (g_sched+gobuf_sp)(R3), R4 + MOVW.W R4, -4(R13) + MOVW R13, (g_sched+gobuf_sp)(R3) + + // Switch to m->curg stack and call runtime.cgocallbackg + // with the three arguments. Because we are taking over + // the execution of m->curg but *not* resuming what had + // been running, we need to save that information (m->curg->gobuf) + // so that we can restore it when we're done. + // We can restore m->curg->gobuf.sp easily, because calling + // runtime.cgocallbackg leaves SP unchanged upon return. 
+ // To save m->curg->gobuf.pc, we push it onto the stack. + // This has the added benefit that it looks to the traceback + // routine like cgocallbackg is going to return to that + // PC (because we defined cgocallbackg to have + // a frame size of 16, the same amount that we use below), + // so that the traceback will seamlessly trace back into + // the earlier calls. + + // Save current m->g0->sched.sp on stack and then set it to SP. + MOVW m_curg(m), g + MOVW (g_sched+gobuf_sp)(g), R4 // prepare stack as R4 + + // Push gobuf.pc + MOVW (g_sched+gobuf_pc)(g), R5 + SUB $4, R4 + MOVW R5, 0(R4) + + // Push arguments to cgocallbackg. + // Frame size here must match the frame size above + // to trick traceback routines into doing the right thing. + SUB $16, R4 + MOVW R0, 4(R4) + MOVW R1, 8(R4) + MOVW R2, 12(R4) + + // Switch stack and make the call. + MOVW R4, R13 + BL runtime·cgocallbackg(SB) + + // Restore g->gobuf (== m->curg->gobuf) from saved values. + MOVW 16(R13), R5 + MOVW R5, (g_sched+gobuf_pc)(g) + ADD $(16+4), R13 // SP clobbered! It is ok! + MOVW R13, (g_sched+gobuf_sp)(g) + + // Switch back to m->g0's stack and restore m->g0->sched.sp. + // (Unlike m->curg, the g0 goroutine never uses sched.pc, + // so we do not have to restore it.) + MOVW m_g0(m), g + MOVW (g_sched+gobuf_sp)(g), R13 + // POP R6 + MOVW 0(R13), R6 + ADD $4, R13 + MOVW R6, (g_sched+gobuf_sp)(g) + + // If the m on entry was nil, we called needm above to borrow an m + // for the duration of the call. Since the call is over, return it with dropm. + MOVW savedm-16(SP), R6 + CMP $0, R6 + B.NE 3(PC) + MOVW $runtime·dropm(SB), R0 + BL (R0) + + // Done! + RET + +// void setmg(M*, G*); set m and g. for use by needm. +TEXT runtime·setmg(SB), 7, $-4 + MOVW mm+0(FP), m + MOVW gg+4(FP), g + + // Save m and g to thread-local storage. 
+ MOVW _cgo_save_gm(SB), R0 + CMP $0, R0 + BL.NE (R0) -TEXT runtime·memclr(SB),7,$20 - MOVW 0(FP), R0 - MOVW $0, R1 // c = 0 - MOVW R1, -16(SP) - MOVW 4(FP), R1 // n - MOVW R1, -12(SP) - MOVW m, -8(SP) // Save m and g - MOVW g, -4(SP) - BL runtime·memset(SB) - MOVW -8(SP), m // Restore m and g, memset clobbers them - MOVW -4(SP), g RET TEXT runtime·getcallerpc(SB),7,$-4 @@ -269,16 +448,6 @@ TEXT runtime·getcallersp(SB),7,$-4 TEXT runtime·emptyfunc(SB),0,$0 RET -// int64 runtime·cputicks(), so really -// void runtime·cputicks(int64 *ticks) -// stubbed: return int64(0) -TEXT runtime·cputicks(SB),7,$0 - MOVW 0(FP), R1 - MOVW $0, R0 - MOVW R0, 0(R1) - MOVW R0, 4(R1) - RET - TEXT runtime·abort(SB),7,$-4 MOVW $0, R0 MOVW (R0), R1 diff --git a/src/pkg/runtime/atomic_386.c b/src/pkg/runtime/atomic_386.c index a4f2a114f..79b7cbf96 100644 --- a/src/pkg/runtime/atomic_386.c +++ b/src/pkg/runtime/atomic_386.c @@ -17,3 +17,16 @@ runtime·atomicloadp(void* volatile* addr) { return *addr; } + +#pragma textflag 7 +uint64 +runtime·xadd64(uint64 volatile* addr, int64 v) +{ + uint64 old; + + old = *addr; + while(!runtime·cas64(addr, &old, old+v)) { + // nothing + } + return old+v; +} diff --git a/src/pkg/runtime/atomic_amd64.c b/src/pkg/runtime/atomic_amd64.c index a4f2a114f..e92d8ec21 100644 --- a/src/pkg/runtime/atomic_amd64.c +++ b/src/pkg/runtime/atomic_amd64.c @@ -12,6 +12,13 @@ runtime·atomicload(uint32 volatile* addr) } #pragma textflag 7 +uint64 +runtime·atomicload64(uint64 volatile* addr) +{ + return *addr; +} + +#pragma textflag 7 void* runtime·atomicloadp(void* volatile* addr) { diff --git a/src/pkg/runtime/atomic_arm.c b/src/pkg/runtime/atomic_arm.c index 52e4059ae..0b54840cc 100644 --- a/src/pkg/runtime/atomic_arm.c +++ b/src/pkg/runtime/atomic_arm.c @@ -3,6 +3,14 @@ // license that can be found in the LICENSE file. #include "runtime.h" +#include "arch_GOARCH.h" + +static union { + Lock l; + byte pad [CacheLineSize]; +} locktab[57]; + +#define LOCK(addr) (&locktab[((uintptr)(addr)>>3)%nelem(locktab)].l) // Atomic add and return new value. #pragma textflag 7 @@ -80,4 +88,56 @@ runtime·atomicstore(uint32 volatile* addr, uint32 v) if(runtime·cas(addr, old, v)) return; } -}
\ No newline at end of file +} + +#pragma textflag 7 +bool +runtime·cas64(uint64 volatile *addr, uint64 *old, uint64 new) +{ + bool res; + + runtime·lock(LOCK(addr)); + if(*addr == *old) { + *addr = new; + res = true; + } else { + *old = *addr; + res = false; + } + runtime·unlock(LOCK(addr)); + return res; +} + +#pragma textflag 7 +uint64 +runtime·xadd64(uint64 volatile *addr, int64 delta) +{ + uint64 res; + + runtime·lock(LOCK(addr)); + res = *addr + delta; + *addr = res; + runtime·unlock(LOCK(addr)); + return res; +} + +#pragma textflag 7 +uint64 +runtime·atomicload64(uint64 volatile *addr) +{ + uint64 res; + + runtime·lock(LOCK(addr)); + res = *addr; + runtime·unlock(LOCK(addr)); + return res; +} + +#pragma textflag 7 +void +runtime·atomicstore64(uint64 volatile *addr, uint64 v) +{ + runtime·lock(LOCK(addr)); + *addr = v; + runtime·unlock(LOCK(addr)); +} diff --git a/src/pkg/runtime/callback_windows_386.c b/src/pkg/runtime/callback_windows_386.c index fcd292fbc..880588da6 100644 --- a/src/pkg/runtime/callback_windows_386.c +++ b/src/pkg/runtime/callback_windows_386.c @@ -4,6 +4,7 @@ #include "runtime.h" #include "type.h" +#include "typekind.h" #include "defs_GOOS_GOARCH.h" #include "os_GOOS.h" @@ -79,7 +80,7 @@ runtime·compilecallback(Eface fn, bool cleanstack) // MOVL fn, AX *p++ = 0xb8; - *(uint32*)p = (uint32)fn.data; + *(uint32*)p = (uint32)(fn.data); p += 4; // MOVL argsize, DX diff --git a/src/pkg/runtime/callback_windows_amd64.c b/src/pkg/runtime/callback_windows_amd64.c index 99d7cb9e3..1a4779291 100644 --- a/src/pkg/runtime/callback_windows_amd64.c +++ b/src/pkg/runtime/callback_windows_amd64.c @@ -4,6 +4,7 @@ #include "runtime.h" #include "type.h" +#include "typekind.h" #include "defs_GOOS_GOARCH.h" #include "os_GOOS.h" @@ -77,7 +78,7 @@ runtime·compilecallback(Eface fn, bool /*cleanstack*/) // MOVQ fn, AX *p++ = 0x48; *p++ = 0xb8; - *(uint64*)p = (uint64)fn.data; + *(uint64*)p = (uint64)(fn.data); p += 8; // PUSH AX *p++ = 0x50; diff --git a/src/pkg/runtime/cgo/asm_386.s b/src/pkg/runtime/cgo/asm_386.s new file mode 100644 index 000000000..7faaa4097 --- /dev/null +++ b/src/pkg/runtime/cgo/asm_386.s @@ -0,0 +1,29 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + * void crosscall2(void (*fn)(void*, int32), void*, int32) + * Save registers and call fn with two arguments. + */ +TEXT crosscall2(SB),7,$0 + PUSHL BP + MOVL SP, BP + PUSHL BX + PUSHL SI + PUSHL DI + + SUBL $8, SP + MOVL 16(BP), AX + MOVL AX, 4(SP) + MOVL 12(BP), AX + MOVL AX, 0(SP) + MOVL 8(BP), AX + CALL AX + ADDL $8, SP + + POPL DI + POPL SI + POPL BX + POPL BP + RET diff --git a/src/pkg/runtime/cgo/asm_amd64.s b/src/pkg/runtime/cgo/asm_amd64.s new file mode 100644 index 000000000..53f7148a2 --- /dev/null +++ b/src/pkg/runtime/cgo/asm_amd64.s @@ -0,0 +1,45 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + * void crosscall2(void (*fn)(void*, int32), void*, int32) + * Save registers and call fn with two arguments. 
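Worth pausing on the atomic_arm.c additions just above: pre-v6k ARM has no 64-bit atomic instructions, so the runtime stripes a small table of cache-line-padded locks by address and takes the matching lock around each 64-bit operation. A Go sketch of the same striping (the table size and the 3-bit shift mirror the C, everything else is illustrative):

```go
package main

import (
	"fmt"
	"sync"
	"unsafe"
)

const cacheLineSize = 32 // CacheLineSize from arch_arm.h

// One lock per stripe, padded so neighbouring stripes do not share a
// cache line (the union-with-pad trick in the C code).
type stripe struct {
	mu sync.Mutex
	_  [cacheLineSize]byte
}

var locktab [57]stripe

// lockFor mirrors the LOCK(addr) macro: drop the low 3 bits, so every
// byte of one 8-byte word maps to the same stripe, then index modulo
// the table size.
func lockFor(addr *uint64) *sync.Mutex {
	i := (uintptr(unsafe.Pointer(addr)) >> 3) % uintptr(len(locktab))
	return &locktab[i].mu
}

// atomicLoad64 is the lock-based emulation of a 64-bit atomic load.
func atomicLoad64(addr *uint64) uint64 {
	l := lockFor(addr)
	l.Lock()
	defer l.Unlock()
	return *addr
}

func main() {
	var v uint64 = 7
	fmt.Println(atomicLoad64(&v))
}
```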
+ */ +TEXT crosscall2(SB),7,$0 + SUBQ $0x58, SP /* keeps stack pointer 32-byte aligned */ + MOVQ BX, 0x10(SP) + MOVQ BP, 0x18(SP) + MOVQ R12, 0x20(SP) + MOVQ R13, 0x28(SP) + MOVQ R14, 0x30(SP) + MOVQ R15, 0x38(SP) + +#ifdef GOOS_windows + // Win64 save RBX, RBP, RDI, RSI, RSP, R12, R13, R14, and R15 + MOVQ DI, 0x40(SP) + MOVQ SI, 0x48(SP) + + MOVQ DX, 0(SP) /* arg */ + MOVQ R8, 8(SP) /* argsize (includes padding) */ + + CALL CX /* fn */ + + MOVQ 0x40(SP), DI + MOVQ 0x48(SP), SI +#else + MOVQ SI, 0(SP) /* arg */ + MOVQ DX, 8(SP) /* argsize (includes padding) */ + + CALL DI /* fn */ +#endif + + MOVQ 0x10(SP), BX + MOVQ 0x18(SP), BP + MOVQ 0x20(SP), R12 + MOVQ 0x28(SP), R13 + MOVQ 0x30(SP), R14 + MOVQ 0x38(SP), R15 + + ADDQ $0x58, SP + RET diff --git a/src/pkg/runtime/cgo/asm_arm.s b/src/pkg/runtime/cgo/asm_arm.s new file mode 100644 index 000000000..a6ea0dc07 --- /dev/null +++ b/src/pkg/runtime/cgo/asm_arm.s @@ -0,0 +1,23 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + * void crosscall2(void (*fn)(void*, int32), void*, int32) + * Save registers and call fn with two arguments. + */ +TEXT crosscall2(SB),7,$-4 + /* + * We still need to save all callee save register as before, and then + * push 2 args for fn (R1 and R2). + * Also note that at procedure entry in 5c/5g world, 4(R13) will be the + * first arg, so we must push another dummy reg (R0) for 0(R13). + * Additionally, cgo_tls_set_gm will clobber R0, so we need to save R0 + * nevertheless. + */ + MOVM.WP [R0, R1, R2, R4, R5, R6, R7, R8, R9, R10, R11, R12, R14], (R13) + MOVW _cgo_load_gm(SB), R0 + BL (R0) + MOVW PC, R14 + MOVW 0(R13), PC + MOVM.IAW (R13), [R0, R1, R2, R4, R5, R6, R7, R8, R9, R10, R11, R12, PC] diff --git a/src/pkg/runtime/cgo/callbacks.c b/src/pkg/runtime/cgo/callbacks.c index f36fb3fd7..51bd529ec 100644 --- a/src/pkg/runtime/cgo/callbacks.c +++ b/src/pkg/runtime/cgo/callbacks.c @@ -33,7 +33,13 @@ static void _cgo_allocate_internal(uintptr len, byte *ret) { + CgoMal *c; + ret = runtime·mal(len); + c = runtime·mal(sizeof(*c)); + c->next = m->cgomal; + c->alloc = ret; + m->cgomal = c; FLUSH(&ret); } @@ -71,3 +77,19 @@ _cgo_panic(void *a, int32 n) { runtime·cgocallback((void(*)(void))_cgo_panic_internal, a, n); } + +#pragma cgo_import_static x_cgo_init +extern void x_cgo_init(G*); +void (*_cgo_init)(G*) = x_cgo_init; + +#pragma cgo_import_static x_cgo_malloc +extern void x_cgo_malloc(void*); +void (*_cgo_malloc)(void*) = x_cgo_malloc; + +#pragma cgo_import_static x_cgo_free +extern void x_cgo_free(void*); +void (*_cgo_free)(void*) = x_cgo_free; + +#pragma cgo_import_static x_cgo_thread_start +extern void x_cgo_thread_start(void*); +void (*_cgo_thread_start)(void*) = x_cgo_thread_start; diff --git a/src/pkg/runtime/cgo/cgo.go b/src/pkg/runtime/cgo/cgo.go index 414f3da36..e0d538668 100644 --- a/src/pkg/runtime/cgo/cgo.go +++ b/src/pkg/runtime/cgo/cgo.go @@ -18,6 +18,8 @@ package cgo #cgo openbsd LDFLAGS: -lpthread #cgo windows LDFLAGS: -lm -mthreads +#cgo CFLAGS: -Wall -Werror + */ import "C" diff --git a/src/pkg/runtime/cgo/cgo_arm.c b/src/pkg/runtime/cgo/cgo_arm.c new file mode 100644 index 000000000..d23f53e77 --- /dev/null +++ b/src/pkg/runtime/cgo/cgo_arm.c @@ -0,0 +1,12 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#pragma cgo_import_static x_cgo_load_gm +extern void x_cgo_load_gm(void); +void (*_cgo_load_gm)(void) = x_cgo_load_gm; + +#pragma cgo_import_static x_cgo_save_gm +extern void x_cgo_save_gm(void); +void (*_cgo_save_gm)(void) = x_cgo_save_gm; + diff --git a/src/pkg/runtime/cgo/gcc_386.S b/src/pkg/runtime/cgo/gcc_386.S index 9abab7ebd..94ba5842f 100644 --- a/src/pkg/runtime/cgo/gcc_386.S +++ b/src/pkg/runtime/cgo/gcc_386.S @@ -35,31 +35,6 @@ EXT(crosscall_386): popl %ebp ret -/* - * void crosscall2(void (*fn)(void*, int32), void*, int32) - * - * Save registers and call fn with two arguments. - */ -.globl EXT(crosscall2) -EXT(crosscall2): - pushl %ebp - movl %esp, %ebp - pushl %ebx - pushl %esi - pushl %edi - - pushl 16(%ebp) - pushl 12(%ebp) - mov 8(%ebp), %eax - call *%eax - addl $8,%esp - - popl %edi - popl %esi - popl %ebx - popl %ebp - ret - .globl EXT(__stack_chk_fail_local) EXT(__stack_chk_fail_local): 1: diff --git a/src/pkg/runtime/cgo/gcc_amd64.S b/src/pkg/runtime/cgo/gcc_amd64.S index 706ee6b58..81b270195 100644 --- a/src/pkg/runtime/cgo/gcc_amd64.S +++ b/src/pkg/runtime/cgo/gcc_amd64.S @@ -19,9 +19,6 @@ * are callee-save so they must be saved explicitly. * The standard x86-64 ABI passes the three arguments m, g, fn * in %rdi, %rsi, %rdx. - * - * Also need to set %r15 to g and %r14 to m (see ../pkg/runtime/mkasmh.sh) - * during the call. */ .globl EXT(crosscall_amd64) EXT(crosscall_amd64): @@ -45,48 +42,3 @@ EXT(crosscall_amd64): popq %rbp popq %rbx ret - -/* - * void crosscall2(void (*fn)(void*, int32), void *arg, int32 argsize) - * - * Save registers and call fn with two arguments. fn is a Go function - * which takes parameters on the stack rather than in registers. - */ -.globl EXT(crosscall2) -EXT(crosscall2): - subq $0x58, %rsp /* keeps stack pointer 32-byte aligned */ - movq %rbx, 0x10(%rsp) - movq %rbp, 0x18(%rsp) - movq %r12, 0x20(%rsp) - movq %r13, 0x28(%rsp) - movq %r14, 0x30(%rsp) - movq %r15, 0x38(%rsp) - -#if defined(_WIN64) - // Win64 save RBX, RBP, RDI, RSI, RSP, R12, R13, R14, and R15 - movq %rdi, 0x40(%rsp) - movq %rsi, 0x48(%rsp) - - movq %rdx, 0(%rsp) /* arg */ - movq %r8, 8(%rsp) /* argsize (includes padding) */ - - call *%rcx /* fn */ -#else - movq %rsi, 0(%rsp) /* arg */ - movq %rdx, 8(%rsp) /* argsize (includes padding) */ - - call *%rdi /* fn */ -#endif - - movq 0x10(%rsp), %rbx - movq 0x18(%rsp), %rbp - movq 0x20(%rsp), %r12 - movq 0x28(%rsp), %r13 - movq 0x30(%rsp), %r14 - movq 0x38(%rsp), %r15 -#if defined(__WIN64) - movq 0x40(%rsp), %rdi - movq 0x48(%rsp), %rsi -#endif - addq $0x58, %rsp - ret diff --git a/src/pkg/runtime/cgo/gcc_arm.S b/src/pkg/runtime/cgo/gcc_arm.S index 32d862984..809fcb9a0 100644 --- a/src/pkg/runtime/cgo/gcc_arm.S +++ b/src/pkg/runtime/cgo/gcc_arm.S @@ -1 +1,36 @@ -/* unimplemented */ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + * Apple still insists on underscore prefixes for C function names. + */ +#if defined(__APPLE__) +#define EXT(s) _##s +#else +#define EXT(s) s +#endif + +/* + * void crosscall_arm2(void (*fn)(void), void *g, void *m) + * + * Calling into the 5c tool chain, where all registers are caller save. + * Called from standard ARM EABI, where r4-r11 are callee-save, so they + * must be saved explicitly. 
+ */ +.globl EXT(crosscall_arm2) +EXT(crosscall_arm2): + push {r4, r5, r6, r7, r8, r9, r10, r11, ip, lr} + mov r10, r1 // g + mov r9, r2 // m + mov r3, r0 // save r0, cgo_tls_set_gm will clobber it + bl EXT(x_cgo_save_gm) // save current g and m into TLS variable + mov lr, pc + mov pc, r3 + pop {r4, r5, r6, r7, r8, r9, r10, r11, ip, pc} + +.globl EXT(__stack_chk_fail_local) +EXT(__stack_chk_fail_local): +1: + b 1b + diff --git a/src/pkg/runtime/cgo/gcc_darwin_386.c b/src/pkg/runtime/cgo/gcc_darwin_386.c index 2c30c666f..ad9fb5abf 100644 --- a/src/pkg/runtime/cgo/gcc_darwin_386.c +++ b/src/pkg/runtime/cgo/gcc_darwin_386.c @@ -101,8 +101,8 @@ inittls(void) pthread_key_delete(tofree[i]); } -static void -xinitcgo(G *g) +void +x_cgo_init(G *g) { pthread_attr_t attr; size_t size; @@ -115,10 +115,9 @@ xinitcgo(G *g) inittls(); } -void (*initcgo)(G*) = xinitcgo; void -libcgo_sys_thread_start(ThreadStart *ts) +_cgo_sys_thread_start(ThreadStart *ts) { pthread_attr_t attr; sigset_t ign, oset; @@ -127,14 +126,14 @@ libcgo_sys_thread_start(ThreadStart *ts) int err; sigfillset(&ign); - sigprocmask(SIG_SETMASK, &ign, &oset); + pthread_sigmask(SIG_SETMASK, &ign, &oset); pthread_attr_init(&attr); pthread_attr_getstacksize(&attr, &size); ts->g->stackguard = size; err = pthread_create(&p, &attr, threadentry, ts); - sigprocmask(SIG_SETMASK, &oset, nil); + pthread_sigmask(SIG_SETMASK, &oset, nil); if (err != 0) { fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err)); @@ -153,7 +152,7 @@ threadentry(void *v) ts.g->stackbase = (uintptr)&ts; /* - * libcgo_sys_thread_start set stackguard to stack size; + * _cgo_sys_thread_start set stackguard to stack size; * change to actual guard pointer. */ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096; diff --git a/src/pkg/runtime/cgo/gcc_darwin_amd64.c b/src/pkg/runtime/cgo/gcc_darwin_amd64.c index 89dc7a4e8..65d381633 100644 --- a/src/pkg/runtime/cgo/gcc_darwin_amd64.c +++ b/src/pkg/runtime/cgo/gcc_darwin_amd64.c @@ -72,7 +72,7 @@ inittls(void) } void -xinitcgo(G *g) +x_cgo_init(G *g) { pthread_attr_t attr; size_t size; @@ -85,10 +85,9 @@ xinitcgo(G *g) inittls(); } -void (*initcgo)(G*) = xinitcgo; void -libcgo_sys_thread_start(ThreadStart *ts) +_cgo_sys_thread_start(ThreadStart *ts) { pthread_attr_t attr; sigset_t ign, oset; @@ -97,14 +96,14 @@ libcgo_sys_thread_start(ThreadStart *ts) int err; sigfillset(&ign); - sigprocmask(SIG_SETMASK, &ign, &oset); + pthread_sigmask(SIG_SETMASK, &ign, &oset); pthread_attr_init(&attr); pthread_attr_getstacksize(&attr, &size); ts->g->stackguard = size; err = pthread_create(&p, &attr, threadentry, ts); - sigprocmask(SIG_SETMASK, &oset, nil); + pthread_sigmask(SIG_SETMASK, &oset, nil); if (err != 0) { fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err)); @@ -123,7 +122,7 @@ threadentry(void *v) ts.g->stackbase = (uintptr)&ts; /* - * libcgo_sys_thread_start set stackguard to stack size; + * _cgo_sys_thread_start set stackguard to stack size; * change to actual guard pointer. 
*/ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096; diff --git a/src/pkg/runtime/cgo/gcc_freebsd_386.c b/src/pkg/runtime/cgo/gcc_freebsd_386.c index 2c97e2a33..7c62a1bc4 100644 --- a/src/pkg/runtime/cgo/gcc_freebsd_386.c +++ b/src/pkg/runtime/cgo/gcc_freebsd_386.c @@ -6,12 +6,13 @@ #include <sys/signalvar.h> #include <pthread.h> #include <signal.h> +#include <string.h> #include "libcgo.h" static void* threadentry(void*); -static void -xinitcgo(G *g) +void +x_cgo_init(G *g) { pthread_attr_t attr; size_t size; @@ -22,10 +23,9 @@ xinitcgo(G *g) pthread_attr_destroy(&attr); } -void (*initcgo)(G*) = xinitcgo; void -libcgo_sys_thread_start(ThreadStart *ts) +_cgo_sys_thread_start(ThreadStart *ts) { pthread_attr_t attr; sigset_t ign, oset; @@ -60,7 +60,7 @@ threadentry(void *v) ts.g->stackbase = (uintptr)&ts; /* - * libcgo_sys_thread_start set stackguard to stack size; + * _cgo_sys_thread_start set stackguard to stack size; * change to actual guard pointer. */ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096; diff --git a/src/pkg/runtime/cgo/gcc_freebsd_amd64.c b/src/pkg/runtime/cgo/gcc_freebsd_amd64.c index 3beb4d7bb..6be8bd251 100644 --- a/src/pkg/runtime/cgo/gcc_freebsd_amd64.c +++ b/src/pkg/runtime/cgo/gcc_freebsd_amd64.c @@ -6,12 +6,13 @@ #include <sys/signalvar.h> #include <pthread.h> #include <signal.h> +#include <string.h> #include "libcgo.h" static void* threadentry(void*); -static void -xinitcgo(G *g) +void +x_cgo_init(G *g) { pthread_attr_t attr; size_t size; @@ -22,10 +23,9 @@ xinitcgo(G *g) pthread_attr_destroy(&attr); } -void (*initcgo)(G*) = xinitcgo; void -libcgo_sys_thread_start(ThreadStart *ts) +_cgo_sys_thread_start(ThreadStart *ts) { pthread_attr_t attr; sigset_t ign, oset; @@ -61,7 +61,7 @@ threadentry(void *v) ts.g->stackbase = (uintptr)&ts; /* - * libcgo_sys_thread_start set stackguard to stack size; + * _cgo_sys_thread_start set stackguard to stack size; * change to actual guard pointer. */ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096; diff --git a/src/pkg/runtime/cgo/gcc_freebsd_arm.c b/src/pkg/runtime/cgo/gcc_freebsd_arm.c new file mode 100644 index 000000000..3bcb0b270 --- /dev/null +++ b/src/pkg/runtime/cgo/gcc_freebsd_arm.c @@ -0,0 +1,114 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <pthread.h> +#include <string.h> +#include "libcgo.h" + +static void *threadentry(void*); + +// We have to resort to TLS variable to save g(R10) and +// m(R9). One reason is that external code might trigger +// SIGSEGV, and our runtime.sigtramp don't even know we +// are in external code, and will continue to use R10/R9, +// this might as well result in another SIGSEGV. +// Note: all three functions will clobber R0, and the last +// two can be called from 5c ABI code. 
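A recurring idiom across all of these _cgo_sys_thread_start/threadentry pairs: stackguard temporarily carries the stack *size* across pthread_create, and the new thread rewrites it into an actual guard *pointer* relative to its own stack. The arithmetic, sketched in Go (the page size matches the C; addresses are illustrative):

```go
package main

import "fmt"

const page = 4096

// guardFromSize mirrors `(uintptr)&ts - ts.g->stackguard + 4096`:
// given an address near the top of the new stack and the stack size
// that was stashed in stackguard, compute the low-water guard pointer.
// The ARM ports leave two pages of headroom instead of one.
func guardFromSize(stackTop, size, pages uintptr) uintptr {
	return stackTop - size + pages*page
}

func main() {
	top := uintptr(0x7fff0000)
	fmt.Printf("x86 guard: %#x\n", guardFromSize(top, 64*1024, 1))
	fmt.Printf("arm guard: %#x\n", guardFromSize(top, 64*1024, 2))
}
```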
+void __aeabi_read_tp(void) __attribute__((naked)); +void x_cgo_save_gm(void) __attribute__((naked)); +void x_cgo_load_gm(void) __attribute__((naked)); + +void +__aeabi_read_tp(void) +{ + // read @ 0xffff1000 + __asm__ __volatile__ ( + "ldr r0, =0xffff1000\n\t" + "ldr r0, [r0]\n\t" + "mov pc, lr\n\t" + ); +} + +// g (R10) at 8(TP), m (R9) at 12(TP) +void +x_cgo_load_gm(void) +{ + __asm__ __volatile__ ( + "push {lr}\n\t" + "bl __aeabi_read_tp\n\t" + "ldr r10, [r0, #8]\n\t" + "ldr r9, [r0, #12]\n\t" + "pop {pc}\n\t" + ); +} + +void +x_cgo_save_gm(void) +{ + __asm__ __volatile__ ( + "push {lr}\n\t" + "bl __aeabi_read_tp\n\t" + "str r10, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" + "pop {pc}\n\t" + ); +} + +void +x_cgo_init(G *g) +{ + pthread_attr_t attr; + size_t size; + x_cgo_save_gm(); // save g and m for the initial thread + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + g->stackguard = (uintptr)&attr - size + 4096; + pthread_attr_destroy(&attr); +} + + +void +_cgo_sys_thread_start(ThreadStart *ts) +{ + pthread_attr_t attr; + pthread_t p; + size_t size; + int err; + + // Not sure why the memset is necessary here, + // but without it, we get a bogus stack size + // out of pthread_attr_getstacksize. C'est la Linux. + memset(&attr, 0, sizeof attr); + pthread_attr_init(&attr); + size = 0; + pthread_attr_getstacksize(&attr, &size); + ts->g->stackguard = size; + err = pthread_create(&p, &attr, threadentry, ts); + if (err != 0) { + fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err)); + abort(); + } +} + +extern void crosscall_arm2(void (*fn)(void), void *g, void *m); +static void* +threadentry(void *v) +{ + ThreadStart ts; + + ts = *(ThreadStart*)v; + free(v); + + ts.g->stackbase = (uintptr)&ts; + + /* + * _cgo_sys_thread_start set stackguard to stack size; + * change to actual guard pointer. + */ + ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096 * 2; + + crosscall_arm2(ts.fn, (void *)ts.g, (void *)ts.m); + return nil; +} diff --git a/src/pkg/runtime/cgo/gcc_linux_386.c b/src/pkg/runtime/cgo/gcc_linux_386.c index 7d84acc11..9357a63f7 100644 --- a/src/pkg/runtime/cgo/gcc_linux_386.c +++ b/src/pkg/runtime/cgo/gcc_linux_386.c @@ -9,8 +9,8 @@ static void *threadentry(void*); -static void -xinitcgo(G *g) +void +x_cgo_init(G *g) { pthread_attr_t attr; size_t size; @@ -21,10 +21,9 @@ xinitcgo(G *g) pthread_attr_destroy(&attr); } -void (*initcgo)(G*) = xinitcgo; void -libcgo_sys_thread_start(ThreadStart *ts) +_cgo_sys_thread_start(ThreadStart *ts) { pthread_attr_t attr; sigset_t ign, oset; @@ -64,7 +63,7 @@ threadentry(void *v) ts.g->stackbase = (uintptr)&ts; /* - * libcgo_sys_thread_start set stackguard to stack size; + * _cgo_sys_thread_start set stackguard to stack size; * change to actual guard pointer. 
*/ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096; diff --git a/src/pkg/runtime/cgo/gcc_linux_amd64.c b/src/pkg/runtime/cgo/gcc_linux_amd64.c index 28cbf78c5..bc76117d3 100644 --- a/src/pkg/runtime/cgo/gcc_linux_amd64.c +++ b/src/pkg/runtime/cgo/gcc_linux_amd64.c @@ -10,7 +10,7 @@ static void* threadentry(void*); void -xinitcgo(G* g) +x_cgo_init(G* g) { pthread_attr_t attr; size_t size; @@ -21,10 +21,9 @@ xinitcgo(G* g) pthread_attr_destroy(&attr); } -void (*initcgo)(G*) = xinitcgo; void -libcgo_sys_thread_start(ThreadStart *ts) +_cgo_sys_thread_start(ThreadStart *ts) { pthread_attr_t attr; sigset_t ign, oset; @@ -59,7 +58,7 @@ threadentry(void *v) ts.g->stackbase = (uintptr)&ts; /* - * libcgo_sys_thread_start set stackguard to stack size; + * _cgo_sys_thread_start set stackguard to stack size; * change to actual guard pointer. */ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096; diff --git a/src/pkg/runtime/cgo/gcc_linux_arm.c b/src/pkg/runtime/cgo/gcc_linux_arm.c index 8397c75bb..46a1126ad 100644 --- a/src/pkg/runtime/cgo/gcc_linux_arm.c +++ b/src/pkg/runtime/cgo/gcc_linux_arm.c @@ -2,19 +2,113 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +#include <pthread.h> +#include <string.h> #include "libcgo.h" -static void -xinitcgo(G *g) +static void *threadentry(void*); + +// We have to resort to TLS variable to save g(R10) and +// m(R9). One reason is that external code might trigger +// SIGSEGV, and our runtime.sigtramp don't even know we +// are in external code, and will continue to use R10/R9, +// this might as well result in another SIGSEGV. +// Note: all three functions will clobber R0, and the last +// two can be called from 5c ABI code. +void __aeabi_read_tp(void) __attribute__((naked)); +void x_cgo_save_gm(void) __attribute__((naked)); +void x_cgo_load_gm(void) __attribute__((naked)); + +void +__aeabi_read_tp(void) +{ + // b __kuser_get_tls @ 0xffff0fe0 + __asm__ __volatile__ ( + "mvn r0, #0xf000\n\t" + "sub pc, r0, #31\n\t" + "nop\n\tnop\n\t" + ); +} + +// g (R10) at 8(TP), m (R9) at 12(TP) +void +x_cgo_load_gm(void) +{ + __asm__ __volatile__ ( + "push {lr}\n\t" + "bl __aeabi_read_tp\n\t" + "ldr r10, [r0, #8]\n\t" + "ldr r9, [r0, #12]\n\t" + "pop {pc}\n\t" + ); +} + +void +x_cgo_save_gm(void) +{ + __asm__ __volatile__ ( + "push {lr}\n\t" + "bl __aeabi_read_tp\n\t" + "str r10, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" + "pop {pc}\n\t" + ); +} + +void +x_cgo_init(G *g) { - // unimplemented + pthread_attr_t attr; + size_t size; + x_cgo_save_gm(); // save g and m for the initial thread + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + g->stackguard = (uintptr)&attr - size + 4096; + pthread_attr_destroy(&attr); } -void (*initcgo)(G*) = xinitcgo; void -libcgo_sys_thread_start(ThreadStart *ts) +_cgo_sys_thread_start(ThreadStart *ts) +{ + pthread_attr_t attr; + pthread_t p; + size_t size; + int err; + + // Not sure why the memset is necessary here, + // but without it, we get a bogus stack size + // out of pthread_attr_getstacksize. C'est la Linux. 
+ memset(&attr, 0, sizeof attr); + pthread_attr_init(&attr); + size = 0; + pthread_attr_getstacksize(&attr, &size); + ts->g->stackguard = size; + err = pthread_create(&p, &attr, threadentry, ts); + if (err != 0) { + fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err)); + abort(); + } +} + +extern void crosscall_arm2(void (*fn)(void), void *g, void *m); +static void* +threadentry(void *v) { - // unimplemented - *(int*)0 = 0; + ThreadStart ts; + + ts = *(ThreadStart*)v; + free(v); + + ts.g->stackbase = (uintptr)&ts; + + /* + * _cgo_sys_thread_start set stackguard to stack size; + * change to actual guard pointer. + */ + ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096 * 2; + + crosscall_arm2(ts.fn, (void *)ts.g, (void *)ts.m); + return nil; } diff --git a/src/pkg/runtime/cgo/gcc_netbsd_386.c b/src/pkg/runtime/cgo/gcc_netbsd_386.c new file mode 100644 index 000000000..09b271df4 --- /dev/null +++ b/src/pkg/runtime/cgo/gcc_netbsd_386.c @@ -0,0 +1,80 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <sys/types.h> +#include <pthread.h> +#include <signal.h> +#include <string.h> +#include "libcgo.h" + +static void* threadentry(void*); + +void +x_cgo_init(G *g) +{ + pthread_attr_t attr; + size_t size; + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + g->stackguard = (uintptr)&attr - size + 4096; + pthread_attr_destroy(&attr); +} + + +void +_cgo_sys_thread_start(ThreadStart *ts) +{ + pthread_attr_t attr; + sigset_t ign, oset; + pthread_t p; + size_t size; + int err; + + sigfillset(&ign); + sigprocmask(SIG_SETMASK, &ign, &oset); + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + ts->g->stackguard = size; + err = pthread_create(&p, &attr, threadentry, ts); + + sigprocmask(SIG_SETMASK, &oset, nil); + + if (err != 0) { + fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err)); + abort(); + } +} + +static void* +threadentry(void *v) +{ + ThreadStart ts; + + ts = *(ThreadStart*)v; + free(v); + + ts.g->stackbase = (uintptr)&ts; + + /* + * _cgo_sys_thread_start set stackguard to stack size; + * change to actual guard pointer. + */ + ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096; + + /* + * Set specific keys. On NetBSD/ELF, the thread local storage + * is just before %gs:0. Our dynamic 8.out's reserve 8 bytes + * for the two words g and m at %gs:-8 and %gs:-4. + */ + asm volatile ( + "movl %0, %%gs:-8\n" // MOVL g, -8(GS) + "movl %1, %%gs:-4\n" // MOVL m, -4(GS) + :: "r"(ts.g), "r"(ts.m) + ); + + crosscall_386(ts.fn); + return nil; +} diff --git a/src/pkg/runtime/cgo/gcc_netbsd_amd64.c b/src/pkg/runtime/cgo/gcc_netbsd_amd64.c new file mode 100644 index 000000000..080c59ba4 --- /dev/null +++ b/src/pkg/runtime/cgo/gcc_netbsd_amd64.c @@ -0,0 +1,80 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include <sys/types.h> +#include <pthread.h> +#include <signal.h> +#include <string.h> +#include "libcgo.h" + +static void* threadentry(void*); + +void +x_cgo_init(G *g) +{ + pthread_attr_t attr; + size_t size; + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + g->stackguard = (uintptr)&attr - size + 4096; + pthread_attr_destroy(&attr); +} + + +void +_cgo_sys_thread_start(ThreadStart *ts) +{ + pthread_attr_t attr; + sigset_t ign, oset; + pthread_t p; + size_t size; + int err; + + sigfillset(&ign); + sigprocmask(SIG_SETMASK, &ign, &oset); + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + + ts->g->stackguard = size; + err = pthread_create(&p, &attr, threadentry, ts); + + sigprocmask(SIG_SETMASK, &oset, nil); + + if (err != 0) { + fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err)); + abort(); + } +} + +static void* +threadentry(void *v) +{ + ThreadStart ts; + + ts = *(ThreadStart*)v; + free(v); + + ts.g->stackbase = (uintptr)&ts; + + /* + * _cgo_sys_thread_start set stackguard to stack size; + * change to actual guard pointer. + */ + ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096; + + /* + * Set specific keys. On NetBSD/ELF, the thread local storage + * is just before %fs:0. Our dynamic 6.out's reserve 16 bytes + * for the two words g and m at %fs:-16 and %fs:-8. + */ + asm volatile ( + "movq %0, %%fs:-16\n" // MOVL g, -16(FS) + "movq %1, %%fs:-8\n" // MOVL m, -8(FS) + :: "r"(ts.g), "r"(ts.m) + ); + crosscall_amd64(ts.fn); + return nil; +} diff --git a/src/pkg/runtime/cgo/gcc_netbsd_arm.c b/src/pkg/runtime/cgo/gcc_netbsd_arm.c new file mode 100644 index 000000000..d93b531e7 --- /dev/null +++ b/src/pkg/runtime/cgo/gcc_netbsd_arm.c @@ -0,0 +1,122 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <sys/types.h> +#include <pthread.h> +#include <signal.h> +#include <string.h> +#include "libcgo.h" + +static void *threadentry(void*); + +// We have to resort to TLS variable to save g(R10) and +// m(R9). One reason is that external code might trigger +// SIGSEGV, and our runtime.sigtramp don't even know we +// are in external code, and will continue to use R10/R9, +// this might as well result in another SIGSEGV. +// Note: all three functions will clobber R0, and the last +// two can be called from 5c ABI code. 
+void __aeabi_read_tp(void) __attribute__((naked)); +void x_cgo_save_gm(void) __attribute__((naked)); +void x_cgo_load_gm(void) __attribute__((naked)); + +void +__aeabi_read_tp(void) +{ + // this function is only allowed to clobber r0 + __asm__ __volatile__ ( + "mrc p15, 0, r0, c13, c0, 3\n\t" + "cmp r0, #0\n\t" + "movne pc, lr\n\t" + "push {r1,r2,r3,r12}\n\t" + "svc 0x00a0013c\n\t" // _lwp_getprivate + "pop {r1,r2,r3,r12}\n\t" + "mov pc, lr\n\t" + ); +} + +// g (R10) at 8(TP), m (R9) at 12(TP) +void +x_cgo_load_gm(void) +{ + __asm__ __volatile__ ( + "push {lr}\n\t" + "bl __aeabi_read_tp\n\t" + "ldr r10, [r0, #8]\n\t" + "ldr r9, [r0, #12]\n\t" + "pop {pc}\n\t" + ); +} + +void +x_cgo_save_gm(void) +{ + __asm__ __volatile__ ( + "push {lr}\n\t" + "bl __aeabi_read_tp\n\t" + "str r10, [r0, #8]\n\t" + "str r9, [r0, #12]\n\t" + "pop {pc}\n\t" + ); +} + +void +x_cgo_init(G *g) +{ + pthread_attr_t attr; + size_t size; + x_cgo_save_gm(); // save g and m for the initial thread + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + g->stackguard = (uintptr)&attr - size + 4096; + pthread_attr_destroy(&attr); +} + + +void +_cgo_sys_thread_start(ThreadStart *ts) +{ + pthread_attr_t attr; + sigset_t ign, oset; + pthread_t p; + size_t size; + int err; + + sigfillset(&ign); + sigprocmask(SIG_SETMASK, &ign, &oset); + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + ts->g->stackguard = size; + err = pthread_create(&p, &attr, threadentry, ts); + + sigprocmask(SIG_SETMASK, &oset, nil); + + if (err != 0) { + fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err)); + abort(); + } +} + +extern void crosscall_arm2(void (*fn)(void), void *g, void *m); +static void* +threadentry(void *v) +{ + ThreadStart ts; + + ts = *(ThreadStart*)v; + free(v); + + ts.g->stackbase = (uintptr)&ts; + + /* + * _cgo_sys_thread_start set stackguard to stack size; + * change to actual guard pointer. + */ + ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096 * 2; + + crosscall_arm2(ts.fn, (void *)ts.g, (void *)ts.m); + return nil; +} diff --git a/src/pkg/runtime/cgo/gcc_openbsd_386.c b/src/pkg/runtime/cgo/gcc_openbsd_386.c new file mode 100644 index 000000000..86c1365ad --- /dev/null +++ b/src/pkg/runtime/cgo/gcc_openbsd_386.c @@ -0,0 +1,169 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <sys/types.h> +#include <dlfcn.h> +#include <errno.h> +#include <pthread.h> +#include <signal.h> +#include <string.h> +#include "libcgo.h" + +static void* threadentry(void*); + +// TCB_SIZE is sizeof(struct thread_control_block), +// as defined in /usr/src/lib/librthread/tcb.h +#define TCB_SIZE (4 * sizeof(void *)) +#define TLS_SIZE (2 * sizeof(void *)) + +void *__get_tcb(void); +void __set_tcb(void *); + +static int (*sys_pthread_create)(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg); + +struct thread_args { + void *(*func)(void *); + void *arg; +}; + +static void +tcb_fixup(int mainthread) +{ + void *newtcb, *oldtcb; + + // The OpenBSD ld.so(1) does not currently support PT_TLS. As a result, + // we need to allocate our own TLS space while preserving the existing + // TCB that has been setup via librthread. + + newtcb = malloc(TCB_SIZE + TLS_SIZE); + if(newtcb == NULL) + abort(); + + // The signal trampoline expects the TLS slots to be zeroed. 
+ bzero(newtcb, TLS_SIZE); + + oldtcb = __get_tcb(); + bcopy(oldtcb, newtcb + TLS_SIZE, TCB_SIZE); + __set_tcb(newtcb + TLS_SIZE); + + // The main thread TCB is a static allocation - do not try to free it. + if(!mainthread) + free(oldtcb); +} + +static void * +thread_start_wrapper(void *arg) +{ + struct thread_args args = *(struct thread_args *)arg; + + free(arg); + tcb_fixup(0); + + return args.func(args.arg); +} + +int +pthread_create(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) +{ + struct thread_args *p; + + p = malloc(sizeof(*p)); + if(p == NULL) { + errno = ENOMEM; + return -1; + } + p->func = start_routine; + p->arg = arg; + + return sys_pthread_create(thread, attr, thread_start_wrapper, p); +} + +void +x_cgo_init(G *g) +{ + pthread_attr_t attr; + size_t size; + void *handle; + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + g->stackguard = (uintptr)&attr - size + 4096; + pthread_attr_destroy(&attr); + + // Locate symbol for the system pthread_create function. + handle = dlopen("libpthread.so", RTLD_LAZY); + if(handle == NULL) { + fprintf(stderr, "dlopen: failed to load libpthread: %s\n", dlerror()); + abort(); + } + sys_pthread_create = dlsym(handle, "pthread_create"); + if(sys_pthread_create == NULL) { + fprintf(stderr, "dlsym: failed to find pthread_create: %s\n", dlerror()); + abort(); + } + dlclose(handle); + + tcb_fixup(1); +} + + +void +_cgo_sys_thread_start(ThreadStart *ts) +{ + pthread_attr_t attr; + sigset_t ign, oset; + pthread_t p; + size_t size; + int err; + + sigfillset(&ign); + sigprocmask(SIG_SETMASK, &ign, &oset); + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + ts->g->stackguard = size; + err = sys_pthread_create(&p, &attr, threadentry, ts); + + sigprocmask(SIG_SETMASK, &oset, nil); + + if (err != 0) { + fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err)); + abort(); + } +} + +static void* +threadentry(void *v) +{ + ThreadStart ts; + + tcb_fixup(0); + + ts = *(ThreadStart*)v; + free(v); + + ts.g->stackbase = (uintptr)&ts; + + /* + * _cgo_sys_thread_start set stackguard to stack size; + * change to actual guard pointer. + */ + ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096; + + /* + * Set specific keys. On OpenBSD/ELF, the thread local storage + * is just before %gs:0. Our dynamic 8.out's reserve 8 bytes + * for the two words g and m at %gs:-8 and %gs:-4. + */ + asm volatile ( + "movl %0, %%gs:-8\n" // MOVL g, -8(GS) + "movl %1, %%gs:-4\n" // MOVL m, -4(GS) + :: "r"(ts.g), "r"(ts.m) + ); + + crosscall_386(ts.fn); + return nil; +} diff --git a/src/pkg/runtime/cgo/gcc_openbsd_amd64.c b/src/pkg/runtime/cgo/gcc_openbsd_amd64.c new file mode 100644 index 000000000..d3a5e36b0 --- /dev/null +++ b/src/pkg/runtime/cgo/gcc_openbsd_amd64.c @@ -0,0 +1,169 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
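The tcb_fixup above works around OpenBSD's missing PT_TLS support by allocating a combined block and re-pointing the TCB register into its middle. A sketch of the layout math (illustrative, not the runtime's code):

	/* Layout produced by tcb_fixup:
	 *
	 *   newtcb:  [ TLS_SIZE bytes, zeroed | TCB_SIZE bytes, copied ]
	 *                                     ^ __set_tcb points here
	 *
	 * so negative offsets from the TCB register (%gs:-8 / %gs:-4 on 386)
	 * land in the fresh TLS area while the librthread TCB is preserved. */
	#include <stdlib.h>
	#include <string.h>

	static void *
	make_tls_block(const void *oldtcb, size_t tcb_size, size_t tls_size)
	{
		char *p = malloc(tls_size + tcb_size);
		if(p == NULL)
			abort();
		memset(p, 0, tls_size);                  /* signal code expects zeroed TLS */
		memcpy(p + tls_size, oldtcb, tcb_size);  /* keep the existing TCB intact */
		return p + tls_size;                     /* value to hand to __set_tcb */
	}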
+ +#include <sys/types.h> +#include <dlfcn.h> +#include <errno.h> +#include <pthread.h> +#include <signal.h> +#include <string.h> +#include "libcgo.h" + +static void* threadentry(void*); + +// TCB_SIZE is sizeof(struct thread_control_block), +// as defined in /usr/src/lib/librthread/tcb.h +#define TCB_SIZE (4 * sizeof(void *)) +#define TLS_SIZE (2 * sizeof(void *)) + +void *__get_tcb(void); +void __set_tcb(void *); + +static int (*sys_pthread_create)(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg); + +struct thread_args { + void *(*func)(void *); + void *arg; +}; + +static void +tcb_fixup(int mainthread) +{ + void *newtcb, *oldtcb; + + // The OpenBSD ld.so(1) does not currently support PT_TLS. As a result, + // we need to allocate our own TLS space while preserving the existing + // TCB that has been setup via librthread. + + newtcb = malloc(TCB_SIZE + TLS_SIZE); + if(newtcb == NULL) + abort(); + + // The signal trampoline expects the TLS slots to be zeroed. + bzero(newtcb, TLS_SIZE); + + oldtcb = __get_tcb(); + bcopy(oldtcb, newtcb + TLS_SIZE, TCB_SIZE); + __set_tcb(newtcb + TLS_SIZE); + + // The main thread TCB is a static allocation - do not try to free it. + if(!mainthread) + free(oldtcb); +} + +static void * +thread_start_wrapper(void *arg) +{ + struct thread_args args = *(struct thread_args *)arg; + + free(arg); + tcb_fixup(0); + + return args.func(args.arg); +} + +int +pthread_create(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) +{ + struct thread_args *p; + + p = malloc(sizeof(*p)); + if(p == NULL) { + errno = ENOMEM; + return -1; + } + p->func = start_routine; + p->arg = arg; + + return sys_pthread_create(thread, attr, thread_start_wrapper, p); +} + +void +x_cgo_init(G *g) +{ + pthread_attr_t attr; + size_t size; + void *handle; + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + g->stackguard = (uintptr)&attr - size + 4096; + pthread_attr_destroy(&attr); + + // Locate symbol for the system pthread_create function. + handle = dlopen("libpthread.so", RTLD_LAZY); + if(handle == NULL) { + fprintf(stderr, "dlopen: failed to load libpthread: %s\n", dlerror()); + abort(); + } + sys_pthread_create = dlsym(handle, "pthread_create"); + if(sys_pthread_create == NULL) { + fprintf(stderr, "dlsym: failed to find pthread_create: %s\n", dlerror()); + abort(); + } + dlclose(handle); + + tcb_fixup(1); +} + + +void +_cgo_sys_thread_start(ThreadStart *ts) +{ + pthread_attr_t attr; + sigset_t ign, oset; + pthread_t p; + size_t size; + int err; + + sigfillset(&ign); + sigprocmask(SIG_SETMASK, &ign, &oset); + + pthread_attr_init(&attr); + pthread_attr_getstacksize(&attr, &size); + + ts->g->stackguard = size; + err = sys_pthread_create(&p, &attr, threadentry, ts); + + sigprocmask(SIG_SETMASK, &oset, nil); + + if (err != 0) { + fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err)); + abort(); + } +} + +static void* +threadentry(void *v) +{ + ThreadStart ts; + + tcb_fixup(0); + + ts = *(ThreadStart*)v; + free(v); + + ts.g->stackbase = (uintptr)&ts; + + /* + * _cgo_sys_thread_start set stackguard to stack size; + * change to actual guard pointer. + */ + ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096; + + /* + * Set specific keys. On OpenBSD/ELF, the thread local storage + * is just before %fs:0. Our dynamic 6.out's reserve 16 bytes + * for the two words g and m at %fs:-16 and %fs:-8. 
+ */ + asm volatile ( + "movq %0, %%fs:-16\n" // MOVL g, -16(FS) + "movq %1, %%fs:-8\n" // MOVL m, -8(FS) + :: "r"(ts.g), "r"(ts.m) + ); + crosscall_amd64(ts.fn); + return nil; +} diff --git a/src/pkg/runtime/cgo/gcc_setenv.c b/src/pkg/runtime/cgo/gcc_setenv.c index 7da4ad915..a0938166d 100644 --- a/src/pkg/runtime/cgo/gcc_setenv.c +++ b/src/pkg/runtime/cgo/gcc_setenv.c @@ -1,4 +1,4 @@ -// Copyright 20111 The Go Authors. All rights reserved. +// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -9,10 +9,8 @@ #include <stdlib.h> /* Stub for calling setenv */ -static void -xlibcgo_setenv(char **arg) +void +x_cgo_setenv(char **arg) { setenv(arg[0], arg[1], 1); } - -void (*libcgo_setenv)(char**) = xlibcgo_setenv; diff --git a/src/pkg/runtime/cgo/gcc_util.c b/src/pkg/runtime/cgo/gcc_util.c index e06b6f64d..20913d736 100644 --- a/src/pkg/runtime/cgo/gcc_util.c +++ b/src/pkg/runtime/cgo/gcc_util.c @@ -5,7 +5,7 @@ #include "libcgo.h" /* Stub for calling malloc from Go */ -static void +void x_cgo_malloc(void *p) { struct a { @@ -16,10 +16,8 @@ x_cgo_malloc(void *p) a->ret = malloc(a->n); } -void (*_cgo_malloc)(void*) = x_cgo_malloc; - /* Stub for calling free from Go */ -static void +void x_cgo_free(void *p) { struct a { @@ -29,11 +27,9 @@ x_cgo_free(void *p) free(a->arg); } -void (*_cgo_free)(void*) = x_cgo_free; - /* Stub for creating a new thread */ -static void -xlibcgo_thread_start(ThreadStart *arg) +void +x_cgo_thread_start(ThreadStart *arg) { ThreadStart *ts; @@ -45,7 +41,5 @@ xlibcgo_thread_start(ThreadStart *arg) } *ts = *arg; - libcgo_sys_thread_start(ts); /* OS-dependent half */ + _cgo_sys_thread_start(ts); /* OS-dependent half */ } - -void (*libcgo_thread_start)(ThreadStart*) = xlibcgo_thread_start; diff --git a/src/pkg/runtime/cgo/gcc_windows_386.c b/src/pkg/runtime/cgo/gcc_windows_386.c index 2b940d362..02eab12e5 100644 --- a/src/pkg/runtime/cgo/gcc_windows_386.c +++ b/src/pkg/runtime/cgo/gcc_windows_386.c @@ -4,31 +4,31 @@ #define WIN32_LEAN_AND_MEAN #include <windows.h> +#include <process.h> #include "libcgo.h" -static void *threadentry(void*); +static void threadentry(void*); /* 1MB is default stack size for 32-bit Windows. Allocation granularity on Windows is typically 64 KB. The constant is also hardcoded in cmd/ld/pe.c (keep synchronized). */ #define STACKSIZE (1*1024*1024) -static void -xinitcgo(G *g) +void +x_cgo_init(G *g) { int tmp; g->stackguard = (uintptr)&tmp - STACKSIZE + 8*1024; } -void (*initcgo)(G*) = xinitcgo; void -libcgo_sys_thread_start(ThreadStart *ts) +_cgo_sys_thread_start(ThreadStart *ts) { _beginthread(threadentry, 0, ts); } -static void* +static void threadentry(void *v) { ThreadStart ts; @@ -55,5 +55,4 @@ threadentry(void *v) crosscall_386(ts.fn); LocalFree(tls0); - return nil; } diff --git a/src/pkg/runtime/cgo/gcc_windows_amd64.c b/src/pkg/runtime/cgo/gcc_windows_amd64.c index 0d2f5d233..f7695a1cc 100644 --- a/src/pkg/runtime/cgo/gcc_windows_amd64.c +++ b/src/pkg/runtime/cgo/gcc_windows_amd64.c @@ -4,31 +4,31 @@ #define WIN64_LEAN_AND_MEAN #include <windows.h> +#include <process.h> #include "libcgo.h" -static void *threadentry(void*); +static void threadentry(void*); /* 2MB is default stack size for 64-bit Windows. Allocation granularity on Windows is typically 64 KB. The constant is also hardcoded in cmd/ld/pe.c (keep synchronized). 
*/ #define STACKSIZE (2*1024*1024) -static void -xinitcgo(G *g) +void +x_cgo_init(G *g) { int tmp; g->stackguard = (uintptr)&tmp - STACKSIZE + 8*1024; } -void (*initcgo)(G*) = xinitcgo; void -libcgo_sys_thread_start(ThreadStart *ts) +_cgo_sys_thread_start(ThreadStart *ts) { _beginthread(threadentry, 0, ts); } -static void* +static void threadentry(void *v) { ThreadStart ts; @@ -53,5 +53,4 @@ threadentry(void *v) ); crosscall_amd64(ts.fn); - return nil; } diff --git a/src/pkg/runtime/cgo/libcgo.h b/src/pkg/runtime/cgo/libcgo.h index c31d19d76..41a371c27 100644 --- a/src/pkg/runtime/cgo/libcgo.h +++ b/src/pkg/runtime/cgo/libcgo.h @@ -26,7 +26,7 @@ struct G }; /* - * Arguments to the libcgo_thread_start call. + * Arguments to the _cgo_thread_start call. * Also known to ../pkg/runtime/runtime.h. */ typedef struct ThreadStart ThreadStart; @@ -40,14 +40,14 @@ struct ThreadStart /* * Called by 5c/6c/8c world. * Makes a local copy of the ThreadStart and - * calls libcgo_sys_thread_start(ts). + * calls _cgo_sys_thread_start(ts). */ -extern void (*libcgo_thread_start)(ThreadStart *ts); +extern void (*_cgo_thread_start)(ThreadStart *ts); /* * Creates the new operating system thread (OS, arch dependent). */ -void libcgo_sys_thread_start(ThreadStart *ts); +void _cgo_sys_thread_start(ThreadStart *ts); /* * Call fn in the 6c world. diff --git a/src/pkg/runtime/cgo/netbsd.c b/src/pkg/runtime/cgo/netbsd.c new file mode 100644 index 000000000..b6403f686 --- /dev/null +++ b/src/pkg/runtime/cgo/netbsd.c @@ -0,0 +1,13 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Supply environ and __progname, because we don't +// link against the standard NetBSD crt0.o and the +// libc dynamic library needs them. + +char *environ[1]; +char *__progname; + +#pragma dynexport environ environ +#pragma dynexport __progname __progname diff --git a/src/pkg/runtime/cgo/openbsd.c b/src/pkg/runtime/cgo/openbsd.c new file mode 100644 index 000000000..84e9f9eff --- /dev/null +++ b/src/pkg/runtime/cgo/openbsd.c @@ -0,0 +1,21 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Supply environ, __progname and __guard_local, because +// we don't link against the standard OpenBSD crt0.o and +// the libc dynamic library needs them. + +char *environ[1]; +char *__progname; +long __guard_local; + +#pragma dynexport environ environ +#pragma dynexport __progname __progname + +// This is normally marked as hidden and placed in the +// .openbsd.randomdata section. +#pragma dynexport __guard_local __guard_local + +// We override pthread_create to support PT_TLS. +#pragma dynexport pthread_create pthread_create diff --git a/src/pkg/runtime/cgo/setenv.c b/src/pkg/runtime/cgo/setenv.c new file mode 100644 index 000000000..4c47cdb00 --- /dev/null +++ b/src/pkg/runtime/cgo/setenv.c @@ -0,0 +1,10 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
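The gcc_setenv.c and gcc_util.c hunks above, together with the new setenv.c that follows, show the renaming scheme this patch applies throughout: instead of gcc-compiled code exporting a function pointer (libcgo_setenv), the gcc side now defines a plainly named x_cgo_* function and the Go-toolchain side imports the symbol and publishes the pointer itself. A sketch of the two halves, using a hypothetical symbol x_cgo_example:

	/* gcc-compiled half: an ordinary function. */
	void
	x_cgo_example(char **arg)
	{
		/* ... do the libc work ... */
	}

	/* 6c/8c-compiled half: import the symbol statically and expose
	 * it through a pointer the runtime can test for nil when cgo
	 * is not linked in. (Illustrative; mirrors setenv.c below.) */
	#pragma cgo_import_static x_cgo_example
	void x_cgo_example(char**);
	void (*_cgo_example)(char**) = x_cgo_example;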
+ +// +build darwin freebsd linux netbsd openbsd + +#pragma cgo_import_static x_cgo_setenv + +void x_cgo_setenv(char**); +void (*_cgo_setenv)(char**) = x_cgo_setenv; diff --git a/src/pkg/runtime/cgocall.c b/src/pkg/runtime/cgocall.c index 7a26538ec..590bf9b67 100644 --- a/src/pkg/runtime/cgocall.c +++ b/src/pkg/runtime/cgocall.c @@ -6,6 +6,7 @@ #include "arch_GOARCH.h" #include "stack.h" #include "cgocall.h" +#include "race.h" // Cgo call and callback support. // @@ -41,7 +42,7 @@ // know about packages). The gcc-compiled C function f calls GoF. // // GoF calls crosscall2(_cgoexp_GoF, frame, framesize). Crosscall2 -// (in cgo/$GOOS.S, a gcc-compiled assembly file) is a two-argument +// (in cgo/gcc_$GOARCH.S, a gcc-compiled assembly file) is a two-argument // adapter from the gcc function call ABI to the 6c function call ABI. // It is called from gcc to call 6c functions. In this case it calls // _cgoexp_GoF(frame, framesize), still running on m->g0's stack @@ -82,43 +83,55 @@ // _cgoexp_GoF immediately returns to crosscall2, which restores the // callee-save registers for gcc and returns to GoF, which returns to f. -void *initcgo; /* filled in by dynamic linker when Cgo is available */ +void *_cgo_init; /* filled in by dynamic linker when Cgo is available */ +static int64 cgosync; /* represents possible synchronization in C code */ + +// These two are only used by the architecture where TLS based storage isn't +// the default for g and m (e.g., ARM) +void *_cgo_load_gm; /* filled in by dynamic linker when Cgo is available */ +void *_cgo_save_gm; /* filled in by dynamic linker when Cgo is available */ -static void unlockm(void); static void unwindm(void); // Call from Go to C. +static FuncVal unlockOSThread = { runtime·unlockOSThread }; + void runtime·cgocall(void (*fn)(void*), void *arg) { Defer d; + if(m->racecall) { + runtime·asmcgocall(fn, arg); + return; + } + if(!runtime·iscgo && !Windows) runtime·throw("cgocall unavailable"); if(fn == 0) runtime·throw("cgocall nil"); + if(raceenabled) + runtime·racereleasemerge(&cgosync); + m->ncgocall++; /* * Lock g to m to ensure we stay on the same stack if we do a - * cgo callback. + * cgo callback. Add entry to defer stack in case of panic. */ - d.nofree = false; - if(m->lockedg == nil) { - m->lockedg = g; - g->lockedm = m; - - // Add entry to defer stack in case of panic. - d.fn = (byte*)unlockm; - d.siz = 0; - d.link = g->defer; - d.argp = (void*)-1; // unused because unlockm never recovers - d.nofree = true; - g->defer = &d; - } + runtime·lockOSThread(); + d.fn = &unlockOSThread; + d.siz = 0; + d.link = g->defer; + d.argp = (void*)-1; // unused because unlockm never recovers + d.special = true; + d.free = false; + g->defer = &d; + + m->ncgo++; /* * Announce we are entering a system call @@ -135,29 +148,31 @@ runtime·cgocall(void (*fn)(void*), void *arg) runtime·asmcgocall(fn, arg); runtime·exitsyscall(); - if(d.nofree) { - if(g->defer != &d || d.fn != (byte*)unlockm) - runtime·throw("runtime: bad defer entry in cgocallback"); - g->defer = d.link; - unlockm(); + m->ncgo--; + if(m->ncgo == 0) { + // We are going back to Go and are not in a recursive + // call. Let the GC collect any memory allocated via + // _cgo_allocate that is no longer referenced. 
+ m->cgomal = nil; } -} -static void -unlockm(void) -{ - m->lockedg = nil; - g->lockedm = nil; + if(g->defer != &d || d.fn != &unlockOSThread) + runtime·throw("runtime: bad defer entry in cgocallback"); + g->defer = d.link; + runtime·unlockOSThread(); + + if(raceenabled) + runtime·raceacquire(&cgosync); } void runtime·NumCgoCall(int64 ret) { - M *m; + M *mp; ret = 0; - for(m=runtime·atomicloadp(&runtime·allm); m; m=m->alllink) - ret += m->ncgocall; + for(mp=runtime·atomicloadp(&runtime·allm); mp; mp=mp->alllink) + ret += mp->ncgocall; FLUSH(&ret); } @@ -188,31 +203,50 @@ runtime·cfree(void *p) // Call from C back to Go. +static FuncVal unwindmf = {unwindm}; + void -runtime·cgocallbackg(void (*fn)(void), void *arg, uintptr argsize) +runtime·cgocallbackg(FuncVal *fn, void *arg, uintptr argsize) { Defer d; + if(m->racecall) { + reflect·call(fn, arg, argsize); + return; + } + if(g != m->curg) runtime·throw("runtime: bad g in cgocallback"); runtime·exitsyscall(); // coming out of cgo call + if(m->needextram) { + m->needextram = 0; + runtime·newextram(); + } + // Add entry to defer stack in case of panic. - d.fn = (byte*)unwindm; + d.fn = &unwindmf; d.siz = 0; d.link = g->defer; d.argp = (void*)-1; // unused because unwindm never recovers - d.nofree = true; + d.special = true; + d.free = false; g->defer = &d; + if(raceenabled) + runtime·raceacquire(&cgosync); + // Invoke callback. - reflect·call((byte*)fn, arg, argsize); + reflect·call(fn, arg, argsize); + + if(raceenabled) + runtime·racereleasemerge(&cgosync); // Pop defer. // Do not unwind m->g0->sched.sp. // Our caller, cgocallback, will do that. - if(g->defer != &d || d.fn != (byte*)unwindm) + if(g->defer != &d || d.fn != &unwindmf) runtime·throw("runtime: bad defer entry in cgocallback"); g->defer = d.link; @@ -229,7 +263,8 @@ unwindm(void) runtime·throw("runtime: unwindm not implemented"); case '8': case '6': - m->g0->sched.sp = *(void**)m->g0->sched.sp; + case '5': + m->g0->sched.sp = *(uintptr*)m->g0->sched.sp; break; } } diff --git a/src/pkg/runtime/chan.c b/src/pkg/runtime/chan.c index ef27144ef..32995c6dd 100644 --- a/src/pkg/runtime/chan.c +++ b/src/pkg/runtime/chan.c @@ -3,7 +3,10 @@ // license that can be found in the LICENSE file. #include "runtime.h" +#include "arch_GOARCH.h" #include "type.h" +#include "race.h" +#include "malloc.h" #define MAXALIGN 7 #define NOSELGEN 1 @@ -20,6 +23,7 @@ struct SudoG G* g; // g and selgen constitute uint32 selgen; // a weak pointer to g SudoG* link; + int64 releasetime; byte* elem; // data element }; @@ -29,21 +33,25 @@ struct WaitQ SudoG* last; }; +// The garbage collector is assuming that Hchan can only contain pointers into the stack +// and cannot contain pointers into the heap. struct Hchan { - uint32 qcount; // total data in the q - uint32 dataqsiz; // size of the circular q + uintgo qcount; // total data in the q + uintgo dataqsiz; // size of the circular q uint16 elemsize; bool closed; uint8 elemalign; Alg* elemalg; // interface for element type - uint32 sendx; // send index - uint32 recvx; // receive index + uintgo sendx; // send index + uintgo recvx; // receive index WaitQ recvq; // list of recv waiters WaitQ sendq; // list of send waiters Lock; }; +uint32 runtime·Hchansize = sizeof(Hchan); + // Buffer follows Hchan immediately in memory. // chanbuf(c, i) is pointer to the i'th slot in the buffer. 
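A worked example of that layout: for a channel whose header occupies H bytes with element size E, slot i sits at (byte*)c + H + E*i; the chanbuf macro below encodes exactly this. A standalone sketch, with the header size passed explicitly so it stays self-contained:

	#include <stddef.h>

	/* Sketch of chanbuf's pointer arithmetic: the circular buffer
	 * starts immediately after the Hchan header. */
	static void *
	chan_slot(void *c, size_t hdrsize, size_t elemsize, size_t i)
	{
		return (char *)c + hdrsize + elemsize * i;
	}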
#define chanbuf(c, i) ((byte*)((c)+1)+(uintptr)(c)->elemsize*(i)) @@ -79,17 +87,22 @@ static void dequeueg(WaitQ*); static SudoG* dequeue(WaitQ*); static void enqueue(WaitQ*, SudoG*); static void destroychan(Hchan*); +static void racesync(Hchan*, SudoG*); Hchan* runtime·makechan_c(ChanType *t, int64 hint) { Hchan *c; - int32 n; + uintptr n; Type *elem; - + elem = t->elem; - if(hint < 0 || (int32)hint != hint || (elem->size > 0 && hint > ((uintptr)-1) / elem->size)) + // compiler checks this but be safe. + if(elem->size >= (1<<16)) + runtime·throw("makechan: invalid channel element type"); + + if(hint < 0 || (intgo)hint != hint || (elem->size > 0 && hint > MaxMem / elem->size)) runtime·panicstring("makechan: size out of range"); // calculate rounded size of Hchan @@ -103,18 +116,19 @@ runtime·makechan_c(ChanType *t, int64 hint) c->elemalg = elem->alg; c->elemalign = elem->align; c->dataqsiz = hint; + runtime·settype(c, (uintptr)t | TypeInfo_Chan); if(debug) - runtime·printf("makechan: chan=%p; elemsize=%D; elemalg=%p; elemalign=%d; dataqsiz=%d\n", - c, (int64)elem->size, elem->alg, elem->align, c->dataqsiz); + runtime·printf("makechan: chan=%p; elemsize=%D; elemalg=%p; elemalign=%d; dataqsiz=%D\n", + c, (int64)elem->size, elem->alg, elem->align, (int64)c->dataqsiz); return c; } // For reflect -// func makechan(typ *ChanType, size uint32) (chan) +// func makechan(typ *ChanType, size uint64) (chan) void -reflect·makechan(ChanType *t, uint32 size, Hchan *c) +reflect·makechan(ChanType *t, uint64 size, Hchan *c) { c = runtime·makechan_c(t, size); FLUSH(&c); @@ -143,11 +157,12 @@ runtime·makechan(ChanType *t, int64 hint, Hchan *ret) * the operation; we'll see that it's now closed. */ void -runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres) +runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres, void *pc) { SudoG *sg; SudoG mysg; G* gp; + int64 t0; if(c == nil) { USED(t); @@ -155,9 +170,7 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres) *pres = false; return; } - g->status = Gwaiting; - g->waitreason = "chan send (nil chan)"; - runtime·gosched(); + runtime·park(nil, nil, "chan send (nil chan)"); return; // not reached } @@ -170,7 +183,17 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres) runtime·prints("\n"); } + t0 = 0; + mysg.releasetime = 0; + if(runtime·blockprofilerate > 0) { + t0 = runtime·cputicks(); + mysg.releasetime = -1; + } + runtime·lock(c); + // TODO(dvyukov): add similar instrumentation to select. 
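The t0/releasetime bookkeeping introduced above is the channel half of block profiling: a goroutine about to block records a start tick and marks releasetime = -1; whichever goroutine wakes it stamps the release tick; the difference feeds runtime·blockevent. A sketch of the handshake (illustrative types and externs, not the runtime's):

	/* Sketch of the block-profiling handshake. cputicks() is assumed
	 * to be a monotonic cycle counter, as in the runtime. */
	typedef struct Waiter {
		long long releasetime;   /* -1: please stamp me; >0: stamped */
	} Waiter;

	extern long long cputicks(void);
	extern void park(Waiter *w);   /* blocks until another thread wakes w */
	extern void blockevent(long long cycles, int skip);

	static void
	block_and_account(Waiter *w)
	{
		long long t0 = cputicks();
		w->releasetime = -1;
		/* waker does: if(w->releasetime) w->releasetime = cputicks(); */
		park(w);
		if(w->releasetime > 0)
			blockevent(w->releasetime - t0, 2);
	}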
+ if(raceenabled) + runtime·racereadpc(c, pc, runtime·chansend); if(c->closed) goto closed; @@ -179,12 +202,16 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres) sg = dequeue(&c->recvq); if(sg != nil) { + if(raceenabled) + racesync(c, sg); runtime·unlock(c); - + gp = sg->g; gp->param = sg; if(sg->elem != nil) c->elemalg->copy(c->elemsize, sg->elem, ep); + if(sg->releasetime) + sg->releasetime = runtime·cputicks(); runtime·ready(gp); if(pres != nil) @@ -202,11 +229,8 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres) mysg.g = g; mysg.selgen = NOSELGEN; g->param = nil; - g->status = Gwaiting; - g->waitreason = "chan send"; enqueue(&c->sendq, &mysg); - runtime·unlock(c); - runtime·gosched(); + runtime·park(runtime·unlock, c, "chan send"); if(g->param == nil) { runtime·lock(c); @@ -215,6 +239,9 @@ runtime·chansend(ChanType *t, Hchan *c, byte *ep, bool *pres) goto closed; } + if(mysg.releasetime > 0) + runtime·blockevent(mysg.releasetime - t0, 2); + return; asynch: @@ -230,15 +257,16 @@ asynch: mysg.g = g; mysg.elem = nil; mysg.selgen = NOSELGEN; - g->status = Gwaiting; - g->waitreason = "chan send"; enqueue(&c->sendq, &mysg); - runtime·unlock(c); - runtime·gosched(); + runtime·park(runtime·unlock, c, "chan send"); runtime·lock(c); goto asynch; } + + if(raceenabled) + runtime·racerelease(chanbuf(c, c->sendx)); + c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), ep); if(++c->sendx == c->dataqsiz) c->sendx = 0; @@ -248,11 +276,15 @@ asynch: if(sg != nil) { gp = sg->g; runtime·unlock(c); + if(sg->releasetime) + sg->releasetime = runtime·cputicks(); runtime·ready(gp); } else runtime·unlock(c); if(pres != nil) *pres = true; + if(mysg.releasetime > 0) + runtime·blockevent(mysg.releasetime - t0, 2); return; closed: @@ -267,6 +299,7 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive SudoG *sg; SudoG mysg; G *gp; + int64 t0; if(runtime·gcwaiting) runtime·gosched(); @@ -280,12 +313,17 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive *selected = false; return; } - g->status = Gwaiting; - g->waitreason = "chan receive (nil chan)"; - runtime·gosched(); + runtime·park(nil, nil, "chan receive (nil chan)"); return; // not reached } + t0 = 0; + mysg.releasetime = 0; + if(runtime·blockprofilerate > 0) { + t0 = runtime·cputicks(); + mysg.releasetime = -1; + } + runtime·lock(c); if(c->dataqsiz > 0) goto asynch; @@ -295,12 +333,16 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive sg = dequeue(&c->sendq); if(sg != nil) { + if(raceenabled) + racesync(c, sg); runtime·unlock(c); if(ep != nil) c->elemalg->copy(c->elemsize, ep, sg->elem); gp = sg->g; gp->param = sg; + if(sg->releasetime) + sg->releasetime = runtime·cputicks(); runtime·ready(gp); if(selected != nil) @@ -320,11 +362,8 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive mysg.g = g; mysg.selgen = NOSELGEN; g->param = nil; - g->status = Gwaiting; - g->waitreason = "chan receive"; enqueue(&c->recvq, &mysg); - runtime·unlock(c); - runtime·gosched(); + runtime·park(runtime·unlock, c, "chan receive"); if(g->param == nil) { runtime·lock(c); @@ -335,6 +374,8 @@ runtime·chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *receive if(received != nil) *received = true; + if(mysg.releasetime > 0) + runtime·blockevent(mysg.releasetime - t0, 2); return; asynch: @@ -352,15 +393,16 @@ asynch: mysg.g = g; mysg.elem = nil; mysg.selgen = NOSELGEN; - g->status = Gwaiting; - g->waitreason = "chan receive"; 
enqueue(&c->recvq, &mysg); - runtime·unlock(c); - runtime·gosched(); + runtime·park(runtime·unlock, c, "chan receive"); runtime·lock(c); goto asynch; } + + if(raceenabled) + runtime·raceacquire(chanbuf(c, c->recvx)); + if(ep != nil) c->elemalg->copy(c->elemsize, ep, chanbuf(c, c->recvx)); c->elemalg->copy(c->elemsize, chanbuf(c, c->recvx), nil); @@ -372,6 +414,8 @@ asynch: if(sg != nil) { gp = sg->g; runtime·unlock(c); + if(sg->releasetime) + sg->releasetime = runtime·cputicks(); runtime·ready(gp); } else runtime·unlock(c); @@ -380,6 +424,8 @@ asynch: *selected = true; if(received != nil) *received = true; + if(mysg.releasetime > 0) + runtime·blockevent(mysg.releasetime - t0, 2); return; closed: @@ -389,7 +435,11 @@ closed: *selected = true; if(received != nil) *received = false; + if(raceenabled) + runtime·raceacquire(c); runtime·unlock(c); + if(mysg.releasetime > 0) + runtime·blockevent(mysg.releasetime - t0, 2); } // chansend1(hchan *chan any, elem any); @@ -397,7 +447,7 @@ closed: void runtime·chansend1(ChanType *t, Hchan* c, ...) { - runtime·chansend(t, c, (byte*)(&c+1), nil); + runtime·chansend(t, c, (byte*)(&c+1), nil, runtime·getcallerpc(&t)); } // chanrecv1(hchan *chan any) (elem any); @@ -446,8 +496,8 @@ runtime·selectnbsend(ChanType *t, Hchan *c, ...) byte *ae, *ap; ae = (byte*)(&c + 1); - ap = ae + runtime·rnd(t->elem->size, Structrnd); - runtime·chansend(t, c, ae, ap); + ap = ae + ROUND(t->elem->size, Structrnd); + runtime·chansend(t, c, ae, ap, runtime·getcallerpc(&t)); } // func selectnbrecv(elem *any, c chan any) bool @@ -474,7 +524,7 @@ void runtime·selectnbrecv(ChanType *t, byte *v, Hchan *c, bool selected) { runtime·chanrecv(t, c, v, &selected, nil); -} +} // func selectnbrecv2(elem *any, ok *bool, c chan any) bool // @@ -500,7 +550,7 @@ void runtime·selectnbrecv2(ChanType *t, byte *v, bool *received, Hchan *c, bool selected) { runtime·chanrecv(t, c, v, &selected, received); -} +} // For reflect: // func chansend(c chan, val iword, nb bool) (selected bool) @@ -509,12 +559,13 @@ runtime·selectnbrecv2(ChanType *t, byte *v, bool *received, Hchan *c, bool sele // // The "uintptr selected" is really "bool selected" but saying // uintptr gets us the right alignment for the output parameter block. +#pragma textflag 7 void reflect·chansend(ChanType *t, Hchan *c, uintptr val, bool nb, uintptr selected) { bool *sp; byte *vp; - + if(nb) { selected = false; sp = (bool*)&selected; @@ -527,7 +578,7 @@ reflect·chansend(ChanType *t, Hchan *c, uintptr val, bool nb, uintptr selected) vp = (byte*)&val; else vp = (byte*)val; - runtime·chansend(t, c, vp, sp); + runtime·chansend(t, c, vp, sp, runtime·getcallerpc(&t)); } // For reflect: @@ -571,7 +622,7 @@ runtime·newselect(int32 size, ...) 
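A few hunks below, selectgo's lock-ordering sort is rewritten from an insertion sort into a heap sort, guaranteeing n log n time and constant stack footprint no matter how many cases a select has. A standalone sketch of the same two phases (sift-up build, then pop-max), operating on a plain pointer array; illustrative, not runtime code:

	#include <stddef.h>

	static void
	sort_addrs(void **a, size_t n)
	{
		size_t i, j, k;
		void *c;

		for(i = 0; i < n; i++) {       /* phase 1: sift up into a max-heap */
			j = i;
			c = a[j];
			while(j > 0 && a[k = (j-1)/2] < c) {
				a[j] = a[k];
				j = k;
			}
			a[j] = c;
		}
		for(i = n; i-- > 0; ) {        /* phase 2: pop max to the end */
			c = a[i];
			a[i] = a[0];
			j = 0;
			for(;;) {                  /* sift c down within a[0..i) */
				k = 2*j + 1;
				if(k >= i)
					break;
				if(k+1 < i && a[k] < a[k+1])
					k++;
				if(c < a[k]) {
					a[j] = a[k];
					j = k;
					continue;
				}
				break;
			}
			a[j] = c;
		}
	}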
int32 o; Select **selp; - o = runtime·rnd(sizeof(size), Structrnd); + o = ROUND(sizeof(size), Structrnd); selp = (Select**)((byte*)&size + o); newselect(size, selp); } @@ -619,7 +670,7 @@ runtime·selectsend(Select *sel, Hchan *c, void *elem, bool selected) // nil cases do not compete if(c == nil) return; - + selectsend(sel, c, runtime·getcallerpc(&sel), elem, (byte*)&selected - (byte*)&sel); } @@ -628,7 +679,7 @@ selectsend(Select *sel, Hchan *c, void *pc, void *elem, int32 so) { int32 i; Scase *cas; - + i = sel->ncase; if(i >= sel->tcase) runtime·throw("selectsend: too many cases"); @@ -774,9 +825,7 @@ selunlock(Select *sel) void runtime·block(void) { - g->status = Gwaiting; // forever - g->waitreason = "select (no cases)"; - runtime·gosched(); + runtime·park(nil, nil, "select (no cases)"); // forever } static void* selectgo(Select**); @@ -796,7 +845,7 @@ static void* selectgo(Select **selp) { Select *sel; - uint32 o, i, j; + uint32 o, i, j, k; Scase *cas, *dfl; Hchan *c; SudoG *sg; @@ -830,12 +879,42 @@ selectgo(Select **selp) } // sort the cases by Hchan address to get the locking order. + // simple heap sort, to guarantee n log n time and constant stack footprint. for(i=0; i<sel->ncase; i++) { - c = sel->scase[i].chan; - for(j=i; j>0 && sel->lockorder[j-1] >= c; j--) - sel->lockorder[j] = sel->lockorder[j-1]; + j = i; + c = sel->scase[j].chan; + while(j > 0 && sel->lockorder[k=(j-1)/2] < c) { + sel->lockorder[j] = sel->lockorder[k]; + j = k; + } + sel->lockorder[j] = c; + } + for(i=sel->ncase; i-->0; ) { + c = sel->lockorder[i]; + sel->lockorder[i] = sel->lockorder[0]; + j = 0; + for(;;) { + k = j*2+1; + if(k >= i) + break; + if(k+1 < i && sel->lockorder[k] < sel->lockorder[k+1]) + k++; + if(c < sel->lockorder[k]) { + sel->lockorder[j] = sel->lockorder[k]; + j = k; + continue; + } + break; + } sel->lockorder[j] = c; } + /* + for(i=0; i+1<sel->ncase; i++) + if(sel->lockorder[i] > sel->lockorder[i+1]) { + runtime·printf("i=%d %p %p\n", i, sel->lockorder[i], sel->lockorder[i+1]); + runtime·throw("select: broken sort"); + } + */ sellock(sel); loop: @@ -899,7 +978,7 @@ loop: case CaseRecv: enqueue(&c->recvq, sg); break; - + case CaseSend: enqueue(&c->sendq, sg); break; @@ -907,10 +986,7 @@ loop: } g->param = nil; - g->status = Gwaiting; - g->waitreason = "select"; - selunlock(sel); - runtime·gosched(); + runtime·park((void(*)(Lock*))selunlock, (Lock*)sel, "select"); sellock(sel); sg = g->param; @@ -951,6 +1027,8 @@ loop: asyncrecv: // can receive from buffer + if(raceenabled) + runtime·raceacquire(chanbuf(c, c->recvx)); if(cas->receivedp != nil) *cas->receivedp = true; if(cas->sg.elem != nil) @@ -971,6 +1049,8 @@ asyncrecv: asyncsend: // can send to buffer + if(raceenabled) + runtime·racerelease(chanbuf(c, c->sendx)); c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), cas->sg.elem); if(++c->sendx == c->dataqsiz) c->sendx = 0; @@ -987,6 +1067,8 @@ asyncsend: syncrecv: // can receive from sleeping sender (sg) + if(raceenabled) + racesync(c, sg); selunlock(sel); if(debug) runtime·printf("syncrecv: sel=%p c=%p o=%d\n", sel, c, o); @@ -1006,10 +1088,14 @@ rclose: *cas->receivedp = false; if(cas->sg.elem != nil) c->elemalg->copy(c->elemsize, cas->sg.elem, nil); + if(raceenabled) + runtime·raceacquire(c); goto retc; syncsend: // can send to sleeping receiver (sg) + if(raceenabled) + racesync(c, sg); selunlock(sel); if(debug) runtime·printf("syncsend: sel=%p c=%p o=%d\n", sel, c, o); @@ -1020,11 +1106,17 @@ syncsend: runtime·ready(gp); retc: - // return to pc corresponding to chosen case + // 
return pc corresponding to chosen case. + // Set boolean passed during select creation + // (at offset selp + cas->so) to true. + // If cas->so == 0, this is a reflect-driven select and we + // don't need to update the boolean. pc = cas->pc; - as = (byte*)selp + cas->so; + if(cas->so > 0) { + as = (byte*)selp + cas->so; + *as = true; + } runtime·free(sel); - *as = true; return pc; sclose: @@ -1034,7 +1126,89 @@ sclose: return nil; // not reached } +// This struct must match ../reflect/value.go:/runtimeSelect. +typedef struct runtimeSelect runtimeSelect; +struct runtimeSelect +{ + uintptr dir; + ChanType *typ; + Hchan *ch; + uintptr val; +}; + +// This enum must match ../reflect/value.go:/SelectDir. +enum SelectDir { + SelectSend = 1, + SelectRecv, + SelectDefault, +}; + +// func rselect(cases []runtimeSelect) (chosen int, word uintptr, recvOK bool) +void +reflect·rselect(Slice cases, intgo chosen, uintptr word, bool recvOK) +{ + int32 i; + Select *sel; + runtimeSelect* rcase, *rc; + void *elem; + void *recvptr; + uintptr maxsize; + + chosen = -1; + word = 0; + recvOK = false; + + maxsize = 0; + rcase = (runtimeSelect*)cases.array; + for(i=0; i<cases.len; i++) { + rc = &rcase[i]; + if(rc->dir == SelectRecv && rc->ch != nil && maxsize < rc->typ->elem->size) + maxsize = rc->typ->elem->size; + } + + recvptr = nil; + if(maxsize > sizeof(void*)) + recvptr = runtime·mal(maxsize); + + newselect(cases.len, &sel); + for(i=0; i<cases.len; i++) { + rc = &rcase[i]; + switch(rc->dir) { + case SelectDefault: + selectdefault(sel, (void*)i, 0); + break; + case SelectSend: + if(rc->ch == nil) + break; + if(rc->typ->elem->size > sizeof(void*)) + elem = (void*)rc->val; + else + elem = (void*)&rc->val; + selectsend(sel, rc->ch, (void*)i, elem, 0); + break; + case SelectRecv: + if(rc->ch == nil) + break; + if(rc->typ->elem->size > sizeof(void*)) + elem = recvptr; + else + elem = &word; + selectrecv(sel, rc->ch, (void*)i, elem, &recvOK, 0); + break; + } + } + + chosen = (intgo)(uintptr)selectgo(&sel); + if(rcase[chosen].dir == SelectRecv && rcase[chosen].typ->elem->size > sizeof(void*)) + word = (uintptr)recvptr; + + FLUSH(&chosen); + FLUSH(&word); + FLUSH(&recvOK); +} + // closechan(sel *byte); +#pragma textflag 7 void runtime·closechan(Hchan *c) { @@ -1053,6 +1227,11 @@ runtime·closechan(Hchan *c) runtime·panicstring("close of closed channel"); } + if(raceenabled) { + runtime·racewritepc(c, runtime·getcallerpc(&c), runtime·closechan); + runtime·racerelease(c); + } + c->closed = true; // release all readers @@ -1087,9 +1266,9 @@ reflect·chanclose(Hchan *c) } // For reflect -// func chanlen(c chan) (len int32) +// func chanlen(c chan) (len int) void -reflect·chanlen(Hchan *c, int32 len) +reflect·chanlen(Hchan *c, intgo len) { if(c == nil) len = 0; @@ -1099,9 +1278,9 @@ reflect·chanlen(Hchan *c, int32 len) } // For reflect -// func chancap(c chan) (cap int32) +// func chancap(c chan) int void -reflect·chancap(Hchan *c, int32 cap) +reflect·chancap(Hchan *c, intgo cap) { if(c == nil) cap = 0; @@ -1160,3 +1339,12 @@ enqueue(WaitQ *q, SudoG *sgp) q->last->link = sgp; q->last = sgp; } + +static void +racesync(Hchan *c, SudoG *sg) +{ + runtime·racerelease(chanbuf(c, 0)); + runtime·raceacquireg(sg->g, chanbuf(c, 0)); + runtime·racereleaseg(sg->g, chanbuf(c, 0)); + runtime·raceacquire(chanbuf(c, 0)); +} diff --git a/src/pkg/runtime/closure_386.c b/src/pkg/runtime/closure_386.c deleted file mode 100644 index b4d867711..000000000 --- a/src/pkg/runtime/closure_386.c +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2009 The Go 
Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "runtime.h" - -#pragma textflag 7 -// func closure(siz int32, -// fn func(arg0, arg1, arg2 *ptr, callerpc uintptr, xxx) yyy, -// arg0, arg1, arg2 *ptr) (func(xxx) yyy) -void -runtime·closure(int32 siz, byte *fn, byte *arg0) -{ - byte *p, *q, **ret; - int32 i, n; - int32 pcrel; - - if(siz < 0 || siz%4 != 0) - runtime·throw("bad closure size"); - - ret = (byte**)((byte*)&arg0 + siz); - - if(siz > 100) { - // TODO(rsc): implement stack growth preamble? - runtime·throw("closure too big"); - } - - // compute size of new fn. - // must match code laid out below. - n = 6+5+2+1; // SUBL MOVL MOVL CLD - if(siz <= 4*4) - n += 1*siz/4; // MOVSL MOVSL... - else - n += 6+2; // MOVL REP MOVSL - n += 5; // CALL - n += 6+1; // ADDL RET - - // store args aligned after code, so gc can find them. - n += siz; - if(n%4) - n += 4 - n%4; - - p = runtime·mal(n); - *ret = p; - q = p + n - siz; - - if(siz > 0) { - runtime·memmove(q, (byte*)&arg0, siz); - - // SUBL $siz, SP - *p++ = 0x81; - *p++ = 0xec; - *(uint32*)p = siz; - p += 4; - - // MOVL $q, SI - *p++ = 0xbe; - *(byte**)p = q; - p += 4; - - // MOVL SP, DI - *p++ = 0x89; - *p++ = 0xe7; - - // CLD - *p++ = 0xfc; - - if(siz <= 4*4) { - for(i=0; i<siz; i+=4) { - // MOVSL - *p++ = 0xa5; - } - } else { - // MOVL $(siz/4), CX [32-bit immediate siz/4] - *p++ = 0xc7; - *p++ = 0xc1; - *(uint32*)p = siz/4; - p += 4; - - // REP; MOVSL - *p++ = 0xf3; - *p++ = 0xa5; - } - } - - // call fn - pcrel = fn - (p+5); - // direct call with pc-relative offset - // CALL fn - *p++ = 0xe8; - *(int32*)p = pcrel; - p += 4; - - // ADDL $siz, SP - *p++ = 0x81; - *p++ = 0xc4; - *(uint32*)p = siz; - p += 4; - - // RET - *p++ = 0xc3; - - if(p > q) - runtime·throw("bad math in sys.closure"); -} diff --git a/src/pkg/runtime/closure_amd64.c b/src/pkg/runtime/closure_amd64.c deleted file mode 100644 index 481b4a888..000000000 --- a/src/pkg/runtime/closure_amd64.c +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "runtime.h" - -#pragma textflag 7 -// func closure(siz int32, -// fn func(arg0, arg1, arg2 *ptr, callerpc uintptr, xxx) yyy, -// arg0, arg1, arg2 *ptr) (func(xxx) yyy) -void -runtime·closure(int32 siz, byte *fn, byte *arg0) -{ - byte *p, *q, **ret; - int32 i, n; - int64 pcrel; - - if(siz < 0 || siz%8 != 0) - runtime·throw("bad closure size"); - - ret = (byte**)((byte*)&arg0 + siz); - - if(siz > 100) { - // TODO(rsc): implement stack growth preamble? - runtime·throw("closure too big"); - } - - // compute size of new fn. - // must match code laid out below. - n = 7+10+3; // SUBQ MOVQ MOVQ - if(siz <= 4*8) - n += 2*siz/8; // MOVSQ MOVSQ... - else - n += 7+3; // MOVQ REP MOVSQ - n += 12; // CALL worst case; sometimes only 5 - n += 7+1; // ADDQ RET - - // store args aligned after code, so gc can find them. 
- n += siz; - if(n%8) - n += 8 - n%8; - - p = runtime·mal(n); - *ret = p; - q = p + n - siz; - - if(siz > 0) { - runtime·memmove(q, (byte*)&arg0, siz); - - // SUBQ $siz, SP - *p++ = 0x48; - *p++ = 0x81; - *p++ = 0xec; - *(uint32*)p = siz; - p += 4; - - // MOVQ $q, SI - *p++ = 0x48; - *p++ = 0xbe; - *(byte**)p = q; - p += 8; - - // MOVQ SP, DI - *p++ = 0x48; - *p++ = 0x89; - *p++ = 0xe7; - - if(siz <= 4*8) { - for(i=0; i<siz; i+=8) { - // MOVSQ - *p++ = 0x48; - *p++ = 0xa5; - } - } else { - // MOVQ $(siz/8), CX [32-bit immediate siz/8] - *p++ = 0x48; - *p++ = 0xc7; - *p++ = 0xc1; - *(uint32*)p = siz/8; - p += 4; - - // REP; MOVSQ - *p++ = 0xf3; - *p++ = 0x48; - *p++ = 0xa5; - } - } - - // call fn - pcrel = fn - (p+5); - if((int32)pcrel == pcrel) { - // can use direct call with pc-relative offset - // CALL fn - *p++ = 0xe8; - *(int32*)p = pcrel; - p += 4; - } else { - // MOVQ $fn, CX [64-bit immediate fn] - *p++ = 0x48; - *p++ = 0xb9; - *(byte**)p = fn; - p += 8; - - // CALL *CX - *p++ = 0xff; - *p++ = 0xd1; - } - - // ADDQ $siz, SP - *p++ = 0x48; - *p++ = 0x81; - *p++ = 0xc4; - *(uint32*)p = siz; - p += 4; - - // RET - *p++ = 0xc3; - - if(p > q) - runtime·throw("bad math in sys.closure"); -} - - diff --git a/src/pkg/runtime/closure_arm.c b/src/pkg/runtime/closure_arm.c deleted file mode 100644 index 119e91b61..000000000 --- a/src/pkg/runtime/closure_arm.c +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "runtime.h" - -/* - There are two bits of magic: - - The signature of the compiler generated function uses two stack frames - as arguments (callerpc separates these frames) - - size determines how many arguments runtime.closure actually has - starting at arg0. - - Example closure with 3 captured variables: - func closure(siz int32, - fn func(arg0, arg1, arg2 *ptr, callerpc uintptr, xxx) yyy, - arg0, arg1, arg2 *ptr) (func(xxx) yyy) - - Code generated: - src R0 - dst R1 - end R3 - tmp R4 - frame = siz+4 - -//skip loop for 0 size closures - MOVW.W R14,-frame(R13) - - MOVW $vars(PC), R0 - MOVW $4(SP), R1 - MOVW $siz(R0), R3 -loop: MOVW.P 4(R0), R4 - MOVW.P R4, 4(R1) - CMP R0, R3 - BNE loop - - MOVW 8(PC), R0 - BL (R0) // 2 words - MOVW.P frame(R13),R15 -fptr: WORD *fn -vars: WORD arg0 - WORD arg1 - WORD arg2 -*/ - -extern void runtime·cacheflush(byte* start, byte* end); - -#pragma textflag 7 -void -runtime·closure(int32 siz, byte *fn, byte *arg0) -{ - byte *p, *q, **ret; - uint32 *pc; - int32 n; - - if(siz < 0 || siz%4 != 0) - runtime·throw("bad closure size"); - - ret = (byte**)((byte*)&arg0 + siz); - - if(siz > 100) { - // TODO(kaib): implement stack growth preamble? - runtime·throw("closure too big"); - } - - // size of new fn. - // must match code laid out below. - if (siz > 0) - n = 6 * 4 + 7 * 4; - else - n = 6 * 4; - - // store args aligned after code, so gc can find them. 
- n += siz; - - p = runtime·mal(n); - *ret = p; - q = p + n - siz; - - pc = (uint32*)p; - - // MOVW.W R14,-frame(R13) - *pc++ = 0xe52de000 | (siz + 4); - - if(siz > 0) { - runtime·memmove(q, (byte*)&arg0, siz); - - // MOVW $vars(PC), R0 - *pc = 0xe28f0000 | (int32)(q - (byte*)pc - 8); - pc++; - - // MOVW $4(SP), R1 - *pc++ = 0xe28d1004; - - // MOVW $siz(R0), R3 - *pc++ = 0xe2803000 | siz; - - // MOVW.P 4(R0), R4 - *pc++ = 0xe4904004; - // MOVW.P R4, 4(R1) - *pc++ = 0xe4814004; - // CMP R0, R3 - *pc++ = 0xe1530000; - // BNE loop - *pc++ = 0x1afffffb; - } - - // MOVW fptr(PC), R0 - *pc = 0xe59f0008 | (int32)((q - 4) -(byte*) pc - 8); - pc++; - - // BL (R0) - *pc++ = 0xe28fe000; - *pc++ = 0xe280f000; - - // MOVW.P frame(R13),R15 - *pc++ = 0xe49df000 | (siz + 4); - - // WORD *fn - *pc++ = (uint32)fn; - - p = (byte*)pc; - - if(p > q) - runtime·throw("bad math in sys.closure"); - - runtime·cacheflush(*ret, q+siz); -} - diff --git a/src/pkg/runtime/complex.c b/src/pkg/runtime/complex.c index eeb943940..395e70fe3 100644 --- a/src/pkg/runtime/complex.c +++ b/src/pkg/runtime/complex.c @@ -13,28 +13,30 @@ runtime·complex128div(Complex128 n, Complex128 d, Complex128 q) float64 a, b, ratio, denom; // Special cases as in C99. - ninf = runtime·isInf(n.real, 0) || runtime·isInf(n.imag, 0); - dinf = runtime·isInf(d.real, 0) || runtime·isInf(d.imag, 0); + ninf = n.real == runtime·posinf || n.real == runtime·neginf || + n.imag == runtime·posinf || n.imag == runtime·neginf; + dinf = d.real == runtime·posinf || d.real == runtime·neginf || + d.imag == runtime·posinf || d.imag == runtime·neginf; - nnan = !ninf && (runtime·isNaN(n.real) || runtime·isNaN(n.imag)); - dnan = !dinf && (runtime·isNaN(d.real) || runtime·isNaN(d.imag)); + nnan = !ninf && (ISNAN(n.real) || ISNAN(n.imag)); + dnan = !dinf && (ISNAN(d.real) || ISNAN(d.imag)); if(nnan || dnan) { - q.real = runtime·NaN(); - q.imag = runtime·NaN(); - } else if(ninf && !dinf && !dnan) { - q.real = runtime·Inf(0); - q.imag = runtime·Inf(0); - } else if(!ninf && !nnan && dinf) { + q.real = runtime·nan; + q.imag = runtime·nan; + } else if(ninf && !dinf) { + q.real = runtime·posinf; + q.imag = runtime·posinf; + } else if(!ninf && dinf) { q.real = 0; q.imag = 0; } else if(d.real == 0 && d.imag == 0) { if(n.real == 0 && n.imag == 0) { - q.real = runtime·NaN(); - q.imag = runtime·NaN(); + q.real = runtime·nan; + q.imag = runtime·nan; } else { - q.real = runtime·Inf(0); - q.imag = runtime·Inf(0); + q.real = runtime·posinf; + q.imag = runtime·posinf; } } else { // Standard complex arithmetic, factored to avoid unnecessary overflow. diff --git a/src/pkg/runtime/complex_test.go b/src/pkg/runtime/complex_test.go new file mode 100644 index 000000000..f41e6a357 --- /dev/null +++ b/src/pkg/runtime/complex_test.go @@ -0,0 +1,67 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
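The complex128div rewrite above replaces isInf/isNaN function calls with direct comparisons against precomputed ±Inf/NaN values and simplifies the guards (the NaN branch now runs first, so the infinity branches no longer need to re-check for NaN). A C99-style sketch of the same guard order, using <complex.h> for brevity rather than the runtime's Complex128 (illustrative only):

	#include <math.h>
	#include <complex.h>

	/* Sketch: same special-case order as runtime·complex128div. */
	double complex
	cdiv_sketch(double complex n, double complex d)
	{
		int ninf = isinf(creal(n)) || isinf(cimag(n));
		int dinf = isinf(creal(d)) || isinf(cimag(d));
		int nnan = !ninf && (isnan(creal(n)) || isnan(cimag(n)));
		int dnan = !dinf && (isnan(creal(d)) || isnan(cimag(d)));

		if(nnan || dnan)
			return NAN + NAN*I;            /* any NaN operand -> NaN */
		if(ninf && !dinf)
			return INFINITY + INFINITY*I;  /* Inf / finite -> Inf    */
		if(!ninf && dinf)
			return 0;                      /* finite / Inf -> 0      */
		if(creal(d) == 0 && cimag(d) == 0) /* x / 0                  */
			return (creal(n) == 0 && cimag(n) == 0)
				? NAN + NAN*I : INFINITY + INFINITY*I;
		/* Otherwise: the runtime uses the factored formula to avoid
		 * overflow; here we simply defer to the compiler's division. */
		return n / d;
	}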
+ +package runtime_test + +import ( + "math/cmplx" + "testing" +) + +var result complex128 + +func BenchmarkComplex128DivNormal(b *testing.B) { + d := 15 + 2i + n := 32 + 3i + res := 0i + for i := 0; i < b.N; i++ { + n += 0.1i + res += n / d + } + result = res +} + +func BenchmarkComplex128DivNisNaN(b *testing.B) { + d := cmplx.NaN() + n := 32 + 3i + res := 0i + for i := 0; i < b.N; i++ { + n += 0.1i + res += n / d + } + result = res +} + +func BenchmarkComplex128DivDisNaN(b *testing.B) { + d := 15 + 2i + n := cmplx.NaN() + res := 0i + for i := 0; i < b.N; i++ { + d += 0.1i + res += n / d + } + result = res +} + +func BenchmarkComplex128DivNisInf(b *testing.B) { + d := 15 + 2i + n := cmplx.Inf() + res := 0i + for i := 0; i < b.N; i++ { + d += 0.1i + res += n / d + } + result = res +} + +func BenchmarkComplex128DivDisInf(b *testing.B) { + d := cmplx.Inf() + n := 32 + 3i + res := 0i + for i := 0; i < b.N; i++ { + n += 0.1i + res += n / d + } + result = res +} diff --git a/src/pkg/runtime/cpuprof.c b/src/pkg/runtime/cpuprof.c index 05fa0cf61..9a0606a22 100644 --- a/src/pkg/runtime/cpuprof.c +++ b/src/pkg/runtime/cpuprof.c @@ -99,6 +99,7 @@ struct Profile { uint32 wtoggle; bool wholding; // holding & need to release a log half bool flushing; // flushing hash table - profile is over + bool eod_sent; // special end-of-data record sent; => flushing }; static Lock lk; @@ -109,16 +110,20 @@ static void add(Profile*, uintptr*, int32); static bool evict(Profile*, Entry*); static bool flushlog(Profile*); +static uintptr eod[3] = {0, 1, 0}; + // LostProfileData is a no-op function used in profiles // to mark the number of profiling stack traces that were // discarded due to slow data writers. -static void LostProfileData(void) { +static void +LostProfileData(void) +{ } // SetCPUProfileRate sets the CPU profiling rate. // The user documentation is in debug.go. void -runtime·SetCPUProfileRate(int32 hz) +runtime·SetCPUProfileRate(intgo hz) { uintptr *p; uintptr n; @@ -163,6 +168,7 @@ runtime·SetCPUProfileRate(int32 hz) prof->wholding = false; prof->wtoggle = 0; prof->flushing = false; + prof->eod_sent = false; runtime·noteclear(&prof->wait); runtime·setcpuprofilerate(tick, hz); @@ -356,7 +362,7 @@ getprofile(Profile *p) return ret; // Wait for new log. - runtime·entersyscall(); + runtime·entersyscallblock(); runtime·notesleep(&p->wait); runtime·exitsyscall(); runtime·noteclear(&p->wait); @@ -409,6 +415,16 @@ breakflush: } // Made it through the table without finding anything to log. + if(!p->eod_sent) { + // We may not have space to append this to the partial log buf, + // so we always return a new slice for the end-of-data marker. + p->eod_sent = true; + ret.array = (byte*)eod; + ret.len = sizeof eod; + ret.cap = ret.len; + return ret; + } + // Finally done. Clean up and return nil. p->flushing = false; if(!runtime·cas(&p->handoff, p->handoff, 0)) diff --git a/src/pkg/runtime/crash_cgo_test.go b/src/pkg/runtime/crash_cgo_test.go new file mode 100644 index 000000000..8ccea8f37 --- /dev/null +++ b/src/pkg/runtime/crash_cgo_test.go @@ -0,0 +1,88 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
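The eod record added to cpuprof.c above is a sentinel in the legacy binary profile stream, in which each record consists of a hit count, a frame count, and that many pcs; {0, 1, 0} is therefore a zero-count record containing a single zero pc, which profile readers take as end-of-data. A sketch of that record shape (an assumption inferred from the sentinel's form; names and the write helper are illustrative):

	#include <stdint.h>
	#include <stdio.h>

	/* Sketch: one record of the assumed legacy profile format. */
	static void
	emit_record(FILE *f, uintptr_t count, uintptr_t *pcs, uintptr_t n)
	{
		uintptr_t i;
		fwrite(&count, sizeof count, 1, f);   /* hit count   */
		fwrite(&n, sizeof n, 1, f);           /* stack depth */
		for(i = 0; i < n; i++)
			fwrite(&pcs[i], sizeof *pcs, 1, f);
	}
	/* End of data would then be: emit_record(f, 0, (uintptr_t[]){0}, 1); */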
+ +// +build cgo + +package runtime_test + +import ( + "testing" +) + +func TestCgoCrashHandler(t *testing.T) { + testCrashHandler(t, true) +} + +func TestCgoSignalDeadlock(t *testing.T) { + got := executeTest(t, cgoSignalDeadlockSource, nil) + want := "OK\n" + if got != want { + t.Fatalf("expected %q, but got %q", want, got) + } +} + +const cgoSignalDeadlockSource = ` +package main + +import "C" + +import ( + "fmt" + "runtime" + "time" +) + +func main() { + runtime.GOMAXPROCS(100) + ping := make(chan bool) + go func() { + for i := 0; ; i++ { + runtime.Gosched() + select { + case done := <-ping: + if done { + ping <- true + return + } + ping <- true + default: + } + func() { + defer func() { + recover() + }() + var s *string + *s = "" + }() + } + }() + time.Sleep(time.Millisecond) + for i := 0; i < 64; i++ { + go func() { + runtime.LockOSThread() + select {} + }() + go func() { + runtime.LockOSThread() + select {} + }() + time.Sleep(time.Millisecond) + ping <- false + select { + case <-ping: + case <-time.After(time.Second): + fmt.Printf("HANG\n") + return + } + } + ping <- true + select { + case <-ping: + case <-time.After(time.Second): + fmt.Printf("HANG\n") + return + } + fmt.Printf("OK\n") +} +` diff --git a/src/pkg/runtime/crash_test.go b/src/pkg/runtime/crash_test.go new file mode 100644 index 000000000..5f84cb5a2 --- /dev/null +++ b/src/pkg/runtime/crash_test.go @@ -0,0 +1,177 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "text/template" +) + +func executeTest(t *testing.T, templ string, data interface{}) string { + checkStaleRuntime(t) + + st := template.Must(template.New("crashSource").Parse(templ)) + + dir, err := ioutil.TempDir("", "go-build") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(dir) + + src := filepath.Join(dir, "main.go") + f, err := os.Create(src) + if err != nil { + t.Fatalf("failed to create %v: %v", src, err) + } + err = st.Execute(f, data) + if err != nil { + f.Close() + t.Fatalf("failed to execute template: %v", err) + } + f.Close() + + got, _ := exec.Command("go", "run", src).CombinedOutput() + return string(got) +} + +func checkStaleRuntime(t *testing.T) { + // 'go run' uses the installed copy of runtime.a, which may be out of date. + out, err := exec.Command("go", "list", "-f", "{{.Stale}}", "runtime").CombinedOutput() + if err != nil { + t.Fatalf("failed to execute 'go list': %v\n%v", err, string(out)) + } + if string(out) != "false\n" { + t.Fatalf("Stale runtime.a. 
Run 'go install runtime'.") + } +} + +func testCrashHandler(t *testing.T, cgo bool) { + type crashTest struct { + Cgo bool + } + got := executeTest(t, crashSource, &crashTest{Cgo: cgo}) + want := "main: recovered done\nnew-thread: recovered done\nsecond-new-thread: recovered done\nmain-again: recovered done\n" + if got != want { + t.Fatalf("expected %q, but got %q", want, got) + } +} + +func TestCrashHandler(t *testing.T) { + testCrashHandler(t, false) +} + +func testDeadlock(t *testing.T, source string) { + got := executeTest(t, source, nil) + want := "fatal error: all goroutines are asleep - deadlock!\n" + if !strings.HasPrefix(got, want) { + t.Fatalf("expected %q, but got %q", want, got) + } +} + +func TestSimpleDeadlock(t *testing.T) { + testDeadlock(t, simpleDeadlockSource) +} + +func TestInitDeadlock(t *testing.T) { + testDeadlock(t, initDeadlockSource) +} + +func TestLockedDeadlock(t *testing.T) { + testDeadlock(t, lockedDeadlockSource) +} + +func TestLockedDeadlock2(t *testing.T) { + testDeadlock(t, lockedDeadlockSource2) +} + +const crashSource = ` +package main + +import ( + "fmt" + "runtime" +) + +{{if .Cgo}} +import "C" +{{end}} + +func test(name string) { + defer func() { + if x := recover(); x != nil { + fmt.Printf(" recovered") + } + fmt.Printf(" done\n") + }() + fmt.Printf("%s:", name) + var s *string + _ = *s + fmt.Print("SHOULD NOT BE HERE") +} + +func testInNewThread(name string) { + c := make(chan bool) + go func() { + runtime.LockOSThread() + test(name) + c <- true + }() + <-c +} + +func main() { + runtime.LockOSThread() + test("main") + testInNewThread("new-thread") + testInNewThread("second-new-thread") + test("main-again") +} +` + +const simpleDeadlockSource = ` +package main +func main() { + select {} +} +` + +const initDeadlockSource = ` +package main +func init() { + select {} +} +func main() { +} +` + +const lockedDeadlockSource = ` +package main +import "runtime" +func main() { + runtime.LockOSThread() + select {} +} +` + +const lockedDeadlockSource2 = ` +package main +import ( + "runtime" + "time" +) +func main() { + go func() { + runtime.LockOSThread() + select {} + }() + time.Sleep(time.Millisecond) + select {} +} +` diff --git a/src/pkg/runtime/debug.go b/src/pkg/runtime/debug.go index b802fc63f..d82afb08e 100644 --- a/src/pkg/runtime/debug.go +++ b/src/pkg/runtime/debug.go @@ -4,7 +4,7 @@ package runtime -// Breakpoint() executes a breakpoint trap. +// Breakpoint executes a breakpoint trap. func Breakpoint() // LockOSThread wires the calling goroutine to its current operating system thread. @@ -125,6 +125,7 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) // blocking until data is available. If profiling is turned off and all the profile // data accumulated while it was on has been returned, CPUProfile returns nil. // The caller must save the returned data before calling CPUProfile again. +// // Most clients should use the runtime/pprof package or // the testing package's -test.cpuprofile flag instead of calling // CPUProfile directly. @@ -133,11 +134,37 @@ func CPUProfile() []byte // SetCPUProfileRate sets the CPU profiling rate to hz samples per second. // If hz <= 0, SetCPUProfileRate turns off profiling. // If the profiler is on, the rate cannot be changed without first turning it off. +// // Most clients should use the runtime/pprof package or // the testing package's -test.cpuprofile flag instead of calling // SetCPUProfileRate directly. 
func SetCPUProfileRate(hz int) +// SetBlockProfileRate controls the fraction of goroutine blocking events +// that are reported in the blocking profile. The profiler aims to sample +// an average of one blocking event per rate nanoseconds spent blocked. +// +// To include every blocking event in the profile, pass rate = 1. +// To turn off profiling entirely, pass rate <= 0. +func SetBlockProfileRate(rate int) + +// BlockProfileRecord describes blocking events originated +// at a particular call sequence (stack trace). +type BlockProfileRecord struct { + Count int64 + Cycles int64 + StackRecord +} + +// BlockProfile returns n, the number of records in the current blocking profile. +// If len(p) >= n, BlockProfile copies the profile into p and returns n, true. +// If len(p) < n, BlockProfile does not change p and returns n, false. +// +// Most clients should use the runtime/pprof package or +// the testing package's -test.blockprofile flag instead +// of calling BlockProfile directly. +func BlockProfile(p []BlockProfileRecord) (n int, ok bool) + // Stack formats a stack trace of the calling goroutine into buf // and returns the number of bytes written to buf. // If all is true, Stack formats stack traces of all other goroutines diff --git a/src/pkg/runtime/debug/debug.c b/src/pkg/runtime/debug/debug.c new file mode 100644 index 000000000..a7292c477 --- /dev/null +++ b/src/pkg/runtime/debug/debug.c @@ -0,0 +1,9 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Nothing to see here. +// This file exists so that the go command knows that parts of the +// package are implemented in C, so that it does not instruct the +// Go compiler to complain about extern declarations. +// The actual implementations are in package runtime. diff --git a/src/pkg/runtime/debug/garbage.go b/src/pkg/runtime/debug/garbage.go new file mode 100644 index 000000000..8f3026426 --- /dev/null +++ b/src/pkg/runtime/debug/garbage.go @@ -0,0 +1,101 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package debug + +import ( + "runtime" + "sort" + "time" +) + +// GCStats collect information about recent garbage collections. +type GCStats struct { + LastGC time.Time // time of last collection + NumGC int64 // number of garbage collections + PauseTotal time.Duration // total pause for all collections + Pause []time.Duration // pause history, most recent first + PauseQuantiles []time.Duration +} + +// Implemented in package runtime. +func readGCStats(*[]time.Duration) +func enableGC(bool) bool +func setGCPercent(int) int +func freeOSMemory() + +// ReadGCStats reads statistics about garbage collection into stats. +// The number of entries in the pause history is system-dependent; +// stats.Pause slice will be reused if large enough, reallocated otherwise. +// ReadGCStats may use the full capacity of the stats.Pause slice. +// If stats.PauseQuantiles is non-empty, ReadGCStats fills it with quantiles +// summarizing the distribution of pause time. For example, if +// len(stats.PauseQuantiles) is 5, it will be filled with the minimum, +// 25%, 50%, 75%, and maximum pause times. +func ReadGCStats(stats *GCStats) { + // Create a buffer with space for at least two copies of the + // pause history tracked by the runtime. 
One will be returned + // to the caller and the other will be used as a temporary buffer + // for computing quantiles. + const maxPause = len(((*runtime.MemStats)(nil)).PauseNs) + if cap(stats.Pause) < 2*maxPause { + stats.Pause = make([]time.Duration, 2*maxPause) + } + + // readGCStats fills in the pause history (up to maxPause entries) + // and then three more: Unix ns time of last GC, number of GC, + // and total pause time in nanoseconds. Here we depend on the + // fact that time.Duration's native unit is nanoseconds, so the + // pauses and the total pause time do not need any conversion. + readGCStats(&stats.Pause) + n := len(stats.Pause) - 3 + stats.LastGC = time.Unix(0, int64(stats.Pause[n])) + stats.NumGC = int64(stats.Pause[n+1]) + stats.PauseTotal = stats.Pause[n+2] + stats.Pause = stats.Pause[:n] + + if len(stats.PauseQuantiles) > 0 { + if n == 0 { + for i := range stats.PauseQuantiles { + stats.PauseQuantiles[i] = 0 + } + } else { + // There's room for a second copy of the data in stats.Pause. + // See the allocation at the top of the function. + sorted := stats.Pause[n : n+n] + copy(sorted, stats.Pause) + sort.Sort(byDuration(sorted)) + nq := len(stats.PauseQuantiles) - 1 + for i := 0; i < nq; i++ { + stats.PauseQuantiles[i] = sorted[len(sorted)*i/nq] + } + stats.PauseQuantiles[nq] = sorted[len(sorted)-1] + } + } +} + +type byDuration []time.Duration + +func (x byDuration) Len() int { return len(x) } +func (x byDuration) Swap(i, j int) { x[i], x[j] = x[j], x[i] } +func (x byDuration) Less(i, j int) bool { return x[i] < x[j] } + +// SetGCPercent sets the garbage collection target percentage: +// a collection is triggered when the ratio of freshly allocated data +// to live data remaining after the previous collection reaches this percentage. +// SetGCPercent returns the previous setting. +// The initial setting is the value of the GOGC environment variable +// at startup, or 100 if the variable is not set. +// A negative percentage disables garbage collection. +func SetGCPercent(percent int) int { + return setGCPercent(percent) +} + +// FreeOSMemory forces a garbage collection followed by an +// attempt to return as much memory to the operating system +// as possible. (Even if this is not called, the runtime gradually +// returns memory to the operating system in a background task.) +func FreeOSMemory() { + freeOSMemory() +} diff --git a/src/pkg/runtime/debug/garbage_test.go b/src/pkg/runtime/debug/garbage_test.go new file mode 100644 index 000000000..b93cfee56 --- /dev/null +++ b/src/pkg/runtime/debug/garbage_test.go @@ -0,0 +1,100 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package debug + +import ( + "runtime" + "testing" + "time" +) + +func TestReadGCStats(t *testing.T) { + var stats GCStats + var mstats runtime.MemStats + var min, max time.Duration + + // First ReadGCStats will allocate, second should not, + // especially if we follow up with an explicit garbage collection. + stats.PauseQuantiles = make([]time.Duration, 10) + ReadGCStats(&stats) + runtime.GC() + + // Assume these will return same data: no GC during ReadGCStats. 
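
For illustration only, not part of this diff: the quantile support implemented in ReadGCStats above can be driven directly from client code. With five quantiles the slice is filled with the minimum, 25%, 50%, 75%, and maximum pauses, as the doc comment describes:

package main

import (
	"fmt"
	"runtime/debug"
	"time"
)

func main() {
	var stats debug.GCStats
	// Request five quantiles: min, 25%, 50%, 75%, max.
	stats.PauseQuantiles = make([]time.Duration, 5)
	debug.ReadGCStats(&stats)
	fmt.Println("collections:", stats.NumGC)
	fmt.Println("total pause:", stats.PauseTotal)
	fmt.Println("quantiles (min..max):", stats.PauseQuantiles)
}
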
+ ReadGCStats(&stats) + runtime.ReadMemStats(&mstats) + + if stats.NumGC != int64(mstats.NumGC) { + t.Errorf("stats.NumGC = %d, but mstats.NumGC = %d", stats.NumGC, mstats.NumGC) + } + if stats.PauseTotal != time.Duration(mstats.PauseTotalNs) { + t.Errorf("stats.PauseTotal = %d, but mstats.PauseTotalNs = %d", stats.PauseTotal, mstats.PauseTotalNs) + } + if stats.LastGC.UnixNano() != int64(mstats.LastGC) { + t.Errorf("stats.LastGC.UnixNano = %d, but mstats.LastGC = %d", stats.LastGC.UnixNano(), mstats.LastGC) + } + n := int(mstats.NumGC) + if n > len(mstats.PauseNs) { + n = len(mstats.PauseNs) + } + if len(stats.Pause) != n { + t.Errorf("len(stats.Pause) = %d, want %d", len(stats.Pause), n) + } else { + off := (int(mstats.NumGC) + len(mstats.PauseNs) - 1) % len(mstats.PauseNs) + for i := 0; i < n; i++ { + dt := stats.Pause[i] + if dt != time.Duration(mstats.PauseNs[off]) { + t.Errorf("stats.Pause[%d] = %d, want %d", i, dt, mstats.PauseNs[off]) + } + if max < dt { + max = dt + } + if min > dt || i == 0 { + min = dt + } + off = (off + len(mstats.PauseNs) - 1) % len(mstats.PauseNs) + } + } + + q := stats.PauseQuantiles + nq := len(q) + if q[0] != min || q[nq-1] != max { + t.Errorf("stats.PauseQuantiles = [%d, ..., %d], want [%d, ..., %d]", q[0], q[nq-1], min, max) + } + + for i := 0; i < nq-1; i++ { + if q[i] > q[i+1] { + t.Errorf("stats.PauseQuantiles[%d]=%d > stats.PauseQuantiles[%d]=%d", i, q[i], i+1, q[i+1]) + } + } +} + +var big = make([]byte, 1<<20) + +func TestFreeOSMemory(t *testing.T) { + var ms1, ms2 runtime.MemStats + + if big == nil { + t.Skip("test is not reliable when run multiple times") + } + big = nil + runtime.GC() + runtime.ReadMemStats(&ms1) + FreeOSMemory() + runtime.ReadMemStats(&ms2) + if ms1.HeapReleased >= ms2.HeapReleased { + t.Errorf("released before=%d; released after=%d; did not go up", ms1.HeapReleased, ms2.HeapReleased) + } +} + +func TestSetGCPercent(t *testing.T) { + // Test that the variable is being set and returned correctly. + // Assume the percentage itself is implemented fine during GC, + // which is harder to test. + old := SetGCPercent(123) + new := SetGCPercent(old) + if new != 123 { + t.Errorf("SetGCPercent(123); SetGCPercent(x) = %d, want 123", new) + } +} diff --git a/src/pkg/runtime/debug/stack.go b/src/pkg/runtime/debug/stack.go index a533a5c3b..2896b2141 100644 --- a/src/pkg/runtime/debug/stack.go +++ b/src/pkg/runtime/debug/stack.go @@ -29,6 +29,8 @@ func PrintStack() { // For each routine, it includes the source line information and PC value, // then attempts to discover, for Go functions, the calling function or // method and the text of the line containing the invocation. +// +// This function is deprecated. Use package runtime's Stack instead. func Stack() []byte { return stack() } diff --git a/src/pkg/runtime/debug/stack_test.go b/src/pkg/runtime/debug/stack_test.go index f33f5072b..bbd662618 100644 --- a/src/pkg/runtime/debug/stack_test.go +++ b/src/pkg/runtime/debug/stack_test.go @@ -36,7 +36,7 @@ func (t T) method() []byte { func TestStack(t *testing.T) { b := T(0).method() lines := strings.Split(string(b), "\n") - if len(lines) <= 6 { + if len(lines) < 6 { t.Fatal("too few lines") } n := 0 diff --git a/src/pkg/runtime/defs_freebsd.go b/src/pkg/runtime/defs_freebsd.go index 306e32197..084022715 100644 --- a/src/pkg/runtime/defs_freebsd.go +++ b/src/pkg/runtime/defs_freebsd.go @@ -7,8 +7,9 @@ /* Input to cgo. 
-GOARCH=amd64 cgo -cdefs defs.go >amd64/defs.h -GOARCH=386 cgo -cdefs defs.go >386/defs.h +GOARCH=amd64 go tool cgo -cdefs defs_freebsd.go >defs_freebsd_amd64.h +GOARCH=386 go tool cgo -cdefs defs_freebsd.go >defs_freebsd_386.h +GOARCH=arm go tool cgo -cdefs defs_freebsd.go >defs_freebsd_arm.h */ package runtime @@ -38,6 +39,8 @@ const ( MAP_PRIVATE = C.MAP_PRIVATE MAP_FIXED = C.MAP_FIXED + MADV_FREE = C.MADV_FREE + SA_SIGINFO = C.SA_SIGINFO SA_RESTART = C.SA_RESTART SA_ONSTACK = C.SA_ONSTACK @@ -104,7 +107,6 @@ type Rtprio C.struct_rtprio type ThrParam C.struct_thr_param type Sigaltstack C.struct_sigaltstack type Sigset C.struct___sigset -type Sigval C.union_sigval type StackT C.stack_t type Siginfo C.siginfo_t diff --git a/src/pkg/runtime/defs_freebsd_386.h b/src/pkg/runtime/defs_freebsd_386.h index 29fcb8b57..d00c852c6 100644 --- a/src/pkg/runtime/defs_freebsd_386.h +++ b/src/pkg/runtime/defs_freebsd_386.h @@ -1,193 +1,190 @@ -// godefs -f -m32 defs.c +// Created by cgo -cdefs - DO NOT EDIT +// cgo -cdefs defs_freebsd.go -// MACHINE GENERATED - DO NOT EDIT. -// Constants enum { - PROT_NONE = 0, - PROT_READ = 0x1, - PROT_WRITE = 0x2, - PROT_EXEC = 0x4, - MAP_ANON = 0x1000, - MAP_PRIVATE = 0x2, - MAP_FIXED = 0x10, - SA_SIGINFO = 0x40, - SA_RESTART = 0x2, - SA_ONSTACK = 0x1, - UMTX_OP_WAIT = 0x2, - UMTX_OP_WAKE = 0x3, - EINTR = 0x4, - SIGHUP = 0x1, - SIGINT = 0x2, - SIGQUIT = 0x3, - SIGILL = 0x4, - SIGTRAP = 0x5, - SIGABRT = 0x6, - SIGEMT = 0x7, - SIGFPE = 0x8, - SIGKILL = 0x9, - SIGBUS = 0xa, - SIGSEGV = 0xb, - SIGSYS = 0xc, - SIGPIPE = 0xd, - SIGALRM = 0xe, - SIGTERM = 0xf, - SIGURG = 0x10, - SIGSTOP = 0x11, - SIGTSTP = 0x12, - SIGCONT = 0x13, - SIGCHLD = 0x14, - SIGTTIN = 0x15, - SIGTTOU = 0x16, - SIGIO = 0x17, - SIGXCPU = 0x18, - SIGXFSZ = 0x19, - SIGVTALRM = 0x1a, - SIGPROF = 0x1b, - SIGWINCH = 0x1c, - SIGINFO = 0x1d, - SIGUSR1 = 0x1e, - SIGUSR2 = 0x1f, - FPE_INTDIV = 0x2, - FPE_INTOVF = 0x1, - FPE_FLTDIV = 0x3, - FPE_FLTOVF = 0x4, - FPE_FLTUND = 0x5, - FPE_FLTRES = 0x6, - FPE_FLTINV = 0x7, - FPE_FLTSUB = 0x8, - BUS_ADRALN = 0x1, - BUS_ADRERR = 0x2, - BUS_OBJERR = 0x3, - SEGV_MAPERR = 0x1, - SEGV_ACCERR = 0x2, - ITIMER_REAL = 0, - ITIMER_VIRTUAL = 0x1, - ITIMER_PROF = 0x2, + PROT_NONE = 0x0, + PROT_READ = 0x1, + PROT_WRITE = 0x2, + PROT_EXEC = 0x4, + + MAP_ANON = 0x1000, + MAP_PRIVATE = 0x2, + MAP_FIXED = 0x10, + + MADV_FREE = 0x5, + + SA_SIGINFO = 0x40, + SA_RESTART = 0x2, + SA_ONSTACK = 0x1, + + UMTX_OP_WAIT = 0x2, + UMTX_OP_WAKE = 0x3, + + EINTR = 0x4, + + SIGHUP = 0x1, + SIGINT = 0x2, + SIGQUIT = 0x3, + SIGILL = 0x4, + SIGTRAP = 0x5, + SIGABRT = 0x6, + SIGEMT = 0x7, + SIGFPE = 0x8, + SIGKILL = 0x9, + SIGBUS = 0xa, + SIGSEGV = 0xb, + SIGSYS = 0xc, + SIGPIPE = 0xd, + SIGALRM = 0xe, + SIGTERM = 0xf, + SIGURG = 0x10, + SIGSTOP = 0x11, + SIGTSTP = 0x12, + SIGCONT = 0x13, + SIGCHLD = 0x14, + SIGTTIN = 0x15, + SIGTTOU = 0x16, + SIGIO = 0x17, + SIGXCPU = 0x18, + SIGXFSZ = 0x19, + SIGVTALRM = 0x1a, + SIGPROF = 0x1b, + SIGWINCH = 0x1c, + SIGINFO = 0x1d, + SIGUSR1 = 0x1e, + SIGUSR2 = 0x1f, + + FPE_INTDIV = 0x2, + FPE_INTOVF = 0x1, + FPE_FLTDIV = 0x3, + FPE_FLTOVF = 0x4, + FPE_FLTUND = 0x5, + FPE_FLTRES = 0x6, + FPE_FLTINV = 0x7, + FPE_FLTSUB = 0x8, + + BUS_ADRALN = 0x1, + BUS_ADRERR = 0x2, + BUS_OBJERR = 0x3, + + SEGV_MAPERR = 0x1, + SEGV_ACCERR = 0x2, + + ITIMER_REAL = 0x0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, }; -// Types +typedef struct Rtprio Rtprio; +typedef struct ThrParam ThrParam; +typedef struct Sigaltstack Sigaltstack; +typedef struct Sigset Sigset; +typedef struct StackT 
StackT; +typedef struct Siginfo Siginfo; +typedef struct Mcontext Mcontext; +typedef struct Ucontext Ucontext; +typedef struct Timespec Timespec; +typedef struct Timeval Timeval; +typedef struct Itimerval Itimerval; + #pragma pack on -typedef struct Rtprio Rtprio; struct Rtprio { - uint16 type; - uint16 prio; + uint16 type; + uint16 prio; }; - -typedef struct ThrParam ThrParam; struct ThrParam { - void *start_func; - void *arg; - int8 *stack_base; - uint32 stack_size; - int8 *tls_base; - uint32 tls_size; - int32 *child_tid; - int32 *parent_tid; - int32 flags; - Rtprio *rtp; - void* spare[3]; + void *start_func; + byte *arg; + int8 *stack_base; + uint32 stack_size; + int8 *tls_base; + uint32 tls_size; + int32 *child_tid; + int32 *parent_tid; + int32 flags; + Rtprio *rtp; + void *spare[3]; }; - -typedef struct Sigaltstack Sigaltstack; struct Sigaltstack { - int8 *ss_sp; - uint32 ss_size; - int32 ss_flags; + int8 *ss_sp; + uint32 ss_size; + int32 ss_flags; }; - -typedef struct Sigset Sigset; struct Sigset { - uint32 __bits[4]; -}; - -typedef union Sigval Sigval; -union Sigval { - int32 sival_int; - void *sival_ptr; - int32 sigval_int; - void *sigval_ptr; + uint32 __bits[4]; }; - -typedef struct StackT StackT; struct StackT { - int8 *ss_sp; - uint32 ss_size; - int32 ss_flags; + int8 *ss_sp; + uint32 ss_size; + int32 ss_flags; }; -typedef struct Siginfo Siginfo; struct Siginfo { - int32 si_signo; - int32 si_errno; - int32 si_code; - int32 si_pid; - uint32 si_uid; - int32 si_status; - void *si_addr; - Sigval si_value; - byte _reason[32]; + int32 si_signo; + int32 si_errno; + int32 si_code; + int32 si_pid; + uint32 si_uid; + int32 si_status; + byte *si_addr; + byte si_value[4]; + byte _reason[32]; }; -typedef struct Mcontext Mcontext; struct Mcontext { - int32 mc_onstack; - int32 mc_gs; - int32 mc_fs; - int32 mc_es; - int32 mc_ds; - int32 mc_edi; - int32 mc_esi; - int32 mc_ebp; - int32 mc_isp; - int32 mc_ebx; - int32 mc_edx; - int32 mc_ecx; - int32 mc_eax; - int32 mc_trapno; - int32 mc_err; - int32 mc_eip; - int32 mc_cs; - int32 mc_eflags; - int32 mc_esp; - int32 mc_ss; - int32 mc_len; - int32 mc_fpformat; - int32 mc_ownedfp; - int32 mc_spare1[1]; - int32 mc_fpstate[128]; - int32 mc_fsbase; - int32 mc_gsbase; - int32 mc_spare2[6]; + int32 mc_onstack; + int32 mc_gs; + int32 mc_fs; + int32 mc_es; + int32 mc_ds; + int32 mc_edi; + int32 mc_esi; + int32 mc_ebp; + int32 mc_isp; + int32 mc_ebx; + int32 mc_edx; + int32 mc_ecx; + int32 mc_eax; + int32 mc_trapno; + int32 mc_err; + int32 mc_eip; + int32 mc_cs; + int32 mc_eflags; + int32 mc_esp; + int32 mc_ss; + int32 mc_len; + int32 mc_fpformat; + int32 mc_ownedfp; + int32 mc_spare1[1]; + int32 mc_fpstate[128]; + int32 mc_fsbase; + int32 mc_gsbase; + int32 mc_spare2[6]; }; - -typedef struct Ucontext Ucontext; struct Ucontext { - Sigset uc_sigmask; - Mcontext uc_mcontext; - Ucontext *uc_link; - StackT uc_stack; - int32 uc_flags; - int32 __spare__[4]; - byte pad_godefs_0[12]; + Sigset uc_sigmask; + Mcontext uc_mcontext; + Ucontext *uc_link; + StackT uc_stack; + int32 uc_flags; + int32 __spare__[4]; + byte Pad_cgo_0[12]; }; -typedef struct Timespec Timespec; struct Timespec { - int32 tv_sec; - int32 tv_nsec; + int32 tv_sec; + int32 tv_nsec; }; - -typedef struct Timeval Timeval; struct Timeval { - int32 tv_sec; - int32 tv_usec; + int32 tv_sec; + int32 tv_usec; }; - -typedef struct Itimerval Itimerval; struct Itimerval { - Timeval it_interval; - Timeval it_value; + Timeval it_interval; + Timeval it_value; }; + + #pragma pack off diff --git 
a/src/pkg/runtime/defs_freebsd_amd64.h b/src/pkg/runtime/defs_freebsd_amd64.h index 8a222dca4..6348c0482 100644 --- a/src/pkg/runtime/defs_freebsd_amd64.h +++ b/src/pkg/runtime/defs_freebsd_amd64.h @@ -1,204 +1,201 @@ -// godefs -f -m64 defs.c +// Created by cgo -cdefs - DO NOT EDIT +// cgo -cdefs defs_freebsd.go -// MACHINE GENERATED - DO NOT EDIT. -// Constants enum { - PROT_NONE = 0, - PROT_READ = 0x1, - PROT_WRITE = 0x2, - PROT_EXEC = 0x4, - MAP_ANON = 0x1000, - MAP_PRIVATE = 0x2, - MAP_FIXED = 0x10, - SA_SIGINFO = 0x40, - SA_RESTART = 0x2, - SA_ONSTACK = 0x1, - UMTX_OP_WAIT = 0x2, - UMTX_OP_WAKE = 0x3, - EINTR = 0x4, - SIGHUP = 0x1, - SIGINT = 0x2, - SIGQUIT = 0x3, - SIGILL = 0x4, - SIGTRAP = 0x5, - SIGABRT = 0x6, - SIGEMT = 0x7, - SIGFPE = 0x8, - SIGKILL = 0x9, - SIGBUS = 0xa, - SIGSEGV = 0xb, - SIGSYS = 0xc, - SIGPIPE = 0xd, - SIGALRM = 0xe, - SIGTERM = 0xf, - SIGURG = 0x10, - SIGSTOP = 0x11, - SIGTSTP = 0x12, - SIGCONT = 0x13, - SIGCHLD = 0x14, - SIGTTIN = 0x15, - SIGTTOU = 0x16, - SIGIO = 0x17, - SIGXCPU = 0x18, - SIGXFSZ = 0x19, - SIGVTALRM = 0x1a, - SIGPROF = 0x1b, - SIGWINCH = 0x1c, - SIGINFO = 0x1d, - SIGUSR1 = 0x1e, - SIGUSR2 = 0x1f, - FPE_INTDIV = 0x2, - FPE_INTOVF = 0x1, - FPE_FLTDIV = 0x3, - FPE_FLTOVF = 0x4, - FPE_FLTUND = 0x5, - FPE_FLTRES = 0x6, - FPE_FLTINV = 0x7, - FPE_FLTSUB = 0x8, - BUS_ADRALN = 0x1, - BUS_ADRERR = 0x2, - BUS_OBJERR = 0x3, - SEGV_MAPERR = 0x1, - SEGV_ACCERR = 0x2, - ITIMER_REAL = 0, - ITIMER_VIRTUAL = 0x1, - ITIMER_PROF = 0x2, + PROT_NONE = 0x0, + PROT_READ = 0x1, + PROT_WRITE = 0x2, + PROT_EXEC = 0x4, + + MAP_ANON = 0x1000, + MAP_PRIVATE = 0x2, + MAP_FIXED = 0x10, + + MADV_FREE = 0x5, + + SA_SIGINFO = 0x40, + SA_RESTART = 0x2, + SA_ONSTACK = 0x1, + + UMTX_OP_WAIT = 0x2, + UMTX_OP_WAKE = 0x3, + + EINTR = 0x4, + + SIGHUP = 0x1, + SIGINT = 0x2, + SIGQUIT = 0x3, + SIGILL = 0x4, + SIGTRAP = 0x5, + SIGABRT = 0x6, + SIGEMT = 0x7, + SIGFPE = 0x8, + SIGKILL = 0x9, + SIGBUS = 0xa, + SIGSEGV = 0xb, + SIGSYS = 0xc, + SIGPIPE = 0xd, + SIGALRM = 0xe, + SIGTERM = 0xf, + SIGURG = 0x10, + SIGSTOP = 0x11, + SIGTSTP = 0x12, + SIGCONT = 0x13, + SIGCHLD = 0x14, + SIGTTIN = 0x15, + SIGTTOU = 0x16, + SIGIO = 0x17, + SIGXCPU = 0x18, + SIGXFSZ = 0x19, + SIGVTALRM = 0x1a, + SIGPROF = 0x1b, + SIGWINCH = 0x1c, + SIGINFO = 0x1d, + SIGUSR1 = 0x1e, + SIGUSR2 = 0x1f, + + FPE_INTDIV = 0x2, + FPE_INTOVF = 0x1, + FPE_FLTDIV = 0x3, + FPE_FLTOVF = 0x4, + FPE_FLTUND = 0x5, + FPE_FLTRES = 0x6, + FPE_FLTINV = 0x7, + FPE_FLTSUB = 0x8, + + BUS_ADRALN = 0x1, + BUS_ADRERR = 0x2, + BUS_OBJERR = 0x3, + + SEGV_MAPERR = 0x1, + SEGV_ACCERR = 0x2, + + ITIMER_REAL = 0x0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, }; -// Types +typedef struct Rtprio Rtprio; +typedef struct ThrParam ThrParam; +typedef struct Sigaltstack Sigaltstack; +typedef struct Sigset Sigset; +typedef struct StackT StackT; +typedef struct Siginfo Siginfo; +typedef struct Mcontext Mcontext; +typedef struct Ucontext Ucontext; +typedef struct Timespec Timespec; +typedef struct Timeval Timeval; +typedef struct Itimerval Itimerval; + #pragma pack on -typedef struct Rtprio Rtprio; struct Rtprio { - uint16 type; - uint16 prio; + uint16 type; + uint16 prio; }; - -typedef struct ThrParam ThrParam; struct ThrParam { - void *start_func; - void *arg; - int8 *stack_base; - uint64 stack_size; - int8 *tls_base; - uint64 tls_size; - int64 *child_tid; - int64 *parent_tid; - int32 flags; - byte pad_godefs_0[4]; - Rtprio *rtp; - void* spare[3]; + void *start_func; + byte *arg; + int8 *stack_base; + uint64 stack_size; + int8 *tls_base; + uint64 
tls_size; + int64 *child_tid; + int64 *parent_tid; + int32 flags; + byte Pad_cgo_0[4]; + Rtprio *rtp; + void *spare[3]; }; - -typedef struct Sigaltstack Sigaltstack; struct Sigaltstack { - int8 *ss_sp; - uint64 ss_size; - int32 ss_flags; - byte pad_godefs_0[4]; + int8 *ss_sp; + uint64 ss_size; + int32 ss_flags; + byte Pad_cgo_0[4]; }; - -typedef struct Sigset Sigset; struct Sigset { - uint32 __bits[4]; -}; - -typedef union Sigval Sigval; -union Sigval { - int32 sival_int; - void *sival_ptr; - int32 sigval_int; - void *sigval_ptr; + uint32 __bits[4]; }; - -typedef struct StackT StackT; struct StackT { - int8 *ss_sp; - uint64 ss_size; - int32 ss_flags; - byte pad_godefs_0[4]; + int8 *ss_sp; + uint64 ss_size; + int32 ss_flags; + byte Pad_cgo_0[4]; }; -typedef struct Siginfo Siginfo; struct Siginfo { - int32 si_signo; - int32 si_errno; - int32 si_code; - int32 si_pid; - uint32 si_uid; - int32 si_status; - void *si_addr; - Sigval si_value; - byte _reason[40]; + int32 si_signo; + int32 si_errno; + int32 si_code; + int32 si_pid; + uint32 si_uid; + int32 si_status; + byte *si_addr; + byte si_value[8]; + byte _reason[40]; }; -typedef struct Mcontext Mcontext; struct Mcontext { - int64 mc_onstack; - int64 mc_rdi; - int64 mc_rsi; - int64 mc_rdx; - int64 mc_rcx; - int64 mc_r8; - int64 mc_r9; - int64 mc_rax; - int64 mc_rbx; - int64 mc_rbp; - int64 mc_r10; - int64 mc_r11; - int64 mc_r12; - int64 mc_r13; - int64 mc_r14; - int64 mc_r15; - uint32 mc_trapno; - uint16 mc_fs; - uint16 mc_gs; - int64 mc_addr; - uint32 mc_flags; - uint16 mc_es; - uint16 mc_ds; - int64 mc_err; - int64 mc_rip; - int64 mc_cs; - int64 mc_rflags; - int64 mc_rsp; - int64 mc_ss; - int64 mc_len; - int64 mc_fpformat; - int64 mc_ownedfp; - int64 mc_fpstate[64]; - int64 mc_fsbase; - int64 mc_gsbase; - int64 mc_spare[6]; + int64 mc_onstack; + int64 mc_rdi; + int64 mc_rsi; + int64 mc_rdx; + int64 mc_rcx; + int64 mc_r8; + int64 mc_r9; + int64 mc_rax; + int64 mc_rbx; + int64 mc_rbp; + int64 mc_r10; + int64 mc_r11; + int64 mc_r12; + int64 mc_r13; + int64 mc_r14; + int64 mc_r15; + uint32 mc_trapno; + uint16 mc_fs; + uint16 mc_gs; + int64 mc_addr; + uint32 mc_flags; + uint16 mc_es; + uint16 mc_ds; + int64 mc_err; + int64 mc_rip; + int64 mc_cs; + int64 mc_rflags; + int64 mc_rsp; + int64 mc_ss; + int64 mc_len; + int64 mc_fpformat; + int64 mc_ownedfp; + int64 mc_fpstate[64]; + int64 mc_fsbase; + int64 mc_gsbase; + int64 mc_spare[6]; }; - -typedef struct Ucontext Ucontext; struct Ucontext { - Sigset uc_sigmask; - Mcontext uc_mcontext; - Ucontext *uc_link; - StackT uc_stack; - int32 uc_flags; - int32 __spare__[4]; - byte pad_godefs_0[12]; + Sigset uc_sigmask; + Mcontext uc_mcontext; + Ucontext *uc_link; + StackT uc_stack; + int32 uc_flags; + int32 __spare__[4]; + byte Pad_cgo_0[12]; }; -typedef struct Timespec Timespec; struct Timespec { - int64 tv_sec; - int64 tv_nsec; + int64 tv_sec; + int64 tv_nsec; }; - -typedef struct Timeval Timeval; struct Timeval { - int64 tv_sec; - int64 tv_usec; + int64 tv_sec; + int64 tv_usec; }; - -typedef struct Itimerval Itimerval; struct Itimerval { - Timeval it_interval; - Timeval it_value; + Timeval it_interval; + Timeval it_value; }; + + #pragma pack off diff --git a/src/pkg/runtime/defs_freebsd_arm.h b/src/pkg/runtime/defs_freebsd_arm.h new file mode 100644 index 000000000..a744b808f --- /dev/null +++ b/src/pkg/runtime/defs_freebsd_arm.h @@ -0,0 +1,163 @@ +// Created by cgo -cdefs - DO NOT EDIT +// cgo -cdefs defs_freebsd.go + + +enum { + PROT_NONE = 0x0, + PROT_READ = 0x1, + PROT_WRITE = 0x2, + PROT_EXEC = 0x4, + 
+ MAP_ANON = 0x1000, + MAP_PRIVATE = 0x2, + MAP_FIXED = 0x10, + + MADV_FREE = 0x5, + + SA_SIGINFO = 0x40, + SA_RESTART = 0x2, + SA_ONSTACK = 0x1, + + UMTX_OP_WAIT = 0x2, + UMTX_OP_WAKE = 0x3, + + EINTR = 0x4, + + SIGHUP = 0x1, + SIGINT = 0x2, + SIGQUIT = 0x3, + SIGILL = 0x4, + SIGTRAP = 0x5, + SIGABRT = 0x6, + SIGEMT = 0x7, + SIGFPE = 0x8, + SIGKILL = 0x9, + SIGBUS = 0xa, + SIGSEGV = 0xb, + SIGSYS = 0xc, + SIGPIPE = 0xd, + SIGALRM = 0xe, + SIGTERM = 0xf, + SIGURG = 0x10, + SIGSTOP = 0x11, + SIGTSTP = 0x12, + SIGCONT = 0x13, + SIGCHLD = 0x14, + SIGTTIN = 0x15, + SIGTTOU = 0x16, + SIGIO = 0x17, + SIGXCPU = 0x18, + SIGXFSZ = 0x19, + SIGVTALRM = 0x1a, + SIGPROF = 0x1b, + SIGWINCH = 0x1c, + SIGINFO = 0x1d, + SIGUSR1 = 0x1e, + SIGUSR2 = 0x1f, + + FPE_INTDIV = 0x2, + FPE_INTOVF = 0x1, + FPE_FLTDIV = 0x3, + FPE_FLTOVF = 0x4, + FPE_FLTUND = 0x5, + FPE_FLTRES = 0x6, + FPE_FLTINV = 0x7, + FPE_FLTSUB = 0x8, + + BUS_ADRALN = 0x1, + BUS_ADRERR = 0x2, + BUS_OBJERR = 0x3, + + SEGV_MAPERR = 0x1, + SEGV_ACCERR = 0x2, + + ITIMER_REAL = 0x0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, +}; + +typedef struct Rtprio Rtprio; +typedef struct ThrParam ThrParam; +typedef struct Sigaltstack Sigaltstack; +typedef struct Sigset Sigset; +typedef struct StackT StackT; +typedef struct Siginfo Siginfo; +typedef struct Mcontext Mcontext; +typedef struct Ucontext Ucontext; +typedef struct Timespec Timespec; +typedef struct Timeval Timeval; +typedef struct Itimerval Itimerval; + +#pragma pack on + +struct Rtprio { + uint16 type; + uint16 prio; +}; +struct ThrParam { + void *start_func; + byte *arg; + uint8 *stack_base; + uint32 stack_size; + uint8 *tls_base; + uint32 tls_size; + int32 *child_tid; + int32 *parent_tid; + int32 flags; + Rtprio *rtp; + void *spare[3]; +}; +struct Sigaltstack { + uint8 *ss_sp; + uint32 ss_size; + int32 ss_flags; +}; +struct Sigset { + uint32 __bits[4]; +}; +struct StackT { + uint8 *ss_sp; + uint32 ss_size; + int32 ss_flags; +}; + +struct Siginfo { + int32 si_signo; + int32 si_errno; + int32 si_code; + int32 si_pid; + uint32 si_uid; + int32 si_status; + byte *si_addr; + byte si_value[4]; + byte _reason[32]; +}; + +struct Mcontext { + uint32 __gregs[17]; + byte __fpu[140]; +}; +struct Ucontext { + Sigset uc_sigmask; + Mcontext uc_mcontext; + Ucontext *uc_link; + StackT uc_stack; + int32 uc_flags; + int32 __spare__[4]; +}; + +struct Timespec { + int64 tv_sec; + int32 tv_nsec; +}; +struct Timeval { + int64 tv_sec; + int32 tv_usec; +}; +struct Itimerval { + Timeval it_interval; + Timeval it_value; +}; + + +#pragma pack off diff --git a/src/pkg/runtime/defs_linux_386.h b/src/pkg/runtime/defs_linux_386.h index 02760f987..e257a6f85 100644 --- a/src/pkg/runtime/defs_linux_386.h +++ b/src/pkg/runtime/defs_linux_386.h @@ -132,7 +132,7 @@ struct Sigaction { void *k_sa_handler; uint32 sa_flags; void *sa_restorer; - uint32 sa_mask; + uint64 sa_mask; }; struct Siginfo { int32 si_signo; diff --git a/src/pkg/runtime/defs_linux_arm.h b/src/pkg/runtime/defs_linux_arm.h index 9e5c83a07..f72ec3d1b 100644 --- a/src/pkg/runtime/defs_linux_arm.h +++ b/src/pkg/runtime/defs_linux_arm.h @@ -143,6 +143,6 @@ struct Sigaction { void *sa_handler; uint32 sa_flags; void *sa_restorer; - uint32 sa_mask; + uint64 sa_mask; }; #pragma pack off diff --git a/src/pkg/runtime/defs_netbsd.go b/src/pkg/runtime/defs_netbsd.go index 47c30cf10..53e061041 100644 --- a/src/pkg/runtime/defs_netbsd.go +++ b/src/pkg/runtime/defs_netbsd.go @@ -7,18 +7,21 @@ /* Input to cgo. 
-GOARCH=amd64 cgo -cdefs defs.go >amd64/defs.h -GOARCH=386 cgo -cdefs defs.go >386/defs.h +GOARCH=amd64 go tool cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go >defs_netbsd_amd64.h +GOARCH=386 go tool cgo -cdefs defs_netbsd.go defs_netbsd_386.go >defs_netbsd_386.h */ +// +godefs map __fpregset_t [644]byte + package runtime /* #include <sys/types.h> #include <sys/mman.h> +#include <sys/signal.h> #include <sys/time.h> +#include <sys/ucontext.h> #include <sys/unistd.h> -#include <sys/signal.h> #include <errno.h> #include <signal.h> */ @@ -34,6 +37,8 @@ const ( MAP_PRIVATE = C.MAP_PRIVATE MAP_FIXED = C.MAP_FIXED + MADV_FREE = C.MADV_FREE + SA_SIGINFO = C.SA_SIGINFO SA_RESTART = C.SA_RESTART SA_ONSTACK = C.SA_ONSTACK @@ -95,8 +100,7 @@ const ( type Sigaltstack C.struct_sigaltstack type Sigset C.sigset_t -type Siginfo C.siginfo_t -type Sigval C.union_sigval +type Siginfo C.struct__ksiginfo type StackT C.stack_t @@ -104,8 +108,5 @@ type Timespec C.struct_timespec type Timeval C.struct_timeval type Itimerval C.struct_itimerval -// This is a hack to avoid pulling in machine/fpu.h. -type sfxsave64 struct{} -type usavefpu struct{} - -type Sigcontext C.struct_sigcontext +type McontextT C.mcontext_t +type UcontextT C.ucontext_t diff --git a/src/pkg/runtime/defs_netbsd_386.go b/src/pkg/runtime/defs_netbsd_386.go new file mode 100644 index 000000000..e9e36608e --- /dev/null +++ b/src/pkg/runtime/defs_netbsd_386.go @@ -0,0 +1,42 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +/* +Input to cgo. + +GOARCH=amd64 go tool cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go >defs_netbsd_amd64.h +GOARCH=386 go tool cgo -cdefs defs_netbsd.go defs_netbsd_386.go >defs_netbsd_386.h +*/ + +package runtime + +/* +#include <sys/types.h> +#include <machine/mcontext.h> +*/ +import "C" + +const ( + REG_GS = C._REG_GS + REG_FS = C._REG_FS + REG_ES = C._REG_ES + REG_DS = C._REG_DS + REG_EDI = C._REG_EDI + REG_ESI = C._REG_ESI + REG_EBP = C._REG_EBP + REG_ESP = C._REG_ESP + REG_EBX = C._REG_EBX + REG_EDX = C._REG_EDX + REG_ECX = C._REG_ECX + REG_EAX = C._REG_EAX + REG_TRAPNO = C._REG_TRAPNO + REG_ERR = C._REG_ERR + REG_EIP = C._REG_EIP + REG_CS = C._REG_CS + REG_EFL = C._REG_EFL + REG_UESP = C._REG_UESP + REG_SS = C._REG_SS +) diff --git a/src/pkg/runtime/defs_netbsd_386.h b/src/pkg/runtime/defs_netbsd_386.h index aff87fb3b..04c380e3f 100644 --- a/src/pkg/runtime/defs_netbsd_386.h +++ b/src/pkg/runtime/defs_netbsd_386.h @@ -1,146 +1,163 @@ -// godefs -f -m32 defs.c +// Created by cgo -cdefs - DO NOT EDIT +// cgo -cdefs defs_netbsd.go defs_netbsd_386.go -// MACHINE GENERATED - DO NOT EDIT. 
-// Constants enum { - PROT_NONE = 0, - PROT_READ = 0x1, - PROT_WRITE = 0x2, - PROT_EXEC = 0x4, - MAP_ANON = 0x1000, - MAP_PRIVATE = 0x2, - MAP_FIXED = 0x10, - SA_SIGINFO = 0x40, - SA_RESTART = 0x2, - SA_ONSTACK = 0x1, - EINTR = 0x4, - SIGHUP = 0x1, - SIGINT = 0x2, - SIGQUIT = 0x3, - SIGILL = 0x4, - SIGTRAP = 0x5, - SIGABRT = 0x6, - SIGEMT = 0x7, - SIGFPE = 0x8, - SIGKILL = 0x9, - SIGBUS = 0xa, - SIGSEGV = 0xb, - SIGSYS = 0xc, - SIGPIPE = 0xd, - SIGALRM = 0xe, - SIGTERM = 0xf, - SIGURG = 0x10, - SIGSTOP = 0x11, - SIGTSTP = 0x12, - SIGCONT = 0x13, - SIGCHLD = 0x14, - SIGTTIN = 0x15, - SIGTTOU = 0x16, - SIGIO = 0x17, - SIGXCPU = 0x18, - SIGXFSZ = 0x19, - SIGVTALRM = 0x1a, - SIGPROF = 0x1b, - SIGWINCH = 0x1c, - SIGINFO = 0x1d, - SIGUSR1 = 0x1e, - SIGUSR2 = 0x1f, - FPE_INTDIV = 0x1, - FPE_INTOVF = 0x2, - FPE_FLTDIV = 0x3, - FPE_FLTOVF = 0x4, - FPE_FLTUND = 0x5, - FPE_FLTRES = 0x6, - FPE_FLTINV = 0x7, - FPE_FLTSUB = 0x8, - BUS_ADRALN = 0x1, - BUS_ADRERR = 0x2, - BUS_OBJERR = 0x3, - SEGV_MAPERR = 0x1, - SEGV_ACCERR = 0x2, - ITIMER_REAL = 0, - ITIMER_VIRTUAL = 0x1, - ITIMER_PROF = 0x2, + PROT_NONE = 0x0, + PROT_READ = 0x1, + PROT_WRITE = 0x2, + PROT_EXEC = 0x4, + + MAP_ANON = 0x1000, + MAP_PRIVATE = 0x2, + MAP_FIXED = 0x10, + + MADV_FREE = 0x6, + + SA_SIGINFO = 0x40, + SA_RESTART = 0x2, + SA_ONSTACK = 0x1, + + EINTR = 0x4, + + SIGHUP = 0x1, + SIGINT = 0x2, + SIGQUIT = 0x3, + SIGILL = 0x4, + SIGTRAP = 0x5, + SIGABRT = 0x6, + SIGEMT = 0x7, + SIGFPE = 0x8, + SIGKILL = 0x9, + SIGBUS = 0xa, + SIGSEGV = 0xb, + SIGSYS = 0xc, + SIGPIPE = 0xd, + SIGALRM = 0xe, + SIGTERM = 0xf, + SIGURG = 0x10, + SIGSTOP = 0x11, + SIGTSTP = 0x12, + SIGCONT = 0x13, + SIGCHLD = 0x14, + SIGTTIN = 0x15, + SIGTTOU = 0x16, + SIGIO = 0x17, + SIGXCPU = 0x18, + SIGXFSZ = 0x19, + SIGVTALRM = 0x1a, + SIGPROF = 0x1b, + SIGWINCH = 0x1c, + SIGINFO = 0x1d, + SIGUSR1 = 0x1e, + SIGUSR2 = 0x1f, + + FPE_INTDIV = 0x1, + FPE_INTOVF = 0x2, + FPE_FLTDIV = 0x3, + FPE_FLTOVF = 0x4, + FPE_FLTUND = 0x5, + FPE_FLTRES = 0x6, + FPE_FLTINV = 0x7, + FPE_FLTSUB = 0x8, + + BUS_ADRALN = 0x1, + BUS_ADRERR = 0x2, + BUS_OBJERR = 0x3, + + SEGV_MAPERR = 0x1, + SEGV_ACCERR = 0x2, + + ITIMER_REAL = 0x0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, }; -// Types +typedef struct Sigaltstack Sigaltstack; +typedef struct Sigset Sigset; +typedef struct Siginfo Siginfo; +typedef struct StackT StackT; +typedef struct Timespec Timespec; +typedef struct Timeval Timeval; +typedef struct Itimerval Itimerval; +typedef struct McontextT McontextT; +typedef struct UcontextT UcontextT; + #pragma pack on -typedef struct Sigaltstack Sigaltstack; struct Sigaltstack { - void *ss_sp; - uint32 ss_size; - int32 ss_flags; + byte *ss_sp; + uint32 ss_size; + int32 ss_flags; }; - -typedef uint32 Sigset; - -typedef struct Siginfo Siginfo; -struct Siginfo { - int32 si_signo; - int32 si_code; - int32 si_errno; - byte _data[116]; +struct Sigset { + uint32 __bits[4]; }; - -typedef union Sigval Sigval; -union Sigval { - int32 sival_int; - void *sival_ptr; +struct Siginfo { + int32 _signo; + int32 _code; + int32 _errno; + byte _reason[20]; }; -typedef struct StackT StackT; struct StackT { - void *ss_sp; - uint32 ss_size; - int32 ss_flags; + byte *ss_sp; + uint32 ss_size; + int32 ss_flags; }; -typedef struct Timespec Timespec; struct Timespec { - int32 tv_sec; - int32 tv_nsec; + int64 tv_sec; + int32 tv_nsec; }; - -typedef struct Timeval Timeval; struct Timeval { - int32 tv_sec; - int32 tv_usec; + int64 tv_sec; + int32 tv_usec; }; - -typedef struct Itimerval Itimerval; struct Itimerval { - Timeval 
it_interval; - Timeval it_value; + Timeval it_interval; + Timeval it_value; }; -typedef void sfxsave64; - -typedef void usavefpu; - -typedef struct Sigcontext Sigcontext; -struct Sigcontext { - int32 sc_gs; - int32 sc_fs; - int32 sc_es; - int32 sc_ds; - int32 sc_edi; - int32 sc_esi; - int32 sc_ebp; - int32 sc_ebx; - int32 sc_edx; - int32 sc_ecx; - int32 sc_eax; - int32 sc_eip; - int32 sc_cs; - int32 sc_eflags; - int32 sc_esp; - int32 sc_ss; - int32 sc_onstack; - int32 sc_mask; - int32 sc_trapno; - int32 sc_err; - usavefpu *sc_fpstate; +struct McontextT { + int32 __gregs[19]; + byte __fpregs[644]; + int32 _mc_tlsbase; +}; +struct UcontextT { + uint32 uc_flags; + UcontextT *uc_link; + Sigset uc_sigmask; + StackT uc_stack; + McontextT uc_mcontext; + int32 __uc_pad[4]; }; + + #pragma pack off +// Created by cgo -cdefs - DO NOT EDIT +// cgo -cdefs defs_netbsd.go defs_netbsd_386.go + + +enum { + REG_GS = 0x0, + REG_FS = 0x1, + REG_ES = 0x2, + REG_DS = 0x3, + REG_EDI = 0x4, + REG_ESI = 0x5, + REG_EBP = 0x6, + REG_ESP = 0x7, + REG_EBX = 0x8, + REG_EDX = 0x9, + REG_ECX = 0xa, + REG_EAX = 0xb, + REG_TRAPNO = 0xc, + REG_ERR = 0xd, + REG_EIP = 0xe, + REG_CS = 0xf, + REG_EFL = 0x10, + REG_UESP = 0x11, + REG_SS = 0x12, +}; + diff --git a/src/pkg/runtime/defs_netbsd_amd64.go b/src/pkg/runtime/defs_netbsd_amd64.go new file mode 100644 index 000000000..68f586b2f --- /dev/null +++ b/src/pkg/runtime/defs_netbsd_amd64.go @@ -0,0 +1,49 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +/* +Input to cgo. + +GOARCH=amd64 go tool cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go >defs_netbsd_amd64.h +GOARCH=386 go tool cgo -cdefs defs_netbsd.go defs_netbsd_386.go >defs_netbsd_386.h +*/ + +package runtime + +/* +#include <sys/types.h> +#include <machine/mcontext.h> +*/ +import "C" + +const ( + REG_RDI = C._REG_RDI + REG_RSI = C._REG_RSI + REG_RDX = C._REG_RDX + REG_RCX = C._REG_RCX + REG_R8 = C._REG_R8 + REG_R9 = C._REG_R9 + REG_R10 = C._REG_R10 + REG_R11 = C._REG_R11 + REG_R12 = C._REG_R12 + REG_R13 = C._REG_R13 + REG_R14 = C._REG_R14 + REG_R15 = C._REG_R15 + REG_RBP = C._REG_RBP + REG_RBX = C._REG_RBX + REG_RAX = C._REG_RAX + REG_GS = C._REG_GS + REG_FS = C._REG_FS + REG_ES = C._REG_ES + REG_DS = C._REG_DS + REG_TRAPNO = C._REG_TRAPNO + REG_ERR = C._REG_ERR + REG_RIP = C._REG_RIP + REG_CS = C._REG_CS + REG_RFLAGS = C._REG_RFLAGS + REG_RSP = C._REG_RSP + REG_SS = C._REG_SS +) diff --git a/src/pkg/runtime/defs_netbsd_amd64.h b/src/pkg/runtime/defs_netbsd_amd64.h index 27bf4b9d6..3d3f576d3 100644 --- a/src/pkg/runtime/defs_netbsd_amd64.h +++ b/src/pkg/runtime/defs_netbsd_amd64.h @@ -1,158 +1,174 @@ -// godefs -f -m64 defs.c +// Created by cgo -cdefs - DO NOT EDIT +// cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go -// MACHINE GENERATED - DO NOT EDIT. 
-// Constants enum { - PROT_NONE = 0, - PROT_READ = 0x1, - PROT_WRITE = 0x2, - PROT_EXEC = 0x4, - MAP_ANON = 0x1000, - MAP_PRIVATE = 0x2, - MAP_FIXED = 0x10, - SA_SIGINFO = 0x40, - SA_RESTART = 0x2, - SA_ONSTACK = 0x1, - EINTR = 0x4, - SIGHUP = 0x1, - SIGINT = 0x2, - SIGQUIT = 0x3, - SIGILL = 0x4, - SIGTRAP = 0x5, - SIGABRT = 0x6, - SIGEMT = 0x7, - SIGFPE = 0x8, - SIGKILL = 0x9, - SIGBUS = 0xa, - SIGSEGV = 0xb, - SIGSYS = 0xc, - SIGPIPE = 0xd, - SIGALRM = 0xe, - SIGTERM = 0xf, - SIGURG = 0x10, - SIGSTOP = 0x11, - SIGTSTP = 0x12, - SIGCONT = 0x13, - SIGCHLD = 0x14, - SIGTTIN = 0x15, - SIGTTOU = 0x16, - SIGIO = 0x17, - SIGXCPU = 0x18, - SIGXFSZ = 0x19, - SIGVTALRM = 0x1a, - SIGPROF = 0x1b, - SIGWINCH = 0x1c, - SIGINFO = 0x1d, - SIGUSR1 = 0x1e, - SIGUSR2 = 0x1f, - FPE_INTDIV = 0x1, - FPE_INTOVF = 0x2, - FPE_FLTDIV = 0x3, - FPE_FLTOVF = 0x4, - FPE_FLTUND = 0x5, - FPE_FLTRES = 0x6, - FPE_FLTINV = 0x7, - FPE_FLTSUB = 0x8, - BUS_ADRALN = 0x1, - BUS_ADRERR = 0x2, - BUS_OBJERR = 0x3, - SEGV_MAPERR = 0x1, - SEGV_ACCERR = 0x2, - ITIMER_REAL = 0, - ITIMER_VIRTUAL = 0x1, - ITIMER_PROF = 0x2, + PROT_NONE = 0x0, + PROT_READ = 0x1, + PROT_WRITE = 0x2, + PROT_EXEC = 0x4, + + MAP_ANON = 0x1000, + MAP_PRIVATE = 0x2, + MAP_FIXED = 0x10, + + MADV_FREE = 0x6, + + SA_SIGINFO = 0x40, + SA_RESTART = 0x2, + SA_ONSTACK = 0x1, + + EINTR = 0x4, + + SIGHUP = 0x1, + SIGINT = 0x2, + SIGQUIT = 0x3, + SIGILL = 0x4, + SIGTRAP = 0x5, + SIGABRT = 0x6, + SIGEMT = 0x7, + SIGFPE = 0x8, + SIGKILL = 0x9, + SIGBUS = 0xa, + SIGSEGV = 0xb, + SIGSYS = 0xc, + SIGPIPE = 0xd, + SIGALRM = 0xe, + SIGTERM = 0xf, + SIGURG = 0x10, + SIGSTOP = 0x11, + SIGTSTP = 0x12, + SIGCONT = 0x13, + SIGCHLD = 0x14, + SIGTTIN = 0x15, + SIGTTOU = 0x16, + SIGIO = 0x17, + SIGXCPU = 0x18, + SIGXFSZ = 0x19, + SIGVTALRM = 0x1a, + SIGPROF = 0x1b, + SIGWINCH = 0x1c, + SIGINFO = 0x1d, + SIGUSR1 = 0x1e, + SIGUSR2 = 0x1f, + + FPE_INTDIV = 0x1, + FPE_INTOVF = 0x2, + FPE_FLTDIV = 0x3, + FPE_FLTOVF = 0x4, + FPE_FLTUND = 0x5, + FPE_FLTRES = 0x6, + FPE_FLTINV = 0x7, + FPE_FLTSUB = 0x8, + + BUS_ADRALN = 0x1, + BUS_ADRERR = 0x2, + BUS_OBJERR = 0x3, + + SEGV_MAPERR = 0x1, + SEGV_ACCERR = 0x2, + + ITIMER_REAL = 0x0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, }; -// Types +typedef struct Sigaltstack Sigaltstack; +typedef struct Sigset Sigset; +typedef struct Siginfo Siginfo; +typedef struct StackT StackT; +typedef struct Timespec Timespec; +typedef struct Timeval Timeval; +typedef struct Itimerval Itimerval; +typedef struct McontextT McontextT; +typedef struct UcontextT UcontextT; + #pragma pack on -typedef struct Sigaltstack Sigaltstack; struct Sigaltstack { - void *ss_sp; - uint64 ss_size; - int32 ss_flags; - byte pad_godefs_0[4]; + byte *ss_sp; + uint64 ss_size; + int32 ss_flags; + byte Pad_cgo_0[4]; }; - -typedef uint32 Sigset; - -typedef struct Siginfo Siginfo; -struct Siginfo { - int32 si_signo; - int32 si_code; - int32 si_errno; - byte pad_godefs_0[4]; - byte _data[120]; +struct Sigset { + uint32 __bits[4]; }; - -typedef union Sigval Sigval; -union Sigval { - int32 sival_int; - void *sival_ptr; +struct Siginfo { + int32 _signo; + int32 _code; + int32 _errno; + int32 _pad; + byte _reason[24]; }; -typedef struct StackT StackT; struct StackT { - void *ss_sp; - uint64 ss_size; - int32 ss_flags; - byte pad_godefs_0[4]; + byte *ss_sp; + uint64 ss_size; + int32 ss_flags; + byte Pad_cgo_0[4]; }; -typedef struct Timespec Timespec; struct Timespec { - int32 tv_sec; - byte pad_godefs_0[4]; - int64 tv_nsec; + int64 tv_sec; + int64 tv_nsec; }; - -typedef struct Timeval Timeval; 
struct Timeval { - int64 tv_sec; - int64 tv_usec; + int64 tv_sec; + int32 tv_usec; + byte Pad_cgo_0[4]; }; - -typedef struct Itimerval Itimerval; struct Itimerval { - Timeval it_interval; - Timeval it_value; + Timeval it_interval; + Timeval it_value; }; -typedef void sfxsave64; - -typedef void usavefpu; - -typedef struct Sigcontext Sigcontext; -struct Sigcontext { - int64 sc_rdi; - int64 sc_rsi; - int64 sc_rdx; - int64 sc_rcx; - int64 sc_r8; - int64 sc_r9; - int64 sc_r10; - int64 sc_r11; - int64 sc_r12; - int64 sc_r13; - int64 sc_r14; - int64 sc_r15; - int64 sc_rbp; - int64 sc_rbx; - int64 sc_rax; - int64 sc_gs; - int64 sc_fs; - int64 sc_es; - int64 sc_ds; - int64 sc_trapno; - int64 sc_err; - int64 sc_rip; - int64 sc_cs; - int64 sc_rflags; - int64 sc_rsp; - int64 sc_ss; - sfxsave64 *sc_fpstate; - int32 sc_onstack; - int32 sc_mask; +struct McontextT { + uint64 __gregs[26]; + uint64 _mc_tlsbase; + int8 __fpregs[512]; +}; +struct UcontextT { + uint32 uc_flags; + byte Pad_cgo_0[4]; + UcontextT *uc_link; + Sigset uc_sigmask; + StackT uc_stack; + McontextT uc_mcontext; }; + + #pragma pack off +// Created by cgo -cdefs - DO NOT EDIT +// cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go + + +enum { + REG_RDI = 0x0, + REG_RSI = 0x1, + REG_RDX = 0x2, + REG_RCX = 0x3, + REG_R8 = 0x4, + REG_R9 = 0x5, + REG_R10 = 0x6, + REG_R11 = 0x7, + REG_R12 = 0x8, + REG_R13 = 0x9, + REG_R14 = 0xa, + REG_R15 = 0xb, + REG_RBP = 0xc, + REG_RBX = 0xd, + REG_RAX = 0xe, + REG_GS = 0xf, + REG_FS = 0x10, + REG_ES = 0x11, + REG_DS = 0x12, + REG_TRAPNO = 0x13, + REG_ERR = 0x14, + REG_RIP = 0x15, + REG_CS = 0x16, + REG_RFLAGS = 0x17, + REG_RSP = 0x18, + REG_SS = 0x19, +}; + diff --git a/src/pkg/runtime/defs_netbsd_arm.h b/src/pkg/runtime/defs_netbsd_arm.h new file mode 100644 index 000000000..f67475c76 --- /dev/null +++ b/src/pkg/runtime/defs_netbsd_arm.h @@ -0,0 +1,140 @@ +// Created by cgo -cdefs - DO NOT EDIT +// cgo -cdefs defs_netbsd.go + + +enum { + PROT_NONE = 0x0, + PROT_READ = 0x1, + PROT_WRITE = 0x2, + PROT_EXEC = 0x4, + + MAP_ANON = 0x1000, + MAP_PRIVATE = 0x2, + MAP_FIXED = 0x10, + + MADV_FREE = 0x6, + + SA_SIGINFO = 0x40, + SA_RESTART = 0x2, + SA_ONSTACK = 0x1, + + EINTR = 0x4, + + SIGHUP = 0x1, + SIGINT = 0x2, + SIGQUIT = 0x3, + SIGILL = 0x4, + SIGTRAP = 0x5, + SIGABRT = 0x6, + SIGEMT = 0x7, + SIGFPE = 0x8, + SIGKILL = 0x9, + SIGBUS = 0xa, + SIGSEGV = 0xb, + SIGSYS = 0xc, + SIGPIPE = 0xd, + SIGALRM = 0xe, + SIGTERM = 0xf, + SIGURG = 0x10, + SIGSTOP = 0x11, + SIGTSTP = 0x12, + SIGCONT = 0x13, + SIGCHLD = 0x14, + SIGTTIN = 0x15, + SIGTTOU = 0x16, + SIGIO = 0x17, + SIGXCPU = 0x18, + SIGXFSZ = 0x19, + SIGVTALRM = 0x1a, + SIGPROF = 0x1b, + SIGWINCH = 0x1c, + SIGINFO = 0x1d, + SIGUSR1 = 0x1e, + SIGUSR2 = 0x1f, + + FPE_INTDIV = 0x1, + FPE_INTOVF = 0x2, + FPE_FLTDIV = 0x3, + FPE_FLTOVF = 0x4, + FPE_FLTUND = 0x5, + FPE_FLTRES = 0x6, + FPE_FLTINV = 0x7, + FPE_FLTSUB = 0x8, + + BUS_ADRALN = 0x1, + BUS_ADRERR = 0x2, + BUS_OBJERR = 0x3, + + SEGV_MAPERR = 0x1, + SEGV_ACCERR = 0x2, + + ITIMER_REAL = 0x0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, +}; + +typedef struct Sigaltstack Sigaltstack; +typedef struct Sigset Sigset; +typedef struct Siginfo Siginfo; +typedef struct StackT StackT; +typedef struct Timespec Timespec; +typedef struct Timeval Timeval; +typedef struct Itimerval Itimerval; +typedef struct McontextT McontextT; +typedef struct UcontextT UcontextT; + +#pragma pack on + +struct Sigaltstack { + byte *ss_sp; + uint32 ss_size; + int32 ss_flags; +}; +struct Sigset { + uint32 __bits[4]; +}; +struct Siginfo { + int32 
_signo; + int32 _code; + int32 _errno; + byte _reason[20]; +}; + +struct StackT { + byte *ss_sp; + uint32 ss_size; + int32 ss_flags; +}; + +struct Timespec { + int64 tv_sec; + int32 tv_nsec; +}; +struct Timeval { + int64 tv_sec; + int32 tv_usec; +}; +struct Itimerval { + Timeval it_interval; + Timeval it_value; +}; + +struct McontextT { + uint32 __gregs[17]; +#ifdef __ARM_EABI__ + byte __fpu[4+8*32+4]; +#else + byte __fpu[4+4*33+4]; +#endif + uint32 _mc_tlsbase; +}; +struct UcontextT { + uint32 uc_flags; + UcontextT *uc_link; + Sigset uc_sigmask; + StackT uc_stack; + McontextT uc_mcontext; + int32 __uc_pad[2]; +}; + +#pragma pack off diff --git a/src/pkg/runtime/defs_openbsd.go b/src/pkg/runtime/defs_openbsd.go index 47c30cf10..ff94b9405 100644 --- a/src/pkg/runtime/defs_openbsd.go +++ b/src/pkg/runtime/defs_openbsd.go @@ -7,8 +7,8 @@ /* Input to cgo. -GOARCH=amd64 cgo -cdefs defs.go >amd64/defs.h -GOARCH=386 cgo -cdefs defs.go >386/defs.h +GOARCH=amd64 go tool cgo -cdefs defs_openbsd.go >defs_openbsd_amd64.h +GOARCH=386 go tool cgo -cdefs defs_openbsd.go >defs_openbsd_386.h */ package runtime @@ -34,6 +34,8 @@ const ( MAP_PRIVATE = C.MAP_PRIVATE MAP_FIXED = C.MAP_FIXED + MADV_FREE = C.MADV_FREE + SA_SIGINFO = C.SA_SIGINFO SA_RESTART = C.SA_RESTART SA_ONSTACK = C.SA_ONSTACK @@ -93,9 +95,12 @@ const ( ITIMER_PROF = C.ITIMER_PROF ) +type Tfork C.struct___tfork + type Sigaltstack C.struct_sigaltstack -type Sigset C.sigset_t +type Sigcontext C.struct_sigcontext type Siginfo C.siginfo_t +type Sigset C.sigset_t type Sigval C.union_sigval type StackT C.stack_t @@ -103,9 +108,3 @@ type StackT C.stack_t type Timespec C.struct_timespec type Timeval C.struct_timeval type Itimerval C.struct_itimerval - -// This is a hack to avoid pulling in machine/fpu.h. -type sfxsave64 struct{} -type usavefpu struct{} - -type Sigcontext C.struct_sigcontext diff --git a/src/pkg/runtime/defs_openbsd_386.h b/src/pkg/runtime/defs_openbsd_386.h index aff87fb3b..323bb084a 100644 --- a/src/pkg/runtime/defs_openbsd_386.h +++ b/src/pkg/runtime/defs_openbsd_386.h @@ -1,146 +1,150 @@ -// godefs -f -m32 defs.c +// Created by cgo -cdefs - DO NOT EDIT +// cgo -cdefs defs_openbsd.go -// MACHINE GENERATED - DO NOT EDIT. 
-// Constants enum { - PROT_NONE = 0, - PROT_READ = 0x1, - PROT_WRITE = 0x2, - PROT_EXEC = 0x4, - MAP_ANON = 0x1000, - MAP_PRIVATE = 0x2, - MAP_FIXED = 0x10, - SA_SIGINFO = 0x40, - SA_RESTART = 0x2, - SA_ONSTACK = 0x1, - EINTR = 0x4, - SIGHUP = 0x1, - SIGINT = 0x2, - SIGQUIT = 0x3, - SIGILL = 0x4, - SIGTRAP = 0x5, - SIGABRT = 0x6, - SIGEMT = 0x7, - SIGFPE = 0x8, - SIGKILL = 0x9, - SIGBUS = 0xa, - SIGSEGV = 0xb, - SIGSYS = 0xc, - SIGPIPE = 0xd, - SIGALRM = 0xe, - SIGTERM = 0xf, - SIGURG = 0x10, - SIGSTOP = 0x11, - SIGTSTP = 0x12, - SIGCONT = 0x13, - SIGCHLD = 0x14, - SIGTTIN = 0x15, - SIGTTOU = 0x16, - SIGIO = 0x17, - SIGXCPU = 0x18, - SIGXFSZ = 0x19, - SIGVTALRM = 0x1a, - SIGPROF = 0x1b, - SIGWINCH = 0x1c, - SIGINFO = 0x1d, - SIGUSR1 = 0x1e, - SIGUSR2 = 0x1f, - FPE_INTDIV = 0x1, - FPE_INTOVF = 0x2, - FPE_FLTDIV = 0x3, - FPE_FLTOVF = 0x4, - FPE_FLTUND = 0x5, - FPE_FLTRES = 0x6, - FPE_FLTINV = 0x7, - FPE_FLTSUB = 0x8, - BUS_ADRALN = 0x1, - BUS_ADRERR = 0x2, - BUS_OBJERR = 0x3, - SEGV_MAPERR = 0x1, - SEGV_ACCERR = 0x2, - ITIMER_REAL = 0, - ITIMER_VIRTUAL = 0x1, - ITIMER_PROF = 0x2, -}; + PROT_NONE = 0x0, + PROT_READ = 0x1, + PROT_WRITE = 0x2, + PROT_EXEC = 0x4, -// Types -#pragma pack on + MAP_ANON = 0x1000, + MAP_PRIVATE = 0x2, + MAP_FIXED = 0x10, -typedef struct Sigaltstack Sigaltstack; -struct Sigaltstack { - void *ss_sp; - uint32 ss_size; - int32 ss_flags; -}; + MADV_FREE = 0x6, + + SA_SIGINFO = 0x40, + SA_RESTART = 0x2, + SA_ONSTACK = 0x1, + + EINTR = 0x4, + + SIGHUP = 0x1, + SIGINT = 0x2, + SIGQUIT = 0x3, + SIGILL = 0x4, + SIGTRAP = 0x5, + SIGABRT = 0x6, + SIGEMT = 0x7, + SIGFPE = 0x8, + SIGKILL = 0x9, + SIGBUS = 0xa, + SIGSEGV = 0xb, + SIGSYS = 0xc, + SIGPIPE = 0xd, + SIGALRM = 0xe, + SIGTERM = 0xf, + SIGURG = 0x10, + SIGSTOP = 0x11, + SIGTSTP = 0x12, + SIGCONT = 0x13, + SIGCHLD = 0x14, + SIGTTIN = 0x15, + SIGTTOU = 0x16, + SIGIO = 0x17, + SIGXCPU = 0x18, + SIGXFSZ = 0x19, + SIGVTALRM = 0x1a, + SIGPROF = 0x1b, + SIGWINCH = 0x1c, + SIGINFO = 0x1d, + SIGUSR1 = 0x1e, + SIGUSR2 = 0x1f, + + FPE_INTDIV = 0x1, + FPE_INTOVF = 0x2, + FPE_FLTDIV = 0x3, + FPE_FLTOVF = 0x4, + FPE_FLTUND = 0x5, + FPE_FLTRES = 0x6, + FPE_FLTINV = 0x7, + FPE_FLTSUB = 0x8, + + BUS_ADRALN = 0x1, + BUS_ADRERR = 0x2, + BUS_OBJERR = 0x3, -typedef uint32 Sigset; + SEGV_MAPERR = 0x1, + SEGV_ACCERR = 0x2, + ITIMER_REAL = 0x0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, +}; + +typedef struct Tfork Tfork; +typedef struct Sigaltstack Sigaltstack; +typedef struct Sigcontext Sigcontext; typedef struct Siginfo Siginfo; -struct Siginfo { - int32 si_signo; - int32 si_code; - int32 si_errno; - byte _data[116]; +typedef struct StackT StackT; +typedef struct Timespec Timespec; +typedef struct Timeval Timeval; +typedef struct Itimerval Itimerval; + +#pragma pack on + +struct Tfork { + byte *tf_tcb; + int32 *tf_tid; + byte *tf_stack; }; -typedef union Sigval Sigval; -union Sigval { - int32 sival_int; - void *sival_ptr; +struct Sigaltstack { + byte *ss_sp; + uint32 ss_size; + int32 ss_flags; }; +struct Sigcontext { + int32 sc_gs; + int32 sc_fs; + int32 sc_es; + int32 sc_ds; + int32 sc_edi; + int32 sc_esi; + int32 sc_ebp; + int32 sc_ebx; + int32 sc_edx; + int32 sc_ecx; + int32 sc_eax; + int32 sc_eip; + int32 sc_cs; + int32 sc_eflags; + int32 sc_esp; + int32 sc_ss; + int32 sc_onstack; + int32 sc_mask; + int32 sc_trapno; + int32 sc_err; + void *sc_fpstate; +}; +struct Siginfo { + int32 si_signo; + int32 si_code; + int32 si_errno; + byte _data[116]; +}; +typedef uint32 Sigset; +typedef byte Sigval[4]; -typedef struct StackT StackT; struct 
StackT { - void *ss_sp; - uint32 ss_size; - int32 ss_flags; + byte *ss_sp; + uint32 ss_size; + int32 ss_flags; }; -typedef struct Timespec Timespec; struct Timespec { - int32 tv_sec; - int32 tv_nsec; + int32 tv_sec; + int32 tv_nsec; }; - -typedef struct Timeval Timeval; struct Timeval { - int32 tv_sec; - int32 tv_usec; + int32 tv_sec; + int32 tv_usec; }; - -typedef struct Itimerval Itimerval; struct Itimerval { - Timeval it_interval; - Timeval it_value; + Timeval it_interval; + Timeval it_value; }; -typedef void sfxsave64; -typedef void usavefpu; - -typedef struct Sigcontext Sigcontext; -struct Sigcontext { - int32 sc_gs; - int32 sc_fs; - int32 sc_es; - int32 sc_ds; - int32 sc_edi; - int32 sc_esi; - int32 sc_ebp; - int32 sc_ebx; - int32 sc_edx; - int32 sc_ecx; - int32 sc_eax; - int32 sc_eip; - int32 sc_cs; - int32 sc_eflags; - int32 sc_esp; - int32 sc_ss; - int32 sc_onstack; - int32 sc_mask; - int32 sc_trapno; - int32 sc_err; - usavefpu *sc_fpstate; -}; #pragma pack off diff --git a/src/pkg/runtime/defs_openbsd_amd64.h b/src/pkg/runtime/defs_openbsd_amd64.h index 27bf4b9d6..429cc99f0 100644 --- a/src/pkg/runtime/defs_openbsd_amd64.h +++ b/src/pkg/runtime/defs_openbsd_amd64.h @@ -1,158 +1,162 @@ -// godefs -f -m64 defs.c +// Created by cgo -cdefs - DO NOT EDIT +// cgo -cdefs defs_openbsd.go -// MACHINE GENERATED - DO NOT EDIT. -// Constants enum { - PROT_NONE = 0, - PROT_READ = 0x1, - PROT_WRITE = 0x2, - PROT_EXEC = 0x4, - MAP_ANON = 0x1000, - MAP_PRIVATE = 0x2, - MAP_FIXED = 0x10, - SA_SIGINFO = 0x40, - SA_RESTART = 0x2, - SA_ONSTACK = 0x1, - EINTR = 0x4, - SIGHUP = 0x1, - SIGINT = 0x2, - SIGQUIT = 0x3, - SIGILL = 0x4, - SIGTRAP = 0x5, - SIGABRT = 0x6, - SIGEMT = 0x7, - SIGFPE = 0x8, - SIGKILL = 0x9, - SIGBUS = 0xa, - SIGSEGV = 0xb, - SIGSYS = 0xc, - SIGPIPE = 0xd, - SIGALRM = 0xe, - SIGTERM = 0xf, - SIGURG = 0x10, - SIGSTOP = 0x11, - SIGTSTP = 0x12, - SIGCONT = 0x13, - SIGCHLD = 0x14, - SIGTTIN = 0x15, - SIGTTOU = 0x16, - SIGIO = 0x17, - SIGXCPU = 0x18, - SIGXFSZ = 0x19, - SIGVTALRM = 0x1a, - SIGPROF = 0x1b, - SIGWINCH = 0x1c, - SIGINFO = 0x1d, - SIGUSR1 = 0x1e, - SIGUSR2 = 0x1f, - FPE_INTDIV = 0x1, - FPE_INTOVF = 0x2, - FPE_FLTDIV = 0x3, - FPE_FLTOVF = 0x4, - FPE_FLTUND = 0x5, - FPE_FLTRES = 0x6, - FPE_FLTINV = 0x7, - FPE_FLTSUB = 0x8, - BUS_ADRALN = 0x1, - BUS_ADRERR = 0x2, - BUS_OBJERR = 0x3, - SEGV_MAPERR = 0x1, - SEGV_ACCERR = 0x2, - ITIMER_REAL = 0, - ITIMER_VIRTUAL = 0x1, - ITIMER_PROF = 0x2, -}; + PROT_NONE = 0x0, + PROT_READ = 0x1, + PROT_WRITE = 0x2, + PROT_EXEC = 0x4, -// Types -#pragma pack on + MAP_ANON = 0x1000, + MAP_PRIVATE = 0x2, + MAP_FIXED = 0x10, -typedef struct Sigaltstack Sigaltstack; -struct Sigaltstack { - void *ss_sp; - uint64 ss_size; - int32 ss_flags; - byte pad_godefs_0[4]; -}; + MADV_FREE = 0x6, + + SA_SIGINFO = 0x40, + SA_RESTART = 0x2, + SA_ONSTACK = 0x1, + + EINTR = 0x4, + + SIGHUP = 0x1, + SIGINT = 0x2, + SIGQUIT = 0x3, + SIGILL = 0x4, + SIGTRAP = 0x5, + SIGABRT = 0x6, + SIGEMT = 0x7, + SIGFPE = 0x8, + SIGKILL = 0x9, + SIGBUS = 0xa, + SIGSEGV = 0xb, + SIGSYS = 0xc, + SIGPIPE = 0xd, + SIGALRM = 0xe, + SIGTERM = 0xf, + SIGURG = 0x10, + SIGSTOP = 0x11, + SIGTSTP = 0x12, + SIGCONT = 0x13, + SIGCHLD = 0x14, + SIGTTIN = 0x15, + SIGTTOU = 0x16, + SIGIO = 0x17, + SIGXCPU = 0x18, + SIGXFSZ = 0x19, + SIGVTALRM = 0x1a, + SIGPROF = 0x1b, + SIGWINCH = 0x1c, + SIGINFO = 0x1d, + SIGUSR1 = 0x1e, + SIGUSR2 = 0x1f, + + FPE_INTDIV = 0x1, + FPE_INTOVF = 0x2, + FPE_FLTDIV = 0x3, + FPE_FLTOVF = 0x4, + FPE_FLTUND = 0x5, + FPE_FLTRES = 0x6, + FPE_FLTINV = 0x7, + FPE_FLTSUB = 0x8, 
+ + BUS_ADRALN = 0x1, + BUS_ADRERR = 0x2, + BUS_OBJERR = 0x3, -typedef uint32 Sigset; + SEGV_MAPERR = 0x1, + SEGV_ACCERR = 0x2, + ITIMER_REAL = 0x0, + ITIMER_VIRTUAL = 0x1, + ITIMER_PROF = 0x2, +}; + +typedef struct Tfork Tfork; +typedef struct Sigaltstack Sigaltstack; +typedef struct Sigcontext Sigcontext; typedef struct Siginfo Siginfo; -struct Siginfo { - int32 si_signo; - int32 si_code; - int32 si_errno; - byte pad_godefs_0[4]; - byte _data[120]; +typedef struct StackT StackT; +typedef struct Timespec Timespec; +typedef struct Timeval Timeval; +typedef struct Itimerval Itimerval; + +#pragma pack on + +struct Tfork { + byte *tf_tcb; + int32 *tf_tid; + byte *tf_stack; }; -typedef union Sigval Sigval; -union Sigval { - int32 sival_int; - void *sival_ptr; +struct Sigaltstack { + byte *ss_sp; + uint64 ss_size; + int32 ss_flags; + byte Pad_cgo_0[4]; }; +struct Sigcontext { + int64 sc_rdi; + int64 sc_rsi; + int64 sc_rdx; + int64 sc_rcx; + int64 sc_r8; + int64 sc_r9; + int64 sc_r10; + int64 sc_r11; + int64 sc_r12; + int64 sc_r13; + int64 sc_r14; + int64 sc_r15; + int64 sc_rbp; + int64 sc_rbx; + int64 sc_rax; + int64 sc_gs; + int64 sc_fs; + int64 sc_es; + int64 sc_ds; + int64 sc_trapno; + int64 sc_err; + int64 sc_rip; + int64 sc_cs; + int64 sc_rflags; + int64 sc_rsp; + int64 sc_ss; + void *sc_fpstate; + int32 sc_onstack; + int32 sc_mask; +}; +struct Siginfo { + int32 si_signo; + int32 si_code; + int32 si_errno; + byte Pad_cgo_0[4]; + byte _data[120]; +}; +typedef uint32 Sigset; +typedef byte Sigval[8]; -typedef struct StackT StackT; struct StackT { - void *ss_sp; - uint64 ss_size; - int32 ss_flags; - byte pad_godefs_0[4]; + byte *ss_sp; + uint64 ss_size; + int32 ss_flags; + byte Pad_cgo_0[4]; }; -typedef struct Timespec Timespec; struct Timespec { - int32 tv_sec; - byte pad_godefs_0[4]; - int64 tv_nsec; + int32 tv_sec; + byte Pad_cgo_0[4]; + int64 tv_nsec; }; - -typedef struct Timeval Timeval; struct Timeval { - int64 tv_sec; - int64 tv_usec; + int64 tv_sec; + int64 tv_usec; }; - -typedef struct Itimerval Itimerval; struct Itimerval { - Timeval it_interval; - Timeval it_value; + Timeval it_interval; + Timeval it_value; }; -typedef void sfxsave64; -typedef void usavefpu; - -typedef struct Sigcontext Sigcontext; -struct Sigcontext { - int64 sc_rdi; - int64 sc_rsi; - int64 sc_rdx; - int64 sc_rcx; - int64 sc_r8; - int64 sc_r9; - int64 sc_r10; - int64 sc_r11; - int64 sc_r12; - int64 sc_r13; - int64 sc_r14; - int64 sc_r15; - int64 sc_rbp; - int64 sc_rbx; - int64 sc_rax; - int64 sc_gs; - int64 sc_fs; - int64 sc_es; - int64 sc_ds; - int64 sc_trapno; - int64 sc_err; - int64 sc_rip; - int64 sc_cs; - int64 sc_rflags; - int64 sc_rsp; - int64 sc_ss; - sfxsave64 *sc_fpstate; - int32 sc_onstack; - int32 sc_mask; -}; #pragma pack off diff --git a/src/pkg/runtime/defs_plan9_386.h b/src/pkg/runtime/defs_plan9_386.h index 58fd9d94d..bde299dee 100644 --- a/src/pkg/runtime/defs_plan9_386.h +++ b/src/pkg/runtime/defs_plan9_386.h @@ -1,2 +1,29 @@ -// nothing to see here -#define tos_pid 48 +#define PAGESIZE 0x1000 + +typedef struct Ureg Ureg; + +struct Ureg +{ + uint32 di; /* general registers */ + uint32 si; /* ... */ + uint32 bp; /* ... */ + uint32 nsp; + uint32 bx; /* ... */ + uint32 dx; /* ... */ + uint32 cx; /* ... */ + uint32 ax; /* ... */ + uint32 gs; /* data segments */ + uint32 fs; /* ... */ + uint32 es; /* ... */ + uint32 ds; /* ... 
*/ + uint32 trap; /* trap type */ + uint32 ecode; /* error code (or zero) */ + uint32 pc; /* pc */ + uint32 cs; /* old context */ + uint32 flags; /* old flags */ + union { + uint32 usp; + uint32 sp; + }; + uint32 ss; /* old stack segment */ +}; diff --git a/src/pkg/runtime/defs_plan9_amd64.h b/src/pkg/runtime/defs_plan9_amd64.h new file mode 100644 index 000000000..d8fec67eb --- /dev/null +++ b/src/pkg/runtime/defs_plan9_amd64.h @@ -0,0 +1,34 @@ +#define PAGESIZE 0x200000ULL + +typedef struct Ureg Ureg; + +struct Ureg { + uint64 ax; + uint64 bx; + uint64 cx; + uint64 dx; + uint64 si; + uint64 di; + uint64 bp; + uint64 r8; + uint64 r9; + uint64 r10; + uint64 r11; + uint64 r12; + uint64 r13; + uint64 r14; + uint64 r15; + + uint16 ds; + uint16 es; + uint16 fs; + uint16 gs; + + uint64 type; + uint64 error; /* error code (or zero) */ + uint64 ip; /* pc */ + uint64 cs; /* old context */ + uint64 flags; /* old flags */ + uint64 sp; /* sp */ + uint64 ss; /* old stack segment */ +}; diff --git a/src/pkg/runtime/env_plan9.c b/src/pkg/runtime/env_plan9.c new file mode 100644 index 000000000..848d73303 --- /dev/null +++ b/src/pkg/runtime/env_plan9.c @@ -0,0 +1,33 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "runtime.h" +#include "os_GOOS.h" + +byte* +runtime·getenv(int8 *s) +{ + int32 fd, len, n, r; + byte file[128]; + byte *p; + + len = runtime·findnull((byte*)s); + if(len > sizeof file-6) + return nil; + + runtime·memclr(file, sizeof file); + runtime·memmove((void*)file, (void*)"/env/", 5); + runtime·memmove((void*)(file+5), (void*)s, len); + + fd = runtime·open(file, OREAD); + if(fd < 0) + return nil; + n = runtime·seek(fd, 0, 2); + p = runtime·malloc(n+1); + r = runtime·pread(fd, p, n, 0); + runtime·close(fd); + if(r < 0) + return nil; + return p; +} diff --git a/src/pkg/runtime/env_posix.c b/src/pkg/runtime/env_posix.c new file mode 100644 index 000000000..8333811fb --- /dev/null +++ b/src/pkg/runtime/env_posix.c @@ -0,0 +1,61 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin freebsd linux netbsd openbsd windows + +#include "runtime.h" + +Slice syscall·envs; + +byte* +runtime·getenv(int8 *s) +{ + int32 i, j, len; + byte *v, *bs; + String* envv; + int32 envc; + + bs = (byte*)s; + len = runtime·findnull(bs); + envv = (String*)syscall·envs.array; + envc = syscall·envs.len; + for(i=0; i<envc; i++){ + if(envv[i].len <= len) + continue; + v = envv[i].str; + for(j=0; j<len; j++) + if(bs[j] != v[j]) + goto nomatch; + if(v[len] != '=') + goto nomatch; + return v+len+1; + nomatch:; + } + return nil; +} + +void (*_cgo_setenv)(byte**); + +// Update the C environment if cgo is loaded. +// Called from syscall.Setenv. 
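
For illustration only, not part of this diff: once this hook is wired up, a cgo binary that calls os.Setenv (which goes through syscall.Setenv) should see the update from the C side as well. A minimal sketch — the variable name is arbitrary and the C string allocation is leaked for brevity:

package main

/*
#include <stdlib.h>
*/
import "C"

import (
	"fmt"
	"os"
)

func main() {
	os.Setenv("GREETING", "hello") // forwarded to C via the _cgo_setenv hook
	fmt.Println(C.GoString(C.getenv(C.CString("GREETING"))))
}
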
+void +syscall·setenv_c(String k, String v) +{ + byte *arg[2]; + + if(_cgo_setenv == nil) + return; + + arg[0] = runtime·malloc(k.len + 1); + runtime·memmove(arg[0], k.str, k.len); + arg[0][k.len] = 0; + + arg[1] = runtime·malloc(v.len + 1); + runtime·memmove(arg[1], v.str, v.len); + arg[1][v.len] = 0; + + runtime·asmcgocall((void*)_cgo_setenv, arg); + runtime·free(arg[0]); + runtime·free(arg[1]); +} diff --git a/src/pkg/runtime/export_test.go b/src/pkg/runtime/export_test.go index 51921135b..062aea248 100644 --- a/src/pkg/runtime/export_test.go +++ b/src/pkg/runtime/export_test.go @@ -25,3 +25,45 @@ var Entersyscall = entersyscall var Exitsyscall = exitsyscall var LockedOSThread = golockedOSThread var Stackguard = stackguard + +type LFNode struct { + Next *LFNode + Pushcnt uintptr +} + +func lfstackpush(head *uint64, node *LFNode) +func lfstackpop2(head *uint64) *LFNode + +var LFStackPush = lfstackpush +var LFStackPop = lfstackpop2 + +type ParFor struct { + body *byte + done uint32 + Nthr uint32 + nthrmax uint32 + thrseq uint32 + Cnt uint32 + Ctx *byte + wait bool +} + +func parforalloc2(nthrmax uint32) *ParFor +func parforsetup2(desc *ParFor, nthr, n uint32, ctx *byte, wait bool, body func(*ParFor, uint32)) +func parfordo(desc *ParFor) +func parforiters(desc *ParFor, tid uintptr) (uintptr, uintptr) + +var NewParFor = parforalloc2 +var ParForSetup = parforsetup2 +var ParForDo = parfordo + +func ParForIters(desc *ParFor, tid uint32) (uint32, uint32) { + begin, end := parforiters(desc, uintptr(tid)) + return uint32(begin), uint32(end) +} + +func testSchedLocalQueue() +func testSchedLocalQueueSteal() + +var TestSchedLocalQueue1 = testSchedLocalQueue +var TestSchedLocalQueueSteal1 = testSchedLocalQueueSteal diff --git a/src/pkg/runtime/extern.go b/src/pkg/runtime/extern.go index d93259d7b..fbaffd1d5 100644 --- a/src/pkg/runtime/extern.go +++ b/src/pkg/runtime/extern.go @@ -42,8 +42,8 @@ type Func struct { // Keep in sync with runtime.h:struct Func pc0 uintptr // starting pc, ln for table ln0 int32 frame int32 // stack frame size - args int32 // number of 32-bit in/out args - locals int32 // number of 32-bit locals + args int32 // in/out args size + locals int32 // locals size } // FuncForPC returns a *Func describing the function that contains the @@ -67,7 +67,7 @@ func (f *Func) FileLine(pc uintptr) (file string, line int) { // implemented in symtab.c func funcline_go(*Func, uintptr) (string, int) -// mid returns the current os thread (m) id. +// mid returns the current OS thread (m) id. func mid() uint32 // SetFinalizer sets the finalizer associated with x to f. diff --git a/src/pkg/runtime/float.c b/src/pkg/runtime/float.c index f481519f6..42082e434 100644 --- a/src/pkg/runtime/float.c +++ b/src/pkg/runtime/float.c @@ -4,170 +4,7 @@ #include "runtime.h" -static uint64 uvnan = 0x7FF0000000000001ULL; -static uint64 uvinf = 0x7FF0000000000000ULL; -static uint64 uvneginf = 0xFFF0000000000000ULL; - -uint32 -runtime·float32tobits(float32 f) -{ - // The obvious cast-and-pointer code is technically - // not valid, and gcc miscompiles it. Use a union instead. - union { - float32 f; - uint32 i; - } u; - u.f = f; - return u.i; -} - -uint64 -runtime·float64tobits(float64 f) -{ - // The obvious cast-and-pointer code is technically - // not valid, and gcc miscompiles it. Use a union instead. - union { - float64 f; - uint64 i; - } u; - u.f = f; - return u.i; -} - -float64 -runtime·float64frombits(uint64 i) -{ - // The obvious cast-and-pointer code is technically - // not valid, and gcc miscompiles it. 
Use a union instead. - union { - float64 f; - uint64 i; - } u; - u.i = i; - return u.f; -} - -float32 -runtime·float32frombits(uint32 i) -{ - // The obvious cast-and-pointer code is technically - // not valid, and gcc miscompiles it. Use a union instead. - union { - float32 f; - uint32 i; - } u; - u.i = i; - return u.f; -} - -bool -runtime·isInf(float64 f, int32 sign) -{ - uint64 x; - - x = runtime·float64tobits(f); - if(sign == 0) - return x == uvinf || x == uvneginf; - if(sign > 0) - return x == uvinf; - return x == uvneginf; -} - -float64 -runtime·NaN(void) -{ - return runtime·float64frombits(uvnan); -} - -bool -runtime·isNaN(float64 f) -{ - uint64 x; - - x = runtime·float64tobits(f); - return ((uint32)(x>>52) & 0x7FF) == 0x7FF && !runtime·isInf(f, 0); -} - -float64 -runtime·Inf(int32 sign) -{ - if(sign >= 0) - return runtime·float64frombits(uvinf); - else - return runtime·float64frombits(uvneginf); -} - -enum -{ - MASK = 0x7ffL, - SHIFT = 64-11-1, - BIAS = 1022L, -}; - -float64 -runtime·frexp(float64 d, int32 *ep) -{ - uint64 x; - - if(d == 0) { - *ep = 0; - return 0; - } - x = runtime·float64tobits(d); - *ep = (int32)((x >> SHIFT) & MASK) - BIAS; - x &= ~((uint64)MASK << SHIFT); - x |= (uint64)BIAS << SHIFT; - return runtime·float64frombits(x); -} - -float64 -runtime·ldexp(float64 d, int32 e) -{ - uint64 x; - - if(d == 0) - return 0; - x = runtime·float64tobits(d); - e += (int32)(x >> SHIFT) & MASK; - if(e <= 0) - return 0; /* underflow */ - if(e >= MASK){ /* overflow */ - if(d < 0) - return runtime·Inf(-1); - return runtime·Inf(1); - } - x &= ~((uint64)MASK << SHIFT); - x |= (uint64)e << SHIFT; - return runtime·float64frombits(x); -} - -float64 -runtime·modf(float64 d, float64 *ip) -{ - float64 dd; - uint64 x; - int32 e; - - if(d < 1) { - if(d < 0) { - d = runtime·modf(-d, ip); - *ip = -*ip; - return -d; - } - *ip = 0; - return d; - } - - x = runtime·float64tobits(d); - e = (int32)((x >> SHIFT) & MASK) - BIAS; - - /* - * Keep the top 11+e bits; clear the rest. - */ - if(e <= 64-11) - x &= ~(((uint64)1 << (64LL-11LL-e))-1); - dd = runtime·float64frombits(x); - *ip = dd; - return d - dd; -} - +// used as float64 via runtime· names +uint64 ·nan = 0x7FF8000000000001ULL; +uint64 ·posinf = 0x7FF0000000000000ULL; +uint64 ·neginf = 0xFFF0000000000000ULL; diff --git a/src/pkg/runtime/gc_test.go b/src/pkg/runtime/gc_test.go index 65894a6fd..e1e1b1d01 100644 --- a/src/pkg/runtime/gc_test.go +++ b/src/pkg/runtime/gc_test.go @@ -5,37 +5,80 @@ package runtime_test import ( + "os" "runtime" "testing" ) func TestGcSys(t *testing.T) { + if os.Getenv("GOGC") == "off" { + t.Fatalf("GOGC=off in environment; test cannot pass") + } + data := struct{ Short bool }{testing.Short()} + got := executeTest(t, testGCSysSource, &data) + want := "OK\n" + if got != want { + t.Fatalf("expected %q, but got %q", want, got) + } +} + +const testGCSysSource = ` +package main + +import ( + "fmt" + "runtime" +) + +func main() { + runtime.GOMAXPROCS(1) memstats := new(runtime.MemStats) runtime.GC() runtime.ReadMemStats(memstats) sys := memstats.Sys + runtime.MemProfileRate = 0 // disable profiler + itercount := 1000000 - if testing.Short() { - itercount = 100000 - } +{{if .Short}} + itercount = 100000 +{{end}} for i := 0; i < itercount; i++ { workthegc() } // Should only be using a few MB. + // We allocated 100 MB or (if not short) 1 GB. 
runtime.ReadMemStats(memstats) if sys > memstats.Sys { sys = 0 } else { sys = memstats.Sys - sys } - t.Logf("used %d extra bytes", sys) - if sys > 4<<20 { - t.Fatalf("using too much memory: %d bytes", sys) + if sys > 16<<20 { + fmt.Printf("using too much memory: %d bytes\n", sys) + return } + fmt.Printf("OK\n") } func workthegc() []byte { return make([]byte, 1029) } +` + +func TestGcDeepNesting(t *testing.T) { + type T [2][2][2][2][2][2][2][2][2][2]*int + a := new(T) + + // Prevent the compiler from applying escape analysis. + // This makes sure new(T) is allocated on heap, not on the stack. + t.Logf("%p", a) + + a[0][0][0][0][0][0][0][0][0][0] = new(int) + *a[0][0][0][0][0][0][0][0][0][0] = 13 + runtime.GC() + if *a[0][0][0][0][0][0][0][0][0][0] != 13 { + t.Fail() + } +} diff --git a/src/pkg/runtime/hashmap.c b/src/pkg/runtime/hashmap.c index 63ed4e2a3..37111daa9 100644 --- a/src/pkg/runtime/hashmap.c +++ b/src/pkg/runtime/hashmap.c @@ -3,8 +3,11 @@ // license that can be found in the LICENSE file. #include "runtime.h" +#include "arch_GOARCH.h" +#include "malloc.h" #include "hashmap.h" #include "type.h" +#include "race.h" /* Hmap flag values */ #define IndirectVal (1<<0) /* storing pointers to values */ @@ -13,7 +16,7 @@ #define CanFreeKey (1<<3) /* okay to free pointers to keys */ struct Hmap { /* a hash table; initialize with hash_init() */ - uint32 count; /* elements in table - must be first */ + uintgo count; /* elements in table - must be first */ uint8 datasize; /* amount of data to store in entry */ uint8 flag; uint8 valoff; /* offset of value in key+value data block */ @@ -78,7 +81,7 @@ hash_subtable_new (Hmap *h, int32 power, int32 used) max_probes = 1 << power; } bytes += limit_bytes - elemsize; - st = malloc (offsetof (struct hash_subtable, entry[0]) + bytes); + st = runtime·mallocgc(offsetof (struct hash_subtable, entry[0]) + bytes, UseSpanType ? 
FlagNoPointers : 0, 1, 1); st->power = power; st->used = used; st->datasize = h->datasize; @@ -115,7 +118,7 @@ hash_init (Hmap *h, int32 datasize, int64 hint) if(datasize < sizeof (void *)) datasize = sizeof (void *); - datasize = runtime·rnd(datasize, sizeof (void *)); + datasize = ROUND(datasize, sizeof (void *)); init_sizes (hint, &init_power); h->datasize = datasize; assert (h->datasize == datasize); @@ -273,7 +276,7 @@ hash_lookup (MapType *t, Hmap *h, void *data, void **pres) struct hash_entry *end_e; void *key; bool eq; - + hash = h->hash0; (*t->key->alg->hash) (&hash, t->key->size, data); hash &= ~HASH_MASK; @@ -416,8 +419,12 @@ hash_insert_internal (MapType *t, struct hash_subtable **pst, int32 flags, hash_ *pres = ins_e->data; return (1); } - assert (e_hash != hash || (flags & HASH_REHASH) == 0); - hash += (e_hash == hash); /* adjust hash if it collides */ + if (e_hash == hash) { /* adjust hash if it collides */ + assert ((flags & HASH_REHASH) == 0); + hash++; + if ((hash & HASH_MASK) == HASH_SUBHASH) + runtime·throw("runtime: map hash collision overflow"); + } ins_e = HASH_OFFSET (ins_e, elemsize); ins_i++; if (e_hash <= hash) { /* set e to insertion point */ @@ -462,7 +469,7 @@ hash_insert (MapType *t, Hmap *h, void *data, void **pres) { uintptr hash; int32 rc; - + hash = h->hash0; (*t->key->alg->hash) (&hash, t->key->size, data); rc = hash_insert_internal (t, &h->st, 0, hash, h, data, pres); @@ -618,7 +625,7 @@ hash_iter_init (MapType *t, Hmap *h, struct hash_iter *it) it->subtable_state[0].e = h->st->entry; it->subtable_state[0].start = h->st->entry; it->subtable_state[0].last = h->st->last; - + // fastrand1 returns 31 useful bits. // We don't care about not having a bottom bit but we // do want top bits. @@ -700,6 +707,82 @@ hash_visit (Hmap *h, void (*data_visit) (void *arg, int32 level, void *data), vo hash_visit_internal (h->st, 0, 0, data_visit, arg); } +// Initialize the iterator. +// Returns false if Hmap contains no pointers (in which case the iterator is not initialized). +bool +hash_gciter_init (Hmap *h, struct hash_gciter *it) +{ + // GC during map initialization + if(h->st == nil) + return false; + + it->elemsize = h->datasize + offsetof (struct hash_entry, data[0]); + it->flag = h->flag; + it->valoff = h->valoff; + it->i = 0; + it->st = h->st; + it->subtable_state[it->i].e = h->st->entry; + it->subtable_state[it->i].last = h->st->last; + return true; +} + +// Returns true and fills *data with subtable/key/value data, +// or returns false if the iterator has terminated. 
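
hash_gciter_next, whose doc comment appears just above and whose body follows, walks the nested subtables iteratively: it keeps its position in the small fixed subtable_state array (four frames, per hashmap.h) and pushes or pops frames rather than recursing, which keeps the collector's stack usage bounded. A sketch of the same explicit-stack traversal in Go, over an invented nested-table shape (types and names here are for illustration only):

	package main

	import "fmt"

	// table is a stand-in for a hash subtable: each slot holds either a
	// value or a pointer to a nested table.
	type table struct {
		slots []interface{} // int values or *table subtables
	}

	// visit reaches every value without recursion, using an explicit stack
	// of (table, next-slot) frames, like hash_gciter's subtable_state.
	func visit(root *table, f func(int)) {
		type frame struct {
			t *table
			i int
		}
		stack := []frame{{root, 0}}
		for len(stack) > 0 {
			fr := &stack[len(stack)-1]
			if fr.i == len(fr.t.slots) {
				stack = stack[:len(stack)-1] // pop: this table is done
				continue
			}
			s := fr.t.slots[fr.i]
			fr.i++
			switch v := s.(type) {
			case *table:
				stack = append(stack, frame{v, 0}) // push the nested table
			case int:
				f(v)
			}
		}
	}

	func main() {
		inner := &table{slots: []interface{}{2, 3}}
		root := &table{slots: []interface{}{1, inner, 4}}
		visit(root, func(v int) { fmt.Println(v) }) // prints 1, 2, 3, 4
	}
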
+bool +hash_gciter_next (struct hash_gciter *it, struct hash_gciter_data *data) +{ + struct hash_entry *e; + struct hash_gciter_sub *sub; + + data->st = nil; + data->key_data = nil; + data->val_data = nil; + + // pointer to the first-level table + if(it->st != nil) { + data->st = it->st; + it->st = nil; + return true; + } + +popped: + sub = &it->subtable_state[it->i]; + e = sub->e; + while (e <= sub->last) { + if ((e->hash & HASH_MASK) == HASH_SUBHASH) { + struct hash_subtable *st = *(struct hash_subtable **)e->data; + data->st = st; + sub->e = HASH_OFFSET (e, it->elemsize); + + // push + it->i++; + assert (it->i < nelem(it->subtable_state)); + sub++; + sub->e = st->entry; + sub->last = st->last; + + return true; + } + if(e->hash != HASH_NIL) { + void *key_data = e->data; + void *val_data = (byte*)e->data + it->valoff; + data->key_data = key_data; + data->val_data = val_data; + data->indirectkey = (it->flag & IndirectKey) != 0; + data->indirectval = (it->flag & IndirectVal) != 0; + sub->e = HASH_OFFSET (e, it->elemsize); + return true; + } + e = HASH_OFFSET (e, it->elemsize); + } + if(it->i != 0) { + // pop + it->i--; + goto popped; + } + return false; +} + // /// interfaces to go runtime // @@ -724,14 +807,13 @@ hash_keyptr(Hmap *h, void *p) static int32 debug = 0; -// makemap(typ *Type, hint uint32) (hmap *map[any]any); Hmap* runtime·makemap_c(MapType *typ, int64 hint) { Hmap *h; Type *key, *val; uintptr ksize, vsize; - + key = typ->key; val = typ->elem; @@ -744,8 +826,15 @@ runtime·makemap_c(MapType *typ, int64 hint) h = runtime·mal(sizeof(*h)); h->flag |= CanFreeTable; /* until reflect gets involved, free is okay */ - ksize = runtime·rnd(key->size, sizeof(void*)); - vsize = runtime·rnd(val->size, sizeof(void*)); + if(UseSpanType) { + if(false) { + runtime·printf("makemap %S: %p\n", *typ->string, h); + } + runtime·settype(h, (uintptr)typ | TypeInfo_Map); + } + + ksize = ROUND(key->size, sizeof(void*)); + vsize = ROUND(val->size, sizeof(void*)); if(ksize > MaxData || vsize > MaxData || ksize+vsize > MaxData) { // Either key is too big, or value is, or combined they are. // Prefer to keep the key if possible, because we look at @@ -828,8 +917,11 @@ runtime·mapaccess1(MapType *t, Hmap *h, ...) byte *ak, *av; bool pres; + if(raceenabled && h != nil) + runtime·racereadpc(h, runtime·getcallerpc(&t), runtime·mapaccess1); + ak = (byte*)(&h + 1); - av = ak + runtime·rnd(t->key->size, Structrnd); + av = ak + ROUND(t->key->size, Structrnd); runtime·mapaccess(t, h, ak, av, &pres); @@ -853,8 +945,11 @@ runtime·mapaccess2(MapType *t, Hmap *h, ...) { byte *ak, *av, *ap; + if(raceenabled && h != nil) + runtime·racereadpc(h, runtime·getcallerpc(&t), runtime·mapaccess2); + ak = (byte*)(&h + 1); - av = ak + runtime·rnd(t->key->size, Structrnd); + av = ak + ROUND(t->key->size, Structrnd); ap = av + t->elem->size; runtime·mapaccess(t, h, ak, av, ap); @@ -881,6 +976,9 @@ reflect·mapaccess(MapType *t, Hmap *h, uintptr key, uintptr val, bool pres) { byte *ak, *av; + if(raceenabled && h != nil) + runtime·racereadpc(h, runtime·getcallerpc(&t), reflect·mapaccess); + if(t->key->size <= sizeof(key)) ak = (byte*)&key; else @@ -951,8 +1049,10 @@ runtime·mapassign1(MapType *t, Hmap *h, ...) 
if(h == nil) runtime·panicstring("assignment to entry in nil map"); + if(raceenabled) + runtime·racewritepc(h, runtime·getcallerpc(&t), runtime·mapassign1); ak = (byte*)(&h + 1); - av = ak + runtime·rnd(t->key->size, t->elem->align); + av = ak + ROUND(t->key->size, t->elem->align); runtime·mapassign(t, h, ak, av); } @@ -965,8 +1065,10 @@ runtime·mapdelete(MapType *t, Hmap *h, ...) byte *ak; if(h == nil) - runtime·panicstring("deletion of entry in nil map"); + return; + if(raceenabled) + runtime·racewritepc(h, runtime·getcallerpc(&t), runtime·mapdelete); ak = (byte*)(&h + 1); runtime·mapassign(t, h, ak, nil); @@ -990,6 +1092,8 @@ reflect·mapassign(MapType *t, Hmap *h, uintptr key, uintptr val, bool pres) if(h == nil) runtime·panicstring("assignment to entry in nil map"); + if(raceenabled) + runtime·racewritepc(h, runtime·getcallerpc(&t), reflect·mapassign); if(t->key->size <= sizeof(key)) ak = (byte*)&key; else @@ -1011,6 +1115,8 @@ runtime·mapiterinit(MapType *t, Hmap *h, struct hash_iter *it) it->data = nil; return; } + if(raceenabled) + runtime·racereadpc(h, runtime·getcallerpc(&t), runtime·mapiterinit); hash_iter_init(t, h, it); it->data = hash_next(it); if(debug) { @@ -1054,6 +1160,8 @@ reflect·mapiterinit(MapType *t, Hmap *h, struct hash_iter *it) void runtime·mapiternext(struct hash_iter *it) { + if(raceenabled) + runtime·racereadpc(it->h, runtime·getcallerpc(&it), runtime·mapiternext); if(runtime·gcwaiting) runtime·gosched(); @@ -1148,15 +1256,18 @@ reflect·mapiterkey(struct hash_iter *it, uintptr key, bool ok) } // For reflect: -// func maplen(h map) (len int32) +// func maplen(h map) (len int) // Like len(m) in the actual language, we treat the nil map as length 0. void -reflect·maplen(Hmap *h, int32 len) +reflect·maplen(Hmap *h, intgo len) { if(h == nil) len = 0; - else + else { len = h->count; + if(raceenabled) + runtime·racereadpc(h, runtime·getcallerpc(&h), reflect·maplen); + } FLUSH(&len); } @@ -1171,7 +1282,7 @@ runtime·mapiter2(struct hash_iter *it, ...) t = it->t; ak = (byte*)(&it + 1); - av = ak + runtime·rnd(t->key->size, t->elem->align); + av = ak + ROUND(t->key->size, t->elem->align); res = it->data; if(res == nil) diff --git a/src/pkg/runtime/hashmap.h b/src/pkg/runtime/hashmap.h index 4c10cf6ef..9b82f299e 100644 --- a/src/pkg/runtime/hashmap.h +++ b/src/pkg/runtime/hashmap.h @@ -63,7 +63,6 @@ } */ -#define malloc runtime·mal #define memset(a,b,c) runtime·memclr((byte*)(a), (uint32)(c)) #define memcpy(a,b,c) runtime·memmove((byte*)(a),(byte*)(b),(uint32)(c)) #define assert(a) if(!(a)) runtime·throw("hashmap assert") @@ -143,7 +142,7 @@ struct hash_iter { Remove all sub-tables associated with *h. This undoes the effects of hash_init(). If other memory pointed to by user data must be freed, the caller is - responsible for doiing do by iterating over *h first; see + responsible for doing so by iterating over *h first; see hash_iter_init()/hash_next(). */ // void hash_destroy (struct hash *h); @@ -152,7 +151,7 @@ struct hash_iter { /* Initialize *it from *h. */ // void hash_iter_init (struct hash *h, struct hash_iter *it); -/* Return the next used entry in the table which which *it was initialized. */ +/* Return the next used entry in the table with which *it was initialized. */ // void *hash_next (struct hash_iter *it); /*---- test interface ----*/ @@ -160,3 +159,27 @@ struct hash_iter { whether used or not. "level" is the subtable level, 0 means first level. 
*/ /* TESTING ONLY: DO NOT USE THIS ROUTINE IN NORMAL CODE */ // void hash_visit (struct hash *h, void (*data_visit) (void *arg, int32 level, void *data), void *arg); + +/* Used by the garbage collector */ +struct hash_gciter +{ + int32 elemsize; + uint8 flag; + uint8 valoff; + uint32 i; /* stack pointer in subtable_state */ + struct hash_subtable *st; + struct hash_gciter_sub { + struct hash_entry *e; /* pointer into subtable */ + struct hash_entry *last; /* last entry in subtable */ + } subtable_state[4]; +}; +struct hash_gciter_data +{ + struct hash_subtable *st; /* subtable pointer, or nil */ + uint8 *key_data; /* key data, or nil */ + uint8 *val_data; /* value data, or nil */ + bool indirectkey; /* storing pointers to keys */ + bool indirectval; /* storing pointers to values */ +}; +bool hash_gciter_init (struct Hmap *h, struct hash_gciter *it); +bool hash_gciter_next (struct hash_gciter *it, struct hash_gciter_data *data); diff --git a/src/pkg/runtime/iface.c b/src/pkg/runtime/iface.c index 2b60c4f23..370edffb8 100644 --- a/src/pkg/runtime/iface.c +++ b/src/pkg/runtime/iface.c @@ -5,6 +5,7 @@ #include "runtime.h" #include "arch_GOARCH.h" #include "type.h" +#include "typekind.h" #include "malloc.h" void @@ -19,19 +20,6 @@ runtime·printeface(Eface e) runtime·printf("(%p,%p)", e.type, e.data); } -/* - * layout of Itab known to compilers - */ -struct Itab -{ - InterfaceType* inter; - Type* type; - Itab* link; - int32 bad; - int32 unused; - void (*fun[])(void); -}; - static Itab* hash[1009]; static Lock ifacelock; @@ -182,19 +170,37 @@ copyout(Type *t, void **src, void *dst) alg->copy(size, dst, *src); } -// func convT2I(typ *byte, typ2 *byte, elem any) (ret any) #pragma textflag 7 void -runtime·convT2I(Type *t, InterfaceType *inter, ...) +runtime·typ2Itab(Type *t, InterfaceType *inter, Itab **cache, Itab *ret) +{ + Itab *tab; + + tab = itab(inter, t, 0); + runtime·atomicstorep(cache, tab); + ret = tab; + FLUSH(&ret); +} + +// func convT2I(typ *byte, typ2 *byte, cache **byte, elem any) (ret any) +#pragma textflag 7 +void +runtime·convT2I(Type *t, InterfaceType *inter, Itab **cache, ...) { byte *elem; Iface *ret; + Itab *tab; int32 wid; - elem = (byte*)(&inter+1); + elem = (byte*)(&cache+1); wid = t->size; - ret = (Iface*)(elem + runtime·rnd(wid, Structrnd)); - ret->tab = itab(inter, t, 0); + ret = (Iface*)(elem + ROUND(wid, Structrnd)); + tab = runtime·atomicloadp(cache); + if(!tab) { + tab = itab(inter, t, 0); + runtime·atomicstorep(cache, tab); + } + ret->tab = tab; copyin(t, elem, &ret->data); } @@ -209,7 +215,7 @@ runtime·convT2E(Type *t, ...) elem = (byte*)(&t+1); wid = t->size; - ret = (Eface*)(elem + runtime·rnd(wid, Structrnd)); + ret = (Eface*)(elem + ROUND(wid, Structrnd)); ret->type = t; copyin(t, elem, &ret->data); } @@ -272,6 +278,13 @@ runtime·assertI2T2(Type *t, Iface i, ...) copyout(t, &i.data, ret); } +void +runtime·assertI2TOK(Type *t, Iface i, bool ok) +{ + ok = i.tab!=nil && i.tab->type==t; + FLUSH(&ok); +} + static void assertE2Tret(Type *t, Eface e, byte *ret); // func ifaceE2T(typ *byte, iface any) (ret any) @@ -328,6 +341,13 @@ runtime·assertE2T2(Type *t, Eface e, ...) 
copyout(t, &e.data, ret); } +void +runtime·assertE2TOK(Type *t, Eface e, bool ok) +{ + ok = t==e.type; + FLUSH(&ok); +} + // func convI2E(elem any) (ret any) void runtime·convI2E(Iface i, Eface ret) @@ -387,7 +407,7 @@ void runtime·convI2I(InterfaceType* inter, Iface i, Iface ret) { Itab *tab; - + ret.data = i.data; if((tab = i.tab) == nil) ret.tab = nil; @@ -526,10 +546,10 @@ runtime·assertE2E2(InterfaceType* inter, Eface e, Eface ret, bool ok) } static uintptr -ifacehash1(void *data, Type *t) +ifacehash1(void *data, Type *t, uintptr h) { Alg *alg; - uintptr size, h; + uintptr size; Eface err; if(t == nil) @@ -543,7 +563,6 @@ ifacehash1(void *data, Type *t) runtime·newErrorString(runtime·catstring(runtime·gostringnocopy((byte*)"hash of unhashable type "), *t->string), &err); runtime·panic(err); } - h = 0; if(size <= sizeof(data)) alg->hash(&h, size, &data); else @@ -552,17 +571,17 @@ ifacehash1(void *data, Type *t) } uintptr -runtime·ifacehash(Iface a) +runtime·ifacehash(Iface a, uintptr h) { if(a.tab == nil) - return 0; - return ifacehash1(a.data, a.tab->type); + return h; + return ifacehash1(a.data, a.tab->type, h); } uintptr -runtime·efacehash(Eface a) +runtime·efacehash(Eface a, uintptr h) { - return ifacehash1(a.data, a.type); + return ifacehash1(a.data, a.type, h); } static bool @@ -666,39 +685,54 @@ reflect·unsafe_Typeof(Eface e, Eface ret) } void -reflect·unsafe_New(Eface typ, void *ret) +reflect·unsafe_New(Type *t, void *ret) { - Type *t; + uint32 flag; - // Reflect library has reinterpreted typ - // as its own kind of type structure. - // We know that the pointer to the original - // type structure sits before the data pointer. - t = (Type*)((Eface*)typ.data-1); + flag = t->kind&KindNoPointers ? FlagNoPointers : 0; + ret = runtime·mallocgc(t->size, flag, 1, 1); + + if(UseSpanType && !flag) { + if(false) { + runtime·printf("unsafe_New %S: %p\n", *t->string, ret); + } + runtime·settype(ret, (uintptr)t | TypeInfo_SingleObject); + } - if(t->kind&KindNoPointers) - ret = runtime·mallocgc(t->size, FlagNoPointers, 1, 1); - else - ret = runtime·mal(t->size); FLUSH(&ret); } void -reflect·unsafe_NewArray(Eface typ, uint32 n, void *ret) +reflect·unsafe_NewArray(Type *t, intgo n, void *ret) { uint64 size; - Type *t; - // Reflect library has reinterpreted typ - // as its own kind of type structure. - // We know that the pointer to the original - // type structure sits before the data pointer. - t = (Type*)((Eface*)typ.data-1); - size = n*t->size; - if(t->kind&KindNoPointers) + if(size == 0) + ret = (byte*)&runtime·zerobase; + else if(t->kind&KindNoPointers) ret = runtime·mallocgc(size, FlagNoPointers, 1, 1); - else - ret = runtime·mal(size); + else { + ret = runtime·mallocgc(size, 0, 1, 1); + + if(UseSpanType) { + if(false) { + runtime·printf("unsafe_NewArray [%D]%S: %p\n", (int64)n, *t->string, ret); + } + runtime·settype(ret, (uintptr)t | TypeInfo_Array); + } + } + + FLUSH(&ret); +} + +void +reflect·typelinks(Slice ret) +{ + extern Type *typelink[], *etypelink[]; + static int32 first = 1; + ret.array = (byte*)typelink; + ret.len = etypelink - typelink; + ret.cap = ret.len; FLUSH(&ret); } diff --git a/src/pkg/runtime/iface_test.go b/src/pkg/runtime/iface_test.go new file mode 100644 index 000000000..bca0ea0ee --- /dev/null +++ b/src/pkg/runtime/iface_test.go @@ -0,0 +1,138 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
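
On the convT2I rework above: the conversion now carries a per-call-site itab cache. It atomically loads a previously published itab and only falls back to the itab() lookup on a miss, publishing the result with an atomic store; losing a race is harmless because the lookup is deterministic, so duplicate stores write the same value. A Go sketch of that publish-once pattern, with a hypothetical expensiveLookup standing in for itab() and atomic.Value standing in for atomicloadp/atomicstorep:

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	// cache holds one lazily computed, immutable result per call site.
	var cache atomic.Value // stores string

	// expensiveLookup stands in for itab(): deterministic, so duplicated
	// work from a lost race produces the same value.
	func expensiveLookup() string {
		return "computed-itab"
	}

	func get() string {
		if v := cache.Load(); v != nil {
			return v.(string) // fast path: already published
		}
		v := expensiveLookup()
		cache.Store(v) // racing stores all write the same value
		return v
	}

	func main() {
		fmt.Println(get())
		fmt.Println(get()) // second call hits the cached value
	}
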
+ +package runtime_test + +import ( + "testing" +) + +type I1 interface { + Method1() +} + +type I2 interface { + Method1() + Method2() +} + +type TS uint16 +type TM uintptr +type TL [2]uintptr + +func (TS) Method1() {} +func (TS) Method2() {} +func (TM) Method1() {} +func (TM) Method2() {} +func (TL) Method1() {} +func (TL) Method2() {} + +var ( + e interface{} + e_ interface{} + i1 I1 + i2 I2 + ts TS + tm TM + tl TL +) + +func BenchmarkConvT2ESmall(b *testing.B) { + for i := 0; i < b.N; i++ { + e = ts + } +} + +func BenchmarkConvT2EUintptr(b *testing.B) { + for i := 0; i < b.N; i++ { + e = tm + } +} + +func BenchmarkConvT2ELarge(b *testing.B) { + for i := 0; i < b.N; i++ { + e = tl + } +} + +func BenchmarkConvT2ISmall(b *testing.B) { + for i := 0; i < b.N; i++ { + i1 = ts + } +} + +func BenchmarkConvT2IUintptr(b *testing.B) { + for i := 0; i < b.N; i++ { + i1 = tm + } +} + +func BenchmarkConvT2ILarge(b *testing.B) { + for i := 0; i < b.N; i++ { + i1 = tl + } +} + +func BenchmarkConvI2E(b *testing.B) { + i2 = tm + for i := 0; i < b.N; i++ { + e = i2 + } +} + +func BenchmarkConvI2I(b *testing.B) { + i2 = tm + for i := 0; i < b.N; i++ { + i1 = i2 + } +} + +func BenchmarkAssertE2T(b *testing.B) { + e = tm + for i := 0; i < b.N; i++ { + tm = e.(TM) + } +} + +func BenchmarkAssertE2TLarge(b *testing.B) { + e = tl + for i := 0; i < b.N; i++ { + tl = e.(TL) + } +} + +func BenchmarkAssertE2I(b *testing.B) { + e = tm + for i := 0; i < b.N; i++ { + i1 = e.(I1) + } +} + +func BenchmarkAssertI2T(b *testing.B) { + i1 = tm + for i := 0; i < b.N; i++ { + tm = i1.(TM) + } +} + +func BenchmarkAssertI2I(b *testing.B) { + i1 = tm + for i := 0; i < b.N; i++ { + i2 = i1.(I2) + } +} + +func BenchmarkAssertI2E(b *testing.B) { + i1 = tm + for i := 0; i < b.N; i++ { + e = i1.(interface{}) + } +} + +func BenchmarkAssertE2E(b *testing.B) { + e = tm + for i := 0; i < b.N; i++ { + e_ = e + } +} diff --git a/src/pkg/runtime/lfstack.c b/src/pkg/runtime/lfstack.c new file mode 100644 index 000000000..1d48491aa --- /dev/null +++ b/src/pkg/runtime/lfstack.c @@ -0,0 +1,65 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Lock-free stack. + +#include "runtime.h" +#include "arch_GOARCH.h" + +#ifdef _64BIT +// Amd64 uses 48-bit virtual addresses, 47-th bit is used as kernel/user flag. +// So we use 17msb of pointers as ABA counter. 
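
To make the ABA counter concrete: a 47-bit address leaves the top 17 bits of the packed 64-bit word free, so lfstackpush stores the node's push count there, and every reuse of a node changes the value seen by the CAS. A sketch of the packing arithmetic in Go (the constants mirror the PTR_BITS and mask defines that follow):

	package main

	import "fmt"

	const ptrBits = 47 // amd64 user-space virtual addresses fit in 47 bits
	const ptrMask = 1<<ptrBits - 1

	// pack combines a 47-bit address with a push counter in the top 17 bits.
	func pack(addr, cnt uint64) uint64 {
		if addr != addr&ptrMask {
			panic("address does not fit in 47 bits")
		}
		return addr | cnt<<ptrBits // only the low 17 bits of cnt survive the shift
	}

	// unpack recovers both fields from the packed word.
	func unpack(w uint64) (addr, cnt uint64) {
		return w & ptrMask, w >> ptrBits
	}

	func main() {
		w := pack(0x00c000001000, 3)
		addr, cnt := unpack(w)
		fmt.Printf("%#x %d\n", addr, cnt) // 0xc000001000 3
	}
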
+# define PTR_BITS 47
+#else
+# define PTR_BITS 32
+#endif
+#define PTR_MASK ((1ull<<PTR_BITS)-1)
+#define CNT_MASK (0ull-1)
+
+void
+runtime·lfstackpush(uint64 *head, LFNode *node)
+{
+	uint64 old, new;
+
+	if((uintptr)node != ((uintptr)node&PTR_MASK)) {
+		runtime·printf("p=%p\n", node);
+		runtime·throw("runtime·lfstackpush: invalid pointer");
+	}
+
+	node->pushcnt++;
+	new = (uint64)(uintptr)node|(((uint64)node->pushcnt&CNT_MASK)<<PTR_BITS);
+	old = runtime·atomicload64(head);
+	for(;;) {
+		node->next = (LFNode*)(uintptr)(old&PTR_MASK);
+		if(runtime·cas64(head, &old, new))
+			break;
+	}
+}
+
+LFNode*
+runtime·lfstackpop(uint64 *head)
+{
+	LFNode *node, *node2;
+	uint64 old, new;
+
+	old = runtime·atomicload64(head);
+	for(;;) {
+		if(old == 0)
+			return nil;
+		node = (LFNode*)(uintptr)(old&PTR_MASK);
+		node2 = runtime·atomicloadp(&node->next);
+		new = 0;
+		if(node2 != nil)
+			new = (uint64)(uintptr)node2|(((uint64)node2->pushcnt&CNT_MASK)<<PTR_BITS);
+		if(runtime·cas64(head, &old, new))
+			return node;
+	}
+}
+
+void
+runtime·lfstackpop2(uint64 *head, LFNode *node)
+{
+	node = runtime·lfstackpop(head);
+	FLUSH(&node);
+}
diff --git a/src/pkg/runtime/lfstack_test.go b/src/pkg/runtime/lfstack_test.go
new file mode 100644
index 000000000..505aae605
--- /dev/null
+++ b/src/pkg/runtime/lfstack_test.go
@@ -0,0 +1,130 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+	"math/rand"
+	. "runtime"
+	"testing"
+	"unsafe"
+)
+
+type MyNode struct {
+	LFNode
+	data int
+}
+
+func fromMyNode(node *MyNode) *LFNode {
+	return (*LFNode)(unsafe.Pointer(node))
+}
+
+func toMyNode(node *LFNode) *MyNode {
+	return (*MyNode)(unsafe.Pointer(node))
+}
+
+func TestLFStack(t *testing.T) {
+	stack := new(uint64)
+	// Need to keep additional references to nodes, the stack is not all that type-safe.
+	var nodes []*MyNode
+
+	// Check the stack is initially empty.
+	if LFStackPop(stack) != nil {
+		t.Fatalf("stack is not empty")
+	}
+
+	// Push one element.
+	node := &MyNode{data: 42}
+	nodes = append(nodes, node)
+	LFStackPush(stack, fromMyNode(node))
+
+	// Push another.
+	node = &MyNode{data: 43}
+	nodes = append(nodes, node)
+	LFStackPush(stack, fromMyNode(node))
+
+	// Pop one element.
+	node = toMyNode(LFStackPop(stack))
+	if node == nil {
+		t.Fatalf("stack is empty")
+	}
+	if node.data != 43 {
+		t.Fatalf("no lifo")
+	}
+
+	// Pop another.
+	node = toMyNode(LFStackPop(stack))
+	if node == nil {
+		t.Fatalf("stack is empty")
+	}
+	if node.data != 42 {
+		t.Fatalf("no lifo")
+	}
+
+	// Check the stack is empty again.
+	if LFStackPop(stack) != nil {
+		t.Fatalf("stack is not empty")
+	}
+	if *stack != 0 {
+		t.Fatalf("stack is not empty")
+	}
+}
+
+func TestLFStackStress(t *testing.T) {
+	const K = 100
+	P := 4 * GOMAXPROCS(-1)
+	N := 100000
+	if testing.Short() {
+		N /= 10
+	}
+	// Create 2 stacks.
+	stacks := [2]*uint64{new(uint64), new(uint64)}
+	// Need to keep additional references to nodes, the stack is not all that type-safe.
+	var nodes []*MyNode
+	// Push K elements randomly onto the stacks.
+	sum := 0
+	for i := 0; i < K; i++ {
+		sum += i
+		node := &MyNode{data: i}
+		nodes = append(nodes, node)
+		LFStackPush(stacks[i%2], fromMyNode(node))
+	}
+	c := make(chan bool, P)
+	for p := 0; p < P; p++ {
+		go func() {
+			r := rand.New(rand.NewSource(rand.Int63()))
+			// Pop a node from a random stack, then push it onto a random stack.
+ for i := 0; i < N; i++ { + node := toMyNode(LFStackPop(stacks[r.Intn(2)])) + if node != nil { + LFStackPush(stacks[r.Intn(2)], fromMyNode(node)) + } + } + c <- true + }() + } + for i := 0; i < P; i++ { + <-c + } + // Pop all elements from both stacks, and verify that nothing lost. + sum2 := 0 + cnt := 0 + for i := 0; i < 2; i++ { + for { + node := toMyNode(LFStackPop(stacks[i])) + if node == nil { + break + } + cnt++ + sum2 += node.data + node.Next = nil + } + } + if cnt != K { + t.Fatalf("Wrong number of nodes %d/%d", cnt, K) + } + if sum2 != sum { + t.Fatalf("Wrong sum %d/%d", sum2, sum) + } +} diff --git a/src/pkg/runtime/lock_futex.c b/src/pkg/runtime/lock_futex.c index b4465bff1..9b1f5f6db 100644 --- a/src/pkg/runtime/lock_futex.c +++ b/src/pkg/runtime/lock_futex.c @@ -111,7 +111,8 @@ runtime·noteclear(Note *n) void runtime·notewakeup(Note *n) { - runtime·xchg(&n->key, 1); + if(runtime·xchg(&n->key, 1)) + runtime·throw("notewakeup - double wakeup"); runtime·futexwakeup(&n->key, 1); } diff --git a/src/pkg/runtime/malloc.goc b/src/pkg/runtime/malloc.goc index 9ae3a9d61..ac131b3af 100644 --- a/src/pkg/runtime/malloc.goc +++ b/src/pkg/runtime/malloc.goc @@ -9,17 +9,18 @@ package runtime #include "runtime.h" #include "arch_GOARCH.h" -#include "stack.h" #include "malloc.h" -#include "defs_GOOS_GOARCH.h" #include "type.h" +#include "typekind.h" +#include "race.h" -#pragma dataflag 16 /* mark mheap as 'no pointers', hiding from garbage collector */ -MHeap runtime·mheap; +MHeap *runtime·mheap; -extern MStats mstats; // defined in extern.go +int32 runtime·checking; -extern volatile int32 runtime·MemProfileRate; +extern MStats mstats; // defined in zruntime_def_$GOOS_$GOARCH.go + +extern volatile intgo runtime·MemProfileRate; // Allocate an object of at least size bytes. // Small objects are allocated from the per-thread cache's free lists. 
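
Two paths follow from the comment above: requests up to MaxSmallSize are rounded to a size class and served from the per-thread MCache free lists, while larger requests go straight to the page heap in whole pages. The page rounding in the large path is a shift plus a remainder test; a sketch, assuming the 4 KB pages mentioned in malloc.h (a PageShift of 12 is an assumption here, its value is not shown in this hunk):

	package main

	import "fmt"

	const (
		pageShift = 12 // 4 KB pages
		pageMask  = 1<<pageShift - 1
	)

	// pagesFor rounds a byte size up to whole pages, as the large-object
	// path in runtime·mallocgc does with size>>PageShift.
	func pagesFor(size uintptr) uintptr {
		npages := size >> pageShift
		if size&pageMask != 0 {
			npages++ // a partial trailing page still costs a full page
		}
		return npages
	}

	func main() {
		fmt.Println(pagesFor(4096))  // 1
		fmt.Println(pagesFor(4097))  // 2
		fmt.Println(pagesFor(40000)) // 10
	}
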
@@ -27,7 +28,8 @@ extern volatile int32 runtime·MemProfileRate; void* runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) { - int32 sizeclass, rate; + int32 sizeclass; + intgo rate; MCache *c; uintptr npages; MSpan *s; @@ -41,6 +43,9 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) if(size == 0) size = 1; + if(DebugTypeAtBlockEnd) + size += sizeof(uintptr); + c = m->mcache; c->local_nmalloc++; if(size <= MaxSmallSize) { @@ -60,7 +65,7 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) npages = size >> PageShift; if((size & PageMask) != 0) npages++; - s = runtime·MHeap_Alloc(&runtime·mheap, npages, 0, 1); + s = runtime·MHeap_Alloc(runtime·mheap, npages, 0, 1, zeroed); if(s == nil) runtime·throw("out of memory"); size = npages<<PageShift; @@ -71,9 +76,20 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) // setup for mark sweep runtime·markspan(v, 0, 0, true); } + + if (sizeof(void*) == 4 && c->local_total_alloc >= (1<<30)) { + // purge cache stats to prevent overflow + runtime·lock(runtime·mheap); + runtime·purgecachedstats(c); + runtime·unlock(runtime·mheap); + } + if(!(flag & FlagNoGC)) runtime·markallocated(v, size, (flag&FlagNoPointers) != 0); + if(DebugTypeAtBlockEnd) + *(uintptr*)((uintptr)v+size-sizeof(uintptr)) = 0; + m->mallocing = 0; if(!(flag & FlagNoProfiling) && (rate = runtime·MemProfileRate) > 0) { @@ -95,6 +111,11 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) if(dogc && mstats.heap_alloc >= mstats.next_gc) runtime·gc(0); + + if(raceenabled) { + runtime·racemalloc(v, size, m->racepc); + m->racepc = nil; + } return v; } @@ -130,6 +151,9 @@ runtime·free(void *v) } prof = runtime·blockspecial(v); + if(raceenabled) + runtime·racefree(v); + // Find size class for v. sizeclass = s->sizeclass; c = m->mcache; @@ -141,7 +165,7 @@ runtime·free(void *v) // they might coalesce v into other spans and change the bitmap further. runtime·markfreed(v, size); runtime·unmarkspan(v, 1<<PageShift); - runtime·MHeap_Free(&runtime·mheap, s, 1); + runtime·MHeap_Free(runtime·mheap, s, 1); } else { // Small object. 
size = runtime·class_to_size[sizeclass]; @@ -169,7 +193,14 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp) MSpan *s; m->mcache->local_nlookup++; - s = runtime·MHeap_LookupMaybe(&runtime·mheap, v); + if (sizeof(void*) == 4 && m->mcache->local_nlookup >= (1<<30)) { + // purge cache stats to prevent overflow + runtime·lock(runtime·mheap); + runtime·purgecachedstats(m->mcache); + runtime·unlock(runtime·mheap); + } + + s = runtime·MHeap_LookupMaybe(runtime·mheap, v); if(sp) *sp = s; if(s == nil) { @@ -196,7 +227,7 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp) return 0; } - n = runtime·class_to_size[s->sizeclass]; + n = s->elemsize; if(base) { i = ((byte*)v - p)/n; *base = p + i*n; @@ -210,14 +241,15 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp) MCache* runtime·allocmcache(void) { - int32 rate; + intgo rate; MCache *c; - runtime·lock(&runtime·mheap); - c = runtime·FixAlloc_Alloc(&runtime·mheap.cachealloc); - mstats.mcache_inuse = runtime·mheap.cachealloc.inuse; - mstats.mcache_sys = runtime·mheap.cachealloc.sys; - runtime·unlock(&runtime·mheap); + runtime·lock(runtime·mheap); + c = runtime·FixAlloc_Alloc(&runtime·mheap->cachealloc); + mstats.mcache_inuse = runtime·mheap->cachealloc.inuse; + mstats.mcache_sys = runtime·mheap->cachealloc.sys; + runtime·unlock(runtime·mheap); + runtime·memclr((byte*)c, sizeof(*c)); // Set first allocation sample size. rate = runtime·MemProfileRate; @@ -230,12 +262,19 @@ runtime·allocmcache(void) } void -runtime·purgecachedstats(M* m) +runtime·freemcache(MCache *c) { - MCache *c; + runtime·MCache_ReleaseAll(c); + runtime·lock(runtime·mheap); + runtime·purgecachedstats(c); + runtime·FixAlloc_Free(&runtime·mheap->cachealloc, c); + runtime·unlock(runtime·mheap); +} +void +runtime·purgecachedstats(MCache *c) +{ // Protected by either heap or GC lock. - c = m->mcache; mstats.heap_alloc += c->local_cachealloc; c->local_cachealloc = 0; mstats.heap_objects += c->local_objects; @@ -274,6 +313,9 @@ runtime·mallocinit(void) USED(arena_size); USED(bitmap_size); + if((runtime·mheap = runtime·SysAlloc(sizeof(*runtime·mheap))) == nil) + runtime·throw("runtime: cannot allocate heap metadata"); + runtime·InitSizes(); limit = runtime·memlimit(); @@ -283,32 +325,30 @@ runtime·mallocinit(void) // enough to hold 4 bits per allocated word. if(sizeof(void*) == 8 && (limit == 0 || limit > (1<<30))) { // On a 64-bit machine, allocate from a single contiguous reservation. - // 16 GB should be big enough for now. + // 128 GB (MaxMem) should be big enough for now. // // The code will work with the reservation at any address, but ask - // SysReserve to use 0x000000f800000000 if possible. - // Allocating a 16 GB region takes away 36 bits, and the amd64 + // SysReserve to use 0x000000c000000000 if possible. + // Allocating a 128 GB region takes away 37 bits, and the amd64 // doesn't let us choose the top 17 bits, so that leaves the 11 bits - // in the middle of 0x00f8 for us to choose. Choosing 0x00f8 means - // that the valid memory addresses will begin 0x00f8, 0x00f9, 0x00fa, 0x00fb. - // None of the bytes f8 f9 fa fb can appear in valid UTF-8, and - // they are otherwise as far from ff (likely a common byte) as possible. - // Choosing 0x00 for the leading 6 bits was more arbitrary, but it - // is not a common ASCII code point either. Using 0x11f8 instead + // in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means + // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x0x00df. 
+ // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid + // UTF-8 sequences, and they are otherwise as far away from + // ff (likely a common byte) as possible. An earlier attempt to use 0x11f8 // caused out of memory errors on OS X during thread allocations. // These choices are both for debuggability and to reduce the // odds of the conservative garbage collector not collecting memory // because some non-pointer block of memory had a bit pattern // that matched a memory address. // - // Actually we reserve 17 GB (because the bitmap ends up being 1 GB) - // but it hardly matters: fc is not valid UTF-8 either, and we have to - // allocate 15 GB before we get that far. + // Actually we reserve 136 GB (because the bitmap ends up being 8 GB) + // but it hardly matters: e0 00 is not valid UTF-8 either. // // If this fails we fall back to the 32 bit memory mechanism - arena_size = 16LL<<30; + arena_size = MaxMem; bitmap_size = arena_size / (sizeof(void*)*8/4); - p = runtime·SysReserve((void*)(0x00f8ULL<<32), bitmap_size + arena_size); + p = runtime·SysReserve((void*)(0x00c0ULL<<32), bitmap_size + arena_size); } if (p == nil) { // On a 32-bit machine, we can't typically get away @@ -354,13 +394,13 @@ runtime·mallocinit(void) if((uintptr)p & (((uintptr)1<<PageShift)-1)) runtime·throw("runtime: SysReserve returned unaligned address"); - runtime·mheap.bitmap = p; - runtime·mheap.arena_start = p + bitmap_size; - runtime·mheap.arena_used = runtime·mheap.arena_start; - runtime·mheap.arena_end = runtime·mheap.arena_start + arena_size; + runtime·mheap->bitmap = p; + runtime·mheap->arena_start = p + bitmap_size; + runtime·mheap->arena_used = runtime·mheap->arena_start; + runtime·mheap->arena_end = runtime·mheap->arena_start + arena_size; // Initialize the rest of the allocator. - runtime·MHeap_Init(&runtime·mheap, runtime·SysAlloc); + runtime·MHeap_Init(runtime·mheap, runtime·SysAlloc); m->mcache = runtime·allocmcache(); // See if it works. 
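
A quick check of the reservation arithmetic above: with a 128 GB arena and 4 bitmap bits per 8-byte word, the bitmap is arena_size/16 = 8 GB, which gives the 136 GB total the comment mentions, reserved at the hint address 0x00c0<<32:

	package main

	import "fmt"

	func main() {
		const wordSize = 8                           // sizeof(void*) on amd64
		arenaSize := uint64(128) << 30               // MaxMem: 128 GB
		bitmapSize := arenaSize / (wordSize * 8 / 4) // 4 bitmap bits per word
		fmt.Println(bitmapSize >> 30)                // 8 (GB of bitmap)
		fmt.Println((arenaSize + bitmapSize) >> 30)  // 136 (GB reserved in total)
		fmt.Printf("%#x\n", uint64(0x00c0)<<32)      // 0xc000000000: the reservation hint
	}
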
@@ -394,6 +434,8 @@ runtime·MHeap_SysAlloc(MHeap *h, uintptr n) runtime·SysMap(p, n); h->arena_used += n; runtime·MHeap_MapBits(h); + if(raceenabled) + runtime·racemapshadow(p, n); return p; } @@ -420,11 +462,231 @@ runtime·MHeap_SysAlloc(MHeap *h, uintptr n) if(h->arena_used > h->arena_end) h->arena_end = h->arena_used; runtime·MHeap_MapBits(h); + if(raceenabled) + runtime·racemapshadow(p, n); } return p; } +static Lock settype_lock; + +void +runtime·settype_flush(M *mp, bool sysalloc) +{ + uintptr *buf, *endbuf; + uintptr size, ofs, j, t; + uintptr ntypes, nbytes2, nbytes3; + uintptr *data2; + byte *data3; + bool sysalloc3; + void *v; + uintptr typ, p; + MSpan *s; + + buf = mp->settype_buf; + endbuf = buf + mp->settype_bufsize; + + runtime·lock(&settype_lock); + while(buf < endbuf) { + v = (void*)*buf; + *buf = 0; + buf++; + typ = *buf; + buf++; + + // (Manually inlined copy of runtime·MHeap_Lookup) + p = (uintptr)v>>PageShift; + if(sizeof(void*) == 8) + p -= (uintptr)runtime·mheap->arena_start >> PageShift; + s = runtime·mheap->map[p]; + + if(s->sizeclass == 0) { + s->types.compression = MTypes_Single; + s->types.data = typ; + continue; + } + + size = s->elemsize; + ofs = ((uintptr)v - (s->start<<PageShift)) / size; + + switch(s->types.compression) { + case MTypes_Empty: + ntypes = (s->npages << PageShift) / size; + nbytes3 = 8*sizeof(uintptr) + 1*ntypes; + + if(!sysalloc) { + data3 = runtime·mallocgc(nbytes3, FlagNoPointers, 0, 1); + } else { + data3 = runtime·SysAlloc(nbytes3); + if(data3 == nil) + runtime·throw("runtime: cannot allocate memory"); + if(0) runtime·printf("settype(0->3): SysAlloc(%x) --> %p\n", (uint32)nbytes3, data3); + } + + s->types.compression = MTypes_Bytes; + s->types.sysalloc = sysalloc; + s->types.data = (uintptr)data3; + + ((uintptr*)data3)[1] = typ; + data3[8*sizeof(uintptr) + ofs] = 1; + break; + + case MTypes_Words: + ((uintptr*)s->types.data)[ofs] = typ; + break; + + case MTypes_Bytes: + data3 = (byte*)s->types.data; + for(j=1; j<8; j++) { + if(((uintptr*)data3)[j] == typ) { + break; + } + if(((uintptr*)data3)[j] == 0) { + ((uintptr*)data3)[j] = typ; + break; + } + } + if(j < 8) { + data3[8*sizeof(uintptr) + ofs] = j; + } else { + ntypes = (s->npages << PageShift) / size; + nbytes2 = ntypes * sizeof(uintptr); + + if(!sysalloc) { + data2 = runtime·mallocgc(nbytes2, FlagNoPointers, 0, 1); + } else { + data2 = runtime·SysAlloc(nbytes2); + if(data2 == nil) + runtime·throw("runtime: cannot allocate memory"); + if(0) runtime·printf("settype.(3->2): SysAlloc(%x) --> %p\n", (uint32)nbytes2, data2); + } + + sysalloc3 = s->types.sysalloc; + + s->types.compression = MTypes_Words; + s->types.sysalloc = sysalloc; + s->types.data = (uintptr)data2; + + // Move the contents of data3 to data2. Then deallocate data3. + for(j=0; j<ntypes; j++) { + t = data3[8*sizeof(uintptr) + j]; + t = ((uintptr*)data3)[t]; + data2[j] = t; + } + if(sysalloc3) { + nbytes3 = 8*sizeof(uintptr) + 1*ntypes; + if(0) runtime·printf("settype.(3->2): SysFree(%p,%x)\n", data3, (uint32)nbytes3); + runtime·SysFree(data3, nbytes3); + } + + data2[ofs] = typ; + } + break; + } + } + runtime·unlock(&settype_lock); + + mp->settype_bufsize = 0; +} + +// It is forbidden to use this function if it is possible that +// explicit deallocation via calling runtime·free(v) may happen. 
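
runtime·settype below defers the actual bookkeeping: each call appends a (pointer, type) pair to a fixed per-M buffer, and only a full buffer triggers settype_flush, which takes settype_lock once for the whole batch and updates the per-span type tables. A Go sketch of that buffer-then-flush shape (the recorder type and its tiny capacity are invented for illustration):

	package main

	import "fmt"

	const bufSize = 4 // small for demonstration; the runtime's buffer is larger

	type recorder struct {
		buf [bufSize][2]uintptr // (pointer, type) pairs, like settype_buf
		n   int
	}

	// add buffers one pair and flushes when the buffer is full, mirroring
	// runtime·settype / runtime·settype_flush.
	func (r *recorder) add(p, typ uintptr) {
		r.buf[r.n] = [2]uintptr{p, typ}
		r.n++
		if r.n == len(r.buf) {
			r.flush()
		}
	}

	func (r *recorder) flush() {
		// In the runtime this takes a lock and updates per-span metadata;
		// here we just report the batch size.
		fmt.Printf("flushing %d pairs\n", r.n)
		r.n = 0
	}

	func main() {
		var r recorder
		for i := uintptr(1); i <= 10; i++ {
			r.add(i*16, 0x1000+i)
		}
		r.flush() // drain the final partial batch
	}
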
+void +runtime·settype(void *v, uintptr t) +{ + M *mp; + uintptr *buf; + uintptr i; + MSpan *s; + + if(t == 0) + runtime·throw("settype: zero type"); + + mp = m; + buf = mp->settype_buf; + i = mp->settype_bufsize; + buf[i+0] = (uintptr)v; + buf[i+1] = t; + i += 2; + mp->settype_bufsize = i; + + if(i == nelem(mp->settype_buf)) { + runtime·settype_flush(mp, false); + } + + if(DebugTypeAtBlockEnd) { + s = runtime·MHeap_Lookup(runtime·mheap, v); + *(uintptr*)((uintptr)v+s->elemsize-sizeof(uintptr)) = t; + } +} + +void +runtime·settype_sysfree(MSpan *s) +{ + uintptr ntypes, nbytes; + + if(!s->types.sysalloc) + return; + + nbytes = (uintptr)-1; + + switch (s->types.compression) { + case MTypes_Words: + ntypes = (s->npages << PageShift) / s->elemsize; + nbytes = ntypes * sizeof(uintptr); + break; + case MTypes_Bytes: + ntypes = (s->npages << PageShift) / s->elemsize; + nbytes = 8*sizeof(uintptr) + 1*ntypes; + break; + } + + if(nbytes != (uintptr)-1) { + if(0) runtime·printf("settype: SysFree(%p,%x)\n", (void*)s->types.data, (uint32)nbytes); + runtime·SysFree((void*)s->types.data, nbytes); + } +} + +uintptr +runtime·gettype(void *v) +{ + MSpan *s; + uintptr t, ofs; + byte *data; + + s = runtime·MHeap_LookupMaybe(runtime·mheap, v); + if(s != nil) { + t = 0; + switch(s->types.compression) { + case MTypes_Empty: + break; + case MTypes_Single: + t = s->types.data; + break; + case MTypes_Words: + ofs = (uintptr)v - (s->start<<PageShift); + t = ((uintptr*)s->types.data)[ofs/s->elemsize]; + break; + case MTypes_Bytes: + ofs = (uintptr)v - (s->start<<PageShift); + data = (byte*)s->types.data; + t = data[8*sizeof(uintptr) + ofs/s->elemsize]; + t = ((uintptr*)data)[t]; + break; + default: + runtime·throw("runtime·gettype: invalid compression kind"); + } + if(0) { + runtime·lock(&settype_lock); + runtime·printf("%p -> %d,%X\n", v, (int32)s->types.compression, (int64)t); + runtime·unlock(&settype_lock); + } + return t; + } + return 0; +} + // Runtime stubs. void* @@ -433,46 +695,63 @@ runtime·mal(uintptr n) return runtime·mallocgc(n, 0, 1, 1); } -func new(typ *Type) (ret *uint8) { - uint32 flag = typ->kind&KindNoPointers ? FlagNoPointers : 0; - ret = runtime·mallocgc(typ->size, flag, 1, 1); +#pragma textflag 7 +void +runtime·new(Type *typ, uint8 *ret) +{ + uint32 flag; + + if(raceenabled) + m->racepc = runtime·getcallerpc(&typ); + + if(typ->size == 0) { + // All 0-length allocations use this pointer. + // The language does not require the allocations to + // have distinct values. + ret = (uint8*)&runtime·zerobase; + } else { + flag = typ->kind&KindNoPointers ? FlagNoPointers : 0; + ret = runtime·mallocgc(typ->size, flag, 1, 1); + + if(UseSpanType && !flag) { + if(false) { + runtime·printf("new %S: %p\n", *typ->string, ret); + } + runtime·settype(ret, (uintptr)typ | TypeInfo_SingleObject); + } + } + FLUSH(&ret); } +// same as runtime·new, but callable from C void* -runtime·stackalloc(uint32 n) +runtime·cnew(Type *typ) { - // Stackalloc must be called on scheduler stack, so that we - // never try to grow the stack during the code that stackalloc runs. - // Doing so would cause a deadlock (issue 1547). - if(g != m->g0) - runtime·throw("stackalloc not on scheduler stack"); - - // Stack allocator uses malloc/free most of the time, - // but if we're in the middle of malloc and need stack, - // we have to do something else to avoid deadlock. 
- // In that case, we fall back on a fixed-size free-list - // allocator, assuming that inside malloc all the stack - // frames are small, so that all the stack allocations - // will be a single size, the minimum (right now, 5k). - if(m->mallocing || m->gcing || n == FixedStack) { - if(n != FixedStack) { - runtime·printf("stackalloc: in malloc, size=%d want %d", FixedStack, n); - runtime·throw("stackalloc"); + uint32 flag; + void *ret; + + if(raceenabled) + m->racepc = runtime·getcallerpc(&typ); + + if(typ->size == 0) { + // All 0-length allocations use this pointer. + // The language does not require the allocations to + // have distinct values. + ret = (uint8*)&runtime·zerobase; + } else { + flag = typ->kind&KindNoPointers ? FlagNoPointers : 0; + ret = runtime·mallocgc(typ->size, flag, 1, 1); + + if(UseSpanType && !flag) { + if(false) { + runtime·printf("new %S: %p\n", *typ->string, ret); + } + runtime·settype(ret, (uintptr)typ | TypeInfo_SingleObject); } - return runtime·FixAlloc_Alloc(m->stackalloc); } - return runtime·mallocgc(n, FlagNoProfiling|FlagNoGC, 0, 0); -} -void -runtime·stackfree(void *v, uintptr n) -{ - if(m->mallocing || m->gcing || n == FixedStack) { - runtime·FixAlloc_Free(m->stackalloc, v); - return; - } - runtime·free(v); + return ret; } func GC() { @@ -483,7 +762,8 @@ func SetFinalizer(obj Eface, finalizer Eface) { byte *base; uintptr size; FuncType *ft; - int32 i, nret; + int32 i; + uintptr nret; Type *t; if(obj.type == nil) { diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h index d846f6810..38122bf8a 100644 --- a/src/pkg/runtime/malloc.h +++ b/src/pkg/runtime/malloc.h @@ -85,6 +85,8 @@ typedef struct MHeap MHeap; typedef struct MSpan MSpan; typedef struct MStats MStats; typedef struct MLink MLink; +typedef struct MTypes MTypes; +typedef struct GCStats GCStats; enum { @@ -113,21 +115,30 @@ enum HeapAllocChunk = 1<<20, // Chunk size for heap growth // Number of bits in page to span calculations (4k pages). - // On 64-bit, we limit the arena to 16G, so 22 bits suffices. - // On 32-bit, we don't bother limiting anything: 20 bits for 4G. + // On 64-bit, we limit the arena to 128GB, or 37 bits. + // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address. #ifdef _64BIT - MHeapMap_Bits = 22, + MHeapMap_Bits = 37 - PageShift, #else - MHeapMap_Bits = 20, + MHeapMap_Bits = 32 - PageShift, #endif // Max number of threads to run garbage collection. // 2, 3, and 4 are all plausible maximums depending // on the hardware details of the machine. The garbage - // collector scales well to 4 cpus. - MaxGcproc = 4, + // collector scales well to 8 cpus. + MaxGcproc = 8, }; +// Maximum memory allocation size, a hint for callers. +// This must be a #define instead of an enum because it +// is so large. +#ifdef _64BIT +#define MaxMem (1ULL<<(MHeapMap_Bits+PageShift)) /* 128 GB */ +#else +#define MaxMem ((uintptr)-1) +#endif + // A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).) struct MLink { @@ -188,7 +199,7 @@ void runtime·FixAlloc_Free(FixAlloc *f, void *p); // Statistics. -// Shared with Go: if you edit this structure, also edit extern.go. +// Shared with Go: if you edit this structure, also edit type MemStats in mem.go. struct MStats { // General statistics. @@ -219,7 +230,7 @@ struct MStats uint64 buckhash_sys; // profiling bucket hash table // Statistics about garbage collector. - // Protected by stopping the world during GC. + // Protected by mheap or stopping the world during GC. 
uint64 next_gc; // next GC (in heap_alloc time) uint64 last_gc; // last GC (in absolute time) uint64 pause_total_ns; @@ -239,7 +250,6 @@ struct MStats #define mstats runtime·memStats /* name shared with Go */ extern MStats mstats; - // Size classes. Computed and initialized by InitSizes. // // SizeToClass(0 <= n <= MaxSmallSize) returns the size class, @@ -273,19 +283,19 @@ struct MCacheList struct MCache { MCacheList list[NumSizeClasses]; - uint64 size; - int64 local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap - int64 local_objects; // objects allocated (or freed) from cache since last lock of heap - int64 local_alloc; // bytes allocated (or freed) since last lock of heap - int64 local_total_alloc; // bytes allocated (even if freed) since last lock of heap - int64 local_nmalloc; // number of mallocs since last lock of heap - int64 local_nfree; // number of frees since last lock of heap - int64 local_nlookup; // number of pointer lookups since last lock of heap + uintptr size; + intptr local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap + intptr local_objects; // objects allocated (or freed) from cache since last lock of heap + intptr local_alloc; // bytes allocated (or freed) since last lock of heap + uintptr local_total_alloc; // bytes allocated (even if freed) since last lock of heap + uintptr local_nmalloc; // number of mallocs since last lock of heap + uintptr local_nfree; // number of frees since last lock of heap + uintptr local_nlookup; // number of pointer lookups since last lock of heap int32 next_sample; // trigger heap sample after allocating this many bytes // Statistics about allocation size classes since last lock of heap struct { - int64 nmalloc; - int64 nfree; + uintptr nmalloc; + uintptr nfree; } local_by_size[NumSizeClasses]; }; @@ -294,6 +304,44 @@ void* runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zero void runtime·MCache_Free(MCache *c, void *p, int32 sizeclass, uintptr size); void runtime·MCache_ReleaseAll(MCache *c); +// MTypes describes the types of blocks allocated within a span. +// The compression field describes the layout of the data. +// +// MTypes_Empty: +// All blocks are free, or no type information is available for +// allocated blocks. +// The data field has no meaning. +// MTypes_Single: +// The span contains just one block. +// The data field holds the type information. +// The sysalloc field has no meaning. +// MTypes_Words: +// The span contains multiple blocks. +// The data field points to an array of type [NumBlocks]uintptr, +// and each element of the array holds the type of the corresponding +// block. +// MTypes_Bytes: +// The span contains at most seven different types of blocks. +// The data field points to the following structure: +// struct { +// type [8]uintptr // type[0] is always 0 +// index [NumBlocks]byte +// } +// The type of the i-th block is: data.type[data.index[i]] +enum +{ + MTypes_Empty = 0, + MTypes_Single = 1, + MTypes_Words = 2, + MTypes_Bytes = 3, +}; +struct MTypes +{ + byte compression; // one of MTypes_* + bool sysalloc; // whether (void*)data is from runtime·SysAlloc + uintptr data; +}; + // An MSpan is a run of pages. 
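
Decoding the MTypes_Bytes layout described above is a double lookup: index[i] selects one of eight slots in the type table (slot 0 is reserved to mean "no information"), so a span with at most seven distinct block types pays one byte per block instead of one word. A sketch in Go (field names follow the comment above; this is not a real runtime API):

	package main

	import "fmt"

	// bytesTypes mirrors the MTypes_Bytes payload: typeTab[0] is always 0
	// ("no information"), and index[i] selects a slot for block i.
	type bytesTypes struct {
		typeTab [8]uintptr // distinct type pointers; at most 7 real entries
		index   []byte     // one selector byte per block in the span
	}

	// typeOf returns the recorded type of block i (0 means unknown).
	func (b *bytesTypes) typeOf(i int) uintptr {
		return b.typeTab[b.index[i]]
	}

	func main() {
		b := &bytesTypes{index: make([]byte, 4)}
		b.typeTab[1] = 0xdeadbeef // register one type in slot 1
		b.index[2] = 1            // block 2 uses slot 1
		fmt.Printf("%#x %#x\n", b.typeOf(2), b.typeOf(0)) // 0xdeadbeef 0x0
	}
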
enum { @@ -306,16 +354,17 @@ struct MSpan { MSpan *next; // in a span linked list MSpan *prev; // in a span linked list - MSpan *allnext; // in the list of all spans PageID start; // starting page number uintptr npages; // number of pages in span MLink *freelist; // list of free objects uint32 ref; // number of allocated objects in this span - uint32 sizeclass; // size class + int32 sizeclass; // size class + uintptr elemsize; // computed from sizeclass or from npages uint32 state; // MSpanInUse etc int64 unusedsince; // First time spotted by GC in MSpanFree state uintptr npreleased; // number of pages released to the OS byte *limit; // end of data in span + MTypes types; // types of allocated objects in this span }; void runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages); @@ -342,6 +391,7 @@ struct MCentral void runtime·MCentral_Init(MCentral *c, int32 sizeclass); int32 runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **first); void runtime·MCentral_FreeList(MCentral *c, int32 n, MLink *first); +void runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end); // Main malloc heap. // The heap itself is the "free[]" and "large" arrays, @@ -351,7 +401,9 @@ struct MHeap Lock; MSpan free[MaxMHeapList]; // free lists of given length MSpan large; // free lists length >= MaxMHeapList - MSpan *allspans; + MSpan **allspans; + uint32 nspan; + uint32 nspancap; // span lookup MSpan *map[1<<MHeapMap_Bits]; @@ -375,10 +427,10 @@ struct MHeap FixAlloc spanalloc; // allocator for Span* FixAlloc cachealloc; // allocator for MCache* }; -extern MHeap runtime·mheap; +extern MHeap *runtime·mheap; void runtime·MHeap_Init(MHeap *h, void *(*allocator)(uintptr)); -MSpan* runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct); +MSpan* runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32 zeroed); void runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct); MSpan* runtime·MHeap_Lookup(MHeap *h, void *v); MSpan* runtime·MHeap_LookupMaybe(MHeap *h, void *v); @@ -394,12 +446,18 @@ void runtime·markallocated(void *v, uintptr n, bool noptr); void runtime·checkallocated(void *v, uintptr n); void runtime·markfreed(void *v, uintptr n); void runtime·checkfreed(void *v, uintptr n); -int32 runtime·checking; +extern int32 runtime·checking; void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover); void runtime·unmarkspan(void *v, uintptr size); bool runtime·blockspecial(void*); void runtime·setblockspecial(void*, bool); -void runtime·purgecachedstats(M*); +void runtime·purgecachedstats(MCache*); +void* runtime·cnew(Type*); + +void runtime·settype(void*, uintptr); +void runtime·settype_flush(M*, bool); +void runtime·settype_sysfree(MSpan*); +uintptr runtime·gettype(void*); enum { @@ -412,8 +470,26 @@ enum void runtime·MProf_Malloc(void*, uintptr); void runtime·MProf_Free(void*, uintptr); void runtime·MProf_GC(void); -int32 runtime·helpgc(bool*); +int32 runtime·gcprocs(void); +void runtime·helpgc(int32 nproc); void runtime·gchelper(void); -bool runtime·getfinalizer(void *p, bool del, void (**fn)(void*), int32 *nret); +bool runtime·getfinalizer(void *p, bool del, FuncVal **fn, uintptr *nret); void runtime·walkfintab(void (*fn)(void*)); + +enum +{ + TypeInfo_SingleObject = 0, + TypeInfo_Array = 1, + TypeInfo_Map = 2, + TypeInfo_Chan = 3, + + // Enables type information at the end of blocks allocated from heap + DebugTypeAtBlockEnd = 0, +}; + +// defined in mgc0.go +void runtime·gc_m_ptr(Eface*); +void runtime·gc_itab_ptr(Eface*); + +void 
runtime·memorydump(void); diff --git a/src/pkg/runtime/mallocrep1.go b/src/pkg/runtime/mallocrep1.go index 41c104c0b..bc33e3a6b 100644 --- a/src/pkg/runtime/mallocrep1.go +++ b/src/pkg/runtime/mallocrep1.go @@ -39,6 +39,7 @@ func OkAmount(size, n uintptr) bool { } func AllocAndFree(size, count int) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) if *chatty { fmt.Printf("size=%d count=%d ...\n", size, count) } diff --git a/src/pkg/runtime/mcache.c b/src/pkg/runtime/mcache.c index 518e00c12..64803e703 100644 --- a/src/pkg/runtime/mcache.c +++ b/src/pkg/runtime/mcache.c @@ -21,7 +21,7 @@ runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed) l = &c->list[sizeclass]; if(l->list == nil) { // Replenish using central lists. - n = runtime·MCentral_AllocList(&runtime·mheap.central[sizeclass], + n = runtime·MCentral_AllocList(&runtime·mheap->central[sizeclass], runtime·class_to_transfercount[sizeclass], &first); if(n == 0) runtime·throw("out of memory"); @@ -43,11 +43,6 @@ runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed) // block is zeroed iff second word is zero ... if(size > sizeof(uintptr) && ((uintptr*)v)[1] != 0) runtime·memclr((byte*)v, size); - else { - // ... except for the link pointer - // that we used above; zero that. - v->next = nil; - } } c->local_cachealloc += size; c->local_objects++; @@ -74,7 +69,7 @@ ReleaseN(MCache *c, MCacheList *l, int32 n, int32 sizeclass) c->size -= n*runtime·class_to_size[sizeclass]; // Return them to central free list. - runtime·MCentral_FreeList(&runtime·mheap.central[sizeclass], n, first); + runtime·MCentral_FreeList(&runtime·mheap->central[sizeclass], n, first); } void diff --git a/src/pkg/runtime/mcentral.c b/src/pkg/runtime/mcentral.c index ff0c2d11a..ac8b5aa0d 100644 --- a/src/pkg/runtime/mcentral.c +++ b/src/pkg/runtime/mcentral.c @@ -34,12 +34,13 @@ runtime·MCentral_Init(MCentral *c, int32 sizeclass) // Allocate up to n objects from the central free list. // Return the number of objects allocated. // The objects are linked together by their first words. -// On return, *pstart points at the first object and *pend at the last. +// On return, *pstart points at the first object. int32 runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **pfirst) { - MLink *first, *last, *v; - int32 i; + MSpan *s; + MLink *first, *last; + int32 cap, avail, i; runtime·lock(c); // Replenish central list if empty. @@ -50,47 +51,37 @@ runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **pfirst) return 0; } } + s = c->nonempty.next; + cap = (s->npages << PageShift) / s->elemsize; + avail = cap - s->ref; + if(avail < n) + n = avail; - // Copy from list, up to n. // First one is guaranteed to work, because we just grew the list. - first = MCentral_Alloc(c); + first = s->freelist; last = first; - for(i=1; i<n && (v = MCentral_Alloc(c)) != nil; i++) { - last->next = v; - last = v; + for(i=1; i<n; i++) { + last = last->next; } + s->freelist = last->next; last->next = nil; - c->nfree -= i; - - runtime·unlock(c); - *pfirst = first; - return i; -} + s->ref += n; + c->nfree -= n; -// Helper: allocate one object from the central free list. 
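The allocator paths above thread their free lists through the free objects themselves: each free block's first word holds the MLink next pointer, which is why MCache_Alloc can no longer assume that word is zero and why the patch plants "needs zeroing" sentinels. A minimal Go sketch of the same intrusive free-list technique (all names and sizes here are illustrative, not the runtime's):

package main

import (
	"fmt"
	"unsafe"
)

const objSize = 32 // hypothetical size class

type mlink struct{ next *mlink }

func main() {
	// Carve a slab into objSize-byte blocks and thread the free list
	// through the blocks' own first words, as MCentral/MCache do.
	slab := make([]byte, 8*objSize)
	var head *mlink
	for off := len(slab) - objSize; off >= 0; off -= objSize {
		l := (*mlink)(unsafe.Pointer(&slab[off]))
		l.next = head
		head = l
	}
	// Allocate two blocks by popping the list. The link word is dirty
	// after a pop, which is why the runtime tracks "needs zeroing".
	for i := 0; i < 2; i++ {
		obj := head
		head = head.next
		obj.next = nil // scrub the word that served as the link
		fmt.Printf("allocated %d-byte block at %p\n", objSize, unsafe.Pointer(obj))
	}
}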
-static void* -MCentral_Alloc(MCentral *c) -{ - MSpan *s; - MLink *v; - - if(runtime·MSpanList_IsEmpty(&c->nonempty)) - return nil; - s = c->nonempty.next; - s->ref++; - v = s->freelist; - s->freelist = v->next; - if(s->freelist == nil) { + if(n == avail) { + if(s->freelist != nil || s->ref != cap) { + runtime·throw("invalid freelist"); + } runtime·MSpanList_Remove(s); runtime·MSpanList_Insert(&c->empty, s); } - return v; + + runtime·unlock(c); + *pfirst = first; + return n; } // Free n objects back into the central free list. -// Return the number of objects allocated. -// The objects are linked together by their first words. -// On return, *pstart points at the first object and *pend at the last. void runtime·MCentral_FreeList(MCentral *c, int32 n, MLink *start) { @@ -118,7 +109,7 @@ MCentral_Free(MCentral *c, void *v) int32 size; // Find span for v. - s = runtime·MHeap_Lookup(&runtime·mheap, v); + s = runtime·MHeap_Lookup(runtime·mheap, v); if(s == nil || s->ref == 0) runtime·throw("invalid free"); @@ -143,11 +134,47 @@ MCentral_Free(MCentral *c, void *v) s->freelist = nil; c->nfree -= (s->npages << PageShift) / size; runtime·unlock(c); - runtime·MHeap_Free(&runtime·mheap, s, 0); + runtime·MHeap_Free(runtime·mheap, s, 0); runtime·lock(c); } } +// Free n objects from a span s back into the central free list c. +// Called from GC. +void +runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end) +{ + int32 size; + + runtime·lock(c); + + // Move to nonempty if necessary. + if(s->freelist == nil) { + runtime·MSpanList_Remove(s); + runtime·MSpanList_Insert(&c->nonempty, s); + } + + // Add the objects back to s's free list. + end->next = s->freelist; + s->freelist = start; + s->ref -= n; + c->nfree += n; + + // If s is completely freed, return it to the heap. + if(s->ref == 0) { + size = runtime·class_to_size[c->sizeclass]; + runtime·MSpanList_Remove(s); + *(uintptr*)(s->start<<PageShift) = 1; // needs zeroing + s->freelist = nil; + c->nfree -= (s->npages << PageShift) / size; + runtime·unlock(c); + runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift); + runtime·MHeap_Free(runtime·mheap, s, 0); + } else { + runtime·unlock(c); + } +} + void runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp, int32 *nobj) { @@ -174,7 +201,7 @@ MCentral_Grow(MCentral *c) runtime·unlock(c); runtime·MGetSizeClassInfo(c->sizeclass, &size, &npages, &n); - s = runtime·MHeap_Alloc(&runtime·mheap, npages, c->sizeclass, 0); + s = runtime·MHeap_Alloc(runtime·mheap, npages, c->sizeclass, 0, 1); if(s == nil) { // TODO(rsc): Log out of memory runtime·lock(c); diff --git a/src/pkg/runtime/mem.go b/src/pkg/runtime/mem.go index 76680086c..79edc5a60 100644 --- a/src/pkg/runtime/mem.go +++ b/src/pkg/runtime/mem.go @@ -6,6 +6,9 @@ package runtime import "unsafe" +// Note: the MemStats struct should be kept in sync with +// struct MStats in malloc.h + // A MemStats records statistics about the memory allocator. type MemStats struct { // General statistics. 
@@ -39,7 +42,7 @@ type MemStats struct { NextGC uint64 // next run in HeapAlloc time (bytes) LastGC uint64 // last run in absolute time (ns) PauseTotalNs uint64 - PauseNs [256]uint64 // most recent GC pause times + PauseNs [256]uint64 // circular buffer of recent GC pause times, most recent at [(NumGC+255)%256] NumGC uint32 EnableGC bool DebugGC bool diff --git a/src/pkg/runtime/mem_darwin.c b/src/pkg/runtime/mem_darwin.c index cde5601cf..04e719394 100644 --- a/src/pkg/runtime/mem_darwin.c +++ b/src/pkg/runtime/mem_darwin.c @@ -14,7 +14,7 @@ runtime·SysAlloc(uintptr n) void *v; mstats.sys += n; - v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); + v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); if(v < (void*)4096) return nil; return v; @@ -51,7 +51,7 @@ runtime·SysMap(void *v, uintptr n) void *p; mstats.sys += n; - p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); if(p == (void*)-ENOMEM) runtime·throw("runtime: out of memory"); if(p != v) diff --git a/src/pkg/runtime/mem_freebsd.c b/src/pkg/runtime/mem_freebsd.c index d1c22583d..f217e9db1 100644 --- a/src/pkg/runtime/mem_freebsd.c +++ b/src/pkg/runtime/mem_freebsd.c @@ -14,7 +14,7 @@ runtime·SysAlloc(uintptr n) void *v; mstats.sys += n; - v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); + v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); if(v < (void*)4096) return nil; return v; @@ -23,9 +23,7 @@ runtime·SysAlloc(uintptr n) void runtime·SysUnused(void *v, uintptr n) { - USED(v); - USED(n); - // TODO(rsc): call madvise MADV_DONTNEED + runtime·madvise(v, n, MADV_FREE); } void @@ -61,7 +59,7 @@ runtime·SysMap(void *v, uintptr n) // On 64-bit, we don't actually have v reserved, so tread carefully. if(sizeof(void*) == 8) { - p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); if(p == (void*)-ENOMEM) runtime·throw("runtime: out of memory"); if(p != v) { @@ -71,7 +69,7 @@ runtime·SysMap(void *v, uintptr n) return; } - p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); if(p == (void*)-ENOMEM) runtime·throw("runtime: out of memory"); if(p != v) diff --git a/src/pkg/runtime/mem_linux.c b/src/pkg/runtime/mem_linux.c index b3e79cc41..ebcec1e86 100644 --- a/src/pkg/runtime/mem_linux.c +++ b/src/pkg/runtime/mem_linux.c @@ -10,6 +10,7 @@ enum { + EAGAIN = 11, ENOMEM = 12, _PAGE_SIZE = 4096, }; @@ -56,13 +57,17 @@ runtime·SysAlloc(uintptr n) void *p; mstats.sys += n; - p = runtime·mmap(nil, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); + p = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); if(p < (void*)4096) { if(p == (void*)EACCES) { runtime·printf("runtime: mmap: access denied\n"); runtime·printf("if you're running SELinux, enable execmem for this process.\n"); runtime·exit(2); } + if(p == (void*)EAGAIN) { + runtime·printf("runtime: mmap: too much locked memory (check 'ulimit -l').\n"); + runtime·exit(2); + } return nil; } return p; @@ -113,7 +118,7 @@ runtime·SysMap(void *v, uintptr n) // On 64-bit, we don't actually have v reserved, so tread carefully. 
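The new PauseNs comment above pins down the ring-buffer layout that was previously implicit. A small runnable example of reading the most recent pause from a Go program, using only the documented index formula and the standard runtime API:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	runtime.GC() // ensure at least one collection has happened
	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)
	if ms.NumGC > 0 {
		// Per the comment: most recent pause at [(NumGC+255)%256],
		// with older pauses walking backwards through the ring.
		last := ms.PauseNs[(ms.NumGC+255)%256]
		fmt.Printf("collections: %d, last pause: %d ns\n", ms.NumGC, last)
	}
}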
if(sizeof(void*) == 8 && (uintptr)v >= 0xffffffffU) { - p = mmap_fixed(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); + p = mmap_fixed(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); if(p == (void*)ENOMEM) runtime·throw("runtime: out of memory"); if(p != v) { @@ -123,7 +128,7 @@ runtime·SysMap(void *v, uintptr n) return; } - p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); if(p == (void*)ENOMEM) runtime·throw("runtime: out of memory"); if(p != v) diff --git a/src/pkg/runtime/mem_netbsd.c b/src/pkg/runtime/mem_netbsd.c index 34ff31d90..77ce04c4e 100644 --- a/src/pkg/runtime/mem_netbsd.c +++ b/src/pkg/runtime/mem_netbsd.c @@ -19,7 +19,7 @@ runtime·SysAlloc(uintptr n) void *v; mstats.sys += n; - v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); + v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); if(v < (void*)4096) return nil; return v; @@ -28,9 +28,7 @@ runtime·SysAlloc(uintptr n) void runtime·SysUnused(void *v, uintptr n) { - USED(v); - USED(n); - // TODO(rsc): call madvise MADV_DONTNEED + runtime·madvise(v, n, MADV_FREE); } void @@ -67,7 +65,7 @@ runtime·SysMap(void *v, uintptr n) // On 64-bit, we don't actually have v reserved, so tread carefully. if(sizeof(void*) == 8) { - p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); if(p == (void*)-ENOMEM) runtime·throw("runtime: out of memory"); if(p != v) { @@ -77,7 +75,7 @@ runtime·SysMap(void *v, uintptr n) return; } - p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); if(p == (void*)-ENOMEM) runtime·throw("runtime: out of memory"); if(p != v) diff --git a/src/pkg/runtime/mem_openbsd.c b/src/pkg/runtime/mem_openbsd.c index 34ff31d90..77ce04c4e 100644 --- a/src/pkg/runtime/mem_openbsd.c +++ b/src/pkg/runtime/mem_openbsd.c @@ -19,7 +19,7 @@ runtime·SysAlloc(uintptr n) void *v; mstats.sys += n; - v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); + v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); if(v < (void*)4096) return nil; return v; @@ -28,9 +28,7 @@ runtime·SysAlloc(uintptr n) void runtime·SysUnused(void *v, uintptr n) { - USED(v); - USED(n); - // TODO(rsc): call madvise MADV_DONTNEED + runtime·madvise(v, n, MADV_FREE); } void @@ -67,7 +65,7 @@ runtime·SysMap(void *v, uintptr n) // On 64-bit, we don't actually have v reserved, so tread carefully. 
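The mmap hunks above drop PROT_EXEC from the heap mappings, and SysUnused now actually hints pages back to the OS with madvise(MADV_FREE) instead of carrying a TODO. A hedged sketch of the same reserve-then-hint pattern from user code; Linux-only, and using MADV_DONTNEED since MADV_FREE is not available on every platform's syscall package:

//go:build linux

package main

import (
	"fmt"
	"syscall"
)

func main() {
	const n = 1 << 20
	// Anonymous private mapping, readable and writable only — note the
	// absence of PROT_EXEC, matching the change above.
	mem, err := syscall.Mmap(-1, 0, n,
		syscall.PROT_READ|syscall.PROT_WRITE,
		syscall.MAP_ANON|syscall.MAP_PRIVATE)
	if err != nil {
		panic(err)
	}
	mem[0] = 1 // touch a page so there is something to give back
	// SysUnused-style hint: the kernel may reclaim these pages and will
	// hand back zeroed ones on the next touch.
	if err := syscall.Madvise(mem, syscall.MADV_DONTNEED); err != nil {
		panic(err)
	}
	fmt.Printf("mapped and released %d bytes\n", len(mem))
}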
if(sizeof(void*) == 8) { - p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); if(p == (void*)-ENOMEM) runtime·throw("runtime: out of memory"); if(p != v) { @@ -77,7 +75,7 @@ runtime·SysMap(void *v, uintptr n) return; } - p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); if(p == (void*)-ENOMEM) runtime·throw("runtime: out of memory"); if(p != v) diff --git a/src/pkg/runtime/mem_plan9.c b/src/pkg/runtime/mem_plan9.c index 15cbc176b..26ca367f1 100644 --- a/src/pkg/runtime/mem_plan9.c +++ b/src/pkg/runtime/mem_plan9.c @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. #include "runtime.h" +#include "defs_GOOS_GOARCH.h" #include "arch_GOARCH.h" #include "malloc.h" #include "os_GOOS.h" @@ -13,14 +14,14 @@ static Lock memlock; enum { - Round = 4095 + Round = PAGESIZE-1 }; void* runtime·SysAlloc(uintptr nbytes) { uintptr bl; - + runtime·lock(&memlock); mstats.sys += nbytes; // Plan 9 sbrk from /sys/src/libc/9sys/sbrk.c diff --git a/src/pkg/runtime/memset_arm.s b/src/pkg/runtime/memclr_arm.s index 974b8da7a..afc529d90 100644 --- a/src/pkg/runtime/memset_arm.s +++ b/src/pkg/runtime/memclr_arm.s @@ -23,35 +23,32 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -TO = 1 -TOE = 2 -N = 3 -TMP = 3 /* N and TMP don't overlap */ - -// TODO(kaib): memset clobbers R9 and R10 (m and g). This makes the -// registers unpredictable if (when) memset SIGSEGV's. Fix it by -// moving the R4-R11 register bank. -TEXT runtime·memset(SB), $0 - MOVW R0, R(TO) - MOVW data+4(FP), R(4) - MOVW n+8(FP), R(N) +TO = 8 +TOE = 11 +N = 12 +TMP = 12 /* N and TMP don't overlap */ + +TEXT runtime·memclr(SB),7,$0 + MOVW ptr+0(FP), R(TO) + MOVW n+4(FP), R(N) + MOVW $0, R(0) ADD R(N), R(TO), R(TOE) /* to end pointer */ CMP $4, R(N) /* need at least 4 bytes to copy */ BLT _1tail - AND $0xFF, R(4) /* it's a byte */ - SLL $8, R(4), R(TMP) /* replicate to a word */ - ORR R(TMP), R(4) - SLL $16, R(4), R(TMP) - ORR R(TMP), R(4) + AND $0xFF, R(0) /* it's a byte */ + SLL $8, R(0), R(TMP) /* replicate to a word */ + ORR R(TMP), R(0) + SLL $16, R(0), R(TMP) + ORR R(TMP), R(0) _4align: /* align on 4 */ AND.S $3, R(TO), R(TMP) BEQ _4aligned - MOVBU.P R(4), 1(R(TO)) /* implicit write back */ + MOVBU.P R(0), 1(R(TO)) /* implicit write back */ B _4align _4aligned: @@ -59,19 +56,19 @@ _4aligned: CMP R(TMP), R(TO) BHS _4tail - MOVW R4, R5 /* replicate */ - MOVW R4, R6 - MOVW R4, R7 - MOVW R4, R8 - MOVW R4, R9 - MOVW R4, R10 - MOVW R4, R11 + MOVW R0, R1 /* replicate */ + MOVW R0, R2 + MOVW R0, R3 + MOVW R0, R4 + MOVW R0, R5 + MOVW R0, R6 + MOVW R0, R7 _f32loop: CMP R(TMP), R(TO) BHS _4tail - MOVM.IA.W [R4-R11], (R(TO)) + MOVM.IA.W [R0-R7], (R(TO)) B _f32loop _4tail: @@ -80,14 +77,14 @@ _4loop: CMP R(TMP), R(TO) BHS _1tail - MOVW.P R(4), 4(R(TO)) /* implicit write back */ + MOVW.P R(0), 4(R(TO)) /* implicit write back */ B _4loop _1tail: CMP R(TO), R(TOE) BEQ _return - MOVBU.P R(4), 1(R(TO)) /* implicit write back */ + MOVBU.P R(0), 1(R(TO)) /* implicit write back */ B _1tail _return: diff --git a/src/pkg/runtime/memmove_arm.s b/src/pkg/runtime/memmove_arm.s index 5c0e57404..c5d7e9d70 100644 --- a/src/pkg/runtime/memmove_arm.s +++ b/src/pkg/runtime/memmove_arm.s @@ -23,19 +23,40 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
IN // THE SOFTWARE. +// TE or TS are spilled to the stack during bulk register moves. TS = 0 -TE = 1 -FROM = 2 -N = 3 -TMP = 3 /* N and TMP don't overlap */ -TMP1 = 4 - -// TODO(kaib): This can be done with the existing registers of LR is re-used. Same for memset. -TEXT runtime·memmove(SB), 7, $8 - // save g and m - MOVW R9, 4(R13) - MOVW R10, 8(R13) - +TE = 8 + +// Warning: the linker will use R11 to synthesize certain instructions. Please +// take care and double check with objdump. +FROM = 11 +N = 12 +TMP = 12 /* N and TMP don't overlap */ +TMP1 = 5 + +RSHIFT = 5 +LSHIFT = 6 +OFFSET = 7 + +BR0 = 0 /* shared with TS */ +BW0 = 1 +BR1 = 1 +BW1 = 2 +BR2 = 2 +BW2 = 3 +BR3 = 3 +BW3 = 4 + +FW0 = 1 +FR0 = 2 +FW1 = 2 +FR1 = 3 +FW2 = 3 +FR2 = 4 +FW3 = 4 +FR3 = 8 /* shared with TE */ + +TEXT runtime·memmove(SB), 7, $4 _memmove: MOVW to+0(FP), R(TS) MOVW from+4(FP), R(FROM) @@ -64,15 +85,17 @@ _b4aligned: /* is source now aligned? */ BNE _bunaligned ADD $31, R(TS), R(TMP) /* do 32-byte chunks if possible */ + MOVW R(TS), savedts+4(SP) _b32loop: CMP R(TMP), R(TE) BLS _b4tail - MOVM.DB.W (R(FROM)), [R4-R11] - MOVM.DB.W [R4-R11], (R(TE)) + MOVM.DB.W (R(FROM)), [R0-R7] + MOVM.DB.W [R0-R7], (R(TE)) B _b32loop _b4tail: /* do remaining words if possible */ + MOVW savedts+4(SP), R(TS) ADD $3, R(TS), R(TMP) _b4loop: CMP R(TMP), R(TE) @@ -107,22 +130,24 @@ _f4aligned: /* is source now aligned? */ BNE _funaligned SUB $31, R(TE), R(TMP) /* do 32-byte chunks if possible */ + MOVW R(TE), savedte+4(SP) _f32loop: CMP R(TMP), R(TS) BHS _f4tail - MOVM.IA.W (R(FROM)), [R4-R11] - MOVM.IA.W [R4-R11], (R(TS)) + MOVM.IA.W (R(FROM)), [R1-R8] + MOVM.IA.W [R1-R8], (R(TS)) B _f32loop _f4tail: + MOVW savedte+4(SP), R(TE) SUB $3, R(TE), R(TMP) /* do remaining words if possible */ _f4loop: CMP R(TMP), R(TS) BHS _f1tail MOVW.P 4(R(FROM)), R(TMP1) /* implicit write back */ - MOVW.P R4, 4(R(TS)) /* implicit write back */ + MOVW.P R(TMP1), 4(R(TS)) /* implicit write back */ B _f4loop _f1tail: @@ -134,25 +159,9 @@ _f1tail: B _f1tail _return: - // restore g and m - MOVW 4(R13), R9 - MOVW 8(R13), R10 MOVW to+0(FP), R0 RET -RSHIFT = 4 -LSHIFT = 5 -OFFSET = 6 - -BR0 = 7 -BW0 = 8 -BR1 = 8 -BW1 = 9 -BR2 = 9 -BW2 = 10 -BR3 = 10 -BW3 = 11 - _bunaligned: CMP $2, R(TMP) /* is R(TMP) < 2 ? 
*/ @@ -172,7 +181,8 @@ _bunaligned: CMP R(TMP), R(TE) BLS _b1tail - AND $~0x03, R(FROM) /* align source */ + BIC $3, R(FROM) /* align source */ + MOVW R(TS), savedts+4(SP) MOVW (R(FROM)), R(BR0) /* prime first block register */ _bu16loop: @@ -196,18 +206,10 @@ _bu16loop: B _bu16loop _bu1tail: + MOVW savedts+4(SP), R(TS) ADD R(OFFSET), R(FROM) B _b1tail -FW0 = 7 -FR0 = 8 -FW1 = 8 -FR1 = 9 -FW2 = 9 -FR2 = 10 -FW3 = 10 -FR3 = 11 - _funaligned: CMP $2, R(TMP) @@ -227,7 +229,8 @@ _funaligned: CMP R(TMP), R(TS) BHS _f1tail - AND $~0x03, R(FROM) /* align source */ + BIC $3, R(FROM) /* align source */ + MOVW R(TE), savedte+4(SP) MOVW.P 4(R(FROM)), R(FR3) /* prime last block register, implicit write back */ _fu16loop: @@ -235,7 +238,7 @@ _fu16loop: BHS _fu1tail MOVW R(FR3)>>R(RSHIFT), R(FW0) - MOVM.IA.W (R(FROM)), [R(FR0)-R(FR3)] + MOVM.IA.W (R(FROM)), [R(FR0),R(FR1),R(FR2),R(FR3)] ORR R(FR0)<<R(LSHIFT), R(FW0) MOVW R(FR0)>>R(RSHIFT), R(FW1) @@ -247,9 +250,10 @@ _fu16loop: MOVW R(FR2)>>R(RSHIFT), R(FW3) ORR R(FR3)<<R(LSHIFT), R(FW3) - MOVM.IA.W [R(FW0)-R(FW3)], (R(TS)) + MOVM.IA.W [R(FW0),R(FW1),R(FW2),R(FW3)], (R(TS)) B _fu16loop _fu1tail: + MOVW savedte+4(SP), R(TE) SUB R(OFFSET), R(FROM) B _f1tail diff --git a/src/pkg/runtime/mfinal.c b/src/pkg/runtime/mfinal.c index 1fa5ea401..2f5e4277d 100644 --- a/src/pkg/runtime/mfinal.c +++ b/src/pkg/runtime/mfinal.c @@ -11,8 +11,8 @@ enum { debug = 0 }; typedef struct Fin Fin; struct Fin { - void (*fn)(void*); - int32 nret; + FuncVal *fn; + uintptr nret; }; // Finalizer hash table. Direct hash, linear scan, at most 3/4 full. @@ -42,7 +42,7 @@ static struct { } fintab[TABSZ]; static void -addfintab(Fintab *t, void *k, void (*fn)(void*), int32 nret) +addfintab(Fintab *t, void *k, FuncVal *fn, uintptr nret) { int32 i, j; @@ -137,7 +137,7 @@ resizefintab(Fintab *tab) } bool -runtime·addfinalizer(void *p, void (*f)(void*), int32 nret) +runtime·addfinalizer(void *p, FuncVal *f, uintptr nret) { Fintab *tab; byte *base; @@ -175,7 +175,7 @@ runtime·addfinalizer(void *p, void (*f)(void*), int32 nret) // get finalizer; if del, delete finalizer. // caller is responsible for updating RefHasFinalizer (special) bit. bool -runtime·getfinalizer(void *p, bool del, void (**fn)(void*), int32 *nret) +runtime·getfinalizer(void *p, bool del, FuncVal **fn, uintptr *nret) { Fintab *tab; bool res; diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c index e8fb266f4..010f9cd96 100644 --- a/src/pkg/runtime/mgc0.c +++ b/src/pkg/runtime/mgc0.c @@ -8,15 +8,28 @@ #include "arch_GOARCH.h" #include "malloc.h" #include "stack.h" +#include "mgc0.h" +#include "race.h" +#include "type.h" +#include "typekind.h" +#include "hashmap.h" enum { Debug = 0, - PtrSize = sizeof(void*), DebugMark = 0, // run second pass to check mark + CollectStats = 0, // Four bits per word (see #defines below). wordsPerBitmapWord = sizeof(void*)*8/4, bitShift = sizeof(void*)*8/4, + + handoffThreshold = 4, + IntermediateBufferCapacity = 64, + + // Bits in type information + PRECISE = 1, + LOOP = 2, + PC_BITS = PRECISE | LOOP, }; // Bits in per-word bitmap. @@ -67,25 +80,34 @@ enum { // uint32 runtime·worldsema = 1; -// TODO: Make these per-M. -static uint64 nhandoff; - static int32 gctrace; +typedef struct Obj Obj; +struct Obj +{ + byte *p; // data pointer + uintptr n; // size of data in bytes + uintptr ti; // type info +}; + +// The size of Workbuf is N*PageSize. 
typedef struct Workbuf Workbuf; struct Workbuf { - Workbuf *next; +#define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr)) + LFNode node; // must be first uintptr nobj; - byte *obj[512-2]; + Obj obj[SIZE/sizeof(Obj) - 1]; + uint8 _padding[SIZE%sizeof(Obj) + sizeof(Obj)]; +#undef SIZE }; typedef struct Finalizer Finalizer; struct Finalizer { - void (*fn)(void*); + FuncVal *fn; void *arg; - int32 nret; + uintptr nret; }; typedef struct FinBlock FinBlock; @@ -99,9 +121,13 @@ struct FinBlock }; extern byte data[]; -extern byte etext[]; +extern byte edata[]; +extern byte bss[]; extern byte ebss[]; +extern byte gcdata[]; +extern byte gcbss[]; + static G *fing; static FinBlock *finq; // list of finalizers that are to be executed static FinBlock *finc; // cache of free blocks @@ -116,89 +142,244 @@ static void putempty(Workbuf*); static Workbuf* handoff(Workbuf*); static struct { - Lock fmu; - Workbuf *full; - Lock emu; - Workbuf *empty; + uint64 full; // lock-free list of full blocks + uint64 empty; // lock-free list of empty blocks + byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait uint32 nproc; volatile uint32 nwait; volatile uint32 ndone; + volatile uint32 debugmarkdone; Note alldone; - Lock markgate; - Lock sweepgate; - MSpan *spans; + ParFor *markfor; + ParFor *sweepfor; Lock; byte *chunk; uintptr nchunk; + + Obj *roots; + uint32 nroot; + uint32 rootcap; } work; -// scanblock scans a block of n bytes starting at pointer b for references -// to other objects, scanning any it finds recursively until there are no -// unscanned objects left. Instead of using an explicit recursion, it keeps -// a work list in the Workbuf* structures and loops in the main function -// body. Keeping an explicit work list is easier on the stack allocator and -// more efficient. +enum { + GC_DEFAULT_PTR = GC_NUM_INSTR, + GC_MAP_NEXT, + GC_CHAN, + + GC_NUM_INSTR2 +}; + +static struct { + struct { + uint64 sum; + uint64 cnt; + } ptr; + uint64 nbytes; + struct { + uint64 sum; + uint64 cnt; + uint64 notype; + uint64 typelookup; + } obj; + uint64 rescan; + uint64 rescanbytes; + uint64 instr[GC_NUM_INSTR2]; + uint64 putempty; + uint64 getfull; +} gcstats; + +// markonly marks an object. It returns true if the object +// has been marked by this function, false otherwise. +// This function isn't thread-safe and doesn't append the object to any buffer. +static bool +markonly(void *obj) +{ + byte *p; + uintptr *bitp, bits, shift, x, xbits, off; + MSpan *s; + PageID k; + + // Words outside the arena cannot be pointers. + if(obj < runtime·mheap->arena_start || obj >= runtime·mheap->arena_used) + return false; + + // obj may be a pointer to a live object. + // Try to find the beginning of the object. + + // Round down to word boundary. + obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); + + // Find bits for this word. + off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start; + bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + xbits = *bitp; + bits = xbits >> shift; + + // Pointing at the beginning of a block? + if((bits & (bitAllocated|bitBlockBoundary)) != 0) + goto found; + + // Otherwise consult span table to find beginning. + // (Manually inlined copy of MHeap_LookupMaybe.) 
+ k = (uintptr)obj>>PageShift; + x = k; + if(sizeof(void*) == 8) + x -= (uintptr)runtime·mheap->arena_start>>PageShift; + s = runtime·mheap->map[x]; + if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) + return false; + p = (byte*)((uintptr)s->start<<PageShift); + if(s->sizeclass == 0) { + obj = p; + } else { + if((byte*)obj >= (byte*)s->limit) + return false; + uintptr size = s->elemsize; + int32 i = ((byte*)obj - p)/size; + obj = p+i*size; + } + + // Now that we know the object header, reload bits. + off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start; + bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + xbits = *bitp; + bits = xbits >> shift; + +found: + // Now we have bits, bitp, and shift correct for + // obj pointing at the base of the object. + // Only care about allocated and not marked. + if((bits & (bitAllocated|bitMarked)) != bitAllocated) + return false; + *bitp |= bitMarked<<shift; + + // The object is now marked + return true; +} + +// PtrTarget and BitTarget are structures used by intermediate buffers. +// The intermediate buffers hold GC data before it +// is moved/flushed to the work buffer (Workbuf). +// The size of an intermediate buffer is very small, +// such as 32 or 64 elements. +typedef struct PtrTarget PtrTarget; +struct PtrTarget +{ + void *p; + uintptr ti; +}; + +typedef struct BitTarget BitTarget; +struct BitTarget +{ + void *p; + uintptr ti; + uintptr *bitp, shift; +}; + +typedef struct BufferList BufferList; +struct BufferList +{ + PtrTarget ptrtarget[IntermediateBufferCapacity]; + BitTarget bittarget[IntermediateBufferCapacity]; + Obj obj[IntermediateBufferCapacity]; + BufferList *next; +}; +static BufferList *bufferList; + +static Lock lock; +static Type *itabtype; + +static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj); + +// flushptrbuf moves data from the PtrTarget buffer to the work buffer. +// The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned, +// while the work buffer contains blocks which have been marked +// and are prepared to be scanned by the garbage collector. +// +// _wp, _wbuf, _nobj are input/output parameters and are specifying the work buffer. +// bitbuf holds temporary data generated by this function. +// +// A simplified drawing explaining how the todo-list moves from a structure to another: +// +// scanblock +// (find pointers) +// Obj ------> PtrTarget (pointer targets) +// ↑ | +// | | flushptrbuf (1st part, +// | | find block start) +// | ↓ +// `--------- BitTarget (pointer targets and the corresponding locations in bitmap) +// flushptrbuf +// (2nd part, mark and enqueue) static void -scanblock(byte *b, int64 n) +flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj, BitTarget *bitbuf) { - byte *obj, *arena_start, *arena_used, *p; - void **vp; - uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc; + byte *p, *arena_start, *obj; + uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n; MSpan *s; PageID k; - void **wp; + Obj *wp; Workbuf *wbuf; - bool keepworking; + PtrTarget *ptrbuf_end; + BitTarget *bitbufpos, *bt; - if((int64)(uintptr)n != n || n < 0) { - runtime·printf("scanblock %p %D\n", b, n); - runtime·throw("scanblock"); - } + arena_start = runtime·mheap->arena_start; - // Memory arena parameters. 
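flushptrbuf's comment and diagram above describe a two-stage pipeline: candidate pointers accumulate in a small PtrTarget buffer, and marking plus work-queueing then happen once per batch under a single lock acquisition. A simplified, hypothetical Go model of that batching idea — a map stands in for the heap bitmap, and the real code computes bit positions rather than hashing:

package main

import (
	"fmt"
	"sync"
)

const bufCap = 64 // mirrors IntermediateBufferCapacity

type ptrTarget struct {
	p  uintptr // candidate pointer
	ti uintptr // its type info, if known
}

type marker struct {
	mu     sync.Mutex
	marked map[uintptr]bool // stand-in for the heap bitmap
	work   []ptrTarget      // stand-in for the Workbuf queue
	buf    []ptrTarget      // the intermediate PtrTarget buffer
}

func (m *marker) enqueue(p, ti uintptr) {
	m.buf = append(m.buf, ptrTarget{p, ti})
	if len(m.buf) == bufCap {
		m.flush()
	}
}

func (m *marker) flush() {
	m.mu.Lock() // one lock round-trip per batch, not per pointer
	for _, t := range m.buf {
		if m.marked[t.p] {
			continue // already marked: nothing more to do
		}
		m.marked[t.p] = true       // mark...
		m.work = append(m.work, t) // ...and queue for scanning
	}
	m.mu.Unlock()
	m.buf = m.buf[:0]
}

func main() {
	m := &marker{marked: map[uintptr]bool{}}
	for i := uintptr(0); i < 100; i += 8 {
		m.enqueue(0x1000+i, 0)
	}
	m.flush()
	fmt.Println("queued", len(m.work), "objects for scanning")
}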
- arena_start = runtime·mheap.arena_start; - arena_used = runtime·mheap.arena_used; - nproc = work.nproc; + wp = *_wp; + wbuf = *_wbuf; + nobj = *_nobj; - wbuf = nil; // current work buffer - wp = nil; // storage for next queued pointer (write pointer) - nobj = 0; // number of queued objects + ptrbuf_end = *ptrbufpos; + n = ptrbuf_end - ptrbuf; + *ptrbufpos = ptrbuf; - // Scanblock helpers pass b==nil. - // The main proc needs to return to make more - // calls to scanblock. But if work.nproc==1 then - // might as well process blocks as soon as we - // have them. - keepworking = b == nil || work.nproc == 1; + if(CollectStats) { + runtime·xadd64(&gcstats.ptr.sum, n); + runtime·xadd64(&gcstats.ptr.cnt, 1); + } - // Align b to a word boundary. - off = (uintptr)b & (PtrSize-1); - if(off != 0) { - b += PtrSize - off; - n -= PtrSize - off; + // If buffer is nearly full, get a new one. + if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) { + if(wbuf != nil) + wbuf->nobj = nobj; + wbuf = getempty(wbuf); + wp = wbuf->obj; + nobj = 0; + + if(n >= nelem(wbuf->obj)) + runtime·throw("ptrbuf has to be smaller than WorkBuf"); } - for(;;) { - // Each iteration scans the block b of length n, queueing pointers in - // the work buffer. - if(Debug > 1) - runtime·printf("scanblock %p %D\n", b, n); + // TODO(atom): This block is a branch of an if-then-else statement. + // The single-threaded branch may be added in a next CL. + { + // Multi-threaded version. - vp = (void**)b; - n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */ - for(i=0; i<n; i++) { - obj = (byte*)vp[i]; + bitbufpos = bitbuf; - // Words outside the arena cannot be pointers. - if((byte*)obj < arena_start || (byte*)obj >= arena_used) - continue; + while(ptrbuf < ptrbuf_end) { + obj = ptrbuf->p; + ti = ptrbuf->ti; + ptrbuf++; + + // obj belongs to interval [mheap.arena_start, mheap.arena_used). + if(Debug > 1) { + if(obj < runtime·mheap->arena_start || obj >= runtime·mheap->arena_used) + runtime·throw("object is outside of mheap"); + } // obj may be a pointer to a live object. // Try to find the beginning of the object. // Round down to word boundary. - obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); + if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) { + obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); + ti = 0; + } // Find bits for this word. off = (uintptr*)obj - (uintptr*)arena_start; @@ -211,6 +392,8 @@ scanblock(byte *b, int64 n) if((bits & (bitAllocated|bitBlockBoundary)) != 0) goto found; + ti = 0; + // Pointing just past the beginning? // Scan backward a little to find a block boundary. for(j=shift; j-->0; ) { @@ -228,16 +411,16 @@ scanblock(byte *b, int64 n) x = k; if(sizeof(void*) == 8) x -= (uintptr)arena_start>>PageShift; - s = runtime·mheap.map[x]; + s = runtime·mheap->map[x]; if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) continue; - p = (byte*)((uintptr)s->start<<PageShift); + p = (byte*)((uintptr)s->start<<PageShift); if(s->sizeclass == 0) { obj = p; } else { if((byte*)obj >= (byte*)s->limit) continue; - size = runtime·class_to_size[s->sizeclass]; + size = s->elemsize; int32 i = ((byte*)obj - p)/size; obj = p+i*size; } @@ -255,80 +438,606 @@ scanblock(byte *b, int64 n) // Only care about allocated and not marked. 
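Both markonly and flushptrbuf resolve interior pointers the same way: look up the owning span, then round the pointer down to a multiple of s->elemsize from the span base. A minimal sketch of that arithmetic with made-up numbers (the sizeclass==0 large-object case, where the whole span is one object, is elided):

package main

import "fmt"

const pageShift = 12

type span struct {
	start    uintptr // first page number of the span
	elemsize uintptr // object size for this span's size class
}

// objectBase mirrors: i = (p - spanbase)/size; obj = spanbase + i*size.
func objectBase(s *span, p uintptr) uintptr {
	base := s.start << pageShift
	i := (p - base) / s.elemsize
	return base + i*s.elemsize
}

func main() {
	s := &span{start: 0x400, elemsize: 48}
	p := (s.start << pageShift) + 130 // interior pointer into object #2
	fmt.Printf("%#x rounds down to object base %#x\n", p, objectBase(s, p))
}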
if((bits & (bitAllocated|bitMarked)) != bitAllocated) continue; - if(nproc == 1) - *bitp |= bitMarked<<shift; - else { - for(;;) { - x = *bitp; - if(x & (bitMarked<<shift)) - goto continue_obj; - if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift)))) - break; - } - } + + *bitbufpos++ = (BitTarget){obj, ti, bitp, shift}; + } + + runtime·lock(&lock); + for(bt=bitbuf; bt<bitbufpos; bt++){ + xbits = *bt->bitp; + bits = xbits >> bt->shift; + if((bits & bitMarked) != 0) + continue; + + // Mark the block + *bt->bitp = xbits | (bitMarked << bt->shift); // If object has no pointers, don't need to scan further. if((bits & bitNoPointers) != 0) continue; - // If another proc wants a pointer, give it some. - if(nobj > 4 && work.nwait > 0 && work.full == nil) { + obj = bt->p; + + // Ask span about size class. + // (Manually inlined copy of MHeap_Lookup.) + x = (uintptr)obj >> PageShift; + if(sizeof(void*) == 8) + x -= (uintptr)arena_start>>PageShift; + s = runtime·mheap->map[x]; + + PREFETCH(obj); + + *wp = (Obj){obj, s->elemsize, bt->ti}; + wp++; + nobj++; + } + runtime·unlock(&lock); + + // If another proc wants a pointer, give it some. + if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) { + wbuf->nobj = nobj; + wbuf = handoff(wbuf); + nobj = wbuf->nobj; + wp = wbuf->obj + nobj; + } + } + + *_wp = wp; + *_wbuf = wbuf; + *_nobj = nobj; +} + +static void +flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj) +{ + uintptr nobj, off; + Obj *wp, obj; + Workbuf *wbuf; + Obj *objbuf_end; + + wp = *_wp; + wbuf = *_wbuf; + nobj = *_nobj; + + objbuf_end = *objbufpos; + *objbufpos = objbuf; + + while(objbuf < objbuf_end) { + obj = *objbuf++; + + // Align obj.b to a word boundary. + off = (uintptr)obj.p & (PtrSize-1); + if(off != 0) { + obj.p += PtrSize - off; + obj.n -= PtrSize - off; + obj.ti = 0; + } + + if(obj.p == nil || obj.n == 0) + continue; + + // If buffer is full, get a new one. + if(wbuf == nil || nobj >= nelem(wbuf->obj)) { + if(wbuf != nil) wbuf->nobj = nobj; - wbuf = handoff(wbuf); - nobj = wbuf->nobj; - wp = wbuf->obj + nobj; + wbuf = getempty(wbuf); + wp = wbuf->obj; + nobj = 0; + } + + *wp = obj; + wp++; + nobj++; + } + + // If another proc wants a pointer, give it some. + if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) { + wbuf->nobj = nobj; + wbuf = handoff(wbuf); + nobj = wbuf->nobj; + wp = wbuf->obj + nobj; + } + + *_wp = wp; + *_wbuf = wbuf; + *_nobj = nobj; +} + +// Program that scans the whole block and treats every block element as a potential pointer +static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR}; + +// Hashmap iterator program +static uintptr mapProg[2] = {0, GC_MAP_NEXT}; + +// Hchan program +static uintptr chanProg[2] = {0, GC_CHAN}; + +// Local variables of a program fragment or loop +typedef struct Frame Frame; +struct Frame { + uintptr count, elemsize, b; + uintptr *loop_or_ret; +}; + +// scanblock scans a block of n bytes starting at pointer b for references +// to other objects, scanning any it finds recursively until there are no +// unscanned objects left. Instead of using an explicit recursion, it keeps +// a work list in the Workbuf* structures and loops in the main function +// body. Keeping an explicit work list is easier on the stack allocator and +// more efficient. 
+// +// wbuf: current work buffer +// wp: storage for next queued pointer (write pointer) +// nobj: number of queued objects +static void +scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) +{ + byte *b, *arena_start, *arena_used; + uintptr n, i, end_b, elemsize, size, ti, objti, count, type; + uintptr *pc, precise_type, nominal_size; + uintptr *map_ret, mapkey_size, mapval_size, mapkey_ti, mapval_ti; + void *obj; + Type *t; + Slice *sliceptr; + Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4]; + BufferList *scanbuffers; + PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos; + BitTarget *bitbuf; + Obj *objbuf, *objbuf_end, *objbufpos; + Eface *eface; + Iface *iface; + Hmap *hmap; + MapType *maptype; + bool didmark, mapkey_kind, mapval_kind; + struct hash_gciter map_iter; + struct hash_gciter_data d; + Hchan *chan; + ChanType *chantype; + + if(sizeof(Workbuf) % PageSize != 0) + runtime·throw("scanblock: size of Workbuf is suboptimal"); + + // Memory arena parameters. + arena_start = runtime·mheap->arena_start; + arena_used = runtime·mheap->arena_used; + + stack_ptr = stack+nelem(stack)-1; + + precise_type = false; + nominal_size = 0; + + // Allocate ptrbuf, bitbuf + { + runtime·lock(&lock); + + if(bufferList == nil) { + bufferList = runtime·SysAlloc(sizeof(*bufferList)); + if(bufferList == nil) + runtime·throw("runtime: cannot allocate memory"); + bufferList->next = nil; + } + scanbuffers = bufferList; + bufferList = bufferList->next; + + ptrbuf = &scanbuffers->ptrtarget[0]; + ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget); + bitbuf = &scanbuffers->bittarget[0]; + objbuf = &scanbuffers->obj[0]; + objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj); + + runtime·unlock(&lock); + } + + ptrbufpos = ptrbuf; + objbufpos = objbuf; + + // (Silence the compiler) + map_ret = nil; + mapkey_size = mapval_size = 0; + mapkey_kind = mapval_kind = false; + mapkey_ti = mapval_ti = 0; + chan = nil; + chantype = nil; + + goto next_block; + + for(;;) { + // Each iteration scans the block b of length n, queueing pointers in + // the work buffer. 
+ if(Debug > 1) { + runtime·printf("scanblock %p %D\n", b, (int64)n); + } + + if(CollectStats) { + runtime·xadd64(&gcstats.nbytes, n); + runtime·xadd64(&gcstats.obj.sum, nobj); + runtime·xadd64(&gcstats.obj.cnt, 1); + } + + if(ti != 0) { + pc = (uintptr*)(ti & ~(uintptr)PC_BITS); + precise_type = (ti & PRECISE); + stack_top.elemsize = pc[0]; + if(!precise_type) + nominal_size = pc[0]; + if(ti & LOOP) { + stack_top.count = 0; // 0 means an infinite number of iterations + stack_top.loop_or_ret = pc+1; + } else { + stack_top.count = 1; } + } else if(UseSpanType) { + if(CollectStats) + runtime·xadd64(&gcstats.obj.notype, 1); + + type = runtime·gettype(b); + if(type != 0) { + if(CollectStats) + runtime·xadd64(&gcstats.obj.typelookup, 1); + + t = (Type*)(type & ~(uintptr)(PtrSize-1)); + switch(type & (PtrSize-1)) { + case TypeInfo_SingleObject: + pc = (uintptr*)t->gc; + precise_type = true; // type information about 'b' is precise + stack_top.count = 1; + stack_top.elemsize = pc[0]; + break; + case TypeInfo_Array: + pc = (uintptr*)t->gc; + if(pc[0] == 0) + goto next_block; + precise_type = true; // type information about 'b' is precise + stack_top.count = 0; // 0 means an infinite number of iterations + stack_top.elemsize = pc[0]; + stack_top.loop_or_ret = pc+1; + break; + case TypeInfo_Map: + hmap = (Hmap*)b; + maptype = (MapType*)t; + if(hash_gciter_init(hmap, &map_iter)) { + mapkey_size = maptype->key->size; + mapkey_kind = maptype->key->kind; + mapkey_ti = (uintptr)maptype->key->gc | PRECISE; + mapval_size = maptype->elem->size; + mapval_kind = maptype->elem->kind; + mapval_ti = (uintptr)maptype->elem->gc | PRECISE; + + map_ret = 0; + pc = mapProg; + } else { + goto next_block; + } + break; + case TypeInfo_Chan: + chan = (Hchan*)b; + chantype = (ChanType*)t; + pc = chanProg; + break; + default: + runtime·throw("scanblock: invalid type"); + return; + } + } else { + pc = defaultProg; + } + } else { + pc = defaultProg; + } + + pc++; + stack_top.b = (uintptr)b; - // If buffer is full, get a new one. 
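The dispatch above relies on runtime·gettype returning a word whose low bits carry one of the TypeInfo_* kinds while the remaining bits form a word-aligned Type pointer. A hypothetical illustration of that tagged-pointer encoding (converting a uintptr back to a pointer is not GC-safe in general; this is illustration only):

package main

import (
	"fmt"
	"unsafe"
)

const (
	typeInfoSingleObject = 0
	typeInfoArray        = 1
	typeInfoMap          = 2
	typeInfoChan         = 3
)

type typ struct{ name string }

// settype packs a 2-bit kind into the free low bits of a word-aligned
// descriptor pointer, in the spirit of runtime·settype/runtime·gettype.
func settype(t *typ, kind uintptr) uintptr {
	p := uintptr(unsafe.Pointer(t))
	if p&(unsafe.Sizeof(p)-1) != 0 {
		panic("descriptor not word-aligned")
	}
	return p | kind
}

func gettype(w uintptr) (*typ, uintptr) {
	mask := unsafe.Sizeof(w) - 1
	return (*typ)(unsafe.Pointer(w &^ mask)), w & mask
}

func main() {
	w := settype(&typ{name: "[]int"}, typeInfoArray)
	t, kind := gettype(w)
	fmt.Println(t.name, kind) // []int 1
}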
- if(wbuf == nil || nobj >= nelem(wbuf->obj)) { - if(wbuf != nil) - wbuf->nobj = nobj; - wbuf = getempty(wbuf); - wp = wbuf->obj; - nobj = 0; + end_b = (uintptr)b + n - PtrSize; + + for(;;) { + if(CollectStats) + runtime·xadd64(&gcstats.instr[pc[0]], 1); + + obj = nil; + objti = 0; + switch(pc[0]) { + case GC_PTR: + obj = *(void**)(stack_top.b + pc[1]); + objti = pc[2]; + pc += 3; + break; + + case GC_SLICE: + sliceptr = (Slice*)(stack_top.b + pc[1]); + if(sliceptr->cap != 0) { + obj = sliceptr->array; + objti = pc[2] | PRECISE | LOOP; } - *wp++ = obj; - nobj++; - continue_obj:; + pc += 3; + break; + + case GC_APTR: + obj = *(void**)(stack_top.b + pc[1]); + pc += 2; + break; + + case GC_STRING: + obj = *(void**)(stack_top.b + pc[1]); + pc += 2; + break; + + case GC_EFACE: + eface = (Eface*)(stack_top.b + pc[1]); + pc += 2; + if(eface->type != nil && (eface->data >= arena_start && eface->data < arena_used)) { + t = eface->type; + if(t->size <= sizeof(void*)) { + if((t->kind & KindNoPointers)) + break; + + obj = eface->data; + if((t->kind & ~KindNoPointers) == KindPtr) + objti = (uintptr)((PtrType*)t)->elem->gc; + } else { + obj = eface->data; + objti = (uintptr)t->gc; + } + } + break; + + case GC_IFACE: + iface = (Iface*)(stack_top.b + pc[1]); + pc += 2; + if(iface->tab == nil) + break; + + // iface->tab + if((void*)iface->tab >= arena_start && (void*)iface->tab < arena_used) { + *ptrbufpos++ = (PtrTarget){iface->tab, (uintptr)itabtype->gc}; + if(ptrbufpos == ptrbuf_end) + flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf); + } + + // iface->data + if(iface->data >= arena_start && iface->data < arena_used) { + t = iface->tab->type; + if(t->size <= sizeof(void*)) { + if((t->kind & KindNoPointers)) + break; + + obj = iface->data; + if((t->kind & ~KindNoPointers) == KindPtr) + objti = (uintptr)((PtrType*)t)->elem->gc; + } else { + obj = iface->data; + objti = (uintptr)t->gc; + } + } + break; + + case GC_DEFAULT_PTR: + while((i = stack_top.b) <= end_b) { + stack_top.b += PtrSize; + obj = *(byte**)i; + if(obj >= arena_start && obj < arena_used) { + *ptrbufpos++ = (PtrTarget){obj, 0}; + if(ptrbufpos == ptrbuf_end) + flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf); + } + } + goto next_block; + + case GC_END: + if(--stack_top.count != 0) { + // Next iteration of a loop if possible. + elemsize = stack_top.elemsize; + stack_top.b += elemsize; + if(stack_top.b + elemsize <= end_b+PtrSize) { + pc = stack_top.loop_or_ret; + continue; + } + i = stack_top.b; + } else { + // Stack pop if possible. + if(stack_ptr+1 < stack+nelem(stack)) { + pc = stack_top.loop_or_ret; + stack_top = *(++stack_ptr); + continue; + } + i = (uintptr)b + nominal_size; + } + if(!precise_type) { + // Quickly scan [b+i,b+n) for possible pointers. + for(; i<=end_b; i+=PtrSize) { + if(*(byte**)i != nil) { + // Found a value that may be a pointer. + // Do a rescan of the entire block. + enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj); + if(CollectStats) { + runtime·xadd64(&gcstats.rescan, 1); + runtime·xadd64(&gcstats.rescanbytes, n); + } + break; + } + } + } + goto next_block; + + case GC_ARRAY_START: + i = stack_top.b + pc[1]; + count = pc[2]; + elemsize = pc[3]; + pc += 4; + + // Stack push. + *stack_ptr-- = stack_top; + stack_top = (Frame){count, elemsize, i, pc}; + continue; + + case GC_ARRAY_NEXT: + if(--stack_top.count != 0) { + stack_top.b += stack_top.elemsize; + pc = stack_top.loop_or_ret; + } else { + // Stack pop. + stack_top = *(++stack_ptr); + pc += 1; + } + continue; + + case GC_CALL: + // Stack push. 
+ *stack_ptr-- = stack_top; + stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/}; + pc = (uintptr*)((byte*)pc + *(int32*)(pc+2)); // target of the CALL instruction + continue; + + case GC_MAP_PTR: + hmap = *(Hmap**)(stack_top.b + pc[1]); + if(hmap == nil) { + pc += 3; + continue; + } + runtime·lock(&lock); + didmark = markonly(hmap); + runtime·unlock(&lock); + if(didmark) { + maptype = (MapType*)pc[2]; + if(hash_gciter_init(hmap, &map_iter)) { + mapkey_size = maptype->key->size; + mapkey_kind = maptype->key->kind; + mapkey_ti = (uintptr)maptype->key->gc | PRECISE; + mapval_size = maptype->elem->size; + mapval_kind = maptype->elem->kind; + mapval_ti = (uintptr)maptype->elem->gc | PRECISE; + + // Start mapProg. + map_ret = pc+3; + pc = mapProg+1; + } else { + pc += 3; + } + } else { + pc += 3; + } + continue; + + case GC_MAP_NEXT: + // Add all keys and values to buffers, mark all subtables. + while(hash_gciter_next(&map_iter, &d)) { + // buffers: reserve space for 2 objects. + if(ptrbufpos+2 >= ptrbuf_end) + flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf); + if(objbufpos+2 >= objbuf_end) + flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); + + if(d.st != nil) { + runtime·lock(&lock); + markonly(d.st); + runtime·unlock(&lock); + } + if(d.key_data != nil) { + if(!(mapkey_kind & KindNoPointers) || d.indirectkey) { + if(!d.indirectkey) + *objbufpos++ = (Obj){d.key_data, mapkey_size, mapkey_ti}; + else + *ptrbufpos++ = (PtrTarget){*(void**)d.key_data, mapkey_ti}; + } + if(!(mapval_kind & KindNoPointers) || d.indirectval) { + if(!d.indirectval) + *objbufpos++ = (Obj){d.val_data, mapval_size, mapval_ti}; + else + *ptrbufpos++ = (PtrTarget){*(void**)d.val_data, mapval_ti}; + } + } + } + if(map_ret == 0) + goto next_block; + pc = map_ret; + continue; + + case GC_REGION: + obj = (void*)(stack_top.b + pc[1]); + size = pc[2]; + objti = pc[3]; + pc += 4; + + *objbufpos++ = (Obj){obj, size, objti}; + if(objbufpos == objbuf_end) + flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); + break; + + case GC_CHAN: + // There are no heap pointers in struct Hchan, + // so we can ignore the leading sizeof(Hchan) bytes. + if(!(chantype->elem->kind & KindNoPointers)) { + // Channel's buffer follows Hchan immediately in memory. + // Size of buffer (cap(c)) is second int in the chan struct. + n = ((uintgo*)chan)[1]; + if(n > 0) { + // TODO(atom): split into two chunks so that only the + // in-use part of the circular buffer is scanned. + // (Channel routines zero the unused part, so the current + // code does not lead to leaks, it's just a little inefficient.) + *objbufpos++ = (Obj){(byte*)chan+runtime·Hchansize, n*chantype->elem->size, + (uintptr)chantype->elem->gc | PRECISE | LOOP}; + if(objbufpos == objbuf_end) + flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); + } + } + goto next_block; + + default: + runtime·throw("scanblock: invalid GC instruction"); + return; } + if(obj >= arena_start && obj < arena_used) { + *ptrbufpos++ = (PtrTarget){obj, objti}; + if(ptrbufpos == ptrbuf_end) + flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf); + } + } + + next_block: // Done scanning [b, b+n). Prepare for the next iteration of - // the loop by setting b and n to the parameters for the next block. + // the loop by setting b, n, ti to the parameters for the next block. - // Fetch b from the work buffer. 
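Taken together, the GC_* cases turn scanblock into a small interpreter over per-type "programs" of machine words, with an explicit frame stack for nested arrays instead of recursion. A toy interpreter in the same spirit, with invented opcodes and layouts:

package main

import "fmt"

const (
	opPtr   = iota // opPtr, off: the word at off is a pointer
	opArray        // opArray, off, count, elemsize: walk a fixed array
	opNext         // next array element, or pop back to the caller
	opEnd          // end of program
)

type frame struct {
	count, elemsize, base int
	retPC                 int
}

// run returns the offsets of every pointer slot the program describes.
func run(prog []int) (ptrs []int) {
	var stack []frame
	top := frame{count: 1}
	pc := 0
	for {
		switch prog[pc] {
		case opPtr:
			ptrs = append(ptrs, top.base+prog[pc+1])
			pc += 2
		case opArray:
			stack = append(stack, top) // push enclosing frame
			top = frame{count: prog[pc+2], elemsize: prog[pc+3],
				base: top.base + prog[pc+1], retPC: pc + 4}
			pc += 4
		case opNext:
			top.count--
			if top.count > 0 {
				top.base += top.elemsize
				pc = top.retPC // loop over the element body again
			} else {
				top = stack[len(stack)-1] // pop
				stack = stack[:len(stack)-1]
				pc++
			}
		case opEnd:
			return ptrs
		}
	}
}

func main() {
	// A pointer at offset 0, then an array of 3 elements of size 16,
	// each with a pointer at element offset 0.
	prog := []int{opPtr, 0, opArray, 8, 3, 16, opPtr, 0, opNext, opEnd}
	fmt.Println(run(prog)) // [0 8 24 40]
}

In the patch itself the programs live in each Type's gc field (pc = (uintptr*)t->gc), and defaultProg — scan every word as a potential pointer — is the fallback when no type information is available.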
if(nobj == 0) { - if(!keepworking) { - putempty(wbuf); - return; + flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf); + flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); + + if(nobj == 0) { + if(!keepworking) { + if(wbuf) + putempty(wbuf); + goto endscan; + } + // Emptied our buffer: refill. + wbuf = getfull(wbuf); + if(wbuf == nil) + goto endscan; + nobj = wbuf->nobj; + wp = wbuf->obj + wbuf->nobj; } - // Emptied our buffer: refill. - wbuf = getfull(wbuf); - if(wbuf == nil) - return; - nobj = wbuf->nobj; - wp = wbuf->obj + wbuf->nobj; } - b = *--wp; - nobj--; - // Ask span about size class. - // (Manually inlined copy of MHeap_Lookup.) - x = (uintptr)b>>PageShift; - if(sizeof(void*) == 8) - x -= (uintptr)arena_start>>PageShift; - s = runtime·mheap.map[x]; - if(s->sizeclass == 0) - n = s->npages<<PageShift; - else - n = runtime·class_to_size[s->sizeclass]; + // Fetch b from the work buffer. + --wp; + b = wp->p; + n = wp->n; + ti = wp->ti; + nobj--; } + +endscan: + runtime·lock(&lock); + scanbuffers->next = bufferList; + bufferList = scanbuffers; + runtime·unlock(&lock); } // debug_scanblock is the debug copy of scanblock. // it is simpler, slower, single-threaded, recursive, // and uses bitSpecial as the mark bit. static void -debug_scanblock(byte *b, int64 n) +debug_scanblock(byte *b, uintptr n) { byte *obj, *p; void **vp; @@ -338,8 +1047,8 @@ debug_scanblock(byte *b, int64 n) if(!DebugMark) runtime·throw("debug_scanblock without DebugMark"); - if((int64)(uintptr)n != n || n < 0) { - runtime·printf("debug_scanblock %p %D\n", b, n); + if((intptr)n < 0) { + runtime·printf("debug_scanblock %p %D\n", b, (int64)n); runtime·throw("debug_scanblock"); } @@ -356,33 +1065,31 @@ debug_scanblock(byte *b, int64 n) obj = (byte*)vp[i]; // Words outside the arena cannot be pointers. - if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used) + if((byte*)obj < runtime·mheap->arena_start || (byte*)obj >= runtime·mheap->arena_used) continue; // Round down to word boundary. obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); // Consult span table to find beginning. - s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj); + s = runtime·MHeap_LookupMaybe(runtime·mheap, obj); if(s == nil) continue; - p = (byte*)((uintptr)s->start<<PageShift); + size = s->elemsize; if(s->sizeclass == 0) { obj = p; - size = (uintptr)s->npages<<PageShift; } else { if((byte*)obj >= (byte*)s->limit) continue; - size = runtime·class_to_size[s->sizeclass]; int32 i = ((byte*)obj - p)/size; obj = p+i*size; } // Now that we know the object header, reload bits. - off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start; - bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start; + bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; xbits = *bitp; bits = xbits >> shift; @@ -404,53 +1111,98 @@ debug_scanblock(byte *b, int64 n) } } +// Append obj to the work buffer. +// _wbuf, _wp, _nobj are input/output parameters and are specifying the work buffer. +static void +enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj) +{ + uintptr nobj, off; + Obj *wp; + Workbuf *wbuf; + + if(Debug > 1) + runtime·printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti); + + // Align obj.b to a word boundary. 
+ off = (uintptr)obj.p & (PtrSize-1); + if(off != 0) { + obj.p += PtrSize - off; + obj.n -= PtrSize - off; + obj.ti = 0; + } + + if(obj.p == nil || obj.n == 0) + return; + + // Load work buffer state + wp = *_wp; + wbuf = *_wbuf; + nobj = *_nobj; + + // If another proc wants a pointer, give it some. + if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) { + wbuf->nobj = nobj; + wbuf = handoff(wbuf); + nobj = wbuf->nobj; + wp = wbuf->obj + nobj; + } + + // If buffer is full, get a new one. + if(wbuf == nil || nobj >= nelem(wbuf->obj)) { + if(wbuf != nil) + wbuf->nobj = nobj; + wbuf = getempty(wbuf); + wp = wbuf->obj; + nobj = 0; + } + + *wp = obj; + wp++; + nobj++; + + // Save work buffer state + *_wp = wp; + *_wbuf = wbuf; + *_nobj = nobj; +} + +static void +markroot(ParFor *desc, uint32 i) +{ + Obj *wp; + Workbuf *wbuf; + uintptr nobj; + + USED(&desc); + wp = nil; + wbuf = nil; + nobj = 0; + enqueue(work.roots[i], &wbuf, &wp, &nobj); + scanblock(wbuf, wp, nobj, false); +} + // Get an empty work buffer off the work.empty list, // allocating new buffers as needed. static Workbuf* getempty(Workbuf *b) { - if(work.nproc == 1) { - // Put b on full list. - if(b != nil) { - b->next = work.full; - work.full = b; + if(b != nil) + runtime·lfstackpush(&work.full, &b->node); + b = (Workbuf*)runtime·lfstackpop(&work.empty); + if(b == nil) { + // Need to allocate. + runtime·lock(&work); + if(work.nchunk < sizeof *b) { + work.nchunk = 1<<20; + work.chunk = runtime·SysAlloc(work.nchunk); + if(work.chunk == nil) + runtime·throw("runtime: cannot allocate memory"); } - // Grab from empty list if possible. - b = work.empty; - if(b != nil) { - work.empty = b->next; - goto haveb; - } - } else { - // Put b on full list. - if(b != nil) { - runtime·lock(&work.fmu); - b->next = work.full; - work.full = b; - runtime·unlock(&work.fmu); - } - // Grab from empty list if possible. - runtime·lock(&work.emu); - b = work.empty; - if(b != nil) - work.empty = b->next; - runtime·unlock(&work.emu); - if(b != nil) - goto haveb; + b = (Workbuf*)work.chunk; + work.chunk += sizeof *b; + work.nchunk -= sizeof *b; + runtime·unlock(&work); } - - // Need to allocate. - runtime·lock(&work); - if(work.nchunk < sizeof *b) { - work.nchunk = 1<<20; - work.chunk = runtime·SysAlloc(work.nchunk); - } - b = (Workbuf*)work.chunk; - work.chunk += sizeof *b; - work.nchunk -= sizeof *b; - runtime·unlock(&work); - -haveb: b->nobj = 0; return b; } @@ -458,19 +1210,10 @@ haveb: static void putempty(Workbuf *b) { - if(b == nil) - return; + if(CollectStats) + runtime·xadd64(&gcstats.putempty, 1); - if(work.nproc == 1) { - b->next = work.empty; - work.empty = b; - return; - } - - runtime·lock(&work.emu); - b->next = work.empty; - work.empty = b; - runtime·unlock(&work.emu); + runtime·lfstackpush(&work.empty, &b->node); } // Get a full work buffer off the work.full list, or return nil. @@ -478,63 +1221,37 @@ static Workbuf* getfull(Workbuf *b) { int32 i; - Workbuf *b1; - - if(work.nproc == 1) { - // Put b on empty list. - if(b != nil) { - b->next = work.empty; - work.empty = b; - } - // Grab from full list if possible. - // Since work.nproc==1, no one else is - // going to give us work. - b = work.full; - if(b != nil) - work.full = b->next; - return b; - } - putempty(b); + if(CollectStats) + runtime·xadd64(&gcstats.getfull, 1); - // Grab buffer from full list if possible. 
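work.full and work.empty are now lock-free lists manipulated with runtime·lfstackpush/runtime·lfstackpop. The classic shape of such a list is a Treiber stack: push and pop each retry a single compare-and-swap on the head. A sketch follows; note the runtime's lfstack additionally packs a counter into the head word to defeat the ABA problem, which this simplified version omits:

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type node struct {
	next *node
	val  int
}

type lfstack struct {
	head unsafe.Pointer // *node
}

func (s *lfstack) push(n *node) {
	for {
		old := atomic.LoadPointer(&s.head)
		n.next = (*node)(old)
		if atomic.CompareAndSwapPointer(&s.head, old, unsafe.Pointer(n)) {
			return
		}
	}
}

func (s *lfstack) pop() *node {
	for {
		old := atomic.LoadPointer(&s.head)
		if old == nil {
			return nil
		}
		n := (*node)(old)
		if atomic.CompareAndSwapPointer(&s.head, old, unsafe.Pointer(n.next)) {
			return n
		}
	}
}

func main() {
	var s lfstack
	for i := 0; i < 3; i++ {
		s.push(&node{val: i})
	}
	for n := s.pop(); n != nil; n = s.pop() {
		fmt.Println(n.val) // 2, 1, 0: LIFO
	}
}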
- for(;;) { - b1 = work.full; - if(b1 == nil) - break; - runtime·lock(&work.fmu); - if(work.full != nil) { - b1 = work.full; - work.full = b1->next; - runtime·unlock(&work.fmu); - return b1; - } - runtime·unlock(&work.fmu); - } + if(b != nil) + runtime·lfstackpush(&work.empty, &b->node); + b = (Workbuf*)runtime·lfstackpop(&work.full); + if(b != nil || work.nproc == 1) + return b; runtime·xadd(&work.nwait, +1); for(i=0;; i++) { - b1 = work.full; - if(b1 != nil) { - runtime·lock(&work.fmu); - if(work.full != nil) { - runtime·xadd(&work.nwait, -1); - b1 = work.full; - work.full = b1->next; - runtime·unlock(&work.fmu); - return b1; - } - runtime·unlock(&work.fmu); - continue; + if(work.full != 0) { + runtime·xadd(&work.nwait, -1); + b = (Workbuf*)runtime·lfstackpop(&work.full); + if(b != nil) + return b; + runtime·xadd(&work.nwait, +1); } if(work.nwait == work.nproc) return nil; - if(i < 10) + if(i < 10) { + m->gcstats.nprocyield++; runtime·procyield(20); - else if(i < 20) + } else if(i < 20) { + m->gcstats.nosyield++; runtime·osyield(); - else + } else { + m->gcstats.nsleep++; runtime·usleep(100); + } } } @@ -550,20 +1267,40 @@ handoff(Workbuf *b) b->nobj -= n; b1->nobj = n; runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]); - nhandoff += n; + m->gcstats.nhandoff++; + m->gcstats.nhandoffcnt += n; // Put b on full list - let first half of b get stolen. - runtime·lock(&work.fmu); - b->next = work.full; - work.full = b; - runtime·unlock(&work.fmu); - + runtime·lfstackpush(&work.full, &b->node); return b1; } -// Scanstack calls scanblock on each of gp's stack segments. static void -scanstack(void (*scanblock)(byte*, int64), G *gp) +addroot(Obj obj) +{ + uint32 cap; + Obj *new; + + if(work.nroot >= work.rootcap) { + cap = PageSize/sizeof(Obj); + if(cap < 2*work.rootcap) + cap = 2*work.rootcap; + new = (Obj*)runtime·SysAlloc(cap*sizeof(Obj)); + if(new == nil) + runtime·throw("runtime: cannot allocate memory"); + if(work.roots != nil) { + runtime·memmove(new, work.roots, work.rootcap*sizeof(Obj)); + runtime·SysFree(work.roots, work.rootcap*sizeof(Obj)); + } + work.roots = new; + work.rootcap = cap; + } + work.roots[work.nroot] = obj; + work.nroot++; +} + +static void +addstackroots(G *gp) { M *mp; int32 n; @@ -571,7 +1308,7 @@ scanstack(void (*scanblock)(byte*, int64), G *gp) byte *sp, *guard; stk = (Stktop*)gp->stackbase; - guard = gp->stackguard; + guard = (byte*)gp->stackguard; if(gp == g) { // Scanning our own stack: start at &gp. @@ -582,72 +1319,82 @@ scanstack(void (*scanblock)(byte*, int64), G *gp) } else { // Scanning another goroutine's stack. // The goroutine is usually asleep (the world is stopped). - sp = gp->sched.sp; + sp = (byte*)gp->sched.sp; // The exception is that if the goroutine is about to enter or might // have just exited a system call, it may be executing code such // as schedlock and may have needed to start a new stack segment. // Use the stack segment and stack pointer at the time of // the system call instead, since that won't change underfoot. 
- if(gp->gcstack != nil) { + if(gp->gcstack != (uintptr)nil) { stk = (Stktop*)gp->gcstack; - sp = gp->gcsp; - guard = gp->gcguard; + sp = (byte*)gp->gcsp; + guard = (byte*)gp->gcguard; } } - if(Debug > 1) - runtime·printf("scanstack %d %p\n", gp->goid, sp); n = 0; while(stk) { if(sp < guard-StackGuard || (byte*)stk < sp) { - runtime·printf("scanstack inconsistent: g%d#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk); + runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk); runtime·throw("scanstack"); } - scanblock(sp, (byte*)stk - sp); - sp = stk->gobuf.sp; + addroot((Obj){sp, (byte*)stk - sp, 0}); + sp = (byte*)stk->gobuf.sp; guard = stk->stackguard; stk = (Stktop*)stk->stackbase; n++; } } -// Markfin calls scanblock on the blocks that have finalizers: -// the things pointed at cannot be freed until the finalizers have run. static void -markfin(void *v) +addfinroots(void *v) { uintptr size; + void *base; size = 0; - if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v)) + if(!runtime·mlookup(v, &base, &size, nil) || !runtime·blockspecial(base)) runtime·throw("mark - finalizer inconsistency"); // do not mark the finalizer block itself. just mark the things it points at. - scanblock(v, size); + addroot((Obj){base, size, 0}); } static void -debug_markfin(void *v) -{ - uintptr size; - - if(!runtime·mlookup(v, &v, &size, nil)) - runtime·throw("debug_mark - finalizer inconsistency"); - debug_scanblock(v, size); -} - -// Mark -static void -mark(void (*scan)(byte*, int64)) +addroots(void) { G *gp; FinBlock *fb; + MSpan *s, **allspans; + uint32 spanidx; + + work.nroot = 0; + + // data & bss + // TODO(atom): load balancing + addroot((Obj){data, edata - data, (uintptr)gcdata}); + addroot((Obj){bss, ebss - bss, (uintptr)gcbss}); + + // MSpan.types + allspans = runtime·mheap->allspans; + for(spanidx=0; spanidx<runtime·mheap->nspan; spanidx++) { + s = allspans[spanidx]; + if(s->state == MSpanInUse) { + switch(s->types.compression) { + case MTypes_Empty: + case MTypes_Single: + break; + case MTypes_Words: + case MTypes_Bytes: + // TODO(atom): consider using defaultProg instead of 0 + addroot((Obj){(byte*)&s->types.data, sizeof(void*), 0}); + break; + } + } + } - // mark data+bss. - scan(data, ebss - data); - - // mark stacks + // stacks for(gp=runtime·allg; gp!=nil; gp=gp->alllink) { switch(gp->status){ default: @@ -658,37 +1405,30 @@ mark(void (*scan)(byte*, int64)) case Grunning: if(gp != g) runtime·throw("mark - world not stopped"); - scanstack(scan, gp); + addstackroots(gp); break; case Grunnable: case Gsyscall: case Gwaiting: - scanstack(scan, gp); + addstackroots(gp); break; } } - // mark things pointed at by objects with finalizers - if(scan == debug_scanblock) - runtime·walkfintab(debug_markfin); - else - runtime·walkfintab(markfin); + runtime·walkfintab(addfinroots); for(fb=allfin; fb; fb=fb->alllink) - scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0])); - - // in multiproc mode, join in the queued work. 
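addroot/addroots above split marking into two phases: first gather every root (data, bss, stacks, finalizers, span type words) into a flat array that grows by doubling, then let work.markfor hand out indices to the worker threads. A rough Go approximation of that structure, using a plain atomic counter where the runtime uses its work-stealing ParFor:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type obj struct {
	name string
	size int
}

func main() {
	var roots []obj // addroot appends here; append grows by doubling
	for i := 0; i < 10; i++ {
		roots = append(roots, obj{fmt.Sprintf("root%d", i), 64})
	}

	var next int32 = -1
	var wg sync.WaitGroup
	nproc := 4
	for p := 0; p < nproc; p++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				i := atomic.AddInt32(&next, 1)
				if int(i) >= len(roots) {
					return
				}
				// markroot(desc, i): scan one root here.
				_ = roots[i]
			}
		}()
	}
	wg.Wait()
	fmt.Println("scanned", len(roots), "roots")
}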
- scan(nil, 0); + addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0}); } static bool handlespecial(byte *p, uintptr size) { - void (*fn)(void*); - int32 nret; + FuncVal *fn; + uintptr nret; FinBlock *block; Finalizer *f; - + if(!runtime·getfinalizer(p, true, &fn, &nret)) { runtime·setblockspecial(p, false); runtime·MProf_Free(p, size); @@ -699,6 +1439,8 @@ handlespecial(byte *p, uintptr size) if(finq == nil || finq->cnt == finq->cap) { if(finc == nil) { finc = runtime·SysAlloc(PageSize); + if(finc == nil) + runtime·throw("runtime: cannot allocate memory"); finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1; finc->alllink = allfin; allfin = finc; @@ -713,124 +1455,225 @@ handlespecial(byte *p, uintptr size) f->fn = fn; f->nret = nret; f->arg = p; - runtime·unlock(&finlock); + runtime·unlock(&finlock); return true; } // Sweep frees or collects finalizers for blocks not marked in the mark phase. // It clears the mark bits in preparation for the next GC round. static void -sweep(void) +sweepspan(ParFor *desc, uint32 idx) { - MSpan *s; int32 cl, n, npages; uintptr size; byte *p; MCache *c; byte *arena_start; - int64 now; + MLink head, *end; + int32 nfree; + byte *type_data; + byte compression; + uintptr type_data_inc; + MSpan *s; - arena_start = runtime·mheap.arena_start; - now = runtime·nanotime(); + USED(&desc); + s = runtime·mheap->allspans[idx]; + if(s->state != MSpanInUse) + return; + arena_start = runtime·mheap->arena_start; + p = (byte*)(s->start << PageShift); + cl = s->sizeclass; + size = s->elemsize; + if(cl == 0) { + n = 1; + } else { + // Chunk full of small blocks. + npages = runtime·class_to_allocnpages[cl]; + n = (npages << PageShift) / size; + } + nfree = 0; + end = &head; + c = m->mcache; + + type_data = (byte*)s->types.data; + type_data_inc = sizeof(uintptr); + compression = s->types.compression; + switch(compression) { + case MTypes_Bytes: + type_data += 8*sizeof(uintptr); + type_data_inc = 1; + break; + } - for(;;) { - s = work.spans; - if(s == nil) - break; - if(!runtime·casp(&work.spans, s, s->allnext)) - continue; + // Sweep through n objects of given size starting at p. + // This thread owns the span now, so it can manipulate + // the block bitmap without atomic operations. + for(; n > 0; n--, p += size, type_data+=type_data_inc) { + uintptr off, *bitp, shift, bits; - // Stamp newly unused spans. The scavenger will use that - // info to potentially give back some pages to the OS. - if(s->state == MSpanFree && s->unusedsince == 0) - s->unusedsince = now; + off = (uintptr*)p - (uintptr*)arena_start; + bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + bits = *bitp>>shift; - if(s->state != MSpanInUse) + if((bits & bitAllocated) == 0) continue; - p = (byte*)(s->start << PageShift); - cl = s->sizeclass; + if((bits & bitMarked) != 0) { + if(DebugMark) { + if(!(bits & bitSpecial)) + runtime·printf("found spurious mark on %p\n", p); + *bitp &= ~(bitSpecial<<shift); + } + *bitp &= ~(bitMarked<<shift); + continue; + } + + // Special means it has a finalizer or is being profiled. + // In DebugMark mode, the bit has been coopted so + // we have to assume all blocks are special. + if(DebugMark || (bits & bitSpecial) != 0) { + if(handlespecial(p, size)) + continue; + } + + // Mark freed; restore block boundary bit. + *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); + if(cl == 0) { - size = s->npages<<PageShift; - n = 1; + // Free large span. 
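[Both the sweep loop here and the mark code rely on the same bitmap addressing: the heap bitmap sits immediately below arena_start and grows downward, with a few flag bits per heap word (the diff uses bitAllocated, bitMarked, bitSpecial, bitBlockBoundary). A sketch of the recurring off/bitp/shift computation; the 4-bits-per-word figure is an assumption read off the arithmetic, not stated in the hunk:

    #include <stdint.h>

    /* One bitmap word covers sizeof(uintptr)*8/4 heap words
     * (16 on 64-bit), assuming 4 bitmap bits per heap word. */
    enum { wordsPerBitmapWord = sizeof(uintptr_t) * 8 / 4 };

    static uintptr_t *
    bitmap_word(void *p, uintptr_t *arena_start, uintptr_t *shift)
    {
        /* word index of p within the arena */
        uintptr_t off = (uintptr_t *)p - arena_start;
        /* bit position of p's flags inside its bitmap word */
        *shift = off % wordsPerBitmapWord;
        /* the bitmap lives just below arena_start and grows down,
         * hence the subtraction */
        return arena_start - off / wordsPerBitmapWord - 1;
    }

A caller then tests, for example, ((*bitp >> shift) & bitAllocated), exactly as the sweep loop above does.]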
+ runtime·unmarkspan(p, 1<<PageShift); + *(uintptr*)p = 1; // needs zeroing + runtime·MHeap_Free(runtime·mheap, s, 1); + c->local_alloc -= size; + c->local_nfree++; } else { - // Chunk full of small blocks. - size = runtime·class_to_size[cl]; - npages = runtime·class_to_allocnpages[cl]; - n = (npages << PageShift) / size; + // Free small object. + switch(compression) { + case MTypes_Words: + *(uintptr*)type_data = 0; + break; + case MTypes_Bytes: + *(byte*)type_data = 0; + break; + } + if(size > sizeof(uintptr)) + ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed" + + end->next = (MLink*)p; + end = (MLink*)p; + nfree++; } + } - // Sweep through n objects of given size starting at p. - // This thread owns the span now, so it can manipulate - // the block bitmap without atomic operations. - for(; n > 0; n--, p += size) { - uintptr off, *bitp, shift, bits; + if(nfree) { + c->local_by_size[cl].nfree += nfree; + c->local_alloc -= size * nfree; + c->local_nfree += nfree; + c->local_cachealloc -= nfree * size; + c->local_objects -= nfree; + runtime·MCentral_FreeSpan(&runtime·mheap->central[cl], s, nfree, head.next, end); + } +} - off = (uintptr*)p - (uintptr*)arena_start; - bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; - shift = off % wordsPerBitmapWord; - bits = *bitp>>shift; +static void +dumpspan(uint32 idx) +{ + int32 sizeclass, n, npages, i, column; + uintptr size; + byte *p; + byte *arena_start; + MSpan *s; + bool allocated, special; - if((bits & bitAllocated) == 0) - continue; + s = runtime·mheap->allspans[idx]; + if(s->state != MSpanInUse) + return; + arena_start = runtime·mheap->arena_start; + p = (byte*)(s->start << PageShift); + sizeclass = s->sizeclass; + size = s->elemsize; + if(sizeclass == 0) { + n = 1; + } else { + npages = runtime·class_to_allocnpages[sizeclass]; + n = (npages << PageShift) / size; + } + + runtime·printf("%p .. %p:\n", p, p+n*size); + column = 0; + for(; n>0; n--, p+=size) { + uintptr off, *bitp, shift, bits; - if((bits & bitMarked) != 0) { - if(DebugMark) { - if(!(bits & bitSpecial)) - runtime·printf("found spurious mark on %p\n", p); - *bitp &= ~(bitSpecial<<shift); - } - *bitp &= ~(bitMarked<<shift); - continue; - } + off = (uintptr*)p - (uintptr*)arena_start; + bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + bits = *bitp>>shift; - // Special means it has a finalizer or is being profiled. - // In DebugMark mode, the bit has been coopted so - // we have to assume all blocks are special. - if(DebugMark || (bits & bitSpecial) != 0) { - if(handlespecial(p, size)) - continue; + allocated = ((bits & bitAllocated) != 0); + special = ((bits & bitSpecial) != 0); + + for(i=0; i<size; i+=sizeof(void*)) { + if(column == 0) { + runtime·printf("\t"); + } + if(i == 0) { + runtime·printf(allocated ? "(" : "["); + runtime·printf(special ? "@" : ""); + runtime·printf("%p: ", p+i); + } else { + runtime·printf(" "); } - // Mark freed; restore block boundary bit. - *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); + runtime·printf("%p", *(void**)(p+i)); - c = m->mcache; - if(s->sizeclass == 0) { - // Free large span. - runtime·unmarkspan(p, 1<<PageShift); - *(uintptr*)p = 1; // needs zeroing - runtime·MHeap_Free(&runtime·mheap, s, 1); - } else { - // Free small object. - if(size > sizeof(uintptr)) - ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed" - c->local_by_size[s->sizeclass].nfree++; - runtime·MCache_Free(c, p, s->sizeclass, size); + if(i+sizeof(void*) >= size) { + runtime·printf(allocated ? 
") " : "] "); + } + + column++; + if(column == 8) { + runtime·printf("\n"); + column = 0; } - c->local_alloc -= size; - c->local_nfree++; } } + runtime·printf("\n"); +} + +// A debugging function to dump the contents of memory +void +runtime·memorydump(void) +{ + uint32 spanidx; + + for(spanidx=0; spanidx<runtime·mheap->nspan; spanidx++) { + dumpspan(spanidx); + } } void runtime·gchelper(void) { - // Wait until main proc is ready for mark help. - runtime·lock(&work.markgate); - runtime·unlock(&work.markgate); - scanblock(nil, 0); + // parallel mark for over gc roots + runtime·parfordo(work.markfor); + + // help other threads scan secondary blocks + scanblock(nil, nil, 0, true); - // Wait until main proc is ready for sweep help. - runtime·lock(&work.sweepgate); - runtime·unlock(&work.sweepgate); - sweep(); + if(DebugMark) { + // wait while the main thread executes mark(debug_scanblock) + while(runtime·atomicload(&work.debugmarkdone) == 0) + runtime·usleep(10); + } + runtime·parfordo(work.sweepfor); if(runtime·xadd(&work.ndone, +1) == work.nproc-1) runtime·notewakeup(&work.alldone); } +#define GcpercentUnknown (-2) + // Initialized from $GOGC. GOGC=off means no gc. // // Next gc is after we've allocated an extra amount of @@ -840,33 +1683,36 @@ runtime·gchelper(void) // proportion to the allocation cost. Adjusting gcpercent // just changes the linear constant (and also the amount of // extra memory used). -static int32 gcpercent = -2; +static int32 gcpercent = GcpercentUnknown; static void -stealcache(void) +cachestats(GCStats *stats) { - M *m; - - for(m=runtime·allm; m; m=m->alllink) - runtime·MCache_ReleaseAll(m->mcache); -} - -static void -cachestats(void) -{ - M *m; + M *mp; MCache *c; + P *p, **pp; int32 i; uint64 stacks_inuse; - uint64 stacks_sys; + uint64 *src, *dst; + if(stats) + runtime·memclr((byte*)stats, sizeof(*stats)); stacks_inuse = 0; - stacks_sys = 0; - for(m=runtime·allm; m; m=m->alllink) { - runtime·purgecachedstats(m); - stacks_inuse += m->stackalloc->inuse; - stacks_sys += m->stackalloc->sys; - c = m->mcache; + for(mp=runtime·allm; mp; mp=mp->alllink) { + stacks_inuse += mp->stackinuse*FixedStack; + if(stats) { + src = (uint64*)&mp->gcstats; + dst = (uint64*)stats; + for(i=0; i<sizeof(*stats)/sizeof(uint64); i++) + dst[i] += src[i]; + runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats)); + } + } + for(pp=runtime·allp; p=*pp; pp++) { + c = p->mcache; + if(c==nil) + continue; + runtime·purgecachedstats(c); for(i=0; i<nelem(c->local_by_size); i++) { mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc; c->local_by_size[i].nmalloc = 0; @@ -875,16 +1721,42 @@ cachestats(void) } } mstats.stacks_inuse = stacks_inuse; - mstats.stacks_sys = stacks_sys; +} + +// Structure of arguments passed to function gc(). +// This allows the arguments to be passed via reflect·call. +struct gc_args +{ + int32 force; +}; + +static void gc(struct gc_args *args); + +static int32 +readgogc(void) +{ + byte *p; + + p = runtime·getenv("GOGC"); + if(p == nil || p[0] == '\0') + return 100; + if(runtime·strcmp(p, (byte*)"off") == 0) + return -1; + return runtime·atoi(p); } void runtime·gc(int32 force) { - int64 t0, t1, t2, t3; - uint64 heap0, heap1, obj0, obj1; byte *p; - bool extra; + struct gc_args a, *ap; + FuncVal gcv; + + // The atomic operations are not atomic if the uint64s + // are not aligned on uint64 boundaries. This has been + // a problem in the past. 
+ if((((uintptr)&work.empty) & 7) != 0) + runtime·throw("runtime: gc work buffer is misaligned"); // The gc is turned off (via enablegc) until // the bootstrap has completed. @@ -897,14 +1769,8 @@ runtime·gc(int32 force) if(!mstats.enablegc || m->locks > 0 || runtime·panicking) return; - if(gcpercent == -2) { // first time through - p = runtime·getenv("GOGC"); - if(p == nil || p[0] == '\0') - gcpercent = 100; - else if(runtime·strcmp(p, (byte*)"off") == 0) - gcpercent = -1; - else - gcpercent = runtime·atoi(p); + if(gcpercent == GcpercentUnknown) { // first time through + gcpercent = readgogc(); p = runtime·getenv("GOGCTRACE"); if(p != nil) @@ -913,103 +1779,171 @@ runtime·gc(int32 force) if(gcpercent < 0) return; + // Run gc on a bigger stack to eliminate + // a potentially large number of calls to runtime·morestack. + a.force = force; + ap = &a; + m->moreframesize_minalloc = StackBig; + gcv.fn = (void*)gc; + reflect·call(&gcv, (byte*)&ap, sizeof(ap)); + + if(gctrace > 1 && !force) { + a.force = 1; + gc(&a); + } +} + +static FuncVal runfinqv = {runfinq}; + +static void +gc(struct gc_args *args) +{ + int64 t0, t1, t2, t3, t4; + uint64 heap0, heap1, obj0, obj1, ninstr; + GCStats stats; + M *mp; + uint32 i; + Eface eface; + runtime·semacquire(&runtime·worldsema); - if(!force && mstats.heap_alloc < mstats.next_gc) { + if(!args->force && mstats.heap_alloc < mstats.next_gc) { runtime·semrelease(&runtime·worldsema); return; } t0 = runtime·nanotime(); - nhandoff = 0; m->gcing = 1; runtime·stoptheworld(); - cachestats(); - heap0 = mstats.heap_alloc; - obj0 = mstats.nmalloc - mstats.nfree; + if(CollectStats) + runtime·memclr((byte*)&gcstats, sizeof(gcstats)); - runtime·lock(&work.markgate); - runtime·lock(&work.sweepgate); + for(mp=runtime·allm; mp; mp=mp->alllink) + runtime·settype_flush(mp, false); - extra = false; - work.nproc = 1; - if(runtime·gomaxprocs > 1 && runtime·ncpu > 1) { - runtime·noteclear(&work.alldone); - work.nproc += runtime·helpgc(&extra); + heap0 = 0; + obj0 = 0; + if(gctrace) { + cachestats(nil); + heap0 = mstats.heap_alloc; + obj0 = mstats.nmalloc - mstats.nfree; } + + m->locks++; // disable gc during mallocs in parforalloc + if(work.markfor == nil) + work.markfor = runtime·parforalloc(MaxGcproc); + if(work.sweepfor == nil) + work.sweepfor = runtime·parforalloc(MaxGcproc); + m->locks--; + + if(itabtype == nil) { + // get C pointer to the Go type "itab" + runtime·gc_itab_ptr(&eface); + itabtype = ((PtrType*)eface.type)->elem; + } + work.nwait = 0; work.ndone = 0; + work.debugmarkdone = 0; + work.nproc = runtime·gcprocs(); + addroots(); + runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot); + runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap->nspan, nil, true, sweepspan); + if(work.nproc > 1) { + runtime·noteclear(&work.alldone); + runtime·helpgc(work.nproc); + } - runtime·unlock(&work.markgate); // let the helpers in - mark(scanblock); - if(DebugMark) - mark(debug_scanblock); t1 = runtime·nanotime(); - work.spans = runtime·mheap.allspans; - runtime·unlock(&work.sweepgate); // let the helpers in - sweep(); + runtime·parfordo(work.markfor); + scanblock(nil, nil, 0, true); + + if(DebugMark) { + for(i=0; i<work.nroot; i++) + debug_scanblock(work.roots[i].p, work.roots[i].n); + runtime·atomicstore(&work.debugmarkdone, 1); + } + t2 = runtime·nanotime(); + + runtime·parfordo(work.sweepfor); + t3 = runtime·nanotime(); + if(work.nproc > 1) runtime·notesleep(&work.alldone); - t2 = runtime·nanotime(); - stealcache(); - cachestats(); + 
cachestats(&stats); + + stats.nprocyield += work.sweepfor->nprocyield; + stats.nosyield += work.sweepfor->nosyield; + stats.nsleep += work.sweepfor->nsleep; mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; m->gcing = 0; - m->locks++; // disable gc during the mallocs in newproc if(finq != nil) { + m->locks++; // disable gc during the mallocs in newproc // kick off or wake up goroutine to run queued finalizers if(fing == nil) - fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runtime·gc); + fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc); else if(fingwait) { fingwait = 0; runtime·ready(fing); } + m->locks--; } - m->locks--; - cachestats(); heap1 = mstats.heap_alloc; obj1 = mstats.nmalloc - mstats.nfree; - t3 = runtime·nanotime(); - mstats.last_gc = t3; - mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; - mstats.pause_total_ns += t3 - t0; + t4 = runtime·nanotime(); + mstats.last_gc = t4; + mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0; + mstats.pause_total_ns += t4 - t0; mstats.numgc++; if(mstats.debuggc) - runtime·printf("pause %D\n", t3-t0); + runtime·printf("pause %D\n", t4-t0); if(gctrace) { - runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D handoff\n", - mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000, + runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects," + " %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n", + mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000, heap0>>20, heap1>>20, obj0, obj1, mstats.nmalloc, mstats.nfree, - nhandoff); + stats.nhandoff, stats.nhandoffcnt, + work.sweepfor->nsteal, work.sweepfor->nstealcnt, + stats.nprocyield, stats.nosyield, stats.nsleep); + if(CollectStats) { + runtime·printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n", + gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup); + if(gcstats.ptr.cnt != 0) + runtime·printf("avg ptrbufsize: %D (%D/%D)\n", + gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt); + if(gcstats.obj.cnt != 0) + runtime·printf("avg nobj: %D (%D/%D)\n", + gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt); + runtime·printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes); + + runtime·printf("instruction counts:\n"); + ninstr = 0; + for(i=0; i<nelem(gcstats.instr); i++) { + runtime·printf("\t%d:\t%D\n", i, gcstats.instr[i]); + ninstr += gcstats.instr[i]; + } + runtime·printf("\ttotal:\t%D\n", ninstr); + + runtime·printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull); + } } - + runtime·MProf_GC(); runtime·semrelease(&runtime·worldsema); + runtime·starttheworld(); - // If we could have used another helper proc, start one now, - // in the hope that it will be available next time. - // It would have been even better to start it before the collection, - // but doing so requires allocating memory, so it's tricky to - // coordinate. This lazy approach works out in practice: - // we don't mind if the first couple gc rounds don't have quite - // the maximum number of procs. 
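[The pacing rule set just after sweep above is the whole GOGC contract: the next collection triggers once the heap has grown gcpercent percent beyond what survived this one. As a worked example:

    #include <stdint.h>

    /* next_gc as assigned above: collect again once the heap has grown
     * by gcpercent percent over what is live now. Callers never pass a
     * negative gcpercent; runtime·gc returns early for GOGC=off. */
    static uint64_t
    next_gc(uint64_t heap_alloc, int32_t gcpercent)
    {
        return heap_alloc + heap_alloc * (uint64_t)gcpercent / 100;
    }
    /* next_gc(4 << 20, 100) == 8 << 20: with the default GOGC=100 a
     * 4 MB live heap schedules the next collection near 8 MB. */
]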
- runtime·starttheworld(extra); - - // give the queued finalizers, if any, a chance to run - if(finq != nil) + // give the queued finalizers, if any, a chance to run + if(finq != nil) runtime·gosched(); - - if(gctrace > 1 && !force) - runtime·gc(1); } void @@ -1022,11 +1956,56 @@ runtime·ReadMemStats(MStats *stats) runtime·semacquire(&runtime·worldsema); m->gcing = 1; runtime·stoptheworld(); - cachestats(); + cachestats(nil); *stats = mstats; m->gcing = 0; runtime·semrelease(&runtime·worldsema); - runtime·starttheworld(false); + runtime·starttheworld(); +} + +void +runtime∕debug·readGCStats(Slice *pauses) +{ + uint64 *p; + uint32 i, n; + + // Calling code in runtime/debug should make the slice large enough. + if(pauses->cap < nelem(mstats.pause_ns)+3) + runtime·throw("runtime: short slice passed to readGCStats"); + + // Pass back: pauses, last gc (absolute time), number of gc, total pause ns. + p = (uint64*)pauses->array; + runtime·lock(runtime·mheap); + n = mstats.numgc; + if(n > nelem(mstats.pause_ns)) + n = nelem(mstats.pause_ns); + + // The pause buffer is circular. The most recent pause is at + // pause_ns[(numgc-1)%nelem(pause_ns)], and then backward + // from there to go back farther in time. We deliver the times + // most recent first (in p[0]). + for(i=0; i<n; i++) + p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)]; + + p[n] = mstats.last_gc; + p[n+1] = mstats.numgc; + p[n+2] = mstats.pause_total_ns; + runtime·unlock(runtime·mheap); + pauses->len = n+3; +} + +void +runtime∕debug·setGCPercent(intgo in, intgo out) +{ + runtime·lock(runtime·mheap); + if(gcpercent == GcpercentUnknown) + gcpercent = readgogc(); + out = gcpercent; + if(in < 0) + in = -1; + gcpercent = in; + runtime·unlock(runtime·mheap); + FLUSH(&out); } static void @@ -1050,11 +2029,11 @@ runfinq(void) finq = nil; if(fb == nil) { fingwait = 1; - g->status = Gwaiting; - g->waitreason = "finalizer wait"; - runtime·gosched(); + runtime·park(nil, nil, "finalizer wait"); continue; } + if(raceenabled) + runtime·racefingo(); for(; fb; fb=next) { next = fb->next; for(i=0; i<fb->cnt; i++) { @@ -1066,7 +2045,7 @@ runfinq(void) framecap = framesz; } *(void**)frame = f->arg; - reflect·call((byte*)f->fn, frame, sizeof(uintptr) + f->nret); + reflect·call(f->fn, frame, sizeof(uintptr) + f->nret); f->fn = nil; f->arg = nil; } @@ -1088,11 +2067,11 @@ runtime·markallocated(void *v, uintptr n, bool noptr) if(0) runtime·printf("markallocated %p+%p\n", v, n); - if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) + if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start) runtime·throw("markallocated: bad pointer"); - off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset - b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start; // word offset + b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; for(;;) { @@ -1120,11 +2099,11 @@ runtime·markfreed(void *v, uintptr n) if(0) runtime·printf("markallocated %p+%p\n", v, n); - if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) + if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start) runtime·throw("markallocated: bad pointer"); - off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset - b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - 
(uintptr*)runtime·mheap->arena_start; // word offset + b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; for(;;) { @@ -1150,11 +2129,11 @@ runtime·checkfreed(void *v, uintptr n) if(!runtime·checking) return; - if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) + if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start) return; // not allocated, so okay - off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset - b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start; // word offset + b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; bits = *b>>shift; @@ -1173,7 +2152,7 @@ runtime·markspan(void *v, uintptr size, uintptr n, bool leftover) uintptr *b, off, shift; byte *p; - if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) + if((byte*)v+size*n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start) runtime·throw("markspan: bad pointer"); p = v; @@ -1184,8 +2163,8 @@ runtime·markspan(void *v, uintptr size, uintptr n, bool leftover) // the entire span, and each bitmap word has bits for only // one span, so no other goroutines are changing these // bitmap words. - off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start; // word offset - b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)p - (uintptr*)runtime·mheap->arena_start; // word offset + b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); } @@ -1197,14 +2176,14 @@ runtime·unmarkspan(void *v, uintptr n) { uintptr *p, *b, off; - if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) + if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start) runtime·throw("markspan: bad pointer"); p = v; - off = p - (uintptr*)runtime·mheap.arena_start; // word offset + off = p - (uintptr*)runtime·mheap->arena_start; // word offset if(off % wordsPerBitmapWord != 0) runtime·throw("markspan: unaligned pointer"); - b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; n /= PtrSize; if(n%wordsPerBitmapWord != 0) runtime·throw("unmarkspan: unaligned length"); @@ -1225,8 +2204,8 @@ runtime·blockspecial(void *v) if(DebugMark) return true; - off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; - b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start; + b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; return (*b & (bitSpecial<<shift)) != 0; @@ -1240,8 +2219,8 @@ runtime·setblockspecial(void *v, bool s) if(DebugMark) return; - off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; - b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start; + b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; for(;;) { diff --git a/src/pkg/runtime/mgc0.go b/src/pkg/runtime/mgc0.go new file mode 100644 index 000000000..b15054662 --- /dev/null +++ b/src/pkg/runtime/mgc0.go @@ -0,0 +1,15 @@ +// Copyright 
2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +// Called from C. Returns the Go type *m. +func gc_m_ptr(ret *interface{}) { + *ret = (*m)(nil) +} + +// Called from C. Returns the Go type *itab. +func gc_itab_ptr(ret *interface{}) { + *ret = (*itab)(nil) +} diff --git a/src/pkg/runtime/mgc0.h b/src/pkg/runtime/mgc0.h new file mode 100644 index 000000000..87b604a36 --- /dev/null +++ b/src/pkg/runtime/mgc0.h @@ -0,0 +1,43 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Garbage collector (GC) + +// GC instruction opcodes. +// +// The opcode of an instruction is followed by zero or more +// arguments to the instruction. +// +// Meaning of arguments: +// off Offset (in bytes) from the start of the current object +// objgc Pointer to GC info of an object +// objgcrel Offset to GC info of an object +// len Length of an array +// elemsize Size (in bytes) of an element +// size Size (in bytes) +enum { + GC_END, // End of object, loop or subroutine. Args: none + GC_PTR, // A typed pointer. Args: (off, objgc) + GC_APTR, // Pointer to an arbitrary object. Args: (off) + GC_ARRAY_START, // Start an array with a fixed length. Args: (off, len, elemsize) + GC_ARRAY_NEXT, // The next element of an array. Args: none + GC_CALL, // Call a subroutine. Args: (off, objgcrel) + GC_MAP_PTR, // Go map. Args: (off, MapType*) + GC_STRING, // Go string. Args: (off) + GC_EFACE, // interface{}. Args: (off) + GC_IFACE, // interface{...}. Args: (off) + GC_SLICE, // Go slice. Args: (off, objgc) + GC_REGION, // A region/part of the current object. Args: (off, size, objgc) + + GC_NUM_INSTR, // Number of instruction opcodes +}; + +enum { + // Size of GC's fixed stack. + // + // The current GC implementation permits: + // - at most 1 stack allocation because of GC_CALL + // - at most GC_STACK_CAPACITY allocations because of GC_ARRAY_START + GC_STACK_CAPACITY = 8, +}; diff --git a/src/pkg/runtime/mheap.c b/src/pkg/runtime/mheap.c index c877bfca9..f45149d63 100644 --- a/src/pkg/runtime/mheap.c +++ b/src/pkg/runtime/mheap.c @@ -27,11 +27,26 @@ RecordSpan(void *vh, byte *p) { MHeap *h; MSpan *s; + MSpan **all; + uint32 cap; h = vh; s = (MSpan*)p; - s->allnext = h->allspans; - h->allspans = s; + if(h->nspan >= h->nspancap) { + cap = 64*1024/sizeof(all[0]); + if(cap < h->nspancap*3/2) + cap = h->nspancap*3/2; + all = (MSpan**)runtime·SysAlloc(cap*sizeof(all[0])); + if(all == nil) + runtime·throw("runtime: cannot allocate memory"); + if(h->allspans) { + runtime·memmove(all, h->allspans, h->nspancap*sizeof(all[0])); + runtime·SysFree(h->allspans, h->nspancap*sizeof(all[0])); + } + h->allspans = all; + h->nspancap = cap; + } + h->allspans[h->nspan++] = s; } // Initialize the heap; fetch memory using alloc. @@ -53,12 +68,12 @@ runtime·MHeap_Init(MHeap *h, void *(*alloc)(uintptr)) // Allocate a new span of npage pages from the heap // and record its size class in the HeapMap and HeapMapCache. 
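[RecordSpan above replaces the old allnext linked list with a flat allspans array precisely so that sweepspan and dumpspan can be driven by a parfor over span indices. Its growth policy, modeled standalone (malloc/free stand in for SysAlloc/SysFree):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct MSpan MSpan;

    static MSpan  **allspans;
    static uint32_t nspan, nspancap;

    static void
    record_span(MSpan *s)
    {
        if (nspan >= nspancap) {
            /* 64 KB floor, then grow by 3/2 */
            uint32_t cap = 64 * 1024 / sizeof(allspans[0]);
            if (cap < nspancap * 3 / 2)
                cap = nspancap * 3 / 2;
            MSpan **all = malloc(cap * sizeof(all[0]));
            if (all == NULL)
                abort();                     /* the runtime throws */
            if (allspans != NULL) {
                memcpy(all, allspans, nspancap * sizeof(all[0]));
                free(allspans);              /* SysFree in the runtime */
            }
            allspans = all;
            nspancap = cap;
        }
        allspans[nspan++] = s;
    }
]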
MSpan* -runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct) +runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32 zeroed) { MSpan *s; runtime·lock(h); - runtime·purgecachedstats(m); + runtime·purgecachedstats(m->mcache); s = MHeap_AllocLocked(h, npage, sizeclass); if(s != nil) { mstats.heap_inuse += npage<<PageShift; @@ -68,6 +83,8 @@ runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct) } } runtime·unlock(h); + if(s != nil && *(uintptr*)(s->start<<PageShift) != 0 && zeroed) + runtime·memclr((byte*)(s->start<<PageShift), s->npages<<PageShift); return s; } @@ -123,14 +140,15 @@ HaveSpan: *(uintptr*)(t->start<<PageShift) = *(uintptr*)(s->start<<PageShift); // copy "needs zeroing" mark t->state = MSpanInUse; MHeap_FreeLocked(h, t); + t->unusedsince = s->unusedsince; // preserve age } - - if(*(uintptr*)(s->start<<PageShift) != 0) - runtime·memclr((byte*)(s->start<<PageShift), s->npages<<PageShift); + s->unusedsince = 0; // Record span info, because gc needs to be // able to map interior pointer to containing span. s->sizeclass = sizeclass; + s->elemsize = (sizeclass==0 ? s->npages<<PageShift : runtime·class_to_size[sizeclass]); + s->types.compression = MTypes_Empty; p = s->start; if(sizeof(void*) == 8) p -= ((uintptr)h->arena_start>>PageShift); @@ -259,7 +277,7 @@ void runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct) { runtime·lock(h); - runtime·purgecachedstats(m); + runtime·purgecachedstats(m->mcache); mstats.heap_inuse -= s->npages<<PageShift; if(acct) { mstats.heap_alloc -= s->npages<<PageShift; @@ -276,16 +294,22 @@ MHeap_FreeLocked(MHeap *h, MSpan *s) MSpan *t; PageID p; + if(s->types.sysalloc) + runtime·settype_sysfree(s); + s->types.compression = MTypes_Empty; + if(s->state != MSpanInUse || s->ref != 0) { runtime·printf("MHeap_FreeLocked - span %p ptr %p state %d ref %d\n", s, s->start<<PageShift, s->state, s->ref); runtime·throw("MHeap_FreeLocked - invalid free"); } mstats.heap_idle += s->npages<<PageShift; s->state = MSpanFree; - s->unusedsince = 0; - s->npreleased = 0; runtime·MSpanList_Remove(s); sp = (uintptr*)(s->start<<PageShift); + // Stamp newly unused spans. The scavenger will use that + // info to potentially give back some pages to the OS. + s->unusedsince = runtime·nanotime(); + s->npreleased = 0; // Coalesce with earlier, later spans. p = s->start; @@ -325,6 +349,52 @@ MHeap_FreeLocked(MHeap *h, MSpan *s) runtime·MSpanList_Insert(&h->large, s); } +static void +forcegchelper(Note *note) +{ + runtime·gc(1); + runtime·notewakeup(note); +} + +static uintptr +scavengelist(MSpan *list, uint64 now, uint64 limit) +{ + uintptr released, sumreleased; + MSpan *s; + + if(runtime·MSpanList_IsEmpty(list)) + return 0; + + sumreleased = 0; + for(s=list->next; s != list; s=s->next) { + if((now - s->unusedsince) > limit) { + released = (s->npages - s->npreleased) << PageShift; + mstats.heap_released += released; + sumreleased += released; + s->npreleased = s->npages; + runtime·SysUnused((void*)(s->start << PageShift), s->npages << PageShift); + } + } + return sumreleased; +} + +static uintptr +scavenge(uint64 now, uint64 limit) +{ + uint32 i; + uintptr sumreleased; + MHeap *h; + + h = runtime·mheap; + sumreleased = 0; + for(i=0; i < nelem(h->free); i++) + sumreleased += scavengelist(&h->free[i], now, limit); + sumreleased += scavengelist(&h->large, now, limit); + return sumreleased; +} + +static FuncVal forcegchelperv = {(void(*)(void))forcegchelper}; + // Release (part of) unused memory to OS. 
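[scavengelist above is the heart of the scavenger: MHeap_FreeLocked now stamps every span with nanotime() the moment it becomes idle, and spans idle longer than the limit hand their not-yet-released pages back via SysUnused, with npreleased preventing double counting. A model using a plain NULL-terminated list in place of the runtime's circular MSpan lists:

    #include <stddef.h>
    #include <stdint.h>

    enum { PageShift = 12 };             /* 4 KB pages, as in the runtime */

    typedef struct Span Span;
    struct Span {
        Span     *next;
        uint64_t  unusedsince;           /* nanotime() stamp from free    */
        uintptr_t npages, npreleased;    /* total vs. already released    */
    };

    extern void sys_unused(void *base, size_t len);  /* models runtime·SysUnused */

    static uintptr_t
    scavenge_list(Span *list, uint64_t now, uint64_t limit)
    {
        uintptr_t sum = 0;

        for (Span *s = list; s != NULL; s = s->next) {
            if (now - s->unusedsince > limit) {
                /* only pages not handed back on an earlier pass count */
                sum += (s->npages - s->npreleased) << PageShift;
                s->npreleased = s->npages;
                /* sys_unused(span_base(s), s->npages << PageShift);
                   span_base is hypothetical; the runtime computes it
                   from s->start << PageShift */
            }
        }
        return sum;
    }

Note that the old loop's s->unusedsince != 0 guard disappears in the hunk above: since free now always stamps the span, a zero timestamp can no longer occur.]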
// Goroutine created at startup. // Loop forever.
@@ -332,13 +402,12 @@ void runtime·MHeap_Scavenger(void) { MHeap *h; - MSpan *s, *list; uint64 tick, now, forcegc, limit; - uint32 k, i; - uintptr released, sumreleased; + uint32 k; + uintptr sumreleased; byte *env; bool trace; - Note note; + Note note, *notep; // If we go two minutes without a garbage collection, force one to run. forcegc = 2*60*1e9;
@@ -356,10 +425,10 @@ runtime·MHeap_Scavenger(void) if(env != nil) trace = runtime·atoi(env) > 0; - h = &runtime·mheap; + h = runtime·mheap; for(k=0;; k++) { runtime·noteclear(&note); - runtime·entersyscall(); + runtime·entersyscallblock(); runtime·notetsleep(&note, tick); runtime·exitsyscall();
@@ -367,30 +436,21 @@ now = runtime·nanotime(); if(now - mstats.last_gc > forcegc) { runtime·unlock(h); - runtime·gc(1); + // The scavenger can not block other goroutines, + // otherwise deadlock detector can fire spuriously. + // GC blocks other goroutines via the runtime·worldsema. + runtime·noteclear(&note); + notep = &note; + runtime·newproc1(&forcegchelperv, (byte*)&notep, sizeof(notep), 0, runtime·MHeap_Scavenger); + runtime·entersyscallblock(); + runtime·notesleep(&note); + runtime·exitsyscall(); + if(trace) + runtime·printf("scvg%d: GC forced\n", k); runtime·lock(h); now = runtime·nanotime(); - if (trace) - runtime·printf("scvg%d: GC forced\n", k); - } - sumreleased = 0; - for(i=0; i < nelem(h->free)+1; i++) { - if(i < nelem(h->free)) - list = &h->free[i]; - else - list = &h->large; - if(runtime·MSpanList_IsEmpty(list)) - continue; - for(s=list->next; s != list; s=s->next) { - if(s->unusedsince != 0 && (now - s->unusedsince) > limit) { - released = (s->npages - s->npreleased) << PageShift; - mstats.heap_released += released; - sumreleased += released; - s->npreleased = s->npages; - runtime·SysUnused((void*)(s->start << PageShift), s->npages << PageShift); - } - } } + sumreleased = scavenge(now, limit); runtime·unlock(h); if(trace) {
@@ -403,6 +463,15 @@ } } +void +runtime∕debug·freeOSMemory(void) +{ + runtime·gc(1); + runtime·lock(runtime·mheap); + scavenge(~(uintptr)0, 0); + runtime·unlock(runtime·mheap); +} + // Initialize a new span with the given start and npages. void runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages)
@@ -414,9 +483,11 @@ runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages) span->freelist = nil; span->ref = 0; span->sizeclass = 0; + span->elemsize = 0; span->state = 0; span->unusedsince = 0; span->npreleased = 0; + span->types.compression = MTypes_Empty; } // Initialize an empty doubly-linked list.
diff --git a/src/pkg/runtime/mprof.goc b/src/pkg/runtime/mprof.goc index 0bbce8583..ebc1e3e66 100644 --- a/src/pkg/runtime/mprof.goc +++ b/src/pkg/runtime/mprof.goc
@@ -13,23 +13,73 @@ package runtime #include "type.h" // NOTE(rsc): Everything here could use cas if contention became an issue. -static Lock proflock; +static Lock proflock, alloclock; -// Per-call-stack allocation information. +// All memory allocations are local and do not escape outside of the profiler. +// The profiler is forbidden from referring to garbage-collected memory. + +static byte *pool; // memory allocation pool +static uintptr poolfree; // number of bytes left in the pool +enum { + Chunk = 32*PageSize, // initial size of the pool +}; + +// Memory allocation local to this file. +// There is no way to return the allocated memory back to the OS.
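[The forced-GC dance in the scavenger hunk above exists because the scavenger parks in entersyscallblock while idle and must not run the collection inline, or the deadlock detector can fire spuriously; instead it spawns forcegchelper with newproc1 and sleeps on a Note until the helper signals completion. A rough pthread model of that handoff, with gc_collect as a placeholder for runtime·gc(1):

    #include <pthread.h>

    typedef struct {
        pthread_mutex_t mu;
        pthread_cond_t  cv;
        int             done;
    } Note;                              /* rough model of a runtime Note */

    extern void gc_collect(void);        /* placeholder for runtime·gc(1) */

    static void *
    forcegc_helper(void *arg)            /* plays the role of forcegchelper */
    {
        Note *n = arg;

        gc_collect();
        pthread_mutex_lock(&n->mu);      /* runtime·notewakeup */
        n->done = 1;
        pthread_cond_signal(&n->cv);
        pthread_mutex_unlock(&n->mu);
        return NULL;
    }

    static void
    force_gc(void)                       /* what the scavenger loop does */
    {
        Note n = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 };
        pthread_t t;

        pthread_create(&t, NULL, forcegc_helper, &n);  /* runtime·newproc1 */
        pthread_mutex_lock(&n.mu);                     /* runtime·notesleep */
        while (!n.done)
            pthread_cond_wait(&n.cv, &n.mu);
        pthread_mutex_unlock(&n.mu);
        pthread_join(t, NULL);
    }
]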
+static void* +allocate(uintptr size) +{ + void *v; + + if(size == 0) + return nil; + + if(size >= Chunk/2) + return runtime·SysAlloc(size); + + runtime·lock(&alloclock); + if(size > poolfree) { + pool = runtime·SysAlloc(Chunk); + if(pool == nil) + runtime·throw("runtime: cannot allocate memory"); + poolfree = Chunk; + } + v = pool; + pool += size; + poolfree -= size; + runtime·unlock(&alloclock); + return v; +} + +enum { MProf, BProf }; // profile types + +// Per-call-stack profiling information. // Lookup by hashing call stack into a linked-list hash table. typedef struct Bucket Bucket; struct Bucket { Bucket *next; // next in hash list - Bucket *allnext; // next in list of all buckets - uintptr allocs; - uintptr frees; - uintptr alloc_bytes; - uintptr free_bytes; - uintptr recent_allocs; // since last gc - uintptr recent_frees; - uintptr recent_alloc_bytes; - uintptr recent_free_bytes; + Bucket *allnext; // next in list of all mbuckets/bbuckets + int32 typ; + union + { + struct // typ == MProf + { + uintptr allocs; + uintptr frees; + uintptr alloc_bytes; + uintptr free_bytes; + uintptr recent_allocs; // since last gc + uintptr recent_frees; + uintptr recent_alloc_bytes; + uintptr recent_free_bytes; + }; + struct // typ == BProf + { + int64 count; + int64 cycles; + }; + }; uintptr hash; uintptr nstk; uintptr stk[1]; @@ -38,12 +88,13 @@ enum { BuckHashSize = 179999, }; static Bucket **buckhash; -static Bucket *buckets; +static Bucket *mbuckets; // memory profile buckets +static Bucket *bbuckets; // blocking profile buckets static uintptr bucketmem; // Return the bucket for stk[0:nstk], allocating new bucket if needed. static Bucket* -stkbucket(uintptr *stk, int32 nstk, bool alloc) +stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc) { int32 i; uintptr h; @@ -51,6 +102,8 @@ stkbucket(uintptr *stk, int32 nstk, bool alloc) if(buckhash == nil) { buckhash = runtime·SysAlloc(BuckHashSize*sizeof buckhash[0]); + if(buckhash == nil) + runtime·throw("runtime: cannot allocate memory"); mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0]; } @@ -66,33 +119,39 @@ stkbucket(uintptr *stk, int32 nstk, bool alloc) i = h%BuckHashSize; for(b = buckhash[i]; b; b=b->next) - if(b->hash == h && b->nstk == nstk && + if(b->typ == typ && b->hash == h && b->nstk == nstk && runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0) return b; if(!alloc) return nil; - b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1); + b = allocate(sizeof *b + nstk*sizeof stk[0]); + if(b == nil) + runtime·throw("runtime: cannot allocate memory"); bucketmem += sizeof *b + nstk*sizeof stk[0]; runtime·memmove(b->stk, stk, nstk*sizeof stk[0]); + b->typ = typ; b->hash = h; b->nstk = nstk; b->next = buckhash[i]; buckhash[i] = b; - b->allnext = buckets; - buckets = b; + if(typ == MProf) { + b->allnext = mbuckets; + mbuckets = b; + } else { + b->allnext = bbuckets; + bbuckets = b; + } return b; } -// Record that a gc just happened: all the 'recent' statistics are now real. -void -runtime·MProf_GC(void) +static void +MProf_GC(void) { Bucket *b; - - runtime·lock(&proflock); - for(b=buckets; b; b=b->allnext) { + + for(b=mbuckets; b; b=b->allnext) { b->allocs += b->recent_allocs; b->frees += b->recent_frees; b->alloc_bytes += b->recent_alloc_bytes; @@ -102,25 +161,39 @@ runtime·MProf_GC(void) b->recent_alloc_bytes = 0; b->recent_free_bytes = 0; } +} + +// Record that a gc just happened: all the 'recent' statistics are now real. 
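[allocate() above is the profiler's private bump allocator: buckets and address-map nodes must never live in garbage-collected memory, so small requests are carved out of 32-page chunks and nothing is ever freed. The same logic standalone, with the alloclock omitted and malloc standing in for SysAlloc:

    #include <stdint.h>
    #include <stdlib.h>

    enum { CHUNK = 32 * 4096 };          /* Chunk = 32*PageSize above */

    static unsigned char *pool;
    static size_t         poolfree;

    static void *
    prof_alloc(size_t size)
    {
        void *v;

        if (size == 0)
            return NULL;
        if (size >= CHUNK / 2)           /* large: bypass the pool */
            return malloc(size);
        if (size > poolfree) {           /* abandon the old tail, grab a chunk */
            pool = malloc(CHUNK);
            if (pool == NULL)
                abort();
            poolfree = CHUNK;
        }
        v = pool;                        /* bump */
        pool += size;
        poolfree -= size;
        return v;
    }
]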
+void +runtime·MProf_GC(void) +{ + runtime·lock(&proflock); + MProf_GC(); runtime·unlock(&proflock); } // Map from pointer to Bucket* that allocated it. // Three levels: -// Linked-list hash table for top N-20 bits. -// Array index for next 13 bits. -// Linked list for next 7 bits. +// Linked-list hash table for top N-AddrHashShift bits. +// Array index for next AddrDenseBits bits. +// Linked list for next AddrHashShift-AddrDenseBits bits. // This is more efficient than using a general map, // because of the typical clustering of the pointer keys. typedef struct AddrHash AddrHash; typedef struct AddrEntry AddrEntry; +enum { + AddrHashBits = 12, // good for 4GB of used address space + AddrHashShift = 20, // each AddrHash knows about 1MB of address space + AddrDenseBits = 8, // good for a profiling rate of 4096 bytes +}; + struct AddrHash { AddrHash *next; // next in top-level hash table linked list uintptr addr; // addr>>20 - AddrEntry *dense[1<<13]; + AddrEntry *dense[1<<AddrDenseBits]; }; struct AddrEntry @@ -130,10 +203,7 @@ struct AddrEntry Bucket *b; }; -enum { - AddrHashBits = 12 // 1MB per entry, so good for 4GB of used address space -}; -static AddrHash *addrhash[1<<AddrHashBits]; +static AddrHash **addrhash; // points to (AddrHash*)[1<<AddrHashBits] static AddrEntry *addrfree; static uintptr addrmem; @@ -155,29 +225,29 @@ setaddrbucket(uintptr addr, Bucket *b) AddrHash *ah; AddrEntry *e; - h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits); + h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits); for(ah=addrhash[h]; ah; ah=ah->next) - if(ah->addr == (addr>>20)) + if(ah->addr == (addr>>AddrHashShift)) goto found; - ah = runtime·mallocgc(sizeof *ah, FlagNoProfiling, 0, 1); + ah = allocate(sizeof *ah); addrmem += sizeof *ah; ah->next = addrhash[h]; - ah->addr = addr>>20; + ah->addr = addr>>AddrHashShift; addrhash[h] = ah; found: if((e = addrfree) == nil) { - e = runtime·mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0); + e = allocate(64*sizeof *e); addrmem += 64*sizeof *e; for(i=0; i+1<64; i++) e[i].next = &e[i+1]; e[63].next = nil; } addrfree = e->next; - e->addr = (uint32)~(addr & ((1<<20)-1)); + e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1)); e->b = b; - h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20. + h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20. e->next = ah->dense[h]; ah->dense[h] = e; } @@ -191,16 +261,16 @@ getaddrbucket(uintptr addr) AddrEntry *e, **l; Bucket *b; - h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits); + h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits); for(ah=addrhash[h]; ah; ah=ah->next) - if(ah->addr == (addr>>20)) + if(ah->addr == (addr>>AddrHashShift)) goto found; return nil; found: - h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20. + h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20. 
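[setaddrbucket/getaddrbucket above implement the pointer-to-bucket map as three levels keyed by slices of the address; the named constants (AddrHashShift=20, AddrDenseBits=8) generalize the old hard-coded 20/13/7 split. How one address decomposes, as a sketch; HashMult is an illustrative multiplier, the runtime uses its own HashMultiplier constant, which this hunk does not show:

    #include <stdint.h>

    enum {
        AddrHashBits  = 12,              /* 4096 top-level hash chains   */
        AddrHashShift = 20,              /* one AddrHash per 1 MB        */
        AddrDenseBits = 8,               /* 256 dense slots per AddrHash */
    };

    static const uint32_t HashMult = 2654435769u;  /* illustrative only */

    static void
    addr_split(uintptr_t addr, uint32_t *top_hash, uint32_t *dense, uint32_t *tag)
    {
        /* which top-level chain: hash of the 1 MB-granular prefix */
        *top_hash = (uint32_t)((addr >> AddrHashShift) * HashMult)
                    >> (32 - AddrHashBits);
        /* which dense slot: top AddrDenseBits bits of the low 20 */
        *dense = (uint32_t)(addr >> (AddrHashShift - AddrDenseBits))
                 & ((1u << AddrDenseBits) - 1);
        /* entry tag: complemented low 20 bits, as in setaddrbucket */
        *tag = (uint32_t)~(addr & ((1u << AddrHashShift) - 1));
    }
]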
for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) { - if(e->addr == (uint32)~(addr & ((1<<20)-1))) { + if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) { *l = e->next; b = e->b; e->next = addrfree; @@ -225,7 +295,7 @@ runtime·MProf_Malloc(void *p, uintptr size) m->nomemprof++; nstk = runtime·callers(1, stk, 32); runtime·lock(&proflock); - b = stkbucket(stk, nstk, true); + b = stkbucket(MProf, stk, nstk, true); b->recent_allocs++; b->recent_alloc_bytes += size; setaddrbucket((uintptr)p, b); @@ -253,9 +323,37 @@ runtime·MProf_Free(void *p, uintptr size) m->nomemprof--; } +int64 runtime·blockprofilerate; // in CPU ticks + +void +runtime·SetBlockProfileRate(intgo rate) +{ + runtime·atomicstore64((uint64*)&runtime·blockprofilerate, rate * runtime·tickspersecond() / (1000*1000*1000)); +} + +void +runtime·blockevent(int64 cycles, int32 skip) +{ + int32 nstk; + int64 rate; + uintptr stk[32]; + Bucket *b; -// Go interface to profile data. (Declared in extern.go) -// Assumes Go sizeof(int) == sizeof(int32) + if(cycles <= 0) + return; + rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate); + if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles)) + return; + + nstk = runtime·callers(skip, stk, 32); + runtime·lock(&proflock); + b = stkbucket(BProf, stk, nstk, true); + b->count++; + b->cycles += cycles; + runtime·unlock(&proflock); +} + +// Go interface to profile data. (Declared in debug.go) // Must match MemProfileRecord in debug.go. typedef struct Record Record; @@ -281,52 +379,101 @@ record(Record *r, Bucket *b) r->stk[i] = 0; } -func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) { +func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) { Bucket *b; Record *r; + bool clear; runtime·lock(&proflock); n = 0; - for(b=buckets; b; b=b->allnext) + clear = true; + for(b=mbuckets; b; b=b->allnext) { if(include_inuse_zero || b->alloc_bytes != b->free_bytes) n++; + if(b->allocs != 0 || b->frees != 0) + clear = false; + } + if(clear) { + // Absolutely no data, suggesting that a garbage collection + // has not yet happened. In order to allow profiling when + // garbage collection is disabled from the beginning of execution, + // accumulate stats as if a GC just happened, and recount buckets. + MProf_GC(); + n = 0; + for(b=mbuckets; b; b=b->allnext) + if(include_inuse_zero || b->alloc_bytes != b->free_bytes) + n++; + } ok = false; if(n <= p.len) { ok = true; r = (Record*)p.array; - for(b=buckets; b; b=b->allnext) + for(b=mbuckets; b; b=b->allnext) if(include_inuse_zero || b->alloc_bytes != b->free_bytes) record(r++, b); } runtime·unlock(&proflock); } +// Must match BlockProfileRecord in debug.go. +typedef struct BRecord BRecord; +struct BRecord { + int64 count; + int64 cycles; + uintptr stk[32]; +}; + +func BlockProfile(p Slice) (n int, ok bool) { + Bucket *b; + BRecord *r; + int32 i; + + runtime·lock(&proflock); + n = 0; + for(b=bbuckets; b; b=b->allnext) + n++; + ok = false; + if(n <= p.len) { + ok = true; + r = (BRecord*)p.array; + for(b=bbuckets; b; b=b->allnext, r++) { + r->count = b->count; + r->cycles = b->cycles; + for(i=0; i<b->nstk && i<nelem(r->stk); i++) + r->stk[i] = b->stk[i]; + for(; i<nelem(r->stk); i++) + r->stk[i] = 0; + } + } + runtime·unlock(&proflock); +} + // Must match StackRecord in debug.go. 
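[runtime·blockevent above samples rather than records every blocking event: the rate is stored in CPU ticks, long blocks always hit, and a block of `cycles` ticks is kept with probability cycles/rate, which keeps the recorded totals unbiased in expectation. The decision logic standalone; rand() stands in for runtime·fastrand1():

    #include <stdint.h>
    #include <stdlib.h>

    static int64_t blockprofilerate;     /* in CPU ticks */

    /* SetBlockProfileRate: the user-visible rate is in nanoseconds and
     * is converted to ticks once, so blockevent compares raw cycles. */
    static void
    set_block_profile_rate(int64_t rate_ns, int64_t ticks_per_second)
    {
        blockprofilerate = rate_ns * ticks_per_second / (1000 * 1000 * 1000);
    }

    static int
    should_sample(int64_t cycles)
    {
        int64_t rate = blockprofilerate;

        if (cycles <= 0 || rate <= 0)
            return 0;
        if (rate > cycles && rand() % rate > cycles)
            return 0;                    /* short block, lost the coin toss */
        return 1;                        /* record count and cycles */
    }
]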
typedef struct TRecord TRecord; struct TRecord { uintptr stk[32]; }; -func ThreadCreateProfile(p Slice) (n int32, ok bool) { +func ThreadCreateProfile(p Slice) (n int, ok bool) { TRecord *r; - M *first, *m; + M *first, *mp; first = runtime·atomicloadp(&runtime·allm); n = 0; - for(m=first; m; m=m->alllink) + for(mp=first; mp; mp=mp->alllink) n++; ok = false; if(n <= p.len) { ok = true; r = (TRecord*)p.array; - for(m=first; m; m=m->alllink) { - runtime·memmove(r->stk, m->createstack, sizeof r->stk); + for(mp=first; mp; mp=mp->alllink) { + runtime·memmove(r->stk, mp->createstack, sizeof r->stk); r++; } } } -func Stack(b Slice, all bool) (n int32) { +func Stack(b Slice, all bool) (n int) { byte *pc, *sp; sp = runtime·getcallersp(&b); @@ -355,21 +502,21 @@ func Stack(b Slice, all bool) (n int32) { if(all) { m->gcing = 0; runtime·semrelease(&runtime·worldsema); - runtime·starttheworld(false); + runtime·starttheworld(); } } static void -saveg(byte *pc, byte *sp, G *g, TRecord *r) +saveg(byte *pc, byte *sp, G *gp, TRecord *r) { int32 n; - n = runtime·gentraceback(pc, sp, 0, g, 0, r->stk, nelem(r->stk)); + n = runtime·gentraceback(pc, sp, 0, gp, 0, r->stk, nelem(r->stk)); if(n < nelem(r->stk)) r->stk[n] = 0; } -func GoroutineProfile(b Slice) (n int32, ok bool) { +func GoroutineProfile(b Slice) (n int, ok bool) { byte *pc, *sp; TRecord *r; G *gp; @@ -392,13 +539,18 @@ func GoroutineProfile(b Slice) (n int32, ok bool) { for(gp = runtime·allg; gp != nil; gp = gp->alllink) { if(gp == g || gp->status == Gdead) continue; - saveg(gp->sched.pc, gp->sched.sp, gp, r++); + saveg(gp->sched.pc, (byte*)gp->sched.sp, gp, r++); } } m->gcing = 0; runtime·semrelease(&runtime·worldsema); - runtime·starttheworld(false); + runtime·starttheworld(); } } +void +runtime·mprofinit(void) +{ + addrhash = allocate((1<<AddrHashBits)*sizeof *addrhash); +} diff --git a/src/pkg/runtime/os_darwin.h b/src/pkg/runtime/os_darwin.h index eb5d2daa3..5fcb717cb 100644 --- a/src/pkg/runtime/os_darwin.h +++ b/src/pkg/runtime/os_darwin.h @@ -4,9 +4,11 @@ #define SIG_DFL ((void*)0) #define SIG_IGN ((void*)1) +#define SIGHUP 1 +#define SS_DISABLE 4 int32 runtime·bsdthread_create(void*, M*, G*, void(*)(void)); -void runtime·bsdthread_register(void); +int32 runtime·bsdthread_register(void); int32 runtime·mach_msg_trap(MachHeader*, int32, uint32, uint32, uint32, uint32, uint32); uint32 runtime·mach_reply_port(void); int32 runtime·mach_semacquire(uint32, int64); diff --git a/src/pkg/runtime/os_freebsd.h b/src/pkg/runtime/os_freebsd.h index 5e8de5434..a37ad7cd8 100644 --- a/src/pkg/runtime/os_freebsd.h +++ b/src/pkg/runtime/os_freebsd.h @@ -1,5 +1,7 @@ #define SIG_DFL ((void*)0) #define SIG_IGN ((void*)1) +#define SIGHUP 1 +#define SS_DISABLE 4 int32 runtime·thr_new(ThrParam*, int32); void runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp); @@ -15,7 +17,7 @@ int32 runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr); void runtime·raisesigpipe(void); #define NSIG 33 -#define SI_USER 0 +#define SI_USER 0x10001 #define RLIMIT_AS 10 typedef struct Rlimit Rlimit; diff --git a/src/pkg/runtime/os_linux.h b/src/pkg/runtime/os_linux.h index 87daa3bb1..a23fe0f73 100644 --- a/src/pkg/runtime/os_linux.h +++ b/src/pkg/runtime/os_linux.h @@ -4,13 +4,15 @@ #define SIG_DFL ((void*)0) #define SIG_IGN ((void*)1) +#define SIGHUP 1 +#define SS_DISABLE 2 // Linux-specific system calls int32 runtime·futex(uint32*, int32, uint32, Timespec*, uint32*, uint32); int32 runtime·clone(int32, void*, M*, G*, void(*)(void)); struct Sigaction; -void 
runtime·rt_sigaction(uintptr, struct Sigaction*, void*, uintptr); +int32 runtime·rt_sigaction(uintptr, struct Sigaction*, void*, uintptr); void runtime·setsig(int32, void(*)(int32, Siginfo*, void*, G*), bool); void runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp); diff --git a/src/pkg/runtime/os_netbsd.h b/src/pkg/runtime/os_netbsd.h index 4ecf78d88..19d72fd25 100644 --- a/src/pkg/runtime/os_netbsd.h +++ b/src/pkg/runtime/os_netbsd.h @@ -4,18 +4,29 @@ #define SIG_DFL ((void*)0) #define SIG_IGN ((void*)1) +#define SIGHUP 1 +#define SS_DISABLE 4 + +#define SIG_BLOCK 1 +#define SIG_UNBLOCK 2 +#define SIG_SETMASK 3 struct sigaction; -void runtime·sigpanic(void); -void runtime·sigaltstack(Sigaltstack*, Sigaltstack*); -void runtime·sigaction(int32, struct sigaction*, struct sigaction*); +void runtime·raisesigpipe(void); void runtime·setsig(int32, void(*)(int32, Siginfo*, void*, G*), bool); void runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp); +void runtime·sigpanic(void); + void runtime·setitimer(int32, Itimerval*, Itimerval*); +void runtime·sigaction(int32, struct sigaction*, struct sigaction*); +void runtime·sigaltstack(Sigaltstack*, Sigaltstack*); +void runtime·sigprocmask(int32, Sigset*, Sigset*); int32 runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr); -void runtime·raisesigpipe(void); - #define NSIG 33 #define SI_USER 0 + +// From NetBSD's <sys/ucontext.h> +#define _UC_SIGMASK 0x01 +#define _UC_CPU 0x04 diff --git a/src/pkg/runtime/os_openbsd.h b/src/pkg/runtime/os_openbsd.h index 4ecf78d88..a599aad05 100644 --- a/src/pkg/runtime/os_openbsd.h +++ b/src/pkg/runtime/os_openbsd.h @@ -4,18 +4,25 @@ #define SIG_DFL ((void*)0) #define SIG_IGN ((void*)1) +#define SIGHUP 1 +#define SS_DISABLE 4 + +#define SIG_BLOCK 1 +#define SIG_UNBLOCK 2 +#define SIG_SETMASK 3 struct sigaction; +void runtime·raisesigpipe(void); +void runtime·setsig(int32, void(*)(int32, Siginfo*, void*, G*), bool); void runtime·sigpanic(void); -void runtime·sigaltstack(Sigaltstack*, Sigaltstack*); + +void runtime·setitimer(int32, Itimerval*, Itimerval*); void runtime·sigaction(int32, struct sigaction*, struct sigaction*); -void runtime·setsig(int32, void(*)(int32, Siginfo*, void*, G*), bool); +void runtime·sigaltstack(Sigaltstack*, Sigaltstack*); void runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp); -void runtime·setitimer(int32, Itimerval*, Itimerval*); +Sigset runtime·sigprocmask(int32, Sigset); int32 runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr); -void runtime·raisesigpipe(void); - #define NSIG 33 #define SI_USER 0 diff --git a/src/pkg/runtime/os_plan9.h b/src/pkg/runtime/os_plan9.h index cc6343c8e..c2cdf5b44 100644 --- a/src/pkg/runtime/os_plan9.h +++ b/src/pkg/runtime/os_plan9.h @@ -7,13 +7,22 @@ int32 runtime·open(uint8 *file, int32 mode); int32 runtime·pread(int32 fd, void *buf, int32 nbytes, int64 offset); int32 runtime·pwrite(int32 fd, void *buf, int32 nbytes, int64 offset); int32 runtime·read(int32 fd, void *buf, int32 nbytes); +int64 runtime·seek(int32 fd, int64 offset, int32 whence); int32 runtime·close(int32 fd); void runtime·exits(int8* msg); -int32 runtime·brk_(void*); +intptr runtime·brk_(void*); int32 runtime·sleep(int32 ms); -int32 runtime·rfork(int32 flags, void *stk, M *m, G *g, void (*fn)(void)); +int32 runtime·rfork(int32 flags, void *stk, M *mp, G *gp, void (*fn)(void)); int32 runtime·plan9_semacquire(uint32 *addr, int32 block); +int32 runtime·plan9_tsemacquire(uint32 *addr, int32 ms); int32 
runtime·plan9_semrelease(uint32 *addr, int32 count); +int32 runtime·notify(void (*fn)(void*, int8*)); +int32 runtime·noted(int32); +void runtime·sigtramp(void*, int8*); +int32 runtime·sighandler(void*, int8*, G*); +void runtime·sigpanic(void); +void runtime·goexitsall(int8*); +void runtime·setfpmasks(void); /* open */ enum @@ -45,6 +54,13 @@ enum RFNOMNT = (1<<14) }; +/* notify */ +enum +{ + NCONT = 0, + NDFLT = 1 +}; + typedef struct Tos Tos; typedef intptr Plink; @@ -66,4 +82,5 @@ struct Tos { /* top of stack is here */ }; -#define NSIG 1 +#define NSIG 5 /* number of signals in runtime·SigTab array */ +#define ERRMAX 128 /* max length of note string */ diff --git a/src/pkg/runtime/os_windows.h b/src/pkg/runtime/os_windows.h index 9d387b7ad..cf0ecb68e 100644 --- a/src/pkg/runtime/os_windows.h +++ b/src/pkg/runtime/os_windows.h @@ -28,5 +28,8 @@ uint32 runtime·ctrlhandler(uint32 type); byte *runtime·compilecallback(Eface fn, bool cleanstack); void *runtime·callbackasm(void); +void runtime·install_exception_handler(void); +void runtime·remove_exception_handler(void); + // TODO(brainman): should not need those #define NSIG 65 diff --git a/src/pkg/runtime/panic.c b/src/pkg/runtime/panic.c new file mode 100644 index 000000000..2f553f417 --- /dev/null +++ b/src/pkg/runtime/panic.c @@ -0,0 +1,487 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "runtime.h" +#include "arch_GOARCH.h" +#include "stack.h" + +// Code related to defer, panic and recover. + +uint32 runtime·panicking; +static Lock paniclk; + +enum +{ + DeferChunkSize = 2048 +}; + +// Allocate a Defer, usually as part of the larger frame of deferred functions. +// Each defer must be released with both popdefer and freedefer. +static Defer* +newdefer(int32 siz) +{ + int32 total; + DeferChunk *c; + Defer *d; + + c = g->dchunk; + total = sizeof(*d) + ROUND(siz, sizeof(uintptr)) - sizeof(d->args); + if(c == nil || total > DeferChunkSize - c->off) { + if(total > DeferChunkSize / 2) { + // Not worth putting in any chunk. + // Allocate a separate block. + d = runtime·malloc(total); + d->siz = siz; + d->special = 1; + d->free = 1; + d->link = g->defer; + g->defer = d; + return d; + } + + // Cannot fit in current chunk. + // Switch to next chunk, allocating if necessary. + c = g->dchunknext; + if(c == nil) + c = runtime·malloc(DeferChunkSize); + c->prev = g->dchunk; + c->off = sizeof(*c); + g->dchunk = c; + g->dchunknext = nil; + } + + d = (Defer*)((byte*)c + c->off); + c->off += total; + d->siz = siz; + d->special = 0; + d->free = 0; + d->link = g->defer; + g->defer = d; + return d; +} + +// Pop the current defer from the defer stack. +// Its contents are still valid until the goroutine begins executing again. +// In particular it is safe to call reflect.call(d->fn, d->argp, d->siz) after +// popdefer returns. +static void +popdefer(void) +{ + Defer *d; + DeferChunk *c; + int32 total; + + d = g->defer; + if(d == nil) + runtime·throw("runtime: popdefer nil"); + g->defer = d->link; + if(d->special) { + // Nothing else to do. + return; + } + total = sizeof(*d) + ROUND(d->siz, sizeof(uintptr)) - sizeof(d->args); + c = g->dchunk; + if(c == nil || (byte*)d+total != (byte*)c+c->off) + runtime·throw("runtime: popdefer phase error"); + c->off -= total; + if(c->off == sizeof(*c)) { + // Chunk now empty, so pop from stack. 
+ // Save in dchunknext both to help with pingponging between frames + // and to make sure d is still valid on return. + if(g->dchunknext != nil) + runtime·free(g->dchunknext); + g->dchunknext = c; + g->dchunk = c->prev; + } +} + +// Free the given defer. +// For defers in the per-goroutine chunk this just clears the saved arguments. +// For large defers allocated on the heap, this frees them. +// The defer cannot be used after this call. +static void +freedefer(Defer *d) +{ + if(d->special) { + if(d->free) + runtime·free(d); + } else { + runtime·memclr((byte*)d->args, d->siz); + } +} + +// Create a new deferred function fn with siz bytes of arguments. +// The compiler turns a defer statement into a call to this. +// Cannot split the stack because it assumes that the arguments +// are available sequentially after &fn; they would not be +// copied if a stack split occurred. It's OK for this to call +// functions that split the stack. +#pragma textflag 7 +uintptr +runtime·deferproc(int32 siz, FuncVal *fn, ...) +{ + Defer *d; + + d = newdefer(siz); + d->fn = fn; + d->pc = runtime·getcallerpc(&siz); + if(thechar == '5') + d->argp = (byte*)(&fn+2); // skip caller's saved link register + else + d->argp = (byte*)(&fn+1); + runtime·memmove(d->args, d->argp, d->siz); + + // deferproc returns 0 normally. + // a deferred func that stops a panic + // makes the deferproc return 1. + // the code the compiler generates always + // checks the return value and jumps to the + // end of the function if deferproc returns != 0. + return 0; +} + +// Run a deferred function if there is one. +// The compiler inserts a call to this at the end of any +// function which calls defer. +// If there is a deferred function, this will call runtime·jmpdefer, +// which will jump to the deferred function such that it appears +// to have been called by the caller of deferreturn at the point +// just before deferreturn was called. The effect is that deferreturn +// is called again and again until there are no more deferred functions. +// Cannot split the stack because we reuse the caller's frame to +// call the deferred function. +#pragma textflag 7 +void +runtime·deferreturn(uintptr arg0) +{ + Defer *d; + byte *argp; + FuncVal *fn; + + d = g->defer; + if(d == nil) + return; + argp = (byte*)&arg0; + if(d->argp != argp) + return; + runtime·memmove(argp, d->args, d->siz); + fn = d->fn; + popdefer(); + freedefer(d); + runtime·jmpdefer(fn, argp); +} + +// Run all deferred functions for the current goroutine. +static void +rundefer(void) +{ + Defer *d; + + while((d = g->defer) != nil) { + popdefer(); + reflect·call(d->fn, (byte*)d->args, d->siz); + freedefer(d); + } +} + +// Print all currently active panics. Used when crashing. +static void +printpanics(Panic *p) +{ + if(p->link) { + printpanics(p->link); + runtime·printf("\t"); + } + runtime·printf("panic: "); + runtime·printany(p->arg); + if(p->recovered) + runtime·printf(" [recovered]"); + runtime·printf("\n"); +} + +static void recovery(G*); + +// The implementation of the predeclared function panic. 
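[newdefer/popdefer above turn defer into a bump allocation: records bigger than half a chunk get their own allocation (special=1, released by freedefer), while everything else is carved from a per-goroutine 2 KB DeferChunk whose offset simply rewinds on popdefer. A standalone model of the allocation side, error handling elided and malloc standing in for runtime·malloc; the panic machinery that consumes these records follows next.

    #include <stdint.h>
    #include <stdlib.h>

    enum { DeferChunkSize = 2048 };
    #define ROUND(x, n) (((x) + (n) - 1) & ~((uintptr_t)(n) - 1))

    typedef struct Defer Defer;
    struct Defer {
        int32_t  siz;
        int8_t   special;      /* 1: separately allocated, freed by freedefer */
        Defer   *link;         /* next-older defer: LIFO */
        unsigned char args[8];
    };

    typedef struct DeferChunk DeferChunk;
    struct DeferChunk {
        DeferChunk *prev;      /* chunks form a stack too */
        uint32_t    off;       /* bump pointer within this chunk */
    };

    static DeferChunk *dchunk; /* models g->dchunk */
    static Defer      *defers; /* models g->defer  */

    static Defer *
    new_defer(int32_t siz)
    {
        uint32_t total = sizeof(Defer) - sizeof(((Defer *)0)->args)
                       + ROUND((uint32_t)siz, sizeof(uintptr_t));
        Defer *d;

        if (total > DeferChunkSize / 2) {
            d = malloc(total);                 /* too big for any chunk */
            d->special = 1;
        } else {
            if (dchunk == NULL || total > DeferChunkSize - dchunk->off) {
                DeferChunk *c = malloc(DeferChunkSize);
                c->prev = dchunk;
                c->off = sizeof(*c);
                dchunk = c;
            }
            d = (Defer *)((unsigned char *)dchunk + dchunk->off);
            dchunk->off += total;
            d->special = 0;
        }
        d->siz = siz;
        d->link = defers;                      /* newest defer runs first */
        defers = d;
        return d;
    }
]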
+void +runtime·panic(Eface e) +{ + Defer *d; + Panic *p; + void *pc, *argp; + + p = runtime·mal(sizeof *p); + p->arg = e; + p->link = g->panic; + p->stackbase = (byte*)g->stackbase; + g->panic = p; + + for(;;) { + d = g->defer; + if(d == nil) + break; + // take defer off list in case of recursive panic + popdefer(); + g->ispanic = true; // rock for newstack, where reflect.call ends up + argp = d->argp; + pc = d->pc; + reflect·call(d->fn, (byte*)d->args, d->siz); + freedefer(d); + if(p->recovered) { + g->panic = p->link; + if(g->panic == nil) // must be done with signal + g->sig = 0; + runtime·free(p); + // Pass information about recovering frame to recovery. + g->sigcode0 = (uintptr)argp; + g->sigcode1 = (uintptr)pc; + runtime·mcall(recovery); + runtime·throw("recovery failed"); // mcall should not return + } + } + + // ran out of deferred calls - old-school panic now + runtime·startpanic(); + printpanics(g->panic); + runtime·dopanic(0); +} + +// Unwind the stack after a deferred function calls recover +// after a panic. Then arrange to continue running as though +// the caller of the deferred function returned normally. +static void +recovery(G *gp) +{ + void *argp; + void *pc; + + // Info about defer passed in G struct. + argp = (void*)gp->sigcode0; + pc = (void*)gp->sigcode1; + + // Unwind to the stack frame with d's arguments in it. + runtime·unwindstack(gp, argp); + + // Make the deferproc for this d return again, + // this time returning 1. The calling function will + // jump to the standard return epilogue. + // The -2*sizeof(uintptr) makes up for the + // two extra words that are on the stack at + // each call to deferproc. + // (The pc we're returning to does pop pop + // before it tests the return value.) + // On the arm there are 2 saved LRs mixed in too. + if(thechar == '5') + gp->sched.sp = (uintptr)argp - 4*sizeof(uintptr); + else + gp->sched.sp = (uintptr)argp - 2*sizeof(uintptr); + gp->sched.pc = pc; + runtime·gogo(&gp->sched, 1); +} + +// Free stack frames until we hit the last one +// or until we find the one that contains the sp. +void +runtime·unwindstack(G *gp, byte *sp) +{ + Stktop *top; + byte *stk; + + // Must be called from a different goroutine, usually m->g0. + if(g == gp) + runtime·throw("unwindstack on self"); + + while((top = (Stktop*)gp->stackbase) != nil && top->stackbase != nil) { + stk = (byte*)gp->stackguard - StackGuard; + if(stk <= sp && sp < (byte*)gp->stackbase) + break; + gp->stackbase = (uintptr)top->stackbase; + gp->stackguard = (uintptr)top->stackguard; + if(top->free != 0) + runtime·stackfree(stk, top->free); + } + + if(sp != nil && (sp < (byte*)gp->stackguard - StackGuard || (byte*)gp->stackbase < sp)) { + runtime·printf("recover: %p not in [%p, %p]\n", sp, gp->stackguard - StackGuard, gp->stackbase); + runtime·throw("bad unwindstack"); + } +} + +// The implementation of the predeclared function recover. +// Cannot split the stack because it needs to reliably +// find the stack segment of its caller. +#pragma textflag 7 +void +runtime·recover(byte *argp, Eface ret) +{ + Stktop *top, *oldtop; + Panic *p; + + // Must be a panic going on. + if((p = g->panic) == nil || p->recovered) + goto nomatch; + + // Frame must be at the top of the stack segment, + // because each deferred call starts a new stack + // segment as a side effect of using reflect.call. + // (There has to be some way to remember the + // variable argument frame size, and the segment + // code already takes care of that for us, so we + // reuse it.) 
+ // + // As usual closures complicate things: the fp that + // the closure implementation function claims to have + // is where the explicit arguments start, after the + // implicit pointer arguments and PC slot. + // If we're on the first new segment for a closure, + // then fp == top - top->args is correct, but if + // the closure has its own big argument frame and + // allocated a second segment (see below), + // the fp is slightly above top - top->args. + // That condition can't happen normally though + // (stack pointers go down, not up), so we can accept + // any fp between top and top - top->args as + // indicating the top of the segment. + top = (Stktop*)g->stackbase; + if(argp < (byte*)top - top->argsize || (byte*)top < argp) + goto nomatch; + + // The deferred call makes a new segment big enough + // for the argument frame but not necessarily big + // enough for the function's local frame (size unknown + // at the time of the call), so the function might have + // made its own segment immediately. If that's the + // case, back top up to the older one, the one that + // reflect.call would have made for the panic. + // + // The fp comparison here checks that the argument + // frame that was copied during the split (the top->args + // bytes above top->fp) abuts the old top of stack. + // This is a correct test for both closure and non-closure code. + oldtop = (Stktop*)top->stackbase; + if(oldtop != nil && top->argp == (byte*)oldtop - top->argsize) + top = oldtop; + + // Now we have the segment that was created to + // run this call. It must have been marked as a panic segment. + if(!top->panic) + goto nomatch; + + // Okay, this is the top frame of a deferred call + // in response to a panic. It can see the panic argument. + p->recovered = 1; + ret = p->arg; + FLUSH(&ret); + return; + +nomatch: + ret.type = nil; + ret.data = nil; + FLUSH(&ret); +} + +void +runtime·startpanic(void) +{ + if(m->mcache == nil) // can happen if called from signal handler or throw + m->mcache = runtime·allocmcache(); + if(m->dying) { + runtime·printf("panic during panic\n"); + runtime·exit(3); + } + m->dying = 1; + runtime·xadd(&runtime·panicking, 1); + runtime·lock(&paniclk); +} + +void +runtime·dopanic(int32 unused) +{ + static bool didothers; + + if(g->sig != 0) + runtime·printf("[signal %x code=%p addr=%p pc=%p]\n", + g->sig, g->sigcode0, g->sigcode1, g->sigpc); + + if(runtime·gotraceback()){ + if(g != m->g0) { + runtime·printf("\n"); + runtime·goroutineheader(g); + runtime·traceback(runtime·getcallerpc(&unused), runtime·getcallersp(&unused), 0, g); + } + if(!didothers) { + didothers = true; + runtime·tracebackothers(g); + } + } + runtime·unlock(&paniclk); + if(runtime·xadd(&runtime·panicking, -1) != 0) { + // Some other m is panicking too. + // Let it print what it needs to print. + // Wait forever without chewing up cpu. + // It will exit when it's done. 
+ static Lock deadlock; + runtime·lock(&deadlock); + runtime·lock(&deadlock); + } + + runtime·exit(2); +} + +void +runtime·panicindex(void) +{ + runtime·panicstring("index out of range"); +} + +void +runtime·panicslice(void) +{ + runtime·panicstring("slice bounds out of range"); +} + +void +runtime·throwreturn(void) +{ + // can only happen if compiler is broken + runtime·throw("no return at end of a typed function - compiler is broken"); +} + +void +runtime·throwinit(void) +{ + // can only happen with linker skew + runtime·throw("recursive call during initialization - linker skew"); +} + +void +runtime·throw(int8 *s) +{ + if(m->throwing == 0) + m->throwing = 1; + runtime·startpanic(); + runtime·printf("fatal error: %s\n", s); + runtime·dopanic(0); + *(int32*)0 = 0; // not reached + runtime·exit(1); // even more not reached +} + +void +runtime·panicstring(int8 *s) +{ + Eface err; + + if(m->gcing) { + runtime·printf("panic: %s\n", s); + runtime·throw("panic during gc"); + } + runtime·newErrorString(runtime·gostringnocopy((byte*)s), &err); + runtime·panic(err); +} + +void +runtime·Goexit(void) +{ + rundefer(); + runtime·goexit(); +} diff --git a/src/pkg/runtime/parfor.c b/src/pkg/runtime/parfor.c new file mode 100644 index 000000000..aa5537d02 --- /dev/null +++ b/src/pkg/runtime/parfor.c @@ -0,0 +1,215 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Parallel for algorithm. + +#include "runtime.h" +#include "arch_GOARCH.h" + +struct ParForThread +{ + // the thread's iteration space [32lsb, 32msb) + uint64 pos; + // stats + uint64 nsteal; + uint64 nstealcnt; + uint64 nprocyield; + uint64 nosyield; + uint64 nsleep; + byte pad[CacheLineSize]; +}; + +ParFor* +runtime·parforalloc(uint32 nthrmax) +{ + ParFor *desc; + + // The ParFor object is followed by CacheLineSize padding + // and then nthrmax ParForThread. 
+ desc = (ParFor*)runtime·malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread)); + desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize); + desc->nthrmax = nthrmax; + return desc; +} + +// For testing from Go +// func parforalloc2(nthrmax uint32) *ParFor +void +runtime·parforalloc2(uint32 nthrmax, ParFor *desc) +{ + desc = runtime·parforalloc(nthrmax); + FLUSH(&desc); +} + +void +runtime·parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32)) +{ + uint32 i, begin, end; + + if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) { + runtime·printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body); + runtime·throw("parfor: invalid args"); + } + + desc->body = body; + desc->done = 0; + desc->nthr = nthr; + desc->thrseq = 0; + desc->cnt = n; + desc->ctx = ctx; + desc->wait = wait; + desc->nsteal = 0; + desc->nstealcnt = 0; + desc->nprocyield = 0; + desc->nosyield = 0; + desc->nsleep = 0; + for(i=0; i<nthr; i++) { + begin = (uint64)n*i / nthr; + end = (uint64)n*(i+1) / nthr; + desc->thr[i].pos = (uint64)begin | (((uint64)end)<<32); + } +} + +// For testing from Go +// func parforsetup2(desc *ParFor, nthr, n uint32, ctx *byte, wait bool, body func(*ParFor, uint32)) +void +runtime·parforsetup2(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void *body) +{ + runtime·parforsetup(desc, nthr, n, ctx, wait, *(void(**)(ParFor*, uint32))body); +} + +void +runtime·parfordo(ParFor *desc) +{ + ParForThread *me; + uint32 tid, begin, end, begin2, try, victim, i; + uint64 *mypos, *victimpos, pos, newpos; + void (*body)(ParFor*, uint32); + bool idle; + + // Obtain 0-based thread index. + tid = runtime·xadd(&desc->thrseq, 1) - 1; + if(tid >= desc->nthr) { + runtime·printf("tid=%d nthr=%d\n", tid, desc->nthr); + runtime·throw("parfor: invalid tid"); + } + + // If single-threaded, just execute the for serially. + if(desc->nthr==1) { + for(i=0; i<desc->cnt; i++) + desc->body(desc, i); + return; + } + + body = desc->body; + me = &desc->thr[tid]; + mypos = &me->pos; + for(;;) { + for(;;) { + // While there is local work, + // bump low index and execute the iteration. + pos = runtime·xadd64(mypos, 1); + begin = (uint32)pos-1; + end = (uint32)(pos>>32); + if(begin < end) { + body(desc, begin); + continue; + } + break; + } + + // Out of work, need to steal something. + idle = false; + for(try=0;; try++) { + // If we don't see any work for long enough, + // increment the done counter... + if(try > desc->nthr*4 && !idle) { + idle = true; + runtime·xadd(&desc->done, 1); + } + // ...if all threads have incremented the counter, + // we are done. + if(desc->done + !idle == desc->nthr) { + if(!idle) + runtime·xadd(&desc->done, 1); + goto exit; + } + // Choose a random victim for stealing. + victim = runtime·fastrand1() % (desc->nthr-1); + if(victim >= tid) + victim++; + victimpos = &desc->thr[victim].pos; + pos = runtime·atomicload64(victimpos); + for(;;) { + // See if it has any work. + begin = (uint32)pos; + end = (uint32)(pos>>32); + if(begin+1 >= end) { + begin = end = 0; + break; + } + if(idle) { + runtime·xadd(&desc->done, -1); + idle = false; + } + begin2 = begin + (end-begin)/2; + newpos = (uint64)begin | (uint64)begin2<<32; + if(runtime·cas64(victimpos, &pos, newpos)) { + begin = begin2; + break; + } + } + if(begin < end) { + // Has successfully stolen some work. 
+ if(idle) + runtime·throw("parfor: should not be idle"); + runtime·atomicstore64(mypos, (uint64)begin | (uint64)end<<32); + me->nsteal++; + me->nstealcnt += end-begin; + break; + } + // Backoff. + if(try < desc->nthr) { + // nothing + } else if (try < 4*desc->nthr) { + me->nprocyield++; + runtime·procyield(20); + // If a caller asked not to wait for the others, exit now + // (assume that most work is already done at this point). + } else if (!desc->wait) { + if(!idle) + runtime·xadd(&desc->done, 1); + goto exit; + } else if (try < 6*desc->nthr) { + me->nosyield++; + runtime·osyield(); + } else { + me->nsleep++; + runtime·usleep(1); + } + } + } +exit: + runtime·xadd64(&desc->nsteal, me->nsteal); + runtime·xadd64(&desc->nstealcnt, me->nstealcnt); + runtime·xadd64(&desc->nprocyield, me->nprocyield); + runtime·xadd64(&desc->nosyield, me->nosyield); + runtime·xadd64(&desc->nsleep, me->nsleep); + me->nsteal = 0; + me->nstealcnt = 0; + me->nprocyield = 0; + me->nosyield = 0; + me->nsleep = 0; +} + +// For testing from Go +// func parforiters(desc *ParFor, tid uintptr) (uintptr, uintptr) +void +runtime·parforiters(ParFor *desc, uintptr tid, uintptr start, uintptr end) +{ + start = (uint32)desc->thr[tid].pos; + end = (uint32)(desc->thr[tid].pos>>32); + FLUSH(&start); + FLUSH(&end); +} diff --git a/src/pkg/runtime/parfor_test.go b/src/pkg/runtime/parfor_test.go new file mode 100644 index 000000000..4c69a68ce --- /dev/null +++ b/src/pkg/runtime/parfor_test.go @@ -0,0 +1,144 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The race detector does not understand ParFor synchronization. +// +build !race + +package runtime_test + +import ( + . "runtime" + "testing" + "unsafe" +) + +var gdata []uint64 + +// Simple serial sanity test for parallelfor. +func TestParFor(t *testing.T) { + const P = 1 + const N = 20 + data := make([]uint64, N) + for i := uint64(0); i < N; i++ { + data[i] = i + } + desc := NewParFor(P) + // Avoid making func a closure: parfor cannot invoke them. + // Since it doesn't happen in the C code, it's not worth doing + // just for the test. + gdata = data + ParForSetup(desc, P, N, nil, true, func(desc *ParFor, i uint32) { + data := gdata + data[i] = data[i]*data[i] + 1 + }) + ParForDo(desc) + for i := uint64(0); i < N; i++ { + if data[i] != i*i+1 { + t.Fatalf("Wrong element %d: %d", i, data[i]) + } + } +} + +// Test that nonblocking parallelfor does not block. +func TestParFor2(t *testing.T) { + const P = 7 + const N = 1003 + data := make([]uint64, N) + for i := uint64(0); i < N; i++ { + data[i] = i + } + desc := NewParFor(P) + ParForSetup(desc, P, N, (*byte)(unsafe.Pointer(&data)), false, func(desc *ParFor, i uint32) { + d := *(*[]uint64)(unsafe.Pointer(desc.Ctx)) + d[i] = d[i]*d[i] + 1 + }) + for p := 0; p < P; p++ { + ParForDo(desc) + } + for i := uint64(0); i < N; i++ { + if data[i] != i*i+1 { + t.Fatalf("Wrong element %d: %d", i, data[i]) + } + } +} + +// Test that iterations are properly distributed. 
+func TestParForSetup(t *testing.T) { + const P = 11 + const N = 101 + desc := NewParFor(P) + for n := uint32(0); n < N; n++ { + for p := uint32(1); p <= P; p++ { + ParForSetup(desc, p, n, nil, true, func(desc *ParFor, i uint32) {}) + sum := uint32(0) + size0 := uint32(0) + end0 := uint32(0) + for i := uint32(0); i < p; i++ { + begin, end := ParForIters(desc, i) + size := end - begin + sum += size + if i == 0 { + size0 = size + if begin != 0 { + t.Fatalf("incorrect begin: %d (n=%d, p=%d)", begin, n, p) + } + } else { + if size != size0 && size != size0+1 { + t.Fatalf("incorrect size: %d/%d (n=%d, p=%d)", size, size0, n, p) + } + if begin != end0 { + t.Fatalf("incorrect begin/end: %d/%d (n=%d, p=%d)", begin, end0, n, p) + } + } + end0 = end + } + if sum != n { + t.Fatalf("incorrect sum: %d/%d (p=%d)", sum, n, p) + } + } + } +} + +// Test parallel parallelfor. +func TestParForParallel(t *testing.T) { + if GOARCH != "amd64" { + t.Log("temporarily disabled, see http://golang.org/issue/4155") + return + } + + N := uint64(1e7) + if testing.Short() { + N /= 10 + } + data := make([]uint64, N) + for i := uint64(0); i < N; i++ { + data[i] = i + } + P := GOMAXPROCS(-1) + c := make(chan bool, P) + desc := NewParFor(uint32(P)) + gdata = data + ParForSetup(desc, uint32(P), uint32(N), nil, false, func(desc *ParFor, i uint32) { + data := gdata + data[i] = data[i]*data[i] + 1 + }) + for p := 1; p < P; p++ { + go func() { + ParForDo(desc) + c <- true + }() + } + ParForDo(desc) + for p := 1; p < P; p++ { + <-c + } + for i := uint64(0); i < N; i++ { + if data[i] != i*i+1 { + t.Fatalf("Wrong element %d: %d", i, data[i]) + } + } + + data, desc = nil, nil + GC() +} diff --git a/src/pkg/runtime/pprof/pprof.go b/src/pkg/runtime/pprof/pprof.go index f67e8a8f9..32c1098b9 100644 --- a/src/pkg/runtime/pprof/pprof.go +++ b/src/pkg/runtime/pprof/pprof.go @@ -36,8 +36,9 @@ import ( // goroutine - stack traces of all current goroutines // heap - a sampling of all heap allocations // threadcreate - stack traces that led to the creation of new OS threads +// block - stack traces that led to blocking on synchronization primitives // -// These predefine profiles maintain themselves and panic on an explicit +// These predefined profiles maintain themselves and panic on an explicit // Add or Remove method call. // // The CPU profile is not available as a Profile. It has a special API, @@ -76,6 +77,12 @@ var heapProfile = &Profile{ write: writeHeap, } +var blockProfile = &Profile{ + name: "block", + count: countBlock, + write: writeBlock, +} + func lockProfiles() { profiles.mu.Lock() if profiles.m == nil { @@ -84,6 +91,7 @@ func lockProfiles() { "goroutine": goroutineProfile, "threadcreate": threadcreateProfile, "heap": heapProfile, + "block": blockProfile, } } } @@ -310,21 +318,33 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro // for a single stack trace. func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) { show := allFrames - for _, pc := range stk { + wasPanic := false + for i, pc := range stk { f := runtime.FuncForPC(pc) if f == nil { show = true fmt.Fprintf(w, "#\t%#x\n", pc) + wasPanic = false } else { - file, line := f.FileLine(pc) + tracepc := pc + // Back up to call instruction. + if i > 0 && pc > f.Entry() && !wasPanic { + if runtime.GOARCH == "386" || runtime.GOARCH == "amd64" { + tracepc-- + } else { + tracepc -= 4 // arm, etc + } + } + file, line := f.FileLine(tracepc) name := f.Name() // Hide runtime.goexit and any runtime functions at the beginning. 
// This is useful mainly for allocation traces. + wasPanic = name == "runtime.panic" if name == "runtime.goexit" || !show && strings.HasPrefix(name, "runtime.") { continue } show = true - fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", pc, f.Name(), pc-f.Entry(), file, line) + fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", pc, name, pc-f.Entry(), file, line) } } if !show { @@ -352,26 +372,26 @@ func WriteHeapProfile(w io.Writer) error { // countHeap returns the number of records in the heap profile. func countHeap() int { - n, _ := runtime.MemProfile(nil, false) + n, _ := runtime.MemProfile(nil, true) return n } -// writeHeapProfile writes the current runtime heap profile to w. +// writeHeap writes the current runtime heap profile to w. func writeHeap(w io.Writer, debug int) error { - // Find out how many records there are (MemProfile(nil, false)), + // Find out how many records there are (MemProfile(nil, true)), // allocate that many records, and get the data. // There's a race—more records might be added between // the two calls—so allocate a few extra records for safety // and also try again if we're very unlucky. // The loop should only execute one iteration in the common case. var p []runtime.MemProfileRecord - n, ok := runtime.MemProfile(nil, false) + n, ok := runtime.MemProfile(nil, true) for { // Allocate room for a slightly bigger profile, // in case a few more entries have been added // since the call to MemProfile. p = make([]runtime.MemProfileRecord, n+50) - n, ok = runtime.MemProfile(p, false) + n, ok = runtime.MemProfile(p, true) if ok { p = p[0:n] break @@ -431,11 +451,14 @@ func writeHeap(w io.Writer, debug int) error { fmt.Fprintf(w, "# Sys = %d\n", s.Sys) fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups) fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs) + fmt.Fprintf(w, "# Frees = %d\n", s.Frees) fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc) fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys) fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle) fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse) + fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased) + fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects) fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys) fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys) @@ -597,3 +620,60 @@ func StopCPUProfile() { runtime.SetCPUProfileRate(0) <-cpu.done } + +type byCycles []runtime.BlockProfileRecord + +func (x byCycles) Len() int { return len(x) } +func (x byCycles) Swap(i, j int) { x[i], x[j] = x[j], x[i] } +func (x byCycles) Less(i, j int) bool { return x[i].Cycles > x[j].Cycles } + +// countBlock returns the number of records in the blocking profile. +func countBlock() int { + n, _ := runtime.BlockProfile(nil) + return n +} + +// writeBlock writes the current blocking profile to w. 
+func writeBlock(w io.Writer, debug int) error { + var p []runtime.BlockProfileRecord + n, ok := runtime.BlockProfile(nil) + for { + p = make([]runtime.BlockProfileRecord, n+50) + n, ok = runtime.BlockProfile(p) + if ok { + p = p[:n] + break + } + } + + sort.Sort(byCycles(p)) + + b := bufio.NewWriter(w) + var tw *tabwriter.Writer + w = b + if debug > 0 { + tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) + w = tw + } + + fmt.Fprintf(w, "--- contention:\n") + fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) + for i := range p { + r := &p[i] + fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count) + for _, pc := range r.Stack() { + fmt.Fprintf(w, " %#x", pc) + } + fmt.Fprint(w, "\n") + if debug > 0 { + printStackRecord(w, r.Stack(), false) + } + } + + if tw != nil { + tw.Flush() + } + return b.Flush() +} + +func runtime_cyclesPerSecond() int64 diff --git a/src/pkg/runtime/pprof/pprof_test.go b/src/pkg/runtime/pprof/pprof_test.go index 82bb2a292..6d5764f4a 100644 --- a/src/pkg/runtime/pprof/pprof_test.go +++ b/src/pkg/runtime/pprof/pprof_test.go @@ -26,8 +26,7 @@ func TestCPUProfile(t *testing.T) { t.Logf("uname -a: %v", vers) // Lion uses "Darwin Kernel Version 11". if strings.Contains(vers, "Darwin Kernel Version 10") && strings.Contains(vers, "RELEASE_X86_64") { - t.Logf("skipping test on known-broken kernel (64-bit Leopard / Snow Leopard)") - return + t.Skip("skipping test on known-broken kernel (64-bit Leopard / Snow Leopard)") } case "plan9": // unimplemented @@ -49,19 +48,25 @@ func TestCPUProfile(t *testing.T) { // Convert []byte to []uintptr. bytes := prof.Bytes() + l := len(bytes) / int(unsafe.Sizeof(uintptr(0))) val := *(*[]uintptr)(unsafe.Pointer(&bytes)) - val = val[:len(bytes)/int(unsafe.Sizeof(uintptr(0)))] + val = val[:l] - if len(val) < 10 { + if l < 13 { t.Fatalf("profile too short: %#x", val) } - if val[0] != 0 || val[1] != 3 || val[2] != 0 || val[3] != 1e6/100 || val[4] != 0 { - t.Fatalf("unexpected header %#x", val[:5]) + + hd, val, tl := val[:5], val[5:l-3], val[l-3:] + if hd[0] != 0 || hd[1] != 3 || hd[2] != 0 || hd[3] != 1e6/100 || hd[4] != 0 { + t.Fatalf("unexpected header %#x", hd) + } + + if tl[0] != 0 || tl[1] != 1 || tl[2] != 0 { + t.Fatalf("malformed end-of-data marker %#x", tl) } // Check that profile is well formed and contains ChecksumIEEE. found := false - val = val[5:] for len(val) > 0 { if len(val) < 2 || val[0] < 1 || val[1] < 1 || uintptr(len(val)) < 2+val[1] { t.Fatalf("malformed profile. 
leftover: %#x", val) diff --git a/src/pkg/runtime/print.c b/src/pkg/runtime/print.c index 6702c3cde..5b601599b 100644 --- a/src/pkg/runtime/print.c +++ b/src/pkg/runtime/print.c @@ -18,10 +18,10 @@ gwrite(void *v, int32 n) runtime·write(2, v, n); return; } - + if(g->writenbuf == 0) return; - + if(n > g->writenbuf) n = g->writenbuf; runtime·memmove(g->writebuf, v, n); @@ -84,40 +84,41 @@ vprintf(int8 *s, byte *base) narg = 0; switch(*p) { case 't': + case 'c': narg = arg + 1; break; case 'd': // 32-bit case 'x': - arg = runtime·rnd(arg, 4); + arg = ROUND(arg, 4); narg = arg + 4; break; case 'D': // 64-bit case 'U': case 'X': case 'f': - arg = runtime·rnd(arg, sizeof(uintptr)); + arg = ROUND(arg, sizeof(uintptr)); narg = arg + 8; break; case 'C': - arg = runtime·rnd(arg, sizeof(uintptr)); + arg = ROUND(arg, sizeof(uintptr)); narg = arg + 16; break; case 'p': // pointer-sized case 's': - arg = runtime·rnd(arg, sizeof(uintptr)); + arg = ROUND(arg, sizeof(uintptr)); narg = arg + sizeof(uintptr); break; case 'S': // pointer-aligned but bigger - arg = runtime·rnd(arg, sizeof(uintptr)); + arg = ROUND(arg, sizeof(uintptr)); narg = arg + sizeof(String); break; case 'a': // pointer-aligned but bigger - arg = runtime·rnd(arg, sizeof(uintptr)); + arg = ROUND(arg, sizeof(uintptr)); narg = arg + sizeof(Slice); break; case 'i': // pointer-aligned but bigger case 'e': - arg = runtime·rnd(arg, sizeof(uintptr)); + arg = ROUND(arg, sizeof(uintptr)); narg = arg + sizeof(Eface); break; } @@ -126,6 +127,9 @@ vprintf(int8 *s, byte *base) case 'a': runtime·printslice(*(Slice*)v); break; + case 'c': + runtime·printbyte(*(int8*)v); + break; case 'd': runtime·printint(*(int32*)v); break; @@ -203,21 +207,27 @@ runtime·printbool(bool v) } void +runtime·printbyte(int8 c) +{ + gwrite(&c, 1); +} + +void runtime·printfloat(float64 v) { byte buf[20]; int32 e, s, i, n; float64 h; - if(runtime·isNaN(v)) { + if(ISNAN(v)) { gwrite("NaN", 3); return; } - if(runtime·isInf(v, 1)) { + if(v == runtime·posinf) { gwrite("+Inf", 4); return; } - if(runtime·isInf(v, -1)) { + if(v == runtime·neginf) { gwrite("-Inf", 4); return; } @@ -343,7 +353,7 @@ runtime·printstring(String v) extern uint32 runtime·maxstring; if(v.len > runtime·maxstring) { - gwrite("[invalid string]", 16); + gwrite("[string too long]", 17); return; } if(v.len > 0) diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index 04a992628..4ce0a718c 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -4,169 +4,112 @@ #include "runtime.h" #include "arch_GOARCH.h" -#include "defs_GOOS_GOARCH.h" #include "malloc.h" -#include "os_GOOS.h" #include "stack.h" +#include "race.h" +#include "type.h" -bool runtime·iscgo; - -static void unwindstack(G*, byte*); -static void schedule(G*); - -typedef struct Sched Sched; - -M runtime·m0; -G runtime·g0; // idle goroutine for m0 - -static int32 debug = 0; - -int32 runtime·gcwaiting; - -// Go scheduler -// -// The go scheduler's job is to match ready-to-run goroutines (`g's) -// with waiting-for-work schedulers (`m's). If there are ready g's -// and no waiting m's, ready() will start a new m running in a new -// OS thread, so that all ready g's can run simultaneously, up to a limit. -// For now, m's never go away. -// -// By default, Go keeps only one kernel thread (m) running user code -// at a single time; other threads may be blocked in the operating system. -// Setting the environment variable $GOMAXPROCS or calling -// runtime.GOMAXPROCS() will change the number of user threads -// allowed to execute simultaneously. 
$GOMAXPROCS is thus an -// approximation of the maximum number of cores to use. +// Goroutine scheduler +// The scheduler's job is to distribute ready-to-run goroutines over worker threads. // -// Even a program that can run without deadlock in a single process -// might use more m's if given the chance. For example, the prime -// sieve will use as many m's as there are primes (up to runtime·sched.mmax), -// allowing different stages of the pipeline to execute in parallel. -// We could revisit this choice, only kicking off new m's for blocking -// system calls, but that would limit the amount of parallel computation -// that go would try to do. +// The main concepts are: +// G - goroutine. +// M - worker thread, or machine. +// P - processor, a resource that is required to execute Go code. +// M must have an associated P to execute Go code, however it can be +// blocked or in a syscall w/o an associated P. // -// In general, one could imagine all sorts of refinements to the -// scheduler, but the goal now is just to get something working on -// Linux and OS X. +// Design doc at http://golang.org/s/go11sched. +typedef struct Sched Sched; struct Sched { Lock; - G *gfree; // available g's (status == Gdead) - int32 goidgen; + uint64 goidgen; - G *ghead; // g's waiting to run - G *gtail; - int32 gwait; // number of g's waiting to run - int32 gcount; // number of g's that are alive - int32 grunning; // number of g's running on cpu or in syscall + M* midle; // idle m's waiting for work + int32 nmidle; // number of idle m's waiting for work + int32 mlocked; // number of locked m's waiting for work + int32 mcount; // number of m's that have been created - M *mhead; // m's waiting for work - int32 mwait; // number of m's waiting for work - int32 mcount; // number of m's that have been created + P* pidle; // idle P's + uint32 npidle; + uint32 nmspinning; - volatile uint32 atomic; // atomic scheduling word (see below) + // Global runnable queue. + G* runqhead; + G* runqtail; + int32 runqsize; - int32 profilehz; // cpu profiling rate + // Global cache of dead G's. + Lock gflock; + G* gfree; - bool init; // running initialization - bool lockmain; // init called runtime.LockOSThread + int32 stopwait; + Note stopnote; + uint32 sysmonwait; + Note sysmonnote; - Note stopped; // one g can set waitstop and wait here for m's to stop + int32 profilehz; // cpu profiling rate }; -// The atomic word in sched is an atomic uint32 that -// holds these fields. -// -// [15 bits] mcpu number of m's executing on cpu -// [15 bits] mcpumax max number of m's allowed on cpu -// [1 bit] waitstop some g is waiting on stopped -// [1 bit] gwaiting gwait != 0 -// -// These fields are the information needed by entersyscall -// and exitsyscall to decide whether to coordinate with the -// scheduler. Packing them into a single machine word lets -// them use a fast path with a single atomic read/write and -// no lock/unlock. This greatly reduces contention in -// syscall- or cgo-heavy multithreaded programs. -// -// Except for entersyscall and exitsyscall, the manipulations -// to these fields only happen while holding the schedlock, -// so the routines holding schedlock only need to worry about -// what entersyscall and exitsyscall do, not the other routines -// (which also use the schedlock). -// -// In particular, entersyscall and exitsyscall only read mcpumax, -// waitstop, and gwaiting. They never write them. Thus, writes to those -// fields can be done (holding schedlock) without fear of write conflicts. 
-// There may still be logic conflicts: for example, the set of waitstop must -// be conditioned on mcpu >= mcpumax or else the wait may be a -// spurious sleep. The Promela model in proc.p verifies these accesses. -enum { - mcpuWidth = 15, - mcpuMask = (1<<mcpuWidth) - 1, - mcpuShift = 0, - mcpumaxShift = mcpuShift + mcpuWidth, - waitstopShift = mcpumaxShift + mcpuWidth, - gwaitingShift = waitstopShift+1, - - // The max value of GOMAXPROCS is constrained - // by the max value we can store in the bit fields - // of the atomic word. Reserve a few high values - // so that we can detect accidental decrement - // beyond zero. - maxgomaxprocs = mcpuMask - 10, -}; - -#define atomic_mcpu(v) (((v)>>mcpuShift)&mcpuMask) -#define atomic_mcpumax(v) (((v)>>mcpumaxShift)&mcpuMask) -#define atomic_waitstop(v) (((v)>>waitstopShift)&1) -#define atomic_gwaiting(v) (((v)>>gwaitingShift)&1) - -Sched runtime·sched; -int32 runtime·gomaxprocs; -bool runtime·singleproc; - -static bool canaddmcpu(void); - -// An m that is waiting for notewakeup(&m->havenextg). This may -// only be accessed while the scheduler lock is held. This is used to -// minimize the number of times we call notewakeup while the scheduler -// lock is held, since the m will normally move quickly to lock the -// scheduler itself, producing lock contention. -static M* mwakeup; - -// Scheduling helpers. Sched must be locked. -static void gput(G*); // put/get on ghead/gtail -static G* gget(void); -static void mput(M*); // put/get on mhead -static M* mget(G*); -static void gfput(G*); // put/get on gfree -static G* gfget(void); -static void matchmg(void); // match m's to g's -static void readylocked(G*); // ready, but sched is locked -static void mnextg(M*, G*); -static void mcommoninit(M*); - -void -setmcpumax(uint32 n) -{ - uint32 v, w; - - for(;;) { - v = runtime·sched.atomic; - w = v; - w &= ~(mcpuMask<<mcpumaxShift); - w |= n<<mcpumaxShift; - if(runtime·cas(&runtime·sched.atomic, v, w)) - break; - } -} +// The max value of GOMAXPROCS. +// There are no fundamental restrictions on the value. +enum { MaxGomaxprocs = 1<<8 }; +Sched runtime·sched; +int32 runtime·gomaxprocs; +bool runtime·singleproc; +bool runtime·iscgo; +uint32 runtime·gcwaiting; +M runtime·m0; +G runtime·g0; // idle goroutine for m0 +G* runtime·allg; +G* runtime·lastg; +M* runtime·allm; +M* runtime·extram; +int8* runtime·goos; +int32 runtime·ncpu; +static int32 newprocs; // Keep trace of scavenger's goroutine for deadlock detection. 
static G *scvg; +void runtime·mstart(void); +static void runqput(P*, G*); +static G* runqget(P*); +static void runqgrow(P*); +static G* runqsteal(P*, P*); +static void mput(M*); +static M* mget(void); +static void mcommoninit(M*); +static void schedule(void); +static void procresize(int32); +static void acquirep(P*); +static P* releasep(void); +static void newm(void(*)(void), P*); +static void goidle(void); +static void stopm(void); +static void startm(P*, bool); +static void handoffp(P*); +static void wakep(void); +static void stoplockedm(void); +static void startlockedm(G*); +static void sysmon(void); +static uint32 retake(uint32*); +static void inclocked(int32); +static void checkdead(void); +static void exitsyscall0(G*); +static void park0(G*); +static void gosched0(G*); +static void goexit0(G*); +static void gfput(P*, G*); +static G* gfget(P*); +static void gfpurge(P*); +static void globrunqput(G*); +static G* globrunqget(P*); +static P* pidleget(void); +static void pidleput(P*); + // The bootstrap sequence is: // // call osinit @@ -178,10 +121,11 @@ static G *scvg; void runtime·schedinit(void) { - int32 n; + int32 n, procs; byte *p; m->nomemprof++; + runtime·mprofinit(); runtime·mallocinit(); mcommoninit(m); @@ -193,93 +137,70 @@ runtime·schedinit(void) // so that we don't need to call malloc when we crash. // runtime·findfunc(0); - runtime·gomaxprocs = 1; + procs = 1; p = runtime·getenv("GOMAXPROCS"); - if(p != nil && (n = runtime·atoi(p)) != 0) { - if(n > maxgomaxprocs) - n = maxgomaxprocs; - runtime·gomaxprocs = n; + if(p != nil && (n = runtime·atoi(p)) > 0) { + if(n > MaxGomaxprocs) + n = MaxGomaxprocs; + procs = n; } - // wait for the main goroutine to start before taking - // GOMAXPROCS into account. - setmcpumax(1); - runtime·singleproc = runtime·gomaxprocs == 1; - - canaddmcpu(); // mcpu++ to account for bootstrap m - m->helpgc = 1; // flag to tell schedule() to mcpu-- - runtime·sched.grunning++; + runtime·allp = runtime·malloc((MaxGomaxprocs+1)*sizeof(runtime·allp[0])); + procresize(procs); mstats.enablegc = 1; m->nomemprof--; + + if(raceenabled) + g->racectx = runtime·raceinit(); } extern void main·init(void); extern void main·main(void); +static FuncVal scavenger = {runtime·MHeap_Scavenger}; + // The main goroutine. void runtime·main(void) { + newm(sysmon, nil); + // Lock the main goroutine onto this, the main OS thread, // during initialization. Most programs won't care, but a few // do require certain calls to be made by the main thread. // Those can arrange for main.main to run in the main thread // by calling runtime.LockOSThread during initialization // to preserve the lock. - runtime·LockOSThread(); - // From now on, newgoroutines may use non-main threads. - setmcpumax(runtime·gomaxprocs); - runtime·sched.init = true; - scvg = runtime·newproc1((byte*)runtime·MHeap_Scavenger, nil, 0, 0, runtime·main); + runtime·lockOSThread(); + if(m != &runtime·m0) + runtime·throw("runtime·main not on m0"); + scvg = runtime·newproc1(&scavenger, nil, 0, 0, runtime·main); + scvg->issystem = true; main·init(); - runtime·sched.init = false; - if(!runtime·sched.lockmain) - runtime·UnlockOSThread(); - - // The deadlock detection has false negatives. - // Let scvg start up, to eliminate the false negative - // for the trivial program func main() { select{} }. 
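runtime·schedinit above reads $GOMAXPROCS once at startup and clamps it to MaxGomaxprocs (1<<8); later changes go through runtime.GOMAXPROCS, which resizes the P set via procresize. From the Go side:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	fmt.Println("initial:", runtime.GOMAXPROCS(0)) // 0 queries without changing
	old := runtime.GOMAXPROCS(4)                   // takes the procresize path
	fmt.Println("was", old, "now", runtime.GOMAXPROCS(0))
}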
- runtime·gosched(); + runtime·unlockOSThread(); main·main(); + if(raceenabled) + runtime·racefini(); + + // Make racy client program work: if panicking on + // another goroutine at the same time as main returns, + // let the other goroutine finish printing the panic trace. + // Once it does, it will exit. See issue 3934. + if(runtime·panicking) + runtime·park(nil, nil, "panicwait"); + runtime·exit(0); for(;;) *(int32*)runtime·main = 0; } -// Lock the scheduler. -static void -schedlock(void) -{ - runtime·lock(&runtime·sched); -} - -// Unlock the scheduler. -static void -schedunlock(void) -{ - M *m; - - m = mwakeup; - mwakeup = nil; - runtime·unlock(&runtime·sched); - if(m != nil) - runtime·notewakeup(&m->havenextg); -} - -void -runtime·goexit(void) -{ - g->status = Gmoribund; - runtime·gosched(); -} - void -runtime·goroutineheader(G *g) +runtime·goroutineheader(G *gp) { int8 *status; - switch(g->status) { + switch(gp->status) { case Gidle: status = "idle"; break; @@ -293,609 +214,748 @@ runtime·goroutineheader(G *g) status = "syscall"; break; case Gwaiting: - if(g->waitreason) - status = g->waitreason; + if(gp->waitreason) + status = gp->waitreason; else status = "waiting"; break; - case Gmoribund: - status = "moribund"; - break; default: status = "???"; break; } - runtime·printf("goroutine %d [%s]:\n", g->goid, status); + runtime·printf("goroutine %D [%s]:\n", gp->goid, status); } void runtime·tracebackothers(G *me) { - G *g; + G *gp; + int32 traceback; - for(g = runtime·allg; g != nil; g = g->alllink) { - if(g == me || g->status == Gdead) + traceback = runtime·gotraceback(); + for(gp = runtime·allg; gp != nil; gp = gp->alllink) { + if(gp == me || gp->status == Gdead) + continue; + if(gp->issystem && traceback < 2) continue; runtime·printf("\n"); - runtime·goroutineheader(g); - runtime·traceback(g->sched.pc, g->sched.sp, 0, g); + runtime·goroutineheader(gp); + runtime·traceback(gp->sched.pc, (byte*)gp->sched.sp, 0, gp); } } -// Mark this g as m's idle goroutine. -// This functionality might be used in environments where programs -// are limited to a single thread, to simulate a select-driven -// network server. It is not exposed via the standard runtime API. -void -runtime·idlegoroutine(void) -{ - if(g->idlem != nil) - runtime·throw("g is already an idle goroutine"); - g->idlem = m; -} - static void -mcommoninit(M *m) +mcommoninit(M *mp) { - m->id = runtime·sched.mcount++; - m->fastrand = 0x49f6428aUL + m->id + runtime·cputicks(); - m->stackalloc = runtime·malloc(sizeof(*m->stackalloc)); - runtime·FixAlloc_Init(m->stackalloc, FixedStack, runtime·SysAlloc, nil, nil); + // If there is no mcache runtime·callers() will crash, + // and we are most likely in sysmon thread so the stack is senseless anyway. + if(m->mcache) + runtime·callers(1, mp->createstack, nelem(mp->createstack)); - if(m->mcache == nil) - m->mcache = runtime·allocmcache(); + mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks(); - runtime·callers(1, m->createstack, nelem(m->createstack)); + runtime·lock(&runtime·sched); + mp->id = runtime·sched.mcount++; + + runtime·mpreinit(mp); // Add to runtime·allm so garbage collector doesn't free m // when it is just in a register or thread-local storage. - m->alllink = runtime·allm; + mp->alllink = runtime·allm; // runtime·NumCgoCall() iterates over allm w/o schedlock, // so we need to publish it safely. - runtime·atomicstorep(&runtime·allm, m); + runtime·atomicstorep(&runtime·allm, mp); + runtime·unlock(&runtime·sched); } -// Try to increment mcpu. Report whether succeeded. 
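runtime·goroutineheader and runtime·tracebackothers above format the per-goroutine headers and stacks in crash output; the same walk over allg is reachable from user code through runtime.Stack with all=true. For example:

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	go func() { select {} }() // parks forever in a select wait state
	time.Sleep(10 * time.Millisecond)
	buf := make([]byte, 1<<16)
	n := runtime.Stack(buf, true) // true: include all goroutines
	fmt.Printf("%s", buf[:n])
}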
-static bool -canaddmcpu(void) +// Mark gp ready to run. +void +runtime·ready(G *gp) { - uint32 v; - - for(;;) { - v = runtime·sched.atomic; - if(atomic_mcpu(v) >= atomic_mcpumax(v)) - return 0; - if(runtime·cas(&runtime·sched.atomic, v, v+(1<<mcpuShift))) - return 1; + // Mark runnable. + if(gp->status != Gwaiting) { + runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status); + runtime·throw("bad g->status in ready"); } + gp->status = Grunnable; + runqput(m->p, gp); + if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0) // TODO: fast atomic + wakep(); } -// Put on `g' queue. Sched must be locked. -static void -gput(G *g) +int32 +runtime·gcprocs(void) { - M *m; - - // If g is wired, hand it off directly. - if((m = g->lockedm) != nil && canaddmcpu()) { - mnextg(m, g); - return; - } + int32 n; - // If g is the idle goroutine for an m, hand it off. - if(g->idlem != nil) { - if(g->idlem->idleg != nil) { - runtime·printf("m%d idle out of sync: g%d g%d\n", - g->idlem->id, - g->idlem->idleg->goid, g->goid); - runtime·throw("runtime: double idle"); - } - g->idlem->idleg = g; - return; - } + // Figure out how many CPUs to use during GC. + // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. + runtime·lock(&runtime·sched); + n = runtime·gomaxprocs; + if(n > runtime·ncpu) + n = runtime·ncpu; + if(n > MaxGcproc) + n = MaxGcproc; + if(n > runtime·sched.nmidle+1) // one M is currently running + n = runtime·sched.nmidle+1; + runtime·unlock(&runtime·sched); + return n; +} - g->schedlink = nil; - if(runtime·sched.ghead == nil) - runtime·sched.ghead = g; - else - runtime·sched.gtail->schedlink = g; - runtime·sched.gtail = g; +static bool +needaddgcproc(void) +{ + int32 n; - // increment gwait. - // if it transitions to nonzero, set atomic gwaiting bit. - if(runtime·sched.gwait++ == 0) - runtime·xadd(&runtime·sched.atomic, 1<<gwaitingShift); + runtime·lock(&runtime·sched); + n = runtime·gomaxprocs; + if(n > runtime·ncpu) + n = runtime·ncpu; + if(n > MaxGcproc) + n = MaxGcproc; + n -= runtime·sched.nmidle+1; // one M is currently running + runtime·unlock(&runtime·sched); + return n > 0; } -// Report whether gget would return something. -static bool -haveg(void) +void +runtime·helpgc(int32 nproc) { - return runtime·sched.ghead != nil || m->idleg != nil; + M *mp; + int32 n, pos; + + runtime·lock(&runtime·sched); + pos = 0; + for(n = 1; n < nproc; n++) { // one M is currently running + if(runtime·allp[pos]->mcache == m->mcache) + pos++; + mp = mget(); + if(mp == nil) + runtime·throw("runtime·gcprocs inconsistency"); + mp->helpgc = 1; + mp->mcache = runtime·allp[pos]->mcache; + pos++; + runtime·notewakeup(&mp->park); + } + runtime·unlock(&runtime·sched); } -// Get from `g' queue. Sched must be locked. -static G* -gget(void) +void +runtime·stoptheworld(void) { - G *g; + int32 i; + uint32 s; + P *p; + bool wait; - g = runtime·sched.ghead; - if(g){ - runtime·sched.ghead = g->schedlink; - if(runtime·sched.ghead == nil) - runtime·sched.gtail = nil; - // decrement gwait. - // if it transitions to zero, clear atomic gwaiting bit. 
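runtime·gcprocs above settles how many threads help with garbage collection: the minimum of GOMAXPROCS, the CPU count, the MaxGcproc constant, and the idle M's plus the one already running. The clamping, extracted into plain Go (maxGcproc stands in for the runtime constant):

package main

import "fmt"

func gcprocs(gomaxprocs, ncpu, maxGcproc, nmidle int) int {
	n := gomaxprocs
	if n > ncpu {
		n = ncpu
	}
	if n > maxGcproc {
		n = maxGcproc
	}
	if n > nmidle+1 {
		n = nmidle + 1 // one M is already running
	}
	return n
}

func main() {
	fmt.Println(gcprocs(8, 8, 16, 1)) // 2: only one extra M is idle
}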
- if(--runtime·sched.gwait == 0) - runtime·xadd(&runtime·sched.atomic, -1<<gwaitingShift); - } else if(m->idleg != nil) { - g = m->idleg; - m->idleg = nil; + runtime·lock(&runtime·sched); + runtime·sched.stopwait = runtime·gomaxprocs; + runtime·atomicstore((uint32*)&runtime·gcwaiting, 1); + // stop current P + m->p->status = Pgcstop; + runtime·sched.stopwait--; + // try to retake all P's in Psyscall status + for(i = 0; i < runtime·gomaxprocs; i++) { + p = runtime·allp[i]; + s = p->status; + if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop)) + runtime·sched.stopwait--; + } + // stop idle P's + while(p = pidleget()) { + p->status = Pgcstop; + runtime·sched.stopwait--; + } + wait = runtime·sched.stopwait > 0; + runtime·unlock(&runtime·sched); + + // wait for remaining P's to stop voluntary + if(wait) { + runtime·notesleep(&runtime·sched.stopnote); + runtime·noteclear(&runtime·sched.stopnote); + } + if(runtime·sched.stopwait) + runtime·throw("stoptheworld: not stopped"); + for(i = 0; i < runtime·gomaxprocs; i++) { + p = runtime·allp[i]; + if(p->status != Pgcstop) + runtime·throw("stoptheworld: not stopped"); } - return g; } -// Put on `m' list. Sched must be locked. static void -mput(M *m) +mhelpgc(void) { - m->schedlink = runtime·sched.mhead; - runtime·sched.mhead = m; - runtime·sched.mwait++; + m->helpgc = 1; } -// Get an `m' to run `g'. Sched must be locked. -static M* -mget(G *g) +void +runtime·starttheworld(void) { - M *m; + P *p; + M *mp; + bool add; - // if g has its own m, use it. - if(g && (m = g->lockedm) != nil) - return m; + add = needaddgcproc(); + runtime·lock(&runtime·sched); + if(newprocs) { + procresize(newprocs); + newprocs = 0; + } else + procresize(runtime·gomaxprocs); + runtime·gcwaiting = 0; - // otherwise use general m pool. - if((m = runtime·sched.mhead) != nil){ - runtime·sched.mhead = m->schedlink; - runtime·sched.mwait--; + while(p = pidleget()) { + // procresize() puts p's with work at the beginning of the list. + // Once we reach a p without a run queue, the rest don't have one either. + if(p->runqhead == p->runqtail) { + pidleput(p); + break; + } + mp = mget(); + if(mp == nil) { + pidleput(p); + break; + } + if(mp->nextp) + runtime·throw("starttheworld: inconsistent mp->nextp"); + mp->nextp = p; + runtime·notewakeup(&mp->park); + } + if(runtime·sched.sysmonwait) { + runtime·sched.sysmonwait = false; + runtime·notewakeup(&runtime·sched.sysmonnote); + } + runtime·unlock(&runtime·sched); + + if(add) { + // If GC could have used another helper proc, start one now, + // in the hope that it will be available next time. + // It would have been even better to start it before the collection, + // but doing so requires allocating memory, so it's tricky to + // coordinate. This lazy approach works out in practice: + // we don't mind if the first couple gc rounds don't have quite + // the maximum number of procs. + newm(mhelpgc, nil); } - return m; } -// Mark g ready to run. +// Called to start an M. void -runtime·ready(G *g) +runtime·mstart(void) { - schedlock(); - readylocked(g); - schedunlock(); -} + // It is used by windows-386 only. Unfortunately, seh needs + // to be located on os stack, and mstart runs on os stack + // for both m0 and m. + SEH seh; -// Mark g ready to run. Sched is already locked. -// G might be running already and about to stop. -// The sched lock protects g->status from changing underfoot. -static void -readylocked(G *g) -{ - if(g->m){ - // Running on another machine. - // Ready it when it stops. 
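runtime·stoptheworld above claims P's three ways: the current P directly, idle P's off the pidle list, and P's parked in syscalls by CAS'ing Psyscall to Pgcstop, so a concurrently returning syscall cannot reclaim them. A toy of that last step (the status values here are illustrative, not the runtime's):

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	Psyscall uint32 = 2 // illustrative values
	Pgcstop  uint32 = 3
)

func main() {
	status := []uint32{Psyscall, Psyscall, Pgcstop}
	stopwait := len(status)
	for i := range status {
		// CAS, not a plain store: a concurrent exitsyscall doing its
		// own CAS on the same word loses the race cleanly.
		if atomic.CompareAndSwapUint32(&status[i], Psyscall, Pgcstop) {
			stopwait--
		}
	}
	fmt.Println("P's left to stop on their own:", stopwait)
}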
- g->readyonstop = 1; - return; + if(g != m->g0) + runtime·throw("bad runtime·mstart"); + + // Record top of stack for use by mcall. + // Once we call schedule we're never coming back, + // so other calls can reuse this stack space. + runtime·gosave(&m->g0->sched); + m->g0->sched.pc = (void*)-1; // make sure it is never used + m->seh = &seh; + runtime·asminit(); + runtime·minit(); + + // Install signal handlers; after minit so that minit can + // prepare the thread to be able to handle the signals. + if(m == &runtime·m0) { + runtime·initsig(); + if(runtime·iscgo) + runtime·newextram(); } + + if(m->mstartfn) + m->mstartfn(); - // Mark runnable. - if(g->status == Grunnable || g->status == Grunning) { - runtime·printf("goroutine %d has status %d\n", g->goid, g->status); - runtime·throw("bad g->status in ready"); + if(m->helpgc) { + m->helpgc = false; + stopm(); + } else if(m != &runtime·m0) { + acquirep(m->nextp); + m->nextp = nil; } - g->status = Grunnable; + schedule(); - gput(g); - matchmg(); + // TODO(brainman): This point is never reached, because scheduler + // does not release os threads at the moment. But once this path + // is enabled, we must remove our seh here. } -static void -nop(void) -{ -} +// When running with cgo, we call _cgo_thread_start +// to start threads for us so that we can play nicely with +// foreign code. +void (*_cgo_thread_start)(void*); -// Same as readylocked but a different symbol so that -// debuggers can set a breakpoint here and catch all -// new goroutines. -static void -newprocreadylocked(G *g) +typedef struct CgoThreadStart CgoThreadStart; +struct CgoThreadStart { - nop(); // avoid inlining in 6l - readylocked(g); -} + M *m; + G *g; + void (*fn)(void); +}; -// Pass g to m for running. -// Caller has already incremented mcpu. -static void -mnextg(M *m, G *g) +// Allocate a new m unassociated with any thread. +// Can use p for allocation context if needed. +M* +runtime·allocm(P *p) { - runtime·sched.grunning++; - m->nextg = g; - if(m->waitnextg) { - m->waitnextg = 0; - if(mwakeup != nil) - runtime·notewakeup(&mwakeup->havenextg); - mwakeup = m; + M *mp; + static Type *mtype; // The Go type M + + m->locks++; // disable GC because it can be called from sysmon + if(m->p == nil) + acquirep(p); // temporarily borrow p for mallocs in this function + if(mtype == nil) { + Eface e; + runtime·gc_m_ptr(&e); + mtype = ((PtrType*)e.type)->elem; } -} -// Get the next goroutine that m should run. -// Sched must be locked on entry, is unlocked on exit. -// Makes sure that at most $GOMAXPROCS g's are -// running on cpus (not in system calls) at any given time. -static G* -nextgandunlock(void) -{ - G *gp; - uint32 v; + mp = runtime·cnew(mtype); + mcommoninit(mp); -top: - if(atomic_mcpu(runtime·sched.atomic) >= maxgomaxprocs) - runtime·throw("negative mcpu"); - - // If there is a g waiting as m->nextg, the mcpu++ - // happened before it was passed to mnextg. - if(m->nextg != nil) { - gp = m->nextg; - m->nextg = nil; - schedunlock(); - return gp; - } + // In case of cgo, pthread_create will make us a stack. + // Windows will layout sched stack on OS stack. + if(runtime·iscgo || Windows) + mp->g0 = runtime·malg(-1); + else + mp->g0 = runtime·malg(8192); - if(m->lockedg != nil) { - // We can only run one g, and it's not available. - // Make sure some other cpu is running to handle - // the ordinary run queue. - if(runtime·sched.gwait != 0) { - matchmg(); - // m->lockedg might have been on the queue. 
- if(m->nextg != nil) { - gp = m->nextg; - m->nextg = nil; - schedunlock(); - return gp; - } - } - } else { - // Look for work on global queue. - while(haveg() && canaddmcpu()) { - gp = gget(); - if(gp == nil) - runtime·throw("gget inconsistency"); - - if(gp->lockedm) { - mnextg(gp->lockedm, gp); - continue; - } - runtime·sched.grunning++; - schedunlock(); - return gp; - } + if(p == m->p) + releasep(); + m->locks--; - // The while loop ended either because the g queue is empty - // or because we have maxed out our m procs running go - // code (mcpu >= mcpumax). We need to check that - // concurrent actions by entersyscall/exitsyscall cannot - // invalidate the decision to end the loop. - // - // We hold the sched lock, so no one else is manipulating the - // g queue or changing mcpumax. Entersyscall can decrement - // mcpu, but if does so when there is something on the g queue, - // the gwait bit will be set, so entersyscall will take the slow path - // and use the sched lock. So it cannot invalidate our decision. - // - // Wait on global m queue. - mput(m); - } - - // Look for deadlock situation. - // There is a race with the scavenger that causes false negatives: - // if the scavenger is just starting, then we have - // scvg != nil && grunning == 0 && gwait == 0 - // and we do not detect a deadlock. It is possible that we should - // add that case to the if statement here, but it is too close to Go 1 - // to make such a subtle change. Instead, we work around the - // false negative in trivial programs by calling runtime.gosched - // from the main goroutine just before main.main. - // See runtime·main above. - // - // On a related note, it is also possible that the scvg == nil case is - // wrong and should include gwait, but that does not happen in - // standard Go programs, which all start the scavenger. - // - if((scvg == nil && runtime·sched.grunning == 0) || - (scvg != nil && runtime·sched.grunning == 1 && runtime·sched.gwait == 0 && - (scvg->status == Grunning || scvg->status == Gsyscall))) { - runtime·throw("all goroutines are asleep - deadlock!"); - } - - m->nextg = nil; - m->waitnextg = 1; - runtime·noteclear(&m->havenextg); - - // Stoptheworld is waiting for all but its cpu to go to stop. - // Entersyscall might have decremented mcpu too, but if so - // it will see the waitstop and take the slow path. - // Exitsyscall never increments mcpu beyond mcpumax. - v = runtime·atomicload(&runtime·sched.atomic); - if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) { - // set waitstop = 0 (known to be 1) - runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift); - runtime·notewakeup(&runtime·sched.stopped); - } - schedunlock(); - - runtime·notesleep(&m->havenextg); - if(m->helpgc) { - runtime·gchelper(); - m->helpgc = 0; - runtime·lock(&runtime·sched); - goto top; - } - if((gp = m->nextg) == nil) - runtime·throw("bad m->nextg in nextgoroutine"); - m->nextg = nil; - return gp; + return mp; } -int32 -runtime·helpgc(bool *extra) +static M* lockextra(bool nilokay); +static void unlockextra(M*); + +// needm is called when a cgo callback happens on a +// thread without an m (a thread not created by Go). +// In this case, needm is expected to find an m to use +// and return with m, g initialized correctly. +// Since m and g are not set now (likely nil, but see below) +// needm is limited in what routines it can call. In particular +// it can only call nosplit functions (textflag 7) and cannot +// do any scheduling that requires an m. 
+// +// In order to avoid needing heavy lifting here, we adopt +// the following strategy: there is a stack of available m's +// that can be stolen. Using compare-and-swap +// to pop from the stack has ABA races, so we simulate +// a lock by doing an exchange (via casp) to steal the stack +// head and replace the top pointer with MLOCKED (1). +// This serves as a simple spin lock that we can use even +// without an m. The thread that locks the stack in this way +// unlocks the stack by storing a valid stack head pointer. +// +// In order to make sure that there is always an m structure +// available to be stolen, we maintain the invariant that there +// is always one more than needed. At the beginning of the +// program (if cgo is in use) the list is seeded with a single m. +// If needm finds that it has taken the last m off the list, its job +// is - once it has installed its own m so that it can do things like +// allocate memory - to create a spare m and put it on the list. +// +// Each of these extra m's also has a g0 and a curg that are +// pressed into service as the scheduling stack and current +// goroutine for the duration of the cgo callback. +// +// When the callback is done with the m, it calls dropm to +// put the m back on the list. +#pragma textflag 7 +void +runtime·needm(byte x) { M *mp; - int32 n, max; - // Figure out how many CPUs to use. - // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. - max = runtime·gomaxprocs; - if(max > runtime·ncpu) - max = runtime·ncpu; - if(max > MaxGcproc) - max = MaxGcproc; + // Lock extra list, take head, unlock popped list. + // nilokay=false is safe here because of the invariant above, + // that the extra list always contains or will soon contain + // at least one m. + mp = lockextra(false); + + // Set needextram when we've just emptied the list, + // so that the eventual call into cgocallbackg will + // allocate a new m for the extra list. We delay the + // allocation until then so that it can be done + // after exitsyscall makes sure it is okay to be + // running at all (that is, there's no garbage collection + // running right now). + mp->needextram = mp->schedlink == nil; + unlockextra(mp->schedlink); + + // Install m and g (= m->g0) and set the stack bounds + // to match the current stack. We don't actually know + // how big the stack is, like we don't know how big any + // scheduling stack is, but we assume there's at least 32 kB, + // which is more than enough for us. + runtime·setmg(mp, mp->g0); + g->stackbase = (uintptr)(&x + 1024); + g->stackguard = (uintptr)(&x - 32*1024); + + // On windows/386, we need to put an SEH frame (two words) + // somewhere on the current stack. We are called + // from needm, and we know there is some available + // space one word into the argument frame. Use that. + m->seh = (SEH*)((uintptr*)&x + 1); + + // Initialize this thread to use the m. + runtime·asminit(); + runtime·minit(); +} - // We're going to use one CPU no matter what. - // Figure out the max number of additional CPUs. - max--; +// newextram allocates an m and puts it on the extra list. +// It is called with a working local m, so that it can do things +// like call schedlock and allocate. +void +runtime·newextram(void) +{ + M *mp, *mnext; + G *gp; + // Create extra goroutine locked to extra m. + // The goroutine is the context in which the cgo callback will run. + // The sched.pc will never be returned to, but setting it to + // runtime.goexit makes clear to the traceback routines where + // the goroutine stack ends. 
+ mp = runtime·allocm(nil); + gp = runtime·malg(4096); + gp->sched.pc = (void*)runtime·goexit; + gp->sched.sp = gp->stackbase; + gp->sched.g = gp; + gp->status = Gsyscall; + mp->curg = gp; + mp->locked = LockInternal; + mp->lockedg = gp; + gp->lockedm = mp; + // put on allg for garbage collector runtime·lock(&runtime·sched); - n = 0; - while(n < max && (mp = mget(nil)) != nil) { - n++; - mp->helpgc = 1; - mp->waitnextg = 0; - runtime·notewakeup(&mp->havenextg); - } + if(runtime·lastg == nil) + runtime·allg = gp; + else + runtime·lastg->alllink = gp; + runtime·lastg = gp; runtime·unlock(&runtime·sched); - if(extra) - *extra = n != max; - return n; + gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1); + if(raceenabled) + gp->racectx = runtime·racegostart(runtime·newextram); + + // Add m to the extra list. + mnext = lockextra(true); + mp->schedlink = mnext; + unlockextra(mp); } +// dropm is called when a cgo callback has called needm but is now +// done with the callback and returning back into the non-Go thread. +// It puts the current m back onto the extra list. +// +// The main expense here is the call to signalstack to release the +// m's signal stack, and then the call to needm on the next callback +// from this thread. It is tempting to try to save the m for next time, +// which would eliminate both these costs, but there might not be +// a next time: the current thread (which Go does not control) might exit. +// If we saved the m for that thread, there would be an m leak each time +// such a thread exited. Instead, we acquire and release an m on each +// call. These should typically not be scheduling operations, just a few +// atomics, so the cost should be small. +// +// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread +// variable using pthread_key_create. Unlike the pthread keys we already use +// on OS X, this dummy key would never be read by Go code. It would exist +// only so that we could register at thread-exit-time destructor. +// That destructor would put the m back onto the extra list. +// This is purely a performance optimization. The current version, +// in which dropm happens on each cgo call, is still correct too. +// We may have to keep the current version on systems with cgo +// but without pthreads, like Windows. void -runtime·stoptheworld(void) +runtime·dropm(void) { - uint32 v; + M *mp, *mnext; - schedlock(); - runtime·gcwaiting = 1; + // Undo whatever initialization minit did during needm. + runtime·unminit(); - setmcpumax(1); + // Clear m and g, and return m to the extra list. + // After the call to setmg we can only call nosplit functions. + mp = m; + runtime·setmg(nil, nil); - // while mcpu > 1 - for(;;) { - v = runtime·sched.atomic; - if(atomic_mcpu(v) <= 1) - break; + mnext = lockextra(true); + mp->schedlink = mnext; + unlockextra(mp); +} - // It would be unsafe for multiple threads to be using - // the stopped note at once, but there is only - // ever one thread doing garbage collection. - runtime·noteclear(&runtime·sched.stopped); - if(atomic_waitstop(v)) - runtime·throw("invalid waitstop"); +#define MLOCKED ((M*)1) - // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above - // still being true. - if(!runtime·cas(&runtime·sched.atomic, v, v+(1<<waitstopShift))) - continue; +// lockextra locks the extra list and returns the list head. +// The caller must unlock the list by storing a new list head +// to runtime.extram. If nilokay is true, then lockextra will +// return a nil list head if that's what it finds. 
If nilokay is false, +// lockextra will keep waiting until the list head is no longer nil. +#pragma textflag 7 +static M* +lockextra(bool nilokay) +{ + M *mp; + void (*yield)(void); - schedunlock(); - runtime·notesleep(&runtime·sched.stopped); - schedlock(); + for(;;) { + mp = runtime·atomicloadp(&runtime·extram); + if(mp == MLOCKED) { + yield = runtime·osyield; + yield(); + continue; + } + if(mp == nil && !nilokay) { + runtime·usleep(1); + continue; + } + if(!runtime·casp(&runtime·extram, mp, MLOCKED)) { + yield = runtime·osyield; + yield(); + continue; + } + break; } - runtime·singleproc = runtime·gomaxprocs == 1; - schedunlock(); + return mp; } -void -runtime·starttheworld(bool extra) +#pragma textflag 7 +static void +unlockextra(M *mp) { - M *m; - - schedlock(); - runtime·gcwaiting = 0; - setmcpumax(runtime·gomaxprocs); - matchmg(); - if(extra && canaddmcpu()) { - // Start a new m that will (we hope) be idle - // and so available to help when the next - // garbage collection happens. - // canaddmcpu above did mcpu++ - // (necessary, because m will be doing various - // initialization work so is definitely running), - // but m is not running a specific goroutine, - // so set the helpgc flag as a signal to m's - // first schedule(nil) to mcpu-- and grunning--. - m = runtime·newm(); - m->helpgc = 1; - runtime·sched.grunning++; - } - schedunlock(); + runtime·atomicstorep(&runtime·extram, mp); } -// Called to start an M. -void -runtime·mstart(void) + +// Create a new m. It will start off with a call to fn, or else the scheduler. +static void +newm(void(*fn)(void), P *p) { - if(g != m->g0) - runtime·throw("bad runtime·mstart"); + M *mp; - // Record top of stack for use by mcall. - // Once we call schedule we're never coming back, - // so other calls can reuse this stack space. - runtime·gosave(&m->g0->sched); - m->g0->sched.pc = (void*)-1; // make sure it is never used - runtime·asminit(); - runtime·minit(); + mp = runtime·allocm(p); + mp->nextp = p; + mp->mstartfn = fn; - // Install signal handlers; after minit so that minit can - // prepare the thread to be able to handle the signals. - if(m == &runtime·m0) - runtime·initsig(); + if(runtime·iscgo) { + CgoThreadStart ts; - schedule(nil); + if(_cgo_thread_start == nil) + runtime·throw("_cgo_thread_start missing"); + ts.m = mp; + ts.g = mp->g0; + ts.fn = runtime·mstart; + runtime·asmcgocall(_cgo_thread_start, &ts); + return; + } + runtime·newosproc(mp, (byte*)mp->g0->stackbase); } -// When running with cgo, we call libcgo_thread_start -// to start threads for us so that we can play nicely with -// foreign code. -void (*libcgo_thread_start)(void*); +// Stops execution of the current m until new work is available. +// Returns with acquired P. +static void +stopm(void) +{ + if(m->locks) + runtime·throw("stopm holding locks"); + if(m->p) + runtime·throw("stopm holding p"); + if(m->spinning) { + m->spinning = false; + runtime·xadd(&runtime·sched.nmspinning, -1); + } -typedef struct CgoThreadStart CgoThreadStart; -struct CgoThreadStart +retry: + runtime·lock(&runtime·sched); + mput(m); + runtime·unlock(&runtime·sched); + runtime·notesleep(&m->park); + runtime·noteclear(&m->park); + if(m->helpgc) { + m->helpgc = 0; + runtime·gchelper(); + m->mcache = nil; + goto retry; + } + acquirep(m->nextp); + m->nextp = nil; +} + +static void +mspinning(void) { - M *m; - G *g; - void (*fn)(void); -}; + m->spinning = true; +} -// Kick off new m's as needed (up to mcpumax). -// Sched is locked. +// Schedules some M to run the p (creates an M if necessary). 
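stopm above and startm just below form a park/unpark pair: an idle M sleeps on its private note (m->park) and receives its next P through m->nextp. A sketch with a channel standing in for both the note and the handoff; a 1-buffered channel, like a note, tolerates the wakeup arriving before the sleep. All names here are invented, and the sched lock around the idle list is elided:

	package main

	import "fmt"

	type pp struct{ id int }

	// Each worker owns a park channel; buffered size 1 models the
	// notesleep/notewakeup pair, and the value models m->nextp.
	type worker struct {
		park chan *pp
	}

	var idle []*worker // like runtime·sched.midle (lock elided)

	// stopWorker: sleep until some P is handed over.
	func stopWorker(w *worker) *pp {
		return <-w.park // notesleep(&m->park), then acquirep(m->nextp)
	}

	// startWorker: wake an idle worker with p; false means the caller
	// would have to create a fresh worker (newm) instead.
	func startWorker(p *pp) bool {
		if len(idle) == 0 {
			return false
		}
		w := idle[len(idle)-1]
		idle = idle[:len(idle)-1]
		w.park <- p // mp->nextp = p; notewakeup(&mp->park)
		return true
	}

	func main() {
		w := &worker{park: make(chan *pp, 1)}
		idle = append(idle, w)  // mput: register as idle
		startWorker(&pp{id: 1}) // the wakeup may even precede the sleep
		fmt.Println("woke with P", stopWorker(w).id)
	}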
+// If p==nil, tries to get an idle P, if no idle P's returns false. static void -matchmg(void) +startm(P *p, bool spinning) { - G *gp; M *mp; + void (*fn)(void); - if(m->mallocing || m->gcing) + runtime·lock(&runtime·sched); + if(p == nil) { + p = pidleget(); + if(p == nil) { + runtime·unlock(&runtime·sched); + if(spinning) + runtime·xadd(&runtime·sched.nmspinning, -1); + return; + } + } + mp = mget(); + runtime·unlock(&runtime·sched); + if(mp == nil) { + fn = nil; + if(spinning) + fn = mspinning; + newm(fn, p); return; - - while(haveg() && canaddmcpu()) { - gp = gget(); - if(gp == nil) - runtime·throw("gget inconsistency"); - - // Find the m that will run gp. - if((mp = mget(gp)) == nil) - mp = runtime·newm(); - mnextg(mp, gp); } + if(mp->spinning) + runtime·throw("startm: m is spinning"); + if(mp->nextp) + runtime·throw("startm: m has p"); + mp->spinning = spinning; + mp->nextp = p; + runtime·notewakeup(&mp->park); } -// Create a new m. It will start off with a call to runtime·mstart. -M* -runtime·newm(void) +// Hands off P from syscall or locked M. +static void +handoffp(P *p) { - M *m; + // if it has local work, start it straight away + if(p->runqhead != p->runqtail || runtime·sched.runqsize) { + startm(p, false); + return; + } + // no local work, check that there are no spinning/idle M's, + // otherwise our help is not required + if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 && // TODO: fast atomic + runtime·cas(&runtime·sched.nmspinning, 0, 1)) { + startm(p, true); + return; + } + runtime·lock(&runtime·sched); + if(runtime·gcwaiting) { + p->status = Pgcstop; + if(--runtime·sched.stopwait == 0) + runtime·notewakeup(&runtime·sched.stopnote); + runtime·unlock(&runtime·sched); + return; + } + if(runtime·sched.runqsize) { + runtime·unlock(&runtime·sched); + startm(p, false); + return; + } + pidleput(p); + runtime·unlock(&runtime·sched); +} - m = runtime·malloc(sizeof(M)); - mcommoninit(m); +// Tries to add one more P to execute G's. +// Called when a G is made runnable (newproc, ready). +static void +wakep(void) +{ + // be conservative about spinning threads + if(!runtime·cas(&runtime·sched.nmspinning, 0, 1)) + return; + startm(nil, true); +} - if(runtime·iscgo) { - CgoThreadStart ts; +// Stops execution of the current m that is locked to a g until the g is runnable again. +// Returns with acquired P. +static void +stoplockedm(void) +{ + P *p; - if(libcgo_thread_start == nil) - runtime·throw("libcgo_thread_start missing"); - // pthread_create will make us a stack. - m->g0 = runtime·malg(-1); - ts.m = m; - ts.g = m->g0; - ts.fn = runtime·mstart; - runtime·asmcgocall(libcgo_thread_start, &ts); - } else { - if(Windows) - // windows will layout sched stack on os stack - m->g0 = runtime·malg(-1); - else - m->g0 = runtime·malg(8192); - runtime·newosproc(m, m->g0, m->g0->stackbase, runtime·mstart); + if(m->lockedg == nil || m->lockedg->lockedm != m) + runtime·throw("stoplockedm: inconsistent locking"); + if(m->p) { + // Schedule another M to run this p. + p = releasep(); + handoffp(p); } + inclocked(1); + // Wait until another thread schedules lockedg again. + runtime·notesleep(&m->park); + runtime·noteclear(&m->park); + if(m->lockedg->status != Grunnable) + runtime·throw("stoplockedm: not runnable"); + acquirep(m->nextp); + m->nextp = nil; +} + +// Schedules the locked m to run the locked gp. 
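wakep above is deliberately conservative: a single CAS on nmspinning ensures at most one new spinning M is started per wave of wakeups, so a burst of newly runnable goroutines cannot start a thundering herd of threads. The same throttle in miniature (illustrative names, not the runtime's types):

	package main

	import (
		"fmt"
		"sync"
		"sync/atomic"
	)

	var nmspinning int32 // workers currently hunting for work

	// wakeWorker mirrors wakep: start a spinner only if none exists.
	func wakeWorker(wg *sync.WaitGroup) {
		if !atomic.CompareAndSwapInt32(&nmspinning, 0, 1) {
			return // somebody is already spinning; it will find the work
		}
		wg.Add(1)
		go func() {
			defer wg.Done()
			// ...look for work; the runtime clears the spinning flag
			// before executing, allowing the next wakep to succeed.
			atomic.AddInt32(&nmspinning, -1)
			fmt.Println("spinner ran")
		}()
	}

	func main() {
		var wg sync.WaitGroup
		for i := 0; i < 100; i++ {
			wakeWorker(&wg) // many wakeups, at most one spinner at a time
		}
		wg.Wait()
	}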
+static void +startlockedm(G *gp) +{ + M *mp; + P *p; + + mp = gp->lockedm; + if(mp == m) + runtime·throw("startlockedm: locked to me"); + if(mp->nextp) + runtime·throw("startlockedm: m has p"); + // directly handoff current P to the locked m + inclocked(-1); + p = releasep(); + mp->nextp = p; + runtime·notewakeup(&mp->park); + stopm(); +} + +// Stops the current m for stoptheworld. +// Returns when the world is restarted. +static void +gcstopm(void) +{ + P *p; - return m; + if(!runtime·gcwaiting) + runtime·throw("gcstopm: not waiting for gc"); + if(m->spinning) { + m->spinning = false; + runtime·xadd(&runtime·sched.nmspinning, -1); + } + p = releasep(); + runtime·lock(&runtime·sched); + p->status = Pgcstop; + if(--runtime·sched.stopwait == 0) + runtime·notewakeup(&runtime·sched.stopnote); + runtime·unlock(&runtime·sched); + stopm(); } -// One round of scheduler: find a goroutine and run it. -// The argument is the goroutine that was running before -// schedule was called, or nil if this is the first call. +// Schedules gp to run on the current M. // Never returns. static void -schedule(G *gp) +execute(G *gp) { int32 hz; - uint32 v; - - schedlock(); - if(gp != nil) { - // Just finished running gp. - gp->m = nil; - runtime·sched.grunning--; - - // atomic { mcpu-- } - v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift); - if(atomic_mcpu(v) > maxgomaxprocs) - runtime·throw("negative mcpu in scheduler"); - - switch(gp->status){ - case Grunnable: - case Gdead: - // Shouldn't have been running! - runtime·throw("bad gp->status in sched"); - case Grunning: - gp->status = Grunnable; - gput(gp); - break; - case Gmoribund: - gp->status = Gdead; - if(gp->lockedm) { - gp->lockedm = nil; - m->lockedg = nil; - } - gp->idlem = nil; - unwindstack(gp, nil); - gfput(gp); - if(--runtime·sched.gcount == 0) - runtime·exit(0); - break; - } - if(gp->readyonstop){ - gp->readyonstop = 0; - readylocked(gp); - } - } else if(m->helpgc) { - // Bootstrap m or new m started by starttheworld. - // atomic { mcpu-- } - v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift); - if(atomic_mcpu(v) > maxgomaxprocs) - runtime·throw("negative mcpu in scheduler"); - // Compensate for increment in starttheworld(). - runtime·sched.grunning--; - m->helpgc = 0; - } else if(m->nextg != nil) { - // New m started by matchmg. - } else { - runtime·throw("invalid m state in scheduler"); - } - // Find (or wait for) g to run. Unlocks runtime·sched. - gp = nextgandunlock(); - gp->readyonstop = 0; + if(gp->status != Grunnable) { + runtime·printf("execute: bad g status %d\n", gp->status); + runtime·throw("execute: bad g status"); + } gp->status = Grunning; + m->p->tick++; m->curg = gp; gp->m = m; @@ -904,27 +964,209 @@ schedule(G *gp) if(m->profilehz != hz) runtime·resetcpuprofiler(hz); - if(gp->sched.pc == (byte*)runtime·goexit) { // kickoff - runtime·gogocall(&gp->sched, (void(*)(void))gp->entry); - } + if(gp->sched.pc == (byte*)runtime·goexit) // kickoff + runtime·gogocallfn(&gp->sched, gp->fnstart); runtime·gogo(&gp->sched, 0); } -// Enter scheduler. If g->status is Grunning, -// re-queues g and runs everyone else who is waiting -// before running g again. If g->status is Gmoribund, -// kills off g. -// Cannot split stack because it is called from exitsyscall. -// See comment below. -#pragma textflag 7 +// Finds a runnable goroutine to execute. +// Tries to steal from other P's and get g from global queue. 
+static G* +findrunnable(void) +{ + G *gp; + P *p; + int32 i; + +top: + if(runtime·gcwaiting) { + gcstopm(); + goto top; + } + // local runq + gp = runqget(m->p); + if(gp) + return gp; + // global runq + if(runtime·sched.runqsize) { + runtime·lock(&runtime·sched); + gp = globrunqget(m->p); + runtime·unlock(&runtime·sched); + if(gp) + return gp; + } + // If number of spinning M's >= number of busy P's, block. + // This is necessary to prevent excessive CPU consumption + // when GOMAXPROCS>>1 but the program parallelism is low. + if(!m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle)) // TODO: fast atomic + goto stop; + if(!m->spinning) { + m->spinning = true; + runtime·xadd(&runtime·sched.nmspinning, 1); + } + // random steal from other P's + for(i = 0; i < 2*runtime·gomaxprocs; i++) { + if(runtime·gcwaiting) + goto top; + p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs]; + if(p == m->p) + gp = runqget(p); + else + gp = runqsteal(m->p, p); + if(gp) + return gp; + } +stop: + // return P and block + runtime·lock(&runtime·sched); + if(runtime·gcwaiting) { + runtime·unlock(&runtime·sched); + goto top; + } + if(runtime·sched.runqsize) { + gp = globrunqget(m->p); + runtime·unlock(&runtime·sched); + return gp; + } + p = releasep(); + pidleput(p); + runtime·unlock(&runtime·sched); + if(m->spinning) { + m->spinning = false; + runtime·xadd(&runtime·sched.nmspinning, -1); + } + // check all runqueues once again + for(i = 0; i < runtime·gomaxprocs; i++) { + p = runtime·allp[i]; + if(p && p->runqhead != p->runqtail) { + runtime·lock(&runtime·sched); + p = pidleget(); + runtime·unlock(&runtime·sched); + if(p) { + acquirep(p); + goto top; + } + break; + } + } + stopm(); + goto top; +} + +// One round of scheduler: find a runnable goroutine and execute it. +// Never returns. +static void +schedule(void) +{ + G *gp; + + if(m->locks) + runtime·throw("schedule: holding locks"); + +top: + if(runtime·gcwaiting) { + gcstopm(); + goto top; + } + + gp = runqget(m->p); + if(gp == nil) + gp = findrunnable(); + + if(m->spinning) { + m->spinning = false; + runtime·xadd(&runtime·sched.nmspinning, -1); + } + + // M wakeup policy is deliberately somewhat conservative (see nmspinning handling), + // so see if we need to wakeup another M here. + if (m->p->runqhead != m->p->runqtail && + runtime·atomicload(&runtime·sched.nmspinning) == 0 && + runtime·atomicload(&runtime·sched.npidle) > 0) // TODO: fast atomic + wakep(); + + if(gp->lockedm) { + startlockedm(gp); + goto top; + } + + execute(gp); +} + +// Puts the current goroutine into a waiting state and unlocks the lock. +// The goroutine can be made runnable again by calling runtime·ready(gp). +void +runtime·park(void(*unlockf)(Lock*), Lock *lock, int8 *reason) +{ + m->waitlock = lock; + m->waitunlockf = unlockf; + g->waitreason = reason; + runtime·mcall(park0); +} + +// runtime·park continuation on g0. +static void +park0(G *gp) +{ + gp->status = Gwaiting; + gp->m = nil; + m->curg = nil; + if(m->waitunlockf) { + m->waitunlockf(m->waitlock); + m->waitunlockf = nil; + } + if(m->lockedg) { + stoplockedm(); + execute(gp); // Never returns. + } + schedule(); +} + +// Scheduler yield. void runtime·gosched(void) { - if(m->locks != 0) - runtime·throw("gosched holding locks"); - if(g == m->g0) - runtime·throw("gosched of g0"); - runtime·mcall(schedule); + runtime·mcall(gosched0); +} + +// runtime·gosched continuation on g0. 
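findrunnable above fixes the search order: the local run queue, then the global queue, then up to 2*GOMAXPROCS random steal attempts before the M gives up its P and parks. A single-threaded model of that order (the real code guards every queue with locks and atomics; all names are illustrative):

	package main

	import (
		"fmt"
		"math/rand"
	)

	type gg struct{ id int }

	type pq struct{ runq []*gg }

	var (
		allp    []*pq
		globalq []*gg
	)

	// findRunnable models the hunt in findrunnable:
	// 1. own run queue, 2. global queue, 3. random victims.
	func findRunnable(self *pq) *gg {
		if len(self.runq) > 0 {
			gp := self.runq[0]
			self.runq = self.runq[1:]
			return gp
		}
		if len(globalq) > 0 {
			gp := globalq[0]
			globalq = globalq[1:]
			return gp
		}
		for i := 0; i < 2*len(allp); i++ {
			victim := allp[rand.Intn(len(allp))]
			if victim == self || len(victim.runq) == 0 {
				continue
			}
			// steal roughly half, as runqsteal does
			half := (len(victim.runq) + 1) / 2
			stolen := victim.runq[:half]
			victim.runq = victim.runq[half:]
			gp := stolen[0]
			self.runq = append(self.runq, stolen[1:]...)
			return gp
		}
		return nil // the real code parks the M (stopm) here
	}

	func main() {
		p0 := &pq{}
		p1 := &pq{runq: []*gg{{1}, {2}, {3}, {4}}}
		allp = []*pq{p0, p1}
		if gp := findRunnable(p0); gp != nil {
			fmt.Println("stole g", gp.id, "leaving", len(p1.runq), "behind")
		} else {
			fmt.Println("would park the M")
		}
	}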
+static void +gosched0(G *gp) +{ + gp->status = Grunnable; + gp->m = nil; + m->curg = nil; + runtime·lock(&runtime·sched); + globrunqput(gp); + runtime·unlock(&runtime·sched); + if(m->lockedg) { + stoplockedm(); + execute(gp); // Never returns. + } + schedule(); +} + +// Finishes execution of the current goroutine. +void +runtime·goexit(void) +{ + if(raceenabled) + runtime·racegoend(); + runtime·mcall(goexit0); +} + +// runtime·goexit continuation on g0. +static void +goexit0(G *gp) +{ + gp->status = Gdead; + gp->m = nil; + gp->lockedm = nil; + m->curg = nil; + m->lockedg = nil; + runtime·unwindstack(gp, nil); + gfput(m->p, gp); + schedule(); } // The goroutine g is about to enter a system call. @@ -935,21 +1177,19 @@ runtime·gosched(void) // Entersyscall cannot split the stack: the runtime·gosave must // make g->sched refer to the caller's stack segment, because // entersyscall is going to return immediately after. -// It's okay to call matchmg and notewakeup even after -// decrementing mcpu, because we haven't released the -// sched lock yet, so the garbage collector cannot be running. #pragma textflag 7 void -runtime·entersyscall(void) +·entersyscall(int32 dummy) { - uint32 v; - if(m->profilehz > 0) runtime·setprof(false); // Leave SP around for gc and traceback. - runtime·gosave(&g->sched); + g->sched.sp = (uintptr)runtime·getcallersp(&dummy); + g->sched.pc = runtime·getcallerpc(&dummy); + g->sched.g = g; g->gcsp = g->sched.sp; + g->gcpc = g->sched.pc; g->gcstack = g->stackbase; g->gcguard = g->stackguard; g->status = Gsyscall; @@ -959,34 +1199,61 @@ runtime·entersyscall(void) runtime·throw("entersyscall"); } - // Fast path. - // The slow path inside the schedlock/schedunlock will get - // through without stopping if it does: - // mcpu-- - // gwait not true - // waitstop && mcpu <= mcpumax not true - // If we can do the same with a single atomic add, - // then we can skip the locks. - v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift); - if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v))) - return; - - schedlock(); - v = runtime·atomicload(&runtime·sched.atomic); - if(atomic_gwaiting(v)) { - matchmg(); - v = runtime·atomicload(&runtime·sched.atomic); + if(runtime·atomicload(&runtime·sched.sysmonwait)) { // TODO: fast atomic + runtime·lock(&runtime·sched); + if(runtime·atomicload(&runtime·sched.sysmonwait)) { + runtime·atomicstore(&runtime·sched.sysmonwait, 0); + runtime·notewakeup(&runtime·sched.sysmonnote); + } + runtime·unlock(&runtime·sched); + runtime·gosave(&g->sched); // re-save for traceback } - if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) { - runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift); - runtime·notewakeup(&runtime·sched.stopped); + + m->mcache = nil; + m->p->tick++; + m->p->m = nil; + runtime·atomicstore(&m->p->status, Psyscall); + if(runtime·gcwaiting) { + runtime·lock(&runtime·sched); + if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) { + if(--runtime·sched.stopwait == 0) + runtime·notewakeup(&runtime·sched.stopnote); + } + runtime·unlock(&runtime·sched); + runtime·gosave(&g->sched); // re-save for traceback } +} - // Re-save sched in case one of the calls - // (notewakeup, matchmg) triggered something using it. - runtime·gosave(&g->sched); +// The same as runtime·entersyscall(), but with a hint that the syscall is blocking. 
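gosched0 above parks the yielding goroutine on the back of the global run queue and reenters schedule, which is what lets a polling loop make progress even with a single P; testYieldProgress later in this patch exercises exactly this shape:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		c := make(chan bool)
		go func() { c <- true }()
		for {
			select {
			case <-c:
				fmt.Println("progress")
				return
			default:
				runtime.Gosched() // to the back of the global queue
			}
		}
	}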
+#pragma textflag 7 +void +·entersyscallblock(int32 dummy) +{ + P *p; - schedunlock(); + if(m->profilehz > 0) + runtime·setprof(false); + + // Leave SP around for gc and traceback. + g->sched.sp = (uintptr)runtime·getcallersp(&dummy); + g->sched.pc = runtime·getcallerpc(&dummy); + g->sched.g = g; + g->gcsp = g->sched.sp; + g->gcpc = g->sched.pc; + g->gcstack = g->stackbase; + g->gcguard = g->stackguard; + g->status = Gsyscall; + if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) { + // runtime·printf("entersyscallblock inconsistent %p [%p,%p]\n", + // g->gcsp, g->gcguard-StackGuard, g->gcstack); + runtime·throw("entersyscallblock"); + } + + p = releasep(); + handoffp(p); + if(g == scvg) // do not consider blocked scavenger for deadlock detection + inclocked(1); + runtime·gosave(&g->sched); // re-save for traceback } // The goroutine g exited its system call. @@ -996,177 +1263,81 @@ runtime·entersyscall(void) void runtime·exitsyscall(void) { - uint32 v; + P *p; - // Fast path. - // If we can do the mcpu++ bookkeeping and - // find that we still have mcpu <= mcpumax, then we can - // start executing Go code immediately, without having to - // schedlock/schedunlock. - v = runtime·xadd(&runtime·sched.atomic, (1<<mcpuShift)); - if(m->profilehz == runtime·sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) { + // Check whether the profiler needs to be turned on. + if(m->profilehz > 0) + runtime·setprof(true); + + // Try to re-acquire the last P. + if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) { // There's a cpu for us, so we can run. + m->mcache = m->p->mcache; + m->p->m = m; + m->p->tick++; g->status = Grunning; // Garbage collector isn't running (since we are), - // so okay to clear gcstack. - g->gcstack = nil; - - if(m->profilehz > 0) - runtime·setprof(true); + // so okay to clear gcstack and gcsp. + g->gcstack = (uintptr)nil; + g->gcsp = (uintptr)nil; return; } - // Tell scheduler to put g back on the run queue: - // mostly equivalent to g->status = Grunning, - // but keeps the garbage collector from thinking - // that g is running right now, which it's not. - g->readyonstop = 1; + if(g == scvg) // do not consider blocked scavenger for deadlock detection + inclocked(-1); + // Try to get any other idle P. + m->p = nil; + if(runtime·sched.pidle) { + runtime·lock(&runtime·sched); + p = pidleget(); + runtime·unlock(&runtime·sched); + if(p) { + acquirep(p); + g->gcstack = (uintptr)nil; + g->gcsp = (uintptr)nil; + return; + } + } - // All the cpus are taken. - // The scheduler will ready g and put this m to sleep. - // When the scheduler takes g away from m, - // it will undo the runtime·sched.mcpu++ above. - runtime·gosched(); + // Call the scheduler. + runtime·mcall(exitsyscall0); - // Gosched returned, so we're allowed to run now. + // Scheduler returned, so we're allowed to run now. // Delete the gcstack information that we left for // the garbage collector during the system call. // Must wait until now because until gosched returns // we don't know for sure that the garbage collector // is not running. - g->gcstack = nil; + g->gcstack = (uintptr)nil; + g->gcsp = (uintptr)nil; } -// Called from runtime·lessstack when returning from a function which -// allocated a new stack segment. The function's return value is in -// m->cret. -void -runtime·oldstack(void) +// runtime·exitsyscall slow path on g0. +// Failed to acquire P, enqueue gp as runnable. 
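entersyscallblock above releases its P immediately (releasep plus handoffp) because the caller has hinted that the syscall will block, whereas plain entersyscall leaves the P in Psyscall for sysmon to retake later. The handoff can be modeled as a token semaphore, with one token per P (all names invented for the sketch):

	package main

	import (
		"fmt"
		"time"
	)

	// One token per P; a worker holds a token while running Go code.
	var ptokens = make(chan struct{}, 2)

	func init() {
		for i := 0; i < cap(ptokens); i++ {
			ptokens <- struct{}{}
		}
	}

	// blockingSyscall models entersyscallblock/exitsyscall: hand the P
	// back before blocking, reacquire one (maybe a different one) after.
	func blockingSyscall(d time.Duration) {
		ptokens <- struct{}{} // handoffp: another M can run Go code now
		time.Sleep(d)         // the blocking call itself
		<-ptokens             // exitsyscall: wait for a free P
	}

	func main() {
		<-ptokens // acquire a P before running "Go code"
		blockingSyscall(time.Millisecond)
		fmt.Println("back on a P")
		ptokens <- struct{}{}
	}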
+static void +exitsyscall0(G *gp) { - Stktop *top, old; - uint32 argsize; - uintptr cret; - byte *sp; - G *g1; - int32 goid; - -//printf("oldstack m->cret=%p\n", m->cret); - - g1 = m->curg; - top = (Stktop*)g1->stackbase; - sp = (byte*)top; - old = *top; - argsize = old.argsize; - if(argsize > 0) { - sp -= argsize; - runtime·memmove(top->argp, sp, argsize); - } - goid = old.gobuf.g->goid; // fault if g is bad, before gogo - USED(goid); - - if(old.free != 0) - runtime·stackfree(g1->stackguard - StackGuard, old.free); - g1->stackbase = old.stackbase; - g1->stackguard = old.stackguard; - - cret = m->cret; - m->cret = 0; // drop reference - runtime·gogo(&old.gobuf, cret); -} - -// Called from reflect·call or from runtime·morestack when a new -// stack segment is needed. Allocate a new stack big enough for -// m->moreframesize bytes, copy m->moreargsize bytes to the new frame, -// and then act as though runtime·lessstack called the function at -// m->morepc. -void -runtime·newstack(void) -{ - int32 framesize, argsize; - Stktop *top; - byte *stk, *sp; - G *g1; - Gobuf label; - bool reflectcall; - uintptr free; - - framesize = m->moreframesize; - argsize = m->moreargsize; - g1 = m->curg; - - if(m->morebuf.sp < g1->stackguard - StackGuard) { - runtime·printf("runtime: split stack overflow: %p < %p\n", m->morebuf.sp, g1->stackguard - StackGuard); - runtime·throw("runtime: split stack overflow"); - } - if(argsize % sizeof(uintptr) != 0) { - runtime·printf("runtime: stack split with misaligned argsize %d\n", argsize); - runtime·throw("runtime: stack split argsize"); - } - - reflectcall = framesize==1; - if(reflectcall) - framesize = 0; - - if(reflectcall && m->morebuf.sp - sizeof(Stktop) - argsize - 32 > g1->stackguard) { - // special case: called from reflect.call (framesize==1) - // to call code with an arbitrary argument size, - // and we have enough space on the current stack. - // the new Stktop* is necessary to unwind, but - // we don't need to create a new segment. - top = (Stktop*)(m->morebuf.sp - sizeof(*top)); - stk = g1->stackguard - StackGuard; - free = 0; - } else { - // allocate new segment. - framesize += argsize; - framesize += StackExtra; // room for more functions, Stktop. - if(framesize < StackMin) - framesize = StackMin; - framesize += StackSystem; - stk = runtime·stackalloc(framesize); - top = (Stktop*)(stk+framesize-sizeof(*top)); - free = framesize; - } - -//runtime·printf("newstack framesize=%d argsize=%d morepc=%p moreargp=%p gobuf=%p, %p top=%p old=%p\n", -//framesize, argsize, m->morepc, m->moreargp, m->morebuf.pc, m->morebuf.sp, top, g1->stackbase); - - top->stackbase = g1->stackbase; - top->stackguard = g1->stackguard; - top->gobuf = m->morebuf; - top->argp = m->moreargp; - top->argsize = argsize; - top->free = free; - m->moreargp = nil; - m->morebuf.pc = nil; - m->morebuf.sp = nil; - - // copy flag from panic - top->panic = g1->ispanic; - g1->ispanic = false; - - g1->stackbase = (byte*)top; - g1->stackguard = stk + StackGuard; - - sp = (byte*)top; - if(argsize > 0) { - sp -= argsize; - runtime·memmove(sp, top->argp, argsize); + P *p; + + gp->status = Grunnable; + gp->m = nil; + m->curg = nil; + runtime·lock(&runtime·sched); + p = pidleget(); + if(p == nil) + globrunqput(gp); + runtime·unlock(&runtime·sched); + if(p) { + acquirep(p); + execute(gp); // Never returns. } - if(thechar == '5') { - // caller would have saved its LR below args. - sp -= sizeof(void*); - *(void**)sp = nil; + if(m->lockedg) { + // Wait until another thread schedules gp and so m again. 
+ stoplockedm(); + execute(gp); // Never returns. } - - // Continue as if lessstack had just called m->morepc - // (the PC that decided to grow the stack). - label.sp = sp; - label.pc = (byte*)runtime·lessstack; - label.g = m->curg; - runtime·gogocall(&label, m->morepc); - - *(int32*)345 = 123; // never return + stopm(); + schedule(); // Never returns. } // Hook used by runtime·malg to call runtime·stackalloc on the @@ -1204,10 +1375,10 @@ runtime·malg(int32 stacksize) stk = g->param; g->param = nil; } - newg->stack0 = stk; - newg->stackguard = stk + StackGuard; - newg->stackbase = stk + StackSystem + stacksize - sizeof(Stktop); - runtime·memclr(newg->stackbase, sizeof(Stktop)); + newg->stack0 = (uintptr)stk; + newg->stackguard = (uintptr)stk + StackGuard; + newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop); + runtime·memclr((byte*)newg->stackbase, sizeof(Stktop)); } return newg; } @@ -1221,7 +1392,7 @@ runtime·malg(int32 stacksize) // functions that split the stack. #pragma textflag 7 void -runtime·newproc(int32 siz, byte* fn, ...) +runtime·newproc(int32 siz, FuncVal* fn, ...) { byte *argp; @@ -1237,7 +1408,7 @@ runtime·newproc(int32 siz, byte* fn, ...) // address of the go statement that created this. The new g is put // on the queue of g's waiting to run. G* -runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc) +runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc) { byte *sp; G *newg; @@ -1254,23 +1425,21 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc) if(siz > StackMin - 1024) runtime·throw("runtime.newproc: function arguments too large for new goroutine"); - schedlock(); - - if((newg = gfget()) != nil){ + if((newg = gfget(m->p)) != nil) { if(newg->stackguard - StackGuard != newg->stack0) runtime·throw("invalid stack in newg"); } else { newg = runtime·malg(StackMin); + runtime·lock(&runtime·sched); if(runtime·lastg == nil) runtime·allg = newg; else runtime·lastg->alllink = newg; runtime·lastg = newg; + runtime·unlock(&runtime·sched); } - newg->status = Gwaiting; - newg->waitreason = "new goroutine"; - sp = newg->stackbase; + sp = (byte*)newg->stackbase; sp -= siz; runtime·memmove(sp, argp, narg); if(thechar == '5') { @@ -1279,318 +1448,88 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc) *(void**)sp = nil; } - newg->sched.sp = sp; + newg->sched.sp = (uintptr)sp; newg->sched.pc = (byte*)runtime·goexit; newg->sched.g = newg; - newg->entry = fn; + newg->fnstart = fn; newg->gopc = (uintptr)callerpc; - - runtime·sched.gcount++; - runtime·sched.goidgen++; - newg->goid = runtime·sched.goidgen; - - newprocreadylocked(newg); - schedunlock(); - + newg->status = Grunnable; + newg->goid = runtime·xadd64(&runtime·sched.goidgen, 1); + if(raceenabled) + newg->racectx = runtime·racegostart(callerpc); + runqput(m->p, newg); + + if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main) // TODO: fast atomic + wakep(); return newg; -//printf(" goid=%d\n", newg->goid); } -// Create a new deferred function fn with siz bytes of arguments. -// The compiler turns a defer statement into a call to this. -// Cannot split the stack because it assumes that the arguments -// are available sequentially after &fn; they would not be -// copied if a stack split occurred. It's OK for this to call -// functions that split the stack. -#pragma textflag 7 -uintptr -runtime·deferproc(int32 siz, byte* fn, ...) 
-{ - Defer *d; - - d = runtime·malloc(sizeof(*d) + siz - sizeof(d->args)); - d->fn = fn; - d->siz = siz; - d->pc = runtime·getcallerpc(&siz); - if(thechar == '5') - d->argp = (byte*)(&fn+2); // skip caller's saved link register - else - d->argp = (byte*)(&fn+1); - runtime·memmove(d->args, d->argp, d->siz); - - d->link = g->defer; - g->defer = d; - - // deferproc returns 0 normally. - // a deferred func that stops a panic - // makes the deferproc return 1. - // the code the compiler generates always - // checks the return value and jumps to the - // end of the function if deferproc returns != 0. - return 0; -} - -// Run a deferred function if there is one. -// The compiler inserts a call to this at the end of any -// function which calls defer. -// If there is a deferred function, this will call runtime·jmpdefer, -// which will jump to the deferred function such that it appears -// to have been called by the caller of deferreturn at the point -// just before deferreturn was called. The effect is that deferreturn -// is called again and again until there are no more deferred functions. -// Cannot split the stack because we reuse the caller's frame to -// call the deferred function. -#pragma textflag 7 -void -runtime·deferreturn(uintptr arg0) -{ - Defer *d; - byte *argp, *fn; - - d = g->defer; - if(d == nil) - return; - argp = (byte*)&arg0; - if(d->argp != argp) - return; - runtime·memmove(argp, d->args, d->siz); - g->defer = d->link; - fn = d->fn; - if(!d->nofree) - runtime·free(d); - runtime·jmpdefer(fn, argp); -} - -// Run all deferred functions for the current goroutine. +// Put on gfree list. +// If local list is too long, transfer a batch to the global list. static void -rundefer(void) +gfput(P *p, G *gp) { - Defer *d; - - while((d = g->defer) != nil) { - g->defer = d->link; - reflect·call(d->fn, d->args, d->siz); - if(!d->nofree) - runtime·free(d); + if(gp->stackguard - StackGuard != gp->stack0) + runtime·throw("invalid stack in gfput"); + gp->schedlink = p->gfree; + p->gfree = gp; + p->gfreecnt++; + if(p->gfreecnt >= 64) { + runtime·lock(&runtime·sched.gflock); + while(p->gfreecnt >= 32) { + p->gfreecnt--; + gp = p->gfree; + p->gfree = gp->schedlink; + gp->schedlink = runtime·sched.gfree; + runtime·sched.gfree = gp; + } + runtime·unlock(&runtime·sched.gflock); } } -// Free stack frames until we hit the last one -// or until we find the one that contains the argp. -static void -unwindstack(G *gp, byte *sp) +// Get from gfree list. +// If local list is empty, grab a batch from global list. +static G* +gfget(P *p) { - Stktop *top; - byte *stk; - - // Must be called from a different goroutine, usually m->g0. 
- if(g == gp) - runtime·throw("unwindstack on self"); + G *gp; - while((top = (Stktop*)gp->stackbase) != nil && top->stackbase != nil) { - stk = gp->stackguard - StackGuard; - if(stk <= sp && sp < gp->stackbase) - break; - gp->stackbase = top->stackbase; - gp->stackguard = top->stackguard; - if(top->free != 0) - runtime·stackfree(stk, top->free); +retry: + gp = p->gfree; + if(gp == nil && runtime·sched.gfree) { + runtime·lock(&runtime·sched.gflock); + while(p->gfreecnt < 32 && runtime·sched.gfree) { + p->gfreecnt++; + gp = runtime·sched.gfree; + runtime·sched.gfree = gp->schedlink; + gp->schedlink = p->gfree; + p->gfree = gp; + } + runtime·unlock(&runtime·sched.gflock); + goto retry; } - - if(sp != nil && (sp < gp->stackguard - StackGuard || gp->stackbase < sp)) { - runtime·printf("recover: %p not in [%p, %p]\n", sp, gp->stackguard - StackGuard, gp->stackbase); - runtime·throw("bad unwindstack"); + if(gp) { + p->gfree = gp->schedlink; + p->gfreecnt--; } + return gp; } -// Print all currently active panics. Used when crashing. +// Purge all cached G's from gfree list to the global list. static void -printpanics(Panic *p) -{ - if(p->link) { - printpanics(p->link); - runtime·printf("\t"); - } - runtime·printf("panic: "); - runtime·printany(p->arg); - if(p->recovered) - runtime·printf(" [recovered]"); - runtime·printf("\n"); -} - -static void recovery(G*); - -// The implementation of the predeclared function panic. -void -runtime·panic(Eface e) +gfpurge(P *p) { - Defer *d; - Panic *p; - - p = runtime·mal(sizeof *p); - p->arg = e; - p->link = g->panic; - p->stackbase = g->stackbase; - g->panic = p; + G *gp; - for(;;) { - d = g->defer; - if(d == nil) - break; - // take defer off list in case of recursive panic - g->defer = d->link; - g->ispanic = true; // rock for newstack, where reflect.call ends up - reflect·call(d->fn, d->args, d->siz); - if(p->recovered) { - g->panic = p->link; - if(g->panic == nil) // must be done with signal - g->sig = 0; - runtime·free(p); - // put recovering defer back on list - // for scheduler to find. - d->link = g->defer; - g->defer = d; - runtime·mcall(recovery); - runtime·throw("recovery failed"); // mcall should not return - } - if(!d->nofree) - runtime·free(d); + runtime·lock(&runtime·sched.gflock); + while(p->gfreecnt) { + p->gfreecnt--; + gp = p->gfree; + p->gfree = gp->schedlink; + gp->schedlink = runtime·sched.gfree; + runtime·sched.gfree = gp; } - - // ran out of deferred calls - old-school panic now - runtime·startpanic(); - printpanics(g->panic); - runtime·dopanic(0); -} - -// Unwind the stack after a deferred function calls recover -// after a panic. Then arrange to continue running as though -// the caller of the deferred function returned normally. -static void -recovery(G *gp) -{ - Defer *d; - - // Rewind gp's stack; we're running on m->g0's stack. - d = gp->defer; - gp->defer = d->link; - - // Unwind to the stack frame with d's arguments in it. - unwindstack(gp, d->argp); - - // Make the deferproc for this d return again, - // this time returning 1. The calling function will - // jump to the standard return epilogue. - // The -2*sizeof(uintptr) makes up for the - // two extra words that are on the stack at - // each call to deferproc. - // (The pc we're returning to does pop pop - // before it tests the return value.) - // On the arm there are 2 saved LRs mixed in too. 
- if(thechar == '5') - gp->sched.sp = (byte*)d->argp - 4*sizeof(uintptr); - else - gp->sched.sp = (byte*)d->argp - 2*sizeof(uintptr); - gp->sched.pc = d->pc; - if(!d->nofree) - runtime·free(d); - runtime·gogo(&gp->sched, 1); -} - -// The implementation of the predeclared function recover. -// Cannot split the stack because it needs to reliably -// find the stack segment of its caller. -#pragma textflag 7 -void -runtime·recover(byte *argp, Eface ret) -{ - Stktop *top, *oldtop; - Panic *p; - - // Must be a panic going on. - if((p = g->panic) == nil || p->recovered) - goto nomatch; - - // Frame must be at the top of the stack segment, - // because each deferred call starts a new stack - // segment as a side effect of using reflect.call. - // (There has to be some way to remember the - // variable argument frame size, and the segment - // code already takes care of that for us, so we - // reuse it.) - // - // As usual closures complicate things: the fp that - // the closure implementation function claims to have - // is where the explicit arguments start, after the - // implicit pointer arguments and PC slot. - // If we're on the first new segment for a closure, - // then fp == top - top->args is correct, but if - // the closure has its own big argument frame and - // allocated a second segment (see below), - // the fp is slightly above top - top->args. - // That condition can't happen normally though - // (stack pointers go down, not up), so we can accept - // any fp between top and top - top->args as - // indicating the top of the segment. - top = (Stktop*)g->stackbase; - if(argp < (byte*)top - top->argsize || (byte*)top < argp) - goto nomatch; - - // The deferred call makes a new segment big enough - // for the argument frame but not necessarily big - // enough for the function's local frame (size unknown - // at the time of the call), so the function might have - // made its own segment immediately. If that's the - // case, back top up to the older one, the one that - // reflect.call would have made for the panic. - // - // The fp comparison here checks that the argument - // frame that was copied during the split (the top->args - // bytes above top->fp) abuts the old top of stack. - // This is a correct test for both closure and non-closure code. - oldtop = (Stktop*)top->stackbase; - if(oldtop != nil && top->argp == (byte*)oldtop - top->argsize) - top = oldtop; - - // Now we have the segment that was created to - // run this call. It must have been marked as a panic segment. - if(!top->panic) - goto nomatch; - - // Okay, this is the top frame of a deferred call - // in response to a panic. It can see the panic argument. - p->recovered = 1; - ret = p->arg; - FLUSH(&ret); - return; - -nomatch: - ret.type = nil; - ret.data = nil; - FLUSH(&ret); -} - - -// Put on gfree list. Sched must be locked. -static void -gfput(G *g) -{ - if(g->stackguard - StackGuard != g->stack0) - runtime·throw("invalid stack in gfput"); - g->schedlink = runtime·sched.gfree; - runtime·sched.gfree = g; -} - -// Get from gfree list. Sched must be locked. -static G* -gfget(void) -{ - G *g; - - g = runtime·sched.gfree; - if(g) - runtime·sched.gfree = g->schedlink; - return g; + runtime·unlock(&runtime·sched.gflock); } void @@ -1600,80 +1539,85 @@ runtime·Breakpoint(void) } void -runtime·Goexit(void) -{ - rundefer(); - runtime·goexit(); -} - -void runtime·Gosched(void) { runtime·gosched(); } // Implementation of runtime.GOMAXPROCS. 
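The deferproc/recover machinery removed from proc.c above implements the language-level contract: a deferred call that recovers makes its deferproc return 1, steering the panicking frame to its normal return epilogue. What that contract guarantees to user code:

	package main

	import "fmt"

	// safe runs f, converting a panic into an error via the
	// deferred-recover path described above.
	func safe(f func()) (err error) {
		defer func() {
			if r := recover(); r != nil {
				err = fmt.Errorf("recovered: %v", r)
			}
		}()
		f()
		return nil
	}

	func main() {
		fmt.Println(safe(func() { panic("boom") })) // recovered: boom
	}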
-// delete when scheduler is stronger +// delete when scheduler is even stronger int32 runtime·gomaxprocsfunc(int32 n) { int32 ret; - uint32 v; - schedlock(); + if(n > MaxGomaxprocs) + n = MaxGomaxprocs; + runtime·lock(&runtime·sched); ret = runtime·gomaxprocs; - if(n <= 0) - n = ret; - if(n > maxgomaxprocs) - n = maxgomaxprocs; - runtime·gomaxprocs = n; - if(runtime·gomaxprocs > 1) - runtime·singleproc = false; - if(runtime·gcwaiting != 0) { - if(atomic_mcpumax(runtime·sched.atomic) != 1) - runtime·throw("invalid mcpumax during gc"); - schedunlock(); + if(n <= 0 || n == ret) { + runtime·unlock(&runtime·sched); return ret; } + runtime·unlock(&runtime·sched); - setmcpumax(n); + runtime·semacquire(&runtime·worldsema); + m->gcing = 1; + runtime·stoptheworld(); + newprocs = n; + m->gcing = 0; + runtime·semrelease(&runtime·worldsema); + runtime·starttheworld(); - // If there are now fewer allowed procs - // than procs running, stop. - v = runtime·atomicload(&runtime·sched.atomic); - if(atomic_mcpu(v) > n) { - schedunlock(); - runtime·gosched(); - return ret; - } - // handle more procs - matchmg(); - schedunlock(); return ret; } -void -runtime·LockOSThread(void) +static void +LockOSThread(void) { - if(m == &runtime·m0 && runtime·sched.init) { - runtime·sched.lockmain = true; - return; - } m->lockedg = g; g->lockedm = m; } void -runtime·UnlockOSThread(void) +runtime·LockOSThread(void) { - if(m == &runtime·m0 && runtime·sched.init) { - runtime·sched.lockmain = false; + m->locked |= LockExternal; + LockOSThread(); +} + +void +runtime·lockOSThread(void) +{ + m->locked += LockInternal; + LockOSThread(); +} + +static void +UnlockOSThread(void) +{ + if(m->locked != 0) return; - } m->lockedg = nil; g->lockedm = nil; } +void +runtime·UnlockOSThread(void) +{ + m->locked &= ~LockExternal; + UnlockOSThread(); +} + +void +runtime·unlockOSThread(void) +{ + if(m->locked < LockInternal) + runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread"); + m->locked -= LockInternal; + UnlockOSThread(); +} + bool runtime·lockedOSThread(void) { @@ -1697,16 +1641,31 @@ runtime·mid(uint32 ret) } void -runtime·NumGoroutine(int32 ret) +runtime·NumGoroutine(intgo ret) { - ret = runtime·sched.gcount; + ret = runtime·gcount(); FLUSH(&ret); } int32 runtime·gcount(void) { - return runtime·sched.gcount; + G *gp; + int32 n, s; + + n = 0; + runtime·lock(&runtime·sched); + // TODO(dvyukov): runtime.NumGoroutine() is O(N). + // We do not want to increment/decrement centralized counter in newproc/goexit, + // just to make runtime.NumGoroutine() faster. + // Compromise solution is to introduce per-P counters of active goroutines. + for(gp = runtime·allg; gp; gp = gp->alllink) { + s = gp->status; + if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting) + n++; + } + runtime·unlock(&runtime·sched); + return n; } int32 @@ -1740,6 +1699,9 @@ runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp) { int32 n; + // Windows does profiling in a dedicated thread w/o m. + if(!Windows && (m == nil || m->mcache == nil)) + return; if(prof.fn == nil || prof.hz == 0) return; @@ -1783,27 +1745,533 @@ runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) runtime·resetcpuprofiler(hz); } -void (*libcgo_setenv)(byte**); +// Change number of processors. The world is stopped, sched is locked. 
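gomaxprocsfunc above now answers n <= 0 (and n equal to the current value) without any resize, while a real change stops the world and lets procresize below rebuild the P array. From user code, that makes the query form free and the set form expensive:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		cur := runtime.GOMAXPROCS(0) // n <= 0: pure query, no stop-the-world
		fmt.Println("current:", cur)

		old := runtime.GOMAXPROCS(2)  // stops the world, procresize(2), restarts
		defer runtime.GOMAXPROCS(old) // restore on the way out (another resize)
		// ...section that wants exactly 2 P's...
	}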
+static void +procresize(int32 new) +{ + int32 i, old; + G *gp; + P *p; + + old = runtime·gomaxprocs; + if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs) + runtime·throw("procresize: invalid arg"); + // initialize new P's + for(i = 0; i < new; i++) { + p = runtime·allp[i]; + if(p == nil) { + p = (P*)runtime·mallocgc(sizeof(*p), 0, 0, 1); + p->status = Pgcstop; + runtime·atomicstorep(&runtime·allp[i], p); + } + if(p->mcache == nil) { + if(old==0 && i==0) + p->mcache = m->mcache; // bootstrap + else + p->mcache = runtime·allocmcache(); + } + if(p->runq == nil) { + p->runqsize = 128; + p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, 0, 1); + } + } + + // redistribute runnable G's evenly + for(i = 0; i < old; i++) { + p = runtime·allp[i]; + while(gp = runqget(p)) + globrunqput(gp); + } + // start at 1 because current M already executes some G and will acquire allp[0] below, + // so if we have a spare G we want to put it into allp[1]. + for(i = 1; runtime·sched.runqhead; i++) { + gp = runtime·sched.runqhead; + runtime·sched.runqhead = gp->schedlink; + runqput(runtime·allp[i%new], gp); + } + runtime·sched.runqtail = nil; + runtime·sched.runqsize = 0; + + // free unused P's + for(i = new; i < old; i++) { + p = runtime·allp[i]; + runtime·freemcache(p->mcache); + p->mcache = nil; + gfpurge(p); + p->status = Pdead; + // can't free P itself because it can be referenced by an M in syscall + } + + if(m->p) + m->p->m = nil; + m->p = nil; + m->mcache = nil; + p = runtime·allp[0]; + p->m = nil; + p->status = Pidle; + acquirep(p); + for(i = new-1; i > 0; i--) { + p = runtime·allp[i]; + p->status = Pidle; + pidleput(p); + } + runtime·singleproc = new == 1; + runtime·atomicstore((uint32*)&runtime·gomaxprocs, new); +} + +// Associate p and the current m. +static void +acquirep(P *p) +{ + if(m->p || m->mcache) + runtime·throw("acquirep: already in go"); + if(p->m || p->status != Pidle) { + runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status); + runtime·throw("acquirep: invalid p state"); + } + m->mcache = p->mcache; + m->p = p; + p->m = m; + p->status = Prunning; +} -// Update the C environment if cgo is loaded. -// Called from syscall.Setenv. -void -syscall·setenv_c(String k, String v) +// Disassociate p and the current m. +static P* +releasep(void) { - byte *arg[2]; + P *p; + + if(m->p == nil || m->mcache == nil) + runtime·throw("releasep: invalid arg"); + p = m->p; + if(p->m != m || p->mcache != m->mcache || p->status != Prunning) { + runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n", + m, m->p, p->m, m->mcache, p->mcache, p->status); + runtime·throw("releasep: invalid p state"); + } + m->p = nil; + m->mcache = nil; + p->m = nil; + p->status = Pidle; + return p; +} - if(libcgo_setenv == nil) +static void +inclocked(int32 v) +{ + runtime·lock(&runtime·sched); + runtime·sched.mlocked += v; + if(v > 0) + checkdead(); + runtime·unlock(&runtime·sched); +} + +// Check for deadlock situation. +// The check is based on number of running M's, if 0 -> deadlock. 
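checkdead below counts runnable and running work (excluding sysmon, locked M's, and the scavenger) and throws when nothing can ever run again. The smallest program that trips it:

	// Every goroutine is blocked, so checkdead throws:
	//
	//	fatal error: all goroutines are asleep - deadlock!
	package main

	func main() {
		c := make(chan int)
		<-c // no sender can ever exist
	}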
+static void +checkdead(void) +{ + G *gp; + int32 run, grunning, s; + + // -1 for sysmon + run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.mlocked - 1; + if(run > 0) return; + if(run < 0) { + runtime·printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n", + runtime·sched.nmidle, runtime·sched.mlocked, runtime·sched.mcount); + runtime·throw("checkdead: inconsistent counts"); + } + grunning = 0; + for(gp = runtime·allg; gp; gp = gp->alllink) { + if(gp == scvg) + continue; + s = gp->status; + if(s == Gwaiting) + grunning++; + else if(s == Grunnable || s == Grunning || s == Gsyscall) { + runtime·printf("checkdead: find g %D in status %d\n", gp->goid, s); + runtime·throw("checkdead: runnable g"); + } + } + if(grunning == 0) // possible if main goroutine calls runtime·Goexit() + runtime·exit(0); + m->throwing = -1; // do not dump full stacks + runtime·throw("all goroutines are asleep - deadlock!"); +} + +static void +sysmon(void) +{ + uint32 idle, delay; + uint32 ticks[MaxGomaxprocs]; + + idle = 0; // how many cycles in succession we had not wokeup somebody + delay = 0; + for(;;) { + if(idle == 0) // start with 20us sleep... + delay = 20; + else if(idle > 50) // start doubling the sleep after 1ms... + delay *= 2; + if(delay > 10*1000) // up to 10ms + delay = 10*1000; + runtime·usleep(delay); + if(runtime·gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) { // TODO: fast atomic + runtime·lock(&runtime·sched); + if(runtime·atomicload(&runtime·gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) { + runtime·atomicstore(&runtime·sched.sysmonwait, 1); + runtime·unlock(&runtime·sched); + runtime·notesleep(&runtime·sched.sysmonnote); + runtime·noteclear(&runtime·sched.sysmonnote); + idle = 0; + delay = 20; + } else + runtime·unlock(&runtime·sched); + } + if(retake(ticks)) + idle = 0; + else + idle++; + } +} + +static uint32 +retake(uint32 *ticks) +{ + uint32 i, s, n; + int64 t; + P *p; + + n = 0; + for(i = 0; i < runtime·gomaxprocs; i++) { + p = runtime·allp[i]; + if(p==nil) + continue; + t = p->tick; + if(ticks[i] != t) { + ticks[i] = t; + continue; + } + s = p->status; + if(s != Psyscall) + continue; + if(p->runqhead == p->runqtail && runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0) // TODO: fast atomic + continue; + // Need to increment number of locked M's before the CAS. + // Otherwise the M from which we retake can exit the syscall, + // increment nmidle and report deadlock. + inclocked(-1); + if(runtime·cas(&p->status, s, Pidle)) { + n++; + handoffp(p); + } + inclocked(1); + } + return n; +} + +// Put mp on midle list. +// Sched must be locked. +static void +mput(M *mp) +{ + mp->schedlink = runtime·sched.midle; + runtime·sched.midle = mp; + runtime·sched.nmidle++; + checkdead(); +} + +// Try to get an m from midle list. +// Sched must be locked. +static M* +mget(void) +{ + M *mp; + + if((mp = runtime·sched.midle) != nil){ + runtime·sched.midle = mp->schedlink; + runtime·sched.nmidle--; + } + return mp; +} + +// Put gp on the global runnable queue. +// Sched must be locked. +static void +globrunqput(G *gp) +{ + gp->schedlink = nil; + if(runtime·sched.runqtail) + runtime·sched.runqtail->schedlink = gp; + else + runtime·sched.runqhead = gp; + runtime·sched.runqtail = gp; + runtime·sched.runqsize++; +} + +// Try get a batch of G's from the global runnable queue. +// Sched must be locked. 
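sysmon above polls on an exponential schedule: 20µs while retake keeps finding work, doubling per round once it has been idle for more than 50 rounds, capped at 10ms, and parking outright when all P's are idle. The backoff schedule in isolation:

	package main

	import (
		"fmt"
		"time"
	)

	// backoff reproduces sysmon's sleep schedule: 20us while work is
	// being retaken, doubling after 50 idle rounds, capped at 10ms.
	func backoff(retook func() bool, rounds int) {
		idle := 0
		var delay time.Duration
		for i := 0; i < rounds; i++ {
			if idle == 0 {
				delay = 20 * time.Microsecond
			} else if idle > 50 {
				delay *= 2
			}
			if delay > 10*time.Millisecond {
				delay = 10 * time.Millisecond
			}
			time.Sleep(delay)
			if retook() {
				idle = 0
			} else {
				idle++
			}
		}
		fmt.Println("final delay:", delay)
	}

	func main() {
		backoff(func() bool { return false }, 100) // never retakes: delay grows
	}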
+static G* +globrunqget(P *p) +{ + G *gp, *gp1; + int32 n; + + if(runtime·sched.runqsize == 0) + return nil; + n = runtime·sched.runqsize/runtime·gomaxprocs+1; + if(n > runtime·sched.runqsize) + n = runtime·sched.runqsize; + runtime·sched.runqsize -= n; + if(runtime·sched.runqsize == 0) + runtime·sched.runqtail = nil; + gp = runtime·sched.runqhead; + runtime·sched.runqhead = gp->schedlink; + n--; + while(n--) { + gp1 = runtime·sched.runqhead; + runtime·sched.runqhead = gp1->schedlink; + runqput(p, gp1); + } + return gp; +} + +// Put p to on pidle list. +// Sched must be locked. +static void +pidleput(P *p) +{ + p->link = runtime·sched.pidle; + runtime·sched.pidle = p; + runtime·xadd(&runtime·sched.npidle, 1); // TODO: fast atomic +} + +// Try get a p from pidle list. +// Sched must be locked. +static P* +pidleget(void) +{ + P *p; - arg[0] = runtime·malloc(k.len + 1); - runtime·memmove(arg[0], k.str, k.len); - arg[0][k.len] = 0; + p = runtime·sched.pidle; + if(p) { + runtime·sched.pidle = p->link; + runtime·xadd(&runtime·sched.npidle, -1); // TODO: fast atomic + } + return p; +} - arg[1] = runtime·malloc(v.len + 1); - runtime·memmove(arg[1], v.str, v.len); - arg[1][v.len] = 0; +// Put g on local runnable queue. +// TODO(dvyukov): consider using lock-free queue. +static void +runqput(P *p, G *gp) +{ + int32 h, t, s; + + runtime·lock(p); +retry: + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + if(t == h-1 || (h == 0 && t == s-1)) { + runqgrow(p); + goto retry; + } + p->runq[t++] = gp; + if(t == s) + t = 0; + p->runqtail = t; + runtime·unlock(p); +} + +// Get g from local runnable queue. +static G* +runqget(P *p) +{ + G *gp; + int32 t, h, s; + + if(p->runqhead == p->runqtail) + return nil; + runtime·lock(p); + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + if(t == h) { + runtime·unlock(p); + return nil; + } + gp = p->runq[h++]; + if(h == s) + h = 0; + p->runqhead = h; + runtime·unlock(p); + return gp; +} - runtime·asmcgocall((void*)libcgo_setenv, arg); - runtime·free(arg[0]); - runtime·free(arg[1]); +// Grow local runnable queue. +// TODO(dvyukov): consider using fixed-size array +// and transfer excess to the global list (local queue can grow way too big). +static void +runqgrow(P *p) +{ + G **q; + int32 s, t, h, t2; + + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + t2 = 0; + q = runtime·malloc(2*s*sizeof(*q)); + while(t != h) { + q[t2++] = p->runq[h++]; + if(h == s) + h = 0; + } + runtime·free(p->runq); + p->runq = q; + p->runqhead = 0; + p->runqtail = t2; + p->runqsize = 2*s; } + +// Steal half of elements from local runnable queue of p2 +// and put onto local runnable queue of p. +// Returns one of the stolen elements (or nil if failed). +static G* +runqsteal(P *p, P *p2) +{ + G *gp, *gp1; + int32 t, h, s, t2, h2, s2, c, i; + + if(p2->runqhead == p2->runqtail) + return nil; + // sort locks to prevent deadlocks + if(p < p2) + runtime·lock(p); + runtime·lock(p2); + if(p2->runqhead == p2->runqtail) { + runtime·unlock(p2); + if(p < p2) + runtime·unlock(p); + return nil; + } + if(p >= p2) + runtime·lock(p); + // now we've locked both queues and know the victim is not empty + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + h2 = p2->runqhead; + t2 = p2->runqtail; + s2 = p2->runqsize; + gp = p2->runq[h2++]; // return value + if(h2 == s2) + h2 = 0; + // steal roughly half + if(t2 > h2) + c = (t2 - h2) / 2; + else + c = (s2 - h2 + t2) / 2; + // copy + for(i = 0; i != c; i++) { + // the target queue is full? 
+ if(t == h-1 || (h == 0 && t == s-1)) + break; + // the victim queue is empty? + if(t2 == h2) + break; + gp1 = p2->runq[h2++]; + if(h2 == s2) + h2 = 0; + p->runq[t++] = gp1; + if(t == s) + t = 0; + } + p->runqtail = t; + p2->runqhead = h2; + runtime·unlock(p2); + runtime·unlock(p); + return gp; +} + +void +runtime·testSchedLocalQueue(void) +{ + P p; + G gs[1000]; + int32 i, j; + + runtime·memclr((byte*)&p, sizeof(p)); + p.runqsize = 1; + p.runqhead = 0; + p.runqtail = 0; + p.runq = runtime·malloc(p.runqsize*sizeof(*p.runq)); + + for(i = 0; i < nelem(gs); i++) { + if(runqget(&p) != nil) + runtime·throw("runq is not empty initially"); + for(j = 0; j < i; j++) + runqput(&p, &gs[i]); + for(j = 0; j < i; j++) { + if(runqget(&p) != &gs[i]) { + runtime·printf("bad element at iter %d/%d\n", i, j); + runtime·throw("bad element"); + } + } + if(runqget(&p) != nil) + runtime·throw("runq is not empty afterwards"); + } +} + +void +runtime·testSchedLocalQueueSteal(void) +{ + P p1, p2; + G gs[1000], *gp; + int32 i, j, s; + + runtime·memclr((byte*)&p1, sizeof(p1)); + p1.runqsize = 1; + p1.runqhead = 0; + p1.runqtail = 0; + p1.runq = runtime·malloc(p1.runqsize*sizeof(*p1.runq)); + + runtime·memclr((byte*)&p2, sizeof(p2)); + p2.runqsize = nelem(gs); + p2.runqhead = 0; + p2.runqtail = 0; + p2.runq = runtime·malloc(p2.runqsize*sizeof(*p2.runq)); + + for(i = 0; i < nelem(gs); i++) { + for(j = 0; j < i; j++) { + gs[j].sig = 0; + runqput(&p1, &gs[j]); + } + gp = runqsteal(&p2, &p1); + s = 0; + if(gp) { + s++; + gp->sig++; + } + while(gp = runqget(&p2)) { + s++; + gp->sig++; + } + while(gp = runqget(&p1)) + gp->sig++; + for(j = 0; j < i; j++) { + if(gs[j].sig != 1) { + runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i); + runtime·throw("bad element"); + } + } + if(s != i/2 && s != i/2+1) { + runtime·printf("bad steal %d, want %d or %d, iter %d\n", + s, i/2, i/2+1, i); + runtime·throw("bad steal"); + } + } +} + diff --git a/src/pkg/runtime/proc_test.go b/src/pkg/runtime/proc_test.go index 32111080a..21fb9c2f7 100644 --- a/src/pkg/runtime/proc_test.go +++ b/src/pkg/runtime/proc_test.go @@ -5,9 +5,11 @@ package runtime_test import ( + "math" "runtime" "sync/atomic" "testing" + "time" ) var stop = make(chan bool, 1) @@ -22,8 +24,7 @@ func perpetuumMobile() { func TestStopTheWorldDeadlock(t *testing.T) { if testing.Short() { - t.Logf("skipping during short test") - return + t.Skip("skipping during short test") } maxprocs := runtime.GOMAXPROCS(3) compl := make(chan bool, 2) @@ -46,6 +47,66 @@ func TestStopTheWorldDeadlock(t *testing.T) { runtime.GOMAXPROCS(maxprocs) } +func TestYieldProgress(t *testing.T) { + testYieldProgress(t, false) +} + +func TestYieldLockedProgress(t *testing.T) { + testYieldProgress(t, true) +} + +func testYieldProgress(t *testing.T, locked bool) { + c := make(chan bool) + cack := make(chan bool) + go func() { + if locked { + runtime.LockOSThread() + } + for { + select { + case <-c: + cack <- true + return + default: + runtime.Gosched() + } + } + }() + time.Sleep(10 * time.Millisecond) + c <- true + <-cack +} + +func TestYieldLocked(t *testing.T) { + const N = 10 + c := make(chan bool) + go func() { + runtime.LockOSThread() + for i := 0; i < N; i++ { + runtime.Gosched() + time.Sleep(time.Millisecond) + } + c <- true + // runtime.UnlockOSThread() is deliberately omitted + }() + <-c +} + +func TestBlockLocked(t *testing.T) { + const N = 10 + c := make(chan bool) + go func() { + runtime.LockOSThread() + for i := 0; i < N; i++ { + c <- true + } + runtime.UnlockOSThread() + }() + for 
i := 0; i < N; i++ { + <-c + } +} + func stackGrowthRecursive(i int) { var pad [128]uint64 if i != 0 && pad[0] == 0 { @@ -53,7 +114,15 @@ func stackGrowthRecursive(i int) { } } -func BenchmarkStackGrowth(b *testing.B) { +func TestSchedLocalQueue(t *testing.T) { + runtime.TestSchedLocalQueue1() +} + +func TestSchedLocalQueueSteal(t *testing.T) { + runtime.TestSchedLocalQueueSteal1() +} + +func benchmarkStackGrowth(b *testing.B, rec int) { const CallsPerSched = 1000 procs := runtime.GOMAXPROCS(-1) N := int32(b.N / CallsPerSched) @@ -63,7 +132,7 @@ func BenchmarkStackGrowth(b *testing.B) { for atomic.AddInt32(&N, -1) >= 0 { runtime.Gosched() for g := 0; g < CallsPerSched; g++ { - stackGrowthRecursive(10) + stackGrowthRecursive(rec) } } c <- true @@ -74,32 +143,33 @@ func BenchmarkStackGrowth(b *testing.B) { } } +func BenchmarkStackGrowth(b *testing.B) { + benchmarkStackGrowth(b, 10) +} + +func BenchmarkStackGrowthDeep(b *testing.B) { + benchmarkStackGrowth(b, 1024) +} + func BenchmarkSyscall(b *testing.B) { - const CallsPerSched = 1000 - procs := runtime.GOMAXPROCS(-1) - N := int32(b.N / CallsPerSched) - c := make(chan bool, procs) - for p := 0; p < procs; p++ { - go func() { - for atomic.AddInt32(&N, -1) >= 0 { - runtime.Gosched() - for g := 0; g < CallsPerSched; g++ { - runtime.Entersyscall() - runtime.Exitsyscall() - } - } - c <- true - }() - } - for p := 0; p < procs; p++ { - <-c - } + benchmarkSyscall(b, 0, 1) } func BenchmarkSyscallWork(b *testing.B) { + benchmarkSyscall(b, 100, 1) +} + +func BenchmarkSyscallExcess(b *testing.B) { + benchmarkSyscall(b, 0, 4) +} + +func BenchmarkSyscallExcessWork(b *testing.B) { + benchmarkSyscall(b, 100, 4) +} + +func benchmarkSyscall(b *testing.B, work, excess int) { const CallsPerSched = 1000 - const LocalWork = 100 - procs := runtime.GOMAXPROCS(-1) + procs := runtime.GOMAXPROCS(-1) * excess N := int32(b.N / CallsPerSched) c := make(chan bool, procs) for p := 0; p < procs; p++ { @@ -109,7 +179,7 @@ func BenchmarkSyscallWork(b *testing.B) { runtime.Gosched() for g := 0; g < CallsPerSched; g++ { runtime.Entersyscall() - for i := 0; i < LocalWork; i++ { + for i := 0; i < work; i++ { foo *= 2 foo /= 2 } @@ -123,3 +193,93 @@ func BenchmarkSyscallWork(b *testing.B) { <-c } } + +func BenchmarkCreateGoroutines(b *testing.B) { + benchmarkCreateGoroutines(b, 1) +} + +func BenchmarkCreateGoroutinesParallel(b *testing.B) { + benchmarkCreateGoroutines(b, runtime.GOMAXPROCS(-1)) +} + +func benchmarkCreateGoroutines(b *testing.B, procs int) { + c := make(chan bool) + var f func(n int) + f = func(n int) { + if n == 0 { + c <- true + return + } + go f(n - 1) + } + for i := 0; i < procs; i++ { + go f(b.N / procs) + } + for i := 0; i < procs; i++ { + <-c + } +} + +type Matrix [][]float64 + +func BenchmarkMatmult(b *testing.B) { + b.StopTimer() + // matmult is O(N**3) but testing expects O(b.N), + // so we need to take cube root of b.N + n := int(math.Cbrt(float64(b.N))) + 1 + A := makeMatrix(n) + B := makeMatrix(n) + C := makeMatrix(n) + b.StartTimer() + matmult(nil, A, B, C, 0, n, 0, n, 0, n, 8) +} + +func makeMatrix(n int) Matrix { + m := make(Matrix, n) + for i := 0; i < n; i++ { + m[i] = make([]float64, n) + for j := 0; j < n; j++ { + m[i][j] = float64(i*n + j) + } + } + return m +} + +func matmult(done chan<- struct{}, A, B, C Matrix, i0, i1, j0, j1, k0, k1, threshold int) { + di := i1 - i0 + dj := j1 - j0 + dk := k1 - k0 + if di >= dj && di >= dk && di >= threshold { + // divide in two by y axis + mi := i0 + di/2 + done1 := make(chan struct{}, 1) + go 
matmult(done1, A, B, C, i0, mi, j0, j1, k0, k1, threshold) + matmult(nil, A, B, C, mi, i1, j0, j1, k0, k1, threshold) + <-done1 + } else if dj >= dk && dj >= threshold { + // divide in two by x axis + mj := j0 + dj/2 + done1 := make(chan struct{}, 1) + go matmult(done1, A, B, C, i0, i1, j0, mj, k0, k1, threshold) + matmult(nil, A, B, C, i0, i1, mj, j1, k0, k1, threshold) + <-done1 + } else if dk >= threshold { + // divide in two by "k" axis + // deliberately not parallel because of data races + mk := k0 + dk/2 + matmult(nil, A, B, C, i0, i1, j0, j1, k0, mk, threshold) + matmult(nil, A, B, C, i0, i1, j0, j1, mk, k1, threshold) + } else { + // the matrices are small enough, compute directly + for i := i0; i < i1; i++ { + for j := j0; j < j1; j++ { + for k := k0; k < k1; k++ { + C[i][j] += A[i][k] * B[k][j] + } + } + } + } + if done != nil { + done <- struct{}{} + } +} diff --git a/src/pkg/runtime/race.c b/src/pkg/runtime/race.c new file mode 100644 index 000000000..cfd97041a --- /dev/null +++ b/src/pkg/runtime/race.c @@ -0,0 +1,350 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Implementation of the race detector API. +// +build race + +#include "runtime.h" +#include "arch_GOARCH.h" +#include "malloc.h" +#include "race.h" + +void runtime∕race·Initialize(uintptr *racectx); +void runtime∕race·MapShadow(void *addr, uintptr size); +void runtime∕race·Finalize(void); +void runtime∕race·FinalizerGoroutine(uintptr racectx); +void runtime∕race·Read(uintptr racectx, void *addr, void *pc); +void runtime∕race·Write(uintptr racectx, void *addr, void *pc); +void runtime∕race·ReadRange(uintptr racectx, void *addr, uintptr sz, uintptr step, void *pc); +void runtime∕race·WriteRange(uintptr racectx, void *addr, uintptr sz, uintptr step, void *pc); +void runtime∕race·FuncEnter(uintptr racectx, void *pc); +void runtime∕race·FuncExit(uintptr racectx); +void runtime∕race·Malloc(uintptr racectx, void *p, uintptr sz, void *pc); +void runtime∕race·Free(void *p); +void runtime∕race·GoStart(uintptr racectx, uintptr *chracectx, void *pc); +void runtime∕race·GoEnd(uintptr racectx); +void runtime∕race·Acquire(uintptr racectx, void *addr); +void runtime∕race·Release(uintptr racectx, void *addr); +void runtime∕race·ReleaseMerge(uintptr racectx, void *addr); + +extern byte noptrdata[]; +extern byte enoptrbss[]; + +static bool onstack(uintptr argp); + +uintptr +runtime·raceinit(void) +{ + uintptr racectx; + + m->racecall = true; + runtime∕race·Initialize(&racectx); + runtime∕race·MapShadow(noptrdata, enoptrbss - noptrdata); + m->racecall = false; + return racectx; +} + +void +runtime·racefini(void) +{ + m->racecall = true; + runtime∕race·Finalize(); + m->racecall = false; +} + +void +runtime·racemapshadow(void *addr, uintptr size) +{ + m->racecall = true; + runtime∕race·MapShadow(addr, size); + m->racecall = false; +} + +// Called from instrumented code. +// If we split stack, getcallerpc() can return runtime·lessstack(). +#pragma textflag 7 +void +runtime·racewrite(uintptr addr) +{ + if(!onstack(addr)) { + m->racecall = true; + runtime∕race·Write(g->racectx, (void*)addr, runtime·getcallerpc(&addr)); + m->racecall = false; + } +} + +// Called from instrumented code. +// If we split stack, getcallerpc() can return runtime·lessstack(). 
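Both of these handlers, runtime·racewrite above and runtime·raceread below, share one shape: reject stack addresses via onstack(), bracket the ThreadSanitizer callout with m->racecall, and hand over the faulting address plus the caller PC. The flag is what keeps the scheme from eating itself: runtime code that may execute beneath a callout checks it and skips its own instrumentation. A loose Go rendering of that guard follows; racecall, tsanReport, and noteAlloc are toy names for illustration, not real runtime identifiers.

package main

import "fmt"

// Toy stand-in: the real flag lives on the per-thread M.
var racecall bool

func raceAccess(addr uintptr, write bool) {
	racecall = true // entering the detector
	tsanReport(addr, write)
	racecall = false
}

func tsanReport(addr uintptr, write bool) {
	noteAlloc(16) // the callout may itself hit instrumented paths
	fmt.Printf("tsan: addr=%#x write=%v\n", addr, write)
}

// Runtime paths that can run beneath a callout consult the flag and
// bail out instead of recursing back into the detector.
func noteAlloc(size uintptr) {
	if racecall {
		return
	}
	fmt.Println("would report allocation of", size, "bytes")
}

func main() {
	raceAccess(0x1000, true)
}

The bracketing discipline is the same in every wrapper in this file; only the detector entry point being called changes.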
+#pragma textflag 7 +void +runtime·raceread(uintptr addr) +{ + if(!onstack(addr)) { + m->racecall = true; + runtime∕race·Read(g->racectx, (void*)addr, runtime·getcallerpc(&addr)); + m->racecall = false; + } +} + +// Called from runtime·racefuncenter (assembly). +#pragma textflag 7 +void +runtime·racefuncenter1(uintptr pc) +{ + // If the caller PC is lessstack, use slower runtime·callers + // to walk across the stack split to find the real caller. + if(pc == (uintptr)runtime·lessstack) + runtime·callers(2, &pc, 1); + + m->racecall = true; + runtime∕race·FuncEnter(g->racectx, (void*)pc); + m->racecall = false; +} + +// Called from instrumented code. +#pragma textflag 7 +void +runtime·racefuncexit(void) +{ + m->racecall = true; + runtime∕race·FuncExit(g->racectx); + m->racecall = false; +} + +void +runtime·racemalloc(void *p, uintptr sz, void *pc) +{ + // use m->curg because runtime·stackalloc() is called from g0 + if(m->curg == nil) + return; + m->racecall = true; + runtime∕race·Malloc(m->curg->racectx, p, sz, pc); + m->racecall = false; +} + +void +runtime·racefree(void *p) +{ + m->racecall = true; + runtime∕race·Free(p); + m->racecall = false; +} + +uintptr +runtime·racegostart(void *pc) +{ + uintptr racectx; + + m->racecall = true; + runtime∕race·GoStart(g->racectx, &racectx, pc); + m->racecall = false; + return racectx; +} + +void +runtime·racegoend(void) +{ + m->racecall = true; + runtime∕race·GoEnd(g->racectx); + m->racecall = false; +} + +static void +memoryaccess(void *addr, uintptr callpc, uintptr pc, bool write) +{ + uintptr racectx; + + if(!onstack((uintptr)addr)) { + m->racecall = true; + racectx = g->racectx; + if(callpc) { + if(callpc == (uintptr)runtime·lessstack) + runtime·callers(3, &callpc, 1); + runtime∕race·FuncEnter(racectx, (void*)callpc); + } + if(write) + runtime∕race·Write(racectx, addr, (void*)pc); + else + runtime∕race·Read(racectx, addr, (void*)pc); + if(callpc) + runtime∕race·FuncExit(racectx); + m->racecall = false; + } +} + +void +runtime·racewritepc(void *addr, void *callpc, void *pc) +{ + memoryaccess(addr, (uintptr)callpc, (uintptr)pc, true); +} + +void +runtime·racereadpc(void *addr, void *callpc, void *pc) +{ + memoryaccess(addr, (uintptr)callpc, (uintptr)pc, false); +} + +static void +rangeaccess(void *addr, uintptr size, uintptr step, uintptr callpc, uintptr pc, bool write) +{ + uintptr racectx; + + if(!onstack((uintptr)addr)) { + m->racecall = true; + racectx = g->racectx; + if(callpc) { + if(callpc == (uintptr)runtime·lessstack) + runtime·callers(3, &callpc, 1); + runtime∕race·FuncEnter(racectx, (void*)callpc); + } + if(write) + runtime∕race·WriteRange(racectx, addr, size, step, (void*)pc); + else + runtime∕race·ReadRange(racectx, addr, size, step, (void*)pc); + if(callpc) + runtime∕race·FuncExit(racectx); + m->racecall = false; + } +} + +void +runtime·racewriterangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc) +{ + rangeaccess(addr, sz, step, (uintptr)callpc, (uintptr)pc, true); +} + +void +runtime·racereadrangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc) +{ + rangeaccess(addr, sz, step, (uintptr)callpc, (uintptr)pc, false); +} + +void +runtime·raceacquire(void *addr) +{ + runtime·raceacquireg(g, addr); +} + +void +runtime·raceacquireg(G *gp, void *addr) +{ + if(g->raceignore) + return; + m->racecall = true; + runtime∕race·Acquire(gp->racectx, addr); + m->racecall = false; +} + +void +runtime·racerelease(void *addr) +{ + runtime·racereleaseg(g, addr); +} + +void +runtime·racereleaseg(G *gp, void *addr) +{ + 
if(g->raceignore) + return; + m->racecall = true; + runtime∕race·Release(gp->racectx, addr); + m->racecall = false; +} + +void +runtime·racereleasemerge(void *addr) +{ + runtime·racereleasemergeg(g, addr); +} + +void +runtime·racereleasemergeg(G *gp, void *addr) +{ + if(g->raceignore) + return; + m->racecall = true; + runtime∕race·ReleaseMerge(gp->racectx, addr); + m->racecall = false; +} + +void +runtime·racefingo(void) +{ + m->racecall = true; + runtime∕race·FinalizerGoroutine(g->racectx); + m->racecall = false; +} + +// func RaceAcquire(addr unsafe.Pointer) +void +runtime·RaceAcquire(void *addr) +{ + runtime·raceacquire(addr); +} + +// func RaceRelease(addr unsafe.Pointer) +void +runtime·RaceRelease(void *addr) +{ + runtime·racerelease(addr); +} + +// func RaceReleaseMerge(addr unsafe.Pointer) +void +runtime·RaceReleaseMerge(void *addr) +{ + runtime·racereleasemerge(addr); +} + +// func RaceSemacquire(s *uint32) +void runtime·RaceSemacquire(uint32 *s) +{ + runtime·semacquire(s); +} + +// func RaceSemrelease(s *uint32) +void runtime·RaceSemrelease(uint32 *s) +{ + runtime·semrelease(s); +} + +// func RaceRead(addr unsafe.Pointer) +#pragma textflag 7 +void +runtime·RaceRead(void *addr) +{ + memoryaccess(addr, 0, (uintptr)runtime·getcallerpc(&addr), false); +} + +// func RaceWrite(addr unsafe.Pointer) +#pragma textflag 7 +void +runtime·RaceWrite(void *addr) +{ + memoryaccess(addr, 0, (uintptr)runtime·getcallerpc(&addr), true); +} + +// func RaceDisable() +void runtime·RaceDisable(void) +{ + g->raceignore++; +} + +// func RaceEnable() +void runtime·RaceEnable(void) +{ + g->raceignore--; +} + +static bool +onstack(uintptr argp) +{ + // noptrdata, data, bss, noptrbss + // the layout is in ../../cmd/ld/data.c + if((byte*)argp >= noptrdata && (byte*)argp < enoptrbss) + return false; + if((byte*)argp >= runtime·mheap->arena_start && (byte*)argp < runtime·mheap->arena_used) + return false; + return true; +} diff --git a/src/pkg/runtime/race.go b/src/pkg/runtime/race.go new file mode 100644 index 000000000..1d64ba389 --- /dev/null +++ b/src/pkg/runtime/race.go @@ -0,0 +1,29 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build race + +// Public race detection API, present iff build with -race. + +package runtime + +import ( + "unsafe" +) + +// RaceDisable disables handling of race events in the current goroutine. +func RaceDisable() + +// RaceEnable re-enables handling of race events in the current goroutine. +func RaceEnable() + +func RaceAcquire(addr unsafe.Pointer) +func RaceRelease(addr unsafe.Pointer) +func RaceReleaseMerge(addr unsafe.Pointer) + +func RaceRead(addr unsafe.Pointer) +func RaceWrite(addr unsafe.Pointer) + +func RaceSemacquire(s *uint32) +func RaceSemrelease(s *uint32) diff --git a/src/pkg/runtime/race.h b/src/pkg/runtime/race.h new file mode 100644 index 000000000..432a8a97d --- /dev/null +++ b/src/pkg/runtime/race.h @@ -0,0 +1,33 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Definitions related to data race detection. + +#ifdef RACE +enum { raceenabled = 1 }; +#else +enum { raceenabled = 0 }; +#endif + +// Initialize race detection subsystem. +uintptr runtime·raceinit(void); +// Finalize race detection subsystem, does not return. 
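The Go-visible half of this, declared in race.go above, is deliberately small: acquire/release annotations plus a per-goroutine on/off switch. A minimal sketch of applying it, assuming a toolchain built with -race (without that build tag these functions do not exist). For sync/atomic the detector already models the edge itself, so the annotations below are belt-and-braces; they earn their keep for synchronization the detector cannot see, such as ordering established through cgo or shared memory.

package main

import (
	"runtime"
	"sync/atomic"
	"unsafe"
)

var (
	ready int32
	data  int
)

func producer() {
	data = 42 // plain write to be published
	// Tell the detector: everything before this point happens before
	// any RaceAcquire on the same address. Requires a -race build.
	runtime.RaceRelease(unsafe.Pointer(&ready))
	atomic.StoreInt32(&ready, 1)
}

func consumer() int {
	for atomic.LoadInt32(&ready) == 0 {
		runtime.Gosched()
	}
	runtime.RaceAcquire(unsafe.Pointer(&ready)) // pairs with the release
	return data
}

func main() {
	go producer()
	_ = consumer()
}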
+void runtime·racefini(void); + +void runtime·racemapshadow(void *addr, uintptr size); +void runtime·racemalloc(void *p, uintptr sz, void *pc); +void runtime·racefree(void *p); +uintptr runtime·racegostart(void *pc); +void runtime·racegoend(void); +void runtime·racewritepc(void *addr, void *callpc, void *pc); +void runtime·racereadpc(void *addr, void *callpc, void *pc); +void runtime·racewriterangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc); +void runtime·racereadrangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc); +void runtime·racefingo(void); +void runtime·raceacquire(void *addr); +void runtime·raceacquireg(G *gp, void *addr); +void runtime·racerelease(void *addr); +void runtime·racereleaseg(G *gp, void *addr); +void runtime·racereleasemerge(void *addr); +void runtime·racereleasemergeg(G *gp, void *addr); diff --git a/src/pkg/runtime/race/README b/src/pkg/runtime/race/README new file mode 100644 index 000000000..8bedb09cd --- /dev/null +++ b/src/pkg/runtime/race/README @@ -0,0 +1,11 @@ +runtime/race package contains the data race detector runtime library. +It is based on ThreadSanitizer race detector, that is currently a part of +the LLVM project. + +To update the .syso files you need to: +$ svn co http://llvm.org/svn/llvm-project/compiler-rt/trunk +$ cd compiler-rt/lib/tsan/go +$ ./buildgo.sh + +Tested with gcc 4.6.1 and 4.7.0. On Windows it's built with 64-bit MinGW. + diff --git a/src/pkg/runtime/race/race.go b/src/pkg/runtime/race/race.go new file mode 100644 index 000000000..b0a5c9a50 --- /dev/null +++ b/src/pkg/runtime/race/race.go @@ -0,0 +1,122 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build race,linux,amd64 race,darwin,amd64 race,windows,amd64 + +// Package race provides low-level facilities for data race detection. 
+package race + +/* +void __tsan_init(void **racectx); +void __tsan_fini(void); +void __tsan_map_shadow(void *addr, void *size); +void __tsan_go_start(void *racectx, void **chracectx, void *pc); +void __tsan_go_end(void *racectx); +void __tsan_read(void *racectx, void *addr, void *pc); +void __tsan_write(void *racectx, void *addr, void *pc); +void __tsan_read_range(void *racectx, void *addr, long sz, long step, void *pc); +void __tsan_write_range(void *racectx, void *addr, long sz, long step, void *pc); +void __tsan_func_enter(void *racectx, void *pc); +void __tsan_func_exit(void *racectx); +void __tsan_malloc(void *racectx, void *p, long sz, void *pc); +void __tsan_free(void *p); +void __tsan_acquire(void *racectx, void *addr); +void __tsan_release(void *racectx, void *addr); +void __tsan_release_merge(void *racectx, void *addr); +void __tsan_finalizer_goroutine(void *racectx); +*/ +import "C" + +import ( + "runtime" + "unsafe" +) + +func Initialize(racectx *uintptr) { + C.__tsan_init((*unsafe.Pointer)(unsafe.Pointer(racectx))) +} + +func Finalize() { + C.__tsan_fini() +} + +func MapShadow(addr, size uintptr) { + C.__tsan_map_shadow(unsafe.Pointer(addr), unsafe.Pointer(size)) +} + +func FinalizerGoroutine(racectx uintptr) { + C.__tsan_finalizer_goroutine(unsafe.Pointer(racectx)) +} + +func Read(racectx uintptr, addr, pc uintptr) { + C.__tsan_read(unsafe.Pointer(racectx), unsafe.Pointer(addr), unsafe.Pointer(pc)) +} + +func Write(racectx uintptr, addr, pc uintptr) { + C.__tsan_write(unsafe.Pointer(racectx), unsafe.Pointer(addr), unsafe.Pointer(pc)) +} + +func ReadRange(racectx uintptr, addr, sz, step, pc uintptr) { + C.__tsan_read_range(unsafe.Pointer(racectx), unsafe.Pointer(addr), + C.long(sz), C.long(step), unsafe.Pointer(pc)) +} + +func WriteRange(racectx uintptr, addr, sz, step, pc uintptr) { + C.__tsan_write_range(unsafe.Pointer(racectx), unsafe.Pointer(addr), + C.long(sz), C.long(step), unsafe.Pointer(pc)) +} + +func FuncEnter(racectx uintptr, pc uintptr) { + C.__tsan_func_enter(unsafe.Pointer(racectx), unsafe.Pointer(pc)) +} + +func FuncExit(racectx uintptr) { + C.__tsan_func_exit(unsafe.Pointer(racectx)) +} + +func Malloc(racectx uintptr, p, sz, pc uintptr) { + C.__tsan_malloc(unsafe.Pointer(racectx), unsafe.Pointer(p), C.long(sz), unsafe.Pointer(pc)) +} + +func Free(p uintptr) { + C.__tsan_free(unsafe.Pointer(p)) +} + +func GoStart(racectx uintptr, chracectx *uintptr, pc uintptr) { + C.__tsan_go_start(unsafe.Pointer(racectx), (*unsafe.Pointer)(unsafe.Pointer(chracectx)), unsafe.Pointer(pc)) +} + +func GoEnd(racectx uintptr) { + C.__tsan_go_end(unsafe.Pointer(racectx)) +} + +func Acquire(racectx uintptr, addr uintptr) { + C.__tsan_acquire(unsafe.Pointer(racectx), unsafe.Pointer(addr)) +} + +func Release(racectx uintptr, addr uintptr) { + C.__tsan_release(unsafe.Pointer(racectx), unsafe.Pointer(addr)) +} + +func ReleaseMerge(racectx uintptr, addr uintptr) { + C.__tsan_release_merge(unsafe.Pointer(racectx), unsafe.Pointer(addr)) +} + +//export __tsan_symbolize +func __tsan_symbolize(pc uintptr, fun, file **C.char, line, off *C.int) C.int { + f := runtime.FuncForPC(pc) + if f == nil { + *fun = C.CString("??") + *file = C.CString("-") + *line = 0 + *off = C.int(pc) + return 1 + } + fi, l := f.FileLine(pc) + *fun = C.CString(f.Name()) + *file = C.CString(fi) + *line = C.int(l) + *off = C.int(pc - f.Entry()) + return 1 +} diff --git a/src/pkg/runtime/race/race_darwin_amd64.syso b/src/pkg/runtime/race/race_darwin_amd64.syso Binary files differnew file mode 100644 index 
000000000..24a00497c --- /dev/null +++ b/src/pkg/runtime/race/race_darwin_amd64.syso diff --git a/src/pkg/runtime/race/race_linux_amd64.syso b/src/pkg/runtime/race/race_linux_amd64.syso Binary files differnew file mode 100644 index 000000000..b15091ba8 --- /dev/null +++ b/src/pkg/runtime/race/race_linux_amd64.syso diff --git a/src/pkg/runtime/race/race_test.go b/src/pkg/runtime/race/race_test.go new file mode 100644 index 000000000..c77569c37 --- /dev/null +++ b/src/pkg/runtime/race/race_test.go @@ -0,0 +1,157 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build race + +// This program is used to verify the race detector +// by running the tests and parsing their output. +// It does not check stack correctness, completeness or anything else: +// it merely verifies that if a test is expected to be racy +// then the race is detected. +package race_test + +import ( + "bufio" + "bytes" + "fmt" + "io" + "log" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" +) + +var ( + passedTests = 0 + totalTests = 0 + falsePos = 0 + falseNeg = 0 + failingPos = 0 + failingNeg = 0 + failed = false +) + +const ( + visibleLen = 40 + testPrefix = "=== RUN Test" +) + +func TestRace(t *testing.T) { + testOutput, err := runTests() + if err != nil { + t.Fatalf("Failed to run tests: %v\n%v", err, string(testOutput)) + } + reader := bufio.NewReader(bytes.NewBuffer(testOutput)) + + funcName := "" + var tsanLog []string + for { + s, err := nextLine(reader) + if err != nil { + fmt.Printf("%s\n", processLog(funcName, tsanLog)) + break + } + if strings.HasPrefix(s, testPrefix) { + fmt.Printf("%s\n", processLog(funcName, tsanLog)) + tsanLog = make([]string, 0, 100) + funcName = s[len(testPrefix):] + } else { + tsanLog = append(tsanLog, s) + } + } + + fmt.Printf("\nPassed %d of %d tests (%.02f%%, %d+, %d-)\n", + passedTests, totalTests, 100*float64(passedTests)/float64(totalTests), falsePos, falseNeg) + fmt.Printf("%d expected failures (%d has not fail)\n", failingPos+failingNeg, failingNeg) + if failed { + t.Fail() + } +} + +// nextLine is a wrapper around bufio.Reader.ReadString. +// It reads a line up to the next '\n' character. Error +// is non-nil if there are no lines left, and nil +// otherwise. +func nextLine(r *bufio.Reader) (string, error) { + s, err := r.ReadString('\n') + if err != nil { + if err != io.EOF { + log.Fatalf("nextLine: expected EOF, received %v", err) + } + return s, err + } + return s[:len(s)-1], nil +} + +// processLog verifies whether the given ThreadSanitizer's log +// contains a race report, checks this information against +// the name of the testcase and returns the result of this +// comparison. 
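The whole contract is carried by test names: a leading "Race" means a report is expected, "NoRace" means silence is expected, and a "Failing" infix marks a known limitation. A standalone sketch of that classification; classify is a hypothetical helper mirroring the decision in processLog below, minus the counters and column padding.

package main

import (
	"fmt"
	"strings"
)

// classify condenses processLog's decision. (The real code also flags
// a Failing test that unexpectedly passes.)
func classify(testName string, tsanLog []string) string {
	gotRace := false
	for _, line := range tsanLog {
		if strings.Contains(line, "DATA RACE") {
			gotRace = true
			break
		}
	}
	expRace := !strings.HasPrefix(testName, "No")
	switch {
	case expRace == gotRace:
		return "."
	case strings.Contains(testName, "Failing"):
		return "known failure"
	default:
		return "FAILED"
	}
}

func main() {
	fmt.Println(classify("RaceMapRW", []string{"WARNING: DATA RACE"}))
	fmt.Println(classify("NoRaceChanSync", nil))
}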
+func processLog(testName string, tsanLog []string) string { + if !strings.HasPrefix(testName, "Race") && !strings.HasPrefix(testName, "NoRace") { + return "" + } + gotRace := false + for _, s := range tsanLog { + if strings.Contains(s, "DATA RACE") { + gotRace = true + break + } + } + + failing := strings.Contains(testName, "Failing") + expRace := !strings.HasPrefix(testName, "No") + for len(testName) < visibleLen { + testName += " " + } + if expRace == gotRace { + passedTests++ + totalTests++ + if failing { + failed = true + failingNeg++ + } + return fmt.Sprintf("%s .", testName) + } + pos := "" + if expRace { + falseNeg++ + } else { + falsePos++ + pos = "+" + } + if failing { + failingPos++ + } else { + failed = true + } + totalTests++ + return fmt.Sprintf("%s %s%s", testName, "FAILED", pos) +} + +// runTests assures that the package and its dependencies is +// built with instrumentation enabled and returns the output of 'go test' +// which includes possible data race reports from ThreadSanitizer. +func runTests() ([]byte, error) { + tests, err := filepath.Glob("./testdata/*_test.go") + if err != nil { + return nil, err + } + args := []string{"test", "-race", "-v"} + args = append(args, tests...) + cmd := exec.Command("go", args...) + // The following flags turn off heuristics that suppress seemingly identical reports. + // It is required because the tests contain a lot of data races on the same addresses + // (the tests are simple and the memory is constantly reused). + for _, env := range os.Environ() { + if strings.HasPrefix(env, "GOMAXPROCS=") { + continue + } + cmd.Env = append(cmd.Env, env) + } + cmd.Env = append(cmd.Env, `GORACE="suppress_equal_stacks=0 suppress_equal_addresses=0 exitcode=0"`) + return cmd.CombinedOutput() +} diff --git a/src/pkg/runtime/race/race_windows_amd64.syso b/src/pkg/runtime/race/race_windows_amd64.syso Binary files differnew file mode 100644 index 000000000..0a3a58354 --- /dev/null +++ b/src/pkg/runtime/race/race_windows_amd64.syso diff --git a/src/pkg/runtime/race/testdata/atomic_test.go b/src/pkg/runtime/race/testdata/atomic_test.go new file mode 100644 index 000000000..0c5c2c008 --- /dev/null +++ b/src/pkg/runtime/race/testdata/atomic_test.go @@ -0,0 +1,271 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
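Note the GORACE value runTests pins: suppress_equal_stacks and suppress_equal_addresses are turned off because the testdata files deliberately race on the same addresses over and over, and exitcode=0 keeps a detected race from aborting the run. A hedged sketch of driving the same thing by hand; the quoting differs from the snippet above because no shell is involved here.

package main

import (
	"fmt"
	"os"
	"os/exec"
)

func main() {
	cmd := exec.Command("go", "test", "-race", "-v", "./testdata/mop_test.go")
	cmd.Env = append(os.Environ(),
		"GORACE=suppress_equal_stacks=0 suppress_equal_addresses=0 exitcode=0")
	out, _ := cmd.CombinedOutput() // a detected race is expected output here
	fmt.Printf("%s", out)
}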
+ +package race_test + +import ( + "runtime" + "sync/atomic" + "testing" + "unsafe" +) + +func TestNoRaceAtomicAddInt64(t *testing.T) { + var x1, x2 int8 + var s int64 + ch := make(chan bool, 2) + go func() { + x1 = 1 + if atomic.AddInt64(&s, 1) == 2 { + x2 = 1 + } + ch <- true + }() + go func() { + x2 = 1 + if atomic.AddInt64(&s, 1) == 2 { + x1 = 1 + } + ch <- true + }() + <-ch + <-ch +} + +func TestRaceAtomicAddInt64(t *testing.T) { + var x1, x2 int8 + var s int64 + ch := make(chan bool, 2) + go func() { + x1 = 1 + if atomic.AddInt64(&s, 1) == 1 { + x2 = 1 + } + ch <- true + }() + go func() { + x2 = 1 + if atomic.AddInt64(&s, 1) == 1 { + x1 = 1 + } + ch <- true + }() + <-ch + <-ch +} + +func TestNoRaceAtomicAddInt32(t *testing.T) { + var x1, x2 int8 + var s int32 + ch := make(chan bool, 2) + go func() { + x1 = 1 + if atomic.AddInt32(&s, 1) == 2 { + x2 = 1 + } + ch <- true + }() + go func() { + x2 = 1 + if atomic.AddInt32(&s, 1) == 2 { + x1 = 1 + } + ch <- true + }() + <-ch + <-ch +} + +func TestNoRaceAtomicLoadAddInt32(t *testing.T) { + var x int64 + var s int32 + go func() { + x = 2 + atomic.AddInt32(&s, 1) + }() + for atomic.LoadInt32(&s) != 1 { + runtime.Gosched() + } + x = 1 +} + +func TestNoRaceAtomicLoadStoreInt32(t *testing.T) { + var x int64 + var s int32 + go func() { + x = 2 + atomic.StoreInt32(&s, 1) + }() + for atomic.LoadInt32(&s) != 1 { + runtime.Gosched() + } + x = 1 +} + +func TestNoRaceAtomicStoreCASInt32(t *testing.T) { + var x int64 + var s int32 + go func() { + x = 2 + atomic.StoreInt32(&s, 1) + }() + for !atomic.CompareAndSwapInt32(&s, 1, 0) { + runtime.Gosched() + } + x = 1 +} + +func TestNoRaceAtomicCASLoadInt32(t *testing.T) { + var x int64 + var s int32 + go func() { + x = 2 + if !atomic.CompareAndSwapInt32(&s, 0, 1) { + panic("") + } + }() + for atomic.LoadInt32(&s) != 1 { + runtime.Gosched() + } + x = 1 +} + +func TestNoRaceAtomicCASCASInt32(t *testing.T) { + var x int64 + var s int32 + go func() { + x = 2 + if !atomic.CompareAndSwapInt32(&s, 0, 1) { + panic("") + } + }() + for !atomic.CompareAndSwapInt32(&s, 1, 0) { + runtime.Gosched() + } + x = 1 +} + +func TestNoRaceAtomicCASCASInt32_2(t *testing.T) { + var x1, x2 int8 + var s int32 + ch := make(chan bool, 2) + go func() { + x1 = 1 + if !atomic.CompareAndSwapInt32(&s, 0, 1) { + x2 = 1 + } + ch <- true + }() + go func() { + x2 = 1 + if !atomic.CompareAndSwapInt32(&s, 0, 1) { + x1 = 1 + } + ch <- true + }() + <-ch + <-ch +} + +func TestNoRaceAtomicLoadInt64(t *testing.T) { + var x int32 + var s int64 + go func() { + x = 2 + atomic.AddInt64(&s, 1) + }() + for atomic.LoadInt64(&s) != 1 { + runtime.Gosched() + } + x = 1 +} + +func TestNoRaceAtomicCASCASUInt64(t *testing.T) { + var x int64 + var s uint64 + go func() { + x = 2 + if !atomic.CompareAndSwapUint64(&s, 0, 1) { + panic("") + } + }() + for !atomic.CompareAndSwapUint64(&s, 1, 0) { + runtime.Gosched() + } + x = 1 +} + +func TestNoRaceAtomicLoadStorePointer(t *testing.T) { + var x int64 + var s unsafe.Pointer + var y int = 2 + var p unsafe.Pointer = unsafe.Pointer(&y) + go func() { + x = 2 + atomic.StorePointer(&s, p) + }() + for atomic.LoadPointer(&s) != p { + runtime.Gosched() + } + x = 1 +} + +func TestNoRaceAtomicStoreCASUint64(t *testing.T) { + var x int64 + var s uint64 + go func() { + x = 2 + atomic.StoreUint64(&s, 1) + }() + for !atomic.CompareAndSwapUint64(&s, 1, 0) { + runtime.Gosched() + } + x = 1 +} + +// Races with non-atomic loads are not detected. 
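The limitation flagged here is easy to reproduce outside the harness. A minimal sketch, meant for go run -race on a toolchain of this vintage: the store is atomic, the read is plain, so the program genuinely races, yet the detector as shipped misses it, which is why the test below carries the Failing marker.

package main

import (
	"fmt"
	"sync/atomic"
)

func main() {
	var a uint64
	done := make(chan bool)
	go func() {
		atomic.StoreUint64(&a, 1) // atomic write...
		done <- true
	}()
	x := a // ...races with this plain read, but goes unreported here
	<-done
	fmt.Println(x)
}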
+func TestRaceFailingAtomicStoreLoad(t *testing.T) { + c := make(chan bool) + var a uint64 + go func() { + atomic.StoreUint64(&a, 1) + c <- true + }() + _ = a + <-c +} + +func TestRaceAtomicLoadStore(t *testing.T) { + c := make(chan bool) + var a uint64 + go func() { + _ = atomic.LoadUint64(&a) + c <- true + }() + a = 1 + <-c +} + +// Races with non-atomic loads are not detected. +func TestRaceFailingAtomicAddLoad(t *testing.T) { + c := make(chan bool) + var a uint64 + go func() { + atomic.AddUint64(&a, 1) + c <- true + }() + _ = a + <-c +} + +func TestRaceAtomicAddStore(t *testing.T) { + c := make(chan bool) + var a uint64 + go func() { + atomic.AddUint64(&a, 1) + c <- true + }() + a = 42 + <-c +} diff --git a/src/pkg/runtime/race/testdata/cgo_test.go b/src/pkg/runtime/race/testdata/cgo_test.go new file mode 100644 index 000000000..ba7e7b562 --- /dev/null +++ b/src/pkg/runtime/race/testdata/cgo_test.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package race_test + +import ( + "os" + "os/exec" + "testing" +) + +func TestNoRaceCgoSync(t *testing.T) { + cmd := exec.Command("go", "run", "-race", "cgo_test_main.go") + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + t.Fatalf("program exited with error: %v\n", err) + } +} diff --git a/src/pkg/runtime/race/testdata/cgo_test_main.go b/src/pkg/runtime/race/testdata/cgo_test_main.go new file mode 100644 index 000000000..620cea18b --- /dev/null +++ b/src/pkg/runtime/race/testdata/cgo_test_main.go @@ -0,0 +1,30 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +/* +int sync; + +void Notify(void) +{ + __sync_fetch_and_add(&sync, 1); +} + +void Wait(void) +{ + while(__sync_fetch_and_add(&sync, 0) == 0) {} +} +*/ +import "C" + +func main() { + data := 0 + go func() { + data = 1 + C.Notify() + }() + C.Wait() + _ = data +} diff --git a/src/pkg/runtime/race/testdata/chan_test.go b/src/pkg/runtime/race/testdata/chan_test.go new file mode 100644 index 000000000..2332f097e --- /dev/null +++ b/src/pkg/runtime/race/testdata/chan_test.go @@ -0,0 +1,457 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
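The cgo program above gets its ordering from __sync_fetch_and_add, which the detector's interceptors understand. The pure-Go equivalent of that Notify/Wait handshake, sketched here with sync/atomic; the variable is named sync only to mirror the C side.

package main

import (
	"runtime"
	"sync/atomic"
)

func main() {
	var sync int32 // mirrors the C variable of the same name
	data := 0
	go func() {
		data = 1
		atomic.AddInt32(&sync, 1) // Notify
	}()
	for atomic.AddInt32(&sync, 0) == 0 { // Wait
		runtime.Gosched()
	}
	_ = data // ordered after the write: no report
}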
+ +package race_test + +import ( + "runtime" + "testing" + "time" +) + +func TestNoRaceChanSync(t *testing.T) { + v := 0 + c := make(chan int) + go func() { + v = 1 + c <- 0 + }() + <-c + v = 2 +} + +func TestNoRaceChanSyncRev(t *testing.T) { + v := 0 + c := make(chan int) + go func() { + c <- 0 + v = 2 + }() + v = 1 + <-c +} + +func TestNoRaceChanAsync(t *testing.T) { + v := 0 + c := make(chan int, 10) + go func() { + v = 1 + c <- 0 + }() + <-c + v = 2 +} + +func TestRaceChanAsyncRev(t *testing.T) { + v := 0 + c := make(chan int, 10) + go func() { + c <- 0 + v = 1 + }() + v = 2 + <-c +} + +func TestNoRaceChanAsyncCloseRecv(t *testing.T) { + v := 0 + c := make(chan int, 10) + go func() { + v = 1 + close(c) + }() + func() { + defer func() { + recover() + v = 2 + }() + <-c + }() +} + +func TestNoRaceChanAsyncCloseRecv2(t *testing.T) { + v := 0 + c := make(chan int, 10) + go func() { + v = 1 + close(c) + }() + _, _ = <-c + v = 2 +} + +func TestNoRaceChanAsyncCloseRecv3(t *testing.T) { + v := 0 + c := make(chan int, 10) + go func() { + v = 1 + close(c) + }() + for _ = range c { + } + v = 2 +} + +func TestNoRaceChanSyncCloseRecv(t *testing.T) { + v := 0 + c := make(chan int) + go func() { + v = 1 + close(c) + }() + func() { + defer func() { + recover() + v = 2 + }() + <-c + }() +} + +func TestNoRaceChanSyncCloseRecv2(t *testing.T) { + v := 0 + c := make(chan int) + go func() { + v = 1 + close(c) + }() + _, _ = <-c + v = 2 +} + +func TestNoRaceChanSyncCloseRecv3(t *testing.T) { + v := 0 + c := make(chan int) + go func() { + v = 1 + close(c) + }() + for _ = range c { + } + v = 2 +} + +func TestRaceChanSyncCloseSend(t *testing.T) { + v := 0 + c := make(chan int) + go func() { + v = 1 + close(c) + }() + func() { + defer func() { + recover() + }() + c <- 0 + }() + v = 2 +} + +func TestRaceChanAsyncCloseSend(t *testing.T) { + v := 0 + c := make(chan int, 10) + go func() { + v = 1 + close(c) + }() + func() { + defer func() { + recover() + }() + for { + c <- 0 + } + }() + v = 2 +} + +func TestRaceChanCloseClose(t *testing.T) { + compl := make(chan bool, 2) + v1 := 0 + v2 := 0 + c := make(chan int) + go func() { + defer func() { + if recover() != nil { + v2 = 2 + } + compl <- true + }() + v1 = 1 + close(c) + }() + go func() { + defer func() { + if recover() != nil { + v1 = 2 + } + compl <- true + }() + v2 = 1 + close(c) + }() + <-compl + <-compl +} + +func TestRaceChanSendLen(t *testing.T) { + v := 0 + c := make(chan int, 10) + go func() { + v = 1 + c <- 1 + }() + for len(c) == 0 { + runtime.Gosched() + } + v = 2 +} + +func TestRaceChanRecvLen(t *testing.T) { + v := 0 + c := make(chan int, 10) + c <- 1 + go func() { + v = 1 + <-c + }() + for len(c) != 0 { + runtime.Gosched() + } + v = 2 +} + +func TestRaceChanSendSend(t *testing.T) { + compl := make(chan bool, 2) + v1 := 0 + v2 := 0 + c := make(chan int, 1) + go func() { + v1 = 1 + select { + case c <- 1: + default: + v2 = 2 + } + compl <- true + }() + go func() { + v2 = 1 + select { + case c <- 1: + default: + v1 = 2 + } + compl <- true + }() + <-compl + <-compl +} + +func TestNoRaceChanPtr(t *testing.T) { + type msg struct { + x int + } + c := make(chan *msg) + go func() { + c <- &msg{1} + }() + m := <-c + m.x = 2 +} + +func TestRaceChanWrongSend(t *testing.T) { + v1 := 0 + v2 := 0 + c := make(chan int, 2) + go func() { + v1 = 1 + c <- 1 + }() + go func() { + v2 = 2 + c <- 2 + }() + time.Sleep(1e7) + if <-c == 1 { + v2 = 3 + } else { + v1 = 3 + } +} + +func TestRaceChanWrongClose(t *testing.T) { + v1 := 0 + v2 := 0 + c := make(chan int, 1) + go func() 
{ + defer func() { + recover() + }() + v1 = 1 + c <- 1 + }() + go func() { + time.Sleep(1e7) + v2 = 2 + close(c) + }() + time.Sleep(2e7) + if _, who := <-c; who { + v2 = 2 + } else { + v1 = 2 + } +} + +func TestRaceChanSendClose(t *testing.T) { + compl := make(chan bool, 2) + c := make(chan int, 1) + go func() { + defer func() { + recover() + }() + c <- 1 + compl <- true + }() + go func() { + time.Sleep(1e7) + close(c) + compl <- true + }() + <-compl + <-compl +} + +func TestNoRaceProducerConsumerUnbuffered(t *testing.T) { + type Task struct { + f func() + done chan bool + } + + queue := make(chan Task) + + go func() { + t := <-queue + t.f() + t.done <- true + }() + + doit := func(f func()) { + done := make(chan bool, 1) + queue <- Task{f, done} + <-done + } + + x := 0 + doit(func() { + x = 1 + }) + _ = x +} + +func TestRaceChanItselfSend(t *testing.T) { + compl := make(chan bool, 1) + c := make(chan int, 10) + go func() { + c <- 0 + compl <- true + }() + c = make(chan int, 20) + <-compl +} + +func TestRaceChanItselfRecv(t *testing.T) { + compl := make(chan bool, 1) + c := make(chan int, 10) + c <- 1 + go func() { + <-c + compl <- true + }() + time.Sleep(1e7) + c = make(chan int, 20) + <-compl +} + +func TestRaceChanItselfNil(t *testing.T) { + c := make(chan int, 10) + go func() { + c <- 0 + }() + time.Sleep(1e7) + c = nil + _ = c +} + +func TestRaceChanItselfClose(t *testing.T) { + compl := make(chan bool, 1) + c := make(chan int) + go func() { + close(c) + compl <- true + }() + c = make(chan int) + <-compl +} + +func TestRaceChanItselfLen(t *testing.T) { + compl := make(chan bool, 1) + c := make(chan int) + go func() { + _ = len(c) + compl <- true + }() + c = make(chan int) + <-compl +} + +func TestRaceChanItselfCap(t *testing.T) { + compl := make(chan bool, 1) + c := make(chan int) + go func() { + _ = cap(c) + compl <- true + }() + c = make(chan int) + <-compl +} + +func TestRaceChanCloseLen(t *testing.T) { + v := 0 + c := make(chan int, 10) + c <- 0 + go func() { + v = 1 + close(c) + }() + time.Sleep(1e7) + _ = len(c) + v = 2 +} + +func TestRaceChanSameCell(t *testing.T) { + c := make(chan int, 1) + v := 0 + go func() { + v = 1 + c <- 42 + <-c + }() + time.Sleep(1e7) + c <- 43 + <-c + _ = v +} + +func TestRaceChanCloseSend(t *testing.T) { + compl := make(chan bool, 1) + c := make(chan int, 10) + go func() { + close(c) + compl <- true + }() + c <- 0 + <-compl +} diff --git a/src/pkg/runtime/race/testdata/comp_test.go b/src/pkg/runtime/race/testdata/comp_test.go new file mode 100644 index 000000000..754e4db6d --- /dev/null +++ b/src/pkg/runtime/race/testdata/comp_test.go @@ -0,0 +1,132 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
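Before moving on to composite types, the rule the channel tests above keep exercising is worth stating once: a send happens before the corresponding receive completes, so anything written before the send may be read after the receive without a report. A minimal sketch, essentially TestNoRaceChanSync as a standalone program:

package main

import "fmt"

func main() {
	v := 0
	c := make(chan int)
	go func() {
		v = 1  // write...
		c <- 0 // ...sequenced before the send
	}()
	<-c            // the send happens before this receive completes
	fmt.Println(v) // so this read is ordered after the write
}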
+ +package race_test + +import ( + "testing" +) + +type P struct { + x, y int +} + +type S struct { + s1, s2 P +} + +func TestNoRaceComp(t *testing.T) { + c := make(chan bool, 1) + var s S + go func() { + s.s2.x = 1 + c <- true + }() + s.s2.y = 2 + <-c +} + +func TestNoRaceComp2(t *testing.T) { + c := make(chan bool, 1) + var s S + go func() { + s.s1.x = 1 + c <- true + }() + s.s1.y = 2 + <-c +} + +func TestRaceComp(t *testing.T) { + c := make(chan bool, 1) + var s S + go func() { + s.s2.y = 1 + c <- true + }() + s.s2.y = 2 + <-c +} + +func TestRaceComp2(t *testing.T) { + c := make(chan bool, 1) + var s S + go func() { + s.s1.x = 1 + c <- true + }() + s = S{} + <-c +} + +func TestRaceComp3(t *testing.T) { + c := make(chan bool, 1) + var s S + go func() { + s.s2.y = 1 + c <- true + }() + s = S{} + <-c +} + +func TestRaceCompArray(t *testing.T) { + c := make(chan bool, 1) + s := make([]S, 10) + x := 4 + go func() { + s[x].s2.y = 1 + c <- true + }() + x = 5 + <-c +} + +type Ptr struct { + s1, s2 *P +} + +func TestNoRaceCompPtr(t *testing.T) { + c := make(chan bool, 1) + p := Ptr{&P{}, &P{}} + go func() { + p.s1.x = 1 + c <- true + }() + p.s1.y = 2 + <-c +} + +func TestNoRaceCompPtr2(t *testing.T) { + c := make(chan bool, 1) + p := Ptr{&P{}, &P{}} + go func() { + p.s1.x = 1 + c <- true + }() + _ = p + <-c +} + +func TestRaceCompPtr(t *testing.T) { + c := make(chan bool, 1) + p := Ptr{&P{}, &P{}} + go func() { + p.s2.x = 1 + c <- true + }() + p.s2.x = 2 + <-c +} + +func TestRaceCompPtr2(t *testing.T) { + c := make(chan bool, 1) + p := Ptr{&P{}, &P{}} + go func() { + p.s2.x = 1 + c <- true + }() + p.s2 = &P{} + <-c +} diff --git a/src/pkg/runtime/race/testdata/finalizer_test.go b/src/pkg/runtime/race/testdata/finalizer_test.go new file mode 100644 index 000000000..2b2607689 --- /dev/null +++ b/src/pkg/runtime/race/testdata/finalizer_test.go @@ -0,0 +1,67 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package race_test + +import ( + "runtime" + "sync" + "testing" + "time" +) + +func TestNoRaceFin(t *testing.T) { + c := make(chan bool) + go func() { + x := new(int) + runtime.SetFinalizer(x, func(x *int) { + *x = 42 + }) + *x = 66 + c <- true + }() + <-c + runtime.GC() + time.Sleep(1e8) +} + +var finVar struct { + sync.Mutex + cnt int +} + +func TestNoRaceFinGlobal(t *testing.T) { + c := make(chan bool) + go func() { + x := new(int) + runtime.SetFinalizer(x, func(x *int) { + finVar.Lock() + finVar.cnt++ + finVar.Unlock() + }) + c <- true + }() + <-c + runtime.GC() + time.Sleep(1e8) + finVar.Lock() + finVar.cnt++ + finVar.Unlock() +} + +func TestRaceFin(t *testing.T) { + c := make(chan bool) + y := 0 + go func() { + x := new(int) + runtime.SetFinalizer(x, func(x *int) { + y = 42 + }) + c <- true + }() + <-c + runtime.GC() + time.Sleep(1e8) + y = 66 +} diff --git a/src/pkg/runtime/race/testdata/io_test.go b/src/pkg/runtime/race/testdata/io_test.go new file mode 100644 index 000000000..9eb3552dc --- /dev/null +++ b/src/pkg/runtime/race/testdata/io_test.go @@ -0,0 +1,69 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
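The comp tests above all turn on access granularity: the detector tracks individual addresses, so concurrent writes to different fields of one struct are fine, while replacing the whole struct (s = S{}) conflicts with any concurrent field write. A minimal sketch of the benign case:

package main

type P struct{ x, y int }

func main() {
	var p P
	done := make(chan bool)
	go func() {
		p.x = 1 // touches only &p.x
		done <- true
	}()
	p.y = 2 // touches only &p.y: distinct address, no report
	<-done
	_ = p
}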
+ +package race_test + +import ( + "fmt" + "io/ioutil" + "net/http" + "os" + "path/filepath" + "testing" + "time" +) + +func TestNoRaceIOFile(t *testing.T) { + x := 0 + path, _ := ioutil.TempDir("", "race_test") + fname := filepath.Join(path, "data") + go func() { + x = 42 + f, _ := os.Create(fname) + f.Write([]byte("done")) + f.Close() + }() + for { + f, err := os.Open(fname) + if err != nil { + time.Sleep(1e6) + continue + } + buf := make([]byte, 100) + count, err := f.Read(buf) + if count == 0 { + time.Sleep(1e6) + continue + } + break + } + _ = x +} + +func TestNoRaceIOHttp(t *testing.T) { + x := 0 + go func() { + http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + x = 41 + fmt.Fprintf(w, "test") + x = 42 + }) + err := http.ListenAndServe(":23651", nil) + if err != nil { + t.Fatalf("http.ListenAndServe: %v", err) + } + }() + time.Sleep(1e7) + x = 1 + _, err := http.Get("http://127.0.0.1:23651") + if err != nil { + t.Fatalf("http.Get: %v", err) + } + x = 2 + _, err = http.Get("http://127.0.0.1:23651") + if err != nil { + t.Fatalf("http.Get: %v", err) + } + x = 3 +} diff --git a/src/pkg/runtime/race/testdata/map_test.go b/src/pkg/runtime/race/testdata/map_test.go new file mode 100644 index 000000000..6f86a50b7 --- /dev/null +++ b/src/pkg/runtime/race/testdata/map_test.go @@ -0,0 +1,163 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package race_test + +import ( + "testing" +) + +func TestRaceMapRW(t *testing.T) { + m := make(map[int]int) + ch := make(chan bool, 1) + go func() { + _ = m[1] + ch <- true + }() + m[1] = 1 + <-ch +} + +func TestRaceMapRW2(t *testing.T) { + m := make(map[int]int) + ch := make(chan bool, 1) + go func() { + _, _ = m[1] + ch <- true + }() + m[1] = 1 + <-ch +} + +func TestRaceMapRWArray(t *testing.T) { + // Check instrumentation of unaddressable arrays (issue 4578). + m := make(map[int][2]int) + ch := make(chan bool, 1) + go func() { + _ = m[1][1] + ch <- true + }() + m[2] = [2]int{1, 2} + <-ch +} + +func TestNoRaceMapRR(t *testing.T) { + m := make(map[int]int) + ch := make(chan bool, 1) + go func() { + _, _ = m[1] + ch <- true + }() + _ = m[1] + <-ch +} + +func TestRaceMapRange(t *testing.T) { + m := make(map[int]int) + ch := make(chan bool, 1) + go func() { + for _ = range m { + } + ch <- true + }() + m[1] = 1 + <-ch +} + +func TestRaceMapRange2(t *testing.T) { + m := make(map[int]int) + ch := make(chan bool, 1) + go func() { + for _ = range m { + } + ch <- true + }() + m[1] = 1 + <-ch +} + +func TestNoRaceMapRangeRange(t *testing.T) { + m := make(map[int]int) + // now the map is not empty and range triggers an event + // should work without this (as in other tests) + // so it is suspicious if this test passes and others don't + m[0] = 0 + ch := make(chan bool, 1) + go func() { + for _ = range m { + } + ch <- true + }() + for _ = range m { + } + <-ch +} + +// Map len is not instrumented. +func TestRaceFailingMapLen(t *testing.T) { + m := make(map[string]bool) + ch := make(chan bool, 1) + go func() { + _ = len(m) + ch <- true + }() + m[""] = true + <-ch +} + +func TestRaceMapDelete(t *testing.T) { + m := make(map[string]bool) + ch := make(chan bool, 1) + go func() { + delete(m, "") + ch <- true + }() + m[""] = true + <-ch +} + +// Map len is not instrumented. 
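As with the atomic cases earlier, the len limitation shows up in a standalone program. A sketch, again assuming go run -race on this tree: the first pair of goroutines goes unreported because len(m) compiles down to a plain read of the map header, while the second pair is caught because map lookups and stores funnel through instrumented runtime calls.

package main

func main() {
	m := make(map[string]bool)
	done := make(chan bool)

	go func() {
		_ = len(m) // plain read of the map header: race missed
		done <- true
	}()
	m["k"] = true
	<-done

	go func() {
		_ = m["k"] // instrumented runtime lookup: race reported
		done <- true
	}()
	m["k2"] = true
	<-done
}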
+func TestRaceFailingMapLenDelete(t *testing.T) { + m := make(map[string]bool) + ch := make(chan bool, 1) + go func() { + delete(m, "a") + ch <- true + }() + _ = len(m) + <-ch +} + +func TestRaceMapVariable(t *testing.T) { + ch := make(chan bool, 1) + m := make(map[int]int) + go func() { + m = make(map[int]int) + ch <- true + }() + m = make(map[int]int) + <-ch +} + +func TestRaceMapVariable2(t *testing.T) { + ch := make(chan bool, 1) + m := make(map[int]int) + go func() { + m[1] = 1 + ch <- true + }() + m = make(map[int]int) + <-ch +} + +func TestRaceMapVariable3(t *testing.T) { + ch := make(chan bool, 1) + m := make(map[int]int) + go func() { + _ = m[1] + ch <- true + }() + m = make(map[int]int) + <-ch +} diff --git a/src/pkg/runtime/race/testdata/mop_test.go b/src/pkg/runtime/race/testdata/mop_test.go new file mode 100644 index 000000000..f2daa3730 --- /dev/null +++ b/src/pkg/runtime/race/testdata/mop_test.go @@ -0,0 +1,1384 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package race_test + +import ( + "errors" + "fmt" + "runtime" + "sync" + "testing" + "time" + "unsafe" +) + +type Point struct { + x, y int +} + +type NamedPoint struct { + name string + p Point +} + +type DummyWriter struct { + state int +} +type Writer interface { + Write(p []byte) (n int) +} + +func (d DummyWriter) Write(p []byte) (n int) { + return 0 +} + +var GlobalX, GlobalY int = 0, 0 +var GlobalCh chan int = make(chan int, 2) + +func GlobalFunc1() { + GlobalY = GlobalX + GlobalCh <- 1 +} + +func GlobalFunc2() { + GlobalX = 1 + GlobalCh <- 1 +} + +func TestRaceIntRWGlobalFuncs(t *testing.T) { + go GlobalFunc1() + go GlobalFunc2() + <-GlobalCh + <-GlobalCh +} + +func TestRaceIntRWClosures(t *testing.T) { + var x, y int + ch := make(chan int, 2) + + go func() { + y = x + ch <- 1 + }() + go func() { + x = 1 + ch <- 1 + }() + <-ch + <-ch +} + +func TestNoRaceIntRWClosures(t *testing.T) { + var x, y int + ch := make(chan int, 1) + + go func() { + y = x + ch <- 1 + }() + <-ch + go func() { + x = 1 + ch <- 1 + }() + <-ch + +} + +func TestRaceInt32RWClosures(t *testing.T) { + var x, y int32 + ch := make(chan bool, 2) + + go func() { + y = x + ch <- true + }() + go func() { + x = 1 + ch <- true + }() + <-ch + <-ch +} + +func TestNoRaceCase(t *testing.T) { + var y int + for x := -1; x <= 1; x++ { + switch { + case x < 0: + y = -1 + case x == 0: + y = 0 + case x > 0: + y = 1 + } + } + y++ +} + +func TestRaceCaseCondition(t *testing.T) { + var x int = 0 + ch := make(chan int, 2) + + go func() { + x = 2 + ch <- 1 + }() + go func() { + switch x < 2 { + case true: + x = 1 + //case false: + // x = 5 + } + ch <- 1 + }() + <-ch + <-ch +} + +func TestRaceCaseCondition2(t *testing.T) { + // switch body is rearranged by the compiler so the tests + // passes even if we don't instrument '<' + var x int = 0 + ch := make(chan int, 2) + + go func() { + x = 2 + ch <- 1 + }() + go func() { + switch x < 2 { + case true: + x = 1 + case false: + x = 5 + } + ch <- 1 + }() + <-ch + <-ch +} + +func TestRaceCaseBody(t *testing.T) { + var x, y int + ch := make(chan int, 2) + + go func() { + y = x + ch <- 1 + }() + go func() { + switch { + default: + x = 1 + case x == 100: + x = -x + } + ch <- 1 + }() + <-ch + <-ch +} + +func TestNoRaceCaseFallthrough(t *testing.T) { + var x, y, z int + ch := make(chan int, 2) + z = 1 + + go func() { + y = x + ch <- 1 + }() + go func() { + switch { + case z == 1: + case z == 2: + x = 2 + } + ch <- 1 
+ }() + <-ch + <-ch +} + +func TestRaceCaseFallthrough(t *testing.T) { + var x, y, z int + ch := make(chan int, 2) + z = 1 + + go func() { + y = x + ch <- 1 + }() + go func() { + switch { + case z == 1: + fallthrough + case z == 2: + x = 2 + } + ch <- 1 + }() + + <-ch + <-ch +} + +func TestNoRaceRange(t *testing.T) { + ch := make(chan int, 3) + a := [...]int{1, 2, 3} + for _, v := range a { + ch <- v + } + close(ch) +} + +func TestRaceRange(t *testing.T) { + const N = 2 + var a [N]int + var x, y int + done := make(chan bool, N) + for i, v := range a { + go func(i int) { + // we don't want a write-vs-write race + // so there is no array b here + if i == 0 { + x = v + } else { + y = v + } + done <- true + }(i) + } + for i := 0; i < N; i++ { + <-done + } +} + +func TestRacePlus(t *testing.T) { + var x, y, z int + ch := make(chan int, 2) + + go func() { + y = x + z + ch <- 1 + }() + go func() { + y = x + z + z + ch <- 1 + }() + <-ch + <-ch +} + +func TestRacePlus2(t *testing.T) { + var x, y, z int + ch := make(chan int, 2) + + go func() { + x = 1 + ch <- 1 + }() + go func() { + y = +x + z + ch <- 1 + }() + <-ch + <-ch +} + +func TestNoRacePlus(t *testing.T) { + var x, y, z, f int + ch := make(chan int, 2) + + go func() { + y = x + z + ch <- 1 + }() + go func() { + f = z + x + ch <- 1 + }() + <-ch + <-ch +} + +// May crash if the instrumentation is reckless. +func TestNoRaceEnoughRegisters(t *testing.T) { + // from erf.go + const ( + sa1 = 1 + sa2 = 2 + sa3 = 3 + sa4 = 4 + sa5 = 5 + sa6 = 6 + sa7 = 7 + sa8 = 8 + ) + var s, S float64 + s = 3.1415 + S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8))))))) + s = S +} + +// emptyFunc should not be inlined. +func emptyFunc(x int) { + if false { + fmt.Println(x) + } +} + +func TestRaceFuncArgument(t *testing.T) { + var x int + ch := make(chan bool, 1) + go func() { + emptyFunc(x) + ch <- true + }() + x = 1 + <-ch +} + +func TestRaceFuncArgument2(t *testing.T) { + var x int + ch := make(chan bool, 2) + go func() { + x = 42 + ch <- true + }() + go func(y int) { + ch <- true + }(x) + <-ch + <-ch +} + +func TestRaceSprint(t *testing.T) { + var x int + ch := make(chan bool, 1) + go func() { + fmt.Sprint(x) + ch <- true + }() + x = 1 + <-ch +} + +// Not implemented. +func TestRaceFailingArrayCopy(t *testing.T) { + ch := make(chan bool, 1) + var a [5]int + go func() { + a[3] = 1 + ch <- true + }() + a = [5]int{1, 2, 3, 4, 5} + <-ch +} + +func TestRaceStructRW(t *testing.T) { + p := Point{0, 0} + ch := make(chan bool, 1) + go func() { + p = Point{1, 1} + ch <- true + }() + q := p + <-ch + p = q +} + +func TestRaceStructFieldRW1(t *testing.T) { + p := Point{0, 0} + ch := make(chan bool, 1) + go func() { + p.x = 1 + ch <- true + }() + _ = p.x + <-ch +} + +func TestNoRaceStructFieldRW1(t *testing.T) { + // Same struct, different variables, no + // pointers. The layout is known (at compile time?) 
-> + // no read on p + // writes on x and y + p := Point{0, 0} + ch := make(chan bool, 1) + go func() { + p.x = 1 + ch <- true + }() + p.y = 1 + <-ch + _ = p +} + +func TestNoRaceStructFieldRW2(t *testing.T) { + // Same as NoRaceStructFieldRW1 + // but p is a pointer, so there is a read on p + p := Point{0, 0} + ch := make(chan bool, 1) + go func() { + p.x = 1 + ch <- true + }() + p.y = 1 + <-ch + _ = p +} + +func TestRaceStructFieldRW2(t *testing.T) { + p := &Point{0, 0} + ch := make(chan bool, 1) + go func() { + p.x = 1 + ch <- true + }() + _ = p.x + <-ch +} + +func TestRaceStructFieldRW3(t *testing.T) { + p := NamedPoint{name: "a", p: Point{0, 0}} + ch := make(chan bool, 1) + go func() { + p.p.x = 1 + ch <- true + }() + _ = p.p.x + <-ch +} + +func TestRaceEfaceWW(t *testing.T) { + var a, b interface{} + ch := make(chan bool, 1) + go func() { + a = 1 + ch <- true + }() + a = 2 + <-ch + _, _ = a, b +} + +func TestRaceIfaceWW(t *testing.T) { + var a, b Writer + ch := make(chan bool, 1) + go func() { + a = DummyWriter{1} + ch <- true + }() + a = DummyWriter{2} + <-ch + b = a + a = b +} + +func TestRaceEfaceConv(t *testing.T) { + c := make(chan bool) + v := 0 + go func() { + go func(x interface{}) { + }(v) + c <- true + }() + v = 42 + <-c +} + +type OsFile struct{} + +func (*OsFile) Read() { +} + +type IoReader interface { + Read() +} + +func TestRaceIfaceConv(t *testing.T) { + c := make(chan bool) + f := &OsFile{} + go func() { + go func(x IoReader) { + }(f) + c <- true + }() + f = &OsFile{} + <-c +} + +func TestRaceError(t *testing.T) { + ch := make(chan bool, 1) + var err error + go func() { + err = nil + ch <- true + }() + _ = err + <-ch +} + +func TestRaceIntptrRW(t *testing.T) { + var x, y int + var p *int = &x + ch := make(chan bool, 1) + go func() { + *p = 5 + ch <- true + }() + y = *p + x = y + <-ch +} + +func TestRaceStringRW(t *testing.T) { + ch := make(chan bool, 1) + s := "" + go func() { + s = "abacaba" + ch <- true + }() + _ = s + <-ch +} + +func TestRaceStringPtrRW(t *testing.T) { + ch := make(chan bool, 1) + var x string + p := &x + go func() { + *p = "a" + ch <- true + }() + _ = *p + <-ch +} + +func TestRaceFloat64WW(t *testing.T) { + var x, y float64 + ch := make(chan bool, 1) + go func() { + x = 1.0 + ch <- true + }() + x = 2.0 + <-ch + + y = x + x = y +} + +func TestRaceComplex128WW(t *testing.T) { + var x, y complex128 + ch := make(chan bool, 1) + go func() { + x = 2 + 2i + ch <- true + }() + x = 4 + 4i + <-ch + + y = x + x = y +} + +func TestRaceUnsafePtrRW(t *testing.T) { + var x, y, z int + x, y, z = 1, 2, 3 + var p unsafe.Pointer = unsafe.Pointer(&x) + ch := make(chan bool, 1) + go func() { + p = (unsafe.Pointer)(&z) + ch <- true + }() + y = *(*int)(p) + x = y + <-ch +} + +func TestRaceFuncVariableRW(t *testing.T) { + var f func(x int) int + f = func(x int) int { + return x * x + } + ch := make(chan bool, 1) + go func() { + f = func(x int) int { + return x + } + ch <- true + }() + y := f(1) + <-ch + x := y + y = x +} + +func TestRaceFuncVariableWW(t *testing.T) { + var f func(x int) int + ch := make(chan bool, 1) + go func() { + f = func(x int) int { + return x + } + ch <- true + }() + f = func(x int) int { + return x * x + } + <-ch +} + +// This one should not belong to mop_test +func TestRacePanic(t *testing.T) { + var x int + var zero int = 0 + ch := make(chan bool, 2) + go func() { + defer func() { + err := recover() + if err == nil { + panic("should be panicking") + } + x = 1 + ch <- true + }() + var y int = 1 / zero + zero = y + }() + go func() { + defer 
func() { + err := recover() + if err == nil { + panic("should be panicking") + } + x = 2 + ch <- true + }() + var y int = 1 / zero + zero = y + }() + + <-ch + <-ch + if zero != 0 { + panic("zero has changed") + } +} + +func TestNoRaceBlank(t *testing.T) { + var a [5]int + ch := make(chan bool, 1) + go func() { + _, _ = a[0], a[1] + ch <- true + }() + _, _ = a[2], a[3] + <-ch + a[1] = a[0] +} + +func TestRaceAppendRW(t *testing.T) { + a := make([]int, 10) + ch := make(chan bool) + go func() { + _ = append(a, 1) + ch <- true + }() + a[0] = 1 + <-ch +} + +func TestRaceAppendLenRW(t *testing.T) { + a := make([]int, 0) + ch := make(chan bool) + go func() { + a = append(a, 1) + ch <- true + }() + _ = len(a) + <-ch +} + +func TestRaceAppendCapRW(t *testing.T) { + a := make([]int, 0) + ch := make(chan string) + go func() { + a = append(a, 1) + ch <- "" + }() + _ = cap(a) + <-ch +} + +func TestNoRaceFuncArgsRW(t *testing.T) { + ch := make(chan byte, 1) + var x byte + go func(y byte) { + _ = y + ch <- 0 + }(x) + x = 1 + <-ch +} + +func TestRaceFuncArgsRW(t *testing.T) { + ch := make(chan byte, 1) + var x byte + go func(y *byte) { + _ = *y + ch <- 0 + }(&x) + x = 1 + <-ch +} + +// from the mailing list, slightly modified +// unprotected concurrent access to seen[] +func TestRaceCrawl(t *testing.T) { + url := "dummyurl" + depth := 3 + seen := make(map[string]bool) + ch := make(chan int, 100) + var wg sync.WaitGroup + var crawl func(string, int) + crawl = func(u string, d int) { + nurl := 0 + defer func() { + ch <- nurl + }() + seen[u] = true + if d <= 0 { + return + } + urls := [...]string{"a", "b", "c"} + for _, uu := range urls { + if _, ok := seen[uu]; !ok { + wg.Add(1) + go crawl(uu, d-1) + nurl++ + } + } + wg.Done() + } + wg.Add(1) + go crawl(url, depth) + wg.Wait() +} + +func TestRaceIndirection(t *testing.T) { + ch := make(chan struct{}, 1) + var y int + var x *int = &y + go func() { + *x = 1 + ch <- struct{}{} + }() + *x = 2 + <-ch + _ = *x +} + +func TestRaceRune(t *testing.T) { + c := make(chan bool) + var x rune + go func() { + x = 1 + c <- true + }() + _ = x + <-c +} + +func TestRaceEmptyInterface1(t *testing.T) { + c := make(chan bool) + var x interface{} + go func() { + x = nil + c <- true + }() + _ = x + <-c +} + +func TestRaceEmptyInterface2(t *testing.T) { + c := make(chan bool) + var x interface{} + go func() { + x = &Point{} + c <- true + }() + _ = x + <-c +} + +func TestRaceTLS(t *testing.T) { + comm := make(chan *int) + done := make(chan bool, 2) + go func() { + var x int + comm <- &x + x = 1 + x = *(<-comm) + done <- true + }() + go func() { + p := <-comm + *p = 2 + comm <- p + done <- true + }() + <-done + <-done +} + +func TestNoRaceHeapReallocation(t *testing.T) { + // It is possible that a future implementation + // of memory allocation will ruin this test. + // Increasing n might help in this case, so + // this test is a bit more generic than most of the + // others. + const n = 2 + done := make(chan bool, n) + empty := func(p *int) {} + for i := 0; i < n; i++ { + ms := i + go func() { + <-time.After(time.Duration(ms) * time.Millisecond) + runtime.GC() + var x int + empty(&x) // x goes to the heap + done <- true + }() + } + for i := 0; i < n; i++ { + <-done + } +} + +func TestRaceAnd(t *testing.T) { + c := make(chan bool) + x, y := 0, 0 + go func() { + x = 1 + c <- true + }() + if x == 1 && y == 1 { + } + <-c +} + +// OANDAND is not instrumented in the compiler. 
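The asymmetry between TestRaceAnd above and the Failing variant below is short-circuit evaluation: the left operand of && is evaluated unconditionally and gets instrumented, while the conditionally-executed right operand is where this compiler stops instrumenting. A sketch of the shape that slips through:

package main

func main() {
	done := make(chan bool)
	x, y := 0, 0
	go func() {
		x = 1 // racy write
		done <- true
	}()
	if y == 0 && x == 1 { // the read of x hides in the uninstrumented RHS
	}
	<-done
}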
+func TestRaceFailingAnd2(t *testing.T) { + c := make(chan bool) + x, y := 0, 0 + go func() { + x = 1 + c <- true + }() + if y == 0 && x == 1 { + } + <-c +} + +func TestNoRaceAnd(t *testing.T) { + c := make(chan bool) + x, y := 0, 0 + go func() { + x = 1 + c <- true + }() + if y == 1 && x == 1 { + } + <-c +} + +func TestRaceOr(t *testing.T) { + c := make(chan bool) + x, y := 0, 0 + go func() { + x = 1 + c <- true + }() + if x == 1 || y == 1 { + } + <-c +} + +// OOROR is not instrumented in the compiler. +func TestRaceFailingOr2(t *testing.T) { + c := make(chan bool) + x, y := 0, 0 + go func() { + x = 1 + c <- true + }() + if y == 1 || x == 1 { + } + <-c +} + +func TestNoRaceOr(t *testing.T) { + c := make(chan bool) + x, y := 0, 0 + go func() { + x = 1 + c <- true + }() + if y == 0 || x == 1 { + } + <-c +} + +func TestNoRaceShortCalc(t *testing.T) { + c := make(chan bool) + x, y := 0, 0 + go func() { + y = 1 + c <- true + }() + if x == 0 || y == 0 { + } + <-c +} + +func TestNoRaceShortCalc2(t *testing.T) { + c := make(chan bool) + x, y := 0, 0 + go func() { + y = 1 + c <- true + }() + if x == 1 && y == 0 { + } + <-c +} + +func TestRaceFuncItself(t *testing.T) { + c := make(chan bool) + f := func() {} + go func() { + f() + c <- true + }() + f = func() {} + <-c +} + +func TestNoRaceFuncUnlock(t *testing.T) { + ch := make(chan bool, 1) + var mu sync.Mutex + x := 0 + go func() { + mu.Lock() + x = 42 + mu.Unlock() + ch <- true + }() + x = func(mu *sync.Mutex) int { + mu.Lock() + return 43 + }(&mu) + mu.Unlock() + <-ch +} + +func TestRaceStructInit(t *testing.T) { + type X struct { + x, y int + } + c := make(chan bool, 1) + y := 0 + go func() { + y = 42 + c <- true + }() + x := X{x: y} + _ = x + <-c +} + +func TestRaceArrayInit(t *testing.T) { + c := make(chan bool, 1) + y := 0 + go func() { + y = 42 + c <- true + }() + x := []int{0, y, 42} + _ = x + <-c +} + +func TestRaceMapInit(t *testing.T) { + c := make(chan bool, 1) + y := 0 + go func() { + y = 42 + c <- true + }() + x := map[int]int{0: 42, y: 42} + _ = x + <-c +} + +func TestRaceMapInit2(t *testing.T) { + c := make(chan bool, 1) + y := 0 + go func() { + y = 42 + c <- true + }() + x := map[int]int{0: 42, 42: y} + _ = x + <-c +} + +type Inter interface { + Foo(x int) +} +type InterImpl struct { + x, y int +} + +func (p InterImpl) Foo(x int) { + // prevent inlining + z := 42 + x = 85 + y := x / z + z = y * z + x = z * y + _, _, _ = x, y, z +} + +func TestRaceInterCall(t *testing.T) { + c := make(chan bool, 1) + p := InterImpl{} + var x Inter = p + go func() { + p2 := InterImpl{} + x = p2 + c <- true + }() + x.Foo(0) + <-c +} + +func TestRaceInterCall2(t *testing.T) { + c := make(chan bool, 1) + p := InterImpl{} + var x Inter = p + z := 0 + go func() { + z = 42 + c <- true + }() + x.Foo(z) + <-c +} + +func TestRaceFuncCall(t *testing.T) { + c := make(chan bool, 1) + f := func(x, y int) {} + x, y := 0, 0 + go func() { + y = 42 + c <- true + }() + f(x, y) + <-c +} + +func TestRaceMethodCall(t *testing.T) { + c := make(chan bool, 1) + i := InterImpl{} + x := 0 + go func() { + x = 42 + c <- true + }() + i.Foo(x) + <-c +} + +func TestRaceMethodCall2(t *testing.T) { + c := make(chan bool, 1) + i := &InterImpl{} + go func() { + i = &InterImpl{} + c <- true + }() + i.Foo(0) + <-c +} + +func TestRacePanicArg(t *testing.T) { + c := make(chan bool, 1) + err := errors.New("err") + go func() { + err = errors.New("err2") + c <- true + }() + defer func() { + recover() + <-c + }() + panic(err) +} + +func TestRaceDeferArg(t *testing.T) { + c := make(chan bool, 
1) + x := 0 + go func() { + x = 42 + c <- true + }() + func() { + defer func(x int) { + }(x) + }() + <-c +} + +type DeferT int + +func (d DeferT) Foo() { +} + +func TestRaceDeferArg2(t *testing.T) { + c := make(chan bool, 1) + var x DeferT + go func() { + var y DeferT + x = y + c <- true + }() + func() { + defer x.Foo() + }() + <-c +} + +func TestNoRaceAddrExpr(t *testing.T) { + c := make(chan bool, 1) + x := 0 + go func() { + x = 42 + c <- true + }() + _ = &x + <-c +} + +type AddrT struct { + _ [256]byte + x int +} + +type AddrT2 struct { + _ [512]byte + p *AddrT +} + +func TestRaceAddrExpr(t *testing.T) { + c := make(chan bool, 1) + a := AddrT2{p: &AddrT{x: 42}} + go func() { + a.p = &AddrT{x: 43} + c <- true + }() + _ = &a.p.x + <-c +} + +func TestRaceTypeAssert(t *testing.T) { + c := make(chan bool, 1) + x := 0 + var i interface{} = x + go func() { + y := 0 + i = y + c <- true + }() + _ = i.(int) + <-c +} + +func TestRaceBlockAs(t *testing.T) { + c := make(chan bool, 1) + var x, y int + go func() { + x = 42 + c <- true + }() + x, y = y, x + <-c +} + +func TestRaceSliceSlice(t *testing.T) { + c := make(chan bool, 1) + x := make([]int, 10) + go func() { + x = make([]int, 20) + c <- true + }() + _ = x[2:3] + <-c +} + +func TestRaceSliceSlice2(t *testing.T) { + c := make(chan bool, 1) + x := make([]int, 10) + i := 2 + go func() { + i = 3 + c <- true + }() + _ = x[i:4] + <-c +} + +// http://golang.org/issue/4453 +func TestRaceFailingSliceStruct(t *testing.T) { + type X struct { + x, y int + } + c := make(chan bool, 1) + x := make([]X, 10) + go func() { + y := make([]X, 10) + copy(y, x) + c <- true + }() + x[1].y = 42 + <-c +} + +func TestRaceStructInd(t *testing.T) { + c := make(chan bool, 1) + type Item struct { + x, y int + } + i := Item{} + go func(p *Item) { + *p = Item{} + c <- true + }(&i) + i.y = 42 + <-c +} + +func TestRaceAsFunc1(t *testing.T) { + var s []byte + c := make(chan bool, 1) + go func() { + var err error + s, err = func() ([]byte, error) { + t := []byte("hello world") + return t, nil + }() + c <- true + _ = err + }() + _ = string(s) + <-c +} + +func TestRaceAsFunc2(t *testing.T) { + c := make(chan bool, 1) + x := 0 + go func() { + func(x int) { + }(x) + c <- true + }() + x = 42 + <-c +} + +func TestRaceAsFunc3(t *testing.T) { + c := make(chan bool, 1) + var mu sync.Mutex + x := 0 + go func() { + func(x int) { + mu.Lock() + }(x) // Read of x must be outside of the mutex. + mu.Unlock() + c <- true + }() + mu.Lock() + x = 42 + mu.Unlock() + <-c +} + +func TestNoRaceAsFunc4(t *testing.T) { + c := make(chan bool, 1) + var mu sync.Mutex + x := 0 + go func() { + x = func() int { // Write of x must be under the mutex. 
+ mu.Lock() + return 42 + }() + mu.Unlock() + c <- true + }() + mu.Lock() + x = 42 + mu.Unlock() + <-c +} + +func TestRaceHeapParam(t *testing.T) { + x := func() (x int) { + go func() { + x = 42 + }() + return + }() + _ = x +} + +func TestNoRaceEmptyStruct(t *testing.T) { + type Empty struct{} + type X struct { + y int64 + Empty + } + type Y struct { + x X + y int64 + } + c := make(chan X) + var y Y + go func() { + x := y.x + c <- x + }() + y.y = 42 + <-c +} + +func TestRaceNestedStruct(t *testing.T) { + type X struct { + x, y int + } + type Y struct { + x X + } + c := make(chan Y) + var y Y + go func() { + c <- y + }() + y.x.y = 42 + <-c +} diff --git a/src/pkg/runtime/race/testdata/mutex_test.go b/src/pkg/runtime/race/testdata/mutex_test.go new file mode 100644 index 000000000..3cf03ae6b --- /dev/null +++ b/src/pkg/runtime/race/testdata/mutex_test.go @@ -0,0 +1,138 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package race_test + +import ( + "sync" + "testing" + "time" +) + +func TestNoRaceMutex(t *testing.T) { + var mu sync.Mutex + var x int16 = 0 + ch := make(chan bool, 2) + go func() { + mu.Lock() + defer mu.Unlock() + x = 1 + ch <- true + }() + go func() { + mu.Lock() + x = 2 + mu.Unlock() + ch <- true + }() + <-ch + <-ch +} + +func TestRaceMutex(t *testing.T) { + var mu sync.Mutex + var x int16 = 0 + ch := make(chan bool, 2) + go func() { + x = 1 + mu.Lock() + defer mu.Unlock() + ch <- true + }() + go func() { + x = 2 + mu.Lock() + mu.Unlock() + ch <- true + }() + <-ch + <-ch +} + +func TestRaceMutex2(t *testing.T) { + var mu1 sync.Mutex + var mu2 sync.Mutex + var x int8 = 0 + ch := make(chan bool, 2) + go func() { + mu1.Lock() + defer mu1.Unlock() + x = 1 + ch <- true + }() + go func() { + mu2.Lock() + x = 2 + mu2.Unlock() + ch <- true + }() + <-ch + <-ch +} + +func TestNoRaceMutexPureHappensBefore(t *testing.T) { + var mu sync.Mutex + var x int16 = 0 + ch := make(chan bool, 2) + go func() { + x = 1 + mu.Lock() + mu.Unlock() + ch <- true + }() + go func() { + <-time.After(1e5) + mu.Lock() + mu.Unlock() + x = 1 + ch <- true + }() + <-ch + <-ch +} + +func TestNoRaceMutexSemaphore(t *testing.T) { + var mu sync.Mutex + ch := make(chan bool, 2) + x := 0 + mu.Lock() + go func() { + x = 1 + mu.Unlock() + ch <- true + }() + go func() { + mu.Lock() + x = 2 + mu.Unlock() + ch <- true + }() + <-ch + <-ch +} + +// from doc/go_mem.html +func TestNoRaceMutexExampleFromHtml(t *testing.T) { + var l sync.Mutex + a := "" + + l.Lock() + go func() { + a = "hello, world" + l.Unlock() + }() + l.Lock() + _ = a +} + +func TestRaceMutexOverwrite(t *testing.T) { + c := make(chan bool, 1) + var mu sync.Mutex + go func() { + mu = sync.Mutex{} + c <- true + }() + mu.Lock() + <-c +} diff --git a/src/pkg/runtime/race/testdata/regression_test.go b/src/pkg/runtime/race/testdata/regression_test.go new file mode 100644 index 000000000..afe8cc5ec --- /dev/null +++ b/src/pkg/runtime/race/testdata/regression_test.go @@ -0,0 +1,150 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code patterns that caused problems in the past. 
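The first pattern in the file that follows, NewLog, races a goroutine's read of the named return value l against the assignment l = LogImpl{} in the function body. A reduced sketch of the same shape, with illustrative names that are not taken from the patch:

```go
package main

// NewCounter mirrors the NewLog pattern below: the goroutine captures the
// named return value v by reference, so its read is concurrent with the
// plain assignment in the function body and nothing orders the two.
func NewCounter() (v int) {
	done := make(chan bool)
	go func() {
		_ = v // concurrent read of the named return value
		done <- true
	}()
	v = 42 // racy write: no synchronization precedes the read above
	<-done
	return
}

func main() {
	_ = NewCounter()
}
```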
+ +package race_test + +import ( + "testing" +) + +type LogImpl struct { + x int +} + +func NewLog() (l LogImpl) { + c := make(chan bool) + go func() { + _ = l + c <- true + }() + l = LogImpl{} + <-c + return +} + +var _ LogImpl = NewLog() + +func MakeMap() map[int]int { + return make(map[int]int) +} + +func InstrumentMapLen() { + _ = len(MakeMap()) +} + +func InstrumentMapLen2() { + m := make(map[int]map[int]int) + _ = len(m[0]) +} + +func InstrumentMapLen3() { + m := make(map[int]*map[int]int) + _ = len(*m[0]) +} + +type Rect struct { + x, y int +} + +type Image struct { + min, max Rect +} + +func NewImage() Image { + var pleaseDoNotInlineMe stack + pleaseDoNotInlineMe.push(1) + _ = pleaseDoNotInlineMe.pop() + return Image{} +} + +func AddrOfTemp() { + _ = NewImage().min +} + +type TypeID int + +func (t *TypeID) encodeType(x int) (tt TypeID, err error) { + switch x { + case 0: + return t.encodeType(x * x) + } + return 0, nil +} + +type stack []int + +func (s *stack) push(x int) { + *s = append(*s, x) +} + +func (s *stack) pop() int { + i := len(*s) + n := (*s)[i-1] + *s = (*s)[:i-1] + return n +} + +func TestNoRaceStackPushPop(t *testing.T) { + var s stack + go func(s *stack) {}(&s) + s.push(1) + x := s.pop() + _ = x +} + +type RpcChan struct { + c chan bool +} + +var makeChanCalls int + +func makeChan() *RpcChan { + var pleaseDoNotInlineMe stack + pleaseDoNotInlineMe.push(1) + _ = pleaseDoNotInlineMe.pop() + + makeChanCalls++ + c := &RpcChan{make(chan bool, 1)} + c.c <- true + return c +} + +func call() bool { + x := <-makeChan().c + return x +} + +func TestNoRaceRpcChan(t *testing.T) { + makeChanCalls = 0 + _ = call() + if makeChanCalls != 1 { + t.Fatalf("makeChanCalls %d, expected 1\n", makeChanCalls) + } +} + +func divInSlice() { + v := make([]int64, 10) + i := 1 + _ = v[(i*4)/3] +} + +func TestNoRaceReturn(t *testing.T) { + c := make(chan int) + noRaceReturn(c) + <-c +} + +// Return used to do an implicit a = a, causing a read/write race +// with the goroutine. Compiler has an optimization to avoid that now. +// See issue 4014. +func noRaceReturn(c chan int) (a, b int) { + a = 42 + go func() { + _ = a + c <- 1 + }() + return a, 10 +} diff --git a/src/pkg/runtime/race/testdata/rwmutex_test.go b/src/pkg/runtime/race/testdata/rwmutex_test.go new file mode 100644 index 000000000..85cb5df3c --- /dev/null +++ b/src/pkg/runtime/race/testdata/rwmutex_test.go @@ -0,0 +1,134 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
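The file that follows probes the happens-before edges of sync.RWMutex: a writer's Lock/Unlock pair orders against later readers and writers, while concurrent RLock holders are not ordered against each other. For contrast, the conventional race-free reader/writer pattern looks like this (a generic sketch, not taken from the tests):

```go
package main

import "sync"

// counter guards n with an RWMutex: writers take the exclusive lock and
// readers the shared one, so every access to n is ordered by the mutex.
type counter struct {
	mu sync.RWMutex
	n  int
}

func (c *counter) inc() {
	c.mu.Lock()
	c.n++
	c.mu.Unlock()
}

func (c *counter) get() int {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.n
}

func main() {
	var c counter
	done := make(chan bool)
	go func() { c.inc(); done <- true }()
	_ = c.get()
	<-done
}
```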
+ +package race_test + +import ( + "sync" + "testing" + "time" +) + +func TestRaceMutexRWMutex(t *testing.T) { + var mu1 sync.Mutex + var mu2 sync.RWMutex + var x int16 = 0 + ch := make(chan bool, 2) + go func() { + mu1.Lock() + defer mu1.Unlock() + x = 1 + ch <- true + }() + go func() { + mu2.Lock() + x = 2 + mu2.Unlock() + ch <- true + }() + <-ch + <-ch +} + +func TestNoRaceRWMutex(t *testing.T) { + var mu sync.RWMutex + var x, y int64 = 0, 1 + ch := make(chan bool, 2) + go func() { + mu.Lock() + defer mu.Unlock() + x = 2 + ch <- true + }() + go func() { + mu.RLock() + y = x + mu.RUnlock() + ch <- true + }() + <-ch + <-ch +} + +func TestRaceRWMutexMultipleReaders(t *testing.T) { + var mu sync.RWMutex + var x, y int64 = 0, 1 + ch := make(chan bool, 3) + go func() { + mu.Lock() + defer mu.Unlock() + x = 2 + ch <- true + }() + go func() { + mu.RLock() + y = x + 1 + mu.RUnlock() + ch <- true + }() + go func() { + mu.RLock() + y = x + 2 + mu.RUnlock() + ch <- true + }() + <-ch + <-ch + <-ch + _ = y +} + +func TestNoRaceRWMutexMultipleReaders(t *testing.T) { + var mu sync.RWMutex + x := int64(0) + ch := make(chan bool, 3) + go func() { + mu.Lock() + defer mu.Unlock() + x = 2 + ch <- true + }() + go func() { + mu.RLock() + y := x + 1 + _ = y + mu.RUnlock() + ch <- true + }() + go func() { + mu.RLock() + y := x + 2 + _ = y + mu.RUnlock() + ch <- true + }() + <-ch + <-ch + <-ch +} + +func TestNoRaceRWMutexTransitive(t *testing.T) { + var mu sync.RWMutex + x := int64(0) + ch := make(chan bool, 2) + go func() { + mu.RLock() + _ = x + mu.RUnlock() + ch <- true + }() + go func() { + time.Sleep(1e7) + mu.RLock() + _ = x + mu.RUnlock() + ch <- true + }() + time.Sleep(2e7) + mu.Lock() + x = 42 + mu.Unlock() + <-ch + <-ch +} diff --git a/src/pkg/runtime/race/testdata/select_test.go b/src/pkg/runtime/race/testdata/select_test.go new file mode 100644 index 000000000..4a3a23647 --- /dev/null +++ b/src/pkg/runtime/race/testdata/select_test.go @@ -0,0 +1,286 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package race_test + +import ( + "runtime" + "testing" +) + +func TestNoRaceSelect1(t *testing.T) { + var x int + compl := make(chan bool) + c := make(chan bool) + c1 := make(chan bool) + + go func() { + x = 1 + // At least two channels are needed because + // otherwise the compiler optimizes select out. + // See comment in runtime/chan.c:^selectgo. 
+ select { + case c <- true: + case c1 <- true: + } + compl <- true + }() + select { + case <-c: + case c1 <- true: + } + x = 2 + <-compl +} + +func TestNoRaceSelect2(t *testing.T) { + var x int + compl := make(chan bool) + c := make(chan bool) + c1 := make(chan bool) + go func() { + select { + case <-c: + case <-c1: + } + x = 1 + compl <- true + }() + x = 2 + close(c) + runtime.Gosched() + <-compl +} + +func TestNoRaceSelect3(t *testing.T) { + var x int + compl := make(chan bool) + c := make(chan bool, 10) + c1 := make(chan bool) + go func() { + x = 1 + select { + case c <- true: + case <-c1: + } + compl <- true + }() + <-c + x = 2 + <-compl +} + +func TestNoRaceSelect4(t *testing.T) { + type Task struct { + f func() + done chan bool + } + + queue := make(chan Task) + dummy := make(chan bool) + + go func() { + for { + select { + case t := <-queue: + t.f() + t.done <- true + } + } + }() + + doit := func(f func()) { + done := make(chan bool, 1) + select { + case queue <- Task{f, done}: + case <-dummy: + } + select { + case <-done: + case <-dummy: + } + } + + var x int + doit(func() { + x = 1 + }) + _ = x +} + +func TestNoRaceSelect5(t *testing.T) { + test := func(sel, needSched bool) { + var x int + ch := make(chan bool) + c1 := make(chan bool) + + done := make(chan bool, 2) + go func() { + if needSched { + runtime.Gosched() + } + // println(1) + x = 1 + if sel { + select { + case ch <- true: + case <-c1: + } + } else { + ch <- true + } + done <- true + }() + + go func() { + // println(2) + if sel { + select { + case <-ch: + case <-c1: + } + } else { + <-ch + } + x = 1 + done <- true + }() + <-done + <-done + } + + test(true, true) + test(true, false) + test(false, true) + test(false, false) +} + +func TestRaceSelect1(t *testing.T) { + var x int + compl := make(chan bool, 2) + c := make(chan bool) + c1 := make(chan bool) + + go func() { + <-c + <-c + }() + f := func() { + select { + case c <- true: + case c1 <- true: + } + x = 1 + compl <- true + } + go f() + go f() + <-compl + <-compl +} + +func TestRaceSelect2(t *testing.T) { + var x int + compl := make(chan bool) + c := make(chan bool) + c1 := make(chan bool) + go func() { + x = 1 + select { + case <-c: + case <-c1: + } + compl <- true + }() + close(c) + x = 2 + <-compl +} + +func TestRaceSelect3(t *testing.T) { + var x int + compl := make(chan bool) + c := make(chan bool) + c1 := make(chan bool) + go func() { + x = 1 + select { + case c <- true: + case c1 <- true: + } + compl <- true + }() + x = 2 + select { + case <-c: + } + <-compl +} + +func TestRaceSelect4(t *testing.T) { + done := make(chan bool, 1) + var x int + go func() { + select { + default: + x = 2 + } + done <- true + }() + _ = x + <-done +} + +// The idea behind this test: +// there are two variables, access to one +// of them is synchronized, access to the other +// is not. +// Select must (unconditionally) choose the non-synchronized variable +// thus causing exactly one race. +// Currently this test doesn't look like it accomplishes +// this goal.
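One way to read the comment above: c1 has capacity 1 and starts empty, so the send on it is always ready, while the send on the unbuffered c2 can never proceed because nothing receives from it. The select below can therefore only ever take the c1 case, so only x is written and only the unsynchronized read of x can race. A standalone demonstration of that select determinism (illustrative, separate from the test):

```go
package main

import "fmt"

func main() {
	c1 := make(chan bool, 1) // buffered: the send is immediately ready
	c2 := make(chan bool)    // unbuffered, never received from: send never ready
	for i := 0; i < 3; i++ {
		select {
		case c1 <- true:
			fmt.Println("c1 case chosen") // taken on every iteration
			<-c1                          // drain so the next send is ready again
		case c2 <- true:
			fmt.Println("c2 case chosen") // never reached
		}
	}
}
```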
+func TestRaceSelect5(t *testing.T) { + done := make(chan bool, 1) + c1 := make(chan bool, 1) + c2 := make(chan bool) + var x, y int + go func() { + select { + case c1 <- true: + x = 1 + case c2 <- true: + y = 1 + } + done <- true + }() + _ = x + _ = y + <-done +} + +// select statements may introduce +// flakiness: whether this test contains +// a race depends on the scheduling +// (some may argue that the code contains +// this race by definition) +/* +func TestFlakyDefault(t *testing.T) { + var x int + c := make(chan bool, 1) + done := make(chan bool, 1) + go func() { + select { + case <-c: + x = 2 + default: + x = 3 + } + done <- true + }() + x = 1 + c <- true + _ = x + <-done +} +*/ diff --git a/src/pkg/runtime/race/testdata/slice_test.go b/src/pkg/runtime/race/testdata/slice_test.go new file mode 100644 index 000000000..773463662 --- /dev/null +++ b/src/pkg/runtime/race/testdata/slice_test.go @@ -0,0 +1,465 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package race_test + +import ( + "testing" +) + +func TestRaceSliceRW(t *testing.T) { + ch := make(chan bool, 1) + a := make([]int, 2) + go func() { + a[1] = 1 + ch <- true + }() + _ = a[1] + <-ch +} + +func TestNoRaceSliceRW(t *testing.T) { + ch := make(chan bool, 1) + a := make([]int, 2) + go func() { + a[0] = 1 + ch <- true + }() + _ = a[1] + <-ch +} + +func TestRaceSliceWW(t *testing.T) { + a := make([]int, 10) + ch := make(chan bool, 1) + go func() { + a[1] = 1 + ch <- true + }() + a[1] = 2 + <-ch +} + +func TestNoRaceArrayWW(t *testing.T) { + var a [5]int + ch := make(chan bool, 1) + go func() { + a[0] = 1 + ch <- true + }() + a[1] = 2 + <-ch +} + +func TestRaceArrayWW(t *testing.T) { + var a [5]int + ch := make(chan bool, 1) + go func() { + a[1] = 1 + ch <- true + }() + a[1] = 2 + <-ch +} + +func TestNoRaceSliceWriteLen(t *testing.T) { + ch := make(chan bool, 1) + a := make([]bool, 1) + go func() { + a[0] = true + ch <- true + }() + _ = len(a) + <-ch +} + +func TestNoRaceSliceWriteCap(t *testing.T) { + ch := make(chan bool, 1) + a := make([]uint64, 100) + go func() { + a[50] = 123 + ch <- true + }() + _ = cap(a) + <-ch +} + +func TestRaceSliceCopyRead(t *testing.T) { + ch := make(chan bool, 1) + a := make([]int, 10) + b := make([]int, 10) + go func() { + _ = a[5] + ch <- true + }() + copy(a, b) + <-ch +} + +func TestNoRaceSliceWriteCopy(t *testing.T) { + ch := make(chan bool, 1) + a := make([]int, 10) + b := make([]int, 10) + go func() { + a[5] = 1 + ch <- true + }() + copy(a[:5], b[:5]) + <-ch +} + +func TestRaceSliceCopyWrite2(t *testing.T) { + ch := make(chan bool, 1) + a := make([]int, 10) + b := make([]int, 10) + go func() { + b[5] = 1 + ch <- true + }() + copy(a, b) + <-ch +} + +func TestRaceSliceCopyWrite3(t *testing.T) { + ch := make(chan bool, 1) + a := make([]byte, 10) + go func() { + a[7] = 1 + ch <- true + }() + copy(a, "qwertyqwerty") + <-ch +} + +func TestNoRaceSliceCopyRead(t *testing.T) { + ch := make(chan bool, 1) + a := make([]int, 10) + b := make([]int, 10) + go func() { + _ = b[5] + ch <- true + }() + copy(a, b) + <-ch +} + +func TestNoRaceSliceWriteSlice2(t *testing.T) { + ch := make(chan bool, 1) + a := make([]float64, 10) + go func() { + a[2] = 1.0 + ch <- true + }() + _ = a[0:5] + <-ch +} + +func TestRaceSliceWriteSlice(t *testing.T) { + ch := make(chan bool, 1) + a := make([]float64, 10) + go func() { + a[2] = 1.0 + ch <- true + }() + a = a[5:10] + <-ch +} + +func 
TestNoRaceSliceWriteSlice(t *testing.T) { + ch := make(chan bool, 1) + a := make([]float64, 10) + go func() { + a[2] = 1.0 + ch <- true + }() + _ = a[5:10] + <-ch +} + +func TestNoRaceSliceLenCap(t *testing.T) { + ch := make(chan bool, 1) + a := make([]struct{}, 10) + go func() { + _ = len(a) + ch <- true + }() + _ = cap(a) + <-ch +} + +func TestNoRaceStructSlicesRangeWrite(t *testing.T) { + type Str struct { + a []int + b []int + } + ch := make(chan bool, 1) + var s Str + s.a = make([]int, 10) + s.b = make([]int, 10) + go func() { + for _ = range s.a { + } + ch <- true + }() + s.b[5] = 5 + <-ch +} + +func TestRaceSliceDifferent(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + s2 := s + go func() { + s[3] = 3 + c <- true + }() + // false negative because s2 is PAUTO w/o PHEAP + // so we do not instrument it + s2[3] = 3 + <-c +} + +func TestRaceSliceRangeWrite(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + s[3] = 3 + c <- true + }() + for _, v := range s { + _ = v + } + <-c +} + +func TestNoRaceSliceRangeWrite(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + s[3] = 3 + c <- true + }() + for _ = range s { + } + <-c +} + +func TestRaceSliceRangeAppend(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + s = append(s, 3) + c <- true + }() + for _ = range s { + } + <-c +} + +func TestNoRaceSliceRangeAppend(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + _ = append(s, 3) + c <- true + }() + for _ = range s { + } + <-c +} + +func TestRaceSliceVarWrite(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + s[3] = 3 + c <- true + }() + s = make([]int, 20) + <-c +} + +func TestRaceSliceVarRead(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + _ = s[3] + c <- true + }() + s = make([]int, 20) + <-c +} + +func TestRaceSliceVarRange(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + for _ = range s { + } + c <- true + }() + s = make([]int, 20) + <-c +} + +func TestRaceSliceVarAppend(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + _ = append(s, 10) + c <- true + }() + s = make([]int, 20) + <-c +} + +func TestRaceSliceVarCopy(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + s2 := make([]int, 10) + copy(s, s2) + c <- true + }() + s = make([]int, 20) + <-c +} + +func TestRaceSliceVarCopy2(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + s2 := make([]int, 10) + copy(s2, s) + c <- true + }() + s = make([]int, 20) + <-c +} + +// Not implemented. +func TestRaceFailingSliceAppend(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10, 20) + go func() { + _ = append(s, 1) + c <- true + }() + _ = append(s, 2) + <-c +} + +func TestRaceSliceAppendWrite(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + _ = append(s, 1) + c <- true + }() + s[0] = 42 + <-c +} + +func TestRaceSliceAppendSlice(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + go func() { + s2 := make([]int, 10) + _ = append(s, s2...) + c <- true + }() + s[0] = 42 + <-c +} + +func TestRaceSliceAppendSlice2(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + s2foobar := make([]int, 10) + go func() { + _ = append(s, s2foobar...) 
+ c <- true + }() + s2foobar[5] = 42 + <-c +} + +func TestRaceSliceAppendString(t *testing.T) { + c := make(chan bool, 1) + s := make([]byte, 10) + go func() { + _ = append(s, "qwerty"...) + c <- true + }() + s[0] = 42 + <-c +} + +func TestNoRaceSliceIndexAccess(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + v := 0 + go func() { + _ = v + c <- true + }() + s[v] = 1 + <-c +} + +func TestNoRaceSliceIndexAccess2(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + v := 0 + go func() { + _ = v + c <- true + }() + _ = s[v] + <-c +} + +func TestRaceSliceIndexAccess(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + v := 0 + go func() { + v = 1 + c <- true + }() + s[v] = 1 + <-c +} + +func TestRaceSliceIndexAccess2(t *testing.T) { + c := make(chan bool, 1) + s := make([]int, 10) + v := 0 + go func() { + v = 1 + c <- true + }() + _ = s[v] + <-c +} + +func TestRaceSliceByteToString(t *testing.T) { + c := make(chan string) + s := make([]byte, 10) + go func() { + c <- string(s) + }() + s[0] = 42 + <-c +} + +func TestRaceSliceRuneToString(t *testing.T) { + c := make(chan string) + s := make([]rune, 10) + go func() { + c <- string(s) + }() + s[9] = 42 + <-c +} diff --git a/src/pkg/runtime/race/testdata/sync_test.go b/src/pkg/runtime/race/testdata/sync_test.go new file mode 100644 index 000000000..e80ba3b74 --- /dev/null +++ b/src/pkg/runtime/race/testdata/sync_test.go @@ -0,0 +1,197 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package race_test + +import ( + "sync" + "testing" + "time" +) + +func TestNoRaceCond(t *testing.T) { // tsan's test02 + ch := make(chan bool, 1) + var x int = 0 + var mu sync.Mutex + var cond *sync.Cond = sync.NewCond(&mu) + var condition int = 0 + var waker func() + waker = func() { + x = 1 + mu.Lock() + condition = 1 + cond.Signal() + mu.Unlock() + } + + var waiter func() + waiter = func() { + go waker() + cond.L.Lock() + for condition != 1 { + cond.Wait() + } + cond.L.Unlock() + x = 2 + ch <- true + } + go waiter() + <-ch +} + +func TestRaceCond(t *testing.T) { // tsan's test50 + ch := make(chan bool, 2) + + var x int = 0 + var mu sync.Mutex + var condition int = 0 + var cond *sync.Cond = sync.NewCond(&mu) + + var waker func() = func() { + <-time.After(1e5) + x = 1 + mu.Lock() + condition = 1 + cond.Signal() + mu.Unlock() + <-time.After(1e5) + mu.Lock() + x = 3 + mu.Unlock() + ch <- true + } + + var waiter func() = func() { + mu.Lock() + for condition != 1 { + cond.Wait() + } + mu.Unlock() + x = 2 + ch <- true + } + x = 0 + go waker() + go waiter() + <-ch + <-ch +} + +// We do not currently automatically +// parse this test. 
It is intended that the creation +// stack is observed manually not to contain +// off-by-one errors +func TestRaceAnnounceThreads(t *testing.T) { + const N = 7 + allDone := make(chan bool, N) + + var x int + + var f, g, h func() + f = func() { + x = 1 + go g() + go func() { + x = 1 + allDone <- true + }() + x = 2 + allDone <- true + } + + g = func() { + for i := 0; i < 2; i++ { + go func() { + x = 1 + allDone <- true + }() + allDone <- true + } + } + + h = func() { + x = 1 + x = 2 + go f() + allDone <- true + } + + go h() + + for i := 0; i < N; i++ { + <-allDone + } +} + +func TestNoRaceAfterFunc1(t *testing.T) { + i := 2 + c := make(chan bool) + var f func() + f = func() { + i-- + if i >= 0 { + time.AfterFunc(0, f) + } else { + c <- true + } + } + + time.AfterFunc(0, f) + <-c +} + +func TestNoRaceAfterFunc2(t *testing.T) { + var x int + timer := time.AfterFunc(10, func() { + x = 1 + }) + defer timer.Stop() + _ = x +} + +func TestNoRaceAfterFunc3(t *testing.T) { + c := make(chan bool, 1) + x := 0 + time.AfterFunc(1e7, func() { + x = 1 + c <- true + }) + <-c +} + +func TestRaceAfterFunc3(t *testing.T) { + c := make(chan bool, 2) + x := 0 + time.AfterFunc(1e7, func() { + x = 1 + c <- true + }) + time.AfterFunc(2e7, func() { + x = 2 + c <- true + }) + <-c + <-c +} + +// This test's output is intended to be +// observed manually. One should check +// that the goroutine creation stack is +// comprehensible. +func TestRaceGoroutineCreationStack(t *testing.T) { + var x int + var ch = make(chan bool, 1) + + f1 := func() { + x = 1 + ch <- true + } + f2 := func() { go f1() } + f3 := func() { go f2() } + f4 := func() { go f3() } + + go f4() + x = 2 + <-ch +} diff --git a/src/pkg/runtime/race/testdata/waitgroup_test.go b/src/pkg/runtime/race/testdata/waitgroup_test.go new file mode 100644 index 000000000..7ea21fa7e --- /dev/null +++ b/src/pkg/runtime/race/testdata/waitgroup_test.go @@ -0,0 +1,232 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package race_test + +import ( + "runtime" + "sync" + "testing" + "time" +) + +func TestNoRaceWaitGroup(t *testing.T) { + var x int + var wg sync.WaitGroup + n := 1 + for i := 0; i < n; i++ { + wg.Add(1) + j := i + go func() { + x = j + wg.Done() + }() + } + wg.Wait() +} + +func TestRaceWaitGroup(t *testing.T) { + var x int + var wg sync.WaitGroup + n := 2 + for i := 0; i < n; i++ { + wg.Add(1) + j := i + go func() { + x = j + wg.Done() + }() + } + wg.Wait() +} + +func TestNoRaceWaitGroup2(t *testing.T) { + var x int + var wg sync.WaitGroup + wg.Add(1) + go func() { + x = 1 + wg.Done() + }() + wg.Wait() + x = 2 +} + +// incrementing counter in Add and locking wg's mutex +func TestRaceWaitGroupAsMutex(t *testing.T) { + var x int + var wg sync.WaitGroup + c := make(chan bool, 2) + go func() { + wg.Wait() + time.Sleep(100 * time.Millisecond) + wg.Add(+1) + x = 1 + wg.Add(-1) + c <- true + }() + go func() { + wg.Wait() + time.Sleep(100 * time.Millisecond) + wg.Add(+1) + x = 2 + wg.Add(-1) + c <- true + }() + <-c + <-c +} + +// Incorrect usage: Add is too late. +func TestRaceWaitGroupWrongWait(t *testing.T) { + c := make(chan bool, 2) + var x int + var wg sync.WaitGroup + go func() { + wg.Add(1) + runtime.Gosched() + x = 1 + wg.Done() + c <- true + }() + go func() { + wg.Add(1) + runtime.Gosched() + x = 2 + wg.Done() + c <- true + }() + wg.Wait() + <-c + <-c +} + +// A common WaitGroup misuse that can potentially be caught by the race detector.
+// For this simple case we must emulate Add() as read on &wg and Wait() as write on &wg. +// However it will have false positives if there are several concurrent Wait() calls. +func TestRaceFailingWaitGroupWrongAdd(t *testing.T) { + c := make(chan bool, 2) + var wg sync.WaitGroup + go func() { + wg.Add(1) + wg.Done() + c <- true + }() + go func() { + wg.Add(1) + wg.Done() + c <- true + }() + wg.Wait() + <-c + <-c +} + +func TestNoRaceWaitGroupMultipleWait(t *testing.T) { + c := make(chan bool, 2) + var wg sync.WaitGroup + go func() { + wg.Wait() + c <- true + }() + go func() { + wg.Wait() + c <- true + }() + wg.Wait() + <-c + <-c +} + +func TestNoRaceWaitGroupMultipleWait2(t *testing.T) { + c := make(chan bool, 2) + var wg sync.WaitGroup + wg.Add(2) + go func() { + wg.Done() + wg.Wait() + c <- true + }() + go func() { + wg.Done() + wg.Wait() + c <- true + }() + wg.Wait() + <-c + <-c +} + +// Correct usage but still a race +func TestRaceWaitGroup2(t *testing.T) { + var x int + var wg sync.WaitGroup + wg.Add(2) + go func() { + x = 1 + wg.Done() + }() + go func() { + x = 2 + wg.Done() + }() + wg.Wait() +} + +func TestNoRaceWaitGroupPanicRecover(t *testing.T) { + var x int + var wg sync.WaitGroup + defer func() { + err := recover() + if err != "sync: negative WaitGroup counter" { + t.Fatalf("Unexpected panic: %#v", err) + } + x = 2 + }() + x = 1 + wg.Add(-1) +} + +// TODO: this is actually a panic-synchronization test, not a +// WaitGroup test. Move it to another *_test file +// Is it possible to get a race by synchronization via panic? +func TestNoRaceWaitGroupPanicRecover2(t *testing.T) { + var x int + var wg sync.WaitGroup + ch := make(chan bool, 1) + var f func() = func() { + x = 2 + ch <- true + } + go func() { + defer func() { + err := recover() + if err != "sync: negative WaitGroup counter" { + } + go f() + }() + x = 1 + wg.Add(-1) + }() + + <-ch +} + +func TestNoRaceWaitGroupTransitive(t *testing.T) { + x, y := 0, 0 + var wg sync.WaitGroup + wg.Add(2) + go func() { + x = 42 + wg.Done() + }() + go func() { + time.Sleep(1e7) + y = 42 + wg.Done() + }() + wg.Wait() + _ = x + _ = y +} diff --git a/src/pkg/runtime/race0.c b/src/pkg/runtime/race0.c new file mode 100644 index 000000000..1c5f05a7e --- /dev/null +++ b/src/pkg/runtime/race0.c @@ -0,0 +1,133 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Stub implementation of the race detector API. 
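race0.c below is the no-op half of a build-tag pair: it satisfies the linker in ordinary builds, while its counterpart race.c, compiled only when the race tag is set, forwards these calls to the ThreadSanitizer runtime. The same file-selection technique in Go code, sketched with a hypothetical package and symbol names:

```go
// race_off.go (hypothetical): the stub half of the pair. A sibling file
// guarded by "// +build race" would define the same names with real bodies.

// +build !race

package sample

// RaceEnabled reports which variant was compiled into the binary.
const RaceEnabled = false

// Acquire mirrors the shape of the stubs in race0.c: a no-op in ordinary
// builds, so callers never have to test a flag at run time.
func Acquire(addr uintptr) {}
```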
+// +build !race + +#include "runtime.h" + +uintptr +runtime·raceinit(void) +{ + return 0; +} + +void +runtime·racefini(void) +{ +} + + +void +runtime·racemapshadow(void *addr, uintptr size) +{ + USED(addr); + USED(size); +} + +void +runtime·racewritepc(void *addr, void *callpc, void *pc) +{ + USED(addr); + USED(callpc); + USED(pc); +} + +void +runtime·racereadpc(void *addr, void *callpc, void *pc) +{ + USED(addr); + USED(callpc); + USED(pc); +} + +void +runtime·racewriterangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc) +{ + USED(addr); + USED(sz); + USED(step); + USED(callpc); + USED(pc); +} + +void +runtime·racereadrangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc) +{ + USED(addr); + USED(sz); + USED(step); + USED(callpc); + USED(pc); +} + +void +runtime·raceacquire(void *addr) +{ + USED(addr); +} + +void +runtime·raceacquireg(G *gp, void *addr) +{ + USED(gp); + USED(addr); +} + +void +runtime·racerelease(void *addr) +{ + USED(addr); +} + +void +runtime·racereleaseg(G *gp, void *addr) +{ + USED(gp); + USED(addr); +} + +void +runtime·racereleasemerge(void *addr) +{ + USED(addr); +} + +void +runtime·racereleasemergeg(G *gp, void *addr) +{ + USED(gp); + USED(addr); +} + +void +runtime·racefingo(void) +{ +} + +void +runtime·racemalloc(void *p, uintptr sz, void *pc) +{ + USED(p); + USED(sz); + USED(pc); +} + +void +runtime·racefree(void *p) +{ + USED(p); +} + +uintptr +runtime·racegostart(void *pc) +{ + USED(pc); + return 0; +} + +void +runtime·racegoend(void) +{ +} diff --git a/src/pkg/runtime/race_amd64.s b/src/pkg/runtime/race_amd64.s new file mode 100644 index 000000000..83e300905 --- /dev/null +++ b/src/pkg/runtime/race_amd64.s @@ -0,0 +1,14 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build race + +// func runtime·racefuncenter(pc uintptr) +TEXT runtime·racefuncenter(SB), 7, $16 + MOVQ DX, saved-8(SP) // save function entry context (for closures) + MOVQ pc+0(FP), DX + MOVQ DX, arg-16(SP) + CALL runtime·racefuncenter1(SB) + MOVQ saved-8(SP), DX + RET diff --git a/src/pkg/runtime/rt0_freebsd_arm.s b/src/pkg/runtime/rt0_freebsd_arm.s new file mode 100644 index 000000000..085fccf9d --- /dev/null +++ b/src/pkg/runtime/rt0_freebsd_arm.s @@ -0,0 +1,8 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// FreeBSD and Linux use the same linkage to main + +TEXT _rt0_arm_freebsd(SB),7,$-4 + B _rt0_arm(SB) diff --git a/src/pkg/runtime/rt0_linux_arm.s b/src/pkg/runtime/rt0_linux_arm.s index e08cf907d..a648160cf 100644 --- a/src/pkg/runtime/rt0_linux_arm.s +++ b/src/pkg/runtime/rt0_linux_arm.s @@ -20,15 +20,27 @@ TEXT _rt0_arm_linux(SB),7,$-4 MOVM.DB.W [R0-R3], (R13) MOVW $4, R0 // SIGILL MOVW R13, R1 // sa - MOVW $0, R2 // old_sa + SUB $16, R13 + MOVW R13, R2 // old_sa MOVW $8, R3 // c MOVW $174, R7 // sys_sigaction BL oabi_syscall<>(SB) - ADD $16, R13 + // do an EABI syscall MOVW $20, R7 // sys_getpid - SWI $0 // this will trigger SIGILL on OABI systems + SWI $0 // this will trigger SIGILL on OABI systems + + MOVW $4, R0 // SIGILL + MOVW R13, R1 // sa + MOVW $0, R2 // old_sa + MOVW $8, R3 // c + MOVW $174, R7 // sys_sigaction + SWI $0 // restore signal handler + ADD $32, R13 + SUB $4, R13 // fake a stack frame for runtime·setup_auxv + BL runtime·setup_auxv(SB) + ADD $4, R13 B _rt0_arm(SB) TEXT bad_abi<>(SB),7,$-4 diff --git a/src/pkg/runtime/rt0_netbsd_arm.s b/src/pkg/runtime/rt0_netbsd_arm.s new file mode 100644 index 000000000..8c1588f2e --- /dev/null +++ b/src/pkg/runtime/rt0_netbsd_arm.s @@ -0,0 +1,8 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// FreeBSD/NetBSD and Linux use the same linkage to main + +TEXT _rt0_arm_netbsd(SB),7,$-4 + B _rt0_arm(SB) diff --git a/src/pkg/runtime/rt0_plan9_386.s b/src/pkg/runtime/rt0_plan9_386.s index b56c8b325..56f3a0f6c 100644 --- a/src/pkg/runtime/rt0_plan9_386.s +++ b/src/pkg/runtime/rt0_plan9_386.s @@ -25,6 +25,7 @@ argv_fix: ADDL $4, BP LOOP argv_fix + CALL runtime·asminit(SB) JMP _rt0_386(SB) DATA runtime·isplan9(SB)/4, $1 diff --git a/src/pkg/runtime/rt0_plan9_amd64.s b/src/pkg/runtime/rt0_plan9_amd64.s new file mode 100644 index 000000000..2b1fa2ae1 --- /dev/null +++ b/src/pkg/runtime/rt0_plan9_amd64.s @@ -0,0 +1,11 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +TEXT _rt0_amd64_plan9(SB),7, $0 + MOVQ $_rt0_amd64(SB), AX + MOVQ SP, DI + JMP AX + +DATA runtime·isplan9(SB)/4, $1 +GLOBL runtime·isplan9(SB), $4 diff --git a/src/pkg/runtime/rt0_windows_386.s b/src/pkg/runtime/rt0_windows_386.s index 3b023de2f..a06aa787e 100644 --- a/src/pkg/runtime/rt0_windows_386.s +++ b/src/pkg/runtime/rt0_windows_386.s @@ -3,11 +3,6 @@ // license that can be found in the LICENSE file. TEXT _rt0_386_windows(SB),7,$0 - // Set up SEH frame for bootstrap m - PUSHL $runtime·sigtramp(SB) - PUSHL 0(FS) - MOVL SP, 0(FS) - JMP _rt0_386(SB) DATA runtime·iswindows(SB)/4, $1 diff --git a/src/pkg/runtime/rune.c b/src/pkg/runtime/rune.c index 86ee76ddd..ed867269d 100644 --- a/src/pkg/runtime/rune.c +++ b/src/pkg/runtime/rune.c @@ -47,6 +47,9 @@ enum Runeerror = 0xFFFD, Runeself = 0x80, + SurrogateMin = 0xD800, + SurrogateMax = 0xDFFF, + Bad = Runeerror, Runemax = 0x10FFFF, /* maximum rune value */ @@ -128,6 +131,8 @@ runtime·charntorune(int32 *rune, uint8 *str, int32 length) l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; if(l <= Rune2) goto bad; + if (SurrogateMin <= l && l <= SurrogateMax) + goto bad; *rune = l; return 3; } @@ -193,13 +198,15 @@ runtime·runetochar(byte *str, int32 rune) /* note: in original, arg2 was point } /* - * If the Rune is out of range, convert it to the error rune. 
+ * If the Rune is out of range or a surrogate half, convert it to the error rune. * Do this test here because the error rune encodes to three bytes. * Doing it earlier would duplicate work, since an out of range * Rune wouldn't have fit in one or two bytes. */ if (c > Runemax) c = Runeerror; + if (SurrogateMin <= c && c <= SurrogateMax) + c = Runeerror; /* * three character sequence diff --git a/src/pkg/runtime/runtime-gdb.py b/src/pkg/runtime/runtime-gdb.py index 629c39e98..eff9a4003 100644 --- a/src/pkg/runtime/runtime-gdb.py +++ b/src/pkg/runtime/runtime-gdb.py @@ -149,8 +149,8 @@ goobjfile.pretty_printers.extend([makematcher(k) for k in vars().values() if has # # For reference, this is what we're trying to do: -# eface: p *(*(struct 'runtime.commonType'*)'main.e'->type_->data)->string -# iface: p *(*(struct 'runtime.commonType'*)'main.s'->tab->Type->data)->string +# eface: p *(*(struct 'runtime.rtype'*)'main.e'->type_->data)->string +# iface: p *(*(struct 'runtime.rtype'*)'main.s'->tab->Type->data)->string # # interface types can't be recognized by their name, instead we check # if they have the expected fields. Unfortunately the mapping of @@ -186,8 +186,7 @@ def lookup_type(name): except: pass -_rctp_type = gdb.lookup_type("struct runtime.commonType").pointer() -_rtp_type = gdb.lookup_type("struct runtime._type").pointer() +_rctp_type = gdb.lookup_type("struct runtime.rtype").pointer() def iface_commontype(obj): if is_iface(obj): @@ -196,18 +195,13 @@ def iface_commontype(obj): go_type_ptr = obj['_type'] else: return - - # sanity check: reflection type description ends in a loop. - tt = go_type_ptr['_type'].cast(_rtp_type).dereference()['_type'] - if tt != tt.cast(_rtp_type).dereference()['_type']: - return - return go_type_ptr['ptr'].cast(_rctp_type).dereference() + return go_type_ptr.cast(_rctp_type).dereference() def iface_dtype(obj): "Decode type of the data field of an eface or iface struct." - # known issue: dtype_name decoded from runtime.commonType is "nested.Foo" + # known issue: dtype_name decoded from runtime.rtype is "nested.Foo" # but the dwarf table lists it as "full/path/to/nested.Foo" dynamic_go_type = iface_commontype(obj) @@ -381,6 +375,7 @@ class GoroutineCmd(gdb.Command): def invoke(self, arg, from_tty): goid, cmd = arg.split(None, 1) + goid = gdb.parse_and_eval(goid) pc, sp = find_goroutine(int(goid)) if not pc: print "No such goroutine: ", goid diff --git a/src/pkg/runtime/runtime.c b/src/pkg/runtime/runtime.c index ebb5544fb..4d57cbafd 100644 --- a/src/pkg/runtime/runtime.c +++ b/src/pkg/runtime/runtime.c @@ -3,15 +3,12 @@ // license that can be found in the LICENSE file. #include "runtime.h" -#include "stack.h" +#include "arch_GOARCH.h" enum { maxround = sizeof(uintptr), }; -uint32 runtime·panicking; -void (*runtime·destroylock)(Lock*); - /* * We assume that all architectures turn faults and the like * into apparent calls to runtime.sigpanic. 
If we see a "call" @@ -31,103 +28,6 @@ runtime·gotraceback(void) return runtime·atoi(p); } -static Lock paniclk; - -void -runtime·startpanic(void) -{ - if(m->dying) { - runtime·printf("panic during panic\n"); - runtime·exit(3); - } - m->dying = 1; - runtime·xadd(&runtime·panicking, 1); - runtime·lock(&paniclk); -} - -void -runtime·dopanic(int32 unused) -{ - static bool didothers; - - if(g->sig != 0) - runtime·printf("[signal %x code=%p addr=%p pc=%p]\n", - g->sig, g->sigcode0, g->sigcode1, g->sigpc); - - if(runtime·gotraceback()){ - if(g != m->g0) { - runtime·printf("\n"); - runtime·goroutineheader(g); - runtime·traceback(runtime·getcallerpc(&unused), runtime·getcallersp(&unused), 0, g); - } - if(!didothers) { - didothers = true; - runtime·tracebackothers(g); - } - } - runtime·unlock(&paniclk); - if(runtime·xadd(&runtime·panicking, -1) != 0) { - // Some other m is panicking too. - // Let it print what it needs to print. - // Wait forever without chewing up cpu. - // It will exit when it's done. - static Lock deadlock; - runtime·lock(&deadlock); - runtime·lock(&deadlock); - } - - runtime·exit(2); -} - -void -runtime·panicindex(void) -{ - runtime·panicstring("index out of range"); -} - -void -runtime·panicslice(void) -{ - runtime·panicstring("slice bounds out of range"); -} - -void -runtime·throwreturn(void) -{ - // can only happen if compiler is broken - runtime·throw("no return at end of a typed function - compiler is broken"); -} - -void -runtime·throwinit(void) -{ - // can only happen with linker skew - runtime·throw("recursive call during initialization - linker skew"); -} - -void -runtime·throw(int8 *s) -{ - runtime·startpanic(); - runtime·printf("throw: %s\n", s); - runtime·dopanic(0); - *(int32*)0 = 0; // not reached - runtime·exit(1); // even more not reached -} - -void -runtime·panicstring(int8 *s) -{ - Eface err; - - if(m->gcing) { - runtime·printf("panic: %s\n", s); - runtime·throw("panic during gc"); - } - runtime·newErrorString(runtime·gostringnocopy((byte*)s), &err); - runtime·panic(err); -} - int32 runtime·mcmp(byte *s1, byte *s2, uint32 n) { @@ -155,30 +55,21 @@ runtime·mchr(byte *p, byte c, byte *ep) return nil; } -uint32 -runtime·rnd(uint32 n, uint32 m) -{ - uint32 r; - - if(m > maxround) - m = maxround; - r = n % m; - if(r) - n += m-r; - return n; -} - static int32 argc; static uint8** argv; Slice os·Args; Slice syscall·envs; +void (*runtime·sysargs)(int32, uint8**); + void runtime·args(int32 c, uint8 **v) { argc = c; argv = v; + if(runtime·sysargs != nil) + runtime·sysargs(c, v); } int32 runtime·isplan9; @@ -219,33 +110,6 @@ runtime·goenvs_unix(void) syscall·envs.cap = n; } -byte* -runtime·getenv(int8 *s) -{ - int32 i, j, len; - byte *v, *bs; - String* envv; - int32 envc; - - bs = (byte*)s; - len = runtime·findnull(bs); - envv = (String*)syscall·envs.array; - envc = syscall·envs.len; - for(i=0; i<envc; i++){ - if(envv[i].len <= len) - continue; - v = envv[i].str; - for(j=0; j<len; j++) - if(bs[j] != v[j]) - goto nomatch; - if(v[len] != '=') - goto nomatch; - return v+len+1; - nomatch:; - } - return nil; -} - void runtime·getgoroot(String out) { @@ -267,6 +131,33 @@ runtime·atoi(byte *p) return n; } +static void +TestAtomic64(void) +{ + uint64 z64, x64; + + z64 = 42; + x64 = 0; + PREFETCH(&z64); + if(runtime·cas64(&z64, &x64, 1)) + runtime·throw("cas64 failed"); + if(x64 != 42) + runtime·throw("cas64 failed"); + if(!runtime·cas64(&z64, &x64, 1)) + runtime·throw("cas64 failed"); + if(x64 != 42 || z64 != 1) + runtime·throw("cas64 failed"); + if(runtime·atomicload64(&z64) != 
1) + runtime·throw("load64 failed"); + runtime·atomicstore64(&z64, (1ull<<40)+1); + if(runtime·atomicload64(&z64) != (1ull<<40)+1) + runtime·throw("store64 failed"); + if(runtime·xadd64(&z64, (1ull<<40)+1) != (2ull<<40)+2) + runtime·throw("xadd64 failed"); + if(runtime·atomicload64(&z64) != (2ull<<40)+2) + runtime·throw("xadd64 failed"); +} + void runtime·check(void) { @@ -342,10 +233,12 @@ runtime·check(void) runtime·throw("float32nan2"); if(!(i != i1)) runtime·throw("float32nan3"); + + TestAtomic64(); } void -runtime·Caller(int32 skip, uintptr retpc, String retfile, int32 retline, bool retbool) +runtime·Caller(intgo skip, uintptr retpc, String retfile, intgo retline, bool retbool) { Func *f, *g; uintptr pc; @@ -382,7 +275,7 @@ runtime·Caller(int32 skip, uintptr retpc, String retfile, int32 retline, bool r } void -runtime·Callers(int32 skip, Slice pc, int32 retn) +runtime·Callers(intgo skip, Slice pc, intgo retn) { // runtime.callers uses pc.array==nil as a signal // to print a stack trace. Pick off 0-length pc here @@ -413,3 +306,40 @@ runtime·fastrand1(void) m->fastrand = x; return x; } + +static Lock ticksLock; +static int64 ticks; + +int64 +runtime·tickspersecond(void) +{ + int64 res, t0, t1, c0, c1; + + res = (int64)runtime·atomicload64((uint64*)&ticks); + if(res != 0) + return ticks; + runtime·lock(&ticksLock); + res = ticks; + if(res == 0) { + t0 = runtime·nanotime(); + c0 = runtime·cputicks(); + runtime·usleep(100*1000); + t1 = runtime·nanotime(); + c1 = runtime·cputicks(); + if(t1 == t0) + t1++; + res = (c1-c0)*1000*1000*1000/(t1-t0); + if(res == 0) + res++; + runtime·atomicstore64((uint64*)&ticks, res); + } + runtime·unlock(&ticksLock); + return res; +} + +void +runtime∕pprof·runtime_cyclesPerSecond(int64 res) +{ + res = runtime·tickspersecond(); + FLUSH(&res); +} diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h index 6f5aea11d..08f43a69b 100644 --- a/src/pkg/runtime/runtime.h +++ b/src/pkg/runtime/runtime.h @@ -19,9 +19,13 @@ typedef double float64; #ifdef _64BIT typedef uint64 uintptr; typedef int64 intptr; +typedef int64 intgo; // Go's int +typedef uint64 uintgo; // Go's uint #else typedef uint32 uintptr; -typedef int32 intptr; +typedef int32 intptr; +typedef int32 intgo; // Go's int +typedef uint32 uintgo; // Go's uint #endif /* @@ -48,44 +52,53 @@ typedef struct G G; typedef struct Gobuf Gobuf; typedef union Lock Lock; typedef struct M M; +typedef struct P P; typedef struct Mem Mem; typedef union Note Note; typedef struct Slice Slice; typedef struct Stktop Stktop; typedef struct String String; +typedef struct FuncVal FuncVal; typedef struct SigTab SigTab; typedef struct MCache MCache; typedef struct FixAlloc FixAlloc; typedef struct Iface Iface; typedef struct Itab Itab; +typedef struct InterfaceType InterfaceType; typedef struct Eface Eface; typedef struct Type Type; typedef struct ChanType ChanType; typedef struct MapType MapType; typedef struct Defer Defer; +typedef struct DeferChunk DeferChunk; typedef struct Panic Panic; typedef struct Hmap Hmap; typedef struct Hchan Hchan; typedef struct Complex64 Complex64; typedef struct Complex128 Complex128; typedef struct WinCall WinCall; +typedef struct SEH SEH; typedef struct Timers Timers; typedef struct Timer Timer; +typedef struct GCStats GCStats; +typedef struct LFNode LFNode; +typedef struct ParFor ParFor; +typedef struct ParForThread ParForThread; +typedef struct CgoMal CgoMal; /* - * per-cpu declaration. - * "extern register" is a special storage class implemented by 6c, 8c, etc. 
- * on machines with lots of registers, it allocates a register that will not be - * used in generated code. on the x86, it allocates a slot indexed by a - * segment register. + * Per-CPU declaration. * - * amd64: allocated downwards from R15 - * x86: allocated upwards from 0(GS) - * arm: allocated downwards from R10 + * "extern register" is a special storage class implemented by 6c, 8c, etc. + * On the ARM, it is an actual register; elsewhere it is a slot in thread- + * local storage indexed by a segment register. See zasmhdr in + * src/cmd/dist/buildruntime.c for details, and be aware that the linker may + * make further OS-specific changes to the compiler's output. For example, + * 6l/linux rewrites 0(GS) as -16(FS). * - * every C file linked into a Go program must include runtime.h - * so that the C compiler knows to avoid other uses of these registers. - * the Go compilers know to avoid them. + * Every C file linked into a Go program must include runtime.h so that the + * C compiler (6c, 8c, etc.) knows to avoid other uses of these dedicated + * registers. The Go compiler (6g, 8g, etc.) knows to avoid them. */ extern register G* g; extern register M* m; @@ -105,14 +118,34 @@ enum Grunning, Gsyscall, Gwaiting, - Gmoribund, + Gmoribund_unused, // currently unused, but hardcoded in gdb scripts Gdead, }; enum { + // P status + Pidle, + Prunning, + Psyscall, + Pgcstop, + Pdead, +}; +enum +{ true = 1, false = 0, }; +enum +{ + PtrSize = sizeof(void*), +}; +enum +{ + // Per-M stack segment cache size. + StackCacheSize = 32, + // Global <-> per-M stack segment cache transfer batch size. + StackCacheBatch = 16, +}; /* * structures @@ -130,7 +163,12 @@ union Note struct String { byte* str; - int32 len; + intgo len; +}; +struct FuncVal +{ + void (*fn)(void); + // variable-size, fn-specific data here }; struct Iface { @@ -156,47 +194,61 @@ struct Complex128 struct Slice { // must not move anything byte* array; // actual data - uint32 len; // number of elements - uint32 cap; // allocated number of elements + uintgo len; // number of elements + uintgo cap; // allocated number of elements }; struct Gobuf { // The offsets of these fields are known to (hard-coded in) libmach. - byte* sp; + uintptr sp; byte* pc; G* g; }; +struct GCStats +{ + // the struct must consist of only uint64's, + // because it is casted to uint64[]. 
+ uint64 nhandoff; + uint64 nhandoffcnt; + uint64 nprocyield; + uint64 nosyield; + uint64 nsleep; +}; struct G { - byte* stackguard; // cannot move - also known to linker, libmach, runtime/cgo - byte* stackbase; // cannot move - also known to libmach, runtime/cgo + uintptr stackguard; // cannot move - also known to linker, libmach, runtime/cgo + uintptr stackbase; // cannot move - also known to libmach, runtime/cgo Defer* defer; Panic* panic; Gobuf sched; - byte* gcstack; // if status==Gsyscall, gcstack = stackbase to use during gc - byte* gcsp; // if status==Gsyscall, gcsp = sched.sp to use during gc - byte* gcguard; // if status==Gsyscall, gcguard = stackguard to use during gc - byte* stack0; - byte* entry; // initial function + uintptr gcstack; // if status==Gsyscall, gcstack = stackbase to use during gc + uintptr gcsp; // if status==Gsyscall, gcsp = sched.sp to use during gc + byte* gcpc; // if status==Gsyscall, gcpc = sched.pc to use during gc + uintptr gcguard; // if status==Gsyscall, gcguard = stackguard to use during gc + uintptr stack0; + FuncVal* fnstart; // initial function G* alllink; // on allg void* param; // passed parameter on wakeup int16 status; - int32 goid; + int64 goid; uint32 selgen; // valid sudog pointer int8* waitreason; // if status==Gwaiting G* schedlink; - bool readyonstop; bool ispanic; + bool issystem; + int8 raceignore; // ignore race detection events M* m; // for debuggers, but offset not hard-coded M* lockedm; - M* idlem; int32 sig; int32 writenbuf; byte* writebuf; + DeferChunk *dchunk; + DeferChunk *dchunknext; uintptr sigcode0; uintptr sigcode1; uintptr sigpc; uintptr gopc; // pc of go statement that created this goroutine + uintptr racectx; uintptr end[]; }; struct M @@ -213,43 +265,98 @@ struct M uintptr cret; // return value from C uint64 procid; // for debuggers, but offset not hard-coded G* gsignal; // signal-handling G - uint32 tls[8]; // thread-local storage (for 386 extern register) + uintptr tls[4]; // thread-local storage (for x86 extern register) + void (*mstartfn)(void); G* curg; // current running goroutine + P* p; // attached P for executing Go code (nil if not executing Go code) + P* nextp; int32 id; int32 mallocing; + int32 throwing; int32 gcing; int32 locks; int32 nomemprof; - int32 waitnextg; int32 dying; int32 profilehz; int32 helpgc; + bool blockingsyscall; + bool spinning; uint32 fastrand; - uint64 ncgocall; - Note havenextg; - G* nextg; + uint64 ncgocall; // number of cgo calls in total + int32 ncgo; // number of cgo calls currently in progress + CgoMal* cgomal; + Note park; M* alllink; // on allm M* schedlink; uint32 machport; // Return address for Mach IPC (OS X) MCache *mcache; - FixAlloc *stackalloc; + int32 stackinuse; + uint32 stackcachepos; + uint32 stackcachecnt; + void* stackcache[StackCacheSize]; G* lockedg; - G* idleg; uintptr createstack[32]; // Stack that created this thread. 
uint32 freglo[16]; // D[i] lsb and F[i] uint32 freghi[16]; // D[i] msb and F[i+16] uint32 fflag; // floating point compare flags + uint32 locked; // tracking for LockOSThread M* nextwaitm; // next M waiting for lock uintptr waitsema; // semaphore for parking on locks uint32 waitsemacount; uint32 waitsemalock; + GCStats gcstats; + bool racecall; + bool needextram; + void* racepc; + void (*waitunlockf)(Lock*); + Lock* waitlock; + uint32 moreframesize_minalloc; + + uintptr settype_buf[1024]; + uintptr settype_bufsize; #ifdef GOOS_windows void* thread; // thread handle #endif +#ifdef GOOS_plan9 + int8* notesig; +#endif + SEH* seh; uintptr end[]; }; +struct P +{ + Lock; + + uint32 status; // one of Pidle/Prunning/... + P* link; + uint32 tick; // incremented on every scheduler or system call + M* m; // back-link to associated M (nil if idle) + MCache* mcache; + + // Queue of runnable goroutines. + G** runq; + int32 runqhead; + int32 runqtail; + int32 runqsize; + + // Available G's (status == Gdead) + G* gfree; + int32 gfreecnt; + + byte pad[64]; +}; + +// The m->locked word holds a single bit saying whether +// external calls to LockOSThread are in effect, and then a counter +// of the internal nesting depth of lockOSThread / unlockOSThread. +enum +{ + LockExternal = 1, + LockInternal = 2, +}; + struct Stktop { // The offsets of these fields are known to (hard-coded in) libmach. @@ -288,8 +395,19 @@ struct Func uintptr pc0; // starting pc, ln for table int32 ln0; int32 frame; // stack frame size - int32 args; // number of 32-bit in/out args - int32 locals; // number of 32-bit locals + int32 args; // in/out args size + int32 locals; // locals size +}; + +// layout of Itab known to compilers +struct Itab +{ + InterfaceType* inter; + Type* type; + Itab* link; + int32 bad; + int32 unused; + void (*fun[])(void); }; struct WinCall @@ -301,6 +419,11 @@ struct WinCall uintptr r2; uintptr err; // error number }; +struct SEH +{ + void* prev; + void* handler; +}; #ifdef GOOS_windows enum { @@ -335,10 +458,46 @@ struct Timer // a well-behaved function and not block. int64 when; int64 period; - void (*f)(int64, Eface); + FuncVal *fv; Eface arg; }; +// Lock-free stack node. +struct LFNode +{ + LFNode *next; + uintptr pushcnt; +}; + +// Parallel for descriptor. +struct ParFor +{ + void (*body)(ParFor*, uint32); // executed for each element + uint32 done; // number of idle threads + uint32 nthr; // total number of threads + uint32 nthrmax; // maximum number of threads + uint32 thrseq; // thread id sequencer + uint32 cnt; // iteration space [0, cnt) + void *ctx; // arbitrary user context + bool wait; // if true, wait while all threads finish processing, + // otherwise parfor may return while other threads are still working + ParForThread *thr; // array of thread descriptors + // stats + uint64 nsteal; + uint64 nstealcnt; + uint64 nprocyield; + uint64 nosyield; + uint64 nsleep; +}; + +// Track memory allocated by code not written in Go during a cgo call, +// so that the garbage collector can see them. 
+struct CgoMal +{ + CgoMal *next; + byte *alloc; +}; + /* * defined macros * you need super-gopher-guru privilege @@ -347,6 +506,7 @@ struct Timer #define nelem(x) (sizeof(x)/sizeof((x)[0])) #define nil ((void*)0) #define offsetof(s,m) (uint32)(&(((s*)0)->m)) +#define ROUND(x, n) (((x)+(n)-1)&~((n)-1)) /* all-caps to mark as macro: it evaluates n twice */ /* * known to compiler @@ -430,12 +590,19 @@ void runtime·nilintercopy(uintptr, void*, void*); struct Defer { int32 siz; - bool nofree; + bool special; // not part of defer frame + bool free; // if special, free when done byte* argp; // where args were copied from byte* pc; - byte* fn; + FuncVal* fn; Defer* link; - byte args[8]; // padded to actual size + void* args[1]; // padded to actual size +}; + +struct DeferChunk +{ + DeferChunk *prev; + uintptr off; }; /* @@ -453,16 +620,21 @@ struct Panic * external data */ extern String runtime·emptystring; -G* runtime·allg; -G* runtime·lastg; -M* runtime·allm; +extern uintptr runtime·zerobase; +extern G* runtime·allg; +extern G* runtime·lastg; +extern M* runtime·allm; +extern P** runtime·allp; extern int32 runtime·gomaxprocs; extern bool runtime·singleproc; extern uint32 runtime·panicking; -extern int32 runtime·gcwaiting; // gc is waiting to run -int8* runtime·goos; -int32 runtime·ncpu; +extern uint32 runtime·gcwaiting; // gc is waiting to run +extern int8* runtime·goos; +extern int32 runtime·ncpu; extern bool runtime·iscgo; +extern void (*runtime·sysargs)(int32, uint8**); +extern uint32 runtime·maxstring; +extern uint32 runtime·Hchansize; /* * common functions and data @@ -481,7 +653,8 @@ int32 runtime·charntorune(int32*, uint8*, int32); #define FLUSH(x) USED(x) void runtime·gogo(Gobuf*, uintptr); -void runtime·gogocall(Gobuf*, void(*)(void)); +void runtime·gogocall(Gobuf*, void(*)(void), uintptr); +void runtime·gogocallfn(Gobuf*, FuncVal*); void runtime·gosave(Gobuf*); void runtime·lessstack(void); void runtime·goargs(void); @@ -490,7 +663,6 @@ void runtime·goenvs_unix(void); void* runtime·getu(void); void runtime·throw(int8*); void runtime·panicstring(int8*); -uint32 runtime·rnd(uint32, uint32); void runtime·prints(int8*); void runtime·printf(int8*, ...); byte* runtime·mchr(byte*, byte, byte*); @@ -499,8 +671,8 @@ void runtime·memmove(void*, void*, uint32); void* runtime·mal(uintptr); String runtime·catstring(String, String); String runtime·gostring(byte*); -String runtime·gostringn(byte*, int32); -Slice runtime·gobytes(byte*, int32); +String runtime·gostringn(byte*, intgo); +Slice runtime·gobytes(byte*, intgo); String runtime·gostringnocopy(byte*); String runtime·gostringw(uint16*); void runtime·initsig(void); @@ -512,38 +684,47 @@ void runtime·tracebackothers(G*); int32 runtime·write(int32, void*, int32); int32 runtime·mincore(void*, uintptr, byte*); bool runtime·cas(uint32*, uint32, uint32); +bool runtime·cas64(uint64*, uint64*, uint64); bool runtime·casp(void**, void*, void*); // Don't confuse with XADD x86 instruction, // this one is actually 'addx', that is, add-and-fetch. 
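// A sketch of the contract, assuming the whole update happens as a single
// atomic step; note the return value is the NEW sum, where x86 XADD would
// hand back the old one:
//
//	uint32
//	xadd_spec(uint32 *p, int32 delta)	// illustrative, not the real code
//	{
//		*p += delta;
//		return *p;
//	}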
uint32 runtime·xadd(uint32 volatile*, int32); +uint64 runtime·xadd64(uint64 volatile*, int64); uint32 runtime·xchg(uint32 volatile*, uint32); uint32 runtime·atomicload(uint32 volatile*); void runtime·atomicstore(uint32 volatile*, uint32); +void runtime·atomicstore64(uint64 volatile*, uint64); +uint64 runtime·atomicload64(uint64 volatile*); void* runtime·atomicloadp(void* volatile*); void runtime·atomicstorep(void* volatile*, void*); -void runtime·jmpdefer(byte*, void*); +void runtime·jmpdefer(FuncVal*, void*); void runtime·exit1(int32); void runtime·ready(G*); byte* runtime·getenv(int8*); int32 runtime·atoi(byte*); -void runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)); -void runtime·signalstack(byte*, int32); +void runtime·newosproc(M *mp, void *stk); +void runtime·mstart(void); G* runtime·malg(int32); void runtime·asminit(void); +void runtime·mpreinit(M*); void runtime·minit(void); +void runtime·unminit(void); +void runtime·signalstack(byte*, int32); Func* runtime·findfunc(uintptr); int32 runtime·funcline(Func*, uintptr); void* runtime·stackalloc(uint32); void runtime·stackfree(void*, uintptr); MCache* runtime·allocmcache(void); +void runtime·freemcache(MCache*); void runtime·mallocinit(void); +void runtime·mprofinit(void); bool runtime·ifaceeq_c(Iface, Iface); bool runtime·efaceeq_c(Eface, Eface); -uintptr runtime·ifacehash(Iface); -uintptr runtime·efacehash(Eface); +uintptr runtime·ifacehash(Iface, uintptr); +uintptr runtime·efacehash(Eface, uintptr); void* runtime·malloc(uintptr size); void runtime·free(void *v); -bool runtime·addfinalizer(void*, void(*fn)(void*), int32); +bool runtime·addfinalizer(void*, FuncVal *fn, uintptr); void runtime·runpanic(Panic*); void* runtime·getcallersp(void*); int32 runtime·mcount(void); @@ -551,27 +732,35 @@ int32 runtime·gcount(void); void runtime·mcall(void(*)(G*)); uint32 runtime·fastrand1(void); +void runtime·setmg(M*, G*); +void runtime·newextram(void); void runtime·exit(int32); void runtime·breakpoint(void); void runtime·gosched(void); -void runtime·tsleep(int64); +void runtime·park(void(*)(Lock*), Lock*, int8*); +void runtime·tsleep(int64, int8*); M* runtime·newm(void); void runtime·goexit(void); void runtime·asmcgocall(void (*fn)(void*), void*); void runtime·entersyscall(void); +void runtime·entersyscallblock(void); void runtime·exitsyscall(void); -G* runtime·newproc1(byte*, byte*, int32, int32, void*); +G* runtime·newproc1(FuncVal*, byte*, int32, int32, void*); bool runtime·sigsend(int32 sig); int32 runtime·callers(int32, uintptr*, int32); int32 runtime·gentraceback(byte*, byte*, byte*, G*, int32, uintptr*, int32); int64 runtime·nanotime(void); void runtime·dopanic(int32); void runtime·startpanic(void); +void runtime·unwindstack(G*, byte*); void runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp); void runtime·resetcpuprofiler(int32); void runtime·setcpuprofilerate(void(*)(uintptr*, int32), int32); void runtime·usleep(uint32); int64 runtime·cputicks(void); +int64 runtime·tickspersecond(void); +void runtime·blockevent(int64, int32); +extern int64 runtime·blockprofilerate; #pragma varargck argpos runtime·printf 1 #pragma varargck type "d" int32 @@ -589,7 +778,7 @@ int64 runtime·cputicks(void); #pragma varargck type "S" String void runtime·stoptheworld(void); -void runtime·starttheworld(bool); +void runtime·starttheworld(void); extern uint32 runtime·worldsema; /* @@ -636,6 +825,27 @@ void runtime·futexsleep(uint32*, uint32, int64); void runtime·futexwakeup(uint32*, uint32); /* + * Lock-free stack. 
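 * (A design note, inferred from LFNode above: the head word packs the
 * top-of-stack pointer together with the node's pushcnt and is updated
 * with cas64, so a node that is popped, reused, and pushed again is not
 * mistaken for its earlier self, avoiding the classic ABA hazard.)
 * Usage sketch:
 *	runtime·lfstackpush(&head, &node);
 *	node = runtime·lfstackpop(&head);	// nil when empty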
+ * Initialize uint64 head to 0, compare with 0 to test for emptiness. + * The stack does not keep pointers to nodes, + * so they can be garbage collected if there are no other pointers to nodes. + */ +void runtime·lfstackpush(uint64 *head, LFNode *node); +LFNode* runtime·lfstackpop(uint64 *head); + +/* + * Parallel for over [0, n). + * body() is executed for each iteration. + * nthr - total number of worker threads. + * ctx - arbitrary user context. + * if wait=true, threads return from parfor() when all work is done; + * otherwise, threads can return while other threads are still finishing processing. + */ +ParFor* runtime·parforalloc(uint32 nthrmax); +void runtime·parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32)); +void runtime·parfordo(ParFor *desc); + +/* * This is consistent across Linux and BSD. * If a new OS is added that is different, move this to * $GOOS/$GOARCH/defs.h. @@ -645,6 +855,9 @@ void runtime·futexwakeup(uint32*, uint32); /* * low level C-called */ +// for mmap, we only pass the lower 32 bits of file offset to the +// assembly routine; the higher bits (if required), should be provided +// by the assembly routine as 0. uint8* runtime·mmap(byte*, uintptr, int32, int32, int32, uint32); void runtime·munmap(byte*, uintptr); void runtime·madvise(byte*, uintptr, int32); @@ -656,6 +869,7 @@ void* runtime·getcallerpc(void*); * runtime go-called */ void runtime·printbool(bool); +void runtime·printbyte(int8); void runtime·printfloat(float64); void runtime·printint(int64); void runtime·printiface(Iface); @@ -667,7 +881,7 @@ void runtime·printuint(uint64); void runtime·printhex(uint64); void runtime·printslice(Slice); void runtime·printcomplex(Complex128); -void reflect·call(byte*, byte*, uint32); +void reflect·call(FuncVal*, byte*, uint32); void runtime·panic(Eface); void runtime·panicindex(void); void runtime·panicslice(void); @@ -708,8 +922,8 @@ void runtime·semrelease(uint32*); int32 runtime·gomaxprocsfunc(int32 n); void runtime·procyield(uint32); void runtime·osyield(void); -void runtime·LockOSThread(void); -void runtime·UnlockOSThread(void); +void runtime·lockOSThread(void); +void runtime·unlockOSThread(void); void runtime·mapassign(MapType*, Hmap*, byte*, byte*); void runtime·mapaccess(MapType*, Hmap*, byte*, byte*, bool*); @@ -719,13 +933,11 @@ void runtime·mapiterkeyvalue(struct hash_iter*, void*, void*); Hmap* runtime·makemap_c(MapType*, int64); Hchan* runtime·makechan_c(ChanType*, int64); -void runtime·chansend(ChanType*, Hchan*, byte*, bool*); +void runtime·chansend(ChanType*, Hchan*, byte*, bool*, void*); void runtime·chanrecv(ChanType*, Hchan*, byte*, bool*, bool*); -int32 runtime·chanlen(Hchan*); -int32 runtime·chancap(Hchan*); -bool runtime·showframe(Func*); +bool runtime·showframe(Func*, bool); -void runtime·ifaceE2I(struct InterfaceType*, Eface, Iface*); +void runtime·ifaceE2I(InterfaceType*, Eface, Iface*); uintptr runtime·memlimit(void); @@ -738,3 +950,17 @@ uintptr runtime·memlimit(void); // is forced to deliver the signal to a thread that's actually running. // This is a no-op on other systems. 
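/*
 * Usage sketch for the parallel-for declared above, assuming each of the
 * nthr worker threads calls parfordo on the same descriptor:
 *
 *	static void
 *	body(ParFor *desc, uint32 i)
 *	{
 *		((uint32*)desc->ctx)[i] = i;	// illustrative per-index work
 *	}
 *	...
 *	desc = runtime·parforalloc(nthrmax);
 *	runtime·parforsetup(desc, nthr, n, out, true, body);
 *	runtime·parfordo(desc);	// run in each worker; returns when done
 */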
void runtime·setprof(bool); + +// float.c +extern float64 runtime·nan; +extern float64 runtime·posinf; +extern float64 runtime·neginf; +extern uint64 ·nan; +extern uint64 ·posinf; +extern uint64 ·neginf; +#define ISNAN(f) ((f) != (f)) + +enum +{ + UseSpanType = 1, +}; diff --git a/src/pkg/runtime/runtime1.goc b/src/pkg/runtime/runtime1.goc index 667131c1e..d2c38dfef 100644 --- a/src/pkg/runtime/runtime1.goc +++ b/src/pkg/runtime/runtime1.goc @@ -5,10 +5,10 @@ package runtime #include "runtime.h" -func GOMAXPROCS(n int32) (ret int32) { +func GOMAXPROCS(n int) (ret int) { ret = runtime·gomaxprocsfunc(n); } -func NumCPU() (ret int32) { +func NumCPU() (ret int) { ret = runtime·ncpu; } diff --git a/src/pkg/runtime/runtime_test.go b/src/pkg/runtime/runtime_test.go index d68b363e9..e45879349 100644 --- a/src/pkg/runtime/runtime_test.go +++ b/src/pkg/runtime/runtime_test.go @@ -38,3 +38,44 @@ func BenchmarkIfaceCmpNil100(b *testing.B) { } } } + +func BenchmarkDefer(b *testing.B) { + for i := 0; i < b.N; i++ { + defer1() + } +} + +func defer1() { + defer func(x, y, z int) { + if recover() != nil || x != 1 || y != 2 || z != 3 { + panic("bad recover") + } + }(1, 2, 3) + return +} + +func BenchmarkDefer10(b *testing.B) { + for i := 0; i < b.N/10; i++ { + defer2() + } +} + +func defer2() { + for i := 0; i < 10; i++ { + defer func(x, y, z int) { + if recover() != nil || x != 1 || y != 2 || z != 3 { + panic("bad recover") + } + }(1, 2, 3) + } +} + +func BenchmarkDeferMany(b *testing.B) { + for i := 0; i < b.N; i++ { + defer func(x, y, z int) { + if recover() != nil || x != 1 || y != 2 || z != 3 { + panic("bad recover") + } + }(1, 2, 3) + } +} diff --git a/src/pkg/runtime/sema.goc b/src/pkg/runtime/sema.goc index 2300c56aa..c4b5247b3 100644 --- a/src/pkg/runtime/sema.goc +++ b/src/pkg/runtime/sema.goc @@ -24,30 +24,33 @@ package sync typedef struct Sema Sema; struct Sema { - uint32 volatile *addr; - G *g; - Sema *prev; - Sema *next; + uint32 volatile* addr; + G* g; + int64 releasetime; + Sema* prev; + Sema* next; }; typedef struct SemaRoot SemaRoot; struct SemaRoot { - Lock; - Sema *head; - Sema *tail; + Lock; + Sema* head; + Sema* tail; // Number of waiters. Read w/o the lock. - uint32 volatile nwait; + uint32 volatile nwait; }; // Prime to not correlate with any user patterns. #define SEMTABLESZ 251 -static union +union semtable { SemaRoot; uint8 pad[CacheLineSize]; -} semtable[SEMTABLESZ]; +}; +#pragma dataflag 16 /* mark semtable as 'no pointers', hiding from garbage collector */ +static union semtable semtable[SEMTABLESZ]; static SemaRoot* semroot(uint32 *addr) @@ -95,12 +98,13 @@ cansemacquire(uint32 *addr) return 0; } -void -runtime·semacquire(uint32 volatile *addr) +static void +semacquireimpl(uint32 volatile *addr, int32 profile) { - Sema s; + Sema s; // Needs to be allocated on stack, otherwise garbage collector could deallocate it SemaRoot *root; - + int64 t0; + // Easy case. if(cansemacquire(addr)) return; @@ -112,6 +116,12 @@ runtime·semacquire(uint32 volatile *addr) // sleep // (waiter descriptor is dequeued by signaler) root = semroot(addr); + t0 = 0; + s.releasetime = 0; + if(profile && runtime·blockprofilerate > 0) { + t0 = runtime·cputicks(); + s.releasetime = -1; + } for(;;) { runtime·lock(root); // Add ourselves to nwait to disable "easy case" in semrelease. @@ -125,16 +135,22 @@ runtime·semacquire(uint32 volatile *addr) // Any semrelease after the cansemacquire knows we're waiting // (we set nwait above), so go to sleep. 
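/* The fast paths the two sides race on, a sketch assuming the atomic
 * primitives declared in runtime.h; nwait was bumped before the re-check
 * above, so any release that sees nwait != 0 must take the slow path and
 * ready a queued waiter:
 *
 *	// acquire (cansemacquire):
 *	v = runtime·atomicload(addr);
 *	if(v > 0 && runtime·cas(addr, v, v-1))
 *		return 1;	// got it without sleeping
 *	// release:
 *	runtime·xadd(addr, 1);
 *	if(runtime·atomicload(&root->nwait) == 0)
 *		return;		// no one to wake
 */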
semqueue(root, addr, &s); - g->status = Gwaiting; - g->waitreason = "semacquire"; - runtime·unlock(root); - runtime·gosched(); - if(cansemacquire(addr)) + runtime·park(runtime·unlock, root, "semacquire"); + if(cansemacquire(addr)) { + if(t0) + runtime·blockevent(s.releasetime - t0, 3); return; + } } } void +runtime·semacquire(uint32 volatile *addr) +{ + semacquireimpl(addr, 0); +} + +void runtime·semrelease(uint32 volatile *addr) { Sema *s; @@ -165,12 +181,15 @@ runtime·semrelease(uint32 volatile *addr) } } runtime·unlock(root); - if(s) + if(s) { + if(s->releasetime) + s->releasetime = runtime·cputicks(); runtime·ready(s->g); + } } func runtime_Semacquire(addr *uint32) { - runtime·semacquire(addr); + semacquireimpl(addr, 1); } func runtime_Semrelease(addr *uint32) { diff --git a/src/pkg/runtime/signal_darwin_386.c b/src/pkg/runtime/signal_darwin_386.c index 9e986352b..132ca931b 100644 --- a/src/pkg/runtime/signal_darwin_386.c +++ b/src/pkg/runtime/signal_darwin_386.c @@ -47,7 +47,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) t = &runtime·sigtab[sig]; if(info->si_code != SI_USER && (t->flags & SigPanic)) { - if(gp == nil) + if(gp == nil || gp == m->g0) goto Throw; // Work around Leopard bug that doesn't set FPE_INTDIV. // Look at instruction to see if it is a divide. @@ -101,7 +101,11 @@ Throw: runtime·printf("%s\n", runtime·sigtab[sig].name); } - runtime·printf("pc: %x\n", r->eip); + runtime·printf("PC=%x\n", r->eip); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ @@ -121,6 +125,8 @@ runtime·signalstack(byte *p, int32 n) st.ss_sp = p; st.ss_size = n; st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; runtime·sigaltstack(&st, nil); } @@ -129,6 +135,15 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) { Sigaction sa; + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. + if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + runtime·sigaction(i, nil, &sa); + if(*(void**)sa.__sigaction_u == SIG_IGN) + return; + } + runtime·memclr((byte*)&sa, sizeof sa); sa.sa_flags = SA_SIGINFO|SA_ONSTACK; if(restart) diff --git a/src/pkg/runtime/signal_darwin_amd64.c b/src/pkg/runtime/signal_darwin_amd64.c index d9c5f48e7..4b7256bf4 100644 --- a/src/pkg/runtime/signal_darwin_amd64.c +++ b/src/pkg/runtime/signal_darwin_amd64.c @@ -55,7 +55,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) t = &runtime·sigtab[sig]; if(info->si_code != SI_USER && (t->flags & SigPanic)) { - if(gp == nil) + if(gp == nil || gp == m->g0) goto Throw; // Work around Leopard bug that doesn't set FPE_INTDIV. // Look at instruction to see if it is a divide. 
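/*
 * The SIGHUP probe added to every setsig above and below follows one
 * pattern: query the inherited disposition first, and if it is SIG_IGN
 * (as under nohup), leave it alone instead of installing the Go handler.
 * In outline:
 *
 *	if(i == SIGHUP) {
 *		runtime·memclr((byte*)&sa, sizeof sa);
 *		runtime·sigaction(i, nil, &sa);	// read, do not modify
 *		if(current handler == SIG_IGN)	// field name is per-OS
 *			return;
 *	}
 */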
@@ -111,7 +111,11 @@ Throw: runtime·printf("%s\n", runtime·sigtab[sig].name); } - runtime·printf("pc: %X\n", r->rip); + runtime·printf("PC=%X\n", r->rip); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ @@ -131,6 +135,8 @@ runtime·signalstack(byte *p, int32 n) st.ss_sp = p; st.ss_size = n; st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; runtime·sigaltstack(&st, nil); } @@ -139,6 +145,15 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) { Sigaction sa; + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. + if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + runtime·sigaction(i, nil, &sa); + if(*(void**)sa.__sigaction_u == SIG_IGN) + return; + } + runtime·memclr((byte*)&sa, sizeof sa); sa.sa_flags = SA_SIGINFO|SA_ONSTACK; if(restart) diff --git a/src/pkg/runtime/signal_freebsd_386.c b/src/pkg/runtime/signal_freebsd_386.c index 80da95d98..254e5e277 100644 --- a/src/pkg/runtime/signal_freebsd_386.c +++ b/src/pkg/runtime/signal_freebsd_386.c @@ -15,7 +15,7 @@ typedef struct sigaction { void (*__sa_sigaction)(int32, Siginfo*, void *); } __sigaction_u; /* signal handler */ int32 sa_flags; /* see signal options below */ - int64 sa_mask; /* signal mask to apply */ + Sigset sa_mask; /* signal mask to apply */ } Sigaction; void @@ -54,7 +54,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) t = &runtime·sigtab[sig]; if(info->si_code != SI_USER && (t->flags & SigPanic)) { - if(gp == nil) + if(gp == nil || gp == m->g0) goto Throw; // Make it look like a call to the signal func. // Have to pass arguments out of band since @@ -97,6 +97,10 @@ Throw: runtime·printf("%s\n", runtime·sigtab[sig].name); runtime·printf("PC=%X\n", r->mc_eip); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ @@ -116,6 +120,8 @@ runtime·signalstack(byte *p, int32 n) st.ss_sp = (int8*)p; st.ss_size = n; st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; runtime·sigaltstack(&st, nil); } @@ -124,11 +130,23 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) { Sigaction sa; + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. 
+ if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + runtime·sigaction(i, nil, &sa); + if(sa.__sigaction_u.__sa_sigaction == SIG_IGN) + return; + } + runtime·memclr((byte*)&sa, sizeof sa); sa.sa_flags = SA_SIGINFO|SA_ONSTACK; if(restart) sa.sa_flags |= SA_RESTART; - sa.sa_mask = ~0ULL; + sa.sa_mask.__bits[0] = ~(uint32)0; + sa.sa_mask.__bits[1] = ~(uint32)0; + sa.sa_mask.__bits[2] = ~(uint32)0; + sa.sa_mask.__bits[3] = ~(uint32)0; if (fn == runtime·sighandler) fn = (void*)runtime·sigtramp; sa.__sigaction_u.__sa_sigaction = (void*)fn; diff --git a/src/pkg/runtime/signal_freebsd_amd64.c b/src/pkg/runtime/signal_freebsd_amd64.c index e4307682f..7dbf36075 100644 --- a/src/pkg/runtime/signal_freebsd_amd64.c +++ b/src/pkg/runtime/signal_freebsd_amd64.c @@ -15,7 +15,7 @@ typedef struct sigaction { void (*__sa_sigaction)(int32, Siginfo*, void *); } __sigaction_u; /* signal handler */ int32 sa_flags; /* see signal options below */ - int64 sa_mask; /* signal mask to apply */ + Sigset sa_mask; /* signal mask to apply */ } Sigaction; void @@ -62,7 +62,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) t = &runtime·sigtab[sig]; if(info->si_code != SI_USER && (t->flags & SigPanic)) { - if(gp == nil) + if(gp == nil || gp == m->g0) goto Throw; // Make it look like a call to the signal func. // Have to pass arguments out of band since @@ -105,6 +105,10 @@ Throw: runtime·printf("%s\n", runtime·sigtab[sig].name); runtime·printf("PC=%X\n", r->mc_rip); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ @@ -124,6 +128,8 @@ runtime·signalstack(byte *p, int32 n) st.ss_sp = (int8*)p; st.ss_size = n; st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; runtime·sigaltstack(&st, nil); } @@ -132,11 +138,23 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) { Sigaction sa; + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. + if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + runtime·sigaction(i, nil, &sa); + if(sa.__sigaction_u.__sa_sigaction == SIG_IGN) + return; + } + runtime·memclr((byte*)&sa, sizeof sa); sa.sa_flags = SA_SIGINFO|SA_ONSTACK; if(restart) sa.sa_flags |= SA_RESTART; - sa.sa_mask = ~0ULL; + sa.sa_mask.__bits[0] = ~(uint32)0; + sa.sa_mask.__bits[1] = ~(uint32)0; + sa.sa_mask.__bits[2] = ~(uint32)0; + sa.sa_mask.__bits[3] = ~(uint32)0; if (fn == runtime·sighandler) fn = (void*)runtime·sigtramp; sa.__sigaction_u.__sa_sigaction = (void*)fn; diff --git a/src/pkg/runtime/signal_freebsd_arm.c b/src/pkg/runtime/signal_freebsd_arm.c new file mode 100644 index 000000000..50c3221bb --- /dev/null +++ b/src/pkg/runtime/signal_freebsd_arm.c @@ -0,0 +1,193 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
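/*
 * This new file gives FreeBSD/ARM the same treatment as the other
 * signal_*.c files; the r0..r15/cpsr macros below map ARM registers onto
 * mcontext __gregs slots, so e.g. r->r13 reads the stack pointer and
 * r->r15 the program counter.
 */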
+ +#include "runtime.h" +#include "defs_GOOS_GOARCH.h" +#include "signals_GOOS.h" +#include "os_GOOS.h" + +#define r0 __gregs[0] +#define r1 __gregs[1] +#define r2 __gregs[2] +#define r3 __gregs[3] +#define r4 __gregs[4] +#define r5 __gregs[5] +#define r6 __gregs[6] +#define r7 __gregs[7] +#define r8 __gregs[8] +#define r9 __gregs[9] +#define r10 __gregs[10] +#define r11 __gregs[11] +#define r12 __gregs[12] +#define r13 __gregs[13] +#define r14 __gregs[14] +#define r15 __gregs[15] +#define cpsr __gregs[16] + +void +runtime·dumpregs(Mcontext *r) +{ + runtime·printf("r0 %x\n", r->r0); + runtime·printf("r1 %x\n", r->r1); + runtime·printf("r2 %x\n", r->r2); + runtime·printf("r3 %x\n", r->r3); + runtime·printf("r4 %x\n", r->r4); + runtime·printf("r5 %x\n", r->r5); + runtime·printf("r6 %x\n", r->r6); + runtime·printf("r7 %x\n", r->r7); + runtime·printf("r8 %x\n", r->r8); + runtime·printf("r9 %x\n", r->r9); + runtime·printf("r10 %x\n", r->r10); + runtime·printf("fp %x\n", r->r11); + runtime·printf("ip %x\n", r->r12); + runtime·printf("sp %x\n", r->r13); + runtime·printf("lr %x\n", r->r14); + runtime·printf("pc %x\n", r->r15); + runtime·printf("cpsr %x\n", r->cpsr); +} + +extern void runtime·sigtramp(void); + +typedef struct sigaction { + union { + void (*__sa_handler)(int32); + void (*__sa_sigaction)(int32, Siginfo*, void *); + } __sigaction_u; /* signal handler */ + int32 sa_flags; /* see signal options below */ + Sigset sa_mask; /* signal mask to apply */ +} Sigaction; + +void +runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) +{ + Ucontext *uc; + Mcontext *r; + SigTab *t; + + uc = context; + r = &uc->uc_mcontext; + + if(sig == SIGPROF) { + runtime·sigprof((uint8*)r->r15, (uint8*)r->r13, (uint8*)r->r14, gp); + return; + } + + t = &runtime·sigtab[sig]; + if(info->si_code != SI_USER && (t->flags & SigPanic)) { + if(gp == nil || gp == m->g0) + goto Throw; + // Make it look like a call to the signal func. + // Have to pass arguments out of band since + // augmenting the stack frame would break + // the unwinding code. + gp->sig = sig; + gp->sigcode0 = info->si_code; + gp->sigcode1 = (uintptr)info->si_addr; + gp->sigpc = r->r15; + + // Only push runtime·sigpanic if r->mc_rip != 0. + // If r->mc_rip == 0, probably panicked because of a + // call to a nil func. Not pushing that onto sp will + // make the trace look like a call to runtime·sigpanic instead. + // (Otherwise the trace will end at runtime·sigpanic and we + // won't get to see who faulted.) 
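/* ("r->mc_rip" in the comment above is an amd64-ism carried along with
 * the comment; on ARM the test below is against r->r15, the PC.) The
 * effect of the next lines is to fabricate a call frame:
 *
 *	lr = old pc;            // return address names the faulting func
 *	pc = runtime·sigpanic;  // resume as if sigpanic had been called
 */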
+ if(r->r15 != 0) + r->r14 = r->r15; + // In case we are panicking from external C code + r->r10 = (uintptr)gp; + r->r9 = (uintptr)m; + r->r15 = (uintptr)runtime·sigpanic; + return; + } + + if(info->si_code == SI_USER || (t->flags & SigNotify)) + if(runtime·sigsend(sig)) + return; + if(t->flags & SigKill) + runtime·exit(2); + if(!(t->flags & SigThrow)) + return; + +Throw: + runtime·startpanic(); + + if(sig < 0 || sig >= NSIG) + runtime·printf("Signal %d\n", sig); + else + runtime·printf("%s\n", runtime·sigtab[sig].name); + + runtime·printf("PC=%x\n", r->r15); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } + runtime·printf("\n"); + + if(runtime·gotraceback()){ + runtime·traceback((void*)r->r15, (void*)r->r13, (void*)r->r14, gp); + runtime·tracebackothers(gp); + runtime·printf("\n"); + runtime·dumpregs(r); + } + +// breakpoint(); + runtime·exit(2); +} + +void +runtime·signalstack(byte *p, int32 n) +{ + Sigaltstack st; + + st.ss_sp = (uint8*)p; + st.ss_size = n; + st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; + runtime·sigaltstack(&st, nil); +} + +void +runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) +{ + Sigaction sa; + + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. + if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + runtime·sigaction(i, nil, &sa); + if(sa.__sigaction_u.__sa_sigaction == SIG_IGN) + return; + } + + runtime·memclr((byte*)&sa, sizeof sa); + sa.sa_flags = SA_SIGINFO|SA_ONSTACK; + if(restart) + sa.sa_flags |= SA_RESTART; + sa.sa_mask.__bits[0] = ~(uint32)0; + sa.sa_mask.__bits[1] = ~(uint32)0; + sa.sa_mask.__bits[2] = ~(uint32)0; + sa.sa_mask.__bits[3] = ~(uint32)0; + if (fn == runtime·sighandler) + fn = (void*)runtime·sigtramp; + sa.__sigaction_u.__sa_sigaction = (void*)fn; + runtime·sigaction(i, &sa, nil); +} + +void +runtime·checkgoarm(void) +{ + // TODO(minux) +} + +#pragma textflag 7 +int64 +runtime·cputicks(void) +{ + // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand1(). + // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler. + // TODO: need more entropy to better seed fastrand1. + return runtime·nanotime(); +} diff --git a/src/pkg/runtime/signal_linux_386.c b/src/pkg/runtime/signal_linux_386.c index b154ad887..9b45ec3bd 100644 --- a/src/pkg/runtime/signal_linux_386.c +++ b/src/pkg/runtime/signal_linux_386.c @@ -50,7 +50,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) t = &runtime·sigtab[sig]; if(info->si_code != SI_USER && (t->flags & SigPanic)) { - if(gp == nil) + if(gp == nil || gp == m->g0) goto Throw; // Make it look like a call to the signal func. // Have to pass arguments out of band since @@ -93,6 +93,10 @@ Throw: runtime·printf("%s\n", runtime·sigtab[sig].name); runtime·printf("PC=%X\n", r->eip); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ @@ -112,6 +116,8 @@ runtime·signalstack(byte *p, int32 n) st.ss_sp = p; st.ss_size = n; st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; runtime·sigaltstack(&st, nil); } @@ -120,6 +126,16 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) { Sigaction sa; + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. 
+ if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + if(runtime·rt_sigaction(i, nil, &sa, sizeof(sa.sa_mask)) != 0) + runtime·throw("rt_sigaction read failure"); + if(sa.k_sa_handler == SIG_IGN) + return; + } + runtime·memclr((byte*)&sa, sizeof sa); sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER; if(restart) @@ -129,7 +145,8 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) if(fn == runtime·sighandler) fn = (void*)runtime·sigtramp; sa.k_sa_handler = fn; - runtime·rt_sigaction(i, &sa, nil, 8); + if(runtime·rt_sigaction(i, &sa, nil, sizeof(sa.sa_mask)) != 0) + runtime·throw("rt_sigaction failure"); } #define AT_NULL 0 diff --git a/src/pkg/runtime/signal_linux_amd64.c b/src/pkg/runtime/signal_linux_amd64.c index 14095ba61..c4e39a6ab 100644 --- a/src/pkg/runtime/signal_linux_amd64.c +++ b/src/pkg/runtime/signal_linux_amd64.c @@ -60,7 +60,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) t = &runtime·sigtab[sig]; if(info->si_code != SI_USER && (t->flags & SigPanic)) { - if(gp == nil) + if(gp == nil || gp == m->g0) goto Throw; // Make it look like a call to the signal func. // Have to pass arguments out of band since @@ -103,6 +103,10 @@ Throw: runtime·printf("%s\n", runtime·sigtab[sig].name); runtime·printf("PC=%X\n", r->rip); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ @@ -122,6 +126,8 @@ runtime·signalstack(byte *p, int32 n) st.ss_sp = p; st.ss_size = n; st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; runtime·sigaltstack(&st, nil); } @@ -130,14 +136,27 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) { Sigaction sa; + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. + if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + if(runtime·rt_sigaction(i, nil, &sa, sizeof(sa.sa_mask)) != 0) + runtime·throw("rt_sigaction read failure"); + if(sa.sa_handler == SIG_IGN) + return; + } + runtime·memclr((byte*)&sa, sizeof sa); sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER; if(restart) sa.sa_flags |= SA_RESTART; sa.sa_mask = ~0ULL; + // TODO(adonovan): Linux manpage says "sa_restorer element is + // obsolete and should not be used". Avoid it here, and test. sa.sa_restorer = (void*)runtime·sigreturn; if(fn == runtime·sighandler) fn = (void*)runtime·sigtramp; sa.sa_handler = fn; - runtime·rt_sigaction(i, &sa, nil, 8); + if(runtime·rt_sigaction(i, &sa, nil, sizeof(sa.sa_mask)) != 0) + runtime·throw("rt_sigaction failure"); } diff --git a/src/pkg/runtime/signal_linux_arm.c b/src/pkg/runtime/signal_linux_arm.c index 176a4ce56..c26caa7cd 100644 --- a/src/pkg/runtime/signal_linux_arm.c +++ b/src/pkg/runtime/signal_linux_arm.c @@ -57,7 +57,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) t = &runtime·sigtab[sig]; if(info->si_code != SI_USER && (t->flags & SigPanic)) { - if(gp == nil) + if(gp == nil || gp == m->g0) goto Throw; // Make it look like a call to the signal func. // Have to pass arguments out of band since @@ -68,13 +68,22 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) gp->sigcode1 = r->fault_address; gp->sigpc = r->arm_pc; - // If this is a leaf function, we do smash LR, - // but we're not going back there anyway. 
- // Don't bother smashing if r->arm_pc is 0, - // which is probably a call to a nil func: the - // old link register is more useful in the stack trace. + // We arrange lr, and pc to pretend the panicking + // function calls sigpanic directly. + // Always save LR to stack so that panics in leaf + // functions are correctly handled. This smashes + // the stack frame but we're not going back there + // anyway. + r->arm_sp -= 4; + *(uint32 *)r->arm_sp = r->arm_lr; + // Don't bother saving PC if it's zero, which is + // probably a call to a nil func: the old link register + // is more useful in the stack trace. if(r->arm_pc != 0) r->arm_lr = r->arm_pc; + // In case we are panicking from external C code + r->arm_r10 = (uintptr)gp; + r->arm_r9 = (uintptr)m; r->arm_pc = (uintptr)runtime·sigpanic; return; } @@ -98,6 +107,10 @@ Throw: runtime·printf("%s\n", runtime·sigtab[sig].name); runtime·printf("PC=%x\n", r->arm_pc); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ @@ -119,6 +132,8 @@ runtime·signalstack(byte *p, int32 n) st.ss_sp = p; st.ss_size = n; st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; runtime·sigaltstack(&st, nil); } @@ -127,6 +142,16 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) { Sigaction sa; + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. + if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + if(runtime·rt_sigaction(i, nil, &sa, sizeof(sa.sa_mask)) != 0) + runtime·throw("rt_sigaction read failure"); + if(sa.sa_handler == SIG_IGN) + return; + } + runtime·memclr((byte*)&sa, sizeof sa); sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER; if(restart) @@ -136,5 +161,81 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) if(fn == runtime·sighandler) fn = (void*)runtime·sigtramp; sa.sa_handler = fn; - runtime·rt_sigaction(i, &sa, nil, 8); + if(runtime·rt_sigaction(i, &sa, nil, sizeof(sa.sa_mask)) != 0) + runtime·throw("rt_sigaction failure"); +} + +#define AT_NULL 0 +#define AT_PLATFORM 15 // introduced in at least 2.6.11 +#define AT_HWCAP 16 // introduced in at least 2.6.11 +#define AT_RANDOM 25 // introduced in 2.6.29 +#define HWCAP_VFP (1 << 6) // introduced in at least 2.6.11 +#define HWCAP_VFPv3 (1 << 13) // introduced in 2.6.30 +static uint32 runtime·randomNumber; +uint8 runtime·armArch = 6; // we default to ARMv6 +uint32 runtime·hwcap; // set by setup_auxv +uint8 runtime·goarm; // set by 5l + +void +runtime·checkgoarm(void) +{ + if(runtime·goarm > 5 && !(runtime·hwcap & HWCAP_VFP)) { + runtime·printf("runtime: this CPU has no floating point hardware, so it cannot run\n"); + runtime·printf("this GOARM=%d binary. Recompile using GOARM=5.\n", runtime·goarm); + runtime·exit(1); + } + if(runtime·goarm > 6 && !(runtime·hwcap & HWCAP_VFPv3)) { + runtime·printf("runtime: this CPU has no VFPv3 floating point hardware, so it cannot run\n"); + runtime·printf("this GOARM=%d binary. Recompile using GOARM=6.\n", runtime·goarm); + runtime·exit(1); + } +} + +#pragma textflag 7 +void +runtime·setup_auxv(int32 argc, void *argv_list) +{ + byte **argv; + byte **envp; + byte *rnd; + uint32 *auxv; + uint32 t; + + argv = &argv_list; + + // skip envp to get to ELF auxiliary vector. 
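/* Initial stack as the kernel lays it out, which the loops below walk:
 *
 *	argv[0] .. argv[argc-1], nil,
 *	envp[0] .. envp[m-1], nil,
 *	auxv: (uint32 type, uint32 value) pairs, ending with AT_NULL
 */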
+ for(envp = &argv[argc+1]; *envp != nil; envp++) + ; + envp++; + + for(auxv=(uint32*)envp; auxv[0] != AT_NULL; auxv += 2) { + switch(auxv[0]) { + case AT_RANDOM: // kernel provided 16-byte worth of random data + if(auxv[1]) { + rnd = (byte*)auxv[1]; + runtime·randomNumber = rnd[4] | rnd[5]<<8 | rnd[6]<<16 | rnd[7]<<24; + } + break; + case AT_PLATFORM: // v5l, v6l, v7l + if(auxv[1]) { + t = *(uint8*)(auxv[1]+1); + if(t >= '5' && t <= '7') + runtime·armArch = t - '0'; + } + break; + case AT_HWCAP: // CPU capability bit flags + runtime·hwcap = auxv[1]; + break; + } + } +} + +#pragma textflag 7 +int64 +runtime·cputicks(void) +{ + // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand1(). + // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler. + // runtime·randomNumber provides better seeding of fastrand1. + return runtime·nanotime() + runtime·randomNumber; } diff --git a/src/pkg/runtime/signal_netbsd_386.c b/src/pkg/runtime/signal_netbsd_386.c index 39d829484..08744c425 100644 --- a/src/pkg/runtime/signal_netbsd_386.c +++ b/src/pkg/runtime/signal_netbsd_386.c @@ -7,76 +7,79 @@ #include "signals_GOOS.h" #include "os_GOOS.h" +extern void runtime·lwp_tramp(void); extern void runtime·sigtramp(void); typedef struct sigaction { union { - void (*__sa_handler)(int32); - void (*__sa_sigaction)(int32, Siginfo*, void *); - } __sigaction_u; /* signal handler */ - uint32 sa_mask; /* signal mask to apply */ + void (*_sa_handler)(int32); + void (*_sa_sigaction)(int32, Siginfo*, void *); + } _sa_u; /* signal handler */ + uint32 sa_mask[4]; /* signal mask to apply */ int32 sa_flags; /* see signal options below */ } Sigaction; void -runtime·dumpregs(Sigcontext *r) +runtime·dumpregs(McontextT *mc) { - runtime·printf("eax %x\n", r->sc_eax); - runtime·printf("ebx %x\n", r->sc_ebx); - runtime·printf("ecx %x\n", r->sc_ecx); - runtime·printf("edx %x\n", r->sc_edx); - runtime·printf("edi %x\n", r->sc_edi); - runtime·printf("esi %x\n", r->sc_esi); - runtime·printf("ebp %x\n", r->sc_ebp); - runtime·printf("esp %x\n", r->sc_esp); - runtime·printf("eip %x\n", r->sc_eip); - runtime·printf("eflags %x\n", r->sc_eflags); - runtime·printf("cs %x\n", r->sc_cs); - runtime·printf("fs %x\n", r->sc_fs); - runtime·printf("gs %x\n", r->sc_gs); + runtime·printf("eax %x\n", mc->__gregs[REG_EAX]); + runtime·printf("ebx %x\n", mc->__gregs[REG_EBX]); + runtime·printf("ecx %x\n", mc->__gregs[REG_ECX]); + runtime·printf("edx %x\n", mc->__gregs[REG_EDX]); + runtime·printf("edi %x\n", mc->__gregs[REG_EDI]); + runtime·printf("esi %x\n", mc->__gregs[REG_ESI]); + runtime·printf("ebp %x\n", mc->__gregs[REG_EBP]); + runtime·printf("esp %x\n", mc->__gregs[REG_UESP]); + runtime·printf("eip %x\n", mc->__gregs[REG_EIP]); + runtime·printf("eflags %x\n", mc->__gregs[REG_EFL]); + runtime·printf("cs %x\n", mc->__gregs[REG_CS]); + runtime·printf("fs %x\n", mc->__gregs[REG_FS]); + runtime·printf("gs %x\n", mc->__gregs[REG_GS]); } void runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) { - Sigcontext *r = context; + UcontextT *uc = context; + McontextT *mc = &uc->uc_mcontext; uintptr *sp; SigTab *t; if(sig == SIGPROF) { - runtime·sigprof((uint8*)r->sc_eip, (uint8*)r->sc_esp, nil, gp); + runtime·sigprof((uint8*)mc->__gregs[REG_EIP], + (uint8*)mc->__gregs[REG_UESP], nil, gp); return; } t = &runtime·sigtab[sig]; - if(info->si_code != SI_USER && (t->flags & SigPanic)) { - if(gp == nil) + if(info->_code != SI_USER && (t->flags & SigPanic)) { + if(gp == nil || gp == m->g0) goto 
Throw; // Make it look like a call to the signal func. - // Have to pass arguments out of band since + // We need to pass arguments out of band since // augmenting the stack frame would break // the unwinding code. gp->sig = sig; - gp->sigcode0 = info->si_code; - gp->sigcode1 = *(uintptr*)((byte*)info + 12); /* si_addr */ - gp->sigpc = r->sc_eip; - - // Only push runtime·sigpanic if r->sc_eip != 0. - // If r->sc_eip == 0, probably panicked because of a - // call to a nil func. Not pushing that onto sp will - // make the trace look like a call to runtime·sigpanic instead. - // (Otherwise the trace will end at runtime·sigpanic and we - // won't get to see who faulted.) - if(r->sc_eip != 0) { - sp = (uintptr*)r->sc_esp; - *--sp = r->sc_eip; - r->sc_esp = (uintptr)sp; + gp->sigcode0 = info->_code; + gp->sigcode1 = *(uintptr*)&info->_reason[0]; /* _addr */ + gp->sigpc = mc->__gregs[REG_EIP]; + + // Only push runtime·sigpanic if __gregs[REG_EIP] != 0. + // If __gregs[REG_EIP] == 0, probably panicked because of a + // call to a nil func. Not pushing that onto sp will make the + // trace look like a call to runtime·sigpanic instead. + // (Otherwise the trace will end at runtime·sigpanic + // and we won't get to see who faulted.) + if(mc->__gregs[REG_EIP] != 0) { + sp = (uintptr*)mc->__gregs[REG_UESP]; + *--sp = mc->__gregs[REG_EIP]; + mc->__gregs[REG_UESP] = (uintptr)sp; } - r->sc_eip = (uintptr)runtime·sigpanic; + mc->__gregs[REG_EIP] = (uintptr)runtime·sigpanic; return; } - if(info->si_code == SI_USER || (t->flags & SigNotify)) + if(info->_code == SI_USER || (t->flags & SigNotify)) if(runtime·sigsend(sig)) return; if(t->flags & SigKill) @@ -92,13 +95,18 @@ Throw: else runtime·printf("%s\n", runtime·sigtab[sig].name); - runtime·printf("PC=%X\n", r->sc_eip); + runtime·printf("PC=%X\n", mc->__gregs[REG_EIP]); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ - runtime·traceback((void*)r->sc_eip, (void*)r->sc_esp, 0, gp); + runtime·traceback((void*)mc->__gregs[REG_EIP], + (void*)mc->__gregs[REG_UESP], 0, gp); runtime·tracebackothers(gp); - runtime·dumpregs(r); + runtime·dumpregs(mc); } runtime·exit(2); @@ -109,9 +117,11 @@ runtime·signalstack(byte *p, int32 n) { Sigaltstack st; - st.ss_sp = (int8*)p; + st.ss_sp = p; st.ss_size = n; st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; runtime·sigaltstack(&st, nil); } @@ -120,13 +130,35 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) { Sigaction sa; + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. 
+ if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + runtime·sigaction(i, nil, &sa); + if(sa._sa_u._sa_sigaction == SIG_IGN) + return; + } + runtime·memclr((byte*)&sa, sizeof sa); sa.sa_flags = SA_SIGINFO|SA_ONSTACK; if(restart) sa.sa_flags |= SA_RESTART; - sa.sa_mask = ~0ULL; + sa.sa_mask[0] = ~0U; + sa.sa_mask[1] = ~0U; + sa.sa_mask[2] = ~0U; + sa.sa_mask[3] = ~0U; if (fn == runtime·sighandler) fn = (void*)runtime·sigtramp; - sa.__sigaction_u.__sa_sigaction = (void*)fn; + sa._sa_u._sa_sigaction = (void*)fn; runtime·sigaction(i, &sa, nil); } + +void +runtime·lwp_mcontext_init(McontextT *mc, void *stack, M *mp, G *gp, void (*fn)(void)) +{ + mc->__gregs[REG_EIP] = (uint32)runtime·lwp_tramp; + mc->__gregs[REG_UESP] = (uint32)stack; + mc->__gregs[REG_EBX] = (uint32)mp; + mc->__gregs[REG_EDX] = (uint32)gp; + mc->__gregs[REG_ESI] = (uint32)fn; +} diff --git a/src/pkg/runtime/signal_netbsd_amd64.c b/src/pkg/runtime/signal_netbsd_amd64.c index 8b4f624e7..46afb682b 100644 --- a/src/pkg/runtime/signal_netbsd_amd64.c +++ b/src/pkg/runtime/signal_netbsd_amd64.c @@ -7,85 +7,86 @@ #include "signals_GOOS.h" #include "os_GOOS.h" +extern void runtime·lwp_tramp(void); extern void runtime·sigtramp(void); typedef struct sigaction { union { - void (*__sa_handler)(int32); - void (*__sa_sigaction)(int32, Siginfo*, void *); - } __sigaction_u; /* signal handler */ - uint32 sa_mask; /* signal mask to apply */ + void (*_sa_handler)(int32); + void (*_sa_sigaction)(int32, Siginfo*, void *); + } _sa_u; /* signal handler */ + uint32 sa_mask[4]; /* signal mask to apply */ int32 sa_flags; /* see signal options below */ } Sigaction; void -runtime·dumpregs(Sigcontext *r) +runtime·dumpregs(McontextT *mc) { - runtime·printf("rax %X\n", r->sc_rax); - runtime·printf("rbx %X\n", r->sc_rbx); - runtime·printf("rcx %X\n", r->sc_rcx); - runtime·printf("rdx %X\n", r->sc_rdx); - runtime·printf("rdi %X\n", r->sc_rdi); - runtime·printf("rsi %X\n", r->sc_rsi); - runtime·printf("rbp %X\n", r->sc_rbp); - runtime·printf("rsp %X\n", r->sc_rsp); - runtime·printf("r8 %X\n", r->sc_r8); - runtime·printf("r9 %X\n", r->sc_r9); - runtime·printf("r10 %X\n", r->sc_r10); - runtime·printf("r11 %X\n", r->sc_r11); - runtime·printf("r12 %X\n", r->sc_r12); - runtime·printf("r13 %X\n", r->sc_r13); - runtime·printf("r14 %X\n", r->sc_r14); - runtime·printf("r15 %X\n", r->sc_r15); - runtime·printf("rip %X\n", r->sc_rip); - runtime·printf("rflags %X\n", r->sc_rflags); - runtime·printf("cs %X\n", r->sc_cs); - runtime·printf("fs %X\n", r->sc_fs); - runtime·printf("gs %X\n", r->sc_gs); + runtime·printf("rax %X\n", mc->__gregs[REG_RAX]); + runtime·printf("rbx %X\n", mc->__gregs[REG_RBX]); + runtime·printf("rcx %X\n", mc->__gregs[REG_RCX]); + runtime·printf("rdx %X\n", mc->__gregs[REG_RDX]); + runtime·printf("rdi %X\n", mc->__gregs[REG_RDI]); + runtime·printf("rsi %X\n", mc->__gregs[REG_RSI]); + runtime·printf("rbp %X\n", mc->__gregs[REG_RBP]); + runtime·printf("rsp %X\n", mc->__gregs[REG_RSP]); + runtime·printf("r8 %X\n", mc->__gregs[REG_R8]); + runtime·printf("r9 %X\n", mc->__gregs[REG_R9]); + runtime·printf("r10 %X\n", mc->__gregs[REG_R10]); + runtime·printf("r11 %X\n", mc->__gregs[REG_R11]); + runtime·printf("r12 %X\n", mc->__gregs[REG_R12]); + runtime·printf("r13 %X\n", mc->__gregs[REG_R13]); + runtime·printf("r14 %X\n", mc->__gregs[REG_R14]); + runtime·printf("r15 %X\n", mc->__gregs[REG_R15]); + runtime·printf("rip %X\n", mc->__gregs[REG_RIP]); + runtime·printf("rflags %X\n", mc->__gregs[REG_RFLAGS]); + runtime·printf("cs %X\n", 
mc->__gregs[REG_CS]); + runtime·printf("fs %X\n", mc->__gregs[REG_FS]); + runtime·printf("gs %X\n", mc->__gregs[REG_GS]); } void runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) { - Sigcontext *r = context; + UcontextT *uc = context; + McontextT *mc = &uc->uc_mcontext; uintptr *sp; SigTab *t; if(sig == SIGPROF) { - runtime·sigprof((uint8*)r->sc_rip, - (uint8*)r->sc_rsp, nil, gp); + runtime·sigprof((uint8*)mc->__gregs[REG_RIP], + (uint8*)mc->__gregs[REG_RSP], nil, gp); return; } t = &runtime·sigtab[sig]; - if(info->si_code != SI_USER && (t->flags & SigPanic)) { - if(gp == nil) + if(info->_code != SI_USER && (t->flags & SigPanic)) { + if(gp == nil || gp == m->g0) goto Throw; // Make it look like a call to the signal func. - // Have to pass arguments out of band since - // augmenting the stack frame would break - // the unwinding code. + // We need to pass arguments out of band since augmenting the + // stack frame would break the unwinding code. gp->sig = sig; - gp->sigcode0 = info->si_code; - gp->sigcode1 = *(uintptr*)((byte*)info + 16); /* si_addr */ - gp->sigpc = r->sc_rip; - - // Only push runtime·sigpanic if r->mc_rip != 0. - // If r->mc_rip == 0, probably panicked because of a - // call to a nil func. Not pushing that onto sp will - // make the trace look like a call to runtime·sigpanic instead. - // (Otherwise the trace will end at runtime·sigpanic and we - // won't get to see who faulted.) - if(r->sc_rip != 0) { - sp = (uintptr*)r->sc_rsp; - *--sp = r->sc_rip; - r->sc_rsp = (uintptr)sp; + gp->sigcode0 = info->_code; + gp->sigcode1 = *(uintptr*)&info->_reason[0]; /* _addr */ + gp->sigpc = mc->__gregs[REG_RIP]; + + // Only push runtime·sigpanic if __gregs[REG_RIP] != 0. + // If __gregs[REG_RIP] == 0, probably panicked because of a + // call to a nil func. Not pushing that onto sp will make the + // trace look like a call to runtime·sigpanic instead. + // (Otherwise the trace will end at runtime·sigpanic + // and we won't get to see who faulted.) + if(mc->__gregs[REG_RIP] != 0) { + sp = (uintptr*)mc->__gregs[REG_RSP]; + *--sp = mc->__gregs[REG_RIP]; + mc->__gregs[REG_RSP] = (uintptr)sp; } - r->sc_rip = (uintptr)runtime·sigpanic; + mc->__gregs[REG_RIP] = (uintptr)runtime·sigpanic; return; } - if(info->si_code == SI_USER || (t->flags & SigNotify)) + if(info->_code == SI_USER || (t->flags & SigNotify)) if(runtime·sigsend(sig)) return; if(t->flags & SigKill) @@ -101,13 +102,18 @@ Throw: else runtime·printf("%s\n", runtime·sigtab[sig].name); - runtime·printf("PC=%X\n", r->sc_rip); + runtime·printf("PC=%X\n", mc->__gregs[REG_RIP]); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ - runtime·traceback((void*)r->sc_rip, (void*)r->sc_rsp, 0, gp); + runtime·traceback((void*)mc->__gregs[REG_RIP], + (void*)mc->__gregs[REG_RSP], 0, gp); runtime·tracebackothers(gp); - runtime·dumpregs(r); + runtime·dumpregs(mc); } runtime·exit(2); @@ -118,9 +124,11 @@ runtime·signalstack(byte *p, int32 n) { Sigaltstack st; - st.ss_sp = (int8*)p; + st.ss_sp = p; st.ss_size = n; st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; runtime·sigaltstack(&st, nil); } @@ -129,13 +137,36 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) { Sigaction sa; + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. 
+ if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + runtime·sigaction(i, nil, &sa); + if(sa._sa_u._sa_sigaction == SIG_IGN) + return; + } + runtime·memclr((byte*)&sa, sizeof sa); sa.sa_flags = SA_SIGINFO|SA_ONSTACK; if(restart) sa.sa_flags |= SA_RESTART; - sa.sa_mask = ~0ULL; + sa.sa_mask[0] = ~0U; + sa.sa_mask[1] = ~0U; + sa.sa_mask[2] = ~0U; + sa.sa_mask[3] = ~0U; if (fn == runtime·sighandler) fn = (void*)runtime·sigtramp; - sa.__sigaction_u.__sa_sigaction = (void*)fn; + sa._sa_u._sa_sigaction = (void*)fn; runtime·sigaction(i, &sa, nil); } + +void +runtime·lwp_mcontext_init(McontextT *mc, void *stack, M *mp, G *gp, void (*fn)(void)) +{ + // Machine dependent mcontext initialisation for LWP. + mc->__gregs[REG_RIP] = (uint64)runtime·lwp_tramp; + mc->__gregs[REG_RSP] = (uint64)stack; + mc->__gregs[REG_R8] = (uint64)mp; + mc->__gregs[REG_R9] = (uint64)gp; + mc->__gregs[REG_R12] = (uint64)fn; +} diff --git a/src/pkg/runtime/signal_netbsd_arm.c b/src/pkg/runtime/signal_netbsd_arm.c new file mode 100644 index 000000000..97f62687b --- /dev/null +++ b/src/pkg/runtime/signal_netbsd_arm.c @@ -0,0 +1,208 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "runtime.h" +#include "defs_GOOS_GOARCH.h" +#include "signals_GOOS.h" +#include "os_GOOS.h" + +#define r0 __gregs[0] +#define r1 __gregs[1] +#define r2 __gregs[2] +#define r3 __gregs[3] +#define r4 __gregs[4] +#define r5 __gregs[5] +#define r6 __gregs[6] +#define r7 __gregs[7] +#define r8 __gregs[8] +#define r9 __gregs[9] +#define r10 __gregs[10] +#define r11 __gregs[11] +#define r12 __gregs[12] +#define r13 __gregs[13] +#define r14 __gregs[14] +#define r15 __gregs[15] +#define cpsr __gregs[16] + +void +runtime·dumpregs(McontextT *r) +{ + runtime·printf("r0 %x\n", r->r0); + runtime·printf("r1 %x\n", r->r1); + runtime·printf("r2 %x\n", r->r2); + runtime·printf("r3 %x\n", r->r3); + runtime·printf("r4 %x\n", r->r4); + runtime·printf("r5 %x\n", r->r5); + runtime·printf("r6 %x\n", r->r6); + runtime·printf("r7 %x\n", r->r7); + runtime·printf("r8 %x\n", r->r8); + runtime·printf("r9 %x\n", r->r9); + runtime·printf("r10 %x\n", r->r10); + runtime·printf("fp %x\n", r->r11); + runtime·printf("ip %x\n", r->r12); + runtime·printf("sp %x\n", r->r13); + runtime·printf("lr %x\n", r->r14); + runtime·printf("pc %x\n", r->r15); + runtime·printf("cpsr %x\n", r->cpsr); +} + +extern void runtime·lwp_tramp(void); +extern void runtime·sigtramp(void); + +typedef struct sigaction { + union { + void (*_sa_handler)(int32); + void (*_sa_sigaction)(int32, Siginfo*, void *); + } _sa_u; /* signal handler */ + uint32 sa_mask[4]; /* signal mask to apply */ + int32 sa_flags; /* see signal options below */ +} Sigaction; + +void +runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) +{ + UcontextT *uc; + McontextT *r; + SigTab *t; + + uc = context; + r = &uc->uc_mcontext; + + if(sig == SIGPROF) { + runtime·sigprof((uint8*)r->r15, (uint8*)r->r13, (uint8*)r->r14, gp); + return; + } + + t = &runtime·sigtab[sig]; + if(info->_code != SI_USER && (t->flags & SigPanic)) { + if(gp == nil || gp == m->g0) + goto Throw; + // Make it look like a call to the signal func. + // We have to pass arguments out of band since + // augmenting the stack frame would break + // the unwinding code. 
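/* "Out of band" means the values travel in the G itself rather than as
 * pushed arguments; runtime·sigpanic later reads gp->sig, gp->sigcode0,
 * and gp->sigcode1 to decide which panic to raise. */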
+ gp->sig = sig; + gp->sigcode0 = info->_code; + gp->sigcode1 = *(uintptr*)&info->_reason[0]; /* _addr */ + gp->sigpc = r->r15; + + // We arrange lr, and pc to pretend the panicking + // function calls sigpanic directly. + // Always save LR to stack so that panics in leaf + // functions are correctly handled. This smashes + // the stack frame but we're not going back there + // anyway. + r->r13 -= 4; + *(uint32 *)r->r13 = r->r14; + // Don't bother saving PC if it's zero, which is + // probably a call to a nil func: the old link register + // is more useful in the stack trace. + if(r->r15 != 0) + r->r14 = r->r15; + // In case we are panicking from external C code + r->r10 = (uintptr)gp; + r->r9 = (uintptr)m; + r->r15 = (uintptr)runtime·sigpanic; + return; + } + + if(info->_code == SI_USER || (t->flags & SigNotify)) + if(runtime·sigsend(sig)) + return; + if(t->flags & SigKill) + runtime·exit(2); + if(!(t->flags & SigThrow)) + return; + +Throw: + runtime·startpanic(); + + if(sig < 0 || sig >= NSIG) + runtime·printf("Signal %d\n", sig); + else + runtime·printf("%s\n", runtime·sigtab[sig].name); + + runtime·printf("PC=%x\n", r->r15); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } + runtime·printf("\n"); + + if(runtime·gotraceback()){ + runtime·traceback((void*)r->r15, (void*)r->r13, (void*)r->r14, gp); + runtime·tracebackothers(gp); + runtime·printf("\n"); + runtime·dumpregs(r); + } + +// breakpoint(); + runtime·exit(2); +} + +void +runtime·signalstack(byte *p, int32 n) +{ + Sigaltstack st; + + st.ss_sp = (uint8*)p; + st.ss_size = n; + st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; + runtime·sigaltstack(&st, nil); +} + +void +runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) +{ + Sigaction sa; + + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. + if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + runtime·sigaction(i, nil, &sa); + if(sa._sa_u._sa_sigaction == SIG_IGN) + return; + } + + runtime·memclr((byte*)&sa, sizeof sa); + sa.sa_flags = SA_SIGINFO|SA_ONSTACK; + if(restart) + sa.sa_flags |= SA_RESTART; + sa.sa_mask[0] = ~0U; + sa.sa_mask[1] = ~0U; + sa.sa_mask[2] = ~0U; + sa.sa_mask[3] = ~0U; + if (fn == runtime·sighandler) + fn = (void*)runtime·sigtramp; + sa._sa_u._sa_sigaction = (void*)fn; + runtime·sigaction(i, &sa, nil); +} + +void +runtime·lwp_mcontext_init(McontextT *mc, void *stack, M *mp, G *gp, void (*fn)(void)) +{ + mc->r15 = (uint32)runtime·lwp_tramp; + mc->r13 = (uint32)stack; + mc->r0 = (uint32)mp; + mc->r1 = (uint32)gp; + mc->r2 = (uint32)fn; +} + +void +runtime·checkgoarm(void) +{ + // TODO(minux) +} + +#pragma textflag 7 +int64 +runtime·cputicks() { + // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand1(). + // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler. + // TODO: need more entropy to better seed fastrand1. + return runtime·nanotime(); +} diff --git a/src/pkg/runtime/signal_openbsd_386.c b/src/pkg/runtime/signal_openbsd_386.c index 39d829484..516797c8d 100644 --- a/src/pkg/runtime/signal_openbsd_386.c +++ b/src/pkg/runtime/signal_openbsd_386.c @@ -50,7 +50,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) t = &runtime·sigtab[sig]; if(info->si_code != SI_USER && (t->flags & SigPanic)) { - if(gp == nil) + if(gp == nil || gp == m->g0) goto Throw; // Make it look like a call to the signal func. 
// Have to pass arguments out of band since @@ -93,6 +93,10 @@ Throw: runtime·printf("%s\n", runtime·sigtab[sig].name); runtime·printf("PC=%X\n", r->sc_eip); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ @@ -109,9 +113,11 @@ runtime·signalstack(byte *p, int32 n) { Sigaltstack st; - st.ss_sp = (int8*)p; + st.ss_sp = p; st.ss_size = n; st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; runtime·sigaltstack(&st, nil); } @@ -120,6 +126,15 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) { Sigaction sa; + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. + if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + runtime·sigaction(i, nil, &sa); + if(sa.__sigaction_u.__sa_sigaction == SIG_IGN) + return; + } + runtime·memclr((byte*)&sa, sizeof sa); sa.sa_flags = SA_SIGINFO|SA_ONSTACK; if(restart) diff --git a/src/pkg/runtime/signal_openbsd_amd64.c b/src/pkg/runtime/signal_openbsd_amd64.c index 8b4f624e7..0d0db770b 100644 --- a/src/pkg/runtime/signal_openbsd_amd64.c +++ b/src/pkg/runtime/signal_openbsd_amd64.c @@ -59,7 +59,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) t = &runtime·sigtab[sig]; if(info->si_code != SI_USER && (t->flags & SigPanic)) { - if(gp == nil) + if(gp == nil || gp == m->g0) goto Throw; // Make it look like a call to the signal func. // Have to pass arguments out of band since @@ -70,8 +70,8 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) gp->sigcode1 = *(uintptr*)((byte*)info + 16); /* si_addr */ gp->sigpc = r->sc_rip; - // Only push runtime·sigpanic if r->mc_rip != 0. - // If r->mc_rip == 0, probably panicked because of a + // Only push runtime·sigpanic if r->sc_rip != 0. + // If r->sc_rip == 0, probably panicked because of a // call to a nil func. Not pushing that onto sp will // make the trace look like a call to runtime·sigpanic instead. // (Otherwise the trace will end at runtime·sigpanic and we @@ -102,6 +102,10 @@ Throw: runtime·printf("%s\n", runtime·sigtab[sig].name); runtime·printf("PC=%X\n", r->sc_rip); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ @@ -118,9 +122,11 @@ runtime·signalstack(byte *p, int32 n) { Sigaltstack st; - st.ss_sp = (int8*)p; + st.ss_sp = p; st.ss_size = n; st.ss_flags = 0; + if(p == nil) + st.ss_flags = SS_DISABLE; runtime·sigaltstack(&st, nil); } @@ -129,12 +135,21 @@ runtime·setsig(int32 i, void (*fn)(int32, Siginfo*, void*, G*), bool restart) { Sigaction sa; + // If SIGHUP handler is SIG_IGN, assume running + // under nohup and do not set explicit handler. 
+ if(i == SIGHUP) { + runtime·memclr((byte*)&sa, sizeof sa); + runtime·sigaction(i, nil, &sa); + if(sa.__sigaction_u.__sa_sigaction == SIG_IGN) + return; + } + runtime·memclr((byte*)&sa, sizeof sa); sa.sa_flags = SA_SIGINFO|SA_ONSTACK; if(restart) sa.sa_flags |= SA_RESTART; - sa.sa_mask = ~0ULL; - if (fn == runtime·sighandler) + sa.sa_mask = ~0U; + if(fn == runtime·sighandler) fn = (void*)runtime·sigtramp; sa.__sigaction_u.__sa_sigaction = (void*)fn; runtime·sigaction(i, &sa, nil); diff --git a/src/pkg/runtime/signal_plan9_386.c b/src/pkg/runtime/signal_plan9_386.c index d26688516..17bc11749 100644 --- a/src/pkg/runtime/signal_plan9_386.c +++ b/src/pkg/runtime/signal_plan9_386.c @@ -3,6 +3,107 @@ // license that can be found in the LICENSE file. #include "runtime.h" +#include "defs_GOOS_GOARCH.h" +#include "os_GOOS.h" +#include "signals_GOOS.h" + +void +runtime·dumpregs(Ureg *u) +{ + runtime·printf("ax %X\n", u->ax); + runtime·printf("bx %X\n", u->bx); + runtime·printf("cx %X\n", u->cx); + runtime·printf("dx %X\n", u->dx); + runtime·printf("di %X\n", u->di); + runtime·printf("si %X\n", u->si); + runtime·printf("bp %X\n", u->bp); + runtime·printf("sp %X\n", u->sp); + runtime·printf("pc %X\n", u->pc); + runtime·printf("flags %X\n", u->flags); + runtime·printf("cs %X\n", u->cs); + runtime·printf("fs %X\n", u->fs); + runtime·printf("gs %X\n", u->gs); +} + +int32 +runtime·sighandler(void *v, int8 *s, G *gp) +{ + Ureg *ureg; + uintptr *sp; + SigTab *sig, *nsig; + int32 len, i; + + if(!s) + return NCONT; + + len = runtime·findnull((byte*)s); + if(len <= 4 || runtime·mcmp((byte*)s, (byte*)"sys:", 4) != 0) + return NDFLT; + + nsig = nil; + sig = runtime·sigtab; + for(i=0; i < NSIG; i++) { + if(runtime·strstr((byte*)s, (byte*)sig->name)) { + nsig = sig; + break; + } + sig++; + } + + if(nsig == nil) + return NDFLT; + + ureg = v; + if(nsig->flags & SigPanic) { + if(gp == nil || m->notesig == 0) + goto Throw; + + // Save error string from sigtramp's stack, + // into gsignal->sigcode0, so we can reliably + // access it from the panic routines. + if(len > ERRMAX) + len = ERRMAX; + runtime·memmove((void*)m->notesig, (void*)s, len); + + gp->sig = i; + gp->sigpc = ureg->pc; + + // Only push runtime·sigpanic if ureg->pc != 0. + // If ureg->pc == 0, probably panicked because of a + // call to a nil func. Not pushing that onto sp will + // make the trace look like a call to runtime·sigpanic instead. + // (Otherwise the trace will end at runtime·sigpanic and we + // won't get to see who faulted.) + if(ureg->pc != 0) { + sp = (uintptr*)ureg->sp; + *--sp = ureg->pc; + ureg->sp = (uint32)sp; + } + ureg->pc = (uintptr)runtime·sigpanic; + return NCONT; + } + + if(!(nsig->flags & SigThrow)) + return NDFLT; + +Throw: + runtime·startpanic(); + + runtime·printf("%s\n", s); + runtime·printf("PC=%X\n", ureg->pc); + runtime·printf("\n"); + + if(runtime·gotraceback()) { + runtime·traceback((void*)ureg->pc, (void*)ureg->sp, 0, gp); + runtime·tracebackothers(gp); + runtime·dumpregs(ureg); + } + runtime·goexitsall(""); + runtime·exits(s); + + return 0; +} + void runtime·sigenable(uint32 sig) diff --git a/src/pkg/runtime/signal_plan9_amd64.c b/src/pkg/runtime/signal_plan9_amd64.c new file mode 100644 index 000000000..e4f946abc --- /dev/null +++ b/src/pkg/runtime/signal_plan9_amd64.c @@ -0,0 +1,127 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
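Returning to the setsig hunks above for a moment: they probe the inherited SIGHUP disposition first and install no handler when it is SIG_IGN, so a process started under nohup keeps ignoring hangups. The same check is visible from user code in much later Go releases via os/signal's Ignored; the sketch below is purely illustrative and anachronistic relative to this patch:

package main

import (
	"fmt"
	"os/signal"
	"syscall"
)

func main() {
	// True when SIGHUP was inherited as SIG_IGN, e.g. under nohup;
	// per the hunks above, the runtime then leaves the disposition alone.
	fmt.Println("SIGHUP ignored:", signal.Ignored(syscall.SIGHUP))
}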
+ +#include "runtime.h" +#include "defs_GOOS_GOARCH.h" +#include "os_GOOS.h" +#include "signals_GOOS.h" + +void +runtime·dumpregs(Ureg *u) +{ + runtime·printf("ax %X\n", u->ax); + runtime·printf("bx %X\n", u->bx); + runtime·printf("cx %X\n", u->cx); + runtime·printf("dx %X\n", u->dx); + runtime·printf("di %X\n", u->di); + runtime·printf("si %X\n", u->si); + runtime·printf("bp %X\n", u->bp); + runtime·printf("sp %X\n", u->sp); + runtime·printf("r8 %X\n", u->r8); + runtime·printf("r9 %X\n", u->r9); + runtime·printf("r10 %X\n", u->r10); + runtime·printf("r11 %X\n", u->r11); + runtime·printf("r12 %X\n", u->r12); + runtime·printf("r13 %X\n", u->r13); + runtime·printf("r14 %X\n", u->r14); + runtime·printf("r15 %X\n", u->r15); + runtime·printf("ip %X\n", u->ip); + runtime·printf("flags %X\n", u->flags); + runtime·printf("cs %X\n", (uint64)u->cs); + runtime·printf("fs %X\n", (uint64)u->fs); + runtime·printf("gs %X\n", (uint64)u->gs); +} + +int32 +runtime·sighandler(void *v, int8 *s, G *gp) +{ + Ureg *ureg; + uintptr *sp; + SigTab *sig, *nsig; + int32 len, i; + + if(!s) + return NCONT; + + len = runtime·findnull((byte*)s); + if(len <= 4 || runtime·mcmp((byte*)s, (byte*)"sys:", 4) != 0) + return NDFLT; + + nsig = nil; + sig = runtime·sigtab; + for(i=0; i < NSIG; i++) { + if(runtime·strstr((byte*)s, (byte*)sig->name)) { + nsig = sig; + break; + } + sig++; + } + + if(nsig == nil) + return NDFLT; + + ureg = v; + if(nsig->flags & SigPanic) { + if(gp == nil || m->notesig == 0) + goto Throw; + + // Save error string from sigtramp's stack, + // into gsignal->sigcode0, so we can reliably + // access it from the panic routines. + if(len > ERRMAX) + len = ERRMAX; + runtime·memmove((void*)m->notesig, (void*)s, len); + + gp->sig = i; + gp->sigpc = ureg->ip; + + // Only push runtime·sigpanic if ureg->ip != 0. + // If ureg->ip == 0, probably panicked because of a + // call to a nil func. Not pushing that onto sp will + // make the trace look like a call to runtime·sigpanic instead. + // (Otherwise the trace will end at runtime·sigpanic and we + // won't get to see who faulted.) + if(ureg->ip != 0) { + sp = (uintptr*)ureg->sp; + *--sp = ureg->ip; + ureg->sp = (uint64)sp; + } + ureg->ip = (uintptr)runtime·sigpanic; + return NCONT; + } + + if(!(nsig->flags & SigThrow)) + return NDFLT; + +Throw: + runtime·startpanic(); + + runtime·printf("%s\n", s); + runtime·printf("PC=%X\n", ureg->ip); + runtime·printf("\n"); + + if(runtime·gotraceback()) { + runtime·traceback((void*)ureg->ip, (void*)ureg->sp, 0, gp); + runtime·tracebackothers(gp); + runtime·dumpregs(ureg); + } + runtime·goexitsall(""); + runtime·exits(s); + + return 0; +} + +void +runtime·sigenable(uint32 sig) +{ + USED(sig); +} + +void +runtime·resetcpuprofiler(int32 hz) +{ + // TODO: Enable profiling interrupts. 
+ + m->profilehz = hz; +} diff --git a/src/pkg/runtime/signal_windows_386.c b/src/pkg/runtime/signal_windows_386.c index a248374db..d76d5bf4b 100644 --- a/src/pkg/runtime/signal_windows_386.c +++ b/src/pkg/runtime/signal_windows_386.c @@ -68,11 +68,15 @@ runtime·sighandler(ExceptionRecord *info, Context *r, G *gp) info->ExceptionInformation[0], info->ExceptionInformation[1]); runtime·printf("PC=%x\n", r->Eip); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ - runtime·traceback((void*)r->Eip, (void*)r->Esp, 0, m->curg); - runtime·tracebackothers(m->curg); + runtime·traceback((void*)r->Eip, (void*)r->Esp, 0, gp); + runtime·tracebackothers(gp); runtime·dumpregs(r); } diff --git a/src/pkg/runtime/signal_windows_amd64.c b/src/pkg/runtime/signal_windows_amd64.c index 1cdf1cac4..3729aa57b 100644 --- a/src/pkg/runtime/signal_windows_amd64.c +++ b/src/pkg/runtime/signal_windows_amd64.c @@ -75,6 +75,10 @@ runtime·sighandler(ExceptionRecord *info, Context *r, G *gp) info->ExceptionInformation[0], info->ExceptionInformation[1]); runtime·printf("PC=%X\n", r->Rip); + if(m->lockedg != nil && m->ncgo > 0 && gp == m->g0) { + runtime·printf("signal arrived during cgo execution\n"); + gp = m->lockedg; + } runtime·printf("\n"); if(runtime·gotraceback()){ diff --git a/src/pkg/runtime/signals_linux.h b/src/pkg/runtime/signals_linux.h index 345a6c5d1..9c3567007 100644 --- a/src/pkg/runtime/signals_linux.h +++ b/src/pkg/runtime/signals_linux.h @@ -42,7 +42,7 @@ SigTab runtime·sigtab[] = { /* 30 */ N, "SIGPWR: power failure restart", /* 31 */ N, "SIGSYS: bad system call", /* 32 */ N, "signal 32", - /* 33 */ N, "signal 33", + /* 33 */ 0, "signal 33", /* SIGSETXID; see issue 3871 */ /* 34 */ N, "signal 34", /* 35 */ N, "signal 35", /* 36 */ N, "signal 36", diff --git a/src/pkg/runtime/signals_netbsd.h b/src/pkg/runtime/signals_netbsd.h index 4d27e050d..7140de86f 100644 --- a/src/pkg/runtime/signals_netbsd.h +++ b/src/pkg/runtime/signals_netbsd.h @@ -9,16 +9,16 @@ #define D SigDefault SigTab runtime·sigtab[] = { - /* 0 */ 0, "SIGNONE: no trap", - /* 1 */ N+K, "SIGHUP: terminal line hangup", - /* 2 */ N+K, "SIGINT: interrupt", - /* 3 */ N+T, "SIGQUIT: quit", - /* 4 */ T, "SIGILL: illegal instruction", - /* 5 */ T, "SIGTRAP: trace trap", - /* 6 */ N+T, "SIGABRT: abort", - /* 7 */ T, "SIGEMT: emulate instruction executed", - /* 8 */ P, "SIGFPE: floating-point exception", - /* 9 */ 0, "SIGKILL: kill", + /* 0 */ 0, "SIGNONE: no trap", + /* 1 */ N+K, "SIGHUP: terminal line hangup", + /* 2 */ N+K, "SIGINT: interrupt", + /* 3 */ N+T, "SIGQUIT: quit", + /* 4 */ T, "SIGILL: illegal instruction", + /* 5 */ T, "SIGTRAP: trace trap", + /* 6 */ N+T, "SIGABRT: abort", + /* 7 */ T, "SIGEMT: emulate instruction executed", + /* 8 */ P, "SIGFPE: floating-point exception", + /* 9 */ 0, "SIGKILL: kill", /* 10 */ P, "SIGBUS: bus error", /* 11 */ P, "SIGSEGV: segmentation violation", /* 12 */ T, "SIGSYS: bad system call", diff --git a/src/pkg/runtime/signals_openbsd.h b/src/pkg/runtime/signals_openbsd.h index 4d27e050d..7140de86f 100644 --- a/src/pkg/runtime/signals_openbsd.h +++ b/src/pkg/runtime/signals_openbsd.h @@ -9,16 +9,16 @@ #define D SigDefault SigTab runtime·sigtab[] = { - /* 0 */ 0, "SIGNONE: no trap", - /* 1 */ N+K, "SIGHUP: terminal line hangup", - /* 2 */ N+K, "SIGINT: interrupt", - /* 3 */ N+T, "SIGQUIT: quit", - /* 4 */ T, "SIGILL: illegal instruction", - /* 5 */ T, "SIGTRAP: trace 
trap", - /* 6 */ N+T, "SIGABRT: abort", - /* 7 */ T, "SIGEMT: emulate instruction executed", - /* 8 */ P, "SIGFPE: floating-point exception", - /* 9 */ 0, "SIGKILL: kill", + /* 0 */ 0, "SIGNONE: no trap", + /* 1 */ N+K, "SIGHUP: terminal line hangup", + /* 2 */ N+K, "SIGINT: interrupt", + /* 3 */ N+T, "SIGQUIT: quit", + /* 4 */ T, "SIGILL: illegal instruction", + /* 5 */ T, "SIGTRAP: trace trap", + /* 6 */ N+T, "SIGABRT: abort", + /* 7 */ T, "SIGEMT: emulate instruction executed", + /* 8 */ P, "SIGFPE: floating-point exception", + /* 9 */ 0, "SIGKILL: kill", /* 10 */ P, "SIGBUS: bus error", /* 11 */ P, "SIGSEGV: segmentation violation", /* 12 */ T, "SIGSYS: bad system call", diff --git a/src/pkg/runtime/signals_plan9.h b/src/pkg/runtime/signals_plan9.h index 5df757613..0f1165e2a 100644 --- a/src/pkg/runtime/signals_plan9.h +++ b/src/pkg/runtime/signals_plan9.h @@ -1 +1,24 @@ -// nothing to see here +#define N SigNotify +#define T SigThrow +#define P SigPanic + +SigTab runtime·sigtab[] = { + P, "sys: fp:", + + // Go libraries expect to be able + // to recover from memory + // read/write errors, so we flag + // those as panics. All other traps + // are generally more serious and + // should immediately throw an + // exception. + P, "sys: trap: fault read addr", + P, "sys: trap: fault write addr", + T, "sys: trap:", + + N, "sys: bad sys call", +}; + +#undef N +#undef T +#undef P diff --git a/src/pkg/runtime/sigqueue.goc b/src/pkg/runtime/sigqueue.goc index b49fdba86..ab5f312e4 100644 --- a/src/pkg/runtime/sigqueue.goc +++ b/src/pkg/runtime/sigqueue.goc @@ -5,36 +5,24 @@ // This file implements runtime support for signal handling. // // Most synchronization primitives are not available from -// the signal handler (it cannot block and cannot use locks) +// the signal handler (it cannot block, allocate memory, or use locks) // so the handler communicates with a processing goroutine // via struct sig, below. // -// Ownership for sig.Note passes back and forth between -// the signal handler and the signal goroutine in rounds. -// The initial state is that sig.note is cleared (setup by signal_enable). -// At the beginning of each round, mask == 0. -// The round goes through three stages: -// -// (In parallel) -// 1a) One or more signals arrive and are handled -// by sigsend using cas to set bits in sig.mask. -// The handler that changes sig.mask from zero to non-zero -// calls notewakeup(&sig). -// 1b) Sigrecv calls notesleep(&sig) to wait for the wakeup. -// -// 2) Having received the wakeup, sigrecv knows that sigsend -// will not send another wakeup, so it can noteclear(&sig) -// to prepare for the next round. (Sigsend may still be adding -// signals to sig.mask at this point, which is fine.) -// -// 3) Sigrecv uses cas to grab the current sig.mask and zero it, -// triggering the next round. -// -// The signal handler takes ownership of the note by atomically -// changing mask from a zero to non-zero value. It gives up -// ownership by calling notewakeup. The signal goroutine takes -// ownership by returning from notesleep (caused by the notewakeup) -// and gives up ownership by clearing mask. +// sigsend() is called by the signal handler to queue a new signal. +// signal_recv() is called by the Go program to receive a newly queued signal. +// Synchronization between sigsend() and signal_recv() is based on the sig.state +// variable. It can be in 3 states: 0, HASWAITER and HASSIGNAL. +// HASWAITER means that signal_recv() is blocked on sig.Note and there are no +// new pending signals. 
+// HASSIGNAL means that sig.mask *may* contain new pending signals, +// signal_recv() can't be blocked in this state. +// 0 means that there are no new pending signals and signal_recv() is not blocked. +// Transitions between states are done atomically with CAS. +// When signal_recv() is unblocked, it resets sig.Note and rechecks sig.mask. +// If several sigsend()'s and signal_recv() execute concurrently, it can lead to +// unnecessary rechecks of sig.mask, but must not lead to missed signals +// nor deadlocks. package runtime #include "runtime.h" @@ -45,15 +33,20 @@ static struct { Note; uint32 mask[(NSIG+31)/32]; uint32 wanted[(NSIG+31)/32]; - uint32 kick; + uint32 state; bool inuse; } sig; +enum { + HASWAITER = 1, + HASSIGNAL = 2, +}; + // Called from sighandler to send a signal back out of the signal handling thread. bool runtime·sigsend(int32 s) { - uint32 bit, mask; + uint32 bit, mask, old, new; if(!sig.inuse || s < 0 || s >= 32*nelem(sig.wanted) || !(sig.wanted[s/32]&(1U<<(s&31)))) return false; @@ -65,8 +58,20 @@ runtime·sigsend(int32 s) if(runtime·cas(&sig.mask[s/32], mask, mask|bit)) { // Added to queue. // Only send a wakeup if the receiver needs a kick. - if(runtime·cas(&sig.kick, 1, 0)) - runtime·notewakeup(&sig); + for(;;) { + old = runtime·atomicload(&sig.state); + if(old == HASSIGNAL) + break; + if(old == HASWAITER) + new = 0; + else // if(old == 0) + new = HASSIGNAL; + if(runtime·cas(&sig.state, old, new)) { + if (old == HASWAITER) + runtime·notewakeup(&sig); + break; + } + } break; } } @@ -77,7 +82,7 @@ runtime·sigsend(int32 s) // Must only be called from a single goroutine at a time. func signal_recv() (m uint32) { static uint32 recv[nelem(sig.mask)]; - int32 i, more; + uint32 i, old, new; for(;;) { // Serve from local copy if there are bits left. @@ -89,15 +94,27 @@ func signal_recv() (m uint32) { } } - // Get a new local copy. - // Ask for a kick if more signals come in - // during or after our check (before the sleep). - if(sig.kick == 0) { - runtime·noteclear(&sig); - runtime·cas(&sig.kick, 0, 1); + // Check and update sig.state. + for(;;) { + old = runtime·atomicload(&sig.state); + if(old == HASWAITER) + runtime·throw("inconsistent state in signal_recv"); + if(old == HASSIGNAL) + new = 0; + else // if(old == 0) + new = HASWAITER; + if(runtime·cas(&sig.state, old, new)) { + if (new == HASWAITER) { + runtime·entersyscallblock(); + runtime·notesleep(&sig); + runtime·exitsyscall(); + runtime·noteclear(&sig); + } + break; + } } - more = 0; + // Get a new local copy. for(i=0; i<nelem(sig.mask); i++) { for(;;) { m = sig.mask[i]; @@ -105,16 +122,7 @@ func signal_recv() (m uint32) { break; } recv[i] = m; - if(m != 0) - more = 1; } - if(more) - continue; - - // Sleep waiting for more. 
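To make the three-state protocol above concrete, here is a rough user-space model of it (my sketch, not runtime code: a buffered channel stands in for the runtime Note, and signal_recv's HASWAITER consistency throw is omitted):

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	idle      uint32 = 0
	hasWaiter uint32 = 1
	hasSignal uint32 = 2
)

var (
	state  uint32
	mask   uint32                   // stands in for sig.mask
	wakeup = make(chan struct{}, 1) // stands in for the Note
)

// send models sigsend: publish a bit, then kick the receiver only if
// it is parked; if the state already says HASSIGNAL nothing more is needed.
func send(bit uint32) {
	for {
		old := atomic.LoadUint32(&mask)
		if atomic.CompareAndSwapUint32(&mask, old, old|bit) {
			break
		}
	}
	for {
		switch old := atomic.LoadUint32(&state); old {
		case hasSignal:
			return
		case hasWaiter:
			if atomic.CompareAndSwapUint32(&state, old, idle) {
				wakeup <- struct{}{} // notewakeup
				return
			}
		default: // idle
			if atomic.CompareAndSwapUint32(&state, old, hasSignal) {
				return
			}
		}
	}
}

// recv models signal_recv: drain the mask, or park as HASWAITER and block.
// A concurrent send either clears our waiter state and wakes us, or has
// already set HASSIGNAL, which makes us recheck the mask immediately.
func recv() uint32 {
	for {
		if m := atomic.SwapUint32(&mask, 0); m != 0 {
			return m
		}
		for {
			old := atomic.LoadUint32(&state)
			new := hasWaiter
			if old == hasSignal {
				new = idle
			}
			if atomic.CompareAndSwapUint32(&state, old, new) {
				if new == hasWaiter {
					<-wakeup // notesleep
				}
				break
			}
		}
	}
}

func main() {
	go send(1 << 3)
	fmt.Printf("received mask %#x\n", recv())
}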
- runtime·entersyscall(); - runtime·notesleep(&sig); - runtime·exitsyscall(); } done:; diff --git a/src/pkg/runtime/slice.c b/src/pkg/runtime/slice.c index e5726c93b..354c54c86 100644 --- a/src/pkg/runtime/slice.c +++ b/src/pkg/runtime/slice.c @@ -5,22 +5,30 @@ #include "runtime.h" #include "arch_GOARCH.h" #include "type.h" +#include "typekind.h" #include "malloc.h" +#include "race.h" -static int32 debug = 0; +static bool debug = 0; -static void makeslice1(SliceType*, int32, int32, Slice*); -static void growslice1(SliceType*, Slice, int32, Slice *); - void runtime·copy(Slice to, Slice fm, uintptr width, int32 ret); +static void makeslice1(SliceType*, intgo, intgo, Slice*); +static void growslice1(SliceType*, Slice, intgo, Slice *); + void runtime·copy(Slice to, Slice fm, uintptr width, intgo ret); // see also unsafe·NewArray // makeslice(typ *Type, len, cap int64) (ary []any); void runtime·makeslice(SliceType *t, int64 len, int64 cap, Slice ret) { - if(len < 0 || (int32)len != len) + // NOTE: The len > MaxMem/elemsize check here is not strictly necessary, + // but it produces a 'len out of range' error instead of a 'cap out of range' error + // when someone does make([]T, bignumber). 'cap out of range' is true too, + // but since the cap is only being supplied implicitly, saying len is clearer. + // See issue 4085. + if(len < 0 || (intgo)len != len || t->elem->size > 0 && len > MaxMem / t->elem->size) runtime·panicstring("makeslice: len out of range"); - if(cap < len || (int32)cap != cap || t->elem->size > 0 && cap > ((uintptr)-1) / t->elem->size) + + if(cap < len || (intgo)cap != cap || t->elem->size > 0 && cap > MaxMem / t->elem->size) runtime·panicstring("makeslice: cap out of range"); makeslice1(t, len, cap, &ret); @@ -35,10 +43,10 @@ runtime·makeslice(SliceType *t, int64 len, int64 cap, Slice ret) // Dummy word to use as base pointer for make([]T, 0). // Since you cannot take the address of such a slice, // you can't tell that they all have the same base pointer. -static uintptr zerobase; +uintptr runtime·zerobase; static void -makeslice1(SliceType *t, int32 len, int32 cap, Slice *ret) +makeslice1(SliceType *t, intgo len, intgo cap, Slice *ret) { uintptr size; @@ -47,22 +55,34 @@ makeslice1(SliceType *t, int32 len, int32 cap, Slice *ret) ret->len = len; ret->cap = cap; - if(cap == 0) - ret->array = (byte*)&zerobase; + if(size == 0) + ret->array = (byte*)&runtime·zerobase; else if((t->elem->kind&KindNoPointers)) ret->array = runtime·mallocgc(size, FlagNoPointers, 1, 1); - else - ret->array = runtime·mal(size); + else { + ret->array = runtime·mallocgc(size, 0, 1, 1); + + if(UseSpanType) { + if(false) { + runtime·printf("new slice [%D]%S: %p\n", (int64)cap, *t->elem->string, ret->array); + } + runtime·settype(ret->array, (uintptr)t->elem | TypeInfo_Array); + } + } } // appendslice(type *Type, x, y, []T) []T +#pragma textflag 7 void runtime·appendslice(SliceType *t, Slice x, Slice y, Slice ret) { - int32 m; + intgo m; uintptr w; + void *pc; + uint8 *p, *q; m = x.len+y.len; + w = t->elem->size; if(m < x.len) runtime·throw("append: slice overflow"); @@ -72,30 +92,83 @@ runtime·appendslice(SliceType *t, Slice x, Slice y, Slice ret) else ret = x; - w = t->elem->size; - runtime·memmove(ret.array + ret.len*w, y.array, y.len*w); + if(raceenabled) { + // Don't mark read/writes on the newly allocated slice. 
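The effect of the reworked makeslice checks is visible from ordinary Go code, and per the comment above an implicit cap is reported as a length error (issue 4085). A small demonstration, assuming a 64-bit build so the count overflows MaxMem rather than int:

package main

import "fmt"

func main() {
	defer func() { fmt.Println(recover()) }()
	n := int64(1) << 62
	_ = make([]byte, n) // only len is supplied, so this panics "makeslice: len out of range"
}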
+ pc = runtime·getcallerpc(&t); + // read x[:len] + if(m > x.cap) + runtime·racereadrangepc(x.array, x.len*w, w, pc, runtime·appendslice); + // read y + runtime·racereadrangepc(y.array, y.len*w, w, pc, runtime·appendslice); + // write x[len(x):len(x)+len(y)] + if(m <= x.cap) + runtime·racewriterangepc(ret.array+ret.len*w, y.len*w, w, pc, runtime·appendslice); + } + + // A very common case is appending bytes. Small appends can avoid the overhead of memmove. + // We can generalize a bit here, and just pick small-sized appends. + p = ret.array+ret.len*w; + q = y.array; + w *= y.len; + if(w <= appendCrossover) { + if(p <= q || w <= p-q) // No overlap. + while(w-- > 0) + *p++ = *q++; + else { + p += w; + q += w; + while(w-- > 0) + *--p = *--q; + } + } else { + runtime·memmove(p, q, w); + } ret.len += y.len; FLUSH(&ret); } // appendstr([]byte, string) []byte +#pragma textflag 7 void runtime·appendstr(SliceType *t, Slice x, String y, Slice ret) { - int32 m; + intgo m; + void *pc; + uintptr w; + uint8 *p, *q; m = x.len+y.len; if(m < x.len) - runtime·throw("append: slice overflow"); + runtime·throw("append: string overflow"); if(m > x.cap) growslice1(t, x, m, &ret); else ret = x; - runtime·memmove(ret.array + ret.len, y.str, y.len); + if(raceenabled) { + // Don't mark read/writes on the newly allocated slice. + pc = runtime·getcallerpc(&t); + // read x[:len] + if(m > x.cap) + runtime·racereadrangepc(x.array, x.len, 1, pc, runtime·appendstr); + // write x[len(x):len(x)+len(y)] + if(m <= x.cap) + runtime·racewriterangepc(ret.array+ret.len, y.len, 1, pc, runtime·appendstr); + } + + // Small appends can avoid the overhead of memmove. + w = y.len; + p = ret.array+ret.len; + q = y.str; + if(w <= appendCrossover) { + while(w-- > 0) + *p++ = *q++; + } else { + runtime·memmove(p, q, w); + } ret.len += y.len; FLUSH(&ret); } @@ -106,15 +179,21 @@ void runtime·growslice(SliceType *t, Slice old, int64 n, Slice ret) { int64 cap; + void *pc; if(n < 1) runtime·panicstring("growslice: invalid n"); cap = old.cap + n; - if((int32)cap != cap || cap > ((uintptr)-1) / t->elem->size) + if((intgo)cap != cap || cap < old.cap || (t->elem->size > 0 && cap > MaxMem/t->elem->size)) runtime·panicstring("growslice: cap out of range"); + if(raceenabled) { + pc = runtime·getcallerpc(&t); + runtime·racereadrangepc(old.array, old.len*t->elem->size, t->elem->size, pc, runtime·growslice); + } + growslice1(t, old, cap, &ret); FLUSH(&ret); @@ -128,12 +207,17 @@ runtime·growslice(SliceType *t, Slice old, int64 n, Slice ret) } static void -growslice1(SliceType *t, Slice x, int32 newcap, Slice *ret) +growslice1(SliceType *t, Slice x, intgo newcap, Slice *ret) { - int32 m; + intgo m; m = x.cap; - if(m == 0) + + // Using newcap directly for m+m < newcap handles + // both the case where m == 0 and also the case where + // m+m/4 wraps around, in which case the loop + // below might never terminate. 
+ if(m+m < newcap) m = newcap; else { do { @@ -147,153 +231,13 @@ growslice1(SliceType *t, Slice x, int32 newcap, Slice *ret) runtime·memmove(ret->array, x.array, ret->len * t->elem->size); } -// sliceslice(old []any, lb uint64, hb uint64, width uint64) (ary []any); -void -runtime·sliceslice(Slice old, uint64 lb, uint64 hb, uint64 width, Slice ret) -{ - if(hb > old.cap || lb > hb) { - if(debug) { - runtime·prints("runtime.sliceslice: old="); - runtime·printslice(old); - runtime·prints("; lb="); - runtime·printint(lb); - runtime·prints("; hb="); - runtime·printint(hb); - runtime·prints("; width="); - runtime·printint(width); - runtime·prints("\n"); - - runtime·prints("oldarray: nel="); - runtime·printint(old.len); - runtime·prints("; cap="); - runtime·printint(old.cap); - runtime·prints("\n"); - } - runtime·panicslice(); - } - - // new array is inside old array - ret.len = hb - lb; - ret.cap = old.cap - lb; - ret.array = old.array + lb*width; - - FLUSH(&ret); - - if(debug) { - runtime·prints("runtime.sliceslice: old="); - runtime·printslice(old); - runtime·prints("; lb="); - runtime·printint(lb); - runtime·prints("; hb="); - runtime·printint(hb); - runtime·prints("; width="); - runtime·printint(width); - runtime·prints("; ret="); - runtime·printslice(ret); - runtime·prints("\n"); - } -} - -// sliceslice1(old []any, lb uint64, width uint64) (ary []any); -void -runtime·sliceslice1(Slice old, uint64 lb, uint64 width, Slice ret) -{ - if(lb > old.len) { - if(debug) { - runtime·prints("runtime.sliceslice: old="); - runtime·printslice(old); - runtime·prints("; lb="); - runtime·printint(lb); - runtime·prints("; width="); - runtime·printint(width); - runtime·prints("\n"); - - runtime·prints("oldarray: nel="); - runtime·printint(old.len); - runtime·prints("; cap="); - runtime·printint(old.cap); - runtime·prints("\n"); - } - runtime·panicslice(); - } - - // new array is inside old array - ret.len = old.len - lb; - ret.cap = old.cap - lb; - ret.array = old.array + lb*width; - - FLUSH(&ret); - - if(debug) { - runtime·prints("runtime.sliceslice: old="); - runtime·printslice(old); - runtime·prints("; lb="); - runtime·printint(lb); - runtime·prints("; width="); - runtime·printint(width); - runtime·prints("; ret="); - runtime·printslice(ret); - runtime·prints("\n"); - } -} - -// slicearray(old *any, nel uint64, lb uint64, hb uint64, width uint64) (ary []any); +// copy(to any, fr any, wid uintptr) int +#pragma textflag 7 void -runtime·slicearray(byte* old, uint64 nel, uint64 lb, uint64 hb, uint64 width, Slice ret) +runtime·copy(Slice to, Slice fm, uintptr width, intgo ret) { - if(nel > 0 && old == nil) { - // crash if old == nil. - // could give a better message - // but this is consistent with all the in-line checks - // that the compiler inserts for other uses. 
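Restated in Go for clarity (my paraphrase of growslice1, not runtime code): jump straight to the requested capacity whenever one doubling cannot reach it, which in a single test also covers the m == 0 start and the m+m/4 wraparound the comment mentions; otherwise double small slices and grow large ones by a quarter.

package main

import "fmt"

// nextcap mirrors growslice1's capacity rule.
func nextcap(oldlen, oldcap, newcap int) int {
	m := oldcap
	if m+m < newcap {
		return newcap // one test covers m == 0 and m+m/4 wraparound
	}
	for m < newcap {
		if oldlen < 1024 {
			m += m // double small slices
		} else {
			m += m / 4 // grow large slices by 25%
		}
	}
	return m
}

func main() {
	fmt.Println(nextcap(0, 0, 5))          // 5: straight to the request
	fmt.Println(nextcap(512, 512, 513))    // 1024: doubled
	fmt.Println(nextcap(2048, 2048, 2049)) // 2560: grown by a quarter
}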
- *old = 0; - } - - if(hb > nel || lb > hb) { - if(debug) { - runtime·prints("runtime.slicearray: old="); - runtime·printpointer(old); - runtime·prints("; nel="); - runtime·printint(nel); - runtime·prints("; lb="); - runtime·printint(lb); - runtime·prints("; hb="); - runtime·printint(hb); - runtime·prints("; width="); - runtime·printint(width); - runtime·prints("\n"); - } - runtime·panicslice(); - } - - // new array is inside old array - ret.len = hb-lb; - ret.cap = nel-lb; - ret.array = old + lb*width; - - FLUSH(&ret); - - if(debug) { - runtime·prints("runtime.slicearray: old="); - runtime·printpointer(old); - runtime·prints("; nel="); - runtime·printint(nel); - runtime·prints("; lb="); - runtime·printint(lb); - runtime·prints("; hb="); - runtime·printint(hb); - runtime·prints("; width="); - runtime·printint(width); - runtime·prints("; ret="); - runtime·printslice(ret); - runtime·prints("\n"); - } -} + void *pc; -// copy(to any, fr any, wid uint32) int -void -runtime·copy(Slice to, Slice fm, uintptr width, int32 ret) -{ if(fm.len == 0 || to.len == 0 || width == 0) { ret = 0; goto out; @@ -303,6 +247,12 @@ runtime·copy(Slice to, Slice fm, uintptr width, int32 ret) if(to.len < ret) ret = to.len; + if(raceenabled) { + pc = runtime·getcallerpc(&to); + runtime·racewriterangepc(to.array, ret*width, width, pc, runtime·copy); + runtime·racereadrangepc(fm.array, ret*width, width, pc, runtime·copy); + } + if(ret == 1 && width == 1) { // common case worth about 2x to do here *to.array = *fm.array; // known to be a byte pointer } else { @@ -325,9 +275,12 @@ out: } } +#pragma textflag 7 void -runtime·slicestringcopy(Slice to, String fm, int32 ret) +runtime·slicestringcopy(Slice to, String fm, intgo ret) { + void *pc; + if(fm.len == 0 || to.len == 0) { ret = 0; goto out; @@ -337,6 +290,11 @@ runtime·slicestringcopy(Slice to, String fm, int32 ret) if(to.len < ret) ret = to.len; + if(raceenabled) { + pc = runtime·getcallerpc(&to); + runtime·racewriterangepc(to.array, ret, 1, pc, runtime·slicestringcopy); + } + runtime·memmove(to.array, fm.str, ret); out: diff --git a/src/pkg/runtime/softfloat_arm.c b/src/pkg/runtime/softfloat_arm.c index bd73cb15b..9a5440630 100644 --- a/src/pkg/runtime/softfloat_arm.c +++ b/src/pkg/runtime/softfloat_arm.c @@ -420,6 +420,23 @@ stage3: // regd, regm are 4bit variables regd, regm, m->freghi[regd], m->freglo[regd]); break; + case 0xeeb00bc0: // D[regd] = abs D[regm] + m->freglo[regd] = m->freglo[regm]; + m->freghi[regd] = m->freghi[regm] & ((1<<31)-1); + + if(trace) + runtime·printf("*** D[%d] = abs D[%d] %x-%x\n", + regd, regm, m->freghi[regd], m->freglo[regd]); + break; + + case 0xeeb00ac0: // F[regd] = abs F[regm] + m->freglo[regd] = m->freglo[regm] & ((1<<31)-1); + + if(trace) + runtime·printf("*** F[%d] = abs F[%d] %x\n", + regd, regm, m->freglo[regd]); + break; + case 0xeeb40bc0: // D[regd] :: D[regm] (CMPD) runtime·fcmp64c(getd(regd), getd(regm), &cmp, &nan); m->fflag = fstatus(nan, cmp); diff --git a/src/pkg/runtime/stack.c b/src/pkg/runtime/stack.c new file mode 100644 index 000000000..e9a35672f --- /dev/null +++ b/src/pkg/runtime/stack.c @@ -0,0 +1,282 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
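A step back to the slice.c hunks above: runtime·copy clamps the count to the shorter of the two slices, and width-1 single-element copies take the byte fast path. In user terms:

package main

import "fmt"

func main() {
	dst := make([]byte, 3)
	src := []byte("hello")
	n := copy(dst, src)         // clamped to the shorter slice, exactly as above
	fmt.Println(n, string(dst)) // 3 hel
}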
+ +#include "runtime.h" +#include "arch_GOARCH.h" +#include "malloc.h" +#include "stack.h" + +typedef struct StackCacheNode StackCacheNode; +struct StackCacheNode +{ + StackCacheNode *next; + void* batch[StackCacheBatch-1]; +}; + +static StackCacheNode *stackcache; +static Lock stackcachemu; + +// stackcacherefill/stackcacherelease implement a global cache of stack segments. +// The cache is required to prevent unlimited growth of per-thread caches. +static void +stackcacherefill(void) +{ + StackCacheNode *n; + int32 i, pos; + + runtime·lock(&stackcachemu); + n = stackcache; + if(n) + stackcache = n->next; + runtime·unlock(&stackcachemu); + if(n == nil) { + n = (StackCacheNode*)runtime·SysAlloc(FixedStack*StackCacheBatch); + if(n == nil) + runtime·throw("out of memory (stackcacherefill)"); + runtime·xadd64(&mstats.stacks_sys, FixedStack*StackCacheBatch); + for(i = 0; i < StackCacheBatch-1; i++) + n->batch[i] = (byte*)n + (i+1)*FixedStack; + } + pos = m->stackcachepos; + for(i = 0; i < StackCacheBatch-1; i++) { + m->stackcache[pos] = n->batch[i]; + pos = (pos + 1) % StackCacheSize; + } + m->stackcache[pos] = n; + pos = (pos + 1) % StackCacheSize; + m->stackcachepos = pos; + m->stackcachecnt += StackCacheBatch; +} + +static void +stackcacherelease(void) +{ + StackCacheNode *n; + uint32 i, pos; + + pos = (m->stackcachepos - m->stackcachecnt) % StackCacheSize; + n = (StackCacheNode*)m->stackcache[pos]; + pos = (pos + 1) % StackCacheSize; + for(i = 0; i < StackCacheBatch-1; i++) { + n->batch[i] = m->stackcache[pos]; + pos = (pos + 1) % StackCacheSize; + } + m->stackcachecnt -= StackCacheBatch; + runtime·lock(&stackcachemu); + n->next = stackcache; + stackcache = n; + runtime·unlock(&stackcachemu); +} + +void* +runtime·stackalloc(uint32 n) +{ + uint32 pos; + void *v; + + // Stackalloc must be called on scheduler stack, so that we + // never try to grow the stack during the code that stackalloc runs. + // Doing so would cause a deadlock (issue 1547). + if(g != m->g0) + runtime·throw("stackalloc not on scheduler stack"); + + // Stack allocator uses malloc/free most of the time, + // but if we're in the middle of malloc and need stack, + // we have to do something else to avoid deadlock. + // In that case, we fall back on a fixed-size free-list + // allocator, assuming that inside malloc all the stack + // frames are small, so that all the stack allocations + // will be a single size, the minimum (right now, 5k). + if(n == FixedStack || m->mallocing || m->gcing) { + if(n != FixedStack) { + runtime·printf("stackalloc: in malloc, size=%d want %d\n", FixedStack, n); + runtime·throw("stackalloc"); + } + if(m->stackcachecnt == 0) + stackcacherefill(); + pos = m->stackcachepos; + pos = (pos - 1) % StackCacheSize; + v = m->stackcache[pos]; + m->stackcachepos = pos; + m->stackcachecnt--; + m->stackinuse++; + return v; + } + return runtime·mallocgc(n, FlagNoProfiling|FlagNoGC, 0, 0); +} + +void +runtime·stackfree(void *v, uintptr n) +{ + uint32 pos; + + if(n == FixedStack || m->mallocing || m->gcing) { + if(m->stackcachecnt == StackCacheSize) + stackcacherelease(); + pos = m->stackcachepos; + m->stackcache[pos] = v; + m->stackcachepos = (pos + 1) % StackCacheSize; + m->stackcachecnt++; + m->stackinuse--; + return; + } + runtime·free(v); +} + +// Called from runtime·lessstack when returning from a function which +// allocated a new stack segment. The function's return value is in +// m->cret. 
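stackcacherefill and stackcacherelease above move FixedStack-sized segments between a per-m ring buffer and a global list, one StackCacheBatch at a time, so an idle thread cannot hoard segments and the lock is taken once per batch rather than once per allocation. A simplified user-space model of that batching (my sketch; the sizes are made up and a mutex stands in for the runtime lock):

package main

import (
	"fmt"
	"sync"
)

const (
	segSize   = 4096 // stands in for FixedStack
	batch     = 16   // stands in for StackCacheBatch
	cacheSize = 64   // stands in for StackCacheSize
)

var (
	mu     sync.Mutex
	global [][]byte // global pool, touched only a batch at a time
)

// cache is the per-thread ring: pos is the next free slot, cnt the
// number of cached segments.
type cache struct {
	buf [cacheSize][]byte
	pos int
	cnt int
}

func (c *cache) refill() {
	mu.Lock()
	for i := 0; i < batch; i++ {
		var s []byte
		if n := len(global); n > 0 {
			s, global = global[n-1], global[:n-1]
		} else {
			s = make([]byte, segSize) // grow the pool on demand
		}
		c.buf[c.pos] = s
		c.pos = (c.pos + 1) % cacheSize
	}
	mu.Unlock()
	c.cnt += batch
}

func (c *cache) release() {
	mu.Lock()
	for i := 0; i < batch; i++ {
		c.pos = (c.pos - 1 + cacheSize) % cacheSize
		global = append(global, c.buf[c.pos])
	}
	mu.Unlock()
	c.cnt -= batch
}

func (c *cache) alloc() []byte {
	if c.cnt == 0 {
		c.refill() // one lock acquisition per batch, not per segment
	}
	c.pos = (c.pos - 1 + cacheSize) % cacheSize
	c.cnt--
	return c.buf[c.pos]
}

func (c *cache) free(s []byte) {
	if c.cnt == cacheSize {
		c.release()
	}
	c.buf[c.pos] = s
	c.pos = (c.pos + 1) % cacheSize
	c.cnt++
}

func main() {
	var c cache
	s := c.alloc()
	c.free(s)
	fmt.Println("cached segments:", c.cnt)
}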
+void +runtime·oldstack(void) +{ + Stktop *top; + Gobuf label; + uint32 argsize; + uintptr cret; + byte *sp, *old; + uintptr *src, *dst, *dstend; + G *gp; + int64 goid; + +//printf("oldstack m->cret=%p\n", m->cret); + + gp = m->curg; + top = (Stktop*)gp->stackbase; + old = (byte*)gp->stackguard - StackGuard; + sp = (byte*)top; + argsize = top->argsize; + if(argsize > 0) { + sp -= argsize; + dst = (uintptr*)top->argp; + dstend = dst + argsize/sizeof(*dst); + src = (uintptr*)sp; + while(dst < dstend) + *dst++ = *src++; + } + goid = top->gobuf.g->goid; // fault if g is bad, before gogo + USED(goid); + + label = top->gobuf; + gp->stackbase = (uintptr)top->stackbase; + gp->stackguard = (uintptr)top->stackguard; + if(top->free != 0) + runtime·stackfree(old, top->free); + + cret = m->cret; + m->cret = 0; // drop reference + runtime·gogo(&label, cret); +} + +// Called from reflect·call or from runtime·morestack when a new +// stack segment is needed. Allocate a new stack big enough for +// m->moreframesize bytes, copy m->moreargsize bytes to the new frame, +// and then act as though runtime·lessstack called the function at +// m->morepc. +void +runtime·newstack(void) +{ + int32 framesize, minalloc, argsize; + Stktop *top; + byte *stk, *sp; + uintptr *src, *dst, *dstend; + G *gp; + Gobuf label; + bool reflectcall; + uintptr free; + + framesize = m->moreframesize; + argsize = m->moreargsize; + gp = m->curg; + + if(m->morebuf.sp < gp->stackguard - StackGuard) { + runtime·printf("runtime: split stack overflow: %p < %p\n", m->morebuf.sp, gp->stackguard - StackGuard); + runtime·throw("runtime: split stack overflow"); + } + if(argsize % sizeof(uintptr) != 0) { + runtime·printf("runtime: stack split with misaligned argsize %d\n", argsize); + runtime·throw("runtime: stack split argsize"); + } + + minalloc = 0; + reflectcall = framesize==1; + if(reflectcall) { + framesize = 0; + // moreframesize_minalloc is only set in runtime·gc(), + // that calls newstack via reflect·call(). + minalloc = m->moreframesize_minalloc; + m->moreframesize_minalloc = 0; + if(framesize < minalloc) + framesize = minalloc; + } + + if(reflectcall && minalloc == 0 && m->morebuf.sp - sizeof(Stktop) - argsize - 32 > gp->stackguard) { + // special case: called from reflect.call (framesize==1) + // to call code with an arbitrary argument size, + // and we have enough space on the current stack. + // the new Stktop* is necessary to unwind, but + // we don't need to create a new segment. + top = (Stktop*)(m->morebuf.sp - sizeof(*top)); + stk = (byte*)gp->stackguard - StackGuard; + free = 0; + } else { + // allocate new segment. + framesize += argsize; + framesize += StackExtra; // room for more functions, Stktop. 
+ if(framesize < StackMin) + framesize = StackMin; + framesize += StackSystem; + stk = runtime·stackalloc(framesize); + top = (Stktop*)(stk+framesize-sizeof(*top)); + free = framesize; + } + + if(0) { + runtime·printf("newstack framesize=%d argsize=%d morepc=%p moreargp=%p gobuf=%p, %p top=%p old=%p\n", + framesize, argsize, m->morepc, m->moreargp, m->morebuf.pc, m->morebuf.sp, top, gp->stackbase); + } + + top->stackbase = (byte*)gp->stackbase; + top->stackguard = (byte*)gp->stackguard; + top->gobuf = m->morebuf; + top->argp = m->moreargp; + top->argsize = argsize; + top->free = free; + m->moreargp = nil; + m->morebuf.pc = nil; + m->morebuf.sp = (uintptr)nil; + + // copy flag from panic + top->panic = gp->ispanic; + gp->ispanic = false; + + gp->stackbase = (uintptr)top; + gp->stackguard = (uintptr)stk + StackGuard; + + sp = (byte*)top; + if(argsize > 0) { + sp -= argsize; + dst = (uintptr*)sp; + dstend = dst + argsize/sizeof(*dst); + src = (uintptr*)top->argp; + while(dst < dstend) + *dst++ = *src++; + } + if(thechar == '5') { + // caller would have saved its LR below args. + sp -= sizeof(void*); + *(void**)sp = nil; + } + + // Continue as if lessstack had just called m->morepc + // (the PC that decided to grow the stack). + label.sp = (uintptr)sp; + label.pc = (byte*)runtime·lessstack; + label.g = m->curg; + if(reflectcall) + runtime·gogocallfn(&label, (FuncVal*)m->morepc); + else + runtime·gogocall(&label, m->morepc, m->cret); + + *(int32*)345 = 123; // never return +} diff --git a/src/pkg/runtime/stack.h b/src/pkg/runtime/stack.h index d42385d6c..06b0c568c 100644 --- a/src/pkg/runtime/stack.h +++ b/src/pkg/runtime/stack.h @@ -55,13 +55,19 @@ functions to make sure that this limit cannot be violated. enum { // StackSystem is a number of additional bytes to add // to each stack below the usual guard area for OS-specific - // purposes like signal handling. Used on Windows because - // it does not use a separate stack. + // purposes like signal handling. Used on Windows and on + // Plan 9 because they do not use a separate stack. #ifdef GOOS_windows StackSystem = 512 * sizeof(uintptr), #else +#ifdef GOOS_plan9 + // The size of the note handler frame varies among architectures, + // but 512 bytes should be enough for every implementation. + StackSystem = 512, +#else StackSystem = 0, -#endif +#endif // Plan 9 +#endif // Windows // The amount of extra stack to allocate beyond the size // needed for the single frame that triggered the split. diff --git a/src/pkg/runtime/stack_test.go b/src/pkg/runtime/stack_test.go index 01c6b6e2f..759f7c46e 100644 --- a/src/pkg/runtime/stack_test.go +++ b/src/pkg/runtime/stack_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Test stack split logic by calling functions of every frame size +// Test stack split logic by calling functions of every frame size // from near 0 up to and beyond the default segment size (4k). // Each of those functions reports its SP + stack limit, and then // the test (the caller) checks that those make sense. By not @@ -34,6 +34,7 @@ package runtime_test import ( . 
"runtime" "testing" + "time" "unsafe" ) @@ -1526,3 +1527,51 @@ func stack4988() (uintptr, uintptr) { var buf [4988]byte; use(buf[:]); return St func stack4992() (uintptr, uintptr) { var buf [4992]byte; use(buf[:]); return Stackguard() } func stack4996() (uintptr, uintptr) { var buf [4996]byte; use(buf[:]); return Stackguard() } func stack5000() (uintptr, uintptr) { var buf [5000]byte; use(buf[:]); return Stackguard() } + +// TestStackMem measures per-thread stack segment cache behavior. +// The test consumed up to 500MB in the past. +func TestStackMem(t *testing.T) { + const ( + BatchSize = 32 + BatchCount = 512 + ArraySize = 1024 + RecursionDepth = 128 + ) + if testing.Short() { + return + } + defer GOMAXPROCS(GOMAXPROCS(BatchSize)) + s0 := new(MemStats) + ReadMemStats(s0) + for b := 0; b < BatchCount; b++ { + c := make(chan bool, BatchSize) + for i := 0; i < BatchSize; i++ { + go func() { + var f func(k int, a [ArraySize]byte) + f = func(k int, a [ArraySize]byte) { + if k == 0 { + time.Sleep(time.Millisecond) + return + } + f(k-1, a) + } + f(RecursionDepth, [ArraySize]byte{}) + c <- true + }() + } + for i := 0; i < BatchSize; i++ { + <-c + } + } + s1 := new(MemStats) + ReadMemStats(s1) + consumed := s1.StackSys - s0.StackSys + t.Logf("Consumed %vMB for stack mem", consumed>>20) + estimate := uint64(8 * BatchSize * ArraySize * RecursionDepth) // 8 is to reduce flakiness. + if consumed > estimate { + t.Fatalf("Stack mem: want %v, got %v", estimate, consumed) + } + if s1.StackInuse > 4<<20 { + t.Fatalf("Stack inuse: want %v, got %v", 4<<20, s1.StackInuse) + } +} diff --git a/src/pkg/runtime/string.goc b/src/pkg/runtime/string.goc index 7e1f8a1e8..c0d3f2bde 100644 --- a/src/pkg/runtime/string.goc +++ b/src/pkg/runtime/string.goc @@ -6,6 +6,7 @@ package runtime #include "runtime.h" #include "arch_GOARCH.h" #include "malloc.h" +#include "race.h" String runtime·emptystring; @@ -33,10 +34,10 @@ runtime·findnullw(uint16 *s) return l; } -uint32 runtime·maxstring = 256; +uint32 runtime·maxstring = 256; // a hint for print static String -gostringsize(int32 l) +gostringsize(intgo l) { String s; uint32 ms; @@ -58,7 +59,7 @@ gostringsize(int32 l) String runtime·gostring(byte *str) { - int32 l; + intgo l; String s; l = runtime·findnull(str); @@ -68,7 +69,7 @@ runtime·gostring(byte *str) } String -runtime·gostringn(byte *str, int32 l) +runtime·gostringn(byte *str, intgo l) { String s; @@ -78,7 +79,7 @@ runtime·gostringn(byte *str, int32 l) } Slice -runtime·gobytes(byte *p, int32 n) +runtime·gobytes(byte *p, intgo n) { Slice sl; @@ -102,7 +103,7 @@ runtime·gostringnocopy(byte *str) String runtime·gostringw(uint16 *str) { - int32 n1, n2, i; + intgo n1, n2, i; byte buf[8]; String s; @@ -139,17 +140,26 @@ runtime·catstring(String s1, String s2) } static String -concatstring(int32 n, String *s) +concatstring(intgo n, String *s) { - int32 i, l; + intgo i, l, count; String out; l = 0; + count = 0; for(i=0; i<n; i++) { if(l + s[i].len < l) runtime·throw("string concatenation too long"); l += s[i].len; + if(s[i].len > 0) { + count++; + out = s[i]; + } } + if(count == 0) + return runtime·emptystring; + if(count == 1) // zero or one non-empty string in concatenation + return out; out = gostringsize(l); l = 0; @@ -163,14 +173,14 @@ concatstring(int32 n, String *s) #pragma textflag 7 // s1 is the first of n strings. // the output string follows. 
-func concatstring(n int32, s1 String) { +func concatstring(n int, s1 String) { (&s1)[n] = concatstring(n, &s1); } static int32 cmpstring(String s1, String s2) { - uint32 i, l; + uintgo i, l; byte c1, c2; l = s1.len; @@ -191,14 +201,34 @@ cmpstring(String s1, String s2) return 0; } -func cmpstring(s1 String, s2 String) (v int32) { +func cmpstring(s1 String, s2 String) (v int) { v = cmpstring(s1, s2); } +func eqstring(s1 String, s2 String) (v bool) { + uintgo i, l; + + if(s1.len != s2.len) { + v = false; + return; + } + if(s1.str == s2.str) { + v = true; + return; + } + l = s1.len; + for(i=0; i<l; i++) + if(s1.str[i] != s2.str[i]) { + v = false; + return; + } + v = true; +} + int32 runtime·strcmp(byte *s1, byte *s2) { - uint32 i; + uintptr i; byte c1, c2; for(i=0;; i++) { @@ -235,31 +265,6 @@ runtime·strstr(byte *s1, byte *s2) return nil; } -func slicestring(si String, lindex int32, hindex int32) (so String) { - int32 l; - - if(lindex < 0 || lindex > si.len || - hindex < lindex || hindex > si.len) { - runtime·panicslice(); - } - - l = hindex-lindex; - so.str = si.str + lindex; - so.len = l; -} - -func slicestring1(si String, lindex int32) (so String) { - int32 l; - - if(lindex < 0 || lindex > si.len) { - runtime·panicslice(); - } - - l = si.len-lindex; - so.str = si.str + lindex; - so.len = l; -} - func intstring(v int64) (s String) { s = gostringsize(8); s.len = runtime·runetochar(s.str, v); @@ -267,6 +272,12 @@ func intstring(v int64) (s String) { } func slicebytetostring(b Slice) (s String) { + void *pc; + + if(raceenabled) { + pc = runtime·getcallerpc(&b); + runtime·racereadrangepc(b.array, b.len, 1, pc, runtime·slicebytetostring); + } s = gostringsize(b.len); runtime·memmove(s.str, b.array, s.len); } @@ -279,10 +290,15 @@ func stringtoslicebyte(s String) (b Slice) { } func slicerunetostring(b Slice) (s String) { - int32 siz1, siz2, i; + intgo siz1, siz2, i; int32 *a; byte dum[8]; + void *pc; + if(raceenabled) { + pc = runtime·getcallerpc(&b); + runtime·racereadrangepc(b.array, b.len*sizeof(*a), sizeof(*a), pc, runtime·slicerunetostring); + } a = (int32*)b.array; siz1 = 0; for(i=0; i<b.len; i++) { @@ -302,7 +318,7 @@ func slicerunetostring(b Slice) (s String) { } func stringtoslicerune(s String) (b Slice) { - int32 n; + intgo n; int32 dum, *r; uint8 *p, *ep; @@ -330,7 +346,7 @@ enum Runeself = 0x80, }; -func stringiter(s String, k int32) (retk int32) { +func stringiter(s String, k int) (retk int) { int32 l; if(k >= s.len) { @@ -351,7 +367,7 @@ func stringiter(s String, k int32) (retk int32) { out: } -func stringiter2(s String, k int32) (retk int32, retv int32) { +func stringiter2(s String, k int) (retk int, retv int32) { if(k >= s.len) { // retk=0 is end of iteration retk = 0; diff --git a/src/pkg/runtime/string_test.go b/src/pkg/runtime/string_test.go new file mode 100644 index 000000000..8f13f0f42 --- /dev/null +++ b/src/pkg/runtime/string_test.go @@ -0,0 +1,45 @@ +package runtime_test + +import ( + "testing" +) + +func BenchmarkCompareStringEqual(b *testing.B) { + bytes := []byte("Hello Gophers!") + s1, s2 := string(bytes), string(bytes) + for i := 0; i < b.N; i++ { + if s1 != s2 { + b.Fatal("s1 != s2") + } + } +} + +func BenchmarkCompareStringIdentical(b *testing.B) { + s1 := "Hello Gophers!" + s2 := s1 + for i := 0; i < b.N; i++ { + if s1 != s2 { + b.Fatal("s1 != s2") + } + } +} + +func BenchmarkCompareStringSameLength(b *testing.B) { + s1 := "Hello Gophers!" 
+ s2 := "Hello, Gophers" + for i := 0; i < b.N; i++ { + if s1 == s2 { + b.Fatal("s1 == s2") + } + } +} + +func BenchmarkCompareStringDifferentLength(b *testing.B) { + s1 := "Hello Gophers!" + s2 := "Hello, Gophers!" + for i := 0; i < b.N; i++ { + if s1 == s2 { + b.Fatal("s1 == s2") + } + } +} diff --git a/src/pkg/runtime/symtab.c b/src/pkg/runtime/symtab.c index f29276bd7..d7221c476 100644 --- a/src/pkg/runtime/symtab.c +++ b/src/pkg/runtime/symtab.c @@ -2,20 +2,59 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Runtime symbol table access. Work in progress. -// The Plan 9 symbol table is not in a particularly convenient form. -// The routines here massage it into a more usable form; eventually -// we'll change 6l to do this for us, but it is easier to experiment -// here than to change 6l and all the other tools. +// Runtime symbol table parsing. // -// The symbol table also needs to be better integrated with the type -// strings table in the future. This is just a quick way to get started -// and figure out exactly what we want. +// The Go tools use a symbol table derived from the Plan 9 symbol table +// format. The symbol table is kept in its own section treated as +// read-only memory when the binary is running: the binary consults the +// table. +// +// The format used by Go 1.0 was basically the Plan 9 format. Each entry +// is variable sized but had this format: +// +// 4-byte value, big endian +// 1-byte type ([A-Za-z] + 0x80) +// name, NUL terminated (or for 'z' and 'Z' entries, double-NUL terminated) +// 4-byte Go type address, big endian (new in Go) +// +// In order to support greater interoperation with standard toolchains, +// Go 1.1 uses a more flexible yet smaller encoding of the entries. +// The overall structure is unchanged from Go 1.0 and, for that matter, +// from Plan 9. +// +// The Go 1.1 table is a re-encoding of the data in a Go 1.0 table. +// To identify a new table as new, it begins one of two eight-byte +// sequences: +// +// FF FF FF FD 00 00 00 xx - big endian new table +// FD FF FF FF 00 00 00 xx - little endian new table +// +// This sequence was chosen because old tables stop at an entry with type +// 0, so old code reading a new table will see only an empty table. The +// first four bytes are the target-endian encoding of 0xfffffffd. The +// final xx gives AddrSize, the width of a full-width address. +// +// After that header, each entry is encoded as follows. +// +// 1-byte type (0-51 + two flag bits) +// AddrSize-byte value, host-endian OR varint-encoded value +// AddrSize-byte Go type address OR nothing +// [n] name, terminated as before +// +// The type byte comes first, but 'A' encodes as 0 and 'a' as 26, so that +// the type itself is only in the low 6 bits. The upper two bits specify +// the format of the next two fields. If the 0x40 bit is set, the value +// is encoded as an full-width 4- or 8-byte target-endian word. Otherwise +// the value is a varint-encoded number. If the 0x80 bit is set, the Go +// type is present, again as a 4- or 8-byte target-endian word. If not, +// there is no Go type in this entry. The NUL-terminated name ends the +// entry. #include "runtime.h" #include "defs_GOOS_GOARCH.h" #include "os_GOOS.h" #include "arch_GOARCH.h" +#include "malloc.h" extern byte pclntab[], epclntab[], symtab[], esymtab[]; @@ -28,24 +67,100 @@ struct Sym // byte *gotype; }; +static uintptr mainoffset; + +// A dynamically allocated string containing multiple substrings. 
+// Individual strings are slices of hugestring. +static String hugestring; +static int32 hugestring_len; + +extern void main·main(void); + +static uintptr +readword(byte **pp, byte *ep) +{ + byte *p; + + p = *pp; + if(ep - p < sizeof(void*)) { + *pp = ep; + return 0; + } + *pp = p + sizeof(void*); + + // Hairy, but only one of these four cases gets compiled. + if(sizeof(void*) == 8) { + if(BigEndian) { + return ((uint64)p[0]<<56) | ((uint64)p[1]<<48) | ((uint64)p[2]<<40) | ((uint64)p[3]<<32) | + ((uint64)p[4]<<24) | ((uint64)p[5]<<16) | ((uint64)p[6]<<8) | ((uint64)p[7]); + } + return ((uint64)p[7]<<56) | ((uint64)p[6]<<48) | ((uint64)p[5]<<40) | ((uint64)p[4]<<32) | + ((uint64)p[3]<<24) | ((uint64)p[2]<<16) | ((uint64)p[1]<<8) | ((uint64)p[0]); + } + if(BigEndian) { + return ((uint32)p[0]<<24) | ((uint32)p[1]<<16) | ((uint32)p[2]<<8) | ((uint32)p[3]); + } + return ((uint32)p[3]<<24) | ((uint32)p[2]<<16) | ((uint32)p[1]<<8) | ((uint32)p[0]); +} + // Walk over symtab, calling fn(&s) for each symbol. static void walksymtab(void (*fn)(Sym*)) { byte *p, *ep, *q; Sym s; + int32 widevalue, havetype, shift; p = symtab; ep = esymtab; + + // Table must begin with correct magic number. + if(ep - p < 8 || p[4] != 0x00 || p[5] != 0x00 || p[6] != 0x00 || p[7] != sizeof(void*)) + return; + if(BigEndian) { + if(p[0] != 0xff || p[1] != 0xff || p[2] != 0xff || p[3] != 0xfd) + return; + } else { + if(p[0] != 0xfd || p[1] != 0xff || p[2] != 0xff || p[3] != 0xff) + return; + } + p += 8; + while(p < ep) { - if(p + 7 > ep) - break; - s.value = ((uint32)p[0]<<24) | ((uint32)p[1]<<16) | ((uint32)p[2]<<8) | ((uint32)p[3]); + s.symtype = p[0]&0x3F; + widevalue = p[0]&0x40; + havetype = p[0]&0x80; + if(s.symtype < 26) + s.symtype += 'A'; + else + s.symtype += 'a' - 26; + p++; + + // Value, either full-width or varint-encoded. + if(widevalue) { + s.value = readword(&p, ep); + } else { + s.value = 0; + shift = 0; + while(p < ep && (p[0]&0x80) != 0) { + s.value |= (uintptr)(p[0]&0x7F)<<shift; + shift += 7; + p++; + } + if(p >= ep) + break; + s.value |= (uintptr)p[0]<<shift; + p++; + } + + // Go type, if present. Ignored but must skip over. + if(havetype) + readword(&p, ep); - if(!(p[4]&0x80)) + // Name. + if(ep - p < 2) break; - s.symtype = p[4] & ~0x80; - p += 5; + s.name = p; if(s.symtype == 'z' || s.symtype == 'Z') { // path reference string - skip first byte, @@ -65,7 +180,7 @@ walksymtab(void (*fn)(Sym*)) break; p = q+1; } - p += 4; // go type + fn(&s); } } @@ -80,12 +195,13 @@ static int32 nfname; static uint32 funcinit; static Lock funclock; +static uintptr lastvalue; static void dofunc(Sym *sym) { Func *f; - + switch(sym->symtype) { case 't': case 'T': @@ -93,6 +209,11 @@ dofunc(Sym *sym) case 'L': if(runtime·strcmp(sym->name, (byte*)"etext") == 0) break; + if(sym->value < lastvalue) { + runtime·printf("symbols out of order: %p before %p\n", lastvalue, sym->value); + runtime·throw("malformed symbol table"); + } + lastvalue = sym->value; if(func == nil) { nfunc++; break; @@ -104,24 +225,24 @@ dofunc(Sym *sym) f->frame = -sizeof(uintptr); break; case 'm': - if(nfunc > 0 && func != nil) - func[nfunc-1].frame += sym->value; - break; - case 'p': - if(nfunc > 0 && func != nil) { - f = &func[nfunc-1]; - // args counts 32-bit words. - // sym->value is the arg's offset. - // don't know width of this arg, so assume it is 64 bits. 
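A compact decoder for the entry header described in the new symtab.c comment may help: the low six bits select the type letter, 0x40 selects a full-width value instead of a varint, and 0x80 marks a trailing Go type word. This is my sketch, not code from the tree:

package main

import "fmt"

// parseTypeByte splits the leading byte of a Go 1.1 symbol entry.
func parseTypeByte(b byte) (typ byte, wide, hasGoType bool) {
	t := b & 0x3F
	if t < 26 {
		typ = 'A' + t
	} else {
		typ = 'a' + t - 26
	}
	return typ, b&0x40 != 0, b&0x80 != 0
}

// readVarint decodes the little-endian base-128 value used when the
// 0x40 flag is clear, returning the value and the bytes consumed.
func readVarint(p []byte) (v uint64, n int) {
	var shift uint
	for i, b := range p {
		v |= uint64(b&0x7F) << shift
		if b&0x80 == 0 {
			return v, i + 1
		}
		shift += 7
	}
	return 0, 0
}

func main() {
	typ, wide, hasType := parseTypeByte(0x80 | 19) // 'T' entry with a Go type word
	fmt.Printf("type %c wide=%v gotype=%v\n", typ, wide, hasType)
	v, n := readVarint([]byte{0x96, 0x01}) // decodes to 150 in 2 bytes
	fmt.Println(v, n)
}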
- if(f->args < sym->value/4 + 2) - f->args = sym->value/4 + 2; + if(nfunc <= 0 || func == nil) + break; + if(runtime·strcmp(sym->name, (byte*)".frame") == 0) + func[nfunc-1].frame = sym->value; + else if(runtime·strcmp(sym->name, (byte*)".locals") == 0) + func[nfunc-1].locals = sym->value; + else if(runtime·strcmp(sym->name, (byte*)".args") == 0) + func[nfunc-1].args = sym->value; + else { + runtime·printf("invalid 'm' symbol named '%s'\n", sym->name); + runtime·throw("mangled symbol table"); } break; case 'f': if(fname == nil) { if(sym->value >= nfname) { if(sym->value >= 0x10000) { - runtime·printf("invalid symbol file index %p\n", sym->value); + runtime·printf("runtime: invalid symbol file index %p\n", sym->value); runtime·throw("mangled symbol table"); } nfname = sym->value+1; @@ -135,14 +256,15 @@ dofunc(Sym *sym) // put together the path name for a z entry. // the f entries have been accumulated into fname already. -static void +// returns the length of the path name. +static int32 makepath(byte *buf, int32 nbuf, byte *path) { int32 n, len; byte *p, *ep, *q; if(nbuf <= 0) - return; + return 0; p = buf; ep = buf + nbuf; @@ -163,6 +285,26 @@ makepath(byte *buf, int32 nbuf, byte *path) runtime·memmove(p, q, len+1); p += len; } + return p - buf; +} + +// appends p to hugestring +static String +gostringn(byte *p, int32 l) +{ + String s; + + if(l == 0) + return runtime·emptystring; + if(hugestring.str == nil) { + hugestring_len += l; + return runtime·emptystring; + } + s.str = hugestring.str + hugestring.len; + s.len = l; + hugestring.len += s.len; + runtime·memmove(s.str, p, l); + return s; } // walk symtab accumulating path names for use by pc/ln table. @@ -181,11 +323,13 @@ dosrcline(Sym *sym) static int32 incstart; static int32 nfunc, nfile, nhist; Func *f; - int32 i; + int32 i, l; switch(sym->symtype) { case 't': case 'T': + if(hugestring.str == nil) + break; if(runtime·strcmp(sym->name, (byte*)"etext") == 0) break; f = &func[nfunc++]; @@ -200,23 +344,23 @@ dosrcline(Sym *sym) case 'z': if(sym->value == 1) { // entry for main source file for a new object. - makepath(srcbuf, sizeof srcbuf, sym->name+1); + l = makepath(srcbuf, sizeof srcbuf, sym->name+1); nhist = 0; nfile = 0; if(nfile == nelem(files)) return; - files[nfile].srcstring = runtime·gostring(srcbuf); + files[nfile].srcstring = gostringn(srcbuf, l); files[nfile].aline = 0; files[nfile++].delta = 0; } else { // push or pop of included file. - makepath(srcbuf, sizeof srcbuf, sym->name+1); + l = makepath(srcbuf, sizeof srcbuf, sym->name+1); if(srcbuf[0] != '\0') { if(nhist++ == 0) incstart = sym->value; if(nhist == 0 && nfile < nelem(files)) { // new top-level file - files[nfile].srcstring = runtime·gostring(srcbuf); + files[nfile].srcstring = gostringn(srcbuf, l); files[nfile].aline = sym->value; // this is "line 0" files[nfile++].delta = sym->value - 1; @@ -275,7 +419,7 @@ splitpcln(void) line += *p++; else line -= *p++ - 64; - + // pc, line now match. // Because the state machine begins at pc==entry and line==0, // it can happen - just at the beginning! - that the update may @@ -297,7 +441,7 @@ splitpcln(void) while(f < ef && pc >= (f+1)->entry); f->pcln.array = p; // pc0 and ln0 are the starting values for - // the loop over f->pcln, so pc must be + // the loop over f->pcln, so pc must be // adjusted by the same pcquant update // that we're going to do as we continue our loop. 
f->pc0 = pc + pcquant; @@ -323,11 +467,11 @@ runtime·funcline(Func *f, uintptr targetpc) uintptr pc; int32 line; int32 pcquant; - + enum { debug = 0 }; - + switch(thechar) { case '5': pcquant = 4; @@ -354,7 +498,7 @@ runtime·funcline(Func *f, uintptr targetpc) if(debug && !runtime·panicking) runtime·printf("pc<%p targetpc=%p line=%d\n", pc, targetpc, line); - + // If the pc has advanced too far or we're out of data, // stop and the last known line number. if(pc > targetpc || p >= ep) @@ -382,7 +526,7 @@ runtime·funcline(Func *f, uintptr targetpc) } void -runtime·funcline_go(Func *f, uintptr targetpc, String retfile, int32 retline) +runtime·funcline_go(Func *f, uintptr targetpc, String retfile, intgo retline) { retfile = f->src; retline = runtime·funcline(f, targetpc); @@ -406,20 +550,30 @@ buildfuncs(void) // count funcs, fnames nfunc = 0; nfname = 0; + lastvalue = 0; walksymtab(dofunc); - // initialize tables - func = runtime·mal((nfunc+1)*sizeof func[0]); + // Initialize tables. + // Can use FlagNoPointers - all pointers either point into sections of the executable + // or point into hugestring. + func = runtime·mallocgc((nfunc+1)*sizeof func[0], FlagNoPointers, 0, 1); func[nfunc].entry = (uint64)etext; - fname = runtime·mal(nfname*sizeof fname[0]); + fname = runtime·mallocgc(nfname*sizeof fname[0], FlagNoPointers, 0, 1); nfunc = 0; + lastvalue = 0; walksymtab(dofunc); // split pc/ln table by func splitpcln(); // record src file and line info for each func - walksymtab(dosrcline); + walksymtab(dosrcline); // pass 1: determine hugestring_len + hugestring.str = runtime·mallocgc(hugestring_len, FlagNoPointers, 0, 0); + hugestring.len = 0; + walksymtab(dosrcline); // pass 2: fill and use hugestring + + if(hugestring.len != hugestring_len) + runtime·throw("buildfunc: problem in initialization procedure"); m->nomemprof--; } @@ -482,7 +636,7 @@ static bool hasprefix(String s, int8 *p) { int32 i; - + for(i=0; i<s.len; i++) { if(p[i] == 0) return 1; @@ -496,7 +650,7 @@ static bool contains(String s, int8 *p) { int32 i; - + if(p[0] == 0) return 1; for(i=0; i<s.len; i++) { @@ -509,11 +663,13 @@ contains(String s, int8 *p) } bool -runtime·showframe(Func *f) +runtime·showframe(Func *f, bool current) { static int32 traceback = -1; - + + if(current && m->throwing > 0) + return 1; if(traceback < 0) traceback = runtime·gotraceback(); - return traceback > 1 || contains(f->name, ".") && !hasprefix(f->name, "runtime."); + return traceback > 1 || f != nil && contains(f->name, ".") && !hasprefix(f->name, "runtime."); } diff --git a/src/pkg/runtime/sys_darwin_386.s b/src/pkg/runtime/sys_darwin_386.s index 3cf3506ad..8a938f9f4 100644 --- a/src/pkg/runtime/sys_darwin_386.s +++ b/src/pkg/runtime/sys_darwin_386.s @@ -47,8 +47,7 @@ TEXT runtime·mmap(SB),7,$0 TEXT runtime·madvise(SB),7,$0 MOVL $75, AX INT $0x80 - JAE 2(PC) - MOVL $0xf1, 0xf1 // crash + // ignore failure - maybe pages are locked RET TEXT runtime·munmap(SB),7,$0 @@ -63,40 +62,133 @@ TEXT runtime·setitimer(SB),7,$0 INT $0x80 RET -// func now() (sec int64, nsec int32) -TEXT time·now(SB), 7, $32 - LEAL 12(SP), AX // must be non-nil, unused - MOVL AX, 4(SP) - MOVL $0, 8(SP) // time zone pointer - MOVL $116, AX - INT $0x80 - MOVL DX, BX +// OS X comm page time offsets +// http://www.opensource.apple.com/source/xnu/xnu-1699.26.8/osfmk/i386/cpu_capabilities.h +#define cpu_capabilities 0x20 +#define nt_tsc_base 0x50 +#define nt_scale 0x58 +#define nt_shift 0x5c +#define nt_ns_base 0x60 +#define nt_generation 0x68 +#define gtod_generation 0x6c +#define 
gtod_ns_base 0x70 +#define gtod_sec_base 0x78 + +// called from assembly +// 64-bit unix nanoseconds returned in DX:AX. +// I'd much rather write this in C but we need +// assembly for the 96-bit multiply and RDTSC. +TEXT runtime·now(SB),7,$40 + MOVL $0xffff0000, BP /* comm page base */ + + // Test for slow CPU. If so, the math is completely + // different, and unimplemented here, so use the + // system call. + MOVL cpu_capabilities(BP), AX + TESTL $0x4000, AX + JNZ systime + + // Loop trying to take a consistent snapshot + // of the time parameters. +timeloop: + MOVL gtod_generation(BP), BX + TESTL BX, BX + JZ systime + MOVL nt_generation(BP), CX + TESTL CX, CX + JZ timeloop + RDTSC + MOVL nt_tsc_base(BP), SI + MOVL (nt_tsc_base+4)(BP), DI + MOVL SI, 0(SP) + MOVL DI, 4(SP) + MOVL nt_scale(BP), SI + MOVL SI, 8(SP) + MOVL nt_ns_base(BP), SI + MOVL (nt_ns_base+4)(BP), DI + MOVL SI, 12(SP) + MOVL DI, 16(SP) + CMPL nt_generation(BP), CX + JNE timeloop + MOVL gtod_ns_base(BP), SI + MOVL (gtod_ns_base+4)(BP), DI + MOVL SI, 20(SP) + MOVL DI, 24(SP) + MOVL gtod_sec_base(BP), SI + MOVL (gtod_sec_base+4)(BP), DI + MOVL SI, 28(SP) + MOVL DI, 32(SP) + CMPL gtod_generation(BP), BX + JNE timeloop + + // Gathered all the data we need. Compute time. + // ((tsc - nt_tsc_base) * nt_scale) >> 32 + nt_ns_base - gtod_ns_base + gtod_sec_base*1e9 + // The multiply and shift extracts the top 64 bits of the 96-bit product. + SUBL 0(SP), AX // DX:AX = (tsc - nt_tsc_base) + SBBL 4(SP), DX + + // We have x = tsc - nt_tsc_base - DX:AX to be + // multiplied by y = nt_scale = 8(SP), keeping the top 64 bits of the 96-bit product. + // x*y = (x&0xffffffff)*y + (x&0xffffffff00000000)*y + // (x*y)>>32 = ((x&0xffffffff)*y)>>32 + (x>>32)*y + MOVL DX, CX // SI = (x&0xffffffff)*y >> 32 + MOVL $0, DX + MULL 8(SP) + MOVL DX, SI - // sec is in AX, usec in BX - MOVL AX, sec+0(FP) - MOVL $0, sec+4(FP) - IMULL $1000, BX - MOVL BX, nsec+8(FP) + MOVL CX, AX // DX:AX = (x>>32)*y + MOVL $0, DX + MULL 8(SP) + + ADDL SI, AX // DX:AX += (x&0xffffffff)*y >> 32 + ADCL $0, DX + + // DX:AX is now ((tsc - nt_tsc_base) * nt_scale) >> 32. 
+ ADDL 12(SP), AX // DX:AX += nt_ns_base + ADCL 16(SP), DX + SUBL 20(SP), AX // DX:AX -= gtod_ns_base + SBBL 24(SP), DX + MOVL AX, SI // DI:SI = DX:AX + MOVL DX, DI + MOVL 28(SP), AX // DX:AX = gtod_sec_base*1e9 + MOVL 32(SP), DX + MOVL $1000000000, CX + MULL CX + ADDL SI, AX // DX:AX += DI:SI + ADCL DI, DX RET -// int64 nanotime(void) so really -// void nanotime(int64 *nsec) -TEXT runtime·nanotime(SB), 7, $32 +systime: + // Fall back to system call (usually first call in this thread) LEAL 12(SP), AX // must be non-nil, unused MOVL AX, 4(SP) MOVL $0, 8(SP) // time zone pointer MOVL $116, AX INT $0x80 - MOVL DX, BX - - // sec is in AX, usec in BX + // sec is in AX, usec in DX // convert to DX:AX nsec + MOVL DX, BX MOVL $1000000000, CX MULL CX IMULL $1000, BX ADDL BX, AX ADCL $0, DX + RET + +// func now() (sec int64, nsec int32) +TEXT time·now(SB),7,$0 + CALL runtime·now(SB) + MOVL $1000000000, CX + DIVL CX + MOVL AX, sec+0(FP) + MOVL $0, sec+4(FP) + MOVL DX, nsec+8(FP) + RET +// int64 nanotime(void) so really +// void nanotime(int64 *nsec) +TEXT runtime·nanotime(SB),7,$0 + CALL runtime·now(SB) MOVL ret+0(FP), DI MOVL AX, 0(DI) MOVL DX, 4(DI) @@ -120,8 +212,8 @@ TEXT runtime·sigaction(SB),7,$0 // It is called with the following arguments on the stack: // 0(FP) "return address" - ignored // 4(FP) actual handler -// 8(FP) siginfo style - ignored -// 12(FP) signal number +// 8(FP) signal number +// 12(FP) siginfo style // 16(FP) siginfo // 20(FP) context TEXT runtime·sigtramp(SB),7,$40 @@ -130,8 +222,11 @@ TEXT runtime·sigtramp(SB),7,$40 // check that m exists MOVL m(CX), BP CMPL BP, $0 - JNE 2(PC) + JNE 5(PC) + MOVL sig+8(FP), BX + MOVL BX, 0(SP) CALL runtime·badsignal(SB) + RET // save g MOVL g(CX), DI @@ -196,7 +291,7 @@ TEXT runtime·usleep(SB),7,$32 INT $0x80 RET -// void bsdthread_create(void *stk, M *m, G *g, void (*fn)(void)) +// void bsdthread_create(void *stk, M *mp, G *gp, void (*fn)(void)) // System call args are: func arg stack pthread flags. TEXT runtime·bsdthread_create(SB),7,$32 MOVL $360, AX @@ -268,8 +363,10 @@ TEXT runtime·bsdthread_register(SB),7,$40 MOVL $0, 20(SP) // targetconc_ptr MOVL $0, 24(SP) // dispatchqueue_offset INT $0x80 - JAE 2(PC) - MOVL $0xf1, 0xf1 // crash + JAE 3(PC) + NEGL AX + RET + MOVL $0, AX RET // Invoke Mach system call. diff --git a/src/pkg/runtime/sys_darwin_amd64.s b/src/pkg/runtime/sys_darwin_amd64.s index 90571baae..4e43a76c3 100644 --- a/src/pkg/runtime/sys_darwin_amd64.s +++ b/src/pkg/runtime/sys_darwin_amd64.s @@ -61,30 +61,63 @@ TEXT runtime·madvise(SB), 7, $0 MOVL 24(SP), DX // arg 3 advice MOVL $(0x2000000+75), AX // syscall entry madvise SYSCALL - JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + // ignore failure - maybe pages are locked RET -// func now() (sec int64, nsec int32) -TEXT time·now(SB), 7, $32 - MOVQ SP, DI // must be non-nil, unused - MOVQ $0, SI - MOVL $(0x2000000+116), AX - SYSCALL - - // sec is in AX, usec in DX - MOVQ AX, sec+0(FP) - IMULQ $1000, DX - MOVL DX, nsec+8(FP) - RET +// OS X comm page time offsets +// http://www.opensource.apple.com/source/xnu/xnu-1699.26.8/osfmk/i386/cpu_capabilities.h +#define nt_tsc_base 0x50 +#define nt_scale 0x58 +#define nt_shift 0x5c +#define nt_ns_base 0x60 +#define nt_generation 0x68 +#define gtod_generation 0x6c +#define gtod_ns_base 0x70 +#define gtod_sec_base 0x78 // int64 nanotime(void) TEXT runtime·nanotime(SB), 7, $32 + MOVQ $0x7fffffe00000, BP /* comm page base */ + // Loop trying to take a consistent snapshot + // of the time parameters. 
+timeloop: + MOVL gtod_generation(BP), R8 + TESTL R8, R8 + JZ systime + MOVL nt_generation(BP), R9 + TESTL R9, R9 + JZ timeloop + RDTSC + MOVQ nt_tsc_base(BP), R10 + MOVL nt_scale(BP), R11 + MOVQ nt_ns_base(BP), R12 + CMPL nt_generation(BP), R9 + JNE timeloop + MOVQ gtod_ns_base(BP), R13 + MOVQ gtod_sec_base(BP), R14 + CMPL gtod_generation(BP), R8 + JNE timeloop + + // Gathered all the data we need. Compute time. + // ((tsc - nt_tsc_base) * nt_scale) >> 32 + nt_ns_base - gtod_ns_base + gtod_sec_base*1e9 + // The multiply and shift extracts the top 64 bits of the 96-bit product. + SHLQ $32, DX + ADDQ DX, AX + SUBQ R10, AX + MULQ R11 + SHRQ $32, AX:DX + ADDQ R12, AX + SUBQ R13, AX + IMULQ $1000000000, R14 + ADDQ R14, AX + RET + +systime: + // Fall back to system call (usually first call in this thread). MOVQ SP, DI // must be non-nil, unused MOVQ $0, SI MOVL $(0x2000000+116), AX SYSCALL - // sec is in AX, usec in DX // return nsec in AX IMULQ $1000000000, AX @@ -92,6 +125,25 @@ TEXT runtime·nanotime(SB), 7, $32 ADDQ DX, AX RET +// func now() (sec int64, nsec int32) +TEXT time·now(SB),7,$0 + CALL runtime·nanotime(SB) + + // generated code for + // func f(x uint64) (uint64, uint64) { return x/1000000000, x%1000000000 } + // adapted to reduce duplication + MOVQ AX, CX + MOVQ $1360296554856532783, AX + MULQ CX + ADDQ CX, DX + RCRQ $1, DX + SHRQ $29, DX + MOVQ DX, sec+0(FP) + IMULQ $1000000000, DX + SUBQ DX, CX + MOVL CX, nsec+8(FP) + RET + TEXT runtime·sigprocmask(SB),7,$0 MOVL 8(SP), DI MOVQ 16(SP), SI @@ -120,8 +172,10 @@ TEXT runtime·sigtramp(SB),7,$64 // check that m exists MOVQ m(BX), BP CMPQ BP, $0 - JNE 2(PC) + JNE 4(PC) + MOVL DX, 0(SP) CALL runtime·badsignal(SB) + RET // save g MOVQ g(BX), R10 @@ -199,7 +253,7 @@ TEXT runtime·usleep(SB),7,$16 SYSCALL RET -// void bsdthread_create(void *stk, M *m, G *g, void (*fn)(void)) +// void bsdthread_create(void *stk, M *mp, G *gp, void (*fn)(void)) TEXT runtime·bsdthread_create(SB),7,$0 // Set up arguments to bsdthread_create system call. // The ones in quotes pass through to the thread callback @@ -265,8 +319,10 @@ TEXT runtime·bsdthread_register(SB),7,$0 MOVQ $0, R9 // dispatchqueue_offset MOVQ $(0x2000000+366), AX // bsdthread_register SYSCALL - JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + JCC 3(PC) + NEGL AX + RET + MOVL $0, AX RET // Mach system calls use 0x1000000 instead of the BSD's 0x2000000. 
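Both comm-page readers above (386 and amd64) implement the same algorithm: loop until a consistent, generation-checked snapshot of the kernel's time parameters is obtained, then compute ((tsc - nt_tsc_base) * nt_scale) >> 32 + nt_ns_base - gtod_ns_base + gtod_sec_base*1e9. A C sketch of that loop follows; the field names mirror the offsets defined above, while rdtsc() and fallback_ns() are assumed stand-ins for the RDTSC instruction and the "systime" system-call path:

#include <stdint.h>

uint64_t rdtsc(void);        // assumed: wraps the RDTSC instruction
uint64_t fallback_ns(void);  // assumed: the gettimeofday-based "systime" path

struct commpage {            // names follow the offsets defined above
	uint64_t nt_tsc_base;      // 0x50
	uint32_t nt_scale;         // 0x58
	uint32_t nt_shift;         // 0x5c (unused here)
	uint64_t nt_ns_base;       // 0x60
	uint32_t nt_generation;    // 0x68
	uint32_t gtod_generation;  // 0x6c
	uint64_t gtod_ns_base;     // 0x70
	uint64_t gtod_sec_base;    // 0x78
};

uint64_t
nanotime(volatile struct commpage *c)
{
	uint32_t g1, g2;
	uint64_t x, ns;

	for(;;) {
		g1 = c->gtod_generation;
		if(g1 == 0)
			return fallback_ns();  // comm page unusable: system call
		g2 = c->nt_generation;
		if(g2 == 0)
			continue;              // kernel mid-update: retry
		x = rdtsc() - c->nt_tsc_base;
		// Top 64 bits of the 96-bit product x*nt_scale, split into
		// two 32-bit multiplies exactly like the 386 assembly:
		ns = ((x & 0xffffffffu) * c->nt_scale >> 32) + (x >> 32) * c->nt_scale;
		ns += c->nt_ns_base;
		if(c->nt_generation != g2)
			continue;              // snapshot torn: retry
		ns -= c->gtod_ns_base;
		ns += c->gtod_sec_base * 1000000000ULL;
		if(c->gtod_generation != g1)
			continue;
		return ns;
	}
}

On 386 there is one extra guard: if cpu_capabilities marks a slow CPU (bit 0x4000), the comm-page math does not apply and the code goes straight to the system call. The amd64 time·now above then splits the nanosecond count without a DIVQ: multiplying by the precomputed reciprocal 1360296554856532783 and shifting yields sec = x/1000000000, after which nsec is recovered as x - sec*1000000000, i.e. x%1000000000.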
diff --git a/src/pkg/runtime/sys_freebsd_386.s b/src/pkg/runtime/sys_freebsd_386.s index a72d8972b..d5370267a 100644 --- a/src/pkg/runtime/sys_freebsd_386.s +++ b/src/pkg/runtime/sys_freebsd_386.s @@ -39,6 +39,7 @@ TEXT runtime·thr_start(SB),7,$0 MOVL AX, m(CX) CALL runtime·stackcheck(SB) // smashes AX CALL runtime·mstart(SB) + MOVL 0, AX // crash (not reached) // Exit the entire program (like C exit) @@ -89,7 +90,7 @@ TEXT runtime·mmap(SB),7,$32 MOVSL MOVSL MOVSL - MOVL $0, AX // top 64 bits of file offset + MOVL $0, AX // top 32 bits of file offset STOSL MOVL $477, AX INT $0x80 @@ -102,6 +103,12 @@ TEXT runtime·munmap(SB),7,$-4 MOVL $0xf1, 0xf1 // crash RET +TEXT runtime·madvise(SB),7,$-4 + MOVL $75, AX // madvise + INT $0x80 + // ignore failure - maybe pages are locked + RET + TEXT runtime·setitimer(SB), 7, $-4 MOVL $83, AX INT $0x80 @@ -109,40 +116,38 @@ TEXT runtime·setitimer(SB), 7, $-4 // func now() (sec int64, nsec int32) TEXT time·now(SB), 7, $32 - MOVL $116, AX + MOVL $232, AX LEAL 12(SP), BX - MOVL BX, 4(SP) - MOVL $0, 8(SP) + MOVL $0, 4(SP) + MOVL BX, 8(SP) INT $0x80 MOVL 12(SP), AX // sec - MOVL 16(SP), BX // usec + MOVL 16(SP), BX // nsec - // sec is in AX, usec in BX + // sec is in AX, nsec in BX MOVL AX, sec+0(FP) MOVL $0, sec+4(FP) - IMULL $1000, BX MOVL BX, nsec+8(FP) RET // int64 nanotime(void) so really // void nanotime(int64 *nsec) TEXT runtime·nanotime(SB), 7, $32 - MOVL $116, AX + MOVL $232, AX LEAL 12(SP), BX - MOVL BX, 4(SP) - MOVL $0, 8(SP) + MOVL $0, 4(SP) + MOVL BX, 8(SP) INT $0x80 MOVL 12(SP), AX // sec - MOVL 16(SP), BX // usec + MOVL 16(SP), BX // nsec - // sec is in AX, usec in BX + // sec is in AX, nsec in BX // convert to DX:AX nsec MOVL $1000000000, CX MULL CX - IMULL $1000, BX ADDL BX, AX ADCL $0, DX - + MOVL ret+0(FP), DI MOVL AX, 0(DI) MOVL DX, 4(DI) @@ -162,8 +167,11 @@ TEXT runtime·sigtramp(SB),7,$44 // check that m exists MOVL m(CX), BX CMPL BX, $0 - JNE 2(PC) + JNE 5(PC) + MOVL signo+0(FP), BX + MOVL BX, 0(SP) CALL runtime·badsignal(SB) + RET // save g MOVL g(CX), DI diff --git a/src/pkg/runtime/sys_freebsd_amd64.s b/src/pkg/runtime/sys_freebsd_amd64.s index 36e034a80..40c6237e2 100644 --- a/src/pkg/runtime/sys_freebsd_amd64.s +++ b/src/pkg/runtime/sys_freebsd_amd64.s @@ -38,8 +38,9 @@ TEXT runtime·thr_start(SB),7,$0 MOVQ m_g0(R13), DI MOVQ DI, g(CX) - CALL runtime·stackcheck(SB) - CALL runtime·mstart(SB) + CALL runtime·stackcheck(SB) + CALL runtime·mstart(SB) + MOVQ 0, AX // crash (not reached) // Exit the entire program (like C exit) @@ -94,31 +95,29 @@ TEXT runtime·setitimer(SB), 7, $-8 // func now() (sec int64, nsec int32) TEXT time·now(SB), 7, $32 - MOVL $116, AX - LEAQ 8(SP), DI - MOVQ $0, SI + MOVL $232, AX + MOVQ $0, DI + LEAQ 8(SP), SI SYSCALL MOVQ 8(SP), AX // sec - MOVL 16(SP), DX // usec + MOVQ 16(SP), DX // nsec - // sec is in AX, usec in DX + // sec is in AX, nsec in DX MOVQ AX, sec+0(FP) - IMULQ $1000, DX MOVL DX, nsec+8(FP) RET TEXT runtime·nanotime(SB), 7, $32 - MOVL $116, AX - LEAQ 8(SP), DI - MOVQ $0, SI + MOVL $232, AX + MOVQ $0, DI + LEAQ 8(SP), SI SYSCALL MOVQ 8(SP), AX // sec - MOVL 16(SP), DX // usec + MOVQ 16(SP), DX // nsec - // sec is in AX, usec in DX + // sec is in AX, nsec in DX // return nsec in AX IMULQ $1000000000, AX - IMULQ $1000, DX ADDQ DX, AX RET @@ -138,8 +137,10 @@ TEXT runtime·sigtramp(SB),7,$64 // check that m exists MOVQ m(BX), BP CMPQ BP, $0 - JNE 2(PC) + JNE 4(PC) + MOVQ DI, 0(SP) CALL runtime·badsignal(SB) + RET // save g MOVQ g(BX), R10 @@ -182,6 +183,15 @@ TEXT runtime·munmap(SB),7,$0 MOVL $0xf1, 0xf1 
// crash RET +TEXT runtime·madvise(SB),7,$0 + MOVQ 8(SP), DI + MOVQ 16(SP), SI + MOVQ 24(SP), DX + MOVQ $75, AX // madvise + SYSCALL + // ignore failure - maybe pages are locked + RET + TEXT runtime·sigaltstack(SB),7,$-8 MOVQ new+8(SP), DI MOVQ old+16(SP), SI diff --git a/src/pkg/runtime/sys_freebsd_arm.s b/src/pkg/runtime/sys_freebsd_arm.s new file mode 100644 index 000000000..77050e8d0 --- /dev/null +++ b/src/pkg/runtime/sys_freebsd_arm.s @@ -0,0 +1,260 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// System calls and other sys.stuff for ARM, FreeBSD +// /usr/src/sys/kern/syscalls.master for syscall numbers. +// + +#include "zasm_GOOS_GOARCH.h" + +TEXT runtime·sys_umtx_op(SB),7,$0 + MOVW 0(FP), R0 + MOVW 4(FP), R1 + MOVW 8(FP), R2 + MOVW 12(FP), R3 + ADD $20, R13 // arg 5 is passed on stack + SWI $454 + SUB $20, R13 + // BCS error + RET + +TEXT runtime·thr_new(SB),7,$0 + MOVW 0(FP), R0 + MOVW 4(FP), R1 + SWI $455 + RET + +TEXT runtime·thr_start(SB),7,$0 + MOVW R0, R9 // m + + // TODO(minux): set up TLS? + + // set up g + MOVW m_g0(R9), R10 + BL runtime·emptyfunc(SB) // fault if stack check is wrong + BL runtime·mstart(SB) + + MOVW $2, R9 // crash (not reached) + MOVW R9, (R9) + RET + +// Exit the entire program (like C exit) +TEXT runtime·exit(SB),7,$-8 + MOVW 0(FP), R0 // arg 1 exit status + SWI $1 + MOVW.CS $0, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·exit1(SB),7,$-8 + MOVW 0(FP), R0 // arg 1 exit status + SWI $431 + MOVW.CS $0, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·write(SB),7,$-8 + MOVW 0(FP), R0 // arg 1 fd + MOVW 4(FP), R1 // arg 2 buf + MOVW 8(FP), R2 // arg 3 count + SWI $4 + RET + +TEXT runtime·getrlimit(SB),7,$-8 + MOVW 0(FP), R0 + MOVW 4(FP), R1 + MOVW 8(FP), R2 + SWI $194 + RET + +TEXT runtime·raisesigpipe(SB),7,$8 + // thr_self(&4(R13)) + MOVW $4(R13), R0 // arg 1 &4(R13) + SWI $432 + // thr_kill(self, SIGPIPE) + MOVW 4(R13), R0 // arg 1 id + MOVW $13, R1 // arg 2 SIGPIPE + SWI $433 + RET + +TEXT runtime·setitimer(SB), 7, $-8 + MOVW 0(FP), R0 + MOVW 4(FP), R1 + MOVW 8(FP), R2 + SWI $83 + RET + +// func now() (sec int64, nsec int32) +TEXT time·now(SB), 7, $32 + MOVW $0, R0 // CLOCK_REALTIME + MOVW $8(R13), R1 + SWI $232 // clock_gettime + + MOVW 8(R13), R0 // sec.low + MOVW 12(R13), R1 // sec.high + MOVW 16(R13), R2 // nsec + + MOVW R0, 0(FP) + MOVW R1, 4(FP) + MOVW R2, 8(FP) + RET + +// int64 nanotime(void) so really +// void nanotime(int64 *nsec) +TEXT runtime·nanotime(SB), 7, $32 + MOVW $0, R0 // CLOCK_REALTIME + MOVW $8(R13), R1 + SWI $232 // clock_gettime + + MOVW 8(R13), R0 // sec.low + MOVW 12(R13), R4 // sec.high + MOVW 16(R13), R2 // nsec + + MOVW $1000000000, R3 + MULLU R0, R3, (R1, R0) + MUL R3, R4 + ADD.S R2, R0 + ADC R4, R1 + + MOVW 0(FP), R3 + MOVW R0, 0(R3) + MOVW R1, 4(R3) + RET + +TEXT runtime·sigaction(SB),7,$-8 + MOVW 0(FP), R0 // arg 1 sig + MOVW 4(FP), R1 // arg 2 act + MOVW 8(FP), R2 // arg 3 oact + SWI $416 + MOVW.CS $0, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·sigtramp(SB),7,$24 + // this might be called in external code context, + // where g and m are not set. + // first save R0, because _cgo_load_gm will clobber it + // TODO(adonovan): call runtime·badsignal if m=0, like other platforms? 
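The TODO above refers to a check that every other sigtramp in this patch now performs before touching scheduler state. Schematically, in C (M, G, and the globals here are illustrative stand-ins for the runtime's per-thread registers; the real code is the assembly itself, and this sketch is not linkable as-is):

typedef struct M M;
typedef struct G G;
struct M { G *gsignal; };

extern M *m;   // nil when the signal lands on a thread Go does not own
extern G *g;

void badsignal(int signo);
void sighandler(int signo, void *info, void *ctx, G *gp);

void
sigtramp(int signo, void *info, void *ctx)
{
	G *oldg;

	if(m == 0) {
		badsignal(signo);  // now receives the signal number, then returns
		return;
	}
	oldg = g;
	g = m->gsignal;        // run the handler on the signal stack
	sighandler(signo, info, ctx, oldg);
	g = oldg;
}

That is why the JNE offsets change from 2(PC) to 4(PC) or 5(PC) throughout this patch: the extra instructions store the signal number for badsignal and return afterwards.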
+ MOVW R0, 4(R13) // signum + MOVW _cgo_load_gm(SB), R0 + CMP $0, R0 + BL.NE (R0) + + // save g + MOVW R10, R4 + MOVW R10, 20(R13) + + // g = m->signal + MOVW m_gsignal(R9), R10 + + // R0 is already saved + MOVW R1, 8(R13) // info + MOVW R2, 12(R13) // context + MOVW R4, 16(R13) // oldg + + BL runtime·sighandler(SB) + + // restore g + MOVW 20(R13), R10 + RET + +TEXT runtime·mmap(SB),7,$12 + MOVW 0(FP), R0 // arg 1 addr + MOVW 4(FP), R1 // arg 2 len + MOVW 8(FP), R2 // arg 3 prot + MOVW 12(FP), R3 // arg 4 flags + // arg 5 (fid) and arg6 (offset_lo, offset_hi) are passed on stack + // note the C runtime only passes the 32-bit offset_lo to us + MOVW 16(FP), R4 // arg 5 + MOVW R4, 4(R13) + MOVW 20(FP), R5 // arg 6 lower 32-bit + MOVW R5, 8(R13) + MOVW $0, R6 // higher 32-bit for arg 6 + MOVW R6, 12(R13) + ADD $4, R13 // pass arg 5 and arg 6 on stack + SWI $477 + SUB $4, R13 + RET + +TEXT runtime·munmap(SB),7,$0 + MOVW 0(FP), R0 // arg 1 addr + MOVW 4(FP), R1 // arg 2 len + SWI $73 + MOVW.CS $0, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·madvise(SB),7,$0 + MOVW 0(FP), R0 // arg 1 addr + MOVW 4(FP), R1 // arg 2 len + MOVW 8(FP), R2 // arg 3 flags + SWI $75 + // ignore failure - maybe pages are locked + RET + +TEXT runtime·sigaltstack(SB),7,$-8 + MOVW new+0(FP), R0 + MOVW old+4(FP), R1 + SWI $53 + MOVW.CS $0, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·usleep(SB),7,$16 + MOVW usec+0(FP), R0 + MOVW R0, R2 + MOVW $1000000, R1 + DIV R1, R0 + // 0(R13) is the saved LR, don't use it + MOVW R0, 4(R13) // tv_sec.low + MOVW $0, R0 + MOVW R0, 8(R13) // tv_sec.high + MOD R1, R2 + MOVW $1000, R1 + MUL R1, R2 + MOVW R2, 12(R13) // tv_nsec + + MOVW $4(R13), R0 // arg 1 - rqtp + MOVW $0, R1 // arg 2 - rmtp + SWI $240 // sys_nanosleep + RET + +TEXT runtime·sysctl(SB),7,$0 + MOVW 0(FP), R0 // arg 1 - name + MOVW 4(FP), R1 // arg 2 - namelen + MOVW 8(FP), R2 // arg 3 - oldp + MOVW 12(FP), R3 // arg 4 - oldlenp + // arg 5 (newp) and arg 6 (newlen) are passed on stack + ADD $20, R13 + SWI $202 // sys___sysctl + SUB.CS $0, R0, R0 + SUB $20, R13 + RET + +TEXT runtime·osyield(SB),7,$-4 + SWI $331 // sys_sched_yield + RET + +TEXT runtime·sigprocmask(SB),7,$0 + MOVW $3, R0 // arg 1 - how (SIG_SETMASK) + MOVW 0(FP), R1 // arg 2 - set + MOVW 4(FP), R2 // arg 3 - oset + SWI $340 // sys_sigprocmask + MOVW.CS $0, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·casp(SB),7,$0 + B runtime·cas(SB) + +// TODO(minux): this is only valid for ARMv6+ +// bool armcas(int32 *val, int32 old, int32 new) +// Atomically: +// if(*val == old){ +// *val = new; +// return 1; +// }else +// return 0; +TEXT runtime·cas(SB),7,$0 + B runtime·armcas(SB) diff --git a/src/pkg/runtime/sys_linux_386.s b/src/pkg/runtime/sys_linux_386.s index 602d9ddac..f27fd4713 100644 --- a/src/pkg/runtime/sys_linux_386.s +++ b/src/pkg/runtime/sys_linux_386.s @@ -104,40 +104,38 @@ TEXT runtime·mincore(SB),7,$0-24 // func now() (sec int64, nsec int32) TEXT time·now(SB), 7, $32 - MOVL $78, AX // syscall - gettimeofday - LEAL 8(SP), BX - MOVL $0, CX + MOVL $265, AX // syscall - clock_gettime + MOVL $0, BX + LEAL 8(SP), CX MOVL $0, DX CALL *runtime·_vdso(SB) MOVL 8(SP), AX // sec - MOVL 12(SP), BX // usec + MOVL 12(SP), BX // nsec - // sec is in AX, usec in BX + // sec is in AX, nsec in BX MOVL AX, sec+0(FP) MOVL $0, sec+4(FP) - IMULL $1000, BX MOVL BX, nsec+8(FP) RET // int64 nanotime(void) so really // void nanotime(int64 *nsec) TEXT runtime·nanotime(SB), 7, $32 - MOVL $78, AX // syscall - 
gettimeofday - LEAL 8(SP), BX - MOVL $0, CX + MOVL $265, AX // syscall - clock_gettime + MOVL $0, BX + LEAL 8(SP), CX MOVL $0, DX CALL *runtime·_vdso(SB) MOVL 8(SP), AX // sec - MOVL 12(SP), BX // usec + MOVL 12(SP), BX // nsec - // sec is in AX, usec in BX + // sec is in AX, nsec in BX // convert to DX:AX nsec MOVL $1000000000, CX MULL CX - IMULL $1000, BX ADDL BX, AX ADCL $0, DX - + MOVL ret+0(FP), DI MOVL AX, 0(DI) MOVL DX, 4(DI) @@ -170,8 +168,11 @@ TEXT runtime·sigtramp(SB),7,$44 // check that m exists MOVL m(CX), BX CMPL BX, $0 - JNE 2(PC) + JNE 5(PC) + MOVL sig+0(FP), BX + MOVL BX, 0(SP) CALL runtime·badsignal(SB) + RET // save g MOVL g(CX), DI @@ -240,9 +241,7 @@ TEXT runtime·madvise(SB),7,$0 MOVL 8(SP), CX MOVL 12(SP), DX CALL *runtime·_vdso(SB) - CMPL AX, $0xfffff001 - JLS 2(PC) - INT $3 + // ignore failure - maybe pages are locked RET // int32 futex(int32 *uaddr, int32 op, int32 val, @@ -258,7 +257,7 @@ TEXT runtime·futex(SB),7,$0 CALL *runtime·_vdso(SB) RET -// int32 clone(int32 flags, void *stack, M *m, G *g, void (*fn)(void)); +// int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void)); TEXT runtime·clone(SB),7,$0 MOVL $120, AX // clone MOVL flags+4(SP), BX @@ -266,7 +265,7 @@ TEXT runtime·clone(SB),7,$0 MOVL $0, DX // parent tid ptr MOVL $0, DI // child tid ptr - // Copy m, g, fn off parent stack for use by child. + // Copy mp, gp, fn off parent stack for use by child. SUBL $16, CX MOVL mm+12(SP), SI MOVL SI, 0(CX) @@ -423,3 +422,11 @@ TEXT runtime·osyield(SB),7,$0 MOVL $158, AX CALL *runtime·_vdso(SB) RET + +TEXT runtime·sched_getaffinity(SB),7,$0 + MOVL $242, AX // syscall - sched_getaffinity + MOVL 4(SP), BX + MOVL 8(SP), CX + MOVL 12(SP), DX + CALL *runtime·_vdso(SB) + RET diff --git a/src/pkg/runtime/sys_linux_amd64.s b/src/pkg/runtime/sys_linux_amd64.s index 657ab7e0b..e45943758 100644 --- a/src/pkg/runtime/sys_linux_amd64.s +++ b/src/pkg/runtime/sys_linux_amd64.s @@ -101,32 +101,61 @@ TEXT runtime·mincore(SB),7,$0-24 RET // func now() (sec int64, nsec int32) -TEXT time·now(SB), 7, $32 - LEAQ 8(SP), DI - MOVQ $0, SI - MOVQ $0xffffffffff600000, AX +TEXT time·now(SB),7,$16 + // Be careful. We're calling a function with gcc calling convention here. + // We're guaranteed 128 bytes on entry, and we've taken 16, and the + // call uses another 8. + // That leaves 104 for the gettime code to use. Hope that's enough! + MOVQ runtime·__vdso_clock_gettime_sym(SB), AX + CMPQ AX, $0 + JEQ fallback_gtod + MOVL $0, DI // CLOCK_REALTIME + LEAQ 0(SP), SI CALL AX - MOVQ 8(SP), AX // sec - MOVL 16(SP), DX // usec - - // sec is in AX, usec in DX + MOVQ 0(SP), AX // sec + MOVQ 8(SP), DX // nsec MOVQ AX, sec+0(FP) - IMULQ $1000, DX MOVL DX, nsec+8(FP) RET - -TEXT runtime·nanotime(SB), 7, $32 - LEAQ 8(SP), DI +fallback_gtod: + LEAQ 0(SP), DI MOVQ $0, SI - MOVQ $0xffffffffff600000, AX + MOVQ runtime·__vdso_gettimeofday_sym(SB), AX CALL AX - MOVQ 8(SP), AX // sec - MOVL 16(SP), DX // usec + MOVQ 0(SP), AX // sec + MOVL 8(SP), DX // usec + IMULQ $1000, DX + MOVQ AX, sec+0(FP) + MOVL DX, nsec+8(FP) + RET - // sec is in AX, usec in DX +TEXT runtime·nanotime(SB),7,$16 + // Duplicate time.now here to avoid using up precious stack space. + // See comment above in time.now. 
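The linux/amd64 change above stops calling the legacy vsyscall page at $0xffffffffff600000 and instead calls through function pointers resolved from the vDSO, falling back to gettimeofday when __vdso_clock_gettime is absent. Roughly, in C (the pointer resolution happens elsewhere in the runtime and is assumed here):

#include <stddef.h>
#include <stdint.h>
#include <time.h>
#include <sys/time.h>

// Assumed already resolved from the vDSO (cf. runtime·__vdso_*_sym above).
static int (*vdso_clock_gettime)(clockid_t, struct timespec *);
static int (*vdso_gettimeofday)(struct timeval *, void *);

int64_t
nanotime(void)
{
	struct timespec ts;
	struct timeval tv;

	if(vdso_clock_gettime != NULL) {
		vdso_clock_gettime(CLOCK_REALTIME, &ts);
		return ts.tv_sec*1000000000LL + ts.tv_nsec;
	}
	// gettimeofday reports microseconds, hence the extra *1000
	// (the IMULQ $1000 in fallback_gtod above).
	vdso_gettimeofday(&tv, NULL);
	return tv.tv_sec*1000000000LL + tv.tv_usec*1000LL;
}

The parallel 386 change above has the same effect through syscall 265 (clock_gettime): nanoseconds come back directly, so the old IMULL $1000 scaling of a microsecond value disappears.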
+ MOVQ runtime·__vdso_clock_gettime_sym(SB), AX + CMPQ AX, $0 + JEQ fallback_gtod_nt + MOVL $0, DI // CLOCK_REALTIME + LEAQ 0(SP), SI + CALL AX + MOVQ 0(SP), AX // sec + MOVQ 8(SP), DX // nsec + // sec is in AX, nsec in DX // return nsec in AX IMULQ $1000000000, AX + ADDQ DX, AX + RET +fallback_gtod_nt: + LEAQ 0(SP), DI + MOVQ $0, SI + MOVQ runtime·__vdso_gettimeofday_sym(SB), AX + CALL AX + MOVQ 0(SP), AX // sec + MOVL 8(SP), DX // usec IMULQ $1000, DX + // sec is in AX, nsec in DX + // return nsec in AX + IMULQ $1000000000, AX ADDQ DX, AX RET @@ -157,8 +186,10 @@ TEXT runtime·sigtramp(SB),7,$64 // check that m exists MOVQ m(BX), BP CMPQ BP, $0 - JNE 2(PC) + JNE 4(PC) + MOVQ DI, 0(SP) CALL runtime·badsignal(SB) + RET // save g MOVQ g(BX), R10 @@ -219,9 +250,7 @@ TEXT runtime·madvise(SB),7,$0 MOVQ 24(SP), DX MOVQ $28, AX // madvise SYSCALL - CMPQ AX, $0xfffffffffffff001 - JLS 2(PC) - MOVL $0xf1, 0xf1 // crash + // ignore failure - maybe pages are locked RET // int64 futex(int32 *uaddr, int32 op, int32 val, @@ -237,12 +266,12 @@ TEXT runtime·futex(SB),7,$0 SYSCALL RET -// int64 clone(int32 flags, void *stack, M *m, G *g, void (*fn)(void)); +// int64 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void)); TEXT runtime·clone(SB),7,$0 MOVL flags+8(SP), DI MOVQ stack+16(SP), SI - // Copy m, g, fn off parent stack for use by child. + // Copy mp, gp, fn off parent stack for use by child. // Careful: Linux system call clobbers CX and R11. MOVQ mm+24(SP), R8 MOVQ gg+32(SP), R9 @@ -310,3 +339,11 @@ TEXT runtime·osyield(SB),7,$0 MOVL $24, AX SYSCALL RET + +TEXT runtime·sched_getaffinity(SB),7,$0 + MOVQ 8(SP), DI + MOVL 16(SP), SI + MOVQ 24(SP), DX + MOVL $204, AX // syscall entry + SYSCALL + RET diff --git a/src/pkg/runtime/sys_linux_arm.s b/src/pkg/runtime/sys_linux_arm.s index 03e173d26..8bae2933f 100644 --- a/src/pkg/runtime/sys_linux_arm.s +++ b/src/pkg/runtime/sys_linux_arm.s @@ -34,9 +34,10 @@ #define SYS_sched_yield (SYS_BASE + 158) #define SYS_select (SYS_BASE + 142) // newselect #define SYS_ugetrlimit (SYS_BASE + 191) +#define SYS_sched_getaffinity (SYS_BASE + 242) +#define SYS_clock_gettime (SYS_BASE + 263) #define ARM_BASE (SYS_BASE + 0x0f0000) -#define SYS_ARM_cacheflush (ARM_BASE + 2) TEXT runtime·open(SB),7,$0 MOVW 0(FP), R0 @@ -131,10 +132,7 @@ TEXT runtime·madvise(SB),7,$0 MOVW 8(FP), R2 MOVW $SYS_madvise, R7 SWI $0 - MOVW $0xfffff001, R6 - CMP R6, R0 - MOVW.HI $0, R9 // crash on syscall failure - MOVW.HI R9, (R9) + // ignore failure - maybe pages are locked RET TEXT runtime·setitimer(SB),7,$0 @@ -154,41 +152,37 @@ TEXT runtime·mincore(SB),7,$0 RET TEXT time·now(SB), 7, $32 - MOVW $8(R13), R0 // timeval - MOVW $0, R1 // zone - MOVW $SYS_gettimeofday, R7 + MOVW $0, R0 // CLOCK_REALTIME + MOVW $8(R13), R1 // timespec + MOVW $SYS_clock_gettime, R7 SWI $0 MOVW 8(R13), R0 // sec - MOVW 12(R13), R2 // usec + MOVW 12(R13), R2 // nsec MOVW R0, 0(FP) MOVW $0, R1 MOVW R1, 4(FP) - MOVW $1000, R3 - MUL R3, R2 MOVW R2, 8(FP) RET // int64 nanotime(void) so really // void nanotime(int64 *nsec) TEXT runtime·nanotime(SB),7,$32 - MOVW $8(R13), R0 // timeval - MOVW $0, R1 // zone - MOVW $SYS_gettimeofday, R7 + MOVW $0, R0 // CLOCK_REALTIME + MOVW $8(R13), R1 // timespec + MOVW $SYS_clock_gettime, R7 SWI $0 MOVW 8(R13), R0 // sec - MOVW 12(R13), R2 // usec + MOVW 12(R13), R2 // nsec MOVW $1000000000, R3 MULLU R0, R3, (R1, R0) - MOVW $1000, R3 MOVW $0, R4 - MUL R3, R2 ADD.S R2, R0 ADC R4, R1 - + MOVW 0(FP), R3 MOVW R0, 0(R3) MOVW R1, 4(R3) @@ -208,7 +202,7 @@ TEXT runtime·futex(SB),7,$0 RET -// 
int32 clone(int32 flags, void *stack, M *m, G *g, void (*fn)(void)); +// int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void)); TEXT runtime·clone(SB),7,$0 MOVW flags+0(FP), R0 MOVW stack+4(FP), R1 @@ -217,7 +211,7 @@ TEXT runtime·clone(SB),7,$0 MOVW $0, R4 // child tid ptr MOVW $0, R5 - // Copy m, g, fn off parent stack for use by child. + // Copy mp, gp, fn off parent stack for use by child. // TODO(kaib): figure out which registers are clobbered by clone and avoid stack copying MOVW $-16(R1), R1 MOVW mm+8(FP), R6 @@ -272,15 +266,6 @@ TEXT runtime·clone(SB),7,$0 MOVW $1005, R1 MOVW R0, (R1) - -TEXT runtime·cacheflush(SB),7,$0 - MOVW 0(FP), R0 - MOVW 4(FP), R1 - MOVW $0, R2 - MOVW $SYS_ARM_cacheflush, R7 - SWI $0 - RET - TEXT runtime·sigaltstack(SB),7,$0 MOVW 0(FP), R0 MOVW 4(FP), R1 @@ -293,6 +278,15 @@ TEXT runtime·sigaltstack(SB),7,$0 RET TEXT runtime·sigtramp(SB),7,$24 + // this might be called in external code context, + // where g and m are not set. + // first save R0, because _cgo_load_gm will clobber it + // TODO(adonovan): call runtime·badsignal if m=0, like other platforms? + MOVW R0, 4(R13) + MOVW _cgo_load_gm(SB), R0 + CMP $0, R0 + BL.NE (R0) + // save g MOVW g, R3 MOVW g, 20(R13) @@ -301,7 +295,7 @@ TEXT runtime·sigtramp(SB),7,$24 MOVW m_gsignal(m), g // copy arguments for call to sighandler - MOVW R0, 4(R13) + // R0 is already saved above MOVW R1, 8(R13) MOVW R2, 12(R13) MOVW R3, 16(R13) @@ -385,3 +379,11 @@ TEXT runtime·osyield(SB),7,$0 MOVW $SYS_sched_yield, R7 SWI $0 RET + +TEXT runtime·sched_getaffinity(SB),7,$0 + MOVW 0(FP), R0 + MOVW 4(FP), R1 + MOVW 8(FP), R2 + MOVW $SYS_sched_getaffinity, R7 + SWI $0 + RET diff --git a/src/pkg/runtime/sys_netbsd_386.s b/src/pkg/runtime/sys_netbsd_386.s index 11f8c7aaa..3d3d31273 100644 --- a/src/pkg/runtime/sys_netbsd_386.s +++ b/src/pkg/runtime/sys_netbsd_386.s @@ -12,14 +12,14 @@ TEXT runtime·exit(SB),7,$-4 MOVL $1, AX INT $0x80 - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·exit1(SB),7,$-4 - MOVL $302, AX // sys_threxit + MOVL $310, AX // sys__lwp_exit INT $0x80 JAE 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·write(SB),7,$-4 @@ -27,31 +27,32 @@ TEXT runtime·write(SB),7,$-4 INT $0x80 RET -TEXT runtime·usleep(SB),7,$20 +TEXT runtime·usleep(SB),7,$24 MOVL $0, DX MOVL usec+0(FP), AX MOVL $1000000, CX DIVL CX - MOVL AX, 12(SP) // tv_sec + MOVL AX, 12(SP) // tv_sec - l32 + MOVL $0, 16(SP) // tv_sec - h32 MOVL $1000, AX MULL DX - MOVL AX, 16(SP) // tv_nsec + MOVL AX, 20(SP) // tv_nsec MOVL $0, 0(SP) LEAL 12(SP), AX MOVL AX, 4(SP) // arg 1 - rqtp MOVL $0, 8(SP) // arg 2 - rmtp - MOVL $240, AX // sys_nanosleep + MOVL $430, AX // sys_nanosleep INT $0x80 RET TEXT runtime·raisesigpipe(SB),7,$12 - MOVL $299, AX // sys_getthrid + MOVL $311, AX // sys__lwp_self INT $0x80 MOVL $0, 0(SP) - MOVL AX, 4(SP) // arg 1 - pid - MOVL $13, 8(SP) // arg 2 - signum == SIGPIPE - MOVL $37, AX // sys_kill + MOVL AX, 4(SP) // arg 1 - target + MOVL $13, 8(SP) // arg 2 - signo == SIGPIPE + MOVL $318, AX // sys__lwp_kill INT $0x80 RET @@ -67,7 +68,7 @@ TEXT runtime·mmap(SB),7,$36 MOVL $0, AX STOSL // arg 6 - pad MOVSL // arg 7 - offset - MOVL $0, AX // top 64 bits of file offset + MOVL $0, AX // top 32 bits of file offset STOSL MOVL $197, AX // sys_mmap INT $0x80 @@ -79,60 +80,100 @@ TEXT runtime·munmap(SB),7,$-4 MOVL $73, AX // sys_munmap INT $0x80 JAE 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash + RET + +TEXT runtime·madvise(SB),7,$-4 + MOVL $75, AX // sys_madvise + INT 
$0x80 + // ignore failure - maybe pages are locked RET TEXT runtime·setitimer(SB),7,$-4 - MOVL $83, AX + MOVL $425, AX // sys_setitimer INT $0x80 RET // func now() (sec int64, nsec int32) TEXT time·now(SB), 7, $32 - MOVL $116, AX LEAL 12(SP), BX - MOVL BX, 4(SP) - MOVL $0, 8(SP) + MOVL $0, 4(SP) // arg 1 - clock_id + MOVL BX, 8(SP) // arg 2 - tp + MOVL $427, AX // sys_clock_gettime INT $0x80 - MOVL 12(SP), AX // sec - MOVL 16(SP), BX // usec - // sec is in AX, usec in BX + MOVL 12(SP), AX // sec - l32 MOVL AX, sec+0(FP) - MOVL $0, sec+4(FP) - IMULL $1000, BX + MOVL 16(SP), AX // sec - h32 + MOVL AX, sec+4(FP) + + MOVL 20(SP), BX // nsec MOVL BX, nsec+8(FP) RET // int64 nanotime(void) so really // void nanotime(int64 *nsec) TEXT runtime·nanotime(SB),7,$32 - MOVL $116, AX LEAL 12(SP), BX - MOVL BX, 4(SP) - MOVL $0, 8(SP) + MOVL $0, 4(SP) // arg 1 - clock_id + MOVL BX, 8(SP) // arg 2 - tp + MOVL $427, AX // sys_clock_gettime INT $0x80 - MOVL 12(SP), AX // sec - MOVL 16(SP), BX // usec - - // sec is in AX, usec in BX - // convert to DX:AX nsec - MOVL $1000000000, CX - MULL CX - IMULL $1000, BX + + MOVL 16(SP), CX // sec - h32 + IMULL $1000000000, CX + + MOVL 12(SP), AX // sec - l32 + MOVL $1000000000, BX + MULL BX // result in dx:ax + + MOVL 20(SP), BX // nsec ADDL BX, AX - ADCL $0, DX - + ADCL CX, DX // add high bits with carry + MOVL ret+0(FP), DI MOVL AX, 0(DI) MOVL DX, 4(DI) RET -TEXT runtime·sigaction(SB),7,$-4 - MOVL $46, AX // sys_sigaction +TEXT runtime·getcontext(SB),7,$-4 + MOVL $307, AX // sys_getcontext + INT $0x80 + JAE 2(PC) + MOVL $0xf1, 0xf1 // crash + RET + +TEXT runtime·sigprocmask(SB),7,$-4 + MOVL $293, AX // sys_sigprocmask + INT $0x80 + JAE 2(PC) + MOVL $0xf1, 0xf1 // crash + RET + +TEXT runtime·sigreturn_tramp(SB),7,$0 + LEAL 140(SP), AX // Load address of ucontext + MOVL AX, 4(SP) + MOVL $308, AX // sys_setcontext + INT $0x80 + MOVL $-1, 4(SP) // Something failed... + MOVL $1, AX // sys_exit + INT $0x80 + +TEXT runtime·sigaction(SB),7,$24 + LEAL arg0+0(FP), SI + LEAL 4(SP), DI + CLD + MOVSL // arg 1 - sig + MOVSL // arg 2 - act + MOVSL // arg 3 - oact + LEAL runtime·sigreturn_tramp(SB), AX + STOSL // arg 4 - tramp + MOVL $2, AX + STOSL // arg 5 - vers + MOVL $340, AX // sys___sigaction_sigtramp INT $0x80 JAE 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·sigtramp(SB),7,$44 @@ -141,13 +182,16 @@ TEXT runtime·sigtramp(SB),7,$44 // check that m exists MOVL m(CX), BX CMPL BX, $0 - JNE 2(PC) + JNE 5(PC) + MOVL signo+0(FP), BX + MOVL BX, 0(SP) CALL runtime·badsignal(SB) + RET // save g MOVL g(CX), DI MOVL DI, 20(SP) - + // g = m->gsignal MOVL m_gsignal(BX), BX MOVL BX, g(CX) @@ -167,67 +211,24 @@ TEXT runtime·sigtramp(SB),7,$44 get_tls(CX) MOVL 20(SP), BX MOVL BX, g(CX) - - // call sigreturn - MOVL context+8(FP), AX - MOVL $0, 0(SP) // syscall gap - MOVL AX, 4(SP) // arg 1 - sigcontext - MOVL $103, AX // sys_sigreturn - INT $0x80 - MOVL $0xf1, 0xf1 // crash RET -// int32 rfork_thread(int32 flags, void *stack, M *m, G *g, void (*fn)(void)); -TEXT runtime·rfork_thread(SB),7,$8 - MOVL flags+8(SP), AX - MOVL stack+12(SP), CX - - // Copy m, g, fn off parent stack for use by child. 
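The NetBSD 386 nanotime above now handles a genuinely 64-bit tv_sec, so sec*1e9 is computed in two halves: MULL forms the full 64-bit product of the low word in DX:AX, IMULL multiplies the high word (keeping only its low 32 bits), and ADDL/ADCL fold in the nanoseconds with carry. The same arithmetic in C:

#include <stdint.h>

// sec*1000000000 + nsec using only 32x32-bit multiplies, as the
// 386 assembly above does (sketch).
uint64_t
to_ns(uint32_t sec_lo, uint32_t sec_hi, uint32_t nsec)
{
	uint64_t lo = (uint64_t)sec_lo * 1000000000u;  // MULL: full 64-bit product
	uint32_t hi = sec_hi * 1000000000u;            // IMULL: low 32 bits suffice
	return lo + ((uint64_t)hi << 32) + nsec;       // ADDL/ADCL pair
}

Bits above 2^64 are simply discarded; a 64-bit nanosecond count does not overflow until roughly the year 2554.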
- SUBL $16, CX - MOVL mm+16(SP), SI - MOVL SI, 0(CX) - MOVL gg+20(SP), SI - MOVL SI, 4(CX) - MOVL fn+24(SP), SI - MOVL SI, 8(CX) - MOVL $1234, 12(CX) - MOVL CX, SI - - MOVL $0, 0(SP) // syscall gap - MOVL AX, 4(SP) // arg 1 - flags - MOVL $251, AX // sys_rfork +// int32 lwp_create(void *context, uintptr flags, void *lwpid); +TEXT runtime·lwp_create(SB),7,$16 + MOVL $0, 0(SP) + MOVL context+0(FP), AX + MOVL AX, 4(SP) // arg 1 - context + MOVL flags+4(FP), AX + MOVL AX, 8(SP) // arg 2 - flags + MOVL lwpid+8(FP), AX + MOVL AX, 12(SP) // arg 3 - lwpid + MOVL $309, AX // sys__lwp_create INT $0x80 - - // Return if rfork syscall failed - JCC 4(PC) + JCC 2(PC) NEGL AX - MOVL AX, 48(SP) RET - // In parent, return. - CMPL AX, $0 - JEQ 3(PC) - MOVL AX, 48(SP) - RET - - // In child, on new stack. - MOVL SI, SP - - // Paranoia: check that SP is as we expect. - MOVL 12(SP), BP - CMPL BP, $1234 - JEQ 2(PC) - INT $3 - - // Reload registers - MOVL 0(SP), BX // m - MOVL 4(SP), DX // g - MOVL 8(SP), SI // fn - - // Initialize m->procid to thread ID - MOVL $299, AX // sys_getthrid - INT $0x80 - MOVL AX, m_procid(BX) +TEXT runtime·lwp_tramp(SB),7,$0 // Set FS to point at m->tls LEAL m_tls(BX), BP @@ -236,7 +237,7 @@ TEXT runtime·rfork_thread(SB),7,$8 CALL runtime·settls(SB) POPL AX POPAL - + // Now segment is established. Initialize m, g. get_tls(AX) MOVL DX, g(AX) @@ -259,7 +260,7 @@ TEXT runtime·rfork_thread(SB),7,$8 RET TEXT runtime·sigaltstack(SB),7,$-8 - MOVL $288, AX // sys_sigaltstack + MOVL $281, AX // sys___sigaltstack14 MOVL new+4(SP), BX MOVL old+8(SP), CX INT $0x80 @@ -277,30 +278,33 @@ TEXT runtime·setldt(SB),7,$8 TEXT runtime·settls(SB),7,$16 // adjust for ELF: wants to use -8(GS) and -4(GS) for g and m - MOVL 20(SP), CX + MOVL base+0(FP), CX ADDL $8, CX - MOVL CX, 0(CX) MOVL $0, 0(SP) // syscall gap - MOVL $9, 4(SP) // I386_SET_GSBASE (machine/sysarch.h) - MOVL CX, 8(SP) // pointer to base - MOVL $165, AX // sys_sysarch + MOVL CX, 4(SP) // arg 1 - ptr + MOVL $317, AX // sys__lwp_setprivate INT $0x80 JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·osyield(SB),7,$-4 - MOVL $298, AX // sys_sched_yield + MOVL $350, AX // sys_sched_yield + INT $0x80 + RET + +TEXT runtime·lwp_park(SB),7,$-4 + MOVL $434, AX // sys__lwp_park INT $0x80 RET -TEXT runtime·thrsleep(SB),7,$-4 - MOVL $300, AX // sys_thrsleep +TEXT runtime·lwp_unpark(SB),7,$-4 + MOVL $321, AX // sys__lwp_unpark INT $0x80 RET -TEXT runtime·thrwakeup(SB),7,$-4 - MOVL $301, AX // sys_thrwakeup +TEXT runtime·lwp_self(SB),7,$-4 + MOVL $311, AX // sys__lwp_self INT $0x80 RET diff --git a/src/pkg/runtime/sys_netbsd_amd64.s b/src/pkg/runtime/sys_netbsd_amd64.s index 0b83cd4d8..e73e83ded 100644 --- a/src/pkg/runtime/sys_netbsd_amd64.s +++ b/src/pkg/runtime/sys_netbsd_amd64.s @@ -8,42 +8,24 @@ #include "zasm_GOOS_GOARCH.h" -// int64 rfork_thread(int32 flags, void *stack, M *m, G *g, void (*fn)(void)); -TEXT runtime·rfork_thread(SB),7,$0 - MOVL flags+8(SP), DI - MOVQ stack+16(SP), SI - - // Copy m, g, fn off parent stack for use by child. - MOVQ mm+24(SP), R8 - MOVQ gg+32(SP), R9 - MOVQ fn+40(SP), R12 - - MOVL $251, AX // sys_rfork +// int32 lwp_create(void *context, uintptr flags, void *lwpid) +TEXT runtime·lwp_create(SB),7,$0 + MOVQ context+0(FP), DI + MOVQ flags+8(FP), SI + MOVQ lwpid+16(FP), DX + MOVL $309, AX // sys__lwp_create SYSCALL - - // Return if rfork syscall failed - JCC 3(PC) + JCC 2(PC) NEGL AX RET - // In parent, return. - CMPL AX, $0 - JEQ 2(PC) - RET - - // In child, on new stack. 
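The deleted rfork_thread/getthrid path is replaced by NetBSD's ucontext-based LWP interface: the parent prepares a context, sys__lwp_create starts the thread on it, and lwp_tramp finishes the setup (TLS via _lwp_setprivate, then m, g, and the thread function). A toy user-level equivalent, assuming NetBSD's _lwp_makecontext(3) helper behaves as documented (not the runtime's code; the runtime seeds the context's register slots by hand in its C parts, which this diff does not show):

#include <stdio.h>
#include <ucontext.h>
#include <lwp.h>

static char stk[64*1024];

static void
start(void *arg)
{
	// Toy only: a real program would pass a TLS block as the
	// "private" argument below before calling libc freely.
	printf("hello from the new lwp\n");
	_lwp_exit();
}

int
main(void)
{
	ucontext_t uc;
	lwpid_t lid;

	getcontext(&uc);                     // sys_getcontext, as above
	_lwp_makecontext(&uc, start, NULL, NULL, stk, sizeof stk);
	_lwp_create(&uc, 0, &lid);           // sys__lwp_create
	_lwp_wait(lid, NULL);                // reap the LWP
	return 0;
}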
- MOVQ SI, SP - - // Initialize m->procid to thread ID - MOVL $299, AX // sys_getthrid - SYSCALL - MOVQ AX, m_procid(R8) - +TEXT runtime·lwp_tramp(SB),7,$0 + // Set FS to point at m->tls. LEAQ m_tls(R8), DI CALL runtime·settls(SB) - // In child, set up new stack + // Set up new stack. get_tls(CX) MOVQ R8, m(CX) MOVQ R9, g(CX) @@ -52,29 +34,34 @@ TEXT runtime·rfork_thread(SB),7,$0 // Call fn CALL R12 - // It shouldn't return. If it does, exit - MOVL $302, AX // sys_threxit + // It shouldn't return. If it does, exit. + MOVL $310, AX // sys__lwp_exit SYSCALL JMP -3(PC) // keep exiting TEXT runtime·osyield(SB),7,$0 - MOVL $298, AX // sys_sched_yield + MOVL $350, AX // sys_sched_yield SYSCALL RET -TEXT runtime·thrsleep(SB),7,$0 - MOVQ 8(SP), DI // arg 1 - ident - MOVL 16(SP), SI // arg 2 - clock_id - MOVQ 24(SP), DX // arg 3 - tp - MOVQ 32(SP), R10 // arg 4 - lock - MOVL $300, AX // sys_thrsleep +TEXT runtime·lwp_park(SB),7,$0 + MOVQ 8(SP), DI // arg 1 - abstime + MOVL 16(SP), SI // arg 2 - unpark + MOVQ 24(SP), DX // arg 3 - hint + MOVQ 32(SP), R10 // arg 4 - unparkhint + MOVL $434, AX // sys__lwp_park SYSCALL RET -TEXT runtime·thrwakeup(SB),7,$0 - MOVQ 8(SP), DI // arg 1 - ident - MOVL 16(SP), SI // arg 2 - n - MOVL $301, AX // sys_thrwakeup +TEXT runtime·lwp_unpark(SB),7,$0 + MOVQ 8(SP), DI // arg 1 - lwp + MOVL 16(SP), SI // arg 2 - hint + MOVL $321, AX // sys__lwp_unpark + SYSCALL + RET + +TEXT runtime·lwp_self(SB),7,$0 + MOVL $311, AX // sys__lwp_self SYSCALL RET @@ -83,13 +70,13 @@ TEXT runtime·exit(SB),7,$-8 MOVL 8(SP), DI // arg 1 - exit status MOVL $1, AX // sys_exit SYSCALL - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·exit1(SB),7,$-8 - MOVL $302, AX // sys_threxit + MOVL $310, AX // sys__lwp_exit SYSCALL - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·write(SB),7,$-8 @@ -112,16 +99,16 @@ TEXT runtime·usleep(SB),7,$16 MOVQ SP, DI // arg 1 - rqtp MOVQ $0, SI // arg 2 - rmtp - MOVL $240, AX // sys_nanosleep + MOVL $430, AX // sys_nanosleep SYSCALL RET TEXT runtime·raisesigpipe(SB),7,$16 - MOVL $299, AX // sys_getthrid + MOVL $311, AX // sys__lwp_self SYSCALL - MOVQ AX, DI // arg 1 - pid - MOVQ $13, SI // arg 2 - signum == SIGPIPE - MOVL $37, AX // sys_kill + MOVQ AX, DI // arg 1 - target + MOVQ $13, SI // arg 2 - signo == SIGPIPE + MOVL $318, AX // sys__lwp_kill SYSCALL RET @@ -129,72 +116,101 @@ TEXT runtime·setitimer(SB),7,$-8 MOVL 8(SP), DI // arg 1 - which MOVQ 16(SP), SI // arg 2 - itv MOVQ 24(SP), DX // arg 3 - oitv - MOVL $83, AX // sys_setitimer + MOVL $425, AX // sys_setitimer SYSCALL RET // func now() (sec int64, nsec int32) TEXT time·now(SB), 7, $32 - LEAQ 8(SP), DI // arg 1 - tp - MOVQ $0, SI // arg 2 - tzp - MOVL $116, AX // sys_gettimeofday + MOVQ $0, DI // arg 1 - clock_id + LEAQ 8(SP), SI // arg 2 - tp + MOVL $427, AX // sys_clock_gettime SYSCALL MOVQ 8(SP), AX // sec - MOVL 16(SP), DX // usec + MOVL 16(SP), DX // nsec - // sec is in AX, usec in DX + // sec is in AX, nsec in DX MOVQ AX, sec+0(FP) - IMULQ $1000, DX MOVL DX, nsec+8(FP) RET TEXT runtime·nanotime(SB),7,$32 - LEAQ 8(SP), DI // arg 1 - tp - MOVQ $0, SI // arg 2 - tzp - MOVL $116, AX // sys_gettimeofday + MOVQ $0, DI // arg 1 - clock_id + LEAQ 8(SP), SI // arg 2 - tp + MOVL $427, AX // sys_clock_gettime SYSCALL MOVQ 8(SP), AX // sec - MOVL 16(SP), DX // usec + MOVL 16(SP), DX // nsec - // sec is in AX, usec in DX + // sec is in AX, nsec in DX // return nsec in AX IMULQ $1000000000, AX - IMULQ $1000, DX ADDQ DX, AX RET +TEXT runtime·getcontext(SB),7,$-8 + MOVQ 
8(SP), DI // arg 1 - context + MOVL $307, AX // sys_getcontext + SYSCALL + JCC 2(PC) + MOVL $0xf1, 0xf1 // crash + RET + +TEXT runtime·sigprocmask(SB),7,$0 + MOVL 8(SP), DI // arg 1 - how + MOVQ 16(SP), SI // arg 2 - set + MOVQ 24(SP), DX // arg 3 - oset + MOVL $293, AX // sys_sigprocmask + SYSCALL + JCC 2(PC) + MOVL $0xf1, 0xf1 // crash + RET + +TEXT runtime·sigreturn_tramp(SB),7,$-8 + MOVQ R15, DI // Load address of ucontext + MOVQ $308, AX // sys_setcontext + SYSCALL + MOVQ $-1, DI // Something failed... + MOVL $1, AX // sys_exit + SYSCALL + TEXT runtime·sigaction(SB),7,$-8 MOVL 8(SP), DI // arg 1 - signum MOVQ 16(SP), SI // arg 2 - nsa MOVQ 24(SP), DX // arg 3 - osa - MOVL $46, AX + // arg 4 - tramp + LEAQ runtime·sigreturn_tramp(SB), R10 + MOVQ $2, R8 // arg 5 - vers + MOVL $340, AX // sys___sigaction_sigtramp SYSCALL JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·sigtramp(SB),7,$64 get_tls(BX) - + // check that m exists MOVQ m(BX), BP CMPQ BP, $0 - JNE 2(PC) + JNE 4(PC) + MOVQ DI, 0(SP) CALL runtime·badsignal(SB) + RET // save g MOVQ g(BX), R10 MOVQ R10, 40(SP) - + // g = m->signal MOVQ m_gsignal(BP), BP MOVQ BP, g(BX) - + MOVQ DI, 0(SP) MOVQ SI, 8(SP) MOVQ DX, 16(SP) MOVQ R10, 24(SP) - + CALL runtime·sighandler(SB) // restore g @@ -213,7 +229,7 @@ TEXT runtime·mmap(SB),7,$0 SUBQ $16, SP MOVQ R9, 8(SP) // arg 7 - offset (passed on stack) MOVQ $0, R9 // arg 6 - pad - MOVL $197, AX + MOVL $197, AX // sys_mmap SYSCALL JCC 2(PC) NEGL AX @@ -226,29 +242,36 @@ TEXT runtime·munmap(SB),7,$0 MOVL $73, AX // sys_munmap SYSCALL JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash + RET + + +TEXT runtime·madvise(SB),7,$0 + MOVQ addr+0(FP), DI // arg 1 - addr + MOVQ len+8(FP), SI // arg 2 - len + MOVQ behav+16(FP), DX // arg 3 - behav + MOVQ $75, AX // sys_madvise + SYSCALL + // ignore failure - maybe pages are locked RET TEXT runtime·sigaltstack(SB),7,$-8 MOVQ new+8(SP), DI // arg 1 - nss MOVQ old+16(SP), SI // arg 2 - oss - MOVQ $288, AX // sys_sigaltstack + MOVQ $281, AX // sys___sigaltstack14 SYSCALL JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET // set tls base to DI TEXT runtime·settls(SB),7,$8 // adjust for ELF: wants to use -16(FS) and -8(FS) for g and m - ADDQ $16, DI - MOVQ DI, 0(SP) - MOVQ SP, SI - MOVQ $12, DI // AMD64_SET_FSBASE (machine/sysarch.h) - MOVQ $165, AX // sys_sysarch + ADDQ $16, DI // arg 1 - ptr + MOVQ $317, AX // sys__lwp_setprivate SYSCALL JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·sysctl(SB),7,$0 diff --git a/src/pkg/runtime/sys_netbsd_arm.s b/src/pkg/runtime/sys_netbsd_arm.s new file mode 100644 index 000000000..4a119c5de --- /dev/null +++ b/src/pkg/runtime/sys_netbsd_arm.s @@ -0,0 +1,280 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// System calls and other sys.stuff for ARM, NetBSD +// /usr/src/sys/kern/syscalls.master for syscall numbers. 
+// + +#include "zasm_GOOS_GOARCH.h" + +// Exit the entire program (like C exit) +TEXT runtime·exit(SB),7,$-4 + MOVW 0(FP), R0 // arg 1 exit status + SWI $0xa00001 + MOVW.CS $0, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·exit1(SB),7,$-4 + SWI $0xa00136 // sys__lwp_exit + MOVW $1, R9 // crash + MOVW R9, (R9) + RET + +TEXT runtime·write(SB),7,$-4 + MOVW 0(FP), R0 // arg 1 - fd + MOVW 4(FP), R1 // arg 2 - buf + MOVW 8(FP), R2 // arg 3 - nbyte + SWI $0xa00004 // sys_write + RET + +// int32 lwp_create(void *context, uintptr flags, void *lwpid) +TEXT runtime·lwp_create(SB),7,$0 + MOVW context+0(FP), R0 + MOVW flags+4(FP), R1 + MOVW lwpid+8(FP), R2 + SWI $0xa00135 // sys__lwp_create + RET + +TEXT runtime·osyield(SB),7,$0 + SWI $0xa0015e // sys_sched_yield + RET + +TEXT runtime·lwp_park(SB),7,$0 + MOVW 0(FP), R0 // arg 1 - abstime + MOVW 4(FP), R1 // arg 2 - unpark + MOVW 8(FP), R2 // arg 3 - hint + MOVW 12(FP), R3 // arg 4 - unparkhint + SWI $0xa001b2 // sys__lwp_park + RET + +TEXT runtime·lwp_unpark(SB),7,$0 + MOVW 0(FP), R0 // arg 1 - lwp + MOVW 4(FP), R1 // arg 2 - hint + SWI $0xa00141 // sys__lwp_unpark + RET + +TEXT runtime·lwp_self(SB),7,$0 + SWI $0xa00137 // sys__lwp_self + RET + +TEXT runtime·lwp_tramp(SB),7,$0 + MOVW R0, R9 // m + MOVW R1, R10 // g + + BL runtime·emptyfunc(SB) // fault if stack check is wrong + BL (R2) + MOVW $2, R9 // crash (not reached) + MOVW R9, (R9) + RET + +TEXT runtime·usleep(SB),7,$16 + MOVW usec+0(FP), R0 + MOVW R0, R2 + MOVW $1000000, R1 + DIV R1, R0 + // 0(R13) is the saved LR, don't use it + MOVW R0, 4(R13) // tv_sec.low + MOVW $0, R0 + MOVW R0, 8(R13) // tv_sec.high + MOD R1, R2 + MOVW $1000, R1 + MUL R1, R2 + MOVW R2, 12(R13) // tv_nsec + + MOVW $4(R13), R0 // arg 1 - rqtp + MOVW $0, R1 // arg 2 - rmtp + SWI $0xa001ae // sys_nanosleep + RET + +TEXT runtime·raisesigpipe(SB),7,$16 + SWI $0xa00137 // sys__lwp_self, the returned R0 is arg 1 + MOVW $13, R1 // arg 2 - signo == SIGPIPE + SWI $0xa0013e // sys__lwp_kill + RET + +TEXT runtime·setitimer(SB),7,$-4 + MOVW 0(FP), R0 // arg 1 - which + MOVW 4(FP), R1 // arg 2 - itv + MOVW 8(FP), R2 // arg 3 - oitv + SWI $0xa001a9 // sys_setitimer + RET + +// func now() (sec int64, nsec int32) +TEXT time·now(SB), 7, $32 + MOVW $0, R0 // CLOCK_REALTIME + MOVW $8(R13), R1 + SWI $0xa001ab // clock_gettime + + MOVW 8(R13), R0 // sec.low + MOVW 12(R13), R1 // sec.high + MOVW 16(R13), R2 // nsec + + MOVW R0, 0(FP) + MOVW R1, 4(FP) + MOVW R2, 8(FP) + RET + +// int64 nanotime(void) so really +// void nanotime(int64 *nsec) +TEXT runtime·nanotime(SB), 7, $32 + MOVW $0, R0 // CLOCK_REALTIME + MOVW $8(R13), R1 + SWI $0xa001ab // clock_gettime + + MOVW 8(R13), R0 // sec.low + MOVW 12(R13), R4 // sec.high + MOVW 16(R13), R2 // nsec + + MOVW $1000000000, R3 + MULLU R0, R3, (R1, R0) + MUL R3, R4 + ADD.S R2, R0 + ADC R4, R1 + + MOVW 0(FP), R3 + MOVW R0, 0(R3) + MOVW R1, 4(R3) + RET + +TEXT runtime·getcontext(SB),7,$-4 + MOVW 0(FP), R0 // arg 1 - context + SWI $0xa00133 // sys_getcontext + MOVW.CS $0, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·sigprocmask(SB),7,$0 + MOVW 0(FP), R0 // arg 1 - how + MOVW 4(FP), R1 // arg 2 - set + MOVW 8(FP), R2 // arg 3 - oset + SWI $0xa00125 // sys_sigprocmask + MOVW.CS $0, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·sigreturn_tramp(SB),7,$-4 + // in runtime·sigtramp, we saved ucontext into m->tls[0], + // here we just load it and call sys_setcontext + MOVW m_tls(m), R0 + SWI $0xa00134 // sys_setcontext + // something 
failed, we have to exit + MOVW $0x4242, R0 // magic return number + SWI $0xa00001 // sys_exit + B -2(PC) // continue exit + +TEXT runtime·sigaction(SB),7,$4 + MOVW 0(FP), R0 // arg 1 - signum + MOVW 4(FP), R1 // arg 2 - nsa + MOVW 8(FP), R2 // arg 3 - osa + MOVW $runtime·sigreturn_tramp(SB), R3 // arg 4 - tramp + MOVW $2, R4 // arg 5 - vers + MOVW R4, 4(R13) + ADD $4, R13 // pass arg 5 on stack + SWI $0xa00154 // sys___sigaction_sigtramp + SUB $4, R13 + MOVW.CS $3, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·sigtramp(SB),7,$24 + // this might be called in external code context, + // where g and m are not set. + // first save R0, because _cgo_load_gm will clobber it + // TODO(adonovan): call runtime·badsignal if m=0, like other platforms? + MOVW R0, 4(R13) // signum + MOVW _cgo_load_gm(SB), R0 + CMP $0, R0 + BL.NE (R0) + + // save g + MOVW R10, R4 + MOVW R10, 20(R13) + + // g = m->signal + MOVW m_gsignal(R9), R10 + + // R0 is already saved + MOVW R1, 8(R13) // info + MOVW R2, 12(R13) // context + MOVW R4, 16(R13) // gp + // we also save the ucontext into m->tls[0] for easy + // signal return + MOVW R2, m_tls(m) + + BL runtime·sighandler(SB) + + // restore g + MOVW 20(R13), R10 + RET + +TEXT runtime·mmap(SB),7,$12 + MOVW 0(FP), R0 // arg 1 - addr + MOVW 4(FP), R1 // arg 2 - len + MOVW 8(FP), R2 // arg 3 - prot + MOVW 12(FP), R3 // arg 4 - flags + // arg 5 (fid) and arg6 (offset_lo, offset_hi) are passed on stack + // note the C runtime only passes the 32-bit offset_lo to us + MOVW 16(FP), R4 // arg 5 + MOVW R4, 4(R13) + MOVW 20(FP), R5 // arg 6 lower 32-bit + MOVW R5, 8(R13) + MOVW $0, R6 // higher 32-bit for arg 6 + MOVW R6, 12(R13) + ADD $4, R13 // pass arg 5 and arg 6 on stack + SWI $0xa000c5 // sys_mmap + SUB $4, R13 + RET + +TEXT runtime·munmap(SB),7,$0 + MOVW 0(FP), R0 // arg 1 - addr + MOVW 4(FP), R1 // arg 2 - len + SWI $0xa00049 // sys_munmap + MOVW.CS $0, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·madvise(SB),7,$0 + MOVW 0(FP), R0 // arg 1 - addr + MOVW 4(FP), R1 // arg 2 - len + MOVW 8(FP), R2 // arg 3 - behav + SWI $0xa0004b // sys_madvise + // ignore failure - maybe pages are locked + RET + +TEXT runtime·sigaltstack(SB),7,$-4 + MOVW 0(FP), R0 // arg 1 - nss + MOVW 4(FP), R1 // arg 2 - oss + SWI $0xa00119 // sys___sigaltstack14 + MOVW.CS $0, R9 // crash on syscall failure + MOVW.CS R9, (R9) + RET + +TEXT runtime·sysctl(SB),7,$8 + MOVW 0(FP), R0 // arg 1 - name + MOVW 4(FP), R1 // arg 2 - namelen + MOVW 8(FP), R2 // arg 3 - oldp + MOVW 12(FP), R3 // arg 4 - oldlenp + MOVW 16(FP), R4 // arg 5 - newp + MOVW R4, 4(R13) + MOVW 20(FP), R4 // arg 6 - newlen + MOVW R4, 8(R13) + ADD $4, R13 // pass arg 5 and 6 on stack + SWI $0xa000ca // sys___sysctl + SUB $4, R13 + RET + +TEXT runtime·casp(SB),7,$0 + B runtime·cas(SB) + +// TODO(minux): this is only valid for ARMv6+ +// bool armcas(int32 *val, int32 old, int32 new) +// Atomically: +// if(*val == old){ +// *val = new; +// return 1; +// }else +// return 0; +TEXT runtime·cas(SB),7,$0 + B runtime·armcas(SB) diff --git a/src/pkg/runtime/sys_openbsd_386.s b/src/pkg/runtime/sys_openbsd_386.s index 593b4a9df..c62e0f949 100644 --- a/src/pkg/runtime/sys_openbsd_386.s +++ b/src/pkg/runtime/sys_openbsd_386.s @@ -12,14 +12,16 @@ TEXT runtime·exit(SB),7,$-4 MOVL $1, AX INT $0x80 - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET -TEXT runtime·exit1(SB),7,$-4 - MOVL $302, AX // sys_threxit +TEXT runtime·exit1(SB),7,$8 + MOVL $0, 0(SP) + MOVL $0, 4(SP) // arg 1 - notdead + MOVL $302, AX 
// sys___threxit INT $0x80 JAE 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·write(SB),7,$-4 @@ -67,7 +69,7 @@ TEXT runtime·mmap(SB),7,$36 MOVL $0, AX STOSL // arg 6 - pad MOVSL // arg 7 - offset - MOVL $0, AX // top 64 bits of file offset + MOVL $0, AX // top 32 bits of file offset STOSL MOVL $197, AX // sys_mmap INT $0x80 @@ -79,7 +81,14 @@ TEXT runtime·munmap(SB),7,$-4 MOVL $73, AX // sys_munmap INT $0x80 JAE 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash + RET + +TEXT runtime·madvise(SB),7,$-4 + MOVL $75, AX // sys_madvise + INT $0x80 + JAE 2(PC) + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·setitimer(SB),7,$-4 @@ -89,40 +98,38 @@ TEXT runtime·setitimer(SB),7,$-4 // func now() (sec int64, nsec int32) TEXT time·now(SB), 7, $32 - MOVL $116, AX + MOVL $232, AX LEAL 12(SP), BX - MOVL BX, 4(SP) - MOVL $0, 8(SP) + MOVL $0, 4(SP) + MOVL BX, 8(SP) INT $0x80 MOVL 12(SP), AX // sec - MOVL 16(SP), BX // usec + MOVL 16(SP), BX // nsec - // sec is in AX, usec in BX + // sec is in AX, nsec in BX MOVL AX, sec+0(FP) MOVL $0, sec+4(FP) - IMULL $1000, BX MOVL BX, nsec+8(FP) RET // int64 nanotime(void) so really // void nanotime(int64 *nsec) TEXT runtime·nanotime(SB),7,$32 - MOVL $116, AX + MOVL $232, AX LEAL 12(SP), BX - MOVL BX, 4(SP) - MOVL $0, 8(SP) + MOVL $0, 4(SP) + MOVL BX, 8(SP) INT $0x80 MOVL 12(SP), AX // sec - MOVL 16(SP), BX // usec + MOVL 16(SP), BX // nsec - // sec is in AX, usec in BX + // sec is in AX, nsec in BX // convert to DX:AX nsec MOVL $1000000000, CX MULL CX - IMULL $1000, BX ADDL BX, AX ADCL $0, DX - + MOVL ret+0(FP), DI MOVL AX, 0(DI) MOVL DX, 4(DI) @@ -132,7 +139,15 @@ TEXT runtime·sigaction(SB),7,$-4 MOVL $46, AX // sys_sigaction INT $0x80 JAE 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash + RET + +TEXT runtime·sigprocmask(SB),7,$-4 + MOVL $48, AX // sys_sigprocmask + INT $0x80 + JAE 2(PC) + MOVL $0xf1, 0xf1 // crash + MOVL AX, oset+0(FP) RET TEXT runtime·sigtramp(SB),7,$44 @@ -141,8 +156,11 @@ TEXT runtime·sigtramp(SB),7,$44 // check that m exists MOVL m(CX), BX CMPL BX, $0 - JNE 2(PC) + JNE 5(PC) + MOVL signo+0(FP), BX + MOVL BX, 0(SP) CALL runtime·badsignal(SB) + RET // save g MOVL g(CX), DI @@ -174,62 +192,59 @@ TEXT runtime·sigtramp(SB),7,$44 MOVL AX, 4(SP) // arg 1 - sigcontext MOVL $103, AX // sys_sigreturn INT $0x80 - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET -// int32 rfork_thread(int32 flags, void *stack, M *m, G *g, void (*fn)(void)); -TEXT runtime·rfork_thread(SB),7,$8 - MOVL flags+8(SP), AX - MOVL stack+12(SP), CX +// int32 tfork(void *param, uintptr psize, M *mp, G *gp, void (*fn)(void)); +TEXT runtime·tfork(SB),7,$12 - // Copy m, g, fn off parent stack for use by child. + // Copy mp, gp and fn from the parent stack onto the child stack. + MOVL params+4(FP), AX + MOVL 8(AX), CX // tf_stack SUBL $16, CX - MOVL mm+16(SP), SI + MOVL CX, 8(AX) + MOVL mm+12(FP), SI MOVL SI, 0(CX) - MOVL gg+20(SP), SI + MOVL gg+16(FP), SI MOVL SI, 4(CX) - MOVL fn+24(SP), SI + MOVL fn+20(FP), SI MOVL SI, 8(CX) MOVL $1234, 12(CX) - MOVL CX, SI MOVL $0, 0(SP) // syscall gap - MOVL AX, 4(SP) // arg 1 - flags - MOVL $251, AX // sys_rfork + MOVL params+4(FP), AX + MOVL AX, 4(SP) // arg 1 - param + MOVL psize+8(FP), AX + MOVL AX, 8(SP) // arg 2 - psize + MOVL $8, AX // sys___tfork INT $0x80 - // Return if rfork syscall failed - JCC 4(PC) + // Return if tfork syscall failed. + JCC 5(PC) NEGL AX - MOVL AX, 48(SP) + MOVL ret+0(FP), DX + MOVL AX, 0(DX) RET // In parent, return. 
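For reference, the parameter block behind the new sys___tfork call, with the layout inferred from the offsets this file uses (the 386 code reads the child stack at 8(AX), matching its tf_stack comment; treat the exact declaration as an assumption rather than the kernel's header):

// Passed as __tfork(&param, sizeof param); returns 0 in the child,
// the new thread id in the parent.
struct tfork {
	void *tf_tcb;    // +0: thread control block (TLS base)
	int  *tf_tid;    // +4 on 386: where the kernel writes the tid
	void *tf_stack;  // +8 on 386: initial stack pointer for the child
};

Unlike the old rfork path, the kernel now places the child on its stack itself, which is why the hand-rolled stack switch and the sys_getthrid call disappear from both the 386 and amd64 files.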
CMPL AX, $0 - JEQ 3(PC) - MOVL AX, 48(SP) + JEQ 4(PC) + MOVL ret+0(FP), DX + MOVL AX, 0(DX) RET - // In child, on new stack. - MOVL SI, SP - // Paranoia: check that SP is as we expect. MOVL 12(SP), BP CMPL BP, $1234 JEQ 2(PC) INT $3 - // Reload registers + // Reload registers. MOVL 0(SP), BX // m MOVL 4(SP), DX // g MOVL 8(SP), SI // fn - // Initialize m->procid to thread ID - MOVL $299, AX // sys_getthrid - INT $0x80 - MOVL AX, m_procid(BX) - - // Set FS to point at m->tls + // Set FS to point at m->tls. LEAL m_tls(BX), BP PUSHAL // save registers PUSHL BP @@ -246,12 +261,12 @@ TEXT runtime·rfork_thread(SB),7,$8 MOVL 0(DX), DX // paranoia; check they are not nil MOVL 0(BX), BX - // more paranoia; check that stack splitting code works + // More paranoia; check that stack splitting code works. PUSHAL CALL runtime·emptyfunc(SB) POPAL - // Call fn + // Call fn. CALL SI CALL runtime·exit1(SB) @@ -268,25 +283,23 @@ TEXT runtime·sigaltstack(SB),7,$-8 INT $3 RET -TEXT runtime·setldt(SB),7,$8 +TEXT runtime·setldt(SB),7,$4 // Under OpenBSD we set the GS base instead of messing with the LDT. - MOVL 16(SP), AX // tls0 + MOVL tls0+4(FP), AX MOVL AX, 0(SP) CALL runtime·settls(SB) RET -TEXT runtime·settls(SB),7,$16 +TEXT runtime·settls(SB),7,$8 // adjust for ELF: wants to use -8(GS) and -4(GS) for g and m - MOVL 20(SP), CX + MOVL tlsbase+0(FP), CX ADDL $8, CX - MOVL CX, 0(CX) MOVL $0, 0(SP) // syscall gap - MOVL $9, 4(SP) // I386_SET_GSBASE (machine/sysarch.h) - MOVL CX, 8(SP) // pointer to base - MOVL $165, AX // sys_sysarch + MOVL CX, 4(SP) // arg 1 - tcb + MOVL $329, AX // sys___set_tcb INT $0x80 JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·osyield(SB),7,$-4 @@ -295,12 +308,12 @@ TEXT runtime·osyield(SB),7,$-4 RET TEXT runtime·thrsleep(SB),7,$-4 - MOVL $300, AX // sys_thrsleep + MOVL $300, AX // sys___thrsleep INT $0x80 RET TEXT runtime·thrwakeup(SB),7,$-4 - MOVL $301, AX // sys_thrwakeup + MOVL $301, AX // sys___thrwakeup INT $0x80 RET diff --git a/src/pkg/runtime/sys_openbsd_amd64.s b/src/pkg/runtime/sys_openbsd_amd64.s index d2d48e6b5..8a736507f 100644 --- a/src/pkg/runtime/sys_openbsd_amd64.s +++ b/src/pkg/runtime/sys_openbsd_amd64.s @@ -8,20 +8,20 @@ #include "zasm_GOOS_GOARCH.h" -// int64 rfork_thread(int32 flags, void *stack, M *m, G *g, void (*fn)(void)); -TEXT runtime·rfork_thread(SB),7,$0 - MOVL flags+8(SP), DI - MOVQ stack+16(SP), SI +// int64 tfork(void *param, uintptr psize, M *mp, G *gp, void (*fn)(void)); +TEXT runtime·tfork(SB),7,$32 - // Copy m, g, fn off parent stack for use by child. - MOVQ mm+24(SP), R8 - MOVQ gg+32(SP), R9 - MOVQ fn+40(SP), R12 + // Copy mp, gp and fn off parent stack for use by child. + MOVQ mm+16(FP), R8 + MOVQ gg+24(FP), R9 + MOVQ fn+32(FP), R12 - MOVL $251, AX // sys_rfork + MOVQ param+0(FP), DI + MOVQ psize+8(FP), SI + MOVL $8, AX // sys___tfork SYSCALL - // Return if rfork syscall failed + // Return if tfork syscall failed. JCC 3(PC) NEGL AX RET @@ -31,19 +31,11 @@ TEXT runtime·rfork_thread(SB),7,$0 JEQ 2(PC) RET - // In child, on new stack. - MOVQ SI, SP - - // Initialize m->procid to thread ID - MOVL $299, AX // sys_getthrid - SYSCALL - MOVQ AX, m_procid(R8) - // Set FS to point at m->tls. LEAQ m_tls(R8), DI CALL runtime·settls(SB) - // In child, set up new stack + // In child, set up new stack. get_tls(CX) MOVQ R8, m(CX) MOVQ R9, g(CX) @@ -53,12 +45,13 @@ TEXT runtime·rfork_thread(SB),7,$0 CALL R12 // It shouldn't return. 
If it does, exit - MOVL $302, AX // sys_threxit + MOVQ $0, DI // arg 1 - notdead + MOVL $302, AX // sys___threxit SYSCALL JMP -3(PC) // keep exiting TEXT runtime·osyield(SB),7,$0 - MOVL $298, AX // sys_sched_yield + MOVL $298, AX // sys_sched_yield SYSCALL RET @@ -67,14 +60,15 @@ TEXT runtime·thrsleep(SB),7,$0 MOVL 16(SP), SI // arg 2 - clock_id MOVQ 24(SP), DX // arg 3 - tp MOVQ 32(SP), R10 // arg 4 - lock - MOVL $300, AX // sys_thrsleep + MOVQ 40(SP), R8 // arg 5 - abort + MOVL $300, AX // sys___thrsleep SYSCALL RET TEXT runtime·thrwakeup(SB),7,$0 MOVQ 8(SP), DI // arg 1 - ident MOVL 16(SP), SI // arg 2 - n - MOVL $301, AX // sys_thrwakeup + MOVL $301, AX // sys___thrwakeup SYSCALL RET @@ -83,13 +77,14 @@ TEXT runtime·exit(SB),7,$-8 MOVL 8(SP), DI // arg 1 - exit status MOVL $1, AX // sys_exit SYSCALL - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·exit1(SB),7,$-8 - MOVL $302, AX // sys_threxit + MOVQ $0, DI // arg 1 - notdead + MOVL $302, AX // sys___threxit SYSCALL - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·write(SB),7,$-8 @@ -135,31 +130,29 @@ TEXT runtime·setitimer(SB),7,$-8 // func now() (sec int64, nsec int32) TEXT time·now(SB), 7, $32 - LEAQ 8(SP), DI // arg 1 - tp - MOVQ $0, SI // arg 2 - tzp - MOVL $116, AX // sys_gettimeofday + MOVQ $0, DI // arg 1 - clock_id + LEAQ 8(SP), SI // arg 2 - tp + MOVL $232, AX // sys_clock_gettime SYSCALL - MOVQ 8(SP), AX // sec - MOVL 16(SP), DX // usec + MOVL 8(SP), AX // sec + MOVQ 16(SP), DX // nsec - // sec is in AX, usec in DX + // sec is in AX, nsec in DX MOVQ AX, sec+0(FP) - IMULQ $1000, DX MOVL DX, nsec+8(FP) RET TEXT runtime·nanotime(SB),7,$32 - LEAQ 8(SP), DI // arg 1 - tp - MOVQ $0, SI // arg 2 - tzp - MOVL $116, AX // sys_gettimeofday + MOVQ $0, DI // arg 1 - clock_id + LEAQ 8(SP), SI // arg 2 - tp + MOVL $232, AX // sys_clock_gettime SYSCALL - MOVQ 8(SP), AX // sec - MOVL 16(SP), DX // usec + MOVL 8(SP), AX // sec + MOVQ 16(SP), DX // nsec - // sec is in AX, usec in DX + // sec is in AX, nsec in DX // return nsec in AX IMULQ $1000000000, AX - IMULQ $1000, DX ADDQ DX, AX RET @@ -170,7 +163,17 @@ TEXT runtime·sigaction(SB),7,$-8 MOVL $46, AX SYSCALL JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash + RET + +TEXT runtime·sigprocmask(SB),7,$0 + MOVL 8(SP), DI // arg 1 - how + MOVL 12(SP), SI // arg 2 - set + MOVL $48, AX // sys_sigprocmask + SYSCALL + JCC 2(PC) + MOVL $0xf1, 0xf1 // crash + MOVL AX, oset+0(FP) // Return oset RET TEXT runtime·sigtramp(SB),7,$64 @@ -179,8 +182,10 @@ TEXT runtime·sigtramp(SB),7,$64 // check that m exists MOVQ m(BX), BP CMPQ BP, $0 - JNE 2(PC) + JNE 4(PC) + MOVQ DI, 0(SP) CALL runtime·badsignal(SB) + RET // save g MOVQ g(BX), R10 @@ -226,7 +231,16 @@ TEXT runtime·munmap(SB),7,$0 MOVL $73, AX // sys_munmap SYSCALL JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash + RET + +TEXT runtime·madvise(SB),7,$0 + MOVQ addr+0(FP), DI // arg 1 - addr + MOVQ len+8(FP), SI // arg 2 - len + MOVQ behav+16(FP), DX // arg 3 - behav + MOVQ $75, AX // sys_madvise + SYSCALL + // ignore failure - maybe pages are locked RET TEXT runtime·sigaltstack(SB),7,$-8 @@ -235,20 +249,17 @@ TEXT runtime·sigaltstack(SB),7,$-8 MOVQ $288, AX // sys_sigaltstack SYSCALL JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET // set tls base to DI -TEXT runtime·settls(SB),7,$8 +TEXT runtime·settls(SB),7,$0 // adjust for ELF: wants to use -16(FS) and -8(FS) for g and m ADDQ $16, DI - MOVQ DI, 0(SP) - MOVQ SP, SI - MOVQ $12, DI // AMD64_SET_FSBASE 
(machine/sysarch.h) - MOVQ $165, AX // sys_sysarch + MOVQ $329, AX // sys___settcb SYSCALL JCC 2(PC) - MOVL $0xf1, 0xf1 // crash + MOVL $0xf1, 0xf1 // crash RET TEXT runtime·sysctl(SB),7,$0 @@ -260,7 +271,7 @@ TEXT runtime·sysctl(SB),7,$0 MOVQ 48(SP), R9 // arg 6 - newlen MOVQ $202, AX // sys___sysctl SYSCALL - JCC 3(PC) + JCC 3(PC) NEGL AX RET MOVL $0, AX diff --git a/src/pkg/runtime/sys_plan9_386.s b/src/pkg/runtime/sys_plan9_386.s index 94c36aa41..3385b083a 100644 --- a/src/pkg/runtime/sys_plan9_386.s +++ b/src/pkg/runtime/sys_plan9_386.s @@ -2,7 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include "defs_GOOS_GOARCH.h" #include "zasm_GOOS_GOARCH.h" // setldt(int entry, int address, int limit) @@ -24,9 +23,19 @@ TEXT runtime·pwrite(SB),7,$0 INT $64 RET +TEXT runtime·seek(SB),7,$0 + MOVL $39, AX + INT $64 + CMPL AX, $-1 + JNE 4(PC) + MOVL a+0(FP), CX + MOVL AX, 0(CX) + MOVL AX, 4(CX) + RET + TEXT runtime·close(SB),7,$0 MOVL $4, AX - INT $64 + INT $64 RET TEXT runtime·exits(SB),7,$0 @@ -48,6 +57,21 @@ TEXT runtime·plan9_semacquire(SB),7,$0 MOVL $37, AX INT $64 RET + +TEXT runtime·plan9_tsemacquire(SB),7,$0 + MOVL $52, AX + INT $64 + RET + +TEXT runtime·notify(SB),7,$0 + MOVL $28, AX + INT $64 + RET + +TEXT runtime·noted(SB),7,$0 + MOVL $29, AX + INT $64 + RET TEXT runtime·plan9_semrelease(SB),7,$0 MOVL $38, AX @@ -77,9 +101,8 @@ TEXT runtime·rfork(SB),7,$0 MOVL DX, g(AX) MOVL BX, m(AX) - // Initialize AX from _tos->pid - MOVL _tos(SB), AX - MOVL tos_pid(AX), AX + // Initialize AX from TOS struct. + MOVL procid(AX), AX MOVL AX, m_procid(BX) // save pid as m->procid CALL runtime·stackcheck(SB) // smashes AX, CX @@ -95,3 +118,55 @@ TEXT runtime·rfork(SB),7,$0 CALL SI // fn() CALL runtime·exit(SB) RET + +// void sigtramp(void *ureg, int8 *note) +TEXT runtime·sigtramp(SB),7,$0 + get_tls(AX) + + // check that m exists + MOVL m(AX), BX + CMPL BX, $0 + JNE 3(PC) + CALL runtime·badsignal(SB) // will exit + RET + + // save args + MOVL ureg+4(SP), CX + MOVL note+8(SP), DX + + // change stack + MOVL m_gsignal(BX), BP + MOVL g_stackbase(BP), BP + MOVL BP, SP + + // make room for args and g + SUBL $16, SP + + // save g + MOVL g(AX), BP + MOVL BP, 12(SP) + + // g = m->gsignal + MOVL m_gsignal(BX), DI + MOVL DI, g(AX) + + // load args and call sighandler + MOVL CX, 0(SP) + MOVL DX, 4(SP) + MOVL BP, 8(SP) + + CALL runtime·sighandler(SB) + + // restore g + get_tls(BX) + MOVL 12(SP), BP + MOVL BP, g(BX) + + // call noted(AX) + MOVL AX, 0(SP) + CALL runtime·noted(SB) + RET + +// Only used by the 64-bit runtime. +TEXT runtime·setfpmasks(SB),7,$0 + RET diff --git a/src/pkg/runtime/sys_plan9_amd64.s b/src/pkg/runtime/sys_plan9_amd64.s new file mode 100644 index 000000000..b34f98a68 --- /dev/null +++ b/src/pkg/runtime/sys_plan9_amd64.s @@ -0,0 +1,208 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
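// Every stub in this new file follows the same Plan 9 amd64 system
// call pattern: the constant $0x8000 is loaded into AX (a kernel ABI
// detail), the system call number into BP, and SYSCALL traps into the
// kernel with the arguments already laid out on the stack by the
// caller. A stub for a hypothetical syscall number NNN would look
// like this (a sketch of the convention, not part of this patch):
//
//	TEXT runtime·mycall(SB),7,$0
//		MOVQ	$0x8000, AX
//		MOVQ	$NNN, BP
//		SYSCALL
//		RET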
+ +#include "zasm_GOOS_GOARCH.h" + +// setldt(int entry, int address, int limit) +TEXT runtime·setldt(SB),7,$0 + RET + +TEXT runtime·open(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $14, BP + SYSCALL + RET + +TEXT runtime·pread(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $50, BP + SYSCALL + RET + +TEXT runtime·pwrite(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $51, BP + SYSCALL + RET + +// int32 _seek(int64*, int32, int64, int32) +TEXT _seek<>(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $39, BP + SYSCALL + RET + +// int64 seek(int32, int64, int32) +TEXT runtime·seek(SB),7,$56 + LEAQ new+48(SP), CX + MOVQ CX, 0(SP) + MOVQ fd+0(FP), CX + MOVQ CX, 8(SP) + MOVQ off+8(FP), CX + MOVQ CX, 16(SP) + MOVQ whence+16(FP), CX + MOVQ CX, 24(SP) + CALL _seek<>(SB) + CMPL AX, $0 + JGE 2(PC) + MOVQ $-1, new+48(SP) + MOVQ new+48(SP), AX + RET + +TEXT runtime·close(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $4, BP + SYSCALL + RET + +TEXT runtime·exits(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $8, BP + SYSCALL + RET + +TEXT runtime·brk_(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $24, BP + SYSCALL + RET + +TEXT runtime·sleep(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $17, BP + SYSCALL + RET + +TEXT runtime·plan9_semacquire(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $37, BP + SYSCALL + RET + +TEXT runtime·plan9_tsemacquire(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $52, BP + SYSCALL + RET + +TEXT runtime·notify(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $28, BP + SYSCALL + RET + +TEXT runtime·noted(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $29, BP + SYSCALL + RET + +TEXT runtime·plan9_semrelease(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $38, BP + SYSCALL + RET + +TEXT runtime·nanotime(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $60, BP + SYSCALL + RET + +TEXT runtime·rfork(SB),7,$0 + MOVQ $0x8000, AX + MOVQ $19, BP // rfork + SYSCALL + + // In parent, return. + CMPQ AX, $0 + JEQ 2(PC) + RET + + // In child on forked stack. + MOVQ mm+24(SP), BX // m + MOVQ gg+32(SP), DX // g + MOVQ fn+40(SP), SI // fn + + // set SP to be on the new child stack + MOVQ stack+16(SP), CX + MOVQ CX, SP + + // Initialize m, g. + get_tls(AX) + MOVQ DX, g(AX) + MOVQ BX, m(AX) + + // Initialize AX from pid in TLS. 
+ MOVQ procid(AX), AX + MOVQ AX, m_procid(BX) // save pid as m->procid + + CALL runtime·stackcheck(SB) // smashes AX, CX + + MOVQ 0(DX), DX // paranoia; check they are not nil + MOVQ 0(BX), BX + + CALL SI // fn() + CALL runtime·exit(SB) + RET + +// This is needed by asm_amd64.s +TEXT runtime·settls(SB),7,$0 + RET + +// void sigtramp(void *ureg, int8 *note) +TEXT runtime·sigtramp(SB),7,$0 + get_tls(AX) + + // check that m exists + MOVQ m(AX), BX + CMPQ BX, $0 + JNE 3(PC) + CALL runtime·badsignal(SB) // will exit + RET + + // save args + MOVQ ureg+8(SP), CX + MOVQ note+16(SP), DX + + // change stack + MOVQ m_gsignal(BX), R10 + MOVQ g_stackbase(R10), BP + MOVQ BP, SP + + // make room for args and g + SUBQ $32, SP + + // save g + MOVQ g(AX), BP + MOVQ BP, 24(SP) + + // g = m->gsignal + MOVQ R10, g(AX) + + // load args and call sighandler + MOVQ CX, 0(SP) + MOVQ DX, 8(SP) + MOVQ BP, 16(SP) + + CALL runtime·sighandler(SB) + + // restore g + get_tls(BX) + MOVQ 24(SP), R10 + MOVQ R10, g(BX) + + // call noted(AX) + MOVQ AX, 0(SP) + CALL runtime·noted(SB) + RET + +TEXT runtime·setfpmasks(SB),7,$8 + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $~0x3F, AX + ORL $(0x3F<<7), AX + MOVL AX, 0(SP) + LDMXCSR 0(SP) + RET diff --git a/src/pkg/runtime/sys_windows_386.s b/src/pkg/runtime/sys_windows_386.s index d5646bfea..ca59f0a1d 100644 --- a/src/pkg/runtime/sys_windows_386.s +++ b/src/pkg/runtime/sys_windows_386.s @@ -216,7 +216,7 @@ TEXT runtime·callbackasm+0(SB),7,$0 CLD - CALL runtime·cgocallback(SB) + CALL runtime·cgocallback_gofunc(SB) POPL AX POPL CX @@ -243,11 +243,6 @@ TEXT runtime·tstart(SB),7,$0 MOVL newm+4(SP), CX // m MOVL m_g0(CX), DX // g - // Set up SEH frame - PUSHL $runtime·sigtramp(SB) - PUSHL 0(FS) - MOVL SP, 0(FS) - // Layout new m scheduler stack on os stack. MOVL SP, AX MOVL AX, g_stackbase(DX) @@ -264,14 +259,8 @@ TEXT runtime·tstart(SB),7,$0 CLD CALL runtime·stackcheck(SB) // clobbers AX,CX - CALL runtime·mstart(SB) - // Pop SEH frame - MOVL 0(FS), SP - POPL 0(FS) - POPL CX - RET // uint32 tstart_stdcall(M *newm); @@ -296,3 +285,32 @@ TEXT runtime·setldt(SB),7,$0 MOVL address+4(FP), CX MOVL CX, 0x14(FS) RET + +// void install_exception_handler() +TEXT runtime·install_exception_handler(SB),7,$0 + get_tls(CX) + MOVL m(CX), CX // m + + // Set up SEH frame + MOVL m_seh(CX), DX + MOVL $runtime·sigtramp(SB), AX + MOVL AX, seh_handler(DX) + MOVL 0(FS), AX + MOVL AX, seh_prev(DX) + + // Install it + MOVL DX, 0(FS) + + RET + +// void remove_exception_handler() +TEXT runtime·remove_exception_handler(SB),7,$0 + get_tls(CX) + MOVL m(CX), CX // m + + // Remove SEH frame + MOVL m_seh(CX), DX + MOVL seh_prev(DX), AX + MOVL AX, 0(FS) + + RET diff --git a/src/pkg/runtime/sys_windows_amd64.s b/src/pkg/runtime/sys_windows_amd64.s index 11909cda2..fe88f3b75 100644 --- a/src/pkg/runtime/sys_windows_amd64.s +++ b/src/pkg/runtime/sys_windows_amd64.s @@ -272,13 +272,13 @@ TEXT runtime·callbackasm(SB),7,$0 MOVQ R15, 0(SP) // prepare call stack. use SUBQ to hide from stack frame checks - // cgocallback(void (*fn)(void*), void *frame, uintptr framesize) + // cgocallback(Go func, void *frame, uintptr framesize) SUBQ $24, SP MOVQ DX, 16(SP) // uintptr framesize MOVQ CX, 8(SP) // void *frame - MOVQ AX, 0(SP) // void (*fn)(void*) + MOVQ AX, 0(SP) // Go func CLD - CALL runtime·cgocallback(SB) + CALL runtime·cgocallback_gofunc(SB) MOVQ 0(SP), AX MOVQ 8(SP), CX MOVQ 16(SP), DX @@ -328,7 +328,6 @@ TEXT runtime·tstart_stdcall(SB),7,$0 // Someday the convention will be D is always cleared. 
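// CLD matters here because Go's generated code and the C calling
// convention both assume the direction flag is clear, while a foreign
// Windows caller may leave it set; clearing it once on every entry
// into Go code keeps string instructions moving forward. The entry
// sequence is the pattern visible below (sketch):
//
//	CLD
//	CALL	runtime·stackcheck(SB)	// clobbers AX,CX
//	CALL	runtime·mstart(SB)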
CLD - CALL runtime·setstacklimits(SB) CALL runtime·stackcheck(SB) // clobbers AX,CX CALL runtime·mstart(SB) @@ -337,6 +336,13 @@ TEXT runtime·tstart_stdcall(SB),7,$0 // set tls base to DI TEXT runtime·settls(SB),7,$0 - CALL runtime·setstacklimits(SB) MOVQ DI, 0x28(GS) RET + +// void install_exception_handler() +TEXT runtime·install_exception_handler(SB),7,$0 + CALL runtime·setstacklimits(SB) + RET + +TEXT runtime·remove_exception_handler(SB),7,$0 + RET diff --git a/src/pkg/runtime/syscall_windows_test.go b/src/pkg/runtime/syscall_windows_test.go index c8327fdef..f04d2cd54 100644 --- a/src/pkg/runtime/syscall_windows_test.go +++ b/src/pkg/runtime/syscall_windows_test.go @@ -112,9 +112,10 @@ func Test64BitReturnStdCall(t *testing.T) { func TestCDecl(t *testing.T) { var buf [50]byte + fmtp, _ := syscall.BytePtrFromString("%d %d %d") a, _, _ := GetDLL(t, "user32.dll").Proc("wsprintfA").Call( uintptr(unsafe.Pointer(&buf[0])), - uintptr(unsafe.Pointer(syscall.StringBytePtr("%d %d %d"))), + uintptr(unsafe.Pointer(fmtp)), 1000, 2000, 3000) if string(buf[:a]) != "1000 2000 3000" { t.Error("cdecl USER32.wsprintfA returns", a, "buf=", buf[:a]) diff --git a/src/pkg/runtime/thread_darwin.c b/src/pkg/runtime/thread_darwin.c index 6a83e48a3..adb1ffe6a 100644 --- a/src/pkg/runtime/thread_darwin.c +++ b/src/pkg/runtime/thread_darwin.c @@ -9,8 +9,8 @@ extern SigTab runtime·sigtab[]; -static Sigset sigset_all = ~(Sigset)0; static Sigset sigset_none; +static Sigset sigset_all = ~(Sigset)0; static Sigset sigset_prof = 1<<(SIGPROF-1); static void @@ -50,11 +50,8 @@ runtime·semacreate(void) void runtime·osinit(void) { - // Register our thread-creation callback (see sys_darwin_{amd64,386}.s) - // but only if we're not using cgo. If we are using cgo we need - // to let the C pthread libary install its own thread-creation callback. - if(!runtime·iscgo) - runtime·bsdthread_register(); + // bsdthread_register delayed until end of goenvs so that we + // can look at the environment first. // Use sysctl to fetch hw.ncpu. uint32 mib[2]; @@ -75,22 +72,34 @@ void runtime·goenvs(void) { runtime·goenvs_unix(); + + // Register our thread-creation callback (see sys_darwin_{amd64,386}.s) + // but only if we're not using cgo. If we are using cgo we need + // to let the C pthread libary install its own thread-creation callback. + if(!runtime·iscgo) { + if(runtime·bsdthread_register() != 0) { + if(runtime·getenv("DYLD_INSERT_LIBRARIES")) + runtime·throw("runtime: bsdthread_register error (unset DYLD_INSERT_LIBRARIES)"); + runtime·throw("runtime: bsdthread_register error"); + } + } + } void -runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) +runtime·newosproc(M *mp, void *stk) { int32 errno; Sigset oset; - m->tls[0] = m->id; // so 386 asm can find it + mp->tls[0] = mp->id; // so 386 asm can find it if(0){ - runtime·printf("newosproc stk=%p m=%p g=%p fn=%p id=%d/%d ostk=%p\n", - stk, m, g, fn, m->id, m->tls[0], &m); + runtime·printf("newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n", + stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp); } runtime·sigprocmask(SIG_SETMASK, &sigset_all, &oset); - errno = runtime·bsdthread_create(stk, m, g, fn); + errno = runtime·bsdthread_create(stk, mp, mp->g0, runtime·mstart); runtime·sigprocmask(SIG_SETMASK, &oset, nil); if(errno < 0) { @@ -100,17 +109,30 @@ runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) } // Called to initialize a new m (including the bootstrap m). +// Called on the parent thread (main thread in case of bootstrap), can allocate memory. 
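// The mpreinit/minit split introduced by this patch separates
// allocation from thread-local setup: mpreinit runs on the parent
// thread while malloc is still usable, minit runs first thing on the
// new thread where allocation is forbidden, and unminit reverses minit
// when a thread is dropped. The resulting lifecycle is roughly this
// (a sketch; only mpreinit/minit/unminit/newosproc are real names):
//
//	runtime·mpreinit(mp);		// parent: allocate gsignal stack etc.
//	runtime·newosproc(mp, stk);	// kernel thread enters runtime·mstart
//	// ...on the new thread, from mstart:
//	runtime·minit();		// install signal stack/masks; no malloc
//	// ...later, from dropm:
//	runtime·unminit();		// tear down what minit installed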
+void +runtime·mpreinit(M *mp) +{ + mp->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K +} + +// Called to initialize a new m (including the bootstrap m). +// Called on the new thread, can not allocate memory. void runtime·minit(void) { // Initialize signal handling. - m->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K - runtime·signalstack(m->gsignal->stackguard - StackGuard, 32*1024); + runtime·signalstack((byte*)m->gsignal->stackguard - StackGuard, 32*1024); - if(m->profilehz > 0) - runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil); - else - runtime·sigprocmask(SIG_SETMASK, &sigset_prof, nil); + runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil); + runtime·setprof(m->profilehz > 0); +} + +// Called from dropm to undo the effect of an minit. +void +runtime·unminit(void) +{ + runtime·signalstack(nil, 0); } // Mach IPC, to get at semaphores @@ -431,10 +453,11 @@ runtime·sigpanic(void) runtime·panicstring(runtime·sigtab[g->sig].name); } -// TODO(rsc): place holder to fix build. +#pragma textflag 7 void runtime·osyield(void) { + runtime·usleep(1); } uintptr @@ -488,12 +511,22 @@ runtime·badcallback(void) runtime·write(2, badcallback, sizeof badcallback - 1); } -static int8 badsignal[] = "runtime: signal received on thread not created by Go.\n"; +static int8 badsignal[] = "runtime: signal received on thread not created by Go: "; // This runs on a foreign stack, without an m or a g. No stack split. #pragma textflag 7 void -runtime·badsignal(void) +runtime·badsignal(int32 sig) { + if (sig == SIGPROF) { + return; // Ignore SIGPROFs intended for a non-Go thread. + } runtime·write(2, badsignal, sizeof badsignal - 1); + if (0 <= sig && sig < NSIG) { + // Call runtime·findnull dynamically to circumvent static stack size check. + static int32 (*findnull)(byte*) = runtime·findnull; + runtime·write(2, runtime·sigtab[sig].name, findnull((byte*)runtime·sigtab[sig].name)); + } + runtime·write(2, "\n", 1); + runtime·exit(1); } diff --git a/src/pkg/runtime/thread_freebsd.c b/src/pkg/runtime/thread_freebsd.c index 4c546178f..3ae14ee0a 100644 --- a/src/pkg/runtime/thread_freebsd.c +++ b/src/pkg/runtime/thread_freebsd.c @@ -13,8 +13,8 @@ extern int32 runtime·sys_umtx_op(uint32*, int32, uint32, void*, void*); #define CTL_HW 6 #define HW_NCPU 3 +static Sigset sigset_none; static Sigset sigset_all = { ~(uint32)0, ~(uint32)0, ~(uint32)0, ~(uint32)0, }; -static Sigset sigset_none = { 0, 0, 0, 0, }; static int32 getncpu(void) @@ -77,32 +77,33 @@ runtime·futexwakeup(uint32 *addr, uint32 cnt) void runtime·thr_start(void*); void -runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) +runtime·newosproc(M *mp, void *stk) { ThrParam param; Sigset oset; - USED(fn); // thr_start assumes fn == mstart - USED(g); // thr_start assumes g == m->g0 - if(0){ - runtime·printf("newosproc stk=%p m=%p g=%p fn=%p id=%d/%d ostk=%p\n", - stk, m, g, fn, m->id, m->tls[0], &m); + runtime·printf("newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n", + stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp); } runtime·sigprocmask(&sigset_all, &oset); runtime·memclr((byte*)¶m, sizeof param); param.start_func = runtime·thr_start; - param.arg = m; - param.stack_base = (int8*)g->stackbase; - param.stack_size = (byte*)stk - (byte*)g->stackbase; - param.child_tid = (intptr*)&m->procid; + param.arg = (byte*)mp; + + // NOTE(rsc): This code is confused. stackbase is the top of the stack + // and is equal to stk. However, it's working, so I'm not changing it. 
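// For reference, thr_new(2) expects stack_base to be the *low* address
// of the new thread's stack and stack_size its extent, so a
// conventional setup would look like the following (a sketch assuming
// the stack occupies StackSize bytes ending at stk; the code below
// keeps the old, admittedly confused, values because they work in
// practice):
//
//	param.stack_base = (void*)((byte*)stk - StackSize);
//	param.stack_size = StackSize;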
+ param.stack_base = (void*)mp->g0->stackbase; + param.stack_size = (byte*)stk - (byte*)mp->g0->stackbase; + + param.child_tid = (intptr*)&mp->procid; param.parent_tid = nil; - param.tls_base = (int8*)&m->tls[0]; - param.tls_size = sizeof m->tls; + param.tls_base = (void*)&mp->tls[0]; + param.tls_size = sizeof mp->tls; - m->tls[0] = m->id; // so 386 asm can find it + mp->tls[0] = mp->id; // so 386 asm can find it runtime·thr_new(¶m, sizeof param); runtime·sigprocmask(&oset, nil); @@ -121,15 +122,30 @@ runtime·goenvs(void) } // Called to initialize a new m (including the bootstrap m). +// Called on the parent thread (main thread in case of bootstrap), can allocate memory. +void +runtime·mpreinit(M *mp) +{ + mp->gsignal = runtime·malg(32*1024); +} + +// Called to initialize a new m (including the bootstrap m). +// Called on the new thread, can not allocate memory. void runtime·minit(void) { // Initialize signal handling - m->gsignal = runtime·malg(32*1024); - runtime·signalstack(m->gsignal->stackguard - StackGuard, 32*1024); + runtime·signalstack((byte*)m->gsignal->stackguard - StackGuard, 32*1024); runtime·sigprocmask(&sigset_none, nil); } +// Called from dropm to undo the effect of an minit. +void +runtime·unminit(void) +{ + runtime·signalstack(nil, 0); +} + void runtime·sigpanic(void) { @@ -206,12 +222,22 @@ runtime·badcallback(void) runtime·write(2, badcallback, sizeof badcallback - 1); } -static int8 badsignal[] = "runtime: signal received on thread not created by Go.\n"; +static int8 badsignal[] = "runtime: signal received on thread not created by Go: "; // This runs on a foreign stack, without an m or a g. No stack split. #pragma textflag 7 void -runtime·badsignal(void) +runtime·badsignal(int32 sig) { + if (sig == SIGPROF) { + return; // Ignore SIGPROFs intended for a non-Go thread. + } runtime·write(2, badsignal, sizeof badsignal - 1); + if (0 <= sig && sig < NSIG) { + // Call runtime·findnull dynamically to circumvent static stack size check. + static int32 (*findnull)(byte*) = runtime·findnull; + runtime·write(2, runtime·sigtab[sig].name, findnull((byte*)runtime·sigtab[sig].name)); + } + runtime·write(2, "\n", 1); + runtime·exit(1); } diff --git a/src/pkg/runtime/thread_linux.c b/src/pkg/runtime/thread_linux.c index 858be7036..78ddef878 100644 --- a/src/pkg/runtime/thread_linux.c +++ b/src/pkg/runtime/thread_linux.c @@ -13,8 +13,8 @@ int32 runtime·open(uint8*, int32, int32); int32 runtime·close(int32); int32 runtime·read(int32, void*, int32); -static Sigset sigset_all = { ~(uint32)0, ~(uint32)0 }; static Sigset sigset_none; +static Sigset sigset_all = { ~(uint32)0, ~(uint32)0 }; // Linux futex. 
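// The getproccount rewrite below drops the /proc/stat parser in favor
// of asking the kernel directly: sched_getaffinity fills a bitmask of
// usable CPUs, and each word of the mask is widened and counted with
// the classic SWAR population count. Extracted as a standalone helper
// the same identity reads (a sketch; the loop below inlines it):
//
//	static int32
//	popcount64(uint64 t)
//	{
//		t = t - ((t >> 1) & 0x5555555555555555ULL);
//		t = (t & 0x3333333333333333ULL) + ((t >> 2) & 0x3333333333333333ULL);
//		return (int32)((((t + (t >> 4)) & 0xF0F0F0F0F0F0F0FULL) *
//			0x101010101010101ULL) >> 56);
//	}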
// @@ -80,33 +80,23 @@ runtime·futexwakeup(uint32 *addr, uint32 cnt) *(int32*)0x1006 = 0x1006; } +extern runtime·sched_getaffinity(uintptr pid, uintptr len, uintptr *buf); static int32 getproccount(void) { - int32 fd, rd, cnt, cpustrlen; - byte *cpustr, *pos, *bufpos; - byte buf[256]; + uintptr buf[16], t; + int32 r, cnt, i; - fd = runtime·open((byte*)"/proc/stat", O_RDONLY|O_CLOEXEC, 0); - if(fd == -1) - return 1; cnt = 0; - bufpos = buf; - cpustr = (byte*)"\ncpu"; - cpustrlen = runtime·findnull(cpustr); - for(;;) { - rd = runtime·read(fd, bufpos, sizeof(buf)-cpustrlen); - if(rd == -1) - break; - bufpos[rd] = 0; - for(pos=buf; pos=runtime·strstr(pos, cpustr); cnt++, pos++) { - } - if(rd < cpustrlen) - break; - runtime·memmove(buf, bufpos+rd-cpustrlen+1, cpustrlen-1); - bufpos = buf+cpustrlen-1; + r = runtime·sched_getaffinity(0, sizeof(buf), buf); + if(r > 0) + for(i = 0; i < r/sizeof(buf[0]); i++) { + t = buf[i]; + t = t - ((t >> 1) & 0x5555555555555555ULL); + t = (t & 0x3333333333333333ULL) + ((t >> 2) & 0x3333333333333333ULL); + cnt += (int32)((((t + (t >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56); } - runtime·close(fd); + return cnt ? cnt : 1; } @@ -134,7 +124,7 @@ enum }; void -runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) +runtime·newosproc(M *mp, void *stk) { int32 ret; int32 flags; @@ -150,16 +140,16 @@ runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) | CLONE_THREAD /* revisit - okay for now */ ; - m->tls[0] = m->id; // so 386 asm can find it + mp->tls[0] = mp->id; // so 386 asm can find it if(0){ - runtime·printf("newosproc stk=%p m=%p g=%p fn=%p clone=%p id=%d/%d ostk=%p\n", - stk, m, g, fn, runtime·clone, m->id, m->tls[0], &m); + runtime·printf("newosproc stk=%p m=%p g=%p clone=%p id=%d/%d ostk=%p\n", + stk, mp, mp->g0, runtime·clone, mp->id, (int32)mp->tls[0], &mp); } // Disable signals during clone, so that the new thread starts // with signals disabled. It will enable them in minit. runtime·rtsigprocmask(SIG_SETMASK, &sigset_all, &oset, sizeof oset); - ret = runtime·clone(flags, stk, m, g, fn); + ret = runtime·clone(flags, stk, mp, mp->g0, runtime·mstart); runtime·rtsigprocmask(SIG_SETMASK, &oset, nil, sizeof oset); if(ret < 0) { @@ -181,13 +171,28 @@ runtime·goenvs(void) } // Called to initialize a new m (including the bootstrap m). +// Called on the parent thread (main thread in case of bootstrap), can allocate memory. +void +runtime·mpreinit(M *mp) +{ + mp->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K +} + +// Called to initialize a new m (including the bootstrap m). +// Called on the new thread, can not allocate memory. void runtime·minit(void) { // Initialize signal handling. - m->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K - runtime·signalstack(m->gsignal->stackguard - StackGuard, 32*1024); - runtime·rtsigprocmask(SIG_SETMASK, &sigset_none, nil, sizeof sigset_none); + runtime·signalstack((byte*)m->gsignal->stackguard - StackGuard, 32*1024); + runtime·rtsigprocmask(SIG_SETMASK, &sigset_none, nil, sizeof(Sigset)); +} + +// Called from dropm to undo the effect of an minit. +void +runtime·unminit(void) +{ + runtime·signalstack(nil, 0); } void @@ -266,12 +271,22 @@ runtime·badcallback(void) runtime·write(2, badcallback, sizeof badcallback - 1); } -static int8 badsignal[] = "runtime: signal received on thread not created by Go.\n"; +static int8 badsignal[] = "runtime: signal received on thread not created by Go: "; // This runs on a foreign stack, without an m or a g. No stack split. 
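// "#pragma textflag 7" marks the function that follows as NOSPLIT: the
// compiler emits no stack-split prologue, which is what makes such a
// function legal to run on a foreign thread's stack where there is no
// m or g to consult. The cost is that its whole call tree must fit in
// whatever stack happens to be there, which is why badsignal below
// avoids deep calls. The same pattern appears elsewhere in this patch,
// for example:
//
//	#pragma textflag 7
//	void
//	runtime·osyield(void)
//	{
//		runtime·usleep(1);
//	}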
#pragma textflag 7 void -runtime·badsignal(void) +runtime·badsignal(int32 sig) { + if (sig == SIGPROF) { + return; // Ignore SIGPROFs intended for a non-Go thread. + } runtime·write(2, badsignal, sizeof badsignal - 1); + if (0 <= sig && sig < NSIG) { + // Call runtime·findnull dynamically to circumvent static stack size check. + static int32 (*findnull)(byte*) = runtime·findnull; + runtime·write(2, runtime·sigtab[sig].name, findnull((byte*)runtime·sigtab[sig].name)); + } + runtime·write(2, "\n", 1); + runtime·exit(1); } diff --git a/src/pkg/runtime/thread_netbsd.c b/src/pkg/runtime/thread_netbsd.c index 1b2df85cd..f333c6dd8 100644 --- a/src/pkg/runtime/thread_netbsd.c +++ b/src/pkg/runtime/thread_netbsd.c @@ -11,7 +11,7 @@ enum ESRCH = 3, ENOTSUP = 91, - // From NetBSD's sys/time.h + // From NetBSD's <sys/time.h> CLOCK_REALTIME = 0, CLOCK_VIRTUAL = 1, CLOCK_PROF = 2, @@ -20,9 +20,15 @@ enum extern SigTab runtime·sigtab[]; -extern int64 runtime·rfork_thread(int32 flags, void *stack, M *m, G *g, void (*fn)(void)); -extern int32 runtime·thrsleep(void *ident, int32 clock_id, void *tsp, void *lock); -extern int32 runtime·thrwakeup(void *ident, int32 n); +static Sigset sigset_none; +static Sigset sigset_all = { ~(uint32)0, ~(uint32)0, ~(uint32)0, ~(uint32)0, }; + +extern void runtime·getcontext(UcontextT *context); +extern int32 runtime·lwp_create(UcontextT *context, uintptr flags, void *lwpid); +extern void runtime·lwp_mcontext_init(void *mc, void *stack, M *mp, G *gp, void (*fn)(void)); +extern int32 runtime·lwp_park(Timespec *abstime, int32 unpark, void *hint, void *unparkhint); +extern int32 runtime·lwp_unpark(int32 lwp, void *hint); +extern int32 runtime·lwp_self(void); // From NetBSD's <sys/sysctl.h> #define CTL_HW 6 @@ -68,13 +74,30 @@ runtime·semasleep(int64 ns) if(m->waitsemacount == 0) { // sleep until semaphore != 0 or timeout. // thrsleep unlocks m->waitsemalock. - if(ns < 0) - runtime·thrsleep(&m->waitsemacount, 0, nil, &m->waitsemalock); - else { + if(ns < 0) { + // TODO(jsing) - potential deadlock! + // + // There is a potential deadlock here since we + // have to release the waitsemalock mutex + // before we call lwp_park() to suspend the + // thread. This allows another thread to + // release the lock and call lwp_unpark() + // before the thread is actually suspended. + // If this occurs the current thread will end + // up sleeping indefinitely. Unfortunately + // the NetBSD kernel does not appear to provide + // a mechanism for unlocking the userspace + // mutex once the thread is actually parked. + runtime·atomicstore(&m->waitsemalock, 0); + runtime·lwp_park(nil, 0, &m->waitsemacount, nil); + } else { ns += runtime·nanotime(); ts.tv_sec = ns/1000000000LL; ts.tv_nsec = ns%1000000000LL; - runtime·thrsleep(&m->waitsemacount, CLOCK_REALTIME, &ts, &m->waitsemalock); + // TODO(jsing) - potential deadlock! + // See above for details. + runtime·atomicstore(&m->waitsemalock, 0); + runtime·lwp_park(&ts, 0, &m->waitsemacount, nil); } // reacquire lock while(runtime·xchg(&m->waitsemalock, 1)) @@ -112,39 +135,41 @@ runtime·semawakeup(M *mp) while(runtime·xchg(&mp->waitsemalock, 1)) runtime·osyield(); mp->waitsemacount++; - ret = runtime·thrwakeup(&mp->waitsemacount, 1); + // TODO(jsing) - potential deadlock, see semasleep() for details. + // Confirm that LWP is parked before unparking... 
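// The deadlock the TODO above describes is a classic lost wakeup; the
// window between dropping the spin-mutex and parking looks like this
// (sleeper S, waker W; a timeline sketch of the code in this file):
//
//	S: runtime·atomicstore(&m->waitsemalock, 0);	// lock released
//	W:	runtime·xchg(&mp->waitsemalock, 1);	// W acquires lock
//	W:	mp->waitsemacount++;
//	W:	runtime·lwp_unpark(...);		// S is not parked yet
//	S: runtime·lwp_park(...);			// wakeup already gone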
+ ret = runtime·lwp_unpark(mp->procid, &mp->waitsemacount); if(ret != 0 && ret != ESRCH) runtime·printf("thrwakeup addr=%p sem=%d ret=%d\n", &mp->waitsemacount, mp->waitsemacount, ret); // spin-mutex unlock runtime·atomicstore(&mp->waitsemalock, 0); } -// From NetBSD's sys/param.h -#define RFPROC (1<<4) /* change child (else changes curproc) */ -#define RFMEM (1<<5) /* share `address space' */ -#define RFNOWAIT (1<<6) /* parent need not wait() on child */ -#define RFTHREAD (1<<13) /* create a thread, not a process */ - void -runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) +runtime·newosproc(M *mp, void *stk) { - int32 flags; + UcontextT uc; int32 ret; - flags = RFPROC | RFTHREAD | RFMEM | RFNOWAIT; - - if (0) { + if(0) { runtime·printf( - "newosproc stk=%p m=%p g=%p fn=%p id=%d/%d ostk=%p\n", - stk, m, g, fn, m->id, m->tls[0], &m); + "newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n", + stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp); } - m->tls[0] = m->id; // so 386 asm can find it + mp->tls[0] = mp->id; // so 386 asm can find it + + runtime·getcontext(&uc); + + uc.uc_flags = _UC_SIGMASK | _UC_CPU; + uc.uc_link = nil; + uc.uc_sigmask = sigset_all; + + runtime·lwp_mcontext_init(&uc.uc_mcontext, stk, mp, mp->g0, runtime·mstart); - if((ret = runtime·rfork_thread(flags, stk, m, g, fn)) < 0) { + ret = runtime·lwp_create(&uc, 0, &mp->procid); + + if(ret < 0) { runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount() - 1, -ret); - if (ret == -ENOTSUP) - runtime·printf("runtime: is kern.rthreads disabled?\n"); runtime·throw("runtime.newosproc"); } } @@ -162,12 +187,30 @@ runtime·goenvs(void) } // Called to initialize a new m (including the bootstrap m). +// Called on the parent thread (main thread in case of bootstrap), can allocate memory. +void +runtime·mpreinit(M *mp) +{ + mp->gsignal = runtime·malg(32*1024); +} + +// Called to initialize a new m (including the bootstrap m). +// Called on the new thread, can not allocate memory. void runtime·minit(void) { + m->procid = runtime·lwp_self(); + // Initialize signal handling - m->gsignal = runtime·malg(32*1024); - runtime·signalstack(m->gsignal->stackguard - StackGuard, 32*1024); + runtime·signalstack((byte*)m->gsignal->stackguard - StackGuard, 32*1024); + runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil); +} + +// Called from dropm to undo the effect of an minit. +void +runtime·unminit(void) +{ + runtime·signalstack(nil, 0); } void @@ -224,12 +267,22 @@ runtime·badcallback(void) runtime·write(2, badcallback, sizeof badcallback - 1); } -static int8 badsignal[] = "runtime: signal received on thread not created by Go.\n"; +static int8 badsignal[] = "runtime: signal received on thread not created by Go: "; // This runs on a foreign stack, without an m or a g. No stack split. #pragma textflag 7 void -runtime·badsignal(void) +runtime·badsignal(int32 sig) { + if (sig == SIGPROF) { + return; // Ignore SIGPROFs intended for a non-Go thread. + } runtime·write(2, badsignal, sizeof badsignal - 1); + if (0 <= sig && sig < NSIG) { + // Call runtime·findnull dynamically to circumvent static stack size check. 
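// Calling runtime·findnull through a function pointer, as the line
// below does, is deliberate: the linker statically bounds the stack
// use of NOSPLIT functions by walking the call graph, and a direct
// call here would trip that check. An indirect call through a static
// variable is invisible to the walker, at the price of the programmer
// vouching for the stack depth:
//
//	static int32 (*findnull)(byte*) = runtime·findnull;
//	len = findnull((byte*)msg);	// same as runtime·findnull(msg)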
+ static int32 (*findnull)(byte*) = runtime·findnull; + runtime·write(2, runtime·sigtab[sig].name, findnull((byte*)runtime·sigtab[sig].name)); + } + runtime·write(2, "\n", 1); + runtime·exit(1); } diff --git a/src/pkg/runtime/thread_openbsd.c b/src/pkg/runtime/thread_openbsd.c index d0f947210..700c48147 100644 --- a/src/pkg/runtime/thread_openbsd.c +++ b/src/pkg/runtime/thread_openbsd.c @@ -20,8 +20,11 @@ enum extern SigTab runtime·sigtab[]; -extern int64 runtime·rfork_thread(int32 flags, void *stack, M *m, G *g, void (*fn)(void)); -extern int32 runtime·thrsleep(void *ident, int32 clock_id, void *tsp, void *lock); +static Sigset sigset_none; +static Sigset sigset_all = ~(Sigset)0; + +extern int64 runtime·tfork(void *param, uintptr psize, M *mp, G *gp, void (*fn)(void)); +extern int32 runtime·thrsleep(void *ident, int32 clock_id, void *tsp, void *lock, const int32 *abort); extern int32 runtime·thrwakeup(void *ident, int32 n); // From OpenBSD's <sys/sysctl.h> @@ -69,12 +72,12 @@ runtime·semasleep(int64 ns) // sleep until semaphore != 0 or timeout. // thrsleep unlocks m->waitsemalock. if(ns < 0) - runtime·thrsleep(&m->waitsemacount, 0, nil, &m->waitsemalock); + runtime·thrsleep(&m->waitsemacount, 0, nil, &m->waitsemalock, nil); else { ns += runtime·nanotime(); ts.tv_sec = ns/1000000000LL; ts.tv_nsec = ns%1000000000LL; - runtime·thrsleep(&m->waitsemacount, CLOCK_REALTIME, &ts, &m->waitsemalock); + runtime·thrsleep(&m->waitsemacount, CLOCK_REALTIME, &ts, &m->waitsemalock, nil); } // reacquire lock while(runtime·xchg(&m->waitsemalock, 1)) @@ -119,29 +122,30 @@ runtime·semawakeup(M *mp) runtime·atomicstore(&mp->waitsemalock, 0); } -// From OpenBSD's sys/param.h -#define RFPROC (1<<4) /* change child (else changes curproc) */ -#define RFMEM (1<<5) /* share `address space' */ -#define RFNOWAIT (1<<6) /* parent need not wait() on child */ -#define RFTHREAD (1<<13) /* create a thread, not a process */ - void -runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) +runtime·newosproc(M *mp, void *stk) { - int32 flags; + Tfork param; + Sigset oset; int32 ret; - flags = RFPROC | RFTHREAD | RFMEM | RFNOWAIT; - - if (0) { + if(0) { runtime·printf( - "newosproc stk=%p m=%p g=%p fn=%p id=%d/%d ostk=%p\n", - stk, m, g, fn, m->id, m->tls[0], &m); + "newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n", + stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp); } - m->tls[0] = m->id; // so 386 asm can find it + mp->tls[0] = mp->id; // so 386 asm can find it + + param.tf_tcb = (byte*)&mp->tls[0]; + param.tf_tid = (int32*)&mp->procid; + param.tf_stack = stk; - if((ret = runtime·rfork_thread(flags, stk, m, g, fn)) < 0) { + oset = runtime·sigprocmask(SIG_SETMASK, sigset_all); + ret = runtime·tfork((byte*)¶m, sizeof(param), mp, mp->g0, runtime·mstart); + runtime·sigprocmask(SIG_SETMASK, oset); + + if(ret < 0) { runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount() - 1, -ret); if (ret == -ENOTSUP) runtime·printf("runtime: is kern.rthreads disabled?\n"); @@ -162,12 +166,28 @@ runtime·goenvs(void) } // Called to initialize a new m (including the bootstrap m). +// Called on the parent thread (main thread in case of bootstrap), can allocate memory. +void +runtime·mpreinit(M *mp) +{ + mp->gsignal = runtime·malg(32*1024); +} + +// Called to initialize a new m (including the bootstrap m). +// Called on the new thread, can not allocate memory. 
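// On every Unix port in this patch, minit's job is to install
// mp->gsignal's stack as the alternate signal stack, so handlers run
// on a known-good stack even when the interrupted goroutine is near
// overflow; unminit disables it again before the thread is dropped.
// The paired calls, visible below and in the other thread_*.c files:
//
//	runtime·signalstack((byte*)m->gsignal->stackguard - StackGuard, 32*1024);
//	...
//	runtime·signalstack(nil, 0);	// from unminit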
void runtime·minit(void) { // Initialize signal handling - m->gsignal = runtime·malg(32*1024); - runtime·signalstack(m->gsignal->stackguard - StackGuard, 32*1024); + runtime·signalstack((byte*)m->gsignal->stackguard - StackGuard, 32*1024); + runtime·sigprocmask(SIG_SETMASK, sigset_none); +} + +// Called from dropm to undo the effect of an minit. +void +runtime·unminit(void) +{ + runtime·signalstack(nil, 0); } void @@ -224,12 +244,22 @@ runtime·badcallback(void) runtime·write(2, badcallback, sizeof badcallback - 1); } -static int8 badsignal[] = "runtime: signal received on thread not created by Go.\n"; +static int8 badsignal[] = "runtime: signal received on thread not created by Go: "; // This runs on a foreign stack, without an m or a g. No stack split. #pragma textflag 7 void -runtime·badsignal(void) +runtime·badsignal(int32 sig) { + if (sig == SIGPROF) { + return; // Ignore SIGPROFs intended for a non-Go thread. + } runtime·write(2, badsignal, sizeof badsignal - 1); + if (0 <= sig && sig < NSIG) { + // Call runtime·findnull dynamically to circumvent static stack size check. + static int32 (*findnull)(byte*) = runtime·findnull; + runtime·write(2, runtime·sigtab[sig].name, findnull((byte*)runtime·sigtab[sig].name)); + } + runtime·write(2, "\n", 1); + runtime·exit(1); } diff --git a/src/pkg/runtime/thread_plan9.c b/src/pkg/runtime/thread_plan9.c index 3b0dca69f..7f94623e7 100644 --- a/src/pkg/runtime/thread_plan9.c +++ b/src/pkg/runtime/thread_plan9.c @@ -7,12 +7,37 @@ #include "arch_GOARCH.h" int8 *goos = "plan9"; +extern SigTab runtime·sigtab[]; +int32 runtime·postnote(int32, int8*); + +// Called to initialize a new m (including the bootstrap m). +// Called on the parent thread (main thread in case of bootstrap), can allocate memory. +void +runtime·mpreinit(M *mp) +{ + // Initialize stack and goroutine for note handling. + mp->gsignal = runtime·malg(32*1024); + mp->notesig = (int8*)runtime·malloc(ERRMAX*sizeof(int8)); +} + +// Called to initialize a new m (including the bootstrap m). +// Called on the new thread, can not allocate memory. void runtime·minit(void) { + // Mask all SSE floating-point exceptions + // when running on the 64-bit kernel. + runtime·setfpmasks(); +} + +// Called from dropm to undo the effect of an minit. +void +runtime·unminit(void) +{ } + static int32 getproccount(void) { @@ -36,10 +61,30 @@ getproccount(void) return ncpu > 0 ? ncpu : 1; } +static int32 +getpid(void) +{ + byte b[20], *c; + int32 fd; + + runtime·memclr(b, sizeof(b)); + fd = runtime·open((byte*)"#c/pid", 0); + if(fd >= 0) { + runtime·read(fd, b, sizeof(b)); + runtime·close(fd); + } + c = b; + while(*c == ' ' || *c == '\t') + c++; + return runtime·atoi(c); +} + void runtime·osinit(void) { runtime·ncpu = getproccount(); + m->procid = getpid(); + runtime·notify(runtime·sigtramp); } void @@ -52,6 +97,7 @@ runtime·initsig(void) { } +#pragma textflag 7 void runtime·osyield(void) { @@ -69,34 +115,6 @@ runtime·usleep(uint32 µs) runtime·sleep(ms); } -int64 -runtime·nanotime(void) -{ - static int32 fd = -1; - byte b[8]; - uint32 hi, lo; - - // As long as all goroutines share the same file - // descriptor table we can get away with using - // just a static fd. Without a lock the file can - // be opened twice but that's okay. - // - // Using /dev/bintime gives us a latency on the - // order of ten microseconds between two calls. - // - // The naïve implementation (without the cached - // file descriptor) is roughly four times slower - // in 9vx on a 2.16 GHz Intel Core 2 Duo. 
- - if(fd < 0 && (fd = runtime·open((byte*)"/dev/bintime", OREAD|OCEXEC)) < 0) - return 0; - if(runtime·pread(fd, b, sizeof b, 0) != sizeof b) - return 0; - hi = b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3]; - lo = b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7]; - return (int64)hi<<32 | (int64)lo; -} - void time·now(int64 sec, int32 nsec) { @@ -109,48 +127,111 @@ time·now(int64 sec, int32 nsec) FLUSH(&nsec); } -extern Tos *_tos; void -runtime·exit(int32) +runtime·itoa(int32 n, byte *p, uint32 len) { - int32 fd; + byte *q, c; + uint32 i; + + if(len <= 1) + return; + + runtime·memclr(p, len); + q = p; + + if(n==0) { + *q++ = '0'; + USED(q); + return; + } + if(n < 0) { + *q++ = '-'; + p++; + n = -n; + } + for(i=0; n > 0 && i < len; i++) { + *q++ = '0' + (n%10); + n = n/10; + } + for(q--; q >= p; ) { + c = *p; + *p++ = *q; + *q-- = c; + } +} + +void +runtime·goexitsall(int8 *status) +{ + M *mp; + int32 pid; + + pid = getpid(); + for(mp=runtime·atomicloadp(&runtime·allm); mp; mp=mp->alllink) + if(mp->procid != pid) + runtime·postnote(mp->procid, status); +} + +int32 +runtime·postnote(int32 pid, int8* msg) +{ + int32 fd, len; uint8 buf[128]; uint8 tmp[16]; uint8 *p, *q; - int32 pid; runtime·memclr(buf, sizeof buf); - runtime·memclr(tmp, sizeof tmp); - pid = _tos->pid; - /* build path string /proc/pid/notepg */ - for(q=tmp; pid > 0;) { - *q++ = '0' + (pid%10); - pid = pid/10; - } + /* build path string /proc/pid/note */ + q = tmp; p = buf; + runtime·itoa(pid, tmp, sizeof tmp); runtime·memmove((void*)p, (void*)"/proc/", 6); - p += 6; - for(q--; q >= tmp;) - *p++ = *q--; - runtime·memmove((void*)p, (void*)"/notepg", 7); + for(p += 6; *p++ = *q++; ); + p--; + runtime·memmove((void*)p, (void*)"/note", 5); - /* post interrupt note */ fd = runtime·open(buf, OWRITE); - runtime·write(fd, "interrupt", 9); - runtime·exits(nil); + if(fd < 0) + return -1; + + len = runtime·findnull((byte*)msg); + if(runtime·write(fd, msg, len) != len) { + runtime·close(fd); + return -1; + } + runtime·close(fd); + return 0; } void -runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) +runtime·exit(int32 e) { - m->tls[0] = m->id; // so 386 asm can find it + byte tmp[16]; + int8 *status; + + if(e == 0) + status = ""; + else { + /* build error string */ + runtime·itoa(e, tmp, sizeof tmp); + status = (int8*)tmp; + } + + runtime·goexitsall(status); + runtime·exits(status); +} + +void +runtime·newosproc(M *mp, void *stk) +{ + mp->tls[0] = mp->id; // so 386 asm can find it if(0){ - runtime·printf("newosproc stk=%p m=%p g=%p fn=%p rfork=%p id=%d/%d ostk=%p\n", - stk, m, g, fn, runtime·rfork, m->id, m->tls[0], &m); + runtime·printf("newosproc stk=%p m=%p g=%p rfork=%p id=%d/%d ostk=%p\n", + stk, mp, mp->g0, runtime·rfork, mp->id, (int32)mp->tls[0], &mp); } - if(runtime·rfork(RFPROC|RFMEM|RFNOWAIT, stk, m, g, fn) < 0) + if(runtime·rfork(RFPROC|RFMEM|RFNOWAIT, stk, mp, mp->g0, runtime·mstart) < 0) runtime·throw("newosproc: rfork failed"); } @@ -167,36 +248,18 @@ runtime·semasleep(int64 ns) int32 ms; if(ns >= 0) { - // TODO: Plan 9 needs a new system call, tsemacquire. - // The kernel implementation is the same as semacquire - // except with a tsleep and check for timeout. - // It would be great if the implementation returned the - // value that was added to the semaphore, so that on - // timeout the return value would be 0, on success 1. - // Then the error string does not have to be parsed - // to detect timeout. 
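// With tsemacquire now available in the Plan 9 kernel, the replacement
// code below converts the nanosecond timeout to milliseconds and
// clamps it to a 32-bit value (0x7fffffff ms is roughly 24.8 days), so
// the old "not implemented" throw can finally go:
//
//	if(ns/1000000 > 0x7fffffffll)
//		ms = 0x7fffffff;
//	else
//		ms = ns/1000000;
//	ret = runtime·plan9_tsemacquire(&m->waitsemacount, ms);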
- // - // If a negative time indicates no timeout, then - // semacquire can be implemented (in the kernel) - // as tsemacquire(p, v, -1). - runtime·throw("semasleep: timed sleep not implemented on Plan 9"); - - /* - if(ns < 0) - ms = -1; - else if(ns/1000 > 0x7fffffffll) + if(ns/1000000 > 0x7fffffffll) ms = 0x7fffffff; else - ms = ns/1000; - ret = runtime·plan9_tsemacquire(&m->waitsemacount, 1, ms); + ms = ns/1000000; + ret = runtime·plan9_tsemacquire(&m->waitsemacount, ms); if(ret == 1) return 0; // success return -1; // timeout or interrupted - */ } while(runtime·plan9_semacquire(&m->waitsemacount, 1) < 0) { - /* interrupted; try again */ + /* interrupted; try again (c.f. lock_sema.c) */ } return 0; // success } @@ -213,15 +276,15 @@ os·sigpipe(void) runtime·throw("too many writes on closed pipe"); } -/* - * placeholder - once notes are implemented, - * a signal generating a panic must appear as - * a call to this function for correct handling by - * traceback. - */ void runtime·sigpanic(void) { + if(g->sigpc == 0) + runtime·panicstring("call of nil func value"); + runtime·panicstring(m->notesig); + + if(g->sig == 1 || g->sig == 2) + runtime·throw("fault"); } int32 @@ -266,4 +329,5 @@ void runtime·badsignal(void) { runtime·pwrite(2, badsignal, sizeof badsignal - 1, -1LL); + runtime·exits(badsignal); } diff --git a/src/pkg/runtime/thread_windows.c b/src/pkg/runtime/thread_windows.c index f684d3733..ae4e82e50 100644 --- a/src/pkg/runtime/thread_windows.c +++ b/src/pkg/runtime/thread_windows.c @@ -135,6 +135,7 @@ runtime·write(int32 fd, void *buf, int32 n) return written; } +#pragma textflag 7 void runtime·osyield(void) { @@ -186,28 +187,43 @@ runtime·semacreate(void) #define STACK_SIZE_PARAM_IS_A_RESERVATION ((uintptr)0x00010000) void -runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) +runtime·newosproc(M *mp, void *stk) { void *thandle; USED(stk); - USED(g); // assuming g = m->g0 - USED(fn); // assuming fn = mstart thandle = runtime·stdcall(runtime·CreateThread, 6, - nil, (uintptr)0x20000, runtime·tstart_stdcall, m, + nil, (uintptr)0x20000, runtime·tstart_stdcall, mp, STACK_SIZE_PARAM_IS_A_RESERVATION, nil); if(thandle == nil) { runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), runtime·getlasterror()); runtime·throw("runtime.newosproc"); } - runtime·atomicstorep(&m->thread, thandle); + runtime·atomicstorep(&mp->thread, thandle); +} + +// Called to initialize a new m (including the bootstrap m). +// Called on the parent thread (main thread in case of bootstrap), can allocate memory. +void +runtime·mpreinit(M *mp) +{ + USED(mp); } // Called to initialize a new m (including the bootstrap m). +// Called on the new thread, can not allocate memory. void runtime·minit(void) { + runtime·install_exception_handler(); +} + +// Called from dropm to undo the effect of an minit. +void +runtime·unminit(void) +{ + runtime·remove_exception_handler(); } int64 diff --git a/src/pkg/runtime/time.goc b/src/pkg/runtime/time.goc index b18902f00..2babb173d 100644 --- a/src/pkg/runtime/time.goc +++ b/src/pkg/runtime/time.goc @@ -11,6 +11,7 @@ package time #include "os_GOOS.h" #include "arch_GOARCH.h" #include "malloc.h" +#include "race.h" static Timers timers; static void addtimer(Timer*); @@ -23,14 +24,16 @@ static bool deltimer(Timer*); // Sleep puts the current goroutine to sleep for at least ns nanoseconds. 
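// Sleep is now expressed through the new park API: tsleep takes the
// timers lock, queues the timer, and then calls runtime·park, which
// marks the goroutine as waiting before the unlock callback runs.
// That closes the race where a wakeup could slip in between unlocking
// and sleeping (sketch of the sequence implemented in runtime·tsleep
// below):
//
//	runtime·lock(&timers);
//	addtimer(&t);
//	runtime·park(runtime·unlock, &timers, reason);	// atomic unlock+sleep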
func Sleep(ns int64) { - g->status = Gwaiting; - g->waitreason = "sleep"; - runtime·tsleep(ns); + runtime·tsleep(ns, "sleep"); } // startTimer adds t to the timer heap. func startTimer(t *Timer) { + if(raceenabled) + runtime·racerelease(t); + runtime·lock(&timers); addtimer(t); + runtime·unlock(&timers); } // stopTimer removes t from the timer heap if it is there. @@ -54,27 +57,28 @@ ready(int64 now, Eface e) runtime·ready(e.data); } +static FuncVal readyv = {(void(*)(void))ready}; + // Put the current goroutine to sleep for ns nanoseconds. -// The caller must have set g->status and g->waitreason. void -runtime·tsleep(int64 ns) +runtime·tsleep(int64 ns, int8 *reason) { Timer t; - if(ns <= 0) { - g->status = Grunning; - g->waitreason = nil; + if(ns <= 0) return; - } t.when = runtime·nanotime() + ns; t.period = 0; - t.f = ready; + t.fv = &readyv; t.arg.data = g; + runtime·lock(&timers); addtimer(&t); - runtime·gosched(); + runtime·park(runtime·unlock, &timers, reason); } +static FuncVal timerprocv = {timerproc}; + // Add a timer to the heap and start or kick the timer proc // if the new timer is earlier than any of the others. static void @@ -83,7 +87,6 @@ addtimer(Timer *t) int32 n; Timer **nt; - runtime·lock(&timers); if(timers.len >= timers.cap) { // Grow slice. n = 16; @@ -109,9 +112,10 @@ addtimer(Timer *t) runtime·ready(timers.timerproc); } } - if(timers.timerproc == nil) - timers.timerproc = runtime·newproc1((byte*)timerproc, nil, 0, 0, addtimer); - runtime·unlock(&timers); + if(timers.timerproc == nil) { + timers.timerproc = runtime·newproc1(&timerprocv, nil, 0, 0, addtimer); + timers.timerproc->issystem = true; + } } // Delete timer t from the heap. @@ -182,26 +186,25 @@ timerproc(void) siftdown(0); t->i = -1; // mark as removed } - f = t->f; + f = (void*)t->fv->fn; arg = t->arg; runtime·unlock(&timers); + if(raceenabled) + runtime·raceacquire(t); f(now, arg); runtime·lock(&timers); } if(delta < 0) { // No timers left - put goroutine to sleep. timers.rescheduling = true; - g->status = Gwaiting; - g->waitreason = "timer goroutine (idle)"; - runtime·unlock(&timers); - runtime·gosched(); + runtime·park(runtime·unlock, &timers, "timer goroutine (idle)"); continue; } // At least one timer pending. Sleep until then. timers.sleeping = true; runtime·noteclear(&timers.waitnote); runtime·unlock(&timers); - runtime·entersyscall(); + runtime·entersyscallblock(); runtime·notetsleep(&timers.waitnote, delta); runtime·exitsyscall(); } diff --git a/src/pkg/runtime/time_plan9_386.c b/src/pkg/runtime/time_plan9_386.c new file mode 100644 index 000000000..a29d45715 --- /dev/null +++ b/src/pkg/runtime/time_plan9_386.c @@ -0,0 +1,34 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "runtime.h" +#include "os_GOOS.h" + +int64 +runtime·nanotime(void) +{ + static int32 fd = -1; + byte b[8]; + uint32 hi, lo; + + // As long as all goroutines share the same file + // descriptor table we can get away with using + // just a static fd. Without a lock the file can + // be opened twice but that's okay. + // + // Using /dev/bintime gives us a latency on the + // order of ten microseconds between two calls. + // + // The naïve implementation (without the cached + // file descriptor) is roughly four times slower + // in 9vx on a 2.16 GHz Intel Core 2 Duo. 
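// A read of /dev/bintime returns the current time as an 8-byte,
// big-endian count of nanoseconds since the epoch; the shifts below
// assemble the two 32-bit halves without depending on host byte
// order. The equivalent single-loop form would be (sketch):
//
//	uint64 t = 0;
//	for(i = 0; i < 8; i++)
//		t = t<<8 | b[i];
//	return (int64)t;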
+ + if(fd < 0 && (fd = runtime·open((byte*)"/dev/bintime", OREAD|OCEXEC)) < 0) + return 0; + if(runtime·pread(fd, b, sizeof b, 0) != sizeof b) + return 0; + hi = b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3]; + lo = b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7]; + return (int64)hi<<32 | (int64)lo; +} diff --git a/src/pkg/runtime/traceback_arm.c b/src/pkg/runtime/traceback_arm.c index 22e0bc3a6..dd85cc02c 100644 --- a/src/pkg/runtime/traceback_arm.c +++ b/src/pkg/runtime/traceback_arm.c @@ -17,23 +17,23 @@ void _divu(void); void _modu(void); int32 -runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, int32 max) +runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *gp, int32 skip, uintptr *pcbuf, int32 max) { int32 i, n, iter; uintptr pc, lr, tracepc, x; - byte *fp, *p; + byte *fp; bool waspanic; Stktop *stk; Func *f; - + pc = (uintptr)pc0; lr = (uintptr)lr0; fp = nil; waspanic = false; // If the PC is goexit, the goroutine hasn't started yet. - if(pc == (uintptr)runtime·goexit) { - pc = (uintptr)g->entry; + if(pc == (uintptr)runtime·goexit && gp->fnstart != nil) { + pc = (uintptr)gp->fnstart->fn; lr = (uintptr)runtime·goexit; } @@ -45,7 +45,7 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr } n = 0; - stk = (Stktop*)g->stackbase; + stk = (Stktop*)gp->stackbase; for(iter = 0; iter < 100 && n < max; iter++) { // iter avoids looping forever // Typically: // pc is the PC of the running function. @@ -57,52 +57,17 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr if(pc == (uintptr)runtime·lessstack) { // Hit top of stack segment. Unwind to next segment. pc = (uintptr)stk->gobuf.pc; - sp = stk->gobuf.sp; + sp = (byte*)stk->gobuf.sp; lr = 0; fp = nil; - if(pcbuf == nil) + if(pcbuf == nil && runtime·showframe(nil, gp == m->curg)) runtime·printf("----- stack segment boundary -----\n"); stk = (Stktop*)stk->stackbase; continue; } - if(pc <= 0x1000 || (f = runtime·findfunc(pc)) == nil) { - // Dangerous, but worthwhile: see if this is a closure by - // decoding the instruction stream. - // - // We check p < p+4 to avoid wrapping and faulting if - // we have lost track of where we are. - p = (byte*)pc; - if((pc&3) == 0 && p < p+4 && - runtime·mheap.arena_start < p && - p+4 < runtime·mheap.arena_used) { - x = *(uintptr*)p; - if((x&0xfffff000) == 0xe49df000) { - // End of closure: - // MOVW.P frame(R13), R15 - pc = *(uintptr*)sp; - lr = 0; - sp += x & 0xfff; - fp = nil; - continue; - } - if((x&0xfffff000) == 0xe52de000 && lr == (uintptr)runtime·goexit) { - // Beginning of closure. - // Closure at top of stack, not yet started. - p += 5*4; - if((x&0xfff) != 4) { - // argument copying - p += 7*4; - } - if((byte*)pc < p && p < p+4 && p+4 < runtime·mheap.arena_used) { - pc = *(uintptr*)p; - fp = nil; - continue; - } - } - } + if(pc <= 0x1000 || (f = runtime·findfunc(pc)) == nil) break; - } // Found an actual function. if(lr == 0) @@ -118,15 +83,17 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr else if(pcbuf != nil) pcbuf[n++] = pc; else { - if(runtime·showframe(f)) { + if(runtime·showframe(f, gp == m->curg)) { // Print during crash. // main(0x1, 0x2, 0x3) // /home/rsc/go/src/runtime/x.go:23 +0xf tracepc = pc; // back up to CALL instruction for funcline. 
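// The return address on the stack points just *past* the CALL, which
// can land on the next source line (or even the next function);
// backing tracepc up by one instruction makes the line-number lookup
// report the call site itself. On ARM every instruction is 4 bytes,
// hence the fixed-width adjustment below:
//
//	tracepc = pc;
//	if(n > 0 && pc > f->entry && !waspanic)
//		tracepc -= sizeof(uintptr);	// one fixed-width instruction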
if(n > 0 && pc > f->entry && !waspanic) tracepc -= sizeof(uintptr); + if(m->throwing && gp == m->curg) + runtime·printf("[fp=%p] ", fp); runtime·printf("%S(", f->name); - for(i = 0; i < f->args; i++) { + for(i = 0; i < f->args/sizeof(uintptr); i++) { if(i != 0) runtime·prints(", "); runtime·printhex(((uintptr*)fp)[1+i]); @@ -146,22 +113,22 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr waspanic = f->entry == (uintptr)runtime·sigpanic; - if(pcbuf == nil && f->entry == (uintptr)runtime·newstack && g == m->g0) { - runtime·printf("----- newstack called from goroutine %d -----\n", m->curg->goid); + if(pcbuf == nil && f->entry == (uintptr)runtime·newstack && gp == m->g0) { + runtime·printf("----- newstack called from goroutine %D -----\n", m->curg->goid); pc = (uintptr)m->morepc; sp = (byte*)m->moreargp - sizeof(void*); lr = (uintptr)m->morebuf.pc; - fp = m->morebuf.sp; - g = m->curg; - stk = (Stktop*)g->stackbase; + fp = (byte*)m->morebuf.sp; + gp = m->curg; + stk = (Stktop*)gp->stackbase; continue; } - if(pcbuf == nil && f->entry == (uintptr)runtime·lessstack && g == m->g0) { - runtime·printf("----- lessstack called from goroutine %d -----\n", m->curg->goid); - g = m->curg; - stk = (Stktop*)g->stackbase; - sp = stk->gobuf.sp; + if(pcbuf == nil && f->entry == (uintptr)runtime·lessstack && gp == m->g0) { + runtime·printf("----- lessstack called from goroutine %D -----\n", m->curg->goid); + gp = m->curg; + stk = (Stktop*)gp->stackbase; + sp = (byte*)stk->gobuf.sp; pc = (uintptr)stk->gobuf.pc; fp = nil; lr = 0; @@ -182,9 +149,21 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr // If this was deferproc or newproc, the caller had an extra 12. if(f->entry == (uintptr)runtime·deferproc || f->entry == (uintptr)runtime·newproc) sp += 12; + + // sighandler saves the lr on stack before faking a call to sigpanic + if(waspanic) { + x = *(uintptr *)sp; + sp += 4; + f = runtime·findfunc(pc); + if (f == nil) { + pc = x; + } else if (f->frame == 0) + lr = x; + } } - if(pcbuf == nil && (pc = g->gopc) != 0 && (f = runtime·findfunc(pc)) != nil && g->goid != 1) { + if(pcbuf == nil && (pc = gp->gopc) != 0 && (f = runtime·findfunc(pc)) != nil + && runtime·showframe(f, gp == m->curg) && gp->goid != 1) { runtime·printf("created by %S\n", f->name); tracepc = pc; // back up to CALL instruction for funcline. if(n > 0 && pc > f->entry) @@ -199,9 +178,15 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr } void -runtime·traceback(byte *pc0, byte *sp, byte *lr, G *g) +runtime·traceback(byte *pc0, byte *sp, byte *lr, G *gp) { - runtime·gentraceback(pc0, sp, lr, g, 0, nil, 100); + if(gp->status == Gsyscall) { + // Override signal registers if blocked in system call. + pc0 = gp->sched.pc; + sp = (byte*)gp->sched.sp; + lr = nil; + } + runtime·gentraceback(pc0, sp, lr, gp, 0, nil, 100); } // func caller(n int) (pc uintptr, file string, line int, ok bool) diff --git a/src/pkg/runtime/traceback_x86.c b/src/pkg/runtime/traceback_x86.c index be35bab00..72603ae8e 100644 --- a/src/pkg/runtime/traceback_x86.c +++ b/src/pkg/runtime/traceback_x86.c @@ -8,7 +8,6 @@ #include "arch_GOARCH.h" #include "malloc.h" -static uintptr isclosureentry(uintptr); void runtime·deferproc(void); void runtime·newproc(void); void runtime·newstack(void); @@ -23,9 +22,8 @@ void runtime·sigpanic(void); // A little clunky to merge the two but avoids duplicating // the code and all its subtlety. 
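// Concretely, gentraceback prints a human-readable trace when pcbuf is
// nil and silently records PCs when it is not, so the two entry points
// in this file each reduce to a single call (both visible below):
//
//	runtime·gentraceback(pc0, sp, nil, gp, 0, nil, 100);		// traceback
//	return runtime·gentraceback(pc, sp, nil, g, skip, pcbuf, m);	// callers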
int32 -runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, int32 max) +runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *gp, int32 skip, uintptr *pcbuf, int32 max) { - byte *p; int32 i, n, iter, sawnewstack; uintptr pc, lr, tracepc; byte *fp; @@ -40,29 +38,22 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr waspanic = false; // If the PC is goexit, the goroutine hasn't started yet. - if(pc0 == g->sched.pc && sp == g->sched.sp && pc0 == (byte*)runtime·goexit) { + if(pc0 == gp->sched.pc && sp == (byte*)gp->sched.sp && pc0 == (byte*)runtime·goexit && gp->fnstart != nil) { fp = sp; lr = pc; - pc = (uintptr)g->entry; + pc = (uintptr)gp->fnstart->fn; } // If the PC is zero, it's likely a nil function call. // Start in the caller's frame. if(pc == 0) { - pc = lr; - lr = 0; - } - - // If the PC is zero, it's likely a nil function call. - // Start in the caller's frame. - if(pc == 0) { pc = *(uintptr*)sp; sp += sizeof(uintptr); } n = 0; sawnewstack = 0; - stk = (Stktop*)g->stackbase; + stk = (Stktop*)gp->stackbase; for(iter = 0; iter < 100 && n < max; iter++) { // iter avoids looping forever // Typically: // pc is the PC of the running function. @@ -74,41 +65,16 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr if(pc == (uintptr)runtime·lessstack) { // Hit top of stack segment. Unwind to next segment. pc = (uintptr)stk->gobuf.pc; - sp = stk->gobuf.sp; + sp = (byte*)stk->gobuf.sp; lr = 0; fp = nil; - if(pcbuf == nil) + if(pcbuf == nil && runtime·showframe(nil, gp == m->curg)) runtime·printf("----- stack segment boundary -----\n"); stk = (Stktop*)stk->stackbase; continue; } - if(pc <= 0x1000 || (f = runtime·findfunc(pc)) == nil) { - // Dangerous, but worthwhile: see if this is a closure: - // ADDQ $wwxxyyzz, SP; RET - // [48] 81 c4 zz yy xx ww c3 - // The 0x48 byte is only on amd64. - p = (byte*)pc; - // We check p < p+8 to avoid wrapping and faulting if we lose track. - if(runtime·mheap.arena_start < p && p < p+8 && p+8 < runtime·mheap.arena_used && // pointer in allocated memory - (sizeof(uintptr) != 8 || *p++ == 0x48) && // skip 0x48 byte on amd64 - p[0] == 0x81 && p[1] == 0xc4 && p[6] == 0xc3) { - sp += *(uint32*)(p+2); - pc = *(uintptr*)sp; - sp += sizeof(uintptr); - lr = 0; - fp = nil; - continue; - } - - // Closure at top of stack, not yet started. - if(lr == (uintptr)runtime·goexit && (pc = isclosureentry(pc)) != 0) { - fp = sp; - continue; - } - - // Unknown pc: stop. + if(pc <= 0x1000 || (f = runtime·findfunc(pc)) == nil) break; - } // Found an actual function. if(fp == nil) { @@ -126,7 +92,7 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr else if(pcbuf != nil) pcbuf[n++] = pc; else { - if(runtime·showframe(f)) { + if(runtime·showframe(f, gp == m->curg)) { // Print during crash. // main(0x1, 0x2, 0x3) // /home/rsc/go/src/runtime/x.go:23 +0xf @@ -134,8 +100,10 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr tracepc = pc; // back up to CALL instruction for funcline. 
if(n > 0 && pc > f->entry && !waspanic) tracepc--; + if(m->throwing && gp == m->curg) + runtime·printf("[fp=%p] ", fp); runtime·printf("%S(", f->name); - for(i = 0; i < f->args; i++) { + for(i = 0; i < f->args/sizeof(uintptr); i++) { if(i != 0) runtime·prints(", "); runtime·printhex(((uintptr*)fp)[i]); @@ -161,27 +129,27 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr if(f->entry == (uintptr)runtime·newstack) sawnewstack = 1; - if(pcbuf == nil && f->entry == (uintptr)runtime·morestack && g == m->g0 && sawnewstack) { + if(pcbuf == nil && f->entry == (uintptr)runtime·morestack && gp == m->g0 && sawnewstack) { // The fact that we saw newstack means that morestack // has managed to record its information in m, so we can // use it to keep unwinding the stack. - runtime·printf("----- morestack called from goroutine %d -----\n", m->curg->goid); + runtime·printf("----- morestack called from goroutine %D -----\n", m->curg->goid); pc = (uintptr)m->morepc; - sp = m->morebuf.sp - sizeof(void*); + sp = (byte*)m->morebuf.sp - sizeof(void*); lr = (uintptr)m->morebuf.pc; - fp = m->morebuf.sp; + fp = (byte*)m->morebuf.sp; sawnewstack = 0; - g = m->curg; - stk = (Stktop*)g->stackbase; + gp = m->curg; + stk = (Stktop*)gp->stackbase; continue; } - if(pcbuf == nil && f->entry == (uintptr)runtime·lessstack && g == m->g0) { + if(pcbuf == nil && f->entry == (uintptr)runtime·lessstack && gp == m->g0) { // Lessstack is running on scheduler stack. Switch to original goroutine. - runtime·printf("----- lessstack called from goroutine %d -----\n", m->curg->goid); - g = m->curg; - stk = (Stktop*)g->stackbase; - sp = stk->gobuf.sp; + runtime·printf("----- lessstack called from goroutine %D -----\n", m->curg->goid); + gp = m->curg; + stk = (Stktop*)gp->stackbase; + sp = (byte*)stk->gobuf.sp; pc = (uintptr)stk->gobuf.pc; fp = nil; lr = 0; @@ -196,7 +164,8 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr } // Show what created goroutine, except main goroutine (goid 1). - if(pcbuf == nil && (pc = g->gopc) != 0 && (f = runtime·findfunc(pc)) != nil && g->goid != 1) { + if(pcbuf == nil && (pc = gp->gopc) != 0 && (f = runtime·findfunc(pc)) != nil + && runtime·showframe(f, gp == m->curg) && gp->goid != 1) { runtime·printf("created by %S\n", f->name); tracepc = pc; // back up to CALL instruction for funcline. if(n > 0 && pc > f->entry) @@ -211,9 +180,14 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr } void -runtime·traceback(byte *pc0, byte *sp, byte*, G *g) +runtime·traceback(byte *pc0, byte *sp, byte*, G *gp) { - runtime·gentraceback(pc0, sp, nil, g, 0, nil, 100); + if(gp->status == Gsyscall) { + // Override signal registers if blocked in system call. 
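// gp->sched was saved by entersyscall, so it records the last Go frame
// before the goroutine handed control to the kernel; starting the
// unwind there yields a meaningful Go stack instead of whatever the
// signal context captured mid-syscall:
//
//	if(gp->status == Gsyscall) {
//		pc0 = gp->sched.pc;
//		sp = (byte*)gp->sched.sp;
//	}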
+ pc0 = gp->sched.pc; + sp = (byte*)gp->sched.sp; + } + runtime·gentraceback(pc0, sp, nil, gp, 0, nil, 100); } int32 @@ -227,77 +201,3 @@ runtime·callers(int32 skip, uintptr *pcbuf, int32 m) return runtime·gentraceback(pc, sp, nil, g, skip, pcbuf, m); } - -static uintptr -isclosureentry(uintptr pc) -{ - byte *p; - int32 i, siz; - - p = (byte*)pc; - if(p < runtime·mheap.arena_start || p+32 > runtime·mheap.arena_used) - return 0; - - if(*p == 0xe8) { - // CALL fn - return pc+5+*(int32*)(p+1); - } - - if(sizeof(uintptr) == 8 && p[0] == 0x48 && p[1] == 0xb9 && p[10] == 0xff && p[11] == 0xd1) { - // MOVQ $fn, CX; CALL *CX - return *(uintptr*)(p+2); - } - - // SUBQ $siz, SP - if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0x81 || *p++ != 0xec) - return 0; - siz = *(uint32*)p; - p += 4; - - // MOVQ $q, SI - if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0xbe) - return 0; - p += sizeof(uintptr); - - // MOVQ SP, DI - if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0x89 || *p++ != 0xe7) - return 0; - - // CLD on 32-bit - if(sizeof(uintptr) == 4 && *p++ != 0xfc) - return 0; - - if(siz <= 4*sizeof(uintptr)) { - // MOVSQ... - for(i=0; i<siz; i+=sizeof(uintptr)) - if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0xa5) - return 0; - } else { - // MOVQ $(siz/8), CX [32-bit immediate siz/8] - if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0xc7 || *p++ != 0xc1) - return 0; - p += 4; - - // REP MOVSQ - if(*p++ != 0xf3 || (sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0xa5) - return 0; - } - - // CALL fn - if(*p == 0xe8) { - p++; - return (uintptr)p+4 + *(int32*)p; - } - - // MOVQ $fn, CX; CALL *CX - if(sizeof(uintptr) != 8 || *p++ != 0x48 || *p++ != 0xb9) - return 0; - - pc = *(uintptr*)p; - p += 8; - - if(*p++ != 0xff || *p != 0xd1) - return 0; - - return pc; -} diff --git a/src/pkg/runtime/type.go b/src/pkg/runtime/type.go index 6af6b237f..374754afa 100644 --- a/src/pkg/runtime/type.go +++ b/src/pkg/runtime/type.go @@ -14,24 +14,25 @@ package runtime import "unsafe" -type commonType struct { +type rtype struct { size uintptr hash uint32 _ uint8 align uint8 fieldAlign uint8 kind uint8 - alg *uintptr + alg unsafe.Pointer + gc unsafe.Pointer string *string *uncommonType - ptrToThis *interface{} + ptrToThis *rtype } type _method struct { name *string pkgPath *string - mtyp *interface{} - typ *interface{} + mtyp *rtype + typ *rtype ifn unsafe.Pointer tfn unsafe.Pointer } @@ -45,10 +46,10 @@ type uncommonType struct { type _imethod struct { name *string pkgPath *string - typ *interface{} + typ *rtype } type interfaceType struct { - commonType + rtype methods []_imethod } diff --git a/src/pkg/runtime/type.h b/src/pkg/runtime/type.h index c1d9facd1..769a8071b 100644 --- a/src/pkg/runtime/type.h +++ b/src/pkg/runtime/type.h @@ -5,21 +5,20 @@ /* * Runtime type representation; master is type.go * - * The *Types here correspond 1-1 to type.go's *Type's, but are - * prefixed with an extra header of 2 pointers, corresponding to the - * interface{} structure, which itself is called type Type again on - * the Go side. + * The Type*s here correspond 1-1 to type.go's *rtype. 
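The runtime·callers entry point above is what backs the public runtime.Callers API: it captures the current pc/sp and hands them to gentraceback with a pcbuf to fill instead of printing. A short user-level example of the same machinery, runnable as-is:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		pc := make([]uintptr, 8)
		n := runtime.Callers(0, pc) // fill pc with up to 8 return addresses
		for _, p := range pc[:n] {
			if f := runtime.FuncForPC(p); f != nil {
				fmt.Println(f.Name()) // symbolize each frame
			}
		}
	}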
*/ -typedef struct CommonType CommonType; +typedef struct Type Type; typedef struct UncommonType UncommonType; typedef struct InterfaceType InterfaceType; typedef struct Method Method; typedef struct IMethod IMethod; typedef struct SliceType SliceType; typedef struct FuncType FuncType; +typedef struct PtrType PtrType; -struct CommonType +// Needs to be in sync with typekind.h/CommonSize +struct Type { uintptr size; uint32 hash; @@ -28,42 +27,12 @@ struct CommonType uint8 fieldAlign; uint8 kind; Alg *alg; + void *gc; String *string; UncommonType *x; Type *ptrto; }; -enum { - KindBool = 1, - KindInt, - KindInt8, - KindInt16, - KindInt32, - KindInt64, - KindUint, - KindUint8, - KindUint16, - KindUint32, - KindUint64, - KindUintptr, - KindFloat32, - KindFloat64, - KindComplex64, - KindComplex128, - KindArray, - KindChan, - KindFunc, - KindInterface, - KindMap, - KindPtr, - KindSlice, - KindString, - KindStruct, - KindUnsafePointer, - - KindNoPointers = 1<<7, -}; - struct Method { String *name; @@ -82,13 +51,6 @@ struct UncommonType Method m[]; }; -struct Type -{ - void *type; // interface{} value - void *ptr; - CommonType; -}; - struct IMethod { String *name; @@ -130,3 +92,9 @@ struct FuncType Slice in; Slice out; }; + +struct PtrType +{ + Type; + Type *elem; +}; diff --git a/src/pkg/runtime/typekind.h b/src/pkg/runtime/typekind.h new file mode 100644 index 000000000..9bae2a871 --- /dev/null +++ b/src/pkg/runtime/typekind.h @@ -0,0 +1,41 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// PtrSize vs sizeof(void*): This file is also included from src/cmd/ld/... +// which defines PtrSize to be different from sizeof(void*) when crosscompiling. + +enum { + KindBool = 1, + KindInt, + KindInt8, + KindInt16, + KindInt32, + KindInt64, + KindUint, + KindUint8, + KindUint16, + KindUint32, + KindUint64, + KindUintptr, + KindFloat32, + KindFloat64, + KindComplex64, + KindComplex128, + KindArray, + KindChan, + KindFunc, + KindInterface, + KindMap, + KindPtr, + KindSlice, + KindString, + KindStruct, + KindUnsafePointer, + + KindNoPointers = 1<<7, + + // size of Type structure. + CommonSize = 6*PtrSize + 8, +}; + diff --git a/src/pkg/runtime/vdso_linux_amd64.c b/src/pkg/runtime/vdso_linux_amd64.c new file mode 100644 index 000000000..ab68c23c3 --- /dev/null +++ b/src/pkg/runtime/vdso_linux_amd64.c @@ -0,0 +1,331 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "runtime.h" + +#define AT_SYSINFO_EHDR 33 +#define AT_NULL 0 /* End of vector */ +#define PT_LOAD 1 /* Loadable program segment */ +#define PT_DYNAMIC 2 /* Dynamic linking information */ +#define DT_NULL 0 /* Marks end of dynamic section */ +#define DT_STRTAB 5 /* Address of string table */ +#define DT_SYMTAB 6 /* Address of symbol table */ +#define DT_VERSYM 0x6ffffff0 +#define DT_VERDEF 0x6ffffffc + +#define VER_FLG_BASE 0x1 /* Version definition of file itself */ +#define SHN_UNDEF 0 /* Undefined section */ +#define SHT_DYNSYM 11 /* Dynamic linker symbol table */ +#define STT_FUNC 2 /* Symbol is a code object */ +#define STB_GLOBAL 1 /* Global symbol */ +#define STB_WEAK 2 /* Weak symbol */ + +/* How to extract and insert information held in the st_info field. 
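The CommonSize constant above exists so the linker, which includes typekind.h but not type.h, can know sizeof(struct Type) without seeing the definition. The formula works out because the uint32 hash plus the four uint8 fields always pack into exactly 8 bytes, leaving six pointer-sized words (size plus the five pointers alg, gc, string, x, ptrto). A quick self-check with a mirrored layout in Go (rtypeMirror is a stand-in, not the real runtime type):

	package main

	import (
		"fmt"
		"unsafe"
	)

	// rtypeMirror copies the field order of struct Type / type.go's rtype.
	type rtypeMirror struct {
		size       uintptr
		hash       uint32
		_          uint8
		align      uint8
		fieldAlign uint8
		kind       uint8
		alg        unsafe.Pointer
		gc         unsafe.Pointer
		str        *string
		x          unsafe.Pointer // uncommonType
		ptrToThis  unsafe.Pointer
	}

	func main() {
		ptrSize := unsafe.Sizeof(uintptr(0))
		fmt.Println(unsafe.Sizeof(rtypeMirror{}) == 6*ptrSize+8) // true on 32- and 64-bit
	}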
*/ +#define ELF64_ST_BIND(val) (((byte) (val)) >> 4) +#define ELF64_ST_TYPE(val) ((val) & 0xf) + +#define EI_NIDENT (16) + +typedef uint16 Elf64_Half; +typedef uint32 Elf64_Word; +typedef int32 Elf64_Sword; +typedef uint64 Elf64_Xword; +typedef int64 Elf64_Sxword; +typedef uint64 Elf64_Addr; +typedef uint64 Elf64_Off; +typedef uint16 Elf64_Section; +typedef Elf64_Half Elf64_Versym; + + +typedef struct +{ + Elf64_Word st_name; + byte st_info; + byte st_other; + Elf64_Section st_shndx; + Elf64_Addr st_value; + Elf64_Xword st_size; +} Elf64_Sym; + +typedef struct +{ + Elf64_Half vd_version; /* Version revision */ + Elf64_Half vd_flags; /* Version information */ + Elf64_Half vd_ndx; /* Version Index */ + Elf64_Half vd_cnt; /* Number of associated aux entries */ + Elf64_Word vd_hash; /* Version name hash value */ + Elf64_Word vd_aux; /* Offset in bytes to verdaux array */ + Elf64_Word vd_next; /* Offset in bytes to next verdef entry */ +} Elf64_Verdef; + +typedef struct +{ + byte e_ident[EI_NIDENT]; /* Magic number and other info */ + Elf64_Half e_type; /* Object file type */ + Elf64_Half e_machine; /* Architecture */ + Elf64_Word e_version; /* Object file version */ + Elf64_Addr e_entry; /* Entry point virtual address */ + Elf64_Off e_phoff; /* Program header table file offset */ + Elf64_Off e_shoff; /* Section header table file offset */ + Elf64_Word e_flags; /* Processor-specific flags */ + Elf64_Half e_ehsize; /* ELF header size in bytes */ + Elf64_Half e_phentsize; /* Program header table entry size */ + Elf64_Half e_phnum; /* Program header table entry count */ + Elf64_Half e_shentsize; /* Section header table entry size */ + Elf64_Half e_shnum; /* Section header table entry count */ + Elf64_Half e_shstrndx; /* Section header string table index */ +} Elf64_Ehdr; + +typedef struct +{ + Elf64_Word p_type; /* Segment type */ + Elf64_Word p_flags; /* Segment flags */ + Elf64_Off p_offset; /* Segment file offset */ + Elf64_Addr p_vaddr; /* Segment virtual address */ + Elf64_Addr p_paddr; /* Segment physical address */ + Elf64_Xword p_filesz; /* Segment size in file */ + Elf64_Xword p_memsz; /* Segment size in memory */ + Elf64_Xword p_align; /* Segment alignment */ +} Elf64_Phdr; + +typedef struct +{ + Elf64_Word sh_name; /* Section name (string tbl index) */ + Elf64_Word sh_type; /* Section type */ + Elf64_Xword sh_flags; /* Section flags */ + Elf64_Addr sh_addr; /* Section virtual addr at execution */ + Elf64_Off sh_offset; /* Section file offset */ + Elf64_Xword sh_size; /* Section size in bytes */ + Elf64_Word sh_link; /* Link to another section */ + Elf64_Word sh_info; /* Additional section information */ + Elf64_Xword sh_addralign; /* Section alignment */ + Elf64_Xword sh_entsize; /* Entry size if section holds table */ +} Elf64_Shdr; + +typedef struct +{ + Elf64_Sxword d_tag; /* Dynamic entry type */ + union + { + Elf64_Xword d_val; /* Integer value */ + Elf64_Addr d_ptr; /* Address value */ + } d_un; +} Elf64_Dyn; + +typedef struct +{ + Elf64_Word vda_name; /* Version or dependency names */ + Elf64_Word vda_next; /* Offset in bytes to next verdaux entry */ +} Elf64_Verdaux; + +typedef struct +{ + uint64 a_type; /* Entry type */ + union + { + uint64 a_val; /* Integer value */ + } a_un; +} Elf64_auxv_t; + + +typedef struct { + byte* name; + void** var_ptr; +} symbol_key; + +typedef struct { + byte* version; + int32 ver_hash; +} version_key; + +struct vdso_info { + bool valid; + + /* Load information */ + uintptr load_addr; + uintptr load_offset; /* load_addr - recorded vaddr */ + + /* 
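The two st_info macros above unpack one byte: ELF stores a symbol's binding in the high nibble and its type in the low nibble. The same helpers in Go:

	// ELF64_ST_BIND / ELF64_ST_TYPE, spelled out: binding (global, weak,
	// local) in the high nibble, symbol type (func, object, ...) in the low.
	func elfSTBind(info byte) byte { return info >> 4 }
	func elfSTType(info byte) byte { return info & 0xf }

A vDSO symbol is usable when elfSTType reports STT_FUNC (2) and elfSTBind reports STB_GLOBAL (1) or STB_WEAK (2), which is exactly the filter vdso_parse_symbols applies below.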
Symbol table */ + int32 num_sym; + Elf64_Sym *symtab; + const byte *symstrings; + + /* Version table */ + Elf64_Versym *versym; + Elf64_Verdef *verdef; +}; + +static version_key linux26 = { (byte*)"LINUX_2.6", 0x3ae75f6 }; + +// initialize with vsyscall fallbacks +void* runtime·__vdso_time_sym = (void*)0xffffffffff600400ULL; +void* runtime·__vdso_gettimeofday_sym = (void*)0xffffffffff600000ULL; +void* runtime·__vdso_clock_gettime_sym = (void*)0; + +#define SYM_KEYS_COUNT 3 +static symbol_key sym_keys[] = { + { (byte*)"__vdso_time", &runtime·__vdso_time_sym }, + { (byte*)"__vdso_gettimeofday", &runtime·__vdso_gettimeofday_sym }, + { (byte*)"__vdso_clock_gettime", &runtime·__vdso_clock_gettime_sym }, +}; + +static void +vdso_init_from_sysinfo_ehdr(struct vdso_info *vdso_info, Elf64_Ehdr* hdr) +{ + uint64 i; + bool found_vaddr = false; + + vdso_info->load_addr = (uintptr) hdr; + + Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info->load_addr + hdr->e_phoff); + Elf64_Shdr *sh = (Elf64_Shdr*)(vdso_info->load_addr + hdr->e_shoff); + Elf64_Dyn *dyn = 0; + + for(i=0; i<hdr->e_shnum; i++) { + if(sh[i].sh_type == SHT_DYNSYM) { + vdso_info->num_sym = sh[i].sh_size / sizeof(Elf64_Sym); + } + } + + // We need two things from the segment table: the load offset + // and the dynamic table. + for(i=0; i<hdr->e_phnum; i++) { + if(pt[i].p_type == PT_LOAD && found_vaddr == false) { + found_vaddr = true; + vdso_info->load_offset = (uintptr)hdr + + (uintptr)pt[i].p_offset + - (uintptr)pt[i].p_vaddr; + } else if(pt[i].p_type == PT_DYNAMIC) { + dyn = (Elf64_Dyn*)((uintptr)hdr + pt[i].p_offset); + } + } + + if(found_vaddr == false || dyn == nil) + return; // Failed + + // Fish out the useful bits of the dynamic table. + for(i=0; dyn[i].d_tag!=DT_NULL; i++) { + switch(dyn[i].d_tag) { + case DT_STRTAB: + vdso_info->symstrings = (const byte *) + ((uintptr)dyn[i].d_un.d_ptr + + vdso_info->load_offset); + break; + case DT_SYMTAB: + vdso_info->symtab = (Elf64_Sym *) + ((uintptr)dyn[i].d_un.d_ptr + + vdso_info->load_offset); + break; + case DT_VERSYM: + vdso_info->versym = (Elf64_Versym *) + ((uintptr)dyn[i].d_un.d_ptr + + vdso_info->load_offset); + break; + case DT_VERDEF: + vdso_info->verdef = (Elf64_Verdef *) + ((uintptr)dyn[i].d_un.d_ptr + + vdso_info->load_offset); + break; + } + } + if(vdso_info->symstrings == nil || vdso_info->symtab == nil) + return; // Failed + + if(vdso_info->verdef == nil) + vdso_info->versym = 0; + + // That's all we need. + vdso_info->valid = true; +} + +static int32 +vdso_find_version(struct vdso_info *vdso_info, version_key* ver) +{ + if(vdso_info->valid == false) { + return 0; + } + Elf64_Verdef *def = vdso_info->verdef; + while(true) { + if((def->vd_flags & VER_FLG_BASE) == 0) { + Elf64_Verdaux *aux = (Elf64_Verdaux*)((byte *)def + def->vd_aux); + if(def->vd_hash == ver->ver_hash && + runtime·strcmp(ver->version, vdso_info->symstrings + aux->vda_name) == 0) { + return def->vd_ndx & 0x7fff; + } + } + + if(def->vd_next == 0) { + break; + } + def = (Elf64_Verdef *)((byte *)def + def->vd_next); + } + return 0; +} + +static void +vdso_parse_symbols(struct vdso_info *vdso_info, int32 version) +{ + int32 i, j; + + if(vdso_info->valid == false) + return; + + for(i=0; i<vdso_info->num_sym; i++) { + Elf64_Sym *sym = &vdso_info->symtab[i]; + + // Check for a defined global or weak function w/ right name. 
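The 0x3ae75f6 constant paired with "LINUX_2.6" in the table above is not arbitrary: it is the System V ELF hash of that version string, precomputed so verdef entries can be rejected on a cheap integer compare before any string comparison. A sketch of that hash, the classic elf_hash from the SysV gABI:

	// elfHash implements the standard SysV ELF string hash.
	func elfHash(name string) uint32 {
		var h, g uint32
		for i := 0; i < len(name); i++ {
			h = h<<4 + uint32(name[i])
			if g = h & 0xf0000000; g != 0 {
				h ^= g >> 24
			}
			h &^= g
		}
		return h
	}

elfHash("LINUX_2.6") evaluates to 0x3ae75f6, matching the linux26 entry.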
+ if(ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+ continue;
+ if(ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+ ELF64_ST_BIND(sym->st_info) != STB_WEAK)
+ continue;
+ if(sym->st_shndx == SHN_UNDEF)
+ continue;
+
+ for(j=0; j<SYM_KEYS_COUNT; j++) {
+ if(runtime·strcmp(sym_keys[j].name, vdso_info->symstrings + sym->st_name) != 0)
+ continue;
+
+ // Check symbol version.
+ if(vdso_info->versym != nil && version != 0
+ && (vdso_info->versym[i] & 0x7fff) != version)
+ continue;
+
+ *sym_keys[j].var_ptr = (void *)(vdso_info->load_offset + sym->st_value);
+ }
+ }
+}
+
+static void
+runtime·linux_setup_vdso(int32 argc, uint8** argv)
+{
+ struct vdso_info vdso_info;
+
+ // skip argv
+ byte **p = argv;
+ p = &p[argc+1];
+
+ // skip envp to get to ELF auxiliary vector.
+ for(; *p!=0; p++) {}
+
+ // skip NULL separator
+ p++;
+
+ // now, p points to auxv
+ Elf64_auxv_t *elf_auxv = (Elf64_auxv_t*) p;
+
+ for(int32 i=0; elf_auxv[i].a_type!=AT_NULL; i++) {
+ if(elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
+ if(elf_auxv[i].a_un.a_val == 0) {
+ // Something went wrong
+ return;
+ }
+ vdso_init_from_sysinfo_ehdr(&vdso_info, (Elf64_Ehdr*)elf_auxv[i].a_un.a_val);
+ vdso_parse_symbols(&vdso_info, vdso_find_version(&vdso_info, &linux26));
+ return;
+ }
+ }
+}
+
+void (*runtime·sysargs)(int32, uint8**) = runtime·linux_setup_vdso;
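runtime·linux_setup_vdso above locates the vDSO without a single system call: past argv and its NULL terminator, past the environment and its NULL terminator, the kernel places the ELF auxiliary vector, a list of (type, value) pairs ended by AT_NULL, and the AT_SYSINFO_EHDR entry holds the base address of the mapped vDSO image. The same walk over a flattened auxv, sketched in Go (findVDSO and its []uint64 input are stand-ins for the pointer arithmetic above):

	const (
		atNull        = 0  // AT_NULL: end of the vector
		atSysinfoEhdr = 33 // AT_SYSINFO_EHDR: base of the vDSO mapping
	)

	// findVDSO scans (type, value) pairs and returns the vDSO base, or 0.
	func findVDSO(auxv []uint64) uint64 {
		for i := 0; i+1 < len(auxv) && auxv[i] != atNull; i += 2 {
			if auxv[i] == atSysinfoEhdr {
				return auxv[i+1]
			}
		}
		return 0
	}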
diff --git a/src/pkg/runtime/vlop_arm.s b/src/pkg/runtime/vlop_arm.s
index fc679f0ee..0dedc316a 100644
--- a/src/pkg/runtime/vlop_arm.s
+++ b/src/pkg/runtime/vlop_arm.s
@@ -23,9 +23,6 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
-#define UMULL(Rs,Rm,Rhi,Rlo,S) WORD $((14<<28)|(4<<21)|(S<<20)|(Rhi<<16)|(Rlo<<12)|(Rs<<8)|(9<<4)|Rm)
-#define UMLAL(Rs,Rm,Rhi,Rlo,S) WORD $((14<<28)|(5<<21)|(S<<20)|(Rhi<<16)|(Rlo<<12)|(Rs<<8)|(9<<4)|Rm)
-#define MUL(Rs,Rm,Rd,S) WORD $((14<<28)|(0<<21)|(S<<20)|(Rd<<16)|(Rs<<8)|(9<<4)|Rm)
arg=0
/* replaced use of R10 by R11 because the former can be the data segment base register */
@@ -36,136 +33,15 @@ TEXT _mulv(SB), $0
MOVW 8(FP), R11 /* h0 */
MOVW 12(FP), R4 /* l1 */
MOVW 16(FP), R5 /* h1 */
- UMULL(4, 2, 7, 6, 0)
- MUL(11, 4, 8, 0)
+ MULLU R4, R2, (R7,R6)
+ MUL R11, R4, R8
ADD R8, R7
- MUL(2, 5, 8, 0)
+ MUL R2, R5, R8
ADD R8, R7
MOVW R6, 0(R(arg))
MOVW R7, 4(R(arg))
RET
-
-Q = 0
-N = 1
-D = 2
-CC = 3
-TMP = 11
-
-TEXT save<>(SB), 7, $0
- MOVW R(Q), 0(FP)
- MOVW R(N), 4(FP)
- MOVW R(D), 8(FP)
- MOVW R(CC), 12(FP)
-
- MOVW R(TMP), R(Q) /* numerator */
- MOVW 20(FP), R(D) /* denominator */
- CMP $0, R(D)
- BNE s1
- BL runtime·panicdivide(SB)
-/* MOVW -1(R(D)), R(TMP) /* divide by zero fault */
-s1: RET
-
-TEXT rest<>(SB), 7, $0
- MOVW 0(FP), R(Q)
- MOVW 4(FP), R(N)
- MOVW 8(FP), R(D)
- MOVW 12(FP), R(CC)
-/*
- * return to caller
- * of rest<>
- */
- MOVW 0(R13), R14
- ADD $20, R13
- B (R14)
-
-TEXT div<>(SB), 7, $0
- MOVW $32, R(CC)
-/*
- * skip zeros 8-at-a-time
- */
-e1:
- AND.S $(0xff<<24),R(Q), R(N)
- BNE e2
- SLL $8, R(Q)
- SUB.S $8, R(CC)
- BNE e1
- RET
-e2:
- MOVW $0, R(N)
-
-loop:
-/*
- * shift R(N||Q) left one
- */
- SLL $1, R(N)
- CMP $0, R(Q)
- ORR.LT $1, R(N)
- SLL $1, R(Q)
-
-/*
- * compare numerator to denominator
- * if less, subtract and set quotient bit
- */
- CMP R(D), R(N)
- ORR.HS $1, R(Q)
- SUB.HS R(D), R(N)
- SUB.S $1, R(CC)
- BNE loop
- RET
-
-TEXT _div(SB), 7, $16
- BL save<>(SB)
- CMP $0, R(Q)
- BGE d1
- RSB $0, R(Q), R(Q)
- CMP $0, R(D)
- BGE d2
- RSB $0, R(D), R(D)
-d0:
- BL div<>(SB) /* none/both neg */
- MOVW R(Q), R(TMP)
- B out
-d1:
- CMP $0, R(D)
- BGE d0
- RSB $0, R(D), R(D)
-d2:
- BL div<>(SB) /* one neg */
- RSB $0, R(Q), R(TMP)
- B out
-
-TEXT _mod(SB), 7, $16
- BL save<>(SB)
- CMP $0, R(D)
- RSB.LT $0, R(D), R(D)
- CMP $0, R(Q)
- BGE m1
- RSB $0, R(Q), R(Q)
- BL div<>(SB) /* neg numerator */
- RSB $0, R(N), R(TMP)
- B out
-m1:
- BL div<>(SB) /* pos numerator */
- MOVW R(N), R(TMP)
- B out
-
-TEXT _divu(SB), 7, $16
- BL save<>(SB)
- BL div<>(SB)
- MOVW R(Q), R(TMP)
- B out
-
-TEXT _modu(SB), 7, $16
- BL save<>(SB)
- BL div<>(SB)
- MOVW R(N), R(TMP)
- B out
-
-out:
- BL rest<>(SB)
- B out
-
// trampoline for _sfloat2. passes LR as arg0 and
// saves registers R0-R13 and CPSR on the stack. R0-R12 and CPSR flags can
// be changed by _sfloat2.
@@ -186,5 +62,189 @@ TEXT _sfloat(SB), 7, $64 // 4 arg + 14*4 saved regs + cpsr
MOVM.IA.W (R0), [R1-R12]
MOVW 8(R13), R0
RET
-
+// func udiv(n, d uint32) (q, r uint32)
+// Reference:
+// Sloss, Andrew et al.; ARM System Developer's Guide: Designing and Optimizing System Software
+// Morgan Kaufmann; 1st edition (April 8, 2004), ISBN 978-1558608740
+q = 0 // input d, output q
+r = 1 // input n, output r
+s = 2 // three temporary variables
+m = 3
+a = 11
+// Please be careful when changing this, it is pretty fragile:
+// 1. don't use unconditional branch as the linker is free to reorder the blocks;
+// 2. if a == 11, beware that the linker will use R11 if you use certain instructions.
TEXT udiv<>(SB),7,$-4
+ CLZ R(q), R(s) // find normalizing shift
+ MOVW.S R(q)<<R(s), R(a)
+ ADD R(a)>>25, PC, R(a) // most significant 7 bits of divisor
+ MOVBU.NE (4*36-64)(R(a)), R(a) // 36 == number of inst. between fast_udiv_tab and begin
+
+begin:
+ SUB.S $7, R(s)
+ RSB $0, R(q), R(m) // m = -q
+ MOVW.PL R(a)<<R(s), R(q)
+
+ // 1st Newton iteration
+ MUL.PL R(m), R(q), R(a) // a = -q*d
+ BMI udiv_by_large_d
+ MULAWT R(a), R(q), R(q), R(q) // q approx q-(q*q*d>>32)
+ TEQ R(m)->1, R(m) // check for d=0 or d=1
+
+ // 2nd Newton iteration
+ MUL.NE R(m), R(q), R(a)
+ MOVW.NE $0, R(s)
+ MULAL.NE R(q), R(a), (R(q),R(s))
+ BEQ udiv_by_0_or_1
+
+ // q now accurate enough for a remainder r, 0<=r<3*d
+ MULLU R(q), R(r), (R(q),R(s)) // q = (r * q) >> 32
+ ADD R(m), R(r), R(r) // r = n - d
+ MULA R(m), R(q), R(r), R(r) // r = n - (q+1)*d
+
+ // since 0 <= n-q*d < 3*d; thus -d <= r < 2*d
+ CMN R(m), R(r) // t = r-d
+ SUB.CS R(m), R(r), R(r) // if (t<-d || t>=0) r=r+d
+ ADD.CC $1, R(q)
+ ADD.PL R(m)<<1, R(r)
+ ADD.PL $2, R(q)
+
+ // return, can't use RET here or fast_udiv_tab will be dropped during linking
+ MOVW R14, R15
+
+udiv_by_large_d:
+ // at this point we know d>=2^(31-6)=2^25
+ SUB $4, R(a), R(a)
+ RSB $0, R(s), R(s)
+ MOVW R(a)>>R(s), R(q)
+ MULLU R(q), R(r), (R(q),R(s))
+ MULA R(m), R(q), R(r), R(r)
+
+ // q now accurate enough for a remainder r, 0<=r<4*d
+ CMN R(r)>>1, R(m) // if(r/2 >= d)
+ ADD.CS R(m)<<1, R(r)
+ ADD.CS $2, R(q)
+ CMN R(r), R(m)
+ ADD.CS R(m), R(r)
+ ADD.CS $1, R(q)
+
+ // return, can't use RET here or fast_udiv_tab will be dropped during linking
+ MOVW R14, R15
+
+udiv_by_0_or_1:
+ // carry set if d==1, carry clear if d==0
+ MOVW.CS R(r), R(q)
+ MOVW.CS $0, R(r)
+ BL.CC runtime·panicdivide(SB) // no way back
+
+ // return, can't use RET here or fast_udiv_tab will be dropped during linking
+ MOVW R14, R15
+
+fast_udiv_tab:
+ // var tab [64]byte
+ // tab[0] = 255; for i := 1; i <= 63; i++ { tab[i] = (1<<14)/(64+i) }
+ // laid out here as little-endian uint32s
+ WORD $0xf4f8fcff
+ WORD $0xe6eaedf0
+ WORD $0xdadde0e3
+ WORD $0xcfd2d4d7
+ WORD $0xc5c7cacc
+ WORD $0xbcbec0c3
+ WORD $0xb4b6b8ba
+ WORD $0xacaeb0b2
+ WORD $0xa5a7a8aa
+
WORD $0x9fa0a2a3 + WORD $0x999a9c9d + WORD $0x93949697 + WORD $0x8e8f9092 + WORD $0x898a8c8d + WORD $0x85868788 + WORD $0x81828384 + +// The linker will pass numerator in R(TMP), and it also +// expects the result in R(TMP) +TMP = 11 + +TEXT _divu(SB), 7, $16 + MOVW R(q), 4(R13) + MOVW R(r), 8(R13) + MOVW R(s), 12(R13) + MOVW R(m), 16(R13) + + MOVW R(TMP), R(r) /* numerator */ + MOVW 0(FP), R(q) /* denominator */ + BL udiv<>(SB) + MOVW R(q), R(TMP) + MOVW 4(R13), R(q) + MOVW 8(R13), R(r) + MOVW 12(R13), R(s) + MOVW 16(R13), R(m) + RET + +TEXT _modu(SB), 7, $16 + MOVW R(q), 4(R13) + MOVW R(r), 8(R13) + MOVW R(s), 12(R13) + MOVW R(m), 16(R13) + + MOVW R(TMP), R(r) /* numerator */ + MOVW 0(FP), R(q) /* denominator */ + BL udiv<>(SB) + MOVW R(r), R(TMP) + MOVW 4(R13), R(q) + MOVW 8(R13), R(r) + MOVW 12(R13), R(s) + MOVW 16(R13), R(m) + RET + +TEXT _div(SB),7,$16 + MOVW R(q), 4(R13) + MOVW R(r), 8(R13) + MOVW R(s), 12(R13) + MOVW R(m), 16(R13) + MOVW R(TMP), R(r) /* numerator */ + MOVW 0(FP), R(q) /* denominator */ + CMP $0, R(r) + BGE d1 + RSB $0, R(r), R(r) + CMP $0, R(q) + BGE d2 + RSB $0, R(q), R(q) +d0: + BL udiv<>(SB) /* none/both neg */ + MOVW R(q), R(TMP) + B out +d1: + CMP $0, R(q) + BGE d0 + RSB $0, R(q), R(q) +d2: + BL udiv<>(SB) /* one neg */ + RSB $0, R(q), R(TMP) + B out + +TEXT _mod(SB),7,$16 + MOVW R(q), 4(R13) + MOVW R(r), 8(R13) + MOVW R(s), 12(R13) + MOVW R(m), 16(R13) + MOVW R(TMP), R(r) /* numerator */ + MOVW 0(FP), R(q) /* denominator */ + CMP $0, R(q) + RSB.LT $0, R(q), R(q) + CMP $0, R(r) + BGE m1 + RSB $0, R(r), R(r) + BL udiv<>(SB) /* neg numerator */ + RSB $0, R(r), R(TMP) + B out +m1: + BL udiv<>(SB) /* pos numerator */ + MOVW R(r), R(TMP) +out: + MOVW 4(R13), R(q) + MOVW 8(R13), R(r) + MOVW 12(R13), R(s) + MOVW 16(R13), R(m) + RET diff --git a/src/pkg/runtime/vlop_arm_test.go b/src/pkg/runtime/vlop_arm_test.go new file mode 100644 index 000000000..cd28419ad --- /dev/null +++ b/src/pkg/runtime/vlop_arm_test.go @@ -0,0 +1,70 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
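The fast_udiv_tab block completed above is a 64-entry reciprocal-estimate table: udiv normalizes the divisor, indexes the table with its top seven bits, and then sharpens the rough 8-bit reciprocal with the two Newton iterations seen earlier. The generator is given in the source comment; spelled out as runnable Go that also reproduces the little-endian WORD packing:

	// fastUdivTab rebuilds the table: tab[0] = 255, tab[i] = (1<<14)/(64+i),
	// packed four bytes per WORD, little-endian. The first and last words
	// come out as 0xf4f8fcff and 0x81828384, matching the assembly.
	func fastUdivTab() [16]uint32 {
		var tab [64]byte
		tab[0] = 255
		for i := 1; i <= 63; i++ {
			tab[i] = byte((1 << 14) / (64 + i))
		}
		var words [16]uint32
		for i := range words {
			words[i] = uint32(tab[4*i]) | uint32(tab[4*i+1])<<8 |
				uint32(tab[4*i+2])<<16 | uint32(tab[4*i+3])<<24
		}
		return words
	}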
+
+package runtime_test
+
+import "testing"
+
+// arm soft division benchmarks adapted from
+// http://ridiculousfish.com/files/division_benchmarks.tar.gz
+
+const numeratorsSize = 1 << 21
+
+var numerators = randomNumerators()
+
+type randstate struct {
+ hi, lo uint32
+}
+
+func (r *randstate) rand() uint32 {
+ r.hi = r.hi<<16 + r.hi>>16
+ r.hi += r.lo
+ r.lo += r.hi
+ return r.hi
+}
+
+func randomNumerators() []uint32 {
+ numerators := make([]uint32, numeratorsSize)
+ random := &randstate{2147483563, 2147483563 ^ 0x49616E42}
+ for i := range numerators {
+ numerators[i] = random.rand()
+ }
+ return numerators
+}
+
+func bmUint32Div(divisor uint32, b *testing.B) {
+ var sum uint32
+ for i := 0; i < b.N; i++ {
+ sum += numerators[i&(numeratorsSize-1)] / divisor
+ }
+}
+
+func BenchmarkUint32Div7(b *testing.B) { bmUint32Div(7, b) }
+func BenchmarkUint32Div37(b *testing.B) { bmUint32Div(37, b) }
+func BenchmarkUint32Div123(b *testing.B) { bmUint32Div(123, b) }
+func BenchmarkUint32Div763(b *testing.B) { bmUint32Div(763, b) }
+func BenchmarkUint32Div1247(b *testing.B) { bmUint32Div(1247, b) }
+func BenchmarkUint32Div9305(b *testing.B) { bmUint32Div(9305, b) }
+func BenchmarkUint32Div13307(b *testing.B) { bmUint32Div(13307, b) }
+func BenchmarkUint32Div52513(b *testing.B) { bmUint32Div(52513, b) }
+func BenchmarkUint32Div60978747(b *testing.B) { bmUint32Div(60978747, b) }
+func BenchmarkUint32Div106956295(b *testing.B) { bmUint32Div(106956295, b) }
+
+func bmUint32Mod(divisor uint32, b *testing.B) {
+ var sum uint32
+ for i := 0; i < b.N; i++ {
+ sum += numerators[i&(numeratorsSize-1)] % divisor
+ }
+}
+
+func BenchmarkUint32Mod7(b *testing.B) { bmUint32Mod(7, b) }
+func BenchmarkUint32Mod37(b *testing.B) { bmUint32Mod(37, b) }
+func BenchmarkUint32Mod123(b *testing.B) { bmUint32Mod(123, b) }
+func BenchmarkUint32Mod763(b *testing.B) { bmUint32Mod(763, b) }
+func BenchmarkUint32Mod1247(b *testing.B) { bmUint32Mod(1247, b) }
+func BenchmarkUint32Mod9305(b *testing.B) { bmUint32Mod(9305, b) }
+func BenchmarkUint32Mod13307(b *testing.B) { bmUint32Mod(13307, b) }
+func BenchmarkUint32Mod52513(b *testing.B) { bmUint32Mod(52513, b) }
+func BenchmarkUint32Mod60978747(b *testing.B) { bmUint32Mod(60978747, b) }
+func BenchmarkUint32Mod106956295(b *testing.B) { bmUint32Mod(106956295, b) }
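The _div and _mod wrappers further up reduce signed division to udiv plus sign fixups: the quotient is negated when exactly one operand is negative, and the remainder always takes the numerator's sign (the divisor's sign is discarded). That is the same truncated division Go's / and % operators implement, so the fixup logic can be cross-checked in plain Go (divmod is a stand-in for the assembly pair and glosses over the INT32_MIN corner case):

	// divmod mimics _div/_mod: divide unsigned, then patch the signs.
	func divmod(n, d int32) (q, r int32) {
		un, ud := n, d
		if un < 0 {
			un = -un
		}
		if ud < 0 {
			ud = -ud
		}
		uq, ur := un/ud, un%ud // udiv's two results, quotient and remainder
		if (n < 0) != (d < 0) {
			uq = -uq // exactly one operand negative: negate the quotient
		}
		if n < 0 {
			ur = -ur // the remainder takes the numerator's sign
		}
		return uq, ur
	}

For any n and d != 0 this satisfies n == q*d + r with |r| < |d|, which is what the benchmarks above exercise through the compiler's calls to _div and _mod.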