Diffstat (limited to 'src/pkg/runtime')
55 files changed, 2141 insertions, 700 deletions
diff --git a/src/pkg/runtime/386/asm.s b/src/pkg/runtime/386/asm.s index e2cabef14..a14518839 100644 --- a/src/pkg/runtime/386/asm.s +++ b/src/pkg/runtime/386/asm.s @@ -28,15 +28,18 @@ TEXT _rt0_386(SB),7,$0 TESTL AX, AX JZ 4(PC) CALL AX + // skip runtime·ldt0setup(SB) and tls test after initcgo for non-windows CMPL runtime·iswindows(SB), $0 JEQ ok + // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases + CMPL runtime·isplan9(SB), $1 + JEQ ok + // set up %gs CALL runtime·ldt0setup(SB) // store through it, to make sure it works - CMPL runtime·isplan9(SB), $1 - JEQ ok get_tls(BX) MOVL $0x123, g(BX) MOVL runtime·tls0(SB), AX @@ -318,6 +321,45 @@ TEXT runtime·casp(SB), 7, $0 MOVL $1, AX RET +// uint32 xadd(uint32 volatile *val, int32 delta) +// Atomically: +// *val += delta; +// return *val; +TEXT runtime·xadd(SB), 7, $0 + MOVL 4(SP), BX + MOVL 8(SP), AX + MOVL AX, CX + LOCK + XADDL AX, 0(BX) + ADDL CX, AX + RET + +TEXT runtime·xchg(SB), 7, $0 + MOVL 4(SP), BX + MOVL 8(SP), AX + XCHGL AX, 0(BX) + RET + +TEXT runtime·procyield(SB),7,$0 + MOVL 4(SP), AX +again: + PAUSE + SUBL $1, AX + JNZ again + RET + +TEXT runtime·atomicstorep(SB), 7, $0 + MOVL 4(SP), BX + MOVL 8(SP), AX + XCHGL AX, 0(BX) + RET + +TEXT runtime·atomicstore(SB), 7, $0 + MOVL 4(SP), BX + MOVL 8(SP), AX + XCHGL AX, 0(BX) + RET + // void jmpdefer(fn, sp); // called from deferreturn. // 1. pop the caller @@ -460,12 +502,16 @@ TEXT runtime·stackcheck(SB), 7, $0 TEXT runtime·memclr(SB),7,$0 MOVL 4(SP), DI // arg 1 addr MOVL 8(SP), CX // arg 2 count - ADDL $3, CX + MOVL CX, BX + ANDL $3, BX SHRL $2, CX MOVL $0, AX CLD REP STOSL + MOVL BX, CX + REP + STOSB RET TEXT runtime·getcallerpc(SB),7,$0 diff --git a/src/pkg/runtime/386/atomic.c b/src/pkg/runtime/386/atomic.c index c031cc4f6..a4f2a114f 100644 --- a/src/pkg/runtime/386/atomic.c +++ b/src/pkg/runtime/386/atomic.c @@ -10,3 +10,10 @@ runtime·atomicload(uint32 volatile* addr) { return *addr; } + +#pragma textflag 7 +void* +runtime·atomicloadp(void* volatile* addr) +{ + return *addr; +} diff --git a/src/pkg/runtime/386/closure.c b/src/pkg/runtime/386/closure.c index b0d4cc41a..b4d867711 100644 --- a/src/pkg/runtime/386/closure.c +++ b/src/pkg/runtime/386/closure.c @@ -45,7 +45,7 @@ runtime·closure(int32 siz, byte *fn, byte *arg0) q = p + n - siz; if(siz > 0) { - runtime·mcpy(q, (byte*)&arg0, siz); + runtime·memmove(q, (byte*)&arg0, siz); // SUBL $siz, SP *p++ = 0x81; diff --git a/src/pkg/runtime/386/memmove.s b/src/pkg/runtime/386/memmove.s index 471553ba2..203a8187c 100644 --- a/src/pkg/runtime/386/memmove.s +++ b/src/pkg/runtime/386/memmove.s @@ -27,9 +27,6 @@ TEXT runtime·memmove(SB), 7, $0 MOVL to+0(FP), DI MOVL fr+4(FP), SI MOVL n+8(FP), BX - CMPL BX, $0 - JLT fault - /* * check and set for backwards */ @@ -87,12 +84,3 @@ back: MOVL to+0(FP),AX RET -/* - * if called with negative count, - * treat as error rather than - * rotating all of memory - */ -fault: - MOVL $0,SI - MOVL 0(SI), AX - RET diff --git a/src/pkg/runtime/Makefile b/src/pkg/runtime/Makefile index 03f960cb8..64bd2b771 100644 --- a/src/pkg/runtime/Makefile +++ b/src/pkg/runtime/Makefile @@ -120,7 +120,7 @@ $(GOARCH)/asm.h: mkasmh.sh runtime.acid.$(GOARCH) mv -f $@.x $@ goc2c: goc2c.c - quietgcc -o $@ $< + quietgcc -o $@ -I "$(GOROOT)/include" $< "$(GOROOT)/lib/lib9.a" mkversion: mkversion.c quietgcc -o $@ -I "$(GOROOT)/include" $< "$(GOROOT)/lib/lib9.a" diff --git a/src/pkg/runtime/amd64/asm.s b/src/pkg/runtime/amd64/asm.s index 46d82e365..3e3818c10 100644 --- a/src/pkg/runtime/amd64/asm.s +++ 
b/src/pkg/runtime/amd64/asm.s @@ -18,7 +18,8 @@ TEXT _rt0_amd64(SB),7,$-8 TESTQ AX, AX JZ needtls CALL AX - JMP ok + CMPL runtime·iswindows(SB), $0 + JEQ ok needtls: LEAQ runtime·tls0(SB), DI @@ -364,6 +365,45 @@ TEXT runtime·casp(SB), 7, $0 MOVL $1, AX RET +// uint32 xadd(uint32 volatile *val, int32 delta) +// Atomically: +// *val += delta; +// return *val; +TEXT runtime·xadd(SB), 7, $0 + MOVQ 8(SP), BX + MOVL 16(SP), AX + MOVL AX, CX + LOCK + XADDL AX, 0(BX) + ADDL CX, AX + RET + +TEXT runtime·xchg(SB), 7, $0 + MOVQ 8(SP), BX + MOVL 16(SP), AX + XCHGL AX, 0(BX) + RET + +TEXT runtime·procyield(SB),7,$0 + MOVL 8(SP), AX +again: + PAUSE + SUBL $1, AX + JNZ again + RET + +TEXT runtime·atomicstorep(SB), 7, $0 + MOVQ 8(SP), BX + MOVQ 16(SP), AX + XCHGQ AX, 0(BX) + RET + +TEXT runtime·atomicstore(SB), 7, $0 + MOVQ 8(SP), BX + MOVL 16(SP), AX + XCHGL AX, 0(BX) + RET + // void jmpdefer(fn, sp); // called from deferreturn. // 1. pop the caller @@ -413,6 +453,7 @@ TEXT runtime·asmcgocall(SB),7,$0 MOVQ DI, 16(SP) // save g MOVQ DX, 8(SP) // save SP MOVQ BX, DI // DI = first argument in AMD64 ABI + MOVQ BX, CX // CX = first argument in Win64 CALL AX // Restore registers, g, stack pointer. @@ -506,12 +547,16 @@ TEXT runtime·stackcheck(SB), 7, $0 TEXT runtime·memclr(SB),7,$0 MOVQ 8(SP), DI // arg 1 addr MOVQ 16(SP), CX // arg 2 count - ADDQ $7, CX + MOVQ CX, BX + ANDQ $7, BX SHRQ $3, CX MOVQ $0, AX CLD REP STOSQ + MOVQ BX, CX + REP + STOSB RET TEXT runtime·getcallerpc(SB),7,$0 diff --git a/src/pkg/runtime/amd64/atomic.c b/src/pkg/runtime/amd64/atomic.c index c031cc4f6..a4f2a114f 100644 --- a/src/pkg/runtime/amd64/atomic.c +++ b/src/pkg/runtime/amd64/atomic.c @@ -10,3 +10,10 @@ runtime·atomicload(uint32 volatile* addr) { return *addr; } + +#pragma textflag 7 +void* +runtime·atomicloadp(void* volatile* addr) +{ + return *addr; +} diff --git a/src/pkg/runtime/amd64/closure.c b/src/pkg/runtime/amd64/closure.c index 5033468d2..481b4a888 100644 --- a/src/pkg/runtime/amd64/closure.c +++ b/src/pkg/runtime/amd64/closure.c @@ -45,7 +45,7 @@ runtime·closure(int32 siz, byte *fn, byte *arg0) q = p + n - siz; if(siz > 0) { - runtime·mcpy(q, (byte*)&arg0, siz); + runtime·memmove(q, (byte*)&arg0, siz); // SUBQ $siz, SP *p++ = 0x48; diff --git a/src/pkg/runtime/amd64/memmove.s b/src/pkg/runtime/amd64/memmove.s index fc9573f72..e78be8145 100644 --- a/src/pkg/runtime/amd64/memmove.s +++ b/src/pkg/runtime/amd64/memmove.s @@ -28,8 +28,6 @@ TEXT runtime·memmove(SB), 7, $0 MOVQ to+0(FP), DI MOVQ fr+8(FP), SI MOVLQSX n+16(FP), BX - CMPQ BX, $0 - JLT fault /* * check and set for backwards @@ -88,12 +86,3 @@ back: MOVQ to+0(FP),AX RET -/* - * if called with negative count, - * treat as error rather than - * rotating all of memory - */ -fault: - MOVQ $0,SI - MOVQ 0(SI), AX - RET diff --git a/src/pkg/runtime/amd64/traceback.c b/src/pkg/runtime/amd64/traceback.c index d422cb692..3e85d36bd 100644 --- a/src/pkg/runtime/amd64/traceback.c +++ b/src/pkg/runtime/amd64/traceback.c @@ -10,6 +10,7 @@ void runtime·deferproc(void); void runtime·newproc(void); void runtime·newstack(void); void runtime·morestack(void); +void runtime·sigpanic(void); // This code is also used for the 386 tracebacks. // Use uintptr for an appropriate word-sized integer. @@ -27,11 +28,13 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr byte *fp; Stktop *stk; Func *f; + bool waspanic; USED(lr0); pc = (uintptr)pc0; lr = 0; fp = nil; + waspanic = false; // If the PC is goexit, the goroutine hasn't started yet. 
if(pc0 == g->sched.pc && sp == g->sched.sp && pc0 == (byte*)runtime·goexit) { @@ -127,7 +130,7 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr if(pc > f->entry) runtime·printf("+%p", (uintptr)(pc - f->entry)); tracepc = pc; // back up to CALL instruction for funcline. - if(n > 0 && pc > f->entry) + if(n > 0 && pc > f->entry && !waspanic) tracepc--; runtime·printf(" %S:%d\n", f->src, runtime·funcline(f, tracepc)); runtime·printf("\t%S(", f->name); @@ -144,6 +147,8 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr n++; } + waspanic = f->entry == (uintptr)runtime·sigpanic; + if(f->entry == (uintptr)runtime·deferproc || f->entry == (uintptr)runtime·newproc) fp += 2*sizeof(uintptr); diff --git a/src/pkg/runtime/append_test.go b/src/pkg/runtime/append_test.go index 75a635306..b8552224e 100644 --- a/src/pkg/runtime/append_test.go +++ b/src/pkg/runtime/append_test.go @@ -36,7 +36,7 @@ func BenchmarkAppendSpecialCase(b *testing.B) { } } -var x = make([]int, 0, 10) +var x []int func f() int { x[:1][0] = 3 @@ -44,6 +44,7 @@ func f() int { } func TestSideEffectOrder(t *testing.T) { + x = make([]int, 0, 10) x = append(x, 1, f()) if x[0] != 1 || x[1] != 2 { t.Error("append failed: ", x[0], x[1]) diff --git a/src/pkg/runtime/arm/atomic.c b/src/pkg/runtime/arm/atomic.c index 9fd47bae7..52e4059ae 100644 --- a/src/pkg/runtime/arm/atomic.c +++ b/src/pkg/runtime/arm/atomic.c @@ -4,9 +4,80 @@ #include "runtime.h" +// Atomic add and return new value. +#pragma textflag 7 +uint32 +runtime·xadd(uint32 volatile *val, int32 delta) +{ + uint32 oval, nval; + + for(;;){ + oval = *val; + nval = oval + delta; + if(runtime·cas(val, oval, nval)) + return nval; + } +} + +#pragma textflag 7 +uint32 +runtime·xchg(uint32 volatile* addr, uint32 v) +{ + uint32 old; + + for(;;) { + old = *addr; + if(runtime·cas(addr, old, v)) + return old; + } +} + +#pragma textflag 7 +void +runtime·procyield(uint32 cnt) +{ + uint32 volatile i; + + for(i = 0; i < cnt; i++) { + } +} + #pragma textflag 7 uint32 runtime·atomicload(uint32 volatile* addr) { return runtime·xadd(addr, 0); } + +#pragma textflag 7 +void* +runtime·atomicloadp(void* volatile* addr) +{ + return (void*)runtime·xadd((uint32 volatile*)addr, 0); +} + +#pragma textflag 7 +void +runtime·atomicstorep(void* volatile* addr, void* v) +{ + void *old; + + for(;;) { + old = *addr; + if(runtime·casp(addr, old, v)) + return; + } +} + +#pragma textflag 7 +void +runtime·atomicstore(uint32 volatile* addr, uint32 v) +{ + uint32 old; + + for(;;) { + old = *addr; + if(runtime·cas(addr, old, v)) + return; + } +}
\ No newline at end of file diff --git a/src/pkg/runtime/arm/closure.c b/src/pkg/runtime/arm/closure.c index 36a93bc53..119e91b61 100644 --- a/src/pkg/runtime/arm/closure.c +++ b/src/pkg/runtime/arm/closure.c @@ -83,7 +83,7 @@ runtime·closure(int32 siz, byte *fn, byte *arg0) *pc++ = 0xe52de000 | (siz + 4); if(siz > 0) { - runtime·mcpy(q, (byte*)&arg0, siz); + runtime·memmove(q, (byte*)&arg0, siz); // MOVW $vars(PC), R0 *pc = 0xe28f0000 | (int32)(q - (byte*)pc - 8); diff --git a/src/pkg/runtime/arm/traceback.c b/src/pkg/runtime/arm/traceback.c index c3934c37c..5628b8349 100644 --- a/src/pkg/runtime/arm/traceback.c +++ b/src/pkg/runtime/arm/traceback.c @@ -9,6 +9,7 @@ void runtime·deferproc(void); void runtime·newproc(void); void runtime·newstack(void); void runtime·morestack(void); +void runtime·sigpanic(void); void _div(void); void _mod(void); void _divu(void); @@ -20,12 +21,14 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr int32 i, n, iter; uintptr pc, lr, tracepc, x; byte *fp, *p; + bool waspanic; Stktop *stk; Func *f; pc = (uintptr)pc0; lr = (uintptr)lr0; fp = nil; + waspanic = false; // If the PC is goexit, the goroutine hasn't started yet. if(pc == (uintptr)runtime·goexit) { @@ -121,7 +124,7 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr if(pc > f->entry) runtime·printf("+%p", (uintptr)(pc - f->entry)); tracepc = pc; // back up to CALL instruction for funcline. - if(n > 0 && pc > f->entry) + if(n > 0 && pc > f->entry && !waspanic) tracepc -= sizeof(uintptr); runtime·printf(" %S:%d\n", f->src, runtime·funcline(f, tracepc)); runtime·printf("\t%S(", f->name); @@ -137,6 +140,8 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr runtime·prints(")\n"); n++; } + + waspanic = f->entry == (uintptr)runtime·sigpanic; if(pcbuf == nil && f->entry == (uintptr)runtime·newstack && g == m->g0) { runtime·printf("----- newstack called from goroutine %d -----\n", m->curg->goid); diff --git a/src/pkg/runtime/cgo/windows_amd64.c b/src/pkg/runtime/cgo/windows_amd64.c index fd5b397ab..e8313e250 100755 --- a/src/pkg/runtime/cgo/windows_amd64.c +++ b/src/pkg/runtime/cgo/windows_amd64.c @@ -30,6 +30,7 @@ static void* threadentry(void *v) { ThreadStart ts; + void *tls0; ts = *(ThreadStart*)v; free(v); @@ -45,11 +46,13 @@ threadentry(void *v) /* * Set specific keys in thread local storage. */ + tls0 = (void*)LocalAlloc(LPTR, 64); asm volatile ( + "movq %0, %%gs:0x58\n" // MOVL tls0, 0x58(GS) "movq %%gs:0x58, %%rax\n" // MOVQ 0x58(GS), tmp - "movq %0, 0(%%rax)\n" // MOVQ g, 0(GS) - "movq %1, 8(%%rax)\n" // MOVQ m, 8(GS) - :: "r"(ts.g), "r"(ts.m) : "%rax" + "movq %1, 0(%%rax)\n" // MOVQ g, 0(GS) + "movq %2, 8(%%rax)\n" // MOVQ m, 8(GS) + :: "r"(tls0), "r"(ts.g), "r"(ts.m) : "%rax" ); crosscall_amd64(ts.fn); diff --git a/src/pkg/runtime/cgocall.c b/src/pkg/runtime/cgocall.c index 58f287e90..829448b02 100644 --- a/src/pkg/runtime/cgocall.c +++ b/src/pkg/runtime/cgocall.c @@ -83,7 +83,6 @@ // callee-save registers for gcc and returns to GoF, which returns to f. 
void *initcgo; /* filled in by dynamic linker when Cgo is available */ -int64 ncgocall; static void unlockm(void); static void unwindm(void); @@ -101,7 +100,7 @@ runtime·cgocall(void (*fn)(void*), void *arg) if(fn == 0) runtime·throw("cgocall nil"); - ncgocall++; + m->ncgocall++; /* * Lock g to m to ensure we stay on the same stack if we do a @@ -155,7 +154,11 @@ unlockm(void) void runtime·Cgocalls(int64 ret) { - ret = ncgocall; + M *m; + + ret = 0; + for(m=runtime·atomicloadp(&runtime·allm); m; m=m->alllink) + ret += m->ncgocall; FLUSH(&ret); } diff --git a/src/pkg/runtime/chan.c b/src/pkg/runtime/chan.c index f94c3ef40..b77e51b60 100644 --- a/src/pkg/runtime/chan.c +++ b/src/pkg/runtime/chan.c @@ -6,6 +6,7 @@ #include "type.h" #define MAXALIGN 7 +#define NOSELGEN 1 static int32 debug = 0; @@ -18,10 +19,8 @@ struct SudoG { G* g; // g and selgen constitute uint32 selgen; // a weak pointer to g - int16 offset; // offset of case number - int8 isfree; // offset of case number SudoG* link; - byte elem[8]; // synch data element (+ more) + byte* elem; // data element }; struct WaitQ @@ -38,11 +37,10 @@ struct Hchan bool closed; uint8 elemalign; Alg* elemalg; // interface for element type - uint32 sendx; // send index - uint32 recvx; // receive index + uint32 sendx; // send index + uint32 recvx; // receive index WaitQ recvq; // list of recv waiters WaitQ sendq; // list of send waiters - SudoG* free; // freelist Lock; }; @@ -60,34 +58,26 @@ enum struct Scase { + SudoG sg; // must be first member (cast to Scase) Hchan* chan; // chan byte* pc; // return pc uint16 kind; uint16 so; // vararg of selected bool - union { - byte elem[2*sizeof(void*)]; // element (send) - struct { - byte* elemp; // pointer to element (recv) - bool* receivedp; // pointer to received bool (recv2) - } recv; - } u; + bool* receivedp; // pointer to received bool (recv2) }; struct Select { uint16 tcase; // total count of scase[] uint16 ncase; // currently filled scase[] - Select* link; // for freelist - uint16* order; - Scase* scase[1]; // one per case + uint16* pollorder; // case poll order + Hchan** lockorder; // channel lock order + Scase scase[1]; // one per case (in order of appearance) }; -static void dequeueg(WaitQ*, Hchan*); -static SudoG* dequeue(WaitQ*, Hchan*); +static void dequeueg(WaitQ*); +static SudoG* dequeue(WaitQ*); static void enqueue(WaitQ*, SudoG*); -static SudoG* allocsg(Hchan*); -static void freesg(Hchan*, SudoG*); -static uint32 fastrandn(uint32); static void destroychan(Hchan*); Hchan* @@ -97,7 +87,7 @@ runtime·makechan_c(Type *elem, int64 hint) int32 n; byte *by; - if(hint < 0 || (int32)hint != hint || hint > ((uintptr)-1) / elem->size) + if(hint < 0 || (int32)hint != hint || (elem->size > 0 && hint > ((uintptr)-1) / elem->size)) runtime·panicstring("makechan: size out of range"); if(elem->alg >= nelem(runtime·algarray)) { @@ -170,6 +160,7 @@ void runtime·chansend(Hchan *c, byte *ep, bool *pres) { SudoG *sg; + SudoG mysg; G* gp; if(c == nil) @@ -185,21 +176,20 @@ runtime·chansend(Hchan *c, byte *ep, bool *pres) } runtime·lock(c); -loop: if(c->closed) goto closed; if(c->dataqsiz > 0) goto asynch; - sg = dequeue(&c->recvq, c); + sg = dequeue(&c->recvq); if(sg != nil) { - if(ep != nil) - c->elemalg->copy(c->elemsize, sg->elem, ep); - + runtime·unlock(c); + gp = sg->g; gp->param = sg; - runtime·unlock(c); + if(sg->elem != nil) + c->elemalg->copy(c->elemsize, sg->elem, ep); runtime·ready(gp); if(pres != nil) @@ -213,21 +203,22 @@ loop: return; } - sg = allocsg(c); - if(ep != nil) - 
c->elemalg->copy(c->elemsize, sg->elem, ep); + mysg.elem = ep; + mysg.g = g; + mysg.selgen = NOSELGEN; g->param = nil; g->status = Gwaiting; - enqueue(&c->sendq, sg); + enqueue(&c->sendq, &mysg); runtime·unlock(c); runtime·gosched(); - runtime·lock(c); - sg = g->param; - if(sg == nil) - goto loop; - freesg(c, sg); - runtime·unlock(c); + if(g->param == nil) { + runtime·lock(c); + if(!c->closed) + runtime·throw("chansend: spurious wakeup"); + goto closed; + } + return; asynch: @@ -240,25 +231,25 @@ asynch: *pres = false; return; } - sg = allocsg(c); + mysg.g = g; + mysg.elem = nil; + mysg.selgen = NOSELGEN; g->status = Gwaiting; - enqueue(&c->sendq, sg); + enqueue(&c->sendq, &mysg); runtime·unlock(c); runtime·gosched(); runtime·lock(c); goto asynch; } - if(ep != nil) - c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), ep); + c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), ep); if(++c->sendx == c->dataqsiz) c->sendx = 0; c->qcount++; - sg = dequeue(&c->recvq, c); + sg = dequeue(&c->recvq); if(sg != nil) { gp = sg->g; - freesg(c, sg); runtime·unlock(c); runtime·ready(gp); } else @@ -277,6 +268,7 @@ void runtime·chanrecv(Hchan* c, byte *ep, bool *selected, bool *received) { SudoG *sg; + SudoG mysg; G *gp; if(c == nil) @@ -289,23 +281,20 @@ runtime·chanrecv(Hchan* c, byte *ep, bool *selected, bool *received) runtime·printf("chanrecv: chan=%p\n", c); runtime·lock(c); - -loop: if(c->dataqsiz > 0) goto asynch; if(c->closed) goto closed; - sg = dequeue(&c->sendq, c); + sg = dequeue(&c->sendq); if(sg != nil) { + runtime·unlock(c); + if(ep != nil) c->elemalg->copy(c->elemsize, ep, sg->elem); - c->elemalg->copy(c->elemsize, sg->elem, nil); - gp = sg->g; gp->param = sg; - runtime·unlock(c); runtime·ready(gp); if(selected != nil) @@ -321,25 +310,24 @@ loop: return; } - sg = allocsg(c); + mysg.elem = ep; + mysg.g = g; + mysg.selgen = NOSELGEN; g->param = nil; g->status = Gwaiting; - enqueue(&c->recvq, sg); + enqueue(&c->recvq, &mysg); runtime·unlock(c); runtime·gosched(); - runtime·lock(c); - sg = g->param; - if(sg == nil) - goto loop; + if(g->param == nil) { + runtime·lock(c); + if(!c->closed) + runtime·throw("chanrecv: spurious wakeup"); + goto closed; + } - if(ep != nil) - c->elemalg->copy(c->elemsize, ep, sg->elem); - c->elemalg->copy(c->elemsize, sg->elem, nil); if(received != nil) *received = true; - freesg(c, sg); - runtime·unlock(c); return; asynch: @@ -354,9 +342,11 @@ asynch: *received = false; return; } - sg = allocsg(c); + mysg.g = g; + mysg.elem = nil; + mysg.selgen = NOSELGEN; g->status = Gwaiting; - enqueue(&c->recvq, sg); + enqueue(&c->recvq, &mysg); runtime·unlock(c); runtime·gosched(); @@ -369,10 +359,10 @@ asynch: if(++c->recvx == c->dataqsiz) c->recvx = 0; c->qcount--; - sg = dequeue(&c->sendq, c); + + sg = dequeue(&c->sendq); if(sg != nil) { gp = sg->g; - freesg(c, sg); runtime·unlock(c); runtime·ready(gp); } else @@ -437,7 +427,7 @@ runtime·chanrecv2(Hchan* c, ...) 
o = runtime·rnd(sizeof(c), Structrnd); ae = (byte*)&c + o; - o = runtime·rnd(o+c->elemsize, 1); + o += c->elemsize; ac = (byte*)&c + o; runtime·chanrecv(c, ae, nil, ac); @@ -619,57 +609,56 @@ newselect(int32 size, Select **selp) if(size > 1) n = size-1; - sel = runtime·mal(sizeof(*sel) + n*sizeof(sel->scase[0]) + size*sizeof(sel->order[0])); + sel = runtime·mal(sizeof(*sel) + + n*sizeof(sel->scase[0]) + + size*sizeof(sel->lockorder[0]) + + size*sizeof(sel->pollorder[0])); sel->tcase = size; sel->ncase = 0; - sel->order = (void*)(sel->scase + size); + sel->pollorder = (void*)(sel->scase + size); + sel->lockorder = (void*)(sel->pollorder + size); *selp = sel; + if(debug) runtime·printf("newselect s=%p size=%d\n", sel, size); } // cut in half to give stack a chance to split -static void selectsend(Select **selp, Hchan *c, void *pc); +static void selectsend(Select *sel, Hchan *c, void *pc, void *elem, int32 so); -// selectsend(sel *byte, hchan *chan any, elem any) (selected bool); +// selectsend(sel *byte, hchan *chan any, elem *any) (selected bool); #pragma textflag 7 void -runtime·selectsend(Select *sel, Hchan *c, ...) +runtime·selectsend(Select *sel, Hchan *c, void *elem, bool selected) { + selected = false; + FLUSH(&selected); + // nil cases do not compete if(c == nil) return; - selectsend(&sel, c, runtime·getcallerpc(&sel)); + selectsend(sel, c, runtime·getcallerpc(&sel), elem, (byte*)&selected - (byte*)&sel); } static void -selectsend(Select **selp, Hchan *c, void *pc) +selectsend(Select *sel, Hchan *c, void *pc, void *elem, int32 so) { - int32 i, eo; + int32 i; Scase *cas; - byte *ae; - Select *sel; - sel = *selp; i = sel->ncase; if(i >= sel->tcase) runtime·throw("selectsend: too many cases"); sel->ncase = i+1; - cas = runtime·mal(sizeof *cas + c->elemsize - sizeof(cas->u.elem)); - sel->scase[i] = cas; + cas = &sel->scase[i]; cas->pc = pc; cas->chan = c; - - eo = runtime·rnd(sizeof(sel), sizeof(c)); - eo = runtime·rnd(eo+sizeof(c), c->elemsize); - cas->so = runtime·rnd(eo+c->elemsize, Structrnd); + cas->so = so; cas->kind = CaseSend; - - ae = (byte*)selp + eo; - c->elemalg->copy(c->elemsize, cas->u.elem, ae); + cas->sg.elem = elem; if(debug) runtime·printf("selectsend s=%p pc=%p chan=%p so=%d\n", @@ -684,6 +673,9 @@ static void selectrecv(Select *sel, Hchan *c, void *pc, void *elem, bool*, int32 void runtime·selectrecv(Select *sel, Hchan *c, void *elem, bool selected) { + selected = false; + FLUSH(&selected); + // nil cases do not compete if(c == nil) return; @@ -696,6 +688,9 @@ runtime·selectrecv(Select *sel, Hchan *c, void *elem, bool selected) void runtime·selectrecv2(Select *sel, Hchan *c, void *elem, bool *received, bool selected) { + selected = false; + FLUSH(&selected); + // nil cases do not compete if(c == nil) return; @@ -713,16 +708,14 @@ selectrecv(Select *sel, Hchan *c, void *pc, void *elem, bool *received, int32 so if(i >= sel->tcase) runtime·throw("selectrecv: too many cases"); sel->ncase = i+1; - cas = runtime·mal(sizeof *cas); - sel->scase[i] = cas; + cas = &sel->scase[i]; cas->pc = pc; cas->chan = c; cas->so = so; cas->kind = CaseRecv; - cas->u.recv.elemp = elem; - cas->u.recv.receivedp = nil; - cas->u.recv.receivedp = received; + cas->sg.elem = elem; + cas->receivedp = received; if(debug) runtime·printf("selectrecv s=%p pc=%p chan=%p so=%d\n", @@ -737,6 +730,9 @@ static void selectdefault(Select*, void*, int32); void runtime·selectdefault(Select *sel, bool selected) { + selected = false; + FLUSH(&selected); + selectdefault(sel, runtime·getcallerpc(&sel), 
(byte*)&selected - (byte*)&sel); } @@ -750,8 +746,7 @@ selectdefault(Select *sel, void *callerpc, int32 so) if(i >= sel->tcase) runtime·throw("selectdefault: too many cases"); sel->ncase = i+1; - cas = runtime·mal(sizeof *cas); - sel->scase[i] = cas; + cas = &sel->scase[i]; cas->pc = callerpc; cas->chan = nil; @@ -764,25 +759,16 @@ selectdefault(Select *sel, void *callerpc, int32 so) } static void -freesel(Select *sel) -{ - uint32 i; - - for(i=0; i<sel->ncase; i++) - runtime·free(sel->scase[i]); - runtime·free(sel); -} - -static void sellock(Select *sel) { uint32 i; - Hchan *c; + Hchan *c, *c0; c = nil; for(i=0; i<sel->ncase; i++) { - if(sel->scase[i]->chan != c) { - c = sel->scase[i]->chan; + c0 = sel->lockorder[i]; + if(c0 && c0 != c) { + c = sel->lockorder[i]; runtime·lock(c); } } @@ -792,12 +778,13 @@ static void selunlock(Select *sel) { uint32 i; - Hchan *c; + Hchan *c, *c0; c = nil; - for(i=sel->ncase; i>0; i--) { - if(sel->scase[i-1]->chan && sel->scase[i-1]->chan != c) { - c = sel->scase[i-1]->chan; + for(i=sel->ncase; i-->0;) { + c0 = sel->lockorder[i]; + if(c0 && c0 != c) { + c = c0; runtime·unlock(c); } } @@ -852,20 +839,20 @@ selectgo(Select **selp) // generate permuted order for(i=0; i<sel->ncase; i++) - sel->order[i] = i; + sel->pollorder[i] = i; for(i=1; i<sel->ncase; i++) { - o = sel->order[i]; - j = fastrandn(i+1); - sel->order[i] = sel->order[j]; - sel->order[j] = o; + o = sel->pollorder[i]; + j = runtime·fastrand1()%(i+1); + sel->pollorder[i] = sel->pollorder[j]; + sel->pollorder[j] = o; } // sort the cases by Hchan address to get the locking order. - for(i=1; i<sel->ncase; i++) { - cas = sel->scase[i]; - for(j=i; j>0 && sel->scase[j-1]->chan >= cas->chan; j--) - sel->scase[j] = sel->scase[j-1]; - sel->scase[j] = cas; + for(i=0; i<sel->ncase; i++) { + c = sel->scase[i].chan; + for(j=i; j>0 && sel->lockorder[j-1] >= c; j--) + sel->lockorder[j] = sel->lockorder[j-1]; + sel->lockorder[j] = c; } sellock(sel); @@ -873,8 +860,8 @@ loop: // pass 1 - look for something already waiting dfl = nil; for(i=0; i<sel->ncase; i++) { - o = sel->order[i]; - cas = sel->scase[o]; + o = sel->pollorder[i]; + cas = &sel->scase[o]; c = cas->chan; switch(cas->kind) { @@ -883,7 +870,7 @@ loop: if(c->qcount > 0) goto asyncrecv; } else { - sg = dequeue(&c->sendq, c); + sg = dequeue(&c->sendq); if(sg != nil) goto syncrecv; } @@ -898,7 +885,7 @@ loop: if(c->qcount < c->dataqsiz) goto asyncsend; } else { - sg = dequeue(&c->recvq, c); + sg = dequeue(&c->recvq); if(sg != nil) goto syncsend; } @@ -911,6 +898,7 @@ loop: } if(dfl != nil) { + selunlock(sel); cas = dfl; goto retc; } @@ -918,11 +906,11 @@ loop: // pass 2 - enqueue on all chans for(i=0; i<sel->ncase; i++) { - o = sel->order[i]; - cas = sel->scase[o]; + cas = &sel->scase[i]; c = cas->chan; - sg = allocsg(c); - sg->offset = o; + sg = &cas->sg; + sg->g = g; + sg->selgen = g->selgen; switch(cas->kind) { case CaseRecv: @@ -930,8 +918,6 @@ loop: break; case CaseSend: - if(c->dataqsiz == 0) - c->elemalg->copy(c->elemsize, sg->elem, cas->u.elem); enqueue(&c->sendq, sg); break; } @@ -948,85 +934,82 @@ loop: // pass 3 - dequeue from unsuccessful chans // otherwise they stack up on quiet channels for(i=0; i<sel->ncase; i++) { - if(sg == nil || i != sg->offset) { - cas = sel->scase[i]; + cas = &sel->scase[i]; + if(cas != (Scase*)sg) { c = cas->chan; if(cas->kind == CaseSend) - dequeueg(&c->sendq, c); + dequeueg(&c->sendq); else - dequeueg(&c->recvq, c); + dequeueg(&c->recvq); } } if(sg == nil) goto loop; - o = sg->offset; - cas = sel->scase[o]; + cas = 
(Scase*)sg; c = cas->chan; - if(c->dataqsiz > 0) { -// prints("shouldnt happen\n"); - goto loop; - } + if(c->dataqsiz > 0) + runtime·throw("selectgo: shouldnt happen"); if(debug) - runtime·printf("wait-return: sel=%p c=%p cas=%p kind=%d o=%d\n", - sel, c, cas, cas->kind, o); + runtime·printf("wait-return: sel=%p c=%p cas=%p kind=%d\n", + sel, c, cas, cas->kind); if(cas->kind == CaseRecv) { - if(cas->u.recv.receivedp != nil) - *cas->u.recv.receivedp = true; - if(cas->u.recv.elemp != nil) - c->elemalg->copy(c->elemsize, cas->u.recv.elemp, sg->elem); - c->elemalg->copy(c->elemsize, sg->elem, nil); + if(cas->receivedp != nil) + *cas->receivedp = true; } - freesg(c, sg); + selunlock(sel); goto retc; asyncrecv: // can receive from buffer - if(cas->u.recv.receivedp != nil) - *cas->u.recv.receivedp = true; - if(cas->u.recv.elemp != nil) - c->elemalg->copy(c->elemsize, cas->u.recv.elemp, chanbuf(c, c->recvx)); + if(cas->receivedp != nil) + *cas->receivedp = true; + if(cas->sg.elem != nil) + c->elemalg->copy(c->elemsize, cas->sg.elem, chanbuf(c, c->recvx)); c->elemalg->copy(c->elemsize, chanbuf(c, c->recvx), nil); if(++c->recvx == c->dataqsiz) c->recvx = 0; c->qcount--; - sg = dequeue(&c->sendq, c); + sg = dequeue(&c->sendq); if(sg != nil) { gp = sg->g; - freesg(c, sg); + selunlock(sel); runtime·ready(gp); + } else { + selunlock(sel); } goto retc; asyncsend: // can send to buffer - if(cas->u.elem != nil) - c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), cas->u.elem); + c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), cas->sg.elem); if(++c->sendx == c->dataqsiz) c->sendx = 0; c->qcount++; - sg = dequeue(&c->recvq, c); + sg = dequeue(&c->recvq); if(sg != nil) { gp = sg->g; - freesg(c, sg); + selunlock(sel); runtime·ready(gp); + } else { + selunlock(sel); } goto retc; syncrecv: // can receive from sleeping sender (sg) + selunlock(sel); if(debug) runtime·printf("syncrecv: sel=%p c=%p o=%d\n", sel, c, o); - if(cas->u.recv.receivedp != nil) - *cas->u.recv.receivedp = true; - if(cas->u.recv.elemp != nil) - c->elemalg->copy(c->elemsize, cas->u.recv.elemp, sg->elem); - c->elemalg->copy(c->elemsize, sg->elem, nil); + if(cas->receivedp != nil) + *cas->receivedp = true; + if(cas->sg.elem != nil) + c->elemalg->copy(c->elemsize, cas->sg.elem, sg->elem); gp = sg->g; gp->param = sg; runtime·ready(gp); @@ -1034,30 +1017,28 @@ syncrecv: rclose: // read at end of closed channel - if(cas->u.recv.receivedp != nil) - *cas->u.recv.receivedp = false; - if(cas->u.recv.elemp != nil) - c->elemalg->copy(c->elemsize, cas->u.recv.elemp, nil); + selunlock(sel); + if(cas->receivedp != nil) + *cas->receivedp = false; + if(cas->sg.elem != nil) + c->elemalg->copy(c->elemsize, cas->sg.elem, nil); goto retc; syncsend: // can send to sleeping receiver (sg) + selunlock(sel); if(debug) runtime·printf("syncsend: sel=%p c=%p o=%d\n", sel, c, o); - if(c->closed) - goto sclose; - c->elemalg->copy(c->elemsize, sg->elem, cas->u.elem); + c->elemalg->copy(c->elemsize, sg->elem, cas->sg.elem); gp = sg->g; gp->param = sg; runtime·ready(gp); retc: - selunlock(sel); - // return to pc corresponding to chosen case pc = cas->pc; as = (byte*)selp + cas->so; - freesel(sel); + runtime·free(sel); *as = true; return pc; @@ -1088,23 +1069,21 @@ runtime·closechan(Hchan *c) // release all readers for(;;) { - sg = dequeue(&c->recvq, c); + sg = dequeue(&c->recvq); if(sg == nil) break; gp = sg->g; gp->param = nil; - freesg(c, sg); runtime·ready(gp); } // release all writers for(;;) { - sg = dequeue(&c->sendq, c); + sg = dequeue(&c->sendq); if(sg == nil) 
break; gp = sg->g; gp->param = nil; - freesg(c, sg); runtime·ready(gp); } @@ -1144,7 +1123,7 @@ reflect·chancap(Hchan *c, int32 cap) } static SudoG* -dequeue(WaitQ *q, Hchan *c) +dequeue(WaitQ *q) { SudoG *sgp; @@ -1155,9 +1134,10 @@ loop: q->first = sgp->link; // if sgp is stale, ignore it - if(!runtime·cas(&sgp->g->selgen, sgp->selgen, sgp->selgen + 1)) { + if(sgp->selgen != NOSELGEN && + (sgp->selgen != sgp->g->selgen || + !runtime·cas(&sgp->g->selgen, sgp->selgen, sgp->selgen + 2))) { //prints("INVALID PSEUDOG POINTER\n"); - freesg(c, sgp); goto loop; } @@ -1165,14 +1145,16 @@ loop: } static void -dequeueg(WaitQ *q, Hchan *c) +dequeueg(WaitQ *q) { - SudoG **l, *sgp; - - for(l=&q->first; (sgp=*l) != nil; l=&sgp->link) { + SudoG **l, *sgp, *prevsgp; + + prevsgp = nil; + for(l=&q->first; (sgp=*l) != nil; l=&sgp->link, prevsgp=sgp) { if(sgp->g == g) { *l = sgp->link; - freesg(c, sgp); + if(q->last == sgp) + q->last = prevsgp; break; } } @@ -1190,62 +1172,3 @@ enqueue(WaitQ *q, SudoG *sgp) q->last->link = sgp; q->last = sgp; } - -static SudoG* -allocsg(Hchan *c) -{ - SudoG* sg; - - sg = c->free; - if(sg != nil) { - c->free = sg->link; - } else - sg = runtime·mal(sizeof(*sg) + c->elemsize - sizeof(sg->elem)); - sg->selgen = g->selgen; - sg->g = g; - sg->offset = 0; - sg->isfree = 0; - - return sg; -} - -static void -freesg(Hchan *c, SudoG *sg) -{ - if(sg != nil) { - if(sg->isfree) - runtime·throw("chan.freesg: already free"); - sg->isfree = 1; - sg->link = c->free; - c->free = sg; - } -} - -static uint32 -fastrand1(void) -{ - static uint32 x = 0x49f6428aUL; - - x += x; - if(x & 0x80000000L) - x ^= 0x88888eefUL; - return x; -} - -static uint32 -fastrandn(uint32 n) -{ - uint32 max, r; - - if(n <= 1) - return 0; - - r = fastrand1(); - if(r < (1ULL<<31)-n) // avoid computing max in common case - return r%n; - - max = (1ULL<<31)/n * n; - while(r >= max) - r = fastrand1(); - return r%n; -} diff --git a/src/pkg/runtime/chan_test.go b/src/pkg/runtime/chan_test.go new file mode 100644 index 000000000..c5ffe93ac --- /dev/null +++ b/src/pkg/runtime/chan_test.go @@ -0,0 +1,267 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package runtime_test + +import ( + "runtime" + "sync/atomic" + "testing" +) + +func TestChanSendInterface(t *testing.T) { + type mt struct{} + m := &mt{} + c := make(chan interface{}, 1) + c <- m + select { + case c <- m: + default: + } + select { + case c <- m: + case c <- &mt{}: + default: + } +} + +func BenchmarkSelectUncontended(b *testing.B) { + const CallsPerSched = 1000 + procs := runtime.GOMAXPROCS(-1) + N := int32(b.N / CallsPerSched) + c := make(chan bool, procs) + for p := 0; p < procs; p++ { + go func() { + myc1 := make(chan int, 1) + myc2 := make(chan int, 1) + myc1 <- 0 + for atomic.AddInt32(&N, -1) >= 0 { + for g := 0; g < CallsPerSched; g++ { + select { + case <-myc1: + myc2 <- 0 + case <-myc2: + myc1 <- 0 + } + } + } + c <- true + }() + } + for p := 0; p < procs; p++ { + <-c + } +} + +func BenchmarkSelectContended(b *testing.B) { + const CallsPerSched = 1000 + procs := runtime.GOMAXPROCS(-1) + N := int32(b.N / CallsPerSched) + c := make(chan bool, procs) + myc1 := make(chan int, procs) + myc2 := make(chan int, procs) + for p := 0; p < procs; p++ { + myc1 <- 0 + go func() { + for atomic.AddInt32(&N, -1) >= 0 { + for g := 0; g < CallsPerSched; g++ { + select { + case <-myc1: + myc2 <- 0 + case <-myc2: + myc1 <- 0 + } + } + } + c <- true + }() + } + for p := 0; p < procs; p++ { + <-c + } +} + +func BenchmarkSelectNonblock(b *testing.B) { + const CallsPerSched = 1000 + procs := runtime.GOMAXPROCS(-1) + N := int32(b.N / CallsPerSched) + c := make(chan bool, procs) + for p := 0; p < procs; p++ { + go func() { + myc1 := make(chan int) + myc2 := make(chan int) + myc3 := make(chan int, 1) + myc4 := make(chan int, 1) + for atomic.AddInt32(&N, -1) >= 0 { + for g := 0; g < CallsPerSched; g++ { + select { + case <-myc1: + default: + } + select { + case myc2 <- 0: + default: + } + select { + case <-myc3: + default: + } + select { + case myc4 <- 0: + default: + } + } + } + c <- true + }() + } + for p := 0; p < procs; p++ { + <-c + } +} + +func BenchmarkChanUncontended(b *testing.B) { + const CallsPerSched = 1000 + procs := runtime.GOMAXPROCS(-1) + N := int32(b.N / CallsPerSched) + c := make(chan bool, procs) + for p := 0; p < procs; p++ { + go func() { + myc := make(chan int, CallsPerSched) + for atomic.AddInt32(&N, -1) >= 0 { + for g := 0; g < CallsPerSched; g++ { + myc <- 0 + } + for g := 0; g < CallsPerSched; g++ { + <-myc + } + } + c <- true + }() + } + for p := 0; p < procs; p++ { + <-c + } +} + +func BenchmarkChanContended(b *testing.B) { + const CallsPerSched = 1000 + procs := runtime.GOMAXPROCS(-1) + N := int32(b.N / CallsPerSched) + c := make(chan bool, procs) + myc := make(chan int, procs*CallsPerSched) + for p := 0; p < procs; p++ { + go func() { + for atomic.AddInt32(&N, -1) >= 0 { + for g := 0; g < CallsPerSched; g++ { + myc <- 0 + } + for g := 0; g < CallsPerSched; g++ { + <-myc + } + } + c <- true + }() + } + for p := 0; p < procs; p++ { + <-c + } +} + +func BenchmarkChanSync(b *testing.B) { + const CallsPerSched = 1000 + procs := 2 + N := int32(b.N / CallsPerSched / procs * procs) + c := make(chan bool, procs) + myc := make(chan int) + for p := 0; p < procs; p++ { + go func() { + for { + i := atomic.AddInt32(&N, -1) + if i < 0 { + break + } + for g := 0; g < CallsPerSched; g++ { + if i%2 == 0 { + <-myc + myc <- 0 + } else { + myc <- 0 + <-myc + } + } + } + c <- true + }() + } + for p := 0; p < procs; p++ { + <-c + } +} + +func benchmarkChanProdCons(b *testing.B, chanSize, localWork int) { + const CallsPerSched = 1000 + procs := runtime.GOMAXPROCS(-1) + N := int32(b.N / 
CallsPerSched) + c := make(chan bool, 2*procs) + myc := make(chan int, chanSize) + for p := 0; p < procs; p++ { + go func() { + foo := 0 + for atomic.AddInt32(&N, -1) >= 0 { + for g := 0; g < CallsPerSched; g++ { + for i := 0; i < localWork; i++ { + foo *= 2 + foo /= 2 + } + myc <- 1 + } + } + myc <- 0 + c <- foo == 42 + }() + go func() { + foo := 0 + for { + v := <-myc + if v == 0 { + break + } + for i := 0; i < localWork; i++ { + foo *= 2 + foo /= 2 + } + } + c <- foo == 42 + }() + } + for p := 0; p < procs; p++ { + <-c + <-c + } +} + +func BenchmarkChanProdCons0(b *testing.B) { + benchmarkChanProdCons(b, 0, 0) +} + +func BenchmarkChanProdCons10(b *testing.B) { + benchmarkChanProdCons(b, 10, 0) +} + +func BenchmarkChanProdCons100(b *testing.B) { + benchmarkChanProdCons(b, 100, 0) +} + +func BenchmarkChanProdConsWork0(b *testing.B) { + benchmarkChanProdCons(b, 0, 100) +} + +func BenchmarkChanProdConsWork10(b *testing.B) { + benchmarkChanProdCons(b, 10, 100) +} + +func BenchmarkChanProdConsWork100(b *testing.B) { + benchmarkChanProdCons(b, 100, 100) +} diff --git a/src/pkg/runtime/cpuprof.c b/src/pkg/runtime/cpuprof.c index 6233bcb45..74b795b7e 100644 --- a/src/pkg/runtime/cpuprof.c +++ b/src/pkg/runtime/cpuprof.c @@ -121,6 +121,10 @@ runtime·SetCPUProfileRate(int32 hz) { uintptr *p; uintptr n; + + // Call findfunc now so that it won't have to + // build tables during the signal handler. + runtime·findfunc(0); // Clamp hz to something reasonable. if(hz < 0) diff --git a/src/pkg/runtime/debug/stack_test.go b/src/pkg/runtime/debug/stack_test.go index 4aeea13ff..94293bb93 100644 --- a/src/pkg/runtime/debug/stack_test.go +++ b/src/pkg/runtime/debug/stack_test.go @@ -23,7 +23,7 @@ func (t T) method() []byte { Don't worry much about the base levels, but check the ones in our own package. /Users/r/go/src/pkg/runtime/debug/stack_test.go:15 (0x13878) - *T.ptrmethod: return Stack() + (*T).ptrmethod: return Stack() /Users/r/go/src/pkg/runtime/debug/stack_test.go:18 (0x138dd) T.method: return t.ptrmethod() /Users/r/go/src/pkg/runtime/debug/stack_test.go:23 (0x13920) @@ -40,7 +40,7 @@ func TestStack(t *testing.T) { t.Fatal("too few lines") } check(t, lines[0], "src/pkg/runtime/debug/stack_test.go") - check(t, lines[1], "\t*T.ptrmethod: return Stack()") + check(t, lines[1], "\t(*T).ptrmethod: return Stack()") check(t, lines[2], "src/pkg/runtime/debug/stack_test.go") check(t, lines[3], "\tT.method: return t.ptrmethod()") check(t, lines[4], "src/pkg/runtime/debug/stack_test.go") diff --git a/src/pkg/runtime/export_test.go b/src/pkg/runtime/export_test.go index 58631c7b4..53c5fcba4 100644 --- a/src/pkg/runtime/export_test.go +++ b/src/pkg/runtime/export_test.go @@ -15,3 +15,9 @@ var F32to64 = f32to64 var Fcmp64 = fcmp64 var Fintto64 = fintto64 var F64toint = f64toint + +func entersyscall() +func exitsyscall() + +var Entersyscall = entersyscall +var Exitsyscall = exitsyscall diff --git a/src/pkg/runtime/freebsd/386/signal.c b/src/pkg/runtime/freebsd/386/signal.c index 3600f0762..2fe7ecd70 100644 --- a/src/pkg/runtime/freebsd/386/signal.c +++ b/src/pkg/runtime/freebsd/386/signal.c @@ -111,6 +111,8 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) runtime·exit(2); } +// Called from kernel on signal stack, so no stack split. 
+#pragma textflag 7 void runtime·sigignore(void) { diff --git a/src/pkg/runtime/freebsd/amd64/signal.c b/src/pkg/runtime/freebsd/amd64/signal.c index 85cb1d855..8015e366e 100644 --- a/src/pkg/runtime/freebsd/amd64/signal.c +++ b/src/pkg/runtime/freebsd/amd64/signal.c @@ -119,6 +119,8 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp) runtime·exit(2); } +// Called from kernel on signal stack, so no stack split. +#pragma textflag 7 void runtime·sigignore(void) { diff --git a/src/pkg/runtime/goc2c.c b/src/pkg/runtime/goc2c.c index 826ceff3a..61236e226 100644 --- a/src/pkg/runtime/goc2c.c +++ b/src/pkg/runtime/goc2c.c @@ -2,26 +2,27 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -/* Translate a .goc file into a .c file. A .goc file is a combination - of a limited form of Go with C. */ +/* + * Translate a .goc file into a .c file. A .goc file is a combination + * of a limited form of Go with C. + */ /* - package PACKAGENAME - {# line} - func NAME([NAME TYPE { , NAME TYPE }]) [(NAME TYPE { , NAME TYPE })] \{ - C code with proper brace nesting - \} + package PACKAGENAME + {# line} + func NAME([NAME TYPE { , NAME TYPE }]) [(NAME TYPE { , NAME TYPE })] \{ + C code with proper brace nesting + \} */ -/* We generate C code which implements the function such that it can - be called from Go and executes the C code. */ +/* + * We generate C code which implements the function such that it can + * be called from Go and executes the C code. + */ -#include <assert.h> -#include <ctype.h> +#include <u.h> #include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> +#include <libc.h> /* Whether we're emitting for gcc */ static int gcc; @@ -88,16 +89,14 @@ int structround = 4; static void bad_eof(void) { - fprintf(stderr, "%s:%u: unexpected EOF\n", file, lineno); - exit(1); + sysfatal("%s:%ud: unexpected EOF\n", file, lineno); } /* Out of memory. */ static void bad_mem(void) { - fprintf(stderr, "%s:%u: out of memory\n", file, lineno); - exit(1); + sysfatal("%s:%ud: out of memory\n", file, lineno); } /* Allocate memory without fail. */ @@ -196,8 +195,10 @@ getchar_skipping_comments(void) } } -/* Read and return a token. Tokens are delimited by whitespace or by - [(),{}]. The latter are all returned as single characters. */ +/* + * Read and return a token. Tokens are delimited by whitespace or by + * [(),{}]. The latter are all returned as single characters. + */ static char * read_token(void) { @@ -259,11 +260,11 @@ read_package(void) char *token; token = read_token_no_eof(); + if (token == nil) + sysfatal("%s:%ud: no token\n", file, lineno); if (strcmp(token, "package") != 0) { - fprintf(stderr, - "%s:%u: expected \"package\", got \"%s\"\n", + sysfatal("%s:%ud: expected \"package\", got \"%s\"\n", file, lineno, token); - exit(1); } return read_token_no_eof(); } @@ -290,8 +291,10 @@ read_preprocessor_lines(void) } } -/* Read a type in Go syntax and return a type in C syntax. We only - permit basic types and pointers. */ +/* + * Read a type in Go syntax and return a type in C syntax. We only + * permit basic types and pointers. + */ static char * read_type(void) { @@ -333,13 +336,14 @@ type_size(char *p) for(i=0; type_table[i].name; i++) if(strcmp(type_table[i].name, p) == 0) return type_table[i].size; - fprintf(stderr, "%s:%u: unknown type %s\n", file, lineno, p); - exit(1); + sysfatal("%s:%ud: unknown type %s\n", file, lineno, p); return 0; } -/* Read a list of parameters. Each parameter is a name and a type. 
- The list ends with a ')'. We have already read the '('. */ +/* + * Read a list of parameters. Each parameter is a name and a type. + * The list ends with a ')'. We have already read the '('. + */ static struct params * read_params(int *poffset) { @@ -375,17 +379,18 @@ read_params(int *poffset) } } if (strcmp(token, ")") != 0) { - fprintf(stderr, "%s:%u: expected '('\n", + sysfatal("%s:%ud: expected '('\n", file, lineno); - exit(1); } if (poffset != NULL) *poffset = offset; return ret; } -/* Read a function header. This reads up to and including the initial - '{' character. Returns 1 if it read a header, 0 at EOF. */ +/* + * Read a function header. This reads up to and including the initial + * '{' character. Returns 1 if it read a header, 0 at EOF. + */ static int read_func_header(char **name, struct params **params, int *paramwid, struct params **rets) { @@ -416,9 +421,8 @@ read_func_header(char **name, struct params **params, int *paramwid, struct para token = read_token(); if (token == NULL || strcmp(token, "(") != 0) { - fprintf(stderr, "%s:%u: expected \"(\"\n", + sysfatal("%s:%ud: expected \"(\"\n", file, lineno); - exit(1); } *params = read_params(paramwid); @@ -430,9 +434,8 @@ read_func_header(char **name, struct params **params, int *paramwid, struct para token = read_token(); } if (token == NULL || strcmp(token, "{") != 0) { - fprintf(stderr, "%s:%u: expected \"{\"\n", + sysfatal("%s:%ud: expected \"{\"\n", file, lineno); - exit(1); } return 1; } @@ -581,8 +584,10 @@ write_func_trailer(char *package, char *name, write_6g_func_trailer(rets); } -/* Read and write the body of the function, ending in an unnested } - (which is read but not written). */ +/* + * Read and write the body of the function, ending in an unnested } + * (which is read but not written). 
+ */ static void copy_body(void) { @@ -669,15 +674,15 @@ process_file(void) static void usage(void) { - fprintf(stderr, "Usage: goc2c [--6g | --gc] [file]\n"); - exit(1); + sysfatal("Usage: goc2c [--6g | --gc] [file]\n"); } -int +void main(int argc, char **argv) { char *goarch; + argv0 = argv[0]; while(argc > 1 && argv[1][0] == '-') { if(strcmp(argv[1], "-") == 0) break; @@ -694,7 +699,7 @@ main(int argc, char **argv) if(argc <= 1 || strcmp(argv[1], "-") == 0) { file = "<stdin>"; process_file(); - return 0; + exits(0); } if(argc > 2) @@ -702,8 +707,7 @@ main(int argc, char **argv) file = argv[1]; if(freopen(file, "r", stdin) == 0) { - fprintf(stderr, "open %s: %s\n", file, strerror(errno)); - exit(1); + sysfatal("open %s: %r\n", file); } if(!gcc) { @@ -719,5 +723,5 @@ main(int argc, char **argv) } process_file(); - return 0; + exits(0); } diff --git a/src/pkg/runtime/hashmap.c b/src/pkg/runtime/hashmap.c index 5ba1eb20a..179a56375 100644 --- a/src/pkg/runtime/hashmap.c +++ b/src/pkg/runtime/hashmap.c @@ -753,12 +753,12 @@ runtime·makemap_c(Type *key, Type *val, int64 hint) // func(key) (val[, pres]) h->ko1 = runtime·rnd(sizeof(h), key->align); h->vo1 = runtime·rnd(h->ko1+keysize, Structrnd); - h->po1 = runtime·rnd(h->vo1+valsize, 1); + h->po1 = h->vo1 + valsize; // func(key, val[, pres]) h->ko2 = runtime·rnd(sizeof(h), key->align); h->vo2 = runtime·rnd(h->ko2+keysize, val->align); - h->po2 = runtime·rnd(h->vo2+valsize, 1); + h->po2 = h->vo2 + valsize; if(debug) { runtime·printf("makemap: map=%p; keysize=%d; valsize=%d; keyalg=%d; valalg=%d; offsets=%d,%d; %d,%d,%d; %d,%d,%d\n", diff --git a/src/pkg/runtime/hashmap.h b/src/pkg/runtime/hashmap.h index d0fd3527f..19ff41697 100644 --- a/src/pkg/runtime/hashmap.h +++ b/src/pkg/runtime/hashmap.h @@ -65,7 +65,7 @@ #define malloc runtime·mal #define memset(a,b,c) runtime·memclr((byte*)(a), (uint32)(c)) -#define memcpy(a,b,c) runtime·mcpy((byte*)(a),(byte*)(b),(uint32)(c)) +#define memcpy(a,b,c) runtime·memmove((byte*)(a),(byte*)(b),(uint32)(c)) #define assert(a) if(!(a)) runtime·throw("assert") #define free(x) runtime·free(x) #define memmove(a,b,c) runtime·memmove(a, b, c) diff --git a/src/pkg/runtime/iface.c b/src/pkg/runtime/iface.c index b1015f695..000f834cf 100644 --- a/src/pkg/runtime/iface.c +++ b/src/pkg/runtime/iface.c @@ -81,7 +81,7 @@ itab(InterfaceType *inter, Type *type, int32 canfail) for(locked=0; locked<2; locked++) { if(locked) runtime·lock(&ifacelock); - for(m=hash[h]; m!=nil; m=m->link) { + for(m=runtime·atomicloadp(&hash[h]); m!=nil; m=m->link) { if(m->inter == inter && m->type == type) { if(m->bad) { m = nil; @@ -145,10 +145,11 @@ search: } out: + if(!locked) + runtime·panicstring("invalid itab locking"); m->link = hash[h]; - hash[h] = m; - if(locked) - runtime·unlock(&ifacelock); + runtime·atomicstorep(&hash[h], m); + runtime·unlock(&ifacelock); if(m->bad) return nil; return m; @@ -264,7 +265,7 @@ runtime·assertI2T2(Type *t, Iface i, ...) ret = (byte*)(&i+1); wid = t->size; - ok = (bool*)(ret+runtime·rnd(wid, 1)); + ok = (bool*)(ret + wid); if(i.tab == nil || i.tab->type != t) { *ok = false; @@ -326,7 +327,7 @@ runtime·assertE2T2(Type *t, Eface e, ...) 
runtime·throw("invalid interface value"); ret = (byte*)(&e+1); wid = t->size; - ok = (bool*)(ret+runtime·rnd(wid, 1)); + ok = (bool*)(ret + wid); if(t != e.type) { *ok = false; diff --git a/src/pkg/runtime/linux/386/defs.h b/src/pkg/runtime/linux/386/defs.h index 6ae1c4e13..73fe23ef9 100644 --- a/src/pkg/runtime/linux/386/defs.h +++ b/src/pkg/runtime/linux/386/defs.h @@ -61,6 +61,8 @@ enum { ITIMER_REAL = 0, ITIMER_VIRTUAL = 0x1, ITIMER_PROF = 0x2, + O_RDONLY = 0, + O_CLOEXEC = 02000000, }; // Types diff --git a/src/pkg/runtime/linux/386/sys.s b/src/pkg/runtime/linux/386/sys.s index e8b423324..0b4a34986 100644 --- a/src/pkg/runtime/linux/386/sys.s +++ b/src/pkg/runtime/linux/386/sys.s @@ -22,9 +22,31 @@ TEXT runtime·exit1(SB),7,$0 INT $3 // not reached RET +TEXT runtime·open(SB),7,$0 + MOVL $5, AX // syscall - open + MOVL 4(SP), BX + MOVL 8(SP), CX + MOVL 12(SP), DX + INT $0x80 + RET + +TEXT runtime·close(SB),7,$0 + MOVL $6, AX // syscall - close + MOVL 4(SP), BX + INT $0x80 + RET + TEXT runtime·write(SB),7,$0 MOVL $4, AX // syscall - write - MOVL 4(SP), BX + MOVL 4(SP), BX + MOVL 8(SP), CX + MOVL 12(SP), DX + INT $0x80 + RET + +TEXT runtime·read(SB),7,$0 + MOVL $3, AX // syscall - read + MOVL 4(SP), BX MOVL 8(SP), CX MOVL 12(SP), DX INT $0x80 @@ -315,3 +337,8 @@ TEXT runtime·setldt(SB),7,$32 MOVW AX, GS RET + +TEXT runtime·osyield(SB),7,$0 + MOVL $158, AX + INT $0x80 + RET diff --git a/src/pkg/runtime/linux/amd64/defs.h b/src/pkg/runtime/linux/amd64/defs.h index 70d63145c..8053dd16f 100644 --- a/src/pkg/runtime/linux/amd64/defs.h +++ b/src/pkg/runtime/linux/amd64/defs.h @@ -61,6 +61,8 @@ enum { ITIMER_REAL = 0, ITIMER_VIRTUAL = 0x1, ITIMER_PROF = 0x2, + O_RDONLY = 0, + O_CLOEXEC = 02000000, }; // Types diff --git a/src/pkg/runtime/linux/amd64/sys.s b/src/pkg/runtime/linux/amd64/sys.s index 66fdab208..8b4dcd921 100644 --- a/src/pkg/runtime/linux/amd64/sys.s +++ b/src/pkg/runtime/linux/amd64/sys.s @@ -28,6 +28,12 @@ TEXT runtime·open(SB),7,$0-16 SYSCALL RET +TEXT runtime·close(SB),7,$0-16 + MOVL 8(SP), DI + MOVL $3, AX // syscall entry + SYSCALL + RET + TEXT runtime·write(SB),7,$0-24 MOVL 8(SP), DI MOVQ 16(SP), SI @@ -36,6 +42,14 @@ TEXT runtime·write(SB),7,$0-24 SYSCALL RET +TEXT runtime·read(SB),7,$0-24 + MOVL 8(SP), DI + MOVQ 16(SP), SI + MOVL 24(SP), DX + MOVL $0, AX // syscall entry + SYSCALL + RET + TEXT runtime·raisesigpipe(SB),7,$12 MOVL $186, AX // syscall - gettid SYSCALL @@ -232,3 +246,7 @@ TEXT runtime·settls(SB),7,$32 CALL runtime·notok(SB) RET +TEXT runtime·osyield(SB),7,$0 + MOVL $24, AX + SYSCALL + RET diff --git a/src/pkg/runtime/linux/arm/defs.h b/src/pkg/runtime/linux/arm/defs.h index 6b2f22c66..09b558ed0 100644 --- a/src/pkg/runtime/linux/arm/defs.h +++ b/src/pkg/runtime/linux/arm/defs.h @@ -61,6 +61,8 @@ enum { ITIMER_REAL = 0, ITIMER_PROF = 0x2, ITIMER_VIRTUAL = 0x1, + O_RDONLY = 0, + O_CLOEXEC = 02000000, }; // Types diff --git a/src/pkg/runtime/linux/arm/sys.s b/src/pkg/runtime/linux/arm/sys.s index ab5349822..8619f0945 100644 --- a/src/pkg/runtime/linux/arm/sys.s +++ b/src/pkg/runtime/linux/arm/sys.s @@ -15,7 +15,10 @@ #define SYS_BASE 0x0 #define SYS_exit (SYS_BASE + 1) +#define SYS_read (SYS_BASE + 3) #define SYS_write (SYS_BASE + 4) +#define SYS_open (SYS_BASE + 5) +#define SYS_close (SYS_BASE + 6) #define SYS_gettimeofday (SYS_BASE + 78) #define SYS_clone (SYS_BASE + 120) #define SYS_rt_sigreturn (SYS_BASE + 173) @@ -29,10 +32,25 @@ #define SYS_mincore (SYS_BASE + 219) #define SYS_gettid (SYS_BASE + 224) #define SYS_tkill (SYS_BASE + 238) +#define 
SYS_sched_yield (SYS_BASE + 158) #define ARM_BASE (SYS_BASE + 0x0f0000) #define SYS_ARM_cacheflush (ARM_BASE + 2) +TEXT runtime·open(SB),7,$0 + MOVW 0(FP), R0 + MOVW 4(FP), R1 + MOVW 8(FP), R2 + MOVW $SYS_open, R7 + SWI $0 + RET + +TEXT runtime·close(SB),7,$0 + MOVW 0(FP), R0 + MOVW $SYS_close, R7 + SWI $0 + RET + TEXT runtime·write(SB),7,$0 MOVW 0(FP), R0 MOVW 4(FP), R1 @@ -41,6 +59,14 @@ TEXT runtime·write(SB),7,$0 SWI $0 RET +TEXT runtime·read(SB),7,$0 + MOVW 0(FP), R0 + MOVW 4(FP), R1 + MOVW 8(FP), R2 + MOVW $SYS_read, R7 + SWI $0 + RET + TEXT runtime·exit(SB),7,$-4 MOVW 0(FP), R0 MOVW $SYS_exit_group, R7 @@ -287,3 +313,7 @@ cascheck: TEXT runtime·casp(SB),7,$0 B runtime·cas(SB) +TEXT runtime·osyield(SB),7,$0 + MOVW $SYS_sched_yield, R7 + SWI $0 + RET diff --git a/src/pkg/runtime/linux/thread.c b/src/pkg/runtime/linux/thread.c index 7c7ca7b4e..8efba2b98 100644 --- a/src/pkg/runtime/linux/thread.c +++ b/src/pkg/runtime/linux/thread.c @@ -8,6 +8,11 @@ #include "stack.h" extern SigTab runtime·sigtab[]; +static int32 proccount; + +int32 runtime·open(uint8*, int32, int32); +int32 runtime·close(int32); +int32 runtime·read(int32, void*, int32); // Linux futex. // @@ -15,11 +20,19 @@ extern SigTab runtime·sigtab[]; // futexwakeup(uint32 *addr) // // Futexsleep atomically checks if *addr == val and if so, sleeps on addr. -// Futexwakeup wakes up one thread sleeping on addr. +// Futexwakeup wakes up threads sleeping on addr. // Futexsleep is allowed to wake up spuriously. enum { + MUTEX_UNLOCKED = 0, + MUTEX_LOCKED = 1, + MUTEX_SLEEPING = 2, + + ACTIVE_SPIN = 4, + ACTIVE_SPIN_CNT = 30, + PASSIVE_SPIN = 1, + FUTEX_WAIT = 0, FUTEX_WAKE = 1, @@ -52,13 +65,13 @@ futexsleep(uint32 *addr, uint32 val) runtime·futex(addr, FUTEX_WAIT, val, &longtime, nil, 0); } -// If any procs are sleeping on addr, wake up at least one. +// If any procs are sleeping on addr, wake up at most cnt. static void -futexwakeup(uint32 *addr) +futexwakeup(uint32 *addr, uint32 cnt) { int64 ret; - ret = runtime·futex(addr, FUTEX_WAKE, 1, nil, nil, 0); + ret = runtime·futex(addr, FUTEX_WAKE, cnt, nil, nil, 0); if(ret >= 0) return; @@ -66,70 +79,96 @@ futexwakeup(uint32 *addr) // I don't know that futex wakeup can return // EAGAIN or EINTR, but if it does, it would be // safe to loop and call futex again. - - runtime·prints("futexwakeup addr="); - runtime·printpointer(addr); - runtime·prints(" returned "); - runtime·printint(ret); - runtime·prints("\n"); + runtime·printf("futexwakeup addr=%p returned %D\n", addr, ret); *(int32*)0x1006 = 0x1006; } +static int32 +getproccount(void) +{ + int32 fd, rd, cnt, cpustrlen; + byte *cpustr, *pos, *bufpos; + byte buf[256]; + + fd = runtime·open((byte*)"/proc/stat", O_RDONLY|O_CLOEXEC, 0); + if(fd == -1) + return 1; + cnt = 0; + bufpos = buf; + cpustr = (byte*)"\ncpu"; + cpustrlen = runtime·findnull(cpustr); + for(;;) { + rd = runtime·read(fd, bufpos, sizeof(buf)-cpustrlen); + if(rd == -1) + break; + bufpos[rd] = 0; + for(pos=buf; pos=runtime·strstr(pos, cpustr); cnt++, pos++) { + } + if(rd < cpustrlen) + break; + runtime·memmove(buf, bufpos+rd-cpustrlen+1, cpustrlen-1); + bufpos = buf+cpustrlen-1; + } + runtime·close(fd); + return cnt ? cnt : 1; +} -// Lock and unlock. -// -// The lock state is a single 32-bit word that holds -// a 31-bit count of threads waiting for the lock -// and a single bit (the low bit) saying whether the lock is held. -// The uncontended case runs entirely in user space. -// When contention is detected, we defer to the kernel (futex). 
-// -// A reminder: compare-and-swap runtime·cas(addr, old, new) does -// if(*addr == old) { *addr = new; return 1; } -// else return 0; -// but atomically. - +// Possible lock states are MUTEX_UNLOCKED, MUTEX_LOCKED and MUTEX_SLEEPING. +// MUTEX_SLEEPING means that there is presumably at least one sleeping thread. +// Note that there can be spinning threads during all states - they do not +// affect mutex's state. static void futexlock(Lock *l) { - uint32 v; + uint32 i, v, wait, spin; -again: - v = l->key; - if((v&1) == 0){ - if(runtime·cas(&l->key, v, v|1)){ - // Lock wasn't held; we grabbed it. - return; + // Speculative grab for lock. + v = runtime·xchg(&l->key, MUTEX_LOCKED); + if(v == MUTEX_UNLOCKED) + return; + + // wait is either MUTEX_LOCKED or MUTEX_SLEEPING + // depending on whether there is a thread sleeping + // on this mutex. If we ever change l->key from + // MUTEX_SLEEPING to some other value, we must be + // careful to change it back to MUTEX_SLEEPING before + // returning, to ensure that the sleeping thread gets + // its wakeup call. + wait = v; + + if(proccount == 0) + proccount = getproccount(); + + // On uniprocessor's, no point spinning. + // On multiprocessors, spin for ACTIVE_SPIN attempts. + spin = 0; + if(proccount > 1) + spin = ACTIVE_SPIN; + + for(;;) { + // Try for lock, spinning. + for(i = 0; i < spin; i++) { + while(l->key == MUTEX_UNLOCKED) + if(runtime·cas(&l->key, MUTEX_UNLOCKED, wait)) + return; + runtime·procyield(ACTIVE_SPIN_CNT); } - goto again; - } - // Lock was held; try to add ourselves to the waiter count. - if(!runtime·cas(&l->key, v, v+2)) - goto again; - - // We're accounted for, now sleep in the kernel. - // - // We avoid the obvious lock/unlock race because - // the kernel won't put us to sleep if l->key has - // changed underfoot and is no longer v+2. - // - // We only really care that (v&1) == 1 (the lock is held), - // and in fact there is a futex variant that could - // accommodate that check, but let's not get carried away.) - futexsleep(&l->key, v+2); - - // We're awake: remove ourselves from the count. - for(;;){ - v = l->key; - if(v < 2) - runtime·throw("bad lock key"); - if(runtime·cas(&l->key, v, v-2)) - break; - } + // Try for lock, rescheduling. + for(i=0; i < PASSIVE_SPIN; i++) { + while(l->key == MUTEX_UNLOCKED) + if(runtime·cas(&l->key, MUTEX_UNLOCKED, wait)) + return; + runtime·osyield(); + } - // Try for the lock again. - goto again; + // Sleep. + v = runtime·xchg(&l->key, MUTEX_SLEEPING); + if(v == MUTEX_UNLOCKED) + return; + wait = MUTEX_SLEEPING; + futexsleep(&l->key, MUTEX_SLEEPING); + } } static void @@ -137,34 +176,26 @@ futexunlock(Lock *l) { uint32 v; - // Atomically get value and clear lock bit. -again: - v = l->key; - if((v&1) == 0) + v = runtime·xchg(&l->key, MUTEX_UNLOCKED); + if(v == MUTEX_UNLOCKED) runtime·throw("unlock of unlocked lock"); - if(!runtime·cas(&l->key, v, v&~1)) - goto again; - - // If there were waiters, wake one. - if(v & ~1) - futexwakeup(&l->key); + if(v == MUTEX_SLEEPING) + futexwakeup(&l->key, 1); } void runtime·lock(Lock *l) { - if(m->locks < 0) - runtime·throw("lock count"); - m->locks++; + if(m->locks++ < 0) + runtime·throw("runtime·lock: lock count"); futexlock(l); } void runtime·unlock(Lock *l) { - m->locks--; - if(m->locks < 0) - runtime·throw("lock count"); + if(--m->locks < 0) + runtime·throw("runtime·unlock: lock count"); futexunlock(l); } @@ -175,35 +206,24 @@ runtime·destroylock(Lock*) // One-time notifications. 
-// -// Since the lock/unlock implementation already -// takes care of sleeping in the kernel, we just reuse it. -// (But it's a weird use, so it gets its own interface.) -// -// We use a lock to represent the event: -// unlocked == event has happened. -// Thus the lock starts out locked, and to wait for the -// event you try to lock the lock. To signal the event, -// you unlock the lock. - void runtime·noteclear(Note *n) { - n->lock.key = 0; // memset(n, 0, sizeof *n) - futexlock(&n->lock); + n->state = 0; } void runtime·notewakeup(Note *n) { - futexunlock(&n->lock); + runtime·xchg(&n->state, 1); + futexwakeup(&n->state, 1<<30); } void runtime·notesleep(Note *n) { - futexlock(&n->lock); - futexunlock(&n->lock); // Let other sleepers find out too. + while(runtime·atomicload(&n->state) == 0) + futexsleep(&n->state, 0); } diff --git a/src/pkg/runtime/malloc.goc b/src/pkg/runtime/malloc.goc index 49ab24df8..b9fe36db6 100644 --- a/src/pkg/runtime/malloc.goc +++ b/src/pkg/runtime/malloc.goc @@ -18,21 +18,6 @@ extern MStats mstats; // defined in extern.go extern volatile int32 runtime·MemProfileRate; -// Same algorithm from chan.c, but a different -// instance of the static uint32 x. -// Not protected by a lock - let the threads use -// the same random number if they like. -static uint32 -fastrand1(void) -{ - static uint32 x = 0x49f6428aUL; - - x += x; - if(x & 0x80000000L) - x ^= 0x88888eefUL; - return x; -} - // Allocate an object of at least size bytes. // Small objects are allocated from the per-thread cache's free lists. // Large objects (> 32 kB) are allocated straight from the heap. @@ -53,18 +38,18 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) if(size == 0) size = 1; - mstats.nmalloc++; + c = m->mcache; + c->local_nmalloc++; if(size <= MaxSmallSize) { // Allocate from mcache free lists. sizeclass = runtime·SizeToClass(size); size = runtime·class_to_size[sizeclass]; - c = m->mcache; v = runtime·MCache_Alloc(c, sizeclass, size, zeroed); if(v == nil) runtime·throw("out of memory"); - mstats.alloc += size; - mstats.total_alloc += size; - mstats.by_size[sizeclass].nmalloc++; + c->local_alloc += size; + c->local_total_alloc += size; + c->local_by_size[sizeclass].nmalloc++; } else { // TODO(rsc): Report tracebacks for very large allocations. @@ -76,8 +61,8 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) if(s == nil) runtime·throw("out of memory"); size = npages<<PageShift; - mstats.alloc += size; - mstats.total_alloc += size; + c->local_alloc += size; + c->local_total_alloc += size; v = (void*)(s->start << PageShift); // setup for mark sweep @@ -97,7 +82,7 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) // pick next profile time if(rate > 0x3fffffff) // make 2*rate not overflow rate = 0x3fffffff; - m->mcache->next_sample = fastrand1() % (2*rate); + m->mcache->next_sample = runtime·fastrand1() % (2*rate); profile: runtime·setblockspecial(v); runtime·MProf_Malloc(v, size); @@ -143,6 +128,7 @@ runtime·free(void *v) // Find size class for v. sizeclass = s->sizeclass; + c = m->mcache; if(sizeclass == 0) { // Large object. size = s->npages<<PageShift; @@ -154,7 +140,6 @@ runtime·free(void *v) runtime·MHeap_Free(&runtime·mheap, s, 1); } else { // Small object. 
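Editor's note: the rewritten Note above also sits directly on the futex: the state word starts at 0, notewakeup publishes 1 and wakes every sleeper, and notesleep blocks only while the word is still 0. A stand-alone sketch under the same assumptions as the lock sketch (Linux, GCC atomics; names are illustrative):

/* futexnote.c -- sketch of a futex-backed one-time notification. */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stddef.h>

static long
futex(uint32_t *addr, int op, uint32_t val)
{
    return syscall(SYS_futex, addr, op, val, NULL, NULL, 0);
}

void
note_clear(uint32_t *n)
{
    *n = 0;
}

void
note_wakeup(uint32_t *n)
{
    __atomic_store_n(n, 1, __ATOMIC_SEQ_CST);
    futex(n, FUTEX_WAKE, 1<<30);    /* wake every waiter */
}

void
note_sleep(uint32_t *n)
{
    /* FUTEX_WAIT returns immediately once *n is no longer 0,
     * so a wakeup between the load and the sleep is not lost. */
    while(__atomic_load_n(n, __ATOMIC_SEQ_CST) == 0)
        futex(n, FUTEX_WAIT, 0);
}

Compared with the old scheme of reusing a Lock for the event, the state word needs no initial lock acquisition and all sleepers are woken at once.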
- c = m->mcache; size = runtime·class_to_size[sizeclass]; if(size > sizeof(uintptr)) ((uintptr*)v)[1] = 1; // mark as "needs to be zeroed" @@ -162,10 +147,10 @@ runtime·free(void *v) // it might coalesce v and other blocks into a bigger span // and change the bitmap further. runtime·markfreed(v, size); - mstats.by_size[sizeclass].nfree++; + c->local_by_size[sizeclass].nfree++; runtime·MCache_Free(c, v, sizeclass, size); } - mstats.alloc -= size; + c->local_alloc -= size; if(prof) runtime·MProf_Free(v, size); m->mallocing = 0; @@ -178,7 +163,7 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp) byte *p; MSpan *s; - mstats.nlookup++; + m->mcache->local_nlookup++; s = runtime·MHeap_LookupMaybe(&runtime·mheap, v); if(sp) *sp = s; @@ -207,9 +192,10 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp) } n = runtime·class_to_size[s->sizeclass]; - i = ((byte*)v - p)/n; - if(base) + if(base) { + i = ((byte*)v - p)/n; *base = p + i*n; + } if(size) *size = n; @@ -229,6 +215,29 @@ runtime·allocmcache(void) return c; } +void +runtime·purgecachedstats(M* m) +{ + MCache *c; + + // Protected by either heap or GC lock. + c = m->mcache; + mstats.heap_alloc += c->local_cachealloc; + c->local_cachealloc = 0; + mstats.heap_objects += c->local_objects; + c->local_objects = 0; + mstats.nmalloc += c->local_nmalloc; + c->local_nmalloc = 0; + mstats.nfree += c->local_nfree; + c->local_nfree = 0; + mstats.nlookup += c->local_nlookup; + c->local_nlookup = 0; + mstats.alloc += c->local_alloc; + c->local_alloc= 0; + mstats.total_alloc += c->local_total_alloc; + c->local_total_alloc= 0; +} + uintptr runtime·sizeof_C_MStats = sizeof(MStats); #define MaxArena32 (2U<<30) @@ -373,46 +382,28 @@ func new(n uint32) (ret *uint8) { ret = runtime·mal(n); } -// Stack allocator uses malloc/free most of the time, -// but if we're in the middle of malloc and need stack, -// we have to do something else to avoid deadlock. -// In that case, we fall back on a fixed-size free-list -// allocator, assuming that inside malloc all the stack -// frames are small, so that all the stack allocations -// will be a single size, the minimum (right now, 5k). -static struct { - Lock; - FixAlloc; -} stacks; - -enum { - FixedStack = StackMin, -}; - void* runtime·stackalloc(uint32 n) { - void *v; - // Stackalloc must be called on scheduler stack, so that we // never try to grow the stack during the code that stackalloc runs. // Doing so would cause a deadlock (issue 1547). if(g != m->g0) runtime·throw("stackalloc not on scheduler stack"); + // Stack allocator uses malloc/free most of the time, + // but if we're in the middle of malloc and need stack, + // we have to do something else to avoid deadlock. + // In that case, we fall back on a fixed-size free-list + // allocator, assuming that inside malloc all the stack + // frames are small, so that all the stack allocations + // will be a single size, the minimum (right now, 5k). 
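Editor's note: the fallback allocator the comment above describes now lives per thread as m->stackalloc, which removes the old global stacks lock. The FixAlloc shape it relies on is just a fixed-size free list; a minimal sketch of that shape follows, with malloc standing in for the runtime's SysAlloc chunking and all names illustrative.

/* fixlist.c -- minimal fixed-size free-list allocator, FixAlloc style. */
#include <stddef.h>
#include <stdlib.h>

typedef struct FixBlock FixBlock;
struct FixBlock {
    FixBlock *next;
};

typedef struct Fix Fix;
struct Fix {
    size_t size;       /* fixed object size, >= sizeof(FixBlock) */
    FixBlock *free;    /* objects returned by fix_free, ready for reuse */
    size_t inuse;      /* bytes handed out and not yet freed */
};

void
fix_init(Fix *f, size_t size)
{
    f->size = size;
    f->free = NULL;
    f->inuse = 0;
}

void*
fix_alloc(Fix *f)
{
    void *v;

    if(f->free != NULL) {        /* reuse a freed object if we have one */
        v = f->free;
        f->free = f->free->next;
    } else
        v = malloc(f->size);     /* otherwise grab a fresh one */
    f->inuse += f->size;
    return v;
}

void
fix_free(Fix *f, void *p)
{
    FixBlock *b;

    b = p;
    b->next = f->free;           /* push back onto the free list */
    f->free = b;
    f->inuse -= f->size;
}

Because every M owns its own instance, stackalloc and stackfree no longer take any lock; cachestats later folds each M's inuse and sys counters back into mstats.stacks_inuse and mstats.stacks_sys.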
if(m->mallocing || m->gcing || n == FixedStack) { - runtime·lock(&stacks); - if(stacks.size == 0) - runtime·FixAlloc_Init(&stacks, n, runtime·SysAlloc, nil, nil); - if(stacks.size != n) { - runtime·printf("stackalloc: in malloc, size=%D want %d", (uint64)stacks.size, n); + if(n != FixedStack) { + runtime·printf("stackalloc: in malloc, size=%d want %d", FixedStack, n); runtime·throw("stackalloc"); } - v = runtime·FixAlloc_Alloc(&stacks); - mstats.stacks_inuse = stacks.inuse; - mstats.stacks_sys = stacks.sys; - runtime·unlock(&stacks); - return v; + return runtime·FixAlloc_Alloc(m->stackalloc); } return runtime·mallocgc(n, FlagNoProfiling|FlagNoGC, 0, 0); } @@ -421,11 +412,7 @@ void runtime·stackfree(void *v, uintptr n) { if(m->mallocing || m->gcing || n == FixedStack) { - runtime·lock(&stacks); - runtime·FixAlloc_Free(&stacks, v); - mstats.stacks_inuse = stacks.inuse; - mstats.stacks_sys = stacks.sys; - runtime·unlock(&stacks); + runtime·FixAlloc_Free(m->stackalloc, v); return; } runtime·free(v); diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h index 4e2794570..5bc80f4df 100644 --- a/src/pkg/runtime/malloc.h +++ b/src/pkg/runtime/malloc.h @@ -80,7 +80,6 @@ // This C code was written with an eye toward translating to Go // in the future. Methods have the form Type_Method(Type *t, ...). -typedef struct FixAlloc FixAlloc; typedef struct MCentral MCentral; typedef struct MHeap MHeap; typedef struct MSpan MSpan; @@ -186,10 +185,10 @@ void runtime·FixAlloc_Free(FixAlloc *f, void *p); // Shared with Go: if you edit this structure, also edit extern.go. struct MStats { - // General statistics. No locking; approximate. + // General statistics. uint64 alloc; // bytes allocated and still in use uint64 total_alloc; // bytes allocated (even if freed) - uint64 sys; // bytes obtained from system (should be sum of xxx_sys below) + uint64 sys; // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate) uint64 nlookup; // number of pointer lookups uint64 nmalloc; // number of mallocs uint64 nfree; // number of frees @@ -222,7 +221,6 @@ struct MStats bool debuggc; // Statistics about allocation size classes. - // No locking; approximate. 
struct { uint32 size; uint64 nmalloc; @@ -268,9 +266,20 @@ struct MCache { MCacheList list[NumSizeClasses]; uint64 size; + int64 local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap + int64 local_objects; // objects allocated (or freed) from cache since last lock of heap int64 local_alloc; // bytes allocated (or freed) since last lock of heap - int64 local_objects; // objects allocated (or freed) since last lock of heap + int64 local_total_alloc; // bytes allocated (even if freed) since last lock of heap + int64 local_nmalloc; // number of mallocs since last lock of heap + int64 local_nfree; // number of frees since last lock of heap + int64 local_nlookup; // number of pointer lookups since last lock of heap int32 next_sample; // trigger heap sample after allocating this many bytes + // Statistics about allocation size classes since last lock of heap + struct { + int64 nmalloc; + int64 nfree; + } local_by_size[NumSizeClasses]; + }; void* runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed); @@ -379,6 +388,7 @@ void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover); void runtime·unmarkspan(void *v, uintptr size); bool runtime·blockspecial(void*); void runtime·setblockspecial(void*); +void runtime·purgecachedstats(M*); enum { diff --git a/src/pkg/runtime/mcache.c b/src/pkg/runtime/mcache.c index e40621186..711e938fc 100644 --- a/src/pkg/runtime/mcache.c +++ b/src/pkg/runtime/mcache.c @@ -48,7 +48,7 @@ runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed) v->next = nil; } } - c->local_alloc += size; + c->local_cachealloc += size; c->local_objects++; return v; } @@ -90,7 +90,7 @@ runtime·MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size) l->list = p; l->nlist++; c->size += size; - c->local_alloc -= size; + c->local_cachealloc -= size; c->local_objects--; if(l->nlist >= MaxMCacheListLen) { diff --git a/src/pkg/runtime/mem.go b/src/pkg/runtime/mem.go index c3316d44c..93d155a7f 100644 --- a/src/pkg/runtime/mem.go +++ b/src/pkg/runtime/mem.go @@ -62,8 +62,13 @@ func init() { } // MemStats holds statistics about the memory system. -// The statistics are only approximate, as they are not interlocked on update. +// The statistics may be out of date, as the information is +// updated lazily from per-thread caches. +// Use UpdateMemStats to bring the statistics up to date. var MemStats MemStatsType +// UpdateMemStats brings MemStats up to date. +func UpdateMemStats() + // GC runs a garbage collection. func GC() diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c index ac6a1fa40..6325aadc6 100644 --- a/src/pkg/runtime/mgc0.c +++ b/src/pkg/runtime/mgc0.c @@ -484,6 +484,7 @@ sweep(void) // Mark freed; restore block boundary bit. *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); + c = m->mcache; if(s->sizeclass == 0) { // Free large span. runtime·unmarkspan(p, 1<<PageShift); @@ -491,14 +492,13 @@ sweep(void) runtime·MHeap_Free(&runtime·mheap, s, 1); } else { // Free small object. 
- c = m->mcache; if(size > sizeof(uintptr)) ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed" - mstats.by_size[s->sizeclass].nfree++; + c->local_by_size[s->sizeclass].nfree++; runtime·MCache_Free(c, p, s->sizeclass, size); } - mstats.alloc -= size; - mstats.nfree++; + c->local_alloc -= size; + c->local_nfree++; } } } @@ -533,14 +533,26 @@ cachestats(void) { M *m; MCache *c; + int32 i; + uint64 stacks_inuse; + uint64 stacks_sys; + stacks_inuse = 0; + stacks_sys = 0; for(m=runtime·allm; m; m=m->alllink) { + runtime·purgecachedstats(m); + stacks_inuse += m->stackalloc->inuse; + stacks_sys += m->stackalloc->sys; c = m->mcache; - mstats.heap_alloc += c->local_alloc; - c->local_alloc = 0; - mstats.heap_objects += c->local_objects; - c->local_objects = 0; + for(i=0; i<nelem(c->local_by_size); i++) { + mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc; + c->local_by_size[i].nmalloc = 0; + mstats.by_size[i].nfree += c->local_by_size[i].nfree; + c->local_by_size[i].nfree = 0; + } } + mstats.stacks_inuse = stacks_inuse; + mstats.stacks_sys = stacks_sys; } void @@ -603,6 +615,7 @@ runtime·gc(int32 force) sweep(); t2 = runtime·nanotime(); stealcache(); + cachestats(); mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; m->gcing = 0; @@ -650,6 +663,22 @@ runtime·gc(int32 force) runtime·gc(1); } +void +runtime·UpdateMemStats(void) +{ + // Have to acquire gcsema to stop the world, + // because stoptheworld can only be used by + // one goroutine at a time, and there might be + // a pending garbage collection already calling it. + runtime·semacquire(&gcsema); + m->gcing = 1; + runtime·stoptheworld(); + cachestats(); + m->gcing = 0; + runtime·semrelease(&gcsema); + runtime·starttheworld(); +} + static void runfinq(void) { diff --git a/src/pkg/runtime/mheap.c b/src/pkg/runtime/mheap.c index dde31ce34..37d505681 100644 --- a/src/pkg/runtime/mheap.c +++ b/src/pkg/runtime/mheap.c @@ -57,10 +57,7 @@ runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct) MSpan *s; runtime·lock(h); - mstats.heap_alloc += m->mcache->local_alloc; - m->mcache->local_alloc = 0; - mstats.heap_objects += m->mcache->local_objects; - m->mcache->local_objects = 0; + runtime·purgecachedstats(m); s = MHeap_AllocLocked(h, npage, sizeclass); if(s != nil) { mstats.heap_inuse += npage<<PageShift; @@ -258,10 +255,7 @@ void runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct) { runtime·lock(h); - mstats.heap_alloc += m->mcache->local_alloc; - m->mcache->local_alloc = 0; - mstats.heap_objects += m->mcache->local_objects; - m->mcache->local_objects = 0; + runtime·purgecachedstats(m); mstats.heap_inuse -= s->npages<<PageShift; if(acct) { mstats.heap_alloc -= s->npages<<PageShift; diff --git a/src/pkg/runtime/plan9/mem.c b/src/pkg/runtime/plan9/mem.c index 9dfdf2cc3..f795b2c01 100644 --- a/src/pkg/runtime/plan9/mem.c +++ b/src/pkg/runtime/plan9/mem.c @@ -8,6 +8,7 @@ extern byte end[]; static byte *bloc = { end }; +static Lock memlock; enum { @@ -19,23 +20,31 @@ runtime·SysAlloc(uintptr nbytes) { uintptr bl; + runtime·lock(&memlock); + mstats.sys += nbytes; // Plan 9 sbrk from /sys/src/libc/9sys/sbrk.c bl = ((uintptr)bloc + Round) & ~Round; - if(runtime·brk_((void*)(bl + nbytes)) < 0) + if(runtime·brk_((void*)(bl + nbytes)) < 0) { + runtime·unlock(&memlock); return (void*)-1; + } bloc = (byte*)bl + nbytes; + runtime·unlock(&memlock); return (void*)bl; } void runtime·SysFree(void *v, uintptr nbytes) { + runtime·lock(&memlock); + mstats.sys -= nbytes; // from tiny/mem.c // Push pointer back if this is a free // 
of the most recent SysAlloc. nbytes += (nbytes + Round) & ~Round; if(bloc == (byte*)v+nbytes) bloc -= nbytes; + runtime·unlock(&memlock); } void diff --git a/src/pkg/runtime/plan9/thread.c b/src/pkg/runtime/plan9/thread.c index ef9a23e8e..b091c5978 100644 --- a/src/pkg/runtime/plan9/thread.c +++ b/src/pkg/runtime/plan9/thread.c @@ -47,11 +47,11 @@ runtime·exit(int32) pid = pid/10; } p = buf; - runtime·mcpy((void*)p, (void*)"/proc/", 6); + runtime·memmove((void*)p, (void*)"/proc/", 6); p += 6; for(q--; q >= tmp;) *p++ = *q--; - runtime·mcpy((void*)p, (void*)"/notepg", 7); + runtime·memmove((void*)p, (void*)"/notepg", 7); /* post interrupt note */ fd = runtime·open(buf, OWRITE); @@ -167,3 +167,14 @@ os·sigpipe(void) { runtime·throw("too many writes on closed pipe"); } + +/* + * placeholder - once notes are implemented, + * a signal generating a panic must appear as + * a call to this function for correct handling by + * traceback. + */ +void +runtime·sigpanic(void) +{ +} diff --git a/src/pkg/runtime/print.c b/src/pkg/runtime/print.c index b8069aa39..3ce779495 100644 --- a/src/pkg/runtime/print.c +++ b/src/pkg/runtime/print.c @@ -320,7 +320,7 @@ runtime·printpointer(void *p) void runtime·printstring(String v) { - extern int32 runtime·maxstring; + extern uint32 runtime·maxstring; if(v.len > runtime·maxstring) { runtime·write(2, "[invalid string]", 16); diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index a8f3a796a..6d8f6990b 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -28,10 +28,10 @@ int32 runtime·gcwaiting; // Go scheduler // // The go scheduler's job is to match ready-to-run goroutines (`g's) -// with waiting-for-work schedulers (`m's). If there are ready gs -// and no waiting ms, ready() will start a new m running in a new -// OS thread, so that all ready gs can run simultaneously, up to a limit. -// For now, ms never go away. +// with waiting-for-work schedulers (`m's). If there are ready g's +// and no waiting m's, ready() will start a new m running in a new +// OS thread, so that all ready g's can run simultaneously, up to a limit. +// For now, m's never go away. // // By default, Go keeps only one kernel thread (m) running user code // at a single time; other threads may be blocked in the operating system. @@ -41,10 +41,10 @@ int32 runtime·gcwaiting; // approximation of the maximum number of cores to use. // // Even a program that can run without deadlock in a single process -// might use more ms if given the chance. For example, the prime -// sieve will use as many ms as there are primes (up to runtime·sched.mmax), +// might use more m's if given the chance. For example, the prime +// sieve will use as many m's as there are primes (up to runtime·sched.mmax), // allowing different stages of the pipeline to execute in parallel. -// We could revisit this choice, only kicking off new ms for blocking +// We could revisit this choice, only kicking off new m's for blocking // system calls, but that would limit the amount of parallel computation // that go would try to do. 
// @@ -55,27 +55,75 @@ int32 runtime·gcwaiting; struct Sched { Lock; - G *gfree; // available gs (status == Gdead) + G *gfree; // available g's (status == Gdead) + int32 goidgen; - G *ghead; // gs waiting to run + G *ghead; // g's waiting to run G *gtail; - int32 gwait; // number of gs waiting to run - int32 gcount; // number of gs that are alive + int32 gwait; // number of g's waiting to run + int32 gcount; // number of g's that are alive + int32 grunning; // number of g's running on cpu or in syscall - M *mhead; // ms waiting for work - int32 mwait; // number of ms waiting for work - int32 mcount; // number of ms that have been created - int32 mcpu; // number of ms executing on cpu - int32 mcpumax; // max number of ms allowed on cpu - int32 msyscall; // number of ms in system calls + M *mhead; // m's waiting for work + int32 mwait; // number of m's waiting for work + int32 mcount; // number of m's that have been created - int32 predawn; // running initialization, don't run new gs. + volatile uint32 atomic; // atomic scheduling word (see below) + + int32 predawn; // running initialization, don't run new g's. int32 profilehz; // cpu profiling rate - Note stopped; // one g can wait here for ms to stop - int32 waitstop; // after setting this flag + Note stopped; // one g can set waitstop and wait here for m's to stop +}; + +// The atomic word in sched is an atomic uint32 that +// holds these fields. +// +// [15 bits] mcpu number of m's executing on cpu +// [15 bits] mcpumax max number of m's allowed on cpu +// [1 bit] waitstop some g is waiting on stopped +// [1 bit] gwaiting gwait != 0 +// +// These fields are the information needed by entersyscall +// and exitsyscall to decide whether to coordinate with the +// scheduler. Packing them into a single machine word lets +// them use a fast path with a single atomic read/write and +// no lock/unlock. This greatly reduces contention in +// syscall- or cgo-heavy multithreaded programs. +// +// Except for entersyscall and exitsyscall, the manipulations +// to these fields only happen while holding the schedlock, +// so the routines holding schedlock only need to worry about +// what entersyscall and exitsyscall do, not the other routines +// (which also use the schedlock). +// +// In particular, entersyscall and exitsyscall only read mcpumax, +// waitstop, and gwaiting. They never write them. Thus, writes to those +// fields can be done (holding schedlock) without fear of write conflicts. +// There may still be logic conflicts: for example, the set of waitstop must +// be conditioned on mcpu >= mcpumax or else the wait may be a +// spurious sleep. The Promela model in proc.p verifies these accesses. +enum { + mcpuWidth = 15, + mcpuMask = (1<<mcpuWidth) - 1, + mcpuShift = 0, + mcpumaxShift = mcpuShift + mcpuWidth, + waitstopShift = mcpumaxShift + mcpuWidth, + gwaitingShift = waitstopShift+1, + + // The max value of GOMAXPROCS is constrained + // by the max value we can store in the bit fields + // of the atomic word. Reserve a few high values + // so that we can detect accidental decrement + // beyond zero. 
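Editor's note: the packed scheduling word is the heart of this change, so a compilable restatement may help. The sketch below packs the two 15-bit counters and the two flag bits exactly as described above and shows the two update shapes used throughout the patch: a CAS loop that claims an mcpu slot (the shape of canaddmcpu) and one that rewrites only the mcpumax field (the shape of setmcpumax). GCC atomics stand in for runtime·cas; the function names are illustrative.

/* schedword.c -- restatement of the packed atomic scheduling word. */
#include <stdint.h>
#include <stdbool.h>

enum {
    mcpuWidth = 15,
    mcpuMask = (1<<mcpuWidth) - 1,
    mcpuShift = 0,
    mcpumaxShift = mcpuShift + mcpuWidth,
    waitstopShift = mcpumaxShift + mcpuWidth,
    gwaitingShift = waitstopShift + 1,
};

#define MCPU(v)      (((v)>>mcpuShift)&mcpuMask)
#define MCPUMAX(v)   (((v)>>mcpumaxShift)&mcpuMask)
#define WAITSTOP(v)  (((v)>>waitstopShift)&1)
#define GWAITING(v)  (((v)>>gwaitingShift)&1)

/* Claim a cpu slot if one is free. */
bool
try_add_mcpu(uint32_t *atomic)
{
    uint32_t v;

    for(;;) {
        v = __atomic_load_n(atomic, __ATOMIC_SEQ_CST);
        if(MCPU(v) >= MCPUMAX(v))
            return false;
        if(__atomic_compare_exchange_n(atomic, &v, v + (1<<mcpuShift), 0,
                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
            return true;
    }
}

/* Rewrite only the mcpumax field, leaving the others alone. */
void
set_mcpumax(uint32_t *atomic, uint32_t n)
{
    uint32_t v, w;

    for(;;) {
        v = __atomic_load_n(atomic, __ATOMIC_SEQ_CST);
        w = v;
        w &= ~(uint32_t)(mcpuMask<<mcpumaxShift);
        w |= n<<mcpumaxShift;
        if(__atomic_compare_exchange_n(atomic, &v, w, 0,
                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
            return;
    }
}

Reserving the top values of the counter range (maxgomaxprocs = mcpuMask - 10) means an accidental decrement below zero wraps into the reserved range, which the "negative mcpu" throws can then catch.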
+ maxgomaxprocs = mcpuMask - 10, }; +#define atomic_mcpu(v) (((v)>>mcpuShift)&mcpuMask) +#define atomic_mcpumax(v) (((v)>>mcpumaxShift)&mcpuMask) +#define atomic_waitstop(v) (((v)>>waitstopShift)&1) +#define atomic_gwaiting(v) (((v)>>gwaitingShift)&1) + Sched runtime·sched; int32 runtime·gomaxprocs; @@ -93,9 +141,25 @@ static void mput(M*); // put/get on mhead static M* mget(G*); static void gfput(G*); // put/get on gfree static G* gfget(void); -static void matchmg(void); // match ms to gs +static void matchmg(void); // match m's to g's static void readylocked(G*); // ready, but sched is locked static void mnextg(M*, G*); +static void mcommoninit(M*); + +void +setmcpumax(uint32 n) +{ + uint32 v, w; + + for(;;) { + v = runtime·sched.atomic; + w = v; + w &= ~(mcpuMask<<mcpumaxShift); + w |= n<<mcpumaxShift; + if(runtime·cas(&runtime·sched.atomic, v, w)) + break; + } +} // The bootstrap sequence is: // @@ -115,10 +179,10 @@ runtime·schedinit(void) int32 n; byte *p; - runtime·allm = m; m->nomemprof++; - runtime·mallocinit(); + mcommoninit(m); + runtime·goargs(); runtime·goenvs(); @@ -129,10 +193,12 @@ runtime·schedinit(void) runtime·gomaxprocs = 1; p = runtime·getenv("GOMAXPROCS"); - if(p != nil && (n = runtime·atoi(p)) != 0) + if(p != nil && (n = runtime·atoi(p)) != 0) { + if(n > maxgomaxprocs) + n = maxgomaxprocs; runtime·gomaxprocs = n; - runtime·sched.mcpumax = runtime·gomaxprocs; - runtime·sched.mcount = 1; + } + setmcpumax(runtime·gomaxprocs); runtime·sched.predawn = 1; m->nomemprof--; @@ -167,7 +233,7 @@ runtime·initdone(void) mstats.enablegc = 1; // If main·init_function started other goroutines, - // kick off new ms to handle them, like ready + // kick off new m's to handle them, like ready // would have, had it not been pre-dawn. schedlock(); matchmg(); @@ -206,6 +272,37 @@ runtime·idlegoroutine(void) g->idlem = m; } +static void +mcommoninit(M *m) +{ + // Add to runtime·allm so garbage collector doesn't free m + // when it is just in a register or thread-local storage. + m->alllink = runtime·allm; + // runtime·Cgocalls() iterates over allm w/o schedlock, + // so we need to publish it safely. + runtime·atomicstorep(&runtime·allm, m); + + m->id = runtime·sched.mcount++; + m->fastrand = 0x49f6428aUL + m->id; + m->stackalloc = runtime·malloc(sizeof(*m->stackalloc)); + runtime·FixAlloc_Init(m->stackalloc, FixedStack, runtime·SysAlloc, nil, nil); +} + +// Try to increment mcpu. Report whether succeeded. +static bool +canaddmcpu(void) +{ + uint32 v; + + for(;;) { + v = runtime·sched.atomic; + if(atomic_mcpu(v) >= atomic_mcpumax(v)) + return 0; + if(runtime·cas(&runtime·sched.atomic, v, v+(1<<mcpuShift))) + return 1; + } +} + // Put on `g' queue. Sched must be locked. static void gput(G *g) @@ -213,11 +310,11 @@ gput(G *g) M *m; // If g is wired, hand it off directly. - if(runtime·sched.mcpu < runtime·sched.mcpumax && (m = g->lockedm) != nil) { + if((m = g->lockedm) != nil && canaddmcpu()) { mnextg(m, g); return; } - + // If g is the idle goroutine for an m, hand it off. if(g->idlem != nil) { if(g->idlem->idleg != nil) { @@ -236,7 +333,18 @@ gput(G *g) else runtime·sched.gtail->schedlink = g; runtime·sched.gtail = g; - runtime·sched.gwait++; + + // increment gwait. + // if it transitions to nonzero, set atomic gwaiting bit. + if(runtime·sched.gwait++ == 0) + runtime·xadd(&runtime·sched.atomic, 1<<gwaitingShift); +} + +// Report whether gget would return something. +static bool +haveg(void) +{ + return runtime·sched.ghead != nil || m->idleg != nil; } // Get from `g' queue. 
Sched must be locked. @@ -250,7 +358,10 @@ gget(void) runtime·sched.ghead = g->schedlink; if(runtime·sched.ghead == nil) runtime·sched.gtail = nil; - runtime·sched.gwait--; + // decrement gwait. + // if it transitions to zero, clear atomic gwaiting bit. + if(--runtime·sched.gwait == 0) + runtime·xadd(&runtime·sched.atomic, -1<<gwaitingShift); } else if(m->idleg != nil) { g = m->idleg; m->idleg = nil; @@ -335,10 +446,11 @@ newprocreadylocked(G *g) } // Pass g to m for running. +// Caller has already incremented mcpu. static void mnextg(M *m, G *g) { - runtime·sched.mcpu++; + runtime·sched.grunning++; m->nextg = g; if(m->waitnextg) { m->waitnextg = 0; @@ -350,18 +462,19 @@ mnextg(M *m, G *g) // Get the next goroutine that m should run. // Sched must be locked on entry, is unlocked on exit. -// Makes sure that at most $GOMAXPROCS gs are +// Makes sure that at most $GOMAXPROCS g's are // running on cpus (not in system calls) at any given time. static G* nextgandunlock(void) { G *gp; + uint32 v; - if(runtime·sched.mcpu < 0) - runtime·throw("negative runtime·sched.mcpu"); + if(atomic_mcpu(runtime·sched.atomic) >= maxgomaxprocs) + runtime·throw("negative mcpu"); - // If there is a g waiting as m->nextg, - // mnextg took care of the runtime·sched.mcpu++. + // If there is a g waiting as m->nextg, the mcpu++ + // happened before it was passed to mnextg. if(m->nextg != nil) { gp = m->nextg; m->nextg = nil; @@ -373,29 +486,62 @@ nextgandunlock(void) // We can only run one g, and it's not available. // Make sure some other cpu is running to handle // the ordinary run queue. - if(runtime·sched.gwait != 0) + if(runtime·sched.gwait != 0) { matchmg(); + // m->lockedg might have been on the queue. + if(m->nextg != nil) { + gp = m->nextg; + m->nextg = nil; + schedunlock(); + return gp; + } + } } else { // Look for work on global queue. - while(runtime·sched.mcpu < runtime·sched.mcpumax && (gp=gget()) != nil) { + while(haveg() && canaddmcpu()) { + gp = gget(); + if(gp == nil) + runtime·throw("gget inconsistency"); + if(gp->lockedm) { mnextg(gp->lockedm, gp); continue; } - runtime·sched.mcpu++; // this m will run gp + runtime·sched.grunning++; schedunlock(); return gp; } - // Otherwise, wait on global m queue. + + // The while loop ended either because the g queue is empty + // or because we have maxed out our m procs running go + // code (mcpu >= mcpumax). We need to check that + // concurrent actions by entersyscall/exitsyscall cannot + // invalidate the decision to end the loop. + // + // We hold the sched lock, so no one else is manipulating the + // g queue or changing mcpumax. Entersyscall can decrement + // mcpu, but if does so when there is something on the g queue, + // the gwait bit will be set, so entersyscall will take the slow path + // and use the sched lock. So it cannot invalidate our decision. + // + // Wait on global m queue. mput(m); } - if(runtime·sched.mcpu == 0 && runtime·sched.msyscall == 0) + + v = runtime·atomicload(&runtime·sched.atomic); + if(runtime·sched.grunning == 0) runtime·throw("all goroutines are asleep - deadlock!"); m->nextg = nil; m->waitnextg = 1; runtime·noteclear(&m->havenextg); - if(runtime·sched.waitstop && runtime·sched.mcpu <= runtime·sched.mcpumax) { - runtime·sched.waitstop = 0; + + // Stoptheworld is waiting for all but its cpu to go to stop. + // Entersyscall might have decremented mcpu too, but if so + // it will see the waitstop and take the slow path. + // Exitsyscall never increments mcpu beyond mcpumax. 
+ if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) { + // set waitstop = 0 (known to be 1) + runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift); runtime·notewakeup(&runtime·sched.stopped); } schedunlock(); @@ -407,21 +553,34 @@ nextgandunlock(void) return gp; } -// TODO(rsc): Remove. This is only temporary, -// for the mark and sweep collector. void runtime·stoptheworld(void) { + uint32 v; + schedlock(); runtime·gcwaiting = 1; - runtime·sched.mcpumax = 1; - while(runtime·sched.mcpu > 1) { + + setmcpumax(1); + + // while mcpu > 1 + for(;;) { + v = runtime·sched.atomic; + if(atomic_mcpu(v) <= 1) + break; + // It would be unsafe for multiple threads to be using // the stopped note at once, but there is only - // ever one thread doing garbage collection, - // so this is okay. + // ever one thread doing garbage collection. runtime·noteclear(&runtime·sched.stopped); - runtime·sched.waitstop = 1; + if(atomic_waitstop(v)) + runtime·throw("invalid waitstop"); + + // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above + // still being true. + if(!runtime·cas(&runtime·sched.atomic, v, v+(1<<waitstopShift))) + continue; + schedunlock(); runtime·notesleep(&runtime·sched.stopped); schedlock(); @@ -436,7 +595,7 @@ runtime·starttheworld(void) { schedlock(); runtime·gcwaiting = 0; - runtime·sched.mcpumax = runtime·gomaxprocs; + setmcpumax(runtime·gomaxprocs); matchmg(); schedunlock(); } @@ -473,7 +632,7 @@ struct CgoThreadStart void (*fn)(void); }; -// Kick off new ms as needed (up to mcpumax). +// Kick off new m's as needed (up to mcpumax). // There are already `other' other cpus that will // start looking for goroutines shortly. // Sched is locked. @@ -484,17 +643,17 @@ matchmg(void) if(m->mallocing || m->gcing) return; - while(runtime·sched.mcpu < runtime·sched.mcpumax && (g = gget()) != nil){ - M *m; + + while(haveg() && canaddmcpu()) { + g = gget(); + if(g == nil) + runtime·throw("gget inconsistency"); // Find the m that will run g. + M *m; if((m = mget(g)) == nil){ m = runtime·malloc(sizeof(M)); - // Add to runtime·allm so garbage collector doesn't free m - // when it is just in a register or thread-local storage. - m->alllink = runtime·allm; - runtime·allm = m; - m->id = runtime·sched.mcount++; + mcommoninit(m); if(runtime·iscgo) { CgoThreadStart ts; @@ -528,6 +687,7 @@ static void schedule(G *gp) { int32 hz; + uint32 v; schedlock(); if(gp != nil) { @@ -536,10 +696,13 @@ schedule(G *gp) // Just finished running gp. gp->m = nil; - runtime·sched.mcpu--; + runtime·sched.grunning--; + + // atomic { mcpu-- } + v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift); + if(atomic_mcpu(v) > maxgomaxprocs) + runtime·throw("negative mcpu in scheduler"); - if(runtime·sched.mcpu < 0) - runtime·throw("runtime·sched.mcpu < 0 in scheduler"); switch(gp->status){ case Grunnable: case Gdead: @@ -574,7 +737,7 @@ schedule(G *gp) gp->status = Grunning; m->curg = gp; gp->m = m; - + // Check whether the profiler needs to be turned on or off. hz = runtime·sched.profilehz; if(m->profilehz != hz) @@ -618,31 +781,50 @@ runtime·gosched(void) void runtime·entersyscall(void) { + uint32 v; + if(runtime·sched.predawn) return; - schedlock(); - g->status = Gsyscall; - runtime·sched.mcpu--; - runtime·sched.msyscall++; - if(runtime·sched.gwait != 0) - matchmg(); - - if(runtime·sched.waitstop && runtime·sched.mcpu <= runtime·sched.mcpumax) { - runtime·sched.waitstop = 0; - runtime·notewakeup(&runtime·sched.stopped); - } // Leave SP around for gc and traceback. 
- // Do before schedunlock so that gc - // never sees Gsyscall with wrong stack. runtime·gosave(&g->sched); g->gcsp = g->sched.sp; g->gcstack = g->stackbase; g->gcguard = g->stackguard; + g->status = Gsyscall; if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) { - runtime·printf("entersyscall inconsistent %p [%p,%p]\n", g->gcsp, g->gcguard-StackGuard, g->gcstack); + // runtime·printf("entersyscall inconsistent %p [%p,%p]\n", + // g->gcsp, g->gcguard-StackGuard, g->gcstack); runtime·throw("entersyscall"); } + + // Fast path. + // The slow path inside the schedlock/schedunlock will get + // through without stopping if it does: + // mcpu-- + // gwait not true + // waitstop && mcpu <= mcpumax not true + // If we can do the same with a single atomic add, + // then we can skip the locks. + v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift); + if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v))) + return; + + schedlock(); + v = runtime·atomicload(&runtime·sched.atomic); + if(atomic_gwaiting(v)) { + matchmg(); + v = runtime·atomicload(&runtime·sched.atomic); + } + if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) { + runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift); + runtime·notewakeup(&runtime·sched.stopped); + } + + // Re-save sched in case one of the calls + // (notewakeup, matchmg) triggered something using it. + runtime·gosave(&g->sched); + schedunlock(); } @@ -653,22 +835,28 @@ runtime·entersyscall(void) void runtime·exitsyscall(void) { + uint32 v; + if(runtime·sched.predawn) return; - schedlock(); - runtime·sched.msyscall--; - runtime·sched.mcpu++; - // Fast path - if there's room for this m, we're done. - if(m->profilehz == runtime·sched.profilehz && runtime·sched.mcpu <= runtime·sched.mcpumax) { + // Fast path. + // If we can do the mcpu++ bookkeeping and + // find that we still have mcpu <= mcpumax, then we can + // start executing Go code immediately, without having to + // schedlock/schedunlock. + v = runtime·xadd(&runtime·sched.atomic, (1<<mcpuShift)); + if(m->profilehz == runtime·sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) { // There's a cpu for us, so we can run. g->status = Grunning; // Garbage collector isn't running (since we are), // so okay to clear gcstack. g->gcstack = nil; - schedunlock(); return; } + + schedlock(); + // Tell scheduler to put g back on the run queue: // mostly equivalent to g->status = Grunning, // but keeps the garbage collector from thinking @@ -676,12 +864,12 @@ runtime·exitsyscall(void) g->readyonstop = 1; schedunlock(); - // Slow path - all the cpus are taken. + // All the cpus are taken. // The scheduler will ready g and put this m to sleep. // When the scheduler takes g away from m, // it will undo the runtime·sched.mcpu++ above. runtime·gosched(); - + // Gosched returned, so we're allowed to run now. // Delete the gcstack information that we left for // the garbage collector during the system call. 
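Editor's note: the fast paths above are the payoff of the packed word: entersyscall and exitsyscall normally do their scheduler bookkeeping with one atomic add and no lock. The sketch below restates that bookkeeping with GCC atomics; the field layout is repeated from the earlier sketch, exitsyscall's profiling-rate check is omitted (as it is in the Promela model further down), and the function names are illustrative.

/* syscallfast.c -- sketch of the lock-free syscall entry/exit accounting. */
#include <stdint.h>
#include <stdbool.h>

enum { mcpuWidth = 15, mcpuMask = (1<<mcpuWidth) - 1, mcpuShift = 0,
    mcpumaxShift = 15, waitstopShift = 30, gwaitingShift = 31 };
#define MCPU(v)      (((v)>>mcpuShift)&mcpuMask)
#define MCPUMAX(v)   (((v)>>mcpumaxShift)&mcpuMask)
#define WAITSTOP(v)  (((v)>>waitstopShift)&1)
#define GWAITING(v)  (((v)>>gwaitingShift)&1)

/* add-and-fetch, like runtime·xadd */
static uint32_t
xadd(uint32_t *p, int32_t delta)
{
    return __atomic_add_fetch(p, (uint32_t)delta, __ATOMIC_SEQ_CST);
}

/* Returns true if the caller may skip schedlock entirely. */
bool
enter_syscall_fast(uint32_t *atomic)
{
    uint32_t v;

    /* mcpu--: safe as a plain add because mcpu > 0 here, so the
     * subtraction cannot borrow into the mcpumax field. */
    v = xadd(atomic, -(1<<mcpuShift));
    return !GWAITING(v) && (!WAITSTOP(v) || MCPU(v) > MCPUMAX(v));
}

/* Returns true if a cpu slot is free and Go code can resume at once. */
bool
exit_syscall_fast(uint32_t *atomic)
{
    uint32_t v;

    v = xadd(atomic, 1<<mcpuShift);    /* mcpu++ */
    return MCPU(v) <= MCPUMAX(v);
}

If the fast-path test fails, both routines fall back to schedlock and redo the checks there, so matchmg and the waitstop wakeup are never missed.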
@@ -698,7 +886,7 @@ runtime·oldstack(void) uint32 argsize; byte *sp; G *g1; - static int32 goid; + int32 goid; //printf("oldstack m->cret=%p\n", m->cret); @@ -709,9 +897,10 @@ runtime·oldstack(void) argsize = old.argsize; if(argsize > 0) { sp -= argsize; - runtime·mcpy(top->argp, sp, argsize); + runtime·memmove(top->argp, sp, argsize); } goid = old.gobuf.g->goid; // fault if g is bad, before gogo + USED(goid); if(old.free != 0) runtime·stackfree(g1->stackguard - StackGuard, old.free); @@ -790,7 +979,7 @@ runtime·newstack(void) sp = (byte*)top; if(argsize > 0) { sp -= argsize; - runtime·mcpy(sp, m->moreargp, argsize); + runtime·memmove(sp, m->moreargp, argsize); } if(thechar == '5') { // caller would have saved its LR below args. @@ -855,7 +1044,7 @@ void runtime·newproc(int32 siz, byte* fn, ...) { byte *argp; - + if(thechar == '5') argp = (byte*)(&fn+2); // skip caller's saved LR else @@ -873,8 +1062,13 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc) //printf("newproc1 %p %p narg=%d nret=%d\n", fn, argp, narg, nret); siz = narg + nret; siz = (siz+7) & ~7; - if(siz > 1024) - runtime·throw("runtime.newproc: too many args"); + + // We could instead create a secondary stack frame + // and make it look like goexit was on the original but + // the call to the actual goroutine function was split. + // Not worth it: this is almost always an error. + if(siz > StackMin - 1024) + runtime·throw("runtime.newproc: function arguments too large for new goroutine"); schedlock(); @@ -891,7 +1085,7 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc) sp = newg->stackbase; sp -= siz; - runtime·mcpy(sp, argp, narg); + runtime·memmove(sp, argp, narg); if(thechar == '5') { // caller's LR sp -= sizeof(void*); @@ -905,8 +1099,8 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc) newg->gopc = (uintptr)callerpc; runtime·sched.gcount++; - runtime·goidgen++; - newg->goid = runtime·goidgen; + runtime·sched.goidgen++; + newg->goid = runtime·sched.goidgen; newprocreadylocked(newg); schedunlock(); @@ -929,11 +1123,11 @@ runtime·deferproc(int32 siz, byte* fn, ...) d->argp = (byte*)(&fn+2); // skip caller's saved link register else d->argp = (byte*)(&fn+1); - runtime·mcpy(d->args, d->argp, d->siz); + runtime·memmove(d->args, d->argp, d->siz); d->link = g->defer; g->defer = d; - + // deferproc returns 0 normally. // a deferred func that stops a panic // makes the deferproc return 1. @@ -956,7 +1150,7 @@ runtime·deferreturn(uintptr arg0) argp = (byte*)&arg0; if(d->argp != argp) return; - runtime·mcpy(argp, d->args, d->siz); + runtime·memmove(argp, d->args, d->siz); g->defer = d->link; fn = d->fn; runtime·free(d); @@ -965,9 +1159,9 @@ runtime·deferreturn(uintptr arg0) static void rundefer(void) -{ +{ Defer *d; - + while((d = g->defer) != nil) { g->defer = d->link; reflect·call(d->fn, d->args, d->siz); @@ -982,7 +1176,7 @@ unwindstack(G *gp, byte *sp) { Stktop *top; byte *stk; - + // Must be called from a different goroutine, usually m->g0. if(g == gp) runtime·throw("unwindstack on self"); @@ -1018,7 +1212,7 @@ printpanics(Panic *p) } static void recovery(G*); - + void runtime·panic(Eface e) { @@ -1068,7 +1262,7 @@ recovery(G *gp) // Rewind gp's stack; we're running on m->g0's stack. d = gp->defer; gp->defer = d->link; - + // Unwind to the stack frame with d's arguments in it. 
unwindstack(gp, d->argp); @@ -1216,25 +1410,29 @@ int32 runtime·gomaxprocsfunc(int32 n) { int32 ret; + uint32 v; schedlock(); ret = runtime·gomaxprocs; - if (n <= 0) + if(n <= 0) n = ret; + if(n > maxgomaxprocs) + n = maxgomaxprocs; runtime·gomaxprocs = n; - if (runtime·gcwaiting != 0) { - if (runtime·sched.mcpumax != 1) - runtime·throw("invalid runtime·sched.mcpumax during gc"); + if(runtime·gcwaiting != 0) { + if(atomic_mcpumax(runtime·sched.atomic) != 1) + runtime·throw("invalid mcpumax during gc"); schedunlock(); return ret; } - runtime·sched.mcpumax = n; - // handle fewer procs? - if(runtime·sched.mcpu > runtime·sched.mcpumax) { + + setmcpumax(n); + + // If there are now fewer allowed procs + // than procs running, stop. + v = runtime·atomicload(&runtime·sched.atomic); + if(atomic_mcpu(v) > n) { schedunlock(); - // just give up the cpu. - // we'll only get rescheduled once the - // number has come down. runtime·gosched(); return ret; } @@ -1301,10 +1499,10 @@ void runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp) { int32 n; - + if(prof.fn == nil || prof.hz == 0) return; - + runtime·lock(&prof); if(prof.fn == nil) { runtime·unlock(&prof); @@ -1339,7 +1537,7 @@ runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) runtime·lock(&runtime·sched); runtime·sched.profilehz = hz; runtime·unlock(&runtime·sched); - + if(hz != 0) runtime·resetcpuprofiler(hz); } @@ -1355,11 +1553,11 @@ os·setenv_c(String k, String v) return; arg[0] = runtime·malloc(k.len + 1); - runtime·mcpy(arg[0], k.str, k.len); + runtime·memmove(arg[0], k.str, k.len); arg[0][k.len] = 0; arg[1] = runtime·malloc(v.len + 1); - runtime·mcpy(arg[1], v.str, v.len); + runtime·memmove(arg[1], v.str, v.len); arg[1][v.len] = 0; runtime·asmcgocall(libcgo_setenv, arg); diff --git a/src/pkg/runtime/proc.p b/src/pkg/runtime/proc.p new file mode 100644 index 000000000..f0b46de61 --- /dev/null +++ b/src/pkg/runtime/proc.p @@ -0,0 +1,526 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +model for proc.c as of 2011/07/22. + +takes 4900 seconds to explore 1189070 states +with G=3, var_gomaxprocs=1 +on a Core i7 L640 2.13 GHz Lenovo X201s. + +rm -f proc.p.trail pan.* pan +spin -a proc.p +gcc -DSAFETY -DREACH -DMEMLIM'='4000 -o pan pan.c +pan -w28 -n -i -m500000 +test -f proc.p.trail && pan -r proc.p.trail +*/ + +/* + * scheduling parameters + */ + +/* + * the number of goroutines G doubles as the maximum + * number of OS threads; the max is reachable when all + * the goroutines are blocked in system calls. + */ +#define G 3 + +/* + * whether to allow gomaxprocs to vary during execution. + * enabling this checks the scheduler even when code is + * calling GOMAXPROCS, but it also slows down the verification + * by about 10x. + */ +#define var_gomaxprocs 1 /* allow gomaxprocs to vary */ + +/* gomaxprocs */ +#if var_gomaxprocs +byte gomaxprocs = 3; +#else +#define gomaxprocs 3 +#endif + +/* queue of waiting M's: sched_mhead[:mwait] */ +byte mwait; +byte sched_mhead[G]; + +/* garbage collector state */ +bit gc_lock, gcwaiting; + +/* goroutines sleeping, waiting to run */ +byte gsleep, gwait; + +/* scheduler state */ +bit sched_lock; +bit sched_stopped; +bit atomic_gwaiting, atomic_waitstop; +byte atomic_mcpu, atomic_mcpumax; + +/* M struct fields - state for handing off g to m. 
*/ +bit m_waitnextg[G]; +bit m_havenextg[G]; +bit m_nextg[G]; + +/* + * opt_atomic/opt_dstep mark atomic/deterministics + * sequences that are marked only for reasons of + * optimization, not for correctness of the algorithms. + * + * in general any code that runs while holding the + * schedlock and does not refer to or modify the atomic_* + * fields can be marked atomic/dstep without affecting + * the usefulness of the model. since we trust the lock + * implementation, what we really want to test is the + * interleaving of the atomic fast paths with entersyscall + * and exitsyscall. + */ +#define opt_atomic atomic +#define opt_dstep d_step + +/* locks */ +inline lock(x) { + d_step { x == 0; x = 1 } +} + +inline unlock(x) { + d_step { assert x == 1; x = 0 } +} + +/* notes */ +inline noteclear(x) { + x = 0 +} + +inline notesleep(x) { + x == 1 +} + +inline notewakeup(x) { + opt_dstep { assert x == 0; x = 1 } +} + +/* + * scheduler + */ +inline schedlock() { + lock(sched_lock) +} + +inline schedunlock() { + unlock(sched_lock) +} + +/* + * canaddmcpu is like the C function but takes + * an extra argument to include in the test, to model + * "cannget() && canaddmcpu()" as "canaddmcpu(cangget())" + */ +inline canaddmcpu(g) { + d_step { + g && atomic_mcpu < atomic_mcpumax; + atomic_mcpu++; + } +} + +/* + * gput is like the C function. + * instead of tracking goroutines explicitly we + * maintain only the count of the number of + * waiting goroutines. + */ +inline gput() { + /* omitted: lockedm, idlem concerns */ + opt_dstep { + gwait++; + if + :: gwait == 1 -> + atomic_gwaiting = 1 + :: else + fi + } +} + +/* + * cangget is a macro so it can be passed to + * canaddmcpu (see above). + */ +#define cangget() (gwait>0) + +/* + * gget is like the C function. + */ +inline gget() { + opt_dstep { + assert gwait > 0; + gwait--; + if + :: gwait == 0 -> + atomic_gwaiting = 0 + :: else + fi + } +} + +/* + * mput is like the C function. + * here we do keep an explicit list of waiting M's, + * so that we know which ones can be awakened. + * we use _pid-1 because the monitor is proc 0. + */ +inline mput() { + opt_dstep { + sched_mhead[mwait] = _pid - 1; + mwait++ + } +} + +/* + * mnextg is like the C function mnextg(m, g). + * it passes an unspecified goroutine to m to start running. + */ +inline mnextg(m) { + opt_dstep { + m_nextg[m] = 1; + if + :: m_waitnextg[m] -> + m_waitnextg[m] = 0; + notewakeup(m_havenextg[m]) + :: else + fi + } +} + +/* + * mgetnextg handles the main m handoff in matchmg. + * it is like mget() || new M followed by mnextg(m, g), + * but combined to avoid a local variable. + * unlike the C code, a new M simply assumes it is + * running a g instead of using the mnextg coordination + * to obtain one. + */ +inline mgetnextg() { + opt_atomic { + if + :: mwait > 0 -> + mwait--; + mnextg(sched_mhead[mwait]); + sched_mhead[mwait] = 0; + :: else -> + run mstart(); + fi + } +} + +/* + * nextgandunlock is like the C function. + * it pulls a g off the queue or else waits for one. 
+ */ +inline nextgandunlock() { + assert atomic_mcpu <= G; + + if + :: m_nextg[_pid-1] -> + m_nextg[_pid-1] = 0; + schedunlock(); + :: canaddmcpu(!m_nextg[_pid-1] && cangget()) -> + gget(); + schedunlock(); + :: else -> + opt_dstep { + mput(); + m_nextg[_pid-1] = 0; + m_waitnextg[_pid-1] = 1; + noteclear(m_havenextg[_pid-1]); + } + if + :: atomic_waitstop && atomic_mcpu <= atomic_mcpumax -> + atomic_waitstop = 0; + notewakeup(sched_stopped) + :: else + fi; + schedunlock(); + opt_dstep { + notesleep(m_havenextg[_pid-1]); + assert m_nextg[_pid-1]; + m_nextg[_pid-1] = 0; + } + fi +} + +/* + * stoptheworld is like the C function. + */ +inline stoptheworld() { + schedlock(); + gcwaiting = 1; + atomic_mcpumax = 1; + do + :: d_step { atomic_mcpu > 1 -> + noteclear(sched_stopped); + assert !atomic_waitstop; + atomic_waitstop = 1 } + schedunlock(); + notesleep(sched_stopped); + schedlock(); + :: else -> + break + od; + schedunlock(); +} + +/* + * starttheworld is like the C function. + */ +inline starttheworld() { + schedlock(); + gcwaiting = 0; + atomic_mcpumax = gomaxprocs; + matchmg(); + schedunlock(); +} + +/* + * matchmg is like the C function. + */ +inline matchmg() { + do + :: canaddmcpu(cangget()) -> + gget(); + mgetnextg(); + :: else -> break + od +} + +/* + * ready is like the C function. + * it puts a g on the run queue. + */ +inline ready() { + schedlock(); + gput() + matchmg() + schedunlock() +} + +/* + * schedule simulates the C scheduler. + * it assumes that there is always a goroutine + * running already, and the goroutine has entered + * the scheduler for an unspecified reason, + * either to yield or to block. + */ +inline schedule() { + schedlock(); + + mustsched = 0; + atomic_mcpu--; + assert atomic_mcpu <= G; + if + :: skip -> + // goroutine yields, still runnable + gput(); + :: gsleep+1 < G -> + // goroutine goes to sleep (but there is another that can wake it) + gsleep++ + fi; + + // Find goroutine to run. + nextgandunlock() +} + +/* + * schedpend is > 0 if a goroutine is about to committed to + * entering the scheduler but has not yet done so. + * Just as we don't test for the undesirable conditions when a + * goroutine is in the scheduler, we don't test for them when + * a goroutine will be in the scheduler shortly. + * Modeling this state lets us replace mcpu cas loops with + * simpler mcpu atomic adds. + */ +byte schedpend; + +/* + * entersyscall is like the C function. + */ +inline entersyscall() { + bit willsched; + + /* + * Fast path. Check all the conditions tested during schedlock/schedunlock + * below, and if we can get through the whole thing without stopping, run it + * in one atomic cas-based step. + */ + atomic { + atomic_mcpu--; + if + :: atomic_gwaiting -> + skip + :: atomic_waitstop && atomic_mcpu <= atomic_mcpumax -> + skip + :: else -> + goto Lreturn_entersyscall; + fi; + willsched = 1; + schedpend++; + } + + /* + * Normal path. + */ + schedlock() + opt_dstep { + if + :: willsched -> + schedpend--; + willsched = 0 + :: else + fi + } + if + :: atomic_gwaiting -> + matchmg() + :: else + fi; + if + :: atomic_waitstop && atomic_mcpu <= atomic_mcpumax -> + atomic_waitstop = 0; + notewakeup(sched_stopped) + :: else + fi; + schedunlock(); +Lreturn_entersyscall: + skip +} + +/* + * exitsyscall is like the C function. + */ +inline exitsyscall() { + /* + * Fast path. If there's a cpu available, use it. 
+ */ + atomic { + // omitted profilehz check + atomic_mcpu++; + if + :: atomic_mcpu >= atomic_mcpumax -> + skip + :: else -> + goto Lreturn_exitsyscall + fi + } + + /* + * Normal path. + */ + schedlock(); + d_step { + if + :: atomic_mcpu <= atomic_mcpumax -> + skip + :: else -> + mustsched = 1 + fi + } + schedunlock() +Lreturn_exitsyscall: + skip +} + +#if var_gomaxprocs +inline gomaxprocsfunc() { + schedlock(); + opt_atomic { + if + :: gomaxprocs != 1 -> gomaxprocs = 1 + :: gomaxprocs != 2 -> gomaxprocs = 2 + :: gomaxprocs != 3 -> gomaxprocs = 3 + fi; + } + if + :: gcwaiting != 0 -> + assert atomic_mcpumax == 1 + :: else -> + atomic_mcpumax = gomaxprocs; + if + :: atomic_mcpu > gomaxprocs -> + mustsched = 1 + :: else -> + matchmg() + fi + fi; + schedunlock(); +} +#endif + +/* + * mstart is the entry point for a new M. + * our model of an M is always running some + * unspecified goroutine. + */ +proctype mstart() { + /* + * mustsched is true if the goroutine must enter the + * scheduler instead of continuing to execute. + */ + bit mustsched; + + do + :: skip -> + // goroutine reschedules. + schedule() + :: !mustsched -> + // goroutine does something. + if + :: skip -> + // goroutine executes system call + entersyscall(); + exitsyscall() + :: atomic { gsleep > 0; gsleep-- } -> + // goroutine wakes another goroutine + ready() + :: lock(gc_lock) -> + // goroutine runs a garbage collection + stoptheworld(); + starttheworld(); + unlock(gc_lock) +#if var_gomaxprocs + :: skip -> + // goroutine picks a new gomaxprocs + gomaxprocsfunc() +#endif + fi + od; + + assert 0; +} + +/* + * monitor initializes the scheduler state + * and then watches for impossible conditions. + */ +active proctype monitor() { + opt_dstep { + byte i = 1; + do + :: i < G -> + gput(); + i++ + :: else -> break + od; + atomic_mcpu = 1; + atomic_mcpumax = 1; + } + run mstart(); + + do + // Should never have goroutines waiting with procs available. + :: !sched_lock && schedpend==0 && gwait > 0 && atomic_mcpu < atomic_mcpumax -> + assert 0 + // Should never have gc waiting for stop if things have already stopped. 
+ :: !sched_lock && schedpend==0 && atomic_waitstop && atomic_mcpu <= atomic_mcpumax -> + assert 0 + od +} diff --git a/src/pkg/runtime/proc_test.go b/src/pkg/runtime/proc_test.go index cac4f9eea..32111080a 100644 --- a/src/pkg/runtime/proc_test.go +++ b/src/pkg/runtime/proc_test.go @@ -6,6 +6,7 @@ package runtime_test import ( "runtime" + "sync/atomic" "testing" ) @@ -44,3 +45,81 @@ func TestStopTheWorldDeadlock(t *testing.T) { stop <- true runtime.GOMAXPROCS(maxprocs) } + +func stackGrowthRecursive(i int) { + var pad [128]uint64 + if i != 0 && pad[0] == 0 { + stackGrowthRecursive(i - 1) + } +} + +func BenchmarkStackGrowth(b *testing.B) { + const CallsPerSched = 1000 + procs := runtime.GOMAXPROCS(-1) + N := int32(b.N / CallsPerSched) + c := make(chan bool, procs) + for p := 0; p < procs; p++ { + go func() { + for atomic.AddInt32(&N, -1) >= 0 { + runtime.Gosched() + for g := 0; g < CallsPerSched; g++ { + stackGrowthRecursive(10) + } + } + c <- true + }() + } + for p := 0; p < procs; p++ { + <-c + } +} + +func BenchmarkSyscall(b *testing.B) { + const CallsPerSched = 1000 + procs := runtime.GOMAXPROCS(-1) + N := int32(b.N / CallsPerSched) + c := make(chan bool, procs) + for p := 0; p < procs; p++ { + go func() { + for atomic.AddInt32(&N, -1) >= 0 { + runtime.Gosched() + for g := 0; g < CallsPerSched; g++ { + runtime.Entersyscall() + runtime.Exitsyscall() + } + } + c <- true + }() + } + for p := 0; p < procs; p++ { + <-c + } +} + +func BenchmarkSyscallWork(b *testing.B) { + const CallsPerSched = 1000 + const LocalWork = 100 + procs := runtime.GOMAXPROCS(-1) + N := int32(b.N / CallsPerSched) + c := make(chan bool, procs) + for p := 0; p < procs; p++ { + go func() { + foo := 42 + for atomic.AddInt32(&N, -1) >= 0 { + runtime.Gosched() + for g := 0; g < CallsPerSched; g++ { + runtime.Entersyscall() + for i := 0; i < LocalWork; i++ { + foo *= 2 + foo /= 2 + } + runtime.Exitsyscall() + } + } + c <- foo == 42 + }() + } + for p := 0; p < procs; p++ { + <-c + } +} diff --git a/src/pkg/runtime/runtime.c b/src/pkg/runtime/runtime.c index 1a3653f10..c572897d2 100644 --- a/src/pkg/runtime/runtime.c +++ b/src/pkg/runtime/runtime.c @@ -11,6 +11,14 @@ enum { uint32 runtime·panicking; +/* + * We assume that all architectures turn faults and the like + * into apparent calls to runtime.sigpanic. If we see a "call" + * to runtime.sigpanic, we do not back up the PC to find the + * line number of the CALL instruction, because there is no CALL. + */ +void runtime·sigpanic(void); + int32 runtime·gotraceback(void) { @@ -116,17 +124,6 @@ runtime·panicstring(int8 *s) runtime·panic(err); } -void -runtime·mcpy(byte *t, byte *f, uint32 n) -{ - while(n > 0) { - *t = *f; - t++; - f++; - n--; - } -} - int32 runtime·mcmp(byte *s1, byte *s2, uint32 n) { @@ -218,20 +215,6 @@ runtime·goenvs_unix(void) os·Envs.cap = n; } -// Atomic add and return new value. 
-uint32 -runtime·xadd(uint32 volatile *val, int32 delta) -{ - uint32 oval, nval; - - for(;;){ - oval = *val; - nval = oval + delta; - if(runtime·cas(val, oval, nval)) - return nval; - } -} - byte* runtime·getenv(int8 *s) { @@ -406,18 +389,11 @@ memprint(uint32 s, void *a) static void memcopy(uint32 s, void *a, void *b) { - byte *ba, *bb; - uint32 i; - - ba = a; - bb = b; - if(bb == nil) { - for(i=0; i<s; i++) - ba[i] = 0; + if(b == nil) { + runtime·memclr(a,s); return; } - for(i=0; i<s; i++) - ba[i] = bb[i]; + runtime·memmove(a,b,s); } static uint32 @@ -551,25 +527,35 @@ runtime·nanotime(void) void runtime·Caller(int32 skip, uintptr retpc, String retfile, int32 retline, bool retbool) { - Func *f; + Func *f, *g; uintptr pc; - - if(runtime·callers(1+skip, &retpc, 1) == 0) { + uintptr rpc[2]; + + /* + * Ask for two PCs: the one we were asked for + * and what it called, so that we can see if it + * "called" sigpanic. + */ + retpc = 0; + if(runtime·callers(1+skip-1, rpc, 2) < 2) { retfile = runtime·emptystring; retline = 0; retbool = false; - } else if((f = runtime·findfunc(retpc)) == nil) { + } else if((f = runtime·findfunc(rpc[1])) == nil) { retfile = runtime·emptystring; retline = 0; retbool = true; // have retpc at least } else { + retpc = rpc[1]; retfile = f->src; pc = retpc; - if(pc > f->entry) + g = runtime·findfunc(rpc[0]); + if(pc > f->entry && (g == nil || g->entry != (uintptr)runtime·sigpanic)) pc--; retline = runtime·funcline(f, pc); retbool = true; } + FLUSH(&retpc); FLUSH(&retfile); FLUSH(&retline); FLUSH(&retbool); @@ -588,3 +574,16 @@ runtime·FuncForPC(uintptr pc, void *retf) retf = runtime·findfunc(pc); FLUSH(&retf); } + +uint32 +runtime·fastrand1(void) +{ + uint32 x; + + x = m->fastrand; + x += x; + if(x & 0x80000000L) + x ^= 0x88888eefUL; + m->fastrand = x; + return x; +} diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h index ad5da0a96..44511da83 100644 --- a/src/pkg/runtime/runtime.h +++ b/src/pkg/runtime/runtime.h @@ -57,6 +57,7 @@ typedef struct String String; typedef struct Usema Usema; typedef struct SigTab SigTab; typedef struct MCache MCache; +typedef struct FixAlloc FixAlloc; typedef struct Iface Iface; typedef struct Itab Itab; typedef struct Eface Eface; @@ -130,7 +131,10 @@ struct Usema union Note { struct { // Linux - Lock lock; + uint32 state; + }; + struct { // Windows + Lock lock; }; struct { // OS X int32 wakeup; @@ -229,12 +233,15 @@ struct M int32 waitnextg; int32 dying; int32 profilehz; + uint32 fastrand; + uint64 ncgocall; Note havenextg; G* nextg; M* alllink; // on allm M* schedlink; uint32 machport; // Return address for Mach IPC (OS X) MCache *mcache; + FixAlloc *stackalloc; G* lockedg; G* idleg; uint32 freglo[16]; // D[i] lsb and F[i] @@ -368,7 +375,6 @@ extern Alg runtime·algarray[Amax]; extern String runtime·emptystring; G* runtime·allg; M* runtime·allm; -int32 runtime·goidgen; extern int32 runtime·gomaxprocs; extern uint32 runtime·panicking; extern int32 runtime·gcwaiting; // gc is waiting to run @@ -379,6 +385,7 @@ extern bool runtime·iscgo; * common functions and data */ int32 runtime·strcmp(byte*, byte*); +byte* runtime·strstr(byte*, byte*); int32 runtime·findnull(byte*); int32 runtime·findnullw(uint16*); void runtime·dump(byte*, int32); @@ -404,13 +411,13 @@ uint32 runtime·rnd(uint32, uint32); void runtime·prints(int8*); void runtime·printf(int8*, ...); byte* runtime·mchr(byte*, byte, byte*); -void runtime·mcpy(byte*, byte*, uint32); int32 runtime·mcmp(byte*, byte*, uint32); void runtime·memmove(void*, void*, uint32); void* 
runtime·mal(uintptr); String runtime·catstring(String, String); String runtime·gostring(byte*); String runtime·gostringn(byte*, int32); +Slice runtime·gobytes(byte*, int32); String runtime·gostringnocopy(byte*); String runtime·gostringw(uint16*); void runtime·initsig(int32); @@ -424,7 +431,11 @@ bool runtime·casp(void**, void*, void*); // Don't confuse with XADD x86 instruction, // this one is actually 'addx', that is, add-and-fetch. uint32 runtime·xadd(uint32 volatile*, int32); -uint32 runtime·atomicload(uint32 volatile*); +uint32 runtime·xchg(uint32 volatile*, uint32); +uint32 runtime·atomicload(uint32 volatile*); +void runtime·atomicstore(uint32 volatile*, uint32); +void* runtime·atomicloadp(void* volatile*); +void runtime·atomicstorep(void* volatile*, void*); void runtime·jmpdefer(byte*, void*); void runtime·exit1(int32); void runtime·ready(G*); @@ -454,6 +465,7 @@ void runtime·runpanic(Panic*); void* runtime·getcallersp(void*); int32 runtime·mcount(void); void runtime·mcall(void(*)(G*)); +uint32 runtime·fastrand1(void); void runtime·exit(int32); void runtime·breakpoint(void); @@ -590,6 +602,8 @@ void runtime·semacquire(uint32*); void runtime·semrelease(uint32*); String runtime·signame(int32 sig); int32 runtime·gomaxprocsfunc(int32 n); +void runtime·procyield(uint32); +void runtime·osyield(void); void runtime·mapassign(Hmap*, byte*, byte*); void runtime·mapaccess(Hmap*, byte*, byte*, bool*); diff --git a/src/pkg/runtime/slice.c b/src/pkg/runtime/slice.c index 9146c177f..70534279b 100644 --- a/src/pkg/runtime/slice.c +++ b/src/pkg/runtime/slice.c @@ -20,7 +20,7 @@ runtime·makeslice(SliceType *t, int64 len, int64 cap, Slice ret) { if(len < 0 || (int32)len != len) runtime·panicstring("makeslice: len out of range"); - if(cap < len || (int32)cap != cap || cap > ((uintptr)-1) / t->elem->size) + if(cap < len || (int32)cap != cap || t->elem->size > 0 && cap > ((uintptr)-1) / t->elem->size) runtime·panicstring("makeslice: cap out of range"); makeslice1(t, len, cap, &ret); diff --git a/src/pkg/runtime/stack.h b/src/pkg/runtime/stack.h index 2b6b0e387..44d5533f4 100644 --- a/src/pkg/runtime/stack.h +++ b/src/pkg/runtime/stack.h @@ -71,6 +71,7 @@ enum { // If the amount needed for the splitting frame + StackExtra // is less than this number, the stack will have this size instead. StackMin = 4096, + FixedStack = StackMin + StackSystem, // Functions that need frames bigger than this call morestack // unconditionally. 
diff --git a/src/pkg/runtime/string.goc b/src/pkg/runtime/string.goc
index b72aa937c..48bf3183b 100644
--- a/src/pkg/runtime/string.goc
+++ b/src/pkg/runtime/string.goc
@@ -32,19 +32,23 @@ runtime·findnullw(uint16 *s)
 	return l;
 }
 
-int32 runtime·maxstring = 256;
+uint32 runtime·maxstring = 256;
 
 String
 runtime·gostringsize(int32 l)
 {
 	String s;
+	uint32 ms;
 
 	if(l == 0)
 		return runtime·emptystring;
 	s.str = runtime·mal(l+1);	// leave room for NUL for C runtime (e.g., callers of getenv)
 	s.len = l;
-	if(l > runtime·maxstring)
-		runtime·maxstring = l;
+	for(;;) {
+		ms = runtime·maxstring;
+		if((uint32)l <= ms || runtime·cas(&runtime·maxstring, ms, (uint32)l))
+			break;
+	}
 	return s;
 }
 
@@ -56,7 +60,7 @@ runtime·gostring(byte *str)
 
 	l = runtime·findnull(str);
 	s = runtime·gostringsize(l);
-	runtime·mcpy(s.str, str, l);
+	runtime·memmove(s.str, str, l);
 	return s;
 }
 
@@ -66,10 +70,20 @@ runtime·gostringn(byte *str, int32 l)
 	String s;
 
 	s = runtime·gostringsize(l);
-	runtime·mcpy(s.str, str, l);
+	runtime·memmove(s.str, str, l);
 	return s;
 }
 
+Slice
+runtime·gobytes(byte *p, int32 n)
+{
+	Slice sl;
+
+	sl.array = runtime·mallocgc(n, FlagNoPointers, 1, 0);
+	runtime·memmove(sl.array, p, n);
+	return sl;
+}
+
 String
 runtime·gostringnocopy(byte *str)
 {
@@ -109,8 +123,8 @@ runtime·catstring(String s1, String s2)
 		return s1;
 
 	s3 = runtime·gostringsize(s1.len + s2.len);
-	runtime·mcpy(s3.str, s1.str, s1.len);
-	runtime·mcpy(s3.str+s1.len, s2.str, s2.len);
+	runtime·memmove(s3.str, s1.str, s1.len);
+	runtime·memmove(s3.str+s1.len, s2.str, s2.len);
 	return s3;
 }
 
@@ -130,7 +144,7 @@ concatstring(int32 n, String *s)
 	out = runtime·gostringsize(l);
 	l = 0;
 	for(i=0; i<n; i++) {
-		runtime·mcpy(out.str+l, s[i].str, s[i].len);
+		runtime·memmove(out.str+l, s[i].str, s[i].len);
 		l += s[i].len;
 	}
 	return out;
@@ -189,6 +203,28 @@ runtime·strcmp(byte *s1, byte *s2)
 	}
 }
 
+byte*
+runtime·strstr(byte *s1, byte *s2)
+{
+	byte *sp1, *sp2;
+
+	if(*s2 == 0)
+		return s1;
+	for(; *s1; s1++) {
+		if(*s1 != *s2)
+			continue;
+		sp1 = s1;
+		sp2 = s2;
+		for(;;) {
+			if(*sp2 == 0)
+				return s1;
+			if(*sp1++ != *sp2++)
+				break;
+		}
+	}
+	return nil;
+}
+
 func slicestring(si String, lindex int32, hindex int32) (so String) {
 	int32 l;
 
@@ -221,14 +257,14 @@ func intstring(v int64) (s String) {
 
 func slicebytetostring(b Slice) (s String) {
 	s = runtime·gostringsize(b.len);
-	runtime·mcpy(s.str, b.array, s.len);
+	runtime·memmove(s.str, b.array, s.len);
 }
 
 func stringtoslicebyte(s String) (b Slice) {
 	b.array = runtime·mallocgc(s.len, FlagNoPointers, 1, 1);
 	b.len = s.len;
 	b.cap = s.len;
-	runtime·mcpy(b.array, s.str, s.len);
+	runtime·memmove(b.array, s.str, s.len);
 }
 
 func sliceinttostring(b Slice) (s String) {
diff --git a/src/pkg/runtime/symtab.c b/src/pkg/runtime/symtab.c
index da4579734..d2ebf9b40 100644
--- a/src/pkg/runtime/symtab.c
+++ b/src/pkg/runtime/symtab.c
@@ -78,6 +78,7 @@ static int32 nfunc;
 
 static byte **fname;
 static int32 nfname;
+static uint32 funcinit;
 static Lock funclock;
 
 static void
@@ -159,7 +160,7 @@ makepath(byte *buf, int32 nbuf, byte *path)
 			break;
 		if(p > buf && p[-1] != '/')
 			*p++ = '/';
-		runtime·mcpy(p, q, len+1);
+		runtime·memmove(p, q, len+1);
 		p += len;
 	}
 }
@@ -420,10 +421,21 @@ runtime·findfunc(uintptr addr)
 	Func *f;
 	int32 nf, n;
 
-	runtime·lock(&funclock);
-	if(func == nil)
-		buildfuncs();
-	runtime·unlock(&funclock);
+	// Use atomic double-checked locking,
+	// because when called from pprof signal
+	// handler, findfunc must run without
+	// grabbing any locks.
+	// (Before enabling the signal handler,
+	// SetCPUProfileRate calls findfunc to trigger
+	// the initialization outside the handler.)
+	if(runtime·atomicload(&funcinit) == 0) {
+		runtime·lock(&funclock);
+		if(funcinit == 0) {
+			buildfuncs();
+			runtime·atomicstore(&funcinit, 1);
+		}
+		runtime·unlock(&funclock);
+	}
 
 	if(nfunc == 0)
 		return nil;
diff --git a/src/pkg/runtime/symtab_test.go b/src/pkg/runtime/symtab_test.go
new file mode 100644
index 000000000..bd9fe18c4
--- /dev/null
+++ b/src/pkg/runtime/symtab_test.go
@@ -0,0 +1,47 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+	"runtime"
+	"strings"
+	"testing"
+)
+
+func TestCaller(t *testing.T) {
+	procs := runtime.GOMAXPROCS(-1)
+	c := make(chan bool, procs)
+	for p := 0; p < procs; p++ {
+		go func() {
+			for i := 0; i < 1000; i++ {
+				testCallerFoo(t)
+			}
+			c <- true
+		}()
+		defer func() {
+			<-c
+		}()
+	}
+}
+
+func testCallerFoo(t *testing.T) {
+	testCallerBar(t)
+}
+
+func testCallerBar(t *testing.T) {
+	for i := 0; i < 2; i++ {
+		pc, file, line, ok := runtime.Caller(i)
+		f := runtime.FuncForPC(pc)
+		if !ok ||
+			!strings.HasSuffix(file, "symtab_test.go") ||
+			(i == 0 && !strings.HasSuffix(f.Name(), "testCallerBar")) ||
+			(i == 1 && !strings.HasSuffix(f.Name(), "testCallerFoo")) ||
+			line < 5 || line > 1000 ||
+			f.Entry() >= pc {
+			t.Errorf("incorrect symbol info %d: %t %d %d %s %s %d",
+				i, ok, f.Entry(), pc, f.Name(), file, line)
+		}
+	}
+}
diff --git a/src/pkg/runtime/windows/amd64/rt0.s b/src/pkg/runtime/windows/amd64/rt0.s
index e54e7edeb..35978bc74 100644
--- a/src/pkg/runtime/windows/amd64/rt0.s
+++ b/src/pkg/runtime/windows/amd64/rt0.s
@@ -8,3 +8,6 @@ TEXT _rt0_amd64_windows(SB),7,$-8
 	MOVQ	$_rt0_amd64(SB), AX
 	MOVQ	SP, DI
 	JMP	AX
+
+DATA runtime·iswindows(SB)/4, $1
+GLOBL runtime·iswindows(SB), $4
diff --git a/src/pkg/runtime/windows/amd64/sys.s b/src/pkg/runtime/windows/amd64/sys.s
index b1eacfc82..2009d164e 100644
--- a/src/pkg/runtime/windows/amd64/sys.s
+++ b/src/pkg/runtime/windows/amd64/sys.s
@@ -20,6 +20,7 @@ TEXT runtime·stdcall_raw(SB),7,$8
 	CMPQ	g(DI), SI
 	JEQ	3(PC)
 	MOVQ	(g_sched+gobuf_sp)(SI), SP
+	ANDQ	$~15, SP
 	MOVQ	SI, g(DI)
 	SUBQ	$0x60, SP
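The change above deletes the portable C fallback for runtime·xadd (a compare-and-swap retry loop in runtime.c) in favor of LOCK XADDL in assembly, and the runtime.h comment stresses that xadd is add-and-fetch rather than the x86 XADD's fetch-and-add. A minimal Go sketch of the same add-and-fetch contract, using sync/atomic in place of runtime·cas; the helper name xadd here is illustrative, not part of the change:

package main

import (
	"fmt"
	"sync/atomic"
)

// xadd atomically adds delta to *val and returns the new value,
// mirroring the contract of the deleted CAS-loop fallback.
func xadd(val *uint32, delta int32) uint32 {
	for {
		oval := atomic.LoadUint32(val)
		nval := oval + uint32(delta)
		if atomic.CompareAndSwapUint32(val, oval, nval) {
			return nval
		}
	}
}

func main() {
	var v uint32
	fmt.Println(xadd(&v, 5))  // 5
	fmt.Println(xadd(&v, -2)) // 3 (negative deltas rely on unsigned wraparound)
}

In current Go, atomic.AddUint32 provides this operation directly; the sketch only spells out the retry loop the assembly replaces.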
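The rewritten runtime·Caller asks for two PCs so it can tell whether the frame in question "called" sigpanic: an ordinary saved PC is a return address and is decremented before the line lookup, while a faulting instruction's PC already points at the instruction itself. A small Go sketch of the usual adjustment from the exported API side; it does not reproduce the sigpanic special case, which needs runtime internals:

package main

import (
	"fmt"
	"runtime"
)

// where reports the file:line of the caller of where. runtime.Callers
// returns return addresses, so the lookup uses pc-1 to land on the CALL
// instruction rather than on the line after it.
func where(skip int) string {
	var rpc [1]uintptr
	if runtime.Callers(skip+2, rpc[:]) < 1 {
		return "unknown"
	}
	f := runtime.FuncForPC(rpc[0])
	if f == nil {
		return "unknown"
	}
	file, line := f.FileLine(rpc[0] - 1)
	return fmt.Sprintf("%s:%d (%s)", file, line, f.Name())
}

func main() {
	fmt.Println(where(0)) // prints the file and line of this call site
}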
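The new runtime·fastrand1 is a tiny shift-and-xor generator kept per M (m->fastrand): double the state and fold in the constant 0x88888eef when the doubled value has its high bit set. A standalone Go port for illustration; the seeding below is an assumption (the state must start non-zero or the sequence is stuck at zero), and the generator is meant for cheap randomized decisions, not for anything needing real randomness:

package main

import "fmt"

// fastrand advances a fastrand1-style state: x = 2x, xor-ing in
// 0x88888eef when the doubled value has bit 31 set. *state must be
// non-zero.
func fastrand(state *uint32) uint32 {
	x := *state
	x += x
	if x&0x80000000 != 0 {
		x ^= 0x88888eef
	}
	*state = x
	return x
}

func main() {
	s := uint32(0x49f6428a) // arbitrary non-zero seed; the runtime seeds each M separately
	for i := 0; i < 4; i++ {
		fmt.Printf("%#08x\n", fastrand(&s))
	}
}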
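The slice.c change guards the capacity check with t->elem->size > 0: without it, making a slice of a zero-sized element type divides by zero while computing the largest representable capacity. A Go sketch of the corrected check; the function name and signature are made up for illustration:

package main

import "fmt"

const maxUintptr = ^uintptr(0)

// capOK reports whether cap elements of size elemSize fit without the
// total byte count overflowing, mirroring the corrected makeslice test:
// skip the division entirely when elemSize is zero.
func capOK(length, capacity, elemSize uintptr) bool {
	if capacity < length {
		return false
	}
	if elemSize > 0 && capacity > maxUintptr/elemSize {
		return false
	}
	return true
}

func main() {
	fmt.Println(capOK(0, 1<<20, 0))        // true: zero-sized elements never overflow
	fmt.Println(capOK(0, maxUintptr/2, 8)) // false: 8*cap would overflow
	fmt.Println(capOK(0, 10, 8))           // true
}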
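string.goc now maintains runtime·maxstring with a CAS loop instead of an unsynchronized store, so concurrent gostringsize calls cannot lose an update. The same "lock-free maximum" pattern in Go, with sync/atomic standing in for runtime·cas (updateMax is an illustrative name):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// updateMax raises *max to at least n: retry the compare-and-swap until
// either the stored value is already >= n or our store wins the race.
func updateMax(max *uint32, n uint32) {
	for {
		cur := atomic.LoadUint32(max)
		if n <= cur || atomic.CompareAndSwapUint32(max, cur, n) {
			return
		}
	}
}

func main() {
	var maxLen uint32
	var wg sync.WaitGroup
	for i := uint32(1); i <= 100; i++ {
		wg.Add(1)
		go func(n uint32) {
			defer wg.Done()
			updateMax(&maxLen, n)
		}(i)
	}
	wg.Wait()
	fmt.Println(atomic.LoadUint32(&maxLen)) // always 100, regardless of interleaving
}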
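The runtime·strstr added to string.goc is the classic quadratic substring scan over NUL-terminated byte strings: at each position where the first byte matches, walk the needle until it ends (a match) or a byte differs. The same algorithm ported to Go strings, returning an index instead of a pointer; strings.Index is the standard-library equivalent:

package main

import "fmt"

// index returns the offset of the first occurrence of sub in s, or -1.
// Like the C version, an empty needle matches at offset 0, and the
// worst case is O(len(s)*len(sub)).
func index(s, sub string) int {
	if len(sub) == 0 {
		return 0
	}
	for i := 0; i < len(s); i++ {
		if s[i] != sub[0] {
			continue
		}
		j := 0
		for i+j < len(s) && j < len(sub) && s[i+j] == sub[j] {
			j++
		}
		if j == len(sub) {
			return i
		}
	}
	return -1
}

func main() {
	fmt.Println(index("hello, world", "world")) // 7
	fmt.Println(index("abc", "zzz"))            // -1
}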
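The findfunc change is a textbook double-checked initialization: an atomic load on the fast path so a pprof signal handler never touches funclock once the tables are built, and the lock plus a second check only on the slow path. A Go sketch of the same shape using sync/atomic and sync.Mutex; the funcTab type and buildTables below are hypothetical stand-ins for the runtime's symbol tables, and in ordinary Go code sync.Once expresses exactly this pattern:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// funcTab builds its data lazily, once; readers arriving after
// initialization perform a single atomic load and never lock.
type funcTab struct {
	initDone uint32 // 0 until data is built, then 1 (accessed atomically)
	mu       sync.Mutex
	data     []string // hypothetical stand-in for the function tables
}

func (t *funcTab) get() []string {
	if atomic.LoadUint32(&t.initDone) == 0 { // fast-path check, no lock
		t.mu.Lock()
		if t.initDone == 0 { // re-check under the lock
			t.data = buildTables()
			atomic.StoreUint32(&t.initDone, 1) // publish only after data is ready
		}
		t.mu.Unlock()
	}
	return t.data
}

// buildTables is a placeholder for the expensive one-time setup
// (buildfuncs in the runtime).
func buildTables() []string {
	return []string{"runtime.main", "main.main"}
}

func main() {
	var t funcTab
	fmt.Println(t.get())
}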