Diffstat (limited to 'src/pkg/runtime')
38 files changed, 1209 insertions, 452 deletions
diff --git a/src/pkg/runtime/386/asm.s b/src/pkg/runtime/386/asm.s index 63d582606..74e1df0da 100644 --- a/src/pkg/runtime/386/asm.s +++ b/src/pkg/runtime/386/asm.s @@ -5,6 +5,14 @@ #include "386/asm.h" TEXT _rt0_386(SB),7,$0 + // Linux, Windows start the FPU in extended double precision. + // Other operating systems use double precision. + // Change to double precision to match them, + // and to match other hardware that only has double. + PUSHL $0x27F + FLDCW 0(SP) + POPL AX + // copy arguments forward on an even stack MOVL 0(SP), AX // argc LEAL 4(SP), BX // argv diff --git a/src/pkg/runtime/Makefile b/src/pkg/runtime/Makefile index e9488cfb5..521c095b9 100644 --- a/src/pkg/runtime/Makefile +++ b/src/pkg/runtime/Makefile @@ -26,8 +26,12 @@ GOFILES=\ softfloat64.go\ type.go\ version.go\ + version_$(GOOS).go\ + version_$(GOARCH).go\ runtime_defs.go\ +CLEANFILES+=version.go version_*.go + OFILES_windows=\ syscall.$O\ @@ -107,7 +111,7 @@ include ../../Make.pkg $(pkgdir)/%.h: %.h @test -d $(QUOTED_GOROOT)/pkg && mkdir -p $(pkgdir) - cp $< $@ + cp $< "$@" clean: clean-local @@ -127,8 +131,14 @@ mkversion: mkversion.c version.go: mkversion ./mkversion >version.go +version_$(GOARCH).go: + (echo 'package runtime'; echo 'const theGoarch = "$(GOARCH)"') >$@ + +version_$(GOOS).go: + (echo 'package runtime'; echo 'const theGoos = "$(GOOS)"') >$@ + %.c: %.goc goc2c - ./goc2c `pwd`/$< > $@.tmp + ./goc2c "`pwd`/$<" > $@.tmp mv -f $@.tmp $@ %.$O: $(GOARCH)/%.c diff --git a/src/pkg/runtime/amd64/traceback.c b/src/pkg/runtime/amd64/traceback.c index 86e96f348..d3aae0db9 100644 --- a/src/pkg/runtime/amd64/traceback.c +++ b/src/pkg/runtime/amd64/traceback.c @@ -8,6 +8,8 @@ static uintptr isclosureentry(uintptr); void runtime·deferproc(void); void runtime·newproc(void); +void runtime·newstack(void); +void runtime·morestack(void); // This code is also used for the 386 tracebacks. // Use uintptr for an appropriate word-sized integer. @@ -17,15 +19,32 @@ void runtime·newproc(void); // A little clunky to merge the two but avoids duplicating // the code and all its subtlety. static int32 -gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 m) +gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 max) { byte *p; - int32 i, n, iter, nascent; - uintptr pc, tracepc, *fp; + int32 i, n, iter, sawnewstack; + uintptr pc, lr, tracepc; + byte *fp; Stktop *stk; Func *f; - + pc = (uintptr)pc0; + lr = 0; + fp = nil; + + // If the PC is goexit, the goroutine hasn't started yet. + if(pc0 == g->sched.pc && sp == g->sched.sp && pc0 == (byte*)runtime·goexit) { + fp = sp; + lr = pc; + pc = (uintptr)g->entry; + } + + // If the PC is zero, it's likely a nil function call. + // Start in the caller's frame. + if(pc == 0) { + pc = lr; + lr = 0; + } // If the PC is zero, it's likely a nil function call. // Start in the caller's frame. @@ -33,26 +52,29 @@ gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 m) pc = *(uintptr*)sp; sp += sizeof(uintptr); } - - nascent = 0; - if(pc0 == g->sched.pc && sp == g->sched.sp && pc0 == (byte*)runtime·goexit) { - // Hasn't started yet. g->sched is set up for goexit - // but goroutine will start at g->entry. - nascent = 1; - pc = (uintptr)g->entry; - } - + n = 0; + sawnewstack = 0; stk = (Stktop*)g->stackbase; - for(iter = 0; iter < 100 && n < m; iter++) { // iter avoids looping forever + for(iter = 0; iter < 100 && n < max; iter++) { // iter avoids looping forever + // Typically: + // pc is the PC of the running function. 
+ // sp is the stack pointer at that program counter. + // fp is the frame pointer (caller's stack pointer) at that program counter, or nil if unknown. + // stk is the stack containing sp. + // The caller's program counter is lr, unless lr is zero, in which case it is *(uintptr*)sp. + if(pc == (uintptr)runtime·lessstack) { // Hit top of stack segment. Unwind to next segment. pc = (uintptr)stk->gobuf.pc; sp = stk->gobuf.sp; + lr = 0; + fp = nil; + if(pcbuf == nil) + runtime·printf("----- stack segment boundary -----\n"); stk = (Stktop*)stk->stackbase; continue; } - if(pc <= 0x1000 || (f = runtime·findfunc(pc)) == nil) { // Dangerous, but worthwhile: see if this is a closure: // ADDQ $wwxxyyzz, SP; RET @@ -66,17 +88,32 @@ gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 m) sp += *(uint32*)(p+2); pc = *(uintptr*)sp; sp += sizeof(uintptr); + lr = 0; + fp = nil; continue; } - if(nascent && (pc = isclosureentry(pc)) != 0) + // Closure at top of stack, not yet started. + if(lr == (uintptr)runtime·goexit && (pc = isclosureentry(pc)) != 0) { + fp = sp; continue; + } - // Unknown pc; stop. + // Unknown pc: stop. break; } - // Found an actual function worth reporting. + // Found an actual function. + if(fp == nil) { + fp = sp; + if(pc > f->entry && f->frame >= sizeof(uintptr)) + fp += f->frame - sizeof(uintptr); + if(lr == 0) + lr = *(uintptr*)fp; + fp += sizeof(uintptr); + } else if(lr == 0) + lr = *(uintptr*)fp; + if(skip > 0) skip--; else if(pcbuf != nil) @@ -93,15 +130,10 @@ gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 m) tracepc--; runtime·printf(" %S:%d\n", f->src, runtime·funcline(f, tracepc)); runtime·printf("\t%S(", f->name); - fp = (uintptr*)sp; - if(f->frame < sizeof(uintptr)) - fp++; - else - fp += f->frame/sizeof(uintptr); for(i = 0; i < f->args; i++) { if(i != 0) runtime·prints(", "); - runtime·printhex(fp[i]); + runtime·printhex(((uintptr*)fp)[i]); if(i >= 4) { runtime·prints(", ..."); break; @@ -111,20 +143,32 @@ gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 m) n++; } - if(nascent) { - pc = (uintptr)g->sched.pc; - sp = g->sched.sp; - nascent = 0; + if(f->entry == (uintptr)runtime·deferproc || f->entry == (uintptr)runtime·newproc) + fp += 2*sizeof(uintptr); + + if(f->entry == (uintptr)runtime·newstack) + sawnewstack = 1; + + if(pcbuf == nil && f->entry == (uintptr)runtime·morestack && g == m->g0 && sawnewstack) { + // The fact that we saw newstack means that morestack + // has managed to record its information in m, so we can + // use it to keep unwinding the stack. + runtime·printf("----- morestack called from goroutine %d -----\n", m->curg->goid); + pc = (uintptr)m->morepc; + sp = m->morebuf.sp - sizeof(void*); + lr = (uintptr)m->morebuf.pc; + fp = m->morebuf.sp; + sawnewstack = 0; + g = m->curg; + stk = (Stktop*)g->stackbase; continue; } - if(f->frame < sizeof(uintptr)) // assembly functions lie - sp += sizeof(uintptr); - else - sp += f->frame; - pc = *((uintptr*)sp - 1); - if(f->entry == (uintptr)runtime·deferproc || f->entry == (uintptr)runtime·newproc) - sp += 2*sizeof(uintptr); + // Unwind to next frame. 
+ pc = lr; + lr = 0; + sp = fp; + fp = nil; } return n; } @@ -156,7 +200,17 @@ isclosureentry(uintptr pc) p = (byte*)pc; if(p < runtime·mheap.arena_start || p+32 > runtime·mheap.arena_used) return 0; + + if(*p == 0xe8) { + // CALL fn + return pc+5+*(int32*)(p+1); + } + if(sizeof(uintptr) == 8 && p[0] == 0x48 && p[1] == 0xb9 && p[10] == 0xff && p[11] == 0xd1) { + // MOVQ $fn, CX; CALL *CX + return *(uintptr*)(p+2); + } + // SUBQ $siz, SP if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0x81 || *p++ != 0xec) return 0; diff --git a/src/pkg/runtime/arm/traceback.c b/src/pkg/runtime/arm/traceback.c index 8289fdb28..2307e98e8 100644 --- a/src/pkg/runtime/arm/traceback.c +++ b/src/pkg/runtime/arm/traceback.c @@ -3,19 +3,27 @@ // license that can be found in the LICENSE file. #include "runtime.h" +#include "malloc.h" + +void runtime·deferproc(void); +void runtime·newproc(void); +void runtime·newstack(void); +void runtime·morestack(void); static int32 -gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, int32 m) +gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, int32 max) { int32 i, n, iter; - uintptr pc, lr, tracepc; + uintptr pc, lr, tracepc, x; + byte *fp, *p; Stktop *stk; Func *f; pc = (uintptr)pc0; lr = (uintptr)lr0; - - // If the PC is goexit, it hasn't started yet. + fp = nil; + + // If the PC is goexit, the goroutine hasn't started yet. if(pc == (uintptr)runtime·goexit) { pc = (uintptr)g->entry; lr = (uintptr)runtime·goexit; @@ -30,21 +38,73 @@ gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, i n = 0; stk = (Stktop*)g->stackbase; - for(iter = 0; iter < 100 && n < m; iter++) { // iter avoids looping forever + for(iter = 0; iter < 100 && n < max; iter++) { // iter avoids looping forever + // Typically: + // pc is the PC of the running function. + // sp is the stack pointer at that program counter. + // fp is the frame pointer (caller's stack pointer) at that program counter, or nil if unknown. + // stk is the stack containing sp. + // The caller's program counter is lr, unless lr is zero, in which case it is *(uintptr*)sp. + if(pc == (uintptr)runtime·lessstack) { // Hit top of stack segment. Unwind to next segment. pc = (uintptr)stk->gobuf.pc; sp = stk->gobuf.sp; - lr = *(uintptr*)sp; + lr = 0; + fp = nil; + if(pcbuf == nil) + runtime·printf("----- stack segment boundary -----\n"); stk = (Stktop*)stk->stackbase; continue; } - if(pc <= 0x1000 || (f = runtime·findfunc(pc-4)) == nil) { - // TODO: Check for closure. + + if(pc <= 0x1000 || (f = runtime·findfunc(pc)) == nil) { + // Dangerous, but worthwhile: see if this is a closure by + // decoding the instruction stream. + // + // We check p < p+4 to avoid wrapping and faulting if + // we have lost track of where we are. + p = (byte*)pc; + if((pc&3) == 0 && p < p+4 && + runtime·mheap.arena_start < p && + p+4 < runtime·mheap.arena_used) { + x = *(uintptr*)p; + if((x&0xfffff000) == 0xe49df000) { + // End of closure: + // MOVW.P frame(R13), R15 + pc = *(uintptr*)sp; + lr = 0; + sp += x & 0xfff; + fp = nil; + continue; + } + if((x&0xfffff000) == 0xe52de000 && lr == (uintptr)runtime·goexit) { + // Beginning of closure. + // Closure at top of stack, not yet started. + p += 5*4; + if((x&0xfff) != 4) { + // argument copying + p += 7*4; + } + if((byte*)pc < p && p < p+4 && p+4 < runtime·mheap.arena_used) { + pc = *(uintptr*)p; + fp = nil; + continue; + } + } + } break; } - // Found an actual function worth reporting. + // Found an actual function. 
+ if(lr == 0) + lr = *(uintptr*)sp; + if(fp == nil) { + fp = sp; + if(pc > f->entry && f->frame >= 0) + fp += f->frame; + } + if(skip > 0) skip--; else if(pcbuf != nil) @@ -64,7 +124,7 @@ gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, i for(i = 0; i < f->args; i++) { if(i != 0) runtime·prints(", "); - runtime·printhex(((uintptr*)sp)[1+i]); + runtime·printhex(((uintptr*)fp)[1+i]); if(i >= 4) { runtime·prints(", ..."); break; @@ -73,17 +133,28 @@ gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, i runtime·prints(")\n"); n++; } + + if(pcbuf == nil && f->entry == (uintptr)runtime·newstack && g == m->g0) { + runtime·printf("----- newstack called from goroutine %d -----\n", m->curg->goid); + pc = (uintptr)m->morepc; + sp = (byte*)m->moreargp - sizeof(void*); + lr = (uintptr)m->morebuf.pc; + fp = m->morebuf.sp; + g = m->curg; + stk = (Stktop*)g->stackbase; + continue; + } - if(lr == 0) - lr = *(uintptr*)sp; + // Unwind to next frame. pc = lr; lr = 0; - if(f->frame >= 0) - sp += f->frame; + sp = fp; + fp = nil; } return n; } + void runtime·traceback(byte *pc0, byte *sp, byte *lr, G *g) { diff --git a/src/pkg/runtime/cgocall.c b/src/pkg/runtime/cgocall.c index e6ece9542..74e5a3085 100644 --- a/src/pkg/runtime/cgocall.c +++ b/src/pkg/runtime/cgocall.c @@ -53,13 +53,12 @@ runtime·cgocall(void (*fn)(void*), void *arg) // (arg/argsize) on to the stack, calls the function, copies the // arguments back where they came from, and finally returns to the old // stack. -uintptr +void runtime·cgocallback(void (*fn)(void), void *arg, int32 argsize) { Gobuf oldsched, oldg1sched; G *g1; void *sp; - uintptr ret; if(g != m->g0) runtime·throw("bad g in cgocallback"); @@ -71,11 +70,11 @@ runtime·cgocallback(void (*fn)(void), void *arg, int32 argsize) runtime·startcgocallback(g1); sp = g1->sched.sp - argsize; - if(sp < g1->stackguard - StackGuard + 4) // +4 for return address + if(sp < g1->stackguard - StackGuard + 8) // +8 for return address runtime·throw("g stack overflow in cgocallback"); runtime·mcpy(sp, arg, argsize); - ret = runtime·runcgocallback(g1, sp, fn); + runtime·runcgocallback(g1, sp, fn); runtime·mcpy(arg, sp, argsize); @@ -83,8 +82,6 @@ runtime·cgocallback(void (*fn)(void), void *arg, int32 argsize) m->sched = oldsched; g1->sched = oldg1sched; - - return ret; } void diff --git a/src/pkg/runtime/cgocall.h b/src/pkg/runtime/cgocall.h index 7c24e167b..1ad954eb1 100644 --- a/src/pkg/runtime/cgocall.h +++ b/src/pkg/runtime/cgocall.h @@ -7,6 +7,6 @@ */ void runtime·cgocall(void (*fn)(void*), void*); -uintptr runtime·cgocallback(void (*fn)(void), void*, int32); +void runtime·cgocallback(void (*fn)(void), void*, int32); void *runtime·cmalloc(uintptr); void runtime·cfree(void*); diff --git a/src/pkg/runtime/debug.go b/src/pkg/runtime/debug.go index d09db1be6..5117e1a55 100644 --- a/src/pkg/runtime/debug.go +++ b/src/pkg/runtime/debug.go @@ -69,7 +69,8 @@ type MemStatsType struct { // Per-size allocation statistics. // Not locked during update; approximate. - BySize [67]struct { + // 61 is NumSizeClasses in the C code. + BySize [61]struct { Size uint32 Mallocs uint64 Frees uint64 diff --git a/src/pkg/runtime/extern.go b/src/pkg/runtime/extern.go index dba28324c..c6e664abb 100644 --- a/src/pkg/runtime/extern.go +++ b/src/pkg/runtime/extern.go @@ -60,31 +60,47 @@ func (f *Func) Entry() uintptr { return f.entry } // counter within f. 
func (f *Func) FileLine(pc uintptr) (file string, line int) { // NOTE(rsc): If you edit this function, also edit - // symtab.c:/^funcline. + // symtab.c:/^funcline. That function also has the + // comments explaining the logic. + targetpc := pc + var pcQuant uintptr = 1 if GOARCH == "arm" { pcQuant = 4 } - targetpc := pc p := f.pcln pc = f.pc0 line = int(f.ln0) - file = f.src - for i := 0; i < len(p) && pc <= targetpc; i++ { - switch { - case p[i] == 0: + i := 0 + //print("FileLine start pc=", pc, " targetpc=", targetpc, " line=", line, + // " tab=", p, " ", p[0], " quant=", pcQuant, " GOARCH=", GOARCH, "\n") + for { + for i < len(p) && p[i] > 128 { + pc += pcQuant * uintptr(p[i]-128) + i++ + } + //print("pc<", pc, " targetpc=", targetpc, " line=", line, "\n") + if pc > targetpc || i >= len(p) { + break + } + if p[i] == 0 { + if i+5 > len(p) { + break + } line += int(p[i+1]<<24) | int(p[i+2]<<16) | int(p[i+3]<<8) | int(p[i+4]) - i += 4 - case p[i] <= 64: + i += 5 + } else if p[i] <= 64 { line += int(p[i]) - case p[i] <= 128: + i++ + } else { line -= int(p[i] - 64) - default: - pc += pcQuant * uintptr(p[i]-129) + i++ } + //print("pc=", pc, " targetpc=", targetpc, " line=", line, "\n") pc += pcQuant } + file = f.src return } diff --git a/src/pkg/runtime/freebsd/mem.c b/src/pkg/runtime/freebsd/mem.c index cbae18718..f5bbfa6fa 100644 --- a/src/pkg/runtime/freebsd/mem.c +++ b/src/pkg/runtime/freebsd/mem.c @@ -33,6 +33,12 @@ runtime·SysFree(void *v, uintptr n) void* runtime·SysReserve(void *v, uintptr n) { + // On 64-bit, people with ulimit -v set complain if we reserve too + // much address space. Instead, assume that the reservation is okay + // and check the assumption in SysMap. + if(sizeof(void*) == 8) + return v; + return runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0); } @@ -42,6 +48,17 @@ runtime·SysMap(void *v, uintptr n) void *p; mstats.sys += n; + + // On 64-bit, we don't actually have v reserved, so tread carefully. + if(sizeof(void*) == 8) { + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); + if(p != v) { + runtime·printf("runtime: address space conflict: map(%v) = %v\n", v, p); + runtime·throw("runtime: address space conflict"); + } + return; + } + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); if(p != v) runtime·throw("runtime: cannot map pages in arena address space"); diff --git a/src/pkg/runtime/iface.c b/src/pkg/runtime/iface.c index aa36df68e..3dec45e2b 100644 --- a/src/pkg/runtime/iface.c +++ b/src/pkg/runtime/iface.c @@ -702,7 +702,7 @@ unsafe·New(Eface typ, void *ret) t = (Type*)((Eface*)typ.data-1); if(t->kind&KindNoPointers) - ret = runtime·mallocgc(t->size, RefNoPointers, 1, 1); + ret = runtime·mallocgc(t->size, FlagNoPointers, 1, 1); else ret = runtime·mal(t->size); FLUSH(&ret); @@ -722,7 +722,7 @@ unsafe·NewArray(Eface typ, uint32 n, void *ret) size = n*t->size; if(t->kind&KindNoPointers) - ret = runtime·mallocgc(size, RefNoPointers, 1, 1); + ret = runtime·mallocgc(size, FlagNoPointers, 1, 1); else ret = runtime·mal(size); FLUSH(&ret); diff --git a/src/pkg/runtime/linux/386/rt0.s b/src/pkg/runtime/linux/386/rt0.s index 0f82d6a1c..223e6d2ea 100644 --- a/src/pkg/runtime/linux/386/rt0.s +++ b/src/pkg/runtime/linux/386/rt0.s @@ -5,13 +5,5 @@ // Darwin and Linux use the same linkage to main TEXT _rt0_386_linux(SB),7,$0 - // Linux starts the FPU in extended double precision. - // Other operating systems use double precision. 
- // Change to double precision to match them, - // and to match other hardware that only has double. - PUSHL $0x27F - FLDCW 0(SP) - POPL AX - JMP _rt0_386(SB) diff --git a/src/pkg/runtime/linux/mem.c b/src/pkg/runtime/linux/mem.c index 3a83e7394..633ad0c62 100644 --- a/src/pkg/runtime/linux/mem.c +++ b/src/pkg/runtime/linux/mem.c @@ -39,6 +39,12 @@ runtime·SysFree(void *v, uintptr n) void* runtime·SysReserve(void *v, uintptr n) { + // On 64-bit, people with ulimit -v set complain if we reserve too + // much address space. Instead, assume that the reservation is okay + // and check the assumption in SysMap. + if(sizeof(void*) == 8) + return v; + return runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0); } @@ -48,6 +54,17 @@ runtime·SysMap(void *v, uintptr n) void *p; mstats.sys += n; + + // On 64-bit, we don't actually have v reserved, so tread carefully. + if(sizeof(void*) == 8) { + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); + if(p != v) { + runtime·printf("runtime: address space conflict: map(%v) = %v\n", v, p); + runtime·throw("runtime: address space conflict"); + } + return; + } + p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); if(p != v) runtime·throw("runtime: cannot map pages in arena address space"); diff --git a/src/pkg/runtime/malloc.goc b/src/pkg/runtime/malloc.goc index cc28b943d..70b85d68d 100644 --- a/src/pkg/runtime/malloc.goc +++ b/src/pkg/runtime/malloc.goc @@ -36,14 +36,13 @@ fastrand1(void) // Small objects are allocated from the per-thread cache's free lists. // Large objects (> 32 kB) are allocated straight from the heap. void* -runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed) +runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) { int32 sizeclass, rate; MCache *c; uintptr npages; MSpan *s; void *v; - uint32 *ref; if(runtime·gcwaiting && g != m->g0 && m->locks == 0) runtime·gosched(); @@ -65,12 +64,6 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed) mstats.alloc += size; mstats.total_alloc += size; mstats.by_size[sizeclass].nmalloc++; - - if(!runtime·mlookup(v, nil, nil, nil, &ref)) { - runtime·printf("malloc %D; runtime·mlookup failed\n", (uint64)size); - runtime·throw("malloc runtime·mlookup"); - } - *ref = RefNone | refflag; } else { // TODO(rsc): Report tracebacks for very large allocations. 
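The per-object reference words removed above are replaced by the bitmap routines introduced elsewhere in this patch (markallocated, markfreed, blockspecial, setblockspecial). As a rough sketch of the new allocate/free bookkeeping, using the names and flags from this patch but not the exact mallocgc/free bodies:

	// Allocation side (see the next hunk): record the block in the bitmap
	// instead of writing a per-object ref word.
	if(!(flag & FlagNoGC))
		runtime·markallocated(v, size, (flag & FlagNoPointers) != 0);
	if(profiled)                      // sampling decision as before
		runtime·setblockspecial(v);   // bitSpecial: profiled or has finalizer

	// Free side (see runtime·free below): the profiling flag now lives in
	// the bitmap too, and the block is marked freed there.
	prof = runtime·blockspecial(v);
	runtime·markfreed(v, size);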
@@ -87,13 +80,14 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed) v = (void*)(s->start << PageShift); // setup for mark sweep - s->gcref0 = RefNone | refflag; - ref = &s->gcref0; + runtime·markspan(v, 0, 0, true); } + if(!(flag & FlagNoGC)) + runtime·markallocated(v, size, (flag&FlagNoPointers) != 0); m->mallocing = 0; - if(!(refflag & RefNoProfiling) && (rate = runtime·MemProfileRate) > 0) { + if(!(flag & FlagNoProfiling) && (rate = runtime·MemProfileRate) > 0) { if(size >= rate) goto profile; if(m->mcache->next_sample > size) @@ -104,7 +98,7 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed) rate = 0x3fffffff; m->mcache->next_sample = fastrand1() % (2*rate); profile: - *ref |= RefProfiled; + runtime·setblockspecial(v); runtime·MProf_Malloc(v, size); } } @@ -124,33 +118,35 @@ runtime·malloc(uintptr size) void runtime·free(void *v) { - int32 sizeclass, size; + int32 sizeclass; MSpan *s; MCache *c; - uint32 prof, *ref; + uint32 prof; + uintptr size; if(v == nil) return; + + // If you change this also change mgc0.c:/^sweepspan, + // which has a copy of the guts of free. if(m->mallocing) runtime·throw("malloc/free - deadlock"); m->mallocing = 1; - if(!runtime·mlookup(v, nil, nil, &s, &ref)) { + if(!runtime·mlookup(v, nil, nil, &s)) { runtime·printf("free %p: not an allocated block\n", v); runtime·throw("free runtime·mlookup"); } - prof = *ref & RefProfiled; - *ref = RefFree; + prof = runtime·blockspecial(v); // Find size class for v. sizeclass = s->sizeclass; if(sizeclass == 0) { // Large object. - if(prof) - runtime·MProf_Free(v, s->npages<<PageShift); - mstats.alloc -= s->npages<<PageShift; - runtime·memclr(v, s->npages<<PageShift); + size = s->npages<<PageShift; + *(uintptr*)(s->start<<PageShift) = 1; // mark as "needs to be zeroed" + runtime·unmarkspan(v, 1<<PageShift); runtime·MHeap_Free(&runtime·mheap, s, 1); } else { // Small object. @@ -158,19 +154,20 @@ runtime·free(void *v) size = runtime·class_to_size[sizeclass]; if(size > sizeof(uintptr)) ((uintptr*)v)[1] = 1; // mark as "needs to be zeroed" - if(prof) - runtime·MProf_Free(v, size); - mstats.alloc -= size; mstats.by_size[sizeclass].nfree++; runtime·MCache_Free(c, v, sizeclass, size); } + runtime·markfreed(v, size); + mstats.alloc -= size; + if(prof) + runtime·MProf_Free(v, size); m->mallocing = 0; } int32 -runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref) +runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp) { - uintptr n, nobj, i; + uintptr n, i; byte *p; MSpan *s; @@ -179,12 +176,11 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref) if(sp) *sp = s; if(s == nil) { + runtime·checkfreed(v, 1); if(base) *base = nil; if(size) *size = 0; - if(ref) - *ref = 0; return 0; } @@ -195,14 +191,11 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref) *base = p; if(size) *size = s->npages<<PageShift; - if(ref) - *ref = &s->gcref0; return 1; } - if((byte*)v >= (byte*)s->gcref) { - // pointers into the gc ref counts - // do not count as pointers. + if((byte*)v >= (byte*)s->limit) { + // pointers past the last block do not count as pointers. 
return 0; } @@ -213,21 +206,6 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref) if(size) *size = n; - // good for error checking, but expensive - if(0) { - nobj = (s->npages << PageShift) / (n + RefcountOverhead); - if((byte*)s->gcref < p || (byte*)(s->gcref+nobj) > p+(s->npages<<PageShift)) { - runtime·printf("odd span state=%d span=%p base=%p sizeclass=%d n=%D size=%D npages=%D\n", - s->state, s, p, s->sizeclass, (uint64)nobj, (uint64)n, (uint64)s->npages); - runtime·printf("s->base sizeclass %d v=%p base=%p gcref=%p blocksize=%D nobj=%D size=%D end=%p end=%p\n", - s->sizeclass, v, p, s->gcref, (uint64)s->npages<<PageShift, - (uint64)nobj, (uint64)n, s->gcref + nobj, p+(s->npages<<PageShift)); - runtime·throw("bad gcref"); - } - } - if(ref) - *ref = &s->gcref[i]; - return 1; } @@ -246,14 +224,20 @@ runtime·allocmcache(void) int32 runtime·sizeof_C_MStats = sizeof(MStats); +#define MaxArena32 (2U<<30) + void runtime·mallocinit(void) { byte *p; - uintptr arena_size; + uintptr arena_size, bitmap_size; + extern byte end[]; runtime·InitSizes(); + // Set up the allocation arena, a contiguous area of memory where + // allocated data will be found. The arena begins with a bitmap large + // enough to hold 4 bits per allocated word. if(sizeof(void*) == 8) { // On a 64-bit machine, allocate from a single contiguous reservation. // 16 GB should be big enough for now. @@ -273,19 +257,53 @@ runtime·mallocinit(void) // odds of the conservative garbage collector not collecting memory // because some non-pointer block of memory had a bit pattern // that matched a memory address. + // + // Actually we reserve 17 GB (because the bitmap ends up being 1 GB) + // but it hardly matters: fc is not valid UTF-8 either, and we have to + // allocate 15 GB before we get that far. arena_size = 16LL<<30; - p = runtime·SysReserve((void*)(0x00f8ULL<<32), arena_size); + bitmap_size = arena_size / (sizeof(void*)*8/4); + p = runtime·SysReserve((void*)(0x00f8ULL<<32), bitmap_size + arena_size); if(p == nil) runtime·throw("runtime: cannot reserve arena virtual address space"); - runtime·mheap.arena_start = p; - runtime·mheap.arena_used = p; - runtime·mheap.arena_end = p + arena_size; } else { - // On a 32-bit machine, we'll take what we can get for each allocation - // and maintain arena_start and arena_end as min, max we've seen. - runtime·mheap.arena_start = (byte*)0xffffffff; - runtime·mheap.arena_end = 0; + // On a 32-bit machine, we can't typically get away + // with a giant virtual address space reservation. + // Instead we map the memory information bitmap + // immediately after the data segment, large enough + // to handle another 2GB of mappings (256 MB), + // along with a reservation for another 512 MB of memory. + // When that gets used up, we'll start asking the kernel + // for any memory anywhere and hope it's in the 2GB + // following the bitmap (presumably the executable begins + // near the bottom of memory, so we'll have to use up + // most of memory before the kernel resorts to giving out + // memory before the beginning of the text segment). + // + // Alternatively we could reserve 512 MB bitmap, enough + // for 4GB of mappings, and then accept any memory the + // kernel threw at us, but normally that's a waste of 512 MB + // of address space, which is probably too much in a 32-bit world. + bitmap_size = MaxArena32 / (sizeof(void*)*8/4); + arena_size = 512<<20; + + // SysReserve treats the address we ask for, end, as a hint, + // not as an absolute requirement. 
If we ask for the end + // of the data segment but the operating system requires + // a little more space before we can start allocating, it will + // give out a slightly higher pointer. That's fine. + // Run with what we get back. + p = runtime·SysReserve(end, bitmap_size + arena_size); + if(p == nil) + runtime·throw("runtime: cannot reserve arena virtual address space"); } + if((uintptr)p & (((uintptr)1<<PageShift)-1)) + runtime·throw("runtime: SysReserve returned unaligned address"); + + runtime·mheap.bitmap = p; + runtime·mheap.arena_start = p + bitmap_size; + runtime·mheap.arena_used = runtime·mheap.arena_start; + runtime·mheap.arena_end = runtime·mheap.arena_start + arena_size; // Initialize the rest of the allocator. runtime·MHeap_Init(&runtime·mheap, runtime·SysAlloc); @@ -299,26 +317,41 @@ void* runtime·MHeap_SysAlloc(MHeap *h, uintptr n) { byte *p; - - if(sizeof(void*) == 8) { + + if(n <= h->arena_end - h->arena_used) { // Keep taking from our reservation. - if(h->arena_end - h->arena_used < n) - return nil; p = h->arena_used; runtime·SysMap(p, n); h->arena_used += n; + runtime·MHeap_MapBits(h); return p; - } else { - // Take what we can get from the OS. - p = runtime·SysAlloc(n); - if(p == nil) - return nil; - if(p+n > h->arena_used) - h->arena_used = p+n; - if(p > h->arena_end) - h->arena_end = p; - return p; } + + // On 64-bit, our reservation is all we have. + if(sizeof(void*) == 8) + return nil; + + // On 32-bit, once the reservation is gone we can + // try to get memory at a location chosen by the OS + // and hope that it is in the range we allocated bitmap for. + p = runtime·SysAlloc(n); + if(p == nil) + return nil; + + if(p < h->arena_start || p+n - h->arena_start >= MaxArena32) { + runtime·printf("runtime: memory allocated by OS not in usable range"); + runtime·SysFree(p, n); + return nil; + } + + if(p+n > h->arena_used) { + h->arena_used = p+n; + if(h->arena_used > h->arena_end) + h->arena_end = h->arena_used; + runtime·MHeap_MapBits(h); + } + + return p; } // Runtime stubs. @@ -353,7 +386,6 @@ void* runtime·stackalloc(uint32 n) { void *v; - uint32 *ref; if(m->mallocing || m->gcing || n == FixedStack) { runtime·lock(&stacks); @@ -369,11 +401,7 @@ runtime·stackalloc(uint32 n) runtime·unlock(&stacks); return v; } - v = runtime·mallocgc(n, RefNoProfiling, 0, 0); - if(!runtime·mlookup(v, nil, nil, nil, &ref)) - runtime·throw("stackalloc runtime·mlookup"); - *ref = RefStack; - return v; + return runtime·mallocgc(n, FlagNoProfiling|FlagNoGC, 0, 0); } void @@ -399,7 +427,7 @@ func Free(p *byte) { } func Lookup(p *byte) (base *byte, size uintptr) { - runtime·mlookup(p, &base, &size, nil, nil); + runtime·mlookup(p, &base, &size, nil); } func GC() { @@ -422,7 +450,7 @@ func SetFinalizer(obj Eface, finalizer Eface) { runtime·printf("runtime.SetFinalizer: first argument is %S, not pointer\n", *obj.type->string); goto throw; } - if(!runtime·mlookup(obj.data, &base, &size, nil, nil) || obj.data != base) { + if(!runtime·mlookup(obj.data, &base, &size, nil) || obj.data != base) { runtime·printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n"); goto throw; } diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h index e2472e8d2..4e2794570 100644 --- a/src/pkg/runtime/malloc.h +++ b/src/pkg/runtime/malloc.h @@ -97,8 +97,14 @@ typedef uintptr PageID; // address >> PageShift enum { + // Computed constant. The definition of MaxSmallSize and the + // algorithm in msize.c produce some number of different allocation + // size classes. 
NumSizeClasses is that number. It's needed here + // because there are static arrays of this length; when msize runs its + // size choosing algorithm it double-checks that NumSizeClasses agrees. + NumSizeClasses = 61, + // Tunable constants. - NumSizeClasses = 67, // Number of size classes (must match msize.c) MaxSmallSize = 32<<10, FixAllocChunk = 128<<10, // Chunk size for FixAlloc @@ -290,10 +296,7 @@ struct MSpan uint32 ref; // number of allocated objects in this span uint32 sizeclass; // size class uint32 state; // MSpanInUse etc - union { - uint32 *gcref; // sizeclass > 0 - uint32 gcref0; // sizeclass == 0 - }; + byte *limit; // end of data in span }; void runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages); @@ -336,6 +339,7 @@ struct MHeap // range of addresses we might see in the heap byte *bitmap; + uintptr bitmap_mapped; byte *arena_start; byte *arena_used; byte *arena_end; @@ -359,26 +363,29 @@ MSpan* runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct void runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct); MSpan* runtime·MHeap_Lookup(MHeap *h, void *v); MSpan* runtime·MHeap_LookupMaybe(MHeap *h, void *v); -void runtime·MGetSizeClassInfo(int32 sizeclass, int32 *size, int32 *npages, int32 *nobj); +void runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj); void* runtime·MHeap_SysAlloc(MHeap *h, uintptr n); +void runtime·MHeap_MapBits(MHeap *h); void* runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed); -int32 runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s, uint32 **ref); +int32 runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s); void runtime·gc(int32 force); +void runtime·markallocated(void *v, uintptr n, bool noptr); +void runtime·checkallocated(void *v, uintptr n); +void runtime·markfreed(void *v, uintptr n); +void runtime·checkfreed(void *v, uintptr n); +int32 runtime·checking; +void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover); +void runtime·unmarkspan(void *v, uintptr size); +bool runtime·blockspecial(void*); +void runtime·setblockspecial(void*); enum { - RefcountOverhead = 4, // one uint32 per object - - RefFree = 0, // must be zero - RefStack, // stack segment - don't free and don't scan for pointers - RefNone, // no references - RefSome, // some references - RefNoPointers = 0x80000000U, // flag - no pointers here - RefHasFinalizer = 0x40000000U, // flag - has finalizer - RefProfiled = 0x20000000U, // flag - is in profiling table - RefNoProfiling = 0x10000000U, // flag - must not profile - RefFlags = 0xFFFF0000U, + // flags to malloc + FlagNoPointers = 1<<0, // no pointers here + FlagNoProfiling = 1<<1, // must not profile + FlagNoGC = 1<<2, // must not free or scan for pointers }; void runtime·MProf_Malloc(void*, uintptr); diff --git a/src/pkg/runtime/mcentral.c b/src/pkg/runtime/mcentral.c index f1ad119d3..29b03b58f 100644 --- a/src/pkg/runtime/mcentral.c +++ b/src/pkg/runtime/mcentral.c @@ -113,8 +113,7 @@ static void MCentral_Free(MCentral *c, void *v) { MSpan *s; - PageID page; - MLink *p, *next; + MLink *p; int32 size; // Find span for v. @@ -138,16 +137,8 @@ MCentral_Free(MCentral *c, void *v) if(--s->ref == 0) { size = runtime·class_to_size[c->sizeclass]; runtime·MSpanList_Remove(s); - // The second word of each freed block indicates - // whether it needs to be zeroed. The first word - // is the link pointer and must always be cleared. 
- for(p=s->freelist; p; p=next) { - next = p->next; - if(size > sizeof(uintptr) && ((uintptr*)p)[1] != 0) - runtime·memclr((byte*)p, size); - else - p->next = nil; - } + runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift); + *(uintptr*)(s->start<<PageShift) = 1; // needs zeroing s->freelist = nil; c->nfree -= (s->npages << PageShift) / size; runtime·unlock(c); @@ -157,7 +148,7 @@ MCentral_Free(MCentral *c, void *v) } void -runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *nobj) +runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp, int32 *nobj) { int32 size; int32 npages; @@ -166,7 +157,7 @@ runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 size = runtime·class_to_size[sizeclass]; *npagesp = npages; *sizep = size; - *nobj = (npages << PageShift) / (size + RefcountOverhead); + *nobj = (npages << PageShift) / size; } // Fetch a new span from the heap and @@ -174,7 +165,8 @@ runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 static bool MCentral_Grow(MCentral *c) { - int32 i, n, npages, size; + int32 i, n, npages; + uintptr size; MLink **tailp, *v; byte *p; MSpan *s; @@ -191,7 +183,7 @@ MCentral_Grow(MCentral *c) // Carve span into sequence of blocks. tailp = &s->freelist; p = (byte*)(s->start << PageShift); - s->gcref = (uint32*)(p + size*n); + s->limit = p + size*n; for(i=0; i<n; i++) { v = (MLink*)p; *tailp = v; @@ -199,6 +191,7 @@ MCentral_Grow(MCentral *c) p += size; } *tailp = nil; + runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift)); runtime·lock(c); c->nfree += n; diff --git a/src/pkg/runtime/mfinal.c b/src/pkg/runtime/mfinal.c index f73561b3c..03ee777c0 100644 --- a/src/pkg/runtime/mfinal.c +++ b/src/pkg/runtime/mfinal.c @@ -5,6 +5,7 @@ #include "runtime.h" #include "malloc.h" +// TODO(rsc): Why not just use mheap.Lock? static Lock finlock; // Finalizer hash table. Direct hash, linear scan, at most 3/4 full. @@ -101,24 +102,21 @@ runtime·addfinalizer(void *p, void (*f)(void*), int32 nret) } runtime·lock(&finlock); - if(!runtime·mlookup(p, &base, nil, nil, &ref) || p != base) { + if(!runtime·mlookup(p, &base, nil, nil) || p != base) { runtime·unlock(&finlock); runtime·throw("addfinalizer on invalid pointer"); } if(f == nil) { - if(*ref & RefHasFinalizer) { - lookfintab(&fintab, p, 1); - *ref &= ~RefHasFinalizer; - } + lookfintab(&fintab, p, 1); runtime·unlock(&finlock); return; } - if(*ref & RefHasFinalizer) { + if(lookfintab(&fintab, p, 0)) { runtime·unlock(&finlock); runtime·throw("double finalizer"); } - *ref |= RefHasFinalizer; + runtime·setblockspecial(p); if(fintab.nkey >= fintab.max/2+fintab.max/4) { // keep table at most 3/4 full: @@ -134,7 +132,7 @@ runtime·addfinalizer(void *p, void (*f)(void*), int32 nret) newtab.max *= 3; } - newtab.key = runtime·mallocgc(newtab.max*sizeof newtab.key[0], RefNoPointers, 0, 1); + newtab.key = runtime·mallocgc(newtab.max*sizeof newtab.key[0], FlagNoPointers, 0, 1); newtab.val = runtime·mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1); for(i=0; i<fintab.max; i++) { diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c index af1c721e8..232c6cdcd 100644 --- a/src/pkg/runtime/mgc0.c +++ b/src/pkg/runtime/mgc0.c @@ -2,28 +2,66 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Garbage collector -- step 0. -// -// Stop the world, mark and sweep garbage collector. -// NOT INTENDED FOR PRODUCTION USE. 
-// -// A mark and sweep collector provides a way to exercise -// and test the memory allocator and the stack walking machinery -// without also needing to get reference counting -// exactly right. +// Garbage collector. #include "runtime.h" #include "malloc.h" enum { - Debug = 0 + Debug = 0, + UseCas = 1, + PtrSize = sizeof(void*), + + // Four bits per word (see #defines below). + wordsPerBitmapWord = sizeof(void*)*8/4, + bitShift = sizeof(void*)*8/4, }; -typedef struct BlockList BlockList; -struct BlockList +// Bits in per-word bitmap. +// #defines because enum might not be able to hold the values. +// +// Each word in the bitmap describes wordsPerBitmapWord words +// of heap memory. There are 4 bitmap bits dedicated to each heap word, +// so on a 64-bit system there is one bitmap word per 16 heap words. +// The bits in the word are packed together by type first, then by +// heap location, so each 64-bit bitmap word consists of, from top to bottom, +// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits, +// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits. +// This layout makes it easier to iterate over the bits of a given type. +// +// The bitmap starts at mheap.arena_start and extends *backward* from +// there. On a 64-bit system the off'th word in the arena is tracked by +// the off/16+1'th word before mheap.arena_start. (On a 32-bit system, +// the only difference is that the divisor is 8.) +// +// To pull out the bits corresponding to a given pointer p, we use: +// +// off = p - (uintptr*)mheap.arena_start; // word offset +// b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1; +// shift = off % wordsPerBitmapWord +// bits = *b >> shift; +// /* then test bits & bitAllocated, bits & bitMarked, etc. */ +// +#define bitAllocated ((uintptr)1<<(bitShift*0)) +#define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */ +#define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */ +#define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */ +#define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */ + +#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial) + +static uint64 nlookup; +static uint64 nsizelookup; +static uint64 naddrlookup; +static uint64 nhandoff; +static int32 gctrace; + +typedef struct Workbuf Workbuf; +struct Workbuf { - byte *obj; - uintptr size; + Workbuf *next; + uintptr nw; + byte *w[2048-2]; }; extern byte data[]; @@ -33,72 +71,258 @@ extern byte end[]; static G *fing; static Finalizer *finq; static int32 fingwait; -static BlockList *bl, *ebl; +static uint32 nfullwait; static void runfinq(void); - -enum { - PtrSize = sizeof(void*) -}; - +static bool bitlookup(void*, uintptr**, uintptr*, int32*); +static Workbuf* getempty(Workbuf*); +static Workbuf* getfull(Workbuf*); + +// scanblock scans a block of n bytes starting at pointer b for references +// to other objects, scanning any it finds recursively until there are no +// unscanned objects left. Instead of using an explicit recursion, it keeps +// a work list in the Workbuf* structures and loops in the main function +// body. Keeping an explicit work list is easier on the stack allocator and +// more efficient. 
static void scanblock(byte *b, int64 n) { - int32 off; - void *obj; - uintptr size; - uint32 *refp, ref; + byte *obj, *arena_start, *p; void **vp; - int64 i; - BlockList *w; - - w = bl; - w->obj = b; - w->size = n; - w++; + uintptr size, *bitp, bits, shift, i, j, x, xbits, off; + MSpan *s; + PageID k; + void **bw, **w, **ew; + Workbuf *wbuf; - while(w > bl) { - w--; - b = w->obj; - n = w->size; + // Memory arena parameters. + arena_start = runtime·mheap.arena_start; + + wbuf = nil; // current work buffer + ew = nil; // end of work buffer + bw = nil; // beginning of work buffer + w = nil; // current pointer into work buffer + + // Align b to a word boundary. + off = (uintptr)b & (PtrSize-1); + if(off != 0) { + b += PtrSize - off; + n -= PtrSize - off; + } + for(;;) { + // Each iteration scans the block b of length n, queueing pointers in + // the work buffer. if(Debug > 1) runtime·printf("scanblock %p %D\n", b, n); - off = (uint32)(uintptr)b & (PtrSize-1); - if(off) { - b += PtrSize - off; - n -= PtrSize - off; - } - + vp = (void**)b; n /= PtrSize; for(i=0; i<n; i++) { - obj = vp[i]; - if(obj == nil) + obj = (byte*)vp[i]; + + // Words outside the arena cannot be pointers. + if((byte*)obj < arena_start || (byte*)obj >= runtime·mheap.arena_used) continue; - if(runtime·mheap.arena_start <= (byte*)obj && (byte*)obj < runtime·mheap.arena_end) { - if(runtime·mlookup(obj, &obj, &size, nil, &refp)) { - ref = *refp; - switch(ref & ~RefFlags) { - case RefNone: - if(Debug > 1) - runtime·printf("found at %p: ", &vp[i]); - *refp = RefSome | (ref & RefFlags); - if(!(ref & RefNoPointers)) { - if(w >= ebl) - runtime·throw("scanblock: garbage collection stack overflow"); - w->obj = obj; - w->size = size; - w++; - } - break; - } + + // obj may be a pointer to a live object. + // Try to find the beginning of the object. + + // Round down to word boundary. + obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); + + // Find bits for this word. + off = (uintptr*)obj - (uintptr*)arena_start; + bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + xbits = *bitp; + bits = xbits >> shift; + + // Pointing at the beginning of a block? + if((bits & (bitAllocated|bitBlockBoundary)) != 0) + goto found; + + // Pointing just past the beginning? + // Scan backward a little to find a block boundary. + for(j=shift; j-->0; ) { + if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { + obj = (byte*)obj - (shift-j)*PtrSize; + shift = j; + bits = xbits>>shift; + goto found; } } + + // Otherwise consult span table to find beginning. + // (Manually inlined copy of MHeap_LookupMaybe.) + nlookup++; + naddrlookup++; + k = (uintptr)obj>>PageShift; + x = k; + if(sizeof(void*) == 8) + x -= (uintptr)arena_start>>PageShift; + s = runtime·mheap.map[x]; + if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) + continue; + p = (byte*)((uintptr)s->start<<PageShift); + if(s->sizeclass == 0) { + obj = p; + } else { + if((byte*)obj >= (byte*)s->limit) + continue; + size = runtime·class_to_size[s->sizeclass]; + int32 i = ((byte*)obj - p)/size; + obj = p+i*size; + } + + // Now that we know the object header, reload bits. + off = (uintptr*)obj - (uintptr*)arena_start; + bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + xbits = *bitp; + bits = xbits >> shift; + + found: + // Now we have bits, bitp, and shift correct for + // obj pointing at the base of the object. + // If not allocated or already marked, done. 
+ if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0) + continue; + *bitp |= bitMarked<<shift; + + // If object has no pointers, don't need to scan further. + if((bits & bitNoPointers) != 0) + continue; + + // If buffer is full, get a new one. + if(w >= ew) { + wbuf = getempty(wbuf); + bw = wbuf->w; + w = bw; + ew = bw + nelem(wbuf->w); + } + *w++ = obj; + } + + // Done scanning [b, b+n). Prepare for the next iteration of + // the loop by setting b and n to the parameters for the next block. + + // Fetch b from the work buffers. + if(w <= bw) { + // Emptied our buffer: refill. + wbuf = getfull(wbuf); + if(wbuf == nil) + break; + bw = wbuf->w; + ew = wbuf->w + nelem(wbuf->w); + w = bw+wbuf->nw; } + b = *--w; + + // Figure out n = size of b. Start by loading bits for b. + off = (uintptr*)b - (uintptr*)arena_start; + bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + xbits = *bitp; + bits = xbits >> shift; + + // Might be small; look for nearby block boundary. + // A block boundary is marked by either bitBlockBoundary + // or bitAllocated being set (see notes near their definition). + enum { + boundary = bitBlockBoundary|bitAllocated + }; + // Look for a block boundary both after and before b + // in the same bitmap word. + // + // A block boundary j words after b is indicated by + // bits>>j & boundary + // assuming shift+j < bitShift. (If shift+j >= bitShift then + // we'll be bleeding other bit types like bitMarked into our test.) + // Instead of inserting the conditional shift+j < bitShift into the loop, + // we can let j range from 1 to bitShift as long as we first + // apply a mask to keep only the bits corresponding + // to shift+j < bitShift aka j < bitShift-shift. + bits &= (boundary<<(bitShift-shift)) - boundary; + + // A block boundary j words before b is indicated by + // xbits>>(shift-j) & boundary + // (assuming shift >= j). There is no cleverness here + // avoid the test, because when j gets too large the shift + // turns negative, which is undefined in C. + + for(j=1; j<bitShift; j++) { + if(((bits>>j)&boundary) != 0 || shift>=j && ((xbits>>(shift-j))&boundary) != 0) { + n = j*PtrSize; + goto scan; + } + } + + // Fall back to asking span about size class. + // (Manually inlined copy of MHeap_Lookup.) + nlookup++; + nsizelookup++; + x = (uintptr)b>>PageShift; + if(sizeof(void*) == 8) + x -= (uintptr)arena_start>>PageShift; + s = runtime·mheap.map[x]; + if(s->sizeclass == 0) + n = s->npages<<PageShift; + else + n = runtime·class_to_size[s->sizeclass]; + scan:; + } +} + +static struct { + Workbuf *full; + Workbuf *empty; + byte *chunk; + uintptr nchunk; +} work; + +// Get an empty work buffer off the work.empty list, +// allocating new buffers as needed. +static Workbuf* +getempty(Workbuf *b) +{ + if(b != nil) { + b->nw = nelem(b->w); + b->next = work.full; + work.full = b; + } + b = work.empty; + if(b != nil) { + work.empty = b->next; + return b; + } + + if(work.nchunk < sizeof *b) { + work.nchunk = 1<<20; + work.chunk = runtime·SysAlloc(work.nchunk); + } + b = (Workbuf*)work.chunk; + work.chunk += sizeof *b; + work.nchunk -= sizeof *b; + return b; +} + +// Get a full work buffer off the work.full list, or return nil. +static Workbuf* +getfull(Workbuf *b) +{ + if(b != nil) { + b->nw = 0; + b->next = work.empty; + work.empty = b; } + b = work.full; + if(b != nil) + work.full = b->next; + return b; } +// Scanstack calls scanblock on each of gp's stack segments. 
static void scanstack(G *gp) { @@ -119,46 +343,26 @@ scanstack(G *gp) } } +// Markfin calls scanblock on the blocks that have finalizers: +// the things pointed at cannot be freed until the finalizers have run. static void markfin(void *v) { uintptr size; - uint32 *refp; size = 0; - refp = nil; - if(!runtime·mlookup(v, &v, &size, nil, &refp) || !(*refp & RefHasFinalizer)) + if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v)) runtime·throw("mark - finalizer inconsistency"); - + // do not mark the finalizer block itself. just mark the things it points at. scanblock(v, size); } +// Mark static void mark(void) { G *gp; - uintptr blsize, nobj; - - // Figure out how big an object stack we need. - // Get a new one if we need more than we have - // or we need significantly less than we have. - nobj = mstats.heap_objects; - if(nobj > ebl - bl || nobj < (ebl-bl)/4) { - if(bl != nil) - runtime·SysFree(bl, (byte*)ebl - (byte*)bl); - - // While we're allocated a new object stack, - // add 20% headroom and also round up to - // the nearest page boundary, since mmap - // will anyway. - nobj = nobj * 12/10; - blsize = nobj * sizeof *bl; - blsize = (blsize + 4095) & ~4095; - nobj = blsize / sizeof *bl; - bl = runtime·SysAlloc(blsize); - ebl = bl + nobj; - } // mark data+bss. // skip runtime·mheap itself, which has no interesting pointers @@ -192,97 +396,85 @@ mark(void) runtime·walkfintab(markfin); } -// free RefNone, free & queue finalizers for RefNone|RefHasFinalizer, reset RefSome +// Sweep frees or calls finalizers for blocks not marked in the mark phase. +// It clears the mark bits in preparation for the next GC round. static void -sweepspan(MSpan *s) +sweep(void) { - int32 n, npages, size; + MSpan *s; + int32 cl, n, npages; + uintptr size; byte *p; - uint32 ref, *gcrefp, *gcrefep; MCache *c; Finalizer *f; - p = (byte*)(s->start << PageShift); - if(s->sizeclass == 0) { - // Large block. - ref = s->gcref0; - switch(ref & ~(RefFlags^RefHasFinalizer)) { - case RefNone: - // Free large object. - mstats.alloc -= s->npages<<PageShift; - mstats.nfree++; - runtime·memclr(p, s->npages<<PageShift); - if(ref & RefProfiled) - runtime·MProf_Free(p, s->npages<<PageShift); - s->gcref0 = RefFree; - runtime·MHeap_Free(&runtime·mheap, s, 1); - break; - case RefNone|RefHasFinalizer: - f = runtime·getfinalizer(p, 1); - if(f == nil) - runtime·throw("finalizer inconsistency"); - f->arg = p; - f->next = finq; - finq = f; - ref &= ~RefHasFinalizer; - // fall through - case RefSome: - case RefSome|RefHasFinalizer: - s->gcref0 = RefNone | (ref&RefFlags); - break; + for(s = runtime·mheap.allspans; s != nil; s = s->allnext) { + if(s->state != MSpanInUse) + continue; + + p = (byte*)(s->start << PageShift); + cl = s->sizeclass; + if(cl == 0) { + size = s->npages<<PageShift; + n = 1; + } else { + // Chunk full of small blocks. + size = runtime·class_to_size[cl]; + npages = runtime·class_to_allocnpages[cl]; + n = (npages << PageShift) / size; } - return; - } + + // sweep through n objects of given size starting at p. + for(; n > 0; n--, p += size) { + uintptr off, *bitp, shift, bits; - // Chunk full of small blocks. - runtime·MGetSizeClassInfo(s->sizeclass, &size, &npages, &n); - gcrefp = s->gcref; - gcrefep = s->gcref + n; - for(; gcrefp < gcrefep; gcrefp++, p += size) { - ref = *gcrefp; - if(ref < RefNone) // RefFree or RefStack - continue; - switch(ref & ~(RefFlags^RefHasFinalizer)) { - case RefNone: - // Free small object. 
- if(ref & RefProfiled) + off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start; + bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + bits = *bitp>>shift; + + if((bits & bitAllocated) == 0) + continue; + + if((bits & bitMarked) != 0) { + *bitp &= ~(bitMarked<<shift); + continue; + } + + if((bits & bitSpecial) != 0) { + // Special means it has a finalizer or is being profiled. + f = runtime·getfinalizer(p, 1); + if(f != nil) { + f->arg = p; + f->next = finq; + finq = f; + continue; + } runtime·MProf_Free(p, size); - *gcrefp = RefFree; - c = m->mcache; - if(size > sizeof(uintptr)) - ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed" + } + + // Mark freed; restore block boundary bit. + *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); + + if(s->sizeclass == 0) { + // Free large span. + runtime·unmarkspan(p, 1<<PageShift); + *(uintptr*)p = 1; // needs zeroing + runtime·MHeap_Free(&runtime·mheap, s, 1); + } else { + // Free small object. + c = m->mcache; + if(size > sizeof(uintptr)) + ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed" + mstats.by_size[s->sizeclass].nfree++; + runtime·MCache_Free(c, p, s->sizeclass, size); + } mstats.alloc -= size; mstats.nfree++; - mstats.by_size[s->sizeclass].nfree++; - runtime·MCache_Free(c, p, s->sizeclass, size); - break; - case RefNone|RefHasFinalizer: - f = runtime·getfinalizer(p, 1); - if(f == nil) - runtime·throw("finalizer inconsistency"); - f->arg = p; - f->next = finq; - finq = f; - ref &= ~RefHasFinalizer; - // fall through - case RefSome: - case RefSome|RefHasFinalizer: - *gcrefp = RefNone | (ref&RefFlags); - break; } } } -static void -sweep(void) -{ - MSpan *s; - - for(s = runtime·mheap.allspans; s != nil; s = s->allnext) - if(s->state == MSpanInUse) - sweepspan(s); -} - // Semaphore, not Lock, so that the goroutine // reschedules when there is contention rather // than spinning. 
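Both the mark and sweep loops recompute the same bitmap coordinates. A minimal standalone sketch of the addressing scheme spelled out in the comment at the top of mgc0.c, assuming the bit definitions from this patch (the helper name is illustrative, not part of the patch):

	// heapbits returns the four bitmap bits describing the heap word at p.
	// arena_start is runtime·mheap.arena_start; the bitmap extends backward
	// from that address, one bitmap word per wordsPerBitmapWord heap words.
	static uintptr
	heapbits(uintptr *arena_start, void *p)
	{
		uintptr off, *b, shift;

		off = (uintptr*)p - arena_start;              // word offset into the arena
		b = arena_start - off/wordsPerBitmapWord - 1; // bitmap word, counting backward
		shift = off % wordsPerBitmapWord;             // bit position within that word
		return (*b >> shift) & bitMask;               // test bitAllocated, bitMarked, ...
	}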
@@ -326,7 +518,8 @@ cachestats(void) void runtime·gc(int32 force) { - int64 t0, t1; + int64 t0, t1, t2, t3; + uint64 heap0, heap1, obj0, obj1; byte *p; Finalizer *fp; @@ -349,23 +542,41 @@ runtime·gc(int32 force) gcpercent = -1; else gcpercent = runtime·atoi(p); + + p = runtime·getenv("GOGCTRACE"); + if(p != nil) + gctrace = runtime·atoi(p); } if(gcpercent < 0) return; runtime·semacquire(&gcsema); + if(!force && mstats.heap_alloc < mstats.next_gc) { + runtime·semrelease(&gcsema); + return; + } + t0 = runtime·nanotime(); + nlookup = 0; + nsizelookup = 0; + naddrlookup = 0; + m->gcing = 1; runtime·stoptheworld(); if(runtime·mheap.Lock.key != 0) runtime·throw("runtime·mheap locked during gc"); - if(force || mstats.heap_alloc >= mstats.next_gc) { - cachestats(); - mark(); - sweep(); - stealcache(); - mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; - } + + cachestats(); + heap0 = mstats.heap_alloc; + obj0 = mstats.nmalloc - mstats.nfree; + + mark(); + t1 = runtime·nanotime(); + sweep(); + t2 = runtime·nanotime(); + stealcache(); + + mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; m->gcing = 0; m->locks++; // disable gc during the mallocs in newproc @@ -381,18 +592,34 @@ runtime·gc(int32 force) } m->locks--; - t1 = runtime·nanotime(); + cachestats(); + heap1 = mstats.heap_alloc; + obj1 = mstats.nmalloc - mstats.nfree; + + t3 = runtime·nanotime(); + mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; + mstats.pause_total_ns += t3 - t0; mstats.numgc++; - mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t1 - t0; - mstats.pause_total_ns += t1 - t0; if(mstats.debuggc) - runtime·printf("pause %D\n", t1-t0); + runtime·printf("pause %D\n", t3-t0); + + if(gctrace) { + runtime·printf("gc%d: %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr)\n", + mstats.numgc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000, + heap0>>20, heap1>>20, obj0, obj1, + mstats.nmalloc, mstats.nfree, + nlookup, nsizelookup, naddrlookup); + } + runtime·semrelease(&gcsema); runtime·starttheworld(); // give the queued finalizers, if any, a chance to run if(fp != nil) runtime·gosched(); + + if(gctrace > 1 && !force) + runtime·gc(1); } static void @@ -430,3 +657,157 @@ runfinq(void) runtime·gc(1); // trigger another gc to clean up the finalized objects, if possible } } + +// mark the block at v of size n as allocated. +// If noptr is true, mark it as having no pointers. +void +runtime·markallocated(void *v, uintptr n, bool noptr) +{ + uintptr *b, bits, off, shift; + + if(0) + runtime·printf("markallocated %p+%p\n", v, n); + + if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) + runtime·throw("markallocated: bad pointer"); + + off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset + b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + + bits = (*b & ~(bitMask<<shift)) | (bitAllocated<<shift); + if(noptr) + bits |= bitNoPointers<<shift; + *b = bits; +} + +// mark the block at v of size n as freed. 
+void +runtime·markfreed(void *v, uintptr n) +{ + uintptr *b, off, shift; + + if(0) + runtime·printf("markallocated %p+%p\n", v, n); + + if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) + runtime·throw("markallocated: bad pointer"); + + off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset + b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + + *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); +} + +// check that the block at v of size n is marked freed. +void +runtime·checkfreed(void *v, uintptr n) +{ + uintptr *b, bits, off, shift; + + if(!runtime·checking) + return; + + if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) + return; // not allocated, so okay + + off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset + b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + + bits = *b>>shift; + if((bits & bitAllocated) != 0) { + runtime·printf("checkfreed %p+%p: off=%p have=%p\n", + v, n, off, bits & bitMask); + runtime·throw("checkfreed: not freed"); + } +} + +// mark the span of memory at v as having n blocks of the given size. +// if leftover is true, there is left over space at the end of the span. +void +runtime·markspan(void *v, uintptr size, uintptr n, bool leftover) +{ + uintptr *b, off, shift; + byte *p; + + if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) + runtime·throw("markspan: bad pointer"); + + p = v; + if(leftover) // mark a boundary just past end of last block too + n++; + for(; n-- > 0; p += size) { + off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start; // word offset + b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); + } +} + +// unmark the span of memory at v of length n bytes. +void +runtime·unmarkspan(void *v, uintptr n) +{ + uintptr *p, *b, off; + + if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) + runtime·throw("markspan: bad pointer"); + + p = v; + off = p - (uintptr*)runtime·mheap.arena_start; // word offset + if(off % wordsPerBitmapWord != 0) + runtime·throw("markspan: unaligned pointer"); + b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + n /= PtrSize; + if(n%wordsPerBitmapWord != 0) + runtime·throw("unmarkspan: unaligned length"); + n /= wordsPerBitmapWord; + while(n-- > 0) + *b-- = 0; +} + +bool +runtime·blockspecial(void *v) +{ + uintptr *b, off, shift; + + off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; + b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + + return (*b & (bitSpecial<<shift)) != 0; +} + +void +runtime·setblockspecial(void *v) +{ + uintptr *b, off, shift; + + off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; + b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + + *b |= bitSpecial<<shift; +} + +void +runtime·MHeap_MapBits(MHeap *h) +{ + // Caller has added extra mappings to the arena. + // Add extra mappings of bitmap words as needed. + // We allocate extra bitmap pieces in chunks of bitmapChunk. 
+ enum { + bitmapChunk = 8192 + }; + uintptr n; + + n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; + n = (n+bitmapChunk-1) & ~(bitmapChunk-1); + if(h->bitmap_mapped >= n) + return; + + runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped); + h->bitmap_mapped = n; +} diff --git a/src/pkg/runtime/mheap.c b/src/pkg/runtime/mheap.c index 0c9ac0a09..8061b7cf8 100644 --- a/src/pkg/runtime/mheap.c +++ b/src/pkg/runtime/mheap.c @@ -180,7 +180,9 @@ MHeap_Grow(MHeap *h, uintptr npage) // Allocate a multiple of 64kB (16 pages). npage = (npage+15)&~15; ask = npage<<PageShift; - if(ask < HeapAllocChunk) + if(ask > h->arena_end - h->arena_used) + return false; + if(ask < HeapAllocChunk && HeapAllocChunk <= h->arena_end - h->arena_used) ask = HeapAllocChunk; v = runtime·MHeap_SysAlloc(h, ask); @@ -194,11 +196,6 @@ MHeap_Grow(MHeap *h, uintptr npage) } mstats.heap_sys += ask; - if((byte*)v < h->arena_start || h->arena_start == nil) - h->arena_start = v; - if((byte*)v+ask > h->arena_end) - h->arena_end = (byte*)v+ask; - // Create a fake "in use" span and free it, so that the // right coalescing happens. s = runtime·FixAlloc_Alloc(&h->spanalloc); @@ -370,10 +367,14 @@ runtime·MSpanList_IsEmpty(MSpan *list) void runtime·MSpanList_Insert(MSpan *list, MSpan *span) { - if(span->next != nil || span->prev != nil) + if(span->next != nil || span->prev != nil) { + runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev); runtime·throw("MSpanList_Insert"); + } span->next = list->next; span->prev = list; span->next->prev = span; span->prev->next = span; } + + diff --git a/src/pkg/runtime/mkasmh.sh b/src/pkg/runtime/mkasmh.sh index 3ed5f74c9..91d1bbe5d 100755 --- a/src/pkg/runtime/mkasmh.sh +++ b/src/pkg/runtime/mkasmh.sh @@ -25,9 +25,9 @@ case "$GOARCH" in echo '#define m(r) 4(r)' ;; plan9) - echo '#define get_tls(r)' - echo '#define g(r) 0xdfffefc0' - echo '#define m(r) 0xdfffefc4' + echo '#define get_tls(r) MOVL _tos(SB), r ' + echo '#define g(r) -8(r)' + echo '#define m(r) -4(r)' ;; linux) # On Linux systems, what we call 0(GS) and 4(GS) for g and m @@ -84,6 +84,7 @@ esac echo awk ' +{ gsub(/\r/, ""); } /^aggr G$/ { aggr="g" } /^aggr M$/ { aggr = "m" } /^aggr Gobuf$/ { aggr = "gobuf" } diff --git a/src/pkg/runtime/mkversion.c b/src/pkg/runtime/mkversion.c index 9790d3f09..56afa1892 100644 --- a/src/pkg/runtime/mkversion.c +++ b/src/pkg/runtime/mkversion.c @@ -5,13 +5,11 @@ char *template = "// generated by mkversion.c; do not edit.\n" "package runtime\n" "const defaultGoroot = \"%s\"\n" - "const theVersion = \"%s\"\n" - "const theGoarch = \"%s\"\n" - "const theGoos = \"%s\"\n"; + "const theVersion = \"%s\"\n"; void main(void) { - print(template, getgoroot(), getgoversion(), getgoarch(), getgoos()); + print(template, getgoroot(), getgoversion()); exits(0); } diff --git a/src/pkg/runtime/mprof.goc b/src/pkg/runtime/mprof.goc index f4581e98d..aae3d183f 100644 --- a/src/pkg/runtime/mprof.goc +++ b/src/pkg/runtime/mprof.goc @@ -65,7 +65,7 @@ stkbucket(uintptr *stk, int32 nstk) runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0) return b; - b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], RefNoProfiling, 0, 1); + b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1); bucketmem += sizeof *b + nstk*sizeof stk[0]; runtime·memmove(b->stk, stk, nstk*sizeof stk[0]); b->hash = h; @@ -132,7 +132,7 @@ setaddrbucket(uintptr addr, Bucket *b) if(ah->addr == (addr>>20)) goto found; - ah = runtime·mallocgc(sizeof *ah, RefNoProfiling, 0, 1); + ah = 
runtime·mallocgc(sizeof *ah, FlagNoProfiling, 0, 1); addrmem += sizeof *ah; ah->next = addrhash[h]; ah->addr = addr>>20; @@ -140,7 +140,7 @@ setaddrbucket(uintptr addr, Bucket *b) found: if((e = addrfree) == nil) { - e = runtime·mallocgc(64*sizeof *e, RefNoProfiling, 0, 0); + e = runtime·mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0); addrmem += 64*sizeof *e; for(i=0; i+1<64; i++) e[i].next = &e[i+1]; diff --git a/src/pkg/runtime/msize.c b/src/pkg/runtime/msize.c index ec85eb373..770ef38ce 100644 --- a/src/pkg/runtime/msize.c +++ b/src/pkg/runtime/msize.c @@ -57,7 +57,7 @@ runtime·SizeToClass(int32 size) void runtime·InitSizes(void) { - int32 align, sizeclass, size, osize, nextsize, n; + int32 align, sizeclass, size, nextsize, n; uint32 i; uintptr allocsize, npages; @@ -81,8 +81,7 @@ runtime·InitSizes(void) // the leftover is less than 1/8 of the total, // so wasted space is at most 12.5%. allocsize = PageSize; - osize = size + RefcountOverhead; - while(allocsize%osize > (allocsize/8)) + while(allocsize%size > allocsize/8) allocsize += PageSize; npages = allocsize >> PageShift; @@ -93,7 +92,7 @@ runtime·InitSizes(void) // different sizes. if(sizeclass > 1 && npages == runtime·class_to_allocnpages[sizeclass-1] - && allocsize/osize == allocsize/(runtime·class_to_size[sizeclass-1]+RefcountOverhead)) { + && allocsize/size == allocsize/runtime·class_to_size[sizeclass-1]) { runtime·class_to_size[sizeclass-1] = size; continue; } diff --git a/src/pkg/runtime/plan9/386/defs.h b/src/pkg/runtime/plan9/386/defs.h index 5df757613..58fd9d94d 100644 --- a/src/pkg/runtime/plan9/386/defs.h +++ b/src/pkg/runtime/plan9/386/defs.h @@ -1 +1,2 @@ // nothing to see here +#define tos_pid 48 diff --git a/src/pkg/runtime/plan9/386/sys.s b/src/pkg/runtime/plan9/386/sys.s index 867b8940f..f760b782f 100644 --- a/src/pkg/runtime/plan9/386/sys.s +++ b/src/pkg/runtime/plan9/386/sys.s @@ -58,9 +58,10 @@ TEXT runtime·rfork(SB),7,$0 MOVL BX, m(AX) // Initialize AX from _tos->pid - MOVL 0xdfffeff8, AX + MOVL _tos(SB), AX + MOVL tos_pid(AX), AX MOVL AX, m_procid(BX) // save pid as m->procid - + CALL runtime·stackcheck(SB) // smashes AX, CX MOVL 0(DX), DX // paranoia; check they are not nil diff --git a/src/pkg/runtime/plan9/mem.c b/src/pkg/runtime/plan9/mem.c index 651e6728e..b840de984 100644 --- a/src/pkg/runtime/plan9/mem.c +++ b/src/pkg/runtime/plan9/mem.c @@ -10,40 +10,47 @@ static byte *bloc = { end }; enum { - Round = 7 + Round = 4095 }; void* -runtime·SysAlloc(uintptr ask) +runtime·SysAlloc(uintptr nbytes) { uintptr bl; // Plan 9 sbrk from /sys/src/libc/9sys/sbrk.c bl = ((uintptr)bloc + Round) & ~Round; - if(runtime·brk_((void*)(bl + ask)) < 0) + if(runtime·brk_((void*)(bl + nbytes)) < 0) return (void*)-1; - bloc = (byte*)bl + ask; + bloc = (byte*)bl + nbytes; return (void*)bl; } void -runtime·SysFree(void *v, uintptr n) +runtime·SysFree(void *v, uintptr nbytes) { // from tiny/mem.c // Push pointer back if this is a free // of the most recent SysAlloc. 
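Editor's note: the Plan 9 SysAlloc/SysFree pair being rewritten here is a page-rounded bump allocator over the program break: requests are aligned with the Round mask and only the most recent allocation can be handed back. The toy below models that behaviour over a static array, with brk_() reduced to a bounds check; names and sizes are illustrative, and the real SysFree also rounds the size before comparing.

#include <stdio.h>
#include <stdint.h>

enum { Round = 4095 };                    /* page mask, as in the change above */

static unsigned char arena[1<<20];
static unsigned char *bloc = arena;       /* current "break" */

static int
brk_(void *addr)
{
	if((uintptr_t)addr > (uintptr_t)(arena + sizeof arena))
		return -1;                /* out of memory */
	return 0;
}

static void*
sysalloc(uintptr_t nbytes)
{
	uintptr_t bl = ((uintptr_t)bloc + Round) & ~(uintptr_t)Round;  /* page-align */

	if(brk_((void*)(bl + nbytes)) < 0)
		return (void*)-1;
	bloc = (unsigned char*)bl + nbytes;
	return (void*)bl;
}

static void
sysfree(void *v, uintptr_t nbytes)
{
	/* Only the most recent allocation can be returned to the break. */
	if(bloc == (unsigned char*)v + nbytes)
		bloc = v;
}

int
main(void)
{
	void *p = sysalloc(100);
	void *q = sysalloc(100);

	sysfree(q, 100);                  /* gives the last allocation back */
	printf("p=%p q=%p bloc=%p\n", p, q, (void*)bloc);
	return 0;
}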
- n += (n + Round) & ~Round; - if(bloc == (byte*)v+n) - bloc -= n; + nbytes += (nbytes + Round) & ~Round; + if(bloc == (byte*)v+nbytes) + bloc -= nbytes; } void -runtime·SysUnused(void *v, uintptr n) +runtime·SysUnused(void *v, uintptr nbytes) { - USED(v, n); + USED(v, nbytes); } void -runtime·SysMemInit(void) +runtime·SysMap(void *v, uintptr nbytes) { + USED(v, nbytes); +} + +void* +runtime·SysReserve(void *v, uintptr nbytes) +{ + return runtime·SysAlloc(nbytes); } diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index 998cbc7bc..26c1f13a4 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -752,8 +752,8 @@ runtime·newstack(void) free = framesize; } -//printf("newstack frame=%d args=%d morepc=%p morefp=%p gobuf=%p, %p newstk=%p\n", -//frame, args, m->morepc, m->morefp, g->sched.pc, g->sched.sp, stk); +//runtime·printf("newstack framesize=%d argsize=%d morepc=%p moreargp=%p gobuf=%p, %p top=%p old=%p\n", +//framesize, argsize, m->morepc, m->moreargp, m->morebuf.pc, m->morebuf.sp, top, g1->stackbase); top->stackbase = g1->stackbase; top->stackguard = g1->stackguard; @@ -761,7 +761,7 @@ runtime·newstack(void) top->argp = m->moreargp; top->argsize = argsize; top->free = free; - + // copy flag from panic top->panic = g1->ispanic; g1->ispanic = false; diff --git a/src/pkg/runtime/runtime.c b/src/pkg/runtime/runtime.c index 284b1e458..e3a20d48a 100644 --- a/src/pkg/runtime/runtime.c +++ b/src/pkg/runtime/runtime.c @@ -528,14 +528,22 @@ void runtime·Caller(int32 skip, uintptr retpc, String retfile, int32 retline, bool retbool) { Func *f; + uintptr pc; - if(runtime·callers(1+skip, &retpc, 1) == 0 || (f = runtime·findfunc(retpc-1)) == nil) { + if(runtime·callers(1+skip, &retpc, 1) == 0) { retfile = runtime·emptystring; retline = 0; retbool = false; + } else if((f = runtime·findfunc(retpc)) == nil) { + retfile = runtime·emptystring; + retline = 0; + retbool = true; // have retpc at least } else { retfile = f->src; - retline = runtime·funcline(f, retpc-1); + pc = retpc; + if(pc > f->entry) + pc--; + retline = runtime·funcline(f, pc); retbool = true; } FLUSH(&retfile); diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h index 2c19f851e..cea07e4a7 100644 --- a/src/pkg/runtime/runtime.h +++ b/src/pkg/runtime/runtime.h @@ -419,7 +419,7 @@ void runtime·signalstack(byte*, int32); G* runtime·malg(int32); void runtime·minit(void); Func* runtime·findfunc(uintptr); -int32 runtime·funcline(Func*, uint64); +int32 runtime·funcline(Func*, uintptr); void* runtime·stackalloc(uint32); void runtime·stackfree(void*, uintptr); MCache* runtime·allocmcache(void); @@ -443,7 +443,7 @@ void runtime·breakpoint(void); void runtime·gosched(void); void runtime·goexit(void); void runtime·runcgo(void (*fn)(void*), void*); -uintptr runtime·runcgocallback(G*, void*, void (*fn)()); +void runtime·runcgocallback(G*, void*, void (*fn)()); void runtime·entersyscall(void); void runtime·exitsyscall(void); void runtime·startcgocallback(G*); diff --git a/src/pkg/runtime/slice.c b/src/pkg/runtime/slice.c index 051075479..1fee923e4 100644 --- a/src/pkg/runtime/slice.c +++ b/src/pkg/runtime/slice.c @@ -41,7 +41,7 @@ makeslice1(SliceType *t, int32 len, int32 cap, Slice *ret) ret->cap = cap; if((t->elem->kind&KindNoPointers)) - ret->array = runtime·mallocgc(size, RefNoPointers, 1, 1); + ret->array = runtime·mallocgc(size, FlagNoPointers, 1, 1); else ret->array = runtime·mal(size); } diff --git a/src/pkg/runtime/string.goc b/src/pkg/runtime/string.goc index 916559eb2..b72aa937c 100644 --- 
a/src/pkg/runtime/string.goc +++ b/src/pkg/runtime/string.goc @@ -225,7 +225,7 @@ func slicebytetostring(b Slice) (s String) { } func stringtoslicebyte(s String) (b Slice) { - b.array = runtime·mallocgc(s.len, RefNoPointers, 1, 1); + b.array = runtime·mallocgc(s.len, FlagNoPointers, 1, 1); b.len = s.len; b.cap = s.len; runtime·mcpy(b.array, s.str, s.len); @@ -268,7 +268,7 @@ func stringtosliceint(s String) (b Slice) { n++; } - b.array = runtime·mallocgc(n*sizeof(r[0]), RefNoPointers, 1, 1); + b.array = runtime·mallocgc(n*sizeof(r[0]), FlagNoPointers, 1, 1); b.len = n; b.cap = n; p = s.str; diff --git a/src/pkg/runtime/symtab.c b/src/pkg/runtime/symtab.c index b2cccd3cf..6f0eea0e7 100644 --- a/src/pkg/runtime/symtab.c +++ b/src/pkg/runtime/symtab.c @@ -258,28 +258,49 @@ splitpcln(void) ef = func + nfunc; pc = func[0].entry; // text base f->pcln.array = p; - f->pc0 = pc - pcquant; + f->pc0 = pc; line = 0; - for(; p < ep; p++) { - if(f < ef && pc > (f+1)->entry) { + for(;;) { + while(p < ep && *p > 128) + pc += pcquant * (*p++ - 128); + // runtime·printf("pc<%p targetpc=%p line=%d\n", pc, targetpc, line); + if(*p == 0) { + if(p+5 > ep) + break; + // 4 byte add to line + line += (p[1]<<24) | (p[2]<<16) | (p[3]<<8) | p[4]; + p += 5; + } else if(*p <= 64) + line += *p++; + else + line -= *p++ - 64; + + // pc, line now match. + // Because the state machine begins at pc==entry and line==0, + // it can happen - just at the beginning! - that the update may + // have updated line but left pc alone, to tell us the true line + // number for pc==entry. In that case, update f->ln0. + // Having the correct initial line number is important for choosing + // the correct file in dosrcline above. + if(f == func && pc == f->pc0) { + f->pcln.array = p; + f->pc0 = pc + pcquant; + f->ln0 = line; + } + + if(f < ef && pc >= (f+1)->entry) { f->pcln.len = p - f->pcln.array; f->pcln.cap = f->pcln.len; f++; f->pcln.array = p; - f->pc0 = pc; + // pc0 and ln0 are the starting values for + // the loop over f->pcln, so pc must be + // adjusted by the same pcquant update + // that we're going to do as we continue our loop. + f->pc0 = pc + pcquant; f->ln0 = line; } - if(*p == 0) { - // 4 byte add to line - line += (p[1]<<24) | (p[2]<<16) | (p[3]<<8) | p[4]; - p += 4; - } else if(*p <= 64) { - line += *p; - } else if(*p <= 128) { - line -= *p - 64; - } else { - pc += pcquant*(*p - 129); - } + pc += pcquant; } if(f < ef) { @@ -293,13 +314,17 @@ splitpcln(void) // (Source file is f->src.) // NOTE(rsc): If you edit this function, also edit extern.go:/FileLine int32 -runtime·funcline(Func *f, uint64 targetpc) +runtime·funcline(Func *f, uintptr targetpc) { byte *p, *ep; uintptr pc; int32 line; int32 pcquant; + enum { + debug = 0 + }; + switch(thechar) { case '5': pcquant = 4; @@ -313,17 +338,41 @@ runtime·funcline(Func *f, uint64 targetpc) ep = p + f->pcln.len; pc = f->pc0; line = f->ln0; - for(; p < ep && pc <= targetpc; p++) { + if(debug && !runtime·panicking) + runtime·printf("funcline start pc=%p targetpc=%p line=%d tab=%p+%d\n", + pc, targetpc, line, p, (int32)f->pcln.len); + for(;;) { + // Table is a sequence of updates. + + // Each update says first how to adjust the pc, + // in possibly multiple instructions... + while(p < ep && *p > 128) + pc += pcquant * (*p++ - 128); + + if(debug && !runtime·panicking) + runtime·printf("pc<%p targetpc=%p line=%d\n", pc, targetpc, line); + + // If the pc has advanced too far or we're out of data, + // stop and the last known line number. + if(pc > targetpc || p >= ep) + break; + + // ... 
and then how to adjust the line number, + // in a single instruction. if(*p == 0) { + if(p+5 > ep) + break; line += (p[1]<<24) | (p[2]<<16) | (p[3]<<8) | p[4]; - p += 4; - } else if(*p <= 64) { - line += *p; - } else if(*p <= 128) { - line -= *p - 64; - } else { - pc += pcquant*(*p - 129); - } + p += 5; + } else if(*p <= 64) + line += *p++; + else + line -= *p++ - 64; + // Now pc, line pair is consistent. + if(debug && !runtime·panicking) + runtime·printf("pc=%p targetpc=%p line=%d\n", pc, targetpc, line); + + // PC increments implicitly on each iteration. pc += pcquant; } return line; diff --git a/src/pkg/runtime/windows/386/defs.h b/src/pkg/runtime/windows/386/defs.h index a2a882103..49fc19504 100644 --- a/src/pkg/runtime/windows/386/defs.h +++ b/src/pkg/runtime/windows/386/defs.h @@ -10,6 +10,9 @@ enum { PROT_EXEC = 0x4, MAP_ANON = 0x1, MAP_PRIVATE = 0x2, + SIGINT = 0x2, + CTRL_C_EVENT = 0, + CTRL_BREAK_EVENT = 0x1, EXCEPTION_ACCESS_VIOLATION = 0xc0000005, EXCEPTION_BREAKPOINT = 0x80000003, EXCEPTION_FLT_DENORMAL_OPERAND = 0xc000008d, diff --git a/src/pkg/runtime/windows/386/signal.c b/src/pkg/runtime/windows/386/signal.c index 69178cdd0..903636910 100644 --- a/src/pkg/runtime/windows/386/signal.c +++ b/src/pkg/runtime/windows/386/signal.c @@ -27,12 +27,7 @@ runtime·dumpregs(Context *r) void runtime·initsig(int32) { -} - -String -runtime·signame(int32) -{ - return runtime·emptystring; + runtime·siginit(); } uint32 diff --git a/src/pkg/runtime/windows/386/sys.s b/src/pkg/runtime/windows/386/sys.s index d1a8a49a9..bca48febe 100644 --- a/src/pkg/runtime/windows/386/sys.s +++ b/src/pkg/runtime/windows/386/sys.s @@ -99,6 +99,45 @@ TEXT runtime·sigtramp1(SB),0,$16-28 sigdone: RET +// Windows runs the ctrl handler in a new thread. +TEXT runtime·ctrlhandler(SB),7,$0 + PUSHL BP + MOVL SP, BP + PUSHL BX + PUSHL SI + PUSHL DI + PUSHL 0x2c(FS) + MOVL SP, BX + + // setup dummy m, g + SUBL $(m_sehframe+4), SP // at least space for m_sehframe + LEAL m_tls(SP), CX + MOVL CX, 0x2c(FS) + MOVL SP, m(CX) + MOVL SP, DX + SUBL $8, SP // space for g_stack{guard,base} + MOVL SP, g(CX) + MOVL SP, m_g0(DX) + LEAL -4096(SP), CX + MOVL CX, g_stackguard(SP) + MOVL BX, g_stackbase(SP) + + PUSHL 8(BP) + CALL runtime·ctrlhandler1(SB) + POPL CX + + get_tls(CX) + MOVL g(CX), CX + MOVL g_stackbase(CX), SP + POPL 0x2c(FS) + POPL DI + POPL SI + POPL BX + POPL BP + MOVL 0(SP), CX + ADDL $8, SP + JMP CX + // Called from dynamic function created by ../thread.c compilecallback, // running on Windows stack (not Go stack). 
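Editor's note: the rewritten splitpcln and funcline walk the same byte-coded pc/line table, so it is worth seeing the decoder on its own. The function below mirrors the loop in funcline above but runs standalone; the typedefs, the sample table, and the entry/line values are invented for the example, and pcquant plays the role of the per-architecture quantum (1 on x86, 4 on ARM per the switch above).

#include <stdio.h>
#include <stdint.h>

typedef uint8_t   byte;
typedef uintptr_t uintptr;

/* Opcodes: byte > 128 advances pc by pcquant*(byte-128); byte 0 is
   followed by a 4-byte big-endian line delta; 1..64 add to the line;
   65..128 subtract (byte-64).  After each line update, pc advances by
   one quantum. */
static int32_t
funcline_sketch(byte *p, byte *ep, uintptr pc, int32_t line,
	uintptr targetpc, int32_t pcquant)
{
	for(;;) {
		/* First, any number of pc-only advances. */
		while(p < ep && *p > 128)
			pc += pcquant * (*p++ - 128);

		/* Past the target, or out of table: the last line stands. */
		if(pc > targetpc || p >= ep)
			break;

		/* Then one line adjustment. */
		if(*p == 0) {
			if(p+5 > ep)
				break;
			line += (p[1]<<24) | (p[2]<<16) | (p[3]<<8) | p[4];
			p += 5;
		} else if(*p <= 64)
			line += *p++;
		else
			line -= *p++ - 64;

		/* (pc, line) now describe one instruction; move to the next. */
		pc += pcquant;
	}
	return line;
}

int
main(void)
{
	/* Made-up table: +5 lines at entry, then +1, then +1. */
	byte tab[] = { 5, 1, 1 };
	uintptr entry = 0x1000;

	printf("line at entry+2 = %d\n",
		(int)funcline_sketch(tab, tab+sizeof tab, entry, 10, entry+2, 1));
	return 0;
}

splitpcln runs this same decode once at startup to cut per-function slices out of the table, which is why both loops have to agree on exactly when pc advances.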
// BX, BP, SI, DI registers and DF flag are preserved @@ -107,7 +146,11 @@ sigdone: // DX = total size of arguments // TEXT runtime·callbackasm+0(SB),7,$0 + // preserve whatever's at the memory location that + // the callback will use to store the return value LEAL 8(SP), CX + PUSHL 0(CX)(DX*1) + ADDL $4, DX // extend argsize by size of return value // save registers as required for windows callback PUSHL 0(FS) @@ -129,7 +172,7 @@ TEXT runtime·callbackasm+0(SB),7,$0 CALL runtime·cgocallback(SB) // restore registers as required for windows callback - POPL CX + POPL AX POPL CX POPL DX POPL BX @@ -139,6 +182,8 @@ TEXT runtime·callbackasm+0(SB),7,$0 POPL 0(FS) CLD + MOVL -4(CX)(DX*1), AX + POPL -4(CX)(DX*1) RET // void tstart(M *newm); diff --git a/src/pkg/runtime/windows/defs.c b/src/pkg/runtime/windows/defs.c index 5aac03c81..3b2824940 100644 --- a/src/pkg/runtime/windows/defs.c +++ b/src/pkg/runtime/windows/defs.c @@ -2,9 +2,11 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +#include <signal.h> #include <stdarg.h> #include <windef.h> #include <winbase.h> +#include <wincon.h> enum { $PROT_NONE = 0, @@ -15,6 +17,10 @@ enum { $MAP_ANON = 1, $MAP_PRIVATE = 2, + $SIGINT = SIGINT, + $CTRL_C_EVENT = CTRL_C_EVENT, + $CTRL_BREAK_EVENT = CTRL_BREAK_EVENT, + $EXCEPTION_ACCESS_VIOLATION = STATUS_ACCESS_VIOLATION, $EXCEPTION_BREAKPOINT = STATUS_BREAKPOINT, $EXCEPTION_FLT_DENORMAL_OPERAND = STATUS_FLOAT_DENORMAL_OPERAND, diff --git a/src/pkg/runtime/windows/mem.c b/src/pkg/runtime/windows/mem.c index 19d11ce8d..54d77da37 100644 --- a/src/pkg/runtime/windows/mem.c +++ b/src/pkg/runtime/windows/mem.c @@ -48,7 +48,14 @@ runtime·SysFree(void *v, uintptr n) void* runtime·SysReserve(void *v, uintptr n) { - return runtime·stdcall(runtime·VirtualAlloc, 4, v, n, MEM_RESERVE, 0); + // v is just a hint. + // First try at v. + v = runtime·stdcall(runtime·VirtualAlloc, 4, v, n, MEM_RESERVE, PAGE_EXECUTE_READWRITE); + if(v != nil) + return v; + + // Next let the kernel choose the address. + return runtime·stdcall(runtime·VirtualAlloc, 4, nil, n, MEM_RESERVE, PAGE_EXECUTE_READWRITE); } void diff --git a/src/pkg/runtime/windows/os.h b/src/pkg/runtime/windows/os.h index 391eace5a..77881e86e 100644 --- a/src/pkg/runtime/windows/os.h +++ b/src/pkg/runtime/windows/os.h @@ -20,6 +20,7 @@ uint32 runtime·tstart_stdcall(M *newm); uint32 runtime·issigpanic(uint32); void runtime·sigpanic(void); +uint32 runtime·ctrlhandler(uint32 type); // Windows dll function to go callback entry. 
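Editor's note: the new windows SysReserve above tries the caller's address hint first and only falls back to letting the kernel pick, which is what keeps the arena reservation working when the hinted range is already taken. Below is a minimal Win32 sketch of that pattern using VirtualAlloc directly rather than the runtime's stdcall wrapper; the hint address and size in main are arbitrary, and the program is only meaningful on Windows.

#include <windows.h>
#include <stdio.h>

static void*
sysreserve(void *hint, size_t n)
{
	/* First try at the hint... */
	void *v = VirtualAlloc(hint, n, MEM_RESERVE, PAGE_EXECUTE_READWRITE);
	if(v != NULL)
		return v;
	/* ...then let the kernel choose the address. */
	return VirtualAlloc(NULL, n, MEM_RESERVE, PAGE_EXECUTE_READWRITE);
}

int
main(void)
{
	void *v = sysreserve((void*)0x40000000, 1<<20);

	printf("reserved at %p\n", v);
	if(v != NULL)
		VirtualFree(v, 0, MEM_RELEASE);
	return 0;
}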
byte *runtime·compilecallback(Eface fn, bool cleanstack); diff --git a/src/pkg/runtime/windows/thread.c b/src/pkg/runtime/windows/thread.c index 278a5da69..aedd24200 100644 --- a/src/pkg/runtime/windows/thread.c +++ b/src/pkg/runtime/windows/thread.c @@ -18,6 +18,7 @@ #pragma dynimport runtime·LoadLibraryEx LoadLibraryExA "kernel32.dll" #pragma dynimport runtime·QueryPerformanceCounter QueryPerformanceCounter "kernel32.dll" #pragma dynimport runtime·QueryPerformanceFrequency QueryPerformanceFrequency "kernel32.dll" +#pragma dynimport runtime·SetConsoleCtrlHandler SetConsoleCtrlHandler "kernel32.dll" #pragma dynimport runtime·SetEvent SetEvent "kernel32.dll" #pragma dynimport runtime·WaitForSingleObject WaitForSingleObject "kernel32.dll" #pragma dynimport runtime·WriteFile WriteFile "kernel32.dll" @@ -33,6 +34,7 @@ extern void *runtime·GetStdHandle; extern void *runtime·LoadLibraryEx; extern void *runtime·QueryPerformanceCounter; extern void *runtime·QueryPerformanceFrequency; +extern void *runtime·SetConsoleCtrlHandler; extern void *runtime·SetEvent; extern void *runtime·WaitForSingleObject; extern void *runtime·WriteFile; @@ -43,6 +45,7 @@ void runtime·osinit(void) { runtime·stdcall(runtime·QueryPerformanceFrequency, 1, &timerfreq); + runtime·stdcall(runtime·SetConsoleCtrlHandler, 2, runtime·ctrlhandler, 1); } void @@ -161,6 +164,7 @@ runtime·destroylock(Lock *l) void runtime·noteclear(Note *n) { + n->lock.key = 0; // memset(n, 0, sizeof *n) eventlock(&n->lock); } @@ -180,11 +184,17 @@ runtime·notesleep(Note *n) void runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) { + void *thandle; + USED(stk); USED(g); // assuming g = m->g0 USED(fn); // assuming fn = mstart - runtime·stdcall(runtime·CreateThread, 6, 0, 0, runtime·tstart_stdcall, m, 0, 0); + thandle = runtime·stdcall(runtime·CreateThread, 6, 0, 0, runtime·tstart_stdcall, m, 0, 0); + if(thandle == 0) { + runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), runtime·getlasterror()); + runtime·throw("runtime.newosproc"); + } } // Called to initialize a new m (including the bootstrap m). @@ -279,6 +289,41 @@ runtime·sigpanic(void) runtime·throw("fault"); } +String +runtime·signame(int32 sig) +{ + int8 *s; + + switch(sig) { + case SIGINT: + s = "SIGINT: interrupt"; + break; + default: + return runtime·emptystring; + } + return runtime·gostringnocopy((byte*)s); +} + +uint32 +runtime·ctrlhandler1(uint32 type) +{ + int32 s; + + switch(type) { + case CTRL_C_EVENT: + case CTRL_BREAK_EVENT: + s = SIGINT; + break; + default: + return 0; + } + + if(runtime·sigsend(s)) + return 1; + runtime·exit(2); // SIGINT, SIGTERM, etc + return 0; +} + // Call back from windows dll into go. byte * runtime·compilecallback(Eface fn, bool cleanstack) |
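Editor's note: to make the thread.c changes concrete, osinit registers runtime·ctrlhandler with SetConsoleCtrlHandler, Windows invokes it on a fresh thread, and ctrlhandler1 turns CTRL_C_EVENT/CTRL_BREAK_EVENT into a SIGINT for sigsend, returning 1 when a Go handler consumed the event. The standalone sketch below shows the same registration and return-value convention with plain Win32 calls; it only prints the event instead of forwarding a signal, and the 10-second wait is just to give you time to press Ctrl+C.

#include <windows.h>
#include <stdio.h>

/* Runs on a thread created by Windows, like runtime·ctrlhandler above.
   Returning TRUE means the event was handled; FALSE lets the default
   action (process termination) proceed. */
static BOOL WINAPI
ctrlhandler(DWORD type)
{
	switch(type) {
	case CTRL_C_EVENT:
	case CTRL_BREAK_EVENT:
		/* The runtime maps these to SIGINT via sigsend; here we
		   only acknowledge them. */
		fprintf(stderr, "got console ctrl event %lu\n", (unsigned long)type);
		return TRUE;
	}
	return FALSE;
}

int
main(void)
{
	if(!SetConsoleCtrlHandler(ctrlhandler, TRUE)) {
		fprintf(stderr, "SetConsoleCtrlHandler failed: %lu\n",
			(unsigned long)GetLastError());
		return 1;
	}
	puts("press Ctrl+C (waiting ~10s)...");
	Sleep(10000);
	return 0;
}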