Diffstat (limited to 'src/pkg/runtime')
38 files changed, 1209 insertions, 452 deletions
| diff --git a/src/pkg/runtime/386/asm.s b/src/pkg/runtime/386/asm.s index 63d582606..74e1df0da 100644 --- a/src/pkg/runtime/386/asm.s +++ b/src/pkg/runtime/386/asm.s @@ -5,6 +5,14 @@  #include "386/asm.h"  TEXT _rt0_386(SB),7,$0 +	// Linux, Windows start the FPU in extended double precision. +	// Other operating systems use double precision. +	// Change to double precision to match them, +	// and to match other hardware that only has double. +	PUSHL $0x27F +	FLDCW	0(SP) +	POPL AX +  	// copy arguments forward on an even stack  	MOVL	0(SP), AX		// argc  	LEAL	4(SP), BX		// argv diff --git a/src/pkg/runtime/Makefile b/src/pkg/runtime/Makefile index e9488cfb5..521c095b9 100644 --- a/src/pkg/runtime/Makefile +++ b/src/pkg/runtime/Makefile @@ -26,8 +26,12 @@ GOFILES=\  	softfloat64.go\  	type.go\  	version.go\ +	version_$(GOOS).go\ +	version_$(GOARCH).go\  	runtime_defs.go\ +CLEANFILES+=version.go version_*.go +  OFILES_windows=\  	syscall.$O\ @@ -107,7 +111,7 @@ include ../../Make.pkg  $(pkgdir)/%.h: %.h  	@test -d $(QUOTED_GOROOT)/pkg && mkdir -p $(pkgdir) -	cp $< $@ +	cp $< "$@"  clean: clean-local @@ -127,8 +131,14 @@ mkversion: mkversion.c  version.go: mkversion  	./mkversion >version.go +version_$(GOARCH).go: +	(echo 'package runtime'; echo 'const theGoarch = "$(GOARCH)"') >$@ + +version_$(GOOS).go: +	(echo 'package runtime'; echo 'const theGoos = "$(GOOS)"') >$@ +  %.c:	%.goc goc2c -	./goc2c `pwd`/$< > $@.tmp +	./goc2c "`pwd`/$<" > $@.tmp  	mv -f $@.tmp $@  %.$O:	$(GOARCH)/%.c diff --git a/src/pkg/runtime/amd64/traceback.c b/src/pkg/runtime/amd64/traceback.c index 86e96f348..d3aae0db9 100644 --- a/src/pkg/runtime/amd64/traceback.c +++ b/src/pkg/runtime/amd64/traceback.c @@ -8,6 +8,8 @@  static uintptr isclosureentry(uintptr);  void runtime·deferproc(void);  void runtime·newproc(void); +void runtime·newstack(void); +void runtime·morestack(void);  // This code is also used for the 386 tracebacks.  // Use uintptr for an appropriate word-sized integer. @@ -17,15 +19,32 @@ void runtime·newproc(void);  // A little clunky to merge the two but avoids duplicating  // the code and all its subtlety.  static int32 -gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 m) +gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 max)  {  	byte *p; -	int32 i, n, iter, nascent; -	uintptr pc, tracepc, *fp; +	int32 i, n, iter, sawnewstack; +	uintptr pc, lr, tracepc; +	byte *fp;  	Stktop *stk;  	Func *f; -	 +  	pc = (uintptr)pc0; +	lr = 0; +	fp = nil; +	 +	// If the PC is goexit, the goroutine hasn't started yet. +	if(pc0 == g->sched.pc && sp == g->sched.sp && pc0 == (byte*)runtime·goexit) { +		fp = sp; +		lr = pc; +		pc = (uintptr)g->entry; +	} +	 +	// If the PC is zero, it's likely a nil function call. +	// Start in the caller's frame. +	if(pc == 0) { +		pc = lr; +		lr = 0; +	}  	// If the PC is zero, it's likely a nil function call.  	// Start in the caller's frame. @@ -33,26 +52,29 @@ gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 m)  		pc = *(uintptr*)sp;  		sp += sizeof(uintptr);  	} -	 -	nascent = 0; -	if(pc0 == g->sched.pc && sp == g->sched.sp && pc0 == (byte*)runtime·goexit) { -		// Hasn't started yet.  g->sched is set up for goexit -		// but goroutine will start at g->entry. 
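
For reference on the FLDCW change to _rt0_386 above: 0x27F masks all x87 exceptions, keeps round-to-nearest rounding, and sets the precision-control field (bits 8-9) to 53-bit double precision, matching the operating systems that don't start in extended precision. A minimal C illustration of the same switch, using GCC-style inline assembly (a sketch, not part of this CL):

	/* Illustration only: force the x87 control word to double precision,
	   mirroring what _rt0_386 now does with PUSHL $0x27F; FLDCW 0(SP). */
	static void
	set_x87_double_precision(void)
	{
		unsigned short cw;

		__asm__ volatile("fnstcw %0" : "=m"(cw));	/* read current control word */
		cw = (cw & ~0x0300) | 0x0200;			/* PC field (bits 8-9) = 10: 53-bit */
		__asm__ volatile("fldcw %0" : : "m"(cw));	/* write it back */
	}
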
-		nascent = 1; -		pc = (uintptr)g->entry; -	} -	 +  	n = 0; +	sawnewstack = 0;  	stk = (Stktop*)g->stackbase; -	for(iter = 0; iter < 100 && n < m; iter++) {	// iter avoids looping forever +	for(iter = 0; iter < 100 && n < max; iter++) {	// iter avoids looping forever +		// Typically: +		//	pc is the PC of the running function. +		//	sp is the stack pointer at that program counter. +		//	fp is the frame pointer (caller's stack pointer) at that program counter, or nil if unknown. +		//	stk is the stack containing sp. +		//	The caller's program counter is lr, unless lr is zero, in which case it is *(uintptr*)sp. +	  		if(pc == (uintptr)runtime·lessstack) {  			// Hit top of stack segment.  Unwind to next segment.  			pc = (uintptr)stk->gobuf.pc;  			sp = stk->gobuf.sp; +			lr = 0; +			fp = nil; +			if(pcbuf == nil) +				runtime·printf("----- stack segment boundary -----\n");  			stk = (Stktop*)stk->stackbase;  			continue;  		} -  		if(pc <= 0x1000 || (f = runtime·findfunc(pc)) == nil) {  			// Dangerous, but worthwhile: see if this is a closure:  			//	ADDQ $wwxxyyzz, SP; RET @@ -66,17 +88,32 @@ gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 m)  				sp += *(uint32*)(p+2);  				pc = *(uintptr*)sp;  				sp += sizeof(uintptr); +				lr = 0; +				fp = nil;  				continue;  			} -			if(nascent && (pc = isclosureentry(pc)) != 0) +			// Closure at top of stack, not yet started. +			if(lr == (uintptr)runtime·goexit && (pc = isclosureentry(pc)) != 0) { +				fp = sp;  				continue; +			} -			// Unknown pc; stop. +			// Unknown pc: stop.  			break;  		} -		// Found an actual function worth reporting. +		// Found an actual function. +		if(fp == nil) { +			fp = sp; +			if(pc > f->entry && f->frame >= sizeof(uintptr)) +				fp += f->frame - sizeof(uintptr); +			if(lr == 0) +				lr = *(uintptr*)fp; +			fp += sizeof(uintptr); +		} else if(lr == 0) +			lr = *(uintptr*)fp; +  		if(skip > 0)  			skip--;  		else if(pcbuf != nil) @@ -93,15 +130,10 @@ gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 m)  				tracepc--;  			runtime·printf(" %S:%d\n", f->src, runtime·funcline(f, tracepc));  			runtime·printf("\t%S(", f->name); -			fp = (uintptr*)sp; -			if(f->frame < sizeof(uintptr)) -				fp++; -			else -				fp += f->frame/sizeof(uintptr);  			for(i = 0; i < f->args; i++) {  				if(i != 0)  					runtime·prints(", "); -				runtime·printhex(fp[i]); +				runtime·printhex(((uintptr*)fp)[i]);  				if(i >= 4) {  					runtime·prints(", ...");  					break; @@ -111,20 +143,32 @@ gentraceback(byte *pc0, byte *sp, G *g, int32 skip, uintptr *pcbuf, int32 m)  			n++;  		} -		if(nascent) { -			pc = (uintptr)g->sched.pc; -			sp = g->sched.sp; -			nascent = 0; +		if(f->entry == (uintptr)runtime·deferproc || f->entry == (uintptr)runtime·newproc) +			fp += 2*sizeof(uintptr); + +		if(f->entry == (uintptr)runtime·newstack) +			sawnewstack = 1; + +		if(pcbuf == nil && f->entry == (uintptr)runtime·morestack && g == m->g0 && sawnewstack) { +			// The fact that we saw newstack means that morestack +			// has managed to record its information in m, so we can +			// use it to keep unwinding the stack. 
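
To make the new fp bookkeeping above easier to follow, this is the x86 frame layout gentraceback appears to assume, with f->frame being the frame size recorded by the linker (a sketch inferred from the code, not an authoritative description):

	/*
	   low addresses                                          high addresses
	   sp                        sp + f->frame - 8      sp + f->frame
	   | locals, outgoing args | saved return address | caller's frame ...
	                                                   ^
	                                                   fp = caller's sp

	   So when fp is unknown: fp = sp + f->frame - sizeof(uintptr),
	   lr = *(uintptr*)fp, then fp += sizeof(uintptr), leaving fp at the
	   caller's stack pointer.  At f->entry the prologue has not run yet and
	   the return address is still at sp itself, which is why the code only
	   adds f->frame when pc > f->entry.
	*/
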
+			runtime·printf("----- morestack called from goroutine %d -----\n", m->curg->goid); +			pc = (uintptr)m->morepc; +			sp = m->morebuf.sp - sizeof(void*); +			lr = (uintptr)m->morebuf.pc; +			fp = m->morebuf.sp; +			sawnewstack = 0; +			g = m->curg; +			stk = (Stktop*)g->stackbase;  			continue;  		} -		if(f->frame < sizeof(uintptr))	// assembly functions lie -			sp += sizeof(uintptr); -		else -			sp += f->frame; -		pc = *((uintptr*)sp - 1); -		if(f->entry == (uintptr)runtime·deferproc || f->entry == (uintptr)runtime·newproc) -			sp += 2*sizeof(uintptr); +		// Unwind to next frame. +		pc = lr; +		lr = 0; +		sp = fp; +		fp = nil;  	}  	return n;  } @@ -156,7 +200,17 @@ isclosureentry(uintptr pc)  	p = (byte*)pc;  	if(p < runtime·mheap.arena_start || p+32 > runtime·mheap.arena_used)  		return 0; + +	if(*p == 0xe8) { +		// CALL fn +		return pc+5+*(int32*)(p+1); +	} +	if(sizeof(uintptr) == 8 && p[0] == 0x48 && p[1] == 0xb9 && p[10] == 0xff && p[11] == 0xd1) { +		// MOVQ $fn, CX; CALL *CX +		return *(uintptr*)(p+2); +	} +  	// SUBQ $siz, SP  	if((sizeof(uintptr) == 8 && *p++ != 0x48) || *p++ != 0x81 || *p++ != 0xec)  		return 0; diff --git a/src/pkg/runtime/arm/traceback.c b/src/pkg/runtime/arm/traceback.c index 8289fdb28..2307e98e8 100644 --- a/src/pkg/runtime/arm/traceback.c +++ b/src/pkg/runtime/arm/traceback.c @@ -3,19 +3,27 @@  // license that can be found in the LICENSE file.  #include "runtime.h" +#include "malloc.h" + +void runtime·deferproc(void); +void runtime·newproc(void); +void runtime·newstack(void); +void runtime·morestack(void);  static int32 -gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, int32 m) +gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, int32 max)  {  	int32 i, n, iter; -	uintptr pc, lr, tracepc; +	uintptr pc, lr, tracepc, x; +	byte *fp, *p;  	Stktop *stk;  	Func *f;  	pc = (uintptr)pc0;  	lr = (uintptr)lr0; -	 -	// If the PC is goexit, it hasn't started yet. +	fp = nil; + +	// If the PC is goexit, the goroutine hasn't started yet.  	if(pc == (uintptr)runtime·goexit) {  		pc = (uintptr)g->entry;  		lr = (uintptr)runtime·goexit; @@ -30,21 +38,73 @@ gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, i  	n = 0;  	stk = (Stktop*)g->stackbase; -	for(iter = 0; iter < 100 && n < m; iter++) {	// iter avoids looping forever +	for(iter = 0; iter < 100 && n < max; iter++) {	// iter avoids looping forever +		// Typically: +		//	pc is the PC of the running function. +		//	sp is the stack pointer at that program counter. +		//	fp is the frame pointer (caller's stack pointer) at that program counter, or nil if unknown. +		//	stk is the stack containing sp. +		//	The caller's program counter is lr, unless lr is zero, in which case it is *(uintptr*)sp. +		  		if(pc == (uintptr)runtime·lessstack) {  			// Hit top of stack segment.  Unwind to next segment.  			pc = (uintptr)stk->gobuf.pc;  			sp = stk->gobuf.sp; -			lr = *(uintptr*)sp; +			lr = 0; +			fp = nil; +			if(pcbuf == nil) +				runtime·printf("----- stack segment boundary -----\n");  			stk = (Stktop*)stk->stackbase;  			continue;  		} -		if(pc <= 0x1000 || (f = runtime·findfunc(pc-4)) == nil) { -			// TODO: Check for closure. +		 +		if(pc <= 0x1000 || (f = runtime·findfunc(pc)) == nil) { +			// Dangerous, but worthwhile: see if this is a closure by +			// decoding the instruction stream. +			// +			// We check p < p+4 to avoid wrapping and faulting if +			// we have lost track of where we are. 
+			p = (byte*)pc; +			if((pc&3) == 0 && p < p+4 && +			   runtime·mheap.arena_start < p && +			   p+4 < runtime·mheap.arena_used) { +			   	x = *(uintptr*)p; +				if((x&0xfffff000) == 0xe49df000) { +					// End of closure: +					// MOVW.P frame(R13), R15 +					pc = *(uintptr*)sp; +					lr = 0; +					sp += x & 0xfff; +					fp = nil; +					continue; +				} +				if((x&0xfffff000) == 0xe52de000 && lr == (uintptr)runtime·goexit) { +					// Beginning of closure. +					// Closure at top of stack, not yet started. +					p += 5*4; +					if((x&0xfff) != 4) { +						// argument copying +						p += 7*4; +					} +					if((byte*)pc < p && p < p+4 && p+4 < runtime·mheap.arena_used) { +						pc = *(uintptr*)p; +						fp = nil; +						continue; +					} +				} +			}  			break;  		} -		// Found an actual function worth reporting. +		// Found an actual function. +		if(lr == 0) +			lr = *(uintptr*)sp; +		if(fp == nil) { +			fp = sp; +			if(pc > f->entry && f->frame >= 0) +				fp += f->frame; +		} +  		if(skip > 0)  			skip--;  		else if(pcbuf != nil) @@ -64,7 +124,7 @@ gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, i  			for(i = 0; i < f->args; i++) {  				if(i != 0)  					runtime·prints(", "); -				runtime·printhex(((uintptr*)sp)[1+i]); +				runtime·printhex(((uintptr*)fp)[1+i]);  				if(i >= 4) {  					runtime·prints(", ...");  					break; @@ -73,17 +133,28 @@ gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, i  			runtime·prints(")\n");  			n++;  		} + +		if(pcbuf == nil && f->entry == (uintptr)runtime·newstack && g == m->g0) { +			runtime·printf("----- newstack called from goroutine %d -----\n", m->curg->goid); +			pc = (uintptr)m->morepc; +			sp = (byte*)m->moreargp - sizeof(void*); +			lr = (uintptr)m->morebuf.pc; +			fp = m->morebuf.sp; +			g = m->curg; +			stk = (Stktop*)g->stackbase; +			continue; +		} -		if(lr == 0) -			lr = *(uintptr*)sp; +		// Unwind to next frame.  		pc = lr;  		lr = 0; -		if(f->frame >= 0) -			sp += f->frame; +		sp = fp; +		fp = nil;  	}  	return n;		  } +  void  runtime·traceback(byte *pc0, byte *sp, byte *lr, G *g)  { diff --git a/src/pkg/runtime/cgocall.c b/src/pkg/runtime/cgocall.c index e6ece9542..74e5a3085 100644 --- a/src/pkg/runtime/cgocall.c +++ b/src/pkg/runtime/cgocall.c @@ -53,13 +53,12 @@ runtime·cgocall(void (*fn)(void*), void *arg)  // (arg/argsize) on to the stack, calls the function, copies the  // arguments back where they came from, and finally returns to the old  // stack. 
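
For the ARM closure detection above, the two instruction masks appear to correspond, in conventional ARM syntax, to the closure epilogue and prologue (my decoding, not stated in the CL):

	/*
	   (x & 0xfffff000) == 0xe49df000   =>  LDR pc, [sp], #imm12
	       Plan 9: MOVW.P imm(R13), R15 - pop the return address into PC,
	       releasing imm bytes of frame; this ends a closure body.
	   (x & 0xfffff000) == 0xe52de000   =>  STR lr, [sp, #-imm12]!
	       push the link register; this is the first instruction of a
	       closure sitting at the top of a stack that has not started yet.
	*/
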
-uintptr +void  runtime·cgocallback(void (*fn)(void), void *arg, int32 argsize)  {  	Gobuf oldsched, oldg1sched;  	G *g1;  	void *sp; -	uintptr ret;  	if(g != m->g0)  		runtime·throw("bad g in cgocallback"); @@ -71,11 +70,11 @@ runtime·cgocallback(void (*fn)(void), void *arg, int32 argsize)  	runtime·startcgocallback(g1);  	sp = g1->sched.sp - argsize; -	if(sp < g1->stackguard - StackGuard + 4) // +4 for return address +	if(sp < g1->stackguard - StackGuard + 8) // +8 for return address  		runtime·throw("g stack overflow in cgocallback");  	runtime·mcpy(sp, arg, argsize); -	ret = runtime·runcgocallback(g1, sp, fn); +	runtime·runcgocallback(g1, sp, fn);  	runtime·mcpy(arg, sp, argsize); @@ -83,8 +82,6 @@ runtime·cgocallback(void (*fn)(void), void *arg, int32 argsize)  	m->sched = oldsched;  	g1->sched = oldg1sched; - -	return ret;  }  void diff --git a/src/pkg/runtime/cgocall.h b/src/pkg/runtime/cgocall.h index 7c24e167b..1ad954eb1 100644 --- a/src/pkg/runtime/cgocall.h +++ b/src/pkg/runtime/cgocall.h @@ -7,6 +7,6 @@   */  void runtime·cgocall(void (*fn)(void*), void*); -uintptr runtime·cgocallback(void (*fn)(void), void*, int32); +void runtime·cgocallback(void (*fn)(void), void*, int32);  void *runtime·cmalloc(uintptr);  void runtime·cfree(void*); diff --git a/src/pkg/runtime/debug.go b/src/pkg/runtime/debug.go index d09db1be6..5117e1a55 100644 --- a/src/pkg/runtime/debug.go +++ b/src/pkg/runtime/debug.go @@ -69,7 +69,8 @@ type MemStatsType struct {  	// Per-size allocation statistics.  	// Not locked during update; approximate. -	BySize [67]struct { +	// 61 is NumSizeClasses in the C code. +	BySize [61]struct {  		Size    uint32  		Mallocs uint64  		Frees   uint64 diff --git a/src/pkg/runtime/extern.go b/src/pkg/runtime/extern.go index dba28324c..c6e664abb 100644 --- a/src/pkg/runtime/extern.go +++ b/src/pkg/runtime/extern.go @@ -60,31 +60,47 @@ func (f *Func) Entry() uintptr { return f.entry }  // counter within f.  func (f *Func) FileLine(pc uintptr) (file string, line int) {  	// NOTE(rsc): If you edit this function, also edit -	// symtab.c:/^funcline. +	// symtab.c:/^funcline.  That function also has the +	// comments explaining the logic. 
+	targetpc := pc +  	var pcQuant uintptr = 1  	if GOARCH == "arm" {  		pcQuant = 4  	} -	targetpc := pc  	p := f.pcln  	pc = f.pc0  	line = int(f.ln0) -	file = f.src -	for i := 0; i < len(p) && pc <= targetpc; i++ { -		switch { -		case p[i] == 0: +	i := 0 +	//print("FileLine start pc=", pc, " targetpc=", targetpc, " line=", line, +	//	" tab=", p, " ", p[0], " quant=", pcQuant, " GOARCH=", GOARCH, "\n") +	for { +		for i < len(p) && p[i] > 128 { +			pc += pcQuant * uintptr(p[i]-128) +			i++ +		} +		//print("pc<", pc, " targetpc=", targetpc, " line=", line, "\n") +		if pc > targetpc || i >= len(p) { +			break +		} +		if p[i] == 0 { +			if i+5 > len(p) { +				break +			}  			line += int(p[i+1]<<24) | int(p[i+2]<<16) | int(p[i+3]<<8) | int(p[i+4]) -			i += 4 -		case p[i] <= 64: +			i += 5 +		} else if p[i] <= 64 {  			line += int(p[i]) -		case p[i] <= 128: +			i++ +		} else {  			line -= int(p[i] - 64) -		default: -			pc += pcQuant * uintptr(p[i]-129) +			i++  		} +		//print("pc=", pc, " targetpc=", targetpc, " line=", line, "\n")  		pc += pcQuant  	} +	file = f.src  	return  } diff --git a/src/pkg/runtime/freebsd/mem.c b/src/pkg/runtime/freebsd/mem.c index cbae18718..f5bbfa6fa 100644 --- a/src/pkg/runtime/freebsd/mem.c +++ b/src/pkg/runtime/freebsd/mem.c @@ -33,6 +33,12 @@ runtime·SysFree(void *v, uintptr n)  void*  runtime·SysReserve(void *v, uintptr n)  { +	// On 64-bit, people with ulimit -v set complain if we reserve too +	// much address space.  Instead, assume that the reservation is okay +	// and check the assumption in SysMap. +	if(sizeof(void*) == 8) +		return v; +	  	return runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);  } @@ -42,6 +48,17 @@ runtime·SysMap(void *v, uintptr n)  	void *p;  	mstats.sys += n; + +	// On 64-bit, we don't actually have v reserved, so tread carefully. +	if(sizeof(void*) == 8) { +		p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); +		if(p != v) { +			runtime·printf("runtime: address space conflict: map(%v) = %v\n", v, p); +			runtime·throw("runtime: address space conflict"); +		} +		return; +	} +  	p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);  	if(p != v)  		runtime·throw("runtime: cannot map pages in arena address space"); diff --git a/src/pkg/runtime/iface.c b/src/pkg/runtime/iface.c index aa36df68e..3dec45e2b 100644 --- a/src/pkg/runtime/iface.c +++ b/src/pkg/runtime/iface.c @@ -702,7 +702,7 @@ unsafe·New(Eface typ, void *ret)  	t = (Type*)((Eface*)typ.data-1);  	if(t->kind&KindNoPointers) -		ret = runtime·mallocgc(t->size, RefNoPointers, 1, 1); +		ret = runtime·mallocgc(t->size, FlagNoPointers, 1, 1);  	else  		ret = runtime·mal(t->size);  	FLUSH(&ret); @@ -722,7 +722,7 @@ unsafe·NewArray(Eface typ, uint32 n, void *ret)  	size = n*t->size;  	if(t->kind&KindNoPointers) -		ret = runtime·mallocgc(size, RefNoPointers, 1, 1); +		ret = runtime·mallocgc(size, FlagNoPointers, 1, 1);  	else  		ret = runtime·mal(size);  	FLUSH(&ret); diff --git a/src/pkg/runtime/linux/386/rt0.s b/src/pkg/runtime/linux/386/rt0.s index 0f82d6a1c..223e6d2ea 100644 --- a/src/pkg/runtime/linux/386/rt0.s +++ b/src/pkg/runtime/linux/386/rt0.s @@ -5,13 +5,5 @@  // Darwin and Linux use the same linkage to main  TEXT _rt0_386_linux(SB),7,$0 -	// Linux starts the FPU in extended double precision. -	// Other operating systems use double precision. -	// Change to double precision to match them, -	// and to match other hardware that only has double. 
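
Reading the rewritten FileLine loop in extern.go above literally: a byte greater than 128 advances pc by (b-128) quanta, a 0 byte is followed by four bytes of line delta, bytes 1..64 add that amount to the line, bytes 65..128 subtract (b-64), and every line-changing byte also advances pc by one quantum. A tiny worked trace under those rules (table contents invented for illustration):

	/* pcQuant = 1, pc starts at f->pc0 = 0x1000, line starts at f->ln0 = 10.
	   table = {2, 130, 65}:
	     2   -> line = 12, then pc += 1 -> 0x1001
	     130 -> pc += 2 -> 0x1003            (pc-advance byte, no line change)
	     65  -> line -= 1 -> 11, then pc += 1 -> 0x1004
	   A query for targetpc = 0x1002 stops once pc passes the target and
	   therefore reports line 12. */
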
-	PUSHL $0x27F -	FLDCW	0(SP) -	POPL AX -  	JMP	_rt0_386(SB) diff --git a/src/pkg/runtime/linux/mem.c b/src/pkg/runtime/linux/mem.c index 3a83e7394..633ad0c62 100644 --- a/src/pkg/runtime/linux/mem.c +++ b/src/pkg/runtime/linux/mem.c @@ -39,6 +39,12 @@ runtime·SysFree(void *v, uintptr n)  void*  runtime·SysReserve(void *v, uintptr n)  { +	// On 64-bit, people with ulimit -v set complain if we reserve too +	// much address space.  Instead, assume that the reservation is okay +	// and check the assumption in SysMap. +	if(sizeof(void*) == 8) +		return v; +	  	return runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);  } @@ -48,6 +54,17 @@ runtime·SysMap(void *v, uintptr n)  	void *p;  	mstats.sys += n; + +	// On 64-bit, we don't actually have v reserved, so tread carefully. +	if(sizeof(void*) == 8) { +		p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); +		if(p != v) { +			runtime·printf("runtime: address space conflict: map(%v) = %v\n", v, p); +			runtime·throw("runtime: address space conflict"); +		} +		return; +	} +  	p = runtime·mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);  	if(p != v)  		runtime·throw("runtime: cannot map pages in arena address space"); diff --git a/src/pkg/runtime/malloc.goc b/src/pkg/runtime/malloc.goc index cc28b943d..70b85d68d 100644 --- a/src/pkg/runtime/malloc.goc +++ b/src/pkg/runtime/malloc.goc @@ -36,14 +36,13 @@ fastrand1(void)  // Small objects are allocated from the per-thread cache's free lists.  // Large objects (> 32 kB) are allocated straight from the heap.  void* -runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed) +runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)  {  	int32 sizeclass, rate;  	MCache *c;  	uintptr npages;  	MSpan *s;  	void *v; -	uint32 *ref;  	if(runtime·gcwaiting && g != m->g0 && m->locks == 0)  		runtime·gosched(); @@ -65,12 +64,6 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)  		mstats.alloc += size;  		mstats.total_alloc += size;  		mstats.by_size[sizeclass].nmalloc++; - -		if(!runtime·mlookup(v, nil, nil, nil, &ref)) { -			runtime·printf("malloc %D; runtime·mlookup failed\n", (uint64)size); -			runtime·throw("malloc runtime·mlookup"); -		} -		*ref = RefNone | refflag;  	} else {  		// TODO(rsc): Report tracebacks for very large allocations. 
@@ -87,13 +80,14 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)  		v = (void*)(s->start << PageShift);  		// setup for mark sweep -		s->gcref0 = RefNone | refflag; -		ref = &s->gcref0; +		runtime·markspan(v, 0, 0, true);  	} +	if(!(flag & FlagNoGC)) +		runtime·markallocated(v, size, (flag&FlagNoPointers) != 0);  	m->mallocing = 0; -	if(!(refflag & RefNoProfiling) && (rate = runtime·MemProfileRate) > 0) { +	if(!(flag & FlagNoProfiling) && (rate = runtime·MemProfileRate) > 0) {  		if(size >= rate)  			goto profile;  		if(m->mcache->next_sample > size) @@ -104,7 +98,7 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)  				rate = 0x3fffffff;  			m->mcache->next_sample = fastrand1() % (2*rate);  		profile: -			*ref |= RefProfiled; +			runtime·setblockspecial(v);  			runtime·MProf_Malloc(v, size);  		}  	} @@ -124,33 +118,35 @@ runtime·malloc(uintptr size)  void  runtime·free(void *v)  { -	int32 sizeclass, size; +	int32 sizeclass;  	MSpan *s;  	MCache *c; -	uint32 prof, *ref; +	uint32 prof; +	uintptr size;  	if(v == nil)  		return; +	 +	// If you change this also change mgc0.c:/^sweepspan, +	// which has a copy of the guts of free.  	if(m->mallocing)  		runtime·throw("malloc/free - deadlock");  	m->mallocing = 1; -	if(!runtime·mlookup(v, nil, nil, &s, &ref)) { +	if(!runtime·mlookup(v, nil, nil, &s)) {  		runtime·printf("free %p: not an allocated block\n", v);  		runtime·throw("free runtime·mlookup");  	} -	prof = *ref & RefProfiled; -	*ref = RefFree; +	prof = runtime·blockspecial(v);  	// Find size class for v.  	sizeclass = s->sizeclass;  	if(sizeclass == 0) {  		// Large object. -		if(prof) -			runtime·MProf_Free(v, s->npages<<PageShift); -		mstats.alloc -= s->npages<<PageShift; -		runtime·memclr(v, s->npages<<PageShift); +		size = s->npages<<PageShift; +		*(uintptr*)(s->start<<PageShift) = 1;	// mark as "needs to be zeroed" +		runtime·unmarkspan(v, 1<<PageShift);  		runtime·MHeap_Free(&runtime·mheap, s, 1);  	} else {  		// Small object. @@ -158,19 +154,20 @@ runtime·free(void *v)  		size = runtime·class_to_size[sizeclass];  		if(size > sizeof(uintptr))  			((uintptr*)v)[1] = 1;	// mark as "needs to be zeroed" -		if(prof) -			runtime·MProf_Free(v, size); -		mstats.alloc -= size;  		mstats.by_size[sizeclass].nfree++;  		runtime·MCache_Free(c, v, sizeclass, size);  	} +	runtime·markfreed(v, size); +	mstats.alloc -= size; +	if(prof) +		runtime·MProf_Free(v, size);  	m->mallocing = 0;  }  int32 -runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref) +runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)  { -	uintptr n, nobj, i; +	uintptr n, i;  	byte *p;  	MSpan *s; @@ -179,12 +176,11 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)  	if(sp)  		*sp = s;  	if(s == nil) { +		runtime·checkfreed(v, 1);  		if(base)  			*base = nil;  		if(size)  			*size = 0; -		if(ref) -			*ref = 0;  		return 0;  	} @@ -195,14 +191,11 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)  			*base = p;  		if(size)  			*size = s->npages<<PageShift; -		if(ref) -			*ref = &s->gcref0;  		return 1;  	} -	if((byte*)v >= (byte*)s->gcref) { -		// pointers into the gc ref counts -		// do not count as pointers. +	if((byte*)v >= (byte*)s->limit) { +		// pointers past the last block do not count as pointers.  		
return 0;  	} @@ -213,21 +206,6 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)  	if(size)  		*size = n; -	// good for error checking, but expensive -	if(0) { -		nobj = (s->npages << PageShift) / (n + RefcountOverhead); -		if((byte*)s->gcref < p || (byte*)(s->gcref+nobj) > p+(s->npages<<PageShift)) { -			runtime·printf("odd span state=%d span=%p base=%p sizeclass=%d n=%D size=%D npages=%D\n", -				s->state, s, p, s->sizeclass, (uint64)nobj, (uint64)n, (uint64)s->npages); -			runtime·printf("s->base sizeclass %d v=%p base=%p gcref=%p blocksize=%D nobj=%D size=%D end=%p end=%p\n", -				s->sizeclass, v, p, s->gcref, (uint64)s->npages<<PageShift, -				(uint64)nobj, (uint64)n, s->gcref + nobj, p+(s->npages<<PageShift)); -			runtime·throw("bad gcref"); -		} -	} -	if(ref) -		*ref = &s->gcref[i]; -  	return 1;  } @@ -246,14 +224,20 @@ runtime·allocmcache(void)  int32 runtime·sizeof_C_MStats = sizeof(MStats); +#define MaxArena32 (2U<<30) +  void  runtime·mallocinit(void)  {  	byte *p; -	uintptr arena_size; +	uintptr arena_size, bitmap_size; +	extern byte end[];  	runtime·InitSizes(); +	// Set up the allocation arena, a contiguous area of memory where +	// allocated data will be found.  The arena begins with a bitmap large +	// enough to hold 4 bits per allocated word.  	if(sizeof(void*) == 8) {  		// On a 64-bit machine, allocate from a single contiguous reservation.  		// 16 GB should be big enough for now. @@ -273,19 +257,53 @@ runtime·mallocinit(void)  		// odds of the conservative garbage collector not collecting memory  		// because some non-pointer block of memory had a bit pattern  		// that matched a memory address. +		// +		// Actually we reserve 17 GB (because the bitmap ends up being 1 GB) +		// but it hardly matters: fc is not valid UTF-8 either, and we have to +		// allocate 15 GB before we get that far.  		arena_size = 16LL<<30; -		p = runtime·SysReserve((void*)(0x00f8ULL<<32), arena_size); +		bitmap_size = arena_size / (sizeof(void*)*8/4); +		p = runtime·SysReserve((void*)(0x00f8ULL<<32), bitmap_size + arena_size);  		if(p == nil)  			runtime·throw("runtime: cannot reserve arena virtual address space"); -		runtime·mheap.arena_start = p; -		runtime·mheap.arena_used = p; -		runtime·mheap.arena_end = p + arena_size;  	} else { -		// On a 32-bit machine, we'll take what we can get for each allocation -		// and maintain arena_start and arena_end as min, max we've seen. -		runtime·mheap.arena_start = (byte*)0xffffffff; -		runtime·mheap.arena_end = 0; +		// On a 32-bit machine, we can't typically get away +		// with a giant virtual address space reservation. +		// Instead we map the memory information bitmap +		// immediately after the data segment, large enough +		// to handle another 2GB of mappings (256 MB), +		// along with a reservation for another 512 MB of memory. +		// When that gets used up, we'll start asking the kernel +		// for any memory anywhere and hope it's in the 2GB +		// following the bitmap (presumably the executable begins +		// near the bottom of memory, so we'll have to use up +		// most of memory before the kernel resorts to giving out +		// memory before the beginning of the text segment). +		// +		// Alternatively we could reserve 512 MB bitmap, enough +		// for 4GB of mappings, and then accept any memory the +		// kernel threw at us, but normally that's a waste of 512 MB +		// of address space, which is probably too much in a 32-bit world. 
+		bitmap_size = MaxArena32 / (sizeof(void*)*8/4); +		arena_size = 512<<20; +		 +		// SysReserve treats the address we ask for, end, as a hint, +		// not as an absolute requirement.  If we ask for the end +		// of the data segment but the operating system requires +		// a little more space before we can start allocating, it will +		// give out a slightly higher pointer.  That's fine.   +		// Run with what we get back. +		p = runtime·SysReserve(end, bitmap_size + arena_size); +		if(p == nil) +			runtime·throw("runtime: cannot reserve arena virtual address space");  	} +	if((uintptr)p & (((uintptr)1<<PageShift)-1)) +		runtime·throw("runtime: SysReserve returned unaligned address"); + +	runtime·mheap.bitmap = p; +	runtime·mheap.arena_start = p + bitmap_size; +	runtime·mheap.arena_used = runtime·mheap.arena_start; +	runtime·mheap.arena_end = runtime·mheap.arena_start + arena_size;  	// Initialize the rest of the allocator.	  	runtime·MHeap_Init(&runtime·mheap, runtime·SysAlloc); @@ -299,26 +317,41 @@ void*  runtime·MHeap_SysAlloc(MHeap *h, uintptr n)  {  	byte *p; -	 -	if(sizeof(void*) == 8) { + +	if(n <= h->arena_end - h->arena_used) {  		// Keep taking from our reservation. -		if(h->arena_end - h->arena_used < n) -			return nil;  		p = h->arena_used;  		runtime·SysMap(p, n);  		h->arena_used += n; +		runtime·MHeap_MapBits(h);  		return p; -	} else { -		// Take what we can get from the OS. -		p = runtime·SysAlloc(n); -		if(p == nil) -			return nil; -		if(p+n > h->arena_used) -			h->arena_used = p+n; -		if(p > h->arena_end) -			h->arena_end = p; -		return p;		  	} +	 +	// On 64-bit, our reservation is all we have. +	if(sizeof(void*) == 8) +		return nil; + +	// On 32-bit, once the reservation is gone we can +	// try to get memory at a location chosen by the OS +	// and hope that it is in the range we allocated bitmap for. +	p = runtime·SysAlloc(n); +	if(p == nil) +		return nil; + +	if(p < h->arena_start || p+n - h->arena_start >= MaxArena32) { +		runtime·printf("runtime: memory allocated by OS not in usable range"); +		runtime·SysFree(p, n); +		return nil; +	} + +	if(p+n > h->arena_used) { +		h->arena_used = p+n; +		if(h->arena_used > h->arena_end) +			h->arena_end = h->arena_used; +		runtime·MHeap_MapBits(h); +	} +	 +	return p;  }  // Runtime stubs. 
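
The bitmap sizing in mallocinit above follows from four bitmap bits per heap word, i.e. bitmap_size = arena_size / (sizeof(void*)*8/4). Plugging in the two cases, just to check the comments' arithmetic:

	/* 64-bit: sizeof(void*)*8/4 = 16 heap words per bitmap word,
	     arena_size = 16 GB, bitmap_size = 16 GB / 16 = 1 GB
	     -> the "17 GB" total reservation mentioned in the comment above.
	   32-bit: sizeof(void*)*8/4 = 8,
	     MaxArena32 = 2 GB, bitmap_size = 2 GB / 8 = 256 MB,
	     plus the 512 MB arena reservation, both reserved just past the
	     data segment (the `end` symbol). */
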
@@ -353,7 +386,6 @@ void*  runtime·stackalloc(uint32 n)  {  	void *v; -	uint32 *ref;  	if(m->mallocing || m->gcing || n == FixedStack) {  		runtime·lock(&stacks); @@ -369,11 +401,7 @@ runtime·stackalloc(uint32 n)  		runtime·unlock(&stacks);  		return v;  	} -	v = runtime·mallocgc(n, RefNoProfiling, 0, 0); -	if(!runtime·mlookup(v, nil, nil, nil, &ref)) -		runtime·throw("stackalloc runtime·mlookup"); -	*ref = RefStack; -	return v; +	return runtime·mallocgc(n, FlagNoProfiling|FlagNoGC, 0, 0);  }  void @@ -399,7 +427,7 @@ func Free(p *byte) {  }  func Lookup(p *byte) (base *byte, size uintptr) { -	runtime·mlookup(p, &base, &size, nil, nil); +	runtime·mlookup(p, &base, &size, nil);  }  func GC() { @@ -422,7 +450,7 @@ func SetFinalizer(obj Eface, finalizer Eface) {  		runtime·printf("runtime.SetFinalizer: first argument is %S, not pointer\n", *obj.type->string);  		goto throw;  	} -	if(!runtime·mlookup(obj.data, &base, &size, nil, nil) || obj.data != base) { +	if(!runtime·mlookup(obj.data, &base, &size, nil) || obj.data != base) {  		runtime·printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");  		goto throw;  	} diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h index e2472e8d2..4e2794570 100644 --- a/src/pkg/runtime/malloc.h +++ b/src/pkg/runtime/malloc.h @@ -97,8 +97,14 @@ typedef	uintptr	PageID;		// address >> PageShift  enum  { +	// Computed constant.  The definition of MaxSmallSize and the +	// algorithm in msize.c produce some number of different allocation +	// size classes.  NumSizeClasses is that number.  It's needed here +	// because there are static arrays of this length; when msize runs its +	// size choosing algorithm it double-checks that NumSizeClasses agrees. +	NumSizeClasses = 61, +  	// Tunable constants. 
-	NumSizeClasses = 67,		// Number of size classes (must match msize.c)  	MaxSmallSize = 32<<10,  	FixAllocChunk = 128<<10,	// Chunk size for FixAlloc @@ -290,10 +296,7 @@ struct MSpan  	uint32	ref;		// number of allocated objects in this span  	uint32	sizeclass;	// size class  	uint32	state;		// MSpanInUse etc -	union { -		uint32	*gcref;	// sizeclass > 0 -		uint32	gcref0;	// sizeclass == 0 -	}; +	byte	*limit;	// end of data in span  };  void	runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages); @@ -336,6 +339,7 @@ struct MHeap  	// range of addresses we might see in the heap  	byte *bitmap; +	uintptr bitmap_mapped;  	byte *arena_start;  	byte *arena_used;  	byte *arena_end; @@ -359,26 +363,29 @@ MSpan*	runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct  void	runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct);  MSpan*	runtime·MHeap_Lookup(MHeap *h, void *v);  MSpan*	runtime·MHeap_LookupMaybe(MHeap *h, void *v); -void	runtime·MGetSizeClassInfo(int32 sizeclass, int32 *size, int32 *npages, int32 *nobj); +void	runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj);  void*	runtime·MHeap_SysAlloc(MHeap *h, uintptr n); +void	runtime·MHeap_MapBits(MHeap *h);  void*	runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed); -int32	runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s, uint32 **ref); +int32	runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s);  void	runtime·gc(int32 force); +void	runtime·markallocated(void *v, uintptr n, bool noptr); +void	runtime·checkallocated(void *v, uintptr n); +void	runtime·markfreed(void *v, uintptr n); +void	runtime·checkfreed(void *v, uintptr n); +int32	runtime·checking; +void	runtime·markspan(void *v, uintptr size, uintptr n, bool leftover); +void	runtime·unmarkspan(void *v, uintptr size); +bool	runtime·blockspecial(void*); +void	runtime·setblockspecial(void*);  enum  { -	RefcountOverhead = 4,	// one uint32 per object - -	RefFree = 0,	// must be zero -	RefStack,		// stack segment - don't free and don't scan for pointers -	RefNone,		// no references -	RefSome,		// some references -	RefNoPointers = 0x80000000U,	// flag - no pointers here -	RefHasFinalizer = 0x40000000U,	// flag - has finalizer -	RefProfiled = 0x20000000U,	// flag - is in profiling table -	RefNoProfiling = 0x10000000U,	// flag - must not profile -	RefFlags = 0xFFFF0000U, +	// flags to malloc +	FlagNoPointers = 1<<0,	// no pointers here +	FlagNoProfiling = 1<<1,	// must not profile +	FlagNoGC = 1<<2,	// must not free or scan for pointers  };  void	runtime·MProf_Malloc(void*, uintptr); diff --git a/src/pkg/runtime/mcentral.c b/src/pkg/runtime/mcentral.c index f1ad119d3..29b03b58f 100644 --- a/src/pkg/runtime/mcentral.c +++ b/src/pkg/runtime/mcentral.c @@ -113,8 +113,7 @@ static void  MCentral_Free(MCentral *c, void *v)  {  	MSpan *s; -	PageID page; -	MLink *p, *next; +	MLink *p;  	int32 size;  	// Find span for v. @@ -138,16 +137,8 @@ MCentral_Free(MCentral *c, void *v)  	if(--s->ref == 0) {  		size = runtime·class_to_size[c->sizeclass];  		runtime·MSpanList_Remove(s); -		// The second word of each freed block indicates -		// whether it needs to be zeroed.  The first word -		// is the link pointer and must always be cleared. 
-		for(p=s->freelist; p; p=next) { -			next = p->next; -			if(size > sizeof(uintptr) && ((uintptr*)p)[1] != 0) -				runtime·memclr((byte*)p, size); -			else -				p->next = nil; -		} +		runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift); +		*(uintptr*)(s->start<<PageShift) = 1;  // needs zeroing  		s->freelist = nil;  		c->nfree -= (s->npages << PageShift) / size;  		runtime·unlock(c); @@ -157,7 +148,7 @@ MCentral_Free(MCentral *c, void *v)  }  void -runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *nobj) +runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp, int32 *nobj)  {  	int32 size;  	int32 npages; @@ -166,7 +157,7 @@ runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32  	size = runtime·class_to_size[sizeclass];  	*npagesp = npages;  	*sizep = size; -	*nobj = (npages << PageShift) / (size + RefcountOverhead); +	*nobj = (npages << PageShift) / size;  }  // Fetch a new span from the heap and @@ -174,7 +165,8 @@ runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32  static bool  MCentral_Grow(MCentral *c)  { -	int32 i, n, npages, size; +	int32 i, n, npages; +	uintptr size;  	MLink **tailp, *v;  	byte *p;  	MSpan *s; @@ -191,7 +183,7 @@ MCentral_Grow(MCentral *c)  	// Carve span into sequence of blocks.  	tailp = &s->freelist;  	p = (byte*)(s->start << PageShift); -	s->gcref = (uint32*)(p + size*n); +	s->limit = p + size*n;  	for(i=0; i<n; i++) {  		v = (MLink*)p;  		*tailp = v; @@ -199,6 +191,7 @@ MCentral_Grow(MCentral *c)  		p += size;  	}  	*tailp = nil; +	runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));  	runtime·lock(c);  	c->nfree += n; diff --git a/src/pkg/runtime/mfinal.c b/src/pkg/runtime/mfinal.c index f73561b3c..03ee777c0 100644 --- a/src/pkg/runtime/mfinal.c +++ b/src/pkg/runtime/mfinal.c @@ -5,6 +5,7 @@  #include "runtime.h"  #include "malloc.h" +// TODO(rsc): Why not just use mheap.Lock?  static Lock finlock;  // Finalizer hash table.  Direct hash, linear scan, at most 3/4 full. @@ -101,24 +102,21 @@ runtime·addfinalizer(void *p, void (*f)(void*), int32 nret)  	}  	runtime·lock(&finlock); -	if(!runtime·mlookup(p, &base, nil, nil, &ref) || p != base) { +	if(!runtime·mlookup(p, &base, nil, nil) || p != base) {  		runtime·unlock(&finlock);  		runtime·throw("addfinalizer on invalid pointer");  	}  	if(f == nil) { -		if(*ref & RefHasFinalizer) { -			lookfintab(&fintab, p, 1); -			*ref &= ~RefHasFinalizer; -		} +		lookfintab(&fintab, p, 1);  		runtime·unlock(&finlock);  		return;  	} -	if(*ref & RefHasFinalizer) { +	if(lookfintab(&fintab, p, 0)) {  		runtime·unlock(&finlock);  		runtime·throw("double finalizer");  	} -	*ref |= RefHasFinalizer; +	runtime·setblockspecial(p);  	if(fintab.nkey >= fintab.max/2+fintab.max/4) {  		// keep table at most 3/4 full: @@ -134,7 +132,7 @@ runtime·addfinalizer(void *p, void (*f)(void*), int32 nret)  			newtab.max *= 3;  		} -		newtab.key = runtime·mallocgc(newtab.max*sizeof newtab.key[0], RefNoPointers, 0, 1); +		newtab.key = runtime·mallocgc(newtab.max*sizeof newtab.key[0], FlagNoPointers, 0, 1);  		newtab.val = runtime·mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1);  		for(i=0; i<fintab.max; i++) { diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c index af1c721e8..232c6cdcd 100644 --- a/src/pkg/runtime/mgc0.c +++ b/src/pkg/runtime/mgc0.c @@ -2,28 +2,66 @@  // Use of this source code is governed by a BSD-style  // license that can be found in the LICENSE file. 
-// Garbage collector -- step 0. -// -// Stop the world, mark and sweep garbage collector. -// NOT INTENDED FOR PRODUCTION USE. -// -// A mark and sweep collector provides a way to exercise -// and test the memory allocator and the stack walking machinery -// without also needing to get reference counting -// exactly right. +// Garbage collector.  #include "runtime.h"  #include "malloc.h"  enum { -	Debug = 0 +	Debug = 0, +	UseCas = 1, +	PtrSize = sizeof(void*), +	 +	// Four bits per word (see #defines below). +	wordsPerBitmapWord = sizeof(void*)*8/4, +	bitShift = sizeof(void*)*8/4,  }; -typedef struct BlockList BlockList; -struct BlockList +// Bits in per-word bitmap. +// #defines because enum might not be able to hold the values. +// +// Each word in the bitmap describes wordsPerBitmapWord words +// of heap memory.  There are 4 bitmap bits dedicated to each heap word, +// so on a 64-bit system there is one bitmap word per 16 heap words. +// The bits in the word are packed together by type first, then by +// heap location, so each 64-bit bitmap word consists of, from top to bottom, +// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits, +// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits. +// This layout makes it easier to iterate over the bits of a given type. +// +// The bitmap starts at mheap.arena_start and extends *backward* from +// there.  On a 64-bit system the off'th word in the arena is tracked by +// the off/16+1'th word before mheap.arena_start.  (On a 32-bit system, +// the only difference is that the divisor is 8.) +// +// To pull out the bits corresponding to a given pointer p, we use: +// +//	off = p - (uintptr*)mheap.arena_start;  // word offset +//	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1; +//	shift = off % wordsPerBitmapWord +//	bits = *b >> shift; +//	/* then test bits & bitAllocated, bits & bitMarked, etc. */ +// +#define bitAllocated		((uintptr)1<<(bitShift*0)) +#define bitNoPointers		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */ +#define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */ +#define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */ +#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */ + +#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial) + +static uint64 nlookup; +static uint64 nsizelookup; +static uint64 naddrlookup; +static uint64 nhandoff; +static int32 gctrace; + +typedef struct Workbuf Workbuf; +struct Workbuf  { -	byte *obj; -	uintptr size; +	Workbuf *next; +	uintptr nw; +	byte *w[2048-2];  };  extern byte data[]; @@ -33,72 +71,258 @@ extern byte end[];  static G *fing;  static Finalizer *finq;  static int32 fingwait; -static BlockList *bl, *ebl; +static uint32 nfullwait;  static void runfinq(void); - -enum { -	PtrSize = sizeof(void*) -}; - +static bool bitlookup(void*, uintptr**, uintptr*, int32*); +static Workbuf* getempty(Workbuf*); +static Workbuf* getfull(Workbuf*); + +// scanblock scans a block of n bytes starting at pointer b for references +// to other objects, scanning any it finds recursively until there are no +// unscanned objects left.  Instead of using an explicit recursion, it keeps +// a work list in the Workbuf* structures and loops in the main function +// body.  Keeping an explicit work list is easier on the stack allocator and +// more efficient.  
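
A worked instance of the bit-lookup formulas above, on a 64-bit machine, with the pointer value chosen only for illustration:

	/* 64-bit, so wordsPerBitmapWord = 16.  Suppose arena_start = A and
	   p = A + 0x108, i.e. word offset 33 into the arena:
	     off   = 33
	     b     = (uintptr*)A - 33/16 - 1 = (uintptr*)A - 3   (24 bytes below A)
	     shift = 33 % 16 = 1
	     bits  = *b >> 1
	   p's four type bits are then bits&bitAllocated, bits&bitNoPointers,
	   bits&bitMarked and bits&bitSpecial, spaced bitShift (= 16) apart
	   within the bitmap word. */
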
static void  scanblock(byte *b, int64 n)  { -	int32 off; -	void *obj; -	uintptr size; -	uint32 *refp, ref; +	byte *obj, *arena_start, *p;  	void **vp; -	int64 i; -	BlockList *w; - -	w = bl; -	w->obj = b; -	w->size = n; -	w++; +	uintptr size, *bitp, bits, shift, i, j, x, xbits, off; +	MSpan *s; +	PageID k; +	void **bw, **w, **ew; +	Workbuf *wbuf; -	while(w > bl) { -		w--; -		b = w->obj; -		n = w->size; +	// Memory arena parameters. +	arena_start = runtime·mheap.arena_start; +	 +	wbuf = nil;  // current work buffer +	ew = nil;  // end of work buffer +	bw = nil;  // beginning of work buffer +	w = nil;  // current pointer into work buffer + +	// Align b to a word boundary. +	off = (uintptr)b & (PtrSize-1); +	if(off != 0) { +		b += PtrSize - off; +		n -= PtrSize - off; +	} +	for(;;) { +		// Each iteration scans the block b of length n, queueing pointers in +		// the work buffer.  		if(Debug > 1)  			runtime·printf("scanblock %p %D\n", b, n); -		off = (uint32)(uintptr)b & (PtrSize-1); -		if(off) { -			b += PtrSize - off; -			n -= PtrSize - off; -		} -	 +  		vp = (void**)b;  		n /= PtrSize;  		for(i=0; i<n; i++) { -			obj = vp[i]; -			if(obj == nil) +			obj = (byte*)vp[i]; +			 +			// Words outside the arena cannot be pointers. +			if((byte*)obj < arena_start || (byte*)obj >= runtime·mheap.arena_used)  				continue; -			if(runtime·mheap.arena_start <= (byte*)obj && (byte*)obj < runtime·mheap.arena_end) { -				if(runtime·mlookup(obj, &obj, &size, nil, &refp)) { -					ref = *refp; -					switch(ref & ~RefFlags) { -					case RefNone: -						if(Debug > 1) -							runtime·printf("found at %p: ", &vp[i]); -						*refp = RefSome | (ref & RefFlags); -						if(!(ref & RefNoPointers)) { -							if(w >= ebl) -								runtime·throw("scanblock: garbage collection stack overflow"); -							w->obj = obj; -							w->size = size; -							w++; -						} -						break; -					} +			 +			// obj may be a pointer to a live object. +			// Try to find the beginning of the object. +			 +			// Round down to word boundary. +			obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); + +			// Find bits for this word. +			off = (uintptr*)obj - (uintptr*)arena_start; +			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; +			shift = off % wordsPerBitmapWord; +			xbits = *bitp; +			bits = xbits >> shift; + +			// Pointing at the beginning of a block? +			if((bits & (bitAllocated|bitBlockBoundary)) != 0) +				goto found; + +			// Pointing just past the beginning? +			// Scan backward a little to find a block boundary. +			for(j=shift; j-->0; ) { +				if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { +					obj = (byte*)obj - (shift-j)*PtrSize; +					shift = j; +					bits = xbits>>shift; +					goto found;  				}  			} + +			// Otherwise consult span table to find beginning. +			// (Manually inlined copy of MHeap_LookupMaybe.) +			nlookup++; +			naddrlookup++; +			k = (uintptr)obj>>PageShift; +			x = k; +			if(sizeof(void*) == 8) +				x -= (uintptr)arena_start>>PageShift; +			s = runtime·mheap.map[x]; +			if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) +				continue; +			p =  (byte*)((uintptr)s->start<<PageShift); +			if(s->sizeclass == 0) { +				obj = p; +			} else { +				if((byte*)obj >= (byte*)s->limit) +					continue; +				size = runtime·class_to_size[s->sizeclass]; +				int32 i = ((byte*)obj - p)/size; +				obj = p+i*size; +			} + +			// Now that we know the object header, reload bits. 
+			off = (uintptr*)obj - (uintptr*)arena_start; +			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; +			shift = off % wordsPerBitmapWord; +			xbits = *bitp; +			bits = xbits >> shift; + +		found: +			// Now we have bits, bitp, and shift correct for +			// obj pointing at the base of the object. +			// If not allocated or already marked, done. +			if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0) +				continue; +			*bitp |= bitMarked<<shift; + +			// If object has no pointers, don't need to scan further. +			if((bits & bitNoPointers) != 0) +				continue; + +			// If buffer is full, get a new one. +			if(w >= ew) { +				wbuf = getempty(wbuf); +				bw = wbuf->w; +				w = bw; +				ew = bw + nelem(wbuf->w); +			} +			*w++ = obj; +		} +		 +		// Done scanning [b, b+n).  Prepare for the next iteration of +		// the loop by setting b and n to the parameters for the next block. + +		// Fetch b from the work buffers. +		if(w <= bw) { +			// Emptied our buffer: refill. +			wbuf = getfull(wbuf); +			if(wbuf == nil) +				break; +			bw = wbuf->w; +			ew = wbuf->w + nelem(wbuf->w); +			w = bw+wbuf->nw;  		} +		b = *--w; +	 +		// Figure out n = size of b.  Start by loading bits for b. +		off = (uintptr*)b - (uintptr*)arena_start; +		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; +		shift = off % wordsPerBitmapWord; +		xbits = *bitp; +		bits = xbits >> shift; +		 +		// Might be small; look for nearby block boundary. +		// A block boundary is marked by either bitBlockBoundary +		// or bitAllocated being set (see notes near their definition). +		enum { +			boundary = bitBlockBoundary|bitAllocated +		}; +		// Look for a block boundary both after and before b +		// in the same bitmap word. +		// +		// A block boundary j words after b is indicated by +		//	bits>>j & boundary +		// assuming shift+j < bitShift.  (If shift+j >= bitShift then +		// we'll be bleeding other bit types like bitMarked into our test.) +		// Instead of inserting the conditional shift+j < bitShift into the loop, +		// we can let j range from 1 to bitShift as long as we first +		// apply a mask to keep only the bits corresponding +		// to shift+j < bitShift aka j < bitShift-shift. +		bits &= (boundary<<(bitShift-shift)) - boundary; +		 +		// A block boundary j words before b is indicated by +		//	xbits>>(shift-j) & boundary +		// (assuming shift >= j).  There is no cleverness here +		// avoid the test, because when j gets too large the shift +		// turns negative, which is undefined in C.		 + +		for(j=1; j<bitShift; j++) { +			if(((bits>>j)&boundary) != 0 || shift>=j && ((xbits>>(shift-j))&boundary) != 0) { +				n = j*PtrSize; +				goto scan; +			} +		} +		 +		// Fall back to asking span about size class. +		// (Manually inlined copy of MHeap_Lookup.) +		nlookup++; +		nsizelookup++; +		x = (uintptr)b>>PageShift; +		if(sizeof(void*) == 8) +			x -= (uintptr)arena_start>>PageShift; +		s = runtime·mheap.map[x]; +		if(s->sizeclass == 0) +			n = s->npages<<PageShift; +		else +			n = runtime·class_to_size[s->sizeclass]; +	scan:; +	} +} + +static struct { +	Workbuf	*full; +	Workbuf	*empty; +	byte	*chunk; +	uintptr	nchunk; +} work; + +// Get an empty work buffer off the work.empty list, +// allocating new buffers as needed. 
+static Workbuf* +getempty(Workbuf *b) +{ +	if(b != nil) { +		b->nw = nelem(b->w); +		b->next = work.full; +		work.full = b; +	} +	b = work.empty; +	if(b != nil) { +		work.empty = b->next; +		return b; +	} +	 +	if(work.nchunk < sizeof *b) { +		work.nchunk = 1<<20; +		work.chunk = runtime·SysAlloc(work.nchunk); +	} +	b = (Workbuf*)work.chunk; +	work.chunk += sizeof *b; +	work.nchunk -= sizeof *b; +	return b; +} + +// Get a full work buffer off the work.full list, or return nil. +static Workbuf* +getfull(Workbuf *b) +{ +	if(b != nil) { +		b->nw = 0; +		b->next = work.empty; +		work.empty = b;  	} +	b = work.full; +	if(b != nil) +		work.full = b->next; +	return b;  } +// Scanstack calls scanblock on each of gp's stack segments.  static void  scanstack(G *gp)  { @@ -119,46 +343,26 @@ scanstack(G *gp)  	}  } +// Markfin calls scanblock on the blocks that have finalizers: +// the things pointed at cannot be freed until the finalizers have run.  static void  markfin(void *v)  {  	uintptr size; -	uint32 *refp;  	size = 0; -	refp = nil; -	if(!runtime·mlookup(v, &v, &size, nil, &refp) || !(*refp & RefHasFinalizer)) +	if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v))  		runtime·throw("mark - finalizer inconsistency"); -	 +  	// do not mark the finalizer block itself.  just mark the things it points at.  	scanblock(v, size);  } +// Mark   static void  mark(void)  {  	G *gp; -	uintptr blsize, nobj; - -	// Figure out how big an object stack we need. -	// Get a new one if we need more than we have -	// or we need significantly less than we have. -	nobj = mstats.heap_objects; -	if(nobj > ebl - bl || nobj < (ebl-bl)/4) { -		if(bl != nil) -			runtime·SysFree(bl, (byte*)ebl - (byte*)bl); -		 -		// While we're allocated a new object stack, -		// add 20% headroom and also round up to -		// the nearest page boundary, since mmap -		// will anyway. -		nobj = nobj * 12/10; -		blsize = nobj * sizeof *bl; -		blsize = (blsize + 4095) & ~4095; -		nobj = blsize / sizeof *bl; -		bl = runtime·SysAlloc(blsize); -		ebl = bl + nobj; -	}  	// mark data+bss.  	// skip runtime·mheap itself, which has no interesting pointers @@ -192,97 +396,85 @@ mark(void)  	runtime·walkfintab(markfin);  } -// free RefNone, free & queue finalizers for RefNone|RefHasFinalizer, reset RefSome +// Sweep frees or calls finalizers for blocks not marked in the mark phase. +// It clears the mark bits in preparation for the next GC round.  static void -sweepspan(MSpan *s) +sweep(void)  { -	int32 n, npages, size; +	MSpan *s; +	int32 cl, n, npages; +	uintptr size;  	byte *p; -	uint32 ref, *gcrefp, *gcrefep;  	MCache *c;  	Finalizer *f; -	p = (byte*)(s->start << PageShift); -	if(s->sizeclass == 0) { -		// Large block. -		ref = s->gcref0; -		switch(ref & ~(RefFlags^RefHasFinalizer)) { -		case RefNone: -			// Free large object. 
-			mstats.alloc -= s->npages<<PageShift; -			mstats.nfree++; -			runtime·memclr(p, s->npages<<PageShift); -			if(ref & RefProfiled) -				runtime·MProf_Free(p, s->npages<<PageShift); -			s->gcref0 = RefFree; -			runtime·MHeap_Free(&runtime·mheap, s, 1); -			break; -		case RefNone|RefHasFinalizer: -			f = runtime·getfinalizer(p, 1); -			if(f == nil) -				runtime·throw("finalizer inconsistency"); -			f->arg = p; -			f->next = finq; -			finq = f; -			ref &= ~RefHasFinalizer; -			// fall through -		case RefSome: -		case RefSome|RefHasFinalizer: -			s->gcref0 = RefNone | (ref&RefFlags); -			break; +	for(s = runtime·mheap.allspans; s != nil; s = s->allnext) { +		if(s->state != MSpanInUse) +			continue; + +		p = (byte*)(s->start << PageShift); +		cl = s->sizeclass; +		if(cl == 0) { +			size = s->npages<<PageShift; +			n = 1; +		} else { +			// Chunk full of small blocks. +			size = runtime·class_to_size[cl]; +			npages = runtime·class_to_allocnpages[cl]; +			n = (npages << PageShift) / size;  		} -		return; -	} +	 +		// sweep through n objects of given size starting at p. +		for(; n > 0; n--, p += size) { +			uintptr off, *bitp, shift, bits; -	// Chunk full of small blocks. -	runtime·MGetSizeClassInfo(s->sizeclass, &size, &npages, &n); -	gcrefp = s->gcref; -	gcrefep = s->gcref + n; -	for(; gcrefp < gcrefep; gcrefp++, p += size) { -		ref = *gcrefp; -		if(ref < RefNone)	// RefFree or RefStack -			continue; -		switch(ref & ~(RefFlags^RefHasFinalizer)) { -		case RefNone: -			// Free small object. -			if(ref & RefProfiled) +			off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start; +			bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; +			shift = off % wordsPerBitmapWord; +			bits = *bitp>>shift; + +			if((bits & bitAllocated) == 0) +				continue; + +			if((bits & bitMarked) != 0) { +				*bitp &= ~(bitMarked<<shift); +				continue; +			} + +			if((bits & bitSpecial) != 0) { +				// Special means it has a finalizer or is being profiled. +				f = runtime·getfinalizer(p, 1); +				if(f != nil) { +					f->arg = p; +					f->next = finq; +					finq = f; +					continue; +				}  				runtime·MProf_Free(p, size); -			*gcrefp = RefFree; -			c = m->mcache; -			if(size > sizeof(uintptr)) -				((uintptr*)p)[1] = 1;	// mark as "needs to be zeroed" +			} + +			// Mark freed; restore block boundary bit. +			*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); + +			if(s->sizeclass == 0) { +				// Free large span. +				runtime·unmarkspan(p, 1<<PageShift); +				*(uintptr*)p = 1;	// needs zeroing +				runtime·MHeap_Free(&runtime·mheap, s, 1); +			} else { +				// Free small object. 
+				c = m->mcache; +				if(size > sizeof(uintptr)) +					((uintptr*)p)[1] = 1;	// mark as "needs to be zeroed" +				mstats.by_size[s->sizeclass].nfree++; +				runtime·MCache_Free(c, p, s->sizeclass, size); +			}  			mstats.alloc -= size;  			mstats.nfree++; -			mstats.by_size[s->sizeclass].nfree++; -			runtime·MCache_Free(c, p, s->sizeclass, size); -			break; -		case RefNone|RefHasFinalizer: -			f = runtime·getfinalizer(p, 1); -			if(f == nil) -				runtime·throw("finalizer inconsistency"); -			f->arg = p; -			f->next = finq; -			finq = f; -			ref &= ~RefHasFinalizer; -			// fall through -		case RefSome: -		case RefSome|RefHasFinalizer: -			*gcrefp = RefNone | (ref&RefFlags); -			break;  		}  	}  } -static void -sweep(void) -{ -	MSpan *s; - -	for(s = runtime·mheap.allspans; s != nil; s = s->allnext) -		if(s->state == MSpanInUse) -			sweepspan(s); -} -  // Semaphore, not Lock, so that the goroutine  // reschedules when there is contention rather  // than spinning. @@ -326,7 +518,8 @@ cachestats(void)  void  runtime·gc(int32 force)  { -	int64 t0, t1; +	int64 t0, t1, t2, t3; +	uint64 heap0, heap1, obj0, obj1;  	byte *p;  	Finalizer *fp; @@ -349,23 +542,41 @@ runtime·gc(int32 force)  			gcpercent = -1;  		else  			gcpercent = runtime·atoi(p); +		 +		p = runtime·getenv("GOGCTRACE"); +		if(p != nil) +			gctrace = runtime·atoi(p);  	}  	if(gcpercent < 0)  		return;  	runtime·semacquire(&gcsema); +	if(!force && mstats.heap_alloc < mstats.next_gc) { +		runtime·semrelease(&gcsema); +		return; +	} +  	t0 = runtime·nanotime(); +	nlookup = 0; +	nsizelookup = 0; +	naddrlookup = 0; +  	m->gcing = 1;  	runtime·stoptheworld();  	if(runtime·mheap.Lock.key != 0)  		runtime·throw("runtime·mheap locked during gc"); -	if(force || mstats.heap_alloc >= mstats.next_gc) { -		cachestats(); -		mark(); -		sweep(); -		stealcache(); -		mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; -	} + +	cachestats(); +	heap0 = mstats.heap_alloc; +	obj0 = mstats.nmalloc - mstats.nfree; + +	mark(); +	t1 = runtime·nanotime(); +	sweep(); +	t2 = runtime·nanotime(); +	stealcache(); + +	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;  	m->gcing = 0;  	m->locks++;	// disable gc during the mallocs in newproc @@ -381,18 +592,34 @@ runtime·gc(int32 force)  	}  	m->locks--; -	t1 = runtime·nanotime(); +	cachestats(); +	heap1 = mstats.heap_alloc; +	obj1 = mstats.nmalloc - mstats.nfree; + +	t3 = runtime·nanotime(); +	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; +	mstats.pause_total_ns += t3 - t0;  	mstats.numgc++; -	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t1 - t0; -	mstats.pause_total_ns += t1 - t0;  	if(mstats.debuggc) -		runtime·printf("pause %D\n", t1-t0); +		runtime·printf("pause %D\n", t3-t0); +	 +	if(gctrace) { +		runtime·printf("gc%d: %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr)\n", +			mstats.numgc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000, +			heap0>>20, heap1>>20, obj0, obj1, +			mstats.nmalloc, mstats.nfree, +			nlookup, nsizelookup, naddrlookup); +	} +  	runtime·semrelease(&gcsema);  	runtime·starttheworld();  	// give the queued finalizers, if any, a chance to run  	if(fp != nil)  		runtime·gosched(); +	 +	if(gctrace > 1 && !force) +		runtime·gc(1);  }  static void @@ -430,3 +657,157 @@ runfinq(void)  		runtime·gc(1);	// trigger another gc to clean up the finalized objects, if possible  	}  } + +// mark the block at v of size n as allocated. +// If noptr is true, mark it as having no pointers. 
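
For reference, the new GOGCTRACE environment variable above enables a per-collection trace line; this is a summary of its format as read off the printf in runtime·gc (the field labels are mine):

	/* With GOGCTRACE=1 in the environment, each collection prints one line:
	     gc<numgc>: <mark>+<sweep>+<cleanup> ms <heap0> -> <heap1> MB
	       <obj0> -> <obj1> (<nmalloc>-<nfree>) objects
	       <nlookup> pointer lookups (<nsizelookup> size, <naddrlookup> addr)
	   where the three times are the mark phase, the sweep phase, and the
	   remaining cleanup (cache stealing, finalizer queueing) in milliseconds,
	   and the heap sizes are in MB before and after the collection.
	   Per the last hunk, GOGCTRACE values greater than 1 also force a second
	   collection immediately after every non-forced one. */
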
+void +runtime·markallocated(void *v, uintptr n, bool noptr) +{ +	uintptr *b, bits, off, shift; + +	if(0) +		runtime·printf("markallocated %p+%p\n", v, n); + +	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) +		runtime·throw("markallocated: bad pointer"); + +	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset +	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; +	shift = off % wordsPerBitmapWord; + +	bits = (*b & ~(bitMask<<shift)) | (bitAllocated<<shift); +	if(noptr) +		bits |= bitNoPointers<<shift; +	*b = bits; +} + +// mark the block at v of size n as freed. +void +runtime·markfreed(void *v, uintptr n) +{ +	uintptr *b, off, shift; + +	if(0) +		runtime·printf("markallocated %p+%p\n", v, n); + +	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) +		runtime·throw("markallocated: bad pointer"); + +	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset +	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; +	shift = off % wordsPerBitmapWord; + +	*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); +} + +// check that the block at v of size n is marked freed. +void +runtime·checkfreed(void *v, uintptr n) +{ +	uintptr *b, bits, off, shift; + +	if(!runtime·checking) +		return; + +	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) +		return;	// not allocated, so okay + +	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset +	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; +	shift = off % wordsPerBitmapWord; + +	bits = *b>>shift; +	if((bits & bitAllocated) != 0) { +		runtime·printf("checkfreed %p+%p: off=%p have=%p\n", +			v, n, off, bits & bitMask); +		runtime·throw("checkfreed: not freed"); +	} +} + +// mark the span of memory at v as having n blocks of the given size. +// if leftover is true, there is left over space at the end of the span. +void +runtime·markspan(void *v, uintptr size, uintptr n, bool leftover) +{ +	uintptr *b, off, shift; +	byte *p; + +	if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) +		runtime·throw("markspan: bad pointer"); + +	p = v; +	if(leftover)	// mark a boundary just past end of last block too +		n++; +	for(; n-- > 0; p += size) { +		off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;  // word offset +		b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; +		shift = off % wordsPerBitmapWord; +		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); +	} +} + +// unmark the span of memory at v of length n bytes. 
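The helpers above (markallocated, markfreed, checkfreed, markspan) and the sweep loop in mgc0.c all repeat the same addressing step: a heap word's offset from arena_start selects a bitmap word laid out just below arena_start, growing toward lower addresses, and the remainder gives the bit shift within that word. A minimal sketch of that lookup, using the runtime's own types; the helper name bitwordfor is hypothetical and not part of this patch:

	// Return the bitmap word holding the bits for heap address v,
	// and the shift of those bits within that word.
	static uintptr*
	bitwordfor(void *v, uintptr *shift)
	{
		uintptr off;

		off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;	// word offset into the arena
		*shift = off % wordsPerBitmapWord;
		return (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;	// bitmap grows down from arena_start
	}

	// Typical use, as in the sweeper:
	//	bitp = bitwordfor(p, &shift);
	//	bits = *bitp >> shift;	// then test bitAllocated, bitMarked, bitSpecial, bitNoPointers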
+void +runtime·unmarkspan(void *v, uintptr n) +{ +	uintptr *p, *b, off; + +	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) +		runtime·throw("markspan: bad pointer"); + +	p = v; +	off = p - (uintptr*)runtime·mheap.arena_start;  // word offset +	if(off % wordsPerBitmapWord != 0) +		runtime·throw("markspan: unaligned pointer"); +	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; +	n /= PtrSize; +	if(n%wordsPerBitmapWord != 0) +		runtime·throw("unmarkspan: unaligned length"); +	n /= wordsPerBitmapWord; +	while(n-- > 0) +		*b-- = 0; +} + +bool +runtime·blockspecial(void *v) +{ +	uintptr *b, off, shift; + +	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; +	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; +	shift = off % wordsPerBitmapWord; + +	return (*b & (bitSpecial<<shift)) != 0; +} + +void +runtime·setblockspecial(void *v) +{ +	uintptr *b, off, shift; + +	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; +	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; +	shift = off % wordsPerBitmapWord; + +	*b |= bitSpecial<<shift; +} +  +void +runtime·MHeap_MapBits(MHeap *h) +{ +	// Caller has added extra mappings to the arena. +	// Add extra mappings of bitmap words as needed. +	// We allocate extra bitmap pieces in chunks of bitmapChunk. +	enum { +		bitmapChunk = 8192 +	}; +	uintptr n; +	 +	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; +	n = (n+bitmapChunk-1) & ~(bitmapChunk-1); +	if(h->bitmap_mapped >= n) +		return; + +	runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped); +	h->bitmap_mapped = n; +} diff --git a/src/pkg/runtime/mheap.c b/src/pkg/runtime/mheap.c index 0c9ac0a09..8061b7cf8 100644 --- a/src/pkg/runtime/mheap.c +++ b/src/pkg/runtime/mheap.c @@ -180,7 +180,9 @@ MHeap_Grow(MHeap *h, uintptr npage)  	// Allocate a multiple of 64kB (16 pages).  	npage = (npage+15)&~15;  	ask = npage<<PageShift; -	if(ask < HeapAllocChunk) +	if(ask > h->arena_end - h->arena_used) +		return false; +	if(ask < HeapAllocChunk && HeapAllocChunk <= h->arena_end - h->arena_used)  		ask = HeapAllocChunk;  	v = runtime·MHeap_SysAlloc(h, ask); @@ -194,11 +196,6 @@ MHeap_Grow(MHeap *h, uintptr npage)  	}  	mstats.heap_sys += ask; -	if((byte*)v < h->arena_start || h->arena_start == nil) -		h->arena_start = v; -	if((byte*)v+ask > h->arena_end) -		h->arena_end = (byte*)v+ask; -  	// Create a fake "in use" span and free it, so that the  	// right coalescing happens.  	
s = runtime·FixAlloc_Alloc(&h->spanalloc); @@ -370,10 +367,14 @@ runtime·MSpanList_IsEmpty(MSpan *list)  void  runtime·MSpanList_Insert(MSpan *list, MSpan *span)  { -	if(span->next != nil || span->prev != nil) +	if(span->next != nil || span->prev != nil) { +		runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);  		runtime·throw("MSpanList_Insert"); +	}  	span->next = list->next;  	span->prev = list;  	span->next->prev = span;  	span->prev->next = span;  } + + diff --git a/src/pkg/runtime/mkasmh.sh b/src/pkg/runtime/mkasmh.sh index 3ed5f74c9..91d1bbe5d 100755 --- a/src/pkg/runtime/mkasmh.sh +++ b/src/pkg/runtime/mkasmh.sh @@ -25,9 +25,9 @@ case "$GOARCH" in  		echo '#define	m(r)	4(r)'  		;;  	plan9) -		echo '#define	get_tls(r)' -		echo '#define	g(r)	0xdfffefc0' -		echo '#define	m(r)	0xdfffefc4' +		echo '#define	get_tls(r)	MOVL _tos(SB), r ' +		echo '#define	g(r)	-8(r)' +		echo '#define	m(r)	-4(r)'  		;;  	linux)  		# On Linux systems, what we call 0(GS) and 4(GS) for g and m @@ -84,6 +84,7 @@ esac  echo  awk ' +{ gsub(/\r/, ""); }  /^aggr G$/ { aggr="g" }  /^aggr M$/ { aggr = "m" }  /^aggr Gobuf$/ { aggr = "gobuf" } diff --git a/src/pkg/runtime/mkversion.c b/src/pkg/runtime/mkversion.c index 9790d3f09..56afa1892 100644 --- a/src/pkg/runtime/mkversion.c +++ b/src/pkg/runtime/mkversion.c @@ -5,13 +5,11 @@ char *template =  	"// generated by mkversion.c; do not edit.\n"  	"package runtime\n"  	"const defaultGoroot = \"%s\"\n" -	"const theVersion = \"%s\"\n" -	"const theGoarch = \"%s\"\n" -	"const theGoos = \"%s\"\n"; +	"const theVersion = \"%s\"\n";  void  main(void)  { -	print(template, getgoroot(), getgoversion(), getgoarch(), getgoos()); +	print(template, getgoroot(), getgoversion());  	exits(0);  } diff --git a/src/pkg/runtime/mprof.goc b/src/pkg/runtime/mprof.goc index f4581e98d..aae3d183f 100644 --- a/src/pkg/runtime/mprof.goc +++ b/src/pkg/runtime/mprof.goc @@ -65,7 +65,7 @@ stkbucket(uintptr *stk, int32 nstk)  		   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)  			return b; -	b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], RefNoProfiling, 0, 1); +	b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);  	bucketmem += sizeof *b + nstk*sizeof stk[0];  	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);  	b->hash = h; @@ -132,7 +132,7 @@ setaddrbucket(uintptr addr, Bucket *b)  		if(ah->addr == (addr>>20))  			goto found; -	ah = runtime·mallocgc(sizeof *ah, RefNoProfiling, 0, 1); +	ah = runtime·mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);  	addrmem += sizeof *ah;  	ah->next = addrhash[h];  	ah->addr = addr>>20; @@ -140,7 +140,7 @@ setaddrbucket(uintptr addr, Bucket *b)  found:  	if((e = addrfree) == nil) { -		e = runtime·mallocgc(64*sizeof *e, RefNoProfiling, 0, 0); +		e = runtime·mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);  		addrmem += 64*sizeof *e;  		for(i=0; i+1<64; i++)  			e[i].next = &e[i+1]; diff --git a/src/pkg/runtime/msize.c b/src/pkg/runtime/msize.c index ec85eb373..770ef38ce 100644 --- a/src/pkg/runtime/msize.c +++ b/src/pkg/runtime/msize.c @@ -57,7 +57,7 @@ runtime·SizeToClass(int32 size)  void  runtime·InitSizes(void)  { -	int32 align, sizeclass, size, osize, nextsize, n; +	int32 align, sizeclass, size, nextsize, n;  	uint32 i;  	uintptr allocsize, npages; @@ -81,8 +81,7 @@ runtime·InitSizes(void)  		// the leftover is less than 1/8 of the total,  		// so wasted space is at most 12.5%.  		
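The loop that follows grows allocsize one page at a time until the unused tail (allocsize%size) is at most allocsize/8, which is what bounds the waste at 12.5%. A worked instance, assuming PageSize = 4096 and a hypothetical 3072-byte size class:

	 4096 % 3072 = 1024 >   4096/8 =  512	keep growing
	 8192 % 3072 = 2048 >   8192/8 = 1024	keep growing
	12288 % 3072 =    0 <= 12288/8 = 1536	stop: npages = 3, no waste for this class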
allocsize = PageSize; -		osize = size + RefcountOverhead; -		while(allocsize%osize > (allocsize/8)) +		while(allocsize%size > allocsize/8)  			allocsize += PageSize;  		npages = allocsize >> PageShift; @@ -93,7 +92,7 @@ runtime·InitSizes(void)  		// different sizes.  		if(sizeclass > 1  		&& npages == runtime·class_to_allocnpages[sizeclass-1] -		&& allocsize/osize == allocsize/(runtime·class_to_size[sizeclass-1]+RefcountOverhead)) { +		&& allocsize/size == allocsize/runtime·class_to_size[sizeclass-1]) {  			runtime·class_to_size[sizeclass-1] = size;  			continue;  		} diff --git a/src/pkg/runtime/plan9/386/defs.h b/src/pkg/runtime/plan9/386/defs.h index 5df757613..58fd9d94d 100644 --- a/src/pkg/runtime/plan9/386/defs.h +++ b/src/pkg/runtime/plan9/386/defs.h @@ -1 +1,2 @@  // nothing to see here +#define tos_pid 48 diff --git a/src/pkg/runtime/plan9/386/sys.s b/src/pkg/runtime/plan9/386/sys.s index 867b8940f..f760b782f 100644 --- a/src/pkg/runtime/plan9/386/sys.s +++ b/src/pkg/runtime/plan9/386/sys.s @@ -58,9 +58,10 @@ TEXT runtime·rfork(SB),7,$0  	MOVL	BX, m(AX)  	// Initialize AX from _tos->pid -	MOVL	0xdfffeff8, AX +	MOVL	_tos(SB), AX +	MOVL	tos_pid(AX), AX  	MOVL	AX, m_procid(BX)	// save pid as m->procid - +	  	CALL	runtime·stackcheck(SB)	// smashes AX, CX  	MOVL	0(DX), DX	// paranoia; check they are not nil diff --git a/src/pkg/runtime/plan9/mem.c b/src/pkg/runtime/plan9/mem.c index 651e6728e..b840de984 100644 --- a/src/pkg/runtime/plan9/mem.c +++ b/src/pkg/runtime/plan9/mem.c @@ -10,40 +10,47 @@ static byte *bloc = { end };  enum  { -	Round = 7 +	Round = 4095  };  void* -runtime·SysAlloc(uintptr ask) +runtime·SysAlloc(uintptr nbytes)  {  	uintptr bl;  	// Plan 9 sbrk from /sys/src/libc/9sys/sbrk.c  	bl = ((uintptr)bloc + Round) & ~Round; -	if(runtime·brk_((void*)(bl + ask)) < 0) +	if(runtime·brk_((void*)(bl + nbytes)) < 0)  		return (void*)-1; -	bloc = (byte*)bl + ask; +	bloc = (byte*)bl + nbytes;  	return (void*)bl;  }  void -runtime·SysFree(void *v, uintptr n) +runtime·SysFree(void *v, uintptr nbytes)  {  	// from tiny/mem.c  	// Push pointer back if this is a free  	// of the most recent SysAlloc. 
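With Round raised from 7 to 4095 in plan9/mem.c above, SysAlloc now returns page-aligned blocks: (bloc + Round) & ~Round rounds the break up to the next multiple of 4096 (Round+1, a power of two). A small worked instance with an assumed break address:

	bloc = 0x12345  ->  (0x12345 + 0xFFF) & ~0xFFF = 0x13000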
-	n += (n + Round) & ~Round; -	if(bloc == (byte*)v+n) -		bloc -= n;	 +	nbytes += (nbytes + Round) & ~Round; +	if(bloc == (byte*)v+nbytes) +		bloc -= nbytes;	  }  void -runtime·SysUnused(void *v, uintptr n) +runtime·SysUnused(void *v, uintptr nbytes)  { -	USED(v, n); +	USED(v, nbytes);  }  void -runtime·SysMemInit(void) +runtime·SysMap(void *v, uintptr nbytes)  { +	USED(v, nbytes); +} + +void* +runtime·SysReserve(void *v, uintptr nbytes) +{ +	return runtime·SysAlloc(nbytes);  } diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index 998cbc7bc..26c1f13a4 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -752,8 +752,8 @@ runtime·newstack(void)  		free = framesize;  	} -//printf("newstack frame=%d args=%d morepc=%p morefp=%p gobuf=%p, %p newstk=%p\n", -//frame, args, m->morepc, m->morefp, g->sched.pc, g->sched.sp, stk); +//runtime·printf("newstack framesize=%d argsize=%d morepc=%p moreargp=%p gobuf=%p, %p top=%p old=%p\n", +//framesize, argsize, m->morepc, m->moreargp, m->morebuf.pc, m->morebuf.sp, top, g1->stackbase);  	top->stackbase = g1->stackbase;  	top->stackguard = g1->stackguard; @@ -761,7 +761,7 @@ runtime·newstack(void)  	top->argp = m->moreargp;  	top->argsize = argsize;  	top->free = free; -	 +  	// copy flag from panic  	top->panic = g1->ispanic;  	g1->ispanic = false; diff --git a/src/pkg/runtime/runtime.c b/src/pkg/runtime/runtime.c index 284b1e458..e3a20d48a 100644 --- a/src/pkg/runtime/runtime.c +++ b/src/pkg/runtime/runtime.c @@ -528,14 +528,22 @@ void  runtime·Caller(int32 skip, uintptr retpc, String retfile, int32 retline, bool retbool)  {  	Func *f; +	uintptr pc; -	if(runtime·callers(1+skip, &retpc, 1) == 0 || (f = runtime·findfunc(retpc-1)) == nil) { +	if(runtime·callers(1+skip, &retpc, 1) == 0) {  		retfile = runtime·emptystring;  		retline = 0;  		retbool = false; +	} else if((f = runtime·findfunc(retpc)) == nil) { +		retfile = runtime·emptystring; +		retline = 0; +		retbool = true;  // have retpc at least  	} else {  		retfile = f->src; -		retline = runtime·funcline(f, retpc-1); +		pc = retpc; +		if(pc > f->entry) +			pc--; +		retline = runtime·funcline(f, pc);  		retbool = true;  	}  	FLUSH(&retfile); diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h index 2c19f851e..cea07e4a7 100644 --- a/src/pkg/runtime/runtime.h +++ b/src/pkg/runtime/runtime.h @@ -419,7 +419,7 @@ void	runtime·signalstack(byte*, int32);  G*	runtime·malg(int32);  void	runtime·minit(void);  Func*	runtime·findfunc(uintptr); -int32	runtime·funcline(Func*, uint64); +int32	runtime·funcline(Func*, uintptr);  void*	runtime·stackalloc(uint32);  void	runtime·stackfree(void*, uintptr);  MCache*	runtime·allocmcache(void); @@ -443,7 +443,7 @@ void	runtime·breakpoint(void);  void	runtime·gosched(void);  void	runtime·goexit(void);  void	runtime·runcgo(void (*fn)(void*), void*); -uintptr	runtime·runcgocallback(G*, void*, void (*fn)()); +void	runtime·runcgocallback(G*, void*, void (*fn)());  void	runtime·entersyscall(void);  void	runtime·exitsyscall(void);  void	runtime·startcgocallback(G*); diff --git a/src/pkg/runtime/slice.c b/src/pkg/runtime/slice.c index 051075479..1fee923e4 100644 --- a/src/pkg/runtime/slice.c +++ b/src/pkg/runtime/slice.c @@ -41,7 +41,7 @@ makeslice1(SliceType *t, int32 len, int32 cap, Slice *ret)  	ret->cap = cap;  	if((t->elem->kind&KindNoPointers)) -		ret->array = runtime·mallocgc(size, RefNoPointers, 1, 1); +		ret->array = runtime·mallocgc(size, FlagNoPointers, 1, 1);  	else  		ret->array = runtime·mal(size);  } diff --git 
a/src/pkg/runtime/string.goc b/src/pkg/runtime/string.goc index 916559eb2..b72aa937c 100644 --- a/src/pkg/runtime/string.goc +++ b/src/pkg/runtime/string.goc @@ -225,7 +225,7 @@ func slicebytetostring(b Slice) (s String) {  }  func stringtoslicebyte(s String) (b Slice) { -	b.array = runtime·mallocgc(s.len, RefNoPointers, 1, 1); +	b.array = runtime·mallocgc(s.len, FlagNoPointers, 1, 1);  	b.len = s.len;  	b.cap = s.len;  	runtime·mcpy(b.array, s.str, s.len); @@ -268,7 +268,7 @@ func stringtosliceint(s String) (b Slice) {  		n++;  	} -	b.array = runtime·mallocgc(n*sizeof(r[0]), RefNoPointers, 1, 1); +	b.array = runtime·mallocgc(n*sizeof(r[0]), FlagNoPointers, 1, 1);  	b.len = n;  	b.cap = n;  	p = s.str; diff --git a/src/pkg/runtime/symtab.c b/src/pkg/runtime/symtab.c index b2cccd3cf..6f0eea0e7 100644 --- a/src/pkg/runtime/symtab.c +++ b/src/pkg/runtime/symtab.c @@ -258,28 +258,49 @@ splitpcln(void)  	ef = func + nfunc;  	pc = func[0].entry;	// text base  	f->pcln.array = p; -	f->pc0 = pc - pcquant; +	f->pc0 = pc;  	line = 0; -	for(; p < ep; p++) { -		if(f < ef && pc > (f+1)->entry) { +	for(;;) { +		while(p < ep && *p > 128) +			pc += pcquant * (*p++ - 128); +		// runtime·printf("pc<%p targetpc=%p line=%d\n", pc, targetpc, line); +		if(*p == 0) { +			if(p+5 > ep) +				break; +			// 4 byte add to line +			line += (p[1]<<24) | (p[2]<<16) | (p[3]<<8) | p[4]; +			p += 5; +		} else if(*p <= 64) +			line += *p++; +		else +			line -= *p++ - 64; +		 +		// pc, line now match. +		// Because the state machine begins at pc==entry and line==0, +		// it can happen - just at the beginning! - that the update may +		// have updated line but left pc alone, to tell us the true line +		// number for pc==entry.  In that case, update f->ln0. +		// Having the correct initial line number is important for choosing +		// the correct file in dosrcline above. +		if(f == func && pc == f->pc0) { +			f->pcln.array = p; +			f->pc0 = pc + pcquant; +			f->ln0 = line; +		} + +		if(f < ef && pc >= (f+1)->entry) {  			f->pcln.len = p - f->pcln.array;  			f->pcln.cap = f->pcln.len;  			f++;  			f->pcln.array = p; -			f->pc0 = pc; +			// pc0 and ln0 are the starting values for +			// the loop over f->pcln, so pc must be  +			// adjusted by the same pcquant update +			// that we're going to do as we continue our loop. +			f->pc0 = pc + pcquant;  			f->ln0 = line;  		} -		if(*p == 0) { -			// 4 byte add to line -			line += (p[1]<<24) | (p[2]<<16) | (p[3]<<8) | p[4]; -			p += 4; -		} else if(*p <= 64) { -			line += *p; -		} else if(*p <= 128) { -			line -= *p - 64; -		} else { -			pc += pcquant*(*p - 129); -		} +  		pc += pcquant;  	}  	if(f < ef) { @@ -293,13 +314,17 @@ splitpcln(void)  // (Source file is f->src.)  // NOTE(rsc): If you edit this function, also edit extern.go:/FileLine  int32 -runtime·funcline(Func *f, uint64 targetpc) +runtime·funcline(Func *f, uintptr targetpc)  {  	byte *p, *ep;  	uintptr pc;  	int32 line;  	int32 pcquant; +	enum { +		debug = 0 +	}; +	  	switch(thechar) {  	case '5':  		pcquant = 4; @@ -313,17 +338,41 @@ runtime·funcline(Func *f, uint64 targetpc)  	ep = p + f->pcln.len;  	pc = f->pc0;  	line = f->ln0; -	for(; p < ep && pc <= targetpc; p++) { +	if(debug && !runtime·panicking) +		runtime·printf("funcline start pc=%p targetpc=%p line=%d tab=%p+%d\n", +			pc, targetpc, line, p, (int32)f->pcln.len); +	for(;;) { +		// Table is a sequence of updates. + +		// Each update says first how to adjust the pc, +		// in possibly multiple instructions... 
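The loop that continues below walks the same byte codes that splitpcln consumes above. As a consolidated reference, here is an illustrative standalone decoder; the name findline and its parameter list are assumptions for this sketch, not code from the patch:

	// Byte codes in the pc/line table:
	//	b > 128		pc += pcquant*(b-128), line unchanged
	//	b == 0		the next 4 bytes are a big-endian delta added to line
	//	1..64		line += b
	//	65..128		line -= b-64
	// After every line update, pc also advances by one pcquant step.
	static int32
	findline(byte *p, byte *ep, uintptr pc, int32 line, uintptr targetpc, int32 pcquant)
	{
		for(;;) {
			while(p < ep && *p > 128)
				pc += pcquant * (*p++ - 128);
			if(pc > targetpc || p >= ep)
				break;	// targetpc reached or table exhausted
			if(*p == 0) {
				if(p+5 > ep)
					break;
				line += (p[1]<<24) | (p[2]<<16) | (p[3]<<8) | p[4];
				p += 5;
			} else if(*p <= 64)
				line += *p++;
			else
				line -= *p++ - 64;
			pc += pcquant;
		}
		return line;	// line for targetpc; start with pc = f->pc0, line = f->ln0
	}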
+		while(p < ep && *p > 128) +			pc += pcquant * (*p++ - 128); + +		if(debug && !runtime·panicking) +			runtime·printf("pc<%p targetpc=%p line=%d\n", pc, targetpc, line); +		 +		// If the pc has advanced too far or we're out of data, +		// stop and the last known line number. +		if(pc > targetpc || p >= ep) +			break; + +		// ... and then how to adjust the line number, +		// in a single instruction.  		if(*p == 0) { +			if(p+5 > ep) +				break;  			line += (p[1]<<24) | (p[2]<<16) | (p[3]<<8) | p[4]; -			p += 4; -		} else if(*p <= 64) { -			line += *p; -		} else if(*p <= 128) { -			line -= *p - 64; -		} else { -			pc += pcquant*(*p - 129); -		} +			p += 5; +		} else if(*p <= 64) +			line += *p++; +		else +			line -= *p++ - 64; +		// Now pc, line pair is consistent. +		if(debug && !runtime·panicking) +			runtime·printf("pc=%p targetpc=%p line=%d\n", pc, targetpc, line); + +		// PC increments implicitly on each iteration.  		pc += pcquant;  	}  	return line; diff --git a/src/pkg/runtime/windows/386/defs.h b/src/pkg/runtime/windows/386/defs.h index a2a882103..49fc19504 100644 --- a/src/pkg/runtime/windows/386/defs.h +++ b/src/pkg/runtime/windows/386/defs.h @@ -10,6 +10,9 @@ enum {  	PROT_EXEC = 0x4,  	MAP_ANON = 0x1,  	MAP_PRIVATE = 0x2, +	SIGINT = 0x2, +	CTRL_C_EVENT = 0, +	CTRL_BREAK_EVENT = 0x1,  	EXCEPTION_ACCESS_VIOLATION = 0xc0000005,  	EXCEPTION_BREAKPOINT = 0x80000003,  	EXCEPTION_FLT_DENORMAL_OPERAND = 0xc000008d, diff --git a/src/pkg/runtime/windows/386/signal.c b/src/pkg/runtime/windows/386/signal.c index 69178cdd0..903636910 100644 --- a/src/pkg/runtime/windows/386/signal.c +++ b/src/pkg/runtime/windows/386/signal.c @@ -27,12 +27,7 @@ runtime·dumpregs(Context *r)  void  runtime·initsig(int32)  { -} - -String -runtime·signame(int32) -{ -	return runtime·emptystring; +	runtime·siginit();  }  uint32 diff --git a/src/pkg/runtime/windows/386/sys.s b/src/pkg/runtime/windows/386/sys.s index d1a8a49a9..bca48febe 100644 --- a/src/pkg/runtime/windows/386/sys.s +++ b/src/pkg/runtime/windows/386/sys.s @@ -99,6 +99,45 @@ TEXT runtime·sigtramp1(SB),0,$16-28  sigdone:  	RET +// Windows runs the ctrl handler in a new thread. +TEXT runtime·ctrlhandler(SB),7,$0 +	PUSHL	BP +	MOVL	SP, BP +	PUSHL	BX +	PUSHL	SI +	PUSHL	DI +	PUSHL	0x2c(FS) +	MOVL	SP, BX + +	// setup dummy m, g +	SUBL	$(m_sehframe+4), SP	// at least space for m_sehframe +	LEAL	m_tls(SP), CX +	MOVL	CX, 0x2c(FS) +	MOVL	SP, m(CX) +	MOVL	SP, DX +	SUBL	$8, SP			// space for g_stack{guard,base} +	MOVL	SP, g(CX) +	MOVL	SP, m_g0(DX) +	LEAL	-4096(SP), CX +	MOVL	CX, g_stackguard(SP) +	MOVL	BX, g_stackbase(SP) + +	PUSHL	8(BP) +	CALL	runtime·ctrlhandler1(SB) +	POPL	CX + +	get_tls(CX) +	MOVL	g(CX), CX +	MOVL	g_stackbase(CX), SP +	POPL	0x2c(FS) +	POPL	DI +	POPL	SI +	POPL	BX +	POPL	BP +	MOVL	0(SP), CX +	ADDL	$8, SP +	JMP	CX +  // Called from dynamic function created by ../thread.c compilecallback,  // running on Windows stack (not Go stack).  
// BX, BP, SI, DI registers and DF flag are preserved @@ -107,7 +146,11 @@ sigdone:  // DX = total size of arguments  //  TEXT runtime·callbackasm+0(SB),7,$0 +	// preserve whatever's at the memory location that +	// the callback will use to store the return value  	LEAL	8(SP), CX +	PUSHL	0(CX)(DX*1) +	ADDL	$4, DX			// extend argsize by size of return value  	// save registers as required for windows callback  	PUSHL	0(FS) @@ -129,7 +172,7 @@ TEXT runtime·callbackasm+0(SB),7,$0  	CALL	runtime·cgocallback(SB)  	// restore registers as required for windows callback -	POPL	CX +	POPL	AX  	POPL	CX  	POPL	DX  	POPL	BX @@ -139,6 +182,8 @@ TEXT runtime·callbackasm+0(SB),7,$0  	POPL	0(FS)  	CLD +	MOVL	-4(CX)(DX*1), AX +	POPL	-4(CX)(DX*1)  	RET  // void tstart(M *newm); diff --git a/src/pkg/runtime/windows/defs.c b/src/pkg/runtime/windows/defs.c index 5aac03c81..3b2824940 100644 --- a/src/pkg/runtime/windows/defs.c +++ b/src/pkg/runtime/windows/defs.c @@ -2,9 +2,11 @@  // Use of this source code is governed by a BSD-style  // license that can be found in the LICENSE file. +#include <signal.h>  #include <stdarg.h>  #include <windef.h>  #include <winbase.h> +#include <wincon.h>  enum {  	$PROT_NONE = 0, @@ -15,6 +17,10 @@ enum {  	$MAP_ANON = 1,  	$MAP_PRIVATE = 2, +	$SIGINT = SIGINT, +	$CTRL_C_EVENT = CTRL_C_EVENT, +	$CTRL_BREAK_EVENT = CTRL_BREAK_EVENT, +  	$EXCEPTION_ACCESS_VIOLATION = STATUS_ACCESS_VIOLATION,  	$EXCEPTION_BREAKPOINT = STATUS_BREAKPOINT,  	$EXCEPTION_FLT_DENORMAL_OPERAND = STATUS_FLOAT_DENORMAL_OPERAND, diff --git a/src/pkg/runtime/windows/mem.c b/src/pkg/runtime/windows/mem.c index 19d11ce8d..54d77da37 100644 --- a/src/pkg/runtime/windows/mem.c +++ b/src/pkg/runtime/windows/mem.c @@ -48,7 +48,14 @@ runtime·SysFree(void *v, uintptr n)  void*  runtime·SysReserve(void *v, uintptr n)  { -	return runtime·stdcall(runtime·VirtualAlloc, 4, v, n, MEM_RESERVE, 0); +	// v is just a hint. +	// First try at v. +	v = runtime·stdcall(runtime·VirtualAlloc, 4, v, n, MEM_RESERVE, PAGE_EXECUTE_READWRITE); +	if(v != nil) +		return v; +	 +	// Next let the kernel choose the address. +	return runtime·stdcall(runtime·VirtualAlloc, 4, nil, n, MEM_RESERVE, PAGE_EXECUTE_READWRITE);  }  void diff --git a/src/pkg/runtime/windows/os.h b/src/pkg/runtime/windows/os.h index 391eace5a..77881e86e 100644 --- a/src/pkg/runtime/windows/os.h +++ b/src/pkg/runtime/windows/os.h @@ -20,6 +20,7 @@ uint32 runtime·tstart_stdcall(M *newm);  uint32 runtime·issigpanic(uint32);  void runtime·sigpanic(void); +uint32 runtime·ctrlhandler(uint32 type);  // Windows dll function to go callback entry.  
byte *runtime·compilecallback(Eface fn, bool cleanstack); diff --git a/src/pkg/runtime/windows/thread.c b/src/pkg/runtime/windows/thread.c index 278a5da69..aedd24200 100644 --- a/src/pkg/runtime/windows/thread.c +++ b/src/pkg/runtime/windows/thread.c @@ -18,6 +18,7 @@  #pragma dynimport runtime·LoadLibraryEx LoadLibraryExA "kernel32.dll"  #pragma dynimport runtime·QueryPerformanceCounter QueryPerformanceCounter "kernel32.dll"  #pragma dynimport runtime·QueryPerformanceFrequency QueryPerformanceFrequency "kernel32.dll" +#pragma dynimport runtime·SetConsoleCtrlHandler SetConsoleCtrlHandler "kernel32.dll"  #pragma dynimport runtime·SetEvent SetEvent "kernel32.dll"  #pragma dynimport runtime·WaitForSingleObject WaitForSingleObject "kernel32.dll"  #pragma dynimport runtime·WriteFile WriteFile "kernel32.dll" @@ -33,6 +34,7 @@ extern void *runtime·GetStdHandle;  extern void *runtime·LoadLibraryEx;  extern void *runtime·QueryPerformanceCounter;  extern void *runtime·QueryPerformanceFrequency; +extern void *runtime·SetConsoleCtrlHandler;  extern void *runtime·SetEvent;  extern void *runtime·WaitForSingleObject;  extern void *runtime·WriteFile; @@ -43,6 +45,7 @@ void  runtime·osinit(void)  {  	runtime·stdcall(runtime·QueryPerformanceFrequency, 1, &timerfreq); +	runtime·stdcall(runtime·SetConsoleCtrlHandler, 2, runtime·ctrlhandler, 1);  }  void @@ -161,6 +164,7 @@ runtime·destroylock(Lock *l)  void  runtime·noteclear(Note *n)  { +	n->lock.key = 0;	// memset(n, 0, sizeof *n)  	eventlock(&n->lock);  } @@ -180,11 +184,17 @@ runtime·notesleep(Note *n)  void  runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void))  { +	void *thandle; +  	USED(stk);  	USED(g);	// assuming g = m->g0  	USED(fn);	// assuming fn = mstart -	runtime·stdcall(runtime·CreateThread, 6, 0, 0, runtime·tstart_stdcall, m, 0, 0); +	thandle = runtime·stdcall(runtime·CreateThread, 6, 0, 0, runtime·tstart_stdcall, m, 0, 0); +	if(thandle == 0) { +		runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), runtime·getlasterror()); +		runtime·throw("runtime.newosproc"); +	}  }  // Called to initialize a new m (including the bootstrap m). @@ -279,6 +289,41 @@ runtime·sigpanic(void)  	runtime·throw("fault");  } +String +runtime·signame(int32 sig) +{ +	int8 *s; + +	switch(sig) { +	case SIGINT: +		s = "SIGINT: interrupt"; +		break; +	default: +		return runtime·emptystring; +	} +	return runtime·gostringnocopy((byte*)s); +} + +uint32 +runtime·ctrlhandler1(uint32 type) +{ +	int32 s; + +	switch(type) { +	case CTRL_C_EVENT: +	case CTRL_BREAK_EVENT: +		s = SIGINT; +		break; +	default: +		return 0; +	} + +	if(runtime·sigsend(s)) +		return 1; +	runtime·exit(2);	// SIGINT, SIGTERM, etc +	return 0; +} +  // Call back from windows dll into go.  byte *  runtime·compilecallback(Eface fn, bool cleanstack) | 
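The console interrupt support added above (the SetConsoleCtrlHandler registration in thread.c, the stub in sys.s, and ctrlhandler1) follows the standard Win32 handler pattern. For comparison only, a minimal standalone Win32 C sketch of that pattern, not runtime code; Windows invokes the handler on its own thread, which is why the runtime stub has to set up a dummy m and g first:

	#include <windows.h>
	#include <stdio.h>

	// Return TRUE when the event is handled; returning FALSE lets the
	// next handler (ultimately the default, which ends the process) run.
	static BOOL WINAPI
	ctrlhandler(DWORD type)
	{
		switch(type) {
		case CTRL_C_EVENT:
		case CTRL_BREAK_EVENT:
			// runtime·ctrlhandler1 instead forwards SIGINT via
			// runtime·sigsend and exits when nothing is listening.
			printf("interrupt\n");
			return TRUE;
		}
		return FALSE;
	}

	int
	main(void)
	{
		SetConsoleCtrlHandler(ctrlhandler, TRUE);	// same registration as runtime·osinit
		Sleep(INFINITE);
		return 0;
	}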
