diff options
| author | Rob Pike <r@golang.org> | 2009-06-09 09:53:44 -0700 | 
|---|---|---|
| committer | Rob Pike <r@golang.org> | 2009-06-09 09:53:44 -0700 | 
| commit | 7249ea4df2b4f12a4e7ed446f270cea87e4ffd34 (patch) | |
| tree | 7032a11d0cac2ae4d3e90f7a189b575b5a50f848 /src/pkg/runtime/proc.c | |
| parent | acf6ef7a82b3fe61516a1bac4563706552bdf078 (diff) | |
| download | golang-7249ea4df2b4f12a4e7ed446f270cea87e4ffd34.tar.gz | |
mv src/lib to src/pkg
tests: all.bash passes, gobuild still works, godoc still works.
R=rsc
OCL=30096
CL=30102
Diffstat (limited to 'src/pkg/runtime/proc.c')
| -rw-r--r-- | src/pkg/runtime/proc.c | 858 | 
1 files changed, 858 insertions, 0 deletions
| diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c new file mode 100644 index 000000000..1d065e6d2 --- /dev/null +++ b/src/pkg/runtime/proc.c @@ -0,0 +1,858 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "runtime.h" +#include "malloc.h" + +typedef struct Sched Sched; + +M	m0; +G	g0;	// idle goroutine for m0 + +static	int32	debug	= 0; +static	Lock	debuglock; + +// Go scheduler +// +// The go scheduler's job is to match ready-to-run goroutines (`g's) +// with waiting-for-work schedulers (`m's).  If there are ready gs +// and no waiting ms, ready() will start a new m running in a new +// OS thread, so that all ready gs can run simultaneously, up to a limit. +// For now, ms never go away. +// +// The default maximum number of ms is one: go runs single-threaded. +// This is because some locking details have to be worked ou +// (select in particular is not locked properly) and because the low-level +// code hasn't been written yet for OS X.  Setting the environmen +// variable $gomaxprocs changes sched.mmax for now. +// +// Even a program that can run without deadlock in a single process +// might use more ms if given the chance.  For example, the prime +// sieve will use as many ms as there are primes (up to sched.mmax), +// allowing different stages of the pipeline to execute in parallel. +// We could revisit this choice, only kicking off new ms for blocking +// system calls, but that would limit the amount of parallel computation +// that go would try to do. +// +// In general, one could imagine all sorts of refinements to the +// scheduler, but the goal now is just to get something working on +// Linux and OS X. + +struct Sched { +	Lock; + +	G *gfree;	// available gs (status == Gdead) + +	G *ghead;	// gs waiting to run +	G *gtail; +	int32 gwait;	// number of gs waiting to run +	int32 gcount;	// number of gs that are alive + +	M *mhead;	// ms waiting for work +	int32 mwait;	// number of ms waiting for work +	int32 mcount;	// number of ms that have been created +	int32 mcpu;	// number of ms executing on cpu +	int32 mcpumax;	// max number of ms allowed on cpu +	int32 gomaxprocs; +	int32 msyscall;	// number of ms in system calls + +	int32 predawn;	// running initialization, don't run new gs. + +	Note	stopped;	// one g can wait here for ms to stop +	int32 waitstop;	// after setting this flag +}; + +Sched sched; + +// Scheduling helpers.  Sched must be locked. +static void gput(G*);	// put/get on ghead/gtail +static G* gget(void); +static void mput(M*);	// put/get on mhead +static M* mget(void); +static void gfput(G*);	// put/get on gfree +static G* gfget(void); +static void matchmg(void);	// match ms to gs +static void readylocked(G*);	// ready, but sched is locked + +// Scheduler loop. +static void scheduler(void); + +// The bootstrap sequence is: +// +//	call osinit +//	call schedinit +//	make & queue new G +//	call mstart +// +// The new G does: +// +//	call main·init_function +//	call initdone +//	call main·main +void +schedinit(void) +{ +	int32 n; +	byte *p; + +	mallocinit(); +	goargs(); + +	// Allocate internal symbol table representation now, +	// so that we don't need to call malloc when we crash. +	findfunc(0); + +	sched.gomaxprocs = 1; +	p = getenv("GOMAXPROCS"); +	if(p != nil && (n = atoi(p)) != 0) +		sched.gomaxprocs = n; +	sched.mcpumax = sched.gomaxprocs; +	sched.mcount = 1; +	sched.predawn = 1; +} + +// Called after main·init_function; main·main will be called on return. +void +initdone(void) +{ +	// Let's go. +	sched.predawn = 0; +	mstats.enablegc = 1; + +	// If main·init_function started other goroutines, +	// kick off new ms to handle them, like ready +	// would have, had it not been pre-dawn. +	lock(&sched); +	matchmg(); +	unlock(&sched); +} + +void +goexit(void) +{ +	if(debug > 1){ +		lock(&debuglock); +		printf("goexit goid=%d\n", g->goid); +		unlock(&debuglock); +	} +	g->status = Gmoribund; +	gosched(); +} + +void +tracebackothers(G *me) +{ +	G *g; + +	for(g = allg; g != nil; g = g->alllink) { +		if(g == me || g->status == Gdead) +			continue; +		printf("\ngoroutine %d:\n", g->goid); +		traceback(g->sched.PC, g->sched.SP+sizeof(uintptr), g);  // gogo adjusts SP by one word +	} +} + +// Put on `g' queue.  Sched must be locked. +static void +gput(G *g) +{ +	g->schedlink = nil; +	if(sched.ghead == nil) +		sched.ghead = g; +	else +		sched.gtail->schedlink = g; +	sched.gtail = g; +	sched.gwait++; +} + +// Get from `g' queue.  Sched must be locked. +static G* +gget(void) +{ +	G *g; + +	g = sched.ghead; +	if(g){ +		sched.ghead = g->schedlink; +		if(sched.ghead == nil) +			sched.gtail = nil; +		sched.gwait--; +	} +	return g; +} + +// Put on `m' list.  Sched must be locked. +static void +mput(M *m) +{ +	m->schedlink = sched.mhead; +	sched.mhead = m; +	sched.mwait++; +} + +// Get from `m' list.  Sched must be locked. +static M* +mget(void) +{ +	M *m; + +	m = sched.mhead; +	if(m){ +		sched.mhead = m->schedlink; +		sched.mwait--; +	} +	return m; +} + +// Put on gfree list.  Sched must be locked. +static void +gfput(G *g) +{ +	g->schedlink = sched.gfree; +	sched.gfree = g; +} + +// Get from gfree list.  Sched must be locked. +static G* +gfget(void) +{ +	G *g; + +	g = sched.gfree; +	if(g) +		sched.gfree = g->schedlink; +	return g; +} + +// Mark g ready to run. +void +ready(G *g) +{ +	lock(&sched); +	readylocked(g); +	unlock(&sched); +} + +// Mark g ready to run.  Sched is already locked. +// G might be running already and about to stop. +// The sched lock protects g->status from changing underfoot. +static void +readylocked(G *g) +{ +	if(g->m){ +		// Running on another machine. +		// Ready it when it stops. +		g->readyonstop = 1; +		return; +	} + +	// Mark runnable. +	if(g->status == Grunnable || g->status == Grunning) +		throw("bad g->status in ready"); +	g->status = Grunnable; + +	gput(g); +	if(!sched.predawn) +		matchmg(); +} + +// Get the next goroutine that m should run. +// Sched must be locked on entry, is unlocked on exit. +// Makes sure that at most $GOMAXPROCS gs are +// running on cpus (not in system calls) at any given time. +static G* +nextgandunlock(void) +{ +	G *gp; + +	// On startup, each m is assigned a nextg and +	// has already been accounted for in mcpu. +	if(m->nextg != nil) { +		gp = m->nextg; +		m->nextg = nil; +		unlock(&sched); +		if(debug > 1) { +			lock(&debuglock); +			printf("m%d nextg found g%d\n", m->id, gp->goid); +			unlock(&debuglock); +		} +		return gp; +	} + +	// Otherwise, look for work. +	if(sched.mcpu < sched.mcpumax && (gp=gget()) != nil) { +		sched.mcpu++; +		unlock(&sched); +		if(debug > 1) { +			lock(&debuglock); +			printf("m%d nextg got g%d\n", m->id, gp->goid); +			unlock(&debuglock); +		} +		return gp; +	} + +	// Otherwise, sleep. +	mput(m); +	if(sched.mcpu == 0 && sched.msyscall == 0) +		throw("all goroutines are asleep - deadlock!"); +	m->nextg = nil; +	noteclear(&m->havenextg); +	if(sched.waitstop && sched.mcpu <= sched.mcpumax) { +		sched.waitstop = 0; +		notewakeup(&sched.stopped); +	} +	unlock(&sched); + +	notesleep(&m->havenextg); +	if((gp = m->nextg) == nil) +		throw("bad m->nextg in nextgoroutine"); +	m->nextg = nil; +	if(debug > 1) { +		lock(&debuglock); +		printf("m%d nextg woke g%d\n", m->id, gp->goid); +		unlock(&debuglock); +	} +	return gp; +} + +// TODO(rsc): Remove. This is only temporary, +// for the mark and sweep collector. +void +stoptheworld(void) +{ +	lock(&sched); +	sched.mcpumax = 1; +	while(sched.mcpu > 1) { +		noteclear(&sched.stopped); +		sched.waitstop = 1; +		unlock(&sched); +		notesleep(&sched.stopped); +		lock(&sched); +	} +	unlock(&sched); +} + +// TODO(rsc): Remove. This is only temporary, +// for the mark and sweep collector. +void +starttheworld(void) +{ +	lock(&sched); +	sched.mcpumax = sched.gomaxprocs; +	matchmg(); +	unlock(&sched); +} + +// Called to start an M. +void +mstart(void) +{ +	if(m->mcache == nil) +		m->mcache = allocmcache(); +	minit(); +	scheduler(); +} + +// Kick of new ms as needed (up to mcpumax). +// There are already `other' other cpus that will +// start looking for goroutines shortly. +// Sched is locked. +static void +matchmg(void) +{ +	M *m; +	G *g; + +	if(debug > 1 && sched.ghead != nil) { +		lock(&debuglock); +		printf("matchmg mcpu=%d mcpumax=%d gwait=%d\n", sched.mcpu, sched.mcpumax, sched.gwait); +		unlock(&debuglock); +	} + +	while(sched.mcpu < sched.mcpumax && (g = gget()) != nil){ +		sched.mcpu++; +		if((m = mget()) != nil){ +			if(debug > 1) { +				lock(&debuglock); +				printf("wakeup m%d g%d\n", m->id, g->goid); +				unlock(&debuglock); +			} +			m->nextg = g; +			notewakeup(&m->havenextg); +		}else{ +			m = malloc(sizeof(M)); +			m->g0 = malg(8192); +			m->nextg = g; +			m->id = sched.mcount++; +			if(debug) { +				lock(&debuglock); +				printf("alloc m%d g%d\n", m->id, g->goid); +				unlock(&debuglock); +			} +			newosproc(m, m->g0, m->g0->stackbase, mstart); +		} +	} +} + +// Scheduler loop: find g to run, run it, repeat. +static void +scheduler(void) +{ +	G* gp; + +	lock(&sched); +	if(gosave(&m->sched)){ +		// Jumped here via gosave/gogo, so didn't +		// execute lock(&sched) above. +		lock(&sched); + +		if(sched.predawn) +			throw("init sleeping"); + +		// Just finished running m->curg. +		gp = m->curg; +		gp->m = nil; +		sched.mcpu--; +		if(debug > 1) { +			lock(&debuglock); +			printf("m%d sched g%d status %d\n", m->id, gp->goid, gp->status); +			unlock(&debuglock); +		} +		switch(gp->status){ +		case Grunnable: +		case Gdead: +			// Shouldn't have been running! +			throw("bad gp->status in sched"); +		case Grunning: +			gp->status = Grunnable; +			gput(gp); +			break; +		case Gmoribund: +			gp->status = Gdead; +			if(--sched.gcount == 0) +				exit(0); +			break; +		} +		if(gp->readyonstop){ +			gp->readyonstop = 0; +			readylocked(gp); +		} +	} + +	// Find (or wait for) g to run.  Unlocks sched. +	gp = nextgandunlock(); +	gp->readyonstop = 0; +	gp->status = Grunning; +	if(debug > 1) { +		lock(&debuglock); +		printf("m%d run g%d at %p\n", m->id, gp->goid, gp->sched.PC); +		traceback(gp->sched.PC, gp->sched.SP+8, gp); +		unlock(&debuglock); +	} +	m->curg = gp; +	gp->m = m; +	g = gp; +	gogo(&gp->sched); +} + +// Enter scheduler.  If g->status is Grunning, +// re-queues g and runs everyone else who is waiting +// before running g again.  If g->status is Gmoribund, +// kills off g. +void +gosched(void) +{ +	if(g == m->g0) +		throw("gosched of g0"); +	if(gosave(&g->sched) == 0){ +		g = m->g0; +		gogo(&m->sched); +	} +} + +// The goroutine g is about to enter a system call. +// Record that it's not using the cpu anymore. +// This is called only from the go syscall library, not +// from the low-level system calls used by the runtime. +// The "arguments" are syscall.Syscall's stack frame +void +sys·entersyscall(uint64 callerpc, int64 trap) +{ +	USED(callerpc); + +	if(debug > 1) { +		lock(&debuglock); +		printf("m%d g%d enter syscall %D\n", m->id, g->goid, trap); +		unlock(&debuglock); +	} +	lock(&sched); +	g->status = Gsyscall; +	sched.mcpu--; +	sched.msyscall++; +	if(sched.gwait != 0) +		matchmg(); +	if(sched.waitstop && sched.mcpu <= sched.mcpumax) { +		sched.waitstop = 0; +		notewakeup(&sched.stopped); +	} +	unlock(&sched); +	// leave SP around for gc and traceback +	gosave(&g->sched); +} + +// The goroutine g exited its system call. +// Arrange for it to run on a cpu again. +// This is called only from the go syscall library, not +// from the low-level system calls used by the runtime. +void +sys·exitsyscall(void) +{ +	if(debug > 1) { +		lock(&debuglock); +		printf("m%d g%d exit syscall mcpu=%d mcpumax=%d\n", m->id, g->goid, sched.mcpu, sched.mcpumax); +		unlock(&debuglock); +	} + +	lock(&sched); +	g->status = Grunning; +	sched.msyscall--; +	sched.mcpu++; +	// Fast path - if there's room for this m, we're done. +	if(sched.mcpu <= sched.mcpumax) { +		unlock(&sched); +		return; +	} +	unlock(&sched); + +	// Slow path - all the cpus are taken. +	// The scheduler will ready g and put this m to sleep. +	// When the scheduler takes g awa from m, +	// it will undo the sched.mcpu++ above. +	gosched(); +} + +/* + * stack layout parameters. + * known to linkers. + * + * g->stackguard is set to point StackGuard bytes + * above the bottom of the stack.  each function + * compares its stack pointer against g->stackguard + * to check for overflow.  to cut one instruction from + * the check sequence for functions with tiny frames, + * the stack is allowed to protrude StackSmall bytes + * below the stack guard.  functions with large frames + * don't bother with the check and always call morestack. + * the sequences are: + * + *	guard = g->stackguard + *	frame = function's stack frame size + *	argsize = size of function arguments (call + return) + * + *	stack frame size <= StackSmall: + *		CMPQ guard, SP + *		JHI 3(PC) + *		MOVQ m->morearg, $(argsize << 32) + *		CALL sys.morestack(SB) + * + *	stack frame size > StackSmall but < StackBig + *		LEAQ (frame-StackSmall)(SP), R0 + *		CMPQ guard, R0 + *		JHI 3(PC) + *		MOVQ m->morearg, $(argsize << 32) + *		CALL sys.morestack(SB) + * + *	stack frame size >= StackBig: + *		MOVQ m->morearg, $((argsize << 32) | frame) + *		CALL sys.morestack(SB) + * + * the bottom StackGuard - StackSmall bytes are important: + * there has to be enough room to execute functions that + * refuse to check for stack overflow, either because they + * need to be adjacent to the actual caller's frame (sys.deferproc) + * or because they handle the imminent stack overflow (sys.morestack). + * + * for example, sys.deferproc might call malloc, + * which does one of the above checks (without allocating a full frame), + * which might trigger a call to sys.morestack. + * this sequence needs to fit in the bottom section of the stack. + * on amd64, sys.morestack's frame is 40 bytes, and + * sys.deferproc's frame is 56 bytes.  that fits well within + * the StackGuard - StackSmall = 128 bytes at the bottom. + * there may be other sequences lurking or yet to be written + * that require more stack.  sys.morestack checks to make sure + * the stack has not completely overflowed and should + * catch such sequences. + */ +enum +{ +	// byte offset of stack guard (g->stackguard) above bottom of stack. +	StackGuard = 256, + +	// checked frames are allowed to protrude below the guard by +	// this many bytes.  this saves an instruction in the checking +	// sequence when the stack frame is tiny. +	StackSmall = 128, + +	// extra space in the frame (beyond the function for which +	// the frame is allocated) is assumed not to be much bigger +	// than this amount.  it may not be used efficiently if it is. +	StackBig = 4096, +}; + +void +oldstack(void) +{ +	Stktop *top; +	uint32 args; +	byte *sp; +	uintptr oldsp, oldpc, oldbase, oldguard; + +// printf("oldstack m->cret=%p\n", m->cret); + +	top = (Stktop*)m->curg->stackbase; + +	args = (top->magic>>32) & 0xffffLL; + +	sp = (byte*)top; +	if(args > 0) { +		args = (args+7) & ~7; +		sp -= args; +		mcpy(top->oldsp+2*sizeof(uintptr), sp, args); +	} + +	oldsp = (uintptr)top->oldsp + sizeof(uintptr); +	oldpc = *(uintptr*)oldsp; +	oldbase = (uintptr)top->oldbase; +	oldguard = (uintptr)top->oldguard; + +	stackfree((byte*)m->curg->stackguard - StackGuard); + +	m->curg->stackbase = (byte*)oldbase; +	m->curg->stackguard = (byte*)oldguard; +	m->morestack.SP = (byte*)oldsp; +	m->morestack.PC = (byte*)oldpc; + +	// These two lines must happen in sequence; +	// once g has been changed, must switch to g's stack +	// before calling any non-assembly functions. +	// TODO(rsc): Perhaps make the new g a parameter +	// to gogoret and setspgoto, so that g is never +	// explicitly assigned to without also setting +	// the stack pointer. +	g = m->curg; +	gogoret(&m->morestack, m->cret); +} + +#pragma textflag 7 +void +lessstack(void) +{ +	g = m->g0; +	setspgoto(m->sched.SP, oldstack, nil); +} + +void +newstack(void) +{ +	int32 frame, args; +	Stktop *top; +	byte *stk, *sp; +	void (*fn)(void); + +	frame = m->morearg & 0xffffffffLL; +	args = (m->morearg>>32) & 0xffffLL; + +// printf("newstack frame=%d args=%d moresp=%p morepc=%p\n", frame, args, m->moresp, *(uintptr*)m->moresp); + +	if(frame < StackBig) +		frame = StackBig; +	frame += 1024;	// for more functions, Stktop. +	stk = stackalloc(frame); + +	top = (Stktop*)(stk+frame-sizeof(*top)); + +	top->oldbase = m->curg->stackbase; +	top->oldguard = m->curg->stackguard; +	top->oldsp = m->moresp; +	top->magic = m->morearg; + +	m->curg->stackbase = (byte*)top; +	m->curg->stackguard = stk + StackGuard; + +	sp = (byte*)top; + +	if(args > 0) { +		// Copy args.  There have been two function calls +		// since they got pushed, so skip over those return +		// addresses. +		args = (args+7) & ~7; +		sp -= args; +		mcpy(sp, m->moresp+2*sizeof(uintptr), args); +	} + +	g = m->curg; + +	// sys.morestack's return address +	fn = (void(*)(void))(*(uintptr*)m->moresp); + +// printf("fn=%p\n", fn); + +	setspgoto(sp, fn, retfromnewstack); + +	*(int32*)345 = 123;	// never return +} + +#pragma textflag 7 +void +sys·morestack(uintptr u) +{ +	while(g == m->g0) { +		// very bad news +		*(int32*)0x1001 = 123; +	} + +	// Morestack's frame is about 0x30 bytes on amd64. +	// If that the frame ends below the stack bottom, we've already +	// overflowed.  Stop right now. +	while((byte*)&u - 0x30 < m->curg->stackguard - StackGuard) { +		// very bad news +		*(int32*)0x1002 = 123; +	} + +	g = m->g0; +	m->moresp = (byte*)(&u-1); +	setspgoto(m->sched.SP, newstack, nil); + +	*(int32*)0x1003 = 123;	// never return +} + +G* +malg(int32 stacksize) +{ +	G *g; +	byte *stk; + +	g = malloc(sizeof(G)); +	stk = stackalloc(stacksize + StackGuard); +	g->stack0 = stk; +	g->stackguard = stk + StackGuard; +	g->stackbase = stk + StackGuard + stacksize; +	return g; +} + +/* + * Newproc and deferproc need to be textflag 7 + * (no possible stack split when nearing overflow) + * because they assume that the arguments to fn + * are available sequentially beginning at &arg0. + * If a stack split happened, only the one word + * arg0 would be copied.  It's okay if any functions + * they call split the stack below the newproc frame. + */ +#pragma textflag 7 +void +sys·newproc(int32 siz, byte* fn, byte* arg0) +{ +	byte *stk, *sp; +	G *newg; + +//printf("newproc siz=%d fn=%p", siz, fn); + +	siz = (siz+7) & ~7; +	if(siz > 1024) +		throw("sys·newproc: too many args"); + +	lock(&sched); + +	if((newg = gfget()) != nil){ +		newg->status = Gwaiting; +	} else { +		newg = malg(4096); +		newg->status = Gwaiting; +		newg->alllink = allg; +		allg = newg; +	} +	stk = newg->stack0; + +	newg->stackguard = stk+StackGuard; + +	sp = stk + 4096 - 4*8; +	newg->stackbase = sp; + +	sp -= siz; +	mcpy(sp, (byte*)&arg0, siz); + +	sp -= sizeof(uintptr); +	*(byte**)sp = (byte*)goexit; + +	sp -= sizeof(uintptr);	// retpc used by gogo +	newg->sched.SP = sp; +	newg->sched.PC = fn; + +	sched.gcount++; +	goidgen++; +	newg->goid = goidgen; + +	readylocked(newg); +	unlock(&sched); + +//printf(" goid=%d\n", newg->goid); +} + +#pragma textflag 7 +void +sys·deferproc(int32 siz, byte* fn, byte* arg0) +{ +	Defer *d; + +	d = malloc(sizeof(*d) + siz - sizeof(d->args)); +	d->fn = fn; +	d->sp = (byte*)&arg0; +	d->siz = siz; +	mcpy(d->args, d->sp, d->siz); + +	d->link = g->defer; +	g->defer = d; +} + +#pragma textflag 7 +void +sys·deferreturn(uintptr arg0) +{ +	Defer *d; +	byte *sp, *fn; +	uintptr *caller; + +	d = g->defer; +	if(d == nil) +		return; +	sp = (byte*)&arg0; +	if(d->sp != sp) +		return; +	mcpy(d->sp, d->args, d->siz); +	g->defer = d->link; +	fn = d->fn; +	free(d); +	jmpdefer(fn, sp); +  } + +void +runtime·Breakpoint(void) +{ +	breakpoint(); +} + +void +runtime·Goexit(void) +{ +	goexit(); +} + +void +runtime·Gosched(void) +{ +	gosched(); +} + | 
