diff options
Diffstat (limited to 'src/lib/runtime/proc.c')
-rw-r--r-- | src/lib/runtime/proc.c | 858 |
1 files changed, 858 insertions, 0 deletions
diff --git a/src/lib/runtime/proc.c b/src/lib/runtime/proc.c new file mode 100644 index 000000000..1d065e6d2 --- /dev/null +++ b/src/lib/runtime/proc.c @@ -0,0 +1,858 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "runtime.h" +#include "malloc.h" + +typedef struct Sched Sched; + +M m0; +G g0; // idle goroutine for m0 + +static int32 debug = 0; +static Lock debuglock; + +// Go scheduler +// +// The go scheduler's job is to match ready-to-run goroutines (`g's) +// with waiting-for-work schedulers (`m's). If there are ready gs +// and no waiting ms, ready() will start a new m running in a new +// OS thread, so that all ready gs can run simultaneously, up to a limit. +// For now, ms never go away. +// +// The default maximum number of ms is one: go runs single-threaded. +// This is because some locking details have to be worked ou +// (select in particular is not locked properly) and because the low-level +// code hasn't been written yet for OS X. Setting the environmen +// variable $gomaxprocs changes sched.mmax for now. +// +// Even a program that can run without deadlock in a single process +// might use more ms if given the chance. For example, the prime +// sieve will use as many ms as there are primes (up to sched.mmax), +// allowing different stages of the pipeline to execute in parallel. +// We could revisit this choice, only kicking off new ms for blocking +// system calls, but that would limit the amount of parallel computation +// that go would try to do. +// +// In general, one could imagine all sorts of refinements to the +// scheduler, but the goal now is just to get something working on +// Linux and OS X. + +struct Sched { + Lock; + + G *gfree; // available gs (status == Gdead) + + G *ghead; // gs waiting to run + G *gtail; + int32 gwait; // number of gs waiting to run + int32 gcount; // number of gs that are alive + + M *mhead; // ms waiting for work + int32 mwait; // number of ms waiting for work + int32 mcount; // number of ms that have been created + int32 mcpu; // number of ms executing on cpu + int32 mcpumax; // max number of ms allowed on cpu + int32 gomaxprocs; + int32 msyscall; // number of ms in system calls + + int32 predawn; // running initialization, don't run new gs. + + Note stopped; // one g can wait here for ms to stop + int32 waitstop; // after setting this flag +}; + +Sched sched; + +// Scheduling helpers. Sched must be locked. +static void gput(G*); // put/get on ghead/gtail +static G* gget(void); +static void mput(M*); // put/get on mhead +static M* mget(void); +static void gfput(G*); // put/get on gfree +static G* gfget(void); +static void matchmg(void); // match ms to gs +static void readylocked(G*); // ready, but sched is locked + +// Scheduler loop. +static void scheduler(void); + +// The bootstrap sequence is: +// +// call osinit +// call schedinit +// make & queue new G +// call mstart +// +// The new G does: +// +// call main·init_function +// call initdone +// call main·main +void +schedinit(void) +{ + int32 n; + byte *p; + + mallocinit(); + goargs(); + + // Allocate internal symbol table representation now, + // so that we don't need to call malloc when we crash. + findfunc(0); + + sched.gomaxprocs = 1; + p = getenv("GOMAXPROCS"); + if(p != nil && (n = atoi(p)) != 0) + sched.gomaxprocs = n; + sched.mcpumax = sched.gomaxprocs; + sched.mcount = 1; + sched.predawn = 1; +} + +// Called after main·init_function; main·main will be called on return. +void +initdone(void) +{ + // Let's go. + sched.predawn = 0; + mstats.enablegc = 1; + + // If main·init_function started other goroutines, + // kick off new ms to handle them, like ready + // would have, had it not been pre-dawn. + lock(&sched); + matchmg(); + unlock(&sched); +} + +void +goexit(void) +{ + if(debug > 1){ + lock(&debuglock); + printf("goexit goid=%d\n", g->goid); + unlock(&debuglock); + } + g->status = Gmoribund; + gosched(); +} + +void +tracebackothers(G *me) +{ + G *g; + + for(g = allg; g != nil; g = g->alllink) { + if(g == me || g->status == Gdead) + continue; + printf("\ngoroutine %d:\n", g->goid); + traceback(g->sched.PC, g->sched.SP+sizeof(uintptr), g); // gogo adjusts SP by one word + } +} + +// Put on `g' queue. Sched must be locked. +static void +gput(G *g) +{ + g->schedlink = nil; + if(sched.ghead == nil) + sched.ghead = g; + else + sched.gtail->schedlink = g; + sched.gtail = g; + sched.gwait++; +} + +// Get from `g' queue. Sched must be locked. +static G* +gget(void) +{ + G *g; + + g = sched.ghead; + if(g){ + sched.ghead = g->schedlink; + if(sched.ghead == nil) + sched.gtail = nil; + sched.gwait--; + } + return g; +} + +// Put on `m' list. Sched must be locked. +static void +mput(M *m) +{ + m->schedlink = sched.mhead; + sched.mhead = m; + sched.mwait++; +} + +// Get from `m' list. Sched must be locked. +static M* +mget(void) +{ + M *m; + + m = sched.mhead; + if(m){ + sched.mhead = m->schedlink; + sched.mwait--; + } + return m; +} + +// Put on gfree list. Sched must be locked. +static void +gfput(G *g) +{ + g->schedlink = sched.gfree; + sched.gfree = g; +} + +// Get from gfree list. Sched must be locked. +static G* +gfget(void) +{ + G *g; + + g = sched.gfree; + if(g) + sched.gfree = g->schedlink; + return g; +} + +// Mark g ready to run. +void +ready(G *g) +{ + lock(&sched); + readylocked(g); + unlock(&sched); +} + +// Mark g ready to run. Sched is already locked. +// G might be running already and about to stop. +// The sched lock protects g->status from changing underfoot. +static void +readylocked(G *g) +{ + if(g->m){ + // Running on another machine. + // Ready it when it stops. + g->readyonstop = 1; + return; + } + + // Mark runnable. + if(g->status == Grunnable || g->status == Grunning) + throw("bad g->status in ready"); + g->status = Grunnable; + + gput(g); + if(!sched.predawn) + matchmg(); +} + +// Get the next goroutine that m should run. +// Sched must be locked on entry, is unlocked on exit. +// Makes sure that at most $GOMAXPROCS gs are +// running on cpus (not in system calls) at any given time. +static G* +nextgandunlock(void) +{ + G *gp; + + // On startup, each m is assigned a nextg and + // has already been accounted for in mcpu. + if(m->nextg != nil) { + gp = m->nextg; + m->nextg = nil; + unlock(&sched); + if(debug > 1) { + lock(&debuglock); + printf("m%d nextg found g%d\n", m->id, gp->goid); + unlock(&debuglock); + } + return gp; + } + + // Otherwise, look for work. + if(sched.mcpu < sched.mcpumax && (gp=gget()) != nil) { + sched.mcpu++; + unlock(&sched); + if(debug > 1) { + lock(&debuglock); + printf("m%d nextg got g%d\n", m->id, gp->goid); + unlock(&debuglock); + } + return gp; + } + + // Otherwise, sleep. + mput(m); + if(sched.mcpu == 0 && sched.msyscall == 0) + throw("all goroutines are asleep - deadlock!"); + m->nextg = nil; + noteclear(&m->havenextg); + if(sched.waitstop && sched.mcpu <= sched.mcpumax) { + sched.waitstop = 0; + notewakeup(&sched.stopped); + } + unlock(&sched); + + notesleep(&m->havenextg); + if((gp = m->nextg) == nil) + throw("bad m->nextg in nextgoroutine"); + m->nextg = nil; + if(debug > 1) { + lock(&debuglock); + printf("m%d nextg woke g%d\n", m->id, gp->goid); + unlock(&debuglock); + } + return gp; +} + +// TODO(rsc): Remove. This is only temporary, +// for the mark and sweep collector. +void +stoptheworld(void) +{ + lock(&sched); + sched.mcpumax = 1; + while(sched.mcpu > 1) { + noteclear(&sched.stopped); + sched.waitstop = 1; + unlock(&sched); + notesleep(&sched.stopped); + lock(&sched); + } + unlock(&sched); +} + +// TODO(rsc): Remove. This is only temporary, +// for the mark and sweep collector. +void +starttheworld(void) +{ + lock(&sched); + sched.mcpumax = sched.gomaxprocs; + matchmg(); + unlock(&sched); +} + +// Called to start an M. +void +mstart(void) +{ + if(m->mcache == nil) + m->mcache = allocmcache(); + minit(); + scheduler(); +} + +// Kick of new ms as needed (up to mcpumax). +// There are already `other' other cpus that will +// start looking for goroutines shortly. +// Sched is locked. +static void +matchmg(void) +{ + M *m; + G *g; + + if(debug > 1 && sched.ghead != nil) { + lock(&debuglock); + printf("matchmg mcpu=%d mcpumax=%d gwait=%d\n", sched.mcpu, sched.mcpumax, sched.gwait); + unlock(&debuglock); + } + + while(sched.mcpu < sched.mcpumax && (g = gget()) != nil){ + sched.mcpu++; + if((m = mget()) != nil){ + if(debug > 1) { + lock(&debuglock); + printf("wakeup m%d g%d\n", m->id, g->goid); + unlock(&debuglock); + } + m->nextg = g; + notewakeup(&m->havenextg); + }else{ + m = malloc(sizeof(M)); + m->g0 = malg(8192); + m->nextg = g; + m->id = sched.mcount++; + if(debug) { + lock(&debuglock); + printf("alloc m%d g%d\n", m->id, g->goid); + unlock(&debuglock); + } + newosproc(m, m->g0, m->g0->stackbase, mstart); + } + } +} + +// Scheduler loop: find g to run, run it, repeat. +static void +scheduler(void) +{ + G* gp; + + lock(&sched); + if(gosave(&m->sched)){ + // Jumped here via gosave/gogo, so didn't + // execute lock(&sched) above. + lock(&sched); + + if(sched.predawn) + throw("init sleeping"); + + // Just finished running m->curg. + gp = m->curg; + gp->m = nil; + sched.mcpu--; + if(debug > 1) { + lock(&debuglock); + printf("m%d sched g%d status %d\n", m->id, gp->goid, gp->status); + unlock(&debuglock); + } + switch(gp->status){ + case Grunnable: + case Gdead: + // Shouldn't have been running! + throw("bad gp->status in sched"); + case Grunning: + gp->status = Grunnable; + gput(gp); + break; + case Gmoribund: + gp->status = Gdead; + if(--sched.gcount == 0) + exit(0); + break; + } + if(gp->readyonstop){ + gp->readyonstop = 0; + readylocked(gp); + } + } + + // Find (or wait for) g to run. Unlocks sched. + gp = nextgandunlock(); + gp->readyonstop = 0; + gp->status = Grunning; + if(debug > 1) { + lock(&debuglock); + printf("m%d run g%d at %p\n", m->id, gp->goid, gp->sched.PC); + traceback(gp->sched.PC, gp->sched.SP+8, gp); + unlock(&debuglock); + } + m->curg = gp; + gp->m = m; + g = gp; + gogo(&gp->sched); +} + +// Enter scheduler. If g->status is Grunning, +// re-queues g and runs everyone else who is waiting +// before running g again. If g->status is Gmoribund, +// kills off g. +void +gosched(void) +{ + if(g == m->g0) + throw("gosched of g0"); + if(gosave(&g->sched) == 0){ + g = m->g0; + gogo(&m->sched); + } +} + +// The goroutine g is about to enter a system call. +// Record that it's not using the cpu anymore. +// This is called only from the go syscall library, not +// from the low-level system calls used by the runtime. +// The "arguments" are syscall.Syscall's stack frame +void +sys·entersyscall(uint64 callerpc, int64 trap) +{ + USED(callerpc); + + if(debug > 1) { + lock(&debuglock); + printf("m%d g%d enter syscall %D\n", m->id, g->goid, trap); + unlock(&debuglock); + } + lock(&sched); + g->status = Gsyscall; + sched.mcpu--; + sched.msyscall++; + if(sched.gwait != 0) + matchmg(); + if(sched.waitstop && sched.mcpu <= sched.mcpumax) { + sched.waitstop = 0; + notewakeup(&sched.stopped); + } + unlock(&sched); + // leave SP around for gc and traceback + gosave(&g->sched); +} + +// The goroutine g exited its system call. +// Arrange for it to run on a cpu again. +// This is called only from the go syscall library, not +// from the low-level system calls used by the runtime. +void +sys·exitsyscall(void) +{ + if(debug > 1) { + lock(&debuglock); + printf("m%d g%d exit syscall mcpu=%d mcpumax=%d\n", m->id, g->goid, sched.mcpu, sched.mcpumax); + unlock(&debuglock); + } + + lock(&sched); + g->status = Grunning; + sched.msyscall--; + sched.mcpu++; + // Fast path - if there's room for this m, we're done. + if(sched.mcpu <= sched.mcpumax) { + unlock(&sched); + return; + } + unlock(&sched); + + // Slow path - all the cpus are taken. + // The scheduler will ready g and put this m to sleep. + // When the scheduler takes g awa from m, + // it will undo the sched.mcpu++ above. + gosched(); +} + +/* + * stack layout parameters. + * known to linkers. + * + * g->stackguard is set to point StackGuard bytes + * above the bottom of the stack. each function + * compares its stack pointer against g->stackguard + * to check for overflow. to cut one instruction from + * the check sequence for functions with tiny frames, + * the stack is allowed to protrude StackSmall bytes + * below the stack guard. functions with large frames + * don't bother with the check and always call morestack. + * the sequences are: + * + * guard = g->stackguard + * frame = function's stack frame size + * argsize = size of function arguments (call + return) + * + * stack frame size <= StackSmall: + * CMPQ guard, SP + * JHI 3(PC) + * MOVQ m->morearg, $(argsize << 32) + * CALL sys.morestack(SB) + * + * stack frame size > StackSmall but < StackBig + * LEAQ (frame-StackSmall)(SP), R0 + * CMPQ guard, R0 + * JHI 3(PC) + * MOVQ m->morearg, $(argsize << 32) + * CALL sys.morestack(SB) + * + * stack frame size >= StackBig: + * MOVQ m->morearg, $((argsize << 32) | frame) + * CALL sys.morestack(SB) + * + * the bottom StackGuard - StackSmall bytes are important: + * there has to be enough room to execute functions that + * refuse to check for stack overflow, either because they + * need to be adjacent to the actual caller's frame (sys.deferproc) + * or because they handle the imminent stack overflow (sys.morestack). + * + * for example, sys.deferproc might call malloc, + * which does one of the above checks (without allocating a full frame), + * which might trigger a call to sys.morestack. + * this sequence needs to fit in the bottom section of the stack. + * on amd64, sys.morestack's frame is 40 bytes, and + * sys.deferproc's frame is 56 bytes. that fits well within + * the StackGuard - StackSmall = 128 bytes at the bottom. + * there may be other sequences lurking or yet to be written + * that require more stack. sys.morestack checks to make sure + * the stack has not completely overflowed and should + * catch such sequences. + */ +enum +{ + // byte offset of stack guard (g->stackguard) above bottom of stack. + StackGuard = 256, + + // checked frames are allowed to protrude below the guard by + // this many bytes. this saves an instruction in the checking + // sequence when the stack frame is tiny. + StackSmall = 128, + + // extra space in the frame (beyond the function for which + // the frame is allocated) is assumed not to be much bigger + // than this amount. it may not be used efficiently if it is. + StackBig = 4096, +}; + +void +oldstack(void) +{ + Stktop *top; + uint32 args; + byte *sp; + uintptr oldsp, oldpc, oldbase, oldguard; + +// printf("oldstack m->cret=%p\n", m->cret); + + top = (Stktop*)m->curg->stackbase; + + args = (top->magic>>32) & 0xffffLL; + + sp = (byte*)top; + if(args > 0) { + args = (args+7) & ~7; + sp -= args; + mcpy(top->oldsp+2*sizeof(uintptr), sp, args); + } + + oldsp = (uintptr)top->oldsp + sizeof(uintptr); + oldpc = *(uintptr*)oldsp; + oldbase = (uintptr)top->oldbase; + oldguard = (uintptr)top->oldguard; + + stackfree((byte*)m->curg->stackguard - StackGuard); + + m->curg->stackbase = (byte*)oldbase; + m->curg->stackguard = (byte*)oldguard; + m->morestack.SP = (byte*)oldsp; + m->morestack.PC = (byte*)oldpc; + + // These two lines must happen in sequence; + // once g has been changed, must switch to g's stack + // before calling any non-assembly functions. + // TODO(rsc): Perhaps make the new g a parameter + // to gogoret and setspgoto, so that g is never + // explicitly assigned to without also setting + // the stack pointer. + g = m->curg; + gogoret(&m->morestack, m->cret); +} + +#pragma textflag 7 +void +lessstack(void) +{ + g = m->g0; + setspgoto(m->sched.SP, oldstack, nil); +} + +void +newstack(void) +{ + int32 frame, args; + Stktop *top; + byte *stk, *sp; + void (*fn)(void); + + frame = m->morearg & 0xffffffffLL; + args = (m->morearg>>32) & 0xffffLL; + +// printf("newstack frame=%d args=%d moresp=%p morepc=%p\n", frame, args, m->moresp, *(uintptr*)m->moresp); + + if(frame < StackBig) + frame = StackBig; + frame += 1024; // for more functions, Stktop. + stk = stackalloc(frame); + + top = (Stktop*)(stk+frame-sizeof(*top)); + + top->oldbase = m->curg->stackbase; + top->oldguard = m->curg->stackguard; + top->oldsp = m->moresp; + top->magic = m->morearg; + + m->curg->stackbase = (byte*)top; + m->curg->stackguard = stk + StackGuard; + + sp = (byte*)top; + + if(args > 0) { + // Copy args. There have been two function calls + // since they got pushed, so skip over those return + // addresses. + args = (args+7) & ~7; + sp -= args; + mcpy(sp, m->moresp+2*sizeof(uintptr), args); + } + + g = m->curg; + + // sys.morestack's return address + fn = (void(*)(void))(*(uintptr*)m->moresp); + +// printf("fn=%p\n", fn); + + setspgoto(sp, fn, retfromnewstack); + + *(int32*)345 = 123; // never return +} + +#pragma textflag 7 +void +sys·morestack(uintptr u) +{ + while(g == m->g0) { + // very bad news + *(int32*)0x1001 = 123; + } + + // Morestack's frame is about 0x30 bytes on amd64. + // If that the frame ends below the stack bottom, we've already + // overflowed. Stop right now. + while((byte*)&u - 0x30 < m->curg->stackguard - StackGuard) { + // very bad news + *(int32*)0x1002 = 123; + } + + g = m->g0; + m->moresp = (byte*)(&u-1); + setspgoto(m->sched.SP, newstack, nil); + + *(int32*)0x1003 = 123; // never return +} + +G* +malg(int32 stacksize) +{ + G *g; + byte *stk; + + g = malloc(sizeof(G)); + stk = stackalloc(stacksize + StackGuard); + g->stack0 = stk; + g->stackguard = stk + StackGuard; + g->stackbase = stk + StackGuard + stacksize; + return g; +} + +/* + * Newproc and deferproc need to be textflag 7 + * (no possible stack split when nearing overflow) + * because they assume that the arguments to fn + * are available sequentially beginning at &arg0. + * If a stack split happened, only the one word + * arg0 would be copied. It's okay if any functions + * they call split the stack below the newproc frame. + */ +#pragma textflag 7 +void +sys·newproc(int32 siz, byte* fn, byte* arg0) +{ + byte *stk, *sp; + G *newg; + +//printf("newproc siz=%d fn=%p", siz, fn); + + siz = (siz+7) & ~7; + if(siz > 1024) + throw("sys·newproc: too many args"); + + lock(&sched); + + if((newg = gfget()) != nil){ + newg->status = Gwaiting; + } else { + newg = malg(4096); + newg->status = Gwaiting; + newg->alllink = allg; + allg = newg; + } + stk = newg->stack0; + + newg->stackguard = stk+StackGuard; + + sp = stk + 4096 - 4*8; + newg->stackbase = sp; + + sp -= siz; + mcpy(sp, (byte*)&arg0, siz); + + sp -= sizeof(uintptr); + *(byte**)sp = (byte*)goexit; + + sp -= sizeof(uintptr); // retpc used by gogo + newg->sched.SP = sp; + newg->sched.PC = fn; + + sched.gcount++; + goidgen++; + newg->goid = goidgen; + + readylocked(newg); + unlock(&sched); + +//printf(" goid=%d\n", newg->goid); +} + +#pragma textflag 7 +void +sys·deferproc(int32 siz, byte* fn, byte* arg0) +{ + Defer *d; + + d = malloc(sizeof(*d) + siz - sizeof(d->args)); + d->fn = fn; + d->sp = (byte*)&arg0; + d->siz = siz; + mcpy(d->args, d->sp, d->siz); + + d->link = g->defer; + g->defer = d; +} + +#pragma textflag 7 +void +sys·deferreturn(uintptr arg0) +{ + Defer *d; + byte *sp, *fn; + uintptr *caller; + + d = g->defer; + if(d == nil) + return; + sp = (byte*)&arg0; + if(d->sp != sp) + return; + mcpy(d->sp, d->args, d->siz); + g->defer = d->link; + fn = d->fn; + free(d); + jmpdefer(fn, sp); + } + +void +runtime·Breakpoint(void) +{ + breakpoint(); +} + +void +runtime·Goexit(void) +{ + goexit(); +} + +void +runtime·Gosched(void) +{ + gosched(); +} + |