Diffstat (limited to 'src/pkg/runtime/proc.c')
-rw-r--r-- | src/pkg/runtime/proc.c | 2934
1 files changed, 1701 insertions, 1233 deletions
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index 04a992628..4ce0a718c 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -4,169 +4,112 @@ #include "runtime.h" #include "arch_GOARCH.h" -#include "defs_GOOS_GOARCH.h" #include "malloc.h" -#include "os_GOOS.h" #include "stack.h" +#include "race.h" +#include "type.h" -bool runtime·iscgo; - -static void unwindstack(G*, byte*); -static void schedule(G*); - -typedef struct Sched Sched; - -M runtime·m0; -G runtime·g0; // idle goroutine for m0 - -static int32 debug = 0; - -int32 runtime·gcwaiting; - -// Go scheduler -// -// The go scheduler's job is to match ready-to-run goroutines (`g's) -// with waiting-for-work schedulers (`m's). If there are ready g's -// and no waiting m's, ready() will start a new m running in a new -// OS thread, so that all ready g's can run simultaneously, up to a limit. -// For now, m's never go away. -// -// By default, Go keeps only one kernel thread (m) running user code -// at a single time; other threads may be blocked in the operating system. -// Setting the environment variable $GOMAXPROCS or calling -// runtime.GOMAXPROCS() will change the number of user threads -// allowed to execute simultaneously. $GOMAXPROCS is thus an -// approximation of the maximum number of cores to use. +// Goroutine scheduler +// The scheduler's job is to distribute ready-to-run goroutines over worker threads. // -// Even a program that can run without deadlock in a single process -// might use more m's if given the chance. For example, the prime -// sieve will use as many m's as there are primes (up to runtime·sched.mmax), -// allowing different stages of the pipeline to execute in parallel. -// We could revisit this choice, only kicking off new m's for blocking -// system calls, but that would limit the amount of parallel computation -// that go would try to do. +// The main concepts are: +// G - goroutine. +// M - worker thread, or machine. +// P - processor, a resource that is required to execute Go code. +// M must have an associated P to execute Go code, however it can be +// blocked or in a syscall w/o an associated P. // -// In general, one could imagine all sorts of refinements to the -// scheduler, but the goal now is just to get something working on -// Linux and OS X. +// Design doc at http://golang.org/s/go11sched. +typedef struct Sched Sched; struct Sched { Lock; - G *gfree; // available g's (status == Gdead) - int32 goidgen; + uint64 goidgen; - G *ghead; // g's waiting to run - G *gtail; - int32 gwait; // number of g's waiting to run - int32 gcount; // number of g's that are alive - int32 grunning; // number of g's running on cpu or in syscall + M* midle; // idle m's waiting for work + int32 nmidle; // number of idle m's waiting for work + int32 mlocked; // number of locked m's waiting for work + int32 mcount; // number of m's that have been created - M *mhead; // m's waiting for work - int32 mwait; // number of m's waiting for work - int32 mcount; // number of m's that have been created + P* pidle; // idle P's + uint32 npidle; + uint32 nmspinning; - volatile uint32 atomic; // atomic scheduling word (see below) + // Global runnable queue. + G* runqhead; + G* runqtail; + int32 runqsize; - int32 profilehz; // cpu profiling rate + // Global cache of dead G's. 
+ Lock gflock; + G* gfree; - bool init; // running initialization - bool lockmain; // init called runtime.LockOSThread + int32 stopwait; + Note stopnote; + uint32 sysmonwait; + Note sysmonnote; - Note stopped; // one g can set waitstop and wait here for m's to stop + int32 profilehz; // cpu profiling rate }; -// The atomic word in sched is an atomic uint32 that -// holds these fields. -// -// [15 bits] mcpu number of m's executing on cpu -// [15 bits] mcpumax max number of m's allowed on cpu -// [1 bit] waitstop some g is waiting on stopped -// [1 bit] gwaiting gwait != 0 -// -// These fields are the information needed by entersyscall -// and exitsyscall to decide whether to coordinate with the -// scheduler. Packing them into a single machine word lets -// them use a fast path with a single atomic read/write and -// no lock/unlock. This greatly reduces contention in -// syscall- or cgo-heavy multithreaded programs. -// -// Except for entersyscall and exitsyscall, the manipulations -// to these fields only happen while holding the schedlock, -// so the routines holding schedlock only need to worry about -// what entersyscall and exitsyscall do, not the other routines -// (which also use the schedlock). -// -// In particular, entersyscall and exitsyscall only read mcpumax, -// waitstop, and gwaiting. They never write them. Thus, writes to those -// fields can be done (holding schedlock) without fear of write conflicts. -// There may still be logic conflicts: for example, the set of waitstop must -// be conditioned on mcpu >= mcpumax or else the wait may be a -// spurious sleep. The Promela model in proc.p verifies these accesses. -enum { - mcpuWidth = 15, - mcpuMask = (1<<mcpuWidth) - 1, - mcpuShift = 0, - mcpumaxShift = mcpuShift + mcpuWidth, - waitstopShift = mcpumaxShift + mcpuWidth, - gwaitingShift = waitstopShift+1, - - // The max value of GOMAXPROCS is constrained - // by the max value we can store in the bit fields - // of the atomic word. Reserve a few high values - // so that we can detect accidental decrement - // beyond zero. - maxgomaxprocs = mcpuMask - 10, -}; - -#define atomic_mcpu(v) (((v)>>mcpuShift)&mcpuMask) -#define atomic_mcpumax(v) (((v)>>mcpumaxShift)&mcpuMask) -#define atomic_waitstop(v) (((v)>>waitstopShift)&1) -#define atomic_gwaiting(v) (((v)>>gwaitingShift)&1) - -Sched runtime·sched; -int32 runtime·gomaxprocs; -bool runtime·singleproc; - -static bool canaddmcpu(void); - -// An m that is waiting for notewakeup(&m->havenextg). This may -// only be accessed while the scheduler lock is held. This is used to -// minimize the number of times we call notewakeup while the scheduler -// lock is held, since the m will normally move quickly to lock the -// scheduler itself, producing lock contention. -static M* mwakeup; - -// Scheduling helpers. Sched must be locked. -static void gput(G*); // put/get on ghead/gtail -static G* gget(void); -static void mput(M*); // put/get on mhead -static M* mget(G*); -static void gfput(G*); // put/get on gfree -static G* gfget(void); -static void matchmg(void); // match m's to g's -static void readylocked(G*); // ready, but sched is locked -static void mnextg(M*, G*); -static void mcommoninit(M*); - -void -setmcpumax(uint32 n) -{ - uint32 v, w; - - for(;;) { - v = runtime·sched.atomic; - w = v; - w &= ~(mcpuMask<<mcpumaxShift); - w |= n<<mcpumaxShift; - if(runtime·cas(&runtime·sched.atomic, v, w)) - break; - } -} +// The max value of GOMAXPROCS. +// There are no fundamental restrictions on the value. 
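The G/M/P comment and Sched structure above describe machinery that user code observes only through runtime.GOMAXPROCS (parsed from the environment in schedinit below and clamped to MaxGomaxprocs). A minimal sketch of the user-visible behavior, assuming nothing beyond the standard library:

package main

import (
	"fmt"
	"runtime"
	"sync"
)

func main() {
	// GOMAXPROCS returns the previous setting; under the new scheduler
	// it is the number of P's, so at most 4 goroutines run Go code
	// simultaneously, however many G's and M's exist.
	prev := runtime.GOMAXPROCS(4)
	fmt.Println("previous GOMAXPROCS:", prev)

	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func(id int) { // each goroutine is a G
			defer wg.Done()
			fmt.Println("goroutine", id, "ran")
		}(i)
	}
	wg.Wait()
}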
+enum { MaxGomaxprocs = 1<<8 }; +Sched runtime·sched; +int32 runtime·gomaxprocs; +bool runtime·singleproc; +bool runtime·iscgo; +uint32 runtime·gcwaiting; +M runtime·m0; +G runtime·g0; // idle goroutine for m0 +G* runtime·allg; +G* runtime·lastg; +M* runtime·allm; +M* runtime·extram; +int8* runtime·goos; +int32 runtime·ncpu; +static int32 newprocs; // Keep trace of scavenger's goroutine for deadlock detection. static G *scvg; +void runtime·mstart(void); +static void runqput(P*, G*); +static G* runqget(P*); +static void runqgrow(P*); +static G* runqsteal(P*, P*); +static void mput(M*); +static M* mget(void); +static void mcommoninit(M*); +static void schedule(void); +static void procresize(int32); +static void acquirep(P*); +static P* releasep(void); +static void newm(void(*)(void), P*); +static void goidle(void); +static void stopm(void); +static void startm(P*, bool); +static void handoffp(P*); +static void wakep(void); +static void stoplockedm(void); +static void startlockedm(G*); +static void sysmon(void); +static uint32 retake(uint32*); +static void inclocked(int32); +static void checkdead(void); +static void exitsyscall0(G*); +static void park0(G*); +static void gosched0(G*); +static void goexit0(G*); +static void gfput(P*, G*); +static G* gfget(P*); +static void gfpurge(P*); +static void globrunqput(G*); +static G* globrunqget(P*); +static P* pidleget(void); +static void pidleput(P*); + // The bootstrap sequence is: // // call osinit @@ -178,10 +121,11 @@ static G *scvg; void runtime·schedinit(void) { - int32 n; + int32 n, procs; byte *p; m->nomemprof++; + runtime·mprofinit(); runtime·mallocinit(); mcommoninit(m); @@ -193,93 +137,70 @@ runtime·schedinit(void) // so that we don't need to call malloc when we crash. // runtime·findfunc(0); - runtime·gomaxprocs = 1; + procs = 1; p = runtime·getenv("GOMAXPROCS"); - if(p != nil && (n = runtime·atoi(p)) != 0) { - if(n > maxgomaxprocs) - n = maxgomaxprocs; - runtime·gomaxprocs = n; + if(p != nil && (n = runtime·atoi(p)) > 0) { + if(n > MaxGomaxprocs) + n = MaxGomaxprocs; + procs = n; } - // wait for the main goroutine to start before taking - // GOMAXPROCS into account. - setmcpumax(1); - runtime·singleproc = runtime·gomaxprocs == 1; - - canaddmcpu(); // mcpu++ to account for bootstrap m - m->helpgc = 1; // flag to tell schedule() to mcpu-- - runtime·sched.grunning++; + runtime·allp = runtime·malloc((MaxGomaxprocs+1)*sizeof(runtime·allp[0])); + procresize(procs); mstats.enablegc = 1; m->nomemprof--; + + if(raceenabled) + g->racectx = runtime·raceinit(); } extern void main·init(void); extern void main·main(void); +static FuncVal scavenger = {runtime·MHeap_Scavenger}; + // The main goroutine. void runtime·main(void) { + newm(sysmon, nil); + // Lock the main goroutine onto this, the main OS thread, // during initialization. Most programs won't care, but a few // do require certain calls to be made by the main thread. // Those can arrange for main.main to run in the main thread // by calling runtime.LockOSThread during initialization // to preserve the lock. - runtime·LockOSThread(); - // From now on, newgoroutines may use non-main threads. 
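runtime·main above locks the main goroutine onto the main OS thread for the duration of package initialization, which is what makes the documented pattern below work; a sketch (the comment in main stands for any thread-affine foreign call):

package main

import "runtime"

func init() {
	// runtime·main holds the main thread locked while init functions
	// run, so locking here preserves the lock past initialization and
	// pins main.main to the main OS thread.
	runtime.LockOSThread()
}

func main() {
	// Thread-affine work (e.g. certain C GUI libraries) is safe here.
}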
- setmcpumax(runtime·gomaxprocs); - runtime·sched.init = true; - scvg = runtime·newproc1((byte*)runtime·MHeap_Scavenger, nil, 0, 0, runtime·main); + runtime·lockOSThread(); + if(m != &runtime·m0) + runtime·throw("runtime·main not on m0"); + scvg = runtime·newproc1(&scavenger, nil, 0, 0, runtime·main); + scvg->issystem = true; main·init(); - runtime·sched.init = false; - if(!runtime·sched.lockmain) - runtime·UnlockOSThread(); - - // The deadlock detection has false negatives. - // Let scvg start up, to eliminate the false negative - // for the trivial program func main() { select{} }. - runtime·gosched(); + runtime·unlockOSThread(); main·main(); + if(raceenabled) + runtime·racefini(); + + // Make racy client program work: if panicking on + // another goroutine at the same time as main returns, + // let the other goroutine finish printing the panic trace. + // Once it does, it will exit. See issue 3934. + if(runtime·panicking) + runtime·park(nil, nil, "panicwait"); + runtime·exit(0); for(;;) *(int32*)runtime·main = 0; } -// Lock the scheduler. -static void -schedlock(void) -{ - runtime·lock(&runtime·sched); -} - -// Unlock the scheduler. -static void -schedunlock(void) -{ - M *m; - - m = mwakeup; - mwakeup = nil; - runtime·unlock(&runtime·sched); - if(m != nil) - runtime·notewakeup(&m->havenextg); -} - -void -runtime·goexit(void) -{ - g->status = Gmoribund; - runtime·gosched(); -} - void -runtime·goroutineheader(G *g) +runtime·goroutineheader(G *gp) { int8 *status; - switch(g->status) { + switch(gp->status) { case Gidle: status = "idle"; break; @@ -293,609 +214,748 @@ runtime·goroutineheader(G *g) status = "syscall"; break; case Gwaiting: - if(g->waitreason) - status = g->waitreason; + if(gp->waitreason) + status = gp->waitreason; else status = "waiting"; break; - case Gmoribund: - status = "moribund"; - break; default: status = "???"; break; } - runtime·printf("goroutine %d [%s]:\n", g->goid, status); + runtime·printf("goroutine %D [%s]:\n", gp->goid, status); } void runtime·tracebackothers(G *me) { - G *g; + G *gp; + int32 traceback; - for(g = runtime·allg; g != nil; g = g->alllink) { - if(g == me || g->status == Gdead) + traceback = runtime·gotraceback(); + for(gp = runtime·allg; gp != nil; gp = gp->alllink) { + if(gp == me || gp->status == Gdead) + continue; + if(gp->issystem && traceback < 2) continue; runtime·printf("\n"); - runtime·goroutineheader(g); - runtime·traceback(g->sched.pc, g->sched.sp, 0, g); + runtime·goroutineheader(gp); + runtime·traceback(gp->sched.pc, (byte*)gp->sched.sp, 0, gp); } } -// Mark this g as m's idle goroutine. -// This functionality might be used in environments where programs -// are limited to a single thread, to simulate a select-driven -// network server. It is not exposed via the standard runtime API. -void -runtime·idlegoroutine(void) -{ - if(g->idlem != nil) - runtime·throw("g is already an idle goroutine"); - g->idlem = m; -} - static void -mcommoninit(M *m) +mcommoninit(M *mp) { - m->id = runtime·sched.mcount++; - m->fastrand = 0x49f6428aUL + m->id + runtime·cputicks(); - m->stackalloc = runtime·malloc(sizeof(*m->stackalloc)); - runtime·FixAlloc_Init(m->stackalloc, FixedStack, runtime·SysAlloc, nil, nil); + // If there is no mcache runtime·callers() will crash, + // and we are most likely in sysmon thread so the stack is senseless anyway. 
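runtime·gcprocs above is a chain of clamps. A hedged Go model of the same arithmetic; gcprocsModel and its parameters are illustrative stand-ins for the runtime internals (MaxGcproc, the idle-M count), not a real API:

package main

import "fmt"

// gcprocsModel mirrors runtime·gcprocs: GC helpers are bounded by
// GOMAXPROCS, the actual CPU count, MaxGcproc, and the idle M's
// available to help, plus the one M already running.
func gcprocsModel(gomaxprocs, ncpu, maxGcproc, nmidle int) int {
	n := gomaxprocs
	if n > ncpu {
		n = ncpu
	}
	if n > maxGcproc {
		n = maxGcproc
	}
	if n > nmidle+1 { // one M is currently running
		n = nmidle + 1
	}
	return n
}

func main() {
	// 8 P's on a 4-CPU box with one idle M: GC gets 2 procs.
	fmt.Println(gcprocsModel(8, 4, 8, 1))
}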
+ if(m->mcache) + runtime·callers(1, mp->createstack, nelem(mp->createstack)); - if(m->mcache == nil) - m->mcache = runtime·allocmcache(); + mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks(); - runtime·callers(1, m->createstack, nelem(m->createstack)); + runtime·lock(&runtime·sched); + mp->id = runtime·sched.mcount++; + + runtime·mpreinit(mp); // Add to runtime·allm so garbage collector doesn't free m // when it is just in a register or thread-local storage. - m->alllink = runtime·allm; + mp->alllink = runtime·allm; // runtime·NumCgoCall() iterates over allm w/o schedlock, // so we need to publish it safely. - runtime·atomicstorep(&runtime·allm, m); + runtime·atomicstorep(&runtime·allm, mp); + runtime·unlock(&runtime·sched); } -// Try to increment mcpu. Report whether succeeded. -static bool -canaddmcpu(void) +// Mark gp ready to run. +void +runtime·ready(G *gp) { - uint32 v; - - for(;;) { - v = runtime·sched.atomic; - if(atomic_mcpu(v) >= atomic_mcpumax(v)) - return 0; - if(runtime·cas(&runtime·sched.atomic, v, v+(1<<mcpuShift))) - return 1; + // Mark runnable. + if(gp->status != Gwaiting) { + runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status); + runtime·throw("bad g->status in ready"); } + gp->status = Grunnable; + runqput(m->p, gp); + if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0) // TODO: fast atomic + wakep(); } -// Put on `g' queue. Sched must be locked. -static void -gput(G *g) +int32 +runtime·gcprocs(void) { - M *m; - - // If g is wired, hand it off directly. - if((m = g->lockedm) != nil && canaddmcpu()) { - mnextg(m, g); - return; - } + int32 n; - // If g is the idle goroutine for an m, hand it off. - if(g->idlem != nil) { - if(g->idlem->idleg != nil) { - runtime·printf("m%d idle out of sync: g%d g%d\n", - g->idlem->id, - g->idlem->idleg->goid, g->goid); - runtime·throw("runtime: double idle"); - } - g->idlem->idleg = g; - return; - } + // Figure out how many CPUs to use during GC. + // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. + runtime·lock(&runtime·sched); + n = runtime·gomaxprocs; + if(n > runtime·ncpu) + n = runtime·ncpu; + if(n > MaxGcproc) + n = MaxGcproc; + if(n > runtime·sched.nmidle+1) // one M is currently running + n = runtime·sched.nmidle+1; + runtime·unlock(&runtime·sched); + return n; +} - g->schedlink = nil; - if(runtime·sched.ghead == nil) - runtime·sched.ghead = g; - else - runtime·sched.gtail->schedlink = g; - runtime·sched.gtail = g; +static bool +needaddgcproc(void) +{ + int32 n; - // increment gwait. - // if it transitions to nonzero, set atomic gwaiting bit. - if(runtime·sched.gwait++ == 0) - runtime·xadd(&runtime·sched.atomic, 1<<gwaitingShift); + runtime·lock(&runtime·sched); + n = runtime·gomaxprocs; + if(n > runtime·ncpu) + n = runtime·ncpu; + if(n > MaxGcproc) + n = MaxGcproc; + n -= runtime·sched.nmidle+1; // one M is currently running + runtime·unlock(&runtime·sched); + return n > 0; } -// Report whether gget would return something. 
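The interesting step in runtime·stoptheworld above is retaking P's stuck in Psyscall with a bare compare-and-swap, so a P that exitsyscall grabs back at the same instant is never counted twice. A hedged Go model of that single transition (the status constants are stand-ins):

package main

import (
	"fmt"
	"sync/atomic"
)

// Stand-ins for the runtime's P status values.
const (
	pSyscall uint32 = iota
	pGcstop
)

func main() {
	status := pSyscall
	// Only the CAS winner decrements stopwait; a loser knows the P's
	// owner reclaimed it and will stop voluntarily instead.
	if atomic.CompareAndSwapUint32(&status, pSyscall, pGcstop) {
		fmt.Println("P retaken for stop-the-world")
	}
}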
-static bool -haveg(void) +void +runtime·helpgc(int32 nproc) { - return runtime·sched.ghead != nil || m->idleg != nil; + M *mp; + int32 n, pos; + + runtime·lock(&runtime·sched); + pos = 0; + for(n = 1; n < nproc; n++) { // one M is currently running + if(runtime·allp[pos]->mcache == m->mcache) + pos++; + mp = mget(); + if(mp == nil) + runtime·throw("runtime·gcprocs inconsistency"); + mp->helpgc = 1; + mp->mcache = runtime·allp[pos]->mcache; + pos++; + runtime·notewakeup(&mp->park); + } + runtime·unlock(&runtime·sched); } -// Get from `g' queue. Sched must be locked. -static G* -gget(void) +void +runtime·stoptheworld(void) { - G *g; + int32 i; + uint32 s; + P *p; + bool wait; - g = runtime·sched.ghead; - if(g){ - runtime·sched.ghead = g->schedlink; - if(runtime·sched.ghead == nil) - runtime·sched.gtail = nil; - // decrement gwait. - // if it transitions to zero, clear atomic gwaiting bit. - if(--runtime·sched.gwait == 0) - runtime·xadd(&runtime·sched.atomic, -1<<gwaitingShift); - } else if(m->idleg != nil) { - g = m->idleg; - m->idleg = nil; + runtime·lock(&runtime·sched); + runtime·sched.stopwait = runtime·gomaxprocs; + runtime·atomicstore((uint32*)&runtime·gcwaiting, 1); + // stop current P + m->p->status = Pgcstop; + runtime·sched.stopwait--; + // try to retake all P's in Psyscall status + for(i = 0; i < runtime·gomaxprocs; i++) { + p = runtime·allp[i]; + s = p->status; + if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop)) + runtime·sched.stopwait--; + } + // stop idle P's + while(p = pidleget()) { + p->status = Pgcstop; + runtime·sched.stopwait--; + } + wait = runtime·sched.stopwait > 0; + runtime·unlock(&runtime·sched); + + // wait for remaining P's to stop voluntary + if(wait) { + runtime·notesleep(&runtime·sched.stopnote); + runtime·noteclear(&runtime·sched.stopnote); + } + if(runtime·sched.stopwait) + runtime·throw("stoptheworld: not stopped"); + for(i = 0; i < runtime·gomaxprocs; i++) { + p = runtime·allp[i]; + if(p->status != Pgcstop) + runtime·throw("stoptheworld: not stopped"); } - return g; } -// Put on `m' list. Sched must be locked. static void -mput(M *m) +mhelpgc(void) { - m->schedlink = runtime·sched.mhead; - runtime·sched.mhead = m; - runtime·sched.mwait++; + m->helpgc = 1; } -// Get an `m' to run `g'. Sched must be locked. -static M* -mget(G *g) +void +runtime·starttheworld(void) { - M *m; + P *p; + M *mp; + bool add; - // if g has its own m, use it. - if(g && (m = g->lockedm) != nil) - return m; + add = needaddgcproc(); + runtime·lock(&runtime·sched); + if(newprocs) { + procresize(newprocs); + newprocs = 0; + } else + procresize(runtime·gomaxprocs); + runtime·gcwaiting = 0; - // otherwise use general m pool. - if((m = runtime·sched.mhead) != nil){ - runtime·sched.mhead = m->schedlink; - runtime·sched.mwait--; + while(p = pidleget()) { + // procresize() puts p's with work at the beginning of the list. + // Once we reach a p without a run queue, the rest don't have one either. + if(p->runqhead == p->runqtail) { + pidleput(p); + break; + } + mp = mget(); + if(mp == nil) { + pidleput(p); + break; + } + if(mp->nextp) + runtime·throw("starttheworld: inconsistent mp->nextp"); + mp->nextp = p; + runtime·notewakeup(&mp->park); + } + if(runtime·sched.sysmonwait) { + runtime·sched.sysmonwait = false; + runtime·notewakeup(&runtime·sched.sysmonnote); + } + runtime·unlock(&runtime·sched); + + if(add) { + // If GC could have used another helper proc, start one now, + // in the hope that it will be available next time. 
+ // It would have been even better to start it before the collection, + // but doing so requires allocating memory, so it's tricky to + // coordinate. This lazy approach works out in practice: + // we don't mind if the first couple gc rounds don't have quite + // the maximum number of procs. + newm(mhelpgc, nil); } - return m; } -// Mark g ready to run. +// Called to start an M. void -runtime·ready(G *g) +runtime·mstart(void) { - schedlock(); - readylocked(g); - schedunlock(); -} + // It is used by windows-386 only. Unfortunately, seh needs + // to be located on os stack, and mstart runs on os stack + // for both m0 and m. + SEH seh; -// Mark g ready to run. Sched is already locked. -// G might be running already and about to stop. -// The sched lock protects g->status from changing underfoot. -static void -readylocked(G *g) -{ - if(g->m){ - // Running on another machine. - // Ready it when it stops. - g->readyonstop = 1; - return; + if(g != m->g0) + runtime·throw("bad runtime·mstart"); + + // Record top of stack for use by mcall. + // Once we call schedule we're never coming back, + // so other calls can reuse this stack space. + runtime·gosave(&m->g0->sched); + m->g0->sched.pc = (void*)-1; // make sure it is never used + m->seh = &seh; + runtime·asminit(); + runtime·minit(); + + // Install signal handlers; after minit so that minit can + // prepare the thread to be able to handle the signals. + if(m == &runtime·m0) { + runtime·initsig(); + if(runtime·iscgo) + runtime·newextram(); } + + if(m->mstartfn) + m->mstartfn(); - // Mark runnable. - if(g->status == Grunnable || g->status == Grunning) { - runtime·printf("goroutine %d has status %d\n", g->goid, g->status); - runtime·throw("bad g->status in ready"); + if(m->helpgc) { + m->helpgc = false; + stopm(); + } else if(m != &runtime·m0) { + acquirep(m->nextp); + m->nextp = nil; } - g->status = Grunnable; + schedule(); - gput(g); - matchmg(); + // TODO(brainman): This point is never reached, because scheduler + // does not release os threads at the moment. But once this path + // is enabled, we must remove our seh here. } -static void -nop(void) -{ -} +// When running with cgo, we call _cgo_thread_start +// to start threads for us so that we can play nicely with +// foreign code. +void (*_cgo_thread_start)(void*); -// Same as readylocked but a different symbol so that -// debuggers can set a breakpoint here and catch all -// new goroutines. -static void -newprocreadylocked(G *g) +typedef struct CgoThreadStart CgoThreadStart; +struct CgoThreadStart { - nop(); // avoid inlining in 6l - readylocked(g); -} + M *m; + G *g; + void (*fn)(void); +}; -// Pass g to m for running. -// Caller has already incremented mcpu. -static void -mnextg(M *m, G *g) +// Allocate a new m unassociated with any thread. +// Can use p for allocation context if needed. +M* +runtime·allocm(P *p) { - runtime·sched.grunning++; - m->nextg = g; - if(m->waitnextg) { - m->waitnextg = 0; - if(mwakeup != nil) - runtime·notewakeup(&mwakeup->havenextg); - mwakeup = m; + M *mp; + static Type *mtype; // The Go type M + + m->locks++; // disable GC because it can be called from sysmon + if(m->p == nil) + acquirep(p); // temporarily borrow p for mallocs in this function + if(mtype == nil) { + Eface e; + runtime·gc_m_ptr(&e); + mtype = ((PtrType*)e.type)->elem; } -} -// Get the next goroutine that m should run. -// Sched must be locked on entry, is unlocked on exit. -// Makes sure that at most $GOMAXPROCS g's are -// running on cpus (not in system calls) at any given time. 
-static G* -nextgandunlock(void) -{ - G *gp; - uint32 v; + mp = runtime·cnew(mtype); + mcommoninit(mp); -top: - if(atomic_mcpu(runtime·sched.atomic) >= maxgomaxprocs) - runtime·throw("negative mcpu"); - - // If there is a g waiting as m->nextg, the mcpu++ - // happened before it was passed to mnextg. - if(m->nextg != nil) { - gp = m->nextg; - m->nextg = nil; - schedunlock(); - return gp; - } + // In case of cgo, pthread_create will make us a stack. + // Windows will layout sched stack on OS stack. + if(runtime·iscgo || Windows) + mp->g0 = runtime·malg(-1); + else + mp->g0 = runtime·malg(8192); - if(m->lockedg != nil) { - // We can only run one g, and it's not available. - // Make sure some other cpu is running to handle - // the ordinary run queue. - if(runtime·sched.gwait != 0) { - matchmg(); - // m->lockedg might have been on the queue. - if(m->nextg != nil) { - gp = m->nextg; - m->nextg = nil; - schedunlock(); - return gp; - } - } - } else { - // Look for work on global queue. - while(haveg() && canaddmcpu()) { - gp = gget(); - if(gp == nil) - runtime·throw("gget inconsistency"); - - if(gp->lockedm) { - mnextg(gp->lockedm, gp); - continue; - } - runtime·sched.grunning++; - schedunlock(); - return gp; - } + if(p == m->p) + releasep(); + m->locks--; - // The while loop ended either because the g queue is empty - // or because we have maxed out our m procs running go - // code (mcpu >= mcpumax). We need to check that - // concurrent actions by entersyscall/exitsyscall cannot - // invalidate the decision to end the loop. - // - // We hold the sched lock, so no one else is manipulating the - // g queue or changing mcpumax. Entersyscall can decrement - // mcpu, but if does so when there is something on the g queue, - // the gwait bit will be set, so entersyscall will take the slow path - // and use the sched lock. So it cannot invalidate our decision. - // - // Wait on global m queue. - mput(m); - } - - // Look for deadlock situation. - // There is a race with the scavenger that causes false negatives: - // if the scavenger is just starting, then we have - // scvg != nil && grunning == 0 && gwait == 0 - // and we do not detect a deadlock. It is possible that we should - // add that case to the if statement here, but it is too close to Go 1 - // to make such a subtle change. Instead, we work around the - // false negative in trivial programs by calling runtime.gosched - // from the main goroutine just before main.main. - // See runtime·main above. - // - // On a related note, it is also possible that the scvg == nil case is - // wrong and should include gwait, but that does not happen in - // standard Go programs, which all start the scavenger. - // - if((scvg == nil && runtime·sched.grunning == 0) || - (scvg != nil && runtime·sched.grunning == 1 && runtime·sched.gwait == 0 && - (scvg->status == Grunning || scvg->status == Gsyscall))) { - runtime·throw("all goroutines are asleep - deadlock!"); - } - - m->nextg = nil; - m->waitnextg = 1; - runtime·noteclear(&m->havenextg); - - // Stoptheworld is waiting for all but its cpu to go to stop. - // Entersyscall might have decremented mcpu too, but if so - // it will see the waitstop and take the slow path. - // Exitsyscall never increments mcpu beyond mcpumax. 
- v = runtime·atomicload(&runtime·sched.atomic); - if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) { - // set waitstop = 0 (known to be 1) - runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift); - runtime·notewakeup(&runtime·sched.stopped); - } - schedunlock(); - - runtime·notesleep(&m->havenextg); - if(m->helpgc) { - runtime·gchelper(); - m->helpgc = 0; - runtime·lock(&runtime·sched); - goto top; - } - if((gp = m->nextg) == nil) - runtime·throw("bad m->nextg in nextgoroutine"); - m->nextg = nil; - return gp; + return mp; } -int32 -runtime·helpgc(bool *extra) +static M* lockextra(bool nilokay); +static void unlockextra(M*); + +// needm is called when a cgo callback happens on a +// thread without an m (a thread not created by Go). +// In this case, needm is expected to find an m to use +// and return with m, g initialized correctly. +// Since m and g are not set now (likely nil, but see below) +// needm is limited in what routines it can call. In particular +// it can only call nosplit functions (textflag 7) and cannot +// do any scheduling that requires an m. +// +// In order to avoid needing heavy lifting here, we adopt +// the following strategy: there is a stack of available m's +// that can be stolen. Using compare-and-swap +// to pop from the stack has ABA races, so we simulate +// a lock by doing an exchange (via casp) to steal the stack +// head and replace the top pointer with MLOCKED (1). +// This serves as a simple spin lock that we can use even +// without an m. The thread that locks the stack in this way +// unlocks the stack by storing a valid stack head pointer. +// +// In order to make sure that there is always an m structure +// available to be stolen, we maintain the invariant that there +// is always one more than needed. At the beginning of the +// program (if cgo is in use) the list is seeded with a single m. +// If needm finds that it has taken the last m off the list, its job +// is - once it has installed its own m so that it can do things like +// allocate memory - to create a spare m and put it on the list. +// +// Each of these extra m's also has a g0 and a curg that are +// pressed into service as the scheduling stack and current +// goroutine for the duration of the cgo callback. +// +// When the callback is done with the m, it calls dropm to +// put the m back on the list. +#pragma textflag 7 +void +runtime·needm(byte x) { M *mp; - int32 n, max; - // Figure out how many CPUs to use. - // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc. - max = runtime·gomaxprocs; - if(max > runtime·ncpu) - max = runtime·ncpu; - if(max > MaxGcproc) - max = MaxGcproc; + // Lock extra list, take head, unlock popped list. + // nilokay=false is safe here because of the invariant above, + // that the extra list always contains or will soon contain + // at least one m. + mp = lockextra(false); + + // Set needextram when we've just emptied the list, + // so that the eventual call into cgocallbackg will + // allocate a new m for the extra list. We delay the + // allocation until then so that it can be done + // after exitsyscall makes sure it is okay to be + // running at all (that is, there's no garbage collection + // running right now). + mp->needextram = mp->schedlink == nil; + unlockextra(mp->schedlink); + + // Install m and g (= m->g0) and set the stack bounds + // to match the current stack. 
We don't actually know + // how big the stack is, like we don't know how big any + // scheduling stack is, but we assume there's at least 32 kB, + // which is more than enough for us. + runtime·setmg(mp, mp->g0); + g->stackbase = (uintptr)(&x + 1024); + g->stackguard = (uintptr)(&x - 32*1024); + + // On windows/386, we need to put an SEH frame (two words) + // somewhere on the current stack. We are called + // from needm, and we know there is some available + // space one word into the argument frame. Use that. + m->seh = (SEH*)((uintptr*)&x + 1); + + // Initialize this thread to use the m. + runtime·asminit(); + runtime·minit(); +} - // We're going to use one CPU no matter what. - // Figure out the max number of additional CPUs. - max--; +// newextram allocates an m and puts it on the extra list. +// It is called with a working local m, so that it can do things +// like call schedlock and allocate. +void +runtime·newextram(void) +{ + M *mp, *mnext; + G *gp; + // Create extra goroutine locked to extra m. + // The goroutine is the context in which the cgo callback will run. + // The sched.pc will never be returned to, but setting it to + // runtime.goexit makes clear to the traceback routines where + // the goroutine stack ends. + mp = runtime·allocm(nil); + gp = runtime·malg(4096); + gp->sched.pc = (void*)runtime·goexit; + gp->sched.sp = gp->stackbase; + gp->sched.g = gp; + gp->status = Gsyscall; + mp->curg = gp; + mp->locked = LockInternal; + mp->lockedg = gp; + gp->lockedm = mp; + // put on allg for garbage collector runtime·lock(&runtime·sched); - n = 0; - while(n < max && (mp = mget(nil)) != nil) { - n++; - mp->helpgc = 1; - mp->waitnextg = 0; - runtime·notewakeup(&mp->havenextg); - } + if(runtime·lastg == nil) + runtime·allg = gp; + else + runtime·lastg->alllink = gp; + runtime·lastg = gp; runtime·unlock(&runtime·sched); - if(extra) - *extra = n != max; - return n; + gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1); + if(raceenabled) + gp->racectx = runtime·racegostart(runtime·newextram); + + // Add m to the extra list. + mnext = lockextra(true); + mp->schedlink = mnext; + unlockextra(mp); } +// dropm is called when a cgo callback has called needm but is now +// done with the callback and returning back into the non-Go thread. +// It puts the current m back onto the extra list. +// +// The main expense here is the call to signalstack to release the +// m's signal stack, and then the call to needm on the next callback +// from this thread. It is tempting to try to save the m for next time, +// which would eliminate both these costs, but there might not be +// a next time: the current thread (which Go does not control) might exit. +// If we saved the m for that thread, there would be an m leak each time +// such a thread exited. Instead, we acquire and release an m on each +// call. These should typically not be scheduling operations, just a few +// atomics, so the cost should be small. +// +// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread +// variable using pthread_key_create. Unlike the pthread keys we already use +// on OS X, this dummy key would never be read by Go code. It would exist +// only so that we could register at thread-exit-time destructor. +// That destructor would put the m back onto the extra list. +// This is purely a performance optimization. The current version, +// in which dropm happens on each cgo call, is still correct too. +// We may have to keep the current version on systems with cgo +// but without pthreads, like Windows. 
void -runtime·stoptheworld(void) +runtime·dropm(void) { - uint32 v; + M *mp, *mnext; - schedlock(); - runtime·gcwaiting = 1; + // Undo whatever initialization minit did during needm. + runtime·unminit(); - setmcpumax(1); + // Clear m and g, and return m to the extra list. + // After the call to setmg we can only call nosplit functions. + mp = m; + runtime·setmg(nil, nil); - // while mcpu > 1 - for(;;) { - v = runtime·sched.atomic; - if(atomic_mcpu(v) <= 1) - break; + mnext = lockextra(true); + mp->schedlink = mnext; + unlockextra(mp); +} - // It would be unsafe for multiple threads to be using - // the stopped note at once, but there is only - // ever one thread doing garbage collection. - runtime·noteclear(&runtime·sched.stopped); - if(atomic_waitstop(v)) - runtime·throw("invalid waitstop"); +#define MLOCKED ((M*)1) - // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above - // still being true. - if(!runtime·cas(&runtime·sched.atomic, v, v+(1<<waitstopShift))) - continue; +// lockextra locks the extra list and returns the list head. +// The caller must unlock the list by storing a new list head +// to runtime.extram. If nilokay is true, then lockextra will +// return a nil list head if that's what it finds. If nilokay is false, +// lockextra will keep waiting until the list head is no longer nil. +#pragma textflag 7 +static M* +lockextra(bool nilokay) +{ + M *mp; + void (*yield)(void); - schedunlock(); - runtime·notesleep(&runtime·sched.stopped); - schedlock(); + for(;;) { + mp = runtime·atomicloadp(&runtime·extram); + if(mp == MLOCKED) { + yield = runtime·osyield; + yield(); + continue; + } + if(mp == nil && !nilokay) { + runtime·usleep(1); + continue; + } + if(!runtime·casp(&runtime·extram, mp, MLOCKED)) { + yield = runtime·osyield; + yield(); + continue; + } + break; } - runtime·singleproc = runtime·gomaxprocs == 1; - schedunlock(); + return mp; } -void -runtime·starttheworld(bool extra) +#pragma textflag 7 +static void +unlockextra(M *mp) { - M *m; - - schedlock(); - runtime·gcwaiting = 0; - setmcpumax(runtime·gomaxprocs); - matchmg(); - if(extra && canaddmcpu()) { - // Start a new m that will (we hope) be idle - // and so available to help when the next - // garbage collection happens. - // canaddmcpu above did mcpu++ - // (necessary, because m will be doing various - // initialization work so is definitely running), - // but m is not running a specific goroutine, - // so set the helpgc flag as a signal to m's - // first schedule(nil) to mcpu-- and grunning--. - m = runtime·newm(); - m->helpgc = 1; - runtime·sched.grunning++; - } - schedunlock(); + runtime·atomicstorep(&runtime·extram, mp); } -// Called to start an M. -void -runtime·mstart(void) + +// Create a new m. It will start off with a call to fn, or else the scheduler. +static void +newm(void(*fn)(void), P *p) { - if(g != m->g0) - runtime·throw("bad runtime·mstart"); + M *mp; - // Record top of stack for use by mcall. - // Once we call schedule we're never coming back, - // so other calls can reuse this stack space. - runtime·gosave(&m->g0->sched); - m->g0->sched.pc = (void*)-1; // make sure it is never used - runtime·asminit(); - runtime·minit(); + mp = runtime·allocm(p); + mp->nextp = p; + mp->mstartfn = fn; - // Install signal handlers; after minit so that minit can - // prepare the thread to be able to handle the signals. 
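lockextra above turns a lock-free list into a spin lock by swapping the head for the MLOCKED sentinel, which sidesteps the ABA races of popping a single node. A sketch of the same idiom in Go; lockList, unlockList, and node are illustrative, with atomic pointer operations standing in for casp:

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type node struct{ next *node }

var head unsafe.Pointer                  // *node: the list head
var sentinel = unsafe.Pointer(new(node)) // plays the role of MLOCKED

// lockList spins until it exchanges the head for the sentinel; the
// caller then owns the entire list, as lockextra owns the extra M's.
func lockList() *node {
	for {
		p := atomic.LoadPointer(&head)
		if p == sentinel {
			continue // held by someone else (the runtime osyields here)
		}
		if atomic.CompareAndSwapPointer(&head, p, sentinel) {
			return (*node)(p)
		}
	}
}

// unlockList releases the "lock" by publishing a valid head again.
func unlockList(n *node) { atomic.StorePointer(&head, unsafe.Pointer(n)) }

func main() {
	unlockList(&node{}) // seed one node, like the one-spare-M invariant
	n := lockList()     // take the whole list
	unlockList(n.next)  // push back the remainder
	fmt.Println("took the head node")
}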
- if(m == &runtime·m0) - runtime·initsig(); + if(runtime·iscgo) { + CgoThreadStart ts; - schedule(nil); + if(_cgo_thread_start == nil) + runtime·throw("_cgo_thread_start missing"); + ts.m = mp; + ts.g = mp->g0; + ts.fn = runtime·mstart; + runtime·asmcgocall(_cgo_thread_start, &ts); + return; + } + runtime·newosproc(mp, (byte*)mp->g0->stackbase); } -// When running with cgo, we call libcgo_thread_start -// to start threads for us so that we can play nicely with -// foreign code. -void (*libcgo_thread_start)(void*); +// Stops execution of the current m until new work is available. +// Returns with acquired P. +static void +stopm(void) +{ + if(m->locks) + runtime·throw("stopm holding locks"); + if(m->p) + runtime·throw("stopm holding p"); + if(m->spinning) { + m->spinning = false; + runtime·xadd(&runtime·sched.nmspinning, -1); + } -typedef struct CgoThreadStart CgoThreadStart; -struct CgoThreadStart +retry: + runtime·lock(&runtime·sched); + mput(m); + runtime·unlock(&runtime·sched); + runtime·notesleep(&m->park); + runtime·noteclear(&m->park); + if(m->helpgc) { + m->helpgc = 0; + runtime·gchelper(); + m->mcache = nil; + goto retry; + } + acquirep(m->nextp); + m->nextp = nil; +} + +static void +mspinning(void) { - M *m; - G *g; - void (*fn)(void); -}; + m->spinning = true; +} -// Kick off new m's as needed (up to mcpumax). -// Sched is locked. +// Schedules some M to run the p (creates an M if necessary). +// If p==nil, tries to get an idle P, if no idle P's returns false. static void -matchmg(void) +startm(P *p, bool spinning) { - G *gp; M *mp; + void (*fn)(void); - if(m->mallocing || m->gcing) + runtime·lock(&runtime·sched); + if(p == nil) { + p = pidleget(); + if(p == nil) { + runtime·unlock(&runtime·sched); + if(spinning) + runtime·xadd(&runtime·sched.nmspinning, -1); + return; + } + } + mp = mget(); + runtime·unlock(&runtime·sched); + if(mp == nil) { + fn = nil; + if(spinning) + fn = mspinning; + newm(fn, p); return; - - while(haveg() && canaddmcpu()) { - gp = gget(); - if(gp == nil) - runtime·throw("gget inconsistency"); - - // Find the m that will run gp. - if((mp = mget(gp)) == nil) - mp = runtime·newm(); - mnextg(mp, gp); } + if(mp->spinning) + runtime·throw("startm: m is spinning"); + if(mp->nextp) + runtime·throw("startm: m has p"); + mp->spinning = spinning; + mp->nextp = p; + runtime·notewakeup(&mp->park); } -// Create a new m. It will start off with a call to runtime·mstart. -M* -runtime·newm(void) +// Hands off P from syscall or locked M. +static void +handoffp(P *p) { - M *m; + // if it has local work, start it straight away + if(p->runqhead != p->runqtail || runtime·sched.runqsize) { + startm(p, false); + return; + } + // no local work, check that there are no spinning/idle M's, + // otherwise our help is not required + if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 && // TODO: fast atomic + runtime·cas(&runtime·sched.nmspinning, 0, 1)) { + startm(p, true); + return; + } + runtime·lock(&runtime·sched); + if(runtime·gcwaiting) { + p->status = Pgcstop; + if(--runtime·sched.stopwait == 0) + runtime·notewakeup(&runtime·sched.stopnote); + runtime·unlock(&runtime·sched); + return; + } + if(runtime·sched.runqsize) { + runtime·unlock(&runtime·sched); + startm(p, false); + return; + } + pidleput(p); + runtime·unlock(&runtime·sched); +} - m = runtime·malloc(sizeof(M)); - mcommoninit(m); +// Tries to add one more P to execute G's. +// Called when a G is made runnable (newproc, ready). 
+static void +wakep(void) +{ + // be conservative about spinning threads + if(!runtime·cas(&runtime·sched.nmspinning, 0, 1)) + return; + startm(nil, true); +} - if(runtime·iscgo) { - CgoThreadStart ts; +// Stops execution of the current m that is locked to a g until the g is runnable again. +// Returns with acquired P. +static void +stoplockedm(void) +{ + P *p; - if(libcgo_thread_start == nil) - runtime·throw("libcgo_thread_start missing"); - // pthread_create will make us a stack. - m->g0 = runtime·malg(-1); - ts.m = m; - ts.g = m->g0; - ts.fn = runtime·mstart; - runtime·asmcgocall(libcgo_thread_start, &ts); - } else { - if(Windows) - // windows will layout sched stack on os stack - m->g0 = runtime·malg(-1); - else - m->g0 = runtime·malg(8192); - runtime·newosproc(m, m->g0, m->g0->stackbase, runtime·mstart); + if(m->lockedg == nil || m->lockedg->lockedm != m) + runtime·throw("stoplockedm: inconsistent locking"); + if(m->p) { + // Schedule another M to run this p. + p = releasep(); + handoffp(p); } + inclocked(1); + // Wait until another thread schedules lockedg again. + runtime·notesleep(&m->park); + runtime·noteclear(&m->park); + if(m->lockedg->status != Grunnable) + runtime·throw("stoplockedm: not runnable"); + acquirep(m->nextp); + m->nextp = nil; +} + +// Schedules the locked m to run the locked gp. +static void +startlockedm(G *gp) +{ + M *mp; + P *p; + + mp = gp->lockedm; + if(mp == m) + runtime·throw("startlockedm: locked to me"); + if(mp->nextp) + runtime·throw("startlockedm: m has p"); + // directly handoff current P to the locked m + inclocked(-1); + p = releasep(); + mp->nextp = p; + runtime·notewakeup(&mp->park); + stopm(); +} + +// Stops the current m for stoptheworld. +// Returns when the world is restarted. +static void +gcstopm(void) +{ + P *p; - return m; + if(!runtime·gcwaiting) + runtime·throw("gcstopm: not waiting for gc"); + if(m->spinning) { + m->spinning = false; + runtime·xadd(&runtime·sched.nmspinning, -1); + } + p = releasep(); + runtime·lock(&runtime·sched); + p->status = Pgcstop; + if(--runtime·sched.stopwait == 0) + runtime·notewakeup(&runtime·sched.stopnote); + runtime·unlock(&runtime·sched); + stopm(); } -// One round of scheduler: find a goroutine and run it. -// The argument is the goroutine that was running before -// schedule was called, or nil if this is the first call. +// Schedules gp to run on the current M. // Never returns. static void -schedule(G *gp) +execute(G *gp) { int32 hz; - uint32 v; - - schedlock(); - if(gp != nil) { - // Just finished running gp. - gp->m = nil; - runtime·sched.grunning--; - - // atomic { mcpu-- } - v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift); - if(atomic_mcpu(v) > maxgomaxprocs) - runtime·throw("negative mcpu in scheduler"); - - switch(gp->status){ - case Grunnable: - case Gdead: - // Shouldn't have been running! - runtime·throw("bad gp->status in sched"); - case Grunning: - gp->status = Grunnable; - gput(gp); - break; - case Gmoribund: - gp->status = Gdead; - if(gp->lockedm) { - gp->lockedm = nil; - m->lockedg = nil; - } - gp->idlem = nil; - unwindstack(gp, nil); - gfput(gp); - if(--runtime·sched.gcount == 0) - runtime·exit(0); - break; - } - if(gp->readyonstop){ - gp->readyonstop = 0; - readylocked(gp); - } - } else if(m->helpgc) { - // Bootstrap m or new m started by starttheworld. - // atomic { mcpu-- } - v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift); - if(atomic_mcpu(v) > maxgomaxprocs) - runtime·throw("negative mcpu in scheduler"); - // Compensate for increment in starttheworld(). 
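stoplockedm and startlockedm above carry a P to whichever M a goroutine is wired to, which is the machinery behind runtime.LockOSThread outside of initialization. A small sketch of the user-level contract:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan bool)
	go func() {
		// Wire this G to its current M. Until UnlockOSThread, the
		// scheduler hands a P to this M (startlockedm) rather than
		// moving the G to some other thread.
		runtime.LockOSThread()
		defer runtime.UnlockOSThread()
		fmt.Println("running on a wired OS thread")
		done <- true
	}()
	<-done
}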
- runtime·sched.grunning--; - m->helpgc = 0; - } else if(m->nextg != nil) { - // New m started by matchmg. - } else { - runtime·throw("invalid m state in scheduler"); - } - // Find (or wait for) g to run. Unlocks runtime·sched. - gp = nextgandunlock(); - gp->readyonstop = 0; + if(gp->status != Grunnable) { + runtime·printf("execute: bad g status %d\n", gp->status); + runtime·throw("execute: bad g status"); + } gp->status = Grunning; + m->p->tick++; m->curg = gp; gp->m = m; @@ -904,27 +964,209 @@ schedule(G *gp) if(m->profilehz != hz) runtime·resetcpuprofiler(hz); - if(gp->sched.pc == (byte*)runtime·goexit) { // kickoff - runtime·gogocall(&gp->sched, (void(*)(void))gp->entry); - } + if(gp->sched.pc == (byte*)runtime·goexit) // kickoff + runtime·gogocallfn(&gp->sched, gp->fnstart); runtime·gogo(&gp->sched, 0); } -// Enter scheduler. If g->status is Grunning, -// re-queues g and runs everyone else who is waiting -// before running g again. If g->status is Gmoribund, -// kills off g. -// Cannot split stack because it is called from exitsyscall. -// See comment below. -#pragma textflag 7 +// Finds a runnable goroutine to execute. +// Tries to steal from other P's and get g from global queue. +static G* +findrunnable(void) +{ + G *gp; + P *p; + int32 i; + +top: + if(runtime·gcwaiting) { + gcstopm(); + goto top; + } + // local runq + gp = runqget(m->p); + if(gp) + return gp; + // global runq + if(runtime·sched.runqsize) { + runtime·lock(&runtime·sched); + gp = globrunqget(m->p); + runtime·unlock(&runtime·sched); + if(gp) + return gp; + } + // If number of spinning M's >= number of busy P's, block. + // This is necessary to prevent excessive CPU consumption + // when GOMAXPROCS>>1 but the program parallelism is low. + if(!m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle)) // TODO: fast atomic + goto stop; + if(!m->spinning) { + m->spinning = true; + runtime·xadd(&runtime·sched.nmspinning, 1); + } + // random steal from other P's + for(i = 0; i < 2*runtime·gomaxprocs; i++) { + if(runtime·gcwaiting) + goto top; + p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs]; + if(p == m->p) + gp = runqget(p); + else + gp = runqsteal(m->p, p); + if(gp) + return gp; + } +stop: + // return P and block + runtime·lock(&runtime·sched); + if(runtime·gcwaiting) { + runtime·unlock(&runtime·sched); + goto top; + } + if(runtime·sched.runqsize) { + gp = globrunqget(m->p); + runtime·unlock(&runtime·sched); + return gp; + } + p = releasep(); + pidleput(p); + runtime·unlock(&runtime·sched); + if(m->spinning) { + m->spinning = false; + runtime·xadd(&runtime·sched.nmspinning, -1); + } + // check all runqueues once again + for(i = 0; i < runtime·gomaxprocs; i++) { + p = runtime·allp[i]; + if(p && p->runqhead != p->runqtail) { + runtime·lock(&runtime·sched); + p = pidleget(); + runtime·unlock(&runtime·sched); + if(p) { + acquirep(p); + goto top; + } + break; + } + } + stopm(); + goto top; +} + +// One round of scheduler: find a runnable goroutine and execute it. +// Never returns. +static void +schedule(void) +{ + G *gp; + + if(m->locks) + runtime·throw("schedule: holding locks"); + +top: + if(runtime·gcwaiting) { + gcstopm(); + goto top; + } + + gp = runqget(m->p); + if(gp == nil) + gp = findrunnable(); + + if(m->spinning) { + m->spinning = false; + runtime·xadd(&runtime·sched.nmspinning, -1); + } + + // M wakeup policy is deliberately somewhat conservative (see nmspinning handling), + // so see if we need to wakeup another M here. 
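findrunnable above steals from randomly chosen victims, taking half the victim's queue so work spreads quickly. A toy model of steal-half under a mutex; runq, steal, and the int "goroutine ids" are illustrative, while the real queues are fixed-size lock-free rings:

package main

import (
	"fmt"
	"math/rand"
	"sync"
)

// runq is a toy per-P run queue.
type runq struct {
	mu sync.Mutex
	gs []int // goroutine ids stand in for G*
}

// steal moves half of the victim's queue here and pops one G,
// loosely following runqsteal above.
func (q *runq) steal(victim *runq) (int, bool) {
	victim.mu.Lock()
	n := len(victim.gs) / 2
	taken := append([]int(nil), victim.gs[:n]...)
	victim.gs = victim.gs[n:]
	victim.mu.Unlock()

	q.mu.Lock()
	defer q.mu.Unlock()
	q.gs = append(q.gs, taken...)
	if len(q.gs) == 0 {
		return 0, false
	}
	g := q.gs[0]
	q.gs = q.gs[1:]
	return g, true
}

func main() {
	ps := []*runq{{gs: []int{1, 2, 3, 4}}, {}}
	victim := ps[rand.Intn(len(ps)-1)] // random victim, as in findrunnable
	if g, ok := ps[1].steal(victim); ok {
		fmt.Println("stole goroutine", g)
	}
}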
+ if (m->p->runqhead != m->p->runqtail && + runtime·atomicload(&runtime·sched.nmspinning) == 0 && + runtime·atomicload(&runtime·sched.npidle) > 0) // TODO: fast atomic + wakep(); + + if(gp->lockedm) { + startlockedm(gp); + goto top; + } + + execute(gp); +} + +// Puts the current goroutine into a waiting state and unlocks the lock. +// The goroutine can be made runnable again by calling runtime·ready(gp). +void +runtime·park(void(*unlockf)(Lock*), Lock *lock, int8 *reason) +{ + m->waitlock = lock; + m->waitunlockf = unlockf; + g->waitreason = reason; + runtime·mcall(park0); +} + +// runtime·park continuation on g0. +static void +park0(G *gp) +{ + gp->status = Gwaiting; + gp->m = nil; + m->curg = nil; + if(m->waitunlockf) { + m->waitunlockf(m->waitlock); + m->waitunlockf = nil; + } + if(m->lockedg) { + stoplockedm(); + execute(gp); // Never returns. + } + schedule(); +} + +// Scheduler yield. void runtime·gosched(void) { - if(m->locks != 0) - runtime·throw("gosched holding locks"); - if(g == m->g0) - runtime·throw("gosched of g0"); - runtime·mcall(schedule); + runtime·mcall(gosched0); +} + +// runtime·gosched continuation on g0. +static void +gosched0(G *gp) +{ + gp->status = Grunnable; + gp->m = nil; + m->curg = nil; + runtime·lock(&runtime·sched); + globrunqput(gp); + runtime·unlock(&runtime·sched); + if(m->lockedg) { + stoplockedm(); + execute(gp); // Never returns. + } + schedule(); +} + +// Finishes execution of the current goroutine. +void +runtime·goexit(void) +{ + if(raceenabled) + runtime·racegoend(); + runtime·mcall(goexit0); +} + +// runtime·goexit continuation on g0. +static void +goexit0(G *gp) +{ + gp->status = Gdead; + gp->m = nil; + gp->lockedm = nil; + m->curg = nil; + m->lockedg = nil; + runtime·unwindstack(gp, nil); + gfput(m->p, gp); + schedule(); } // The goroutine g is about to enter a system call. @@ -935,21 +1177,19 @@ runtime·gosched(void) // Entersyscall cannot split the stack: the runtime·gosave must // make g->sched refer to the caller's stack segment, because // entersyscall is going to return immediately after. -// It's okay to call matchmg and notewakeup even after -// decrementing mcpu, because we haven't released the -// sched lock yet, so the garbage collector cannot be running. #pragma textflag 7 void -runtime·entersyscall(void) +·entersyscall(int32 dummy) { - uint32 v; - if(m->profilehz > 0) runtime·setprof(false); // Leave SP around for gc and traceback. - runtime·gosave(&g->sched); + g->sched.sp = (uintptr)runtime·getcallersp(&dummy); + g->sched.pc = runtime·getcallerpc(&dummy); + g->sched.g = g; g->gcsp = g->sched.sp; + g->gcpc = g->sched.pc; g->gcstack = g->stackbase; g->gcguard = g->stackguard; g->status = Gsyscall; @@ -959,34 +1199,61 @@ runtime·entersyscall(void) runtime·throw("entersyscall"); } - // Fast path. - // The slow path inside the schedlock/schedunlock will get - // through without stopping if it does: - // mcpu-- - // gwait not true - // waitstop && mcpu <= mcpumax not true - // If we can do the same with a single atomic add, - // then we can skip the locks. 
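gosched0 above parks the yielding G on the global queue, not the local one, so runtime.Gosched sends a goroutine to the back of the line behind every P. A runnable illustration (GOMAXPROCS(1) makes the interleaving easy to see, though scheduling order is still not guaranteed):

package main

import (
	"fmt"
	"runtime"
)

func main() {
	runtime.GOMAXPROCS(1)
	done := make(chan bool)
	go func() {
		fmt.Println("background goroutine ran")
		done <- true
	}()
	runtime.Gosched() // mcall(gosched0): requeue this G and reschedule
	<-done
	fmt.Println("main resumed")
}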
- v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift); - if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v))) - return; - - schedlock(); - v = runtime·atomicload(&runtime·sched.atomic); - if(atomic_gwaiting(v)) { - matchmg(); - v = runtime·atomicload(&runtime·sched.atomic); + if(runtime·atomicload(&runtime·sched.sysmonwait)) { // TODO: fast atomic + runtime·lock(&runtime·sched); + if(runtime·atomicload(&runtime·sched.sysmonwait)) { + runtime·atomicstore(&runtime·sched.sysmonwait, 0); + runtime·notewakeup(&runtime·sched.sysmonnote); + } + runtime·unlock(&runtime·sched); + runtime·gosave(&g->sched); // re-save for traceback } - if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) { - runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift); - runtime·notewakeup(&runtime·sched.stopped); + + m->mcache = nil; + m->p->tick++; + m->p->m = nil; + runtime·atomicstore(&m->p->status, Psyscall); + if(runtime·gcwaiting) { + runtime·lock(&runtime·sched); + if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) { + if(--runtime·sched.stopwait == 0) + runtime·notewakeup(&runtime·sched.stopnote); + } + runtime·unlock(&runtime·sched); + runtime·gosave(&g->sched); // re-save for traceback } +} - // Re-save sched in case one of the calls - // (notewakeup, matchmg) triggered something using it. - runtime·gosave(&g->sched); +// The same as runtime·entersyscall(), but with a hint that the syscall is blocking. +#pragma textflag 7 +void +·entersyscallblock(int32 dummy) +{ + P *p; - schedunlock(); + if(m->profilehz > 0) + runtime·setprof(false); + + // Leave SP around for gc and traceback. + g->sched.sp = (uintptr)runtime·getcallersp(&dummy); + g->sched.pc = runtime·getcallerpc(&dummy); + g->sched.g = g; + g->gcsp = g->sched.sp; + g->gcpc = g->sched.pc; + g->gcstack = g->stackbase; + g->gcguard = g->stackguard; + g->status = Gsyscall; + if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) { + // runtime·printf("entersyscallblock inconsistent %p [%p,%p]\n", + // g->gcsp, g->gcguard-StackGuard, g->gcstack); + runtime·throw("entersyscallblock"); + } + + p = releasep(); + handoffp(p); + if(g == scvg) // do not consider blocked scavenger for deadlock detection + inclocked(1); + runtime·gosave(&g->sched); // re-save for traceback } // The goroutine g exited its system call. @@ -996,177 +1263,81 @@ runtime·entersyscall(void) void runtime·exitsyscall(void) { - uint32 v; + P *p; - // Fast path. - // If we can do the mcpu++ bookkeeping and - // find that we still have mcpu <= mcpumax, then we can - // start executing Go code immediately, without having to - // schedlock/schedunlock. - v = runtime·xadd(&runtime·sched.atomic, (1<<mcpuShift)); - if(m->profilehz == runtime·sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) { + // Check whether the profiler needs to be turned on. + if(m->profilehz > 0) + runtime·setprof(true); + + // Try to re-acquire the last P. + if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) { // There's a cpu for us, so we can run. + m->mcache = m->p->mcache; + m->p->m = m; + m->p->tick++; g->status = Grunning; // Garbage collector isn't running (since we are), - // so okay to clear gcstack. - g->gcstack = nil; - - if(m->profilehz > 0) - runtime·setprof(true); + // so okay to clear gcstack and gcsp. 
+ g->gcstack = (uintptr)nil; + g->gcsp = (uintptr)nil; return; } - // Tell scheduler to put g back on the run queue: - // mostly equivalent to g->status = Grunning, - // but keeps the garbage collector from thinking - // that g is running right now, which it's not. - g->readyonstop = 1; + if(g == scvg) // do not consider blocked scavenger for deadlock detection + inclocked(-1); + // Try to get any other idle P. + m->p = nil; + if(runtime·sched.pidle) { + runtime·lock(&runtime·sched); + p = pidleget(); + runtime·unlock(&runtime·sched); + if(p) { + acquirep(p); + g->gcstack = (uintptr)nil; + g->gcsp = (uintptr)nil; + return; + } + } - // All the cpus are taken. - // The scheduler will ready g and put this m to sleep. - // When the scheduler takes g away from m, - // it will undo the runtime·sched.mcpu++ above. - runtime·gosched(); + // Call the scheduler. + runtime·mcall(exitsyscall0); - // Gosched returned, so we're allowed to run now. + // Scheduler returned, so we're allowed to run now. // Delete the gcstack information that we left for // the garbage collector during the system call. // Must wait until now because until gosched returns // we don't know for sure that the garbage collector // is not running. - g->gcstack = nil; + g->gcstack = (uintptr)nil; + g->gcsp = (uintptr)nil; } -// Called from runtime·lessstack when returning from a function which -// allocated a new stack segment. The function's return value is in -// m->cret. -void -runtime·oldstack(void) +// runtime·exitsyscall slow path on g0. +// Failed to acquire P, enqueue gp as runnable. +static void +exitsyscall0(G *gp) { - Stktop *top, old; - uint32 argsize; - uintptr cret; - byte *sp; - G *g1; - int32 goid; - -//printf("oldstack m->cret=%p\n", m->cret); - - g1 = m->curg; - top = (Stktop*)g1->stackbase; - sp = (byte*)top; - old = *top; - argsize = old.argsize; - if(argsize > 0) { - sp -= argsize; - runtime·memmove(top->argp, sp, argsize); - } - goid = old.gobuf.g->goid; // fault if g is bad, before gogo - USED(goid); - - if(old.free != 0) - runtime·stackfree(g1->stackguard - StackGuard, old.free); - g1->stackbase = old.stackbase; - g1->stackguard = old.stackguard; - - cret = m->cret; - m->cret = 0; // drop reference - runtime·gogo(&old.gobuf, cret); -} - -// Called from reflect·call or from runtime·morestack when a new -// stack segment is needed. Allocate a new stack big enough for -// m->moreframesize bytes, copy m->moreargsize bytes to the new frame, -// and then act as though runtime·lessstack called the function at -// m->morepc. -void -runtime·newstack(void) -{ - int32 framesize, argsize; - Stktop *top; - byte *stk, *sp; - G *g1; - Gobuf label; - bool reflectcall; - uintptr free; - - framesize = m->moreframesize; - argsize = m->moreargsize; - g1 = m->curg; - - if(m->morebuf.sp < g1->stackguard - StackGuard) { - runtime·printf("runtime: split stack overflow: %p < %p\n", m->morebuf.sp, g1->stackguard - StackGuard); - runtime·throw("runtime: split stack overflow"); - } - if(argsize % sizeof(uintptr) != 0) { - runtime·printf("runtime: stack split with misaligned argsize %d\n", argsize); - runtime·throw("runtime: stack split argsize"); - } - - reflectcall = framesize==1; - if(reflectcall) - framesize = 0; - - if(reflectcall && m->morebuf.sp - sizeof(Stktop) - argsize - 32 > g1->stackguard) { - // special case: called from reflect.call (framesize==1) - // to call code with an arbitrary argument size, - // and we have enough space on the current stack. 
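The Psyscall dance above (entersyscall marks the P, handoffp gives it away, exitsyscall tries to CAS it back) is what keeps blocking syscalls from idling a CPU. A small demonstration under that assumption: the pipe read blocks in a real syscall, yet the counting loop keeps running on the released P. The final sleep is only so the reader's print can appear:

package main

import (
	"fmt"
	"os"
	"time"
)

func main() {
	r, w, err := os.Pipe()
	if err != nil {
		panic(err)
	}
	go func() {
		buf := make([]byte, 1)
		r.Read(buf) // blocks in read(2); entersyscall releases the P
		fmt.Println("read returned:", string(buf))
	}()
	sum := 0
	for i := 0; i < 10000000; i++ { // CPU work proceeds meanwhile
		sum += i
	}
	fmt.Println("sum done:", sum)
	w.Write([]byte("x")) // unblock the reader; exitsyscall reacquires a P
	time.Sleep(10 * time.Millisecond)
}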
- // the new Stktop* is necessary to unwind, but - // we don't need to create a new segment. - top = (Stktop*)(m->morebuf.sp - sizeof(*top)); - stk = g1->stackguard - StackGuard; - free = 0; - } else { - // allocate new segment. - framesize += argsize; - framesize += StackExtra; // room for more functions, Stktop. - if(framesize < StackMin) - framesize = StackMin; - framesize += StackSystem; - stk = runtime·stackalloc(framesize); - top = (Stktop*)(stk+framesize-sizeof(*top)); - free = framesize; - } - -//runtime·printf("newstack framesize=%d argsize=%d morepc=%p moreargp=%p gobuf=%p, %p top=%p old=%p\n", -//framesize, argsize, m->morepc, m->moreargp, m->morebuf.pc, m->morebuf.sp, top, g1->stackbase); - - top->stackbase = g1->stackbase; - top->stackguard = g1->stackguard; - top->gobuf = m->morebuf; - top->argp = m->moreargp; - top->argsize = argsize; - top->free = free; - m->moreargp = nil; - m->morebuf.pc = nil; - m->morebuf.sp = nil; - - // copy flag from panic - top->panic = g1->ispanic; - g1->ispanic = false; - - g1->stackbase = (byte*)top; - g1->stackguard = stk + StackGuard; - - sp = (byte*)top; - if(argsize > 0) { - sp -= argsize; - runtime·memmove(sp, top->argp, argsize); + P *p; + + gp->status = Grunnable; + gp->m = nil; + m->curg = nil; + runtime·lock(&runtime·sched); + p = pidleget(); + if(p == nil) + globrunqput(gp); + runtime·unlock(&runtime·sched); + if(p) { + acquirep(p); + execute(gp); // Never returns. } - if(thechar == '5') { - // caller would have saved its LR below args. - sp -= sizeof(void*); - *(void**)sp = nil; + if(m->lockedg) { + // Wait until another thread schedules gp and so m again. + stoplockedm(); + execute(gp); // Never returns. } - - // Continue as if lessstack had just called m->morepc - // (the PC that decided to grow the stack). - label.sp = sp; - label.pc = (byte*)runtime·lessstack; - label.g = m->curg; - runtime·gogocall(&label, m->morepc); - - *(int32*)345 = 123; // never return + stopm(); + schedule(); // Never returns. } // Hook used by runtime·malg to call runtime·stackalloc on the @@ -1204,10 +1375,10 @@ runtime·malg(int32 stacksize) stk = g->param; g->param = nil; } - newg->stack0 = stk; - newg->stackguard = stk + StackGuard; - newg->stackbase = stk + StackSystem + stacksize - sizeof(Stktop); - runtime·memclr(newg->stackbase, sizeof(Stktop)); + newg->stack0 = (uintptr)stk; + newg->stackguard = (uintptr)stk + StackGuard; + newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop); + runtime·memclr((byte*)newg->stackbase, sizeof(Stktop)); } return newg; } @@ -1221,7 +1392,7 @@ runtime·malg(int32 stacksize) // functions that split the stack. #pragma textflag 7 void -runtime·newproc(int32 siz, byte* fn, ...) +runtime·newproc(int32 siz, FuncVal* fn, ...) { byte *argp; @@ -1237,7 +1408,7 @@ runtime·newproc(int32 siz, byte* fn, ...) // address of the go statement that created this. The new g is put // on the queue of g's waiting to run. 
G* -runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc) +runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc) { byte *sp; G *newg; @@ -1254,23 +1425,21 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc) if(siz > StackMin - 1024) runtime·throw("runtime.newproc: function arguments too large for new goroutine"); - schedlock(); - - if((newg = gfget()) != nil){ + if((newg = gfget(m->p)) != nil) { if(newg->stackguard - StackGuard != newg->stack0) runtime·throw("invalid stack in newg"); } else { newg = runtime·malg(StackMin); + runtime·lock(&runtime·sched); if(runtime·lastg == nil) runtime·allg = newg; else runtime·lastg->alllink = newg; runtime·lastg = newg; + runtime·unlock(&runtime·sched); } - newg->status = Gwaiting; - newg->waitreason = "new goroutine"; - sp = newg->stackbase; + sp = (byte*)newg->stackbase; sp -= siz; runtime·memmove(sp, argp, narg); if(thechar == '5') { @@ -1279,318 +1448,88 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc) *(void**)sp = nil; } - newg->sched.sp = sp; + newg->sched.sp = (uintptr)sp; newg->sched.pc = (byte*)runtime·goexit; newg->sched.g = newg; - newg->entry = fn; + newg->fnstart = fn; newg->gopc = (uintptr)callerpc; - - runtime·sched.gcount++; - runtime·sched.goidgen++; - newg->goid = runtime·sched.goidgen; - - newprocreadylocked(newg); - schedunlock(); - + newg->status = Grunnable; + newg->goid = runtime·xadd64(&runtime·sched.goidgen, 1); + if(raceenabled) + newg->racectx = runtime·racegostart(callerpc); + runqput(m->p, newg); + + if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main) // TODO: fast atomic + wakep(); return newg; -//printf(" goid=%d\n", newg->goid); } -// Create a new deferred function fn with siz bytes of arguments. -// The compiler turns a defer statement into a call to this. -// Cannot split the stack because it assumes that the arguments -// are available sequentially after &fn; they would not be -// copied if a stack split occurred. It's OK for this to call -// functions that split the stack. -#pragma textflag 7 -uintptr -runtime·deferproc(int32 siz, byte* fn, ...) -{ - Defer *d; - - d = runtime·malloc(sizeof(*d) + siz - sizeof(d->args)); - d->fn = fn; - d->siz = siz; - d->pc = runtime·getcallerpc(&siz); - if(thechar == '5') - d->argp = (byte*)(&fn+2); // skip caller's saved link register - else - d->argp = (byte*)(&fn+1); - runtime·memmove(d->args, d->argp, d->siz); - - d->link = g->defer; - g->defer = d; - - // deferproc returns 0 normally. - // a deferred func that stops a panic - // makes the deferproc return 1. - // the code the compiler generates always - // checks the return value and jumps to the - // end of the function if deferproc returns != 0. - return 0; -} - -// Run a deferred function if there is one. -// The compiler inserts a call to this at the end of any -// function which calls defer. -// If there is a deferred function, this will call runtime·jmpdefer, -// which will jump to the deferred function such that it appears -// to have been called by the caller of deferreturn at the point -// just before deferreturn was called. The effect is that deferreturn -// is called again and again until there are no more deferred functions. -// Cannot split the stack because we reuse the caller's frame to -// call the deferred function. 
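The wakep() call in newproc1 above fires only when some P is idle and no M is already spinning. wakep itself is outside this hunk; a plausible sketch consistent with the nmspinning counter it guards (startm is assumed here, not shown in the diff):

// Tries to add one more M to run the newly runnable G.
// The CAS admits at most one new spinning M per wakeup,
// bounding thread churn when many goroutines are created at once.
static void
wakep(void)
{
    // be conservative about spinning threads
    if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
        return;
    startm(nil, true);  // assumed helper: wake or create an M, marked spinning
}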
-#pragma textflag 7 -void -runtime·deferreturn(uintptr arg0) -{ - Defer *d; - byte *argp, *fn; - - d = g->defer; - if(d == nil) - return; - argp = (byte*)&arg0; - if(d->argp != argp) - return; - runtime·memmove(argp, d->args, d->siz); - g->defer = d->link; - fn = d->fn; - if(!d->nofree) - runtime·free(d); - runtime·jmpdefer(fn, argp); -} - -// Run all deferred functions for the current goroutine. +// Put on gfree list. +// If local list is too long, transfer a batch to the global list. static void -rundefer(void) +gfput(P *p, G *gp) { - Defer *d; - - while((d = g->defer) != nil) { - g->defer = d->link; - reflect·call(d->fn, d->args, d->siz); - if(!d->nofree) - runtime·free(d); + if(gp->stackguard - StackGuard != gp->stack0) + runtime·throw("invalid stack in gfput"); + gp->schedlink = p->gfree; + p->gfree = gp; + p->gfreecnt++; + if(p->gfreecnt >= 64) { + runtime·lock(&runtime·sched.gflock); + while(p->gfreecnt >= 32) { + p->gfreecnt--; + gp = p->gfree; + p->gfree = gp->schedlink; + gp->schedlink = runtime·sched.gfree; + runtime·sched.gfree = gp; + } + runtime·unlock(&runtime·sched.gflock); } } -// Free stack frames until we hit the last one -// or until we find the one that contains the argp. -static void -unwindstack(G *gp, byte *sp) +// Get from gfree list. +// If local list is empty, grab a batch from global list. +static G* +gfget(P *p) { - Stktop *top; - byte *stk; - - // Must be called from a different goroutine, usually m->g0. - if(g == gp) - runtime·throw("unwindstack on self"); + G *gp; - while((top = (Stktop*)gp->stackbase) != nil && top->stackbase != nil) { - stk = gp->stackguard - StackGuard; - if(stk <= sp && sp < gp->stackbase) - break; - gp->stackbase = top->stackbase; - gp->stackguard = top->stackguard; - if(top->free != 0) - runtime·stackfree(stk, top->free); +retry: + gp = p->gfree; + if(gp == nil && runtime·sched.gfree) { + runtime·lock(&runtime·sched.gflock); + while(p->gfreecnt < 32 && runtime·sched.gfree) { + p->gfreecnt++; + gp = runtime·sched.gfree; + runtime·sched.gfree = gp->schedlink; + gp->schedlink = p->gfree; + p->gfree = gp; + } + runtime·unlock(&runtime·sched.gflock); + goto retry; } - - if(sp != nil && (sp < gp->stackguard - StackGuard || gp->stackbase < sp)) { - runtime·printf("recover: %p not in [%p, %p]\n", sp, gp->stackguard - StackGuard, gp->stackbase); - runtime·throw("bad unwindstack"); + if(gp) { + p->gfree = gp->schedlink; + p->gfreecnt--; } + return gp; } -// Print all currently active panics. Used when crashing. +// Purge all cached G's from gfree list to the global list. static void -printpanics(Panic *p) -{ - if(p->link) { - printpanics(p->link); - runtime·printf("\t"); - } - runtime·printf("panic: "); - runtime·printany(p->arg); - if(p->recovered) - runtime·printf(" [recovered]"); - runtime·printf("\n"); -} - -static void recovery(G*); - -// The implementation of the predeclared function panic. -void -runtime·panic(Eface e) +gfpurge(P *p) { - Defer *d; - Panic *p; - - p = runtime·mal(sizeof *p); - p->arg = e; - p->link = g->panic; - p->stackbase = g->stackbase; - g->panic = p; + G *gp; - for(;;) { - d = g->defer; - if(d == nil) - break; - // take defer off list in case of recursive panic - g->defer = d->link; - g->ispanic = true; // rock for newstack, where reflect.call ends up - reflect·call(d->fn, d->args, d->siz); - if(p->recovered) { - g->panic = p->link; - if(g->panic == nil) // must be done with signal - g->sig = 0; - runtime·free(p); - // put recovering defer back on list - // for scheduler to find. 
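The 64/32 thresholds in gfput above form a hysteresis band, so a P hovering near the limit does not take gflock on every free. A standalone sketch with illustrative counts:

#include <stdio.h>

int
main(void)
{
    int local = 0, global = 0, i;

    for(i = 0; i < 100; i++) {  /* 100 goroutines exit on one P */
        local++;                /* gfput onto the local list */
        if(local >= 64) {       /* spill a batch under gflock */
            while(local >= 32) {
                local--;
                global++;
            }
        }
    }
    printf("local=%d global=%d\n", local, global);  /* local=34 global=66 */
    return 0;
}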
- d->link = g->defer; - g->defer = d; - runtime·mcall(recovery); - runtime·throw("recovery failed"); // mcall should not return - } - if(!d->nofree) - runtime·free(d); + runtime·lock(&runtime·sched.gflock); + while(p->gfreecnt) { + p->gfreecnt--; + gp = p->gfree; + p->gfree = gp->schedlink; + gp->schedlink = runtime·sched.gfree; + runtime·sched.gfree = gp; } - - // ran out of deferred calls - old-school panic now - runtime·startpanic(); - printpanics(g->panic); - runtime·dopanic(0); -} - -// Unwind the stack after a deferred function calls recover -// after a panic. Then arrange to continue running as though -// the caller of the deferred function returned normally. -static void -recovery(G *gp) -{ - Defer *d; - - // Rewind gp's stack; we're running on m->g0's stack. - d = gp->defer; - gp->defer = d->link; - - // Unwind to the stack frame with d's arguments in it. - unwindstack(gp, d->argp); - - // Make the deferproc for this d return again, - // this time returning 1. The calling function will - // jump to the standard return epilogue. - // The -2*sizeof(uintptr) makes up for the - // two extra words that are on the stack at - // each call to deferproc. - // (The pc we're returning to does pop pop - // before it tests the return value.) - // On the arm there are 2 saved LRs mixed in too. - if(thechar == '5') - gp->sched.sp = (byte*)d->argp - 4*sizeof(uintptr); - else - gp->sched.sp = (byte*)d->argp - 2*sizeof(uintptr); - gp->sched.pc = d->pc; - if(!d->nofree) - runtime·free(d); - runtime·gogo(&gp->sched, 1); -} - -// The implementation of the predeclared function recover. -// Cannot split the stack because it needs to reliably -// find the stack segment of its caller. -#pragma textflag 7 -void -runtime·recover(byte *argp, Eface ret) -{ - Stktop *top, *oldtop; - Panic *p; - - // Must be a panic going on. - if((p = g->panic) == nil || p->recovered) - goto nomatch; - - // Frame must be at the top of the stack segment, - // because each deferred call starts a new stack - // segment as a side effect of using reflect.call. - // (There has to be some way to remember the - // variable argument frame size, and the segment - // code already takes care of that for us, so we - // reuse it.) - // - // As usual closures complicate things: the fp that - // the closure implementation function claims to have - // is where the explicit arguments start, after the - // implicit pointer arguments and PC slot. - // If we're on the first new segment for a closure, - // then fp == top - top->args is correct, but if - // the closure has its own big argument frame and - // allocated a second segment (see below), - // the fp is slightly above top - top->args. - // That condition can't happen normally though - // (stack pointers go down, not up), so we can accept - // any fp between top and top - top->args as - // indicating the top of the segment. - top = (Stktop*)g->stackbase; - if(argp < (byte*)top - top->argsize || (byte*)top < argp) - goto nomatch; - - // The deferred call makes a new segment big enough - // for the argument frame but not necessarily big - // enough for the function's local frame (size unknown - // at the time of the call), so the function might have - // made its own segment immediately. If that's the - // case, back top up to the older one, the one that - // reflect.call would have made for the panic. - // - // The fp comparison here checks that the argument - // frame that was copied during the split (the top->args - // bytes above top->fp) abuts the old top of stack. 
- // This is a correct test for both closure and non-closure code. - oldtop = (Stktop*)top->stackbase; - if(oldtop != nil && top->argp == (byte*)oldtop - top->argsize) - top = oldtop; - - // Now we have the segment that was created to - // run this call. It must have been marked as a panic segment. - if(!top->panic) - goto nomatch; - - // Okay, this is the top frame of a deferred call - // in response to a panic. It can see the panic argument. - p->recovered = 1; - ret = p->arg; - FLUSH(&ret); - return; - -nomatch: - ret.type = nil; - ret.data = nil; - FLUSH(&ret); -} - - -// Put on gfree list. Sched must be locked. -static void -gfput(G *g) -{ - if(g->stackguard - StackGuard != g->stack0) - runtime·throw("invalid stack in gfput"); - g->schedlink = runtime·sched.gfree; - runtime·sched.gfree = g; -} - -// Get from gfree list. Sched must be locked. -static G* -gfget(void) -{ - G *g; - - g = runtime·sched.gfree; - if(g) - runtime·sched.gfree = g->schedlink; - return g; + runtime·unlock(&runtime·sched.gflock); } void @@ -1600,80 +1539,85 @@ runtime·Breakpoint(void) } void -runtime·Goexit(void) -{ - rundefer(); - runtime·goexit(); -} - -void runtime·Gosched(void) { runtime·gosched(); } // Implementation of runtime.GOMAXPROCS. -// delete when scheduler is stronger +// delete when scheduler is even stronger int32 runtime·gomaxprocsfunc(int32 n) { int32 ret; - uint32 v; - schedlock(); + if(n > MaxGomaxprocs) + n = MaxGomaxprocs; + runtime·lock(&runtime·sched); ret = runtime·gomaxprocs; - if(n <= 0) - n = ret; - if(n > maxgomaxprocs) - n = maxgomaxprocs; - runtime·gomaxprocs = n; - if(runtime·gomaxprocs > 1) - runtime·singleproc = false; - if(runtime·gcwaiting != 0) { - if(atomic_mcpumax(runtime·sched.atomic) != 1) - runtime·throw("invalid mcpumax during gc"); - schedunlock(); + if(n <= 0 || n == ret) { + runtime·unlock(&runtime·sched); return ret; } + runtime·unlock(&runtime·sched); - setmcpumax(n); + runtime·semacquire(&runtime·worldsema); + m->gcing = 1; + runtime·stoptheworld(); + newprocs = n; + m->gcing = 0; + runtime·semrelease(&runtime·worldsema); + runtime·starttheworld(); - // If there are now fewer allowed procs - // than procs running, stop. 
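The new gomaxprocsfunc above treats n <= 0 (and n equal to the current setting) as a pure query and defers the actual resize to the stop-the-world sequence. A standalone sketch of that control flow, with illustrative values and the world-stop reduced to a comment:

#include <stdio.h>

static int gomaxprocs = 2;  /* illustrative current setting */

static int
gomaxprocsfunc(int n)
{
    int ret = gomaxprocs;

    if(n <= 0 || n == ret)  /* query or no-op: skip the world-stop */
        return ret;
    /* real code: stoptheworld(); newprocs = n; starttheworld(),
       and procresize() then runs with the new value */
    gomaxprocs = n;
    return ret;             /* always returns the previous setting */
}

int
main(void)
{
    printf("query: %d\n", gomaxprocsfunc(0));   /* 2 */
    printf("old: %d\n", gomaxprocsfunc(4));     /* 2; now 4 */
    printf("query: %d\n", gomaxprocsfunc(-1));  /* 4 */
    return 0;
}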
- v = runtime·atomicload(&runtime·sched.atomic); - if(atomic_mcpu(v) > n) { - schedunlock(); - runtime·gosched(); - return ret; - } - // handle more procs - matchmg(); - schedunlock(); return ret; } -void -runtime·LockOSThread(void) +static void +LockOSThread(void) { - if(m == &runtime·m0 && runtime·sched.init) { - runtime·sched.lockmain = true; - return; - } m->lockedg = g; g->lockedm = m; } void -runtime·UnlockOSThread(void) +runtime·LockOSThread(void) { - if(m == &runtime·m0 && runtime·sched.init) { - runtime·sched.lockmain = false; + m->locked |= LockExternal; + LockOSThread(); +} + +void +runtime·lockOSThread(void) +{ + m->locked += LockInternal; + LockOSThread(); +} + +static void +UnlockOSThread(void) +{ + if(m->locked != 0) return; - } m->lockedg = nil; g->lockedm = nil; } +void +runtime·UnlockOSThread(void) +{ + m->locked &= ~LockExternal; + UnlockOSThread(); +} + +void +runtime·unlockOSThread(void) +{ + if(m->locked < LockInternal) + runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread"); + m->locked -= LockInternal; + UnlockOSThread(); +} + bool runtime·lockedOSThread(void) { @@ -1697,16 +1641,31 @@ runtime·mid(uint32 ret) } void -runtime·NumGoroutine(int32 ret) +runtime·NumGoroutine(intgo ret) { - ret = runtime·sched.gcount; + ret = runtime·gcount(); FLUSH(&ret); } int32 runtime·gcount(void) { - return runtime·sched.gcount; + G *gp; + int32 n, s; + + n = 0; + runtime·lock(&runtime·sched); + // TODO(dvyukov): runtime.NumGoroutine() is O(N). + // We do not want to increment/decrement centralized counter in newproc/goexit, + // just to make runtime.NumGoroutine() faster. + // Compromise solution is to introduce per-P counters of active goroutines. + for(gp = runtime·allg; gp; gp = gp->alllink) { + s = gp->status; + if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting) + n++; + } + runtime·unlock(&runtime·sched); + return n; } int32 @@ -1740,6 +1699,9 @@ runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp) { int32 n; + // Windows does profiling in a dedicated thread w/o m. + if(!Windows && (m == nil || m->mcache == nil)) + return; if(prof.fn == nil || prof.hz == 0) return; @@ -1783,27 +1745,533 @@ runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) runtime·resetcpuprofiler(hz); } -void (*libcgo_setenv)(byte**); +// Change number of processors. The world is stopped, sched is locked. +static void +procresize(int32 new) +{ + int32 i, old; + G *gp; + P *p; + + old = runtime·gomaxprocs; + if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs) + runtime·throw("procresize: invalid arg"); + // initialize new P's + for(i = 0; i < new; i++) { + p = runtime·allp[i]; + if(p == nil) { + p = (P*)runtime·mallocgc(sizeof(*p), 0, 0, 1); + p->status = Pgcstop; + runtime·atomicstorep(&runtime·allp[i], p); + } + if(p->mcache == nil) { + if(old==0 && i==0) + p->mcache = m->mcache; // bootstrap + else + p->mcache = runtime·allocmcache(); + } + if(p->runq == nil) { + p->runqsize = 128; + p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, 0, 1); + } + } + + // redistribute runnable G's evenly + for(i = 0; i < old; i++) { + p = runtime·allp[i]; + while(gp = runqget(p)) + globrunqput(gp); + } + // start at 1 because current M already executes some G and will acquire allp[0] below, + // so if we have a spare G we want to put it into allp[1]. 
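The redistribution loop that follows deals the spare G's out round-robin; a standalone sketch of the indexing with illustrative counts:

#include <stdio.h>

int
main(void)
{
    int new = 4, nspare = 6, i;

    /* i starts at 1 so the first spare lands on P1; P0 is skipped
       at first because the resizing M takes it for itself, but the
       index wraps back onto P0 once i reaches new. */
    for(i = 1; i <= nspare; i++)
        printf("spare g -> P%d\n", i % new);  /* P1 P2 P3 P0 P1 P2 */
    return 0;
}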
+ for(i = 1; runtime·sched.runqhead; i++) { + gp = runtime·sched.runqhead; + runtime·sched.runqhead = gp->schedlink; + runqput(runtime·allp[i%new], gp); + } + runtime·sched.runqtail = nil; + runtime·sched.runqsize = 0; + + // free unused P's + for(i = new; i < old; i++) { + p = runtime·allp[i]; + runtime·freemcache(p->mcache); + p->mcache = nil; + gfpurge(p); + p->status = Pdead; + // can't free P itself because it can be referenced by an M in syscall + } + + if(m->p) + m->p->m = nil; + m->p = nil; + m->mcache = nil; + p = runtime·allp[0]; + p->m = nil; + p->status = Pidle; + acquirep(p); + for(i = new-1; i > 0; i--) { + p = runtime·allp[i]; + p->status = Pidle; + pidleput(p); + } + runtime·singleproc = new == 1; + runtime·atomicstore((uint32*)&runtime·gomaxprocs, new); +} + +// Associate p and the current m. +static void +acquirep(P *p) +{ + if(m->p || m->mcache) + runtime·throw("acquirep: already in go"); + if(p->m || p->status != Pidle) { + runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status); + runtime·throw("acquirep: invalid p state"); + } + m->mcache = p->mcache; + m->p = p; + p->m = m; + p->status = Prunning; +} -// Update the C environment if cgo is loaded. -// Called from syscall.Setenv. -void -syscall·setenv_c(String k, String v) +// Disassociate p and the current m. +static P* +releasep(void) { - byte *arg[2]; + P *p; + + if(m->p == nil || m->mcache == nil) + runtime·throw("releasep: invalid arg"); + p = m->p; + if(p->m != m || p->mcache != m->mcache || p->status != Prunning) { + runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n", + m, m->p, p->m, m->mcache, p->mcache, p->status); + runtime·throw("releasep: invalid p state"); + } + m->p = nil; + m->mcache = nil; + p->m = nil; + p->status = Pidle; + return p; +} - if(libcgo_setenv == nil) +static void +inclocked(int32 v) +{ + runtime·lock(&runtime·sched); + runtime·sched.mlocked += v; + if(v > 0) + checkdead(); + runtime·unlock(&runtime·sched); +} + +// Check for deadlock situation. +// The check is based on number of running M's, if 0 -> deadlock. +static void +checkdead(void) +{ + G *gp; + int32 run, grunning, s; + + // -1 for sysmon + run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.mlocked - 1; + if(run > 0) return; + if(run < 0) { + runtime·printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n", + runtime·sched.nmidle, runtime·sched.mlocked, runtime·sched.mcount); + runtime·throw("checkdead: inconsistent counts"); + } + grunning = 0; + for(gp = runtime·allg; gp; gp = gp->alllink) { + if(gp == scvg) + continue; + s = gp->status; + if(s == Gwaiting) + grunning++; + else if(s == Grunnable || s == Grunning || s == Gsyscall) { + runtime·printf("checkdead: find g %D in status %d\n", gp->goid, s); + runtime·throw("checkdead: runnable g"); + } + } + if(grunning == 0) // possible if main goroutine calls runtime·Goexit() + runtime·exit(0); + m->throwing = -1; // do not dump full stacks + runtime·throw("all goroutines are asleep - deadlock!"); +} + +static void +sysmon(void) +{ + uint32 idle, delay; + uint32 ticks[MaxGomaxprocs]; + + idle = 0; // how many cycles in succession we had not wokeup somebody + delay = 0; + for(;;) { + if(idle == 0) // start with 20us sleep... + delay = 20; + else if(idle > 50) // start doubling the sleep after 1ms... 
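The constants above and just after give sysmon a concrete sleep schedule; a standalone sketch that replays the backoff with the same arithmetic:

#include <stdio.h>

int
main(void)
{
    unsigned int idle, delay = 0;

    for(idle = 0; idle <= 60; idle++) {
        if(idle == 0)           /* start with 20us sleep... */
            delay = 20;
        else if(idle > 50)      /* ...double after ~1ms of idleness... */
            delay *= 2;
        if(delay > 10*1000)     /* ...capped at 10ms */
            delay = 10*1000;
        if(idle >= 49)          /* print the interesting tail */
            printf("idle=%u delay=%uus\n", idle, delay);
    }
    return 0;
}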
+ delay *= 2; + if(delay > 10*1000) // up to 10ms + delay = 10*1000; + runtime·usleep(delay); + if(runtime·gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) { // TODO: fast atomic + runtime·lock(&runtime·sched); + if(runtime·atomicload(&runtime·gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) { + runtime·atomicstore(&runtime·sched.sysmonwait, 1); + runtime·unlock(&runtime·sched); + runtime·notesleep(&runtime·sched.sysmonnote); + runtime·noteclear(&runtime·sched.sysmonnote); + idle = 0; + delay = 20; + } else + runtime·unlock(&runtime·sched); + } + if(retake(ticks)) + idle = 0; + else + idle++; + } +} + +static uint32 +retake(uint32 *ticks) +{ + uint32 i, s, n; + int64 t; + P *p; + + n = 0; + for(i = 0; i < runtime·gomaxprocs; i++) { + p = runtime·allp[i]; + if(p==nil) + continue; + t = p->tick; + if(ticks[i] != t) { + ticks[i] = t; + continue; + } + s = p->status; + if(s != Psyscall) + continue; + if(p->runqhead == p->runqtail && runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0) // TODO: fast atomic + continue; + // Need to increment number of locked M's before the CAS. + // Otherwise the M from which we retake can exit the syscall, + // increment nmidle and report deadlock. + inclocked(-1); + if(runtime·cas(&p->status, s, Pidle)) { + n++; + handoffp(p); + } + inclocked(1); + } + return n; +} + +// Put mp on midle list. +// Sched must be locked. +static void +mput(M *mp) +{ + mp->schedlink = runtime·sched.midle; + runtime·sched.midle = mp; + runtime·sched.nmidle++; + checkdead(); +} + +// Try to get an m from midle list. +// Sched must be locked. +static M* +mget(void) +{ + M *mp; + + if((mp = runtime·sched.midle) != nil){ + runtime·sched.midle = mp->schedlink; + runtime·sched.nmidle--; + } + return mp; +} + +// Put gp on the global runnable queue. +// Sched must be locked. +static void +globrunqput(G *gp) +{ + gp->schedlink = nil; + if(runtime·sched.runqtail) + runtime·sched.runqtail->schedlink = gp; + else + runtime·sched.runqhead = gp; + runtime·sched.runqtail = gp; + runtime·sched.runqsize++; +} + +// Try get a batch of G's from the global runnable queue. +// Sched must be locked. +static G* +globrunqget(P *p) +{ + G *gp, *gp1; + int32 n; + + if(runtime·sched.runqsize == 0) + return nil; + n = runtime·sched.runqsize/runtime·gomaxprocs+1; + if(n > runtime·sched.runqsize) + n = runtime·sched.runqsize; + runtime·sched.runqsize -= n; + if(runtime·sched.runqsize == 0) + runtime·sched.runqtail = nil; + gp = runtime·sched.runqhead; + runtime·sched.runqhead = gp->schedlink; + n--; + while(n--) { + gp1 = runtime·sched.runqhead; + runtime·sched.runqhead = gp1->schedlink; + runqput(p, gp1); + } + return gp; +} + +// Put p to on pidle list. +// Sched must be locked. +static void +pidleput(P *p) +{ + p->link = runtime·sched.pidle; + runtime·sched.pidle = p; + runtime·xadd(&runtime·sched.npidle, 1); // TODO: fast atomic +} + +// Try get a p from pidle list. +// Sched must be locked. +static P* +pidleget(void) +{ + P *p; - arg[0] = runtime·malloc(k.len + 1); - runtime·memmove(arg[0], k.str, k.len); - arg[0][k.len] = 0; + p = runtime·sched.pidle; + if(p) { + runtime·sched.pidle = p->link; + runtime·xadd(&runtime·sched.npidle, -1); // TODO: fast atomic + } + return p; +} - arg[1] = runtime·malloc(v.len + 1); - runtime·memmove(arg[1], v.str, v.len); - arg[1][v.len] = 0; +// Put g on local runnable queue. +// TODO(dvyukov): consider using lock-free queue. 
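runqput/runqget below treat p->runq as a fixed ring with one sacrificial slot, so full and empty stay distinguishable without a separate element count. A standalone sketch of the two predicates, with an illustrative size:

#include <stdio.h>

enum { S = 4 };  /* illustrative runqsize */

static int
full(int h, int t)   /* the condition runqput checks before runqgrow */
{
    return t == h-1 || (h == 0 && t == S-1);
}

static int
empty(int h, int t)
{
    return t == h;
}

int
main(void)
{
    printf("h=0 t=3: full=%d\n", full(0, 3));    /* 1: only S-1 slots usable */
    printf("h=2 t=1: full=%d\n", full(2, 1));    /* 1: tail caught up to head */
    printf("h=2 t=2: empty=%d\n", empty(2, 2));  /* 1 */
    return 0;
}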
+static void +runqput(P *p, G *gp) +{ + int32 h, t, s; + + runtime·lock(p); +retry: + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + if(t == h-1 || (h == 0 && t == s-1)) { + runqgrow(p); + goto retry; + } + p->runq[t++] = gp; + if(t == s) + t = 0; + p->runqtail = t; + runtime·unlock(p); +} + +// Get g from local runnable queue. +static G* +runqget(P *p) +{ + G *gp; + int32 t, h, s; + + if(p->runqhead == p->runqtail) + return nil; + runtime·lock(p); + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + if(t == h) { + runtime·unlock(p); + return nil; + } + gp = p->runq[h++]; + if(h == s) + h = 0; + p->runqhead = h; + runtime·unlock(p); + return gp; +} - runtime·asmcgocall((void*)libcgo_setenv, arg); - runtime·free(arg[0]); - runtime·free(arg[1]); +// Grow local runnable queue. +// TODO(dvyukov): consider using fixed-size array +// and transfer excess to the global list (local queue can grow way too big). +static void +runqgrow(P *p) +{ + G **q; + int32 s, t, h, t2; + + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + t2 = 0; + q = runtime·malloc(2*s*sizeof(*q)); + while(t != h) { + q[t2++] = p->runq[h++]; + if(h == s) + h = 0; + } + runtime·free(p->runq); + p->runq = q; + p->runqhead = 0; + p->runqtail = t2; + p->runqsize = 2*s; } + +// Steal half of elements from local runnable queue of p2 +// and put onto local runnable queue of p. +// Returns one of the stolen elements (or nil if failed). +static G* +runqsteal(P *p, P *p2) +{ + G *gp, *gp1; + int32 t, h, s, t2, h2, s2, c, i; + + if(p2->runqhead == p2->runqtail) + return nil; + // sort locks to prevent deadlocks + if(p < p2) + runtime·lock(p); + runtime·lock(p2); + if(p2->runqhead == p2->runqtail) { + runtime·unlock(p2); + if(p < p2) + runtime·unlock(p); + return nil; + } + if(p >= p2) + runtime·lock(p); + // now we've locked both queues and know the victim is not empty + h = p->runqhead; + t = p->runqtail; + s = p->runqsize; + h2 = p2->runqhead; + t2 = p2->runqtail; + s2 = p2->runqsize; + gp = p2->runq[h2++]; // return value + if(h2 == s2) + h2 = 0; + // steal roughly half + if(t2 > h2) + c = (t2 - h2) / 2; + else + c = (s2 - h2 + t2) / 2; + // copy + for(i = 0; i != c; i++) { + // the target queue is full? + if(t == h-1 || (h == 0 && t == s-1)) + break; + // the victim queue is empty? 
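The steal count c in runqsteal above is half the victim's remaining occupancy, computed separately for the unwrapped and wrapped cases (the first stolen element has already been dequeued by that point). A standalone check with illustrative indices:

#include <stdio.h>

int
main(void)
{
    int s2 = 8, h2, t2, c;

    h2 = 1; t2 = 7;             /* no wrap: slots 1..6 occupied, 6 G's */
    c = (t2 - h2) / 2;
    printf("no wrap: steal %d more\n", c);  /* 3 */

    h2 = 6; t2 = 3;             /* wrapped: slots 6,7,0,1,2 occupied, 5 G's */
    c = (s2 - h2 + t2) / 2;
    printf("wrap: steal %d more\n", c);     /* 2 */
    return 0;
}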
+ if(t2 == h2)
+ break;
+ gp1 = p2->runq[h2++];
+ if(h2 == s2)
+ h2 = 0;
+ p->runq[t++] = gp1;
+ if(t == s)
+ t = 0;
+ }
+ p->runqtail = t;
+ p2->runqhead = h2;
+ runtime·unlock(p2);
+ runtime·unlock(p);
+ return gp;
+}
+
+void
+runtime·testSchedLocalQueue(void)
+{
+ P p;
+ G gs[1000];
+ int32 i, j;
+
+ runtime·memclr((byte*)&p, sizeof(p));
+ p.runqsize = 1;
+ p.runqhead = 0;
+ p.runqtail = 0;
+ p.runq = runtime·malloc(p.runqsize*sizeof(*p.runq));
+
+ for(i = 0; i < nelem(gs); i++) {
+ if(runqget(&p) != nil)
+ runtime·throw("runq is not empty initially");
+ for(j = 0; j < i; j++)
+ runqput(&p, &gs[i]);
+ for(j = 0; j < i; j++) {
+ if(runqget(&p) != &gs[i]) {
+ runtime·printf("bad element at iter %d/%d\n", i, j);
+ runtime·throw("bad element");
+ }
+ }
+ if(runqget(&p) != nil)
+ runtime·throw("runq is not empty afterwards");
+ }
+}
+
+void
+runtime·testSchedLocalQueueSteal(void)
+{
+ P p1, p2;
+ G gs[1000], *gp;
+ int32 i, j, s;
+
+ runtime·memclr((byte*)&p1, sizeof(p1));
+ p1.runqsize = 1;
+ p1.runqhead = 0;
+ p1.runqtail = 0;
+ p1.runq = runtime·malloc(p1.runqsize*sizeof(*p1.runq));
+
+ runtime·memclr((byte*)&p2, sizeof(p2));
+ p2.runqsize = nelem(gs);
+ p2.runqhead = 0;
+ p2.runqtail = 0;
+ p2.runq = runtime·malloc(p2.runqsize*sizeof(*p2.runq));
+
+ for(i = 0; i < nelem(gs); i++) {
+ for(j = 0; j < i; j++) {
+ gs[j].sig = 0;
+ runqput(&p1, &gs[j]);
+ }
+ gp = runqsteal(&p2, &p1);
+ s = 0;
+ if(gp) {
+ s++;
+ gp->sig++;
+ }
+ while(gp = runqget(&p2)) {
+ s++;
+ gp->sig++;
+ }
+ while(gp = runqget(&p1))
+ gp->sig++;
+ for(j = 0; j < i; j++) {
+ if(gs[j].sig != 1) {
+ runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
+ runtime·throw("bad element");
+ }
+ }
+ if(s != i/2 && s != i/2+1) {
+ runtime·printf("bad steal %d, want %d or %d, iter %d\n",
+ s, i/2, i/2+1, i);
+ runtime·throw("bad steal");
+ }
+ }
+}