Diffstat (limited to 'src/pkg/runtime/proc.c')
 src/pkg/runtime/proc.c (-rw-r--r--) | 1009 lines changed
 1 file changed, 801 insertions(+), 208 deletions(-)
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index 5734509e0..de26c72d3 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -4,10 +4,12 @@ #include "runtime.h" #include "arch_GOARCH.h" +#include "zaexperiment.h" #include "malloc.h" #include "stack.h" #include "race.h" #include "type.h" +#include "../../cmd/ld/textflag.h" // Goroutine scheduler // The scheduler's job is to distribute ready-to-run goroutines over worker threads. @@ -29,8 +31,9 @@ struct Sched { M* midle; // idle m's waiting for work int32 nmidle; // number of idle m's waiting for work - int32 mlocked; // number of locked m's waiting for work + int32 nmidlelocked; // number of locked m's waiting for work int32 mcount; // number of m's that have been created + int32 maxmcount; // maximum number of m's allowed (or die) P* pidle; // idle P's uint32 npidle; @@ -45,6 +48,7 @@ struct Sched { Lock gflock; G* gfree; + uint32 gcwaiting; // gc is waiting to run int32 stopwait; Note stopnote; uint32 sysmonwait; @@ -60,9 +64,8 @@ enum { MaxGomaxprocs = 1<<8 }; Sched runtime·sched; int32 runtime·gomaxprocs; -bool runtime·singleproc; +uint32 runtime·needextram; bool runtime·iscgo; -uint32 runtime·gcwaiting; M runtime·m0; G runtime·g0; // idle goroutine for m0 G* runtime·allg; @@ -86,7 +89,6 @@ static void procresize(int32); static void acquirep(P*); static P* releasep(void); static void newm(void(*)(void), P*); -static void goidle(void); static void stopm(void); static void startm(P*, bool); static void handoffp(P*); @@ -94,21 +96,24 @@ static void wakep(void); static void stoplockedm(void); static void startlockedm(G*); static void sysmon(void); -static uint32 retake(uint32*); -static void inclocked(int32); +static uint32 retake(int64); +static void incidlelocked(int32); static void checkdead(void); static void exitsyscall0(G*); static void park0(G*); -static void gosched0(G*); static void goexit0(G*); static void gfput(P*, G*); static G* gfget(P*); static void gfpurge(P*); static void globrunqput(G*); -static G* globrunqget(P*); +static G* globrunqget(P*, int32); static P* pidleget(void); static void pidleput(P*); static void injectglist(G*); +static bool preemptall(void); +static bool preemptone(P*); +static bool exitsyscallfast(void); +static bool haveexperiment(int8*); // The bootstrap sequence is: // @@ -123,19 +128,27 @@ runtime·schedinit(void) { int32 n, procs; byte *p; + Eface i; + + runtime·sched.maxmcount = 10000; + runtime·precisestack = haveexperiment("precisestack"); - m->nomemprof++; runtime·mprofinit(); runtime·mallocinit(); mcommoninit(m); + + // Initialize the itable value for newErrorCString, + // so that the next time it gets called, possibly + // in a fault during a garbage collection, it will not + // need to allocated memory. + runtime·newErrorCString(0, &i); runtime·goargs(); runtime·goenvs(); + runtime·parsedebugvars(); - // For debugging: - // Allocate internal symbol table representation now, - // so that we don't need to call malloc when we crash. - // runtime·findfunc(0); + // Allocate internal symbol table representation now, we need it for GC anyway. + runtime·symtabinit(); runtime·sched.lastpoll = runtime·nanotime(); procs = 1; @@ -149,7 +162,6 @@ runtime·schedinit(void) procresize(procs); mstats.enablegc = 1; - m->nomemprof--; if(raceenabled) g->racectx = runtime·raceinit(); @@ -160,10 +172,22 @@ extern void main·main(void); static FuncVal scavenger = {runtime·MHeap_Scavenger}; +static FuncVal initDone = { runtime·unlockOSThread }; + // The main goroutine. 
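The new `maxmcount` field, initialized to 10000 in `schedinit` above, backs the `checkmcount()` guard that appears in the next hunk. A standalone C sketch of that guard (names illustrative, not the runtime's Plan 9 C):

```c
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the relevant Sched fields. */
struct sched { int mcount, maxmcount; };

/* Called with the scheduler lock held, right after mcount++. */
static void checkmcount(struct sched *s) {
    if (s->mcount > s->maxmcount) {
        fprintf(stderr, "runtime: program exceeds %d-thread limit\n",
                s->maxmcount);
        abort();  /* the runtime throws "thread exhaustion" */
    }
}

int main(void) {
    struct sched s = { .mcount = 0, .maxmcount = 10000 };
    s.mcount++;       /* one new M created */
    checkmcount(&s);  /* passes while under the limit */
    return 0;
}
```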
void runtime·main(void) { + Defer d; + + // Max stack size is 1 GB on 64-bit, 250 MB on 32-bit. + // Using decimal instead of binary GB and MB because + // they look nicer in the stack overflow failure message. + if(sizeof(void*) == 8) + runtime·maxstacksize = 1000000000; + else + runtime·maxstacksize = 250000000; + newm(sysmon, nil); // Lock the main goroutine onto this, the main OS thread, @@ -173,10 +197,24 @@ runtime·main(void) // by calling runtime.LockOSThread during initialization // to preserve the lock. runtime·lockOSThread(); + + // Defer unlock so that runtime.Goexit during init does the unlock too. + d.fn = &initDone; + d.siz = 0; + d.link = g->defer; + d.argp = (void*)-1; + d.special = true; + d.free = false; + g->defer = &d; + if(m != &runtime·m0) runtime·throw("runtime·main not on m0"); runtime·newproc1(&scavenger, nil, 0, 0, runtime·main); main·init(); + + if(g->defer != &d || d.fn != &initDone) + runtime·throw("runtime: bad defer entry after init"); + g->defer = d.link; runtime·unlockOSThread(); main·main(); @@ -233,14 +271,36 @@ runtime·tracebackothers(G *me) int32 traceback; traceback = runtime·gotraceback(nil); + + // Show the current goroutine first, if we haven't already. + if((gp = m->curg) != nil && gp != me) { + runtime·printf("\n"); + runtime·goroutineheader(gp); + runtime·traceback(gp->sched.pc, gp->sched.sp, gp->sched.lr, gp); + } + for(gp = runtime·allg; gp != nil; gp = gp->alllink) { - if(gp == me || gp->status == Gdead) + if(gp == me || gp == m->curg || gp->status == Gdead) continue; if(gp->issystem && traceback < 2) continue; runtime·printf("\n"); runtime·goroutineheader(gp); - runtime·traceback(gp->sched.pc, (byte*)gp->sched.sp, 0, gp); + if(gp->status == Grunning) { + runtime·printf("\tgoroutine running on other thread; stack unavailable\n"); + runtime·printcreatedby(gp); + } else + runtime·traceback(gp->sched.pc, gp->sched.sp, gp->sched.lr, gp); + } +} + +static void +checkmcount(void) +{ + // sched lock is held + if(runtime·sched.mcount > runtime·sched.maxmcount) { + runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount); + runtime·throw("thread exhaustion"); } } @@ -256,7 +316,7 @@ mcommoninit(M *mp) runtime·lock(&runtime·sched); mp->id = runtime·sched.mcount++; - + checkmcount(); runtime·mpreinit(mp); // Add to runtime·allm so garbage collector doesn't free m @@ -273,6 +333,7 @@ void runtime·ready(G *gp) { // Mark runnable. + m->locks++; // disable preemption because it can be holding p in a local var if(gp->status != Gwaiting) { runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status); runtime·throw("bad g->status in ready"); @@ -281,6 +342,9 @@ runtime·ready(G *gp) runqput(m->p, gp); if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0) // TODO: fast atomic wakep(); + m->locks--; + if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack + g->stackguard0 = StackPreempt; } int32 @@ -340,6 +404,34 @@ runtime·helpgc(int32 nproc) runtime·unlock(&runtime·sched); } +// Similar to stoptheworld but best-effort and can be called several times. +// There is no reverse operation, used during crashing. +// This function must not lock any mutexes. 
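`runtime·main` above wires a `Defer` record onto `g->defer` by hand so that a `runtime.Goexit` during init still unlocks the main OS thread. A minimal standalone sketch of that intrusive-list trick, with illustrative types rather than the runtime's:

```c
#include <stdio.h>

typedef struct Defer Defer;
struct Defer {
    void (*fn)(void);
    Defer *link;       /* next record; the list head lives on the g */
};

static Defer *g_defer; /* stand-in for g->defer */

static void unlock_os_thread(void) { puts("unlockOSThread"); }

int main(void) {
    Defer d = { unlock_os_thread, g_defer };  /* record lives on the stack */
    g_defer = &d;                             /* push by hand, as main() does */
    /* init work would run here; Goexit at any point would walk g_defer */
    if (g_defer != &d)                        /* the CL's sanity check */
        return puts("runtime: bad defer entry after init"), 1;
    g_defer = d.link;                         /* pop the record... */
    d.fn();                                   /* ...and run it directly */
    return 0;
}
```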
+void +runtime·freezetheworld(void) +{ + int32 i; + + if(runtime·gomaxprocs == 1) + return; + // stopwait and preemption requests can be lost + // due to races with concurrently executing threads, + // so try several times + for(i = 0; i < 5; i++) { + // this should tell the scheduler to not start any new goroutines + runtime·sched.stopwait = 0x7fffffff; + runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1); + // this should stop running goroutines + if(!preemptall()) + break; // no running goroutines + runtime·usleep(1000); + } + // to be sure + runtime·usleep(1000); + preemptall(); + runtime·usleep(1000); +} + void runtime·stoptheworld(void) { @@ -350,7 +442,8 @@ runtime·stoptheworld(void) runtime·lock(&runtime·sched); runtime·sched.stopwait = runtime·gomaxprocs; - runtime·atomicstore((uint32*)&runtime·gcwaiting, 1); + runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1); + preemptall(); // stop current P m->p->status = Pgcstop; runtime·sched.stopwait--; @@ -369,10 +462,16 @@ runtime·stoptheworld(void) wait = runtime·sched.stopwait > 0; runtime·unlock(&runtime·sched); - // wait for remaining P's to stop voluntary + // wait for remaining P's to stop voluntarily if(wait) { - runtime·notesleep(&runtime·sched.stopnote); - runtime·noteclear(&runtime·sched.stopnote); + for(;;) { + // wait for 100us, then try to re-preempt in case of any races + if(runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) { + runtime·noteclear(&runtime·sched.stopnote); + break; + } + preemptall(); + } } if(runtime·sched.stopwait) runtime·throw("stoptheworld: not stopped"); @@ -397,6 +496,7 @@ runtime·starttheworld(void) G *gp; bool add; + m->locks++; // disable preemption because it can be holding p in a local var gp = runtime·netpoll(false); // non-blocking injectglist(gp); add = needaddgcproc(); @@ -406,7 +506,7 @@ runtime·starttheworld(void) newprocs = 0; } else procresize(runtime·gomaxprocs); - runtime·gcwaiting = 0; + runtime·sched.gcwaiting = 0; p1 = nil; while(p = pidleget()) { @@ -416,16 +516,9 @@ runtime·starttheworld(void) pidleput(p); break; } - mp = mget(); - if(mp == nil) { - p->link = p1; - p1 = p; - continue; - } - if(mp->nextp) - runtime·throw("starttheworld: inconsistent mp->nextp"); - mp->nextp = p; - runtime·notewakeup(&mp->park); + p->m = mget(); + p->link = p1; + p1 = p; } if(runtime·sched.sysmonwait) { runtime·sched.sysmonwait = false; @@ -436,8 +529,18 @@ runtime·starttheworld(void) while(p1) { p = p1; p1 = p1->link; - add = false; - newm(nil, p); + if(p->m) { + mp = p->m; + p->m = nil; + if(mp->nextp) + runtime·throw("starttheworld: inconsistent mp->nextp"); + mp->nextp = p; + runtime·notewakeup(&mp->park); + } else { + // Start M to run P. Do not start another M below. + newm(nil, p); + add = false; + } } if(add) { @@ -450,16 +553,23 @@ runtime·starttheworld(void) // the maximum number of procs. newm(mhelpgc, nil); } + m->locks--; + if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack + g->stackguard0 = StackPreempt; } // Called to start an M. void runtime·mstart(void) { +#ifdef GOOS_windows +#ifdef GOARCH_386 // It is used by windows-386 only. Unfortunately, seh needs // to be located on os stack, and mstart runs on os stack // for both m0 and m. SEH seh; +#endif +#endif if(g != m->g0) runtime·throw("bad runtime·mstart"); @@ -468,18 +578,20 @@ runtime·mstart(void) // Once we call schedule we're never coming back, // so other calls can reuse this stack space. 
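`freezetheworld` retries its stop-and-preempt requests because either can be lost to races with running threads. A runnable sketch of that bounded best-effort loop, with a stub `preemptall()` standing in for the runtime's (the real code also sleeps 1ms between passes):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic int gcwaiting, stopwait;
static int still_running = 2;      /* pretend two threads need nudging */

static bool preemptall(void) {     /* stub: true while anything still runs */
    if (still_running > 0) { still_running--; return true; }
    return false;
}

int main(void) {
    for (int i = 0; i < 5; i++) {
        atomic_store(&stopwait, 0x7fffffff);  /* forbid starting new work */
        atomic_store(&gcwaiting, 1);          /* ask schedulers to park */
        if (!preemptall())
            break;                            /* all quiet; done early */
    }
    preemptall();                             /* one last best-effort pass */
    printf("frozen: gcwaiting=%d\n", atomic_load(&gcwaiting));
    return 0;
}
```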
runtime·gosave(&m->g0->sched); - m->g0->sched.pc = (void*)-1; // make sure it is never used + m->g0->sched.pc = (uintptr)-1; // make sure it is never used + m->g0->stackguard = m->g0->stackguard0; // cgo sets only stackguard0, copy it to stackguard +#ifdef GOOS_windows +#ifdef GOARCH_386 m->seh = &seh; +#endif +#endif runtime·asminit(); runtime·minit(); // Install signal handlers; after minit so that minit can // prepare the thread to be able to handle the signals. - if(m == &runtime·m0) { + if(m == &runtime·m0) runtime·initsig(); - if(runtime·iscgo) - runtime·newextram(); - } if(m->mstartfn) m->mstartfn(); @@ -541,6 +653,8 @@ runtime·allocm(P *p) if(p == m->p) releasep(); m->locks--; + if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack + g->stackguard0 = StackPreempt; return mp; } @@ -581,12 +695,20 @@ static void unlockextra(M*); // // When the callback is done with the m, it calls dropm to // put the m back on the list. -#pragma textflag 7 +#pragma textflag NOSPLIT void runtime·needm(byte x) { M *mp; + if(runtime·needextram) { + // Can happen if C/C++ code calls Go from a global ctor. + // Can not throw, because scheduler is not initialized yet. + runtime·write(2, "fatal error: cgo callback before cgo call\n", + sizeof("fatal error: cgo callback before cgo call\n")-1); + runtime·exit(1); + } + // Lock extra list, take head, unlock popped list. // nilokay=false is safe here because of the invariant above, // that the extra list always contains or will soon contain @@ -611,12 +733,17 @@ runtime·needm(byte x) runtime·setmg(mp, mp->g0); g->stackbase = (uintptr)(&x + 1024); g->stackguard = (uintptr)(&x - 32*1024); + g->stackguard0 = g->stackguard; +#ifdef GOOS_windows +#ifdef GOARCH_386 // On windows/386, we need to put an SEH frame (two words) - // somewhere on the current stack. We are called - // from needm, and we know there is some available - // space one word into the argument frame. Use that. + // somewhere on the current stack. We are called from cgocallback_gofunc + // and we know that it will leave two unused words below m->curg->sched.sp. + // Use those. m->seh = (SEH*)((uintptr*)&x + 1); +#endif +#endif // Initialize this thread to use the m. runtime·asminit(); @@ -639,14 +766,22 @@ runtime·newextram(void) // the goroutine stack ends. mp = runtime·allocm(nil); gp = runtime·malg(4096); - gp->sched.pc = (void*)runtime·goexit; + gp->sched.pc = (uintptr)runtime·goexit; gp->sched.sp = gp->stackbase; + gp->sched.lr = 0; gp->sched.g = gp; + gp->syscallpc = gp->sched.pc; + gp->syscallsp = gp->sched.sp; + gp->syscallstack = gp->stackbase; + gp->syscallguard = gp->stackguard; gp->status = Gsyscall; mp->curg = gp; mp->locked = LockInternal; mp->lockedg = gp; gp->lockedm = mp; + gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1); + if(raceenabled) + gp->racectx = runtime·racegostart(runtime·newextram); // put on allg for garbage collector runtime·lock(&runtime·sched); if(runtime·lastg == nil) @@ -655,9 +790,6 @@ runtime·newextram(void) runtime·lastg->alllink = gp; runtime·lastg = gp; runtime·unlock(&runtime·sched); - gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1); - if(raceenabled) - gp->racectx = runtime·racegostart(runtime·newextram); // Add m to the extra list. mnext = lockextra(true); @@ -695,7 +827,12 @@ runtime·dropm(void) // Undo whatever initialization minit did during needm. 
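`needm`'s new early-abort path cannot use `throw()` because the scheduler does not exist yet, so it writes the message to fd 2 with the raw write syscall and exits. A standalone sketch of the same pattern, including the `sizeof(msg)-1` trick the hunk uses to avoid `strlen`:

```c
#include <stdlib.h>
#include <unistd.h>

static const char msg[] = "fatal error: cgo callback before cgo call\n";

static void early_fatal(void) {
    write(2, msg, sizeof(msg) - 1);  /* fd 2 = stderr; no locks, no malloc */
    _exit(1);
}

int main(void) {
    int runtime_ready = 1;           /* pretend initialization finished */
    if (!runtime_ready)
        early_fatal();
    return 0;
}
```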
runtime·unminit(); + +#ifdef GOOS_windows +#ifdef GOARCH_386 m->seh = nil; // reset dangling typed pointer +#endif +#endif // Clear m and g, and return m to the extra list. // After the call to setmg we can only call nosplit functions. @@ -714,7 +851,7 @@ runtime·dropm(void) // to runtime.extram. If nilokay is true, then lockextra will // return a nil list head if that's what it finds. If nilokay is false, // lockextra will keep waiting until the list head is no longer nil. -#pragma textflag 7 +#pragma textflag NOSPLIT static M* lockextra(bool nilokay) { @@ -742,7 +879,7 @@ lockextra(bool nilokay) return mp; } -#pragma textflag 7 +#pragma textflag NOSPLIT static void unlockextra(M *mp) { @@ -863,7 +1000,7 @@ handoffp(P *p) return; } runtime·lock(&runtime·sched); - if(runtime·gcwaiting) { + if(runtime·sched.gcwaiting) { p->status = Pgcstop; if(--runtime·sched.stopwait == 0) runtime·notewakeup(&runtime·sched.stopnote); @@ -911,7 +1048,7 @@ stoplockedm(void) p = releasep(); handoffp(p); } - inclocked(1); + incidlelocked(1); // Wait until another thread schedules lockedg again. runtime·notesleep(&m->park); runtime·noteclear(&m->park); @@ -934,7 +1071,7 @@ startlockedm(G *gp) if(mp->nextp) runtime·throw("startlockedm: m has p"); // directly handoff current P to the locked m - inclocked(-1); + incidlelocked(-1); p = releasep(); mp->nextp = p; runtime·notewakeup(&mp->park); @@ -948,7 +1085,7 @@ gcstopm(void) { P *p; - if(!runtime·gcwaiting) + if(!runtime·sched.gcwaiting) runtime·throw("gcstopm: not waiting for gc"); if(m->spinning) { m->spinning = false; @@ -975,7 +1112,9 @@ execute(G *gp) runtime·throw("execute: bad g status"); } gp->status = Grunning; - m->p->tick++; + gp->preempt = false; + gp->stackguard0 = gp->stackguard; + m->p->schedtick++; m->curg = gp; gp->m = m; @@ -984,9 +1123,7 @@ execute(G *gp) if(m->profilehz != hz) runtime·resetcpuprofiler(hz); - if(gp->sched.pc == (byte*)runtime·goexit) // kickoff - runtime·gogocallfn(&gp->sched, gp->fnstart); - runtime·gogo(&gp->sched, 0); + runtime·gogo(&gp->sched); } // Finds a runnable goroutine to execute. @@ -999,7 +1136,7 @@ findrunnable(void) int32 i; top: - if(runtime·gcwaiting) { + if(runtime·sched.gcwaiting) { gcstopm(); goto top; } @@ -1010,7 +1147,7 @@ top: // global runq if(runtime·sched.runqsize) { runtime·lock(&runtime·sched); - gp = globrunqget(m->p); + gp = globrunqget(m->p, 0); runtime·unlock(&runtime·sched); if(gp) return gp; @@ -1033,7 +1170,7 @@ top: } // random steal from other P's for(i = 0; i < 2*runtime·gomaxprocs; i++) { - if(runtime·gcwaiting) + if(runtime·sched.gcwaiting) goto top; p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs]; if(p == m->p) @@ -1046,12 +1183,12 @@ top: stop: // return P and block runtime·lock(&runtime·sched); - if(runtime·gcwaiting) { + if(runtime·sched.gcwaiting) { runtime·unlock(&runtime·sched); goto top; } if(runtime·sched.runqsize) { - gp = globrunqget(m->p); + gp = globrunqget(m->p, 0); runtime·unlock(&runtime·sched); return gp; } @@ -1101,6 +1238,25 @@ stop: goto top; } +static void +resetspinning(void) +{ + int32 nmspinning; + + if(m->spinning) { + m->spinning = false; + nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1); + if(nmspinning < 0) + runtime·throw("findrunnable: negative nmspinning"); + } else + nmspinning = runtime·atomicload(&runtime·sched.nmspinning); + + // M wakeup policy is deliberately somewhat conservative (see nmspinning handling), + // so see if we need to wakeup another P here. 
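`lockextra`/`unlockextra` above guard the extra-M list by CASing the sentinel value 1 into the list-head pointer itself. A sketch of that pointer-as-lock idea using C11 atomics (the real code also yields and sleeps while spinning):

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define LOCKED ((uintptr_t)1)
static _Atomic uintptr_t extram;   /* list head pointer, or 1 while held */

static uintptr_t lockextra(void) {
    for (;;) {
        uintptr_t old = atomic_load(&extram);
        if (old == LOCKED)
            continue;              /* another thread holds the list; spin */
        if (atomic_compare_exchange_weak(&extram, &old, LOCKED))
            return old;            /* we own the list; old is the real head */
    }
}

static void unlockextra(uintptr_t head) {
    atomic_store(&extram, head);   /* storing a real head releases the lock */
}

int main(void) {
    uintptr_t head = lockextra();  /* take the list (initially empty: 0) */
    printf("head=%#lx\n", (unsigned long)head);
    unlockextra(head);             /* put it back, unlocking */
    return 0;
}
```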
+ if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0) + wakep(); +} + // Injects the list of runnable G's into the scheduler. // Can run concurrently with GC. static void @@ -1130,33 +1286,44 @@ static void schedule(void) { G *gp; + uint32 tick; if(m->locks) runtime·throw("schedule: holding locks"); top: - if(runtime·gcwaiting) { + if(runtime·sched.gcwaiting) { gcstopm(); goto top; } - gp = runqget(m->p); - if(gp == nil) - gp = findrunnable(); - - if(m->spinning) { - m->spinning = false; - runtime·xadd(&runtime·sched.nmspinning, -1); + gp = nil; + // Check the global runnable queue once in a while to ensure fairness. + // Otherwise two goroutines can completely occupy the local runqueue + // by constantly respawning each other. + tick = m->p->schedtick; + // This is a fancy way to say tick%61==0, + // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors. + if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) { + runtime·lock(&runtime·sched); + gp = globrunqget(m->p, 1); + runtime·unlock(&runtime·sched); + if(gp) + resetspinning(); + } + if(gp == nil) { + gp = runqget(m->p); + if(gp && m->spinning) + runtime·throw("schedule: spinning with local work"); + } + if(gp == nil) { + gp = findrunnable(); // blocks until work is available + resetspinning(); } - - // M wakeup policy is deliberately somewhat conservative (see nmspinning handling), - // so see if we need to wakeup another M here. - if (m->p->runqhead != m->p->runqtail && - runtime·atomicload(&runtime·sched.nmspinning) == 0 && - runtime·atomicload(&runtime·sched.npidle) > 0) // TODO: fast atomic - wakep(); if(gp->lockedm) { + // Hands off own p to the locked m, + // then blocks waiting for a new p. startlockedm(gp); goto top; } @@ -1198,12 +1365,12 @@ park0(G *gp) void runtime·gosched(void) { - runtime·mcall(gosched0); + runtime·mcall(runtime·gosched0); } // runtime·gosched continuation on g0. -static void -gosched0(G *gp) +void +runtime·gosched0(G *gp) { gp->status = Grunnable; gp->m = nil; @@ -1219,6 +1386,10 @@ gosched0(G *gp) } // Finishes execution of the current goroutine. +// Need to mark it as nosplit, because it runs with sp > stackbase (as runtime·lessstack). +// Since it does not return it does not matter. But if it is preempted +// at the split stack check, GC will complain about inconsistent sp. +#pragma textflag NOSPLIT void runtime·goexit(void) { @@ -1232,13 +1403,12 @@ static void goexit0(G *gp) { gp->status = Gdead; - gp->fnstart = nil; gp->m = nil; gp->lockedm = nil; m->curg = nil; m->lockedg = nil; if(m->locked & ~LockExternal) { - runtime·printf("invalid m->locked = %d", m->locked); + runtime·printf("invalid m->locked = %d\n", m->locked); runtime·throw("internal lockOSThread error"); } m->locked = 0; @@ -1247,6 +1417,18 @@ goexit0(G *gp) schedule(); } +#pragma textflag NOSPLIT +static void +save(void *pc, uintptr sp) +{ + g->sched.pc = (uintptr)pc; + g->sched.sp = sp; + g->sched.lr = 0; + g->sched.ret = 0; + g->sched.ctxt = 0; + g->sched.g = g; +} + // The goroutine g is about to enter a system call. // Record that it's not using the cpu anymore. // This is called only from the go syscall library and cgocall, @@ -1255,25 +1437,24 @@ goexit0(G *gp) // Entersyscall cannot split the stack: the runtime·gosave must // make g->sched refer to the caller's stack segment, because // entersyscall is going to return immediately after. 
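The fairness check in `schedule` computes `tick%61` as `tick - ((tick*0x4325c53f)>>36)*61`. The magic constant is ceil(2^36/61), so the shifted multiply is an exact `tick/61` for any 32-bit tick, and the expression really is a modulo. A quick self-check:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    for (uint32_t tick = 0; tick < 1000000; tick++) {
        /* 0x4325c53f == ceil(2^36 / 61), so this is floor(tick / 61) */
        uint32_t q = (uint32_t)(((uint64_t)tick * 0x4325c53fu) >> 36);
        assert(tick - q * 61 == tick % 61);  /* identical to a real modulo */
    }
    printf("fast tick%%61 agrees with %% for all tested ticks\n");
    return 0;
}
```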
-#pragma textflag 7 +#pragma textflag NOSPLIT void ·entersyscall(int32 dummy) { - if(m->profilehz > 0) - runtime·setprof(false); + // Disable preemption because during this function g is in Gsyscall status, + // but can have inconsistent g->sched, do not let GC observe it. + m->locks++; - // Leave SP around for gc and traceback. - g->sched.sp = (uintptr)runtime·getcallersp(&dummy); - g->sched.pc = runtime·getcallerpc(&dummy); - g->sched.g = g; - g->gcsp = g->sched.sp; - g->gcpc = g->sched.pc; - g->gcstack = g->stackbase; - g->gcguard = g->stackguard; + // Leave SP around for GC and traceback. + save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy)); + g->syscallsp = g->sched.sp; + g->syscallpc = g->sched.pc; + g->syscallstack = g->stackbase; + g->syscallguard = g->stackguard; g->status = Gsyscall; - if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) { + if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) { // runtime·printf("entersyscall inconsistent %p [%p,%p]\n", - // g->gcsp, g->gcguard-StackGuard, g->gcstack); + // g->syscallsp, g->syscallguard-StackGuard, g->syscallstack); runtime·throw("entersyscall"); } @@ -1284,100 +1465,96 @@ void runtime·notewakeup(&runtime·sched.sysmonnote); } runtime·unlock(&runtime·sched); - runtime·gosave(&g->sched); // re-save for traceback + save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy)); } m->mcache = nil; - m->p->tick++; m->p->m = nil; runtime·atomicstore(&m->p->status, Psyscall); - if(runtime·gcwaiting) { + if(runtime·sched.gcwaiting) { runtime·lock(&runtime·sched); if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) { if(--runtime·sched.stopwait == 0) runtime·notewakeup(&runtime·sched.stopnote); } runtime·unlock(&runtime·sched); - runtime·gosave(&g->sched); // re-save for traceback + save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy)); } + + // Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched). + // We set stackguard to StackPreempt so that first split stack check calls morestack. + // Morestack detects this case and throws. + g->stackguard0 = StackPreempt; + m->locks--; } // The same as runtime·entersyscall(), but with a hint that the syscall is blocking. -#pragma textflag 7 +#pragma textflag NOSPLIT void ·entersyscallblock(int32 dummy) { P *p; - if(m->profilehz > 0) - runtime·setprof(false); + m->locks++; // see comment in entersyscall - // Leave SP around for gc and traceback. - g->sched.sp = (uintptr)runtime·getcallersp(&dummy); - g->sched.pc = runtime·getcallerpc(&dummy); - g->sched.g = g; - g->gcsp = g->sched.sp; - g->gcpc = g->sched.pc; - g->gcstack = g->stackbase; - g->gcguard = g->stackguard; + // Leave SP around for GC and traceback. 
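Both syscall entry points now poison `g->stackguard0` with `StackPreempt` so that the next split-stack prologue is forced into `morestack`, which detects the sentinel. A toy model of that check, assuming the era's `stack.h` value for the sentinel (-1314, i.e. 0x…fade):

```c
#include <stdint.h>
#include <stdio.h>

static uintptr_t stackguard0;           /* per-g field read by prologues */
#define StackPreempt ((uintptr_t)-1314) /* larger than any real SP */

/* What a split-stack prologue morally does before each function body. */
static int needs_morestack(uintptr_t sp) {
    return sp < stackguard0;            /* "need more stack" -> morestack */
}

int main(void) {
    uintptr_t sp = 0x7fff0000;          /* hypothetical current SP */
    stackguard0 = 0x7ffe0000;           /* normal guard: check passes */
    printf("normal: morestack=%d\n", needs_morestack(sp));
    stackguard0 = StackPreempt;         /* preempt request poisons the guard */
    printf("poisoned: morestack=%d\n", needs_morestack(sp)); /* always taken */
    return 0;
}
```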
+ save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy)); + g->syscallsp = g->sched.sp; + g->syscallpc = g->sched.pc; + g->syscallstack = g->stackbase; + g->syscallguard = g->stackguard; g->status = Gsyscall; - if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) { - // runtime·printf("entersyscallblock inconsistent %p [%p,%p]\n", - // g->gcsp, g->gcguard-StackGuard, g->gcstack); + if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) { + // runtime·printf("entersyscall inconsistent %p [%p,%p]\n", + // g->syscallsp, g->syscallguard-StackGuard, g->syscallstack); runtime·throw("entersyscallblock"); } p = releasep(); handoffp(p); if(g->isbackground) // do not consider blocked scavenger for deadlock detection - inclocked(1); - runtime·gosave(&g->sched); // re-save for traceback + incidlelocked(1); + + // Resave for traceback during blocked call. + save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy)); + + g->stackguard0 = StackPreempt; // see comment in entersyscall + m->locks--; } // The goroutine g exited its system call. // Arrange for it to run on a cpu again. // This is called only from the go syscall library, not // from the low-level system calls used by the runtime. +#pragma textflag NOSPLIT void runtime·exitsyscall(void) { - P *p; + m->locks++; // see comment in entersyscall - // Check whether the profiler needs to be turned on. - if(m->profilehz > 0) - runtime·setprof(true); + if(g->isbackground) // do not consider blocked scavenger for deadlock detection + incidlelocked(-1); - // Try to re-acquire the last P. - if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) { + if(exitsyscallfast()) { // There's a cpu for us, so we can run. - m->mcache = m->p->mcache; - m->p->m = m; - m->p->tick++; + m->p->syscalltick++; g->status = Grunning; // Garbage collector isn't running (since we are), // so okay to clear gcstack and gcsp. - g->gcstack = (uintptr)nil; - g->gcsp = (uintptr)nil; + g->syscallstack = (uintptr)nil; + g->syscallsp = (uintptr)nil; + m->locks--; + if(g->preempt) { + // restore the preemption request in case we've cleared it in newstack + g->stackguard0 = StackPreempt; + } else { + // otherwise restore the real stackguard, we've spoiled it in entersyscall/entersyscallblock + g->stackguard0 = g->stackguard; + } return; } - if(g->isbackground) // do not consider blocked scavenger for deadlock detection - inclocked(-1); - // Try to get any other idle P. - m->p = nil; - if(runtime·sched.pidle) { - runtime·lock(&runtime·sched); - p = pidleget(); - runtime·unlock(&runtime·sched); - if(p) { - acquirep(p); - m->p->tick++; - g->status = Grunning; - g->gcstack = (uintptr)nil; - g->gcsp = (uintptr)nil; - return; - } - } + m->locks--; // Call the scheduler. runtime·mcall(exitsyscall0); @@ -1388,8 +1565,46 @@ runtime·exitsyscall(void) // Must wait until now because until gosched returns // we don't know for sure that the garbage collector // is not running. - g->gcstack = (uintptr)nil; - g->gcsp = (uintptr)nil; + g->syscallstack = (uintptr)nil; + g->syscallsp = (uintptr)nil; + m->p->syscalltick++; +} + +#pragma textflag NOSPLIT +static bool +exitsyscallfast(void) +{ + P *p; + + // Freezetheworld sets stopwait but does not retake P's. + if(runtime·sched.stopwait) { + m->p = nil; + return false; + } + + // Try to re-acquire the last P. + if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) { + // There's a cpu for us, so we can run. 
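The heart of `exitsyscallfast`'s fast path is a single CAS flipping the P from `Psyscall` back to `Prunning`; winning it means sysmon has not retaken the P and the goroutine keeps its processor without touching the scheduler lock. A sketch:

```c
#include <stdatomic.h>
#include <stdio.h>

enum { Pidle, Prunning, Psyscall, Pgcstop };
static _Atomic int pstatus = Psyscall;  /* our P, parked in syscall state */

static int exitsyscallfast(void) {
    int expect = Psyscall;
    /* Succeeds only if sysmon's retake() hasn't moved the P elsewhere. */
    return atomic_compare_exchange_strong(&pstatus, &expect, Prunning);
}

int main(void) {
    printf("reacquired=%d\n", exitsyscallfast());  /* 1: we won the race */
    printf("reacquired=%d\n", exitsyscallfast());  /* 0: already Prunning */
    return 0;
}
```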
+ m->mcache = m->p->mcache; + m->p->m = m; + return true; + } + // Try to get any other idle P. + m->p = nil; + if(runtime·sched.pidle) { + runtime·lock(&runtime·sched); + p = pidleget(); + if(p && runtime·atomicload(&runtime·sched.sysmonwait)) { + runtime·atomicstore(&runtime·sched.sysmonwait, 0); + runtime·notewakeup(&runtime·sched.sysmonnote); + } + runtime·unlock(&runtime·sched); + if(p) { + acquirep(p); + return true; + } + } + return false; } // runtime·exitsyscall slow path on g0. @@ -1406,6 +1621,10 @@ exitsyscall0(G *gp) p = pidleget(); if(p == nil) globrunqput(gp); + else if(runtime·atomicload(&runtime·sched.sysmonwait)) { + runtime·atomicstore(&runtime·sched.sysmonwait, 0); + runtime·notewakeup(&runtime·sched.sysmonnote); + } runtime·unlock(&runtime·sched); if(p) { acquirep(p); @@ -1420,6 +1639,29 @@ exitsyscall0(G *gp) schedule(); // Never returns. } +// Called from syscall package before fork. +void +syscall·runtime_BeforeFork(void) +{ + // Fork can hang if preempted with signals frequently enough (see issue 5517). + // Ensure that we stay on the same M where we disable profiling. + m->locks++; + if(m->profilehz != 0) + runtime·resetcpuprofiler(0); +} + +// Called from syscall package after fork in parent. +void +syscall·runtime_AfterFork(void) +{ + int32 hz; + + hz = runtime·sched.profilehz; + if(hz != 0) + runtime·resetcpuprofiler(hz); + m->locks--; +} + // Hook used by runtime·malg to call runtime·stackalloc on the // scheduler stack. This exists because runtime·stackalloc insists // on being called on the scheduler stack, to avoid trying to grow @@ -1428,7 +1670,7 @@ static void mstackalloc(G *gp) { gp->param = runtime·stackalloc((uintptr)gp->param); - runtime·gogo(&gp->sched, 0); + runtime·gogo(&gp->sched); } // Allocate a new g, with a stack big enough for stacksize bytes. @@ -1455,8 +1697,10 @@ runtime·malg(int32 stacksize) stk = g->param; g->param = nil; } + newg->stacksize = StackSystem + stacksize; newg->stack0 = (uintptr)stk; newg->stackguard = (uintptr)stk + StackGuard; + newg->stackguard0 = newg->stackguard; newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop); runtime·memclr((byte*)newg->stackbase, sizeof(Stktop)); } @@ -1470,7 +1714,7 @@ runtime·malg(int32 stacksize) // are available sequentially after &fn; they would not be // copied if a stack split occurred. It's OK for this to call // functions that split the stack. -#pragma textflag 7 +#pragma textflag NOSPLIT void runtime·newproc(int32 siz, FuncVal* fn, ...) 
{ @@ -1494,7 +1738,8 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp G *newg; int32 siz; -//printf("newproc1 %p %p narg=%d nret=%d\n", fn, argp, narg, nret); +//runtime·printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret); + m->locks++; // disable preemption because it can be holding p in a local var siz = narg + nret; siz = (siz+7) & ~7; @@ -1528,19 +1773,24 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp *(void**)sp = nil; } + runtime·memclr((byte*)&newg->sched, sizeof newg->sched); newg->sched.sp = (uintptr)sp; - newg->sched.pc = (byte*)runtime·goexit; + newg->sched.pc = (uintptr)runtime·goexit; newg->sched.g = newg; - newg->fnstart = fn; + runtime·gostartcallfn(&newg->sched, fn); newg->gopc = (uintptr)callerpc; newg->status = Grunnable; newg->goid = runtime·xadd64(&runtime·sched.goidgen, 1); + newg->panicwrap = 0; if(raceenabled) - newg->racectx = runtime·racegostart(callerpc); + newg->racectx = runtime·racegostart((void*)callerpc); runqput(m->p, newg); if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main) // TODO: fast atomic wakep(); + m->locks--; + if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack + g->stackguard0 = StackPreempt; return newg; } @@ -1641,7 +1891,7 @@ runtime·gomaxprocsfunc(int32 n) } runtime·unlock(&runtime·sched); - runtime·semacquire(&runtime·worldsema); + runtime·semacquire(&runtime·worldsema, false); m->gcing = 1; runtime·stoptheworld(); newprocs = n; @@ -1652,8 +1902,12 @@ runtime·gomaxprocsfunc(int32 n) return ret; } +// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below +// after they modify m->locked. Do not allow preemption during this call, +// or else the m might be different in this function than in the caller. +#pragma textflag NOSPLIT static void -LockOSThread(void) +lockOSThread(void) { m->lockedg = g; g->lockedm = m; @@ -1663,18 +1917,23 @@ void runtime·LockOSThread(void) { m->locked |= LockExternal; - LockOSThread(); + lockOSThread(); } void runtime·lockOSThread(void) { m->locked += LockInternal; - LockOSThread(); + lockOSThread(); } + +// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below +// after they update m->locked. Do not allow preemption during this call, +// or else the m might be in different in this function than in the caller. +#pragma textflag NOSPLIT static void -UnlockOSThread(void) +unlockOSThread(void) { if(m->locked != 0) return; @@ -1686,7 +1945,7 @@ void runtime·UnlockOSThread(void) { m->locked &= ~LockExternal; - UnlockOSThread(); + unlockOSThread(); } void @@ -1695,7 +1954,7 @@ runtime·unlockOSThread(void) if(m->locked < LockInternal) runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread"); m->locked -= LockInternal; - UnlockOSThread(); + unlockOSThread(); } bool @@ -1712,14 +1971,6 @@ runtime·golockedOSThread(bool ret) FLUSH(&ret); } -// for testing of wire, unwire -void -runtime·mid(uint32 ret) -{ - ret = m->id; - FLUSH(&ret); -} - void runtime·NumGoroutine(intgo ret) { @@ -1755,17 +2006,25 @@ runtime·mcount(void) } void -runtime·badmcall(void) // called from assembly +runtime·badmcall(void (*fn)(G*)) // called from assembly { + USED(fn); // TODO: print fn? 
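`newproc1`, `ready`, `starttheworld`, and `allocm` all follow the same new discipline: bump `m->locks` while a P may be cached in a local variable, and on the last decrement re-arm a pending preemption request that `newstack` may have cleared. In miniature:

```c
#include <stdint.h>
#include <stdio.h>

#define StackPreempt ((uintptr_t)-1314)

static int m_locks;                   /* m->locks: >0 disables preemption */
static int g_preempt;                 /* g->preempt: request is pending */
static uintptr_t g_stackguard0 = 0x1000, g_stackguard = 0x1000;

static void acquire(void) { m_locks++; }

static void release(void) {
    m_locks--;
    if (m_locks == 0 && g_preempt)
        g_stackguard0 = StackPreempt; /* restore request newstack cleared */
}

int main(void) {
    acquire();                        /* e.g. entering runtime·ready */
    g_preempt = 1;                    /* preemption requested meanwhile... */
    g_stackguard0 = g_stackguard;     /* ...and cleared by newstack */
    release();                        /* request is re-armed on the way out */
    printf("re-armed: %d\n", g_stackguard0 == StackPreempt);
    return 0;
}
```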
runtime·throw("runtime: mcall called on m->g0 stack"); } void -runtime·badmcall2(void) // called from assembly +runtime·badmcall2(void (*fn)(G*)) // called from assembly { + USED(fn); runtime·throw("runtime: mcall function returned"); } +void +runtime·badreflectcall(void) // called from assembly +{ + runtime·panicstring("runtime: arg size to reflect.call more than 1GB"); +} + static struct { Lock; void (*fn)(uintptr*, int32); @@ -1773,26 +2032,126 @@ static struct { uintptr pcbuf[100]; } prof; +static void +System(void) +{ +} + // Called if we receive a SIGPROF signal. void runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp) { int32 n; + bool traceback; - // Windows does profiling in a dedicated thread w/o m. - if(!Windows && (m == nil || m->mcache == nil)) - return; if(prof.fn == nil || prof.hz == 0) return; + traceback = true; + // Windows does profiling in a dedicated thread w/o m. + if(!Windows && (m == nil || m->mcache == nil)) + traceback = false; + + // Define that a "user g" is a user-created goroutine, and a "system g" + // is one that is m->g0 or m->gsignal. We've only made sure that we + // can unwind user g's, so exclude the system g's. + // + // It is not quite as easy as testing gp == m->curg (the current user g) + // because we might be interrupted for profiling halfway through a + // goroutine switch. The switch involves updating three (or four) values: + // g, PC, SP, and (on arm) LR. The PC must be the last to be updated, + // because once it gets updated the new g is running. + // + // When switching from a user g to a system g, LR is not considered live, + // so the update only affects g, SP, and PC. Since PC must be last, there + // the possible partial transitions in ordinary execution are (1) g alone is updated, + // (2) both g and SP are updated, and (3) SP alone is updated. + // If g is updated, we'll see a system g and not look closer. + // If SP alone is updated, we can detect the partial transition by checking + // whether the SP is within g's stack bounds. (We could also require that SP + // be changed only after g, but the stack bounds check is needed by other + // cases, so there is no need to impose an additional requirement.) + // + // There is one exceptional transition to a system g, not in ordinary execution. + // When a signal arrives, the operating system starts the signal handler running + // with an updated PC and SP. The g is updated last, at the beginning of the + // handler. There are two reasons this is okay. First, until g is updated the + // g and SP do not match, so the stack bounds check detects the partial transition. + // Second, signal handlers currently run with signals disabled, so a profiling + // signal cannot arrive during the handler. + // + // When switching from a system g to a user g, there are three possibilities. + // + // First, it may be that the g switch has no PC update, because the SP + // either corresponds to a user g throughout (as in runtime.asmcgocall) + // or because it has been arranged to look like a user g frame + // (as in runtime.cgocallback_gofunc). In this case, since the entire + // transition is a g+SP update, a partial transition updating just one of + // those will be detected by the stack bounds check. + // + // Second, when returning from a signal handler, the PC and SP updates + // are performed by the operating system in an atomic update, so the g + // update must be done before them. 
The stack bounds check detects + // the partial transition here, and (again) signal handlers run with signals + // disabled, so a profiling signal cannot arrive then anyway. + // + // Third, the common case: it may be that the switch updates g, SP, and PC + // separately, as in runtime.gogo. + // + // Because runtime.gogo is the only instance, we check whether the PC lies + // within that function, and if so, not ask for a traceback. This approach + // requires knowing the size of the runtime.gogo function, which we + // record in arch_*.h and check in runtime_test.go. + // + // There is another apparently viable approach, recorded here in case + // the "PC within runtime.gogo" check turns out not to be usable. + // It would be possible to delay the update of either g or SP until immediately + // before the PC update instruction. Then, because of the stack bounds check, + // the only problematic interrupt point is just before that PC update instruction, + // and the sigprof handler can detect that instruction and simulate stepping past + // it in order to reach a consistent state. On ARM, the update of g must be made + // in two places (in R10 and also in a TLS slot), so the delayed update would + // need to be the SP update. The sigprof handler must read the instruction at + // the current PC and if it was the known instruction (for example, JMP BX or + // MOV R2, PC), use that other register in place of the PC value. + // The biggest drawback to this solution is that it requires that we can tell + // whether it's safe to read from the memory pointed at by PC. + // In a correct program, we can test PC == nil and otherwise read, + // but if a profiling signal happens at the instant that a program executes + // a bad jump (before the program manages to handle the resulting fault) + // the profiling handler could fault trying to read nonexistent memory. + // + // To recap, there are no constraints on the assembly being used for the + // transition. We simply require that g and SP match and that the PC is not + // in runtime.gogo. + // + // On Windows, one m is sending reports about all the g's, so gp == m->curg + // is not a useful comparison. The profilem function in os_windows.c has + // already checked that gp is a user g. + if(gp == nil || + (!Windows && gp != m->curg) || + (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp || + ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes)) + traceback = false; + + // Race detector calls asmcgocall w/o entersyscall/exitsyscall, + // we can not currently unwind through asmcgocall. + if(m != nil && m->racecall) + traceback = false; runtime·lock(&prof); if(prof.fn == nil) { runtime·unlock(&prof); return; } - n = runtime·gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil); - if(n > 0) - prof.fn(prof.pcbuf, n); + n = 0; + if(traceback) + n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false); + if(!traceback || n <= 0) { + n = 2; + prof.pcbuf[0] = (uintptr)pc; + prof.pcbuf[1] = (uintptr)System + 1; + } + prof.fn(prof.pcbuf, n); runtime·unlock(&prof); } @@ -1808,7 +2167,11 @@ runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) if(fn == nil) hz = 0; - // Stop profiler on this cpu so that it is safe to lock prof. + // Disable preemption, otherwise we can be rescheduled to another thread + // that has profiling enabled. + m->locks++; + + // Stop profiler on this thread so that it is safe to lock prof. 
// if a profiling signal came in while we had prof locked, // it would deadlock. runtime·resetcpuprofiler(0); @@ -1823,6 +2186,8 @@ runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) if(hz != 0) runtime·resetcpuprofiler(hz); + + m->locks--; } // Change number of processors. The world is stopped, sched is locked. @@ -1840,7 +2205,8 @@ procresize(int32 new) for(i = 0; i < new; i++) { p = runtime·allp[i]; if(p == nil) { - p = (P*)runtime·mallocgc(sizeof(*p), 0, 0, 1); + p = (P*)runtime·mallocgc(sizeof(*p), 0, FlagNoInvokeGC); + p->id = i; p->status = Pgcstop; runtime·atomicstorep(&runtime·allp[i], p); } @@ -1852,7 +2218,7 @@ procresize(int32 new) } if(p->runq == nil) { p->runqsize = 128; - p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, 0, 1); + p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, FlagNoInvokeGC); } } @@ -1895,7 +2261,6 @@ procresize(int32 new) p->status = Pidle; pidleput(p); } - runtime·singleproc = new == 1; runtime·atomicstore((uint32*)&runtime·gomaxprocs, new); } @@ -1937,10 +2302,10 @@ releasep(void) } static void -inclocked(int32 v) +incidlelocked(int32 v) { runtime·lock(&runtime·sched); - runtime·sched.mlocked += v; + runtime·sched.nmidlelocked += v; if(v > 0) checkdead(); runtime·unlock(&runtime·sched); @@ -1955,12 +2320,12 @@ checkdead(void) int32 run, grunning, s; // -1 for sysmon - run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.mlocked - 1; + run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1; if(run > 0) return; if(run < 0) { - runtime·printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n", - runtime·sched.nmidle, runtime·sched.mlocked, runtime·sched.mcount); + runtime·printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n", + runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount); runtime·throw("checkdead: inconsistent counts"); } grunning = 0; @@ -1985,10 +2350,10 @@ static void sysmon(void) { uint32 idle, delay; - int64 now, lastpoll; + int64 now, lastpoll, lasttrace; G *gp; - uint32 ticks[MaxGomaxprocs]; + lasttrace = 0; idle = 0; // how many cycles in succession we had not wokeup somebody delay = 0; for(;;) { @@ -1999,9 +2364,10 @@ sysmon(void) if(delay > 10*1000) // up to 10ms delay = 10*1000; runtime·usleep(delay); - if(runtime·gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) { // TODO: fast atomic + if(runtime·debug.schedtrace <= 0 && + (runtime·sched.gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs)) { // TODO: fast atomic runtime·lock(&runtime·sched); - if(runtime·atomicload(&runtime·gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) { + if(runtime·atomicload(&runtime·sched.gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) { runtime·atomicstore(&runtime·sched.sysmonwait, 1); runtime·unlock(&runtime·sched); runtime·notesleep(&runtime·sched.sysmonnote); @@ -2014,51 +2380,231 @@ sysmon(void) // poll network if not polled for more than 10ms lastpoll = runtime·atomicload64(&runtime·sched.lastpoll); now = runtime·nanotime(); - if(lastpoll != 0 && lastpoll + 10*1000*1000 > now) { + if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) { + runtime·cas64(&runtime·sched.lastpoll, lastpoll, now); gp = runtime·netpoll(false); // non-blocking - injectglist(gp); + if(gp) { + // Need to decrement number of idle locked M's + // (pretending that one more is running) before injectglist. 
+ // Otherwise it can lead to the following situation: + // injectglist grabs all P's but before it starts M's to run the P's, + // another M returns from syscall, finishes running its G, + // observes that there is no work to do and no other running M's + // and reports deadlock. + incidlelocked(-1); + injectglist(gp); + incidlelocked(1); + } } // retake P's blocked in syscalls - if(retake(ticks)) + // and preempt long running G's + if(retake(now)) idle = 0; else idle++; + + if(runtime·debug.schedtrace > 0 && lasttrace + runtime·debug.schedtrace*1000000ll <= now) { + lasttrace = now; + runtime·schedtrace(runtime·debug.scheddetail); + } } } +typedef struct Pdesc Pdesc; +struct Pdesc +{ + uint32 schedtick; + int64 schedwhen; + uint32 syscalltick; + int64 syscallwhen; +}; +static Pdesc pdesc[MaxGomaxprocs]; + static uint32 -retake(uint32 *ticks) +retake(int64 now) { uint32 i, s, n; int64 t; P *p; + Pdesc *pd; n = 0; for(i = 0; i < runtime·gomaxprocs; i++) { p = runtime·allp[i]; if(p==nil) continue; - t = p->tick; - if(ticks[i] != t) { - ticks[i] = t; - continue; - } + pd = &pdesc[i]; s = p->status; - if(s != Psyscall) + if(s == Psyscall) { + // Retake P from syscall if it's there for more than 1 sysmon tick (20us). + // But only if there is other work to do. + t = p->syscalltick; + if(pd->syscalltick != t) { + pd->syscalltick = t; + pd->syscallwhen = now; + continue; + } + if(p->runqhead == p->runqtail && + runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0) + continue; + // Need to decrement number of idle locked M's + // (pretending that one more is running) before the CAS. + // Otherwise the M from which we retake can exit the syscall, + // increment nmidle and report deadlock. + incidlelocked(-1); + if(runtime·cas(&p->status, s, Pidle)) { + n++; + handoffp(p); + } + incidlelocked(1); + } else if(s == Prunning) { + // Preempt G if it's running for more than 10ms. + t = p->schedtick; + if(pd->schedtick != t) { + pd->schedtick = t; + pd->schedwhen = now; + continue; + } + if(pd->schedwhen + 10*1000*1000 > now) + continue; + preemptone(p); + } + } + return n; +} + +// Tell all goroutines that they have been preempted and they should stop. +// This function is purely best-effort. It can fail to inform a goroutine if a +// processor just started running it. +// No locks need to be held. +// Returns true if preemption request was issued to at least one goroutine. +static bool +preemptall(void) +{ + P *p; + int32 i; + bool res; + + res = false; + for(i = 0; i < runtime·gomaxprocs; i++) { + p = runtime·allp[i]; + if(p == nil || p->status != Prunning) continue; - if(p->runqhead == p->runqtail && runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0) // TODO: fast atomic + res |= preemptone(p); + } + return res; +} + +// Tell the goroutine running on processor P to stop. +// This function is purely best-effort. It can incorrectly fail to inform the +// goroutine. It can send inform the wrong goroutine. Even if it informs the +// correct goroutine, that goroutine might ignore the request if it is +// simultaneously executing runtime·newstack. +// No lock needs to be held. +// Returns true if preemption request was issued. 
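`retake` now implements two policies: hand a P off if it has sat in a syscall for more than one sysmon tick (and there is other work to do), and send a preempt request to any G running longer than 10ms. A sketch of the tick-and-timestamp bookkeeping, with `Pdesc` fields as in the hunk above:

```c
#include <stdint.h>
#include <stdio.h>

enum { Prunning, Psyscall };

typedef struct {
    int status;
    uint32_t schedtick, syscalltick;  /* bumped on every schedule/syscall */
} P;

typedef struct {
    uint32_t schedtick, syscalltick;  /* last ticks sysmon observed */
    int64_t schedwhen, syscallwhen;   /* when it observed them (ns) */
} Pdesc;

static void retake_one(P *p, Pdesc *pd, int64_t now) {
    if (p->status == Psyscall) {
        if (pd->syscalltick != p->syscalltick) {     /* a new syscall */
            pd->syscalltick = p->syscalltick;
            pd->syscallwhen = now;
        } else
            printf("handoff P stuck in syscall\n");  /* > 1 sysmon tick */
    } else if (p->status == Prunning) {
        if (pd->schedtick != p->schedtick) {         /* a new G scheduled */
            pd->schedtick = p->schedtick;
            pd->schedwhen = now;
        } else if (pd->schedwhen + 10*1000*1000 <= now)
            printf("preempt long-running G\n");      /* > 10ms on one G */
    }
}

int main(void) {
    P p = { Prunning, 1, 0 };
    Pdesc pd = { 0 };
    retake_one(&p, &pd, 0);             /* first pass records the tick */
    retake_one(&p, &pd, 20*1000*1000);  /* same G 20ms later: preempt */
    return 0;
}
```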
+static bool +preemptone(P *p) +{ + M *mp; + G *gp; + + mp = p->m; + if(mp == nil || mp == m) + return false; + gp = mp->curg; + if(gp == nil || gp == mp->g0) + return false; + gp->preempt = true; + gp->stackguard0 = StackPreempt; + return true; +} + +void +runtime·schedtrace(bool detailed) +{ + static int64 starttime; + int64 now; + int64 id1, id2, id3; + int32 i, q, t, h, s; + int8 *fmt; + M *mp, *lockedm; + G *gp, *lockedg; + P *p; + + now = runtime·nanotime(); + if(starttime == 0) + starttime = now; + + runtime·lock(&runtime·sched); + runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d", + (now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidle, runtime·sched.mcount, + runtime·sched.nmidle, runtime·sched.runqsize); + if(detailed) { + runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n", + runtime·sched.gcwaiting, runtime·sched.nmidlelocked, runtime·sched.nmspinning, + runtime·sched.stopwait, runtime·sched.sysmonwait); + } + // We must be careful while reading data from P's, M's and G's. + // Even if we hold schedlock, most data can be changed concurrently. + // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil. + for(i = 0; i < runtime·gomaxprocs; i++) { + p = runtime·allp[i]; + if(p == nil) continue; - // Need to increment number of locked M's before the CAS. - // Otherwise the M from which we retake can exit the syscall, - // increment nmidle and report deadlock. - inclocked(-1); - if(runtime·cas(&p->status, s, Pidle)) { - n++; - handoffp(p); + mp = p->m; + t = p->runqtail; + h = p->runqhead; + s = p->runqsize; + q = t - h; + if(q < 0) + q += s; + if(detailed) + runtime·printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d/%d gfreecnt=%d\n", + i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, q, s, p->gfreecnt); + else { + // In non-detailed mode format lengths of per-P run queues as: + // [len1 len2 len3 len4] + fmt = " %d"; + if(runtime·gomaxprocs == 1) + fmt = " [%d]\n"; + else if(i == 0) + fmt = " [%d"; + else if(i == runtime·gomaxprocs-1) + fmt = " %d]\n"; + runtime·printf(fmt, q); } - inclocked(1); } - return n; + if(!detailed) { + runtime·unlock(&runtime·sched); + return; + } + for(mp = runtime·allm; mp; mp = mp->alllink) { + p = mp->p; + gp = mp->curg; + lockedg = mp->lockedg; + id1 = -1; + if(p) + id1 = p->id; + id2 = -1; + if(gp) + id2 = gp->goid; + id3 = -1; + if(lockedg) + id3 = lockedg->goid; + runtime·printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d" + " locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n", + mp->id, id1, id2, + mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc, + mp->spinning, id3); + } + for(gp = runtime·allg; gp; gp = gp->alllink) { + mp = gp->m; + lockedm = gp->lockedm; + runtime·printf(" G%D: status=%d(%s) m=%d lockedm=%d\n", + gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1, + lockedm ? lockedm->id : -1); + } + runtime·unlock(&runtime·sched); } // Put mp on midle list. @@ -2103,7 +2649,7 @@ globrunqput(G *gp) // Try get a batch of G's from the global runnable queue. // Sched must be locked. 
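With this change, `GODEBUG=schedtrace=1000` makes sysmon emit one scheduler summary per second (`scheddetail=1` adds the per-P/M/G dump). Going by the printf formats above, a non-detailed line looks like this, with illustrative values and the bracketed list holding per-P run-queue lengths:

```
SCHED 2004ms: gomaxprocs=4 idleprocs=1 threads=8 idlethreads=3 runqueue=2 [1 0 3 0]
```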
 static G*
-globrunqget(P *p)
+globrunqget(P *p, int32 max)
 {
 	G *gp, *gp1;
 	int32 n;
@@ -2113,6 +2659,8 @@
 	n = runtime·sched.runqsize/runtime·gomaxprocs+1;
 	if(n > runtime·sched.runqsize)
 		n = runtime·sched.runqsize;
+	if(max > 0 && n > max)
+		n = max;
 	runtime·sched.runqsize -= n;
 	if(runtime·sched.runqsize == 0)
 		runtime·sched.runqtail = nil;
@@ -2365,3 +2913,48 @@ runtime·testSchedLocalQueueSteal(void)
 	}
 }
 
+extern void runtime·morestack(void);
+
+// Does f mark the top of a goroutine stack?
+bool
+runtime·topofstack(Func *f)
+{
+	return f->entry == (uintptr)runtime·goexit ||
+		f->entry == (uintptr)runtime·mstart ||
+		f->entry == (uintptr)runtime·mcall ||
+		f->entry == (uintptr)runtime·morestack ||
+		f->entry == (uintptr)runtime·lessstack ||
+		f->entry == (uintptr)_rt0_go;
+}
+
+void
+runtime∕debug·setMaxThreads(intgo in, intgo out)
+{
+	runtime·lock(&runtime·sched);
+	out = runtime·sched.maxmcount;
+	runtime·sched.maxmcount = in;
+	checkmcount();
+	runtime·unlock(&runtime·sched);
+	FLUSH(&out);
+}
+
+static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h
+
+static bool
+haveexperiment(int8 *name)
+{
+	int32 i, j;
+
+	for(i=0; i<sizeof(experiment); i++) {
+		if((i == 0 || experiment[i-1] == ',') && experiment[i] == name[0]) {
+			for(j=0; name[j]; j++)
+				if(experiment[i+j] != name[j])
+					goto nomatch;
+			if(experiment[i+j] != '\0' && experiment[i+j] != ',')
+				goto nomatch;
+			return 1;
+		}
+	nomatch:;
+	}
+	return 0;
+}
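`haveexperiment` matches a name only as a complete comma-separated field of GOEXPERIMENT. The same matcher as standalone, runnable C:

```c
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* True iff name appears as a full comma-separated field of list. */
static bool haveexperiment(const char *list, const char *name) {
    size_t nlen = strlen(name);
    for (const char *p = list; *p; ) {
        const char *end = strchr(p, ',');
        size_t flen = end ? (size_t)(end - p) : strlen(p);
        if (flen == nlen && memcmp(p, name, nlen) == 0)
            return true;                 /* exact field match only */
        p += flen + (end ? 1 : 0);       /* skip field and its comma */
    }
    return false;
}

int main(void) {
    const char *exp = "precisestack,somethingelse";
    printf("%d %d\n", haveexperiment(exp, "precisestack"),
                      haveexperiment(exp, "precise"));  /* prints: 1 0 */
    return 0;
}
```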