Diffstat (limited to 'src/pkg/runtime/proc.c')
-rw-r--r--	src/pkg/runtime/proc.c	1009
1 file changed, 801 insertions, 208 deletions
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c
index 5734509e0..de26c72d3 100644
--- a/src/pkg/runtime/proc.c
+++ b/src/pkg/runtime/proc.c
@@ -4,10 +4,12 @@
#include "runtime.h"
#include "arch_GOARCH.h"
+#include "zaexperiment.h"
#include "malloc.h"
#include "stack.h"
#include "race.h"
#include "type.h"
+#include "../../cmd/ld/textflag.h"
// Goroutine scheduler
// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
@@ -29,8 +31,9 @@ struct Sched {
M* midle; // idle m's waiting for work
int32 nmidle; // number of idle m's waiting for work
- int32 mlocked; // number of locked m's waiting for work
+ int32 nmidlelocked; // number of locked m's waiting for work
int32 mcount; // number of m's that have been created
+ int32 maxmcount; // maximum number of m's allowed (or die)
P* pidle; // idle P's
uint32 npidle;
@@ -45,6 +48,7 @@ struct Sched {
Lock gflock;
G* gfree;
+ uint32 gcwaiting; // gc is waiting to run
int32 stopwait;
Note stopnote;
uint32 sysmonwait;
@@ -60,9 +64,8 @@ enum { MaxGomaxprocs = 1<<8 };
Sched runtime·sched;
int32 runtime·gomaxprocs;
-bool runtime·singleproc;
+uint32 runtime·needextram;
bool runtime·iscgo;
-uint32 runtime·gcwaiting;
M runtime·m0;
G runtime·g0; // idle goroutine for m0
G* runtime·allg;
@@ -86,7 +89,6 @@ static void procresize(int32);
static void acquirep(P*);
static P* releasep(void);
static void newm(void(*)(void), P*);
-static void goidle(void);
static void stopm(void);
static void startm(P*, bool);
static void handoffp(P*);
@@ -94,21 +96,24 @@ static void wakep(void);
static void stoplockedm(void);
static void startlockedm(G*);
static void sysmon(void);
-static uint32 retake(uint32*);
-static void inclocked(int32);
+static uint32 retake(int64);
+static void incidlelocked(int32);
static void checkdead(void);
static void exitsyscall0(G*);
static void park0(G*);
-static void gosched0(G*);
static void goexit0(G*);
static void gfput(P*, G*);
static G* gfget(P*);
static void gfpurge(P*);
static void globrunqput(G*);
-static G* globrunqget(P*);
+static G* globrunqget(P*, int32);
static P* pidleget(void);
static void pidleput(P*);
static void injectglist(G*);
+static bool preemptall(void);
+static bool preemptone(P*);
+static bool exitsyscallfast(void);
+static bool haveexperiment(int8*);
// The bootstrap sequence is:
//
@@ -123,19 +128,27 @@ runtime·schedinit(void)
{
int32 n, procs;
byte *p;
+ Eface i;
+
+ runtime·sched.maxmcount = 10000;
+ runtime·precisestack = haveexperiment("precisestack");
- m->nomemprof++;
runtime·mprofinit();
runtime·mallocinit();
mcommoninit(m);
+
+ // Initialize the itable value for newErrorCString,
+ // so that the next time it gets called, possibly
+ // in a fault during a garbage collection, it will not
+ // need to allocate memory.
+ runtime·newErrorCString(0, &i);
runtime·goargs();
runtime·goenvs();
+ runtime·parsedebugvars();
- // For debugging:
- // Allocate internal symbol table representation now,
- // so that we don't need to call malloc when we crash.
- // runtime·findfunc(0);
+ // Allocate internal symbol table representation now; we need it for GC anyway.
+ runtime·symtabinit();
runtime·sched.lastpoll = runtime·nanotime();
procs = 1;
@@ -149,7 +162,6 @@ runtime·schedinit(void)
procresize(procs);
mstats.enablegc = 1;
- m->nomemprof--;
if(raceenabled)
g->racectx = runtime·raceinit();
@@ -160,10 +172,22 @@ extern void main·main(void);
static FuncVal scavenger = {runtime·MHeap_Scavenger};
+static FuncVal initDone = { runtime·unlockOSThread };
+
// The main goroutine.
void
runtime·main(void)
{
+ Defer d;
+
+ // Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
+ // Using decimal instead of binary GB and MB because
+ // they look nicer in the stack overflow failure message.
+ if(sizeof(void*) == 8)
+ runtime·maxstacksize = 1000000000;
+ else
+ runtime·maxstacksize = 250000000;
+
newm(sysmon, nil);
// Lock the main goroutine onto this, the main OS thread,
@@ -173,10 +197,24 @@ runtime·main(void)
// by calling runtime.LockOSThread during initialization
// to preserve the lock.
runtime·lockOSThread();
+
+ // Defer unlock so that runtime.Goexit during init does the unlock too.
+ d.fn = &initDone;
+ d.siz = 0;
+ d.link = g->defer;
+ d.argp = (void*)-1;
+ d.special = true;
+ d.free = false;
+ g->defer = &d;
+
if(m != &runtime·m0)
runtime·throw("runtime·main not on m0");
runtime·newproc1(&scavenger, nil, 0, 0, runtime·main);
main·init();
+
+ if(g->defer != &d || d.fn != &initDone)
+ runtime·throw("runtime: bad defer entry after init");
+ g->defer = d.link;
runtime·unlockOSThread();
main·main();
@@ -233,14 +271,36 @@ runtime·tracebackothers(G *me)
int32 traceback;
traceback = runtime·gotraceback(nil);
+
+ // Show the current goroutine first, if we haven't already.
+ if((gp = m->curg) != nil && gp != me) {
+ runtime·printf("\n");
+ runtime·goroutineheader(gp);
+ runtime·traceback(gp->sched.pc, gp->sched.sp, gp->sched.lr, gp);
+ }
+
for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
- if(gp == me || gp->status == Gdead)
+ if(gp == me || gp == m->curg || gp->status == Gdead)
continue;
if(gp->issystem && traceback < 2)
continue;
runtime·printf("\n");
runtime·goroutineheader(gp);
- runtime·traceback(gp->sched.pc, (byte*)gp->sched.sp, 0, gp);
+ if(gp->status == Grunning) {
+ runtime·printf("\tgoroutine running on other thread; stack unavailable\n");
+ runtime·printcreatedby(gp);
+ } else
+ runtime·traceback(gp->sched.pc, gp->sched.sp, gp->sched.lr, gp);
+ }
+}
+
+static void
+checkmcount(void)
+{
+ // sched lock is held
+ if(runtime·sched.mcount > runtime·sched.maxmcount) {
+ runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount);
+ runtime·throw("thread exhaustion");
}
}
@@ -256,7 +316,7 @@ mcommoninit(M *mp)
runtime·lock(&runtime·sched);
mp->id = runtime·sched.mcount++;
-
+ checkmcount();
runtime·mpreinit(mp);
// Add to runtime·allm so garbage collector doesn't free m
@@ -273,6 +333,7 @@ void
runtime·ready(G *gp)
{
// Mark runnable.
+ m->locks++; // disable preemption because it can be holding p in a local var
if(gp->status != Gwaiting) {
runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status);
runtime·throw("bad g->status in ready");
@@ -281,6 +342,9 @@ runtime·ready(G *gp)
runqput(m->p, gp);
if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0) // TODO: fast atomic
wakep();
+ m->locks--;
+ if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
}
int32
@@ -340,6 +404,34 @@ runtime·helpgc(int32 nproc)
runtime·unlock(&runtime·sched);
}
+// Similar to stoptheworld but best-effort and can be called several times.
+// There is no reverse operation; it is used during crashing.
+// This function must not lock any mutexes.
+void
+runtime·freezetheworld(void)
+{
+ int32 i;
+
+ if(runtime·gomaxprocs == 1)
+ return;
+ // stopwait and preemption requests can be lost
+ // due to races with concurrently executing threads,
+ // so try several times
+ for(i = 0; i < 5; i++) {
+ // this should tell the scheduler to not start any new goroutines
+ runtime·sched.stopwait = 0x7fffffff;
+ runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
+ // this should stop running goroutines
+ if(!preemptall())
+ break; // no running goroutines
+ runtime·usleep(1000);
+ }
+ // to be sure
+ runtime·usleep(1000);
+ preemptall();
+ runtime·usleep(1000);
+}
+
void
runtime·stoptheworld(void)
{
@@ -350,7 +442,8 @@ runtime·stoptheworld(void)
runtime·lock(&runtime·sched);
runtime·sched.stopwait = runtime·gomaxprocs;
- runtime·atomicstore((uint32*)&runtime·gcwaiting, 1);
+ runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
+ preemptall();
// stop current P
m->p->status = Pgcstop;
runtime·sched.stopwait--;
@@ -369,10 +462,16 @@ runtime·stoptheworld(void)
wait = runtime·sched.stopwait > 0;
runtime·unlock(&runtime·sched);
- // wait for remaining P's to stop voluntary
+ // wait for remaining P's to stop voluntarily
if(wait) {
- runtime·notesleep(&runtime·sched.stopnote);
- runtime·noteclear(&runtime·sched.stopnote);
+ for(;;) {
+ // wait for 100us, then try to re-preempt in case of any races
+ if(runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) {
+ runtime·noteclear(&runtime·sched.stopnote);
+ break;
+ }
+ preemptall();
+ }
}
if(runtime·sched.stopwait)
runtime·throw("stoptheworld: not stopped");
@@ -397,6 +496,7 @@ runtime·starttheworld(void)
G *gp;
bool add;
+ m->locks++; // disable preemption because it can be holding p in a local var
gp = runtime·netpoll(false); // non-blocking
injectglist(gp);
add = needaddgcproc();
@@ -406,7 +506,7 @@ runtime·starttheworld(void)
newprocs = 0;
} else
procresize(runtime·gomaxprocs);
- runtime·gcwaiting = 0;
+ runtime·sched.gcwaiting = 0;
p1 = nil;
while(p = pidleget()) {
@@ -416,16 +516,9 @@ runtime·starttheworld(void)
pidleput(p);
break;
}
- mp = mget();
- if(mp == nil) {
- p->link = p1;
- p1 = p;
- continue;
- }
- if(mp->nextp)
- runtime·throw("starttheworld: inconsistent mp->nextp");
- mp->nextp = p;
- runtime·notewakeup(&mp->park);
+ p->m = mget();
+ p->link = p1;
+ p1 = p;
}
if(runtime·sched.sysmonwait) {
runtime·sched.sysmonwait = false;
@@ -436,8 +529,18 @@ runtime·starttheworld(void)
while(p1) {
p = p1;
p1 = p1->link;
- add = false;
- newm(nil, p);
+ if(p->m) {
+ mp = p->m;
+ p->m = nil;
+ if(mp->nextp)
+ runtime·throw("starttheworld: inconsistent mp->nextp");
+ mp->nextp = p;
+ runtime·notewakeup(&mp->park);
+ } else {
+ // Start M to run P. Do not start another M below.
+ newm(nil, p);
+ add = false;
+ }
}
if(add) {
@@ -450,16 +553,23 @@ runtime·starttheworld(void)
// the maximum number of procs.
newm(mhelpgc, nil);
}
+ m->locks--;
+ if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
}
// Called to start an M.
void
runtime·mstart(void)
{
+#ifdef GOOS_windows
+#ifdef GOARCH_386
// It is used by windows-386 only. Unfortunately, seh needs
// to be located on os stack, and mstart runs on os stack
// for both m0 and m.
SEH seh;
+#endif
+#endif
if(g != m->g0)
runtime·throw("bad runtime·mstart");
@@ -468,18 +578,20 @@ runtime·mstart(void)
// Once we call schedule we're never coming back,
// so other calls can reuse this stack space.
runtime·gosave(&m->g0->sched);
- m->g0->sched.pc = (void*)-1; // make sure it is never used
+ m->g0->sched.pc = (uintptr)-1; // make sure it is never used
+ m->g0->stackguard = m->g0->stackguard0; // cgo sets only stackguard0, copy it to stackguard
+#ifdef GOOS_windows
+#ifdef GOARCH_386
m->seh = &seh;
+#endif
+#endif
runtime·asminit();
runtime·minit();
// Install signal handlers; after minit so that minit can
// prepare the thread to be able to handle the signals.
- if(m == &runtime·m0) {
+ if(m == &runtime·m0)
runtime·initsig();
- if(runtime·iscgo)
- runtime·newextram();
- }
if(m->mstartfn)
m->mstartfn();
@@ -541,6 +653,8 @@ runtime·allocm(P *p)
if(p == m->p)
releasep();
m->locks--;
+ if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
return mp;
}
@@ -581,12 +695,20 @@ static void unlockextra(M*);
//
// When the callback is done with the m, it calls dropm to
// put the m back on the list.
-#pragma textflag 7
+#pragma textflag NOSPLIT
void
runtime·needm(byte x)
{
M *mp;
+ if(runtime·needextram) {
+ // Can happen if C/C++ code calls Go from a global ctor.
+ // Can not throw, because scheduler is not initialized yet.
+ runtime·write(2, "fatal error: cgo callback before cgo call\n",
+ sizeof("fatal error: cgo callback before cgo call\n")-1);
+ runtime·exit(1);
+ }
+
// Lock extra list, take head, unlock popped list.
// nilokay=false is safe here because of the invariant above,
// that the extra list always contains or will soon contain
@@ -611,12 +733,17 @@ runtime·needm(byte x)
runtime·setmg(mp, mp->g0);
g->stackbase = (uintptr)(&x + 1024);
g->stackguard = (uintptr)(&x - 32*1024);
+ g->stackguard0 = g->stackguard;
+#ifdef GOOS_windows
+#ifdef GOARCH_386
// On windows/386, we need to put an SEH frame (two words)
- // somewhere on the current stack. We are called
- // from needm, and we know there is some available
- // space one word into the argument frame. Use that.
+ // somewhere on the current stack. We are called from cgocallback_gofunc
+ // and we know that it will leave two unused words below m->curg->sched.sp.
+ // Use those.
m->seh = (SEH*)((uintptr*)&x + 1);
+#endif
+#endif
// Initialize this thread to use the m.
runtime·asminit();
@@ -639,14 +766,22 @@ runtime·newextram(void)
// the goroutine stack ends.
mp = runtime·allocm(nil);
gp = runtime·malg(4096);
- gp->sched.pc = (void*)runtime·goexit;
+ gp->sched.pc = (uintptr)runtime·goexit;
gp->sched.sp = gp->stackbase;
+ gp->sched.lr = 0;
gp->sched.g = gp;
+ gp->syscallpc = gp->sched.pc;
+ gp->syscallsp = gp->sched.sp;
+ gp->syscallstack = gp->stackbase;
+ gp->syscallguard = gp->stackguard;
gp->status = Gsyscall;
mp->curg = gp;
mp->locked = LockInternal;
mp->lockedg = gp;
gp->lockedm = mp;
+ gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
+ if(raceenabled)
+ gp->racectx = runtime·racegostart(runtime·newextram);
// put on allg for garbage collector
runtime·lock(&runtime·sched);
if(runtime·lastg == nil)
@@ -655,9 +790,6 @@ runtime·newextram(void)
runtime·lastg->alllink = gp;
runtime·lastg = gp;
runtime·unlock(&runtime·sched);
- gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
- if(raceenabled)
- gp->racectx = runtime·racegostart(runtime·newextram);
// Add m to the extra list.
mnext = lockextra(true);
@@ -695,7 +827,12 @@ runtime·dropm(void)
// Undo whatever initialization minit did during needm.
runtime·unminit();
+
+#ifdef GOOS_windows
+#ifdef GOARCH_386
m->seh = nil; // reset dangling typed pointer
+#endif
+#endif
// Clear m and g, and return m to the extra list.
// After the call to setmg we can only call nosplit functions.
@@ -714,7 +851,7 @@ runtime·dropm(void)
// to runtime.extram. If nilokay is true, then lockextra will
// return a nil list head if that's what it finds. If nilokay is false,
// lockextra will keep waiting until the list head is no longer nil.
-#pragma textflag 7
+#pragma textflag NOSPLIT
static M*
lockextra(bool nilokay)
{
@@ -742,7 +879,7 @@ lockextra(bool nilokay)
return mp;
}
-#pragma textflag 7
+#pragma textflag NOSPLIT
static void
unlockextra(M *mp)
{
@@ -863,7 +1000,7 @@ handoffp(P *p)
return;
}
runtime·lock(&runtime·sched);
- if(runtime·gcwaiting) {
+ if(runtime·sched.gcwaiting) {
p->status = Pgcstop;
if(--runtime·sched.stopwait == 0)
runtime·notewakeup(&runtime·sched.stopnote);
@@ -911,7 +1048,7 @@ stoplockedm(void)
p = releasep();
handoffp(p);
}
- inclocked(1);
+ incidlelocked(1);
// Wait until another thread schedules lockedg again.
runtime·notesleep(&m->park);
runtime·noteclear(&m->park);
@@ -934,7 +1071,7 @@ startlockedm(G *gp)
if(mp->nextp)
runtime·throw("startlockedm: m has p");
// directly handoff current P to the locked m
- inclocked(-1);
+ incidlelocked(-1);
p = releasep();
mp->nextp = p;
runtime·notewakeup(&mp->park);
@@ -948,7 +1085,7 @@ gcstopm(void)
{
P *p;
- if(!runtime·gcwaiting)
+ if(!runtime·sched.gcwaiting)
runtime·throw("gcstopm: not waiting for gc");
if(m->spinning) {
m->spinning = false;
@@ -975,7 +1112,9 @@ execute(G *gp)
runtime·throw("execute: bad g status");
}
gp->status = Grunning;
- m->p->tick++;
+ gp->preempt = false;
+ gp->stackguard0 = gp->stackguard;
+ m->p->schedtick++;
m->curg = gp;
gp->m = m;
@@ -984,9 +1123,7 @@ execute(G *gp)
if(m->profilehz != hz)
runtime·resetcpuprofiler(hz);
- if(gp->sched.pc == (byte*)runtime·goexit) // kickoff
- runtime·gogocallfn(&gp->sched, gp->fnstart);
- runtime·gogo(&gp->sched, 0);
+ runtime·gogo(&gp->sched);
}
// Finds a runnable goroutine to execute.
@@ -999,7 +1136,7 @@ findrunnable(void)
int32 i;
top:
- if(runtime·gcwaiting) {
+ if(runtime·sched.gcwaiting) {
gcstopm();
goto top;
}
@@ -1010,7 +1147,7 @@ top:
// global runq
if(runtime·sched.runqsize) {
runtime·lock(&runtime·sched);
- gp = globrunqget(m->p);
+ gp = globrunqget(m->p, 0);
runtime·unlock(&runtime·sched);
if(gp)
return gp;
@@ -1033,7 +1170,7 @@ top:
}
// random steal from other P's
for(i = 0; i < 2*runtime·gomaxprocs; i++) {
- if(runtime·gcwaiting)
+ if(runtime·sched.gcwaiting)
goto top;
p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
if(p == m->p)
@@ -1046,12 +1183,12 @@ top:
stop:
// return P and block
runtime·lock(&runtime·sched);
- if(runtime·gcwaiting) {
+ if(runtime·sched.gcwaiting) {
runtime·unlock(&runtime·sched);
goto top;
}
if(runtime·sched.runqsize) {
- gp = globrunqget(m->p);
+ gp = globrunqget(m->p, 0);
runtime·unlock(&runtime·sched);
return gp;
}
@@ -1101,6 +1238,25 @@ stop:
goto top;
}
+static void
+resetspinning(void)
+{
+ int32 nmspinning;
+
+ if(m->spinning) {
+ m->spinning = false;
+ nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
+ if(nmspinning < 0)
+ runtime·throw("findrunnable: negative nmspinning");
+ } else
+ nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
+
+ // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
+ // so see if we need to wakeup another P here.
+ if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
+ wakep();
+}
+
// Injects the list of runnable G's into the scheduler.
// Can run concurrently with GC.
static void
@@ -1130,33 +1286,44 @@ static void
schedule(void)
{
G *gp;
+ uint32 tick;
if(m->locks)
runtime·throw("schedule: holding locks");
top:
- if(runtime·gcwaiting) {
+ if(runtime·sched.gcwaiting) {
gcstopm();
goto top;
}
- gp = runqget(m->p);
- if(gp == nil)
- gp = findrunnable();
-
- if(m->spinning) {
- m->spinning = false;
- runtime·xadd(&runtime·sched.nmspinning, -1);
+ gp = nil;
+ // Check the global runnable queue once in a while to ensure fairness.
+ // Otherwise two goroutines can completely occupy the local runqueue
+ // by constantly respawning each other.
+ tick = m->p->schedtick;
+ // This is a fancy way to say tick%61==0,
+ // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
+ if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
+ runtime·lock(&runtime·sched);
+ gp = globrunqget(m->p, 1);
+ runtime·unlock(&runtime·sched);
+ if(gp)
+ resetspinning();
+ }
+ if(gp == nil) {
+ gp = runqget(m->p);
+ if(gp && m->spinning)
+ runtime·throw("schedule: spinning with local work");
+ }
+ if(gp == nil) {
+ gp = findrunnable(); // blocks until work is available
+ resetspinning();
}
-
- // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
- // so see if we need to wakeup another M here.
- if (m->p->runqhead != m->p->runqtail &&
- runtime·atomicload(&runtime·sched.nmspinning) == 0 &&
- runtime·atomicload(&runtime·sched.npidle) > 0) // TODO: fast atomic
- wakep();
if(gp->lockedm) {
+ // Hands off own p to the locked m,
+ // then blocks waiting for a new p.
startlockedm(gp);
goto top;
}
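The fairness check above leans on a reciprocal-multiplication trick: 0x4325c53f is ceil(2^36/61), so the multiply-and-shift computes tick/61 exactly for every 32-bit tick, and the full expression reduces to tick%61. A minimal standalone check of that identity (ordinary C, not runtime code; the sample values are arbitrary):

#include <stdint.h>
#include <stdio.h>

int main(void) {
	uint32_t samples[] = {0, 1, 60, 61, 62, 121, 6100, 4294967294u, 4294967295u};
	for (size_t i = 0; i < sizeof samples / sizeof samples[0]; i++) {
		uint32_t tick = samples[i];
		/* same multiply-and-shift as schedule(): quotient first, then remainder */
		uint32_t fast = tick - (uint32_t)(((tick * 0x4325c53full) >> 36) * 61);
		if (fast != tick % 61) {
			printf("mismatch at %u: %u != %u\n", tick, fast, tick % 61);
			return 1;
		}
	}
	printf("multiply-and-shift agrees with %%61 on all samples\n");
	return 0;
}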
@@ -1198,12 +1365,12 @@ park0(G *gp)
void
runtime·gosched(void)
{
- runtime·mcall(gosched0);
+ runtime·mcall(runtime·gosched0);
}
// runtime·gosched continuation on g0.
-static void
-gosched0(G *gp)
+void
+runtime·gosched0(G *gp)
{
gp->status = Grunnable;
gp->m = nil;
@@ -1219,6 +1386,10 @@ gosched0(G *gp)
}
// Finishes execution of the current goroutine.
+// Need to mark it as nosplit, because it runs with sp > stackbase (as runtime·lessstack).
// Since it does not return, that does not matter. But if it is preempted
// at the split stack check, GC will complain about an inconsistent sp.
+#pragma textflag NOSPLIT
void
runtime·goexit(void)
{
@@ -1232,13 +1403,12 @@ static void
goexit0(G *gp)
{
gp->status = Gdead;
- gp->fnstart = nil;
gp->m = nil;
gp->lockedm = nil;
m->curg = nil;
m->lockedg = nil;
if(m->locked & ~LockExternal) {
- runtime·printf("invalid m->locked = %d", m->locked);
+ runtime·printf("invalid m->locked = %d\n", m->locked);
runtime·throw("internal lockOSThread error");
}
m->locked = 0;
@@ -1247,6 +1417,18 @@ goexit0(G *gp)
schedule();
}
+#pragma textflag NOSPLIT
+static void
+save(void *pc, uintptr sp)
+{
+ g->sched.pc = (uintptr)pc;
+ g->sched.sp = sp;
+ g->sched.lr = 0;
+ g->sched.ret = 0;
+ g->sched.ctxt = 0;
+ g->sched.g = g;
+}
+
// The goroutine g is about to enter a system call.
// Record that it's not using the cpu anymore.
// This is called only from the go syscall library and cgocall,
@@ -1255,25 +1437,24 @@ goexit0(G *gp)
// Entersyscall cannot split the stack: the runtime·gosave must
// make g->sched refer to the caller's stack segment, because
// entersyscall is going to return immediately after.
-#pragma textflag 7
+#pragma textflag NOSPLIT
void
·entersyscall(int32 dummy)
{
- if(m->profilehz > 0)
- runtime·setprof(false);
+ // Disable preemption because during this function g is in Gsyscall status,
+ // but can have an inconsistent g->sched; do not let GC observe it.
+ m->locks++;
- // Leave SP around for gc and traceback.
- g->sched.sp = (uintptr)runtime·getcallersp(&dummy);
- g->sched.pc = runtime·getcallerpc(&dummy);
- g->sched.g = g;
- g->gcsp = g->sched.sp;
- g->gcpc = g->sched.pc;
- g->gcstack = g->stackbase;
- g->gcguard = g->stackguard;
+ // Leave SP around for GC and traceback.
+ save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
+ g->syscallsp = g->sched.sp;
+ g->syscallpc = g->sched.pc;
+ g->syscallstack = g->stackbase;
+ g->syscallguard = g->stackguard;
g->status = Gsyscall;
- if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) {
+ if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
- // g->gcsp, g->gcguard-StackGuard, g->gcstack);
+ // g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
runtime·throw("entersyscall");
}
@@ -1284,100 +1465,96 @@ void
runtime·notewakeup(&runtime·sched.sysmonnote);
}
runtime·unlock(&runtime·sched);
- runtime·gosave(&g->sched); // re-save for traceback
+ save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
}
m->mcache = nil;
- m->p->tick++;
m->p->m = nil;
runtime·atomicstore(&m->p->status, Psyscall);
- if(runtime·gcwaiting) {
+ if(runtime·sched.gcwaiting) {
runtime·lock(&runtime·sched);
if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) {
if(--runtime·sched.stopwait == 0)
runtime·notewakeup(&runtime·sched.stopnote);
}
runtime·unlock(&runtime·sched);
- runtime·gosave(&g->sched); // re-save for traceback
+ save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
}
+
+ // Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
+ // We set stackguard to StackPreempt so that the first split stack check calls morestack.
+ // Morestack detects this case and throws.
+ g->stackguard0 = StackPreempt;
+ m->locks--;
}
// The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
-#pragma textflag 7
+#pragma textflag NOSPLIT
void
·entersyscallblock(int32 dummy)
{
P *p;
- if(m->profilehz > 0)
- runtime·setprof(false);
+ m->locks++; // see comment in entersyscall
- // Leave SP around for gc and traceback.
- g->sched.sp = (uintptr)runtime·getcallersp(&dummy);
- g->sched.pc = runtime·getcallerpc(&dummy);
- g->sched.g = g;
- g->gcsp = g->sched.sp;
- g->gcpc = g->sched.pc;
- g->gcstack = g->stackbase;
- g->gcguard = g->stackguard;
+ // Leave SP around for GC and traceback.
+ save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
+ g->syscallsp = g->sched.sp;
+ g->syscallpc = g->sched.pc;
+ g->syscallstack = g->stackbase;
+ g->syscallguard = g->stackguard;
g->status = Gsyscall;
- if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) {
- // runtime·printf("entersyscallblock inconsistent %p [%p,%p]\n",
- // g->gcsp, g->gcguard-StackGuard, g->gcstack);
+ if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
+ // runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
+ // g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
runtime·throw("entersyscallblock");
}
p = releasep();
handoffp(p);
if(g->isbackground) // do not consider blocked scavenger for deadlock detection
- inclocked(1);
- runtime·gosave(&g->sched); // re-save for traceback
+ incidlelocked(1);
+
+ // Resave for traceback during blocked call.
+ save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
+
+ g->stackguard0 = StackPreempt; // see comment in entersyscall
+ m->locks--;
}
// The goroutine g exited its system call.
// Arrange for it to run on a cpu again.
// This is called only from the go syscall library, not
// from the low-level system calls used by the runtime.
+#pragma textflag NOSPLIT
void
runtime·exitsyscall(void)
{
- P *p;
+ m->locks++; // see comment in entersyscall
- // Check whether the profiler needs to be turned on.
- if(m->profilehz > 0)
- runtime·setprof(true);
+ if(g->isbackground) // do not consider blocked scavenger for deadlock detection
+ incidlelocked(-1);
- // Try to re-acquire the last P.
- if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) {
+ if(exitsyscallfast()) {
// There's a cpu for us, so we can run.
- m->mcache = m->p->mcache;
- m->p->m = m;
- m->p->tick++;
+ m->p->syscalltick++;
g->status = Grunning;
// Garbage collector isn't running (since we are),
// so okay to clear gcstack and gcsp.
- g->gcstack = (uintptr)nil;
- g->gcsp = (uintptr)nil;
+ g->syscallstack = (uintptr)nil;
+ g->syscallsp = (uintptr)nil;
+ m->locks--;
+ if(g->preempt) {
+ // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
+ } else {
+ // otherwise restore the real stackguard; we've spoiled it in entersyscall/entersyscallblock
+ g->stackguard0 = g->stackguard;
+ }
return;
}
- if(g->isbackground) // do not consider blocked scavenger for deadlock detection
- inclocked(-1);
- // Try to get any other idle P.
- m->p = nil;
- if(runtime·sched.pidle) {
- runtime·lock(&runtime·sched);
- p = pidleget();
- runtime·unlock(&runtime·sched);
- if(p) {
- acquirep(p);
- m->p->tick++;
- g->status = Grunning;
- g->gcstack = (uintptr)nil;
- g->gcsp = (uintptr)nil;
- return;
- }
- }
+ m->locks--;
// Call the scheduler.
runtime·mcall(exitsyscall0);
@@ -1388,8 +1565,46 @@ runtime·exitsyscall(void)
// Must wait until now because until gosched returns
// we don't know for sure that the garbage collector
// is not running.
- g->gcstack = (uintptr)nil;
- g->gcsp = (uintptr)nil;
+ g->syscallstack = (uintptr)nil;
+ g->syscallsp = (uintptr)nil;
+ m->p->syscalltick++;
+}
+
+#pragma textflag NOSPLIT
+static bool
+exitsyscallfast(void)
+{
+ P *p;
+
+ // Freezetheworld sets stopwait but does not retake P's.
+ if(runtime·sched.stopwait) {
+ m->p = nil;
+ return false;
+ }
+
+ // Try to re-acquire the last P.
+ if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) {
+ // There's a cpu for us, so we can run.
+ m->mcache = m->p->mcache;
+ m->p->m = m;
+ return true;
+ }
+ // Try to get any other idle P.
+ m->p = nil;
+ if(runtime·sched.pidle) {
+ runtime·lock(&runtime·sched);
+ p = pidleget();
+ if(p && runtime·atomicload(&runtime·sched.sysmonwait)) {
+ runtime·atomicstore(&runtime·sched.sysmonwait, 0);
+ runtime·notewakeup(&runtime·sched.sysmonnote);
+ }
+ runtime·unlock(&runtime·sched);
+ if(p) {
+ acquirep(p);
+ return true;
+ }
+ }
+ return false;
}
// runtime·exitsyscall slow path on g0.
@@ -1406,6 +1621,10 @@ exitsyscall0(G *gp)
p = pidleget();
if(p == nil)
globrunqput(gp);
+ else if(runtime·atomicload(&runtime·sched.sysmonwait)) {
+ runtime·atomicstore(&runtime·sched.sysmonwait, 0);
+ runtime·notewakeup(&runtime·sched.sysmonnote);
+ }
runtime·unlock(&runtime·sched);
if(p) {
acquirep(p);
@@ -1420,6 +1639,29 @@ exitsyscall0(G *gp)
schedule(); // Never returns.
}
+// Called from syscall package before fork.
+void
+syscall·runtime_BeforeFork(void)
+{
+ // Fork can hang if preempted with signals frequently enough (see issue 5517).
+ // Ensure that we stay on the same M where we disable profiling.
+ m->locks++;
+ if(m->profilehz != 0)
+ runtime·resetcpuprofiler(0);
+}
+
+// Called from syscall package after fork in parent.
+void
+syscall·runtime_AfterFork(void)
+{
+ int32 hz;
+
+ hz = runtime·sched.profilehz;
+ if(hz != 0)
+ runtime·resetcpuprofiler(hz);
+ m->locks--;
+}
+
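The fork hooks above pin the thread and silence profiling because the child inherits the parent's profiling interval timer, and a stray SIGPROF around fork can hang the child (issue 5517). On Unix systems runtime·resetcpuprofiler is built on setitimer, so the same guard looks roughly like this in plain libc terms (a sketch, not the runtime's code):

#include <sys/time.h>
#include <unistd.h>

/* Fork with SIGPROF delivery paused in the parent and never armed in the child. */
pid_t profiled_fork(void) {
	struct itimerval off = {{0, 0}, {0, 0}}, saved;
	getitimer(ITIMER_PROF, &saved);    /* remember the current profiling rate */
	setitimer(ITIMER_PROF, &off, 0);   /* stop SIGPROF before forking */
	pid_t pid = fork();
	if (pid != 0)
		setitimer(ITIMER_PROF, &saved, 0);  /* parent: re-arm at the old rate */
	return pid;
}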
// Hook used by runtime·malg to call runtime·stackalloc on the
// scheduler stack. This exists because runtime·stackalloc insists
// on being called on the scheduler stack, to avoid trying to grow
@@ -1428,7 +1670,7 @@ static void
mstackalloc(G *gp)
{
gp->param = runtime·stackalloc((uintptr)gp->param);
- runtime·gogo(&gp->sched, 0);
+ runtime·gogo(&gp->sched);
}
// Allocate a new g, with a stack big enough for stacksize bytes.
@@ -1455,8 +1697,10 @@ runtime·malg(int32 stacksize)
stk = g->param;
g->param = nil;
}
+ newg->stacksize = StackSystem + stacksize;
newg->stack0 = (uintptr)stk;
newg->stackguard = (uintptr)stk + StackGuard;
+ newg->stackguard0 = newg->stackguard;
newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop);
runtime·memclr((byte*)newg->stackbase, sizeof(Stktop));
}
@@ -1470,7 +1714,7 @@ runtime·malg(int32 stacksize)
// are available sequentially after &fn; they would not be
// copied if a stack split occurred. It's OK for this to call
// functions that split the stack.
-#pragma textflag 7
+#pragma textflag NOSPLIT
void
runtime·newproc(int32 siz, FuncVal* fn, ...)
{
@@ -1494,7 +1738,8 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp
G *newg;
int32 siz;
-//printf("newproc1 %p %p narg=%d nret=%d\n", fn, argp, narg, nret);
+//runtime·printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
+ m->locks++; // disable preemption because it can be holding p in a local var
siz = narg + nret;
siz = (siz+7) & ~7;
@@ -1528,19 +1773,24 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp
*(void**)sp = nil;
}
+ runtime·memclr((byte*)&newg->sched, sizeof newg->sched);
newg->sched.sp = (uintptr)sp;
- newg->sched.pc = (byte*)runtime·goexit;
+ newg->sched.pc = (uintptr)runtime·goexit;
newg->sched.g = newg;
- newg->fnstart = fn;
+ runtime·gostartcallfn(&newg->sched, fn);
newg->gopc = (uintptr)callerpc;
newg->status = Grunnable;
newg->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
+ newg->panicwrap = 0;
if(raceenabled)
- newg->racectx = runtime·racegostart(callerpc);
+ newg->racectx = runtime·racegostart((void*)callerpc);
runqput(m->p, newg);
if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main) // TODO: fast atomic
wakep();
+ m->locks--;
+ if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
return newg;
}
@@ -1641,7 +1891,7 @@ runtime·gomaxprocsfunc(int32 n)
}
runtime·unlock(&runtime·sched);
- runtime·semacquire(&runtime·worldsema);
+ runtime·semacquire(&runtime·worldsema, false);
m->gcing = 1;
runtime·stoptheworld();
newprocs = n;
@@ -1652,8 +1902,12 @@ runtime·gomaxprocsfunc(int32 n)
return ret;
}
+// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
+// after they modify m->locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
+#pragma textflag NOSPLIT
static void
-LockOSThread(void)
+lockOSThread(void)
{
m->lockedg = g;
g->lockedm = m;
@@ -1663,18 +1917,23 @@ void
runtime·LockOSThread(void)
{
m->locked |= LockExternal;
- LockOSThread();
+ lockOSThread();
}
void
runtime·lockOSThread(void)
{
m->locked += LockInternal;
- LockOSThread();
+ lockOSThread();
}
+
+// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
+// after they update m->locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
+#pragma textflag NOSPLIT
static void
-UnlockOSThread(void)
+unlockOSThread(void)
{
if(m->locked != 0)
return;
@@ -1686,7 +1945,7 @@ void
runtime·UnlockOSThread(void)
{
m->locked &= ~LockExternal;
- UnlockOSThread();
+ unlockOSThread();
}
void
@@ -1695,7 +1954,7 @@ runtime·unlockOSThread(void)
if(m->locked < LockInternal)
runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
m->locked -= LockInternal;
- UnlockOSThread();
+ unlockOSThread();
}
bool
@@ -1712,14 +1971,6 @@ runtime·golockedOSThread(bool ret)
FLUSH(&ret);
}
-// for testing of wire, unwire
-void
-runtime·mid(uint32 ret)
-{
- ret = m->id;
- FLUSH(&ret);
-}
-
void
runtime·NumGoroutine(intgo ret)
{
@@ -1755,17 +2006,25 @@ runtime·mcount(void)
}
void
-runtime·badmcall(void) // called from assembly
+runtime·badmcall(void (*fn)(G*)) // called from assembly
{
+ USED(fn); // TODO: print fn?
runtime·throw("runtime: mcall called on m->g0 stack");
}
void
-runtime·badmcall2(void) // called from assembly
+runtime·badmcall2(void (*fn)(G*)) // called from assembly
{
+ USED(fn);
runtime·throw("runtime: mcall function returned");
}
+void
+runtime·badreflectcall(void) // called from assembly
+{
+ runtime·panicstring("runtime: arg size to reflect.call more than 1GB");
+}
+
static struct {
Lock;
void (*fn)(uintptr*, int32);
@@ -1773,26 +2032,126 @@ static struct {
uintptr pcbuf[100];
} prof;
+static void
+System(void)
+{
+}
+
// Called if we receive a SIGPROF signal.
void
runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp)
{
int32 n;
+ bool traceback;
- // Windows does profiling in a dedicated thread w/o m.
- if(!Windows && (m == nil || m->mcache == nil))
- return;
if(prof.fn == nil || prof.hz == 0)
return;
+ traceback = true;
+ // Windows does profiling in a dedicated thread w/o m.
+ if(!Windows && (m == nil || m->mcache == nil))
+ traceback = false;
+
+ // Define that a "user g" is a user-created goroutine, and a "system g"
+ // is one that is m->g0 or m->gsignal. We've only made sure that we
+ // can unwind user g's, so exclude the system g's.
+ //
+ // It is not quite as easy as testing gp == m->curg (the current user g)
+ // because we might be interrupted for profiling halfway through a
+ // goroutine switch. The switch involves updating three (or four) values:
+ // g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
+ // because once it gets updated the new g is running.
+ //
+ // When switching from a user g to a system g, LR is not considered live,
+ // so the update only affects g, SP, and PC. Since PC must be last, there
+ // the possible partial transitions in ordinary execution are (1) g alone is updated,
+ // (2) both g and SP are updated, and (3) SP alone is updated.
+ // If g is updated, we'll see a system g and not look closer.
+ // If SP alone is updated, we can detect the partial transition by checking
+ // whether the SP is within g's stack bounds. (We could also require that SP
+ // be changed only after g, but the stack bounds check is needed by other
+ // cases, so there is no need to impose an additional requirement.)
+ //
+ // There is one exceptional transition to a system g, not in ordinary execution.
+ // When a signal arrives, the operating system starts the signal handler running
+ // with an updated PC and SP. The g is updated last, at the beginning of the
+ // handler. There are two reasons this is okay. First, until g is updated the
+ // g and SP do not match, so the stack bounds check detects the partial transition.
+ // Second, signal handlers currently run with signals disabled, so a profiling
+ // signal cannot arrive during the handler.
+ //
+ // When switching from a system g to a user g, there are three possibilities.
+ //
+ // First, it may be that the g switch has no PC update, because the SP
+ // either corresponds to a user g throughout (as in runtime.asmcgocall)
+ // or because it has been arranged to look like a user g frame
+ // (as in runtime.cgocallback_gofunc). In this case, since the entire
+ // transition is a g+SP update, a partial transition updating just one of
+ // those will be detected by the stack bounds check.
+ //
+ // Second, when returning from a signal handler, the PC and SP updates
+ // are performed by the operating system in an atomic update, so the g
+ // update must be done before them. The stack bounds check detects
+ // the partial transition here, and (again) signal handlers run with signals
+ // disabled, so a profiling signal cannot arrive then anyway.
+ //
+ // Third, the common case: it may be that the switch updates g, SP, and PC
+ // separately, as in runtime.gogo.
+ //
+ // Because runtime.gogo is the only instance, we check whether the PC lies
+ // within that function, and if so, do not ask for a traceback. This approach
+ // requires knowing the size of the runtime.gogo function, which we
+ // record in arch_*.h and check in runtime_test.go.
+ //
+ // There is another apparently viable approach, recorded here in case
+ // the "PC within runtime.gogo" check turns out not to be usable.
+ // It would be possible to delay the update of either g or SP until immediately
+ // before the PC update instruction. Then, because of the stack bounds check,
+ // the only problematic interrupt point is just before that PC update instruction,
+ // and the sigprof handler can detect that instruction and simulate stepping past
+ // it in order to reach a consistent state. On ARM, the update of g must be made
+ // in two places (in R10 and also in a TLS slot), so the delayed update would
+ // need to be the SP update. The sigprof handler must read the instruction at
+ // the current PC and if it was the known instruction (for example, JMP BX or
+ // MOV R2, PC), use that other register in place of the PC value.
+ // The biggest drawback to this solution is that it requires that we can tell
+ // whether it's safe to read from the memory pointed at by PC.
+ // In a correct program, we can test PC == nil and otherwise read,
+ // but if a profiling signal happens at the instant that a program executes
+ // a bad jump (before the program manages to handle the resulting fault)
+ // the profiling handler could fault trying to read nonexistent memory.
+ //
+ // To recap, there are no constraints on the assembly being used for the
+ // transition. We simply require that g and SP match and that the PC is not
+ // in runtime.gogo.
+ //
+ // On Windows, one m is sending reports about all the g's, so gp == m->curg
+ // is not a useful comparison. The profilem function in os_windows.c has
+ // already checked that gp is a user g.
+ if(gp == nil ||
+ (!Windows && gp != m->curg) ||
+ (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||
+ ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
+ traceback = false;
+
+ // Race detector calls asmcgocall w/o entersyscall/exitsyscall,
+ // and we cannot currently unwind through asmcgocall.
+ if(m != nil && m->racecall)
+ traceback = false;
runtime·lock(&prof);
if(prof.fn == nil) {
runtime·unlock(&prof);
return;
}
- n = runtime·gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil);
- if(n > 0)
- prof.fn(prof.pcbuf, n);
+ n = 0;
+ if(traceback)
+ n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
+ if(!traceback || n <= 0) {
+ n = 2;
+ prof.pcbuf[0] = (uintptr)pc;
+ prof.pcbuf[1] = (uintptr)System + 1;
+ }
+ prof.fn(prof.pcbuf, n);
runtime·unlock(&prof);
}
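Most of the machinery above funnels into one test: a sample can only be unwound if SP lies inside the interrupted goroutine's stack bounds. Distilled into standalone C (illustrative names; StackGuard here is just a stand-in value):

#include <stdint.h>

enum { StackGuard = 256 };  /* stand-in for the runtime's guard size */

/* Mirrors the sigprof condition: outside [stackguard - StackGuard, stackbase],
   the registers are mid-switch and the stack must not be walked. */
static int walkable(uintptr_t sp, uintptr_t stackguard, uintptr_t stackbase) {
	return sp >= stackguard - StackGuard && sp <= stackbase;
}

When the test fails, sigprof still reports a two-frame trace ending in the fake System function above, so unprofilable samples show up attributed to System rather than vanishing.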
@@ -1808,7 +2167,11 @@ runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
if(fn == nil)
hz = 0;
- // Stop profiler on this cpu so that it is safe to lock prof.
+ // Disable preemption; otherwise we can be rescheduled to another thread
+ // that has profiling enabled.
+ m->locks++;
+
+ // Stop profiler on this thread so that it is safe to lock prof.
// if a profiling signal came in while we had prof locked,
// it would deadlock.
runtime·resetcpuprofiler(0);
@@ -1823,6 +2186,8 @@ runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
if(hz != 0)
runtime·resetcpuprofiler(hz);
+
+ m->locks--;
}
// Change number of processors. The world is stopped, sched is locked.
@@ -1840,7 +2205,8 @@ procresize(int32 new)
for(i = 0; i < new; i++) {
p = runtime·allp[i];
if(p == nil) {
- p = (P*)runtime·mallocgc(sizeof(*p), 0, 0, 1);
+ p = (P*)runtime·mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
+ p->id = i;
p->status = Pgcstop;
runtime·atomicstorep(&runtime·allp[i], p);
}
@@ -1852,7 +2218,7 @@ procresize(int32 new)
}
if(p->runq == nil) {
p->runqsize = 128;
- p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, 0, 1);
+ p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, FlagNoInvokeGC);
}
}
@@ -1895,7 +2261,6 @@ procresize(int32 new)
p->status = Pidle;
pidleput(p);
}
- runtime·singleproc = new == 1;
runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
}
@@ -1937,10 +2302,10 @@ releasep(void)
}
static void
-inclocked(int32 v)
+incidlelocked(int32 v)
{
runtime·lock(&runtime·sched);
- runtime·sched.mlocked += v;
+ runtime·sched.nmidlelocked += v;
if(v > 0)
checkdead();
runtime·unlock(&runtime·sched);
@@ -1955,12 +2320,12 @@ checkdead(void)
int32 run, grunning, s;
// -1 for sysmon
- run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.mlocked - 1;
+ run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;
if(run > 0)
return;
if(run < 0) {
- runtime·printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n",
- runtime·sched.nmidle, runtime·sched.mlocked, runtime·sched.mcount);
+ runtime·printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
+ runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount);
runtime·throw("checkdead: inconsistent counts");
}
grunning = 0;
@@ -1985,10 +2350,10 @@ static void
sysmon(void)
{
uint32 idle, delay;
- int64 now, lastpoll;
+ int64 now, lastpoll, lasttrace;
G *gp;
- uint32 ticks[MaxGomaxprocs];
+ lasttrace = 0;
idle = 0; // how many cycles in succession we have not woken anybody up
delay = 0;
for(;;) {
@@ -1999,9 +2364,10 @@ sysmon(void)
if(delay > 10*1000) // up to 10ms
delay = 10*1000;
runtime·usleep(delay);
- if(runtime·gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) { // TODO: fast atomic
+ if(runtime·debug.schedtrace <= 0 &&
+ (runtime·sched.gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs)) { // TODO: fast atomic
runtime·lock(&runtime·sched);
- if(runtime·atomicload(&runtime·gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
+ if(runtime·atomicload(&runtime·sched.gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
runtime·atomicstore(&runtime·sched.sysmonwait, 1);
runtime·unlock(&runtime·sched);
runtime·notesleep(&runtime·sched.sysmonnote);
@@ -2014,51 +2380,231 @@ sysmon(void)
// poll network if not polled for more than 10ms
lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
now = runtime·nanotime();
- if(lastpoll != 0 && lastpoll + 10*1000*1000 > now) {
+ if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
+ runtime·cas64(&runtime·sched.lastpoll, lastpoll, now);
gp = runtime·netpoll(false); // non-blocking
- injectglist(gp);
+ if(gp) {
+ // Need to decrement number of idle locked M's
+ // (pretending that one more is running) before injectglist.
+ // Otherwise it can lead to the following situation:
+ // injectglist grabs all P's but before it starts M's to run the P's,
+ // another M returns from syscall, finishes running its G,
+ // observes that there is no work to do and no other running M's
+ // and reports deadlock.
+ incidlelocked(-1);
+ injectglist(gp);
+ incidlelocked(1);
+ }
}
// retake P's blocked in syscalls
- if(retake(ticks))
+ // and preempt long running G's
+ if(retake(now))
idle = 0;
else
idle++;
+
+ if(runtime·debug.schedtrace > 0 && lasttrace + runtime·debug.schedtrace*1000000ll <= now) {
+ lasttrace = now;
+ runtime·schedtrace(runtime·debug.scheddetail);
+ }
}
}
+typedef struct Pdesc Pdesc;
+struct Pdesc
+{
+ uint32 schedtick;
+ int64 schedwhen;
+ uint32 syscalltick;
+ int64 syscallwhen;
+};
+static Pdesc pdesc[MaxGomaxprocs];
+
static uint32
-retake(uint32 *ticks)
+retake(int64 now)
{
uint32 i, s, n;
int64 t;
P *p;
+ Pdesc *pd;
n = 0;
for(i = 0; i < runtime·gomaxprocs; i++) {
p = runtime·allp[i];
if(p==nil)
continue;
- t = p->tick;
- if(ticks[i] != t) {
- ticks[i] = t;
- continue;
- }
+ pd = &pdesc[i];
s = p->status;
- if(s != Psyscall)
+ if(s == Psyscall) {
+ // Retake P from syscall if it's there for more than 1 sysmon tick (20us).
+ // But only if there is other work to do.
+ t = p->syscalltick;
+ if(pd->syscalltick != t) {
+ pd->syscalltick = t;
+ pd->syscallwhen = now;
+ continue;
+ }
+ if(p->runqhead == p->runqtail &&
+ runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0)
+ continue;
+ // Need to decrement number of idle locked M's
+ // (pretending that one more is running) before the CAS.
+ // Otherwise the M from which we retake can exit the syscall,
+ // increment nmidle and report deadlock.
+ incidlelocked(-1);
+ if(runtime·cas(&p->status, s, Pidle)) {
+ n++;
+ handoffp(p);
+ }
+ incidlelocked(1);
+ } else if(s == Prunning) {
+ // Preempt G if it's running for more than 10ms.
+ t = p->schedtick;
+ if(pd->schedtick != t) {
+ pd->schedtick = t;
+ pd->schedwhen = now;
+ continue;
+ }
+ if(pd->schedwhen + 10*1000*1000 > now)
+ continue;
+ preemptone(p);
+ }
+ }
+ return n;
+}
+
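retake's bookkeeping is a small no-progress watchdog: each P bumps a tick whenever it makes progress (a scheduling event or a new syscall), sysmon snapshots the tick together with a timestamp, and action is taken only when the tick has stayed put past a deadline. The same pattern in miniature (illustrative names, not runtime code):

#include <stdint.h>

typedef struct {
	uint32_t lasttick;  /* last observed progress counter */
	int64_t  lastwhen;  /* timestamp (ns) when lasttick was recorded */
} Watch;

/* Returns 1 if no progress was observed for at least thresh ns. */
static int stuck(Watch *w, uint32_t tick, int64_t now, int64_t thresh) {
	if (w->lasttick != tick) {  /* progress: re-arm the watchdog */
		w->lasttick = tick;
		w->lastwhen = now;
		return 0;
	}
	return w->lastwhen + thresh <= now;
}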
+// Tell all goroutines that they have been preempted and they should stop.
+// This function is purely best-effort. It can fail to inform a goroutine if a
+// processor just started running it.
+// No locks need to be held.
+// Returns true if preemption request was issued to at least one goroutine.
+static bool
+preemptall(void)
+{
+ P *p;
+ int32 i;
+ bool res;
+
+ res = false;
+ for(i = 0; i < runtime·gomaxprocs; i++) {
+ p = runtime·allp[i];
+ if(p == nil || p->status != Prunning)
continue;
- if(p->runqhead == p->runqtail && runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0) // TODO: fast atomic
+ res |= preemptone(p);
+ }
+ return res;
+}
+
+// Tell the goroutine running on processor P to stop.
+// This function is purely best-effort. It can incorrectly fail to inform the
+// goroutine. It can inform the wrong goroutine. Even if it informs the
+// correct goroutine, that goroutine might ignore the request if it is
+// simultaneously executing runtime·newstack.
+// No lock needs to be held.
+// Returns true if preemption request was issued.
+static bool
+preemptone(P *p)
+{
+ M *mp;
+ G *gp;
+
+ mp = p->m;
+ if(mp == nil || mp == m)
+ return false;
+ gp = mp->curg;
+ if(gp == nil || gp == mp->g0)
+ return false;
+ gp->preempt = true;
+ gp->stackguard0 = StackPreempt;
+ return true;
+}
+
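Preemption here is cooperative and rides on the stack-split check: StackPreempt (the runtime defines it as -1314) is larger than any real stack address, so once stackguard0 is poisoned, the very next function prologue fails its stack check and calls into morestack, which recognizes the sentinel and reschedules instead of growing the stack. A toy model of that control flow (plain C, not the runtime's code):

#include <stdint.h>
#include <stdio.h>

#define STACK_PREEMPT ((uintptr_t)-1314)  /* sentinel, mirrors the runtime's value */

typedef struct { uintptr_t stackguard0; int preempt; } FakeG;

static void morestack(FakeG *g) {
	if (g->stackguard0 == STACK_PREEMPT && g->preempt)
		printf("not a real overflow: reschedule instead of growing the stack\n");
}

/* What every compiled function prologue does, in spirit. */
static void prologue(FakeG *g, uintptr_t sp) {
	if (sp < g->stackguard0)
		morestack(g);
}

int main(void) {
	FakeG g = {STACK_PREEMPT, 1};
	prologue(&g, 0x7fff1000);  /* any plausible SP is below the poisoned guard */
	return 0;
}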
+void
+runtime·schedtrace(bool detailed)
+{
+ static int64 starttime;
+ int64 now;
+ int64 id1, id2, id3;
+ int32 i, q, t, h, s;
+ int8 *fmt;
+ M *mp, *lockedm;
+ G *gp, *lockedg;
+ P *p;
+
+ now = runtime·nanotime();
+ if(starttime == 0)
+ starttime = now;
+
+ runtime·lock(&runtime·sched);
+ runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
+ (now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidle, runtime·sched.mcount,
+ runtime·sched.nmidle, runtime·sched.runqsize);
+ if(detailed) {
+ runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
+ runtime·sched.gcwaiting, runtime·sched.nmidlelocked, runtime·sched.nmspinning,
+ runtime·sched.stopwait, runtime·sched.sysmonwait);
+ }
+ // We must be careful while reading data from P's, M's and G's.
+ // Even if we hold schedlock, most data can be changed concurrently.
+ // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
+ for(i = 0; i < runtime·gomaxprocs; i++) {
+ p = runtime·allp[i];
+ if(p == nil)
continue;
- // Need to increment number of locked M's before the CAS.
- // Otherwise the M from which we retake can exit the syscall,
- // increment nmidle and report deadlock.
- inclocked(-1);
- if(runtime·cas(&p->status, s, Pidle)) {
- n++;
- handoffp(p);
+ mp = p->m;
+ t = p->runqtail;
+ h = p->runqhead;
+ s = p->runqsize;
+ q = t - h;
+ if(q < 0)
+ q += s;
+ if(detailed)
+ runtime·printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d/%d gfreecnt=%d\n",
+ i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, q, s, p->gfreecnt);
+ else {
+ // In non-detailed mode, format the lengths of the per-P run queues as:
+ // [len1 len2 len3 len4]
+ fmt = " %d";
+ if(runtime·gomaxprocs == 1)
+ fmt = " [%d]\n";
+ else if(i == 0)
+ fmt = " [%d";
+ else if(i == runtime·gomaxprocs-1)
+ fmt = " %d]\n";
+ runtime·printf(fmt, q);
}
- inclocked(1);
}
- return n;
+ if(!detailed) {
+ runtime·unlock(&runtime·sched);
+ return;
+ }
+ for(mp = runtime·allm; mp; mp = mp->alllink) {
+ p = mp->p;
+ gp = mp->curg;
+ lockedg = mp->lockedg;
+ id1 = -1;
+ if(p)
+ id1 = p->id;
+ id2 = -1;
+ if(gp)
+ id2 = gp->goid;
+ id3 = -1;
+ if(lockedg)
+ id3 = lockedg->goid;
+ runtime·printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
+ " locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n",
+ mp->id, id1, id2,
+ mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
+ mp->spinning, id3);
+ }
+ for(gp = runtime·allg; gp; gp = gp->alllink) {
+ mp = gp->m;
+ lockedm = gp->lockedm;
+ runtime·printf(" G%D: status=%d(%s) m=%d lockedm=%d\n",
+ gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
+ lockedm ? lockedm->id : -1);
+ }
+ runtime·unlock(&runtime·sched);
}
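For reference, one line of the non-detailed output as implied by the format strings above, with invented values and gomaxprocs=4:

SCHED 2004ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=2 runqueue=3 [0 1 0 2]

The trace is driven by the runtime·debug.schedtrace and runtime·debug.scheddetail variables filled in by runtime·parsedebugvars, i.e. GODEBUG=schedtrace=1000 emits a line every second, and adding scheddetail=1 switches to the per-M and per-G form.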
// Put mp on midle list.
@@ -2103,7 +2649,7 @@ globrunqput(G *gp)
// Try get a batch of G's from the global runnable queue.
// Sched must be locked.
static G*
-globrunqget(P *p)
+globrunqget(P *p, int32 max)
{
G *gp, *gp1;
int32 n;
@@ -2113,6 +2659,8 @@ globrunqget(P *p)
n = runtime·sched.runqsize/runtime·gomaxprocs+1;
if(n > runtime·sched.runqsize)
n = runtime·sched.runqsize;
+ if(max > 0 && n > max)
+ n = max;
runtime·sched.runqsize -= n;
if(runtime·sched.runqsize == 0)
runtime·sched.runqtail = nil;
@@ -2365,3 +2913,48 @@ runtime·testSchedLocalQueueSteal(void)
}
}
+extern void runtime·morestack(void);
+
+// Does f mark the top of a goroutine stack?
+bool
+runtime·topofstack(Func *f)
+{
+ return f->entry == (uintptr)runtime·goexit ||
+ f->entry == (uintptr)runtime·mstart ||
+ f->entry == (uintptr)runtime·mcall ||
+ f->entry == (uintptr)runtime·morestack ||
+ f->entry == (uintptr)runtime·lessstack ||
+ f->entry == (uintptr)_rt0_go;
+}
+
+void
+runtime∕debug·setMaxThreads(intgo in, intgo out)
+{
+ runtime·lock(&runtime·sched);
+ out = runtime·sched.maxmcount;
+ runtime·sched.maxmcount = in;
+ checkmcount();
+ runtime·unlock(&runtime·sched);
+ FLUSH(&out);
+}
+
+static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h
+
+static bool
+haveexperiment(int8 *name)
+{
+ int32 i, j;
+
+ for(i=0; i<sizeof(experiment); i++) {
+ if((i == 0 || experiment[i-1] == ',') && experiment[i] == name[0]) {
+ for(j=0; name[j]; j++)
+ if(experiment[i+j] != name[j])
+ goto nomatch;
+ if(experiment[i+j] != '\0' && experiment[i+j] != ',')
+ goto nomatch;
+ return 1;
+ }
+ nomatch:;
+ }
+ return 0;
+}