diff options
Diffstat (limited to 'src/pkg/runtime/proc.c')
| -rw-r--r-- | src/pkg/runtime/proc.c | 717 | 
1 files changed, 450 insertions, 267 deletions
| diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index ed3e1e73e..914a02e0b 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -58,17 +58,23 @@ struct Sched {  	int32	profilehz;	// cpu profiling rate  }; -// The max value of GOMAXPROCS. -// There are no fundamental restrictions on the value. -enum { MaxGomaxprocs = 1<<8 }; +enum +{ +	// The max value of GOMAXPROCS. +	// There are no fundamental restrictions on the value. +	MaxGomaxprocs = 1<<8, + +	// Number of goroutine ids to grab from runtime·sched.goidgen to local per-P cache at once. +	// 16 seems to provide enough amortization, but other than that it's mostly arbitrary number. +	GoidCacheBatch = 16, +};  Sched	runtime·sched;  int32	runtime·gomaxprocs;  uint32	runtime·needextram;  bool	runtime·iscgo;  M	runtime·m0; -G	runtime·g0;	 // idle goroutine for m0 -G*	runtime·allg; +G	runtime·g0;	// idle goroutine for m0  G*	runtime·lastg;  M*	runtime·allm;  M*	runtime·extram; @@ -76,10 +82,15 @@ int8*	runtime·goos;  int32	runtime·ncpu;  static int32	newprocs; +static	Lock allglock;	// the following vars are protected by this lock or by stoptheworld +G**	runtime·allg; +uintptr runtime·allglen; +static	uintptr allgcap; +  void runtime·mstart(void);  static void runqput(P*, G*);  static G* runqget(P*); -static void runqgrow(P*); +static bool runqputslow(P*, G*, uint32, uint32);  static G* runqsteal(P*, P*);  static void mput(M*);  static M* mget(void); @@ -106,6 +117,7 @@ static void gfput(P*, G*);  static G* gfget(P*);  static void gfpurge(P*);  static void globrunqput(G*); +static void globrunqputbatch(G*, G*, int32);  static G* globrunqget(P*, int32);  static P* pidleget(void);  static void pidleput(P*); @@ -114,6 +126,7 @@ static bool preemptall(void);  static bool preemptone(P*);  static bool exitsyscallfast(void);  static bool haveexperiment(int8*); +static void allgadd(G*);  // The bootstrap sequence is:  // @@ -131,9 +144,9 @@ runtime·schedinit(void)  	Eface i;  	runtime·sched.maxmcount = 10000; -	runtime·precisestack = haveexperiment("precisestack"); +	runtime·precisestack = true; // haveexperiment("precisestack"); -	runtime·mprofinit(); +	runtime·symtabinit();  	runtime·mallocinit();  	mcommoninit(m); @@ -142,14 +155,15 @@ runtime·schedinit(void)  	// in a fault during a garbage collection, it will not  	// need to allocated memory.  	runtime·newErrorCString(0, &i); +	 +	// Initialize the cached gotraceback value, since +	// gotraceback calls getenv, which mallocs on Plan 9. +	runtime·gotraceback(nil);  	runtime·goargs();  	runtime·goenvs();  	runtime·parsedebugvars(); -	// Allocate internal symbol table representation now, we need it for GC anyway. -	runtime·symtabinit(); -  	runtime·sched.lastpoll = runtime·nanotime();  	procs = 1;  	p = runtime·getenv("GOMAXPROCS"); @@ -161,6 +175,11 @@ runtime·schedinit(void)  	runtime·allp = runtime·malloc((MaxGomaxprocs+1)*sizeof(runtime·allp[0]));  	procresize(procs); +	runtime·copystack = runtime·precisestack; +	p = runtime·getenv("GOCOPYSTACK"); +	if(p != nil && !runtime·strcmp(p, (byte*)"0")) +		runtime·copystack = false; +  	mstats.enablegc = 1;  	if(raceenabled) @@ -175,6 +194,15 @@ static FuncVal scavenger = {runtime·MHeap_Scavenger};  static FuncVal initDone = { runtime·unlockOSThread };  // The main goroutine. +// Note: C frames in general are not copyable during stack growth, for two reasons: +//   1) We don't know where in a frame to find pointers to other stack locations. +//   2) There's no guarantee that globals or heap values do not point into the frame. +// +// The C frame for runtime.main is copyable, because: +//   1) There are no pointers to other stack locations in the frame +//      (d.fn points at a global, d.link is nil, d.argp is -1). +//   2) The only pointer into this frame is from the defer chain, +//      which is explicitly handled during stack copying.  void  runtime·main(void)  { @@ -202,9 +230,8 @@ runtime·main(void)  	d.fn = &initDone;  	d.siz = 0;  	d.link = g->defer; -	d.argp = (void*)-1; +	d.argp = NoArgs;  	d.special = true; -	d.free = false;  	g->defer = &d;  	if(m != &runtime·m0) @@ -237,6 +264,7 @@ void  runtime·goroutineheader(G *gp)  {  	int8 *status; +	int64 waitfor;  	switch(gp->status) {  	case Gidle: @@ -261,7 +289,16 @@ runtime·goroutineheader(G *gp)  		status = "???";  		break;  	} -	runtime·printf("goroutine %D [%s]:\n", gp->goid, status); + +	// approx time the G is blocked, in minutes +	waitfor = 0; +	if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0) +		waitfor = (runtime·nanotime() - gp->waitsince) / (60LL*1000*1000*1000); + +	if(waitfor < 1) +		runtime·printf("goroutine %D [%s]:\n", gp->goid, status); +	else +		runtime·printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor);  }  void @@ -269,6 +306,7 @@ runtime·tracebackothers(G *me)  {  	G *gp;  	int32 traceback; +	uintptr i;  	traceback = runtime·gotraceback(nil); @@ -279,7 +317,9 @@ runtime·tracebackothers(G *me)  		runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);  	} -	for(gp = runtime·allg; gp != nil; gp = gp->alllink) { +	runtime·lock(&allglock); +	for(i = 0; i < runtime·allglen; i++) { +		gp = runtime·allg[i];  		if(gp == me || gp == m->curg || gp->status == Gdead)  			continue;  		if(gp->issystem && traceback < 2) @@ -292,6 +332,7 @@ runtime·tracebackothers(G *me)  		} else  			runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);  	} +	runtime·unlock(&allglock);  }  static void @@ -562,15 +603,6 @@ runtime·starttheworld(void)  void  runtime·mstart(void)  { -#ifdef GOOS_windows -#ifdef GOARCH_386 -	// It is used by windows-386 only. Unfortunately, seh needs -	// to be located on os stack, and mstart runs on os stack -	// for both m0 and m. -	SEH seh; -#endif -#endif -  	if(g != m->g0)  		runtime·throw("bad runtime·mstart"); @@ -580,11 +612,6 @@ runtime·mstart(void)  	runtime·gosave(&m->g0->sched);  	m->g0->sched.pc = (uintptr)-1;  // make sure it is never used  	m->g0->stackguard = m->g0->stackguard0;  // cgo sets only stackguard0, copy it to stackguard -#ifdef GOOS_windows -#ifdef GOARCH_386 -	m->seh = &seh; -#endif -#endif  	runtime·asminit();  	runtime·minit(); @@ -620,6 +647,7 @@ struct CgoThreadStart  {  	M *m;  	G *g; +	uintptr *tls;  	void (*fn)(void);  }; @@ -643,9 +671,9 @@ runtime·allocm(P *p)  	mp = runtime·cnew(mtype);  	mcommoninit(mp); -	// In case of cgo, pthread_create will make us a stack. +	// In case of cgo or Solaris, pthread_create will make us a stack.  	// Windows will layout sched stack on OS stack. -	if(runtime·iscgo || Windows) +	if(runtime·iscgo || Solaris || Windows)  		mp->g0 = runtime·malg(-1);  	else  		mp->g0 = runtime·malg(8192); @@ -659,6 +687,21 @@ runtime·allocm(P *p)  	return mp;  } +static G* +allocg(void) +{ +	G *gp; +	static Type *gtype; +	 +	if(gtype == nil) { +		Eface e; +		runtime·gc_g_ptr(&e); +		gtype = ((PtrType*)e.type)->elem; +	} +	gp = runtime·cnew(gtype); +	return gp; +} +  static M* lockextra(bool nilokay);  static void unlockextra(M*); @@ -735,16 +778,6 @@ runtime·needm(byte x)  	g->stackguard = (uintptr)(&x - 32*1024);  	g->stackguard0 = g->stackguard; -#ifdef GOOS_windows -#ifdef GOARCH_386 -	// On windows/386, we need to put an SEH frame (two words) -	// somewhere on the current stack. We are called from cgocallback_gofunc -	// and we know that it will leave two unused words below m->curg->sched.sp. -	// Use those. -	m->seh = (SEH*)((uintptr*)&x + 1); -#endif -#endif -  	// Initialize this thread to use the m.  	runtime·asminit();  	runtime·minit(); @@ -783,13 +816,7 @@ runtime·newextram(void)  	if(raceenabled)  		gp->racectx = runtime·racegostart(runtime·newextram);  	// put on allg for garbage collector -	runtime·lock(&runtime·sched); -	if(runtime·lastg == nil) -		runtime·allg = gp; -	else -		runtime·lastg->alllink = gp; -	runtime·lastg = gp; -	runtime·unlock(&runtime·sched); +	allgadd(gp);  	// Add m to the extra list.  	mnext = lockextra(true); @@ -828,12 +855,6 @@ runtime·dropm(void)  	// Undo whatever initialization minit did during needm.  	runtime·unminit(); -#ifdef GOOS_windows -#ifdef GOARCH_386 -	m->seh = nil;  // reset dangling typed pointer -#endif -#endif -  	// Clear m and g, and return m to the extra list.  	// After the call to setmg we can only call nosplit functions.  	mp = m; @@ -904,6 +925,7 @@ newm(void(*fn)(void), P *p)  			runtime·throw("_cgo_thread_start missing");  		ts.m = mp;  		ts.g = mp->g0; +		ts.tls = mp->tls;  		ts.fn = runtime·mstart;  		runtime·asmcgocall(_cgo_thread_start, &ts);  		return; @@ -948,7 +970,7 @@ mspinning(void)  }  // Schedules some M to run the p (creates an M if necessary). -// If p==nil, tries to get an idle P, if no idle P's returns false. +// If p==nil, tries to get an idle P, if no idle P's does nothing.  static void  startm(P *p, bool spinning)  { @@ -1112,6 +1134,7 @@ execute(G *gp)  		runtime·throw("execute: bad g status");  	}  	gp->status = Grunning; +	gp->waitsince = 0;  	gp->preempt = false;  	gp->stackguard0 = gp->stackguard;  	m->p->schedtick++; @@ -1140,6 +1163,8 @@ top:  		gcstopm();  		goto top;  	} +	if(runtime·fingwait && runtime·fingwake && (gp = runtime·wakefing()) != nil) +		runtime·ready(gp);  	// local runq  	gp = runqget(m->p);  	if(gp) @@ -1331,28 +1356,52 @@ top:  	execute(gp);  } -// Puts the current goroutine into a waiting state and unlocks the lock. -// The goroutine can be made runnable again by calling runtime·ready(gp). +// Puts the current goroutine into a waiting state and calls unlockf. +// If unlockf returns false, the goroutine is resumed.  void -runtime·park(void(*unlockf)(Lock*), Lock *lock, int8 *reason) +runtime·park(bool(*unlockf)(G*, void*), void *lock, int8 *reason)  { +	if(g->status != Grunning) +		runtime·throw("bad g status");  	m->waitlock = lock;  	m->waitunlockf = unlockf;  	g->waitreason = reason;  	runtime·mcall(park0);  } +static bool +parkunlock(G *gp, void *lock) +{ +	USED(gp); +	runtime·unlock(lock); +	return true; +} + +// Puts the current goroutine into a waiting state and unlocks the lock. +// The goroutine can be made runnable again by calling runtime·ready(gp). +void +runtime·parkunlock(Lock *lock, int8 *reason) +{ +	runtime·park(parkunlock, lock, reason); +} +  // runtime·park continuation on g0.  static void  park0(G *gp)  { +	bool ok; +  	gp->status = Gwaiting;  	gp->m = nil;  	m->curg = nil;  	if(m->waitunlockf) { -		m->waitunlockf(m->waitlock); +		ok = m->waitunlockf(gp, m->waitlock);  		m->waitunlockf = nil;  		m->waitlock = nil; +		if(!ok) { +			gp->status = Grunnable; +			execute(gp);  // Schedule it back, never returns. +		}  	}  	if(m->lockedg) {  		stoplockedm(); @@ -1365,6 +1414,8 @@ park0(G *gp)  void  runtime·gosched(void)  { +	if(g->status != Grunning) +		runtime·throw("bad g status");  	runtime·mcall(runtime·gosched0);  } @@ -1393,6 +1444,8 @@ runtime·gosched0(G *gp)  void  runtime·goexit(void)  { +	if(g->status != Grunning) +		runtime·throw("bad g status");  	if(raceenabled)  		runtime·racegoend();  	runtime·mcall(goexit0); @@ -1405,6 +1458,13 @@ goexit0(G *gp)  	gp->status = Gdead;  	gp->m = nil;  	gp->lockedm = nil; +	gp->paniconfault = 0; +	gp->defer = nil; // should be true already but just in case. +	gp->panic = nil; // non-nil for Goexit during panic. points at stack-allocated data. +	gp->writenbuf = 0; +	gp->writebuf = nil; +	gp->waitreason = nil; +	gp->param = nil;  	m->curg = nil;  	m->lockedg = nil;  	if(m->locked & ~LockExternal) { @@ -1535,6 +1595,7 @@ runtime·exitsyscall(void)  	if(g->isbackground)  // do not consider blocked scavenger for deadlock detection  		incidlelocked(-1); +	g->waitsince = 0;  	if(exitsyscallfast()) {  		// There's a cpu for us, so we can run.  		m->p->syscalltick++; @@ -1640,6 +1701,7 @@ exitsyscall0(G *gp)  }  // Called from syscall package before fork. +#pragma textflag NOSPLIT  void  syscall·runtime_BeforeFork(void)  { @@ -1648,14 +1710,28 @@ syscall·runtime_BeforeFork(void)  	m->locks++;  	if(m->profilehz != 0)  		runtime·resetcpuprofiler(0); + +	// This function is called before fork in syscall package. +	// Code between fork and exec must not allocate memory nor even try to grow stack. +	// Here we spoil g->stackguard to reliably detect any attempts to grow stack. +	// runtime_AfterFork will undo this in parent process, but not in child. +	m->forkstackguard = g->stackguard; +	g->stackguard0 = StackPreempt-1; +	g->stackguard = StackPreempt-1;  }  // Called from syscall package after fork in parent. +#pragma textflag NOSPLIT  void  syscall·runtime_AfterFork(void)  {  	int32 hz; +	// See the comment in runtime_BeforeFork. +	g->stackguard0 = m->forkstackguard; +	g->stackguard = m->forkstackguard; +	m->forkstackguard = 0; +  	hz = runtime·sched.profilehz;  	if(hz != 0)  		runtime·resetcpuprofiler(hz); @@ -1669,7 +1745,13 @@ syscall·runtime_AfterFork(void)  static void  mstackalloc(G *gp)  { -	gp->param = runtime·stackalloc((uintptr)gp->param); +	G *newg; +	uintptr size; + +	newg = (G*)gp->param; +	size = newg->stacksize; +	newg->stacksize = 0; +	gp->param = runtime·stackalloc(newg, size);  	runtime·gogo(&gp->sched);  } @@ -1685,24 +1767,24 @@ runtime·malg(int32 stacksize)  		runtime·throw("runtime: bad stack.h");  	} -	newg = runtime·malloc(sizeof(G)); +	newg = allocg();  	if(stacksize >= 0) { +		stacksize = runtime·round2(StackSystem + stacksize);  		if(g == m->g0) {  			// running on scheduler stack already. -			stk = runtime·stackalloc(StackSystem + stacksize); +			stk = runtime·stackalloc(newg, stacksize);  		} else {  			// have to call stackalloc on scheduler stack. -			g->param = (void*)(StackSystem + stacksize); +			newg->stacksize = stacksize; +			g->param = newg;  			runtime·mcall(mstackalloc);  			stk = g->param;  			g->param = nil;  		} -		newg->stacksize = StackSystem + stacksize;  		newg->stack0 = (uintptr)stk;  		newg->stackguard = (uintptr)stk + StackGuard;  		newg->stackguard0 = newg->stackguard; -		newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop); -		runtime·memclr((byte*)newg->stackbase, sizeof(Stktop)); +		newg->stackbase = (uintptr)stk + stacksize - sizeof(Stktop);  	}  	return newg;  } @@ -1736,9 +1818,14 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp  {  	byte *sp;  	G *newg; +	P *p;  	int32 siz;  //runtime·printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret); +	if(fn == nil) { +		m->throwing = -1;  // do not dump full stacks +		runtime·throw("go of nil func value"); +	}  	m->locks++;  // disable preemption because it can be holding p in a local var  	siz = narg + nret;  	siz = (siz+7) & ~7; @@ -1750,18 +1837,13 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp  	if(siz > StackMin - 1024)  		runtime·throw("runtime.newproc: function arguments too large for new goroutine"); -	if((newg = gfget(m->p)) != nil) { +	p = m->p; +	if((newg = gfget(p)) != nil) {  		if(newg->stackguard - StackGuard != newg->stack0)  			runtime·throw("invalid stack in newg");  	} else {  		newg = runtime·malg(StackMin); -		runtime·lock(&runtime·sched); -		if(runtime·lastg == nil) -			runtime·allg = newg; -		else -			runtime·lastg->alllink = newg; -		runtime·lastg = newg; -		runtime·unlock(&runtime·sched); +		allgadd(newg);  	}  	sp = (byte*)newg->stackbase; @@ -1780,11 +1862,15 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp  	runtime·gostartcallfn(&newg->sched, fn);  	newg->gopc = (uintptr)callerpc;  	newg->status = Grunnable; -	newg->goid = runtime·xadd64(&runtime·sched.goidgen, 1); +	if(p->goidcache == p->goidcacheend) { +		p->goidcache = runtime·xadd64(&runtime·sched.goidgen, GoidCacheBatch); +		p->goidcacheend = p->goidcache + GoidCacheBatch; +	} +	newg->goid = p->goidcache++;  	newg->panicwrap = 0;  	if(raceenabled)  		newg->racectx = runtime·racegostart((void*)callerpc); -	runqput(m->p, newg); +	runqput(p, newg);  	if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main)  // TODO: fast atomic  		wakep(); @@ -1794,13 +1880,56 @@ runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerp  	return newg;  } +static void +allgadd(G *gp) +{ +	G **new; +	uintptr cap; + +	runtime·lock(&allglock); +	if(runtime·allglen >= allgcap) { +		cap = 4096/sizeof(new[0]); +		if(cap < 2*allgcap) +			cap = 2*allgcap; +		new = runtime·malloc(cap*sizeof(new[0])); +		if(new == nil) +			runtime·throw("runtime: cannot allocate memory"); +		if(runtime·allg != nil) { +			runtime·memmove(new, runtime·allg, runtime·allglen*sizeof(new[0])); +			runtime·free(runtime·allg); +		} +		runtime·allg = new; +		allgcap = cap; +	} +	runtime·allg[runtime·allglen++] = gp; +	runtime·unlock(&allglock); +} +  // Put on gfree list.  // If local list is too long, transfer a batch to the global list.  static void  gfput(P *p, G *gp)  { +	uintptr stksize; +	Stktop *top; +  	if(gp->stackguard - StackGuard != gp->stack0)  		runtime·throw("invalid stack in gfput"); +	stksize = gp->stackbase + sizeof(Stktop) - gp->stack0; +	if(stksize != gp->stacksize) { +		runtime·printf("runtime: bad stacksize, goroutine %D, remain=%d, last=%d\n", +			gp->goid, (int32)gp->stacksize, (int32)stksize); +		runtime·throw("gfput: bad stacksize"); +	} +	top = (Stktop*)gp->stackbase; +	if(top->malloced) { +		// non-standard stack size - free it. +		runtime·stackfree(gp, (void*)gp->stack0, top); +		gp->stack0 = 0; +		gp->stackguard = 0; +		gp->stackguard0 = 0; +		gp->stackbase = 0; +	}  	gp->schedlink = p->gfree;  	p->gfree = gp;  	p->gfreecnt++; @@ -1823,6 +1952,7 @@ static G*  gfget(P *p)  {  	G *gp; +	byte *stk;  retry:  	gp = p->gfree; @@ -1841,6 +1971,23 @@ retry:  	if(gp) {  		p->gfree = gp->schedlink;  		p->gfreecnt--; + +		if(gp->stack0 == 0) { +			// Stack was deallocated in gfput.  Allocate a new one. +			if(g == m->g0) { +				stk = runtime·stackalloc(gp, FixedStack); +			} else { +				gp->stacksize = FixedStack; +				g->param = gp; +				runtime·mcall(mstackalloc); +				stk = g->param; +				g->param = nil; +			} +			gp->stack0 = (uintptr)stk; +			gp->stackbase = (uintptr)stk + FixedStack - sizeof(Stktop); +			gp->stackguard = (uintptr)stk + StackGuard; +			gp->stackguard0 = gp->stackguard; +		}  	}  	return gp;  } @@ -1963,39 +2110,26 @@ runtime·lockedOSThread(void)  	return g->lockedm != nil && m->lockedg != nil;  } -// for testing of callbacks -void -runtime·golockedOSThread(bool ret) -{ -	ret = runtime·lockedOSThread(); -	FLUSH(&ret); -} - -void -runtime·NumGoroutine(intgo ret) -{ -	ret = runtime·gcount(); -	FLUSH(&ret); -} -  int32  runtime·gcount(void)  {  	G *gp;  	int32 n, s; +	uintptr i;  	n = 0; -	runtime·lock(&runtime·sched); +	runtime·lock(&allglock);  	// TODO(dvyukov): runtime.NumGoroutine() is O(N).  	// We do not want to increment/decrement centralized counter in newproc/goexit,  	// just to make runtime.NumGoroutine() faster.  	// Compromise solution is to introduce per-P counters of active goroutines. -	for(gp = runtime·allg; gp; gp = gp->alllink) { +	for(i = 0; i < runtime·allglen; i++) { +		gp = runtime·allg[i];  		s = gp->status;  		if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)  			n++;  	} -	runtime·unlock(&runtime·sched); +	runtime·unlock(&allglock);  	return n;  } @@ -2032,25 +2166,30 @@ static struct {  	uintptr pcbuf[100];  } prof; -static void -System(void) -{ -} +static void System(void) {} +static void ExternalCode(void) {} +static void GC(void) {} +extern byte etext[];  // Called if we receive a SIGPROF signal.  void -runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp) +runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)  {  	int32 n;  	bool traceback; +	// Do not use global m in this function, use mp instead. +	// On windows one m is sending reports about all the g's, so m means a wrong thing. +	byte m; + +	m = 0; +	USED(m);  	if(prof.fn == nil || prof.hz == 0)  		return; -	traceback = true; -	// Windows does profiling in a dedicated thread w/o m. -	if(!Windows && (m == nil || m->mcache == nil)) -		traceback = false; -	 + +	// Profiling runs concurrently with GC, so it must not allocate. +	mp->mallocing++; +  	// Define that a "user g" is a user-created goroutine, and a "system g"  	// is one that is m->g0 or m->gsignal. We've only made sure that we  	// can unwind user g's, so exclude the system g's. @@ -2123,36 +2262,55 @@ runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp)  	// To recap, there are no constraints on the assembly being used for the  	// transition. We simply require that g and SP match and that the PC is not  	// in runtime.gogo. -	// -	// On Windows, one m is sending reports about all the g's, so gp == m->curg -	// is not a useful comparison. The profilem function in os_windows.c has -	// already checked that gp is a user g. -	if(gp == nil || -	   (!Windows && gp != m->curg) || +	traceback = true; +	if(gp == nil || gp != mp->curg ||  	   (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||  	   ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))  		traceback = false; -	// Race detector calls asmcgocall w/o entersyscall/exitsyscall, -	// we can not currently unwind through asmcgocall. -	if(m != nil && m->racecall) -		traceback = false; -  	runtime·lock(&prof);  	if(prof.fn == nil) {  		runtime·unlock(&prof); +		mp->mallocing--;  		return;  	}  	n = 0;  	if(traceback)  		n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);  	if(!traceback || n <= 0) { -		n = 2; -		prof.pcbuf[0] = (uintptr)pc; -		prof.pcbuf[1] = (uintptr)System + 1; +		// Normal traceback is impossible or has failed. +		// See if it falls into several common cases. +		n = 0; +		if(mp->ncgo > 0 && mp->curg != nil && +			mp->curg->syscallpc != 0 && mp->curg->syscallsp != 0) { +			// Cgo, we can't unwind and symbolize arbitrary C code, +			// so instead collect Go stack that leads to the cgo call. +			// This is especially important on windows, since all syscalls are cgo calls. +			n = runtime·gentraceback(mp->curg->syscallpc, mp->curg->syscallsp, 0, mp->curg, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false); +		} +#ifdef GOOS_windows +		if(n == 0 && mp->libcallg != nil && mp->libcallpc != 0 && mp->libcallsp != 0) { +			// Libcall, i.e. runtime syscall on windows. +			// Collect Go stack that leads to the call. +			n = runtime·gentraceback(mp->libcallpc, mp->libcallsp, 0, mp->libcallg, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false); +		} +#endif +		if(n == 0) { +			// If all of the above has failed, account it against abstract "System" or "GC". +			n = 2; +			// "ExternalCode" is better than "etext". +			if((uintptr)pc > (uintptr)etext) +				pc = (byte*)ExternalCode + PCQuantum; +			prof.pcbuf[0] = (uintptr)pc; +			if(mp->gcing || mp->helpgc) +				prof.pcbuf[1] = (uintptr)GC + PCQuantum; +			else +				prof.pcbuf[1] = (uintptr)System + PCQuantum; +		}  	}  	prof.fn(prof.pcbuf, n);  	runtime·unlock(&prof); +	mp->mallocing--;  }  // Arrange to call fn with a traceback hz times a second. @@ -2195,6 +2353,7 @@ static void  procresize(int32 new)  {  	int32 i, old; +	bool empty;  	G *gp;  	P *p; @@ -2216,27 +2375,42 @@ procresize(int32 new)  			else  				p->mcache = runtime·allocmcache();  		} -		if(p->runq == nil) { -			p->runqsize = 128; -			p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*), 0, FlagNoInvokeGC); -		}  	}  	// redistribute runnable G's evenly -	for(i = 0; i < old; i++) { -		p = runtime·allp[i]; -		while(gp = runqget(p)) -			globrunqput(gp); +	// collect all runnable goroutines in global queue preserving FIFO order +	// FIFO order is required to ensure fairness even during frequent GCs +	// see http://golang.org/issue/7126 +	empty = false; +	while(!empty) { +		empty = true; +		for(i = 0; i < old; i++) { +			p = runtime·allp[i]; +			if(p->runqhead == p->runqtail) +				continue; +			empty = false; +			// pop from tail of local queue +			p->runqtail--; +			gp = p->runq[p->runqtail%nelem(p->runq)]; +			// push onto head of global queue +			gp->schedlink = runtime·sched.runqhead; +			runtime·sched.runqhead = gp; +			if(runtime·sched.runqtail == nil) +				runtime·sched.runqtail = gp; +			runtime·sched.runqsize++; +		}  	} +	// fill local queues with at most nelem(p->runq)/2 goroutines  	// start at 1 because current M already executes some G and will acquire allp[0] below,  	// so if we have a spare G we want to put it into allp[1]. -	for(i = 1; runtime·sched.runqhead; i++) { +	for(i = 1; i < new * nelem(p->runq)/2 && runtime·sched.runqsize > 0; i++) {  		gp = runtime·sched.runqhead;  		runtime·sched.runqhead = gp->schedlink; +		if(runtime·sched.runqhead == nil) +			runtime·sched.runqtail = nil; +		runtime·sched.runqsize--;  		runqput(runtime·allp[i%new], gp);  	} -	runtime·sched.runqtail = nil; -	runtime·sched.runqsize = 0;  	// free unused P's  	for(i = new; i < old; i++) { @@ -2318,30 +2492,41 @@ checkdead(void)  {  	G *gp;  	int32 run, grunning, s; +	uintptr i;  	// -1 for sysmon  	run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;  	if(run > 0)  		return; +	// If we are dying because of a signal caught on an already idle thread, +	// freezetheworld will cause all running threads to block. +	// And runtime will essentially enter into deadlock state, +	// except that there is a thread that will call runtime·exit soon. +	if(runtime·panicking > 0) +		return;  	if(run < 0) { -		runtime·printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n", +		runtime·printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",  			runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount);  		runtime·throw("checkdead: inconsistent counts");  	}  	grunning = 0; -	for(gp = runtime·allg; gp; gp = gp->alllink) { +	runtime·lock(&allglock); +	for(i = 0; i < runtime·allglen; i++) { +		gp = runtime·allg[i];  		if(gp->isbackground)  			continue;  		s = gp->status;  		if(s == Gwaiting)  			grunning++;  		else if(s == Grunnable || s == Grunning || s == Gsyscall) { -			runtime·printf("checkdead: find g %D in status %d\n", gp->goid, s); +			runtime·unlock(&allglock); +			runtime·printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);  			runtime·throw("checkdead: runnable g");  		}  	} +	runtime·unlock(&allglock);  	if(grunning == 0)  // possible if main goroutine calls runtime·Goexit() -		runtime·exit(0); +		runtime·throw("no goroutines (main called runtime.Goexit) - deadlock!");  	m->throwing = -1;  // do not dump full stacks  	runtime·throw("all goroutines are asleep - deadlock!");  } @@ -2418,6 +2603,7 @@ struct Pdesc  	uint32	syscalltick;  	int64	syscallwhen;  }; +#pragma dataflag NOPTR  static Pdesc pdesc[MaxGomaxprocs];  static uint32 @@ -2436,16 +2622,19 @@ retake(int64 now)  		pd = &pdesc[i];  		s = p->status;  		if(s == Psyscall) { -			// Retake P from syscall if it's there for more than 1 sysmon tick (20us). -			// But only if there is other work to do. +			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).  			t = p->syscalltick;  			if(pd->syscalltick != t) {  				pd->syscalltick = t;  				pd->syscallwhen = now;  				continue;  			} +			// On the one hand we don't want to retake Ps if there is no other work to do, +			// but on the other hand we want to retake them eventually +			// because they can prevent the sysmon thread from deep sleep.  			if(p->runqhead == p->runqtail && -				runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0) +				runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0 && +				pd->syscallwhen + 10*1000*1000 > now)  				continue;  			// Need to decrement number of idle locked M's  			// (pretending that one more is running) before the CAS. @@ -2525,7 +2714,8 @@ runtime·schedtrace(bool detailed)  	static int64 starttime;  	int64 now;  	int64 id1, id2, id3; -	int32 i, q, t, h, s; +	int32 i, t, h; +	uintptr gi;  	int8 *fmt;  	M *mp, *lockedm;  	G *gp, *lockedg; @@ -2552,15 +2742,11 @@ runtime·schedtrace(bool detailed)  		if(p == nil)  			continue;  		mp = p->m; -		t = p->runqtail; -		h = p->runqhead; -		s = p->runqsize; -		q = t - h; -		if(q < 0) -			q += s; +		h = runtime·atomicload(&p->runqhead); +		t = runtime·atomicload(&p->runqtail);  		if(detailed) -			runtime·printf("  P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d/%d gfreecnt=%d\n", -				i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, q, s, p->gfreecnt); +			runtime·printf("  P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n", +				i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);  		else {  			// In non-detailed mode format lengths of per-P run queues as:  			// [len1 len2 len3 len4] @@ -2571,7 +2757,7 @@ runtime·schedtrace(bool detailed)  				fmt = " [%d";  			else if(i == runtime·gomaxprocs-1)  				fmt = " %d]\n"; -			runtime·printf(fmt, q); +			runtime·printf(fmt, t-h);  		}  	}  	if(!detailed) { @@ -2592,18 +2778,21 @@ runtime·schedtrace(bool detailed)  		if(lockedg)  			id3 = lockedg->goid;  		runtime·printf("  M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d" -			" locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n", +			" locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",  			mp->id, id1, id2,  			mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc, -			mp->spinning, id3); +			mp->spinning, m->blocked, id3);  	} -	for(gp = runtime·allg; gp; gp = gp->alllink) { +	runtime·lock(&allglock); +	for(gi = 0; gi < runtime·allglen; gi++) { +		gp = runtime·allg[gi];  		mp = gp->m;  		lockedm = gp->lockedm;  		runtime·printf("  G%D: status=%d(%s) m=%d lockedm=%d\n",  			gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,  			lockedm ? lockedm->id : -1);  	} +	runtime·unlock(&allglock);  	runtime·unlock(&runtime·sched);  } @@ -2646,6 +2835,20 @@ globrunqput(G *gp)  	runtime·sched.runqsize++;  } +// Put a batch of runnable goroutines on the global runnable queue. +// Sched must be locked. +static void +globrunqputbatch(G *ghead, G *gtail, int32 n) +{ +	gtail->schedlink = nil; +	if(runtime·sched.runqtail) +		runtime·sched.runqtail->schedlink = ghead; +	else +		runtime·sched.runqhead = ghead; +	runtime·sched.runqtail = gtail; +	runtime·sched.runqsize += n; +} +  // Try get a batch of G's from the global runnable queue.  // Sched must be locked.  static G* @@ -2661,6 +2864,8 @@ globrunqget(P *p, int32 max)  		n = runtime·sched.runqsize;  	if(max > 0 && n > max)  		n = max; +	if(n > nelem(p->runq)/2) +		n = nelem(p->runq)/2;  	runtime·sched.runqsize -= n;  	if(runtime·sched.runqsize == 0)  		runtime·sched.runqtail = nil; @@ -2700,78 +2905,98 @@ pidleget(void)  	return p;  } -// Put g on local runnable queue. -// TODO(dvyukov): consider using lock-free queue. +// Try to put g on local runnable queue. +// If it's full, put onto global queue. +// Executed only by the owner P.  static void  runqput(P *p, G *gp)  { -	int32 h, t, s; +	uint32 h, t; -	runtime·lock(p);  retry: -	h = p->runqhead; +	h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers  	t = p->runqtail; -	s = p->runqsize; -	if(t == h-1 || (h == 0 && t == s-1)) { -		runqgrow(p); -		goto retry; +	if(t - h < nelem(p->runq)) { +		p->runq[t%nelem(p->runq)] = gp; +		runtime·atomicstore(&p->runqtail, t+1);  // store-release, makes the item available for consumption +		return;  	} -	p->runq[t++] = gp; -	if(t == s) -		t = 0; -	p->runqtail = t; -	runtime·unlock(p); +	if(runqputslow(p, gp, h, t)) +		return; +	// the queue is not full, now the put above must suceed +	goto retry; +} + +// Put g and a batch of work from local runnable queue on global queue. +// Executed only by the owner P. +static bool +runqputslow(P *p, G *gp, uint32 h, uint32 t) +{ +	G *batch[nelem(p->runq)/2+1]; +	uint32 n, i; + +	// First, grab a batch from local queue. +	n = t-h; +	n = n/2; +	if(n != nelem(p->runq)/2) +		runtime·throw("runqputslow: queue is not full"); +	for(i=0; i<n; i++) +		batch[i] = p->runq[(h+i)%nelem(p->runq)]; +	if(!runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume +		return false; +	batch[n] = gp; +	// Link the goroutines. +	for(i=0; i<n; i++) +		batch[i]->schedlink = batch[i+1]; +	// Now put the batch on global queue. +	runtime·lock(&runtime·sched); +	globrunqputbatch(batch[0], batch[n], n+1); +	runtime·unlock(&runtime·sched); +	return true;  }  // Get g from local runnable queue. +// Executed only by the owner P.  static G*  runqget(P *p)  {  	G *gp; -	int32 t, h, s; +	uint32 t, h; -	if(p->runqhead == p->runqtail) -		return nil; -	runtime·lock(p); -	h = p->runqhead; -	t = p->runqtail; -	s = p->runqsize; -	if(t == h) { -		runtime·unlock(p); -		return nil; +	for(;;) { +		h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers +		t = p->runqtail; +		if(t == h) +			return nil; +		gp = p->runq[h%nelem(p->runq)]; +		if(runtime·cas(&p->runqhead, h, h+1))  // cas-release, commits consume +			return gp;  	} -	gp = p->runq[h++]; -	if(h == s) -		h = 0; -	p->runqhead = h; -	runtime·unlock(p); -	return gp;  } -// Grow local runnable queue. -// TODO(dvyukov): consider using fixed-size array -// and transfer excess to the global list (local queue can grow way too big). -static void -runqgrow(P *p) +// Grabs a batch of goroutines from local runnable queue. +// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines. +// Can be executed by any P. +static uint32 +runqgrab(P *p, G **batch)  { -	G **q; -	int32 s, t, h, t2; +	uint32 t, h, n, i; -	h = p->runqhead; -	t = p->runqtail; -	s = p->runqsize; -	t2 = 0; -	q = runtime·malloc(2*s*sizeof(*q)); -	while(t != h) { -		q[t2++] = p->runq[h++]; -		if(h == s) -			h = 0; +	for(;;) { +		h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers +		t = runtime·atomicload(&p->runqtail);  // load-acquire, synchronize with the producer +		n = t-h; +		n = n - n/2; +		if(n == 0) +			break; +		if(n > nelem(p->runq)/2)  // read inconsistent h and t +			continue; +		for(i=0; i<n; i++) +			batch[i] = p->runq[(h+i)%nelem(p->runq)]; +		if(runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume +			break;  	} -	runtime·free(p->runq); -	p->runq = q; -	p->runqhead = 0; -	p->runqtail = t2; -	p->runqsize = 2*s; +	return n;  }  // Steal half of elements from local runnable queue of p2 @@ -2780,57 +3005,24 @@ runqgrow(P *p)  static G*  runqsteal(P *p, P *p2)  { -	G *gp, *gp1; -	int32 t, h, s, t2, h2, s2, c, i; +	G *gp; +	G *batch[nelem(p->runq)/2]; +	uint32 t, h, n, i; -	if(p2->runqhead == p2->runqtail) +	n = runqgrab(p2, batch); +	if(n == 0)  		return nil; -	// sort locks to prevent deadlocks -	if(p < p2) -		runtime·lock(p); -	runtime·lock(p2); -	if(p2->runqhead == p2->runqtail) { -		runtime·unlock(p2); -		if(p < p2) -			runtime·unlock(p); -		return nil; -	} -	if(p >= p2) -		runtime·lock(p); -	// now we've locked both queues and know the victim is not empty -	h = p->runqhead; +	n--; +	gp = batch[n]; +	if(n == 0) +		return gp; +	h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers  	t = p->runqtail; -	s = p->runqsize; -	h2 = p2->runqhead; -	t2 = p2->runqtail; -	s2 = p2->runqsize; -	gp = p2->runq[h2++];  // return value -	if(h2 == s2) -		h2 = 0; -	// steal roughly half -	if(t2 > h2) -		c = (t2 - h2) / 2; -	else -		c = (s2 - h2 + t2) / 2; -	// copy -	for(i = 0; i != c; i++) { -		// the target queue is full? -		if(t == h-1 || (h == 0 && t == s-1)) -			break; -		// the victim queue is empty? -		if(t2 == h2) -			break; -		gp1 = p2->runq[h2++]; -		if(h2 == s2) -			h2 = 0; -		p->runq[t++] = gp1; -		if(t == s) -			t = 0; -	} -	p->runqtail = t; -	p2->runqhead = h2; -	runtime·unlock(p2); -	runtime·unlock(p); +	if(t - h + n >= nelem(p->runq)) +		runtime·throw("runqsteal: runq overflow"); +	for(i=0; i<n; i++, t++) +		p->runq[t%nelem(p->runq)] = batch[i]; +	runtime·atomicstore(&p->runqtail, t);  // store-release, makes the item available for consumption  	return gp;  } @@ -2838,14 +3030,10 @@ void  runtime·testSchedLocalQueue(void)  {  	P p; -	G gs[1000]; +	G gs[nelem(p.runq)];  	int32 i, j;  	runtime·memclr((byte*)&p, sizeof(p)); -	p.runqsize = 1; -	p.runqhead = 0; -	p.runqtail = 0; -	p.runq = runtime·malloc(p.runqsize*sizeof(*p.runq));  	for(i = 0; i < nelem(gs); i++) {  		if(runqget(&p) != nil) @@ -2867,20 +3055,11 @@ void  runtime·testSchedLocalQueueSteal(void)  {  	P p1, p2; -	G gs[1000], *gp; +	G gs[nelem(p1.runq)], *gp;  	int32 i, j, s;  	runtime·memclr((byte*)&p1, sizeof(p1)); -	p1.runqsize = 1; -	p1.runqhead = 0; -	p1.runqtail = 0; -	p1.runq = runtime·malloc(p1.runqsize*sizeof(*p1.runq)); -  	runtime·memclr((byte*)&p2, sizeof(p2)); -	p2.runqsize = nelem(gs); -	p2.runqhead = 0; -	p2.runqtail = 0; -	p2.runq = runtime·malloc(p2.runqsize*sizeof(*p2.runq));  	for(i = 0; i < nelem(gs); i++) {  		for(j = 0; j < i; j++) { @@ -2914,6 +3093,7 @@ runtime·testSchedLocalQueueSteal(void)  }  extern void runtime·morestack(void); +uintptr runtime·externalthreadhandlerp;  // Does f mark the top of a goroutine stack?  bool @@ -2924,18 +3104,21 @@ runtime·topofstack(Func *f)  		f->entry == (uintptr)runtime·mcall ||  		f->entry == (uintptr)runtime·morestack ||  		f->entry == (uintptr)runtime·lessstack || -		f->entry == (uintptr)_rt0_go; +		f->entry == (uintptr)_rt0_go || +		(runtime·externalthreadhandlerp != 0 && f->entry == runtime·externalthreadhandlerp);  } -void -runtime∕debug·setMaxThreads(intgo in, intgo out) +int32 +runtime·setmaxthreads(int32 in)  { +	int32 out; +  	runtime·lock(&runtime·sched);  	out = runtime·sched.maxmcount;  	runtime·sched.maxmcount = in;  	checkmcount();  	runtime·unlock(&runtime·sched); -	FLUSH(&out); +	return out;  }  static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h | 
