Diffstat (limited to 'src/pkg/runtime')
-rw-r--r--  src/pkg/runtime/386/asm.s | 52
-rw-r--r--  src/pkg/runtime/386/atomic.c | 7
-rw-r--r--  src/pkg/runtime/386/closure.c | 2
-rw-r--r--  src/pkg/runtime/386/memmove.s | 12
-rw-r--r--  src/pkg/runtime/Makefile | 2
-rw-r--r--  src/pkg/runtime/amd64/asm.s | 49
-rw-r--r--  src/pkg/runtime/amd64/atomic.c | 7
-rw-r--r--  src/pkg/runtime/amd64/closure.c | 2
-rw-r--r--  src/pkg/runtime/amd64/memmove.s | 11
-rw-r--r--  src/pkg/runtime/amd64/traceback.c | 7
-rw-r--r--  src/pkg/runtime/append_test.go | 3
-rw-r--r--  src/pkg/runtime/arm/atomic.c | 71
-rw-r--r--  src/pkg/runtime/arm/closure.c | 2
-rw-r--r--  src/pkg/runtime/arm/traceback.c | 7
-rwxr-xr-x  src/pkg/runtime/cgo/windows_amd64.c | 9
-rw-r--r--  src/pkg/runtime/cgocall.c | 9
-rw-r--r--  src/pkg/runtime/chan.c | 411
-rw-r--r--  src/pkg/runtime/chan_test.go | 267
-rw-r--r--  src/pkg/runtime/cpuprof.c | 4
-rw-r--r--  src/pkg/runtime/debug/stack_test.go | 4
-rw-r--r--  src/pkg/runtime/export_test.go | 6
-rw-r--r--  src/pkg/runtime/freebsd/386/signal.c | 2
-rw-r--r--  src/pkg/runtime/freebsd/amd64/signal.c | 2
-rw-r--r--  src/pkg/runtime/goc2c.c | 96
-rw-r--r--  src/pkg/runtime/hashmap.c | 4
-rw-r--r--  src/pkg/runtime/hashmap.h | 2
-rw-r--r--  src/pkg/runtime/iface.c | 13
-rw-r--r--  src/pkg/runtime/linux/386/defs.h | 2
-rw-r--r--  src/pkg/runtime/linux/386/sys.s | 29
-rw-r--r--  src/pkg/runtime/linux/amd64/defs.h | 2
-rw-r--r--  src/pkg/runtime/linux/amd64/sys.s | 18
-rw-r--r--  src/pkg/runtime/linux/arm/defs.h | 2
-rw-r--r--  src/pkg/runtime/linux/arm/sys.s | 30
-rw-r--r--  src/pkg/runtime/linux/thread.c | 198
-rw-r--r--  src/pkg/runtime/malloc.goc | 111
-rw-r--r--  src/pkg/runtime/malloc.h | 20
-rw-r--r--  src/pkg/runtime/mcache.c | 4
-rw-r--r--  src/pkg/runtime/mem.go | 7
-rw-r--r--  src/pkg/runtime/mgc0.c | 45
-rw-r--r--  src/pkg/runtime/mheap.c | 10
-rw-r--r--  src/pkg/runtime/plan9/mem.c | 11
-rw-r--r--  src/pkg/runtime/plan9/thread.c | 15
-rw-r--r--  src/pkg/runtime/print.c | 2
-rw-r--r--  src/pkg/runtime/proc.c | 434
-rw-r--r--  src/pkg/runtime/proc.p | 526
-rw-r--r--  src/pkg/runtime/proc_test.go | 79
-rw-r--r--  src/pkg/runtime/runtime.c | 79
-rw-r--r--  src/pkg/runtime/runtime.h | 22
-rw-r--r--  src/pkg/runtime/slice.c | 2
-rw-r--r--  src/pkg/runtime/stack.h | 1
-rw-r--r--  src/pkg/runtime/string.goc | 56
-rw-r--r--  src/pkg/runtime/symtab.c | 22
-rw-r--r--  src/pkg/runtime/symtab_test.go | 47
-rw-r--r--  src/pkg/runtime/windows/amd64/rt0.s | 3
-rw-r--r--  src/pkg/runtime/windows/amd64/sys.s | 1
55 files changed, 2141 insertions, 700 deletions
diff --git a/src/pkg/runtime/386/asm.s b/src/pkg/runtime/386/asm.s
index e2cabef14..a14518839 100644
--- a/src/pkg/runtime/386/asm.s
+++ b/src/pkg/runtime/386/asm.s
@@ -28,15 +28,18 @@ TEXT _rt0_386(SB),7,$0
TESTL AX, AX
JZ 4(PC)
CALL AX
+ // skip runtime·ldt0setup(SB) and tls test after initcgo for non-windows
CMPL runtime·iswindows(SB), $0
JEQ ok
+ // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
+ CMPL runtime·isplan9(SB), $1
+ JEQ ok
+
// set up %gs
CALL runtime·ldt0setup(SB)
// store through it, to make sure it works
- CMPL runtime·isplan9(SB), $1
- JEQ ok
get_tls(BX)
MOVL $0x123, g(BX)
MOVL runtime·tls0(SB), AX
@@ -318,6 +321,45 @@ TEXT runtime·casp(SB), 7, $0
MOVL $1, AX
RET
+// uint32 xadd(uint32 volatile *val, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT runtime·xadd(SB), 7, $0
+ MOVL 4(SP), BX
+ MOVL 8(SP), AX
+ MOVL AX, CX
+ LOCK
+ XADDL AX, 0(BX)
+ ADDL CX, AX
+ RET
+
+TEXT runtime·xchg(SB), 7, $0
+ MOVL 4(SP), BX
+ MOVL 8(SP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT runtime·procyield(SB),7,$0
+ MOVL 4(SP), AX
+again:
+ PAUSE
+ SUBL $1, AX
+ JNZ again
+ RET
+
+TEXT runtime·atomicstorep(SB), 7, $0
+ MOVL 4(SP), BX
+ MOVL 8(SP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT runtime·atomicstore(SB), 7, $0
+ MOVL 4(SP), BX
+ MOVL 8(SP), AX
+ XCHGL AX, 0(BX)
+ RET
+
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
@@ -460,12 +502,16 @@ TEXT runtime·stackcheck(SB), 7, $0
TEXT runtime·memclr(SB),7,$0
MOVL 4(SP), DI // arg 1 addr
MOVL 8(SP), CX // arg 2 count
- ADDL $3, CX
+ MOVL CX, BX
+ ANDL $3, BX
SHRL $2, CX
MOVL $0, AX
CLD
REP
STOSL
+ MOVL BX, CX
+ REP
+ STOSB
RET
TEXT runtime·getcallerpc(SB),7,$0
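
Note on the memclr hunk above: the new sequence no longer rounds the byte count up to a whole word. It clears count/4 words with REP STOSL and the remaining count%4 bytes with REP STOSB (the amd64 version below does the same with count/8 and count%8). The same file also gains runtime·xadd, runtime·xchg, runtime·procyield and the atomic stores used by the futex-based lock further down. A rough Go-level sketch of the new tail handling, for illustration only:

	// Clear the word-aligned prefix, then the leftover bytes, instead of
	// rounding the length up to a whole number of words.
	func memclr(b []byte) {
		const wordSize = 4 // 386; amd64 uses 8
		i := 0
		for ; i+wordSize <= len(b); i += wordSize { // REP STOSL analogue
			b[i], b[i+1], b[i+2], b[i+3] = 0, 0, 0, 0
		}
		for ; i < len(b); i++ { // REP STOSB analogue for the 0-3 byte tail
			b[i] = 0
		}
	}
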
diff --git a/src/pkg/runtime/386/atomic.c b/src/pkg/runtime/386/atomic.c
index c031cc4f6..a4f2a114f 100644
--- a/src/pkg/runtime/386/atomic.c
+++ b/src/pkg/runtime/386/atomic.c
@@ -10,3 +10,10 @@ runtime·atomicload(uint32 volatile* addr)
{
return *addr;
}
+
+#pragma textflag 7
+void*
+runtime·atomicloadp(void* volatile* addr)
+{
+ return *addr;
+}
diff --git a/src/pkg/runtime/386/closure.c b/src/pkg/runtime/386/closure.c
index b0d4cc41a..b4d867711 100644
--- a/src/pkg/runtime/386/closure.c
+++ b/src/pkg/runtime/386/closure.c
@@ -45,7 +45,7 @@ runtime·closure(int32 siz, byte *fn, byte *arg0)
q = p + n - siz;
if(siz > 0) {
- runtime·mcpy(q, (byte*)&arg0, siz);
+ runtime·memmove(q, (byte*)&arg0, siz);
// SUBL $siz, SP
*p++ = 0x81;
diff --git a/src/pkg/runtime/386/memmove.s b/src/pkg/runtime/386/memmove.s
index 471553ba2..203a8187c 100644
--- a/src/pkg/runtime/386/memmove.s
+++ b/src/pkg/runtime/386/memmove.s
@@ -27,9 +27,6 @@ TEXT runtime·memmove(SB), 7, $0
MOVL to+0(FP), DI
MOVL fr+4(FP), SI
MOVL n+8(FP), BX
- CMPL BX, $0
- JLT fault
-
/*
* check and set for backwards
*/
@@ -87,12 +84,3 @@ back:
MOVL to+0(FP),AX
RET
-/*
- * if called with negative count,
- * treat as error rather than
- * rotating all of memory
- */
-fault:
- MOVL $0,SI
- MOVL 0(SI), AX
- RET
diff --git a/src/pkg/runtime/Makefile b/src/pkg/runtime/Makefile
index 03f960cb8..64bd2b771 100644
--- a/src/pkg/runtime/Makefile
+++ b/src/pkg/runtime/Makefile
@@ -120,7 +120,7 @@ $(GOARCH)/asm.h: mkasmh.sh runtime.acid.$(GOARCH)
mv -f $@.x $@
goc2c: goc2c.c
- quietgcc -o $@ $<
+ quietgcc -o $@ -I "$(GOROOT)/include" $< "$(GOROOT)/lib/lib9.a"
mkversion: mkversion.c
quietgcc -o $@ -I "$(GOROOT)/include" $< "$(GOROOT)/lib/lib9.a"
diff --git a/src/pkg/runtime/amd64/asm.s b/src/pkg/runtime/amd64/asm.s
index 46d82e365..3e3818c10 100644
--- a/src/pkg/runtime/amd64/asm.s
+++ b/src/pkg/runtime/amd64/asm.s
@@ -18,7 +18,8 @@ TEXT _rt0_amd64(SB),7,$-8
TESTQ AX, AX
JZ needtls
CALL AX
- JMP ok
+ CMPL runtime·iswindows(SB), $0
+ JEQ ok
needtls:
LEAQ runtime·tls0(SB), DI
@@ -364,6 +365,45 @@ TEXT runtime·casp(SB), 7, $0
MOVL $1, AX
RET
+// uint32 xadd(uint32 volatile *val, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT runtime·xadd(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVL 16(SP), AX
+ MOVL AX, CX
+ LOCK
+ XADDL AX, 0(BX)
+ ADDL CX, AX
+ RET
+
+TEXT runtime·xchg(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVL 16(SP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT runtime·procyield(SB),7,$0
+ MOVL 8(SP), AX
+again:
+ PAUSE
+ SUBL $1, AX
+ JNZ again
+ RET
+
+TEXT runtime·atomicstorep(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVQ 16(SP), AX
+ XCHGQ AX, 0(BX)
+ RET
+
+TEXT runtime·atomicstore(SB), 7, $0
+ MOVQ 8(SP), BX
+ MOVL 16(SP), AX
+ XCHGL AX, 0(BX)
+ RET
+
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
@@ -413,6 +453,7 @@ TEXT runtime·asmcgocall(SB),7,$0
MOVQ DI, 16(SP) // save g
MOVQ DX, 8(SP) // save SP
MOVQ BX, DI // DI = first argument in AMD64 ABI
+ MOVQ BX, CX // CX = first argument in Win64
CALL AX
// Restore registers, g, stack pointer.
@@ -506,12 +547,16 @@ TEXT runtime·stackcheck(SB), 7, $0
TEXT runtime·memclr(SB),7,$0
MOVQ 8(SP), DI // arg 1 addr
MOVQ 16(SP), CX // arg 2 count
- ADDQ $7, CX
+ MOVQ CX, BX
+ ANDQ $7, BX
SHRQ $3, CX
MOVQ $0, AX
CLD
REP
STOSQ
+ MOVQ BX, CX
+ REP
+ STOSB
RET
TEXT runtime·getcallerpc(SB),7,$0
diff --git a/src/pkg/runtime/amd64/atomic.c b/src/pkg/runtime/amd64/atomic.c
index c031cc4f6..a4f2a114f 100644
--- a/src/pkg/runtime/amd64/atomic.c
+++ b/src/pkg/runtime/amd64/atomic.c
@@ -10,3 +10,10 @@ runtime·atomicload(uint32 volatile* addr)
{
return *addr;
}
+
+#pragma textflag 7
+void*
+runtime·atomicloadp(void* volatile* addr)
+{
+ return *addr;
+}
diff --git a/src/pkg/runtime/amd64/closure.c b/src/pkg/runtime/amd64/closure.c
index 5033468d2..481b4a888 100644
--- a/src/pkg/runtime/amd64/closure.c
+++ b/src/pkg/runtime/amd64/closure.c
@@ -45,7 +45,7 @@ runtime·closure(int32 siz, byte *fn, byte *arg0)
q = p + n - siz;
if(siz > 0) {
- runtime·mcpy(q, (byte*)&arg0, siz);
+ runtime·memmove(q, (byte*)&arg0, siz);
// SUBQ $siz, SP
*p++ = 0x48;
diff --git a/src/pkg/runtime/amd64/memmove.s b/src/pkg/runtime/amd64/memmove.s
index fc9573f72..e78be8145 100644
--- a/src/pkg/runtime/amd64/memmove.s
+++ b/src/pkg/runtime/amd64/memmove.s
@@ -28,8 +28,6 @@ TEXT runtime·memmove(SB), 7, $0
MOVQ to+0(FP), DI
MOVQ fr+8(FP), SI
MOVLQSX n+16(FP), BX
- CMPQ BX, $0
- JLT fault
/*
* check and set for backwards
@@ -88,12 +86,3 @@ back:
MOVQ to+0(FP),AX
RET
-/*
- * if called with negative count,
- * treat as error rather than
- * rotating all of memory
- */
-fault:
- MOVQ $0,SI
- MOVQ 0(SI), AX
- RET
diff --git a/src/pkg/runtime/amd64/traceback.c b/src/pkg/runtime/amd64/traceback.c
index d422cb692..3e85d36bd 100644
--- a/src/pkg/runtime/amd64/traceback.c
+++ b/src/pkg/runtime/amd64/traceback.c
@@ -10,6 +10,7 @@ void runtime·deferproc(void);
void runtime·newproc(void);
void runtime·newstack(void);
void runtime·morestack(void);
+void runtime·sigpanic(void);
// This code is also used for the 386 tracebacks.
// Use uintptr for an appropriate word-sized integer.
@@ -27,11 +28,13 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr
byte *fp;
Stktop *stk;
Func *f;
+ bool waspanic;
USED(lr0);
pc = (uintptr)pc0;
lr = 0;
fp = nil;
+ waspanic = false;
// If the PC is goexit, the goroutine hasn't started yet.
if(pc0 == g->sched.pc && sp == g->sched.sp && pc0 == (byte*)runtime·goexit) {
@@ -127,7 +130,7 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr
if(pc > f->entry)
runtime·printf("+%p", (uintptr)(pc - f->entry));
tracepc = pc; // back up to CALL instruction for funcline.
- if(n > 0 && pc > f->entry)
+ if(n > 0 && pc > f->entry && !waspanic)
tracepc--;
runtime·printf(" %S:%d\n", f->src, runtime·funcline(f, tracepc));
runtime·printf("\t%S(", f->name);
@@ -144,6 +147,8 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr
n++;
}
+ waspanic = f->entry == (uintptr)runtime·sigpanic;
+
if(f->entry == (uintptr)runtime·deferproc || f->entry == (uintptr)runtime·newproc)
fp += 2*sizeof(uintptr);
diff --git a/src/pkg/runtime/append_test.go b/src/pkg/runtime/append_test.go
index 75a635306..b8552224e 100644
--- a/src/pkg/runtime/append_test.go
+++ b/src/pkg/runtime/append_test.go
@@ -36,7 +36,7 @@ func BenchmarkAppendSpecialCase(b *testing.B) {
}
}
-var x = make([]int, 0, 10)
+var x []int
func f() int {
x[:1][0] = 3
@@ -44,6 +44,7 @@ func f() int {
}
func TestSideEffectOrder(t *testing.T) {
+ x = make([]int, 0, 10)
x = append(x, 1, f())
if x[0] != 1 || x[1] != 2 {
t.Error("append failed: ", x[0], x[1])
diff --git a/src/pkg/runtime/arm/atomic.c b/src/pkg/runtime/arm/atomic.c
index 9fd47bae7..52e4059ae 100644
--- a/src/pkg/runtime/arm/atomic.c
+++ b/src/pkg/runtime/arm/atomic.c
@@ -4,9 +4,80 @@
#include "runtime.h"
+// Atomic add and return new value.
+#pragma textflag 7
+uint32
+runtime·xadd(uint32 volatile *val, int32 delta)
+{
+ uint32 oval, nval;
+
+ for(;;){
+ oval = *val;
+ nval = oval + delta;
+ if(runtime·cas(val, oval, nval))
+ return nval;
+ }
+}
+
+#pragma textflag 7
+uint32
+runtime·xchg(uint32 volatile* addr, uint32 v)
+{
+ uint32 old;
+
+ for(;;) {
+ old = *addr;
+ if(runtime·cas(addr, old, v))
+ return old;
+ }
+}
+
+#pragma textflag 7
+void
+runtime·procyield(uint32 cnt)
+{
+ uint32 volatile i;
+
+ for(i = 0; i < cnt; i++) {
+ }
+}
+
#pragma textflag 7
uint32
runtime·atomicload(uint32 volatile* addr)
{
return runtime·xadd(addr, 0);
}
+
+#pragma textflag 7
+void*
+runtime·atomicloadp(void* volatile* addr)
+{
+ return (void*)runtime·xadd((uint32 volatile*)addr, 0);
+}
+
+#pragma textflag 7
+void
+runtime·atomicstorep(void* volatile* addr, void* v)
+{
+ void *old;
+
+ for(;;) {
+ old = *addr;
+ if(runtime·casp(addr, old, v))
+ return;
+ }
+}
+
+#pragma textflag 7
+void
+runtime·atomicstore(uint32 volatile* addr, uint32 v)
+{
+ uint32 old;
+
+ for(;;) {
+ old = *addr;
+ if(runtime·cas(addr, old, v))
+ return;
+ }
+}
\ No newline at end of file
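
ARM has no locked read-modify-write instructions to lean on here, so every helper added above is the same retry loop around runtime·cas. A minimal Go sketch of that pattern, using sync/atomic purely for illustration (the real C reads *val directly and loops on the CAS):

	import "sync/atomic"

	// xadd: retry a compare-and-swap until the incremented value is installed.
	// xchg, atomicstore and atomicstorep in the diff follow the same shape.
	func xadd(addr *uint32, delta int32) uint32 {
		for {
			old := atomic.LoadUint32(addr)
			nval := old + uint32(delta)
			if atomic.CompareAndSwapUint32(addr, old, nval) {
				return nval
			}
		}
	}
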
diff --git a/src/pkg/runtime/arm/closure.c b/src/pkg/runtime/arm/closure.c
index 36a93bc53..119e91b61 100644
--- a/src/pkg/runtime/arm/closure.c
+++ b/src/pkg/runtime/arm/closure.c
@@ -83,7 +83,7 @@ runtime·closure(int32 siz, byte *fn, byte *arg0)
*pc++ = 0xe52de000 | (siz + 4);
if(siz > 0) {
- runtime·mcpy(q, (byte*)&arg0, siz);
+ runtime·memmove(q, (byte*)&arg0, siz);
// MOVW $vars(PC), R0
*pc = 0xe28f0000 | (int32)(q - (byte*)pc - 8);
diff --git a/src/pkg/runtime/arm/traceback.c b/src/pkg/runtime/arm/traceback.c
index c3934c37c..5628b8349 100644
--- a/src/pkg/runtime/arm/traceback.c
+++ b/src/pkg/runtime/arm/traceback.c
@@ -9,6 +9,7 @@ void runtime·deferproc(void);
void runtime·newproc(void);
void runtime·newstack(void);
void runtime·morestack(void);
+void runtime·sigpanic(void);
void _div(void);
void _mod(void);
void _divu(void);
@@ -20,12 +21,14 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr
int32 i, n, iter;
uintptr pc, lr, tracepc, x;
byte *fp, *p;
+ bool waspanic;
Stktop *stk;
Func *f;
pc = (uintptr)pc0;
lr = (uintptr)lr0;
fp = nil;
+ waspanic = false;
// If the PC is goexit, the goroutine hasn't started yet.
if(pc == (uintptr)runtime·goexit) {
@@ -121,7 +124,7 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr
if(pc > f->entry)
runtime·printf("+%p", (uintptr)(pc - f->entry));
tracepc = pc; // back up to CALL instruction for funcline.
- if(n > 0 && pc > f->entry)
+ if(n > 0 && pc > f->entry && !waspanic)
tracepc -= sizeof(uintptr);
runtime·printf(" %S:%d\n", f->src, runtime·funcline(f, tracepc));
runtime·printf("\t%S(", f->name);
@@ -137,6 +140,8 @@ runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr
runtime·prints(")\n");
n++;
}
+
+ waspanic = f->entry == (uintptr)runtime·sigpanic;
if(pcbuf == nil && f->entry == (uintptr)runtime·newstack && g == m->g0) {
runtime·printf("----- newstack called from goroutine %d -----\n", m->curg->goid);
diff --git a/src/pkg/runtime/cgo/windows_amd64.c b/src/pkg/runtime/cgo/windows_amd64.c
index fd5b397ab..e8313e250 100755
--- a/src/pkg/runtime/cgo/windows_amd64.c
+++ b/src/pkg/runtime/cgo/windows_amd64.c
@@ -30,6 +30,7 @@ static void*
threadentry(void *v)
{
ThreadStart ts;
+ void *tls0;
ts = *(ThreadStart*)v;
free(v);
@@ -45,11 +46,13 @@ threadentry(void *v)
/*
* Set specific keys in thread local storage.
*/
+ tls0 = (void*)LocalAlloc(LPTR, 64);
asm volatile (
+ "movq %0, %%gs:0x58\n" // MOVL tls0, 0x58(GS)
"movq %%gs:0x58, %%rax\n" // MOVQ 0x58(GS), tmp
- "movq %0, 0(%%rax)\n" // MOVQ g, 0(GS)
- "movq %1, 8(%%rax)\n" // MOVQ m, 8(GS)
- :: "r"(ts.g), "r"(ts.m) : "%rax"
+ "movq %1, 0(%%rax)\n" // MOVQ g, 0(GS)
+ "movq %2, 8(%%rax)\n" // MOVQ m, 8(GS)
+ :: "r"(tls0), "r"(ts.g), "r"(ts.m) : "%rax"
);
crosscall_amd64(ts.fn);
diff --git a/src/pkg/runtime/cgocall.c b/src/pkg/runtime/cgocall.c
index 58f287e90..829448b02 100644
--- a/src/pkg/runtime/cgocall.c
+++ b/src/pkg/runtime/cgocall.c
@@ -83,7 +83,6 @@
// callee-save registers for gcc and returns to GoF, which returns to f.
void *initcgo; /* filled in by dynamic linker when Cgo is available */
-int64 ncgocall;
static void unlockm(void);
static void unwindm(void);
@@ -101,7 +100,7 @@ runtime·cgocall(void (*fn)(void*), void *arg)
if(fn == 0)
runtime·throw("cgocall nil");
- ncgocall++;
+ m->ncgocall++;
/*
* Lock g to m to ensure we stay on the same stack if we do a
@@ -155,7 +154,11 @@ unlockm(void)
void
runtime·Cgocalls(int64 ret)
{
- ret = ncgocall;
+ M *m;
+
+ ret = 0;
+ for(m=runtime·atomicloadp(&runtime·allm); m; m=m->alllink)
+ ret += m->ncgocall;
FLUSH(&ret);
}
diff --git a/src/pkg/runtime/chan.c b/src/pkg/runtime/chan.c
index f94c3ef40..b77e51b60 100644
--- a/src/pkg/runtime/chan.c
+++ b/src/pkg/runtime/chan.c
@@ -6,6 +6,7 @@
#include "type.h"
#define MAXALIGN 7
+#define NOSELGEN 1
static int32 debug = 0;
@@ -18,10 +19,8 @@ struct SudoG
{
G* g; // g and selgen constitute
uint32 selgen; // a weak pointer to g
- int16 offset; // offset of case number
- int8 isfree; // offset of case number
SudoG* link;
- byte elem[8]; // synch data element (+ more)
+ byte* elem; // data element
};
struct WaitQ
@@ -38,11 +37,10 @@ struct Hchan
bool closed;
uint8 elemalign;
Alg* elemalg; // interface for element type
- uint32 sendx; // send index
- uint32 recvx; // receive index
+ uint32 sendx; // send index
+ uint32 recvx; // receive index
WaitQ recvq; // list of recv waiters
WaitQ sendq; // list of send waiters
- SudoG* free; // freelist
Lock;
};
@@ -60,34 +58,26 @@ enum
struct Scase
{
+ SudoG sg; // must be first member (cast to Scase)
Hchan* chan; // chan
byte* pc; // return pc
uint16 kind;
uint16 so; // vararg of selected bool
- union {
- byte elem[2*sizeof(void*)]; // element (send)
- struct {
- byte* elemp; // pointer to element (recv)
- bool* receivedp; // pointer to received bool (recv2)
- } recv;
- } u;
+ bool* receivedp; // pointer to received bool (recv2)
};
struct Select
{
uint16 tcase; // total count of scase[]
uint16 ncase; // currently filled scase[]
- Select* link; // for freelist
- uint16* order;
- Scase* scase[1]; // one per case
+ uint16* pollorder; // case poll order
+ Hchan** lockorder; // channel lock order
+ Scase scase[1]; // one per case (in order of appearance)
};
-static void dequeueg(WaitQ*, Hchan*);
-static SudoG* dequeue(WaitQ*, Hchan*);
+static void dequeueg(WaitQ*);
+static SudoG* dequeue(WaitQ*);
static void enqueue(WaitQ*, SudoG*);
-static SudoG* allocsg(Hchan*);
-static void freesg(Hchan*, SudoG*);
-static uint32 fastrandn(uint32);
static void destroychan(Hchan*);
Hchan*
@@ -97,7 +87,7 @@ runtime·makechan_c(Type *elem, int64 hint)
int32 n;
byte *by;
- if(hint < 0 || (int32)hint != hint || hint > ((uintptr)-1) / elem->size)
+ if(hint < 0 || (int32)hint != hint || (elem->size > 0 && hint > ((uintptr)-1) / elem->size))
runtime·panicstring("makechan: size out of range");
if(elem->alg >= nelem(runtime·algarray)) {
@@ -170,6 +160,7 @@ void
runtime·chansend(Hchan *c, byte *ep, bool *pres)
{
SudoG *sg;
+ SudoG mysg;
G* gp;
if(c == nil)
@@ -185,21 +176,20 @@ runtime·chansend(Hchan *c, byte *ep, bool *pres)
}
runtime·lock(c);
-loop:
if(c->closed)
goto closed;
if(c->dataqsiz > 0)
goto asynch;
- sg = dequeue(&c->recvq, c);
+ sg = dequeue(&c->recvq);
if(sg != nil) {
- if(ep != nil)
- c->elemalg->copy(c->elemsize, sg->elem, ep);
-
+ runtime·unlock(c);
+
gp = sg->g;
gp->param = sg;
- runtime·unlock(c);
+ if(sg->elem != nil)
+ c->elemalg->copy(c->elemsize, sg->elem, ep);
runtime·ready(gp);
if(pres != nil)
@@ -213,21 +203,22 @@ loop:
return;
}
- sg = allocsg(c);
- if(ep != nil)
- c->elemalg->copy(c->elemsize, sg->elem, ep);
+ mysg.elem = ep;
+ mysg.g = g;
+ mysg.selgen = NOSELGEN;
g->param = nil;
g->status = Gwaiting;
- enqueue(&c->sendq, sg);
+ enqueue(&c->sendq, &mysg);
runtime·unlock(c);
runtime·gosched();
- runtime·lock(c);
- sg = g->param;
- if(sg == nil)
- goto loop;
- freesg(c, sg);
- runtime·unlock(c);
+ if(g->param == nil) {
+ runtime·lock(c);
+ if(!c->closed)
+ runtime·throw("chansend: spurious wakeup");
+ goto closed;
+ }
+
return;
asynch:
@@ -240,25 +231,25 @@ asynch:
*pres = false;
return;
}
- sg = allocsg(c);
+ mysg.g = g;
+ mysg.elem = nil;
+ mysg.selgen = NOSELGEN;
g->status = Gwaiting;
- enqueue(&c->sendq, sg);
+ enqueue(&c->sendq, &mysg);
runtime·unlock(c);
runtime·gosched();
runtime·lock(c);
goto asynch;
}
- if(ep != nil)
- c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), ep);
+ c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), ep);
if(++c->sendx == c->dataqsiz)
c->sendx = 0;
c->qcount++;
- sg = dequeue(&c->recvq, c);
+ sg = dequeue(&c->recvq);
if(sg != nil) {
gp = sg->g;
- freesg(c, sg);
runtime·unlock(c);
runtime·ready(gp);
} else
@@ -277,6 +268,7 @@ void
runtime·chanrecv(Hchan* c, byte *ep, bool *selected, bool *received)
{
SudoG *sg;
+ SudoG mysg;
G *gp;
if(c == nil)
@@ -289,23 +281,20 @@ runtime·chanrecv(Hchan* c, byte *ep, bool *selected, bool *received)
runtime·printf("chanrecv: chan=%p\n", c);
runtime·lock(c);
-
-loop:
if(c->dataqsiz > 0)
goto asynch;
if(c->closed)
goto closed;
- sg = dequeue(&c->sendq, c);
+ sg = dequeue(&c->sendq);
if(sg != nil) {
+ runtime·unlock(c);
+
if(ep != nil)
c->elemalg->copy(c->elemsize, ep, sg->elem);
- c->elemalg->copy(c->elemsize, sg->elem, nil);
-
gp = sg->g;
gp->param = sg;
- runtime·unlock(c);
runtime·ready(gp);
if(selected != nil)
@@ -321,25 +310,24 @@ loop:
return;
}
- sg = allocsg(c);
+ mysg.elem = ep;
+ mysg.g = g;
+ mysg.selgen = NOSELGEN;
g->param = nil;
g->status = Gwaiting;
- enqueue(&c->recvq, sg);
+ enqueue(&c->recvq, &mysg);
runtime·unlock(c);
runtime·gosched();
- runtime·lock(c);
- sg = g->param;
- if(sg == nil)
- goto loop;
+ if(g->param == nil) {
+ runtime·lock(c);
+ if(!c->closed)
+ runtime·throw("chanrecv: spurious wakeup");
+ goto closed;
+ }
- if(ep != nil)
- c->elemalg->copy(c->elemsize, ep, sg->elem);
- c->elemalg->copy(c->elemsize, sg->elem, nil);
if(received != nil)
*received = true;
- freesg(c, sg);
- runtime·unlock(c);
return;
asynch:
@@ -354,9 +342,11 @@ asynch:
*received = false;
return;
}
- sg = allocsg(c);
+ mysg.g = g;
+ mysg.elem = nil;
+ mysg.selgen = NOSELGEN;
g->status = Gwaiting;
- enqueue(&c->recvq, sg);
+ enqueue(&c->recvq, &mysg);
runtime·unlock(c);
runtime·gosched();
@@ -369,10 +359,10 @@ asynch:
if(++c->recvx == c->dataqsiz)
c->recvx = 0;
c->qcount--;
- sg = dequeue(&c->sendq, c);
+
+ sg = dequeue(&c->sendq);
if(sg != nil) {
gp = sg->g;
- freesg(c, sg);
runtime·unlock(c);
runtime·ready(gp);
} else
@@ -437,7 +427,7 @@ runtime·chanrecv2(Hchan* c, ...)
o = runtime·rnd(sizeof(c), Structrnd);
ae = (byte*)&c + o;
- o = runtime·rnd(o+c->elemsize, 1);
+ o += c->elemsize;
ac = (byte*)&c + o;
runtime·chanrecv(c, ae, nil, ac);
@@ -619,57 +609,56 @@ newselect(int32 size, Select **selp)
if(size > 1)
n = size-1;
- sel = runtime·mal(sizeof(*sel) + n*sizeof(sel->scase[0]) + size*sizeof(sel->order[0]));
+ sel = runtime·mal(sizeof(*sel) +
+ n*sizeof(sel->scase[0]) +
+ size*sizeof(sel->lockorder[0]) +
+ size*sizeof(sel->pollorder[0]));
sel->tcase = size;
sel->ncase = 0;
- sel->order = (void*)(sel->scase + size);
+ sel->pollorder = (void*)(sel->scase + size);
+ sel->lockorder = (void*)(sel->pollorder + size);
*selp = sel;
+
if(debug)
runtime·printf("newselect s=%p size=%d\n", sel, size);
}
// cut in half to give stack a chance to split
-static void selectsend(Select **selp, Hchan *c, void *pc);
+static void selectsend(Select *sel, Hchan *c, void *pc, void *elem, int32 so);
-// selectsend(sel *byte, hchan *chan any, elem any) (selected bool);
+// selectsend(sel *byte, hchan *chan any, elem *any) (selected bool);
#pragma textflag 7
void
-runtime·selectsend(Select *sel, Hchan *c, ...)
+runtime·selectsend(Select *sel, Hchan *c, void *elem, bool selected)
{
+ selected = false;
+ FLUSH(&selected);
+
// nil cases do not compete
if(c == nil)
return;
- selectsend(&sel, c, runtime·getcallerpc(&sel));
+ selectsend(sel, c, runtime·getcallerpc(&sel), elem, (byte*)&selected - (byte*)&sel);
}
static void
-selectsend(Select **selp, Hchan *c, void *pc)
+selectsend(Select *sel, Hchan *c, void *pc, void *elem, int32 so)
{
- int32 i, eo;
+ int32 i;
Scase *cas;
- byte *ae;
- Select *sel;
- sel = *selp;
i = sel->ncase;
if(i >= sel->tcase)
runtime·throw("selectsend: too many cases");
sel->ncase = i+1;
- cas = runtime·mal(sizeof *cas + c->elemsize - sizeof(cas->u.elem));
- sel->scase[i] = cas;
+ cas = &sel->scase[i];
cas->pc = pc;
cas->chan = c;
-
- eo = runtime·rnd(sizeof(sel), sizeof(c));
- eo = runtime·rnd(eo+sizeof(c), c->elemsize);
- cas->so = runtime·rnd(eo+c->elemsize, Structrnd);
+ cas->so = so;
cas->kind = CaseSend;
-
- ae = (byte*)selp + eo;
- c->elemalg->copy(c->elemsize, cas->u.elem, ae);
+ cas->sg.elem = elem;
if(debug)
runtime·printf("selectsend s=%p pc=%p chan=%p so=%d\n",
@@ -684,6 +673,9 @@ static void selectrecv(Select *sel, Hchan *c, void *pc, void *elem, bool*, int32
void
runtime·selectrecv(Select *sel, Hchan *c, void *elem, bool selected)
{
+ selected = false;
+ FLUSH(&selected);
+
// nil cases do not compete
if(c == nil)
return;
@@ -696,6 +688,9 @@ runtime·selectrecv(Select *sel, Hchan *c, void *elem, bool selected)
void
runtime·selectrecv2(Select *sel, Hchan *c, void *elem, bool *received, bool selected)
{
+ selected = false;
+ FLUSH(&selected);
+
// nil cases do not compete
if(c == nil)
return;
@@ -713,16 +708,14 @@ selectrecv(Select *sel, Hchan *c, void *pc, void *elem, bool *received, int32 so
if(i >= sel->tcase)
runtime·throw("selectrecv: too many cases");
sel->ncase = i+1;
- cas = runtime·mal(sizeof *cas);
- sel->scase[i] = cas;
+ cas = &sel->scase[i];
cas->pc = pc;
cas->chan = c;
cas->so = so;
cas->kind = CaseRecv;
- cas->u.recv.elemp = elem;
- cas->u.recv.receivedp = nil;
- cas->u.recv.receivedp = received;
+ cas->sg.elem = elem;
+ cas->receivedp = received;
if(debug)
runtime·printf("selectrecv s=%p pc=%p chan=%p so=%d\n",
@@ -737,6 +730,9 @@ static void selectdefault(Select*, void*, int32);
void
runtime·selectdefault(Select *sel, bool selected)
{
+ selected = false;
+ FLUSH(&selected);
+
selectdefault(sel, runtime·getcallerpc(&sel), (byte*)&selected - (byte*)&sel);
}
@@ -750,8 +746,7 @@ selectdefault(Select *sel, void *callerpc, int32 so)
if(i >= sel->tcase)
runtime·throw("selectdefault: too many cases");
sel->ncase = i+1;
- cas = runtime·mal(sizeof *cas);
- sel->scase[i] = cas;
+ cas = &sel->scase[i];
cas->pc = callerpc;
cas->chan = nil;
@@ -764,25 +759,16 @@ selectdefault(Select *sel, void *callerpc, int32 so)
}
static void
-freesel(Select *sel)
-{
- uint32 i;
-
- for(i=0; i<sel->ncase; i++)
- runtime·free(sel->scase[i]);
- runtime·free(sel);
-}
-
-static void
sellock(Select *sel)
{
uint32 i;
- Hchan *c;
+ Hchan *c, *c0;
c = nil;
for(i=0; i<sel->ncase; i++) {
- if(sel->scase[i]->chan != c) {
- c = sel->scase[i]->chan;
+ c0 = sel->lockorder[i];
+ if(c0 && c0 != c) {
+ c = sel->lockorder[i];
runtime·lock(c);
}
}
@@ -792,12 +778,13 @@ static void
selunlock(Select *sel)
{
uint32 i;
- Hchan *c;
+ Hchan *c, *c0;
c = nil;
- for(i=sel->ncase; i>0; i--) {
- if(sel->scase[i-1]->chan && sel->scase[i-1]->chan != c) {
- c = sel->scase[i-1]->chan;
+ for(i=sel->ncase; i-->0;) {
+ c0 = sel->lockorder[i];
+ if(c0 && c0 != c) {
+ c = c0;
runtime·unlock(c);
}
}
@@ -852,20 +839,20 @@ selectgo(Select **selp)
// generate permuted order
for(i=0; i<sel->ncase; i++)
- sel->order[i] = i;
+ sel->pollorder[i] = i;
for(i=1; i<sel->ncase; i++) {
- o = sel->order[i];
- j = fastrandn(i+1);
- sel->order[i] = sel->order[j];
- sel->order[j] = o;
+ o = sel->pollorder[i];
+ j = runtime·fastrand1()%(i+1);
+ sel->pollorder[i] = sel->pollorder[j];
+ sel->pollorder[j] = o;
}
// sort the cases by Hchan address to get the locking order.
- for(i=1; i<sel->ncase; i++) {
- cas = sel->scase[i];
- for(j=i; j>0 && sel->scase[j-1]->chan >= cas->chan; j--)
- sel->scase[j] = sel->scase[j-1];
- sel->scase[j] = cas;
+ for(i=0; i<sel->ncase; i++) {
+ c = sel->scase[i].chan;
+ for(j=i; j>0 && sel->lockorder[j-1] >= c; j--)
+ sel->lockorder[j] = sel->lockorder[j-1];
+ sel->lockorder[j] = c;
}
sellock(sel);
@@ -873,8 +860,8 @@ loop:
// pass 1 - look for something already waiting
dfl = nil;
for(i=0; i<sel->ncase; i++) {
- o = sel->order[i];
- cas = sel->scase[o];
+ o = sel->pollorder[i];
+ cas = &sel->scase[o];
c = cas->chan;
switch(cas->kind) {
@@ -883,7 +870,7 @@ loop:
if(c->qcount > 0)
goto asyncrecv;
} else {
- sg = dequeue(&c->sendq, c);
+ sg = dequeue(&c->sendq);
if(sg != nil)
goto syncrecv;
}
@@ -898,7 +885,7 @@ loop:
if(c->qcount < c->dataqsiz)
goto asyncsend;
} else {
- sg = dequeue(&c->recvq, c);
+ sg = dequeue(&c->recvq);
if(sg != nil)
goto syncsend;
}
@@ -911,6 +898,7 @@ loop:
}
if(dfl != nil) {
+ selunlock(sel);
cas = dfl;
goto retc;
}
@@ -918,11 +906,11 @@ loop:
// pass 2 - enqueue on all chans
for(i=0; i<sel->ncase; i++) {
- o = sel->order[i];
- cas = sel->scase[o];
+ cas = &sel->scase[i];
c = cas->chan;
- sg = allocsg(c);
- sg->offset = o;
+ sg = &cas->sg;
+ sg->g = g;
+ sg->selgen = g->selgen;
switch(cas->kind) {
case CaseRecv:
@@ -930,8 +918,6 @@ loop:
break;
case CaseSend:
- if(c->dataqsiz == 0)
- c->elemalg->copy(c->elemsize, sg->elem, cas->u.elem);
enqueue(&c->sendq, sg);
break;
}
@@ -948,85 +934,82 @@ loop:
// pass 3 - dequeue from unsuccessful chans
// otherwise they stack up on quiet channels
for(i=0; i<sel->ncase; i++) {
- if(sg == nil || i != sg->offset) {
- cas = sel->scase[i];
+ cas = &sel->scase[i];
+ if(cas != (Scase*)sg) {
c = cas->chan;
if(cas->kind == CaseSend)
- dequeueg(&c->sendq, c);
+ dequeueg(&c->sendq);
else
- dequeueg(&c->recvq, c);
+ dequeueg(&c->recvq);
}
}
if(sg == nil)
goto loop;
- o = sg->offset;
- cas = sel->scase[o];
+ cas = (Scase*)sg;
c = cas->chan;
- if(c->dataqsiz > 0) {
-// prints("shouldnt happen\n");
- goto loop;
- }
+ if(c->dataqsiz > 0)
+ runtime·throw("selectgo: shouldnt happen");
if(debug)
- runtime·printf("wait-return: sel=%p c=%p cas=%p kind=%d o=%d\n",
- sel, c, cas, cas->kind, o);
+ runtime·printf("wait-return: sel=%p c=%p cas=%p kind=%d\n",
+ sel, c, cas, cas->kind);
if(cas->kind == CaseRecv) {
- if(cas->u.recv.receivedp != nil)
- *cas->u.recv.receivedp = true;
- if(cas->u.recv.elemp != nil)
- c->elemalg->copy(c->elemsize, cas->u.recv.elemp, sg->elem);
- c->elemalg->copy(c->elemsize, sg->elem, nil);
+ if(cas->receivedp != nil)
+ *cas->receivedp = true;
}
- freesg(c, sg);
+ selunlock(sel);
goto retc;
asyncrecv:
// can receive from buffer
- if(cas->u.recv.receivedp != nil)
- *cas->u.recv.receivedp = true;
- if(cas->u.recv.elemp != nil)
- c->elemalg->copy(c->elemsize, cas->u.recv.elemp, chanbuf(c, c->recvx));
+ if(cas->receivedp != nil)
+ *cas->receivedp = true;
+ if(cas->sg.elem != nil)
+ c->elemalg->copy(c->elemsize, cas->sg.elem, chanbuf(c, c->recvx));
c->elemalg->copy(c->elemsize, chanbuf(c, c->recvx), nil);
if(++c->recvx == c->dataqsiz)
c->recvx = 0;
c->qcount--;
- sg = dequeue(&c->sendq, c);
+ sg = dequeue(&c->sendq);
if(sg != nil) {
gp = sg->g;
- freesg(c, sg);
+ selunlock(sel);
runtime·ready(gp);
+ } else {
+ selunlock(sel);
}
goto retc;
asyncsend:
// can send to buffer
- if(cas->u.elem != nil)
- c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), cas->u.elem);
+ c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), cas->sg.elem);
if(++c->sendx == c->dataqsiz)
c->sendx = 0;
c->qcount++;
- sg = dequeue(&c->recvq, c);
+ sg = dequeue(&c->recvq);
if(sg != nil) {
gp = sg->g;
- freesg(c, sg);
+ selunlock(sel);
runtime·ready(gp);
+ } else {
+ selunlock(sel);
}
goto retc;
syncrecv:
// can receive from sleeping sender (sg)
+ selunlock(sel);
if(debug)
runtime·printf("syncrecv: sel=%p c=%p o=%d\n", sel, c, o);
- if(cas->u.recv.receivedp != nil)
- *cas->u.recv.receivedp = true;
- if(cas->u.recv.elemp != nil)
- c->elemalg->copy(c->elemsize, cas->u.recv.elemp, sg->elem);
- c->elemalg->copy(c->elemsize, sg->elem, nil);
+ if(cas->receivedp != nil)
+ *cas->receivedp = true;
+ if(cas->sg.elem != nil)
+ c->elemalg->copy(c->elemsize, cas->sg.elem, sg->elem);
gp = sg->g;
gp->param = sg;
runtime·ready(gp);
@@ -1034,30 +1017,28 @@ syncrecv:
rclose:
// read at end of closed channel
- if(cas->u.recv.receivedp != nil)
- *cas->u.recv.receivedp = false;
- if(cas->u.recv.elemp != nil)
- c->elemalg->copy(c->elemsize, cas->u.recv.elemp, nil);
+ selunlock(sel);
+ if(cas->receivedp != nil)
+ *cas->receivedp = false;
+ if(cas->sg.elem != nil)
+ c->elemalg->copy(c->elemsize, cas->sg.elem, nil);
goto retc;
syncsend:
// can send to sleeping receiver (sg)
+ selunlock(sel);
if(debug)
runtime·printf("syncsend: sel=%p c=%p o=%d\n", sel, c, o);
- if(c->closed)
- goto sclose;
- c->elemalg->copy(c->elemsize, sg->elem, cas->u.elem);
+ c->elemalg->copy(c->elemsize, sg->elem, cas->sg.elem);
gp = sg->g;
gp->param = sg;
runtime·ready(gp);
retc:
- selunlock(sel);
-
// return to pc corresponding to chosen case
pc = cas->pc;
as = (byte*)selp + cas->so;
- freesel(sel);
+ runtime·free(sel);
*as = true;
return pc;
@@ -1088,23 +1069,21 @@ runtime·closechan(Hchan *c)
// release all readers
for(;;) {
- sg = dequeue(&c->recvq, c);
+ sg = dequeue(&c->recvq);
if(sg == nil)
break;
gp = sg->g;
gp->param = nil;
- freesg(c, sg);
runtime·ready(gp);
}
// release all writers
for(;;) {
- sg = dequeue(&c->sendq, c);
+ sg = dequeue(&c->sendq);
if(sg == nil)
break;
gp = sg->g;
gp->param = nil;
- freesg(c, sg);
runtime·ready(gp);
}
@@ -1144,7 +1123,7 @@ reflect·chancap(Hchan *c, int32 cap)
}
static SudoG*
-dequeue(WaitQ *q, Hchan *c)
+dequeue(WaitQ *q)
{
SudoG *sgp;
@@ -1155,9 +1134,10 @@ loop:
q->first = sgp->link;
// if sgp is stale, ignore it
- if(!runtime·cas(&sgp->g->selgen, sgp->selgen, sgp->selgen + 1)) {
+ if(sgp->selgen != NOSELGEN &&
+ (sgp->selgen != sgp->g->selgen ||
+ !runtime·cas(&sgp->g->selgen, sgp->selgen, sgp->selgen + 2))) {
//prints("INVALID PSEUDOG POINTER\n");
- freesg(c, sgp);
goto loop;
}
@@ -1165,14 +1145,16 @@ loop:
}
static void
-dequeueg(WaitQ *q, Hchan *c)
+dequeueg(WaitQ *q)
{
- SudoG **l, *sgp;
-
- for(l=&q->first; (sgp=*l) != nil; l=&sgp->link) {
+ SudoG **l, *sgp, *prevsgp;
+
+ prevsgp = nil;
+ for(l=&q->first; (sgp=*l) != nil; l=&sgp->link, prevsgp=sgp) {
if(sgp->g == g) {
*l = sgp->link;
- freesg(c, sgp);
+ if(q->last == sgp)
+ q->last = prevsgp;
break;
}
}
@@ -1190,62 +1172,3 @@ enqueue(WaitQ *q, SudoG *sgp)
q->last->link = sgp;
q->last = sgp;
}
-
-static SudoG*
-allocsg(Hchan *c)
-{
- SudoG* sg;
-
- sg = c->free;
- if(sg != nil) {
- c->free = sg->link;
- } else
- sg = runtime·mal(sizeof(*sg) + c->elemsize - sizeof(sg->elem));
- sg->selgen = g->selgen;
- sg->g = g;
- sg->offset = 0;
- sg->isfree = 0;
-
- return sg;
-}
-
-static void
-freesg(Hchan *c, SudoG *sg)
-{
- if(sg != nil) {
- if(sg->isfree)
- runtime·throw("chan.freesg: already free");
- sg->isfree = 1;
- sg->link = c->free;
- c->free = sg;
- }
-}
-
-static uint32
-fastrand1(void)
-{
- static uint32 x = 0x49f6428aUL;
-
- x += x;
- if(x & 0x80000000L)
- x ^= 0x88888eefUL;
- return x;
-}
-
-static uint32
-fastrandn(uint32 n)
-{
- uint32 max, r;
-
- if(n <= 1)
- return 0;
-
- r = fastrand1();
- if(r < (1ULL<<31)-n) // avoid computing max in common case
- return r%n;
-
- max = (1ULL<<31)/n * n;
- while(r >= max)
- r = fastrand1();
- return r%n;
-}
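
Two orders drive the rewritten selectgo above: pollorder, a random permutation so that ready cases are chosen fairly, and lockorder, the case channels sorted by address so that every select acquires channel locks in one global order and cannot deadlock against another select on the same channels. The file also drops the per-channel SudoG freelist: waiters now use a SudoG on their own stack (mysg), which is why dequeue and dequeueg no longer take the Hchan argument. A hedged Go sketch of how the two orders are built (names are illustrative, not the runtime's):

	import "math/rand"

	func buildOrders(chans []uintptr) (poll []int, lock []uintptr) {
		n := len(chans)
		poll = make([]int, n)
		for i := range poll {
			poll[i] = i
		}
		for i := 1; i < n; i++ { // random permutation, as in the diff
			j := rand.Intn(i + 1)
			poll[i], poll[j] = poll[j], poll[i]
		}
		lock = make([]uintptr, n)
		for i, c := range chans { // insertion sort by channel address
			j := i
			for ; j > 0 && lock[j-1] >= c; j-- {
				lock[j] = lock[j-1]
			}
			lock[j] = c
		}
		return poll, lock
	}
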
diff --git a/src/pkg/runtime/chan_test.go b/src/pkg/runtime/chan_test.go
new file mode 100644
index 000000000..c5ffe93ac
--- /dev/null
+++ b/src/pkg/runtime/chan_test.go
@@ -0,0 +1,267 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "runtime"
+ "sync/atomic"
+ "testing"
+)
+
+func TestChanSendInterface(t *testing.T) {
+ type mt struct{}
+ m := &mt{}
+ c := make(chan interface{}, 1)
+ c <- m
+ select {
+ case c <- m:
+ default:
+ }
+ select {
+ case c <- m:
+ case c <- &mt{}:
+ default:
+ }
+}
+
+func BenchmarkSelectUncontended(b *testing.B) {
+ const CallsPerSched = 1000
+ procs := runtime.GOMAXPROCS(-1)
+ N := int32(b.N / CallsPerSched)
+ c := make(chan bool, procs)
+ for p := 0; p < procs; p++ {
+ go func() {
+ myc1 := make(chan int, 1)
+ myc2 := make(chan int, 1)
+ myc1 <- 0
+ for atomic.AddInt32(&N, -1) >= 0 {
+ for g := 0; g < CallsPerSched; g++ {
+ select {
+ case <-myc1:
+ myc2 <- 0
+ case <-myc2:
+ myc1 <- 0
+ }
+ }
+ }
+ c <- true
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ }
+}
+
+func BenchmarkSelectContended(b *testing.B) {
+ const CallsPerSched = 1000
+ procs := runtime.GOMAXPROCS(-1)
+ N := int32(b.N / CallsPerSched)
+ c := make(chan bool, procs)
+ myc1 := make(chan int, procs)
+ myc2 := make(chan int, procs)
+ for p := 0; p < procs; p++ {
+ myc1 <- 0
+ go func() {
+ for atomic.AddInt32(&N, -1) >= 0 {
+ for g := 0; g < CallsPerSched; g++ {
+ select {
+ case <-myc1:
+ myc2 <- 0
+ case <-myc2:
+ myc1 <- 0
+ }
+ }
+ }
+ c <- true
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ }
+}
+
+func BenchmarkSelectNonblock(b *testing.B) {
+ const CallsPerSched = 1000
+ procs := runtime.GOMAXPROCS(-1)
+ N := int32(b.N / CallsPerSched)
+ c := make(chan bool, procs)
+ for p := 0; p < procs; p++ {
+ go func() {
+ myc1 := make(chan int)
+ myc2 := make(chan int)
+ myc3 := make(chan int, 1)
+ myc4 := make(chan int, 1)
+ for atomic.AddInt32(&N, -1) >= 0 {
+ for g := 0; g < CallsPerSched; g++ {
+ select {
+ case <-myc1:
+ default:
+ }
+ select {
+ case myc2 <- 0:
+ default:
+ }
+ select {
+ case <-myc3:
+ default:
+ }
+ select {
+ case myc4 <- 0:
+ default:
+ }
+ }
+ }
+ c <- true
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ }
+}
+
+func BenchmarkChanUncontended(b *testing.B) {
+ const CallsPerSched = 1000
+ procs := runtime.GOMAXPROCS(-1)
+ N := int32(b.N / CallsPerSched)
+ c := make(chan bool, procs)
+ for p := 0; p < procs; p++ {
+ go func() {
+ myc := make(chan int, CallsPerSched)
+ for atomic.AddInt32(&N, -1) >= 0 {
+ for g := 0; g < CallsPerSched; g++ {
+ myc <- 0
+ }
+ for g := 0; g < CallsPerSched; g++ {
+ <-myc
+ }
+ }
+ c <- true
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ }
+}
+
+func BenchmarkChanContended(b *testing.B) {
+ const CallsPerSched = 1000
+ procs := runtime.GOMAXPROCS(-1)
+ N := int32(b.N / CallsPerSched)
+ c := make(chan bool, procs)
+ myc := make(chan int, procs*CallsPerSched)
+ for p := 0; p < procs; p++ {
+ go func() {
+ for atomic.AddInt32(&N, -1) >= 0 {
+ for g := 0; g < CallsPerSched; g++ {
+ myc <- 0
+ }
+ for g := 0; g < CallsPerSched; g++ {
+ <-myc
+ }
+ }
+ c <- true
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ }
+}
+
+func BenchmarkChanSync(b *testing.B) {
+ const CallsPerSched = 1000
+ procs := 2
+ N := int32(b.N / CallsPerSched / procs * procs)
+ c := make(chan bool, procs)
+ myc := make(chan int)
+ for p := 0; p < procs; p++ {
+ go func() {
+ for {
+ i := atomic.AddInt32(&N, -1)
+ if i < 0 {
+ break
+ }
+ for g := 0; g < CallsPerSched; g++ {
+ if i%2 == 0 {
+ <-myc
+ myc <- 0
+ } else {
+ myc <- 0
+ <-myc
+ }
+ }
+ }
+ c <- true
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ }
+}
+
+func benchmarkChanProdCons(b *testing.B, chanSize, localWork int) {
+ const CallsPerSched = 1000
+ procs := runtime.GOMAXPROCS(-1)
+ N := int32(b.N / CallsPerSched)
+ c := make(chan bool, 2*procs)
+ myc := make(chan int, chanSize)
+ for p := 0; p < procs; p++ {
+ go func() {
+ foo := 0
+ for atomic.AddInt32(&N, -1) >= 0 {
+ for g := 0; g < CallsPerSched; g++ {
+ for i := 0; i < localWork; i++ {
+ foo *= 2
+ foo /= 2
+ }
+ myc <- 1
+ }
+ }
+ myc <- 0
+ c <- foo == 42
+ }()
+ go func() {
+ foo := 0
+ for {
+ v := <-myc
+ if v == 0 {
+ break
+ }
+ for i := 0; i < localWork; i++ {
+ foo *= 2
+ foo /= 2
+ }
+ }
+ c <- foo == 42
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ <-c
+ }
+}
+
+func BenchmarkChanProdCons0(b *testing.B) {
+ benchmarkChanProdCons(b, 0, 0)
+}
+
+func BenchmarkChanProdCons10(b *testing.B) {
+ benchmarkChanProdCons(b, 10, 0)
+}
+
+func BenchmarkChanProdCons100(b *testing.B) {
+ benchmarkChanProdCons(b, 100, 0)
+}
+
+func BenchmarkChanProdConsWork0(b *testing.B) {
+ benchmarkChanProdCons(b, 0, 100)
+}
+
+func BenchmarkChanProdConsWork10(b *testing.B) {
+ benchmarkChanProdCons(b, 10, 100)
+}
+
+func BenchmarkChanProdConsWork100(b *testing.B) {
+ benchmarkChanProdCons(b, 100, 100)
+}
diff --git a/src/pkg/runtime/cpuprof.c b/src/pkg/runtime/cpuprof.c
index 6233bcb45..74b795b7e 100644
--- a/src/pkg/runtime/cpuprof.c
+++ b/src/pkg/runtime/cpuprof.c
@@ -121,6 +121,10 @@ runtime·SetCPUProfileRate(int32 hz)
{
uintptr *p;
uintptr n;
+
+ // Call findfunc now so that it won't have to
+ // build tables during the signal handler.
+ runtime·findfunc(0);
// Clamp hz to something reasonable.
if(hz < 0)
diff --git a/src/pkg/runtime/debug/stack_test.go b/src/pkg/runtime/debug/stack_test.go
index 4aeea13ff..94293bb93 100644
--- a/src/pkg/runtime/debug/stack_test.go
+++ b/src/pkg/runtime/debug/stack_test.go
@@ -23,7 +23,7 @@ func (t T) method() []byte {
Don't worry much about the base levels, but check the ones in our own package.
/Users/r/go/src/pkg/runtime/debug/stack_test.go:15 (0x13878)
- *T.ptrmethod: return Stack()
+ (*T).ptrmethod: return Stack()
/Users/r/go/src/pkg/runtime/debug/stack_test.go:18 (0x138dd)
T.method: return t.ptrmethod()
/Users/r/go/src/pkg/runtime/debug/stack_test.go:23 (0x13920)
@@ -40,7 +40,7 @@ func TestStack(t *testing.T) {
t.Fatal("too few lines")
}
check(t, lines[0], "src/pkg/runtime/debug/stack_test.go")
- check(t, lines[1], "\t*T.ptrmethod: return Stack()")
+ check(t, lines[1], "\t(*T).ptrmethod: return Stack()")
check(t, lines[2], "src/pkg/runtime/debug/stack_test.go")
check(t, lines[3], "\tT.method: return t.ptrmethod()")
check(t, lines[4], "src/pkg/runtime/debug/stack_test.go")
diff --git a/src/pkg/runtime/export_test.go b/src/pkg/runtime/export_test.go
index 58631c7b4..53c5fcba4 100644
--- a/src/pkg/runtime/export_test.go
+++ b/src/pkg/runtime/export_test.go
@@ -15,3 +15,9 @@ var F32to64 = f32to64
var Fcmp64 = fcmp64
var Fintto64 = fintto64
var F64toint = f64toint
+
+func entersyscall()
+func exitsyscall()
+
+var Entersyscall = entersyscall
+var Exitsyscall = exitsyscall
diff --git a/src/pkg/runtime/freebsd/386/signal.c b/src/pkg/runtime/freebsd/386/signal.c
index 3600f0762..2fe7ecd70 100644
--- a/src/pkg/runtime/freebsd/386/signal.c
+++ b/src/pkg/runtime/freebsd/386/signal.c
@@ -111,6 +111,8 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp)
runtime·exit(2);
}
+// Called from kernel on signal stack, so no stack split.
+#pragma textflag 7
void
runtime·sigignore(void)
{
diff --git a/src/pkg/runtime/freebsd/amd64/signal.c b/src/pkg/runtime/freebsd/amd64/signal.c
index 85cb1d855..8015e366e 100644
--- a/src/pkg/runtime/freebsd/amd64/signal.c
+++ b/src/pkg/runtime/freebsd/amd64/signal.c
@@ -119,6 +119,8 @@ runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp)
runtime·exit(2);
}
+// Called from kernel on signal stack, so no stack split.
+#pragma textflag 7
void
runtime·sigignore(void)
{
diff --git a/src/pkg/runtime/goc2c.c b/src/pkg/runtime/goc2c.c
index 826ceff3a..61236e226 100644
--- a/src/pkg/runtime/goc2c.c
+++ b/src/pkg/runtime/goc2c.c
@@ -2,26 +2,27 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-/* Translate a .goc file into a .c file. A .goc file is a combination
- of a limited form of Go with C. */
+/*
+ * Translate a .goc file into a .c file. A .goc file is a combination
+ * of a limited form of Go with C.
+ */
/*
- package PACKAGENAME
- {# line}
- func NAME([NAME TYPE { , NAME TYPE }]) [(NAME TYPE { , NAME TYPE })] \{
- C code with proper brace nesting
- \}
+ package PACKAGENAME
+ {# line}
+ func NAME([NAME TYPE { , NAME TYPE }]) [(NAME TYPE { , NAME TYPE })] \{
+ C code with proper brace nesting
+ \}
*/
-/* We generate C code which implements the function such that it can
- be called from Go and executes the C code. */
+/*
+ * We generate C code which implements the function such that it can
+ * be called from Go and executes the C code.
+ */
-#include <assert.h>
-#include <ctype.h>
+#include <u.h>
#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
+#include <libc.h>
/* Whether we're emitting for gcc */
static int gcc;
@@ -88,16 +89,14 @@ int structround = 4;
static void
bad_eof(void)
{
- fprintf(stderr, "%s:%u: unexpected EOF\n", file, lineno);
- exit(1);
+ sysfatal("%s:%ud: unexpected EOF\n", file, lineno);
}
/* Out of memory. */
static void
bad_mem(void)
{
- fprintf(stderr, "%s:%u: out of memory\n", file, lineno);
- exit(1);
+ sysfatal("%s:%ud: out of memory\n", file, lineno);
}
/* Allocate memory without fail. */
@@ -196,8 +195,10 @@ getchar_skipping_comments(void)
}
}
-/* Read and return a token. Tokens are delimited by whitespace or by
- [(),{}]. The latter are all returned as single characters. */
+/*
+ * Read and return a token. Tokens are delimited by whitespace or by
+ * [(),{}]. The latter are all returned as single characters.
+ */
static char *
read_token(void)
{
@@ -259,11 +260,11 @@ read_package(void)
char *token;
token = read_token_no_eof();
+ if (token == nil)
+ sysfatal("%s:%ud: no token\n", file, lineno);
if (strcmp(token, "package") != 0) {
- fprintf(stderr,
- "%s:%u: expected \"package\", got \"%s\"\n",
+ sysfatal("%s:%ud: expected \"package\", got \"%s\"\n",
file, lineno, token);
- exit(1);
}
return read_token_no_eof();
}
@@ -290,8 +291,10 @@ read_preprocessor_lines(void)
}
}
-/* Read a type in Go syntax and return a type in C syntax. We only
- permit basic types and pointers. */
+/*
+ * Read a type in Go syntax and return a type in C syntax. We only
+ * permit basic types and pointers.
+ */
static char *
read_type(void)
{
@@ -333,13 +336,14 @@ type_size(char *p)
for(i=0; type_table[i].name; i++)
if(strcmp(type_table[i].name, p) == 0)
return type_table[i].size;
- fprintf(stderr, "%s:%u: unknown type %s\n", file, lineno, p);
- exit(1);
+ sysfatal("%s:%ud: unknown type %s\n", file, lineno, p);
return 0;
}
-/* Read a list of parameters. Each parameter is a name and a type.
- The list ends with a ')'. We have already read the '('. */
+/*
+ * Read a list of parameters. Each parameter is a name and a type.
+ * The list ends with a ')'. We have already read the '('.
+ */
static struct params *
read_params(int *poffset)
{
@@ -375,17 +379,18 @@ read_params(int *poffset)
}
}
if (strcmp(token, ")") != 0) {
- fprintf(stderr, "%s:%u: expected '('\n",
+ sysfatal("%s:%ud: expected '('\n",
file, lineno);
- exit(1);
}
if (poffset != NULL)
*poffset = offset;
return ret;
}
-/* Read a function header. This reads up to and including the initial
- '{' character. Returns 1 if it read a header, 0 at EOF. */
+/*
+ * Read a function header. This reads up to and including the initial
+ * '{' character. Returns 1 if it read a header, 0 at EOF.
+ */
static int
read_func_header(char **name, struct params **params, int *paramwid, struct params **rets)
{
@@ -416,9 +421,8 @@ read_func_header(char **name, struct params **params, int *paramwid, struct para
token = read_token();
if (token == NULL || strcmp(token, "(") != 0) {
- fprintf(stderr, "%s:%u: expected \"(\"\n",
+ sysfatal("%s:%ud: expected \"(\"\n",
file, lineno);
- exit(1);
}
*params = read_params(paramwid);
@@ -430,9 +434,8 @@ read_func_header(char **name, struct params **params, int *paramwid, struct para
token = read_token();
}
if (token == NULL || strcmp(token, "{") != 0) {
- fprintf(stderr, "%s:%u: expected \"{\"\n",
+ sysfatal("%s:%ud: expected \"{\"\n",
file, lineno);
- exit(1);
}
return 1;
}
@@ -581,8 +584,10 @@ write_func_trailer(char *package, char *name,
write_6g_func_trailer(rets);
}
-/* Read and write the body of the function, ending in an unnested }
- (which is read but not written). */
+/*
+ * Read and write the body of the function, ending in an unnested }
+ * (which is read but not written).
+ */
static void
copy_body(void)
{
@@ -669,15 +674,15 @@ process_file(void)
static void
usage(void)
{
- fprintf(stderr, "Usage: goc2c [--6g | --gc] [file]\n");
- exit(1);
+ sysfatal("Usage: goc2c [--6g | --gc] [file]\n");
}
-int
+void
main(int argc, char **argv)
{
char *goarch;
+ argv0 = argv[0];
while(argc > 1 && argv[1][0] == '-') {
if(strcmp(argv[1], "-") == 0)
break;
@@ -694,7 +699,7 @@ main(int argc, char **argv)
if(argc <= 1 || strcmp(argv[1], "-") == 0) {
file = "<stdin>";
process_file();
- return 0;
+ exits(0);
}
if(argc > 2)
@@ -702,8 +707,7 @@ main(int argc, char **argv)
file = argv[1];
if(freopen(file, "r", stdin) == 0) {
- fprintf(stderr, "open %s: %s\n", file, strerror(errno));
- exit(1);
+ sysfatal("open %s: %r\n", file);
}
if(!gcc) {
@@ -719,5 +723,5 @@ main(int argc, char **argv)
}
process_file();
- return 0;
+ exits(0);
}
diff --git a/src/pkg/runtime/hashmap.c b/src/pkg/runtime/hashmap.c
index 5ba1eb20a..179a56375 100644
--- a/src/pkg/runtime/hashmap.c
+++ b/src/pkg/runtime/hashmap.c
@@ -753,12 +753,12 @@ runtime·makemap_c(Type *key, Type *val, int64 hint)
// func(key) (val[, pres])
h->ko1 = runtime·rnd(sizeof(h), key->align);
h->vo1 = runtime·rnd(h->ko1+keysize, Structrnd);
- h->po1 = runtime·rnd(h->vo1+valsize, 1);
+ h->po1 = h->vo1 + valsize;
// func(key, val[, pres])
h->ko2 = runtime·rnd(sizeof(h), key->align);
h->vo2 = runtime·rnd(h->ko2+keysize, val->align);
- h->po2 = runtime·rnd(h->vo2+valsize, 1);
+ h->po2 = h->vo2 + valsize;
if(debug) {
runtime·printf("makemap: map=%p; keysize=%d; valsize=%d; keyalg=%d; valalg=%d; offsets=%d,%d; %d,%d,%d; %d,%d,%d\n",
diff --git a/src/pkg/runtime/hashmap.h b/src/pkg/runtime/hashmap.h
index d0fd3527f..19ff41697 100644
--- a/src/pkg/runtime/hashmap.h
+++ b/src/pkg/runtime/hashmap.h
@@ -65,7 +65,7 @@
#define malloc runtime·mal
#define memset(a,b,c) runtime·memclr((byte*)(a), (uint32)(c))
-#define memcpy(a,b,c) runtime·mcpy((byte*)(a),(byte*)(b),(uint32)(c))
+#define memcpy(a,b,c) runtime·memmove((byte*)(a),(byte*)(b),(uint32)(c))
#define assert(a) if(!(a)) runtime·throw("assert")
#define free(x) runtime·free(x)
#define memmove(a,b,c) runtime·memmove(a, b, c)
diff --git a/src/pkg/runtime/iface.c b/src/pkg/runtime/iface.c
index b1015f695..000f834cf 100644
--- a/src/pkg/runtime/iface.c
+++ b/src/pkg/runtime/iface.c
@@ -81,7 +81,7 @@ itab(InterfaceType *inter, Type *type, int32 canfail)
for(locked=0; locked<2; locked++) {
if(locked)
runtime·lock(&ifacelock);
- for(m=hash[h]; m!=nil; m=m->link) {
+ for(m=runtime·atomicloadp(&hash[h]); m!=nil; m=m->link) {
if(m->inter == inter && m->type == type) {
if(m->bad) {
m = nil;
@@ -145,10 +145,11 @@ search:
}
out:
+ if(!locked)
+ runtime·panicstring("invalid itab locking");
m->link = hash[h];
- hash[h] = m;
- if(locked)
- runtime·unlock(&ifacelock);
+ runtime·atomicstorep(&hash[h], m);
+ runtime·unlock(&ifacelock);
if(m->bad)
return nil;
return m;
@@ -264,7 +265,7 @@ runtime·assertI2T2(Type *t, Iface i, ...)
ret = (byte*)(&i+1);
wid = t->size;
- ok = (bool*)(ret+runtime·rnd(wid, 1));
+ ok = (bool*)(ret + wid);
if(i.tab == nil || i.tab->type != t) {
*ok = false;
@@ -326,7 +327,7 @@ runtime·assertE2T2(Type *t, Eface e, ...)
runtime·throw("invalid interface value");
ret = (byte*)(&e+1);
wid = t->size;
- ok = (bool*)(ret+runtime·rnd(wid, 1));
+ ok = (bool*)(ret + wid);
if(t != e.type) {
*ok = false;
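
The itab change above lets readers walk hash[h] without taking ifacelock: the chain head is read with runtime·atomicloadp, while writers, still serialized by the lock, publish a fully built entry with runtime·atomicstorep. A small Go illustration of that publish/lookup pattern (modern sync/atomic used only for the sketch; the names are hypothetical):

	import (
		"sync"
		"sync/atomic"
	)

	type entry struct {
		key  int
		next *entry
	}

	var head atomic.Pointer[entry] // stand-in for hash[h]

	// Readers: one atomic load of the head, then an unlocked walk.
	func lookup(key int) *entry {
		for e := head.Load(); e != nil; e = e.next {
			if e.key == key {
				return e
			}
		}
		return nil
	}

	// Writers: still hold the lock; the entry is fully initialized before
	// the atomic store makes it visible to concurrent readers.
	func publish(mu *sync.Mutex, e *entry) {
		mu.Lock()
		e.next = head.Load()
		head.Store(e)
		mu.Unlock()
	}
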
diff --git a/src/pkg/runtime/linux/386/defs.h b/src/pkg/runtime/linux/386/defs.h
index 6ae1c4e13..73fe23ef9 100644
--- a/src/pkg/runtime/linux/386/defs.h
+++ b/src/pkg/runtime/linux/386/defs.h
@@ -61,6 +61,8 @@ enum {
ITIMER_REAL = 0,
ITIMER_VIRTUAL = 0x1,
ITIMER_PROF = 0x2,
+ O_RDONLY = 0,
+ O_CLOEXEC = 02000000,
};
// Types
diff --git a/src/pkg/runtime/linux/386/sys.s b/src/pkg/runtime/linux/386/sys.s
index e8b423324..0b4a34986 100644
--- a/src/pkg/runtime/linux/386/sys.s
+++ b/src/pkg/runtime/linux/386/sys.s
@@ -22,9 +22,31 @@ TEXT runtime·exit1(SB),7,$0
INT $3 // not reached
RET
+TEXT runtime·open(SB),7,$0
+ MOVL $5, AX // syscall - open
+ MOVL 4(SP), BX
+ MOVL 8(SP), CX
+ MOVL 12(SP), DX
+ INT $0x80
+ RET
+
+TEXT runtime·close(SB),7,$0
+ MOVL $6, AX // syscall - close
+ MOVL 4(SP), BX
+ INT $0x80
+ RET
+
TEXT runtime·write(SB),7,$0
MOVL $4, AX // syscall - write
- MOVL 4(SP), BX
+ MOVL 4(SP), BX
+ MOVL 8(SP), CX
+ MOVL 12(SP), DX
+ INT $0x80
+ RET
+
+TEXT runtime·read(SB),7,$0
+ MOVL $3, AX // syscall - read
+ MOVL 4(SP), BX
MOVL 8(SP), CX
MOVL 12(SP), DX
INT $0x80
@@ -315,3 +337,8 @@ TEXT runtime·setldt(SB),7,$32
MOVW AX, GS
RET
+
+TEXT runtime·osyield(SB),7,$0
+ MOVL $158, AX
+ INT $0x80
+ RET
diff --git a/src/pkg/runtime/linux/amd64/defs.h b/src/pkg/runtime/linux/amd64/defs.h
index 70d63145c..8053dd16f 100644
--- a/src/pkg/runtime/linux/amd64/defs.h
+++ b/src/pkg/runtime/linux/amd64/defs.h
@@ -61,6 +61,8 @@ enum {
ITIMER_REAL = 0,
ITIMER_VIRTUAL = 0x1,
ITIMER_PROF = 0x2,
+ O_RDONLY = 0,
+ O_CLOEXEC = 02000000,
};
// Types
diff --git a/src/pkg/runtime/linux/amd64/sys.s b/src/pkg/runtime/linux/amd64/sys.s
index 66fdab208..8b4dcd921 100644
--- a/src/pkg/runtime/linux/amd64/sys.s
+++ b/src/pkg/runtime/linux/amd64/sys.s
@@ -28,6 +28,12 @@ TEXT runtime·open(SB),7,$0-16
SYSCALL
RET
+TEXT runtime·close(SB),7,$0-16
+ MOVL 8(SP), DI
+ MOVL $3, AX // syscall entry
+ SYSCALL
+ RET
+
TEXT runtime·write(SB),7,$0-24
MOVL 8(SP), DI
MOVQ 16(SP), SI
@@ -36,6 +42,14 @@ TEXT runtime·write(SB),7,$0-24
SYSCALL
RET
+TEXT runtime·read(SB),7,$0-24
+ MOVL 8(SP), DI
+ MOVQ 16(SP), SI
+ MOVL 24(SP), DX
+ MOVL $0, AX // syscall entry
+ SYSCALL
+ RET
+
TEXT runtime·raisesigpipe(SB),7,$12
MOVL $186, AX // syscall - gettid
SYSCALL
@@ -232,3 +246,7 @@ TEXT runtime·settls(SB),7,$32
CALL runtime·notok(SB)
RET
+TEXT runtime·osyield(SB),7,$0
+ MOVL $24, AX
+ SYSCALL
+ RET
diff --git a/src/pkg/runtime/linux/arm/defs.h b/src/pkg/runtime/linux/arm/defs.h
index 6b2f22c66..09b558ed0 100644
--- a/src/pkg/runtime/linux/arm/defs.h
+++ b/src/pkg/runtime/linux/arm/defs.h
@@ -61,6 +61,8 @@ enum {
ITIMER_REAL = 0,
ITIMER_PROF = 0x2,
ITIMER_VIRTUAL = 0x1,
+ O_RDONLY = 0,
+ O_CLOEXEC = 02000000,
};
// Types
diff --git a/src/pkg/runtime/linux/arm/sys.s b/src/pkg/runtime/linux/arm/sys.s
index ab5349822..8619f0945 100644
--- a/src/pkg/runtime/linux/arm/sys.s
+++ b/src/pkg/runtime/linux/arm/sys.s
@@ -15,7 +15,10 @@
#define SYS_BASE 0x0
#define SYS_exit (SYS_BASE + 1)
+#define SYS_read (SYS_BASE + 3)
#define SYS_write (SYS_BASE + 4)
+#define SYS_open (SYS_BASE + 5)
+#define SYS_close (SYS_BASE + 6)
#define SYS_gettimeofday (SYS_BASE + 78)
#define SYS_clone (SYS_BASE + 120)
#define SYS_rt_sigreturn (SYS_BASE + 173)
@@ -29,10 +32,25 @@
#define SYS_mincore (SYS_BASE + 219)
#define SYS_gettid (SYS_BASE + 224)
#define SYS_tkill (SYS_BASE + 238)
+#define SYS_sched_yield (SYS_BASE + 158)
#define ARM_BASE (SYS_BASE + 0x0f0000)
#define SYS_ARM_cacheflush (ARM_BASE + 2)
+TEXT runtime·open(SB),7,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW $SYS_open, R7
+ SWI $0
+ RET
+
+TEXT runtime·close(SB),7,$0
+ MOVW 0(FP), R0
+ MOVW $SYS_close, R7
+ SWI $0
+ RET
+
TEXT runtime·write(SB),7,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
@@ -41,6 +59,14 @@ TEXT runtime·write(SB),7,$0
SWI $0
RET
+TEXT runtime·read(SB),7,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW $SYS_read, R7
+ SWI $0
+ RET
+
TEXT runtime·exit(SB),7,$-4
MOVW 0(FP), R0
MOVW $SYS_exit_group, R7
@@ -287,3 +313,7 @@ cascheck:
TEXT runtime·casp(SB),7,$0
B runtime·cas(SB)
+TEXT runtime·osyield(SB),7,$0
+ MOVW $SYS_sched_yield, R7
+ SWI $0
+ RET
diff --git a/src/pkg/runtime/linux/thread.c b/src/pkg/runtime/linux/thread.c
index 7c7ca7b4e..8efba2b98 100644
--- a/src/pkg/runtime/linux/thread.c
+++ b/src/pkg/runtime/linux/thread.c
@@ -8,6 +8,11 @@
#include "stack.h"
extern SigTab runtime·sigtab[];
+static int32 proccount;
+
+int32 runtime·open(uint8*, int32, int32);
+int32 runtime·close(int32);
+int32 runtime·read(int32, void*, int32);
// Linux futex.
//
@@ -15,11 +20,19 @@ extern SigTab runtime·sigtab[];
// futexwakeup(uint32 *addr)
//
// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
-// Futexwakeup wakes up one thread sleeping on addr.
+// Futexwakeup wakes up threads sleeping on addr.
// Futexsleep is allowed to wake up spuriously.
enum
{
+ MUTEX_UNLOCKED = 0,
+ MUTEX_LOCKED = 1,
+ MUTEX_SLEEPING = 2,
+
+ ACTIVE_SPIN = 4,
+ ACTIVE_SPIN_CNT = 30,
+ PASSIVE_SPIN = 1,
+
FUTEX_WAIT = 0,
FUTEX_WAKE = 1,
@@ -52,13 +65,13 @@ futexsleep(uint32 *addr, uint32 val)
runtime·futex(addr, FUTEX_WAIT, val, &longtime, nil, 0);
}
-// If any procs are sleeping on addr, wake up at least one.
+// If any procs are sleeping on addr, wake up at most cnt.
static void
-futexwakeup(uint32 *addr)
+futexwakeup(uint32 *addr, uint32 cnt)
{
int64 ret;
- ret = runtime·futex(addr, FUTEX_WAKE, 1, nil, nil, 0);
+ ret = runtime·futex(addr, FUTEX_WAKE, cnt, nil, nil, 0);
if(ret >= 0)
return;
@@ -66,70 +79,96 @@ futexwakeup(uint32 *addr)
// I don't know that futex wakeup can return
// EAGAIN or EINTR, but if it does, it would be
// safe to loop and call futex again.
-
- runtime·prints("futexwakeup addr=");
- runtime·printpointer(addr);
- runtime·prints(" returned ");
- runtime·printint(ret);
- runtime·prints("\n");
+ runtime·printf("futexwakeup addr=%p returned %D\n", addr, ret);
*(int32*)0x1006 = 0x1006;
}
+static int32
+getproccount(void)
+{
+ int32 fd, rd, cnt, cpustrlen;
+ byte *cpustr, *pos, *bufpos;
+ byte buf[256];
+
+ fd = runtime·open((byte*)"/proc/stat", O_RDONLY|O_CLOEXEC, 0);
+ if(fd == -1)
+ return 1;
+ cnt = 0;
+ bufpos = buf;
+ cpustr = (byte*)"\ncpu";
+ cpustrlen = runtime·findnull(cpustr);
+ for(;;) {
+ rd = runtime·read(fd, bufpos, sizeof(buf)-cpustrlen);
+ if(rd == -1)
+ break;
+ bufpos[rd] = 0;
+ for(pos=buf; pos=runtime·strstr(pos, cpustr); cnt++, pos++) {
+ }
+ if(rd < cpustrlen)
+ break;
+ runtime·memmove(buf, bufpos+rd-cpustrlen+1, cpustrlen-1);
+ bufpos = buf+cpustrlen-1;
+ }
+ runtime·close(fd);
+ return cnt ? cnt : 1;
+}
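
getproccount above estimates the number of processors by counting occurrences of "\ncpu" in /proc/stat (the per-CPU "cpu0", "cpu1", ... lines, not the aggregate first line), falling back to 1. Roughly the following, written with today's standard library purely for illustration:

	import (
		"bytes"
		"os"
	)

	func getproccount() int {
		data, err := os.ReadFile("/proc/stat")
		if err != nil {
			return 1
		}
		n := bytes.Count(data, []byte("\ncpu")) // same substring the C code scans for
		if n == 0 {
			return 1
		}
		return n
	}
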
-// Lock and unlock.
-//
-// The lock state is a single 32-bit word that holds
-// a 31-bit count of threads waiting for the lock
-// and a single bit (the low bit) saying whether the lock is held.
-// The uncontended case runs entirely in user space.
-// When contention is detected, we defer to the kernel (futex).
-//
-// A reminder: compare-and-swap runtime·cas(addr, old, new) does
-// if(*addr == old) { *addr = new; return 1; }
-// else return 0;
-// but atomically.
-
+// Possible lock states are MUTEX_UNLOCKED, MUTEX_LOCKED and MUTEX_SLEEPING.
+// MUTEX_SLEEPING means that there is presumably at least one sleeping thread.
+// Note that there can be spinning threads during all states - they do not
+// affect the mutex's state.
static void
futexlock(Lock *l)
{
- uint32 v;
+ uint32 i, v, wait, spin;
-again:
- v = l->key;
- if((v&1) == 0){
- if(runtime·cas(&l->key, v, v|1)){
- // Lock wasn't held; we grabbed it.
- return;
+ // Speculative grab for lock.
+ v = runtime·xchg(&l->key, MUTEX_LOCKED);
+ if(v == MUTEX_UNLOCKED)
+ return;
+
+ // wait is either MUTEX_LOCKED or MUTEX_SLEEPING
+ // depending on whether there is a thread sleeping
+ // on this mutex. If we ever change l->key from
+ // MUTEX_SLEEPING to some other value, we must be
+ // careful to change it back to MUTEX_SLEEPING before
+ // returning, to ensure that the sleeping thread gets
+ // its wakeup call.
+ wait = v;
+
+ if(proccount == 0)
+ proccount = getproccount();
+
+ // On uniprocessors, no point spinning.
+ // On multiprocessors, spin for ACTIVE_SPIN attempts.
+ spin = 0;
+ if(proccount > 1)
+ spin = ACTIVE_SPIN;
+
+ for(;;) {
+ // Try for lock, spinning.
+ for(i = 0; i < spin; i++) {
+ while(l->key == MUTEX_UNLOCKED)
+ if(runtime·cas(&l->key, MUTEX_UNLOCKED, wait))
+ return;
+ runtime·procyield(ACTIVE_SPIN_CNT);
}
- goto again;
- }
- // Lock was held; try to add ourselves to the waiter count.
- if(!runtime·cas(&l->key, v, v+2))
- goto again;
-
- // We're accounted for, now sleep in the kernel.
- //
- // We avoid the obvious lock/unlock race because
- // the kernel won't put us to sleep if l->key has
- // changed underfoot and is no longer v+2.
- //
- // We only really care that (v&1) == 1 (the lock is held),
- // and in fact there is a futex variant that could
- // accommodate that check, but let's not get carried away.)
- futexsleep(&l->key, v+2);
-
- // We're awake: remove ourselves from the count.
- for(;;){
- v = l->key;
- if(v < 2)
- runtime·throw("bad lock key");
- if(runtime·cas(&l->key, v, v-2))
- break;
- }
+ // Try for lock, rescheduling.
+ for(i=0; i < PASSIVE_SPIN; i++) {
+ while(l->key == MUTEX_UNLOCKED)
+ if(runtime·cas(&l->key, MUTEX_UNLOCKED, wait))
+ return;
+ runtime·osyield();
+ }
- // Try for the lock again.
- goto again;
+ // Sleep.
+ v = runtime·xchg(&l->key, MUTEX_SLEEPING);
+ if(v == MUTEX_UNLOCKED)
+ return;
+ wait = MUTEX_SLEEPING;
+ futexsleep(&l->key, MUTEX_SLEEPING);
+ }
}
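
The three-state lock above is easier to follow next to a user-space model. The sketch below reproduces the UNLOCKED/LOCKED/SLEEPING state machine with sync/atomic; a buffered channel stands in for futexsleep/futexwakeup and runtime.Gosched for procyield, so it illustrates the state transitions rather than the kernel interaction, and it is not a replacement for sync.Mutex.

// futexlock_sketch.go — user-space model of the three-state lock.
package main

import (
	"fmt"
	"runtime"
	"sync"
	"sync/atomic"
)

const (
	mutexUnlocked = 0
	mutexLocked   = 1
	mutexSleeping = 2
	activeSpin    = 4
)

type futexLock struct {
	key  uint32
	wake chan struct{} // stand-in for the futex: one token ~ one FUTEX_WAKE
}

func newFutexLock() *futexLock { return &futexLock{wake: make(chan struct{}, 1)} }

func (l *futexLock) lock() {
	// Speculative grab.
	v := atomic.SwapUint32(&l.key, mutexLocked)
	if v == mutexUnlocked {
		return
	}
	// wait remembers whether a sleeper may be parked here; once we have
	// seen SLEEPING we must keep writing SLEEPING so the wakeup is not lost.
	wait := v
	for {
		// Try for the lock, spinning (the runtime only spins on multiprocessors).
		for i := 0; i < activeSpin; i++ {
			if atomic.CompareAndSwapUint32(&l.key, mutexUnlocked, wait) {
				return
			}
			runtime.Gosched() // stand-in for procyield/osyield
		}
		// Advertise a sleeper, then wait for a wakeup token.
		if atomic.SwapUint32(&l.key, mutexSleeping) == mutexUnlocked {
			return
		}
		wait = mutexSleeping
		<-l.wake // "futexsleep"
	}
}

func (l *futexLock) unlock() {
	v := atomic.SwapUint32(&l.key, mutexUnlocked)
	switch v {
	case mutexUnlocked:
		panic("unlock of unlocked lock")
	case mutexSleeping:
		select { // "futexwakeup(&l->key, 1)"
		case l.wake <- struct{}{}:
		default:
		}
	}
}

func main() {
	l := newFutexLock()
	var wg sync.WaitGroup
	counter := 0
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 1000; j++ {
				l.lock()
				counter++
				l.unlock()
			}
		}()
	}
	wg.Wait()
	fmt.Println(counter) // 8000
}
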
static void
@@ -137,34 +176,26 @@ futexunlock(Lock *l)
{
uint32 v;
- // Atomically get value and clear lock bit.
-again:
- v = l->key;
- if((v&1) == 0)
+ v = runtime·xchg(&l->key, MUTEX_UNLOCKED);
+ if(v == MUTEX_UNLOCKED)
runtime·throw("unlock of unlocked lock");
- if(!runtime·cas(&l->key, v, v&~1))
- goto again;
-
- // If there were waiters, wake one.
- if(v & ~1)
- futexwakeup(&l->key);
+ if(v == MUTEX_SLEEPING)
+ futexwakeup(&l->key, 1);
}
void
runtime·lock(Lock *l)
{
- if(m->locks < 0)
- runtime·throw("lock count");
- m->locks++;
+ if(m->locks++ < 0)
+ runtime·throw("runtime·lock: lock count");
futexlock(l);
}
void
runtime·unlock(Lock *l)
{
- m->locks--;
- if(m->locks < 0)
- runtime·throw("lock count");
+ if(--m->locks < 0)
+ runtime·throw("runtime·unlock: lock count");
futexunlock(l);
}
@@ -175,35 +206,24 @@ runtime·destroylock(Lock*)
// One-time notifications.
-//
-// Since the lock/unlock implementation already
-// takes care of sleeping in the kernel, we just reuse it.
-// (But it's a weird use, so it gets its own interface.)
-//
-// We use a lock to represent the event:
-// unlocked == event has happened.
-// Thus the lock starts out locked, and to wait for the
-// event you try to lock the lock. To signal the event,
-// you unlock the lock.
-
void
runtime·noteclear(Note *n)
{
- n->lock.key = 0; // memset(n, 0, sizeof *n)
- futexlock(&n->lock);
+ n->state = 0;
}
void
runtime·notewakeup(Note *n)
{
- futexunlock(&n->lock);
+ runtime·xchg(&n->state, 1);
+ futexwakeup(&n->state, 1<<30);
}
void
runtime·notesleep(Note *n)
{
- futexlock(&n->lock);
- futexunlock(&n->lock); // Let other sleepers find out too.
+ while(runtime·atomicload(&n->state) == 0)
+ futexsleep(&n->state, 0);
}
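
The new Note is the same idea reduced to a single word: store 1, wake every sleeper, and have sleepers re-check the word before parking. A hedged Go sketch follows, with a closed channel standing in for the wake-everyone futexwakeup(..., 1<<30) and noteclear omitted, so this version is one-shot.

// note_sketch.go — one-time notification with a single state word.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type note struct {
	state uint32
	wake  chan struct{} // closed on wakeup: wakes every sleeper at once
}

func newNote() *note { return &note{wake: make(chan struct{})} }

func (n *note) wakeup() {
	atomic.StoreUint32(&n.state, 1)
	close(n.wake)
}

func (n *note) sleep() {
	for atomic.LoadUint32(&n.state) == 0 {
		<-n.wake // "futexsleep(&n->state, 0)"
	}
}

func main() {
	n := newNote()
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			n.sleep()
			fmt.Println("woken", id)
		}(i)
	}
	n.wakeup()
	wg.Wait()
}
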
diff --git a/src/pkg/runtime/malloc.goc b/src/pkg/runtime/malloc.goc
index 49ab24df8..b9fe36db6 100644
--- a/src/pkg/runtime/malloc.goc
+++ b/src/pkg/runtime/malloc.goc
@@ -18,21 +18,6 @@ extern MStats mstats; // defined in extern.go
extern volatile int32 runtime·MemProfileRate;
-// Same algorithm from chan.c, but a different
-// instance of the static uint32 x.
-// Not protected by a lock - let the threads use
-// the same random number if they like.
-static uint32
-fastrand1(void)
-{
- static uint32 x = 0x49f6428aUL;
-
- x += x;
- if(x & 0x80000000L)
- x ^= 0x88888eefUL;
- return x;
-}
-
// Allocate an object of at least size bytes.
// Small objects are allocated from the per-thread cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
@@ -53,18 +38,18 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
if(size == 0)
size = 1;
- mstats.nmalloc++;
+ c = m->mcache;
+ c->local_nmalloc++;
if(size <= MaxSmallSize) {
// Allocate from mcache free lists.
sizeclass = runtime·SizeToClass(size);
size = runtime·class_to_size[sizeclass];
- c = m->mcache;
v = runtime·MCache_Alloc(c, sizeclass, size, zeroed);
if(v == nil)
runtime·throw("out of memory");
- mstats.alloc += size;
- mstats.total_alloc += size;
- mstats.by_size[sizeclass].nmalloc++;
+ c->local_alloc += size;
+ c->local_total_alloc += size;
+ c->local_by_size[sizeclass].nmalloc++;
} else {
// TODO(rsc): Report tracebacks for very large allocations.
@@ -76,8 +61,8 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
if(s == nil)
runtime·throw("out of memory");
size = npages<<PageShift;
- mstats.alloc += size;
- mstats.total_alloc += size;
+ c->local_alloc += size;
+ c->local_total_alloc += size;
v = (void*)(s->start << PageShift);
// setup for mark sweep
@@ -97,7 +82,7 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
// pick next profile time
if(rate > 0x3fffffff) // make 2*rate not overflow
rate = 0x3fffffff;
- m->mcache->next_sample = fastrand1() % (2*rate);
+ m->mcache->next_sample = runtime·fastrand1() % (2*rate);
profile:
runtime·setblockspecial(v);
runtime·MProf_Malloc(v, size);
@@ -143,6 +128,7 @@ runtime·free(void *v)
// Find size class for v.
sizeclass = s->sizeclass;
+ c = m->mcache;
if(sizeclass == 0) {
// Large object.
size = s->npages<<PageShift;
@@ -154,7 +140,6 @@ runtime·free(void *v)
runtime·MHeap_Free(&runtime·mheap, s, 1);
} else {
// Small object.
- c = m->mcache;
size = runtime·class_to_size[sizeclass];
if(size > sizeof(uintptr))
((uintptr*)v)[1] = 1; // mark as "needs to be zeroed"
@@ -162,10 +147,10 @@ runtime·free(void *v)
// it might coalesce v and other blocks into a bigger span
// and change the bitmap further.
runtime·markfreed(v, size);
- mstats.by_size[sizeclass].nfree++;
+ c->local_by_size[sizeclass].nfree++;
runtime·MCache_Free(c, v, sizeclass, size);
}
- mstats.alloc -= size;
+ c->local_alloc -= size;
if(prof)
runtime·MProf_Free(v, size);
m->mallocing = 0;
@@ -178,7 +163,7 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
byte *p;
MSpan *s;
- mstats.nlookup++;
+ m->mcache->local_nlookup++;
s = runtime·MHeap_LookupMaybe(&runtime·mheap, v);
if(sp)
*sp = s;
@@ -207,9 +192,10 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
}
n = runtime·class_to_size[s->sizeclass];
- i = ((byte*)v - p)/n;
- if(base)
+ if(base) {
+ i = ((byte*)v - p)/n;
*base = p + i*n;
+ }
if(size)
*size = n;
@@ -229,6 +215,29 @@ runtime·allocmcache(void)
return c;
}
+void
+runtime·purgecachedstats(M* m)
+{
+ MCache *c;
+
+ // Protected by either heap or GC lock.
+ c = m->mcache;
+ mstats.heap_alloc += c->local_cachealloc;
+ c->local_cachealloc = 0;
+ mstats.heap_objects += c->local_objects;
+ c->local_objects = 0;
+ mstats.nmalloc += c->local_nmalloc;
+ c->local_nmalloc = 0;
+ mstats.nfree += c->local_nfree;
+ c->local_nfree = 0;
+ mstats.nlookup += c->local_nlookup;
+ c->local_nlookup = 0;
+ mstats.alloc += c->local_alloc;
+ c->local_alloc = 0;
+ mstats.total_alloc += c->local_total_alloc;
+ c->local_total_alloc = 0;
+}
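
purgecachedstats is the flush half of a common pattern: accumulate counters in a per-thread cache on the hot path and fold them into the shared totals only while a lock is already held. A small user-space Go sketch of that pattern (localStats/globalStats are illustrative names, not runtime types):

// statcache_sketch.go — per-worker counters flushed under a lock.
package main

import (
	"fmt"
	"sync"
)

type localStats struct{ nmalloc, nfree int64 }

type globalStats struct {
	mu             sync.Mutex
	nmalloc, nfree int64
}

// purge folds a worker's cached counters into the global ones and resets them.
func (g *globalStats) purge(c *localStats) {
	g.mu.Lock()
	g.nmalloc += c.nmalloc
	c.nmalloc = 0
	g.nfree += c.nfree
	c.nfree = 0
	g.mu.Unlock()
}

func main() {
	var g globalStats
	var wg sync.WaitGroup
	for w := 0; w < 4; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			var c localStats
			for i := 0; i < 1000; i++ {
				c.nmalloc++ // lock-free hot path
				c.nfree++
			}
			g.purge(&c) // one locked flush, like purgecachedstats at heap-lock time
		}()
	}
	wg.Wait()
	fmt.Println(g.nmalloc, g.nfree) // 4000 4000
}
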
+
uintptr runtime·sizeof_C_MStats = sizeof(MStats);
#define MaxArena32 (2U<<30)
@@ -373,46 +382,28 @@ func new(n uint32) (ret *uint8) {
ret = runtime·mal(n);
}
-// Stack allocator uses malloc/free most of the time,
-// but if we're in the middle of malloc and need stack,
-// we have to do something else to avoid deadlock.
-// In that case, we fall back on a fixed-size free-list
-// allocator, assuming that inside malloc all the stack
-// frames are small, so that all the stack allocations
-// will be a single size, the minimum (right now, 5k).
-static struct {
- Lock;
- FixAlloc;
-} stacks;
-
-enum {
- FixedStack = StackMin,
-};
-
void*
runtime·stackalloc(uint32 n)
{
- void *v;
-
// Stackalloc must be called on scheduler stack, so that we
// never try to grow the stack during the code that stackalloc runs.
// Doing so would cause a deadlock (issue 1547).
if(g != m->g0)
runtime·throw("stackalloc not on scheduler stack");
+ // Stack allocator uses malloc/free most of the time,
+ // but if we're in the middle of malloc and need stack,
+ // we have to do something else to avoid deadlock.
+ // In that case, we fall back on a fixed-size free-list
+ // allocator, assuming that inside malloc all the stack
+ // frames are small, so that all the stack allocations
+ // will be a single size, the minimum (right now, 5k).
if(m->mallocing || m->gcing || n == FixedStack) {
- runtime·lock(&stacks);
- if(stacks.size == 0)
- runtime·FixAlloc_Init(&stacks, n, runtime·SysAlloc, nil, nil);
- if(stacks.size != n) {
- runtime·printf("stackalloc: in malloc, size=%D want %d", (uint64)stacks.size, n);
+ if(n != FixedStack) {
+ runtime·printf("stackalloc: in malloc, size=%d want %d", FixedStack, n);
runtime·throw("stackalloc");
}
- v = runtime·FixAlloc_Alloc(&stacks);
- mstats.stacks_inuse = stacks.inuse;
- mstats.stacks_sys = stacks.sys;
- runtime·unlock(&stacks);
- return v;
+ return runtime·FixAlloc_Alloc(m->stackalloc);
}
return runtime·mallocgc(n, FlagNoProfiling|FlagNoGC, 0, 0);
}
@@ -421,11 +412,7 @@ void
runtime·stackfree(void *v, uintptr n)
{
if(m->mallocing || m->gcing || n == FixedStack) {
- runtime·lock(&stacks);
- runtime·FixAlloc_Free(&stacks, v);
- mstats.stacks_inuse = stacks.inuse;
- mstats.stacks_sys = stacks.sys;
- runtime·unlock(&stacks);
+ runtime·FixAlloc_Free(m->stackalloc, v);
return;
}
runtime·free(v);
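
The fixed-size fallback is essentially FixAlloc: one block size, so free is a push onto a free list and alloc is a pop. A toy Go sketch of that shape, with blockSize standing in for FixedStack; the real FixAlloc hands out raw memory from SysAlloc rather than slices.

// fixalloc_sketch.go — toy fixed-size free-list allocator.
package main

import "fmt"

const blockSize = 5 * 1024 // stand-in for FixedStack

type fixAlloc struct {
	free [][]byte // freed blocks, reused before allocating new ones
}

func (f *fixAlloc) alloc() []byte {
	if n := len(f.free); n > 0 {
		b := f.free[n-1]
		f.free = f.free[:n-1]
		return b
	}
	return make([]byte, blockSize)
}

func (f *fixAlloc) freeBlock(b []byte) {
	f.free = append(f.free, b)
}

func main() {
	var f fixAlloc
	a := f.alloc()
	f.freeBlock(a)
	b := f.alloc()                      // reuses the freed block
	fmt.Println(&a[0] == &b[0], len(b)) // true 5120
}
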
diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h
index 4e2794570..5bc80f4df 100644
--- a/src/pkg/runtime/malloc.h
+++ b/src/pkg/runtime/malloc.h
@@ -80,7 +80,6 @@
// This C code was written with an eye toward translating to Go
// in the future. Methods have the form Type_Method(Type *t, ...).
-typedef struct FixAlloc FixAlloc;
typedef struct MCentral MCentral;
typedef struct MHeap MHeap;
typedef struct MSpan MSpan;
@@ -186,10 +185,10 @@ void runtime·FixAlloc_Free(FixAlloc *f, void *p);
// Shared with Go: if you edit this structure, also edit extern.go.
struct MStats
{
- // General statistics. No locking; approximate.
+ // General statistics.
uint64 alloc; // bytes allocated and still in use
uint64 total_alloc; // bytes allocated (even if freed)
- uint64 sys; // bytes obtained from system (should be sum of xxx_sys below)
+ uint64 sys; // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
uint64 nlookup; // number of pointer lookups
uint64 nmalloc; // number of mallocs
uint64 nfree; // number of frees
@@ -222,7 +221,6 @@ struct MStats
bool debuggc;
// Statistics about allocation size classes.
- // No locking; approximate.
struct {
uint32 size;
uint64 nmalloc;
@@ -268,9 +266,20 @@ struct MCache
{
MCacheList list[NumSizeClasses];
uint64 size;
+ int64 local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap
+ int64 local_objects; // objects allocated (or freed) from cache since last lock of heap
int64 local_alloc; // bytes allocated (or freed) since last lock of heap
- int64 local_objects; // objects allocated (or freed) since last lock of heap
+ int64 local_total_alloc; // bytes allocated (even if freed) since last lock of heap
+ int64 local_nmalloc; // number of mallocs since last lock of heap
+ int64 local_nfree; // number of frees since last lock of heap
+ int64 local_nlookup; // number of pointer lookups since last lock of heap
int32 next_sample; // trigger heap sample after allocating this many bytes
+ // Statistics about allocation size classes since last lock of heap
+ struct {
+ int64 nmalloc;
+ int64 nfree;
+ } local_by_size[NumSizeClasses];
+
};
void* runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed);
@@ -379,6 +388,7 @@ void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover);
void runtime·unmarkspan(void *v, uintptr size);
bool runtime·blockspecial(void*);
void runtime·setblockspecial(void*);
+void runtime·purgecachedstats(M*);
enum
{
diff --git a/src/pkg/runtime/mcache.c b/src/pkg/runtime/mcache.c
index e40621186..711e938fc 100644
--- a/src/pkg/runtime/mcache.c
+++ b/src/pkg/runtime/mcache.c
@@ -48,7 +48,7 @@ runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed)
v->next = nil;
}
}
- c->local_alloc += size;
+ c->local_cachealloc += size;
c->local_objects++;
return v;
}
@@ -90,7 +90,7 @@ runtime·MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size)
l->list = p;
l->nlist++;
c->size += size;
- c->local_alloc -= size;
+ c->local_cachealloc -= size;
c->local_objects--;
if(l->nlist >= MaxMCacheListLen) {
diff --git a/src/pkg/runtime/mem.go b/src/pkg/runtime/mem.go
index c3316d44c..93d155a7f 100644
--- a/src/pkg/runtime/mem.go
+++ b/src/pkg/runtime/mem.go
@@ -62,8 +62,13 @@ func init() {
}
// MemStats holds statistics about the memory system.
-// The statistics are only approximate, as they are not interlocked on update.
+// The statistics may be out of date, as the information is
+// updated lazily from per-thread caches.
+// Use UpdateMemStats to bring the statistics up to date.
var MemStats MemStatsType
+// UpdateMemStats brings MemStats up to date.
+func UpdateMemStats()
+
// GC runs a garbage collection.
func GC()
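
From the caller's side the new API looks like the example below. Note it targets the interface introduced by this change (the runtime.MemStats variable plus runtime.UpdateMemStats); later Go releases replaced this pair with runtime.ReadMemStats.

// memstats_example.go — reading memory statistics with the API in this patch.
package main

import (
	"fmt"
	"runtime"
)

func main() {
	data := make([]byte, 1<<20) // allocate something so the numbers move
	_ = data

	// The statistics are folded in lazily from per-thread caches,
	// so bring them up to date before reading.
	runtime.UpdateMemStats()
	fmt.Println("alloc bytes:", runtime.MemStats.Alloc)
}
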
diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c
index ac6a1fa40..6325aadc6 100644
--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -484,6 +484,7 @@ sweep(void)
// Mark freed; restore block boundary bit.
*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+ c = m->mcache;
if(s->sizeclass == 0) {
// Free large span.
runtime·unmarkspan(p, 1<<PageShift);
@@ -491,14 +492,13 @@ sweep(void)
runtime·MHeap_Free(&runtime·mheap, s, 1);
} else {
// Free small object.
- c = m->mcache;
if(size > sizeof(uintptr))
((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
- mstats.by_size[s->sizeclass].nfree++;
+ c->local_by_size[s->sizeclass].nfree++;
runtime·MCache_Free(c, p, s->sizeclass, size);
}
- mstats.alloc -= size;
- mstats.nfree++;
+ c->local_alloc -= size;
+ c->local_nfree++;
}
}
}
@@ -533,14 +533,26 @@ cachestats(void)
{
M *m;
MCache *c;
+ int32 i;
+ uint64 stacks_inuse;
+ uint64 stacks_sys;
+ stacks_inuse = 0;
+ stacks_sys = 0;
for(m=runtime·allm; m; m=m->alllink) {
+ runtime·purgecachedstats(m);
+ stacks_inuse += m->stackalloc->inuse;
+ stacks_sys += m->stackalloc->sys;
c = m->mcache;
- mstats.heap_alloc += c->local_alloc;
- c->local_alloc = 0;
- mstats.heap_objects += c->local_objects;
- c->local_objects = 0;
+ for(i=0; i<nelem(c->local_by_size); i++) {
+ mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
+ c->local_by_size[i].nmalloc = 0;
+ mstats.by_size[i].nfree += c->local_by_size[i].nfree;
+ c->local_by_size[i].nfree = 0;
+ }
}
+ mstats.stacks_inuse = stacks_inuse;
+ mstats.stacks_sys = stacks_sys;
}
void
@@ -603,6 +615,7 @@ runtime·gc(int32 force)
sweep();
t2 = runtime·nanotime();
stealcache();
+ cachestats();
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
m->gcing = 0;
@@ -650,6 +663,22 @@ runtime·gc(int32 force)
runtime·gc(1);
}
+void
+runtime·UpdateMemStats(void)
+{
+ // Have to acquire gcsema to stop the world,
+ // because stoptheworld can only be used by
+ // one goroutine at a time, and there might be
+ // a pending garbage collection already calling it.
+ runtime·semacquire(&gcsema);
+ m->gcing = 1;
+ runtime·stoptheworld();
+ cachestats();
+ m->gcing = 0;
+ runtime·semrelease(&gcsema);
+ runtime·starttheworld();
+}
+
static void
runfinq(void)
{
diff --git a/src/pkg/runtime/mheap.c b/src/pkg/runtime/mheap.c
index dde31ce34..37d505681 100644
--- a/src/pkg/runtime/mheap.c
+++ b/src/pkg/runtime/mheap.c
@@ -57,10 +57,7 @@ runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct)
MSpan *s;
runtime·lock(h);
- mstats.heap_alloc += m->mcache->local_alloc;
- m->mcache->local_alloc = 0;
- mstats.heap_objects += m->mcache->local_objects;
- m->mcache->local_objects = 0;
+ runtime·purgecachedstats(m);
s = MHeap_AllocLocked(h, npage, sizeclass);
if(s != nil) {
mstats.heap_inuse += npage<<PageShift;
@@ -258,10 +255,7 @@ void
runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct)
{
runtime·lock(h);
- mstats.heap_alloc += m->mcache->local_alloc;
- m->mcache->local_alloc = 0;
- mstats.heap_objects += m->mcache->local_objects;
- m->mcache->local_objects = 0;
+ runtime·purgecachedstats(m);
mstats.heap_inuse -= s->npages<<PageShift;
if(acct) {
mstats.heap_alloc -= s->npages<<PageShift;
diff --git a/src/pkg/runtime/plan9/mem.c b/src/pkg/runtime/plan9/mem.c
index 9dfdf2cc3..f795b2c01 100644
--- a/src/pkg/runtime/plan9/mem.c
+++ b/src/pkg/runtime/plan9/mem.c
@@ -8,6 +8,7 @@
extern byte end[];
static byte *bloc = { end };
+static Lock memlock;
enum
{
@@ -19,23 +20,31 @@ runtime·SysAlloc(uintptr nbytes)
{
uintptr bl;
+ runtime·lock(&memlock);
+ mstats.sys += nbytes;
// Plan 9 sbrk from /sys/src/libc/9sys/sbrk.c
bl = ((uintptr)bloc + Round) & ~Round;
- if(runtime·brk_((void*)(bl + nbytes)) < 0)
+ if(runtime·brk_((void*)(bl + nbytes)) < 0) {
+ runtime·unlock(&memlock);
return (void*)-1;
+ }
bloc = (byte*)bl + nbytes;
+ runtime·unlock(&memlock);
return (void*)bl;
}
void
runtime·SysFree(void *v, uintptr nbytes)
{
+ runtime·lock(&memlock);
+ mstats.sys -= nbytes;
// from tiny/mem.c
// Push pointer back if this is a free
// of the most recent SysAlloc.
nbytes += (nbytes + Round) & ~Round;
if(bloc == (byte*)v+nbytes)
bloc -= nbytes;
+ runtime·unlock(&memlock);
}
void
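
The Plan 9 allocator is a bump pointer behind a lock, where only the most recent allocation can be given back. A rough Go model of the same shape, with a byte slice standing in for the program break:

// brk_sketch.go — bump-pointer SysAlloc/SysFree model.
package main

import (
	"fmt"
	"sync"
)

const round = 4096 - 1 // stand-in for Round

type brkHeap struct {
	mu   sync.Mutex
	mem  []byte
	bloc int // current "break" offset
}

func (h *brkHeap) sysAlloc(n int) []byte {
	h.mu.Lock()
	defer h.mu.Unlock()
	bl := (h.bloc + round) &^ round // round up, like (bloc + Round) & ~Round
	if bl+n > len(h.mem) {
		return nil
	}
	h.bloc = bl + n
	return h.mem[bl : bl+n]
}

func (h *brkHeap) sysFree(p []byte, n int) {
	h.mu.Lock()
	defer h.mu.Unlock()
	// Only the most recent allocation can be handed back.
	if h.bloc >= n && &h.mem[h.bloc-1] == &p[n-1] {
		h.bloc -= n
	}
}

func main() {
	h := &brkHeap{mem: make([]byte, 1<<20)}
	a := h.sysAlloc(100)
	h.sysFree(a, 100)
	b := h.sysAlloc(100)
	fmt.Println(&a[0] == &b[0]) // true: the free pushed the break back
}
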
diff --git a/src/pkg/runtime/plan9/thread.c b/src/pkg/runtime/plan9/thread.c
index ef9a23e8e..b091c5978 100644
--- a/src/pkg/runtime/plan9/thread.c
+++ b/src/pkg/runtime/plan9/thread.c
@@ -47,11 +47,11 @@ runtime·exit(int32)
pid = pid/10;
}
p = buf;
- runtime·mcpy((void*)p, (void*)"/proc/", 6);
+ runtime·memmove((void*)p, (void*)"/proc/", 6);
p += 6;
for(q--; q >= tmp;)
*p++ = *q--;
- runtime·mcpy((void*)p, (void*)"/notepg", 7);
+ runtime·memmove((void*)p, (void*)"/notepg", 7);
/* post interrupt note */
fd = runtime·open(buf, OWRITE);
@@ -167,3 +167,14 @@ os·sigpipe(void)
{
runtime·throw("too many writes on closed pipe");
}
+
+/*
+ * placeholder - once notes are implemented,
+ * a signal generating a panic must appear as
+ * a call to this function for correct handling by
+ * traceback.
+ */
+void
+runtime·sigpanic(void)
+{
+}
diff --git a/src/pkg/runtime/print.c b/src/pkg/runtime/print.c
index b8069aa39..3ce779495 100644
--- a/src/pkg/runtime/print.c
+++ b/src/pkg/runtime/print.c
@@ -320,7 +320,7 @@ runtime·printpointer(void *p)
void
runtime·printstring(String v)
{
- extern int32 runtime·maxstring;
+ extern uint32 runtime·maxstring;
if(v.len > runtime·maxstring) {
runtime·write(2, "[invalid string]", 16);
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c
index a8f3a796a..6d8f6990b 100644
--- a/src/pkg/runtime/proc.c
+++ b/src/pkg/runtime/proc.c
@@ -28,10 +28,10 @@ int32 runtime·gcwaiting;
// Go scheduler
//
// The go scheduler's job is to match ready-to-run goroutines (`g's)
-// with waiting-for-work schedulers (`m's). If there are ready gs
-// and no waiting ms, ready() will start a new m running in a new
-// OS thread, so that all ready gs can run simultaneously, up to a limit.
-// For now, ms never go away.
+// with waiting-for-work schedulers (`m's). If there are ready g's
+// and no waiting m's, ready() will start a new m running in a new
+// OS thread, so that all ready g's can run simultaneously, up to a limit.
+// For now, m's never go away.
//
// By default, Go keeps only one kernel thread (m) running user code
// at a single time; other threads may be blocked in the operating system.
@@ -41,10 +41,10 @@ int32 runtime·gcwaiting;
// approximation of the maximum number of cores to use.
//
// Even a program that can run without deadlock in a single process
-// might use more ms if given the chance. For example, the prime
-// sieve will use as many ms as there are primes (up to runtime·sched.mmax),
+// might use more m's if given the chance. For example, the prime
+// sieve will use as many m's as there are primes (up to runtime·sched.mmax),
// allowing different stages of the pipeline to execute in parallel.
-// We could revisit this choice, only kicking off new ms for blocking
+// We could revisit this choice, only kicking off new m's for blocking
// system calls, but that would limit the amount of parallel computation
// that go would try to do.
//
@@ -55,27 +55,75 @@ int32 runtime·gcwaiting;
struct Sched {
Lock;
- G *gfree; // available gs (status == Gdead)
+ G *gfree; // available g's (status == Gdead)
+ int32 goidgen;
- G *ghead; // gs waiting to run
+ G *ghead; // g's waiting to run
G *gtail;
- int32 gwait; // number of gs waiting to run
- int32 gcount; // number of gs that are alive
+ int32 gwait; // number of g's waiting to run
+ int32 gcount; // number of g's that are alive
+ int32 grunning; // number of g's running on cpu or in syscall
- M *mhead; // ms waiting for work
- int32 mwait; // number of ms waiting for work
- int32 mcount; // number of ms that have been created
- int32 mcpu; // number of ms executing on cpu
- int32 mcpumax; // max number of ms allowed on cpu
- int32 msyscall; // number of ms in system calls
+ M *mhead; // m's waiting for work
+ int32 mwait; // number of m's waiting for work
+ int32 mcount; // number of m's that have been created
- int32 predawn; // running initialization, don't run new gs.
+ volatile uint32 atomic; // atomic scheduling word (see below)
+
+ int32 predawn; // running initialization, don't run new g's.
int32 profilehz; // cpu profiling rate
- Note stopped; // one g can wait here for ms to stop
- int32 waitstop; // after setting this flag
+ Note stopped; // one g can set waitstop and wait here for m's to stop
+};
+
+// The atomic word in sched is an atomic uint32 that
+// holds these fields.
+//
+// [15 bits] mcpu number of m's executing on cpu
+// [15 bits] mcpumax max number of m's allowed on cpu
+// [1 bit] waitstop some g is waiting on stopped
+// [1 bit] gwaiting gwait != 0
+//
+// These fields are the information needed by entersyscall
+// and exitsyscall to decide whether to coordinate with the
+// scheduler. Packing them into a single machine word lets
+// them use a fast path with a single atomic read/write and
+// no lock/unlock. This greatly reduces contention in
+// syscall- or cgo-heavy multithreaded programs.
+//
+// Except for entersyscall and exitsyscall, the manipulations
+// to these fields only happen while holding the schedlock,
+// so the routines holding schedlock only need to worry about
+// what entersyscall and exitsyscall do, not the other routines
+// (which also use the schedlock).
+//
+// In particular, entersyscall and exitsyscall only read mcpumax,
+// waitstop, and gwaiting. They never write them. Thus, writes to those
+// fields can be done (holding schedlock) without fear of write conflicts.
+// There may still be logic conflicts: for example, the setting of waitstop must
+// be conditioned on mcpu >= mcpumax or else the wait may be a
+// spurious sleep. The Promela model in proc.p verifies these accesses.
+enum {
+ mcpuWidth = 15,
+ mcpuMask = (1<<mcpuWidth) - 1,
+ mcpuShift = 0,
+ mcpumaxShift = mcpuShift + mcpuWidth,
+ waitstopShift = mcpumaxShift + mcpuWidth,
+ gwaitingShift = waitstopShift+1,
+
+ // The max value of GOMAXPROCS is constrained
+ // by the max value we can store in the bit fields
+ // of the atomic word. Reserve a few high values
+ // so that we can detect accidental decrement
+ // beyond zero.
+ maxgomaxprocs = mcpuMask - 10,
};
+#define atomic_mcpu(v) (((v)>>mcpuShift)&mcpuMask)
+#define atomic_mcpumax(v) (((v)>>mcpumaxShift)&mcpuMask)
+#define atomic_waitstop(v) (((v)>>waitstopShift)&1)
+#define atomic_gwaiting(v) (((v)>>gwaitingShift)&1)
+
Sched runtime·sched;
int32 runtime·gomaxprocs;
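
A compact Go model of the packed word may help: the same field layout, a CAS loop for setmcpumax, and canaddmcpu as a single bounded increment. This is a sketch of the bit manipulation only; in the runtime the word is runtime·sched.atomic and is driven by the scheduler and the syscall fast paths.

// schedword_sketch.go — one atomic word holding mcpu, mcpumax and two flags.
package main

import (
	"fmt"
	"sync/atomic"
)

const (
	mcpuWidth     = 15
	mcpuMask      = 1<<mcpuWidth - 1
	mcpuShift     = 0
	mcpumaxShift  = mcpuShift + mcpuWidth
	waitstopShift = mcpumaxShift + mcpuWidth
	gwaitingShift = waitstopShift + 1
)

func mcpu(v uint32) uint32    { return (v >> mcpuShift) & mcpuMask }
func mcpumax(v uint32) uint32 { return (v >> mcpumaxShift) & mcpuMask }
func waitstop(v uint32) bool  { return (v>>waitstopShift)&1 != 0 }
func gwaiting(v uint32) bool  { return (v>>gwaitingShift)&1 != 0 }

var sched uint32

// setmcpumax rewrites only the mcpumax field, leaving the rest of the word alone.
func setmcpumax(n uint32) {
	for {
		v := atomic.LoadUint32(&sched)
		w := v &^ (mcpuMask << mcpumaxShift)
		w |= n << mcpumaxShift
		if atomic.CompareAndSwapUint32(&sched, v, w) {
			return
		}
	}
}

// canaddmcpu increments mcpu only if it is below mcpumax, in one CAS.
func canaddmcpu() bool {
	for {
		v := atomic.LoadUint32(&sched)
		if mcpu(v) >= mcpumax(v) {
			return false
		}
		if atomic.CompareAndSwapUint32(&sched, v, v+1<<mcpuShift) {
			return true
		}
	}
}

func main() {
	setmcpumax(2)
	fmt.Println(canaddmcpu(), canaddmcpu(), canaddmcpu()) // true true false
	v := atomic.LoadUint32(&sched)
	fmt.Println(mcpu(v), mcpumax(v), waitstop(v), gwaiting(v)) // 2 2 false false
}
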
@@ -93,9 +141,25 @@ static void mput(M*); // put/get on mhead
static M* mget(G*);
static void gfput(G*); // put/get on gfree
static G* gfget(void);
-static void matchmg(void); // match ms to gs
+static void matchmg(void); // match m's to g's
static void readylocked(G*); // ready, but sched is locked
static void mnextg(M*, G*);
+static void mcommoninit(M*);
+
+void
+setmcpumax(uint32 n)
+{
+ uint32 v, w;
+
+ for(;;) {
+ v = runtime·sched.atomic;
+ w = v;
+ w &= ~(mcpuMask<<mcpumaxShift);
+ w |= n<<mcpumaxShift;
+ if(runtime·cas(&runtime·sched.atomic, v, w))
+ break;
+ }
+}
// The bootstrap sequence is:
//
@@ -115,10 +179,10 @@ runtime·schedinit(void)
int32 n;
byte *p;
- runtime·allm = m;
m->nomemprof++;
-
runtime·mallocinit();
+ mcommoninit(m);
+
runtime·goargs();
runtime·goenvs();
@@ -129,10 +193,12 @@ runtime·schedinit(void)
runtime·gomaxprocs = 1;
p = runtime·getenv("GOMAXPROCS");
- if(p != nil && (n = runtime·atoi(p)) != 0)
+ if(p != nil && (n = runtime·atoi(p)) != 0) {
+ if(n > maxgomaxprocs)
+ n = maxgomaxprocs;
runtime·gomaxprocs = n;
- runtime·sched.mcpumax = runtime·gomaxprocs;
- runtime·sched.mcount = 1;
+ }
+ setmcpumax(runtime·gomaxprocs);
runtime·sched.predawn = 1;
m->nomemprof--;
@@ -167,7 +233,7 @@ runtime·initdone(void)
mstats.enablegc = 1;
// If main·init_function started other goroutines,
- // kick off new ms to handle them, like ready
+ // kick off new m's to handle them, like ready
// would have, had it not been pre-dawn.
schedlock();
matchmg();
@@ -206,6 +272,37 @@ runtime·idlegoroutine(void)
g->idlem = m;
}
+static void
+mcommoninit(M *m)
+{
+ // Add to runtime·allm so garbage collector doesn't free m
+ // when it is just in a register or thread-local storage.
+ m->alllink = runtime·allm;
+ // runtime·Cgocalls() iterates over allm w/o schedlock,
+ // so we need to publish it safely.
+ runtime·atomicstorep(&runtime·allm, m);
+
+ m->id = runtime·sched.mcount++;
+ m->fastrand = 0x49f6428aUL + m->id;
+ m->stackalloc = runtime·malloc(sizeof(*m->stackalloc));
+ runtime·FixAlloc_Init(m->stackalloc, FixedStack, runtime·SysAlloc, nil, nil);
+}
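
mcommoninit publishes the new M with an atomic store so readers (such as the cgo call counter) can walk allm without the scheduler lock. The same publication pattern in user-space Go, using unsafe.Pointer plus atomic stores; the list and mutex here are illustrative, and recent Go would use atomic.Pointer instead.

// publish_sketch.go — lock-free readable singly linked list, locked writers.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"unsafe"
)

type m struct {
	id      int
	alllink *m
}

var (
	allm    unsafe.Pointer // *m, read without the lock
	schedMu sync.Mutex
	nextID  int
)

func mcommoninit() *m {
	schedMu.Lock()
	defer schedMu.Unlock()
	mm := &m{id: nextID}
	nextID++
	mm.alllink = (*m)(atomic.LoadPointer(&allm))
	// Publish safely: the node is fully initialized before the head moves.
	atomic.StorePointer(&allm, unsafe.Pointer(mm))
	return mm
}

func countM() int {
	n := 0
	for p := (*m)(atomic.LoadPointer(&allm)); p != nil; p = p.alllink {
		n++
	}
	return n
}

func main() {
	for i := 0; i < 3; i++ {
		mcommoninit()
	}
	fmt.Println(countM()) // 3
}
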
+
+// Try to increment mcpu. Report whether succeeded.
+static bool
+canaddmcpu(void)
+{
+ uint32 v;
+
+ for(;;) {
+ v = runtime·sched.atomic;
+ if(atomic_mcpu(v) >= atomic_mcpumax(v))
+ return 0;
+ if(runtime·cas(&runtime·sched.atomic, v, v+(1<<mcpuShift)))
+ return 1;
+ }
+}
+
// Put on `g' queue. Sched must be locked.
static void
gput(G *g)
@@ -213,11 +310,11 @@ gput(G *g)
M *m;
// If g is wired, hand it off directly.
- if(runtime·sched.mcpu < runtime·sched.mcpumax && (m = g->lockedm) != nil) {
+ if((m = g->lockedm) != nil && canaddmcpu()) {
mnextg(m, g);
return;
}
-
+
// If g is the idle goroutine for an m, hand it off.
if(g->idlem != nil) {
if(g->idlem->idleg != nil) {
@@ -236,7 +333,18 @@ gput(G *g)
else
runtime·sched.gtail->schedlink = g;
runtime·sched.gtail = g;
- runtime·sched.gwait++;
+
+ // increment gwait.
+ // if it transitions to nonzero, set atomic gwaiting bit.
+ if(runtime·sched.gwait++ == 0)
+ runtime·xadd(&runtime·sched.atomic, 1<<gwaitingShift);
+}
+
+// Report whether gget would return something.
+static bool
+haveg(void)
+{
+ return runtime·sched.ghead != nil || m->idleg != nil;
}
// Get from `g' queue. Sched must be locked.
@@ -250,7 +358,10 @@ gget(void)
runtime·sched.ghead = g->schedlink;
if(runtime·sched.ghead == nil)
runtime·sched.gtail = nil;
- runtime·sched.gwait--;
+ // decrement gwait.
+ // if it transitions to zero, clear atomic gwaiting bit.
+ if(--runtime·sched.gwait == 0)
+ runtime·xadd(&runtime·sched.atomic, -1<<gwaitingShift);
} else if(m->idleg != nil) {
g = m->idleg;
m->idleg = nil;
@@ -335,10 +446,11 @@ newprocreadylocked(G *g)
}
// Pass g to m for running.
+// Caller has already incremented mcpu.
static void
mnextg(M *m, G *g)
{
- runtime·sched.mcpu++;
+ runtime·sched.grunning++;
m->nextg = g;
if(m->waitnextg) {
m->waitnextg = 0;
@@ -350,18 +462,19 @@ mnextg(M *m, G *g)
// Get the next goroutine that m should run.
// Sched must be locked on entry, is unlocked on exit.
-// Makes sure that at most $GOMAXPROCS gs are
+// Makes sure that at most $GOMAXPROCS g's are
// running on cpus (not in system calls) at any given time.
static G*
nextgandunlock(void)
{
G *gp;
+ uint32 v;
- if(runtime·sched.mcpu < 0)
- runtime·throw("negative runtime·sched.mcpu");
+ if(atomic_mcpu(runtime·sched.atomic) >= maxgomaxprocs)
+ runtime·throw("negative mcpu");
- // If there is a g waiting as m->nextg,
- // mnextg took care of the runtime·sched.mcpu++.
+ // If there is a g waiting as m->nextg, the mcpu++
+ // happened before it was passed to mnextg.
if(m->nextg != nil) {
gp = m->nextg;
m->nextg = nil;
@@ -373,29 +486,62 @@ nextgandunlock(void)
// We can only run one g, and it's not available.
// Make sure some other cpu is running to handle
// the ordinary run queue.
- if(runtime·sched.gwait != 0)
+ if(runtime·sched.gwait != 0) {
matchmg();
+ // m->lockedg might have been on the queue.
+ if(m->nextg != nil) {
+ gp = m->nextg;
+ m->nextg = nil;
+ schedunlock();
+ return gp;
+ }
+ }
} else {
// Look for work on global queue.
- while(runtime·sched.mcpu < runtime·sched.mcpumax && (gp=gget()) != nil) {
+ while(haveg() && canaddmcpu()) {
+ gp = gget();
+ if(gp == nil)
+ runtime·throw("gget inconsistency");
+
if(gp->lockedm) {
mnextg(gp->lockedm, gp);
continue;
}
- runtime·sched.mcpu++; // this m will run gp
+ runtime·sched.grunning++;
schedunlock();
return gp;
}
- // Otherwise, wait on global m queue.
+
+ // The while loop ended either because the g queue is empty
+ // or because we have maxed out our m procs running go
+ // code (mcpu >= mcpumax). We need to check that
+ // concurrent actions by entersyscall/exitsyscall cannot
+ // invalidate the decision to end the loop.
+ //
+ // We hold the sched lock, so no one else is manipulating the
+ // g queue or changing mcpumax. Entersyscall can decrement
+ // mcpu, but if it does so when there is something on the g queue,
+ // the gwait bit will be set, so entersyscall will take the slow path
+ // and use the sched lock. So it cannot invalidate our decision.
+ //
+ // Wait on global m queue.
mput(m);
}
- if(runtime·sched.mcpu == 0 && runtime·sched.msyscall == 0)
+
+ v = runtime·atomicload(&runtime·sched.atomic);
+ if(runtime·sched.grunning == 0)
runtime·throw("all goroutines are asleep - deadlock!");
m->nextg = nil;
m->waitnextg = 1;
runtime·noteclear(&m->havenextg);
- if(runtime·sched.waitstop && runtime·sched.mcpu <= runtime·sched.mcpumax) {
- runtime·sched.waitstop = 0;
+
+ // Stoptheworld is waiting for all but its cpu to go to stop.
+ // Entersyscall might have decremented mcpu too, but if so
+ // it will see the waitstop and take the slow path.
+ // Exitsyscall never increments mcpu beyond mcpumax.
+ if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
+ // set waitstop = 0 (known to be 1)
+ runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift);
runtime·notewakeup(&runtime·sched.stopped);
}
schedunlock();
@@ -407,21 +553,34 @@ nextgandunlock(void)
return gp;
}
-// TODO(rsc): Remove. This is only temporary,
-// for the mark and sweep collector.
void
runtime·stoptheworld(void)
{
+ uint32 v;
+
schedlock();
runtime·gcwaiting = 1;
- runtime·sched.mcpumax = 1;
- while(runtime·sched.mcpu > 1) {
+
+ setmcpumax(1);
+
+ // while mcpu > 1
+ for(;;) {
+ v = runtime·sched.atomic;
+ if(atomic_mcpu(v) <= 1)
+ break;
+
// It would be unsafe for multiple threads to be using
// the stopped note at once, but there is only
- // ever one thread doing garbage collection,
- // so this is okay.
+ // ever one thread doing garbage collection.
runtime·noteclear(&runtime·sched.stopped);
- runtime·sched.waitstop = 1;
+ if(atomic_waitstop(v))
+ runtime·throw("invalid waitstop");
+
+ // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above
+ // still being true.
+ if(!runtime·cas(&runtime·sched.atomic, v, v+(1<<waitstopShift)))
+ continue;
+
schedunlock();
runtime·notesleep(&runtime·sched.stopped);
schedlock();
@@ -436,7 +595,7 @@ runtime·starttheworld(void)
{
schedlock();
runtime·gcwaiting = 0;
- runtime·sched.mcpumax = runtime·gomaxprocs;
+ setmcpumax(runtime·gomaxprocs);
matchmg();
schedunlock();
}
@@ -473,7 +632,7 @@ struct CgoThreadStart
void (*fn)(void);
};
-// Kick off new ms as needed (up to mcpumax).
+// Kick off new m's as needed (up to mcpumax).
// There are already `other' other cpus that will
// start looking for goroutines shortly.
// Sched is locked.
@@ -484,17 +643,17 @@ matchmg(void)
if(m->mallocing || m->gcing)
return;
- while(runtime·sched.mcpu < runtime·sched.mcpumax && (g = gget()) != nil){
- M *m;
+
+ while(haveg() && canaddmcpu()) {
+ g = gget();
+ if(g == nil)
+ runtime·throw("gget inconsistency");
// Find the m that will run g.
+ M *m;
if((m = mget(g)) == nil){
m = runtime·malloc(sizeof(M));
- // Add to runtime·allm so garbage collector doesn't free m
- // when it is just in a register or thread-local storage.
- m->alllink = runtime·allm;
- runtime·allm = m;
- m->id = runtime·sched.mcount++;
+ mcommoninit(m);
if(runtime·iscgo) {
CgoThreadStart ts;
@@ -528,6 +687,7 @@ static void
schedule(G *gp)
{
int32 hz;
+ uint32 v;
schedlock();
if(gp != nil) {
@@ -536,10 +696,13 @@ schedule(G *gp)
// Just finished running gp.
gp->m = nil;
- runtime·sched.mcpu--;
+ runtime·sched.grunning--;
+
+ // atomic { mcpu-- }
+ v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift);
+ if(atomic_mcpu(v) > maxgomaxprocs)
+ runtime·throw("negative mcpu in scheduler");
- if(runtime·sched.mcpu < 0)
- runtime·throw("runtime·sched.mcpu < 0 in scheduler");
switch(gp->status){
case Grunnable:
case Gdead:
@@ -574,7 +737,7 @@ schedule(G *gp)
gp->status = Grunning;
m->curg = gp;
gp->m = m;
-
+
// Check whether the profiler needs to be turned on or off.
hz = runtime·sched.profilehz;
if(m->profilehz != hz)
@@ -618,31 +781,50 @@ runtime·gosched(void)
void
runtime·entersyscall(void)
{
+ uint32 v;
+
if(runtime·sched.predawn)
return;
- schedlock();
- g->status = Gsyscall;
- runtime·sched.mcpu--;
- runtime·sched.msyscall++;
- if(runtime·sched.gwait != 0)
- matchmg();
-
- if(runtime·sched.waitstop && runtime·sched.mcpu <= runtime·sched.mcpumax) {
- runtime·sched.waitstop = 0;
- runtime·notewakeup(&runtime·sched.stopped);
- }
// Leave SP around for gc and traceback.
- // Do before schedunlock so that gc
- // never sees Gsyscall with wrong stack.
runtime·gosave(&g->sched);
g->gcsp = g->sched.sp;
g->gcstack = g->stackbase;
g->gcguard = g->stackguard;
+ g->status = Gsyscall;
if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) {
- runtime·printf("entersyscall inconsistent %p [%p,%p]\n", g->gcsp, g->gcguard-StackGuard, g->gcstack);
+ // runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
+ // g->gcsp, g->gcguard-StackGuard, g->gcstack);
runtime·throw("entersyscall");
}
+
+ // Fast path.
+ // The slow path inside the schedlock/schedunlock will get
+ // through without stopping if it does:
+ // mcpu--
+ // gwait not true
+ // waitstop && mcpu <= mcpumax not true
+ // If we can do the same with a single atomic add,
+ // then we can skip the locks.
+ v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift);
+ if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
+ return;
+
+ schedlock();
+ v = runtime·atomicload(&runtime·sched.atomic);
+ if(atomic_gwaiting(v)) {
+ matchmg();
+ v = runtime·atomicload(&runtime·sched.atomic);
+ }
+ if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
+ runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift);
+ runtime·notewakeup(&runtime·sched.stopped);
+ }
+
+ // Re-save sched in case one of the calls
+ // (notewakeup, matchmg) triggered something using it.
+ runtime·gosave(&g->sched);
+
schedunlock();
}
@@ -653,22 +835,28 @@ runtime·entersyscall(void)
void
runtime·exitsyscall(void)
{
+ uint32 v;
+
if(runtime·sched.predawn)
return;
- schedlock();
- runtime·sched.msyscall--;
- runtime·sched.mcpu++;
- // Fast path - if there's room for this m, we're done.
- if(m->profilehz == runtime·sched.profilehz && runtime·sched.mcpu <= runtime·sched.mcpumax) {
+ // Fast path.
+ // If we can do the mcpu++ bookkeeping and
+ // find that we still have mcpu <= mcpumax, then we can
+ // start executing Go code immediately, without having to
+ // schedlock/schedunlock.
+ v = runtime·xadd(&runtime·sched.atomic, (1<<mcpuShift));
+ if(m->profilehz == runtime·sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) {
// There's a cpu for us, so we can run.
g->status = Grunning;
// Garbage collector isn't running (since we are),
// so okay to clear gcstack.
g->gcstack = nil;
- schedunlock();
return;
}
+
+ schedlock();
+
// Tell scheduler to put g back on the run queue:
// mostly equivalent to g->status = Grunning,
// but keeps the garbage collector from thinking
@@ -676,12 +864,12 @@ runtime·exitsyscall(void)
g->readyonstop = 1;
schedunlock();
- // Slow path - all the cpus are taken.
+ // All the cpus are taken.
// The scheduler will ready g and put this m to sleep.
// When the scheduler takes g away from m,
// it will undo the runtime·sched.mcpu++ above.
runtime·gosched();
-
+
// Gosched returned, so we're allowed to run now.
// Delete the gcstack information that we left for
// the garbage collector during the system call.
@@ -698,7 +886,7 @@ runtime·oldstack(void)
uint32 argsize;
byte *sp;
G *g1;
- static int32 goid;
+ int32 goid;
//printf("oldstack m->cret=%p\n", m->cret);
@@ -709,9 +897,10 @@ runtime·oldstack(void)
argsize = old.argsize;
if(argsize > 0) {
sp -= argsize;
- runtime·mcpy(top->argp, sp, argsize);
+ runtime·memmove(top->argp, sp, argsize);
}
goid = old.gobuf.g->goid; // fault if g is bad, before gogo
+ USED(goid);
if(old.free != 0)
runtime·stackfree(g1->stackguard - StackGuard, old.free);
@@ -790,7 +979,7 @@ runtime·newstack(void)
sp = (byte*)top;
if(argsize > 0) {
sp -= argsize;
- runtime·mcpy(sp, m->moreargp, argsize);
+ runtime·memmove(sp, m->moreargp, argsize);
}
if(thechar == '5') {
// caller would have saved its LR below args.
@@ -855,7 +1044,7 @@ void
runtime·newproc(int32 siz, byte* fn, ...)
{
byte *argp;
-
+
if(thechar == '5')
argp = (byte*)(&fn+2); // skip caller's saved LR
else
@@ -873,8 +1062,13 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
//printf("newproc1 %p %p narg=%d nret=%d\n", fn, argp, narg, nret);
siz = narg + nret;
siz = (siz+7) & ~7;
- if(siz > 1024)
- runtime·throw("runtime.newproc: too many args");
+
+ // We could instead create a secondary stack frame
+ // and make it look like goexit was on the original but
+ // the call to the actual goroutine function was split.
+ // Not worth it: this is almost always an error.
+ if(siz > StackMin - 1024)
+ runtime·throw("runtime.newproc: function arguments too large for new goroutine");
schedlock();
@@ -891,7 +1085,7 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
sp = newg->stackbase;
sp -= siz;
- runtime·mcpy(sp, argp, narg);
+ runtime·memmove(sp, argp, narg);
if(thechar == '5') {
// caller's LR
sp -= sizeof(void*);
@@ -905,8 +1099,8 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
newg->gopc = (uintptr)callerpc;
runtime·sched.gcount++;
- runtime·goidgen++;
- newg->goid = runtime·goidgen;
+ runtime·sched.goidgen++;
+ newg->goid = runtime·sched.goidgen;
newprocreadylocked(newg);
schedunlock();
@@ -929,11 +1123,11 @@ runtime·deferproc(int32 siz, byte* fn, ...)
d->argp = (byte*)(&fn+2); // skip caller's saved link register
else
d->argp = (byte*)(&fn+1);
- runtime·mcpy(d->args, d->argp, d->siz);
+ runtime·memmove(d->args, d->argp, d->siz);
d->link = g->defer;
g->defer = d;
-
+
// deferproc returns 0 normally.
// a deferred func that stops a panic
// makes the deferproc return 1.
@@ -956,7 +1150,7 @@ runtime·deferreturn(uintptr arg0)
argp = (byte*)&arg0;
if(d->argp != argp)
return;
- runtime·mcpy(argp, d->args, d->siz);
+ runtime·memmove(argp, d->args, d->siz);
g->defer = d->link;
fn = d->fn;
runtime·free(d);
@@ -965,9 +1159,9 @@ runtime·deferreturn(uintptr arg0)
static void
rundefer(void)
-{
+{
Defer *d;
-
+
while((d = g->defer) != nil) {
g->defer = d->link;
reflect·call(d->fn, d->args, d->siz);
@@ -982,7 +1176,7 @@ unwindstack(G *gp, byte *sp)
{
Stktop *top;
byte *stk;
-
+
// Must be called from a different goroutine, usually m->g0.
if(g == gp)
runtime·throw("unwindstack on self");
@@ -1018,7 +1212,7 @@ printpanics(Panic *p)
}
static void recovery(G*);
-
+
void
runtime·panic(Eface e)
{
@@ -1068,7 +1262,7 @@ recovery(G *gp)
// Rewind gp's stack; we're running on m->g0's stack.
d = gp->defer;
gp->defer = d->link;
-
+
// Unwind to the stack frame with d's arguments in it.
unwindstack(gp, d->argp);
@@ -1216,25 +1410,29 @@ int32
runtime·gomaxprocsfunc(int32 n)
{
int32 ret;
+ uint32 v;
schedlock();
ret = runtime·gomaxprocs;
- if (n <= 0)
+ if(n <= 0)
n = ret;
+ if(n > maxgomaxprocs)
+ n = maxgomaxprocs;
runtime·gomaxprocs = n;
- if (runtime·gcwaiting != 0) {
- if (runtime·sched.mcpumax != 1)
- runtime·throw("invalid runtime·sched.mcpumax during gc");
+ if(runtime·gcwaiting != 0) {
+ if(atomic_mcpumax(runtime·sched.atomic) != 1)
+ runtime·throw("invalid mcpumax during gc");
schedunlock();
return ret;
}
- runtime·sched.mcpumax = n;
- // handle fewer procs?
- if(runtime·sched.mcpu > runtime·sched.mcpumax) {
+
+ setmcpumax(n);
+
+ // If there are now fewer allowed procs
+ // than procs running, stop.
+ v = runtime·atomicload(&runtime·sched.atomic);
+ if(atomic_mcpu(v) > n) {
schedunlock();
- // just give up the cpu.
- // we'll only get rescheduled once the
- // number has come down.
runtime·gosched();
return ret;
}
@@ -1301,10 +1499,10 @@ void
runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp)
{
int32 n;
-
+
if(prof.fn == nil || prof.hz == 0)
return;
-
+
runtime·lock(&prof);
if(prof.fn == nil) {
runtime·unlock(&prof);
@@ -1339,7 +1537,7 @@ runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
runtime·lock(&runtime·sched);
runtime·sched.profilehz = hz;
runtime·unlock(&runtime·sched);
-
+
if(hz != 0)
runtime·resetcpuprofiler(hz);
}
@@ -1355,11 +1553,11 @@ os·setenv_c(String k, String v)
return;
arg[0] = runtime·malloc(k.len + 1);
- runtime·mcpy(arg[0], k.str, k.len);
+ runtime·memmove(arg[0], k.str, k.len);
arg[0][k.len] = 0;
arg[1] = runtime·malloc(v.len + 1);
- runtime·mcpy(arg[1], v.str, v.len);
+ runtime·memmove(arg[1], v.str, v.len);
arg[1][v.len] = 0;
runtime·asmcgocall(libcgo_setenv, arg);
diff --git a/src/pkg/runtime/proc.p b/src/pkg/runtime/proc.p
new file mode 100644
index 000000000..f0b46de61
--- /dev/null
+++ b/src/pkg/runtime/proc.p
@@ -0,0 +1,526 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+model for proc.c as of 2011/07/22.
+
+takes 4900 seconds to explore 1189070 states
+with G=3, var_gomaxprocs=1
+on a Core i7 L640 2.13 GHz Lenovo X201s.
+
+rm -f proc.p.trail pan.* pan
+spin -a proc.p
+gcc -DSAFETY -DREACH -DMEMLIM=4000 -o pan pan.c
+pan -w28 -n -i -m500000
+test -f proc.p.trail && pan -r proc.p.trail
+*/
+
+/*
+ * scheduling parameters
+ */
+
+/*
+ * the number of goroutines G doubles as the maximum
+ * number of OS threads; the max is reachable when all
+ * the goroutines are blocked in system calls.
+ */
+#define G 3
+
+/*
+ * whether to allow gomaxprocs to vary during execution.
+ * enabling this checks the scheduler even when code is
+ * calling GOMAXPROCS, but it also slows down the verification
+ * by about 10x.
+ */
+#define var_gomaxprocs 1 /* allow gomaxprocs to vary */
+
+/* gomaxprocs */
+#if var_gomaxprocs
+byte gomaxprocs = 3;
+#else
+#define gomaxprocs 3
+#endif
+
+/* queue of waiting M's: sched_mhead[:mwait] */
+byte mwait;
+byte sched_mhead[G];
+
+/* garbage collector state */
+bit gc_lock, gcwaiting;
+
+/* goroutines sleeping, waiting to run */
+byte gsleep, gwait;
+
+/* scheduler state */
+bit sched_lock;
+bit sched_stopped;
+bit atomic_gwaiting, atomic_waitstop;
+byte atomic_mcpu, atomic_mcpumax;
+
+/* M struct fields - state for handing off g to m. */
+bit m_waitnextg[G];
+bit m_havenextg[G];
+bit m_nextg[G];
+
+/*
+ * opt_atomic/opt_dstep mark atomic/deterministics
+ * sequences that are marked only for reasons of
+ * optimization, not for correctness of the algorithms.
+ *
+ * in general any code that runs while holding the
+ * schedlock and does not refer to or modify the atomic_*
+ * fields can be marked atomic/dstep without affecting
+ * the usefulness of the model. since we trust the lock
+ * implementation, what we really want to test is the
+ * interleaving of the atomic fast paths with entersyscall
+ * and exitsyscall.
+ */
+#define opt_atomic atomic
+#define opt_dstep d_step
+
+/* locks */
+inline lock(x) {
+ d_step { x == 0; x = 1 }
+}
+
+inline unlock(x) {
+ d_step { assert x == 1; x = 0 }
+}
+
+/* notes */
+inline noteclear(x) {
+ x = 0
+}
+
+inline notesleep(x) {
+ x == 1
+}
+
+inline notewakeup(x) {
+ opt_dstep { assert x == 0; x = 1 }
+}
+
+/*
+ * scheduler
+ */
+inline schedlock() {
+ lock(sched_lock)
+}
+
+inline schedunlock() {
+ unlock(sched_lock)
+}
+
+/*
+ * canaddmcpu is like the C function but takes
+ * an extra argument to include in the test, to model
+ * "cannget() && canaddmcpu()" as "canaddmcpu(cangget())"
+ */
+inline canaddmcpu(g) {
+ d_step {
+ g && atomic_mcpu < atomic_mcpumax;
+ atomic_mcpu++;
+ }
+}
+
+/*
+ * gput is like the C function.
+ * instead of tracking goroutines explicitly we
+ * maintain only the count of the number of
+ * waiting goroutines.
+ */
+inline gput() {
+ /* omitted: lockedm, idlem concerns */
+ opt_dstep {
+ gwait++;
+ if
+ :: gwait == 1 ->
+ atomic_gwaiting = 1
+ :: else
+ fi
+ }
+}
+
+/*
+ * cangget is a macro so it can be passed to
+ * canaddmcpu (see above).
+ */
+#define cangget() (gwait>0)
+
+/*
+ * gget is like the C function.
+ */
+inline gget() {
+ opt_dstep {
+ assert gwait > 0;
+ gwait--;
+ if
+ :: gwait == 0 ->
+ atomic_gwaiting = 0
+ :: else
+ fi
+ }
+}
+
+/*
+ * mput is like the C function.
+ * here we do keep an explicit list of waiting M's,
+ * so that we know which ones can be awakened.
+ * we use _pid-1 because the monitor is proc 0.
+ */
+inline mput() {
+ opt_dstep {
+ sched_mhead[mwait] = _pid - 1;
+ mwait++
+ }
+}
+
+/*
+ * mnextg is like the C function mnextg(m, g).
+ * it passes an unspecified goroutine to m to start running.
+ */
+inline mnextg(m) {
+ opt_dstep {
+ m_nextg[m] = 1;
+ if
+ :: m_waitnextg[m] ->
+ m_waitnextg[m] = 0;
+ notewakeup(m_havenextg[m])
+ :: else
+ fi
+ }
+}
+
+/*
+ * mgetnextg handles the main m handoff in matchmg.
+ * it is like mget() || new M followed by mnextg(m, g),
+ * but combined to avoid a local variable.
+ * unlike the C code, a new M simply assumes it is
+ * running a g instead of using the mnextg coordination
+ * to obtain one.
+ */
+inline mgetnextg() {
+ opt_atomic {
+ if
+ :: mwait > 0 ->
+ mwait--;
+ mnextg(sched_mhead[mwait]);
+ sched_mhead[mwait] = 0;
+ :: else ->
+ run mstart();
+ fi
+ }
+}
+
+/*
+ * nextgandunlock is like the C function.
+ * it pulls a g off the queue or else waits for one.
+ */
+inline nextgandunlock() {
+ assert atomic_mcpu <= G;
+
+ if
+ :: m_nextg[_pid-1] ->
+ m_nextg[_pid-1] = 0;
+ schedunlock();
+ :: canaddmcpu(!m_nextg[_pid-1] && cangget()) ->
+ gget();
+ schedunlock();
+ :: else ->
+ opt_dstep {
+ mput();
+ m_nextg[_pid-1] = 0;
+ m_waitnextg[_pid-1] = 1;
+ noteclear(m_havenextg[_pid-1]);
+ }
+ if
+ :: atomic_waitstop && atomic_mcpu <= atomic_mcpumax ->
+ atomic_waitstop = 0;
+ notewakeup(sched_stopped)
+ :: else
+ fi;
+ schedunlock();
+ opt_dstep {
+ notesleep(m_havenextg[_pid-1]);
+ assert m_nextg[_pid-1];
+ m_nextg[_pid-1] = 0;
+ }
+ fi
+}
+
+/*
+ * stoptheworld is like the C function.
+ */
+inline stoptheworld() {
+ schedlock();
+ gcwaiting = 1;
+ atomic_mcpumax = 1;
+ do
+ :: d_step { atomic_mcpu > 1 ->
+ noteclear(sched_stopped);
+ assert !atomic_waitstop;
+ atomic_waitstop = 1 }
+ schedunlock();
+ notesleep(sched_stopped);
+ schedlock();
+ :: else ->
+ break
+ od;
+ schedunlock();
+}
+
+/*
+ * starttheworld is like the C function.
+ */
+inline starttheworld() {
+ schedlock();
+ gcwaiting = 0;
+ atomic_mcpumax = gomaxprocs;
+ matchmg();
+ schedunlock();
+}
+
+/*
+ * matchmg is like the C function.
+ */
+inline matchmg() {
+ do
+ :: canaddmcpu(cangget()) ->
+ gget();
+ mgetnextg();
+ :: else -> break
+ od
+}
+
+/*
+ * ready is like the C function.
+ * it puts a g on the run queue.
+ */
+inline ready() {
+ schedlock();
+ gput()
+ matchmg()
+ schedunlock()
+}
+
+/*
+ * schedule simulates the C scheduler.
+ * it assumes that there is always a goroutine
+ * running already, and the goroutine has entered
+ * the scheduler for an unspecified reason,
+ * either to yield or to block.
+ */
+inline schedule() {
+ schedlock();
+
+ mustsched = 0;
+ atomic_mcpu--;
+ assert atomic_mcpu <= G;
+ if
+ :: skip ->
+ // goroutine yields, still runnable
+ gput();
+ :: gsleep+1 < G ->
+ // goroutine goes to sleep (but there is another that can wake it)
+ gsleep++
+ fi;
+
+ // Find goroutine to run.
+ nextgandunlock()
+}
+
+/*
+ * schedpend is > 0 if a goroutine is about to commit to
+ * entering the scheduler but has not yet done so.
+ * Just as we don't test for the undesirable conditions when a
+ * goroutine is in the scheduler, we don't test for them when
+ * a goroutine will be in the scheduler shortly.
+ * Modeling this state lets us replace mcpu cas loops with
+ * simpler mcpu atomic adds.
+ */
+byte schedpend;
+
+/*
+ * entersyscall is like the C function.
+ */
+inline entersyscall() {
+ bit willsched;
+
+ /*
+ * Fast path. Check all the conditions tested during schedlock/schedunlock
+ * below, and if we can get through the whole thing without stopping, run it
+ * in one atomic cas-based step.
+ */
+ atomic {
+ atomic_mcpu--;
+ if
+ :: atomic_gwaiting ->
+ skip
+ :: atomic_waitstop && atomic_mcpu <= atomic_mcpumax ->
+ skip
+ :: else ->
+ goto Lreturn_entersyscall;
+ fi;
+ willsched = 1;
+ schedpend++;
+ }
+
+ /*
+ * Normal path.
+ */
+ schedlock()
+ opt_dstep {
+ if
+ :: willsched ->
+ schedpend--;
+ willsched = 0
+ :: else
+ fi
+ }
+ if
+ :: atomic_gwaiting ->
+ matchmg()
+ :: else
+ fi;
+ if
+ :: atomic_waitstop && atomic_mcpu <= atomic_mcpumax ->
+ atomic_waitstop = 0;
+ notewakeup(sched_stopped)
+ :: else
+ fi;
+ schedunlock();
+Lreturn_entersyscall:
+ skip
+}
+
+/*
+ * exitsyscall is like the C function.
+ */
+inline exitsyscall() {
+ /*
+ * Fast path. If there's a cpu available, use it.
+ */
+ atomic {
+ // omitted profilehz check
+ atomic_mcpu++;
+ if
+ :: atomic_mcpu >= atomic_mcpumax ->
+ skip
+ :: else ->
+ goto Lreturn_exitsyscall
+ fi
+ }
+
+ /*
+ * Normal path.
+ */
+ schedlock();
+ d_step {
+ if
+ :: atomic_mcpu <= atomic_mcpumax ->
+ skip
+ :: else ->
+ mustsched = 1
+ fi
+ }
+ schedunlock()
+Lreturn_exitsyscall:
+ skip
+}
+
+#if var_gomaxprocs
+inline gomaxprocsfunc() {
+ schedlock();
+ opt_atomic {
+ if
+ :: gomaxprocs != 1 -> gomaxprocs = 1
+ :: gomaxprocs != 2 -> gomaxprocs = 2
+ :: gomaxprocs != 3 -> gomaxprocs = 3
+ fi;
+ }
+ if
+ :: gcwaiting != 0 ->
+ assert atomic_mcpumax == 1
+ :: else ->
+ atomic_mcpumax = gomaxprocs;
+ if
+ :: atomic_mcpu > gomaxprocs ->
+ mustsched = 1
+ :: else ->
+ matchmg()
+ fi
+ fi;
+ schedunlock();
+}
+#endif
+
+/*
+ * mstart is the entry point for a new M.
+ * our model of an M is always running some
+ * unspecified goroutine.
+ */
+proctype mstart() {
+ /*
+ * mustsched is true if the goroutine must enter the
+ * scheduler instead of continuing to execute.
+ */
+ bit mustsched;
+
+ do
+ :: skip ->
+ // goroutine reschedules.
+ schedule()
+ :: !mustsched ->
+ // goroutine does something.
+ if
+ :: skip ->
+ // goroutine executes system call
+ entersyscall();
+ exitsyscall()
+ :: atomic { gsleep > 0; gsleep-- } ->
+ // goroutine wakes another goroutine
+ ready()
+ :: lock(gc_lock) ->
+ // goroutine runs a garbage collection
+ stoptheworld();
+ starttheworld();
+ unlock(gc_lock)
+#if var_gomaxprocs
+ :: skip ->
+ // goroutine picks a new gomaxprocs
+ gomaxprocsfunc()
+#endif
+ fi
+ od;
+
+ assert 0;
+}
+
+/*
+ * monitor initializes the scheduler state
+ * and then watches for impossible conditions.
+ */
+active proctype monitor() {
+ opt_dstep {
+ byte i = 1;
+ do
+ :: i < G ->
+ gput();
+ i++
+ :: else -> break
+ od;
+ atomic_mcpu = 1;
+ atomic_mcpumax = 1;
+ }
+ run mstart();
+
+ do
+ // Should never have goroutines waiting with procs available.
+ :: !sched_lock && schedpend==0 && gwait > 0 && atomic_mcpu < atomic_mcpumax ->
+ assert 0
+ // Should never have gc waiting for stop if things have already stopped.
+ :: !sched_lock && schedpend==0 && atomic_waitstop && atomic_mcpu <= atomic_mcpumax ->
+ assert 0
+ od
+}
diff --git a/src/pkg/runtime/proc_test.go b/src/pkg/runtime/proc_test.go
index cac4f9eea..32111080a 100644
--- a/src/pkg/runtime/proc_test.go
+++ b/src/pkg/runtime/proc_test.go
@@ -6,6 +6,7 @@ package runtime_test
import (
"runtime"
+ "sync/atomic"
"testing"
)
@@ -44,3 +45,81 @@ func TestStopTheWorldDeadlock(t *testing.T) {
stop <- true
runtime.GOMAXPROCS(maxprocs)
}
+
+func stackGrowthRecursive(i int) {
+ var pad [128]uint64
+ if i != 0 && pad[0] == 0 {
+ stackGrowthRecursive(i - 1)
+ }
+}
+
+func BenchmarkStackGrowth(b *testing.B) {
+ const CallsPerSched = 1000
+ procs := runtime.GOMAXPROCS(-1)
+ N := int32(b.N / CallsPerSched)
+ c := make(chan bool, procs)
+ for p := 0; p < procs; p++ {
+ go func() {
+ for atomic.AddInt32(&N, -1) >= 0 {
+ runtime.Gosched()
+ for g := 0; g < CallsPerSched; g++ {
+ stackGrowthRecursive(10)
+ }
+ }
+ c <- true
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ }
+}
+
+func BenchmarkSyscall(b *testing.B) {
+ const CallsPerSched = 1000
+ procs := runtime.GOMAXPROCS(-1)
+ N := int32(b.N / CallsPerSched)
+ c := make(chan bool, procs)
+ for p := 0; p < procs; p++ {
+ go func() {
+ for atomic.AddInt32(&N, -1) >= 0 {
+ runtime.Gosched()
+ for g := 0; g < CallsPerSched; g++ {
+ runtime.Entersyscall()
+ runtime.Exitsyscall()
+ }
+ }
+ c <- true
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ }
+}
+
+func BenchmarkSyscallWork(b *testing.B) {
+ const CallsPerSched = 1000
+ const LocalWork = 100
+ procs := runtime.GOMAXPROCS(-1)
+ N := int32(b.N / CallsPerSched)
+ c := make(chan bool, procs)
+ for p := 0; p < procs; p++ {
+ go func() {
+ foo := 42
+ for atomic.AddInt32(&N, -1) >= 0 {
+ runtime.Gosched()
+ for g := 0; g < CallsPerSched; g++ {
+ runtime.Entersyscall()
+ for i := 0; i < LocalWork; i++ {
+ foo *= 2
+ foo /= 2
+ }
+ runtime.Exitsyscall()
+ }
+ }
+ c <- foo == 42
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ }
+}
diff --git a/src/pkg/runtime/runtime.c b/src/pkg/runtime/runtime.c
index 1a3653f10..c572897d2 100644
--- a/src/pkg/runtime/runtime.c
+++ b/src/pkg/runtime/runtime.c
@@ -11,6 +11,14 @@ enum {
uint32 runtime·panicking;
+/*
+ * We assume that all architectures turn faults and the like
+ * into apparent calls to runtime.sigpanic. If we see a "call"
+ * to runtime.sigpanic, we do not back up the PC to find the
+ * line number of the CALL instruction, because there is no CALL.
+ */
+void runtime·sigpanic(void);
+
int32
runtime·gotraceback(void)
{
@@ -116,17 +124,6 @@ runtime·panicstring(int8 *s)
runtime·panic(err);
}
-void
-runtime·mcpy(byte *t, byte *f, uint32 n)
-{
- while(n > 0) {
- *t = *f;
- t++;
- f++;
- n--;
- }
-}
-
int32
runtime·mcmp(byte *s1, byte *s2, uint32 n)
{
@@ -218,20 +215,6 @@ runtime·goenvs_unix(void)
os·Envs.cap = n;
}
-// Atomic add and return new value.
-uint32
-runtime·xadd(uint32 volatile *val, int32 delta)
-{
- uint32 oval, nval;
-
- for(;;){
- oval = *val;
- nval = oval + delta;
- if(runtime·cas(val, oval, nval))
- return nval;
- }
-}
-
byte*
runtime·getenv(int8 *s)
{
@@ -406,18 +389,11 @@ memprint(uint32 s, void *a)
static void
memcopy(uint32 s, void *a, void *b)
{
- byte *ba, *bb;
- uint32 i;
-
- ba = a;
- bb = b;
- if(bb == nil) {
- for(i=0; i<s; i++)
- ba[i] = 0;
+ if(b == nil) {
+ runtime·memclr(a,s);
return;
}
- for(i=0; i<s; i++)
- ba[i] = bb[i];
+ runtime·memmove(a,b,s);
}
static uint32
@@ -551,25 +527,35 @@ runtime·nanotime(void)
void
runtime·Caller(int32 skip, uintptr retpc, String retfile, int32 retline, bool retbool)
{
- Func *f;
+ Func *f, *g;
uintptr pc;
-
- if(runtime·callers(1+skip, &retpc, 1) == 0) {
+ uintptr rpc[2];
+
+ /*
+ * Ask for two PCs: the one we were asked for
+ * and what it called, so that we can see if it
+ * "called" sigpanic.
+ */
+ retpc = 0;
+ if(runtime·callers(1+skip-1, rpc, 2) < 2) {
retfile = runtime·emptystring;
retline = 0;
retbool = false;
- } else if((f = runtime·findfunc(retpc)) == nil) {
+ } else if((f = runtime·findfunc(rpc[1])) == nil) {
retfile = runtime·emptystring;
retline = 0;
retbool = true; // have retpc at least
} else {
+ retpc = rpc[1];
retfile = f->src;
pc = retpc;
- if(pc > f->entry)
+ g = runtime·findfunc(rpc[0]);
+ if(pc > f->entry && (g == nil || g->entry != (uintptr)runtime·sigpanic))
pc--;
retline = runtime·funcline(f, pc);
retbool = true;
}
+ FLUSH(&retpc);
FLUSH(&retfile);
FLUSH(&retline);
FLUSH(&retbool);
@@ -588,3 +574,16 @@ runtime·FuncForPC(uintptr pc, void *retf)
retf = runtime·findfunc(pc);
FLUSH(&retf);
}
+
+uint32
+runtime·fastrand1(void)
+{
+ uint32 x;
+
+ x = m->fastrand;
+ x += x;
+ if(x & 0x80000000L)
+ x ^= 0x88888eefUL;
+ m->fastrand = x;
+ return x;
+}
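
The new runtime·fastrand1 keeps a per-m word and advances it with a shift-and-xor step. A standalone Go sketch of the same update (illustrative only; the real state lives in m->fastrand and is not exported):

	package main

	import "fmt"

	// fastrandStep mirrors the body of runtime·fastrand1: double the state
	// and, whenever the doubled value has its high bit set, fold in the
	// constant 0x88888eef. Starting from a non-zero seed this cycles through
	// a long sequence of 32-bit values cheaply, with no locking.
	func fastrandStep(x uint32) uint32 {
		x += x
		if x&0x80000000 != 0 {
			x ^= 0x88888eef
		}
		return x
	}

	func main() {
		x := uint32(1) // per-m seed; must be non-zero to produce a useful sequence
		for i := 0; i < 5; i++ {
			x = fastrandStep(x)
			fmt.Printf("%#08x\n", x)
		}
	}
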
diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h
index ad5da0a96..44511da83 100644
--- a/src/pkg/runtime/runtime.h
+++ b/src/pkg/runtime/runtime.h
@@ -57,6 +57,7 @@ typedef struct String String;
typedef struct Usema Usema;
typedef struct SigTab SigTab;
typedef struct MCache MCache;
+typedef struct FixAlloc FixAlloc;
typedef struct Iface Iface;
typedef struct Itab Itab;
typedef struct Eface Eface;
@@ -130,7 +131,10 @@ struct Usema
union Note
{
struct { // Linux
- Lock lock;
+ uint32 state;
+ };
+ struct { // Windows
+ Lock lock;
};
struct { // OS X
int32 wakeup;
@@ -229,12 +233,15 @@ struct M
int32 waitnextg;
int32 dying;
int32 profilehz;
+ uint32 fastrand;
+ uint64 ncgocall;
Note havenextg;
G* nextg;
M* alllink; // on allm
M* schedlink;
uint32 machport; // Return address for Mach IPC (OS X)
MCache *mcache;
+ FixAlloc *stackalloc;
G* lockedg;
G* idleg;
uint32 freglo[16]; // D[i] lsb and F[i]
@@ -368,7 +375,6 @@ extern Alg runtime·algarray[Amax];
extern String runtime·emptystring;
G* runtime·allg;
M* runtime·allm;
-int32 runtime·goidgen;
extern int32 runtime·gomaxprocs;
extern uint32 runtime·panicking;
extern int32 runtime·gcwaiting; // gc is waiting to run
@@ -379,6 +385,7 @@ extern bool runtime·iscgo;
* common functions and data
*/
int32 runtime·strcmp(byte*, byte*);
+byte* runtime·strstr(byte*, byte*);
int32 runtime·findnull(byte*);
int32 runtime·findnullw(uint16*);
void runtime·dump(byte*, int32);
@@ -404,13 +411,13 @@ uint32 runtime·rnd(uint32, uint32);
void runtime·prints(int8*);
void runtime·printf(int8*, ...);
byte* runtime·mchr(byte*, byte, byte*);
-void runtime·mcpy(byte*, byte*, uint32);
int32 runtime·mcmp(byte*, byte*, uint32);
void runtime·memmove(void*, void*, uint32);
void* runtime·mal(uintptr);
String runtime·catstring(String, String);
String runtime·gostring(byte*);
String runtime·gostringn(byte*, int32);
+Slice runtime·gobytes(byte*, int32);
String runtime·gostringnocopy(byte*);
String runtime·gostringw(uint16*);
void runtime·initsig(int32);
@@ -424,7 +431,11 @@ bool runtime·casp(void**, void*, void*);
// Don't confuse with XADD x86 instruction,
// this one is actually 'addx', that is, add-and-fetch.
uint32 runtime·xadd(uint32 volatile*, int32);
-uint32 runtime·atomicload(uint32 volatile*);
+uint32 runtime·xchg(uint32 volatile*, uint32);
+uint32 runtime·atomicload(uint32 volatile*);
+void runtime·atomicstore(uint32 volatile*, uint32);
+void* runtime·atomicloadp(void* volatile*);
+void runtime·atomicstorep(void* volatile*, void*);
void runtime·jmpdefer(byte*, void*);
void runtime·exit1(int32);
void runtime·ready(G*);
@@ -454,6 +465,7 @@ void runtime·runpanic(Panic*);
void* runtime·getcallersp(void*);
int32 runtime·mcount(void);
void runtime·mcall(void(*)(G*));
+uint32 runtime·fastrand1(void);
void runtime·exit(int32);
void runtime·breakpoint(void);
@@ -590,6 +602,8 @@ void runtime·semacquire(uint32*);
void runtime·semrelease(uint32*);
String runtime·signame(int32 sig);
int32 runtime·gomaxprocsfunc(int32 n);
+void runtime·procyield(uint32);
+void runtime·osyield(void);
void runtime·mapassign(Hmap*, byte*, byte*);
void runtime·mapaccess(Hmap*, byte*, byte*, bool*);
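
The header comment stresses that runtime·xadd is add-and-fetch: it returns the value after the addition, unlike the bare x86 XADD. The exported sync/atomic API follows the same contract, so user code can rely on the returned value directly (a user-level analogue, not the runtime-internal function):

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	func main() {
		var counter uint32

		// AddUint32, like runtime·xadd, returns the value *after* the
		// addition, so the result can serve as a fresh ticket number.
		ticket := atomic.AddUint32(&counter, 1)
		fmt.Println(ticket)                        // 1
		fmt.Println(atomic.AddUint32(&counter, 1)) // 2
	}
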
diff --git a/src/pkg/runtime/slice.c b/src/pkg/runtime/slice.c
index 9146c177f..70534279b 100644
--- a/src/pkg/runtime/slice.c
+++ b/src/pkg/runtime/slice.c
@@ -20,7 +20,7 @@ runtime·makeslice(SliceType *t, int64 len, int64 cap, Slice ret)
{
if(len < 0 || (int32)len != len)
runtime·panicstring("makeslice: len out of range");
- if(cap < len || (int32)cap != cap || cap > ((uintptr)-1) / t->elem->size)
+ if(cap < len || (int32)cap != cap || t->elem->size > 0 && cap > ((uintptr)-1) / t->elem->size)
runtime·panicstring("makeslice: cap out of range");
makeslice1(t, len, cap, &ret);
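
The extra t->elem->size > 0 test keeps the overflow guard from dividing by zero when the element type has size zero, while still rejecting any cap whose byte size would not fit in a uintptr. The same check, written as a free-standing Go helper with illustrative names:

	package main

	import "fmt"

	// capOverflows reports whether cap*elemSize would overflow a uintptr,
	// using the same division-based guard as makeslice: zero-sized elements
	// can never overflow, and are also the case that would otherwise divide
	// by zero.
	func capOverflows(cap, elemSize uintptr) bool {
		return elemSize > 0 && cap > ^uintptr(0)/elemSize
	}

	func main() {
		max := ^uintptr(0)
		fmt.Println(capOverflows(max/8, 8))   // false: exactly fits
		fmt.Println(capOverflows(max/8+1, 8)) // true: one element too many
		fmt.Println(capOverflows(max, 0))     // false: zero-sized elements never overflow
	}
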
diff --git a/src/pkg/runtime/stack.h b/src/pkg/runtime/stack.h
index 2b6b0e387..44d5533f4 100644
--- a/src/pkg/runtime/stack.h
+++ b/src/pkg/runtime/stack.h
@@ -71,6 +71,7 @@ enum {
// If the amount needed for the splitting frame + StackExtra
// is less than this number, the stack will have this size instead.
StackMin = 4096,
+ FixedStack = StackMin + StackSystem,
// Functions that need frames bigger than this call morestack
// unconditionally. That is, on entry to a function it is assumed
diff --git a/src/pkg/runtime/string.goc b/src/pkg/runtime/string.goc
index b72aa937c..48bf3183b 100644
--- a/src/pkg/runtime/string.goc
+++ b/src/pkg/runtime/string.goc
@@ -32,19 +32,23 @@ runtime·findnullw(uint16 *s)
return l;
}
-int32 runtime·maxstring = 256;
+uint32 runtime·maxstring = 256;
String
runtime·gostringsize(int32 l)
{
String s;
+ uint32 ms;
if(l == 0)
return runtime·emptystring;
s.str = runtime·mal(l+1); // leave room for NUL for C runtime (e.g., callers of getenv)
s.len = l;
- if(l > runtime·maxstring)
- runtime·maxstring = l;
+ for(;;) {
+ ms = runtime·maxstring;
+ if((uint32)l <= ms || runtime·cas(&runtime·maxstring, ms, (uint32)l))
+ break;
+ }
return s;
}
@@ -56,7 +60,7 @@ runtime·gostring(byte *str)
l = runtime·findnull(str);
s = runtime·gostringsize(l);
- runtime·mcpy(s.str, str, l);
+ runtime·memmove(s.str, str, l);
return s;
}
@@ -66,10 +70,20 @@ runtime·gostringn(byte *str, int32 l)
String s;
s = runtime·gostringsize(l);
- runtime·mcpy(s.str, str, l);
+ runtime·memmove(s.str, str, l);
return s;
}
+Slice
+runtime·gobytes(byte *p, int32 n)
+{
+ Slice sl;
+
+ sl.array = runtime·mallocgc(n, FlagNoPointers, 1, 0);
+ sl.len = n;
+ sl.cap = n;
+ runtime·memmove(sl.array, p, n);
+ return sl;
+}
+
String
runtime·gostringnocopy(byte *str)
{
@@ -109,8 +123,8 @@ runtime·catstring(String s1, String s2)
return s1;
s3 = runtime·gostringsize(s1.len + s2.len);
- runtime·mcpy(s3.str, s1.str, s1.len);
- runtime·mcpy(s3.str+s1.len, s2.str, s2.len);
+ runtime·memmove(s3.str, s1.str, s1.len);
+ runtime·memmove(s3.str+s1.len, s2.str, s2.len);
return s3;
}
@@ -130,7 +144,7 @@ concatstring(int32 n, String *s)
out = runtime·gostringsize(l);
l = 0;
for(i=0; i<n; i++) {
- runtime·mcpy(out.str+l, s[i].str, s[i].len);
+ runtime·memmove(out.str+l, s[i].str, s[i].len);
l += s[i].len;
}
return out;
@@ -189,6 +203,28 @@ runtime·strcmp(byte *s1, byte *s2)
}
}
+byte*
+runtime·strstr(byte *s1, byte *s2)
+{
+ byte *sp1, *sp2;
+
+ if(*s2 == 0)
+ return s1;
+ for(; *s1; s1++) {
+ if(*s1 != *s2)
+ continue;
+ sp1 = s1;
+ sp2 = s2;
+ for(;;) {
+ if(*sp2 == 0)
+ return s1;
+ if(*sp1++ != *sp2++)
+ break;
+ }
+ }
+ return nil;
+}
+
func slicestring(si String, lindex int32, hindex int32) (so String) {
int32 l;
@@ -221,14 +257,14 @@ func intstring(v int64) (s String) {
func slicebytetostring(b Slice) (s String) {
s = runtime·gostringsize(b.len);
- runtime·mcpy(s.str, b.array, s.len);
+ runtime·memmove(s.str, b.array, s.len);
}
func stringtoslicebyte(s String) (b Slice) {
b.array = runtime·mallocgc(s.len, FlagNoPointers, 1, 1);
b.len = s.len;
b.cap = s.len;
- runtime·mcpy(b.array, s.str, s.len);
+ runtime·memmove(b.array, s.str, s.len);
}
func sliceinttostring(b Slice) (s String) {
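
runtime·gostringsize now raises runtime·maxstring with a compare-and-swap loop instead of a plain store, so concurrent allocations cannot lose an update of the maximum. The pattern is general; here is a user-level sketch with sync/atomic (illustrative names, not the runtime's C primitives):

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	// storeMax raises *max to at least v. If another goroutine bumps the
	// value between the load and the CompareAndSwap, the swap fails and the
	// loop retries against the fresher value, as the gostringsize loop does.
	func storeMax(max *uint32, v uint32) {
		for {
			old := atomic.LoadUint32(max)
			if v <= old || atomic.CompareAndSwapUint32(max, old, v) {
				return
			}
		}
	}

	func main() {
		var maxstring uint32 = 256
		storeMax(&maxstring, 1024)
		storeMax(&maxstring, 512) // no effect: smaller than the current maximum
		fmt.Println(maxstring)    // 1024
	}
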
diff --git a/src/pkg/runtime/symtab.c b/src/pkg/runtime/symtab.c
index da4579734..d2ebf9b40 100644
--- a/src/pkg/runtime/symtab.c
+++ b/src/pkg/runtime/symtab.c
@@ -78,6 +78,7 @@ static int32 nfunc;
static byte **fname;
static int32 nfname;
+static uint32 funcinit;
static Lock funclock;
static void
@@ -159,7 +160,7 @@ makepath(byte *buf, int32 nbuf, byte *path)
break;
if(p > buf && p[-1] != '/')
*p++ = '/';
- runtime·mcpy(p, q, len+1);
+ runtime·memmove(p, q, len+1);
p += len;
}
}
@@ -420,10 +421,21 @@ runtime·findfunc(uintptr addr)
Func *f;
int32 nf, n;
- runtime·lock(&funclock);
- if(func == nil)
- buildfuncs();
- runtime·unlock(&funclock);
+ // Use atomic double-checked locking,
+ // because when called from pprof signal
+ // handler, findfunc must run without
+ // grabbing any locks.
+ // (Before enabling the signal handler,
+ // SetCPUProfileRate calls findfunc to trigger
+ // the initialization outside the handler.)
+ if(runtime·atomicload(&funcinit) == 0) {
+ runtime·lock(&funclock);
+ if(funcinit == 0) {
+ buildfuncs();
+ runtime·atomicstore(&funcinit, 1);
+ }
+ runtime·unlock(&funclock);
+ }
if(nfunc == 0)
return nil;
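
findfunc now uses double-checked locking: an atomic load keeps the fast path lock-free, and a second check under funclock ensures the tables are built exactly once, so the profiler's signal handler never has to take a lock after initialization. A Go rendering of the same shape (names are illustrative; the runtime's version is C with its own lock and atomic primitives):

	package main

	import (
		"fmt"
		"sync"
		"sync/atomic"
	)

	var (
		funcinit uint32     // 0 until the tables are built
		funclock sync.Mutex // guards the one-time build
	)

	// ensureFuncs is the double-checked pattern from findfunc: the atomic
	// load keeps the common case lock-free, and the re-check under the lock
	// makes build run exactly once even if several callers race past the
	// first test.
	func ensureFuncs(build func()) {
		if atomic.LoadUint32(&funcinit) == 0 {
			funclock.Lock()
			if funcinit == 0 {
				build()
				atomic.StoreUint32(&funcinit, 1)
			}
			funclock.Unlock()
		}
	}

	func main() {
		ensureFuncs(func() { fmt.Println("built once") })
		ensureFuncs(func() { fmt.Println("built twice?") }) // never printed
	}
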
diff --git a/src/pkg/runtime/symtab_test.go b/src/pkg/runtime/symtab_test.go
new file mode 100644
index 000000000..bd9fe18c4
--- /dev/null
+++ b/src/pkg/runtime/symtab_test.go
@@ -0,0 +1,47 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "runtime"
+ "strings"
+ "testing"
+)
+
+func TestCaller(t *testing.T) {
+ procs := runtime.GOMAXPROCS(-1)
+ c := make(chan bool, procs)
+ for p := 0; p < procs; p++ {
+ go func() {
+ for i := 0; i < 1000; i++ {
+ testCallerFoo(t)
+ }
+ c <- true
+ }()
+ defer func() {
+ <-c
+ }()
+ }
+}
+
+func testCallerFoo(t *testing.T) {
+ testCallerBar(t)
+}
+
+func testCallerBar(t *testing.T) {
+ for i := 0; i < 2; i++ {
+ pc, file, line, ok := runtime.Caller(i)
+ f := runtime.FuncForPC(pc)
+ if !ok ||
+ !strings.HasSuffix(file, "symtab_test.go") ||
+ (i == 0 && !strings.HasSuffix(f.Name(), "testCallerBar")) ||
+ (i == 1 && !strings.HasSuffix(f.Name(), "testCallerFoo")) ||
+ line < 5 || line > 1000 ||
+ f.Entry() >= pc {
+ t.Errorf("incorrect symbol info %d: %t %d %d %s %s %d",
+ i, ok, f.Entry(), pc, f.Name(), file, line)
+ }
+ }
+}
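
The test exercises runtime.Caller and runtime.FuncForPC from several goroutines at once. For reference, the same two calls in ordinary application code look like this (standard library usage, not taken from the change):

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		// Caller(0) reports the file and line of this call site; FuncForPC
		// maps the returned program counter back to the enclosing function.
		pc, file, line, ok := runtime.Caller(0)
		if ok {
			fmt.Printf("%s:%d in %s\n", file, line, runtime.FuncForPC(pc).Name())
		}
	}
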
diff --git a/src/pkg/runtime/windows/amd64/rt0.s b/src/pkg/runtime/windows/amd64/rt0.s
index e54e7edeb..35978bc74 100644
--- a/src/pkg/runtime/windows/amd64/rt0.s
+++ b/src/pkg/runtime/windows/amd64/rt0.s
@@ -8,3 +8,6 @@ TEXT _rt0_amd64_windows(SB),7,$-8
MOVQ $_rt0_amd64(SB), AX
MOVQ SP, DI
JMP AX
+
+DATA runtime·iswindows(SB)/4, $1
+GLOBL runtime·iswindows(SB), $4
diff --git a/src/pkg/runtime/windows/amd64/sys.s b/src/pkg/runtime/windows/amd64/sys.s
index b1eacfc82..2009d164e 100644
--- a/src/pkg/runtime/windows/amd64/sys.s
+++ b/src/pkg/runtime/windows/amd64/sys.s
@@ -20,6 +20,7 @@ TEXT runtime·stdcall_raw(SB),7,$8
CMPQ g(DI), SI
JEQ 3(PC)
MOVQ (g_sched+gobuf_sp)(SI), SP
+ ANDQ $~15, SP
MOVQ SI, g(DI)
SUBQ $0x60, SP