path: root/src/pkg/runtime/mgc0.c
author     Michael Stapelberg <stapelberg@debian.org>   2013-03-04 21:27:36 +0100
committer  Michael Stapelberg <michael@stapelberg.de>   2013-03-04 21:27:36 +0100
commit     04b08da9af0c450d645ab7389d1467308cfc2db8 (patch)
tree       db247935fa4f2f94408edc3acd5d0d4f997aa0d8 /src/pkg/runtime/mgc0.c
parent     917c5fb8ec48e22459d77e3849e6d388f93d3260 (diff)
download   golang-upstream/1.1_hg20130304.tar.gz
Imported Upstream version 1.1~hg20130304 (tag: upstream/1.1_hg20130304)
Diffstat (limited to 'src/pkg/runtime/mgc0.c')
-rw-r--r--  src/pkg/runtime/mgc0.c  1891 lines changed
1 file changed, 1435 insertions(+), 456 deletions(-)
diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c
index e8fb266f4..010f9cd96 100644
--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -8,15 +8,28 @@
#include "arch_GOARCH.h"
#include "malloc.h"
#include "stack.h"
+#include "mgc0.h"
+#include "race.h"
+#include "type.h"
+#include "typekind.h"
+#include "hashmap.h"
enum {
Debug = 0,
- PtrSize = sizeof(void*),
DebugMark = 0, // run second pass to check mark
+ CollectStats = 0,
// Four bits per word (see #defines below).
wordsPerBitmapWord = sizeof(void*)*8/4,
bitShift = sizeof(void*)*8/4,
+
+ handoffThreshold = 4,
+ IntermediateBufferCapacity = 64,
+
+ // Bits in type information
+ PRECISE = 1,
+ LOOP = 2,
+ PC_BITS = PRECISE | LOOP,
};
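
The ti words used throughout this file pack a pointer to a word-aligned GC program together with these flag bits, which is why scanblock below recovers the program counter with ti & ~(uintptr)PC_BITS. A minimal standalone sketch of that encoding in standard C (names and the sample program are illustrative, not taken from the runtime):

#include <stdint.h>

enum { PRECISE = 1, LOOP = 2, PC_BITS = PRECISE | LOOP };

/* Pack a word-aligned program pointer and flag bits into one ti word. */
static uintptr_t ti_pack(uintptr_t *prog, uintptr_t flags) {
	return (uintptr_t)prog | (flags & PC_BITS);
}

/* Recover the pieces, mirroring scanblock's decoding. */
static uintptr_t *ti_pc(uintptr_t ti)     { return (uintptr_t*)(ti & ~(uintptr_t)PC_BITS); }
static int       ti_precise(uintptr_t ti) { return (ti & PRECISE) != 0; }
static int       ti_loop(uintptr_t ti)    { return (ti & LOOP) != 0; }

int main(void) {
	static uintptr_t prog[2] = { sizeof(void*), 0 };  /* shaped like defaultProg */
	uintptr_t ti = ti_pack(prog, PRECISE | LOOP);
	return (ti_pc(ti) == prog && ti_precise(ti) && ti_loop(ti)) ? 0 : 1;
}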
// Bits in per-word bitmap.
@@ -67,25 +80,34 @@ enum {
//
uint32 runtime·worldsema = 1;
-// TODO: Make these per-M.
-static uint64 nhandoff;
-
static int32 gctrace;
+typedef struct Obj Obj;
+struct Obj
+{
+ byte *p; // data pointer
+ uintptr n; // size of data in bytes
+ uintptr ti; // type info
+};
+
+// The size of Workbuf is N*PageSize.
typedef struct Workbuf Workbuf;
struct Workbuf
{
- Workbuf *next;
+#define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr))
+ LFNode node; // must be first
uintptr nobj;
- byte *obj[512-2];
+ Obj obj[SIZE/sizeof(Obj) - 1];
+ uint8 _padding[SIZE%sizeof(Obj) + sizeof(Obj)];
+#undef SIZE
};
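
The obj and _padding arrays are sized so that sizeof(Workbuf) comes out to exactly 2*PageSize, which scanblock later asserts. A worked check of that arithmetic, assuming illustrative 64-bit values (PageSize 4096, a 16-byte LFNode, a 24-byte Obj):

#include <assert.h>
#include <stdint.h>

enum { PageSize = 4096 };
typedef struct { uint64_t a, b; } LFNode;            /* assumed 16 bytes */
typedef struct { char *p; uintptr_t n, ti; } Obj;    /* 24 bytes on LP64 */

#define SIZE (2*PageSize - sizeof(LFNode) - sizeof(uintptr_t))

typedef struct {
	LFNode    node;                                     /* must be first */
	uintptr_t nobj;
	Obj       obj[SIZE/sizeof(Obj) - 1];                /* 339 entries: 8136 bytes */
	uint8_t   _padding[SIZE%sizeof(Obj) + sizeof(Obj)]; /* 8 + 24 = 32 bytes */
} Workbuf;

int main(void) {
	/* obj[] and _padding together fill SIZE bytes exactly,
	   so the struct spans two whole pages, as scanblock checks. */
	assert(sizeof(Workbuf) == 2*PageSize);
	return 0;
}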
typedef struct Finalizer Finalizer;
struct Finalizer
{
- void (*fn)(void*);
+ FuncVal *fn;
void *arg;
- int32 nret;
+ uintptr nret;
};
typedef struct FinBlock FinBlock;
@@ -99,9 +121,13 @@ struct FinBlock
};
extern byte data[];
-extern byte etext[];
+extern byte edata[];
+extern byte bss[];
extern byte ebss[];
+extern byte gcdata[];
+extern byte gcbss[];
+
static G *fing;
static FinBlock *finq; // list of finalizers that are to be executed
static FinBlock *finc; // cache of free blocks
@@ -116,89 +142,244 @@ static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);
static struct {
- Lock fmu;
- Workbuf *full;
- Lock emu;
- Workbuf *empty;
+ uint64 full; // lock-free list of full blocks
+ uint64 empty; // lock-free list of empty blocks
+ byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
uint32 nproc;
volatile uint32 nwait;
volatile uint32 ndone;
+ volatile uint32 debugmarkdone;
Note alldone;
- Lock markgate;
- Lock sweepgate;
- MSpan *spans;
+ ParFor *markfor;
+ ParFor *sweepfor;
Lock;
byte *chunk;
uintptr nchunk;
+
+ Obj *roots;
+ uint32 nroot;
+ uint32 rootcap;
} work;
-// scanblock scans a block of n bytes starting at pointer b for references
-// to other objects, scanning any it finds recursively until there are no
-// unscanned objects left. Instead of using an explicit recursion, it keeps
-// a work list in the Workbuf* structures and loops in the main function
-// body. Keeping an explicit work list is easier on the stack allocator and
-// more efficient.
+enum {
+ GC_DEFAULT_PTR = GC_NUM_INSTR,
+ GC_MAP_NEXT,
+ GC_CHAN,
+
+ GC_NUM_INSTR2
+};
+
+static struct {
+ struct {
+ uint64 sum;
+ uint64 cnt;
+ } ptr;
+ uint64 nbytes;
+ struct {
+ uint64 sum;
+ uint64 cnt;
+ uint64 notype;
+ uint64 typelookup;
+ } obj;
+ uint64 rescan;
+ uint64 rescanbytes;
+ uint64 instr[GC_NUM_INSTR2];
+ uint64 putempty;
+ uint64 getfull;
+} gcstats;
+
+// markonly marks an object. It returns true if the object
+// has been marked by this function, false otherwise.
+// This function isn't thread-safe and doesn't append the object to any buffer.
+static bool
+markonly(void *obj)
+{
+ byte *p;
+ uintptr *bitp, bits, shift, x, xbits, off;
+ MSpan *s;
+ PageID k;
+
+ // Words outside the arena cannot be pointers.
+ if(obj < runtime·mheap->arena_start || obj >= runtime·mheap->arena_used)
+ return false;
+
+ // obj may be a pointer to a live object.
+ // Try to find the beginning of the object.
+
+ // Round down to word boundary.
+ obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+
+ // Find bits for this word.
+ off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start;
+ bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
+
+ // Pointing at the beginning of a block?
+ if((bits & (bitAllocated|bitBlockBoundary)) != 0)
+ goto found;
+
+ // Otherwise consult span table to find beginning.
+ // (Manually inlined copy of MHeap_LookupMaybe.)
+ k = (uintptr)obj>>PageShift;
+ x = k;
+ if(sizeof(void*) == 8)
+ x -= (uintptr)runtime·mheap->arena_start>>PageShift;
+ s = runtime·mheap->map[x];
+ if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
+ return false;
+ p = (byte*)((uintptr)s->start<<PageShift);
+ if(s->sizeclass == 0) {
+ obj = p;
+ } else {
+ if((byte*)obj >= (byte*)s->limit)
+ return false;
+ uintptr size = s->elemsize;
+ int32 i = ((byte*)obj - p)/size;
+ obj = p+i*size;
+ }
+
+ // Now that we know the object header, reload bits.
+ off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start;
+ bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
+
+found:
+ // Now we have bits, bitp, and shift correct for
+ // obj pointing at the base of the object.
+ // Only care about allocated and not marked.
+ if((bits & (bitAllocated|bitMarked)) != bitAllocated)
+ return false;
+ *bitp |= bitMarked<<shift;
+
+ // The object is now marked
+ return true;
+}
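
markonly's off/bitp/shift computation is the heap bitmap layout used throughout this file: four bits per heap word, packed into uintptr-sized bitmap words stored just below arena_start and growing downward. The same address arithmetic in isolation, with an assumed 64-bit arena base purely for illustration (nothing is dereferenced):

#include <stdint.h>
#include <stdio.h>

enum { wordsPerBitmapWord = sizeof(void*)*8/4 };     /* 16 on 64-bit */

int main(void) {
	uintptr_t arena_start = 0x100000000UL;           /* hypothetical arena base */
	uintptr_t obj         = arena_start + 0x428;

	uintptr_t off   = (obj - arena_start) / sizeof(uintptr_t);  /* word index: 133 */
	uintptr_t *bitp = (uintptr_t*)arena_start - off/wordsPerBitmapWord - 1;
	uintptr_t shift = off % wordsPerBitmapWord;      /* bit position of this word's flags */

	printf("word %lu -> bitmap word %p, shift %lu\n",
	       (unsigned long)off, (void*)bitp, (unsigned long)shift);
	return 0;
}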
+
+// PtrTarget and BitTarget are structures used by intermediate buffers.
+// The intermediate buffers hold GC data before it
+// is moved/flushed to the work buffer (Workbuf).
+// The size of an intermediate buffer is very small,
+// such as 32 or 64 elements.
+typedef struct PtrTarget PtrTarget;
+struct PtrTarget
+{
+ void *p;
+ uintptr ti;
+};
+
+typedef struct BitTarget BitTarget;
+struct BitTarget
+{
+ void *p;
+ uintptr ti;
+ uintptr *bitp, shift;
+};
+
+typedef struct BufferList BufferList;
+struct BufferList
+{
+ PtrTarget ptrtarget[IntermediateBufferCapacity];
+ BitTarget bittarget[IntermediateBufferCapacity];
+ Obj obj[IntermediateBufferCapacity];
+ BufferList *next;
+};
+static BufferList *bufferList;
+
+static Lock lock;
+static Type *itabtype;
+
+static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
+
+// flushptrbuf moves data from the PtrTarget buffer to the work buffer.
+// The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned,
+// while the work buffer contains blocks which have been marked
+// and are prepared to be scanned by the garbage collector.
+//
+// _wp, _wbuf, _nobj are input/output parameters specifying the work buffer.
+// bitbuf holds temporary data generated by this function.
+//
+// A simplified drawing explaining how the todo-list moves from one structure to another:
+//
+// scanblock
+// (find pointers)
+// Obj ------> PtrTarget (pointer targets)
+// ↑ |
+// | | flushptrbuf (1st part,
+// | | find block start)
+// | ↓
+// `--------- BitTarget (pointer targets and the corresponding locations in bitmap)
+// flushptrbuf
+// (2nd part, mark and enqueue)
static void
-scanblock(byte *b, int64 n)
+flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj, BitTarget *bitbuf)
{
- byte *obj, *arena_start, *arena_used, *p;
- void **vp;
- uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
+ byte *p, *arena_start, *obj;
+ uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
MSpan *s;
PageID k;
- void **wp;
+ Obj *wp;
Workbuf *wbuf;
- bool keepworking;
+ PtrTarget *ptrbuf_end;
+ BitTarget *bitbufpos, *bt;
- if((int64)(uintptr)n != n || n < 0) {
- runtime·printf("scanblock %p %D\n", b, n);
- runtime·throw("scanblock");
- }
+ arena_start = runtime·mheap->arena_start;
- // Memory arena parameters.
- arena_start = runtime·mheap.arena_start;
- arena_used = runtime·mheap.arena_used;
- nproc = work.nproc;
+ wp = *_wp;
+ wbuf = *_wbuf;
+ nobj = *_nobj;
- wbuf = nil; // current work buffer
- wp = nil; // storage for next queued pointer (write pointer)
- nobj = 0; // number of queued objects
+ ptrbuf_end = *ptrbufpos;
+ n = ptrbuf_end - ptrbuf;
+ *ptrbufpos = ptrbuf;
- // Scanblock helpers pass b==nil.
- // The main proc needs to return to make more
- // calls to scanblock. But if work.nproc==1 then
- // might as well process blocks as soon as we
- // have them.
- keepworking = b == nil || work.nproc == 1;
+ if(CollectStats) {
+ runtime·xadd64(&gcstats.ptr.sum, n);
+ runtime·xadd64(&gcstats.ptr.cnt, 1);
+ }
- // Align b to a word boundary.
- off = (uintptr)b & (PtrSize-1);
- if(off != 0) {
- b += PtrSize - off;
- n -= PtrSize - off;
+ // If buffer is nearly full, get a new one.
+ if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
+ if(wbuf != nil)
+ wbuf->nobj = nobj;
+ wbuf = getempty(wbuf);
+ wp = wbuf->obj;
+ nobj = 0;
+
+ if(n >= nelem(wbuf->obj))
+ runtime·throw("ptrbuf has to be smaller than WorkBuf");
}
- for(;;) {
- // Each iteration scans the block b of length n, queueing pointers in
- // the work buffer.
- if(Debug > 1)
- runtime·printf("scanblock %p %D\n", b, n);
+ // TODO(atom): This block is a branch of an if-then-else statement.
+ // The single-threaded branch may be added in a later CL.
+ {
+ // Multi-threaded version.
- vp = (void**)b;
- n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */
- for(i=0; i<n; i++) {
- obj = (byte*)vp[i];
+ bitbufpos = bitbuf;
- // Words outside the arena cannot be pointers.
- if((byte*)obj < arena_start || (byte*)obj >= arena_used)
- continue;
+ while(ptrbuf < ptrbuf_end) {
+ obj = ptrbuf->p;
+ ti = ptrbuf->ti;
+ ptrbuf++;
+
+ // obj belongs to interval [mheap.arena_start, mheap.arena_used).
+ if(Debug > 1) {
+ if(obj < runtime·mheap->arena_start || obj >= runtime·mheap->arena_used)
+ runtime·throw("object is outside of mheap");
+ }
// obj may be a pointer to a live object.
// Try to find the beginning of the object.
// Round down to word boundary.
- obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+ if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
+ obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+ ti = 0;
+ }
// Find bits for this word.
off = (uintptr*)obj - (uintptr*)arena_start;
@@ -211,6 +392,8 @@ scanblock(byte *b, int64 n)
if((bits & (bitAllocated|bitBlockBoundary)) != 0)
goto found;
+ ti = 0;
+
// Pointing just past the beginning?
// Scan backward a little to find a block boundary.
for(j=shift; j-->0; ) {
@@ -228,16 +411,16 @@ scanblock(byte *b, int64 n)
x = k;
if(sizeof(void*) == 8)
x -= (uintptr)arena_start>>PageShift;
- s = runtime·mheap.map[x];
+ s = runtime·mheap->map[x];
if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
continue;
- p = (byte*)((uintptr)s->start<<PageShift);
+ p = (byte*)((uintptr)s->start<<PageShift);
if(s->sizeclass == 0) {
obj = p;
} else {
if((byte*)obj >= (byte*)s->limit)
continue;
- size = runtime·class_to_size[s->sizeclass];
+ size = s->elemsize;
int32 i = ((byte*)obj - p)/size;
obj = p+i*size;
}
@@ -255,80 +438,606 @@ scanblock(byte *b, int64 n)
// Only care about allocated and not marked.
if((bits & (bitAllocated|bitMarked)) != bitAllocated)
continue;
- if(nproc == 1)
- *bitp |= bitMarked<<shift;
- else {
- for(;;) {
- x = *bitp;
- if(x & (bitMarked<<shift))
- goto continue_obj;
- if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
- break;
- }
- }
+
+ *bitbufpos++ = (BitTarget){obj, ti, bitp, shift};
+ }
+
+ runtime·lock(&lock);
+ for(bt=bitbuf; bt<bitbufpos; bt++){
+ xbits = *bt->bitp;
+ bits = xbits >> bt->shift;
+ if((bits & bitMarked) != 0)
+ continue;
+
+ // Mark the block
+ *bt->bitp = xbits | (bitMarked << bt->shift);
// If object has no pointers, don't need to scan further.
if((bits & bitNoPointers) != 0)
continue;
- // If another proc wants a pointer, give it some.
- if(nobj > 4 && work.nwait > 0 && work.full == nil) {
+ obj = bt->p;
+
+ // Ask span about size class.
+ // (Manually inlined copy of MHeap_Lookup.)
+ x = (uintptr)obj >> PageShift;
+ if(sizeof(void*) == 8)
+ x -= (uintptr)arena_start>>PageShift;
+ s = runtime·mheap->map[x];
+
+ PREFETCH(obj);
+
+ *wp = (Obj){obj, s->elemsize, bt->ti};
+ wp++;
+ nobj++;
+ }
+ runtime·unlock(&lock);
+
+ // If another proc wants a pointer, give it some.
+ if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = wbuf->obj + nobj;
+ }
+ }
+
+ *_wp = wp;
+ *_wbuf = wbuf;
+ *_nobj = nobj;
+}
+
+static void
+flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
+{
+ uintptr nobj, off;
+ Obj *wp, obj;
+ Workbuf *wbuf;
+ Obj *objbuf_end;
+
+ wp = *_wp;
+ wbuf = *_wbuf;
+ nobj = *_nobj;
+
+ objbuf_end = *objbufpos;
+ *objbufpos = objbuf;
+
+ while(objbuf < objbuf_end) {
+ obj = *objbuf++;
+
+ // Align obj.b to a word boundary.
+ off = (uintptr)obj.p & (PtrSize-1);
+ if(off != 0) {
+ obj.p += PtrSize - off;
+ obj.n -= PtrSize - off;
+ obj.ti = 0;
+ }
+
+ if(obj.p == nil || obj.n == 0)
+ continue;
+
+ // If buffer is full, get a new one.
+ if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
+ if(wbuf != nil)
wbuf->nobj = nobj;
- wbuf = handoff(wbuf);
- nobj = wbuf->nobj;
- wp = wbuf->obj + nobj;
+ wbuf = getempty(wbuf);
+ wp = wbuf->obj;
+ nobj = 0;
+ }
+
+ *wp = obj;
+ wp++;
+ nobj++;
+ }
+
+ // If another proc wants a pointer, give it some.
+ if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = wbuf->obj + nobj;
+ }
+
+ *_wp = wp;
+ *_wbuf = wbuf;
+ *_nobj = nobj;
+}
+
+// Program that scans the whole block and treats every block element as a potential pointer
+static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
+
+// Hashmap iterator program
+static uintptr mapProg[2] = {0, GC_MAP_NEXT};
+
+// Hchan program
+static uintptr chanProg[2] = {0, GC_CHAN};
+
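
As the interpreter in scanblock below makes explicit, a GC program is a uintptr array whose first word is the element size, followed by opcodes and their operands: GC_PTR takes an offset and a type-info word, GC_APTR just an offset, GC_END closes the object or loop iteration. A hypothetical hand-written program for a one-pointer struct, following that encoding (the opcode values here are stand-ins; the real ones live in mgc0.h):

#include <stddef.h>
#include <stdint.h>

enum { GC_END = 0, GC_PTR = 1, GC_APTR = 2 };  /* illustrative values */
enum { PRECISE = 1, LOOP = 2 };

typedef struct { void *p; long n; } T;

static uintptr_t tProg[] = {
	sizeof(T),               /* pc[0]: element size, read into stack_top.elemsize */
	GC_APTR, offsetof(T, p), /* scan the untyped pointer at offset 0 */
	GC_END,                  /* end of object */
};

/* A type-info word for T: the program address with PRECISE in the low bits. */
static uintptr_t tTi(void) { return (uintptr_t)tProg | PRECISE; }

int main(void) { return (tTi() & PRECISE) ? 0 : 1; }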
+// Local variables of a program fragment or loop
+typedef struct Frame Frame;
+struct Frame {
+ uintptr count, elemsize, b;
+ uintptr *loop_or_ret;
+};
+
+// scanblock scans a block of n bytes starting at pointer b for references
+// to other objects, scanning any it finds recursively until there are no
+// unscanned objects left. Instead of using an explicit recursion, it keeps
+// a work list in the Workbuf* structures and loops in the main function
+// body. Keeping an explicit work list is easier on the stack allocator and
+// more efficient.
+//
+// wbuf: current work buffer
+// wp: storage for next queued pointer (write pointer)
+// nobj: number of queued objects
+static void
+scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
+{
+ byte *b, *arena_start, *arena_used;
+ uintptr n, i, end_b, elemsize, size, ti, objti, count, type;
+ uintptr *pc, precise_type, nominal_size;
+ uintptr *map_ret, mapkey_size, mapval_size, mapkey_ti, mapval_ti;
+ void *obj;
+ Type *t;
+ Slice *sliceptr;
+ Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
+ BufferList *scanbuffers;
+ PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos;
+ BitTarget *bitbuf;
+ Obj *objbuf, *objbuf_end, *objbufpos;
+ Eface *eface;
+ Iface *iface;
+ Hmap *hmap;
+ MapType *maptype;
+ bool didmark, mapkey_kind, mapval_kind;
+ struct hash_gciter map_iter;
+ struct hash_gciter_data d;
+ Hchan *chan;
+ ChanType *chantype;
+
+ if(sizeof(Workbuf) % PageSize != 0)
+ runtime·throw("scanblock: size of Workbuf is suboptimal");
+
+ // Memory arena parameters.
+ arena_start = runtime·mheap->arena_start;
+ arena_used = runtime·mheap->arena_used;
+
+ stack_ptr = stack+nelem(stack)-1;
+
+ precise_type = false;
+ nominal_size = 0;
+
+ // Allocate ptrbuf, bitbuf
+ {
+ runtime·lock(&lock);
+
+ if(bufferList == nil) {
+ bufferList = runtime·SysAlloc(sizeof(*bufferList));
+ if(bufferList == nil)
+ runtime·throw("runtime: cannot allocate memory");
+ bufferList->next = nil;
+ }
+ scanbuffers = bufferList;
+ bufferList = bufferList->next;
+
+ ptrbuf = &scanbuffers->ptrtarget[0];
+ ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
+ bitbuf = &scanbuffers->bittarget[0];
+ objbuf = &scanbuffers->obj[0];
+ objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
+
+ runtime·unlock(&lock);
+ }
+
+ ptrbufpos = ptrbuf;
+ objbufpos = objbuf;
+
+ // (Silence the compiler)
+ map_ret = nil;
+ mapkey_size = mapval_size = 0;
+ mapkey_kind = mapval_kind = false;
+ mapkey_ti = mapval_ti = 0;
+ chan = nil;
+ chantype = nil;
+
+ goto next_block;
+
+ for(;;) {
+ // Each iteration scans the block b of length n, queueing pointers in
+ // the work buffer.
+ if(Debug > 1) {
+ runtime·printf("scanblock %p %D\n", b, (int64)n);
+ }
+
+ if(CollectStats) {
+ runtime·xadd64(&gcstats.nbytes, n);
+ runtime·xadd64(&gcstats.obj.sum, nobj);
+ runtime·xadd64(&gcstats.obj.cnt, 1);
+ }
+
+ if(ti != 0) {
+ pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
+ precise_type = (ti & PRECISE);
+ stack_top.elemsize = pc[0];
+ if(!precise_type)
+ nominal_size = pc[0];
+ if(ti & LOOP) {
+ stack_top.count = 0; // 0 means an infinite number of iterations
+ stack_top.loop_or_ret = pc+1;
+ } else {
+ stack_top.count = 1;
}
+ } else if(UseSpanType) {
+ if(CollectStats)
+ runtime·xadd64(&gcstats.obj.notype, 1);
+
+ type = runtime·gettype(b);
+ if(type != 0) {
+ if(CollectStats)
+ runtime·xadd64(&gcstats.obj.typelookup, 1);
+
+ t = (Type*)(type & ~(uintptr)(PtrSize-1));
+ switch(type & (PtrSize-1)) {
+ case TypeInfo_SingleObject:
+ pc = (uintptr*)t->gc;
+ precise_type = true; // type information about 'b' is precise
+ stack_top.count = 1;
+ stack_top.elemsize = pc[0];
+ break;
+ case TypeInfo_Array:
+ pc = (uintptr*)t->gc;
+ if(pc[0] == 0)
+ goto next_block;
+ precise_type = true; // type information about 'b' is precise
+ stack_top.count = 0; // 0 means an infinite number of iterations
+ stack_top.elemsize = pc[0];
+ stack_top.loop_or_ret = pc+1;
+ break;
+ case TypeInfo_Map:
+ hmap = (Hmap*)b;
+ maptype = (MapType*)t;
+ if(hash_gciter_init(hmap, &map_iter)) {
+ mapkey_size = maptype->key->size;
+ mapkey_kind = maptype->key->kind;
+ mapkey_ti = (uintptr)maptype->key->gc | PRECISE;
+ mapval_size = maptype->elem->size;
+ mapval_kind = maptype->elem->kind;
+ mapval_ti = (uintptr)maptype->elem->gc | PRECISE;
+
+ map_ret = 0;
+ pc = mapProg;
+ } else {
+ goto next_block;
+ }
+ break;
+ case TypeInfo_Chan:
+ chan = (Hchan*)b;
+ chantype = (ChanType*)t;
+ pc = chanProg;
+ break;
+ default:
+ runtime·throw("scanblock: invalid type");
+ return;
+ }
+ } else {
+ pc = defaultProg;
+ }
+ } else {
+ pc = defaultProg;
+ }
+
+ pc++;
+ stack_top.b = (uintptr)b;
- // If buffer is full, get a new one.
- if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
- if(wbuf != nil)
- wbuf->nobj = nobj;
- wbuf = getempty(wbuf);
- wp = wbuf->obj;
- nobj = 0;
+ end_b = (uintptr)b + n - PtrSize;
+
+ for(;;) {
+ if(CollectStats)
+ runtime·xadd64(&gcstats.instr[pc[0]], 1);
+
+ obj = nil;
+ objti = 0;
+ switch(pc[0]) {
+ case GC_PTR:
+ obj = *(void**)(stack_top.b + pc[1]);
+ objti = pc[2];
+ pc += 3;
+ break;
+
+ case GC_SLICE:
+ sliceptr = (Slice*)(stack_top.b + pc[1]);
+ if(sliceptr->cap != 0) {
+ obj = sliceptr->array;
+ objti = pc[2] | PRECISE | LOOP;
}
- *wp++ = obj;
- nobj++;
- continue_obj:;
+ pc += 3;
+ break;
+
+ case GC_APTR:
+ obj = *(void**)(stack_top.b + pc[1]);
+ pc += 2;
+ break;
+
+ case GC_STRING:
+ obj = *(void**)(stack_top.b + pc[1]);
+ pc += 2;
+ break;
+
+ case GC_EFACE:
+ eface = (Eface*)(stack_top.b + pc[1]);
+ pc += 2;
+ if(eface->type != nil && (eface->data >= arena_start && eface->data < arena_used)) {
+ t = eface->type;
+ if(t->size <= sizeof(void*)) {
+ if((t->kind & KindNoPointers))
+ break;
+
+ obj = eface->data;
+ if((t->kind & ~KindNoPointers) == KindPtr)
+ objti = (uintptr)((PtrType*)t)->elem->gc;
+ } else {
+ obj = eface->data;
+ objti = (uintptr)t->gc;
+ }
+ }
+ break;
+
+ case GC_IFACE:
+ iface = (Iface*)(stack_top.b + pc[1]);
+ pc += 2;
+ if(iface->tab == nil)
+ break;
+
+ // iface->tab
+ if((void*)iface->tab >= arena_start && (void*)iface->tab < arena_used) {
+ *ptrbufpos++ = (PtrTarget){iface->tab, (uintptr)itabtype->gc};
+ if(ptrbufpos == ptrbuf_end)
+ flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf);
+ }
+
+ // iface->data
+ if(iface->data >= arena_start && iface->data < arena_used) {
+ t = iface->tab->type;
+ if(t->size <= sizeof(void*)) {
+ if((t->kind & KindNoPointers))
+ break;
+
+ obj = iface->data;
+ if((t->kind & ~KindNoPointers) == KindPtr)
+ objti = (uintptr)((PtrType*)t)->elem->gc;
+ } else {
+ obj = iface->data;
+ objti = (uintptr)t->gc;
+ }
+ }
+ break;
+
+ case GC_DEFAULT_PTR:
+ while((i = stack_top.b) <= end_b) {
+ stack_top.b += PtrSize;
+ obj = *(byte**)i;
+ if(obj >= arena_start && obj < arena_used) {
+ *ptrbufpos++ = (PtrTarget){obj, 0};
+ if(ptrbufpos == ptrbuf_end)
+ flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf);
+ }
+ }
+ goto next_block;
+
+ case GC_END:
+ if(--stack_top.count != 0) {
+ // Next iteration of a loop if possible.
+ elemsize = stack_top.elemsize;
+ stack_top.b += elemsize;
+ if(stack_top.b + elemsize <= end_b+PtrSize) {
+ pc = stack_top.loop_or_ret;
+ continue;
+ }
+ i = stack_top.b;
+ } else {
+ // Stack pop if possible.
+ if(stack_ptr+1 < stack+nelem(stack)) {
+ pc = stack_top.loop_or_ret;
+ stack_top = *(++stack_ptr);
+ continue;
+ }
+ i = (uintptr)b + nominal_size;
+ }
+ if(!precise_type) {
+ // Quickly scan [b+i,b+n) for possible pointers.
+ for(; i<=end_b; i+=PtrSize) {
+ if(*(byte**)i != nil) {
+ // Found a value that may be a pointer.
+ // Do a rescan of the entire block.
+ enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj);
+ if(CollectStats) {
+ runtime·xadd64(&gcstats.rescan, 1);
+ runtime·xadd64(&gcstats.rescanbytes, n);
+ }
+ break;
+ }
+ }
+ }
+ goto next_block;
+
+ case GC_ARRAY_START:
+ i = stack_top.b + pc[1];
+ count = pc[2];
+ elemsize = pc[3];
+ pc += 4;
+
+ // Stack push.
+ *stack_ptr-- = stack_top;
+ stack_top = (Frame){count, elemsize, i, pc};
+ continue;
+
+ case GC_ARRAY_NEXT:
+ if(--stack_top.count != 0) {
+ stack_top.b += stack_top.elemsize;
+ pc = stack_top.loop_or_ret;
+ } else {
+ // Stack pop.
+ stack_top = *(++stack_ptr);
+ pc += 1;
+ }
+ continue;
+
+ case GC_CALL:
+ // Stack push.
+ *stack_ptr-- = stack_top;
+ stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
+ pc = (uintptr*)((byte*)pc + *(int32*)(pc+2)); // target of the CALL instruction
+ continue;
+
+ case GC_MAP_PTR:
+ hmap = *(Hmap**)(stack_top.b + pc[1]);
+ if(hmap == nil) {
+ pc += 3;
+ continue;
+ }
+ runtime·lock(&lock);
+ didmark = markonly(hmap);
+ runtime·unlock(&lock);
+ if(didmark) {
+ maptype = (MapType*)pc[2];
+ if(hash_gciter_init(hmap, &map_iter)) {
+ mapkey_size = maptype->key->size;
+ mapkey_kind = maptype->key->kind;
+ mapkey_ti = (uintptr)maptype->key->gc | PRECISE;
+ mapval_size = maptype->elem->size;
+ mapval_kind = maptype->elem->kind;
+ mapval_ti = (uintptr)maptype->elem->gc | PRECISE;
+
+ // Start mapProg.
+ map_ret = pc+3;
+ pc = mapProg+1;
+ } else {
+ pc += 3;
+ }
+ } else {
+ pc += 3;
+ }
+ continue;
+
+ case GC_MAP_NEXT:
+ // Add all keys and values to buffers, mark all subtables.
+ while(hash_gciter_next(&map_iter, &d)) {
+ // buffers: reserve space for 2 objects.
+ if(ptrbufpos+2 >= ptrbuf_end)
+ flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf);
+ if(objbufpos+2 >= objbuf_end)
+ flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
+
+ if(d.st != nil) {
+ runtime·lock(&lock);
+ markonly(d.st);
+ runtime·unlock(&lock);
+ }
+ if(d.key_data != nil) {
+ if(!(mapkey_kind & KindNoPointers) || d.indirectkey) {
+ if(!d.indirectkey)
+ *objbufpos++ = (Obj){d.key_data, mapkey_size, mapkey_ti};
+ else
+ *ptrbufpos++ = (PtrTarget){*(void**)d.key_data, mapkey_ti};
+ }
+ if(!(mapval_kind & KindNoPointers) || d.indirectval) {
+ if(!d.indirectval)
+ *objbufpos++ = (Obj){d.val_data, mapval_size, mapval_ti};
+ else
+ *ptrbufpos++ = (PtrTarget){*(void**)d.val_data, mapval_ti};
+ }
+ }
+ }
+ if(map_ret == 0)
+ goto next_block;
+ pc = map_ret;
+ continue;
+
+ case GC_REGION:
+ obj = (void*)(stack_top.b + pc[1]);
+ size = pc[2];
+ objti = pc[3];
+ pc += 4;
+
+ *objbufpos++ = (Obj){obj, size, objti};
+ if(objbufpos == objbuf_end)
+ flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
+ break;
+
+ case GC_CHAN:
+ // There are no heap pointers in struct Hchan,
+ // so we can ignore the leading sizeof(Hchan) bytes.
+ if(!(chantype->elem->kind & KindNoPointers)) {
+ // Channel's buffer follows Hchan immediately in memory.
+ // Size of buffer (cap(c)) is second int in the chan struct.
+ n = ((uintgo*)chan)[1];
+ if(n > 0) {
+ // TODO(atom): split into two chunks so that only the
+ // in-use part of the circular buffer is scanned.
+ // (Channel routines zero the unused part, so the current
+ // code does not lead to leaks, it's just a little inefficient.)
+ *objbufpos++ = (Obj){(byte*)chan+runtime·Hchansize, n*chantype->elem->size,
+ (uintptr)chantype->elem->gc | PRECISE | LOOP};
+ if(objbufpos == objbuf_end)
+ flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
+ }
+ }
+ goto next_block;
+
+ default:
+ runtime·throw("scanblock: invalid GC instruction");
+ return;
}
+ if(obj >= arena_start && obj < arena_used) {
+ *ptrbufpos++ = (PtrTarget){obj, objti};
+ if(ptrbufpos == ptrbuf_end)
+ flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf);
+ }
+ }
+
+ next_block:
// Done scanning [b, b+n). Prepare for the next iteration of
- // the loop by setting b and n to the parameters for the next block.
+ // the loop by setting b, n, ti to the parameters for the next block.
- // Fetch b from the work buffer.
if(nobj == 0) {
- if(!keepworking) {
- putempty(wbuf);
- return;
+ flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj, bitbuf);
+ flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
+
+ if(nobj == 0) {
+ if(!keepworking) {
+ if(wbuf)
+ putempty(wbuf);
+ goto endscan;
+ }
+ // Emptied our buffer: refill.
+ wbuf = getfull(wbuf);
+ if(wbuf == nil)
+ goto endscan;
+ nobj = wbuf->nobj;
+ wp = wbuf->obj + wbuf->nobj;
}
- // Emptied our buffer: refill.
- wbuf = getfull(wbuf);
- if(wbuf == nil)
- return;
- nobj = wbuf->nobj;
- wp = wbuf->obj + wbuf->nobj;
}
- b = *--wp;
- nobj--;
- // Ask span about size class.
- // (Manually inlined copy of MHeap_Lookup.)
- x = (uintptr)b>>PageShift;
- if(sizeof(void*) == 8)
- x -= (uintptr)arena_start>>PageShift;
- s = runtime·mheap.map[x];
- if(s->sizeclass == 0)
- n = s->npages<<PageShift;
- else
- n = runtime·class_to_size[s->sizeclass];
+ // Fetch b from the work buffer.
+ --wp;
+ b = wp->p;
+ n = wp->n;
+ ti = wp->ti;
+ nobj--;
}
+
+endscan:
+ runtime·lock(&lock);
+ scanbuffers->next = bufferList;
+ bufferList = scanbuffers;
+ runtime·unlock(&lock);
}
// debug_scanblock is the debug copy of scanblock.
// it is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
static void
-debug_scanblock(byte *b, int64 n)
+debug_scanblock(byte *b, uintptr n)
{
byte *obj, *p;
void **vp;
@@ -338,8 +1047,8 @@ debug_scanblock(byte *b, int64 n)
if(!DebugMark)
runtime·throw("debug_scanblock without DebugMark");
- if((int64)(uintptr)n != n || n < 0) {
- runtime·printf("debug_scanblock %p %D\n", b, n);
+ if((intptr)n < 0) {
+ runtime·printf("debug_scanblock %p %D\n", b, (int64)n);
runtime·throw("debug_scanblock");
}
@@ -356,33 +1065,31 @@ debug_scanblock(byte *b, int64 n)
obj = (byte*)vp[i];
// Words outside the arena cannot be pointers.
- if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used)
+ if((byte*)obj < runtime·mheap->arena_start || (byte*)obj >= runtime·mheap->arena_used)
continue;
// Round down to word boundary.
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
// Consult span table to find beginning.
- s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj);
+ s = runtime·MHeap_LookupMaybe(runtime·mheap, obj);
if(s == nil)
continue;
-
p = (byte*)((uintptr)s->start<<PageShift);
+ size = s->elemsize;
if(s->sizeclass == 0) {
obj = p;
- size = (uintptr)s->npages<<PageShift;
} else {
if((byte*)obj >= (byte*)s->limit)
continue;
- size = runtime·class_to_size[s->sizeclass];
int32 i = ((byte*)obj - p)/size;
obj = p+i*size;
}
// Now that we know the object header, reload bits.
- off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
- bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)obj - (uintptr*)runtime·mheap->arena_start;
+ bitp = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
xbits = *bitp;
bits = xbits >> shift;
@@ -404,53 +1111,98 @@ debug_scanblock(byte *b, int64 n)
}
}
+// Append obj to the work buffer.
+// _wbuf, _wp, _nobj are input/output parameters specifying the work buffer.
+static void
+enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
+{
+ uintptr nobj, off;
+ Obj *wp;
+ Workbuf *wbuf;
+
+ if(Debug > 1)
+ runtime·printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);
+
+ // Align obj.b to a word boundary.
+ off = (uintptr)obj.p & (PtrSize-1);
+ if(off != 0) {
+ obj.p += PtrSize - off;
+ obj.n -= PtrSize - off;
+ obj.ti = 0;
+ }
+
+ if(obj.p == nil || obj.n == 0)
+ return;
+
+ // Load work buffer state
+ wp = *_wp;
+ wbuf = *_wbuf;
+ nobj = *_nobj;
+
+ // If another proc wants a pointer, give it some.
+ if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = wbuf->obj + nobj;
+ }
+
+ // If buffer is full, get a new one.
+ if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
+ if(wbuf != nil)
+ wbuf->nobj = nobj;
+ wbuf = getempty(wbuf);
+ wp = wbuf->obj;
+ nobj = 0;
+ }
+
+ *wp = obj;
+ wp++;
+ nobj++;
+
+ // Save work buffer state
+ *_wp = wp;
+ *_wbuf = wbuf;
+ *_nobj = nobj;
+}
+
+static void
+markroot(ParFor *desc, uint32 i)
+{
+ Obj *wp;
+ Workbuf *wbuf;
+ uintptr nobj;
+
+ USED(&desc);
+ wp = nil;
+ wbuf = nil;
+ nobj = 0;
+ enqueue(work.roots[i], &wbuf, &wp, &nobj);
+ scanblock(wbuf, wp, nobj, false);
+}
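
work.markfor hands the indices [0, work.nroot) to the participating threads, and each call of markroot scans one root to completion. A rough C11-threads sketch of that index-distribution shape (the runtime's ParFor additionally work-steals and yields; this is only the core idea, and the thread count is arbitrary):

#include <stdatomic.h>
#include <stdio.h>
#include <threads.h>

enum { NROOT = 100, NTHREAD = 4 };
static atomic_uint next;                   /* next unclaimed root index */

static void body(unsigned i) { (void)i; }  /* stand-in for markroot(desc, i) */

static int worker(void *arg) {
	(void)arg;
	for (unsigned i; (i = atomic_fetch_add(&next, 1)) < NROOT; )
		body(i);
	return 0;
}

int main(void) {
	thrd_t t[NTHREAD];
	for (int k = 0; k < NTHREAD; k++) thrd_create(&t[k], worker, NULL);
	for (int k = 0; k < NTHREAD; k++) thrd_join(t[k], NULL);
	puts("all roots claimed exactly once");
	return 0;
}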
+
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
- if(work.nproc == 1) {
- // Put b on full list.
- if(b != nil) {
- b->next = work.full;
- work.full = b;
+ if(b != nil)
+ runtime·lfstackpush(&work.full, &b->node);
+ b = (Workbuf*)runtime·lfstackpop(&work.empty);
+ if(b == nil) {
+ // Need to allocate.
+ runtime·lock(&work);
+ if(work.nchunk < sizeof *b) {
+ work.nchunk = 1<<20;
+ work.chunk = runtime·SysAlloc(work.nchunk);
+ if(work.chunk == nil)
+ runtime·throw("runtime: cannot allocate memory");
}
- // Grab from empty list if possible.
- b = work.empty;
- if(b != nil) {
- work.empty = b->next;
- goto haveb;
- }
- } else {
- // Put b on full list.
- if(b != nil) {
- runtime·lock(&work.fmu);
- b->next = work.full;
- work.full = b;
- runtime·unlock(&work.fmu);
- }
- // Grab from empty list if possible.
- runtime·lock(&work.emu);
- b = work.empty;
- if(b != nil)
- work.empty = b->next;
- runtime·unlock(&work.emu);
- if(b != nil)
- goto haveb;
+ b = (Workbuf*)work.chunk;
+ work.chunk += sizeof *b;
+ work.nchunk -= sizeof *b;
+ runtime·unlock(&work);
}
-
- // Need to allocate.
- runtime·lock(&work);
- if(work.nchunk < sizeof *b) {
- work.nchunk = 1<<20;
- work.chunk = runtime·SysAlloc(work.nchunk);
- }
- b = (Workbuf*)work.chunk;
- work.chunk += sizeof *b;
- work.nchunk -= sizeof *b;
- runtime·unlock(&work);
-
-haveb:
b->nobj = 0;
return b;
}
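
getempty and putempty depend on LFNode being the first field of Workbuf, so the buffer and its node share an address and the result of lfstackpop can be cast straight back to a Workbuf*. A minimal Treiber-stack sketch of that pattern in C11 atomics (the runtime's lfstack is additionally ABA-safe; this sketch is not):

#include <stdatomic.h>
#include <stddef.h>

typedef struct LFNode LFNode;
struct LFNode { LFNode *next; };

typedef struct {
	LFNode node;          /* must be first: a Workbuf* and its node alias */
	size_t nobj;
} Workbuf;

static void lfpush(_Atomic(LFNode*) *head, LFNode *n) {
	n->next = atomic_load(head);
	while (!atomic_compare_exchange_weak(head, &n->next, n))
		;                 /* failed CAS refreshed n->next; retry */
}

static LFNode *lfpop(_Atomic(LFNode*) *head) {
	LFNode *n = atomic_load(head);
	while (n && !atomic_compare_exchange_weak(head, &n, n->next))
		;                 /* failed CAS refreshed n; retry */
	return n;
}

int main(void) {
	static _Atomic(LFNode*) empty;             /* like work.empty */
	static Workbuf bufs[2];
	lfpush(&empty, &bufs[0].node);
	lfpush(&empty, &bufs[1].node);
	return (Workbuf*)lfpop(&empty) == &bufs[1] ? 0 : 1;  /* LIFO */
}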
@@ -458,19 +1210,10 @@ haveb:
static void
putempty(Workbuf *b)
{
- if(b == nil)
- return;
+ if(CollectStats)
+ runtime·xadd64(&gcstats.putempty, 1);
- if(work.nproc == 1) {
- b->next = work.empty;
- work.empty = b;
- return;
- }
-
- runtime·lock(&work.emu);
- b->next = work.empty;
- work.empty = b;
- runtime·unlock(&work.emu);
+ runtime·lfstackpush(&work.empty, &b->node);
}
// Get a full work buffer off the work.full list, or return nil.
@@ -478,63 +1221,37 @@ static Workbuf*
getfull(Workbuf *b)
{
int32 i;
- Workbuf *b1;
-
- if(work.nproc == 1) {
- // Put b on empty list.
- if(b != nil) {
- b->next = work.empty;
- work.empty = b;
- }
- // Grab from full list if possible.
- // Since work.nproc==1, no one else is
- // going to give us work.
- b = work.full;
- if(b != nil)
- work.full = b->next;
- return b;
- }
- putempty(b);
+ if(CollectStats)
+ runtime·xadd64(&gcstats.getfull, 1);
- // Grab buffer from full list if possible.
- for(;;) {
- b1 = work.full;
- if(b1 == nil)
- break;
- runtime·lock(&work.fmu);
- if(work.full != nil) {
- b1 = work.full;
- work.full = b1->next;
- runtime·unlock(&work.fmu);
- return b1;
- }
- runtime·unlock(&work.fmu);
- }
+ if(b != nil)
+ runtime·lfstackpush(&work.empty, &b->node);
+ b = (Workbuf*)runtime·lfstackpop(&work.full);
+ if(b != nil || work.nproc == 1)
+ return b;
runtime·xadd(&work.nwait, +1);
for(i=0;; i++) {
- b1 = work.full;
- if(b1 != nil) {
- runtime·lock(&work.fmu);
- if(work.full != nil) {
- runtime·xadd(&work.nwait, -1);
- b1 = work.full;
- work.full = b1->next;
- runtime·unlock(&work.fmu);
- return b1;
- }
- runtime·unlock(&work.fmu);
- continue;
+ if(work.full != 0) {
+ runtime·xadd(&work.nwait, -1);
+ b = (Workbuf*)runtime·lfstackpop(&work.full);
+ if(b != nil)
+ return b;
+ runtime·xadd(&work.nwait, +1);
}
if(work.nwait == work.nproc)
return nil;
- if(i < 10)
+ if(i < 10) {
+ m->gcstats.nprocyield++;
runtime·procyield(20);
- else if(i < 20)
+ } else if(i < 20) {
+ m->gcstats.nosyield++;
runtime·osyield();
- else
+ } else {
+ m->gcstats.nsleep++;
runtime·usleep(100);
+ }
}
}
@@ -550,20 +1267,40 @@ handoff(Workbuf *b)
b->nobj -= n;
b1->nobj = n;
runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
- nhandoff += n;
+ m->gcstats.nhandoff++;
+ m->gcstats.nhandoffcnt += n;
// Put b on full list - let first half of b get stolen.
- runtime·lock(&work.fmu);
- b->next = work.full;
- work.full = b;
- runtime·unlock(&work.fmu);
-
+ runtime·lfstackpush(&work.full, &b->node);
return b1;
}
-// Scanstack calls scanblock on each of gp's stack segments.
static void
-scanstack(void (*scanblock)(byte*, int64), G *gp)
+addroot(Obj obj)
+{
+ uint32 cap;
+ Obj *new;
+
+ if(work.nroot >= work.rootcap) {
+ cap = PageSize/sizeof(Obj);
+ if(cap < 2*work.rootcap)
+ cap = 2*work.rootcap;
+ new = (Obj*)runtime·SysAlloc(cap*sizeof(Obj));
+ if(new == nil)
+ runtime·throw("runtime: cannot allocate memory");
+ if(work.roots != nil) {
+ runtime·memmove(new, work.roots, work.rootcap*sizeof(Obj));
+ runtime·SysFree(work.roots, work.rootcap*sizeof(Obj));
+ }
+ work.roots = new;
+ work.rootcap = cap;
+ }
+ work.roots[work.nroot] = obj;
+ work.nroot++;
+}
+
+static void
+addstackroots(G *gp)
{
M *mp;
int32 n;
@@ -571,7 +1308,7 @@ scanstack(void (*scanblock)(byte*, int64), G *gp)
byte *sp, *guard;
stk = (Stktop*)gp->stackbase;
- guard = gp->stackguard;
+ guard = (byte*)gp->stackguard;
if(gp == g) {
// Scanning our own stack: start at &gp.
@@ -582,72 +1319,82 @@ scanstack(void (*scanblock)(byte*, int64), G *gp)
} else {
// Scanning another goroutine's stack.
// The goroutine is usually asleep (the world is stopped).
- sp = gp->sched.sp;
+ sp = (byte*)gp->sched.sp;
// The exception is that if the goroutine is about to enter or might
// have just exited a system call, it may be executing code such
// as schedlock and may have needed to start a new stack segment.
// Use the stack segment and stack pointer at the time of
// the system call instead, since that won't change underfoot.
- if(gp->gcstack != nil) {
+ if(gp->gcstack != (uintptr)nil) {
stk = (Stktop*)gp->gcstack;
- sp = gp->gcsp;
- guard = gp->gcguard;
+ sp = (byte*)gp->gcsp;
+ guard = (byte*)gp->gcguard;
}
}
- if(Debug > 1)
- runtime·printf("scanstack %d %p\n", gp->goid, sp);
n = 0;
while(stk) {
if(sp < guard-StackGuard || (byte*)stk < sp) {
- runtime·printf("scanstack inconsistent: g%d#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
+ runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
runtime·throw("scanstack");
}
- scanblock(sp, (byte*)stk - sp);
- sp = stk->gobuf.sp;
+ addroot((Obj){sp, (byte*)stk - sp, 0});
+ sp = (byte*)stk->gobuf.sp;
guard = stk->stackguard;
stk = (Stktop*)stk->stackbase;
n++;
}
}
-// Markfin calls scanblock on the blocks that have finalizers:
-// the things pointed at cannot be freed until the finalizers have run.
static void
-markfin(void *v)
+addfinroots(void *v)
{
uintptr size;
+ void *base;
size = 0;
- if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v))
+ if(!runtime·mlookup(v, &base, &size, nil) || !runtime·blockspecial(base))
runtime·throw("mark - finalizer inconsistency");
// do not mark the finalizer block itself. just mark the things it points at.
- scanblock(v, size);
+ addroot((Obj){base, size, 0});
}
static void
-debug_markfin(void *v)
-{
- uintptr size;
-
- if(!runtime·mlookup(v, &v, &size, nil))
- runtime·throw("debug_mark - finalizer inconsistency");
- debug_scanblock(v, size);
-}
-
-// Mark
-static void
-mark(void (*scan)(byte*, int64))
+addroots(void)
{
G *gp;
FinBlock *fb;
+ MSpan *s, **allspans;
+ uint32 spanidx;
+
+ work.nroot = 0;
+
+ // data & bss
+ // TODO(atom): load balancing
+ addroot((Obj){data, edata - data, (uintptr)gcdata});
+ addroot((Obj){bss, ebss - bss, (uintptr)gcbss});
+
+ // MSpan.types
+ allspans = runtime·mheap->allspans;
+ for(spanidx=0; spanidx<runtime·mheap->nspan; spanidx++) {
+ s = allspans[spanidx];
+ if(s->state == MSpanInUse) {
+ switch(s->types.compression) {
+ case MTypes_Empty:
+ case MTypes_Single:
+ break;
+ case MTypes_Words:
+ case MTypes_Bytes:
+ // TODO(atom): consider using defaultProg instead of 0
+ addroot((Obj){(byte*)&s->types.data, sizeof(void*), 0});
+ break;
+ }
+ }
+ }
- // mark data+bss.
- scan(data, ebss - data);
-
- // mark stacks
+ // stacks
for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
switch(gp->status){
default:
@@ -658,37 +1405,30 @@ mark(void (*scan)(byte*, int64))
case Grunning:
if(gp != g)
runtime·throw("mark - world not stopped");
- scanstack(scan, gp);
+ addstackroots(gp);
break;
case Grunnable:
case Gsyscall:
case Gwaiting:
- scanstack(scan, gp);
+ addstackroots(gp);
break;
}
}
- // mark things pointed at by objects with finalizers
- if(scan == debug_scanblock)
- runtime·walkfintab(debug_markfin);
- else
- runtime·walkfintab(markfin);
+ runtime·walkfintab(addfinroots);
for(fb=allfin; fb; fb=fb->alllink)
- scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
-
- // in multiproc mode, join in the queued work.
- scan(nil, 0);
+ addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
}
static bool
handlespecial(byte *p, uintptr size)
{
- void (*fn)(void*);
- int32 nret;
+ FuncVal *fn;
+ uintptr nret;
FinBlock *block;
Finalizer *f;
-
+
if(!runtime·getfinalizer(p, true, &fn, &nret)) {
runtime·setblockspecial(p, false);
runtime·MProf_Free(p, size);
@@ -699,6 +1439,8 @@ handlespecial(byte *p, uintptr size)
if(finq == nil || finq->cnt == finq->cap) {
if(finc == nil) {
finc = runtime·SysAlloc(PageSize);
+ if(finc == nil)
+ runtime·throw("runtime: cannot allocate memory");
finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
finc->alllink = allfin;
allfin = finc;
@@ -713,124 +1455,225 @@ handlespecial(byte *p, uintptr size)
f->fn = fn;
f->nret = nret;
f->arg = p;
- runtime·unlock(&finlock);
+ runtime·unlock(&finlock);
return true;
}
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
static void
-sweep(void)
+sweepspan(ParFor *desc, uint32 idx)
{
- MSpan *s;
int32 cl, n, npages;
uintptr size;
byte *p;
MCache *c;
byte *arena_start;
- int64 now;
+ MLink head, *end;
+ int32 nfree;
+ byte *type_data;
+ byte compression;
+ uintptr type_data_inc;
+ MSpan *s;
- arena_start = runtime·mheap.arena_start;
- now = runtime·nanotime();
+ USED(&desc);
+ s = runtime·mheap->allspans[idx];
+ if(s->state != MSpanInUse)
+ return;
+ arena_start = runtime·mheap->arena_start;
+ p = (byte*)(s->start << PageShift);
+ cl = s->sizeclass;
+ size = s->elemsize;
+ if(cl == 0) {
+ n = 1;
+ } else {
+ // Chunk full of small blocks.
+ npages = runtime·class_to_allocnpages[cl];
+ n = (npages << PageShift) / size;
+ }
+ nfree = 0;
+ end = &head;
+ c = m->mcache;
+
+ type_data = (byte*)s->types.data;
+ type_data_inc = sizeof(uintptr);
+ compression = s->types.compression;
+ switch(compression) {
+ case MTypes_Bytes:
+ type_data += 8*sizeof(uintptr);
+ type_data_inc = 1;
+ break;
+ }
- for(;;) {
- s = work.spans;
- if(s == nil)
- break;
- if(!runtime·casp(&work.spans, s, s->allnext))
- continue;
+ // Sweep through n objects of given size starting at p.
+ // This thread owns the span now, so it can manipulate
+ // the block bitmap without atomic operations.
+ for(; n > 0; n--, p += size, type_data+=type_data_inc) {
+ uintptr off, *bitp, shift, bits;
- // Stamp newly unused spans. The scavenger will use that
- // info to potentially give back some pages to the OS.
- if(s->state == MSpanFree && s->unusedsince == 0)
- s->unusedsince = now;
+ off = (uintptr*)p - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ bits = *bitp>>shift;
- if(s->state != MSpanInUse)
+ if((bits & bitAllocated) == 0)
continue;
- p = (byte*)(s->start << PageShift);
- cl = s->sizeclass;
+ if((bits & bitMarked) != 0) {
+ if(DebugMark) {
+ if(!(bits & bitSpecial))
+ runtime·printf("found spurious mark on %p\n", p);
+ *bitp &= ~(bitSpecial<<shift);
+ }
+ *bitp &= ~(bitMarked<<shift);
+ continue;
+ }
+
+ // Special means it has a finalizer or is being profiled.
+ // In DebugMark mode, the bit has been coopted so
+ // we have to assume all blocks are special.
+ if(DebugMark || (bits & bitSpecial) != 0) {
+ if(handlespecial(p, size))
+ continue;
+ }
+
+ // Mark freed; restore block boundary bit.
+ *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+
if(cl == 0) {
- size = s->npages<<PageShift;
- n = 1;
+ // Free large span.
+ runtime·unmarkspan(p, 1<<PageShift);
+ *(uintptr*)p = 1; // needs zeroing
+ runtime·MHeap_Free(runtime·mheap, s, 1);
+ c->local_alloc -= size;
+ c->local_nfree++;
} else {
- // Chunk full of small blocks.
- size = runtime·class_to_size[cl];
- npages = runtime·class_to_allocnpages[cl];
- n = (npages << PageShift) / size;
+ // Free small object.
+ switch(compression) {
+ case MTypes_Words:
+ *(uintptr*)type_data = 0;
+ break;
+ case MTypes_Bytes:
+ *(byte*)type_data = 0;
+ break;
+ }
+ if(size > sizeof(uintptr))
+ ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
+
+ end->next = (MLink*)p;
+ end = (MLink*)p;
+ nfree++;
}
+ }
- // Sweep through n objects of given size starting at p.
- // This thread owns the span now, so it can manipulate
- // the block bitmap without atomic operations.
- for(; n > 0; n--, p += size) {
- uintptr off, *bitp, shift, bits;
+ if(nfree) {
+ c->local_by_size[cl].nfree += nfree;
+ c->local_alloc -= size * nfree;
+ c->local_nfree += nfree;
+ c->local_cachealloc -= nfree * size;
+ c->local_objects -= nfree;
+ runtime·MCentral_FreeSpan(&runtime·mheap->central[cl], s, nfree, head.next, end);
+ }
+}
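
Rather than freeing small objects one at a time, sweepspan threads them onto an intrusive MLink chain (head/end) and returns the whole batch to the central list with a single MCentral_FreeSpan call. The chain-building idiom in isolation (illustrative; it relies only on each free block being at least one pointer wide):

#include <stdio.h>

typedef struct MLink MLink;
struct MLink { MLink *next; };

int main(void) {
	static char block[4][32];      /* stand-ins for swept-free objects */
	MLink head, *end = &head;      /* dummy head; end tracks the tail */
	int nfree = 0;

	for (int i = 0; i < 4; i++) {
		end->next = (MLink*)block[i];   /* first word of the block is the link */
		end = end->next;
		nfree++;
	}
	end->next = NULL;

	for (MLink *l = head.next; l; l = l->next)
		nfree--;
	printf("batched objects accounted for: %s\n", nfree == 0 ? "yes" : "no");
	return 0;
}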
- off = (uintptr*)p - (uintptr*)arena_start;
- bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
- shift = off % wordsPerBitmapWord;
- bits = *bitp>>shift;
+static void
+dumpspan(uint32 idx)
+{
+ int32 sizeclass, n, npages, i, column;
+ uintptr size;
+ byte *p;
+ byte *arena_start;
+ MSpan *s;
+ bool allocated, special;
- if((bits & bitAllocated) == 0)
- continue;
+ s = runtime·mheap->allspans[idx];
+ if(s->state != MSpanInUse)
+ return;
+ arena_start = runtime·mheap->arena_start;
+ p = (byte*)(s->start << PageShift);
+ sizeclass = s->sizeclass;
+ size = s->elemsize;
+ if(sizeclass == 0) {
+ n = 1;
+ } else {
+ npages = runtime·class_to_allocnpages[sizeclass];
+ n = (npages << PageShift) / size;
+ }
+
+ runtime·printf("%p .. %p:\n", p, p+n*size);
+ column = 0;
+ for(; n>0; n--, p+=size) {
+ uintptr off, *bitp, shift, bits;
- if((bits & bitMarked) != 0) {
- if(DebugMark) {
- if(!(bits & bitSpecial))
- runtime·printf("found spurious mark on %p\n", p);
- *bitp &= ~(bitSpecial<<shift);
- }
- *bitp &= ~(bitMarked<<shift);
- continue;
- }
+ off = (uintptr*)p - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ bits = *bitp>>shift;
- // Special means it has a finalizer or is being profiled.
- // In DebugMark mode, the bit has been coopted so
- // we have to assume all blocks are special.
- if(DebugMark || (bits & bitSpecial) != 0) {
- if(handlespecial(p, size))
- continue;
+ allocated = ((bits & bitAllocated) != 0);
+ special = ((bits & bitSpecial) != 0);
+
+ for(i=0; i<size; i+=sizeof(void*)) {
+ if(column == 0) {
+ runtime·printf("\t");
+ }
+ if(i == 0) {
+ runtime·printf(allocated ? "(" : "[");
+ runtime·printf(special ? "@" : "");
+ runtime·printf("%p: ", p+i);
+ } else {
+ runtime·printf(" ");
}
- // Mark freed; restore block boundary bit.
- *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+ runtime·printf("%p", *(void**)(p+i));
- c = m->mcache;
- if(s->sizeclass == 0) {
- // Free large span.
- runtime·unmarkspan(p, 1<<PageShift);
- *(uintptr*)p = 1; // needs zeroing
- runtime·MHeap_Free(&runtime·mheap, s, 1);
- } else {
- // Free small object.
- if(size > sizeof(uintptr))
- ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
- c->local_by_size[s->sizeclass].nfree++;
- runtime·MCache_Free(c, p, s->sizeclass, size);
+ if(i+sizeof(void*) >= size) {
+ runtime·printf(allocated ? ") " : "] ");
+ }
+
+ column++;
+ if(column == 8) {
+ runtime·printf("\n");
+ column = 0;
}
- c->local_alloc -= size;
- c->local_nfree++;
}
}
+ runtime·printf("\n");
+}
+
+// A debugging function to dump the contents of memory
+void
+runtime·memorydump(void)
+{
+ uint32 spanidx;
+
+ for(spanidx=0; spanidx<runtime·mheap->nspan; spanidx++) {
+ dumpspan(spanidx);
+ }
}
void
runtime·gchelper(void)
{
- // Wait until main proc is ready for mark help.
- runtime·lock(&work.markgate);
- runtime·unlock(&work.markgate);
- scanblock(nil, 0);
+ // parallel mark over the gc roots
+ runtime·parfordo(work.markfor);
+
+ // help other threads scan secondary blocks
+ scanblock(nil, nil, 0, true);
- // Wait until main proc is ready for sweep help.
- runtime·lock(&work.sweepgate);
- runtime·unlock(&work.sweepgate);
- sweep();
+ if(DebugMark) {
+ // wait while the main thread executes mark(debug_scanblock)
+ while(runtime·atomicload(&work.debugmarkdone) == 0)
+ runtime·usleep(10);
+ }
+ runtime·parfordo(work.sweepfor);
if(runtime·xadd(&work.ndone, +1) == work.nproc-1)
runtime·notewakeup(&work.alldone);
}
+#define GcpercentUnknown (-2)
+
// Initialized from $GOGC. GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
@@ -840,33 +1683,36 @@ runtime·gchelper(void)
// proportion to the allocation cost. Adjusting gcpercent
// just changes the linear constant (and also the amount of
// extra memory used).
-static int32 gcpercent = -2;
+static int32 gcpercent = GcpercentUnknown;
static void
-stealcache(void)
+cachestats(GCStats *stats)
{
- M *m;
-
- for(m=runtime·allm; m; m=m->alllink)
- runtime·MCache_ReleaseAll(m->mcache);
-}
-
-static void
-cachestats(void)
-{
- M *m;
+ M *mp;
MCache *c;
+ P *p, **pp;
int32 i;
uint64 stacks_inuse;
- uint64 stacks_sys;
+ uint64 *src, *dst;
+ if(stats)
+ runtime·memclr((byte*)stats, sizeof(*stats));
stacks_inuse = 0;
- stacks_sys = 0;
- for(m=runtime·allm; m; m=m->alllink) {
- runtime·purgecachedstats(m);
- stacks_inuse += m->stackalloc->inuse;
- stacks_sys += m->stackalloc->sys;
- c = m->mcache;
+ for(mp=runtime·allm; mp; mp=mp->alllink) {
+ stacks_inuse += mp->stackinuse*FixedStack;
+ if(stats) {
+ src = (uint64*)&mp->gcstats;
+ dst = (uint64*)stats;
+ for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
+ dst[i] += src[i];
+ runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
+ }
+ }
+ for(pp=runtime·allp; p=*pp; pp++) {
+ c = p->mcache;
+ if(c==nil)
+ continue;
+ runtime·purgecachedstats(c);
for(i=0; i<nelem(c->local_by_size); i++) {
mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
c->local_by_size[i].nmalloc = 0;
@@ -875,16 +1721,42 @@ cachestats(void)
}
}
mstats.stacks_inuse = stacks_inuse;
- mstats.stacks_sys = stacks_sys;
+}
+
+// Structure of arguments passed to function gc().
+// This allows the arguments to be passed via reflect·call.
+struct gc_args
+{
+ int32 force;
+};
+
+static void gc(struct gc_args *args);
+
+static int32
+readgogc(void)
+{
+ byte *p;
+
+ p = runtime·getenv("GOGC");
+ if(p == nil || p[0] == '\0')
+ return 100;
+ if(runtime·strcmp(p, (byte*)"off") == 0)
+ return -1;
+ return runtime·atoi(p);
}
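
The value readgogc returns feeds the trigger recomputed after each sweep, mstats.next_gc = heap_alloc + heap_alloc*gcpercent/100, so GOGC=100 lets the heap double before the next cycle. The same arithmetic worked through for an assumed 4 MB live heap:

#include <stdint.h>
#include <stdio.h>

int main(void) {
	uint64_t heap_alloc = 4 << 20;   /* 4 MB live after collection (assumed) */
	int32_t  gcpercent  = 100;       /* default when GOGC is unset */

	uint64_t next_gc = heap_alloc + heap_alloc*gcpercent/100;
	printf("next_gc = %llu bytes\n", (unsigned long long)next_gc);  /* 8 MB */

	/* GOGC=off maps to gcpercent = -1, and collection is skipped entirely. */
	return 0;
}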
void
runtime·gc(int32 force)
{
- int64 t0, t1, t2, t3;
- uint64 heap0, heap1, obj0, obj1;
byte *p;
- bool extra;
+ struct gc_args a, *ap;
+ FuncVal gcv;
+
+ // The atomic operations are not atomic if the uint64s
+ // are not aligned on uint64 boundaries. This has been
+ // a problem in the past.
+ if((((uintptr)&work.empty) & 7) != 0)
+ runtime·throw("runtime: gc work buffer is misaligned");
// The gc is turned off (via enablegc) until
// the bootstrap has completed.
@@ -897,14 +1769,8 @@ runtime·gc(int32 force)
if(!mstats.enablegc || m->locks > 0 || runtime·panicking)
return;
- if(gcpercent == -2) { // first time through
- p = runtime·getenv("GOGC");
- if(p == nil || p[0] == '\0')
- gcpercent = 100;
- else if(runtime·strcmp(p, (byte*)"off") == 0)
- gcpercent = -1;
- else
- gcpercent = runtime·atoi(p);
+ if(gcpercent == GcpercentUnknown) { // first time through
+ gcpercent = readgogc();
p = runtime·getenv("GOGCTRACE");
if(p != nil)
@@ -913,103 +1779,171 @@ runtime·gc(int32 force)
if(gcpercent < 0)
return;
+ // Run gc on a bigger stack to eliminate
+ // a potentially large number of calls to runtime·morestack.
+ a.force = force;
+ ap = &a;
+ m->moreframesize_minalloc = StackBig;
+ gcv.fn = (void*)gc;
+ reflect·call(&gcv, (byte*)&ap, sizeof(ap));
+
+ if(gctrace > 1 && !force) {
+ a.force = 1;
+ gc(&a);
+ }
+}
+
+static FuncVal runfinqv = {runfinq};
+
+static void
+gc(struct gc_args *args)
+{
+ int64 t0, t1, t2, t3, t4;
+ uint64 heap0, heap1, obj0, obj1, ninstr;
+ GCStats stats;
+ M *mp;
+ uint32 i;
+ Eface eface;
+
runtime·semacquire(&runtime·worldsema);
- if(!force && mstats.heap_alloc < mstats.next_gc) {
+ if(!args->force && mstats.heap_alloc < mstats.next_gc) {
runtime·semrelease(&runtime·worldsema);
return;
}
t0 = runtime·nanotime();
- nhandoff = 0;
m->gcing = 1;
runtime·stoptheworld();
- cachestats();
- heap0 = mstats.heap_alloc;
- obj0 = mstats.nmalloc - mstats.nfree;
+ if(CollectStats)
+ runtime·memclr((byte*)&gcstats, sizeof(gcstats));
- runtime·lock(&work.markgate);
- runtime·lock(&work.sweepgate);
+ for(mp=runtime·allm; mp; mp=mp->alllink)
+ runtime·settype_flush(mp, false);
- extra = false;
- work.nproc = 1;
- if(runtime·gomaxprocs > 1 && runtime·ncpu > 1) {
- runtime·noteclear(&work.alldone);
- work.nproc += runtime·helpgc(&extra);
+ heap0 = 0;
+ obj0 = 0;
+ if(gctrace) {
+ cachestats(nil);
+ heap0 = mstats.heap_alloc;
+ obj0 = mstats.nmalloc - mstats.nfree;
}
+
+ m->locks++; // disable gc during mallocs in parforalloc
+ if(work.markfor == nil)
+ work.markfor = runtime·parforalloc(MaxGcproc);
+ if(work.sweepfor == nil)
+ work.sweepfor = runtime·parforalloc(MaxGcproc);
+ m->locks--;
+
+ if(itabtype == nil) {
+ // get C pointer to the Go type "itab"
+ runtime·gc_itab_ptr(&eface);
+ itabtype = ((PtrType*)eface.type)->elem;
+ }
+
work.nwait = 0;
work.ndone = 0;
+ work.debugmarkdone = 0;
+ work.nproc = runtime·gcprocs();
+ addroots();
+ runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
+ runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap->nspan, nil, true, sweepspan);
+ if(work.nproc > 1) {
+ runtime·noteclear(&work.alldone);
+ runtime·helpgc(work.nproc);
+ }
- runtime·unlock(&work.markgate); // let the helpers in
- mark(scanblock);
- if(DebugMark)
- mark(debug_scanblock);
t1 = runtime·nanotime();
- work.spans = runtime·mheap.allspans;
- runtime·unlock(&work.sweepgate); // let the helpers in
- sweep();
+ runtime·parfordo(work.markfor);
+ scanblock(nil, nil, 0, true);
+
+ if(DebugMark) {
+ for(i=0; i<work.nroot; i++)
+ debug_scanblock(work.roots[i].p, work.roots[i].n);
+ runtime·atomicstore(&work.debugmarkdone, 1);
+ }
+ t2 = runtime·nanotime();
+
+ runtime·parfordo(work.sweepfor);
+ t3 = runtime·nanotime();
+
if(work.nproc > 1)
runtime·notesleep(&work.alldone);
- t2 = runtime·nanotime();
- stealcache();
- cachestats();
+ cachestats(&stats);
+
+ stats.nprocyield += work.sweepfor->nprocyield;
+ stats.nosyield += work.sweepfor->nosyield;
+ stats.nsleep += work.sweepfor->nsleep;
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
m->gcing = 0;
- m->locks++; // disable gc during the mallocs in newproc
if(finq != nil) {
+ m->locks++; // disable gc during the mallocs in newproc
// kick off or wake up goroutine to run queued finalizers
if(fing == nil)
- fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runtime·gc);
+ fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc);
else if(fingwait) {
fingwait = 0;
runtime·ready(fing);
}
+ m->locks--;
}
- m->locks--;
- cachestats();
heap1 = mstats.heap_alloc;
obj1 = mstats.nmalloc - mstats.nfree;
- t3 = runtime·nanotime();
- mstats.last_gc = t3;
- mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
- mstats.pause_total_ns += t3 - t0;
+ t4 = runtime·nanotime();
+ mstats.last_gc = t4;
+ mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
+ mstats.pause_total_ns += t4 - t0;
mstats.numgc++;
if(mstats.debuggc)
- runtime·printf("pause %D\n", t3-t0);
+ runtime·printf("pause %D\n", t4-t0);
if(gctrace) {
- runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D handoff\n",
- mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
+ runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
+ " %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
+ mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
heap0>>20, heap1>>20, obj0, obj1,
mstats.nmalloc, mstats.nfree,
- nhandoff);
+ stats.nhandoff, stats.nhandoffcnt,
+ work.sweepfor->nsteal, work.sweepfor->nstealcnt,
+ stats.nprocyield, stats.nosyield, stats.nsleep);
+ if(CollectStats) {
+ runtime·printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
+ gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
+ if(gcstats.ptr.cnt != 0)
+ runtime·printf("avg ptrbufsize: %D (%D/%D)\n",
+ gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
+ if(gcstats.obj.cnt != 0)
+ runtime·printf("avg nobj: %D (%D/%D)\n",
+ gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
+ runtime·printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);
+
+ runtime·printf("instruction counts:\n");
+ ninstr = 0;
+ for(i=0; i<nelem(gcstats.instr); i++) {
+ runtime·printf("\t%d:\t%D\n", i, gcstats.instr[i]);
+ ninstr += gcstats.instr[i];
+ }
+ runtime·printf("\ttotal:\t%D\n", ninstr);
+
+ runtime·printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);
+ }
}
-
+
runtime·MProf_GC();
runtime·semrelease(&runtime·worldsema);
+ runtime·starttheworld();
- // If we could have used another helper proc, start one now,
- // in the hope that it will be available next time.
- // It would have been even better to start it before the collection,
- // but doing so requires allocating memory, so it's tricky to
- // coordinate. This lazy approach works out in practice:
- // we don't mind if the first couple gc rounds don't have quite
- // the maximum number of procs.
- runtime·starttheworld(extra);
-
- // give the queued finalizers, if any, a chance to run
- if(finq != nil)
+ // give the queued finalizers, if any, a chance to run
+ if(finq != nil)
runtime·gosched();
-
- if(gctrace > 1 && !force)
- runtime·gc(1);
}
void
@@ -1022,11 +1956,56 @@ runtime·ReadMemStats(MStats *stats)
runtime·semacquire(&runtime·worldsema);
m->gcing = 1;
runtime·stoptheworld();
- cachestats();
+ cachestats(nil);
*stats = mstats;
m->gcing = 0;
runtime·semrelease(&runtime·worldsema);
- runtime·starttheworld(false);
+ runtime·starttheworld();
+}
+
+void
+runtime∕debug·readGCStats(Slice *pauses)
+{
+ uint64 *p;
+ uint32 i, n;
+
+ // Calling code in runtime/debug should make the slice large enough.
+ if(pauses->cap < nelem(mstats.pause_ns)+3)
+ runtime·throw("runtime: short slice passed to readGCStats");
+
+ // Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
+ p = (uint64*)pauses->array;
+ runtime·lock(runtime·mheap);
+ n = mstats.numgc;
+ if(n > nelem(mstats.pause_ns))
+ n = nelem(mstats.pause_ns);
+
+ // The pause buffer is circular. The most recent pause is at
+ // pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
+ // from there to go back farther in time. We deliver the times
+ // most recent first (in p[0]).
+ for(i=0; i<n; i++)
+ p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];
+
+ p[n] = mstats.last_gc;
+ p[n+1] = mstats.numgc;
+ p[n+2] = mstats.pause_total_ns;
+ runtime·unlock(runtime·mheap);
+ pauses->len = n+3;
+}
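
The ring walk above returns pauses newest-first: pause i (i = 0 is the most recent) lives at pause_ns[(numgc-1-i) % nelem(pause_ns)]. A tiny standalone check of that indexing, shrunk to a 4-entry ring for readability:

#include <stdio.h>

int main(void) {
	unsigned pause_ns[4];
	unsigned numgc = 6;              /* six GCs so far; ring keeps the last four */

	/* Writer side, as in gc(): slot numgc%4 receives the newest pause. */
	for (unsigned g = 0; g < numgc; g++)
		pause_ns[g % 4] = g;         /* stand-in pause value = gc number */

	/* Reader side, as in readGCStats(): most recent first. */
	for (unsigned i = 0; i < 4; i++)
		printf("p[%u] = gc#%u\n", i, pause_ns[(numgc-1-i) % 4]);
	/* prints gc#5, gc#4, gc#3, gc#2 */
	return 0;
}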
+
+void
+runtime∕debug·setGCPercent(intgo in, intgo out)
+{
+ runtime·lock(runtime·mheap);
+ if(gcpercent == GcpercentUnknown)
+ gcpercent = readgogc();
+ out = gcpercent;
+ if(in < 0)
+ in = -1;
+ gcpercent = in;
+ runtime·unlock(runtime·mheap);
+ FLUSH(&out);
}
static void
@@ -1050,11 +2029,11 @@ runfinq(void)
finq = nil;
if(fb == nil) {
fingwait = 1;
- g->status = Gwaiting;
- g->waitreason = "finalizer wait";
- runtime·gosched();
+ runtime·park(nil, nil, "finalizer wait");
continue;
}
+ if(raceenabled)
+ runtime·racefingo();
for(; fb; fb=next) {
next = fb->next;
for(i=0; i<fb->cnt; i++) {
@@ -1066,7 +2045,7 @@ runfinq(void)
framecap = framesz;
}
*(void**)frame = f->arg;
- reflect·call((byte*)f->fn, frame, sizeof(uintptr) + f->nret);
+ reflect·call(f->fn, frame, sizeof(uintptr) + f->nret);
f->fn = nil;
f->arg = nil;
}
@@ -1088,11 +2067,11 @@ runtime·markallocated(void *v, uintptr n, bool noptr)
if(0)
runtime·printf("markallocated %p+%p\n", v, n);
- if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+ if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
runtime·throw("markallocated: bad pointer");
- off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
- b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start; // word offset
+ b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
for(;;) {
@@ -1120,11 +2099,11 @@ runtime·markfreed(void *v, uintptr n)
if(0)
runtime·printf("markallocated %p+%p\n", v, n);
- if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+ if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
runtime·throw("markallocated: bad pointer");
- off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
- b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start; // word offset
+ b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
for(;;) {
@@ -1150,11 +2129,11 @@ runtime·checkfreed(void *v, uintptr n)
if(!runtime·checking)
return;
- if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+ if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
return; // not allocated, so okay
- off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
- b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start; // word offset
+ b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
bits = *b>>shift;
@@ -1173,7 +2152,7 @@ runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
uintptr *b, off, shift;
byte *p;
- if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+ if((byte*)v+size*n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
runtime·throw("markspan: bad pointer");
p = v;
@@ -1184,8 +2163,8 @@ runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
// the entire span, and each bitmap word has bits for only
// one span, so no other goroutines are changing these
// bitmap words.
- off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start; // word offset
- b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)p - (uintptr*)runtime·mheap->arena_start; // word offset
+ b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
}
@@ -1197,14 +2176,14 @@ runtime·unmarkspan(void *v, uintptr n)
{
uintptr *p, *b, off;
- if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+ if((byte*)v+n > (byte*)runtime·mheap->arena_used || (byte*)v < runtime·mheap->arena_start)
runtime·throw("markspan: bad pointer");
p = v;
- off = p - (uintptr*)runtime·mheap.arena_start; // word offset
+ off = p - (uintptr*)runtime·mheap->arena_start; // word offset
if(off % wordsPerBitmapWord != 0)
runtime·throw("markspan: unaligned pointer");
- b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
n /= PtrSize;
if(n%wordsPerBitmapWord != 0)
runtime·throw("unmarkspan: unaligned length");
@@ -1225,8 +2204,8 @@ runtime·blockspecial(void *v)
if(DebugMark)
return true;
- off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
- b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;
+ b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
return (*b & (bitSpecial<<shift)) != 0;
@@ -1240,8 +2219,8 @@ runtime·setblockspecial(void *v, bool s)
if(DebugMark)
return;
- off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
- b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime·mheap->arena_start;
+ b = (uintptr*)runtime·mheap->arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
for(;;) {