Diffstat (limited to 'src/pkg/runtime/mgc0.c')
-rw-r--r--  src/pkg/runtime/mgc0.c  1814
1 files changed, 1088 insertions, 726 deletions
diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c
index 761f128a8..392da535b 100644
--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -2,13 +2,60 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Garbage collector.
+// Garbage collector (GC).
+//
+// GC is:
+// - mark&sweep
+// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
+// - parallel (up to MaxGcproc threads)
+// - partially concurrent (mark is stop-the-world, while sweep is concurrent)
+// - non-moving/non-compacting
+// - full (non-partial)
+//
+// GC rate.
+// Next GC is after we've allocated an extra amount of memory proportional to
+// the amount already in use. The proportion is controlled by the GOGC environment variable
+// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
+// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
+// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
+// (and also the amount of extra memory used).
+//
+// Concurrent sweep.
+// The sweep phase proceeds concurrently with normal program execution.
+// The heap is swept span-by-span both lazily (when a goroutine needs another span)
+// and concurrently in a background goroutine (this helps programs that are not CPU bound).
+// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
+// and so next_gc calculation is tricky and happens as follows.
+// At the end of the stop-the-world phase next_gc is conservatively set based on total
+// heap size; all spans are marked as "needs sweeping".
+// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
+// The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
+// closer to the target value. However, this is not enough to avoid over-allocating memory.
+// Consider that a goroutine wants to allocate a new span for a large object and
+// there are no free swept spans, but there are small-object unswept spans.
+// If the goroutine naively allocates a new span, it can surpass the yet-unknown
+// target next_gc value. To prevent such cases: (1) when a goroutine needs
+// to allocate a new small-object span, it sweeps small-object spans for the same
+// object size until it frees at least one object; (2) when a goroutine needs to
+// allocate a large-object span from the heap, it sweeps spans until it frees at least
+// that many pages into the heap. Together these two measures ensure that we don't surpass
+// target next_gc value by a large margin. There is an exception: if a goroutine sweeps
+// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
+// but there can still be other one-page unswept spans which could be combined into a two-page span.
+// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
+// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
+// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
+// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
+// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
+// The finalizer goroutine is kicked off only when all spans are swept.
+// When the next GC starts, it sweeps all not-yet-swept spans (if any).
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "stack.h"
#include "mgc0.h"
+#include "chan.h"
#include "race.h"
#include "type.h"
#include "typekind.h"
@@ -17,14 +64,11 @@
enum {
Debug = 0,
- DebugMark = 0, // run second pass to check mark
CollectStats = 0,
- ScanStackByFrames = 0,
- IgnorePreciseGC = 0,
+ ConcurrentSweep = 1,
- // Four bits per word (see #defines below).
- wordsPerBitmapWord = sizeof(void*)*8/4,
- bitShift = sizeof(void*)*8/4,
+ WorkbufSize = 16*1024,
+ FinBlockSize = 4*1024,
handoffThreshold = 4,
IntermediateBufferCapacity = 64,
@@ -34,46 +78,50 @@ enum {
LOOP = 2,
PC_BITS = PRECISE | LOOP,
- // Pointer map
- BitsPerPointer = 2,
- BitsNoPointer = 0,
- BitsPointer = 1,
- BitsIface = 2,
- BitsEface = 3,
+ RootData = 0,
+ RootBss = 1,
+ RootFinalizers = 2,
+ RootSpanTypes = 3,
+ RootFlushCaches = 4,
+ RootCount = 5,
};
-// Bits in per-word bitmap.
-// #defines because enum might not be able to hold the values.
-//
-// Each word in the bitmap describes wordsPerBitmapWord words
-// of heap memory. There are 4 bitmap bits dedicated to each heap word,
-// so on a 64-bit system there is one bitmap word per 16 heap words.
-// The bits in the word are packed together by type first, then by
-// heap location, so each 64-bit bitmap word consists of, from top to bottom,
-// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
-// then the 16 bitNoScan/bitBlockBoundary bits, then the 16 bitAllocated bits.
-// This layout makes it easier to iterate over the bits of a given type.
-//
-// The bitmap starts at mheap.arena_start and extends *backward* from
-// there. On a 64-bit system the off'th word in the arena is tracked by
-// the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
-// the only difference is that the divisor is 8.)
-//
-// To pull out the bits corresponding to a given pointer p, we use:
-//
-// off = p - (uintptr*)mheap.arena_start; // word offset
-// b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
-// shift = off % wordsPerBitmapWord
-// bits = *b >> shift;
-// /* then test bits & bitAllocated, bits & bitMarked, etc. */
-//
-#define bitAllocated ((uintptr)1<<(bitShift*0))
-#define bitNoScan ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
-#define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */
-#define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */
-#define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */
+#define GcpercentUnknown (-2)
+
+// Initialized from $GOGC. GOGC=off means no gc.
+static int32 gcpercent = GcpercentUnknown;
+
+static FuncVal* poolcleanup;
+
+void
+sync·runtime_registerPoolCleanup(FuncVal *f)
+{
+ poolcleanup = f;
+}
+
+static void
+clearpools(void)
+{
+ P *p, **pp;
+ MCache *c;
+ int32 i;
+
+ // clear sync.Pool's
+ if(poolcleanup != nil)
+ reflect·call(poolcleanup, nil, 0, 0);
-#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
+ for(pp=runtime·allp; p=*pp; pp++) {
+ // clear tinyalloc pool
+ c = p->mcache;
+ if(c != nil) {
+ c->tiny = nil;
+ c->tinysize = 0;
+ }
+ // clear defer pools
+ for(i=0; i<nelem(p->deferpool); i++)
+ p->deferpool[i] = nil;
+ }
+}
// Holding worldsema grants an M the right to try to stop the world.
// The procedure is:
@@ -98,11 +146,10 @@ struct Obj
uintptr ti; // type info
};
-// The size of Workbuf is N*PageSize.
typedef struct Workbuf Workbuf;
struct Workbuf
{
-#define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr))
+#define SIZE (WorkbufSize-sizeof(LFNode)-sizeof(uintptr))
LFNode node; // must be first
uintptr nobj;
Obj obj[SIZE/sizeof(Obj) - 1];
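
As a concrete reading of the sizing above (assuming a 64-bit system where sizeof(LFNode) is 16, sizeof(uintptr) is 8, and sizeof(Obj) is three pointer-sized fields, 24 bytes): SIZE = 16*1024 - 16 - 8 = 16360, so each Workbuf now carries 16360/24 - 1 = 680 Obj entries in a 16KB block, where the old definition tied the buffer size to 2*PageSize.
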
@@ -138,39 +185,44 @@ extern byte ebss[];
extern byte gcdata[];
extern byte gcbss[];
-static G *fing;
-static FinBlock *finq; // list of finalizers that are to be executed
-static FinBlock *finc; // cache of free blocks
-static FinBlock *allfin; // list of all blocks
-static Lock finlock;
-static int32 fingwait;
+static Lock finlock; // protects the following variables
+static FinBlock *finq; // list of finalizers that are to be executed
+static FinBlock *finc; // cache of free blocks
+static FinBlock *allfin; // list of all blocks
+bool runtime·fingwait;
+bool runtime·fingwake;
+
+static Lock gclock;
+static G* fing;
-static void runfinq(void);
+static void runfinq(void);
+static void bgsweep(void);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);
static void gchelperstart(void);
+static void flushallmcaches(void);
+static bool scanframe(Stkframe *frame, void *wbufp);
+static void addstackroots(G *gp, Workbuf **wbufp);
+
+static FuncVal runfinqv = {runfinq};
+static FuncVal bgsweepv = {bgsweep};
static struct {
uint64 full; // lock-free list of full blocks
uint64 empty; // lock-free list of empty blocks
byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
uint32 nproc;
+ int64 tstart;
volatile uint32 nwait;
volatile uint32 ndone;
- volatile uint32 debugmarkdone;
Note alldone;
ParFor *markfor;
- ParFor *sweepfor;
Lock;
byte *chunk;
uintptr nchunk;
-
- Obj *roots;
- uint32 nroot;
- uint32 rootcap;
} work;
enum {
@@ -207,6 +259,8 @@ static struct {
uint64 foundword;
uint64 foundspan;
} markonly;
+ uint32 nbgsweep;
+ uint32 npausesweep;
} gcstats;
// markonly marks an object. It returns true if the object
@@ -260,8 +314,7 @@ markonly(void *obj)
// (Manually inlined copy of MHeap_LookupMaybe.)
k = (uintptr)obj>>PageShift;
x = k;
- if(sizeof(void*) == 8)
- x -= (uintptr)runtime·mheap.arena_start>>PageShift;
+ x -= (uintptr)runtime·mheap.arena_start>>PageShift;
s = runtime·mheap.spans[x];
if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
return false;
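
Note the recurring simplification in this and later hunks: the spans[] index is now always computed relative to arena_start, whereas the old code performed the subtraction only when sizeof(void*) == 8 (on 32-bit systems the absolute page ID itself used to serve as the index).
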
@@ -317,6 +370,24 @@ struct PtrTarget
uintptr ti;
};
+typedef struct Scanbuf Scanbuf;
+struct Scanbuf
+{
+ struct {
+ PtrTarget *begin;
+ PtrTarget *end;
+ PtrTarget *pos;
+ } ptr;
+ struct {
+ Obj *begin;
+ Obj *end;
+ Obj *pos;
+ } obj;
+ Workbuf *wbuf;
+ Obj *wp;
+ uintptr nobj;
+};
+
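
Scanbuf, introduced here, bundles the state that scanblock previously threaded through flushptrbuf and flushobjbuf as five separate in/out parameters: the pointer-target buffer and the object buffer (each a begin/pos/end cursor triple), plus the current work buffer with its write pointer and object count.
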
typedef struct BufferList BufferList;
struct BufferList
{
@@ -350,7 +421,7 @@ static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
// flushptrbuf
// (find block start, mark and enqueue)
static void
-flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
+flushptrbuf(Scanbuf *sbuf)
{
byte *p, *arena_start, *obj;
uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
@@ -358,17 +429,19 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
PageID k;
Obj *wp;
Workbuf *wbuf;
+ PtrTarget *ptrbuf;
PtrTarget *ptrbuf_end;
arena_start = runtime·mheap.arena_start;
- wp = *_wp;
- wbuf = *_wbuf;
- nobj = *_nobj;
+ wp = sbuf->wp;
+ wbuf = sbuf->wbuf;
+ nobj = sbuf->nobj;
- ptrbuf_end = *ptrbufpos;
- n = ptrbuf_end - ptrbuf;
- *ptrbufpos = ptrbuf;
+ ptrbuf = sbuf->ptr.begin;
+ ptrbuf_end = sbuf->ptr.pos;
+ n = ptrbuf_end - sbuf->ptr.begin;
+ sbuf->ptr.pos = sbuf->ptr.begin;
if(CollectStats) {
runtime·xadd64(&gcstats.ptr.sum, n);
@@ -387,152 +460,146 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
runtime·throw("ptrbuf has to be smaller than WorkBuf");
}
- // TODO(atom): This block is a branch of an if-then-else statement.
- // The single-threaded branch may be added in a next CL.
- {
- // Multi-threaded version.
+ while(ptrbuf < ptrbuf_end) {
+ obj = ptrbuf->p;
+ ti = ptrbuf->ti;
+ ptrbuf++;
+
+ // obj belongs to interval [mheap.arena_start, mheap.arena_used).
+ if(Debug > 1) {
+ if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
+ runtime·throw("object is outside of mheap");
+ }
- while(ptrbuf < ptrbuf_end) {
- obj = ptrbuf->p;
- ti = ptrbuf->ti;
- ptrbuf++;
+ // obj may be a pointer to a live object.
+ // Try to find the beginning of the object.
- // obj belongs to interval [mheap.arena_start, mheap.arena_used).
- if(Debug > 1) {
- if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
- runtime·throw("object is outside of mheap");
- }
+ // Round down to word boundary.
+ if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
+ obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+ ti = 0;
+ }
- // obj may be a pointer to a live object.
- // Try to find the beginning of the object.
+ // Find bits for this word.
+ off = (uintptr*)obj - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
- // Round down to word boundary.
- if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
- obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
- ti = 0;
- }
+ // Pointing at the beginning of a block?
+ if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
+ if(CollectStats)
+ runtime·xadd64(&gcstats.flushptrbuf.foundbit, 1);
+ goto found;
+ }
- // Find bits for this word.
- off = (uintptr*)obj - (uintptr*)arena_start;
- bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
- shift = off % wordsPerBitmapWord;
- xbits = *bitp;
- bits = xbits >> shift;
+ ti = 0;
- // Pointing at the beginning of a block?
- if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
+ // Pointing just past the beginning?
+ // Scan backward a little to find a block boundary.
+ for(j=shift; j-->0; ) {
+ if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
+ obj = (byte*)obj - (shift-j)*PtrSize;
+ shift = j;
+ bits = xbits>>shift;
if(CollectStats)
- runtime·xadd64(&gcstats.flushptrbuf.foundbit, 1);
+ runtime·xadd64(&gcstats.flushptrbuf.foundword, 1);
goto found;
}
+ }
- ti = 0;
-
- // Pointing just past the beginning?
- // Scan backward a little to find a block boundary.
- for(j=shift; j-->0; ) {
- if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
- obj = (byte*)obj - (shift-j)*PtrSize;
- shift = j;
- bits = xbits>>shift;
- if(CollectStats)
- runtime·xadd64(&gcstats.flushptrbuf.foundword, 1);
- goto found;
- }
- }
-
- // Otherwise consult span table to find beginning.
- // (Manually inlined copy of MHeap_LookupMaybe.)
- k = (uintptr)obj>>PageShift;
- x = k;
- if(sizeof(void*) == 8)
- x -= (uintptr)arena_start>>PageShift;
- s = runtime·mheap.spans[x];
- if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
- continue;
- p = (byte*)((uintptr)s->start<<PageShift);
- if(s->sizeclass == 0) {
- obj = p;
- } else {
- size = s->elemsize;
- int32 i = ((byte*)obj - p)/size;
- obj = p+i*size;
- }
+ // Otherwise consult span table to find beginning.
+ // (Manually inlined copy of MHeap_LookupMaybe.)
+ k = (uintptr)obj>>PageShift;
+ x = k;
+ x -= (uintptr)arena_start>>PageShift;
+ s = runtime·mheap.spans[x];
+ if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
+ continue;
+ p = (byte*)((uintptr)s->start<<PageShift);
+ if(s->sizeclass == 0) {
+ obj = p;
+ } else {
+ size = s->elemsize;
+ int32 i = ((byte*)obj - p)/size;
+ obj = p+i*size;
+ }
- // Now that we know the object header, reload bits.
- off = (uintptr*)obj - (uintptr*)arena_start;
- bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
- shift = off % wordsPerBitmapWord;
- xbits = *bitp;
- bits = xbits >> shift;
- if(CollectStats)
- runtime·xadd64(&gcstats.flushptrbuf.foundspan, 1);
+ // Now that we know the object header, reload bits.
+ off = (uintptr*)obj - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
+ if(CollectStats)
+ runtime·xadd64(&gcstats.flushptrbuf.foundspan, 1);
- found:
- // Now we have bits, bitp, and shift correct for
- // obj pointing at the base of the object.
- // Only care about allocated and not marked.
- if((bits & (bitAllocated|bitMarked)) != bitAllocated)
- continue;
- if(work.nproc == 1)
- *bitp |= bitMarked<<shift;
- else {
- for(;;) {
- x = *bitp;
- if(x & (bitMarked<<shift))
- goto continue_obj;
- if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
- break;
- }
+ found:
+ // Now we have bits, bitp, and shift correct for
+ // obj pointing at the base of the object.
+ // Only care about allocated and not marked.
+ if((bits & (bitAllocated|bitMarked)) != bitAllocated)
+ continue;
+ if(work.nproc == 1)
+ *bitp |= bitMarked<<shift;
+ else {
+ for(;;) {
+ x = *bitp;
+ if(x & (bitMarked<<shift))
+ goto continue_obj;
+ if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
+ break;
}
+ }
- // If object has no pointers, don't need to scan further.
- if((bits & bitNoScan) != 0)
- continue;
+ // If object has no pointers, don't need to scan further.
+ if((bits & bitScan) == 0)
+ continue;
- // Ask span about size class.
- // (Manually inlined copy of MHeap_Lookup.)
- x = (uintptr)obj >> PageShift;
- if(sizeof(void*) == 8)
- x -= (uintptr)arena_start>>PageShift;
- s = runtime·mheap.spans[x];
+ // Ask span about size class.
+ // (Manually inlined copy of MHeap_Lookup.)
+ x = (uintptr)obj >> PageShift;
+ x -= (uintptr)arena_start>>PageShift;
+ s = runtime·mheap.spans[x];
- PREFETCH(obj);
+ PREFETCH(obj);
- *wp = (Obj){obj, s->elemsize, ti};
- wp++;
- nobj++;
- continue_obj:;
- }
+ *wp = (Obj){obj, s->elemsize, ti};
+ wp++;
+ nobj++;
+ continue_obj:;
+ }
- // If another proc wants a pointer, give it some.
- if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
- wbuf->nobj = nobj;
- wbuf = handoff(wbuf);
- nobj = wbuf->nobj;
- wp = wbuf->obj + nobj;
- }
+ // If another proc wants a pointer, give it some.
+ if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = wbuf->obj + nobj;
}
- *_wp = wp;
- *_wbuf = wbuf;
- *_nobj = nobj;
+ sbuf->wp = wp;
+ sbuf->wbuf = wbuf;
+ sbuf->nobj = nobj;
}
static void
-flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
+flushobjbuf(Scanbuf *sbuf)
{
uintptr nobj, off;
Obj *wp, obj;
Workbuf *wbuf;
+ Obj *objbuf;
Obj *objbuf_end;
- wp = *_wp;
- wbuf = *_wbuf;
- nobj = *_nobj;
+ wp = sbuf->wp;
+ wbuf = sbuf->wbuf;
+ nobj = sbuf->nobj;
- objbuf_end = *objbufpos;
- *objbufpos = objbuf;
+ objbuf = sbuf->obj.begin;
+ objbuf_end = sbuf->obj.pos;
+ sbuf->obj.pos = sbuf->obj.begin;
while(objbuf < objbuf_end) {
obj = *objbuf++;
@@ -570,9 +637,9 @@ flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_
wp = wbuf->obj + nobj;
}
- *_wp = wp;
- *_wbuf = wbuf;
- *_nobj = nobj;
+ sbuf->wp = wp;
+ sbuf->wbuf = wbuf;
+ sbuf->nobj = nobj;
}
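
Both flush functions locate mark bits with the same off/bitp/shift idiom; since this change deletes the block comment that documented the bitmap layout, here is a consolidated sketch of the addressing (using the runtime's own types and constants):

    // Sketch: fetch the 4 bitmap bits describing the heap word at p.
    // One bitmap word covers wordsPerBitmapWord heap words (16 on 64-bit),
    // and the bitmap extends backward from mheap.arena_start.
    static uintptr
    heapbits(byte *arena_start, byte *p)
    {
        uintptr off, *bitp, shift;

        off = (uintptr*)p - (uintptr*)arena_start;   // word offset in arena
        bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
        shift = off % wordsPerBitmapWord;
        return *bitp >> shift;   // caller tests bitAllocated, bitMarked, ...
    }
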
// Program that scans the whole block and treats every block element as a potential pointer
@@ -607,8 +674,7 @@ checkptr(void *obj, uintptr objti)
if(t == nil)
return;
x = (uintptr)obj >> PageShift;
- if(sizeof(void*) == 8)
- x -= (uintptr)(runtime·mheap.arena_start)>>PageShift;
+ x -= (uintptr)(runtime·mheap.arena_start)>>PageShift;
s = runtime·mheap.spans[x];
objstart = (byte*)((uintptr)s->start<<PageShift);
if(s->sizeclass != 0) {
@@ -636,8 +702,8 @@ checkptr(void *obj, uintptr objti)
// A simple best-effort check until first GC_END.
for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
if(pc1[j] != pc2[j]) {
- runtime·printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n",
- t->string ? (int8*)t->string->str : (int8*)"?", j, pc1[j], pc2[j]);
+ runtime·printf("invalid gc type info for '%s', type info %p [%d]=%p, block info %p [%d]=%p\n",
+ t->string ? (int8*)t->string->str : (int8*)"?", pc1, (int32)j, pc1[j], pc2, (int32)j, pc2[j]);
runtime·throw("invalid gc type info");
}
}
@@ -650,30 +716,27 @@ checkptr(void *obj, uintptr objti)
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
-//
-// wbuf: current work buffer
-// wp: storage for next queued pointer (write pointer)
-// nobj: number of queued objects
static void
-scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
+scanblock(Workbuf *wbuf, bool keepworking)
{
byte *b, *arena_start, *arena_used;
- uintptr n, i, end_b, elemsize, size, ti, objti, count, type;
+ uintptr n, i, end_b, elemsize, size, ti, objti, count, type, nobj;
uintptr *pc, precise_type, nominal_size;
uintptr *chan_ret, chancap;
void *obj;
- Type *t;
+ Type *t, *et;
Slice *sliceptr;
+ String *stringptr;
Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
BufferList *scanbuffers;
- PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos;
- Obj *objbuf, *objbuf_end, *objbufpos;
+ Scanbuf sbuf;
Eface *eface;
Iface *iface;
Hchan *chan;
ChanType *chantype;
+ Obj *wp;
- if(sizeof(Workbuf) % PageSize != 0)
+ if(sizeof(Workbuf) % WorkbufSize != 0)
runtime·throw("scanblock: size of Workbuf is suboptimal");
// Memory arena parameters.
@@ -681,21 +744,30 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
arena_used = runtime·mheap.arena_used;
stack_ptr = stack+nelem(stack)-1;
-
+
precise_type = false;
nominal_size = 0;
- // Allocate ptrbuf
- {
- scanbuffers = &bufferList[m->helpgc];
- ptrbuf = &scanbuffers->ptrtarget[0];
- ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
- objbuf = &scanbuffers->obj[0];
- objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
+ if(wbuf) {
+ nobj = wbuf->nobj;
+ wp = &wbuf->obj[nobj];
+ } else {
+ nobj = 0;
+ wp = nil;
}
- ptrbufpos = ptrbuf;
- objbufpos = objbuf;
+ // Initialize sbuf
+ scanbuffers = &bufferList[m->helpgc];
+
+ sbuf.ptr.begin = sbuf.ptr.pos = &scanbuffers->ptrtarget[0];
+ sbuf.ptr.end = sbuf.ptr.begin + nelem(scanbuffers->ptrtarget);
+
+ sbuf.obj.begin = sbuf.obj.pos = &scanbuffers->obj[0];
+ sbuf.obj.end = sbuf.obj.begin + nelem(scanbuffers->obj);
+
+ sbuf.wbuf = wbuf;
+ sbuf.wp = wp;
+ sbuf.nobj = nobj;
// (Silence the compiler)
chan = nil;
@@ -707,17 +779,17 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
for(;;) {
// Each iteration scans the block b of length n, queueing pointers in
// the work buffer.
- if(Debug > 1) {
- runtime·printf("scanblock %p %D\n", b, (int64)n);
- }
if(CollectStats) {
runtime·xadd64(&gcstats.nbytes, n);
- runtime·xadd64(&gcstats.obj.sum, nobj);
+ runtime·xadd64(&gcstats.obj.sum, sbuf.nobj);
runtime·xadd64(&gcstats.obj.cnt, 1);
}
if(ti != 0) {
+ if(Debug > 1) {
+ runtime·printf("scanblock %p %D ti %p\n", b, (int64)n, ti);
+ }
pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
precise_type = (ti & PRECISE);
stack_top.elemsize = pc[0];
@@ -773,14 +845,22 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
pc = chanProg;
break;
default:
+ if(Debug > 1)
+ runtime·printf("scanblock %p %D type %p %S\n", b, (int64)n, type, *t->string);
runtime·throw("scanblock: invalid type");
return;
}
+ if(Debug > 1)
+ runtime·printf("scanblock %p %D type %p %S pc=%p\n", b, (int64)n, type, *t->string, pc);
} else {
pc = defaultProg;
+ if(Debug > 1)
+ runtime·printf("scanblock %p %D unknown type\n", b, (int64)n);
}
} else {
pc = defaultProg;
+ if(Debug > 1)
+ runtime·printf("scanblock %p %D no span types\n", b, (int64)n);
}
if(IgnorePreciseGC)
@@ -788,7 +868,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
pc++;
stack_top.b = (uintptr)b;
-
end_b = (uintptr)b + n - PtrSize;
for(;;) {
@@ -801,6 +880,8 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
case GC_PTR:
obj = *(void**)(stack_top.b + pc[1]);
objti = pc[2];
+ if(Debug > 2)
+ runtime·printf("gc_ptr @%p: %p ti=%p\n", stack_top.b+pc[1], obj, objti);
pc += 3;
if(Debug)
checkptr(obj, objti);
@@ -808,6 +889,8 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
case GC_SLICE:
sliceptr = (Slice*)(stack_top.b + pc[1]);
+ if(Debug > 2)
+ runtime·printf("gc_slice @%p: %p/%D/%D\n", sliceptr, sliceptr->array, (int64)sliceptr->len, (int64)sliceptr->cap);
if(sliceptr->cap != 0) {
obj = sliceptr->array;
// Can't use slice element type for scanning,
@@ -821,27 +904,34 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
case GC_APTR:
obj = *(void**)(stack_top.b + pc[1]);
+ if(Debug > 2)
+ runtime·printf("gc_aptr @%p: %p\n", stack_top.b+pc[1], obj);
pc += 2;
break;
case GC_STRING:
- obj = *(void**)(stack_top.b + pc[1]);
- markonly(obj);
+ stringptr = (String*)(stack_top.b + pc[1]);
+ if(Debug > 2)
+ runtime·printf("gc_string @%p: %p/%D\n", stack_top.b+pc[1], stringptr->str, (int64)stringptr->len);
+ if(stringptr->len != 0)
+ markonly(stringptr->str);
pc += 2;
continue;
case GC_EFACE:
eface = (Eface*)(stack_top.b + pc[1]);
pc += 2;
+ if(Debug > 2)
+ runtime·printf("gc_eface @%p: %p %p\n", stack_top.b+pc[1], eface->type, eface->data);
if(eface->type == nil)
continue;
// eface->type
t = eface->type;
if((void*)t >= arena_start && (void*)t < arena_used) {
- *ptrbufpos++ = (PtrTarget){t, 0};
- if(ptrbufpos == ptrbuf_end)
- flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
+ *sbuf.ptr.pos++ = (PtrTarget){t, 0};
+ if(sbuf.ptr.pos == sbuf.ptr.end)
+ flushptrbuf(&sbuf);
}
// eface->data
@@ -851,8 +941,14 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
continue;
obj = eface->data;
- if((t->kind & ~KindNoPointers) == KindPtr)
- objti = (uintptr)((PtrType*)t)->elem->gc;
+ if((t->kind & ~KindNoPointers) == KindPtr) {
+ // Only use type information if it is a pointer-containing type.
+ // This matches the GC programs written by cmd/gc/reflect.c's
+ // dgcsym1 in case TPTR32/case TPTR64. See rationale there.
+ et = ((PtrType*)t)->elem;
+ if(!(et->kind & KindNoPointers))
+ objti = (uintptr)((PtrType*)t)->elem->gc;
+ }
} else {
obj = eface->data;
objti = (uintptr)t->gc;
@@ -863,14 +959,16 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
case GC_IFACE:
iface = (Iface*)(stack_top.b + pc[1]);
pc += 2;
+ if(Debug > 2)
+ runtime·printf("gc_iface @%p: %p/%p %p\n", stack_top.b+pc[1], iface->tab, nil, iface->data);
if(iface->tab == nil)
continue;
// iface->tab
if((void*)iface->tab >= arena_start && (void*)iface->tab < arena_used) {
- *ptrbufpos++ = (PtrTarget){iface->tab, (uintptr)itabtype->gc};
- if(ptrbufpos == ptrbuf_end)
- flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
+ *sbuf.ptr.pos++ = (PtrTarget){iface->tab, (uintptr)itabtype->gc};
+ if(sbuf.ptr.pos == sbuf.ptr.end)
+ flushptrbuf(&sbuf);
}
// iface->data
@@ -881,8 +979,14 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
continue;
obj = iface->data;
- if((t->kind & ~KindNoPointers) == KindPtr)
- objti = (uintptr)((PtrType*)t)->elem->gc;
+ if((t->kind & ~KindNoPointers) == KindPtr) {
+ // Only use type information if it is a pointer-containing type.
+ // This matches the GC programs written by cmd/gc/reflect.c's
+ // dgcsym1 in case TPTR32/case TPTR64. See rationale there.
+ et = ((PtrType*)t)->elem;
+ if(!(et->kind & KindNoPointers))
+ objti = (uintptr)((PtrType*)t)->elem->gc;
+ }
} else {
obj = iface->data;
objti = (uintptr)t->gc;
@@ -893,11 +997,13 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
case GC_DEFAULT_PTR:
while(stack_top.b <= end_b) {
obj = *(byte**)stack_top.b;
+ if(Debug > 2)
+ runtime·printf("gc_default_ptr @%p: %p\n", stack_top.b, obj);
stack_top.b += PtrSize;
if(obj >= arena_start && obj < arena_used) {
- *ptrbufpos++ = (PtrTarget){obj, 0};
- if(ptrbufpos == ptrbuf_end)
- flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
+ *sbuf.ptr.pos++ = (PtrTarget){obj, 0};
+ if(sbuf.ptr.pos == sbuf.ptr.end)
+ flushptrbuf(&sbuf);
}
}
goto next_block;
@@ -926,7 +1032,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
if(*(byte**)i != nil) {
// Found a value that may be a pointer.
// Do a rescan of the entire block.
- enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj);
+ enqueue((Obj){b, n, 0}, &sbuf.wbuf, &sbuf.wp, &sbuf.nobj);
if(CollectStats) {
runtime·xadd64(&gcstats.rescan, 1);
runtime·xadd64(&gcstats.rescanbytes, n);
@@ -972,13 +1078,17 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
objti = pc[3];
pc += 4;
- *objbufpos++ = (Obj){obj, size, objti};
- if(objbufpos == objbuf_end)
- flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
+ if(Debug > 2)
+ runtime·printf("gc_region @%p: %D %p\n", stack_top.b+pc[1], (int64)size, objti);
+ *sbuf.obj.pos++ = (Obj){obj, size, objti};
+ if(sbuf.obj.pos == sbuf.obj.end)
+ flushobjbuf(&sbuf);
continue;
case GC_CHAN_PTR:
chan = *(Hchan**)(stack_top.b + pc[1]);
+ if(Debug > 2 && chan != nil)
+ runtime·printf("gc_chan_ptr @%p: %p/%D/%D %p\n", stack_top.b+pc[1], chan, (int64)chan->qcount, (int64)chan->dataqsiz, pc[2]);
if(chan == nil) {
pc += 3;
continue;
@@ -1007,10 +1117,10 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
// in-use part of the circular buffer is scanned.
// (Channel routines zero the unused part, so the current
// code does not lead to leaks, it's just a little inefficient.)
- *objbufpos++ = (Obj){(byte*)chan+runtime·Hchansize, chancap*chantype->elem->size,
+ *sbuf.obj.pos++ = (Obj){(byte*)chan+runtime·Hchansize, chancap*chantype->elem->size,
(uintptr)chantype->elem->gc | PRECISE | LOOP};
- if(objbufpos == objbuf_end)
- flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
+ if(sbuf.obj.pos == sbuf.obj.end)
+ flushobjbuf(&sbuf);
}
}
if(chan_ret == nil)
@@ -1019,14 +1129,15 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
continue;
default:
+ runtime·printf("runtime: invalid GC instruction %p at %p\n", pc[0], pc);
runtime·throw("scanblock: invalid GC instruction");
return;
}
if(obj >= arena_start && obj < arena_used) {
- *ptrbufpos++ = (PtrTarget){obj, objti};
- if(ptrbufpos == ptrbuf_end)
- flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
+ *sbuf.ptr.pos++ = (PtrTarget){obj, objti};
+ if(sbuf.ptr.pos == sbuf.ptr.end)
+ flushptrbuf(&sbuf);
}
}
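
The cases above interpret compiler-generated GC programs as a small bytecode. As a purely illustrative example (not actual cmd/gc output; offsets assume a 64-bit system), a value of type struct { p *T; s []byte } would be described by a program of roughly this shape:

    sizeof(struct)        // pc[0]: element size, read before the opcode loop
    GC_PTR, 0, ti_T       // pointer field at offset 0, with its type info
    GC_SLICE, 8, ti_elem  // slice header at offset 8 (two args, like GC_PTR)
    GC_END
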
@@ -1034,109 +1145,31 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
// Done scanning [b, b+n). Prepare for the next iteration of
// the loop by setting b, n, ti to the parameters for the next block.
- if(nobj == 0) {
- flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
- flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
+ if(sbuf.nobj == 0) {
+ flushptrbuf(&sbuf);
+ flushobjbuf(&sbuf);
- if(nobj == 0) {
+ if(sbuf.nobj == 0) {
if(!keepworking) {
- if(wbuf)
- putempty(wbuf);
- goto endscan;
+ if(sbuf.wbuf)
+ putempty(sbuf.wbuf);
+ return;
}
// Emptied our buffer: refill.
- wbuf = getfull(wbuf);
- if(wbuf == nil)
- goto endscan;
- nobj = wbuf->nobj;
- wp = wbuf->obj + wbuf->nobj;
+ sbuf.wbuf = getfull(sbuf.wbuf);
+ if(sbuf.wbuf == nil)
+ return;
+ sbuf.nobj = sbuf.wbuf->nobj;
+ sbuf.wp = sbuf.wbuf->obj + sbuf.wbuf->nobj;
}
}
// Fetch b from the work buffer.
- --wp;
- b = wp->p;
- n = wp->n;
- ti = wp->ti;
- nobj--;
- }
-
-endscan:;
-}
-
-// debug_scanblock is the debug copy of scanblock.
-// it is simpler, slower, single-threaded, recursive,
-// and uses bitSpecial as the mark bit.
-static void
-debug_scanblock(byte *b, uintptr n)
-{
- byte *obj, *p;
- void **vp;
- uintptr size, *bitp, bits, shift, i, xbits, off;
- MSpan *s;
-
- if(!DebugMark)
- runtime·throw("debug_scanblock without DebugMark");
-
- if((intptr)n < 0) {
- runtime·printf("debug_scanblock %p %D\n", b, (int64)n);
- runtime·throw("debug_scanblock");
- }
-
- // Align b to a word boundary.
- off = (uintptr)b & (PtrSize-1);
- if(off != 0) {
- b += PtrSize - off;
- n -= PtrSize - off;
- }
-
- vp = (void**)b;
- n /= PtrSize;
- for(i=0; i<n; i++) {
- obj = (byte*)vp[i];
-
- // Words outside the arena cannot be pointers.
- if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used)
- continue;
-
- // Round down to word boundary.
- obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
-
- // Consult span table to find beginning.
- s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj);
- if(s == nil)
- continue;
-
- p = (byte*)((uintptr)s->start<<PageShift);
- size = s->elemsize;
- if(s->sizeclass == 0) {
- obj = p;
- } else {
- int32 i = ((byte*)obj - p)/size;
- obj = p+i*size;
- }
-
- // Now that we know the object header, reload bits.
- off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
- bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
- shift = off % wordsPerBitmapWord;
- xbits = *bitp;
- bits = xbits >> shift;
-
- // Now we have bits, bitp, and shift correct for
- // obj pointing at the base of the object.
- // If not allocated or already marked, done.
- if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
- continue;
- *bitp |= bitSpecial<<shift;
- if(!(bits & bitMarked))
- runtime·printf("found unmarked block %p in %p\n", obj, vp+i);
-
- // If object has no pointers, don't need to scan further.
- if((bits & bitNoScan) != 0)
- continue;
-
- debug_scanblock(obj, size);
+ --sbuf.wp;
+ b = sbuf.wp->p;
+ n = sbuf.wp->n;
+ ti = sbuf.wp->ti;
+ sbuf.nobj--;
}
}
@@ -1196,18 +1229,102 @@ enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
}
static void
+enqueue1(Workbuf **wbufp, Obj obj)
+{
+ Workbuf *wbuf;
+
+ wbuf = *wbufp;
+ if(wbuf->nobj >= nelem(wbuf->obj))
+ *wbufp = wbuf = getempty(wbuf);
+ wbuf->obj[wbuf->nobj++] = obj;
+}
+
+static void
markroot(ParFor *desc, uint32 i)
{
- Obj *wp;
Workbuf *wbuf;
- uintptr nobj;
+ FinBlock *fb;
+ MHeap *h;
+ MSpan **allspans, *s;
+ uint32 spanidx, sg;
+ G *gp;
+ void *p;
USED(&desc);
- wp = nil;
- wbuf = nil;
- nobj = 0;
- enqueue(work.roots[i], &wbuf, &wp, &nobj);
- scanblock(wbuf, wp, nobj, false);
+ wbuf = getempty(nil);
+ // Note: if you add a case here, please also update heapdump.c:dumproots.
+ switch(i) {
+ case RootData:
+ enqueue1(&wbuf, (Obj){data, edata - data, (uintptr)gcdata});
+ break;
+
+ case RootBss:
+ enqueue1(&wbuf, (Obj){bss, ebss - bss, (uintptr)gcbss});
+ break;
+
+ case RootFinalizers:
+ for(fb=allfin; fb; fb=fb->alllink)
+ enqueue1(&wbuf, (Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
+ break;
+
+ case RootSpanTypes:
+ // mark span types and MSpan.specials (to walk spans only once)
+ h = &runtime·mheap;
+ sg = h->sweepgen;
+ allspans = h->allspans;
+ for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
+ Special *sp;
+ SpecialFinalizer *spf;
+
+ s = allspans[spanidx];
+ if(s->sweepgen != sg) {
+ runtime·printf("sweep %d %d\n", s->sweepgen, sg);
+ runtime·throw("gc: unswept span");
+ }
+ if(s->state != MSpanInUse)
+ continue;
+ // The garbage collector ignores type pointers stored in MSpan.types:
+ // - Compiler-generated types are stored outside of heap.
+ // - The reflect package has runtime-generated types cached in its data structures.
+ // The garbage collector relies on finding the references via that cache.
+ if(s->types.compression == MTypes_Words || s->types.compression == MTypes_Bytes)
+ markonly((byte*)s->types.data);
+ for(sp = s->specials; sp != nil; sp = sp->next) {
+ if(sp->kind != KindSpecialFinalizer)
+ continue;
+ // don't mark finalized object, but scan it so we
+ // retain everything it points to.
+ spf = (SpecialFinalizer*)sp;
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ p = (void*)((s->start << PageShift) + spf->offset/s->elemsize*s->elemsize);
+ enqueue1(&wbuf, (Obj){p, s->elemsize, 0});
+ enqueue1(&wbuf, (Obj){(void*)&spf->fn, PtrSize, 0});
+ enqueue1(&wbuf, (Obj){(void*)&spf->fint, PtrSize, 0});
+ enqueue1(&wbuf, (Obj){(void*)&spf->ot, PtrSize, 0});
+ }
+ }
+ break;
+
+ case RootFlushCaches:
+ flushallmcaches();
+ break;
+
+ default:
+ // the rest is scanning goroutine stacks
+ if(i - RootCount >= runtime·allglen)
+ runtime·throw("markroot: bad index");
+ gp = runtime·allg[i - RootCount];
+ // remember when we've first observed the G blocked
+ // needed only to output in traceback
+ if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince == 0)
+ gp->waitsince = work.tstart;
+ addstackroots(gp, &wbuf);
+ break;
+
+ }
+
+ if(wbuf)
+ scanblock(wbuf, false);
}
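
markroot is driven by the parallel for over work.markfor with RootCount + allglen iterations: indices below RootCount select the fixed roots enumerated above, and index RootCount+j scans the stack of goroutine allg[j], as the bounds check in the default case enforces.
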
// Get an empty work buffer off the work.empty list,
@@ -1304,43 +1421,20 @@ handoff(Workbuf *b)
return b1;
}
-static void
-addroot(Obj obj)
-{
- uint32 cap;
- Obj *new;
-
- if(work.nroot >= work.rootcap) {
- cap = PageSize/sizeof(Obj);
- if(cap < 2*work.rootcap)
- cap = 2*work.rootcap;
- new = (Obj*)runtime·SysAlloc(cap*sizeof(Obj), &mstats.gc_sys);
- if(new == nil)
- runtime·throw("runtime: cannot allocate memory");
- if(work.roots != nil) {
- runtime·memmove(new, work.roots, work.rootcap*sizeof(Obj));
- runtime·SysFree(work.roots, work.rootcap*sizeof(Obj), &mstats.gc_sys);
- }
- work.roots = new;
- work.rootcap = cap;
- }
- work.roots[work.nroot] = obj;
- work.nroot++;
-}
-
extern byte pclntab[]; // base for f->ptrsoff
-typedef struct BitVector BitVector;
-struct BitVector
+BitVector
+runtime·stackmapdata(StackMap *stackmap, int32 n)
{
- int32 n;
- uint32 data[];
-};
+ if(n < 0 || n >= stackmap->n)
+ runtime·throw("stackmapdata: index out of range");
+ return (BitVector){stackmap->nbit, stackmap->data + n*((stackmap->nbit+31)/32)};
+}
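
For example, with stackmap->nbit = 40 bits per map, each entry occupies (40+31)/32 = 2 uint32 words, so runtime·stackmapdata(stackmap, n) returns a BitVector pointing at stackmap->data + 2*n.
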
// Scans an interface data value when the interface type indicates
// that it is a pointer.
static void
-scaninterfacedata(uintptr bits, byte *scanp, bool afterprologue)
+scaninterfacedata(uintptr bits, byte *scanp, bool afterprologue, void *wbufp)
{
Itab *tab;
Type *type;
@@ -1356,16 +1450,17 @@ scaninterfacedata(uintptr bits, byte *scanp, bool afterprologue)
return;
}
}
- addroot((Obj){scanp+PtrSize, PtrSize, 0});
+ enqueue1(wbufp, (Obj){scanp+PtrSize, PtrSize, 0});
}
// Starting from scanp, scans words corresponding to set bits.
static void
-scanbitvector(byte *scanp, BitVector *bv, bool afterprologue)
+scanbitvector(Func *f, bool precise, byte *scanp, BitVector *bv, bool afterprologue, void *wbufp)
{
uintptr word, bits;
uint32 *wordp;
int32 i, remptrs;
+ byte *p;
wordp = bv->data;
for(remptrs = bv->n; remptrs > 0; remptrs -= 32) {
@@ -1377,11 +1472,88 @@ scanbitvector(byte *scanp, BitVector *bv, bool afterprologue)
i /= BitsPerPointer;
for(; i > 0; i--) {
bits = word & 3;
- if(bits != BitsNoPointer && *(void**)scanp != nil)
- if(bits == BitsPointer)
- addroot((Obj){scanp, PtrSize, 0});
- else
- scaninterfacedata(bits, scanp, afterprologue);
+ switch(bits) {
+ case BitsDead:
+ if(runtime·debug.gcdead)
+ *(uintptr*)scanp = PoisonGC;
+ break;
+ case BitsScalar:
+ break;
+ case BitsPointer:
+ p = *(byte**)scanp;
+ if(p != nil) {
+ if(Debug > 2)
+ runtime·printf("frame %s @%p: ptr %p\n", runtime·funcname(f), scanp, p);
+ if(precise && (p < (byte*)PageSize || (uintptr)p == PoisonGC || (uintptr)p == PoisonStack)) {
+ // Looks like a junk value in a pointer slot.
+ // Liveness analysis wrong?
+ m->traceback = 2;
+ runtime·printf("bad pointer in frame %s at %p: %p\n", runtime·funcname(f), scanp, p);
+ runtime·throw("bad pointer in scanbitvector");
+ }
+ enqueue1(wbufp, (Obj){scanp, PtrSize, 0});
+ }
+ break;
+ case BitsMultiWord:
+ p = scanp;
+ word >>= BitsPerPointer;
+ scanp += PtrSize;
+ i--;
+ if(i == 0) {
+ // Get next chunk of bits
+ remptrs -= 32;
+ word = *wordp++;
+ if(remptrs < 32)
+ i = remptrs;
+ else
+ i = 32;
+ i /= BitsPerPointer;
+ }
+ switch(word & 3) {
+ case BitsString:
+ if(Debug > 2)
+ runtime·printf("frame %s @%p: string %p/%D\n", runtime·funcname(f), p, ((String*)p)->str, (int64)((String*)p)->len);
+ if(((String*)p)->len != 0)
+ markonly(((String*)p)->str);
+ break;
+ case BitsSlice:
+ word >>= BitsPerPointer;
+ scanp += PtrSize;
+ i--;
+ if(i == 0) {
+ // Get next chunk of bits
+ remptrs -= 32;
+ word = *wordp++;
+ if(remptrs < 32)
+ i = remptrs;
+ else
+ i = 32;
+ i /= BitsPerPointer;
+ }
+ if(Debug > 2)
+ runtime·printf("frame %s @%p: slice %p/%D/%D\n", runtime·funcname(f), p, ((Slice*)p)->array, (int64)((Slice*)p)->len, (int64)((Slice*)p)->cap);
+ if(((Slice*)p)->cap < ((Slice*)p)->len) {
+ m->traceback = 2;
+ runtime·printf("bad slice in frame %s at %p: %p/%p/%p\n", runtime·funcname(f), p, ((byte**)p)[0], ((byte**)p)[1], ((byte**)p)[2]);
+ runtime·throw("slice capacity smaller than length");
+ }
+ if(((Slice*)p)->cap != 0)
+ enqueue1(wbufp, (Obj){p, PtrSize, 0});
+ break;
+ case BitsIface:
+ case BitsEface:
+ if(*(byte**)p != nil) {
+ if(Debug > 2) {
+ if((word&3) == BitsEface)
+ runtime·printf("frame %s @%p: eface %p %p\n", runtime·funcname(f), p, ((uintptr*)p)[0], ((uintptr*)p)[1]);
+ else
+ runtime·printf("frame %s @%p: iface %p %p\n", runtime·funcname(f), p, ((uintptr*)p)[0], ((uintptr*)p)[1]);
+ }
+ scaninterfacedata(word & 3, p, afterprologue, wbufp);
+ }
+ break;
+ }
+ }
word >>= BitsPerPointer;
scanp += PtrSize;
}
@@ -1389,64 +1561,111 @@ scanbitvector(byte *scanp, BitVector *bv, bool afterprologue)
}
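
To illustrate the decoding above (an illustration of the consumption order, not compiler output): a frame whose live slots are a scalar, a heap pointer, and then a string is described by the 2-bit entries BitsScalar, BitsPointer, BitsMultiWord, with the BitsMultiWord case consuming one further entry, BitsString, to classify the two-word value.
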
// Scan a stack frame: local variables and function arguments/results.
-static void
-addframeroots(Stkframe *frame, void*)
+static bool
+scanframe(Stkframe *frame, void *wbufp)
{
Func *f;
- BitVector *args, *locals;
+ StackMap *stackmap;
+ BitVector bv;
uintptr size;
+ uintptr targetpc;
+ int32 pcdata;
bool afterprologue;
+ bool precise;
f = frame->fn;
+ targetpc = frame->continpc;
+ if(targetpc == 0) {
+ // Frame is dead.
+ return true;
+ }
+ if(targetpc != f->entry)
+ targetpc--;
+ pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
+ if(pcdata == -1) {
+ // We do not have a valid pcdata value but there might be a
+ // stackmap for this function. It is likely that we are looking
+ // at the function prologue, assume so and hope for the best.
+ pcdata = 0;
+ }
// Scan local variables if stack frame has been allocated.
// Use pointer information if known.
afterprologue = (frame->varp > (byte*)frame->sp);
+ precise = false;
if(afterprologue) {
- locals = runtime·funcdata(f, FUNCDATA_GCLocals);
- if(locals == nil) {
+ stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
+ if(stackmap == nil) {
// No locals information, scan everything.
size = frame->varp - (byte*)frame->sp;
- addroot((Obj){frame->varp - size, size, 0});
- } else if(locals->n < 0) {
- // Locals size information, scan just the
+ if(Debug > 2)
+ runtime·printf("frame %s unsized locals %p+%p\n", runtime·funcname(f), frame->varp-size, size);
+ enqueue1(wbufp, (Obj){frame->varp - size, size, 0});
+ } else if(stackmap->n < 0) {
+ // Locals size information, scan just the locals.
+ size = -stackmap->n;
+ if(Debug > 2)
+ runtime·printf("frame %s conservative locals %p+%p\n", runtime·funcname(f), frame->varp-size, size);
+ enqueue1(wbufp, (Obj){frame->varp - size, size, 0});
+ } else if(stackmap->n > 0) {
+ // Locals bitmap information, scan just the pointers in
// locals.
- size = -locals->n;
- addroot((Obj){frame->varp - size, size, 0});
- } else if(locals->n > 0) {
- // Locals bitmap information, scan just the
- // pointers in locals.
- size = (locals->n*PtrSize) / BitsPerPointer;
- scanbitvector(frame->varp - size, locals, afterprologue);
+ if(pcdata < 0 || pcdata >= stackmap->n) {
+ // don't know where we are
+ runtime·printf("pcdata is %d and %d stack map entries for %s (targetpc=%p)\n",
+ pcdata, stackmap->n, runtime·funcname(f), targetpc);
+ runtime·throw("scanframe: bad symbol table");
+ }
+ bv = runtime·stackmapdata(stackmap, pcdata);
+ size = (bv.n * PtrSize) / BitsPerPointer;
+ precise = true;
+ scanbitvector(f, true, frame->varp - size, &bv, afterprologue, wbufp);
}
}
// Scan arguments.
// Use pointer information if known.
- args = runtime·funcdata(f, FUNCDATA_GCArgs);
- if(args != nil && args->n > 0)
- scanbitvector(frame->argp, args, false);
- else
- addroot((Obj){frame->argp, frame->arglen, 0});
+ stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
+ if(stackmap != nil) {
+ bv = runtime·stackmapdata(stackmap, pcdata);
+ scanbitvector(f, precise, frame->argp, &bv, true, wbufp);
+ } else {
+ if(Debug > 2)
+ runtime·printf("frame %s conservative args %p+%p\n", runtime·funcname(f), frame->argp, (uintptr)frame->arglen);
+ enqueue1(wbufp, (Obj){frame->argp, frame->arglen, 0});
+ }
+ return true;
}
static void
-addstackroots(G *gp)
+addstackroots(G *gp, Workbuf **wbufp)
{
M *mp;
int32 n;
Stktop *stk;
- uintptr sp, guard, pc, lr;
+ uintptr sp, guard;
void *base;
uintptr size;
- stk = (Stktop*)gp->stackbase;
- guard = gp->stackguard;
+ switch(gp->status){
+ default:
+ runtime·printf("unexpected G.status %d (goroutine %p %D)\n", gp->status, gp, gp->goid);
+ runtime·throw("mark - bad status");
+ case Gdead:
+ return;
+ case Grunning:
+ runtime·throw("mark - world not stopped");
+ case Grunnable:
+ case Gsyscall:
+ case Gwaiting:
+ break;
+ }
if(gp == g)
runtime·throw("can't scan our own stack");
if((mp = gp->m) != nil && mp->helpgc)
runtime·throw("can't scan gchelper stack");
+
if(gp->syscallstack != (uintptr)nil) {
// Scanning another goroutine that is about to enter or might
// have just exited a system call. It may be executing code such
@@ -1454,35 +1673,33 @@ addstackroots(G *gp)
// Use the stack segment and stack pointer at the time of
// the system call instead, since that won't change underfoot.
sp = gp->syscallsp;
- pc = gp->syscallpc;
- lr = 0;
stk = (Stktop*)gp->syscallstack;
guard = gp->syscallguard;
} else {
// Scanning another goroutine's stack.
// The goroutine is usually asleep (the world is stopped).
sp = gp->sched.sp;
- pc = gp->sched.pc;
- lr = gp->sched.lr;
-
+ stk = (Stktop*)gp->stackbase;
+ guard = gp->stackguard;
// For function about to start, context argument is a root too.
if(gp->sched.ctxt != 0 && runtime·mlookup(gp->sched.ctxt, &base, &size, nil))
- addroot((Obj){base, size, 0});
+ enqueue1(wbufp, (Obj){base, size, 0});
}
if(ScanStackByFrames) {
+ USED(sp);
USED(stk);
USED(guard);
- runtime·gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, addframeroots, nil, false);
+ runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, 0x7fffffff, scanframe, wbufp, false);
} else {
- USED(lr);
- USED(pc);
n = 0;
while(stk) {
if(sp < guard-StackGuard || (uintptr)stk < sp) {
runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
runtime·throw("scanstack");
}
- addroot((Obj){(byte*)sp, (uintptr)stk - sp, (uintptr)defaultProg | PRECISE | LOOP});
+ if(Debug > 2)
+ runtime·printf("conservative stack %p+%p\n", (byte*)sp, (uintptr)stk-sp);
+ enqueue1(wbufp, (Obj){(byte*)sp, (uintptr)stk - sp, (uintptr)defaultProg | PRECISE | LOOP});
sp = stk->gobuf.sp;
guard = stk->stackguard;
stk = (Stktop*)stk->stackbase;
@@ -1491,101 +1708,17 @@ addstackroots(G *gp)
}
}
-static void
-addfinroots(void *v)
-{
- uintptr size;
- void *base;
-
- size = 0;
- if(!runtime·mlookup(v, &base, &size, nil) || !runtime·blockspecial(base))
- runtime·throw("mark - finalizer inconsistency");
-
- // do not mark the finalizer block itself. just mark the things it points at.
- addroot((Obj){base, size, 0});
-}
-
-static void
-addroots(void)
-{
- G *gp;
- FinBlock *fb;
- MSpan *s, **allspans;
- uint32 spanidx;
-
- work.nroot = 0;
-
- // data & bss
- // TODO(atom): load balancing
- addroot((Obj){data, edata - data, (uintptr)gcdata});
- addroot((Obj){bss, ebss - bss, (uintptr)gcbss});
-
- // MSpan.types
- allspans = runtime·mheap.allspans;
- for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
- s = allspans[spanidx];
- if(s->state == MSpanInUse) {
- // The garbage collector ignores type pointers stored in MSpan.types:
- // - Compiler-generated types are stored outside of heap.
- // - The reflect package has runtime-generated types cached in its data structures.
- // The garbage collector relies on finding the references via that cache.
- switch(s->types.compression) {
- case MTypes_Empty:
- case MTypes_Single:
- break;
- case MTypes_Words:
- case MTypes_Bytes:
- markonly((byte*)s->types.data);
- break;
- }
- }
- }
-
- // stacks
- for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
- switch(gp->status){
- default:
- runtime·printf("unexpected G.status %d\n", gp->status);
- runtime·throw("mark - bad status");
- case Gdead:
- break;
- case Grunning:
- runtime·throw("mark - world not stopped");
- case Grunnable:
- case Gsyscall:
- case Gwaiting:
- addstackroots(gp);
- break;
- }
- }
-
- runtime·walkfintab(addfinroots);
-
- for(fb=allfin; fb; fb=fb->alllink)
- addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
-}
-
-static bool
-handlespecial(byte *p, uintptr size)
+void
+runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot)
{
- FuncVal *fn;
- uintptr nret;
- PtrType *ot;
- Type *fint;
FinBlock *block;
Finalizer *f;
- if(!runtime·getfinalizer(p, true, &fn, &nret, &fint, &ot)) {
- runtime·setblockspecial(p, false);
- runtime·MProf_Free(p, size);
- return false;
- }
-
runtime·lock(&finlock);
if(finq == nil || finq->cnt == finq->cap) {
if(finc == nil) {
- finc = runtime·persistentalloc(PageSize, 0, &mstats.gc_sys);
- finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
+ finc = runtime·persistentalloc(FinBlockSize, 0, &mstats.gc_sys);
+ finc->cap = (FinBlockSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
finc->alllink = allfin;
allfin = finc;
}
@@ -1601,33 +1734,79 @@ handlespecial(byte *p, uintptr size)
f->fint = fint;
f->ot = ot;
f->arg = p;
+ runtime·fingwake = true;
runtime·unlock(&finlock);
- return true;
+}
+
+void
+runtime·iterate_finq(void (*callback)(FuncVal*, byte*, uintptr, Type*, PtrType*))
+{
+ FinBlock *fb;
+ Finalizer *f;
+ uintptr i;
+
+ for(fb = allfin; fb; fb = fb->alllink) {
+ for(i = 0; i < fb->cnt; i++) {
+ f = &fb->fin[i];
+ callback(f->fn, f->arg, f->nret, f->fint, f->ot);
+ }
+ }
+}
+
+void
+runtime·MSpan_EnsureSwept(MSpan *s)
+{
+ uint32 sg;
+
+ // Caller must disable preemption.
+ // Otherwise when this function returns the span can become unswept again
+ // (if GC is triggered on another goroutine).
+ if(m->locks == 0 && m->mallocing == 0 && g != m->g0)
+ runtime·throw("MSpan_EnsureSwept: m is not locked");
+
+ sg = runtime·mheap.sweepgen;
+ if(runtime·atomicload(&s->sweepgen) == sg)
+ return;
+ if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
+ runtime·MSpan_Sweep(s);
+ return;
+ }
+ // unfortunate condition, and we don't have efficient means to wait
+ while(runtime·atomicload(&s->sweepgen) != sg)
+ runtime·osyield();
}
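
The cas transitions above follow the span sweep-generation protocol documented alongside MSpan.sweepgen in malloc.h:

    sweepgen == h->sweepgen - 2: the span needs sweeping
    sweepgen == h->sweepgen - 1: the span is being swept
    sweepgen == h->sweepgen:     the span is swept and ready to use
    h->sweepgen is incremented by 2 after every GC
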
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
-static void
-sweepspan(ParFor *desc, uint32 idx)
+// Returns true if the span was returned to heap.
+bool
+runtime·MSpan_Sweep(MSpan *s)
{
- int32 cl, n, npages;
- uintptr size;
+ int32 cl, n, npages, nfree;
+ uintptr size, off, *bitp, shift, bits;
+ uint32 sweepgen;
byte *p;
MCache *c;
byte *arena_start;
MLink head, *end;
- int32 nfree;
byte *type_data;
byte compression;
uintptr type_data_inc;
- MSpan *s;
-
- USED(&desc);
- s = runtime·mheap.allspans[idx];
- if(s->state != MSpanInUse)
- return;
+ MLink *x;
+ Special *special, **specialp, *y;
+ bool res, sweepgenset;
+
+ // It's critical that we enter this function with preemption disabled,
+ // GC must not start while we are in the middle of this function.
+ if(m->locks == 0 && m->mallocing == 0 && g != m->g0)
+ runtime·throw("MSpan_Sweep: m is not locked");
+ sweepgen = runtime·mheap.sweepgen;
+ if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
+ runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
+ s->state, s->sweepgen, sweepgen);
+ runtime·throw("MSpan_Sweep: bad span state");
+ }
arena_start = runtime·mheap.arena_start;
- p = (byte*)(s->start << PageShift);
cl = s->sizeclass;
size = s->elemsize;
if(cl == 0) {
@@ -1637,10 +1816,52 @@ sweepspan(ParFor *desc, uint32 idx)
npages = runtime·class_to_allocnpages[cl];
n = (npages << PageShift) / size;
}
+ res = false;
nfree = 0;
end = &head;
c = m->mcache;
-
+ sweepgenset = false;
+
+ // mark any free objects in this span so we don't collect them
+ for(x = s->freelist; x != nil; x = x->next) {
+ // This is markonly(x) but faster because we don't need
+ // atomic access and we're guaranteed to be pointing at
+ // the head of a valid object.
+ off = (uintptr*)x - (uintptr*)runtime·mheap.arena_start;
+ bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ *bitp |= bitMarked<<shift;
+ }
+
+ // Unlink & free special records for any objects we're about to free.
+ specialp = &s->specials;
+ special = *specialp;
+ while(special != nil) {
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ p = (byte*)(s->start << PageShift) + special->offset/size*size;
+ off = (uintptr*)p - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ bits = *bitp>>shift;
+ if((bits & (bitAllocated|bitMarked)) == bitAllocated) {
+ // Find the exact byte for which the special was setup
+ // (as opposed to object beginning).
+ p = (byte*)(s->start << PageShift) + special->offset;
+ // about to free object: splice out special record
+ y = special;
+ special = special->next;
+ *specialp = special;
+ if(!runtime·freespecial(y, p, size, false)) {
+ // stop freeing of object if it has a finalizer
+ *bitp |= bitMarked << shift;
+ }
+ } else {
+ // object is still live: keep special record
+ specialp = &special->next;
+ special = *specialp;
+ }
+ }
+
type_data = (byte*)s->types.data;
type_data_inc = sizeof(uintptr);
compression = s->types.compression;
@@ -1654,9 +1875,8 @@ sweepspan(ParFor *desc, uint32 idx)
// Sweep through n objects of given size starting at p.
// This thread owns the span now, so it can manipulate
// the block bitmap without atomic operations.
+ p = (byte*)(s->start << PageShift);
for(; n > 0; n--, p += size, type_data+=type_data_inc) {
- uintptr off, *bitp, shift, bits;
-
off = (uintptr*)p - (uintptr*)arena_start;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
@@ -1666,33 +1886,32 @@ sweepspan(ParFor *desc, uint32 idx)
continue;
if((bits & bitMarked) != 0) {
- if(DebugMark) {
- if(!(bits & bitSpecial))
- runtime·printf("found spurious mark on %p\n", p);
- *bitp &= ~(bitSpecial<<shift);
- }
*bitp &= ~(bitMarked<<shift);
continue;
}
- // Special means it has a finalizer or is being profiled.
- // In DebugMark mode, the bit has been coopted so
- // we have to assume all blocks are special.
- if(DebugMark || (bits & bitSpecial) != 0) {
- if(handlespecial(p, size))
- continue;
- }
+ if(runtime·debug.allocfreetrace)
+ runtime·tracefree(p, size);
- // Mark freed; restore block boundary bit.
- *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+ // Clear mark and scan bits.
+ *bitp &= ~((bitScan|bitMarked)<<shift);
if(cl == 0) {
// Free large span.
runtime·unmarkspan(p, 1<<PageShift);
- *(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll; // needs zeroing
- runtime·MHeap_Free(&runtime·mheap, s, 1);
+ s->needzero = 1;
+ // important to set sweepgen before returning it to heap
+ runtime·atomicstore(&s->sweepgen, sweepgen);
+ sweepgenset = true;
+ // See note about SysFault vs SysFree in malloc.goc.
+ if(runtime·debug.efence)
+ runtime·SysFault(p, size);
+ else
+ runtime·MHeap_Free(&runtime·mheap, s, 1);
c->local_nlargefree++;
c->local_largefree += size;
+ runtime·xadd64(&mstats.next_gc, -(uint64)(size * (gcpercent + 100)/100));
+ res = true;
} else {
// Free small object.
switch(compression) {
@@ -1703,19 +1922,113 @@ sweepspan(ParFor *desc, uint32 idx)
*(byte*)type_data = 0;
break;
}
- if(size > sizeof(uintptr))
+ if(size > 2*sizeof(uintptr))
((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
-
+ else if(size > sizeof(uintptr))
+ ((uintptr*)p)[1] = 0;
+
end->next = (MLink*)p;
end = (MLink*)p;
nfree++;
}
}
- if(nfree) {
+ // We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
+ // because of the potential for a concurrent free/SetFinalizer.
+ // But we need to set it before we make the span available for allocation
+ // (return it to heap or mcentral), because allocation code assumes that a
+ // span is already swept if available for allocation.
+
+ if(!sweepgenset && nfree == 0) {
+ // The span must be in our exclusive ownership until we update sweepgen,
+ // check for potential races.
+ if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
+ runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
+ s->state, s->sweepgen, sweepgen);
+ runtime·throw("MSpan_Sweep: bad span state after sweep");
+ }
+ runtime·atomicstore(&s->sweepgen, sweepgen);
+ }
+ if(nfree > 0) {
c->local_nsmallfree[cl] += nfree;
c->local_cachealloc -= nfree * size;
- runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end);
+ runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (gcpercent + 100)/100));
+ res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end);
+		// MCentral_FreeSpan updates sweepgen
+ }
+ return res;
+}
+
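
The two xadd64(&mstats.next_gc, ...) calls above implement the give-back described in the header comment. Concretely (illustrative numbers): with gcpercent = 100, sweeping spans that free a total of 1MB of objects lowers next_gc by 1MB * (100+100)/100 = 2MB, exactly cancelling the contribution those bytes made to the conservatively-set target.
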
+// State of background sweep.
+// Protected by gclock.
+static struct
+{
+ G* g;
+ bool parked;
+
+ MSpan** spans;
+ uint32 nspan;
+ uint32 spanidx;
+} sweep;
+
+// background sweeping goroutine
+static void
+bgsweep(void)
+{
+ g->issystem = 1;
+ for(;;) {
+ while(runtime·sweepone() != -1) {
+ gcstats.nbgsweep++;
+ runtime·gosched();
+ }
+ runtime·lock(&gclock);
+ if(!runtime·mheap.sweepdone) {
+		// This can happen if a GC ran between sweepone
+		// returning -1 and our acquiring gclock.
+ runtime·unlock(&gclock);
+ continue;
+ }
+ sweep.parked = true;
+ g->isbackground = true;
+ runtime·parkunlock(&gclock, "GC sweep wait");
+ g->isbackground = false;
+ }
+}
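// Editor's note (not part of this diff): bgsweep calls runtime·gosched after
// every swept span, so the background sweeper yields its processor between
// spans instead of starving user goroutines; once sweepdone is observed under
// gclock it parks, and the next GC cycle readies it again (see gc below).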
+
+// Sweeps one span.
+// Returns the number of pages returned to the heap, or -1 if there is nothing to sweep.
+uintptr
+runtime·sweepone(void)
+{
+ MSpan *s;
+ uint32 idx, sg;
+ uintptr npages;
+
+ // Increment m->locks to ensure that the goroutine is not preempted
+ // in the middle of sweep, which would leave the span in an inconsistent state for the next GC.
+ m->locks++;
+ sg = runtime·mheap.sweepgen;
+ for(;;) {
+ idx = runtime·xadd(&sweep.spanidx, 1) - 1;
+ if(idx >= sweep.nspan) {
+ runtime·mheap.sweepdone = true;
+ m->locks--;
+ return -1;
+ }
+ s = sweep.spans[idx];
+ if(s->state != MSpanInUse) {
+ s->sweepgen = sg;
+ continue;
+ }
+ if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
+ continue;
+ if(s->incache)
+ runtime·throw("sweep of incache span");
+ npages = s->npages;
+ if(!runtime·MSpan_Sweep(s))
+ npages = 0;
+ m->locks--;
+ return npages;
}
}
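// Editor's note (not part of this diff): the sweepgen protocol implied by the
// cas above. runtime·mheap.sweepgen advances by 2 on every GC, and for a span s:
//	s->sweepgen == h->sweepgen - 2: span needs sweeping
//	s->sweepgen == h->sweepgen - 1: span is being swept right now
//	s->sweepgen == h->sweepgen:     span is swept and ready for allocation
// so cas(&s->sweepgen, sg-2, sg-1) gives the caller exclusive sweep ownership.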
@@ -1727,7 +2040,7 @@ dumpspan(uint32 idx)
byte *p;
byte *arena_start;
MSpan *s;
- bool allocated, special;
+ bool allocated;
s = runtime·mheap.allspans[idx];
if(s->state != MSpanInUse)
@@ -1754,7 +2067,6 @@ dumpspan(uint32 idx)
bits = *bitp>>shift;
allocated = ((bits & bitAllocated) != 0);
- special = ((bits & bitSpecial) != 0);
for(i=0; i<size; i+=sizeof(void*)) {
if(column == 0) {
@@ -1762,7 +2074,6 @@ dumpspan(uint32 idx)
}
if(i == 0) {
runtime·printf(allocated ? "(" : "[");
- runtime·printf(special ? "@" : "");
runtime·printf("%p: ", p+i);
} else {
runtime·printf(" ");
@@ -1798,42 +2109,24 @@ runtime·memorydump(void)
void
runtime·gchelper(void)
{
- int32 nproc;
+ uint32 nproc;
+ m->traceback = 2;
gchelperstart();
	// run the parallel mark (work.markfor) over the gc roots
runtime·parfordo(work.markfor);
// help other threads scan secondary blocks
- scanblock(nil, nil, 0, true);
-
- if(DebugMark) {
- // wait while the main thread executes mark(debug_scanblock)
- while(runtime·atomicload(&work.debugmarkdone) == 0)
- runtime·usleep(10);
- }
+ scanblock(nil, true);
- runtime·parfordo(work.sweepfor);
bufferList[m->helpgc].busy = 0;
nproc = work.nproc; // work.nproc can change right after we increment work.ndone
if(runtime·xadd(&work.ndone, +1) == nproc-1)
runtime·notewakeup(&work.alldone);
+ m->traceback = 0;
}
-#define GcpercentUnknown (-2)
-
-// Initialized from $GOGC. GOGC=off means no gc.
-//
-// Next gc is after we've allocated an extra amount of
-// memory proportional to the amount already in use.
-// If gcpercent=100 and we're using 4M, we'll gc again
-// when we get to 8M. This keeps the gc cost in linear
-// proportion to the allocation cost. Adjusting gcpercent
-// just changes the linear constant (and also the amount of
-// extra memory used).
-static int32 gcpercent = GcpercentUnknown;
-
static void
cachestats(void)
{
@@ -1849,12 +2142,25 @@ cachestats(void)
}
static void
-updatememstats(GCStats *stats)
+flushallmcaches(void)
+{
+ P *p, **pp;
+ MCache *c;
+
+ // Flush MCaches to MCentral.
+ for(pp=runtime·allp; p=*pp; pp++) {
+ c = p->mcache;
+ if(c==nil)
+ continue;
+ runtime·MCache_ReleaseAll(c);
+ }
+}
+
+void
+runtime·updatememstats(GCStats *stats)
{
M *mp;
MSpan *s;
- MCache *c;
- P *p, **pp;
int32 i;
uint64 stacks_inuse, smallfree;
uint64 *src, *dst;
@@ -1895,12 +2201,7 @@ updatememstats(GCStats *stats)
}
	// Flush MCaches to MCentral.
- for(pp=runtime·allp; p=*pp; pp++) {
- c = p->mcache;
- if(c==nil)
- continue;
- runtime·MCache_ReleaseAll(c);
- }
+ flushallmcaches();
// Aggregate local stats.
cachestats();
@@ -1942,6 +2243,7 @@ updatememstats(GCStats *stats)
struct gc_args
{
int64 start_time; // start time of GC in ns (just before stoptheworld)
+ bool eagersweep;
};
static void gc(struct gc_args *args);
@@ -1960,8 +2262,8 @@ readgogc(void)
return runtime·atoi(p);
}
-static FuncVal runfinqv = {runfinq};
-
+// force = 1 - do GC regardless of current heap usage
+// force = 2 - do GC and eager sweep
void
runtime·gc(int32 force)
{
@@ -1997,7 +2299,7 @@ runtime·gc(int32 force)
return;
runtime·semacquire(&runtime·worldsema, false);
- if(!force && mstats.heap_alloc < mstats.next_gc) {
+ if(force==0 && mstats.heap_alloc < mstats.next_gc) {
// typically threads which lost the race to grab
// worldsema exit here when gc is done.
runtime·semrelease(&runtime·worldsema);
@@ -2006,22 +2308,25 @@ runtime·gc(int32 force)
// Ok, we're doing it! Stop everybody else
a.start_time = runtime·nanotime();
+ a.eagersweep = force >= 2;
m->gcing = 1;
runtime·stoptheworld();
+ clearpools();
+
// Run gc on the g0 stack. We do this so that the g stack
// we're currently running on will no longer change. Cuts
// the root set down a bit (g0 stacks are not scanned, and
// we don't need to scan gc's internal state). Also an
// enabler for copyable stacks.
for(i = 0; i < (runtime·debug.gctrace > 1 ? 2 : 1); i++) {
+ if(i > 0)
+ a.start_time = runtime·nanotime();
// switch to g0, call gc(&a), then switch back
g->param = &a;
g->status = Gwaiting;
g->waitreason = "garbage collection";
runtime·mcall(mgc);
- // record a new start time in case we're going around again
- a.start_time = runtime·nanotime();
}
// all done
@@ -2032,19 +2337,10 @@ runtime·gc(int32 force)
m->locks--;
// now that gc is done, kick off finalizer thread if needed
- if(finq != nil) {
- runtime·lock(&finlock);
- // kick off or wake up goroutine to run queued finalizers
- if(fing == nil)
- fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc);
- else if(fingwait) {
- fingwait = 0;
- runtime·ready(fing);
- }
- runtime·unlock(&finlock);
+ if(!ConcurrentSweep) {
+ // give the queued finalizers, if any, a chance to run
+ runtime·gosched();
}
- // give the queued finalizers, if any, a chance to run
- runtime·gosched();
}
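// Editor's sketch (hypothetical call sites, not part of this diff): how the
// force argument above is intended to be used.
//	runtime·gc(0);	// collect only if heap_alloc has reached next_gc
//	runtime·gc(1);	// force a collection regardless of heap usage
//	runtime·gc(2);	// force a collection and sweep all spans before returning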
static void
@@ -2060,33 +2356,24 @@ static void
gc(struct gc_args *args)
{
int64 t0, t1, t2, t3, t4;
- uint64 heap0, heap1, obj0, obj1, ninstr;
+ uint64 heap0, heap1, obj, ninstr;
GCStats stats;
- M *mp;
uint32 i;
Eface eface;
+ if(runtime·debug.allocfreetrace)
+ runtime·tracegc();
+
+ m->traceback = 2;
t0 = args->start_time;
+ work.tstart = args->start_time;
if(CollectStats)
runtime·memclr((byte*)&gcstats, sizeof(gcstats));
- for(mp=runtime·allm; mp; mp=mp->alllink)
- runtime·settype_flush(mp);
-
- heap0 = 0;
- obj0 = 0;
- if(runtime·debug.gctrace) {
- updatememstats(nil);
- heap0 = mstats.heap_alloc;
- obj0 = mstats.nmalloc - mstats.nfree;
- }
-
m->locks++; // disable gc during mallocs in parforalloc
if(work.markfor == nil)
work.markfor = runtime·parforalloc(MaxGcproc);
- if(work.sweepfor == nil)
- work.sweepfor = runtime·parforalloc(MaxGcproc);
m->locks--;
if(itabtype == nil) {
@@ -2095,43 +2382,49 @@ gc(struct gc_args *args)
itabtype = ((PtrType*)eface.type)->elem;
}
+ t1 = 0;
+ if(runtime·debug.gctrace)
+ t1 = runtime·nanotime();
+
+ // Sweep whatever is not yet swept by bgsweep.
+ while(runtime·sweepone() != -1)
+ gcstats.npausesweep++;
+
work.nwait = 0;
work.ndone = 0;
- work.debugmarkdone = 0;
work.nproc = runtime·gcprocs();
- addroots();
- runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
- runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap.nspan, nil, true, sweepspan);
+ runtime·parforsetup(work.markfor, work.nproc, RootCount + runtime·allglen, nil, false, markroot);
if(work.nproc > 1) {
runtime·noteclear(&work.alldone);
runtime·helpgc(work.nproc);
}
- t1 = runtime·nanotime();
+ t2 = 0;
+ if(runtime·debug.gctrace)
+ t2 = runtime·nanotime();
gchelperstart();
runtime·parfordo(work.markfor);
- scanblock(nil, nil, 0, true);
+ scanblock(nil, true);
- if(DebugMark) {
- for(i=0; i<work.nroot; i++)
- debug_scanblock(work.roots[i].p, work.roots[i].n);
- runtime·atomicstore(&work.debugmarkdone, 1);
- }
- t2 = runtime·nanotime();
+ t3 = 0;
+ if(runtime·debug.gctrace)
+ t3 = runtime·nanotime();
- runtime·parfordo(work.sweepfor);
bufferList[m->helpgc].busy = 0;
- t3 = runtime·nanotime();
-
if(work.nproc > 1)
runtime·notesleep(&work.alldone);
cachestats();
+ // next_gc calculation is tricky with concurrent sweep since we don't know the size of the live heap.
+ // Estimate what the live heap size was after the previous GC (for tracing only).
+ heap0 = mstats.next_gc*100/(gcpercent+100);
+ // Conservatively set next_gc to a high value assuming that everything is live;
+ // concurrent/lazy sweep will reduce this number while discovering new garbage.
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
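	// Editor's note (worked example, invented numbers): if the previous cycle
	// left next_gc at 8MB with gcpercent=100, heap0 above is 8MB*100/(100+100)
	// = 4MB, the estimated live heap; the assignment above then re-arms next_gc
	// from the current heap_alloc in the same proportion.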
t4 = runtime·nanotime();
- mstats.last_gc = t4;
+ mstats.last_gc = runtime·unixnanotime(); // must be Unix time to make sense to user
mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
mstats.pause_total_ns += t4 - t0;
mstats.numgc++;
@@ -2139,22 +2432,29 @@ gc(struct gc_args *args)
runtime·printf("pause %D\n", t4-t0);
if(runtime·debug.gctrace) {
- updatememstats(&stats);
heap1 = mstats.heap_alloc;
- obj1 = mstats.nmalloc - mstats.nfree;
+ runtime·updatememstats(&stats);
+ if(heap1 != mstats.heap_alloc) {
+ runtime·printf("runtime: mstats skew: heap=%D/%D\n", heap1, mstats.heap_alloc);
+ runtime·throw("mstats skew");
+ }
+ obj = mstats.nmalloc - mstats.nfree;
- stats.nprocyield += work.sweepfor->nprocyield;
- stats.nosyield += work.sweepfor->nosyield;
- stats.nsleep += work.sweepfor->nsleep;
+ stats.nprocyield += work.markfor->nprocyield;
+ stats.nosyield += work.markfor->nosyield;
+ stats.nsleep += work.markfor->nsleep;
- runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
+ runtime·printf("gc%d(%d): %D+%D+%D+%D us, %D -> %D MB, %D (%D-%D) objects,"
+ " %d/%d/%d sweeps,"
" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
- mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
- heap0>>20, heap1>>20, obj0, obj1,
+ mstats.numgc, work.nproc, (t1-t0)/1000, (t2-t1)/1000, (t3-t2)/1000, (t4-t3)/1000,
+ heap0>>20, heap1>>20, obj,
mstats.nmalloc, mstats.nfree,
+ sweep.nspan, gcstats.nbgsweep, gcstats.npausesweep,
stats.nhandoff, stats.nhandoffcnt,
- work.sweepfor->nsteal, work.sweepfor->nstealcnt,
+ work.markfor->nsteal, work.markfor->nstealcnt,
stats.nprocyield, stats.nosyield, stats.nsleep);
+ gcstats.nbgsweep = gcstats.npausesweep = 0;
if(CollectStats) {
runtime·printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
@@ -2181,9 +2481,49 @@ gc(struct gc_args *args)
}
}
+ // We cache the current runtime·mheap.allspans array in sweep.spans,
+ // because the former can be resized and freed.
+ // Otherwise we would need to take the heap lock every time
+ // we want to convert a span index to a span pointer.
+
+ // Free the old cached array if necessary.
+ if(sweep.spans && sweep.spans != runtime·mheap.allspans)
+ runtime·SysFree(sweep.spans, sweep.nspan*sizeof(sweep.spans[0]), &mstats.other_sys);
+ // Cache the current array.
+ runtime·mheap.sweepspans = runtime·mheap.allspans;
+ runtime·mheap.sweepgen += 2;
+ runtime·mheap.sweepdone = false;
+ sweep.spans = runtime·mheap.allspans;
+ sweep.nspan = runtime·mheap.nspan;
+ sweep.spanidx = 0;
+
+ // Concurrent sweep can be temporarily disabled (via ConcurrentSweep) because we see failures on builders.
+ if(ConcurrentSweep && !args->eagersweep) {
+ runtime·lock(&gclock);
+ if(sweep.g == nil)
+ sweep.g = runtime·newproc1(&bgsweepv, nil, 0, 0, runtime·gc);
+ else if(sweep.parked) {
+ sweep.parked = false;
+ runtime·ready(sweep.g);
+ }
+ runtime·unlock(&gclock);
+ } else {
+ // Sweep all spans eagerly.
+ while(runtime·sweepone() != -1)
+ gcstats.npausesweep++;
+ }
+
+ // Shrink a stack if not much of it is being used.
+ // TODO: do in a parfor
+ for(i = 0; i < runtime·allglen; i++)
+ runtime·shrinkstack(runtime·allg[i]);
+
runtime·MProf_GC();
+ m->traceback = 0;
}
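// Editor's note (illustrative output, numbers invented): a gctrace=1 line in
// the format printed above might read
//	gc12(4): 120+37+2100+90 us, 4 -> 7 MB, 52341 (310452-258111) objects,
//		540/480/60 sweeps, 5(140) handoff, 8(64) steal, 3/2/1 yields
// i.e. the four phase times t1-t0, t2-t1, t3-t2, t4-t3 in microseconds,
// followed by heap sizes, object counts, and sweep/parallelism counters.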
+extern uintptr runtime·sizeof_C_MStats;
+
void
runtime·ReadMemStats(MStats *stats)
{
@@ -2194,8 +2534,10 @@ runtime·ReadMemStats(MStats *stats)
runtime·semacquire(&runtime·worldsema, false);
m->gcing = 1;
runtime·stoptheworld();
- updatememstats(nil);
- *stats = mstats;
+ runtime·updatememstats(nil);
+ // The size of the trailing by_size array differs between Go and C:
+ // NumSizeClasses was changed, but the Go struct cannot be changed for backward-compatibility reasons.
+ runtime·memcopy(runtime·sizeof_C_MStats, stats, &mstats);
m->gcing = 0;
m->locks++;
runtime·semrelease(&runtime·worldsema);
@@ -2234,9 +2576,10 @@ runtime∕debug·readGCStats(Slice *pauses)
pauses->len = n+3;
}
-void
-runtime∕debug·setGCPercent(intgo in, intgo out)
-{
+int32
+runtime·setgcpercent(int32 in) {
+ int32 out;
+
runtime·lock(&runtime·mheap);
if(gcpercent == GcpercentUnknown)
gcpercent = readgogc();
@@ -2245,7 +2588,7 @@ runtime∕debug·setGCPercent(intgo in, intgo out)
in = -1;
gcpercent = in;
runtime·unlock(&runtime·mheap);
- FLUSH(&out);
+ return out;
}
static void
@@ -2268,15 +2611,38 @@ runfinq(void)
uint32 framesz, framecap, i;
Eface *ef, ef1;
+ // This function blocks for long periods of time, and because it is written in C
+ // we have no liveness information. Zero everything so that uninitialized pointers
+ // do not cause memory leaks.
+ f = nil;
+ fb = nil;
+ next = nil;
frame = nil;
framecap = 0;
+ framesz = 0;
+ i = 0;
+ ef = nil;
+ ef1.type = nil;
+ ef1.data = nil;
+
+ // force flush to memory
+ USED(&f);
+ USED(&fb);
+ USED(&next);
+ USED(&framesz);
+ USED(&i);
+ USED(&ef);
+ USED(&ef1);
+
for(;;) {
runtime·lock(&finlock);
fb = finq;
finq = nil;
if(fb == nil) {
- fingwait = 1;
- runtime·park(runtime·unlock, &finlock, "finalizer wait");
+ runtime·fingwait = true;
+ g->isbackground = true;
+ runtime·parkunlock(&finlock, "finalizer wait");
+ g->isbackground = false;
continue;
}
runtime·unlock(&finlock);
@@ -2290,7 +2656,7 @@ runfinq(void)
if(framecap < framesz) {
runtime·free(frame);
// The frame does not contain pointers interesting for GC,
- // all not yet finalized objects are stored in finc.
+ // all not yet finalized objects are stored in finq.
// If we do not mark it as FlagNoScan,
// the last finalized object is not collected.
frame = runtime·mallocgc(framesz, 0, FlagNoScan|FlagNoInvokeGC);
@@ -2313,80 +2679,99 @@ runfinq(void)
if(!runtime·ifaceE2I2((InterfaceType*)f->fint, ef1, (Iface*)frame))
runtime·throw("invalid type conversion in runfinq");
}
- reflect·call(f->fn, frame, framesz);
+ reflect·call(f->fn, frame, framesz, framesz);
f->fn = nil;
f->arg = nil;
f->ot = nil;
}
fb->cnt = 0;
+ runtime·lock(&finlock);
fb->next = finc;
finc = fb;
+ runtime·unlock(&finlock);
}
+
+ // Zero everything that's dead, to avoid memory leaks.
+ // See comment at top of function.
+ f = nil;
+ fb = nil;
+ next = nil;
+ i = 0;
+ ef = nil;
+ ef1.type = nil;
+ ef1.data = nil;
runtime·gc(1); // trigger another gc to clean up the finalized objects, if possible
}
}
-// mark the block at v of size n as allocated.
-// If noscan is true, mark it as not needing scanning.
void
-runtime·markallocated(void *v, uintptr n, bool noscan)
+runtime·createfing(void)
{
- uintptr *b, obits, bits, off, shift;
+ if(fing != nil)
+ return;
+ // Here we use gclock instead of finlock,
+ // because newproc1 can allocate, which can cause on-demand span sweep,
+ // which can queue finalizers, which would deadlock.
+ runtime·lock(&gclock);
+ if(fing == nil)
+ fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc);
+ runtime·unlock(&gclock);
+}
- if(0)
- runtime·printf("markallocated %p+%p\n", v, n);
+G*
+runtime·wakefing(void)
+{
+ G *res;
- if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
- runtime·throw("markallocated: bad pointer");
+ res = nil;
+ runtime·lock(&finlock);
+ if(runtime·fingwait && runtime·fingwake) {
+ runtime·fingwait = false;
+ runtime·fingwake = false;
+ res = fing;
+ }
+ runtime·unlock(&finlock);
+ return res;
+}
+
+void
+runtime·marknogc(void *v)
+{
+ uintptr *b, off, shift;
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
+ *b = (*b & ~(bitAllocated<<shift)) | bitBlockBoundary<<shift;
+}
- for(;;) {
- obits = *b;
- bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
- if(noscan)
- bits |= bitNoScan<<shift;
- if(runtime·gomaxprocs == 1) {
- *b = bits;
- break;
- } else {
- // more than one goroutine is potentially running: use atomic op
- if(runtime·casp((void**)b, (void*)obits, (void*)bits))
- break;
- }
- }
+void
+runtime·markscan(void *v)
+{
+ uintptr *b, off, shift;
+
+ off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
+ b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ *b |= bitScan<<shift;
}
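// Editor's note (not part of this diff): the off/b/shift arithmetic shared by
// the mark* functions here. The heap bitmap grows downward from arena_start,
// one bitmap word covering wordsPerBitmapWord heap words:
//	off   = (uintptr*)v - (uintptr*)arena_start;                 // heap word index
//	b     = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;  // bitmap word for v
//	shift = off % wordsPerBitmapWord;                            // v's slot in that word
// Flag bits (bitAllocated, bitScan, bitMarked, ...) are then set or cleared
// as flag<<shift within *b.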
-// mark the block at v of size n as freed.
+// mark the block at v as freed.
void
-runtime·markfreed(void *v, uintptr n)
+runtime·markfreed(void *v)
{
- uintptr *b, obits, bits, off, shift;
+ uintptr *b, off, shift;
if(0)
- runtime·printf("markfreed %p+%p\n", v, n);
+ runtime·printf("markfreed %p\n", v);
- if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+ if((byte*)v > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
runtime·throw("markfreed: bad pointer");
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
-
- for(;;) {
- obits = *b;
- bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
- if(runtime·gomaxprocs == 1) {
- *b = bits;
- break;
- } else {
- // more than one goroutine is potentially running: use atomic op
- if(runtime·casp((void**)b, (void*)obits, (void*)bits))
- break;
- }
- }
+ *b = (*b & ~(bitMask<<shift)) | (bitAllocated<<shift);
}
// check that the block at v of size n is marked freed.
@@ -2418,15 +2803,28 @@ runtime·checkfreed(void *v, uintptr n)
void
runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
{
- uintptr *b, off, shift;
+ uintptr *b, *b0, off, shift, i, x;
byte *p;
if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
runtime·throw("markspan: bad pointer");
+ if(runtime·checking) {
+ // bits should be all zero at the start
+ off = (byte*)v + size - runtime·mheap.arena_start;
+ b = (uintptr*)(runtime·mheap.arena_start - off/wordsPerBitmapWord);
+ for(i = 0; i < size/PtrSize/wordsPerBitmapWord; i++) {
+ if(b[i] != 0)
+ runtime·throw("markspan: span bits not zero");
+ }
+ }
+
p = v;
if(leftover) // mark a boundary just past end of last block too
n++;
+
+ b0 = nil;
+ x = 0;
for(; n-- > 0; p += size) {
// Okay to use non-atomic ops here, because we control
// the entire span, and each bitmap word has bits for only
@@ -2435,8 +2833,15 @@ runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start; // word offset
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
- *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+ if(b0 != b) {
+ if(b0 != nil)
+ *b0 = x;
+ b0 = b;
+ x = 0;
+ }
+ x |= bitAllocated<<shift;
}
+ *b0 = x;
}
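// Editor's note (not part of this diff): the b0/x pair added above batches
// bitmap writes. Consecutive blocks of a span usually map to the same bitmap
// word, so bits are accumulated in x and flushed once per bitmap word rather
// than read-modify-writing *b separately for each block.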
// unmark the span of memory at v of length n bytes.
@@ -2465,50 +2870,6 @@ runtime·unmarkspan(void *v, uintptr n)
*b-- = 0;
}
-bool
-runtime·blockspecial(void *v)
-{
- uintptr *b, off, shift;
-
- if(DebugMark)
- return true;
-
- off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
- b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
- shift = off % wordsPerBitmapWord;
-
- return (*b & (bitSpecial<<shift)) != 0;
-}
-
-void
-runtime·setblockspecial(void *v, bool s)
-{
- uintptr *b, off, shift, bits, obits;
-
- if(DebugMark)
- return;
-
- off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
- b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
- shift = off % wordsPerBitmapWord;
-
- for(;;) {
- obits = *b;
- if(s)
- bits = obits | (bitSpecial<<shift);
- else
- bits = obits & ~(bitSpecial<<shift);
- if(runtime·gomaxprocs == 1) {
- *b = bits;
- break;
- } else {
- // more than one goroutine is potentially running: use atomic op
- if(runtime·casp((void**)b, (void*)obits, (void*)bits))
- break;
- }
- }
-}
-
void
runtime·MHeap_MapBits(MHeap *h)
{
@@ -2522,9 +2883,10 @@ runtime·MHeap_MapBits(MHeap *h)
n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
n = ROUND(n, bitmapChunk);
+ n = ROUND(n, PhysPageSize);
if(h->bitmap_mapped >= n)
return;
- runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped, &mstats.gc_sys);
+ runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped, h->arena_reserved, &mstats.gc_sys);
h->bitmap_mapped = n;
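// Editor's note (assumption about SysMap's contract, not stated in this diff):
// rounding n up to PhysPageSize in addition to bitmapChunk keeps the mapped
// bitmap length page-aligned for the mmap-style SysMap call above, on
// platforms whose physical page size exceeds the bitmapChunk alignment.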
}