diff options
author | Michael Stapelberg <stapelberg@debian.org> | 2014-06-19 09:22:53 +0200 |
---|---|---|
committer | Michael Stapelberg <stapelberg@debian.org> | 2014-06-19 09:22:53 +0200 |
commit | 8a39ee361feb9bf46d728ff1ba4f07ca1d9610b1 (patch) | |
tree | 4449f2036cccf162e8417cc5841a35815b3e7ac5 /src/pkg/runtime/mprof.goc | |
parent | c8bf49ef8a92e2337b69c14b9b88396efe498600 (diff) | |
download | golang-upstream/1.3.tar.gz |
Imported Upstream version 1.3 (upstream/1.3)
Diffstat (limited to 'src/pkg/runtime/mprof.goc')
-rw-r--r-- | src/pkg/runtime/mprof.goc | 271 |
1 files changed, 138 insertions, 133 deletions
diff --git a/src/pkg/runtime/mprof.goc b/src/pkg/runtime/mprof.goc index 4ae74f0c2..9c23a16f8 100644 --- a/src/pkg/runtime/mprof.goc +++ b/src/pkg/runtime/mprof.goc @@ -22,7 +22,6 @@ enum { MProf, BProf }; // profile types // Per-call-stack profiling information. // Lookup by hashing call stack into a linked-list hash table. -typedef struct Bucket Bucket; struct Bucket { Bucket *next; // next in hash list @@ -34,14 +33,33 @@ struct Bucket { struct // typ == MProf { + // The following complex 3-stage scheme of stats accumulation + // is required to obtain a consistent picture of mallocs and frees + // for some point in time. + // The problem is that mallocs come in real time, while frees + // come only after a GC during concurrent sweeping. So if we would + // naively count them, we would get a skew toward mallocs. + // + // Mallocs are accounted in recent stats. + // Explicit frees are accounted in recent stats. + // GC frees are accounted in prev stats. + // After GC prev stats are added to final stats and + // recent stats are moved into prev stats. uintptr allocs; uintptr frees; uintptr alloc_bytes; uintptr free_bytes; - uintptr recent_allocs; // since last gc + + uintptr prev_allocs; // since last but one till last gc + uintptr prev_frees; + uintptr prev_alloc_bytes; + uintptr prev_free_bytes; + + uintptr recent_allocs; // since last gc till now uintptr recent_frees; uintptr recent_alloc_bytes; uintptr recent_free_bytes; + }; struct // typ == BProf { @@ -49,7 +67,8 @@ struct Bucket int64 cycles; }; }; - uintptr hash; + uintptr hash; // hash of size + stk + uintptr size; uintptr nstk; uintptr stk[1]; }; @@ -63,7 +82,7 @@ static uintptr bucketmem; // Return the bucket for stk[0:nstk], allocating new bucket if needed. 
static Bucket* -stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc) +stkbucket(int32 typ, uintptr size, uintptr *stk, int32 nstk, bool alloc) { int32 i; uintptr h; @@ -82,12 +101,17 @@ stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc) h += h<<10; h ^= h>>6; } + // hash in size + h += size; + h += h<<10; + h ^= h>>6; + // finalize h += h<<3; h ^= h>>11; i = h%BuckHashSize; for(b = buckhash[i]; b; b=b->next) - if(b->typ == typ && b->hash == h && b->nstk == nstk && + if(b->typ == typ && b->hash == h && b->size == size && b->nstk == nstk && runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0) return b; @@ -99,6 +123,7 @@ stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc) runtime·memmove(b->stk, stk, nstk*sizeof stk[0]); b->typ = typ; b->hash = h; + b->size = size; b->nstk = nstk; b->next = buckhash[i]; buckhash[i] = b; @@ -118,10 +143,16 @@ MProf_GC(void) Bucket *b; for(b=mbuckets; b; b=b->allnext) { - b->allocs += b->recent_allocs; - b->frees += b->recent_frees; - b->alloc_bytes += b->recent_alloc_bytes; - b->free_bytes += b->recent_free_bytes; + b->allocs += b->prev_allocs; + b->frees += b->prev_frees; + b->alloc_bytes += b->prev_alloc_bytes; + b->free_bytes += b->prev_free_bytes; + + b->prev_allocs = b->recent_allocs; + b->prev_frees = b->recent_frees; + b->prev_alloc_bytes = b->recent_alloc_bytes; + b->prev_free_bytes = b->recent_free_bytes; + b->recent_allocs = 0; b->recent_frees = 0; b->recent_alloc_bytes = 0; @@ -138,143 +169,39 @@ runtime·MProf_GC(void) runtime·unlock(&proflock); } -// Map from pointer to Bucket* that allocated it. -// Three levels: -// Linked-list hash table for top N-AddrHashShift bits. -// Array index for next AddrDenseBits bits. -// Linked list for next AddrHashShift-AddrDenseBits bits. -// This is more efficient than using a general map, -// because of the typical clustering of the pointer keys. 
- -typedef struct AddrHash AddrHash; -typedef struct AddrEntry AddrEntry; - -enum { - AddrHashBits = 12, // good for 4GB of used address space - AddrHashShift = 20, // each AddrHash knows about 1MB of address space - AddrDenseBits = 8, // good for a profiling rate of 4096 bytes -}; - -struct AddrHash -{ - AddrHash *next; // next in top-level hash table linked list - uintptr addr; // addr>>20 - AddrEntry *dense[1<<AddrDenseBits]; -}; - -struct AddrEntry -{ - AddrEntry *next; // next in bottom-level linked list - uint32 addr; - Bucket *b; -}; - -static AddrHash **addrhash; // points to (AddrHash*)[1<<AddrHashBits] -static AddrEntry *addrfree; -static uintptr addrmem; - -// Multiplicative hash function: -// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)). -// This is a good multiplier as suggested in CLR, Knuth. The hash -// value is taken to be the top AddrHashBits bits of the bottom 32 bits -// of the multiplied value. -enum { - HashMultiplier = 2654435769U -}; - -// Set the bucket associated with addr to b. -static void -setaddrbucket(uintptr addr, Bucket *b) -{ - int32 i; - uint32 h; - AddrHash *ah; - AddrEntry *e; - - h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits); - for(ah=addrhash[h]; ah; ah=ah->next) - if(ah->addr == (addr>>AddrHashShift)) - goto found; - - ah = runtime·persistentalloc(sizeof *ah, 0, &mstats.buckhash_sys); - addrmem += sizeof *ah; - ah->next = addrhash[h]; - ah->addr = addr>>AddrHashShift; - addrhash[h] = ah; - -found: - if((e = addrfree) == nil) { - e = runtime·persistentalloc(64*sizeof *e, 0, &mstats.buckhash_sys); - addrmem += 64*sizeof *e; - for(i=0; i+1<64; i++) - e[i].next = &e[i+1]; - e[63].next = nil; - } - addrfree = e->next; - e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1)); - e->b = b; - h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20. 
- e->next = ah->dense[h]; - ah->dense[h] = e; -} - -// Get the bucket associated with addr and clear the association. -static Bucket* -getaddrbucket(uintptr addr) -{ - uint32 h; - AddrHash *ah; - AddrEntry *e, **l; - Bucket *b; - - h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits); - for(ah=addrhash[h]; ah; ah=ah->next) - if(ah->addr == (addr>>AddrHashShift)) - goto found; - return nil; - -found: - h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20. - for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) { - if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) { - *l = e->next; - b = e->b; - e->next = addrfree; - addrfree = e; - return b; - } - } - return nil; -} - // Called by malloc to record a profiled block. void runtime·MProf_Malloc(void *p, uintptr size) { - int32 nstk; uintptr stk[32]; Bucket *b; + int32 nstk; - nstk = runtime·callers(1, stk, 32); + nstk = runtime·callers(1, stk, nelem(stk)); runtime·lock(&proflock); - b = stkbucket(MProf, stk, nstk, true); + b = stkbucket(MProf, size, stk, nstk, true); b->recent_allocs++; b->recent_alloc_bytes += size; - setaddrbucket((uintptr)p, b); runtime·unlock(&proflock); + + // Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock. + // This reduces potential contention and chances of deadlocks. + // Since the object must be alive during call to MProf_Malloc, + // it's fine to do this non-atomically. + runtime·setprofilebucket(p, b); } // Called when freeing a profiled block. 
void -runtime·MProf_Free(void *p, uintptr size) +runtime·MProf_Free(Bucket *b, uintptr size, bool freed) { - Bucket *b; - runtime·lock(&proflock); - b = getaddrbucket((uintptr)p); - if(b != nil) { + if(freed) { b->recent_frees++; b->recent_free_bytes += size; + } else { + b->prev_frees++; + b->prev_free_bytes += size; } runtime·unlock(&proflock); } @@ -311,9 +238,9 @@ runtime·blockevent(int64 cycles, int32 skip) if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles)) return; - nstk = runtime·callers(skip, stk, 32); + nstk = runtime·callers(skip, stk, nelem(stk)); runtime·lock(&proflock); - b = stkbucket(BProf, stk, nstk, true); + b = stkbucket(BProf, 0, stk, nstk, true); b->count++; b->cycles += cycles; runtime·unlock(&proflock); @@ -365,6 +292,7 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) { // garbage collection is disabled from the beginning of execution, // accumulate stats as if a GC just happened, and recount buckets. MProf_GC(); + MProf_GC(); n = 0; for(b=mbuckets; b; b=b->allnext) if(include_inuse_zero || b->alloc_bytes != b->free_bytes) @@ -381,6 +309,18 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) { runtime·unlock(&proflock); } +void +runtime·iterate_memprof(void (*callback)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr)) +{ + Bucket *b; + + runtime·lock(&proflock); + for(b=mbuckets; b; b=b->allnext) { + callback(b, b->nstk, b->stk, b->size, b->allocs, b->frees); + } + runtime·unlock(&proflock); +} + // Must match BlockProfileRecord in debug.go. 
typedef struct BRecord BRecord; struct BRecord { @@ -483,7 +423,7 @@ saveg(uintptr pc, uintptr sp, G *gp, TRecord *r) } func GoroutineProfile(b Slice) (n int, ok bool) { - uintptr pc, sp; + uintptr pc, sp, i; TRecord *r; G *gp; @@ -502,7 +442,8 @@ func GoroutineProfile(b Slice) (n int, ok bool) { ok = true; r = (TRecord*)b.array; saveg(pc, sp, g, r++); - for(gp = runtime·allg; gp != nil; gp = gp->alllink) { + for(i = 0; i < runtime·allglen; i++) { + gp = runtime·allg[i]; if(gp == g || gp->status == Gdead) continue; saveg(~(uintptr)0, ~(uintptr)0, gp, r++); @@ -515,8 +456,72 @@ func GoroutineProfile(b Slice) (n int, ok bool) { } } +// Tracing of alloc/free/gc. + +static Lock tracelock; + +static int8* +typeinfoname(int32 typeinfo) +{ + if(typeinfo == TypeInfo_SingleObject) + return "single object"; + else if(typeinfo == TypeInfo_Array) + return "array"; + else if(typeinfo == TypeInfo_Chan) + return "channel"; + runtime·throw("typinfoname: unknown type info"); + return nil; +} + +void +runtime·tracealloc(void *p, uintptr size, uintptr typ) +{ + int8 *name; + Type *type; + + runtime·lock(&tracelock); + m->traceback = 2; + type = (Type*)(typ & ~3); + name = typeinfoname(typ & 3); + if(type == nil) + runtime·printf("tracealloc(%p, %p, %s)\n", p, size, name); + else + runtime·printf("tracealloc(%p, %p, %s of %S)\n", p, size, name, *type->string); + if(m->curg == nil || g == m->curg) { + runtime·goroutineheader(g); + runtime·traceback((uintptr)runtime·getcallerpc(&p), (uintptr)runtime·getcallersp(&p), 0, g); + } else { + runtime·goroutineheader(m->curg); + runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, m->curg); + } + runtime·printf("\n"); + m->traceback = 0; + runtime·unlock(&tracelock); +} + +void +runtime·tracefree(void *p, uintptr size) +{ + runtime·lock(&tracelock); + m->traceback = 2; + runtime·printf("tracefree(%p, %p)\n", p, size); + runtime·goroutineheader(g); + runtime·traceback((uintptr)runtime·getcallerpc(&p), (uintptr)runtime·getcallersp(&p), 0, g); + 
runtime·printf("\n"); + m->traceback = 0; + runtime·unlock(&tracelock); +} + void -runtime·mprofinit(void) +runtime·tracegc(void) { - addrhash = runtime·persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0, &mstats.buckhash_sys); + runtime·lock(&tracelock); + m->traceback = 2; + runtime·printf("tracegc()\n"); + // running on m->g0 stack; show all non-g0 goroutines + runtime·tracebackothers(g); + runtime·printf("end tracegc\n"); + runtime·printf("\n"); + m->traceback = 0; + runtime·unlock(&tracelock); } |