summaryrefslogtreecommitdiff
path: root/src/pkg/runtime/mprof.goc
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/runtime/mprof.goc')
-rw-r--r--src/pkg/runtime/mprof.goc271
1 files changed, 138 insertions, 133 deletions
diff --git a/src/pkg/runtime/mprof.goc b/src/pkg/runtime/mprof.goc
index 4ae74f0c2..9c23a16f8 100644
--- a/src/pkg/runtime/mprof.goc
+++ b/src/pkg/runtime/mprof.goc
@@ -22,7 +22,6 @@ enum { MProf, BProf }; // profile types
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
-typedef struct Bucket Bucket;
struct Bucket
{
Bucket *next; // next in hash list
@@ -34,14 +33,33 @@ struct Bucket
{
struct // typ == MProf
{
+ // The following complex 3-stage scheme of stats accumulation
+ // is required to obtain a consistent picture of mallocs and frees
+ // for some point in time.
+ // The problem is that mallocs come in real time, while frees
+ // come only after a GC during concurrent sweeping. So if we would
+ // naively count them, we would get a skew toward mallocs.
+ //
+ // Mallocs are accounted in recent stats.
+ // Explicit frees are accounted in recent stats.
+ // GC frees are accounted in prev stats.
+ // After GC prev stats are added to final stats and
+ // recent stats are moved into prev stats.
uintptr allocs;
uintptr frees;
uintptr alloc_bytes;
uintptr free_bytes;
- uintptr recent_allocs; // since last gc
+
+ uintptr prev_allocs; // since last but one till last gc
+ uintptr prev_frees;
+ uintptr prev_alloc_bytes;
+ uintptr prev_free_bytes;
+
+ uintptr recent_allocs; // since last gc till now
uintptr recent_frees;
uintptr recent_alloc_bytes;
uintptr recent_free_bytes;
+
};
struct // typ == BProf
{
@@ -49,7 +67,8 @@ struct Bucket
int64 cycles;
};
};
- uintptr hash;
+ uintptr hash; // hash of size + stk
+ uintptr size;
uintptr nstk;
uintptr stk[1];
};
@@ -63,7 +82,7 @@ static uintptr bucketmem;
// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
-stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
+stkbucket(int32 typ, uintptr size, uintptr *stk, int32 nstk, bool alloc)
{
int32 i;
uintptr h;
@@ -82,12 +101,17 @@ stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
h += h<<10;
h ^= h>>6;
}
+ // hash in size
+ h += size;
+ h += h<<10;
+ h ^= h>>6;
+ // finalize
h += h<<3;
h ^= h>>11;
i = h%BuckHashSize;
for(b = buckhash[i]; b; b=b->next)
- if(b->typ == typ && b->hash == h && b->nstk == nstk &&
+ if(b->typ == typ && b->hash == h && b->size == size && b->nstk == nstk &&
runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
return b;
@@ -99,6 +123,7 @@ stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
b->typ = typ;
b->hash = h;
+ b->size = size;
b->nstk = nstk;
b->next = buckhash[i];
buckhash[i] = b;
@@ -118,10 +143,16 @@ MProf_GC(void)
Bucket *b;
for(b=mbuckets; b; b=b->allnext) {
- b->allocs += b->recent_allocs;
- b->frees += b->recent_frees;
- b->alloc_bytes += b->recent_alloc_bytes;
- b->free_bytes += b->recent_free_bytes;
+ b->allocs += b->prev_allocs;
+ b->frees += b->prev_frees;
+ b->alloc_bytes += b->prev_alloc_bytes;
+ b->free_bytes += b->prev_free_bytes;
+
+ b->prev_allocs = b->recent_allocs;
+ b->prev_frees = b->recent_frees;
+ b->prev_alloc_bytes = b->recent_alloc_bytes;
+ b->prev_free_bytes = b->recent_free_bytes;
+
b->recent_allocs = 0;
b->recent_frees = 0;
b->recent_alloc_bytes = 0;
@@ -138,143 +169,39 @@ runtime·MProf_GC(void)
runtime·unlock(&proflock);
}
-// Map from pointer to Bucket* that allocated it.
-// Three levels:
-// Linked-list hash table for top N-AddrHashShift bits.
-// Array index for next AddrDenseBits bits.
-// Linked list for next AddrHashShift-AddrDenseBits bits.
-// This is more efficient than using a general map,
-// because of the typical clustering of the pointer keys.
-
-typedef struct AddrHash AddrHash;
-typedef struct AddrEntry AddrEntry;
-
-enum {
- AddrHashBits = 12, // good for 4GB of used address space
- AddrHashShift = 20, // each AddrHash knows about 1MB of address space
- AddrDenseBits = 8, // good for a profiling rate of 4096 bytes
-};
-
-struct AddrHash
-{
- AddrHash *next; // next in top-level hash table linked list
- uintptr addr; // addr>>20
- AddrEntry *dense[1<<AddrDenseBits];
-};
-
-struct AddrEntry
-{
- AddrEntry *next; // next in bottom-level linked list
- uint32 addr;
- Bucket *b;
-};
-
-static AddrHash **addrhash; // points to (AddrHash*)[1<<AddrHashBits]
-static AddrEntry *addrfree;
-static uintptr addrmem;
-
-// Multiplicative hash function:
-// hashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
-// This is a good multiplier as suggested in CLR, Knuth. The hash
-// value is taken to be the top AddrHashBits bits of the bottom 32 bits
-// of the multiplied value.
-enum {
- HashMultiplier = 2654435769U
-};
-
-// Set the bucket associated with addr to b.
-static void
-setaddrbucket(uintptr addr, Bucket *b)
-{
- int32 i;
- uint32 h;
- AddrHash *ah;
- AddrEntry *e;
-
- h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
- for(ah=addrhash[h]; ah; ah=ah->next)
- if(ah->addr == (addr>>AddrHashShift))
- goto found;
-
- ah = runtime·persistentalloc(sizeof *ah, 0, &mstats.buckhash_sys);
- addrmem += sizeof *ah;
- ah->next = addrhash[h];
- ah->addr = addr>>AddrHashShift;
- addrhash[h] = ah;
-
-found:
- if((e = addrfree) == nil) {
- e = runtime·persistentalloc(64*sizeof *e, 0, &mstats.buckhash_sys);
- addrmem += 64*sizeof *e;
- for(i=0; i+1<64; i++)
- e[i].next = &e[i+1];
- e[63].next = nil;
- }
- addrfree = e->next;
- e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
- e->b = b;
- h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
- e->next = ah->dense[h];
- ah->dense[h] = e;
-}
-
-// Get the bucket associated with addr and clear the association.
-static Bucket*
-getaddrbucket(uintptr addr)
-{
- uint32 h;
- AddrHash *ah;
- AddrEntry *e, **l;
- Bucket *b;
-
- h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
- for(ah=addrhash[h]; ah; ah=ah->next)
- if(ah->addr == (addr>>AddrHashShift))
- goto found;
- return nil;
-
-found:
- h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
- for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
- if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
- *l = e->next;
- b = e->b;
- e->next = addrfree;
- addrfree = e;
- return b;
- }
- }
- return nil;
-}
-
// Called by malloc to record a profiled block.
void
runtime·MProf_Malloc(void *p, uintptr size)
{
- int32 nstk;
uintptr stk[32];
Bucket *b;
+ int32 nstk;
- nstk = runtime·callers(1, stk, 32);
+ nstk = runtime·callers(1, stk, nelem(stk));
runtime·lock(&proflock);
- b = stkbucket(MProf, stk, nstk, true);
+ b = stkbucket(MProf, size, stk, nstk, true);
b->recent_allocs++;
b->recent_alloc_bytes += size;
- setaddrbucket((uintptr)p, b);
runtime·unlock(&proflock);
+
+ // Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
+ // This reduces potential contention and chances of deadlocks.
+ // Since the object must be alive during call to MProf_Malloc,
+ // it's fine to do this non-atomically.
+ runtime·setprofilebucket(p, b);
}
// Called when freeing a profiled block.
void
-runtime·MProf_Free(void *p, uintptr size)
+runtime·MProf_Free(Bucket *b, uintptr size, bool freed)
{
- Bucket *b;
-
runtime·lock(&proflock);
- b = getaddrbucket((uintptr)p);
- if(b != nil) {
+ if(freed) {
b->recent_frees++;
b->recent_free_bytes += size;
+ } else {
+ b->prev_frees++;
+ b->prev_free_bytes += size;
}
runtime·unlock(&proflock);
}
@@ -311,9 +238,9 @@ runtime·blockevent(int64 cycles, int32 skip)
if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
return;
- nstk = runtime·callers(skip, stk, 32);
+ nstk = runtime·callers(skip, stk, nelem(stk));
runtime·lock(&proflock);
- b = stkbucket(BProf, stk, nstk, true);
+ b = stkbucket(BProf, 0, stk, nstk, true);
b->count++;
b->cycles += cycles;
runtime·unlock(&proflock);
@@ -365,6 +292,7 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
// garbage collection is disabled from the beginning of execution,
// accumulate stats as if a GC just happened, and recount buckets.
MProf_GC();
+ MProf_GC();
n = 0;
for(b=mbuckets; b; b=b->allnext)
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
@@ -381,6 +309,18 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
runtime·unlock(&proflock);
}
+void
+runtime·iterate_memprof(void (*callback)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr))
+{
+ Bucket *b;
+
+ runtime·lock(&proflock);
+ for(b=mbuckets; b; b=b->allnext) {
+ callback(b, b->nstk, b->stk, b->size, b->allocs, b->frees);
+ }
+ runtime·unlock(&proflock);
+}
+
// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
@@ -483,7 +423,7 @@ saveg(uintptr pc, uintptr sp, G *gp, TRecord *r)
}
func GoroutineProfile(b Slice) (n int, ok bool) {
- uintptr pc, sp;
+ uintptr pc, sp, i;
TRecord *r;
G *gp;
@@ -502,7 +442,8 @@ func GoroutineProfile(b Slice) (n int, ok bool) {
ok = true;
r = (TRecord*)b.array;
saveg(pc, sp, g, r++);
- for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
+ for(i = 0; i < runtime·allglen; i++) {
+ gp = runtime·allg[i];
if(gp == g || gp->status == Gdead)
continue;
saveg(~(uintptr)0, ~(uintptr)0, gp, r++);
@@ -515,8 +456,72 @@ func GoroutineProfile(b Slice) (n int, ok bool) {
}
}
+// Tracing of alloc/free/gc.
+
+static Lock tracelock;
+
+static int8*
+typeinfoname(int32 typeinfo)
+{
+ if(typeinfo == TypeInfo_SingleObject)
+ return "single object";
+ else if(typeinfo == TypeInfo_Array)
+ return "array";
+ else if(typeinfo == TypeInfo_Chan)
+ return "channel";
+ runtime·throw("typinfoname: unknown type info");
+ return nil;
+}
+
+void
+runtime·tracealloc(void *p, uintptr size, uintptr typ)
+{
+ int8 *name;
+ Type *type;
+
+ runtime·lock(&tracelock);
+ m->traceback = 2;
+ type = (Type*)(typ & ~3);
+ name = typeinfoname(typ & 3);
+ if(type == nil)
+ runtime·printf("tracealloc(%p, %p, %s)\n", p, size, name);
+ else
+ runtime·printf("tracealloc(%p, %p, %s of %S)\n", p, size, name, *type->string);
+ if(m->curg == nil || g == m->curg) {
+ runtime·goroutineheader(g);
+ runtime·traceback((uintptr)runtime·getcallerpc(&p), (uintptr)runtime·getcallersp(&p), 0, g);
+ } else {
+ runtime·goroutineheader(m->curg);
+ runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, m->curg);
+ }
+ runtime·printf("\n");
+ m->traceback = 0;
+ runtime·unlock(&tracelock);
+}
+
+void
+runtime·tracefree(void *p, uintptr size)
+{
+ runtime·lock(&tracelock);
+ m->traceback = 2;
+ runtime·printf("tracefree(%p, %p)\n", p, size);
+ runtime·goroutineheader(g);
+ runtime·traceback((uintptr)runtime·getcallerpc(&p), (uintptr)runtime·getcallersp(&p), 0, g);
+ runtime·printf("\n");
+ m->traceback = 0;
+ runtime·unlock(&tracelock);
+}
+
void
-runtime·mprofinit(void)
+runtime·tracegc(void)
{
- addrhash = runtime·persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0, &mstats.buckhash_sys);
+ runtime·lock(&tracelock);
+ m->traceback = 2;
+ runtime·printf("tracegc()\n");
+ // running on m->g0 stack; show all non-g0 goroutines
+ runtime·tracebackothers(g);
+ runtime·printf("end tracegc\n");
+ runtime·printf("\n");
+ m->traceback = 0;
+ runtime·unlock(&tracelock);
}