author     Russ Cox <rsc@golang.org>    2010-02-10 00:00:12 -0800
committer  Russ Cox <rsc@golang.org>    2010-02-10 00:00:12 -0800
commit     6ee4537544fe3176118be01a9349d6525d41c114 (patch)
tree       23f296cb62fc655126252c71d4a82a720875c294
parent     25066ce98bab56fa229b482e550ec8f465fd49a1 (diff)
download   golang-6ee4537544fe3176118be01a9349d6525d41c114.tar.gz
runtime: garbage collection + malloc performance
* add bit tracking finalizer status, avoiding getfinalizer lookup
* add ability to allocate uncleared memory
R=iant
CC=golang-dev
http://codereview.appspot.com/207044
-rw-r--r--  src/pkg/runtime/iface.c    |  4
-rw-r--r--  src/pkg/runtime/malloc.cgo | 36
-rw-r--r--  src/pkg/runtime/malloc.h   | 23
-rw-r--r--  src/pkg/runtime/mcache.c   | 12
-rw-r--r--  src/pkg/runtime/mcentral.c | 14
-rw-r--r--  src/pkg/runtime/mfinal.c   | 38
-rw-r--r--  src/pkg/runtime/mgc0.c     | 50
-rw-r--r--  src/pkg/runtime/proc.c     | 36
-rw-r--r--  src/pkg/runtime/slice.c    |  4
9 files changed, 131 insertions, 86 deletions
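
Two conventions run through the whole patch, so a compact reader's note before the diff: the per-object "ref count" word now packs a state in its low bits and two flag bits (RefNoPointers, RefHasFinalizer) in its top bits, and a free block's second word records whether its memory is already zero. The standalone sketch below models the first convention. The constants come from the malloc.h hunk below; state(), the enum ordering, and the transitions in main() are illustrative only, not runtime code.

/* refword.c - toy model of the per-object reference word (see malloc.h).
   Low values are mutually exclusive states; the top bits are flags that
   must survive state changes. The enum order below is assumed. */
#include <stdint.h>
#include <stdio.h>

enum { RefFree, RefStack, RefNone, RefSome, RefFinalize };

#define RefNoPointers   0x80000000U  /* flag - no pointers here */
#define RefHasFinalizer 0x40000000U  /* flag - has finalizer */

/* Strip the flags to recover the state, as mgc0.c's switches now do. */
static uint32_t state(uint32_t ref)
{
    return ref & ~(RefNoPointers|RefHasFinalizer);
}

int main(void)
{
    /* SetFinalizer on a pointer-free object: set a flag, keep the state. */
    uint32_t ref = RefNone | RefNoPointers;
    ref |= RefHasFinalizer;

    /* The sweep can now test one bit instead of doing a hash lookup. */
    if(state(ref) == RefNone && (ref & RefHasFinalizer))
        printf("tentatively finalizable\n");

    /* Marking preserves both flags, like scanblock's update below. */
    ref = RefSome | (ref & (RefNoPointers|RefHasFinalizer));
    printf("state=%u flags=%#x\n", (unsigned)state(ref),
        (unsigned)(ref & (RefNoPointers|RefHasFinalizer)));
    return 0;
}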
diff --git a/src/pkg/runtime/iface.c b/src/pkg/runtime/iface.c
index b9b00de29..eb5d76eb8 100644
--- a/src/pkg/runtime/iface.c
+++ b/src/pkg/runtime/iface.c
@@ -641,7 +641,7 @@ unsafe·New(Eface typ, void *ret)
 	t = (Type*)((Eface*)typ.data-1);
 
 	if(t->kind&KindNoPointers)
-		ret = mallocgc(t->size, RefNoPointers, 1);
+		ret = mallocgc(t->size, RefNoPointers, 1, 1);
 	else
 		ret = mal(t->size);
 	FLUSH(&ret);
@@ -661,7 +661,7 @@ unsafe·NewArray(Eface typ, uint32 n, void *ret)
 	size = n*t->size;
 
 	if(t->kind&KindNoPointers)
-		ret = mallocgc(size, RefNoPointers, 1);
+		ret = mallocgc(size, RefNoPointers, 1, 1);
 	else
 		ret = mal(size);
 	FLUSH(&ret);
diff --git a/src/pkg/runtime/malloc.cgo b/src/pkg/runtime/malloc.cgo
index c6d5c6e33..8c945baeb 100644
--- a/src/pkg/runtime/malloc.cgo
+++ b/src/pkg/runtime/malloc.cgo
@@ -19,7 +19,7 @@ MStats mstats;
 // Small objects are allocated from the per-thread cache's free lists.
 // Large objects (> 32 kB) are allocated straight from the heap.
 void*
-mallocgc(uintptr size, uint32 refflag, int32 dogc)
+mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
 {
 	int32 sizeclass;
 	MCache *c;
@@ -42,7 +42,7 @@ mallocgc(uintptr size, uint32 refflag, int32 dogc)
 		sizeclass = SizeToClass(size);
 		size = class_to_size[sizeclass];
 		c = m->mcache;
-		v = MCache_Alloc(c, sizeclass, size);
+		v = MCache_Alloc(c, sizeclass, size, zeroed);
 		if(v == nil)
 			throw("out of memory");
 		mstats.alloc += size;
@@ -80,7 +80,7 @@ mallocgc(uintptr size, uint32 refflag, int32 dogc)
 void*
 malloc(uintptr size)
 {
-	return mallocgc(size, 0, 0);
+	return mallocgc(size, 0, 0, 1);
 }
 
 // Free the object whose base pointer is v.
@@ -128,6 +128,8 @@ free(void *v)
 	// Small object.
 	c = m->mcache;
 	size = class_to_size[sizeclass];
+	if(size > sizeof(uintptr))
+		((uintptr*)v)[1] = 1;	// mark as "needs to be zeroed"
 	runtime_memclr(v, size);
 	mstats.alloc -= size;
 	mstats.by_size[sizeclass].nfree++;
@@ -180,14 +182,18 @@ mlookup(void *v, byte **base, uintptr *size, uint32 **ref)
 		*base = p + i*n;
 	if(size)
 		*size = n;
-	nobj = (s->npages << PageShift) / (n + RefcountOverhead);
-	if((byte*)s->gcref < p || (byte*)(s->gcref+nobj) > p+(s->npages<<PageShift)) {
-		printf("odd span state=%d span=%p base=%p sizeclass=%d n=%D size=%D npages=%D\n",
-			s->state, s, p, s->sizeclass, (uint64)nobj, (uint64)n, (uint64)s->npages);
-		printf("s->base sizeclass %d v=%p base=%p gcref=%p blocksize=%D nobj=%D size=%D end=%p end=%p\n",
-			s->sizeclass, v, p, s->gcref, (uint64)s->npages<<PageShift,
-			(uint64)nobj, (uint64)n, s->gcref + nobj, p+(s->npages<<PageShift));
-		throw("bad gcref");
+
+	// good for error checking, but expensive
+	if(0) {
+		nobj = (s->npages << PageShift) / (n + RefcountOverhead);
+		if((byte*)s->gcref < p || (byte*)(s->gcref+nobj) > p+(s->npages<<PageShift)) {
+			printf("odd span state=%d span=%p base=%p sizeclass=%d n=%D size=%D npages=%D\n",
+				s->state, s, p, s->sizeclass, (uint64)nobj, (uint64)n, (uint64)s->npages);
+			printf("s->base sizeclass %d v=%p base=%p gcref=%p blocksize=%D nobj=%D size=%D end=%p end=%p\n",
+				s->sizeclass, v, p, s->gcref, (uint64)s->npages<<PageShift,
+				(uint64)nobj, (uint64)n, s->gcref + nobj, p+(s->npages<<PageShift));
+			throw("bad gcref");
+		}
 	}
 	if(ref)
 		*ref = &s->gcref[i];
@@ -217,7 +223,7 @@ mallocinit(void)
 void*
 mal(uint32 n)
 {
-	return mallocgc(n, 0, 1);
+	return mallocgc(n, 0, 1, 1);
 }
 
 // Stack allocator uses malloc/free most of the time,
@@ -250,7 +256,7 @@ stackalloc(uint32 n)
 		unlock(&stacks);
 		return v;
 	}
-	v = malloc(n);
+	v = mallocgc(n, 0, 0, 0);
 	if(!mlookup(v, nil, nil, &ref))
throw("stackalloc mlookup"); *ref = RefStack; @@ -291,7 +297,7 @@ func SetFinalizer(obj Eface, finalizer Eface) { FuncType *ft; int32 i, nret; Type *t; - + if(obj.type == nil) { printf("runtime.SetFinalizer: first argument is nil interface\n"); throw: @@ -315,7 +321,7 @@ func SetFinalizer(obj Eface, finalizer Eface) { ft = (FuncType*)finalizer.type; if(ft->dotdotdot || ft->in.len != 1 || *(Type**)ft->in.array != obj.type) goto badfunc; - + // compute size needed for return parameters for(i=0; i<ft->out.len; i++) { t = ((Type**)ft->out.array)[i]; diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h index 3a3b9bef6..2d94872f7 100644 --- a/src/pkg/runtime/malloc.h +++ b/src/pkg/runtime/malloc.h @@ -67,10 +67,22 @@ // Allocating and freeing a large object uses the page heap // directly, bypassing the MCache and MCentral free lists. // +// The small objects on the MCache and MCentral free lists +// may or may not be zeroed. They are zeroed if and only if +// the second word of the object is zero. The spans in the +// page heap are always zeroed. When a span full of objects +// is returned to the page heap, the objects that need to be +// are zeroed first. There are two main benefits to delaying the +// zeroing this way: +// +// 1. stack frames allocated from the small object lists +// can avoid zeroing altogether. +// 2. the cost of zeroing when reusing a small object is +// charged to the mutator, not the garbage collector. +// // This C code was written with an eye toward translating to Go // in the future. Methods have the form Type_Method(Type *t, ...). - typedef struct FixAlloc FixAlloc; typedef struct MCentral MCentral; typedef struct MHeap MHeap; @@ -218,7 +230,7 @@ struct MCache uint64 size; }; -void* MCache_Alloc(MCache *c, int32 sizeclass, uintptr size); +void* MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed); void MCache_Free(MCache *c, void *p, int32 sizeclass, uintptr size); @@ -285,7 +297,7 @@ struct MHeap // span lookup MHeapMap map; MHeapMapCache mapcache; - + // range of addresses we might see in the heap byte *min; byte *max; @@ -310,7 +322,7 @@ void MHeap_Free(MHeap *h, MSpan *s); MSpan* MHeap_Lookup(MHeap *h, PageID p); MSpan* MHeap_LookupMaybe(MHeap *h, PageID p); -void* mallocgc(uintptr size, uint32 flag, int32 dogc); +void* mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed); int32 mlookup(void *v, byte **base, uintptr *size, uint32 **ref); void gc(int32 force); @@ -329,5 +341,6 @@ enum RefNone, // no references RefSome, // some references RefFinalize, // ready to be finalized - RefNoPointers = 0x80000000U, // flag - no pointers here + RefNoPointers = 0x80000000U, // flag - no pointers here + RefHasFinalizer = 0x40000000U, // flag - has finalizer }; diff --git a/src/pkg/runtime/mcache.c b/src/pkg/runtime/mcache.c index ae2594023..429b42541 100644 --- a/src/pkg/runtime/mcache.c +++ b/src/pkg/runtime/mcache.c @@ -10,7 +10,7 @@ #include "malloc.h" void* -MCache_Alloc(MCache *c, int32 sizeclass, uintptr size) +MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed) { MCacheList *l; MLink *first, *v; @@ -36,6 +36,16 @@ MCache_Alloc(MCache *c, int32 sizeclass, uintptr size) // v is zeroed except for the link pointer // that we used above; zero that. v->next = nil; + if(zeroed) { + // block is zeroed iff second word is zero ... + if(size > sizeof(uintptr) && ((uintptr*)v)[1] != 0) + runtime_memclr((byte*)v, size); + else { + // ... except for the link pointer + // that we used above; zero that. 
+ v->next = nil; + } + } return v; } diff --git a/src/pkg/runtime/mcentral.c b/src/pkg/runtime/mcentral.c index 9881812e3..7e33e01af 100644 --- a/src/pkg/runtime/mcentral.c +++ b/src/pkg/runtime/mcentral.c @@ -115,6 +115,7 @@ MCentral_Free(MCentral *c, void *v) MSpan *s; PageID page; MLink *p, *next; + int32 size; // Find span for v. page = (uintptr)v >> PageShift; @@ -136,15 +137,20 @@ MCentral_Free(MCentral *c, void *v) // If s is completely freed, return it to the heap. if(--s->ref == 0) { + size = class_to_size[c->sizeclass]; MSpanList_Remove(s); - // Freed blocks are zeroed except for the link pointer. - // Zero the link pointers so that the page is all zero. + // The second word of each freed block indicates + // whether it needs to be zeroed. The first word + // is the link pointer and must always be cleared. for(p=s->freelist; p; p=next) { next = p->next; - p->next = nil; + if(size > sizeof(uintptr) && ((uintptr*)p)[1] != 0) + runtime_memclr((byte*)p, size); + else + p->next = nil; } s->freelist = nil; - c->nfree -= (s->npages << PageShift) / class_to_size[c->sizeclass]; + c->nfree -= (s->npages << PageShift) / size; unlock(c); MHeap_Free(&mheap, s); lock(c); diff --git a/src/pkg/runtime/mfinal.c b/src/pkg/runtime/mfinal.c index 3034f0567..4fad6aa95 100644 --- a/src/pkg/runtime/mfinal.c +++ b/src/pkg/runtime/mfinal.c @@ -29,7 +29,7 @@ static void addfintab(Fintab *t, void *k, void *fn, int32 nret) { int32 i, j; - + i = (uintptr)k % (uintptr)t->max; for(j=0; j<t->max; j++) { if(t->key[i] == nil) { @@ -58,7 +58,7 @@ lookfintab(Fintab *t, void *k, bool del, int32 *nret) { int32 i, j; void *v; - + if(t->max == 0) return nil; i = (uintptr)k % (uintptr)t->max; @@ -94,11 +94,27 @@ addfinalizer(void *p, void (*f)(void*), int32 nret) { Fintab newtab; int32 i; + uint32 *ref; + byte *base; + + if(!mlookup(p, &base, nil, &ref) || p != base) + throw("addfinalizer on invalid pointer"); + if(f == nil) { + if(*ref & RefHasFinalizer) { + getfinalizer(p, 1, nil); + *ref &= ~RefHasFinalizer; + } + return; + } + + if(*ref & RefHasFinalizer) + throw("double finalizer"); + *ref |= RefHasFinalizer; if(fintab.nkey >= fintab.max/2+fintab.max/4) { // keep table at most 3/4 full: // allocate new table and rehash. - + runtime_memclr((byte*)&newtab, sizeof newtab); newtab.max = fintab.max; if(newtab.max == 0) @@ -108,13 +124,13 @@ addfinalizer(void *p, void (*f)(void*), int32 nret) // otherwise just rehash into table of same size. newtab.max *= 3; } - - newtab.key = mallocgc(newtab.max*sizeof newtab.key[0], RefNoPointers, 0); - newtab.val = mallocgc(newtab.max*sizeof newtab.val[0], 0, 0); - + + newtab.key = mallocgc(newtab.max*sizeof newtab.key[0], RefNoPointers, 0, 1); + newtab.val = mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1); + for(i=0; i<fintab.max; i++) { void *k; - + k = fintab.key[i]; if(k != nil && k != (void*)-1) addfintab(&newtab, k, fintab.val[i].fn, fintab.val[i].nret); @@ -123,10 +139,12 @@ addfinalizer(void *p, void (*f)(void*), int32 nret) free(fintab.val); fintab = newtab; } - - addfintab(&fintab, p, f, nret); + + addfintab(&fintab, p, f, nret); } +// get finalizer; if del, delete finalizer. +// caller is responsible for updating RefHasFinalizer bit. 
diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c
index bd5d2e25a..7cc965400 100644
--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -47,7 +47,7 @@ scanblock(int32 depth, byte *b, int64 n)
 	int32 off;
 	void *obj;
 	uintptr size;
-	uint32 *ref;
+	uint32 *refp, ref;
 	void **vp;
 	int64 i;
 
@@ -65,24 +65,22 @@ scanblock(int32 depth, byte *b, int64 n)
 		obj = vp[i];
 		if(obj == nil || (byte*)obj < mheap.min || (byte*)obj >= mheap.max)
 			continue;
-		if(mlookup(obj, &obj, &size, &ref)) {
-			if(*ref == RefFree || *ref == RefStack)
-				continue;
-
-			// If marked for finalization already, some other finalization-ready
-			// object has a pointer: turn off finalization until that object is gone.
-			// This means that cyclic finalizer loops never get collected,
-			// so don't do that.
-
-			if(*ref == (RefNone|RefNoPointers) || *ref == (RefFinalize|RefNoPointers)) {
-				*ref = RefSome|RefNoPointers;
-				continue;
-			}
-			if(*ref == RefNone || *ref == RefFinalize) {
+		if(mlookup(obj, &obj, &size, &refp)) {
+			ref = *refp;
+			switch(ref & ~(RefNoPointers|RefHasFinalizer)) {
+			case RefFinalize:
+				// If marked for finalization already, some other finalization-ready
+				// object has a pointer: turn off finalization until that object is gone.
+				// This means that cyclic finalizer loops never get collected,
+				// so don't do that.
+				/* fall through */
+			case RefNone:
 				if(Debug > 1)
 					printf("%d found at %p: ", depth, &vp[i]);
-				*ref = RefSome;
-				scanblock(depth+1, obj, size);
+				*refp = RefSome | (ref & (RefNoPointers|RefHasFinalizer));
+				if(!(ref & RefNoPointers))
+					scanblock(depth+1, obj, size);
+				break;
 			}
 		}
 	}
@@ -172,20 +170,19 @@ sweepblock(byte *p, int64 n, uint32 *gcrefp, int32 pass)
 	uint32 gcref;
 
 	gcref = *gcrefp;
-	switch(gcref) {
+	switch(gcref & ~(RefNoPointers|RefHasFinalizer)) {
 	default:
 		throw("bad 'ref count'");
 	case RefFree:
 	case RefStack:
 		break;
 	case RefNone:
-	case RefNone|RefNoPointers:
-		if(pass == 0 && getfinalizer(p, 0, nil)) {
+		if(pass == 0 && (gcref & RefHasFinalizer)) {
 			// Tentatively mark as finalizable.
 			// Make sure anything it points at will not be collected.
 			if(Debug > 0)
 				printf("maybe finalize %p+%D\n", p, n);
-			*gcrefp = RefFinalize | (gcref&RefNoPointers);
+			*gcrefp = RefFinalize | RefHasFinalizer | (gcref&RefNoPointers);
 			scanblock(100, p, n);
 		} else if(pass == 1) {
 			if(Debug > 0)
@@ -194,7 +191,6 @@ sweepblock(byte *p, int64 n, uint32 *gcrefp, int32 pass)
 		}
 		break;
 	case RefFinalize:
-	case RefFinalize|RefNoPointers:
 		if(pass != 1)
 			throw("sweepspan pass 0 RefFinalize");
 		if(pfinq < efinq) {
@@ -203,18 +199,18 @@ sweepblock(byte *p, int64 n, uint32 *gcrefp, int32 pass)
 			pfinq->p = p;
 			pfinq->nret = 0;
 			pfinq->fn = getfinalizer(p, 1, &pfinq->nret);
+			gcref &= ~RefHasFinalizer;
 			if(pfinq->fn == nil)
 				throw("getfinalizer inconsistency");
 			pfinq++;
 		}
 		// Reset for next mark+sweep.
-		*gcrefp = RefNone | (gcref&RefNoPointers);
+		*gcrefp = RefNone | (gcref&(RefNoPointers|RefHasFinalizer));
 		break;
 	case RefSome:
-	case RefSome|RefNoPointers:
 		// Reset for next mark+sweep.
 		if(pass == 1)
-			*gcrefp = RefNone | (gcref&RefNoPointers);
+			*gcrefp = RefNone | (gcref&(RefNoPointers|RefHasFinalizer));
 		break;
 	}
 }
@@ -227,7 +223,7 @@ sweep(void)
 	// Sweep all the spans marking blocks to be finalized.
 	for(s = mheap.allspans; s != nil; s = s->allnext)
 		sweepspan(s, 0);
-	
+
 	// Sweep again queueing finalizers and freeing the others.
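
The sweep above runs in two passes: pass 0 flips unmarked finalizable blocks to RefFinalize (using the new bit test where it previously had to call getfinalizer) and rescans them so nothing they reference is freed; pass 1 queues the finalizers, frees the rest, and resets survivors. A compressed model of those transitions on a single ref word follows; the enum and flag values match malloc.h, while sweep_one and the printfs are illustrative stand-ins, and the real code also handles RefFree/RefStack and actually frees memory.

/* sweeptoy.c - toy of sweepblock's per-word transitions above. */
#include <stdint.h>
#include <stdio.h>

enum { RefFree, RefStack, RefNone, RefSome, RefFinalize };

#define RefNoPointers   0x80000000U
#define RefHasFinalizer 0x40000000U
#define RefFlags        (RefNoPointers|RefHasFinalizer)

static void sweep_one(uint32_t *gcrefp, int pass)
{
    uint32_t gcref = *gcrefp;
    switch(gcref & ~RefFlags) {
    case RefNone:      /* unmarked: dead, unless a finalizer must run first */
        if(pass == 0 && (gcref & RefHasFinalizer))   /* bit test, no hash lookup */
            *gcrefp = RefFinalize | RefHasFinalizer | (gcref & RefNoPointers);
        else if(pass == 1)
            printf("free block\n");
        break;
    case RefFinalize:  /* pass 1: queue the finalizer, consume the bit */
        if(pass == 1) {
            printf("queue finalizer\n");
            *gcrefp = RefNone | (gcref & RefNoPointers);
        }
        break;
    case RefSome:      /* reachable: reset for the next mark+sweep */
        if(pass == 1)
            *gcrefp = RefNone | (gcref & RefFlags);
        break;
    }
}

int main(void)
{
    uint32_t ref = RefNone | RefHasFinalizer;  /* unreachable, finalizable */
    sweep_one(&ref, 0);  /* pass 0: tentatively mark RefFinalize */
    sweep_one(&ref, 1);  /* pass 1: queue it, drop RefHasFinalizer */
    printf("ref = %#x\n", (unsigned)ref);      /* back to RefNone, bit cleared */
    return 0;
}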
 	for(s = mheap.allspans; s != nil; s = s->allnext)
 		sweepspan(s, 1);
@@ -292,7 +288,7 @@ gc(int32 force)
 		mstats.next_gc = mstats.inuse_pages+mstats.inuse_pages*gcpercent/100;
 	}
 	m->gcing = 0;
-	
+
 	// kick off goroutines to run queued finalizers
 	m->locks++;	// disable gc during the mallocs in newproc
 	for(fp=finq; fp<pfinq; fp++) {
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c
index 5bd92dd80..9b4e34f6f 100644
--- a/src/pkg/runtime/proc.c
+++ b/src/pkg/runtime/proc.c
@@ -434,23 +434,20 @@ matchmg(void)
 			// when it is just in a register (R14 on amd64).
 			m->alllink = allm;
 			allm = m;
-			m->g0 = malg(8192);
 			m->id = sched.mcount++;
 
 			if(libcgo_thread_start != nil) {
 				CgoThreadStart ts;
-				// pthread_create will make us a stack,
-				// so free the one malg made.
-				stackfree(m->g0->stack0);
-				m->g0->stack0 = nil;
-				m->g0->stackguard = nil;
-				m->g0->stackbase = nil;
+				// pthread_create will make us a stack.
+				m->g0 = malg(-1);
 				ts.m = m;
 				ts.g = m->g0;
 				ts.fn = mstart;
 				runcgo(libcgo_thread_start, &ts);
-			} else
+			} else {
+				m->g0 = malg(8192);
 				newosproc(m, m->g0, m->g0->stackbase, mstart);
+			}
 		}
 		mnextg(m, g);
 	}
@@ -682,7 +679,7 @@ oldstack(void)
 		mcpy(top->fp, sp, args);
 	}
 
-	stackfree((byte*)g1->stackguard - StackGuard);
+	stackfree(g1->stackguard - StackGuard);
 	g1->stackbase = old.stackbase;
 	g1->stackguard = old.stackguard;
 
@@ -710,6 +707,7 @@ newstack(void)
 		frame += 1024;	// for more functions, Stktop.
 	stk = stackalloc(frame);
 
+//printf("newstack frame=%d args=%d morepc=%p morefp=%p gobuf=%p, %p newstk=%p\n", frame, args, m->morepc, m->morefp, g->sched.pc, g->sched.sp, stk);
 
 	g1 = m->curg;
@@ -746,10 +744,13 @@ malg(int32 stacksize)
 	byte *stk;
 
 	g = malloc(sizeof(G));
-	stk = stackalloc(stacksize + StackGuard);
-	g->stack0 = stk;
-	g->stackguard = stk + StackGuard;
-	g->stackbase = stk + StackGuard + stacksize;
+	if(stacksize >= 0) {
+		stk = stackalloc(stacksize + StackGuard);
+		g->stack0 = stk;
+		g->stackguard = stk + StackGuard;
+		g->stackbase = stk + StackGuard + stacksize - sizeof(Stktop);
+		runtime_memclr(g->stackbase, sizeof(Stktop));
+	}
 	return g;
 }
 
@@ -772,7 +773,7 @@ void
 void
 newproc1(byte *fn, byte *argp, int32 narg, int32 nret)
 {
-	byte *stk, *sp;
+	byte *sp;
 	G *newg;
 	int32 siz;
 
@@ -792,13 +793,8 @@ newproc1(byte *fn, byte *argp, int32 narg, int32 nret)
 		newg->alllink = allg;
 		allg = newg;
 	}
-	stk = newg->stack0;
-
-	newg->stackguard = stk+StackGuard;
-
-	sp = stk + 4096 - 4*8;
-	newg->stackbase = sp;
 
+	sp = newg->stackbase;
 	sp -= siz;
 	mcpy(sp, argp, narg);
diff --git a/src/pkg/runtime/slice.c b/src/pkg/runtime/slice.c
index c61c315e8..4ee5fc51f 100644
--- a/src/pkg/runtime/slice.c
+++ b/src/pkg/runtime/slice.c
@@ -23,14 +23,14 @@ void
 	ret.cap = cap;
 
 	if((t->elem->kind&KindNoPointers))
-		ret.array = mallocgc(size, RefNoPointers, 1);
+		ret.array = mallocgc(size, RefNoPointers, 1, 1);
 	else
 		ret.array = mal(size);
 
 	FLUSH(&ret);
 
 	if(debug) {
-		printf("makeslice(%S, %d, %d); ret=", 
+		printf("makeslice(%S, %d, %d); ret=",
 			*t->string, nel, cap);
 		·printslice(ret);
 	}
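
The proc.c hunks read together: malg(-1) lets matchmg skip allocating a goroutine stack that pthread_create would only replace, malg now carves the Stktop out of the top of the stack (so newproc1 can derive sp from stackbase instead of recomputing it from stack0), and stackalloc takes its memory uncleared. A toy of the sentinel convention, with invented sizes and plain calloc standing in for stackalloc:

/* malgtoy.c - toy of the new malg calling convention in proc.c above:
   a negative stacksize means "allocate the G but no stack", because
   with cgo the stack comes from pthread_create. Field names follow
   proc.c; the sizes are illustrative. */
#include <stdio.h>
#include <stdlib.h>

enum { StackGuard = 256, StktopSize = 64 };  /* illustrative sizes */

typedef struct G { char *stack0, *stackguard, *stackbase; } G;

static G* malg(int stacksize)
{
    G *g = calloc(1, sizeof(G));
    if(stacksize >= 0) {  /* negative: caller supplies the stack later */
        char *stk = calloc(1, stacksize + StackGuard);
        g->stack0 = stk;
        g->stackguard = stk + StackGuard;
        /* reserve a Stktop at the top, as the patch now does */
        g->stackbase = stk + StackGuard + stacksize - StktopSize;
    }
    return g;
}

int main(void)
{
    G *cgo = malg(-1);   /* no 8K stack allocated only to be freed again */
    G *go = malg(8192);
    printf("cgo stack0=%p, go stack0=%p\n", (void*)cgo->stack0, (void*)go->stack0);
    return 0;
}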