diff options
Diffstat (limited to 'src/cmd/5g/reg.c')
-rw-r--r-- | src/cmd/5g/reg.c | 267 |
1 files changed, 200 insertions, 67 deletions
diff --git a/src/cmd/5g/reg.c b/src/cmd/5g/reg.c index d2a8cc488..4762df506 100644 --- a/src/cmd/5g/reg.c +++ b/src/cmd/5g/reg.c @@ -56,30 +56,6 @@ rcmp(const void *a1, const void *a2) return p2->varno - p1->varno; } -static void -setoutvar(void) -{ - Type *t; - Node *n; - Addr a; - Iter save; - Bits bit; - int z; - - t = structfirst(&save, getoutarg(curfn->type)); - while(t != T) { - n = nodarg(t, 1); - a = zprog.from; - naddr(n, &a, 0); - bit = mkvar(R, &a); - for(z=0; z<BITS; z++) - ovar.b[z] |= bit.b[z]; - t = structnext(&save); - } -//if(bany(&ovar)) -//print("ovar = %Q\n", ovar); -} - void excise(Flow *r) { @@ -153,13 +129,15 @@ static char* regname[] = { static Node* regnodes[NREGVAR]; +static void walkvardef(Node *n, Reg *r, int active); + void regopt(Prog *firstp) { Reg *r, *r1; Prog *p; Graph *g; - int i, z; + int i, z, active; uint32 vreg; Bits bit; ProgInfo info; @@ -168,8 +146,7 @@ regopt(Prog *firstp) fmtinstall('Q', Qconv); first = 0; } - - fixjmp(firstp); + mergetemp(firstp); /* @@ -191,12 +168,10 @@ regopt(Prog *firstp) params.b[z] = 0; consts.b[z] = 0; addrs.b[z] = 0; + ivar.b[z] = 0; ovar.b[z] = 0; } - // build list of return variables - setoutvar(); - /* * pass 1 * build aux data structure @@ -204,12 +179,18 @@ regopt(Prog *firstp) * find use and set of variables */ g = flowstart(firstp, sizeof(Reg)); - if(g == nil) + if(g == nil) { + for(i=0; i<nvar; i++) + var[i].node->opt = nil; return; + } + firstr = (Reg*)g->start; for(r = firstr; r != R; r = (Reg*)r->f.link) { p = r->f.prog; + if(p->as == AVARDEF || p->as == AVARKILL) + continue; proginfo(&info, p); // Avoid making variables for direct-called functions. @@ -271,6 +252,26 @@ regopt(Prog *firstp) dumpit("pass2", &firstr->f, 1); /* + * pass 2.5 + * iterate propagating fat vardef covering forward + * r->act records vars with a VARDEF since the last CALL. + * (r->act will be reused in pass 5 for something else, + * but we'll be done with it by then.) + */ + active = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) { + r->f.active = 0; + r->act = zbits; + } + for(r = firstr; r != R; r = (Reg*)r->f.link) { + p = r->f.prog; + if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { + active++; + walkvardef(p->to.node, r, active); + } + } + + /* * pass 3 * iterate propagating usage * back until flow graph is complete @@ -471,6 +472,14 @@ brk: dumpit("pass6", &firstr->f, 1); /* + * free aux structures. peep allocates new ones. + */ + for(i=0; i<nvar; i++) + var[i].node->opt = nil; + flowend(g); + firstr = R; + + /* * pass 7 * peep-hole on basic block */ @@ -523,20 +532,44 @@ brk: } if(p->as == AMOVW && vreg != 0) { - if(p->from.sym != S) + if(p->from.sym != nil) if(p->from.name == D_AUTO || p->from.name == D_PARAM) { p->from.offset += vreg; // print("%P adjusting from %d %d\n", p, vreg, p->from.type); } - if(p->to.sym != S) + if(p->to.sym != nil) if(p->to.name == D_AUTO || p->to.name == D_PARAM) { p->to.offset += vreg; // print("%P adjusting to %d %d\n", p, vreg, p->from.type); } } } +} - flowend(g); +static void +walkvardef(Node *n, Reg *r, int active) +{ + Reg *r1, *r2; + int bn; + Var *v; + + for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { + if(r1->f.active == active) + break; + r1->f.active = active; + if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) + break; + for(v=n->opt; v!=nil; v=v->nextinnode) { + bn = v - var; + r1->act.b[bn/32] |= 1L << (bn%32); + } + if(r1->f.prog->as == ABL) + break; + } + + for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) + if(r2->f.s2 != nil) + walkvardef(n, (Reg*)r2->f.s2, active); } void @@ -551,6 +584,10 @@ addsplits(void) continue; if(r->f.prog->as == ABL) continue; + if(r->f.prog->as == ADUFFZERO) + continue; + if(r->f.prog->as == ADUFFCOPY) + continue; for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) { if(r1->f.loop <= 1) continue; @@ -596,11 +633,11 @@ addmove(Reg *r, int bn, int rn, int f) a = &p1->to; a->name = v->name; a->node = v->node; - a->sym = v->node->sym; + a->sym = linksym(v->node->sym); a->offset = v->offset; a->etype = v->etype; a->type = D_OREG; - if(a->etype == TARRAY || a->sym == S) + if(a->etype == TARRAY || a->sym == nil) a->type = D_CONST; if(v->addr) @@ -790,6 +827,16 @@ mkvar(Reg *r, Adr *a) if(nvar >= NVAR) { if(debug['w'] > 1 && node) fatal("variable not optimized: %D", a); + + // If we're not tracking a word in a variable, mark the rest as + // having its address taken, so that we keep the whole thing + // live at all calls. otherwise we might optimize away part of + // a variable but not all of it. + for(i=0; i<nvar; i++) { + v = var+i; + if(v->node == node) + v->addr = 1; + } goto none; } @@ -804,9 +851,13 @@ mkvar(Reg *r, Adr *a) v->addr = flag; // funny punning v->node = node; - if(debug['R']) - print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); - + // node->opt is the head of a linked list + // of Vars within the given Node, so that + // we can start at a Var and find all the other + // Vars in the same Go variable. + v->nextinnode = node->opt; + node->opt = v; + bit = blsh(i); if(n == D_EXTERN || n == D_STATIC) for(z=0; z<BITS; z++) @@ -815,6 +866,45 @@ mkvar(Reg *r, Adr *a) for(z=0; z<BITS; z++) params.b[z] |= bit.b[z]; + if(node->class == PPARAM) + for(z=0; z<BITS; z++) + ivar.b[z] |= bit.b[z]; + if(node->class == PPARAMOUT) + for(z=0; z<BITS; z++) + ovar.b[z] |= bit.b[z]; + + // Treat values with their address taken as live at calls, + // because the garbage collector's liveness analysis in ../gc/plive.c does. + // These must be consistent or else we will elide stores and the garbage + // collector will see uninitialized data. + // The typical case where our own analysis is out of sync is when the + // node appears to have its address taken but that code doesn't actually + // get generated and therefore doesn't show up as an address being + // taken when we analyze the instruction stream. + // One instance of this case is when a closure uses the same name as + // an outer variable for one of its own variables declared with :=. + // The parser flags the outer variable as possibly shared, and therefore + // sets addrtaken, even though it ends up not being actually shared. + // If we were better about _ elision, _ = &x would suffice too. + // The broader := in a closure problem is mentioned in a comment in + // closure.c:/^typecheckclosure and dcl.c:/^oldname. + if(node->addrtaken) + v->addr = 1; + + // Disable registerization for globals, because: + // (1) we might panic at any time and we want the recovery code + // to see the latest values (issue 1304). + // (2) we don't know what pointers might point at them and we want + // loads via those pointers to see updated values and vice versa (issue 7995). + // + // Disable registerization for results if using defer, because the deferred func + // might recover and return, causing the current values to be used. + if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) + v->addr = 1; + + if(debug['R']) + print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); + return bit; none: @@ -825,7 +915,8 @@ void prop(Reg *r, Bits ref, Bits cal) { Reg *r1, *r2; - int z; + int z, i, j; + Var *v, *v1; for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { for(z=0; z<BITS; z++) { @@ -844,10 +935,61 @@ prop(Reg *r, Bits ref, Bits cal) case ABL: if(noreturn(r1->f.prog)) break; + + // Mark all input variables (ivar) as used, because that's what the + // liveness bitmaps say. The liveness bitmaps say that so that a + // panic will not show stale values in the parameter dump. + // Mark variables with a recent VARDEF (r1->act) as used, + // so that the optimizer flushes initializations to memory, + // so that if a garbage collection happens during this CALL, + // the collector will see initialized memory. Again this is to + // match what the liveness bitmaps say. for(z=0; z<BITS; z++) { - cal.b[z] |= ref.b[z] | externs.b[z]; + cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z]; ref.b[z] = 0; } + + // cal.b is the current approximation of what's live across the call. + // Every bit in cal.b is a single stack word. For each such word, + // find all the other tracked stack words in the same Go variable + // (struct/slice/string/interface) and mark them live too. + // This is necessary because the liveness analysis for the garbage + // collector works at variable granularity, not at word granularity. + // It is fundamental for slice/string/interface: the garbage collector + // needs the whole value, not just some of the words, in order to + // interpret the other bits correctly. Specifically, slice needs a consistent + // ptr and cap, string needs a consistent ptr and len, and interface + // needs a consistent type word and data word. + for(z=0; z<BITS; z++) { + if(cal.b[z] == 0) + continue; + for(i=0; i<32; i++) { + if(z*32+i >= nvar || ((cal.b[z]>>i)&1) == 0) + continue; + v = var+z*32+i; + if(v->node->opt == nil) // v represents fixed register, not Go variable + continue; + + // v->node->opt is the head of a linked list of Vars + // corresponding to tracked words from the Go variable v->node. + // Walk the list and set all the bits. + // For a large struct this could end up being quadratic: + // after the first setting, the outer loop (for z, i) would see a 1 bit + // for all of the remaining words in the struct, and for each such + // word would go through and turn on all the bits again. + // To avoid the quadratic behavior, we only turn on the bits if + // v is the head of the list or if the head's bit is not yet turned on. + // This will set the bits at most twice, keeping the overall loop linear. + v1 = v->node->opt; + j = v1 - var; + if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { + for(; v1 != nil; v1 = v1->nextinnode) { + j = v1 - var; + cal.b[j/32] |= 1<<(j&31); + } + } + } + } break; case ATEXT: @@ -863,17 +1005,6 @@ prop(Reg *r, Bits ref, Bits cal) ref.b[z] = 0; } break; - - default: - // Work around for issue 1304: - // flush modified globals before each instruction. - for(z=0; z<BITS; z++) { - cal.b[z] |= externs.b[z]; - // issue 4066: flush modified return variables in case of panic - if(hasdefer) - cal.b[z] |= ovar.b[z]; - } - break; } for(z=0; z<BITS; z++) { ref.b[z] = (ref.b[z] & ~r1->set.b[z]) | @@ -1004,18 +1135,20 @@ paint1(Reg *r, int bn) r->act.b[z] |= bb; p = r->f.prog; - if(r->use1.b[z] & bb) { - change += CREF * r->f.loop; - if(debug['R'] > 1) - print("%d%P\tu1 %Q $%d\n", r->f.loop, - p, blsh(bn), change); - } - if((r->use2.b[z]|r->set.b[z]) & bb) { - change += CREF * r->f.loop; - if(debug['R'] > 1) - print("%d%P\tu2 %Q $%d\n", r->f.loop, - p, blsh(bn), change); + if(r->f.prog->as != ANOP) { // don't give credit for NOPs + if(r->use1.b[z] & bb) { + change += CREF * r->f.loop; + if(debug['R'] > 1) + print("%d%P\tu1 %Q $%d\n", r->f.loop, + p, blsh(bn), change); + } + if((r->use2.b[z]|r->set.b[z]) & bb) { + change += CREF * r->f.loop; + if(debug['R'] > 1) + print("%d%P\tu2 %Q $%d\n", r->f.loop, + p, blsh(bn), change); + } } if(STORE(r) & r->regdiff.b[z] & bb) { @@ -1172,7 +1305,7 @@ paint3(Reg *r, int bn, int32 rb, int rn) void addreg(Adr *a, int rn) { - a->sym = 0; + a->sym = nil; a->name = D_NONE; a->type = D_REG; a->reg = rn; @@ -1292,9 +1425,9 @@ dumpit(char *str, Flow *r0, int isreg) if(r1 != nil) { print(" pred:"); for(; r1 != nil; r1 = r1->p2link) - print(" %.4ud", r1->prog->loc); + print(" %.4ud", (int)r1->prog->pc); if(r->p1 != nil) - print(" (and %.4ud)", r->p1->prog->loc); + print(" (and %.4ud)", (int)r->p1->prog->pc); else print(" (only)"); print("\n"); @@ -1303,7 +1436,7 @@ dumpit(char *str, Flow *r0, int isreg) // if(r1 != nil) { // print(" succ:"); // for(; r1 != R; r1 = r1->s1) -// print(" %.4ud", r1->prog->loc); +// print(" %.4ud", (int)r1->prog->pc); // print("\n"); // } } |