// Derived from Inferno utils/6c/peep.c // http://code.google.com/p/inferno-os/source/browse/utils/6c/peep.c // // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) // Portions Copyright © 1997-1999 Vita Nuova Limited // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) // Portions Copyright © 2004,2006 Bruce Ellis // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others // Portions Copyright © 2009 The Go Authors. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #include #include #include "gg.h" #include "opt.h" static void conprop(Flow *r); static void elimshortmov(Graph *g); static int prevl(Flow *r, int reg); static void pushback(Flow *r); static int regconsttyp(Adr*); static int subprop(Flow*); static int copyprop(Graph*, Flow*); static int copy1(Adr*, Adr*, Flow*, int); static int copyas(Adr*, Adr*); static int copyau(Adr*, Adr*); static int copysub(Adr*, Adr*, Adr*, int); static uint32 gactive; // do we need the carry bit static int needc(Prog *p) { ProgInfo info; while(p != P) { proginfo(&info, p); if(info.flags & UseCarry) return 1; if(info.flags & (SetCarry|KillCarry)) return 0; p = p->link; } return 0; } static Flow* rnops(Flow *r) { Prog *p; Flow *r1; if(r != nil) for(;;) { p = r->prog; if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE) break; r1 = uniqs(r); if(r1 == nil) break; r = r1; } return r; } void peep(Prog *firstp) { Flow *r, *r1; Graph *g; Prog *p, *p1; int t; g = flowstart(firstp, sizeof(Flow)); if(g == nil) return; gactive = 0; // byte, word arithmetic elimination. elimshortmov(g); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: case ALEAQ: if(regtyp(&p->to)) if(p->from.sym != nil) if(p->from.index == D_NONE || p->from.index == D_CONST) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", g->start, 0); t = 0; for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(g, r)) { excise(r); t++; } else if(subprop(r) && copyprop(g, r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != nil) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AMOVBQSX: case AMOVBQZX: case AMOVWQSX: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: case AMOVQL: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != nil) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVQ; t++; } } } break; case AADDL: case AADDQ: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDQ) p->as = ADECQ; else if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDQ) p->as = AINCQ; else if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBQ: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBQ) p->as = AINCQ; else if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBQ) p->as = ADECQ; else if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; // MOVLQZX removal. // The MOVLQZX exists to avoid being confused for a // MOVL that is just copying 32-bit data around during // copyprop. Now that copyprop is done, remov MOVLQZX R1, R2 // if it is dominated by an earlier ADDL/MOVL/etc into R1 that // will have already cleared the high bits. // // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=g->start; r!=nil; r=r->link) { p = r->prog; if(p->as == AMOVLQZX) if(regtyp(&p->from)) if(p->from.type == p->to.type) if(prevl(r, p->from.type)) excise(r); if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } // load pipelining // push any load from memory as early as possible // to give it time to complete before use. for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVLQZX: if(regtyp(&p->to) && !regconsttyp(&p->from)) pushback(r); } } flowend(g); } static void pushback(Flow *r0) { Flow *r, *b; Prog *p0, *p, t; b = nil; p0 = r0->prog; for(r=uniqp(r0); r!=nil && uniqs(r)!=nil; r=uniqp(r)) { p = r->prog; if(p->as != ANOP) { if(!regconsttyp(&p->from) || !regtyp(&p->to)) break; if(copyu(p, &p0->to, nil) || copyu(p0, &p->to, nil)) break; } if(p->as == ACALL) break; b = r; } if(b == nil) { if(debug['v']) { print("no pushback: %P\n", r0->prog); if(r) print("\t%P [%d]\n", r->prog, uniqs(r)!=nil); } return; } if(debug['v']) { print("pushback\n"); for(r=b;; r=r->link) { print("\t%P\n", r->prog); if(r == r0) break; } } t = *r0->prog; for(r=uniqp(r0);; r=uniqp(r)) { p0 = r->link->prog; p = r->prog; p0->as = p->as; p0->lineno = p->lineno; p0->from = p->from; p0->to = p->to; if(r == b) break; } p0 = r->prog; p0->as = t.as; p0->lineno = t.lineno; p0->from = t.from; p0->to = t.to; if(debug['v']) { print("\tafter\n"); for(r=b;; r=r->link) { print("\t%P\n", r->prog); if(r == r0) break; } } } void excise(Flow *r) { Prog *p; p = r->prog; if(debug['P'] && debug['v']) print("%P ===delete===\n", p); p->as = ANOP; p->from = zprog.from; p->to = zprog.to; ostats.ndelmov++; } int regtyp(Adr *a) { int t; t = a->type; if(t >= D_AX && t <= D_R15) return 1; if(t >= D_X0 && t <= D_X0+15) return 1; return 0; } // movb elimination. // movb is simulated by the linker // when a register other than ax, bx, cx, dx // is used, so rewrite to other instructions // when possible. a movb into a register // can smash the entire 32-bit register without // causing any trouble. // // TODO: Using the Q forms here instead of the L forms // seems unnecessary, and it makes the instructions longer. static void elimshortmov(Graph *g) { Prog *p; Flow *r; for(r=g->start; r!=nil; r=r->link) { p = r->prog; if(regtyp(&p->to)) { switch(p->as) { case AINCB: case AINCW: p->as = AINCQ; break; case ADECB: case ADECW: p->as = ADECQ; break; case ANEGB: case ANEGW: p->as = ANEGQ; break; case ANOTB: case ANOTW: p->as = ANOTQ; break; } if(regtyp(&p->from) || p->from.type == D_CONST) { // move or artihmetic into partial register. // from another register or constant can be movl. // we don't switch to 64-bit arithmetic if it can // change how the carry bit is set (and the carry bit is needed). switch(p->as) { case AMOVB: case AMOVW: p->as = AMOVQ; break; case AADDB: case AADDW: if(!needc(p->link)) p->as = AADDQ; break; case ASUBB: case ASUBW: if(!needc(p->link)) p->as = ASUBQ; break; case AMULB: case AMULW: p->as = AMULQ; break; case AIMULB: case AIMULW: p->as = AIMULQ; break; case AANDB: case AANDW: p->as = AANDQ; break; case AORB: case AORW: p->as = AORQ; break; case AXORB: case AXORW: p->as = AXORQ; break; case ASHLB: case ASHLW: p->as = ASHLQ; break; } } else if(p->from.type >= D_NONE) { // explicit zero extension, but don't // do that if source is a byte register // (only AH can occur and it's forbidden). switch(p->as) { case AMOVB: p->as = AMOVBQZX; break; case AMOVW: p->as = AMOVWQZX; break; } } } } } // is 'a' a register or constant? static int regconsttyp(Adr *a) { if(regtyp(a)) return 1; switch(a->type) { case D_CONST: case D_FCONST: case D_SCONST: case D_ADDR: return 1; } return 0; } // is reg guaranteed to be truncated by a previous L instruction? static int prevl(Flow *r0, int reg) { Prog *p; Flow *r; ProgInfo info; for(r=uniqp(r0); r!=nil; r=uniqp(r)) { p = r->prog; if(p->to.type == reg) { proginfo(&info, p); if(info.flags & RightWrite) { if(info.flags & SizeL) return 1; return 0; } } } return 0; } /* * the idea is to substitute * one register for another * from one MOV to another * MOV a, R0 * ADD b, R0 / no use of R1 * MOV R0, R1 * would be converted to * MOV a, R1 * ADD b, R1 * MOV R1, R0 * hopefully, then the former or latter MOV * will be eliminated by copy propagation. */ static int subprop(Flow *r0) { Prog *p; ProgInfo info; Adr *v1, *v2; Flow *r; int t; if(debug['P'] && debug['v']) print("subprop %P\n", r0->prog); p = r0->prog; v1 = &p->from; if(!regtyp(v1)) { if(debug['P'] && debug['v']) print("\tnot regtype %D; return 0\n", v1); return 0; } v2 = &p->to; if(!regtyp(v2)) { if(debug['P'] && debug['v']) print("\tnot regtype %D; return 0\n", v2); return 0; } for(r=uniqp(r0); r!=nil; r=uniqp(r)) { if(debug['P'] && debug['v']) print("\t? %P\n", r->prog); if(uniqs(r) == nil) { if(debug['P'] && debug['v']) print("\tno unique successor\n"); break; } p = r->prog; if(p->as == AVARDEF || p->as == AVARKILL) continue; proginfo(&info, p); if(info.flags & Call) { if(debug['P'] && debug['v']) print("\tfound %P; return 0\n", p); return 0; } if(info.reguse | info.regset) { if(debug['P'] && debug['v']) print("\tfound %P; return 0\n", p); return 0; } if((info.flags & Move) && (info.flags & (SizeL|SizeQ|SizeF|SizeD)) && p->to.type == v1->type) goto gotit; if(copyau(&p->from, v2) || copyau(&p->to, v2)) { if(debug['P'] && debug['v']) print("\tcopyau %D failed\n", v2); break; } if(copysub(&p->from, v1, v2, 0) || copysub(&p->to, v1, v2, 0)) { if(debug['P'] && debug['v']) print("\tcopysub failed\n"); break; } } if(debug['P'] && debug['v']) print("\tran off end; return 0\n"); return 0; gotit: copysub(&p->to, v1, v2, 1); if(debug['P']) { print("gotit: %D->%D\n%P", v1, v2, r->prog); if(p->from.type == v2->type) print(" excise"); print("\n"); } for(r=uniqs(r); r!=r0; r=uniqs(r)) { p = r->prog; copysub(&p->from, v1, v2, 1); copysub(&p->to, v1, v2, 1); if(debug['P']) print("%P\n", r->prog); } t = v1->type; v1->type = v2->type; v2->type = t; if(debug['P']) print("%P last\n", r->prog); return 1; } /* * The idea is to remove redundant copies. * v1->v2 F=0 * (use v2 s/v2/v1/)* * set v1 F=1 * use v2 return fail * ----------------- * v1->v2 F=0 * (use v2 s/v2/v1/)* * set v1 F=1 * set v2 return success */ static int copyprop(Graph *g, Flow *r0) { Prog *p; Adr *v1, *v2; USED(g); if(debug['P'] && debug['v']) print("copyprop %P\n", r0->prog); p = r0->prog; v1 = &p->from; v2 = &p->to; if(copyas(v1, v2)) return 1; gactive++; return copy1(v1, v2, r0->s1, 0); } static int copy1(Adr *v1, Adr *v2, Flow *r, int f) { int t; Prog *p; if(r->active == gactive) { if(debug['P']) print("act set; return 1\n"); return 1; } r->active = gactive; if(debug['P']) print("copy %D->%D f=%d\n", v1, v2, f); for(; r != nil; r = r->s1) { p = r->prog; if(debug['P']) print("%P", p); if(!f && uniqp(r) == nil) { f = 1; if(debug['P']) print("; merge; f=%d", f); } t = copyu(p, v2, nil); switch(t) { case 2: /* rar, can't split */ if(debug['P']) print("; %D rar; return 0\n", v2); return 0; case 3: /* set */ if(debug['P']) print("; %D set; return 1\n", v2); return 1; case 1: /* used, substitute */ case 4: /* use and set */ if(f) { if(!debug['P']) return 0; if(t == 4) print("; %D used+set and f=%d; return 0\n", v2, f); else print("; %D used and f=%d; return 0\n", v2, f); return 0; } if(copyu(p, v2, v1)) { if(debug['P']) print("; sub fail; return 0\n"); return 0; } if(debug['P']) print("; sub %D/%D", v2, v1); if(t == 4) { if(debug['P']) print("; %D used+set; return 1\n", v2); return 1; } break; } if(!f) { t = copyu(p, v1, nil); if(!f && (t == 2 || t == 3 || t == 4)) { f = 1; if(debug['P']) print("; %D set and !f; f=%d", v1, f); } } if(debug['P']) print("\n"); if(r->s2) if(!copy1(v1, v2, r->s2, f)) return 0; } return 1; } /* * return * 1 if v only used (and substitute), * 2 if read-alter-rewrite * 3 if set * 4 if set and used * 0 otherwise (not touched) */ int copyu(Prog *p, Adr *v, Adr *s) { ProgInfo info; switch(p->as) { case AJMP: if(s != nil) { if(copysub(&p->to, v, s, 1)) return 1; return 0; } if(copyau(&p->to, v)) return 1; return 0; case ARET: if(s != nil) return 1; return 3; case ACALL: if(REGEXT && v->type <= REGEXT && v->type > exregoffset) return 2; if(REGARG >= 0 && v->type == (uchar)REGARG) return 2; if(v->type == p->from.type) return 2; if(s != nil) { if(copysub(&p->to, v, s, 1)) return 1; return 0; } if(copyau(&p->to, v)) return 4; return 3; case ATEXT: if(REGARG >= 0 && v->type == (uchar)REGARG) return 3; return 0; } if(p->as == AVARDEF || p->as == AVARKILL) return 0; proginfo(&info, p); if((info.reguse|info.regset) & RtoB(v->type)) return 2; if(info.flags & LeftAddr) if(copyas(&p->from, v)) return 2; if((info.flags & (RightRead|RightWrite)) == (RightRead|RightWrite)) if(copyas(&p->to, v)) return 2; if(info.flags & RightWrite) { if(copyas(&p->to, v)) { if(s != nil) return copysub(&p->from, v, s, 1); if(copyau(&p->from, v)) return 4; return 3; } } if(info.flags & (LeftAddr|LeftRead|LeftWrite|RightAddr|RightRead|RightWrite)) { if(s != nil) { if(copysub(&p->from, v, s, 1)) return 1; return copysub(&p->to, v, s, 1); } if(copyau(&p->from, v)) return 1; if(copyau(&p->to, v)) return 1; } return 0; } /* * direct reference, * could be set/use depending on * semantics */ static int copyas(Adr *a, Adr *v) { if(D_AL <= a->type && a->type <= D_R15B) fatal("use of byte register"); if(D_AL <= v->type && v->type <= D_R15B) fatal("use of byte register"); if(a->type != v->type) return 0; if(regtyp(v)) return 1; if(v->type == D_AUTO || v->type == D_PARAM) if(v->offset == a->offset) return 1; return 0; } int sameaddr(Addr *a, Addr *v) { if(a->type != v->type) return 0; if(regtyp(v)) return 1; if(v->type == D_AUTO || v->type == D_PARAM) if(v->offset == a->offset) return 1; return 0; } /* * either direct or indirect */ static int copyau(Adr *a, Adr *v) { if(copyas(a, v)) { if(debug['P'] && debug['v']) print("\tcopyau: copyas returned 1\n"); return 1; } if(regtyp(v)) { if(a->type-D_INDIR == v->type) { if(debug['P'] && debug['v']) print("\tcopyau: found indir use - return 1\n"); return 1; } if(a->index == v->type) { if(debug['P'] && debug['v']) print("\tcopyau: found index use - return 1\n"); return 1; } } return 0; } /* * substitute s for v in a * return failure to substitute */ static int copysub(Adr *a, Adr *v, Adr *s, int f) { int t; if(copyas(a, v)) { t = s->type; if(t >= D_AX && t <= D_R15 || t >= D_X0 && t <= D_X0+15) { if(f) a->type = t; } return 0; } if(regtyp(v)) { t = v->type; if(a->type == t+D_INDIR) { if((s->type == D_BP || s->type == D_R13) && a->index != D_NONE) return 1; /* can't use BP-base with index */ if(f) a->type = s->type+D_INDIR; // return 0; } if(a->index == t) { if(f) a->index = s->type; return 0; } return 0; } return 0; } static void conprop(Flow *r0) { Flow *r; Prog *p, *p0; int t; Adr *v0; p0 = r0->prog; v0 = &p0->to; r = r0; loop: r = uniqs(r); if(r == nil || r == r0) return; if(uniqp(r) == nil) return; p = r->prog; t = copyu(p, v0, nil); switch(t) { case 0: // miss case 1: // use goto loop; case 2: // rar case 4: // use and set break; case 3: // set if(p->as == p0->as) if(p->from.type == p0->from.type) if(p->from.node == p0->from.node) if(p->from.offset == p0->from.offset) if(p->from.scale == p0->from.scale) if(p->from.type == D_FCONST && p->from.u.dval == p0->from.u.dval) if(p->from.index == p0->from.index) { excise(r); goto loop; } break; } } int smallindir(Addr *a, Addr *reg) { return regtyp(reg) && a->type == D_INDIR + reg->type && a->index == D_NONE && 0 <= a->offset && a->offset < 4096; } int stackaddr(Addr *a) { return regtyp(a) && a->type == D_SP; }