summary | refs | log | tree | commit | diff
path: root/src/cmd/6g/peep.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/6g/peep.c')
-rw-r--r-- src/cmd/6g/peep.c 368
1 files changed, 343 insertions, 25 deletions
diff --git a/src/cmd/6g/peep.c b/src/cmd/6g/peep.c
index 3710033b2..569655786 100644
--- a/src/cmd/6g/peep.c
+++ b/src/cmd/6g/peep.c
@@ -34,6 +34,10 @@
#include "opt.h"
static void conprop(Reg *r);
+static void elimshortmov(Reg *r);
+static int prevl(Reg *r, int reg);
+static void pushback(Reg *r);
+static int regconsttyp(Adr*);
// do we need the carry bit
static int
@@ -45,11 +49,17 @@ needc(Prog *p)
case AADCQ:
case ASBBL:
case ASBBQ:
+ case ARCRB:
+ case ARCRW:
case ARCRL:
case ARCRQ:
return 1;
+ case AADDB:
+ case AADDW:
case AADDL:
case AADDQ:
+ case ASUBB:
+ case ASUBW:
case ASUBL:
case ASUBQ:
case AJMP:
@@ -122,9 +132,14 @@ peep(void)
case AGLOBL:
case ANAME:
case ASIGNAME:
+ case ALOCALS:
+ case ATYPE:
p = p->link;
}
}
+
+ // byte, word arithmetic elimination.
+ elimshortmov(r);
// constant propagation
// find MOV $con,R followed by
@@ -200,6 +215,7 @@ loop1:
case AMOVWQZX:
case AMOVLQSX:
case AMOVLQZX:
+ case AMOVQL:
if(regtyp(&p->to)) {
r1 = rnops(uniqs(r));
if(r1 != R) {
@@ -272,6 +288,115 @@ loop1:
}
if(t)
goto loop1;
+
+ // MOVLQZX removal.
+ // The MOVLQZX exists to avoid being confused for a
+ // MOVL that is just copying 32-bit data around during
+ // copyprop. Now that copyprop is done, remove MOVLQZX R1, R2
+ // if it is dominated by an earlier ADDL/MOVL/etc into R1 that
+ // will have already cleared the high bits.
+ //
+ // MOVSD removal.
+ // We never use packed registers, so a MOVSD between registers
+ // can be replaced by MOVAPD, which moves the pair of float64s
+ // instead of just the lower one. We only use the lower one, but
+ // the processor can do better if we do moves using both.
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ if(p->as == AMOVLQZX)
+ if(regtyp(&p->from))
+ if(p->from.type == p->to.type)
+ if(prevl(r, p->from.type))
+ excise(r);
+
+ if(p->as == AMOVSD)
+ if(regtyp(&p->from))
+ if(regtyp(&p->to))
+ p->as = AMOVAPD;
+ }
+
+ // load pipelining
+ // push any load from memory as early as possible
+ // to give it time to complete before use.
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ switch(p->as) {
+ case AMOVB:
+ case AMOVW:
+ case AMOVL:
+ case AMOVQ:
+ case AMOVLQZX:
+ if(regtyp(&p->to) && !regconsttyp(&p->from))
+ pushback(r);
+ }
+ }
+}
+
+static void
+pushback(Reg *r0)
+{
+ Reg *r, *b;
+ Prog *p0, *p, t;
+
+ b = R;
+ p0 = r0->prog;
+ for(r=uniqp(r0); r!=R && uniqs(r)!=R; r=uniqp(r)) {
+ p = r->prog;
+ if(p->as != ANOP) {
+ if(!regconsttyp(&p->from) || !regtyp(&p->to))
+ break;
+ if(copyu(p, &p0->to, A) || copyu(p0, &p->to, A))
+ break;
+ }
+ if(p->as == ACALL)
+ break;
+ b = r;
+ }
+
+ if(b == R) {
+ if(debug['v']) {
+ print("no pushback: %P\n", r0->prog);
+ if(r)
+ print("\t%P [%d]\n", r->prog, uniqs(r)!=R);
+ }
+ return;
+ }
+
+ if(debug['v']) {
+ print("pushback\n");
+ for(r=b;; r=r->link) {
+ print("\t%P\n", r->prog);
+ if(r == r0)
+ break;
+ }
+ }
+
+ t = *r0->prog;
+ for(r=uniqp(r0);; r=uniqp(r)) {
+ p0 = r->link->prog;
+ p = r->prog;
+ p0->as = p->as;
+ p0->lineno = p->lineno;
+ p0->from = p->from;
+ p0->to = p->to;
+
+ if(r == b)
+ break;
+ }
+ p0 = r->prog;
+ p0->as = t.as;
+ p0->lineno = t.lineno;
+ p0->from = t.from;
+ p0->to = t.to;
+
+ if(debug['v']) {
+ print("\tafter\n");
+ for(r=b;; r=r->link) {
+ print("\t%P\n", r->prog);
+ if(r == r0)
+ break;
+ }
+ }
}
void
@@ -335,6 +460,155 @@ regtyp(Adr *a)
return 0;
}
+// movb elimination.
+// movb is simulated by the linker
+// when a register other than ax, bx, cx, dx
+// is used, so rewrite to other instructions
+// when possible. a movb into a register
+// can smash the entire 32-bit register without
+// causing any trouble.
+static void
+elimshortmov(Reg *r)
+{
+ Prog *p;
+
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ if(regtyp(&p->to)) {
+ switch(p->as) {
+ case AINCB:
+ case AINCW:
+ p->as = AINCQ;
+ break;
+ case ADECB:
+ case ADECW:
+ p->as = ADECQ;
+ break;
+ case ANEGB:
+ case ANEGW:
+ p->as = ANEGQ;
+ break;
+ case ANOTB:
+ case ANOTW:
+ p->as = ANOTQ;
+ break;
+ }
+ if(regtyp(&p->from) || p->from.type == D_CONST) {
+ // move or arithmetic into partial register.
+ // from another register or constant can be a full-width move (MOVQ).
+ // we don't switch to 64-bit arithmetic if it can
+ // change how the carry bit is set (and the carry bit is needed).
+ switch(p->as) {
+ case AMOVB:
+ case AMOVW:
+ p->as = AMOVQ;
+ break;
+ case AADDB:
+ case AADDW:
+ if(!needc(p->link))
+ p->as = AADDQ;
+ break;
+ case ASUBB:
+ case ASUBW:
+ if(!needc(p->link))
+ p->as = ASUBQ;
+ break;
+ case AMULB:
+ case AMULW:
+ p->as = AMULQ;
+ break;
+ case AIMULB:
+ case AIMULW:
+ p->as = AIMULQ;
+ break;
+ case AANDB:
+ case AANDW:
+ p->as = AANDQ;
+ break;
+ case AORB:
+ case AORW:
+ p->as = AORQ;
+ break;
+ case AXORB:
+ case AXORW:
+ p->as = AXORQ;
+ break;
+ case ASHLB:
+ case ASHLW:
+ p->as = ASHLQ;
+ break;
+ }
+ } else if(p->from.type >= D_NONE) {
+ // explicit zero extension, but don't
+ // do that if source is a byte register
+ // (only AH can occur and it's forbidden).
+ switch(p->as) {
+ case AMOVB:
+ p->as = AMOVBQZX;
+ break;
+ case AMOVW:
+ p->as = AMOVWQZX;
+ break;
+ }
+ }
+ }
+ }
+}
+
+int
+regconsttyp(Adr *a)
+{
+ if(regtyp(a))
+ return 1;
+ switch(a->type) {
+ case D_CONST:
+ case D_FCONST:
+ case D_SCONST:
+ case D_ADDR:
+ return 1;
+ }
+ return 0;
+}
+
+// is reg guaranteed to be truncated by a previous L instruction?
+static int
+prevl(Reg *r0, int reg)
+{
+ Prog *p;
+ Reg *r;
+
+ for(r=uniqp(r0); r!=R; r=uniqp(r)) {
+ p = r->prog;
+ if(p->to.type == reg) {
+ switch(p->as) {
+ case AADDL:
+ case AANDL:
+ case ADECL:
+ case ADIVL:
+ case AIDIVL:
+ case AIMULL:
+ case AINCL:
+ case AMOVL:
+ case AMULL:
+ case AORL:
+ case ARCLL:
+ case ARCRL:
+ case AROLL:
+ case ARORL:
+ case ASALL:
+ case ASARL:
+ case ASHLL:
+ case ASHRL:
+ case ASUBL:
+ case AXORL:
+ return 1;
+ }
+ return 0;
+ }
+ }
+ return 0;
+}
+
/*
* the idea is to substitute
* one register for another
@@ -357,19 +631,34 @@ subprop(Reg *r0)
Reg *r;
int t;
+ if(debug['P'] && debug['v'])
+ print("subprop %P\n", r0->prog);
p = r0->prog;
v1 = &p->from;
- if(!regtyp(v1))
+ if(!regtyp(v1)) {
+ if(debug['P'] && debug['v'])
+ print("\tnot regtype %D; return 0\n", v1);
return 0;
+ }
v2 = &p->to;
- if(!regtyp(v2))
+ if(!regtyp(v2)) {
+ if(debug['P'] && debug['v'])
+ print("\tnot regtype %D; return 0\n", v2);
return 0;
+ }
for(r=uniqp(r0); r!=R; r=uniqp(r)) {
- if(uniqs(r) == R)
+ if(debug['P'] && debug['v'])
+ print("\t? %P\n", r->prog);
+ if(uniqs(r) == R) {
+ if(debug['P'] && debug['v'])
+ print("\tno unique successor\n");
break;
+ }
p = r->prog;
switch(p->as) {
case ACALL:
+ if(debug['P'] && debug['v'])
+ print("\tfound %P; return 0\n", p);
return 0;
case AIMULL:
@@ -377,20 +666,7 @@ subprop(Reg *r0)
case AIMULW:
if(p->to.type != D_NONE)
break;
-
- case ADIVB:
- case ADIVL:
- case ADIVQ:
- case ADIVW:
- case AIDIVB:
- case AIDIVL:
- case AIDIVQ:
- case AIDIVW:
- case AIMULB:
- case AMULB:
- case AMULL:
- case AMULQ:
- case AMULW:
+ goto giveup;
case ARCLB:
case ARCLL:
@@ -424,6 +700,23 @@ subprop(Reg *r0)
case ASHRL:
case ASHRQ:
case ASHRW:
+ if(p->from.type == D_CONST)
+ break;
+ goto giveup;
+
+ case ADIVB:
+ case ADIVL:
+ case ADIVQ:
+ case ADIVW:
+ case AIDIVB:
+ case AIDIVL:
+ case AIDIVQ:
+ case AIDIVW:
+ case AIMULB:
+ case AMULB:
+ case AMULL:
+ case AMULQ:
+ case AMULW:
case AREP:
case AREPN:
@@ -438,21 +731,34 @@ subprop(Reg *r0)
case AMOVSB:
case AMOVSL:
case AMOVSQ:
+ giveup:
+ if(debug['P'] && debug['v'])
+ print("\tfound %P; return 0\n", p);
return 0;
case AMOVL:
case AMOVQ:
+ case AMOVSS:
+ case AMOVSD:
if(p->to.type == v1->type)
goto gotit;
break;
}
if(copyau(&p->from, v2) ||
- copyau(&p->to, v2))
+ copyau(&p->to, v2)) {
+ if(debug['P'] && debug['v'])
+ print("\tcopyau %D failed\n", v2);
break;
+ }
if(copysub(&p->from, v1, v2, 0) ||
- copysub(&p->to, v1, v2, 0))
+ copysub(&p->to, v1, v2, 0)) {
+ if(debug['P'] && debug['v'])
+ print("\tcopysub failed\n");
break;
+ }
}
+ if(debug['P'] && debug['v'])
+ print("\tran off end; return 0\n", p);
return 0;
gotit:
@@ -497,6 +803,8 @@ copyprop(Reg *r0)
Adr *v1, *v2;
Reg *r;
+ if(debug['P'] && debug['v'])
+ print("copyprop %P\n", r0->prog);
p = r0->prog;
v1 = &p->from;
v2 = &p->to;
@@ -636,6 +944,7 @@ copyu(Prog *p, Adr *v, Adr *s)
case AMOVWLZX:
case AMOVWQSX:
case AMOVWQZX:
+ case AMOVQL:
case AMOVSS:
case AMOVSD:
@@ -853,8 +1162,6 @@ copyu(Prog *p, Adr *v, Adr *s)
return 0;
case ARET: /* funny */
- if(v->type == REGRET || v->type == FREGRET)
- return 2;
if(s != A)
return 1;
return 3;
@@ -864,6 +1171,8 @@ copyu(Prog *p, Adr *v, Adr *s)
return 2;
if(REGARG >= 0 && v->type == (uchar)REGARG)
return 2;
+ if(v->type == p->from.type)
+ return 2;
if(s != A) {
if(copysub(&p->to, v, s, 1))
@@ -907,13 +1216,22 @@ int
copyau(Adr *a, Adr *v)
{
- if(copyas(a, v))
+ if(copyas(a, v)) {
+ if(debug['P'] && debug['v'])
+ print("\tcopyau: copyas returned 1\n");
return 1;
+ }
if(regtyp(v)) {
- if(a->type-D_INDIR == v->type)
+ if(a->type-D_INDIR == v->type) {
+ if(debug['P'] && debug['v'])
+ print("\tcopyau: found indir use - return 1\n");
return 1;
- if(a->index == v->type)
+ }
+ if(a->index == v->type) {
+ if(debug['P'] && debug['v'])
+ print("\tcopyau: found index use - return 1\n");
return 1;
+ }
}
return 0;
}
@@ -990,7 +1308,7 @@ loop:
if(p->from.node == p0->from.node)
if(p->from.offset == p0->from.offset)
if(p->from.scale == p0->from.scale)
- if(p->from.dval == p0->from.dval)
+ if(p->from.u.vval == p0->from.u.vval)
if(p->from.index == p0->from.index) {
excise(r);
goto loop;