summaryrefslogtreecommitdiff
path: root/src/cmd/5g
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/5g')
-rw-r--r--src/cmd/5g/Makefile25
-rw-r--r--src/cmd/5g/cgen.c232
-rw-r--r--src/cmd/5g/cgen64.c6
-rw-r--r--src/cmd/5g/galign.c1
-rw-r--r--src/cmd/5g/gg.h6
-rw-r--r--src/cmd/5g/ggen.c87
-rw-r--r--src/cmd/5g/gobj.c6
-rw-r--r--src/cmd/5g/gsubr.c221
-rw-r--r--src/cmd/5g/list.c34
-rw-r--r--src/cmd/5g/opt.h2
-rw-r--r--src/cmd/5g/peep.c1511
-rw-r--r--src/cmd/5g/reg.c1329
12 files changed, 3245 insertions, 215 deletions
diff --git a/src/cmd/5g/Makefile b/src/cmd/5g/Makefile
index 123af19cd..6873fbc68 100644
--- a/src/cmd/5g/Makefile
+++ b/src/cmd/5g/Makefile
@@ -2,10 +2,10 @@
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
-include ../../Make.conf
+include ../../Make.inc
+O:=$(HOST_O)
-TARG=\
- 5g
+TARG=5g
HFILES=\
../gc/go.h\
@@ -21,18 +21,15 @@ OFILES=\
ggen.$O\
gsubr.$O\
cgen.$O\
- cgen64.$O
+ cgen64.$O\
+ cplx.$O\
+ reg.$O\
+ peep.$O\
LIB=\
- ../gc/gc.a$O
+ ../gc/gc.a\
-$(TARG): $(OFILES) $(LIB)
- $(LD) -o $(TARG) -L"$(GOROOT)"/lib $(OFILES) $(LIB) -lbio -l9 -lm
+include ../../Make.ccmd
-$(OFILES): $(HFILES)
-
-clean:
- rm -f *.o $(TARG) *.5 enam.c 5.out a.out
-
-install: $(TARG)
- cp $(TARG) "$(GOBIN)"/$(TARG)
+%.$O: ../gc/%.c
+ $(HOST_CC) $(HOST_CFLAGS) -c -I. -o $@ ../gc/$*.c
diff --git a/src/cmd/5g/cgen.c b/src/cmd/5g/cgen.c
index 8072c3ceb..1328f4be6 100644
--- a/src/cmd/5g/cgen.c
+++ b/src/cmd/5g/cgen.c
@@ -4,33 +4,6 @@
#include "gg.h"
-void
-mgen(Node *n, Node *n1, Node *rg)
-{
- n1->ostk = 0;
- n1->op = OEMPTY;
-
- if(n->addable) {
- *n1 = *n;
- n1->ostk = 0;
- if(n1->op == OREGISTER || n1->op == OINDREG)
- reg[n->val.u.reg]++;
- return;
- }
- if(n->type->width > widthptr)
- tempname(n1, n->type);
- else
- regalloc(n1, n->type, rg);
- cgen(n, n1);
-}
-
-void
-mfree(Node *n)
-{
- if(n->op == OREGISTER)
- regfree(n);
-}
-
/*
* generate:
* res = n;
@@ -55,12 +28,6 @@ cgen(Node *n, Node *res)
if(res == N || res->type == T)
fatal("cgen: res nil");
- // TODO compile complex
- if(n != N && n->type != T && iscomplex[n->type->etype])
- return;
- if(res != N && res->type != T && iscomplex[res->type->etype])
- return;
-
while(n->op == OCONVNOP)
n = n->left;
@@ -80,6 +47,7 @@ cgen(Node *n, Node *res)
goto ret;
}
+
// update addressability for string, slice
// can't do in walk because n->left->addable
// changes if n->left is an escaping local variable.
@@ -96,7 +64,9 @@ cgen(Node *n, Node *res)
// if both are addressable, move
if(n->addable && res->addable) {
- if (is64(n->type) || is64(res->type) || n->op == OREGISTER || res->op == OREGISTER) {
+ if(is64(n->type) || is64(res->type) ||
+ n->op == OREGISTER || res->op == OREGISTER ||
+ iscomplex[n->type->etype] || iscomplex[res->type->etype]) {
gmove(n, res);
} else {
regalloc(&n1, n->type, N);
@@ -126,8 +96,13 @@ cgen(Node *n, Node *res)
return;
}
+ if(complexop(n, res)) {
+ complexgen(n, res);
+ return;
+ }
+
// if n is sudoaddable generate addr and move
- if (!is64(n->type) && !is64(res->type)) {
+ if (!is64(n->type) && !is64(res->type) && !iscomplex[n->type->etype] && !iscomplex[res->type->etype]) {
a = optoas(OAS, n->type);
if(sudoaddable(a, n, &addr, &w)) {
if (res->op != OREGISTER) {
@@ -195,8 +170,8 @@ cgen(Node *n, Node *res)
case OREAL:
case OIMAG:
case OCMPLX:
- // TODO compile complex
- return;
+ fatal("unexpected complex");
+ break;
// these call bgen to get a bool value
case OOROR:
@@ -269,10 +244,26 @@ cgen(Node *n, Node *res)
cgen(nl, res);
break;
}
-
- mgen(nl, &n1, res);
- gmove(&n1, res);
- mfree(&n1);
+ if(nl->addable && !is64(nl->type)) {
+ regalloc(&n1, nl->type, res);
+ gmove(nl, &n1);
+ } else {
+ if(n->type->width > widthptr || is64(nl->type) || isfloat[nl->type->etype])
+ tempname(&n1, nl->type);
+ else
+ regalloc(&n1, nl->type, res);
+ cgen(nl, &n1);
+ }
+ if(n->type->width > widthptr || is64(n->type) || isfloat[n->type->etype])
+ tempname(&n2, n->type);
+ else
+ regalloc(&n2, n->type, N);
+ gmove(&n1, &n2);
+ gmove(&n2, res);
+ if(n1.op == OREGISTER)
+ regfree(&n1);
+ if(n2.op == OREGISTER)
+ regfree(&n2);
break;
case ODOT:
@@ -461,6 +452,41 @@ ret:
}
/*
+ * generate array index into res.
+ * n might be any size; res is 32-bit.
+ * returns Prog* to patch to panic call.
+ */
+Prog*
+cgenindex(Node *n, Node *res)
+{
+ Node tmp, lo, hi, zero, n1, n2;
+
+ if(!is64(n->type)) {
+ cgen(n, res);
+ return nil;
+ }
+
+ tempname(&tmp, types[TINT64]);
+ cgen(n, &tmp);
+ split64(&tmp, &lo, &hi);
+ gmove(&lo, res);
+ if(debug['B']) {
+ splitclean();
+ return nil;
+ }
+ regalloc(&n1, types[TINT32], N);
+ regalloc(&n2, types[TINT32], N);
+ nodconst(&zero, types[TINT32], 0);
+ gmove(&hi, &n1);
+ gmove(&zero, &n2);
+ gcmp(ACMP, &n1, &n2);
+ regfree(&n2);
+ regfree(&n1);
+ splitclean();
+ return gbranch(ABNE, T);
+}
+
+/*
* generate:
* res = &n;
*/
@@ -469,10 +495,10 @@ agen(Node *n, Node *res)
{
Node *nl, *nr;
Node n1, n2, n3, n4, n5, tmp;
- Prog *p1;
+ Prog *p1, *p2;
uint32 w;
uint64 v;
- Type *t;
+ int r;
if(debug['g']) {
dump("\nagen-res", res);
@@ -504,7 +530,22 @@ agen(Node *n, Node *res)
break;
case OCALLMETH:
- cgen_callmeth(n, 0);
+ case OCALLFUNC:
+ // Release res so that it is available for cgen_call.
+ // Pick it up again after the call.
+ r = -1;
+ if(n->ullman >= UINF) {
+ if(res->op == OREGISTER || res->op == OINDREG) {
+ r = res->val.u.reg;
+ reg[r]--;
+ }
+ }
+ if(n->op == OCALLMETH)
+ cgen_callmeth(n, 0);
+ else
+ cgen_call(n, 0);
+ if(r >= 0)
+ reg[r]++;
cgen_aret(n, res);
break;
@@ -513,36 +554,36 @@ agen(Node *n, Node *res)
cgen_aret(n, res);
break;
- case OCALLFUNC:
- cgen_call(n, 0);
- cgen_aret(n, res);
- break;
-
case OINDEX:
- // TODO(rsc): uint64 indices
+ p2 = nil; // to be patched to panicindex.
w = n->type->width;
if(nr->addable) {
- agenr(nl, &n3, res);
- if(!isconst(nr, CTINT)) {
+ if(!isconst(nr, CTINT))
tempname(&tmp, types[TINT32]);
- cgen(nr, &tmp);
+ if(!isconst(nl, CTSTR))
+ agenr(nl, &n3, res);
+ if(!isconst(nr, CTINT)) {
+ p2 = cgenindex(nr, &tmp);
regalloc(&n1, tmp.type, N);
gmove(&tmp, &n1);
}
} else if(nl->addable) {
if(!isconst(nr, CTINT)) {
tempname(&tmp, types[TINT32]);
- cgen(nr, &tmp);
+ p2 = cgenindex(nr, &tmp);
regalloc(&n1, tmp.type, N);
gmove(&tmp, &n1);
}
- regalloc(&n3, types[tptr], res);
- agen(nl, &n3);
+ if(!isconst(nl, CTSTR)) {
+ regalloc(&n3, types[tptr], res);
+ agen(nl, &n3);
+ }
} else {
tempname(&tmp, types[TINT32]);
- cgen(nr, &tmp);
+ p2 = cgenindex(nr, &tmp);
nr = &tmp;
- agenr(nl, &n3, res);
+ if(!isconst(nl, CTSTR))
+ agenr(nl, &n3, res);
regalloc(&n1, tmp.type, N);
gins(optoas(OAS, tmp.type), &tmp, &n1);
}
@@ -556,9 +597,10 @@ agen(Node *n, Node *res)
// constant index
if(isconst(nr, CTINT)) {
+ if(isconst(nl, CTSTR))
+ fatal("constant string constant index");
v = mpgetfix(nr->val.u.xval);
- if(isslice(nl->type)) {
-
+ if(isslice(nl->type) || nl->type->etype == TSTRING) {
if(!debug['B'] && !n->etype) {
n1 = n3;
n1.op = OINDREG;
@@ -582,13 +624,6 @@ agen(Node *n, Node *res)
n1.type = types[tptr];
n1.xoffset = Array_array;
gmove(&n1, &n3);
- } else
- if(!debug['B'] && !n->etype) {
- if(v < 0)
- yyerror("out of bounds on array");
- else
- if(v >= nl->type->bound)
- yyerror("out of bounds on array");
}
nodconst(&n2, types[tptr], v*w);
@@ -602,19 +637,17 @@ agen(Node *n, Node *res)
break;
}
- // type of the index
- t = types[TUINT32];
- if(issigned[n1.type->etype])
- t = types[TINT32];
-
- regalloc(&n2, t, &n1); // i
+ regalloc(&n2, types[TINT32], &n1); // i
gmove(&n1, &n2);
regfree(&n1);
if(!debug['B'] && !n->etype) {
// check bounds
regalloc(&n4, types[TUINT32], N);
- if(isslice(nl->type)) {
+ if(isconst(nl, CTSTR)) {
+ nodconst(&n1, types[TUINT32], nl->val.u.sval->len);
+ gmove(&n1, &n4);
+ } else if(isslice(nl->type) || nl->type->etype == TSTRING) {
n1 = n3;
n1.op = OINDREG;
n1.type = types[tptr];
@@ -627,11 +660,18 @@ agen(Node *n, Node *res)
gcmp(optoas(OCMP, types[TUINT32]), &n2, &n4);
regfree(&n4);
p1 = gbranch(optoas(OLT, types[TUINT32]), T);
+ if(p2)
+ patch(p2, pc);
ginscall(panicindex, 0);
patch(p1, pc);
}
-
- if(isslice(nl->type)) {
+
+ if(isconst(nl, CTSTR)) {
+ regalloc(&n3, types[tptr], res);
+ p1 = gins(AMOVW, N, &n3);
+ datastring(nl->val.u.sval->s, nl->val.u.sval->len, &p1->from);
+ p1->from.type = D_CONST;
+ } else if(isslice(nl->type) || nl->type->etype == TSTRING) {
n1 = n3;
n1.op = OINDREG;
n1.type = types[tptr];
@@ -653,10 +693,10 @@ agen(Node *n, Node *res)
else if(w == 8)
gshift(AADD, &n2, SHIFT_LL, 3, &n3);
} else {
- regalloc(&n4, t, N);
- nodconst(&n1, t, w);
+ regalloc(&n4, types[TUINT32], N);
+ nodconst(&n1, types[TUINT32], w);
gmove(&n1, &n4);
- gins(optoas(OMUL, t), &n4, &n2);
+ gins(optoas(OMUL, types[TUINT32]), &n4, &n2);
gins(optoas(OADD, types[tptr]), &n2, &n3);
regfree(&n4);
gmove(&n3, res);
@@ -769,12 +809,8 @@ igen(Node *n, Node *a, Node *res)
void
agenr(Node *n, Node *a, Node *res)
{
- Node n1;
-
- tempname(&n1, types[tptr]);
- agen(n, &n1);
regalloc(a, types[tptr], res);
- gmove(&n1, a);
+ agen(n, a);
}
/*
@@ -796,15 +832,12 @@ bgen(Node *n, int true, Prog *to)
if(n == N)
n = nodbool(1);
+ if(n->ninit != nil)
+ genlist(n->ninit);
+
nl = n->left;
nr = n->right;
- // TODO compile complex
- if(nl != N && nl->type != T && iscomplex[nl->type->etype])
- return;
- if(nr != N && nr->type != T && iscomplex[nr->type->etype])
- return;
-
if(n->type == T) {
convlit(&n, types[TBOOL]);
if(n->type == T)
@@ -925,6 +958,7 @@ bgen(Node *n, int true, Prog *to)
goto ret;
}
a = brcom(a);
+ true = !true;
}
// make simplest on right
@@ -985,6 +1019,11 @@ bgen(Node *n, int true, Prog *to)
break;
}
+ if(iscomplex[nl->type->etype]) {
+ complexbool(a, nl, nr, true, to);
+ break;
+ }
+
if(is64(nr->type)) {
if(!nl->addable) {
tempname(&n1, nl->type);
@@ -1031,7 +1070,16 @@ bgen(Node *n, int true, Prog *to)
cgen(nr, &n2);
gcmp(optoas(OCMP, nr->type), &n1, &n2);
- patch(gbranch(a, nr->type), to);
+ if(isfloat[nl->type->etype]) {
+ p1 = gbranch(ABVS, nr->type);
+ patch(gbranch(a, nr->type), to);
+ if(n->op == ONE)
+ patch(p1, to);
+ else
+ patch(p1, pc);
+ } else {
+ patch(gbranch(a, nr->type), to);
+ }
regfree(&n1);
regfree(&n2);
@@ -1088,7 +1136,7 @@ stkof(Node *n)
t = structfirst(&flist, getoutarg(t));
if(t != T)
- return t->width;
+ return t->width + 4; // correct for LR
break;
}
diff --git a/src/cmd/5g/cgen64.c b/src/cmd/5g/cgen64.c
index a22f4a548..716ec5ed5 100644
--- a/src/cmd/5g/cgen64.c
+++ b/src/cmd/5g/cgen64.c
@@ -439,12 +439,12 @@ olsh_break:
p3 = gbranch(ABLO, T);
// shift == 32
+ p1 = gins(AMOVW, &bh, &al);
+ p1->scond = C_SCOND_EQ;
if(bh.type->etype == TINT32)
p1 = gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
else
- p1 = gins(AEOR, &al, &al);
- p1->scond = C_SCOND_EQ;
- p1 = gins(AMOVW, &bh, &al);
+ p1 = gins(AEOR, &ah, &ah);
p1->scond = C_SCOND_EQ;
p4 = gbranch(ABEQ, T);
diff --git a/src/cmd/5g/galign.c b/src/cmd/5g/galign.c
index 76affbf00..9c8760aea 100644
--- a/src/cmd/5g/galign.c
+++ b/src/cmd/5g/galign.c
@@ -25,7 +25,6 @@ Typedef typedefs[] =
void
betypeinit(void)
{
- maxround = 4;
widthptr = 4;
zprog.link = P;
diff --git a/src/cmd/5g/gg.h b/src/cmd/5g/gg.h
index c62efeb6c..603c09fc8 100644
--- a/src/cmd/5g/gg.h
+++ b/src/cmd/5g/gg.h
@@ -33,12 +33,13 @@ struct Addr
struct Prog
{
- short as; // opcode
+ short as; // opcode
uint32 loc; // pc offset in this func
uint32 lineno; // source line that generated this
Addr from; // src address
- Addr to; // dst address
+ Addr to; // dst address
Prog* link; // next instruction in this func
+ void* regp; // points to enclosing Reg struct
char reg; // doubles as width in DATA op
uchar scond;
};
@@ -90,6 +91,7 @@ void ginscall(Node*, int);
* cgen
*/
void agen(Node*, Node*);
+Prog* cgenindex(Node *, Node *);
void igen(Node*, Node*, Node*);
void agenr(Node *n, Node *a, Node *res);
vlong fieldoffset(Type*, Node*);
diff --git a/src/cmd/5g/ggen.c b/src/cmd/5g/ggen.c
index 3243bb863..42a89415d 100644
--- a/src/cmd/5g/ggen.c
+++ b/src/cmd/5g/ggen.c
@@ -61,11 +61,14 @@ compile(Node *fn)
pl = newplist();
pl->name = curfn->nname;
+
+ setlineno(curfn);
nodconst(&nod1, types[TINT32], 0);
ptxt = gins(ATEXT, curfn->nname, &nod1);
afunclit(&ptxt->from);
+ ginit();
genlist(curfn->enter);
pret = nil;
@@ -78,6 +81,7 @@ compile(Node *fn)
}
genlist(curfn->nbody);
+ gclean();
checklabels();
if(nerrors != 0)
goto ret;
@@ -87,29 +91,32 @@ compile(Node *fn)
if(pret)
patch(pret, pc);
+ ginit();
+ if(hasdefer)
+ ginscall(deferreturn, 0);
if(curfn->exit)
genlist(curfn->exit);
+ gclean();
if(nerrors != 0)
goto ret;
- if(hasdefer)
- ginscall(deferreturn, 0);
+ if(curfn->endlineno)
+ lineno = curfn->endlineno;
pc->as = ARET; // overwrite AEND
pc->lineno = lineno;
- /* TODO(kaib): Add back register optimizations
- if(!debug['N'] || debug['R'] || debug['P'])
+ if(!debug['N'] || debug['R'] || debug['P']) {
regopt(ptxt);
- */
+ }
// fill in argument size
ptxt->to.type = D_CONST2;
ptxt->reg = 0; // flags
- ptxt->to.offset2 = rnd(curfn->type->argwid, maxround);
+ ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
// fill in final stack size
if(stksize > maxstksize)
maxstksize = stksize;
- ptxt->to.offset = rnd(maxstksize+maxarg, maxround);
+ ptxt->to.offset = rnd(maxstksize+maxarg, widthptr);
maxstksize = 0;
if(debug['f'])
@@ -203,6 +210,7 @@ ginscall(Node *f, int proc)
void
cgen_callinter(Node *n, Node *res, int proc)
{
+ int r;
Node *i, *f;
Node tmpi, nodo, nodr, nodsp;
@@ -216,6 +224,14 @@ cgen_callinter(Node *n, Node *res, int proc)
i = i->left; // interface
+ // Release res register during genlist and cgen,
+ // which might have their own function calls.
+ r = -1;
+ if(res != N && (res->op == OREGISTER || res->op == OINDREG)) {
+ r = res->val.u.reg;
+ reg[r]--;
+ }
+
if(!i->addable) {
tempname(&tmpi, i->type);
cgen(i, &tmpi);
@@ -223,6 +239,8 @@ cgen_callinter(Node *n, Node *res, int proc)
}
genlist(n->list); // args
+ if(r >= 0)
+ reg[r]++;
regalloc(&nodr, types[tptr], res);
regalloc(&nodo, types[tptr], &nodr);
@@ -427,10 +445,10 @@ cgen_asop(Node *n)
case OOR:
a = optoas(n->etype, nl->type);
if(nl->addable) {
- regalloc(&n2, nl->type, N);
regalloc(&n3, nr->type, N);
- cgen(nl, &n2);
cgen(nr, &n3);
+ regalloc(&n2, nl->type, N);
+ cgen(nl, &n2);
gins(a, &n3, &n2);
cgen(&n2, nl);
regfree(&n2);
@@ -439,13 +457,14 @@ cgen_asop(Node *n)
}
if(nr->ullman < UINF)
if(sudoaddable(a, nl, &addr, &w)) {
+ w = optoas(OAS, nl->type);
regalloc(&n2, nl->type, N);
- regalloc(&n3, nr->type, N);
- p1 = gins(AMOVW, N, &n2);
+ p1 = gins(w, N, &n2);
p1->from = addr;
+ regalloc(&n3, nr->type, N);
cgen(nr, &n3);
gins(a, &n3, &n2);
- p1 = gins(AMOVW, &n2, N);
+ p1 = gins(w, &n2, N);
p1->to = addr;
regfree(&n2);
regfree(&n3);
@@ -544,7 +563,7 @@ cgen_shift(int op, Node *nl, Node *nr, Node *res)
cgen(nl, &n1);
sc = mpgetfix(nr->val.u.xval);
if(sc == 0) {
- return;
+ // nothing to do
} else if(sc >= nl->type->width*8) {
if(op == ORSH && issigned[nl->type->etype])
gshift(AMOVW, &n1, SHIFT_AR, w, &n1);
@@ -682,6 +701,29 @@ regcmp(const void *va, const void *vb)
static Prog* throwpc;
+// We're only going to bother inlining if we can
+// convert all the arguments to 32 bits safely. Can we?
+static int
+fix64(NodeList *nn, int n)
+{
+ NodeList *l;
+ Node *r;
+ int i;
+
+ l = nn;
+ for(i=0; i<n; i++) {
+ r = l->n->right;
+ if(is64(r->type) && !smallintconst(r)) {
+ if(r->op == OCONV)
+ r = r->left;
+ if(is64(r->type))
+ return 0;
+ }
+ l = l->next;
+ }
+ return 1;
+}
+
void
getargs(NodeList *nn, Node *reg, int n)
{
@@ -710,7 +752,7 @@ getargs(NodeList *nn, Node *reg, int n)
void
cmpandthrow(Node *nl, Node *nr)
{
- vlong cl, cr;
+ vlong cl;
Prog *p1;
int op;
Node *c, n1, n2;
@@ -720,17 +762,8 @@ cmpandthrow(Node *nl, Node *nr)
cl = mpgetfix(nl->val.u.xval);
if(cl == 0)
return;
- if(smallintconst(nr)) {
- cr = mpgetfix(nr->val.u.xval);
- if(cl > cr) {
- if(throwpc == nil) {
- throwpc = pc;
- ginscall(panicslice, 0);
- } else
- patch(gbranch(AB, T), throwpc);
- }
+ if(smallintconst(nr))
return;
- }
// put the constant on the right
op = brrev(op);
@@ -794,6 +827,8 @@ cgen_inline(Node *n, Node *res)
goto no;
if(!n->left->addable)
goto no;
+ if(n->left->sym == S)
+ goto no;
if(n->left->sym->pkg != runtimepkg)
goto no;
if(strcmp(n->left->sym->name, "slicearray") == 0)
@@ -811,6 +846,8 @@ cgen_inline(Node *n, Node *res)
slicearray:
if(!sleasy(res))
goto no;
+ if(!fix64(n->list, 5))
+ goto no;
getargs(n->list, nodes, 5);
// if(hb[3] > nel[1]) goto throw
@@ -902,6 +939,8 @@ slicearray:
return 1;
sliceslice:
+ if(!fix64(n->list, narg))
+ goto no;
ntemp.op = OXXX;
if(!sleasy(n->list->n->right)) {
Node *n0;
diff --git a/src/cmd/5g/gobj.c b/src/cmd/5g/gobj.c
index c4564ed66..bf59534b9 100644
--- a/src/cmd/5g/gobj.c
+++ b/src/cmd/5g/gobj.c
@@ -633,10 +633,10 @@ dsymptr(Sym *s, int off, Sym *x, int xoff)
void
-genembedtramp(Type *rcvr, Type *method, Sym *newnam)
+genembedtramp(Type *rcvr, Type *method, Sym *newnam, int iface)
{
// TODO(kaib): re-implement genembedtramp
- genwrapper(rcvr, method, newnam);
+ genwrapper(rcvr, method, newnam, iface);
/*
Sym *e;
int c, d, o;
@@ -705,7 +705,7 @@ out:
p->to.type = D_OREG;
p->to.reg = NREG;
p->to.name = D_EXTERN;
- p->to.sym = methodsym(method->sym, ptrto(f->type));
+ p->to.sym = methodsym(method->sym, ptrto(f->type), 0);
//print("4. %P\n", p);
pc->as = ARET; // overwrite AEND
diff --git a/src/cmd/5g/gsubr.c b/src/cmd/5g/gsubr.c
index 700602c35..133a21b3e 100644
--- a/src/cmd/5g/gsubr.c
+++ b/src/cmd/5g/gsubr.c
@@ -197,9 +197,50 @@ afunclit(Addr *a)
}
}
+static int resvd[] =
+{
+ 9, // reserved for m
+ 10, // reserved for g
+};
+
+void
+ginit(void)
+{
+ int i;
+
+ for(i=0; i<nelem(reg); i++)
+ reg[i] = 0;
+ for(i=0; i<nelem(resvd); i++)
+ reg[resvd[i]]++;
+}
+
+void
+gclean(void)
+{
+ int i;
+
+ for(i=0; i<nelem(resvd); i++)
+ reg[resvd[i]]--;
+
+ for(i=0; i<nelem(reg); i++)
+ if(reg[i])
+ yyerror("reg %R left allocated\n", i);
+}
+
int32
anyregalloc(void)
{
+ int i, j;
+
+ for(i=0; i<nelem(reg); i++) {
+ if(reg[i] == 0)
+ goto ok;
+ for(j=0; j<nelem(resvd); j++)
+ if(resvd[j] == i)
+ goto ok;
+ return 1;
+ ok:;
+ }
return 0;
}
@@ -264,6 +305,11 @@ regalloc(Node *n, Type *t, Node *o)
goto out;
yyerror("out of floating point registers");
goto err;
+
+ case TCOMPLEX64:
+ case TCOMPLEX128:
+ tempname(n, t);
+ return;
}
yyerror("regalloc: unknown type %T", t);
@@ -293,6 +339,8 @@ regfree(Node *n)
print("regalloc fix %d float %d\n", fixfree, floatfree);
}
+ if(n->op == ONAME && iscomplex[n->type->etype])
+ return;
if(n->op != OREGISTER && n->op != OINDREG)
fatal("regfree: not a register");
i = n->val.u.reg;
@@ -498,6 +546,11 @@ gmove(Node *f, Node *t)
tt = simsimtype(t->type);
cvt = t->type;
+ if(iscomplex[ft] || iscomplex[tt]) {
+ complexmove(f, t);
+ return;
+ }
+
// cannot have two memory operands;
// except 64-bit, which always copies via registers anyway.
if(!is64(f->type) && !is64(t->type) && ismem(f) && ismem(t))
@@ -715,56 +768,109 @@ gmove(Node *f, Node *t)
* float to integer
*/
case CASE(TFLOAT32, TINT8):
- case CASE(TFLOAT32, TINT16):
- case CASE(TFLOAT32, TINT32):
case CASE(TFLOAT32, TUINT8):
+ case CASE(TFLOAT32, TINT16):
case CASE(TFLOAT32, TUINT16):
+ case CASE(TFLOAT32, TINT32):
case CASE(TFLOAT32, TUINT32):
- fa = AMOVF;
- a = AMOVFW;
- ta = AMOVW;
- goto fltconv;
+// case CASE(TFLOAT32, TUINT64):
case CASE(TFLOAT64, TINT8):
- case CASE(TFLOAT64, TINT16):
- case CASE(TFLOAT64, TINT32):
case CASE(TFLOAT64, TUINT8):
+ case CASE(TFLOAT64, TINT16):
case CASE(TFLOAT64, TUINT16):
+ case CASE(TFLOAT64, TINT32):
case CASE(TFLOAT64, TUINT32):
- fa = AMOVD;
- a = AMOVDW;
+// case CASE(TFLOAT64, TUINT64):
+ fa = AMOVF;
+ a = AMOVFW;
+ if(ft == TFLOAT64) {
+ fa = AMOVD;
+ a = AMOVDW;
+ }
ta = AMOVW;
- goto fltconv;
+ switch(tt) {
+ case TINT8:
+ ta = AMOVB;
+ break;
+ case TUINT8:
+ ta = AMOVBU;
+ break;
+ case TINT16:
+ ta = AMOVH;
+ break;
+ case TUINT16:
+ ta = AMOVHU;
+ break;
+ }
- case CASE(TFLOAT32, TUINT64):
- case CASE(TFLOAT64, TUINT64):
- fatal("gmove TFLOAT, UINT64 not implemented");
+ regalloc(&r1, types[ft], f);
+ regalloc(&r2, types[tt], t);
+ gins(fa, f, &r1); // load to fpu
+ p1 = gins(a, &r1, &r1); // convert to w
+ switch(tt) {
+ case TUINT8:
+ case TUINT16:
+ case TUINT32:
+ p1->scond |= C_UBIT;
+ }
+ gins(AMOVW, &r1, &r2); // copy to cpu
+ gins(ta, &r2, t); // store
+ regfree(&r1);
+ regfree(&r2);
return;
/*
* integer to float
*/
case CASE(TINT8, TFLOAT32):
- case CASE(TINT16, TFLOAT32):
- case CASE(TINT32, TFLOAT32):
case CASE(TUINT8, TFLOAT32):
+ case CASE(TINT16, TFLOAT32):
case CASE(TUINT16, TFLOAT32):
+ case CASE(TINT32, TFLOAT32):
case CASE(TUINT32, TFLOAT32):
- fa = AMOVW;
- a = AMOVWF;
- ta = AMOVF;
- goto fltconv;
-
case CASE(TINT8, TFLOAT64):
- case CASE(TINT16, TFLOAT64):
- case CASE(TINT32, TFLOAT64):
case CASE(TUINT8, TFLOAT64):
+ case CASE(TINT16, TFLOAT64):
case CASE(TUINT16, TFLOAT64):
+ case CASE(TINT32, TFLOAT64):
case CASE(TUINT32, TFLOAT64):
fa = AMOVW;
- a = AMOVWD;
- ta = AMOVD;
- goto fltconv;
+ switch(ft) {
+ case TINT8:
+ fa = AMOVB;
+ break;
+ case TUINT8:
+ fa = AMOVBU;
+ break;
+ case TINT16:
+ fa = AMOVH;
+ break;
+ case TUINT16:
+ fa = AMOVHU;
+ break;
+ }
+ a = AMOVWF;
+ ta = AMOVF;
+ if(tt == TFLOAT64) {
+ a = AMOVWD;
+ ta = AMOVD;
+ }
+ regalloc(&r1, types[ft], f);
+ regalloc(&r2, types[tt], t);
+ gins(fa, f, &r1); // load to cpu
+ gins(AMOVW, &r1, &r2); // copy to fpu
+ p1 = gins(a, &r2, &r2); // convert
+ switch(ft) {
+ case TUINT8:
+ case TUINT16:
+ case TUINT32:
+ p1->scond |= C_UBIT;
+ }
+ gins(ta, &r2, t); // store
+ regfree(&r1);
+ regfree(&r2);
+ return;
case CASE(TUINT64, TFLOAT32):
case CASE(TUINT64, TFLOAT64):
@@ -831,16 +937,6 @@ trunc64:
splitclean();
return;
-fltconv:
- regalloc(&r1, types[ft], f);
- regalloc(&r2, types[tt], t);
- gins(fa, f, &r1);
- gins(a, &r1, &r2);
- gins(ta, &r2, t);
- regfree(&r1);
- regfree(&r2);
- return;
-
fatal:
// should not happen
fatal("gmove %N -> %N", f, t);
@@ -951,7 +1047,7 @@ gshift(int as, Node *lhs, int32 stype, int32 sval, Node *rhs)
{
Prog *p;
- if (sval <= 0 || sval > 32)
+ if(sval <= 0 || sval > 32)
fatal("bad shift value: %d", sval);
sval = sval&0x1f;
@@ -983,7 +1079,7 @@ checkoffset(Addr *a, int canemitcode)
if(a->offset < unmappedzero)
return;
if(!canemitcode)
- fatal("checkoffset %#llx, cannot emit code", a->offset);
+ fatal("checkoffset %#x, cannot emit code", a->offset);
// cannot rely on unmapped nil page at 0 to catch
// reference with large offset. instead, emit explicit
@@ -1014,7 +1110,7 @@ naddr(Node *n, Addr *a, int canemitcode)
break;
case OREGISTER:
- if (n->val.u.reg <= REGALLOC_RMAX) {
+ if(n->val.u.reg <= REGALLOC_RMAX) {
a->type = D_REG;
a->reg = n->val.u.reg;
} else {
@@ -1314,15 +1410,21 @@ optoas(int op, Type *t)
case CASE(OAS, TBOOL):
case CASE(OAS, TINT8):
- case CASE(OAS, TUINT8):
a = AMOVB;
break;
+ case CASE(OAS, TUINT8):
+ a = AMOVBU;
+ break;
+
case CASE(OAS, TINT16):
- case CASE(OAS, TUINT16):
a = AMOVH;
break;
+ case CASE(OAS, TUINT16):
+ a = AMOVHU;
+ break;
+
case CASE(OAS, TINT32):
case CASE(OAS, TUINT32):
case CASE(OAS, TPTR32):
@@ -1554,7 +1656,7 @@ sudoaddable(int as, Node *n, Addr *a, int *w)
int64 v;
Node n1, n2, n3, n4, *nn, *l, *r;
Node *reg, *reg1;
- Prog *p1;
+ Prog *p1, *p2;
Type *t;
if(n->type == T)
@@ -1579,6 +1681,8 @@ sudoaddable(int as, Node *n, Addr *a, int *w)
goto odot;
case OINDEX:
+ if(n->left->type->etype == TSTRING)
+ return 0;
cleani += 2;
reg = &clean[cleani-1];
reg1 = &clean[cleani-2];
@@ -1692,8 +1796,8 @@ oindex:
if(issigned[r->type->etype])
t = types[TINT32];
regalloc(reg1, t, N);
- regalloc(&n3, r->type, reg1);
- cgen(r, &n3);
+ regalloc(&n3, types[TINT32], reg1);
+ p2 = cgenindex(r, &n3);
gmove(&n3, reg1);
regfree(&n3);
@@ -1734,6 +1838,8 @@ oindex:
gcmp(optoas(OCMP, types[TUINT32]), reg1, &n3);
regfree(&n3);
p1 = gbranch(optoas(OLT, types[TUINT32]), T);
+ if(p2)
+ patch(p2, pc);
ginscall(panicindex, 0);
patch(p1, pc);
}
@@ -1746,14 +1852,20 @@ oindex:
gmove(&n2, reg);
}
- if (*w == 1)
+ switch(*w) {
+ case 1:
gins(AADD, reg1, reg);
- else if(*w == 2)
+ break;
+ case 2:
gshift(AADD, reg1, SHIFT_LL, 1, reg);
- else if(*w == 4)
+ break;
+ case 4:
gshift(AADD, reg1, SHIFT_LL, 2, reg);
- else if(*w == 8)
+ break;
+ case 8:
gshift(AADD, reg1, SHIFT_LL, 3, reg);
+ break;
+ }
naddr(reg1, a, 1);
a->type = D_OREG;
@@ -1799,19 +1911,6 @@ oindex_const:
n1.type = types[tptr];
n1.xoffset = Array_array;
gmove(&n1, reg);
-
- } else
- if(!debug['B']) {
- if(v < 0) {
- yyerror("out of bounds on array");
- } else
- if(o & OPtrto) {
- if(v >= l->type->type->bound)
- yyerror("out of bounds on array");
- } else
- if(v >= l->type->bound) {
- yyerror("out of bounds on array");
- }
}
n2 = *reg;
diff --git a/src/cmd/5g/list.c b/src/cmd/5g/list.c
index 19027829c..ce74d6478 100644
--- a/src/cmd/5g/list.c
+++ b/src/cmd/5g/list.c
@@ -49,28 +49,28 @@ listinit(void)
int
Pconv(Fmt *fp)
{
- char str[STRINGSZ];
+ char str[STRINGSZ], str1[STRINGSZ];
Prog *p;
p = va_arg(fp->args, Prog*);
sconsize = 8;
switch(p->as) {
default:
+ snprint(str1, sizeof(str1), "%A%C", p->as, p->scond);
if(p->reg == NREG)
- snprint(str, sizeof(str), "%.4ld (%L) %-7A%C %D,%D",
- p->loc, p->lineno, p->as, p->scond, &p->from, &p->to);
- else if (p->from.type != D_FREG)
- snprint(str, sizeof(str), "%.4ld (%L) %-7A%C %D,R%d,%D",
- p->loc, p->lineno, p->as, p->scond, &p->from, p->reg, &p->to);
+ snprint(str, sizeof(str), "%.4d (%L) %-7s %D,%D",
+ p->loc, p->lineno, str1, &p->from, &p->to);
else
- snprint(str, sizeof(str), "%.4ld (%L) %-7A%C %D,F%d,%D",
+ if (p->from.type != D_FREG) {
+ snprint(str, sizeof(str), "%.4d (%L) %-7s %D,R%d,%D",
+ p->loc, p->lineno, str1, &p->from, p->reg, &p->to);
+ } else
+ snprint(str, sizeof(str), "%.4d (%L) %-7A%C %D,F%d,%D",
p->loc, p->lineno, p->as, p->scond, &p->from, p->reg, &p->to);
break;
case ADATA:
- case AINIT:
- case ADYNT:
- snprint(str, sizeof(str), "%.4ld (%L) %-7A %D/%d,%D",
+ snprint(str, sizeof(str), "%.4d (%L) %-7A %D/%d,%D",
p->loc, p->lineno, p->as, &p->from, p->reg, &p->to);
break;
}
@@ -154,10 +154,16 @@ Dconv(Fmt *fp)
break;
case D_BRANCH:
- if(a->sym != S)
- sprint(str, "%s+%d(APC)", a->sym->name, a->offset);
- else
- sprint(str, "%d(APC)", a->offset);
+ if(a->branch == P || a->branch->loc == 0) {
+ if(a->sym != S)
+ sprint(str, "%s+%d(APC)", a->sym->name, a->offset);
+ else
+ sprint(str, "%d(APC)", a->offset);
+ } else
+ if(a->sym != S)
+ sprint(str, "%s+%d(APC)", a->sym->name, a->branch->loc);
+ else
+ sprint(str, "%d(APC)", a->branch->loc);
break;
case D_FCONST:
diff --git a/src/cmd/5g/opt.h b/src/cmd/5g/opt.h
index 1b0366290..9a4e17571 100644
--- a/src/cmd/5g/opt.h
+++ b/src/cmd/5g/opt.h
@@ -128,7 +128,7 @@ Reg* rega(void);
int rcmp(const void*, const void*);
void regopt(Prog*);
void addmove(Reg*, int, int, int);
-Bits mkvar(Reg*, Adr*);
+Bits mkvar(Reg *r, Adr *a, int);
void prop(Reg*, Bits, Bits);
void loopit(Reg*, int32);
void synch(Reg*, Bits);
diff --git a/src/cmd/5g/peep.c b/src/cmd/5g/peep.c
new file mode 100644
index 000000000..32333e8a9
--- /dev/null
+++ b/src/cmd/5g/peep.c
@@ -0,0 +1,1511 @@
+// Inferno utils/5c/peep.c
+// http://code.google.com/p/inferno-os/source/browse/utils/5g/peep.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#include "gg.h"
+#include "opt.h"
+
+int xtramodes(Reg*, Adr*);
+int shiftprop(Reg *r);
+void constprop(Adr *c1, Adr *v1, Reg *r);
+void predicate(void);
+int copyau1(Prog *p, Adr *v);
+int isdconst(Addr *a);
+
+void
+peep(void)
+{
+ Reg *r, *r1, *r2;
+ Prog *p, *p1;
+ int t;
+/*
+ * complete R structure
+ */
+ t = 0;
+ for(r=firstr; r!=R; r=r1) {
+ r1 = r->link;
+ if(r1 == R)
+ break;
+ p = r->prog->link;
+ while(p != r1->prog)
+ switch(p->as) {
+ default:
+ r2 = rega();
+ r->link = r2;
+ r2->link = r1;
+
+ r2->prog = p;
+ r2->p1 = r;
+ r->s1 = r2;
+ r2->s1 = r1;
+ r1->p1 = r2;
+
+ r = r2;
+ t++;
+
+ case ADATA:
+ case AGLOBL:
+ case ANAME:
+ case ASIGNAME:
+ p = p->link;
+ }
+ }
+
+loop1:
+ t = 0;
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ switch(p->as) {
+ case ASLL:
+ case ASRL:
+ case ASRA:
+ /*
+ * elide shift into D_SHIFT operand of subsequent instruction
+ */
+ if(shiftprop(r)) {
+ excise(r);
+ t++;
+ break;
+ }
+ break;
+
+ case AMOVW:
+ case AMOVF:
+ case AMOVD:
+ if(!regtyp(&p->to))
+ break;
+ if(isdconst(&p->from)) {
+ constprop(&p->from, &p->to, r->s1);
+ break;
+ }
+ if(!regtyp(&p->from))
+ break;
+ if(p->from.type != p->to.type)
+ break;
+ if(copyprop(r)) {
+ excise(r);
+ t++;
+ break;
+ }
+ if(subprop(r) && copyprop(r)) {
+ excise(r);
+ t++;
+ break;
+ }
+ }
+ }
+ if(t)
+ goto loop1;
+ /*
+ * look for MOVB x,R; MOVB R,R
+ */
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ switch(p->as) {
+ default:
+ continue;
+ case AEOR:
+ /*
+ * EOR -1,x,y => MVN x,y
+ */
+ if(isdconst(&p->from) && p->from.offset == -1) {
+ p->as = AMVN;
+ p->from.type = D_REG;
+ if(p->reg != NREG)
+ p->from.reg = p->reg;
+ else
+ p->from.reg = p->to.reg;
+ p->reg = NREG;
+ }
+ continue;
+ case AMOVH:
+ case AMOVHU:
+ case AMOVB:
+ case AMOVBU:
+ if(p->to.type != D_REG)
+ continue;
+ break;
+ }
+ r1 = r->link;
+ if(r1 == R)
+ continue;
+ p1 = r1->prog;
+ if(p1->as != p->as)
+ continue;
+ if(p1->from.type != D_REG || p1->from.reg != p->to.reg)
+ continue;
+ if(p1->to.type != D_REG || p1->to.reg != p->to.reg)
+ continue;
+ excise(r1);
+ }
+
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ switch(p->as) {
+ case AMOVW:
+ case AMOVB:
+ case AMOVBU:
+ if(p->from.type == D_OREG && p->from.offset == 0)
+ xtramodes(r, &p->from);
+ else if(p->to.type == D_OREG && p->to.offset == 0)
+ xtramodes(r, &p->to);
+ else
+ continue;
+ break;
+ case ACMP:
+ /*
+ * elide CMP $0,x if calculation of x can set condition codes
+ */
+ if(isdconst(&p->from) || p->from.offset != 0)
+ continue;
+ r2 = r->s1;
+ if(r2 == R)
+ continue;
+ t = r2->prog->as;
+ switch(t) {
+ default:
+ continue;
+ case ABEQ:
+ case ABNE:
+ case ABMI:
+ case ABPL:
+ break;
+ case ABGE:
+ t = ABPL;
+ break;
+ case ABLT:
+ t = ABMI;
+ break;
+ case ABHI:
+ t = ABNE;
+ break;
+ case ABLS:
+ t = ABEQ;
+ break;
+ }
+ r1 = r;
+ do
+ r1 = uniqp(r1);
+ while (r1 != R && r1->prog->as == ANOP);
+ if(r1 == R)
+ continue;
+ p1 = r1->prog;
+ if(p1->to.type != D_REG)
+ continue;
+ if(p1->to.reg != p->reg)
+ if(!(p1->as == AMOVW && p1->from.type == D_REG && p1->from.reg == p->reg))
+ continue;
+ switch(p1->as) {
+ default:
+ continue;
+ case AMOVW:
+ if(p1->from.type != D_REG)
+ continue;
+ case AAND:
+ case AEOR:
+ case AORR:
+ case ABIC:
+ case AMVN:
+ case ASUB:
+ case ARSB:
+ case AADD:
+ case AADC:
+ case ASBC:
+ case ARSC:
+ break;
+ }
+ p1->scond |= C_SBIT;
+ r2->prog->as = t;
+ excise(r);
+ continue;
+ }
+ }
+
+ predicate();
+}
+
+Reg*
+uniqp(Reg *r)
+{
+ Reg *r1;
+
+ r1 = r->p1;
+ if(r1 == R) {
+ r1 = r->p2;
+ if(r1 == R || r1->p2link != R)
+ return R;
+ } else
+ if(r->p2 != R)
+ return R;
+ return r1;
+}
+
+Reg*
+uniqs(Reg *r)
+{
+ Reg *r1;
+
+ r1 = r->s1;
+ if(r1 == R) {
+ r1 = r->s2;
+ if(r1 == R)
+ return R;
+ } else
+ if(r->s2 != R)
+ return R;
+ return r1;
+}
+
+int
+regtyp(Adr *a)
+{
+
+ if(a->type == D_REG)
+ return 1;
+ if(a->type == D_FREG)
+ return 1;
+ return 0;
+}
+
+/*
+ * the idea is to substitute
+ * one register for another
+ * from one MOV to another
+ * MOV a, R0
+ * ADD b, R0 / no use of R1
+ * MOV R0, R1
+ * would be converted to
+ * MOV a, R1
+ * ADD b, R1
+ * MOV R1, R0
+ * hopefully, then the former or latter MOV
+ * will be eliminated by copy propagation.
+ */
+int
+subprop(Reg *r0)
+{
+ Prog *p;
+ Adr *v1, *v2;
+ Reg *r;
+ int t;
+
+ p = r0->prog;
+ v1 = &p->from;
+ if(!regtyp(v1))
+ return 0;
+ v2 = &p->to;
+ if(!regtyp(v2))
+ return 0;
+ for(r=uniqp(r0); r!=R; r=uniqp(r)) {
+ if(uniqs(r) == R)
+ break;
+ p = r->prog;
+ switch(p->as) {
+ case ABL:
+ return 0;
+
+ case ACMP:
+ case ACMN:
+ case AADD:
+ case ASUB:
+ case ARSB:
+ case ASLL:
+ case ASRL:
+ case ASRA:
+ case AORR:
+ case AAND:
+ case AEOR:
+ case AMUL:
+ case ADIV:
+ case ADIVU:
+
+ case ACMPF:
+ case ACMPD:
+ case AADDD:
+ case AADDF:
+ case ASUBD:
+ case ASUBF:
+ case AMULD:
+ case AMULF:
+ case ADIVD:
+ case ADIVF:
+ if(p->to.type == v1->type)
+ if(p->to.reg == v1->reg) {
+ if(p->reg == NREG)
+ p->reg = p->to.reg;
+ goto gotit;
+ }
+ break;
+
+ case AMOVF:
+ case AMOVD:
+ case AMOVW:
+ if(p->to.type == v1->type)
+ if(p->to.reg == v1->reg)
+ goto gotit;
+ break;
+
+ case AMOVM:
+ t = 1<<v2->reg;
+ if((p->from.type == D_CONST && (p->from.offset&t)) ||
+ (p->to.type == D_CONST && (p->to.offset&t)))
+ return 0;
+ break;
+ }
+ if(copyau(&p->from, v2) ||
+ copyau1(p, v2) ||
+ copyau(&p->to, v2))
+ break;
+ if(copysub(&p->from, v1, v2, 0) ||
+ copysub1(p, v1, v2, 0) ||
+ copysub(&p->to, v1, v2, 0))
+ break;
+ }
+ return 0;
+
+gotit:
+ copysub(&p->to, v1, v2, 1);
+ if(debug['P']) {
+ print("gotit: %D->%D\n%P", v1, v2, r->prog);
+ if(p->from.type == v2->type)
+ print(" excise");
+ print("\n");
+ }
+ for(r=uniqs(r); r!=r0; r=uniqs(r)) {
+ p = r->prog;
+ copysub(&p->from, v1, v2, 1);
+ copysub1(p, v1, v2, 1);
+ copysub(&p->to, v1, v2, 1);
+ if(debug['P'])
+ print("%P\n", r->prog);
+ }
+ t = v1->reg;
+ v1->reg = v2->reg;
+ v2->reg = t;
+ if(debug['P'])
+ print("%P last\n", r->prog);
+ return 1;
+}
+
+/*
+ * The idea is to remove redundant copies.
+ * v1->v2 F=0
+ * (use v2 s/v2/v1/)*
+ * set v1 F=1
+ * use v2 return fail
+ * -----------------
+ * v1->v2 F=0
+ * (use v2 s/v2/v1/)*
+ * set v1 F=1
+ * set v2 return success
+ */
+int
+copyprop(Reg *r0)
+{
+ Prog *p;
+ Adr *v1, *v2;
+ Reg *r;
+
+ p = r0->prog;
+ v1 = &p->from;
+ v2 = &p->to;
+ if(copyas(v1, v2))
+ return 1;
+ for(r=firstr; r!=R; r=r->link)
+ r->active = 0;
+ return copy1(v1, v2, r0->s1, 0);
+}
+
+int
+copy1(Adr *v1, Adr *v2, Reg *r, int f)
+{
+ int t;
+ Prog *p;
+
+ if(r->active) {
+ if(debug['P'])
+ print("act set; return 1\n");
+ return 1;
+ }
+ r->active = 1;
+ if(debug['P'])
+ print("copy %D->%D f=%d\n", v1, v2, f);
+ for(; r != R; r = r->s1) {
+ p = r->prog;
+ if(debug['P'])
+ print("%P", p);
+ if(!f && uniqp(r) == R) {
+ f = 1;
+ if(debug['P'])
+ print("; merge; f=%d", f);
+ }
+ t = copyu(p, v2, A);
+ switch(t) {
+ case 2: /* rar, cant split */
+ if(debug['P'])
+ print("; %Drar; return 0\n", v2);
+ return 0;
+
+ case 3: /* set */
+ if(debug['P'])
+ print("; %Dset; return 1\n", v2);
+ return 1;
+
+ case 1: /* used, substitute */
+ case 4: /* use and set */
+ if(f) {
+ if(!debug['P'])
+ return 0;
+ if(t == 4)
+ print("; %Dused+set and f=%d; return 0\n", v2, f);
+ else
+ print("; %Dused and f=%d; return 0\n", v2, f);
+ return 0;
+ }
+ if(copyu(p, v2, v1)) {
+ if(debug['P'])
+ print("; sub fail; return 0\n");
+ return 0;
+ }
+ if(debug['P'])
+ print("; sub%D/%D", v2, v1);
+ if(t == 4) {
+ if(debug['P'])
+ print("; %Dused+set; return 1\n", v2);
+ return 1;
+ }
+ break;
+ }
+ if(!f) {
+ t = copyu(p, v1, A);
+ if(!f && (t == 2 || t == 3 || t == 4)) {
+ f = 1;
+ if(debug['P'])
+ print("; %Dset and !f; f=%d", v1, f);
+ }
+ }
+ if(debug['P'])
+ print("\n");
+ if(r->s2)
+ if(!copy1(v1, v2, r->s2, f))
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * The idea is to remove redundant constants.
+ * $c1->v1
+ * ($c1->v2 s/$c1/v1)*
+ * set v1 return
+ * The v1->v2 should be eliminated by copy propagation.
+ */
+void
+constprop(Adr *c1, Adr *v1, Reg *r)
+{
+ Prog *p;
+
+ if(debug['P'])
+ print("constprop %D->%D\n", c1, v1);
+ for(; r != R; r = r->s1) {
+ p = r->prog;
+ if(debug['P'])
+ print("%P", p);
+ if(uniqp(r) == R) {
+ if(debug['P'])
+ print("; merge; return\n");
+ return;
+ }
+ if(p->as == AMOVW && copyas(&p->from, c1)) {
+ if(debug['P'])
+ print("; sub%D/%D", &p->from, v1);
+ p->from = *v1;
+ } else if(copyu(p, v1, A) > 1) {
+ if(debug['P'])
+ print("; %Dset; return\n", v1);
+ return;
+ }
+ if(debug['P'])
+ print("\n");
+ if(r->s2)
+ constprop(c1, v1, r->s2);
+ }
+}
+
+/*
+ * ASLL x,y,w
+ * .. (not use w, not set x y w)
+ * AXXX w,a,b (a != w)
+ * .. (not use w)
+ * (set w)
+ * ----------- changed to
+ * ..
+ * AXXX (x<<y),a,b
+ * ..
+ */
+#define FAIL(msg) { if(debug['P']) print("\t%s; FAILURE\n", msg); return 0; }
+int
+shiftprop(Reg *r)
+{
+ Reg *r1;
+ Prog *p, *p1, *p2;
+ int n, o;
+ Adr a;
+
+ p = r->prog;
+ if(p->to.type != D_REG)
+ FAIL("BOTCH: result not reg");
+ n = p->to.reg;
+ a = zprog.from;
+ if(p->reg != NREG && p->reg != p->to.reg) {
+ a.type = D_REG;
+ a.reg = p->reg;
+ }
+ if(debug['P'])
+ print("shiftprop\n%P", p);
+ r1 = r;
+ for(;;) {
+ /* find first use of shift result; abort if shift operands or result are changed */
+ r1 = uniqs(r1);
+ if(r1 == R)
+ FAIL("branch");
+ if(uniqp(r1) == R)
+ FAIL("merge");
+ p1 = r1->prog;
+ if(debug['P'])
+ print("\n%P", p1);
+ switch(copyu(p1, &p->to, A)) {
+ case 0: /* not used or set */
+ if((p->from.type == D_REG && copyu(p1, &p->from, A) > 1) ||
+ (a.type == D_REG && copyu(p1, &a, A) > 1))
+ FAIL("args modified");
+ continue;
+ case 3: /* set, not used */
+ FAIL("BOTCH: noref");
+ }
+ break;
+ }
+ /* check whether substitution can be done */
+ switch(p1->as) {
+ default:
+ FAIL("non-dpi");
+ case AAND:
+ case AEOR:
+ case AADD:
+ case AADC:
+ case AORR:
+ case ASUB:
+ case ARSB:
+ case ASBC:
+ case ARSC:
+ if(p1->reg == n || (p1->reg == NREG && p1->to.type == D_REG && p1->to.reg == n)) {
+ if(p1->from.type != D_REG)
+ FAIL("can't swap");
+ p1->reg = p1->from.reg;
+ p1->from.reg = n;
+ switch(p1->as) {
+ case ASUB:
+ p1->as = ARSB;
+ break;
+ case ARSB:
+ p1->as = ASUB;
+ break;
+ case ASBC:
+ p1->as = ARSC;
+ break;
+ case ARSC:
+ p1->as = ASBC;
+ break;
+ }
+ if(debug['P'])
+ print("\t=>%P", p1);
+ }
+ case ABIC:
+ case ACMP:
+ case ACMN:
+ if(p1->reg == n)
+ FAIL("can't swap");
+ if(p1->reg == NREG && p1->to.reg == n)
+ FAIL("shift result used twice");
+ case AMVN:
+ if(p1->from.type == D_SHIFT)
+ FAIL("shift result used in shift");
+ if(p1->from.type != D_REG || p1->from.reg != n)
+ FAIL("BOTCH: where is it used?");
+ break;
+ }
+ /* check whether shift result is used subsequently */
+ p2 = p1;
+ if(p1->to.reg != n)
+ for (;;) {
+ r1 = uniqs(r1);
+ if(r1 == R)
+ FAIL("inconclusive");
+ p1 = r1->prog;
+ if(debug['P'])
+ print("\n%P", p1);
+ switch(copyu(p1, &p->to, A)) {
+ case 0: /* not used or set */
+ continue;
+ case 3: /* set, not used */
+ break;
+ default:/* used */
+ FAIL("reused");
+ }
+ break;
+ }
+
+ /* make the substitution */
+ p2->from.type = D_SHIFT;
+ p2->from.reg = NREG;
+ o = p->reg;
+ if(o == NREG)
+ o = p->to.reg;
+
+ switch(p->from.type){
+ case D_CONST:
+ o |= (p->from.offset&0x1f)<<7;
+ break;
+ case D_REG:
+ o |= (1<<4) | (p->from.reg<<8);
+ break;
+ }
+ switch(p->as){
+ case ASLL:
+ o |= 0<<5;
+ break;
+ case ASRL:
+ o |= 1<<5;
+ break;
+ case ASRA:
+ o |= 2<<5;
+ break;
+ }
+ p2->from.offset = o;
+ if(debug['P'])
+ print("\t=>%P\tSUCCEED\n", p2);
+ return 1;
+}
+
+Reg*
+findpre(Reg *r, Adr *v)
+{
+ Reg *r1;
+
+ for(r1=uniqp(r); r1!=R; r=r1,r1=uniqp(r)) {
+ if(uniqs(r1) != r)
+ return R;
+ switch(copyu(r1->prog, v, A)) {
+ case 1: /* used */
+ case 2: /* read-alter-rewrite */
+ return R;
+ case 3: /* set */
+ case 4: /* set and used */
+ return r1;
+ }
+ }
+ return R;
+}
+
+Reg*
+findinc(Reg *r, Reg *r2, Adr *v)
+{
+ Reg *r1;
+ Prog *p;
+
+
+ for(r1=uniqs(r); r1!=R && r1!=r2; r=r1,r1=uniqs(r)) {
+ if(uniqp(r1) != r)
+ return R;
+ switch(copyu(r1->prog, v, A)) {
+ case 0: /* not touched */
+ continue;
+ case 4: /* set and used */
+ p = r1->prog;
+ if(p->as == AADD)
+ if(isdconst(&p->from))
+ if(p->from.offset > -4096 && p->from.offset < 4096)
+ return r1;
+ default:
+ return R;
+ }
+ }
+ return R;
+}
+
+int
+nochange(Reg *r, Reg *r2, Prog *p)
+{
+ Adr a[3];
+ int i, n;
+
+ if(r == r2)
+ return 1;
+ n = 0;
+ if(p->reg != NREG && p->reg != p->to.reg) {
+ a[n].type = D_REG;
+ a[n++].reg = p->reg;
+ }
+ switch(p->from.type) {
+ case D_SHIFT:
+ a[n].type = D_REG;
+ a[n++].reg = p->from.offset&0xf;
+ case D_REG:
+ a[n].type = D_REG;
+ a[n++].reg = p->from.reg;
+ }
+ if(n == 0)
+ return 1;
+ for(; r!=R && r!=r2; r=uniqs(r)) {
+ p = r->prog;
+ for(i=0; i<n; i++)
+ if(copyu(p, &a[i], A) > 1)
+ return 0;
+ }
+ return 1;
+}
+
+int
+findu1(Reg *r, Adr *v)
+{
+ for(; r != R; r = r->s1) {
+ if(r->active)
+ return 0;
+ r->active = 1;
+ switch(copyu(r->prog, v, A)) {
+ case 1: /* used */
+ case 2: /* read-alter-rewrite */
+ case 4: /* set and used */
+ return 1;
+ case 3: /* set */
+ return 0;
+ }
+ if(r->s2)
+ if (findu1(r->s2, v))
+ return 1;
+ }
+ return 0;
+}
+
+int
+finduse(Reg *r, Adr *v)
+{
+ Reg *r1;
+
+ for(r1=firstr; r1!=R; r1=r1->link)
+ r1->active = 0;
+ return findu1(r, v);
+}
+
+int
+xtramodes(Reg *r, Adr *a)
+{
+ Reg *r1, *r2, *r3;
+ Prog *p, *p1;
+ Adr v;
+
+ p = r->prog;
+ if(debug['h'] && p->as == AMOVB && p->from.type == D_OREG) /* byte load */
+ return 0;
+ v = *a;
+ v.type = D_REG;
+ r1 = findpre(r, &v);
+ if(r1 != R) {
+ p1 = r1->prog;
+ if(p1->to.type == D_REG && p1->to.reg == v.reg)
+ switch(p1->as) {
+ case AADD:
+ if(p1->from.type == D_REG ||
+ (p1->from.type == D_SHIFT && (p1->from.offset&(1<<4)) == 0 &&
+ (p->as != AMOVB || (a == &p->from && (p1->from.offset&~0xf) == 0))) ||
+ (p1->from.type == D_CONST &&
+ p1->from.offset > -4096 && p1->from.offset < 4096))
+ if(nochange(uniqs(r1), r, p1)) {
+ if(a != &p->from || v.reg != p->to.reg)
+ if (finduse(r->s1, &v)) {
+ if(p1->reg == NREG || p1->reg == v.reg)
+ /* pre-indexing */
+ p->scond |= C_WBIT;
+ else return 0;
+ }
+ switch (p1->from.type) {
+ case D_REG:
+ /* register offset */
+ a->type = D_SHIFT;
+ a->offset = p1->from.reg;
+ break;
+ case D_SHIFT:
+ /* scaled register offset */
+ a->type = D_SHIFT;
+ case D_CONST:
+ /* immediate offset */
+ a->offset = p1->from.offset;
+ break;
+ }
+ if(p1->reg != NREG)
+ a->reg = p1->reg;
+ excise(r1);
+ return 1;
+ }
+ break;
+ case AMOVW:
+ if(p1->from.type == D_REG)
+ if((r2 = findinc(r1, r, &p1->from)) != R) {
+ for(r3=uniqs(r2); r3->prog->as==ANOP; r3=uniqs(r3))
+ ;
+ if(r3 == r) {
+ /* post-indexing */
+ p1 = r2->prog;
+ a->reg = p1->to.reg;
+ a->offset = p1->from.offset;
+ p->scond |= C_PBIT;
+ if(!finduse(r, &r1->prog->to))
+ excise(r1);
+ excise(r2);
+ return 1;
+ }
+ }
+ break;
+ }
+ }
+ if(a != &p->from || a->reg != p->to.reg)
+ if((r1 = findinc(r, R, &v)) != R) {
+ /* post-indexing */
+ p1 = r1->prog;
+ a->offset = p1->from.offset;
+ p->scond |= C_PBIT;
+ excise(r1);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * return
+ * 1 if v only used (and substitute),
+ * 2 if read-alter-rewrite
+ * 3 if set
+ * 4 if set and used
+ * 0 otherwise (not touched)
+ */
+int
+copyu(Prog *p, Adr *v, Adr *s)
+{
+
+ switch(p->as) {
+
+ default:
+ if(debug['P'])
+ print(" (?)");
+ return 2;
+
+ case AMOVM:
+ if(v->type != D_REG)
+ return 0;
+ if(p->from.type == D_CONST) { /* read reglist, read/rar */
+ if(s != A) {
+ if(p->from.offset&(1<<v->reg))
+ return 1;
+ if(copysub(&p->to, v, s, 1))
+ return 1;
+ return 0;
+ }
+ if(copyau(&p->to, v)) {
+ if(p->scond&C_WBIT)
+ return 2;
+ return 1;
+ }
+ if(p->from.offset&(1<<v->reg))
+ return 1;
+ } else { /* read/rar, write reglist */
+ if(s != A) {
+ if(p->to.offset&(1<<v->reg))
+ return 1;
+ if(copysub(&p->from, v, s, 1))
+ return 1;
+ return 0;
+ }
+ if(copyau(&p->from, v)) {
+ if(p->scond&C_WBIT)
+ return 2;
+ if(p->to.offset&(1<<v->reg))
+ return 4;
+ return 1;
+ }
+ if(p->to.offset&(1<<v->reg))
+ return 3;
+ }
+ return 0;
+
+ case ANOP: /* read, write */
+ case AMOVW:
+ case AMOVF:
+ case AMOVD:
+ case AMOVH:
+ case AMOVHU:
+ case AMOVB:
+ case AMOVBU:
+ case AMOVDW:
+ case AMOVWD:
+ case AMOVFD:
+ case AMOVDF:
+ if(p->scond&(C_WBIT|C_PBIT))
+ if(v->type == D_REG) {
+ if(p->from.type == D_OREG || p->from.type == D_SHIFT) {
+ if(p->from.reg == v->reg)
+ return 2;
+ } else {
+ if(p->to.reg == v->reg)
+ return 2;
+ }
+ }
+ if(s != A) {
+ if(copysub(&p->from, v, s, 1))
+ return 1;
+ if(!copyas(&p->to, v))
+ if(copysub(&p->to, v, s, 1))
+ return 1;
+ return 0;
+ }
+ if(copyas(&p->to, v)) {
+ if(copyau(&p->from, v))
+ return 4;
+ return 3;
+ }
+ if(copyau(&p->from, v))
+ return 1;
+ if(copyau(&p->to, v))
+ return 1;
+ return 0;
+
+ case AADD: /* read, read, write */
+ case ASUB:
+ case ARSB:
+ case ASLL:
+ case ASRL:
+ case ASRA:
+ case AORR:
+ case AAND:
+ case AEOR:
+ case AMUL:
+ case ADIV:
+ case ADIVU:
+ case AADDF:
+ case AADDD:
+ case ASUBF:
+ case ASUBD:
+ case AMULF:
+ case AMULD:
+ case ADIVF:
+ case ADIVD:
+
+ case ACMPF:
+ case ACMPD:
+ case ACMP:
+ case ACMN:
+ case ACASE:
+ if(s != A) {
+ if(copysub(&p->from, v, s, 1))
+ return 1;
+ if(copysub1(p, v, s, 1))
+ return 1;
+ if(!copyas(&p->to, v))
+ if(copysub(&p->to, v, s, 1))
+ return 1;
+ return 0;
+ }
+ if(copyas(&p->to, v)) {
+ if(p->reg == NREG)
+ p->reg = p->to.reg;
+ if(copyau(&p->from, v))
+ return 4;
+ if(copyau1(p, v))
+ return 4;
+ return 3;
+ }
+ if(copyau(&p->from, v))
+ return 1;
+ if(copyau1(p, v))
+ return 1;
+ if(copyau(&p->to, v))
+ return 1;
+ return 0;
+
+ case ABEQ: /* read, read */
+ case ABNE:
+ case ABCS:
+ case ABHS:
+ case ABCC:
+ case ABLO:
+ case ABMI:
+ case ABPL:
+ case ABVS:
+ case ABVC:
+ case ABHI:
+ case ABLS:
+ case ABGE:
+ case ABLT:
+ case ABGT:
+ case ABLE:
+ if(s != A) {
+ if(copysub(&p->from, v, s, 1))
+ return 1;
+ return copysub1(p, v, s, 1);
+ }
+ if(copyau(&p->from, v))
+ return 1;
+ if(copyau1(p, v))
+ return 1;
+ return 0;
+
+ case AB: /* funny */
+ if(s != A) {
+ if(copysub(&p->to, v, s, 1))
+ return 1;
+ return 0;
+ }
+ if(copyau(&p->to, v))
+ return 1;
+ return 0;
+
+ case ARET: /* funny */
+ if(v->type == D_REG)
+ if(v->reg == REGRET)
+ return 2;
+ if(v->type == D_FREG)
+ if(v->reg == FREGRET)
+ return 2;
+
+ case ABL: /* funny */
+ if(v->type == D_REG) {
+ if(v->reg <= REGEXT && v->reg > exregoffset)
+ return 2;
+ if(v->reg == REGARG)
+ return 2;
+ }
+ if(v->type == D_FREG)
+ if(v->reg <= FREGEXT && v->reg > exfregoffset)
+ return 2;
+
+ if(s != A) {
+ if(copysub(&p->to, v, s, 1))
+ return 1;
+ return 0;
+ }
+ if(copyau(&p->to, v))
+ return 4;
+ return 3;
+
+ case ATEXT: /* funny */
+ if(v->type == D_REG)
+ if(v->reg == REGARG)
+ return 3;
+ return 0;
+ }
+ return 0;
+}
+
+int
+a2type(Prog *p)
+{
+
+ switch(p->as) {
+
+ case ACMP:
+ case ACMN:
+
+ case AADD:
+ case ASUB:
+ case ARSB:
+ case ASLL:
+ case ASRL:
+ case ASRA:
+ case AORR:
+ case AAND:
+ case AEOR:
+ case AMUL:
+ case ADIV:
+ case ADIVU:
+ return D_REG;
+
+ case ACMPF:
+ case ACMPD:
+
+ case AADDF:
+ case AADDD:
+ case ASUBF:
+ case ASUBD:
+ case AMULF:
+ case AMULD:
+ case ADIVF:
+ case ADIVD:
+ return D_FREG;
+ }
+ return D_NONE;
+}
+
+/*
+ * direct reference,
+ * could be set/use depending on
+ * semantics
+ */
+int
+copyas(Adr *a, Adr *v)
+{
+
+ if(regtyp(v)) {
+ if(a->type == v->type)
+ if(a->reg == v->reg)
+ return 1;
+ } else
+ if(v->type == D_CONST) { /* for constprop */
+ if(a->type == v->type)
+ if(a->name == v->name)
+ if(a->sym == v->sym)
+ if(a->reg == v->reg)
+ if(a->offset == v->offset)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * either direct or indirect
+ */
+int
+copyau(Adr *a, Adr *v)
+{
+
+ if(copyas(a, v))
+ return 1;
+ if(v->type == D_REG) {
+ if(a->type == D_CONST && a->reg != NREG) {
+ if(v->reg == a->reg)
+ return 1;
+ } else
+ if(a->type == D_OREG) {
+ if(v->reg == a->reg)
+ return 1;
+ } else
+ if(a->type == D_REGREG) {
+ if(v->reg == a->reg)
+ return 1;
+ if(a->offset == v->reg)
+ return 1;
+ } else
+ if(a->type == D_SHIFT) {
+ if((a->offset&0xf) == v->reg)
+ return 1;
+ if((a->offset&(1<<4)) && (a->offset>>8) == v->reg)
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int
+copyau1(Prog *p, Adr *v)
+{
+
+ if(regtyp(v)) {
+ if(a2type(p) == v->type)
+ if(p->reg == v->reg) {
+ if(a2type(p) != v->type)
+ print("botch a2type %P\n", p);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * substitute s for v in a
+ * return failure to substitute
+ */
+int
+copysub(Adr *a, Adr *v, Adr *s, int f)
+{
+
+ if(f)
+ if(copyau(a, v)) {
+ if(a->type == D_SHIFT) {
+ if((a->offset&0xf) == v->reg)
+ a->offset = (a->offset&~0xf)|s->reg;
+ if((a->offset&(1<<4)) && (a->offset>>8) == v->reg)
+ a->offset = (a->offset&~(0xf<<8))|(s->reg<<8);
+ } else
+ if(a->type == D_REGREG) {
+ if(a->offset == v->reg)
+ a->offset = s->reg;
+ if(a->reg == v->reg)
+ a->reg = s->reg;
+ } else
+ a->reg = s->reg;
+ }
+ return 0;
+}
+
+int
+copysub1(Prog *p1, Adr *v, Adr *s, int f)
+{
+
+ if(f)
+ if(copyau1(p1, v))
+ p1->reg = s->reg;
+ return 0;
+}
+
+struct {
+ int opcode;
+ int notopcode;
+ int scond;
+ int notscond;
+} predinfo[] = {
+ { ABEQ, ABNE, 0x0, 0x1, },
+ { ABNE, ABEQ, 0x1, 0x0, },
+ { ABCS, ABCC, 0x2, 0x3, },
+ { ABHS, ABLO, 0x2, 0x3, },
+ { ABCC, ABCS, 0x3, 0x2, },
+ { ABLO, ABHS, 0x3, 0x2, },
+ { ABMI, ABPL, 0x4, 0x5, },
+ { ABPL, ABMI, 0x5, 0x4, },
+ { ABVS, ABVC, 0x6, 0x7, },
+ { ABVC, ABVS, 0x7, 0x6, },
+ { ABHI, ABLS, 0x8, 0x9, },
+ { ABLS, ABHI, 0x9, 0x8, },
+ { ABGE, ABLT, 0xA, 0xB, },
+ { ABLT, ABGE, 0xB, 0xA, },
+ { ABGT, ABLE, 0xC, 0xD, },
+ { ABLE, ABGT, 0xD, 0xC, },
+};
+
+typedef struct {
+ Reg *start;
+ Reg *last;
+ Reg *end;
+ int len;
+} Joininfo;
+
+enum {
+ Join,
+ Split,
+ End,
+ Branch,
+ Setcond,
+ Toolong
+};
+
+enum {
+ Falsecond,
+ Truecond,
+ Delbranch,
+ Keepbranch
+};
+
+int
+isbranch(Prog *p)
+{
+ return (ABEQ <= p->as) && (p->as <= ABLE);
+}
+
+int
+predicable(Prog *p)
+{
+ switch(p->as) {
+ case ANOP:
+ case AXXX:
+ case ADATA:
+ case AGLOBL:
+ case AGOK:
+ case AHISTORY:
+ case ANAME:
+ case ASIGNAME:
+ case ATEXT:
+ case AWORD:
+ case ABCASE:
+ case ACASE:
+ return 0;
+ }
+ if(isbranch(p))
+ return 0;
+ return 1;
+}
+
+/*
+ * Depends on an analysis of the encodings performed by 5l.
+ * These seem to be all of the opcodes that lead to the "S" bit
+ * being set in the instruction encodings.
+ *
+ * C_SBIT may also have been set explicitly in p->scond.
+ */
+int
+modifiescpsr(Prog *p)
+{
+ switch(p->as) {
+ case ATST:
+ case ATEQ:
+ case ACMN:
+ case ACMP:
+ case AMULU:
+ case ADIVU:
+ case AMUL:
+ case ADIV:
+ case AMOD:
+ case AMODU:
+ case ABL:
+ return 1;
+ }
+ if(p->scond & C_SBIT)
+ return 1;
+ return 0;
+}
+
+/*
+ * Find the maximal chain of instructions starting with r which could
+ * be executed conditionally
+ */
+int
+joinsplit(Reg *r, Joininfo *j)
+{
+ j->start = r;
+ j->last = r;
+ j->len = 0;
+ do {
+ if (r->p2 && (r->p1 || r->p2->p2link)) {
+ j->end = r;
+ return Join;
+ }
+ if (r->s1 && r->s2) {
+ j->end = r;
+ return Split;
+ }
+ j->last = r;
+ if (r->prog->as != ANOP)
+ j->len++;
+ if (!r->s1 && !r->s2) {
+ j->end = r->link;
+ return End;
+ }
+ if (r->s2) {
+ j->end = r->s2;
+ return Branch;
+ }
+ if (modifiescpsr(r->prog)) {
+ j->end = r->s1;
+ return Setcond;
+ }
+ r = r->s1;
+ } while (j->len < 4);
+ j->end = r;
+ return Toolong;
+}
+
+Reg*
+successor(Reg *r)
+{
+ if(r->s1)
+ return r->s1;
+ else
+ return r->s2;
+}
+
+void
+applypred(Reg *rstart, Joininfo *j, int cond, int branch)
+{
+ int pred;
+ Reg *r;
+
+ if(j->len == 0)
+ return;
+ if(cond == Truecond)
+ pred = predinfo[rstart->prog->as - ABEQ].scond;
+ else
+ pred = predinfo[rstart->prog->as - ABEQ].notscond;
+
+ for(r = j->start;; r = successor(r)) {
+ if (r->prog->as == AB) {
+ if (r != j->last || branch == Delbranch)
+ excise(r);
+ else {
+ if (cond == Truecond)
+ r->prog->as = predinfo[rstart->prog->as - ABEQ].opcode;
+ else
+ r->prog->as = predinfo[rstart->prog->as - ABEQ].notopcode;
+ }
+ }
+ else
+ if (predicable(r->prog))
+ r->prog->scond = (r->prog->scond&~C_SCOND)|pred;
+ if (r->s1 != r->link) {
+ r->s1 = r->link;
+ r->link->p1 = r;
+ }
+ if (r == j->last)
+ break;
+ }
+}
+
+void
+predicate(void)
+{
+ Reg *r;
+ int t1, t2;
+ Joininfo j1, j2;
+
+ for(r=firstr; r!=R; r=r->link) {
+ if (isbranch(r->prog)) {
+ t1 = joinsplit(r->s1, &j1);
+ t2 = joinsplit(r->s2, &j2);
+ if(j1.last->link != j2.start)
+ continue;
+ if(j1.end == j2.end)
+ if((t1 == Branch && (t2 == Join || t2 == Setcond)) ||
+ (t2 == Join && (t1 == Join || t1 == Setcond))) {
+ applypred(r, &j1, Falsecond, Delbranch);
+ applypred(r, &j2, Truecond, Delbranch);
+ excise(r);
+ continue;
+ }
+ if(t1 == End || t1 == Branch) {
+ applypred(r, &j1, Falsecond, Keepbranch);
+ excise(r);
+ continue;
+ }
+ }
+ }
+}
+
+int
+isdconst(Addr *a)
+{
+ if(a->type == D_CONST && a->reg == NREG)
+ return 1;
+ return 0;
+}
diff --git a/src/cmd/5g/reg.c b/src/cmd/5g/reg.c
new file mode 100644
index 000000000..5011e75cc
--- /dev/null
+++ b/src/cmd/5g/reg.c
@@ -0,0 +1,1329 @@
+// Inferno utils/5c/reg.c
+// http://code.google.com/p/inferno-os/source/browse/utils/5g/reg.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#include "gg.h"
+#include "opt.h"
+
+#define P2R(p) (Reg*)(p->reg)
+
+ void addsplits(void);
+ int noreturn(Prog *p);
+static int first = 0;
+
+Reg*
+rega(void)
+{
+ Reg *r;
+
+ r = freer;
+ if(r == R) {
+ r = mal(sizeof(*r));
+ } else
+ freer = r->link;
+
+ *r = zreg;
+ return r;
+}
+
+int
+rcmp(const void *a1, const void *a2)
+{
+ Rgn *p1, *p2;
+ int c1, c2;
+
+ p1 = (Rgn*)a1;
+ p2 = (Rgn*)a2;
+ c1 = p2->cost;
+ c2 = p1->cost;
+ if(c1 -= c2)
+ return c1;
+ return p2->varno - p1->varno;
+}
+
+static void
+setoutvar(void)
+{
+ Type *t;
+ Node *n;
+ Addr a;
+ Iter save;
+ Bits bit;
+ int z;
+
+ t = structfirst(&save, getoutarg(curfn->type));
+ while(t != T) {
+ n = nodarg(t, 1);
+ a = zprog.from;
+ naddr(n, &a, 0);
+ bit = mkvar(R, &a, 0);
+ for(z=0; z<BITS; z++)
+ ovar.b[z] |= bit.b[z];
+ t = structnext(&save);
+ }
+//if(bany(b))
+//print("ovars = %Q\n", &ovar);
+}
+
+void
+excise(Reg *r)
+{
+ Prog *p;
+
+ p = r->prog;
+ p->as = ANOP;
+ p->scond = zprog.scond;
+ p->from = zprog.from;
+ p->to = zprog.to;
+ p->reg = zprog.reg;
+}
+
+static void
+setaddrs(Bits bit)
+{
+ int i, n;
+ Var *v;
+ Sym *s;
+
+ while(bany(&bit)) {
+ // convert each bit to a variable
+ i = bnum(bit);
+ s = var[i].sym;
+ n = var[i].name;
+ bit.b[i/32] &= ~(1L<<(i%32));
+
+ // disable all pieces of that variable
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->sym == s && v->name == n)
+ v->addr = 2;
+ }
+ }
+}
+
+void
+regopt(Prog *firstp)
+{
+ Reg *r, *r1;
+ Prog *p;
+ int i, z, nr;
+ uint32 vreg;
+ Bits bit;
+
+return; // disabled for the moment
+ if(first == 0) {
+ fmtinstall('Q', Qconv);
+ }
+ first++;
+
+ if(debug['K']) {
+ if(first != 20)
+ return;
+// debug['R'] = 2;
+// debug['P'] = 2;
+ print("optimizing %S\n", curfn->nname->sym);
+ }
+
+ // count instructions
+ nr = 0;
+ for(p=firstp; p!=P; p=p->link)
+ nr++;
+
+ // if too big dont bother
+ if(nr >= 10000) {
+// print("********** %S is too big (%d)\n", curfn->nname->sym, nr);
+ return;
+ }
+
+ r1 = R;
+ firstr = R;
+ lastr = R;
+ nvar = 0;
+ regbits = 0;
+ for(z=0; z<BITS; z++) {
+ externs.b[z] = 0;
+ params.b[z] = 0;
+ consts.b[z] = 0;
+ addrs.b[z] = 0;
+ ovar.b[z] = 0;
+ }
+
+ // build list of return variables
+ setoutvar();
+
+ /*
+ * pass 1
+ * build aux data structure
+ * allocate pcs
+ * find use and set of variables
+ */
+ nr = 0;
+ for(p=firstp; p != P; p = p->link) {
+ switch(p->as) {
+ case ADATA:
+ case AGLOBL:
+ case ANAME:
+ case ASIGNAME:
+ continue;
+ }
+ r = rega();
+ nr++;
+ if(firstr == R) {
+ firstr = r;
+ lastr = r;
+ } else {
+ lastr->link = r;
+ r->p1 = lastr;
+ lastr->s1 = r;
+ lastr = r;
+ }
+ r->prog = p;
+ p->regp = r;
+
+ r1 = r->p1;
+ if(r1 != R) {
+ switch(r1->prog->as) {
+ case ARET:
+ case AB:
+ case ARFE:
+ r->p1 = R;
+ r1->s1 = R;
+ }
+ }
+
+ /*
+ * left side always read
+ */
+ bit = mkvar(r, &p->from, p->as==AMOVW);
+ for(z=0; z<BITS; z++)
+ r->use1.b[z] |= bit.b[z];
+
+ /*
+ * right side depends on opcode
+ */
+ bit = mkvar(r, &p->to, 0);
+ if(bany(&bit))
+ switch(p->as) {
+ default:
+ yyerror("reg: unknown op: %A", p->as);
+ break;
+
+ /*
+ * right side write
+ */
+ case ANOP:
+ case AMOVB:
+ case AMOVBU:
+ case AMOVH:
+ case AMOVHU:
+ case AMOVW:
+ case AMOVF:
+ case AMOVD:
+ for(z=0; z<BITS; z++)
+ r->set.b[z] |= bit.b[z];
+ break;
+
+ /*
+ * funny
+ */
+ case ABL:
+ for(z=0; z<BITS; z++)
+ addrs.b[z] |= bit.b[z];
+ break;
+ }
+
+ if(p->as == AMOVM) {
+ z = p->to.offset;
+ if(p->from.type == D_CONST)
+ z = p->from.offset;
+ for(i=0; z; i++) {
+ if(z&1)
+ regbits |= RtoB(i);
+ z >>= 1;
+ }
+ }
+ }
+ if(firstr == R)
+ return;
+
+ /*
+ * pass 2
+ * turn branch references to pointers
+ * build back pointers
+ */
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ if(p->to.type == D_BRANCH) {
+ if(p->to.branch == P)
+ fatal("pnil %P", p);
+ r1 = p->to.branch->regp;
+ if(r1 == R)
+ fatal("rnil %P", p);
+ if(r1 == r) {
+ //fatal("ref to self %P", p);
+ continue;
+ }
+ r->s2 = r1;
+ r->p2link = r1->p2;
+ r1->p2 = r;
+ }
+ }
+ if(debug['R']) {
+ p = firstr->prog;
+ print("\n%L %D\n", p->lineno, &p->from);
+ }
+
+ /*
+ * pass 2.5
+ * find looping structure
+ */
+ for(r = firstr; r != R; r = r->link)
+ r->active = 0;
+ change = 0;
+ loopit(firstr, nr);
+
+ /*
+ * pass 3
+ * iterate propagating usage
+ * back until flow graph is complete
+ */
+loop1:
+ change = 0;
+ for(r = firstr; r != R; r = r->link)
+ r->active = 0;
+ for(r = firstr; r != R; r = r->link)
+ if(r->prog->as == ARET)
+ prop(r, zbits, zbits);
+loop11:
+ /* pick up unreachable code */
+ i = 0;
+ for(r = firstr; r != R; r = r1) {
+ r1 = r->link;
+ if(r1 && r1->active && !r->active) {
+ prop(r, zbits, zbits);
+ i = 1;
+ }
+ }
+ if(i)
+ goto loop11;
+ if(change)
+ goto loop1;
+
+
+ /*
+ * pass 4
+ * iterate propagating register/variable synchrony
+ * forward until graph is complete
+ */
+loop2:
+ change = 0;
+ for(r = firstr; r != R; r = r->link)
+ r->active = 0;
+ synch(firstr, zbits);
+ if(change)
+ goto loop2;
+
+ addsplits();
+
+ if(debug['R'] > 1) {
+ print("\nprop structure:\n");
+ for(r = firstr; r != R; r = r->link) {
+ print("%d:%P", r->loop, r->prog);
+ for(z=0; z<BITS; z++) {
+ bit.b[z] = r->set.b[z] |
+ r->refahead.b[z] | r->calahead.b[z] |
+ r->refbehind.b[z] | r->calbehind.b[z] |
+ r->use1.b[z] | r->use2.b[z];
+ }
+
+ if(bany(&bit)) {
+ print("\t");
+ if(bany(&r->use1))
+ print(" u1=%Q", r->use1);
+ if(bany(&r->use2))
+ print(" u2=%Q", r->use2);
+ if(bany(&r->set))
+ print(" st=%Q", r->set);
+ if(bany(&r->refahead))
+ print(" ra=%Q", r->refahead);
+ if(bany(&r->calahead))
+ print(" ca=%Q", r->calahead);
+ if(bany(&r->refbehind))
+ print(" rb=%Q", r->refbehind);
+ if(bany(&r->calbehind))
+ print(" cb=%Q", r->calbehind);
+ }
+ print("\n");
+ }
+ }
+
+ /*
+ * pass 5
+ * isolate regions
+ * calculate costs (paint1)
+ */
+ r = firstr;
+ if(r) {
+ for(z=0; z<BITS; z++)
+ bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
+ ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
+ if(bany(&bit) & !r->refset) {
+ // should never happen - all variables are preset
+ if(debug['w'])
+ print("%L: used and not set: %Q\n", r->prog->lineno, bit);
+ r->refset = 1;
+ }
+ }
+
+ for(r = firstr; r != R; r = r->link)
+ r->act = zbits;
+ rgp = region;
+ nregion = 0;
+ for(r = firstr; r != R; r = r->link) {
+ for(z=0; z<BITS; z++)
+ bit.b[z] = r->set.b[z] &
+ ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
+ if(bany(&bit) && !r->refset) {
+ if(debug['w'])
+ print("%L: set and not used: %Q\n", r->prog->lineno, bit);
+ r->refset = 1;
+ excise(r);
+ }
+ for(z=0; z<BITS; z++)
+ bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
+ while(bany(&bit)) {
+ i = bnum(bit);
+ rgp->enter = r;
+ rgp->varno = i;
+ change = 0;
+ if(debug['R'] > 1)
+ print("\n");
+ paint1(r, i);
+ bit.b[i/32] &= ~(1L<<(i%32));
+ if(change <= 0) {
+ if(debug['R'])
+ print("%L $%d: %Q\n",
+ r->prog->lineno, change, blsh(i));
+ continue;
+ }
+ rgp->cost = change;
+ nregion++;
+ if(nregion >= NRGN) {
+ if(debug['R'] > 1)
+ print("too many regions\n");
+ goto brk;
+ }
+ rgp++;
+ }
+ }
+brk:
+ qsort(region, nregion, sizeof(region[0]), rcmp);
+
+ /*
+ * pass 6
+ * determine used registers (paint2)
+ * replace code (paint3)
+ */
+ rgp = region;
+ for(i=0; i<nregion; i++) {
+ bit = blsh(rgp->varno);
+ vreg = paint2(rgp->enter, rgp->varno);
+ vreg = allreg(vreg, rgp);
+ if(debug['R']) {
+ if(rgp->regno >= NREG)
+ print("%L $%d F%d: %Q\n",
+ rgp->enter->prog->lineno,
+ rgp->cost,
+ rgp->regno-NREG,
+ bit);
+ else
+ print("%L $%d R%d: %Q\n",
+ rgp->enter->prog->lineno,
+ rgp->cost,
+ rgp->regno,
+ bit);
+ }
+ if(rgp->regno != 0)
+ paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
+ rgp++;
+ }
+ /*
+ * pass 7
+ * peep-hole on basic block
+ */
+ if(!debug['R'] || debug['P']) {
+// peep();
+ }
+
+ /*
+ * last pass
+ * eliminate nops
+ * free aux structures
+ */
+ for(p = firstp; p != P; p = p->link) {
+ while(p->link != P && p->link->as == ANOP)
+ p->link = p->link->link;
+ if(p->to.type == D_BRANCH)
+ while(p->to.branch != P && p->to.branch->as == ANOP)
+ p->to.branch = p->to.branch->link;
+ }
+ if(r1 != R) {
+ r1->link = freer;
+ freer = firstr;
+ }
+}
+
+void
+addsplits(void)
+{
+ Reg *r, *r1;
+ int z, i;
+ Bits bit;
+
+ for(r = firstr; r != R; r = r->link) {
+ if(r->loop > 1)
+ continue;
+ if(r->prog->as == ABL)
+ continue;
+ for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
+ if(r1->loop <= 1)
+ continue;
+ for(z=0; z<BITS; z++)
+ bit.b[z] = r1->calbehind.b[z] &
+ (r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
+ ~(r->calahead.b[z] & addrs.b[z]);
+ while(bany(&bit)) {
+ i = bnum(bit);
+ bit.b[i/32] &= ~(1L << (i%32));
+ }
+ }
+ }
+}
+
+/*
+ * add mov b,rn
+ * just after r
+ */
+void
+addmove(Reg *r, int bn, int rn, int f)
+{
+ Prog *p, *p1;
+ Adr *a;
+ Var *v;
+
+ p1 = mal(sizeof(*p1));
+ *p1 = zprog;
+ p = r->prog;
+
+ p1->link = p->link;
+ p->link = p1;
+ p1->lineno = p->lineno;
+
+ v = var + bn;
+
+ a = &p1->to;
+ a->sym = v->sym;
+ a->name = v->name;
+ a->offset = v->offset;
+ a->etype = v->etype;
+ a->type = D_OREG;
+ if(a->etype == TARRAY || a->sym == S)
+ a->type = D_CONST;
+
+ switch(v->etype) {
+ default:
+ print("What is this %E\n", v->etype);
+
+ case TINT32:
+ case TUINT32:
+ case TPTR32:
+ case TBOOL:
+ p1->as = AMOVW;
+ break;
+ case TINT8:
+ case TUINT8:
+ p1->as = AMOVB;
+ break;
+ case TINT16:
+ case TUINT16:
+ p1->as = AMOVH;
+ break;
+ case TFLOAT32:
+ p1->as = AMOVF;
+ break;
+ case TFLOAT64:
+ p1->as = AMOVD;
+ break;
+ }
+
+ p1->from.type = D_REG;
+ p1->from.reg = rn;
+ if(rn >= NREG) {
+ p1->from.type = D_FREG;
+ p1->from.reg = rn-NREG;
+ }
+ if(!f) {
+ p1->from = *a;
+ *a = zprog.from;
+ a->type = D_REG;
+ a->reg = rn;
+ if(rn >= NREG) {
+ a->type = D_FREG;
+ a->reg = rn-NREG;
+ }
+ if(v->etype == TUINT8)
+ p1->as = AMOVBU;
+ if(v->etype == TUINT16)
+ p1->as = AMOVHU;
+ }
+ if(debug['R'])
+ print("%P\t.a%P\n", p, p1);
+}
+
+static int
+overlap(int32 o1, int w1, int32 o2, int w2)
+{
+ int32 t1, t2;
+
+ t1 = o1+w1;
+ t2 = o2+w2;
+
+ if(!(t1 > o2 && t2 > o1))
+ return 0;
+
+ return 1;
+}
+
+Bits
+mkvar(Reg *r, Adr *a, int docon)
+{
+ Var *v;
+ int i, t, n, et, z, w, flag;
+ int32 o;
+ Bits bit;
+ Sym *s;
+
+ // mark registers used
+ t = a->type;
+ n = D_NONE;
+
+ switch(t) {
+ default:
+ print("type %d %d %D\n", t, a->name, a);
+ goto none;
+
+ case D_CONST:
+ if(a->reg != NREG)
+ r->regu |= RtoB(a->reg);
+ // fallthrough
+
+ case D_NONE:
+ case D_FCONST:
+ case D_BRANCH:
+ goto none;
+
+ case D_REGREG:
+ if(a->offset != NREG)
+ r->regu |= RtoB(a->offset);
+ // fallthrough
+
+ case D_REG:
+ case D_SHIFT:
+ case D_OREG:
+ if(a->reg != NREG)
+ r->regu |= RtoB(a->reg);
+ break;
+
+ case D_FREG:
+ if(a->reg != NREG)
+ r->regu |= FtoB(a->reg);
+ break;
+ }
+
+ switch(a->name) {
+ default:
+ goto none;
+
+ case D_EXTERN:
+ case D_STATIC:
+ case D_AUTO:
+ case D_PARAM:
+ n = a->name;
+ break;
+ }
+
+ flag = 0;
+// if(a->pun)
+// flag = 1;
+
+ s = a->sym;
+ if(s == S)
+ goto none;
+ if(s->name[0] == '.')
+ goto none;
+ et = a->etype;
+ o = a->offset;
+ w = a->width;
+
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->sym == s && v->name == n) {
+ if(v->offset == o)
+ if(v->etype == et)
+ if(v->width == w)
+ if(!flag)
+ return blsh(i);
+
+ // if they overlaps, disable both
+ if(overlap(v->offset, v->width, o, w)) {
+ v->addr = 1;
+ flag = 1;
+ }
+ }
+ }
+
+ switch(et) {
+ case 0:
+ case TFUNC:
+ case TARRAY:
+ case TSTRING:
+ goto none;
+ }
+
+ if(nvar >= NVAR) {
+ if(debug['w'] > 1 && s)
+ fatal("variable not optimized: %D", a);
+ goto none;
+ }
+
+ i = nvar;
+ nvar++;
+ v = var+i;
+ v->sym = s;
+ v->offset = o;
+ v->name = n;
+// v->gotype = a->gotype;
+ v->etype = et;
+ v->width = w;
+ v->addr = flag; // funny punning
+
+ if(debug['R'])
+ print("bit=%2d et=%E pun=%d %D\n", i, et, flag, a);
+
+out:
+ bit = blsh(i);
+ if(n == D_EXTERN || n == D_STATIC)
+ for(z=0; z<BITS; z++)
+ externs.b[z] |= bit.b[z];
+ if(n == D_PARAM)
+ for(z=0; z<BITS; z++)
+ params.b[z] |= bit.b[z];
+
+// if(t == D_CONST) {
+// if(s == S) {
+// for(z=0; z<BITS; z++)
+// consts.b[z] |= bit.b[z];
+// return bit;
+// }
+// if(et != TARRAY)
+// for(z=0; z<BITS; z++)
+// addrs.b[z] |= bit.b[z];
+// for(z=0; z<BITS; z++)
+// params.b[z] |= bit.b[z];
+// return bit;
+// }
+// if(t != D_OREG)
+// goto none;
+
+ return bit;
+
+none:
+ return zbits;
+}
+
+void
+prop(Reg *r, Bits ref, Bits cal)
+{
+ Reg *r1, *r2;
+ int z;
+
+ for(r1 = r; r1 != R; r1 = r1->p1) {
+ for(z=0; z<BITS; z++) {
+ ref.b[z] |= r1->refahead.b[z];
+ if(ref.b[z] != r1->refahead.b[z]) {
+ r1->refahead.b[z] = ref.b[z];
+ change++;
+ }
+ cal.b[z] |= r1->calahead.b[z];
+ if(cal.b[z] != r1->calahead.b[z]) {
+ r1->calahead.b[z] = cal.b[z];
+ change++;
+ }
+ }
+ switch(r1->prog->as) {
+ case ABL:
+ if(noreturn(r1->prog))
+ break;
+ for(z=0; z<BITS; z++) {
+ cal.b[z] |= ref.b[z] | externs.b[z];
+ ref.b[z] = 0;
+ }
+ break;
+
+ case ATEXT:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = 0;
+ ref.b[z] = 0;
+ }
+ break;
+
+ case ARET:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = externs.b[z] | ovar.b[z];
+ ref.b[z] = 0;
+ }
+ break;
+ }
+ for(z=0; z<BITS; z++) {
+ ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
+ r1->use1.b[z] | r1->use2.b[z];
+ cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
+ r1->refbehind.b[z] = ref.b[z];
+ r1->calbehind.b[z] = cal.b[z];
+ }
+ if(r1->active)
+ break;
+ r1->active = 1;
+ }
+ for(; r != r1; r = r->p1)
+ for(r2 = r->p2; r2 != R; r2 = r2->p2link)
+ prop(r2, r->refbehind, r->calbehind);
+}
+
+/*
+ * find looping structure
+ *
+ * 1) find reverse postordering
+ * 2) find approximate dominators,
+ * the actual dominators if the flow graph is reducible
+ * otherwise, dominators plus some other non-dominators.
+ * See Matthew S. Hecht and Jeffrey D. Ullman,
+ * "Analysis of a Simple Algorithm for Global Data Flow Problems",
+ * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
+ * Oct. 1-3, 1973, pp. 207-217.
+ * 3) find all nodes with a predecessor dominated by the current node.
+ * such a node is a loop head.
+ * recursively, all preds with a greater rpo number are in the loop
+ */
+int32
+postorder(Reg *r, Reg **rpo2r, int32 n)
+{
+ Reg *r1;
+
+ r->rpo = 1;
+ r1 = r->s1;
+ if(r1 && !r1->rpo)
+ n = postorder(r1, rpo2r, n);
+ r1 = r->s2;
+ if(r1 && !r1->rpo)
+ n = postorder(r1, rpo2r, n);
+ rpo2r[n] = r;
+ n++;
+ return n;
+}
+
+int32
+rpolca(int32 *idom, int32 rpo1, int32 rpo2)
+{
+ int32 t;
+
+ if(rpo1 == -1)
+ return rpo2;
+ while(rpo1 != rpo2){
+ if(rpo1 > rpo2){
+ t = rpo2;
+ rpo2 = rpo1;
+ rpo1 = t;
+ }
+ while(rpo1 < rpo2){
+ t = idom[rpo2];
+ if(t >= rpo2)
+ fatal("bad idom");
+ rpo2 = t;
+ }
+ }
+ return rpo1;
+}
+
+int
+doms(int32 *idom, int32 r, int32 s)
+{
+ while(s > r)
+ s = idom[s];
+ return s == r;
+}
+
+int
+loophead(int32 *idom, Reg *r)
+{
+ int32 src;
+
+ src = r->rpo;
+ if(r->p1 != R && doms(idom, src, r->p1->rpo))
+ return 1;
+ for(r = r->p2; r != R; r = r->p2link)
+ if(doms(idom, src, r->rpo))
+ return 1;
+ return 0;
+}
+
+void
+loopmark(Reg **rpo2r, int32 head, Reg *r)
+{
+ if(r->rpo < head || r->active == head)
+ return;
+ r->active = head;
+ r->loop += LOOP;
+ if(r->p1 != R)
+ loopmark(rpo2r, head, r->p1);
+ for(r = r->p2; r != R; r = r->p2link)
+ loopmark(rpo2r, head, r);
+}
+
+void
+loopit(Reg *r, int32 nr)
+{
+ Reg *r1;
+ int32 i, d, me;
+
+ if(nr > maxnr) {
+ rpo2r = mal(nr * sizeof(Reg*));
+ idom = mal(nr * sizeof(int32));
+ maxnr = nr;
+ }
+ d = postorder(r, rpo2r, 0);
+ if(d > nr)
+ fatal("too many reg nodes");
+ nr = d;
+ for(i = 0; i < nr / 2; i++){
+ r1 = rpo2r[i];
+ rpo2r[i] = rpo2r[nr - 1 - i];
+ rpo2r[nr - 1 - i] = r1;
+ }
+ for(i = 0; i < nr; i++)
+ rpo2r[i]->rpo = i;
+
+ idom[0] = 0;
+ for(i = 0; i < nr; i++){
+ r1 = rpo2r[i];
+ me = r1->rpo;
+ d = -1;
+ if(r1->p1 != R && r1->p1->rpo < me)
+ d = r1->p1->rpo;
+ for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
+ if(r1->rpo < me)
+ d = rpolca(idom, d, r1->rpo);
+ idom[i] = d;
+ }
+
+ for(i = 0; i < nr; i++){
+ r1 = rpo2r[i];
+ r1->loop++;
+ if(r1->p2 != R && loophead(idom, r1))
+ loopmark(rpo2r, i, r1);
+ }
+}
+
+void
+synch(Reg *r, Bits dif)
+{
+ Reg *r1;
+ int z;
+
+ for(r1 = r; r1 != R; r1 = r1->s1) {
+ for(z=0; z<BITS; z++) {
+ dif.b[z] = (dif.b[z] &
+ ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
+ r1->set.b[z] | r1->regdiff.b[z];
+ if(dif.b[z] != r1->regdiff.b[z]) {
+ r1->regdiff.b[z] = dif.b[z];
+ change++;
+ }
+ }
+ if(r1->active)
+ break;
+ r1->active = 1;
+ for(z=0; z<BITS; z++)
+ dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
+ if(r1->s2 != R)
+ synch(r1->s2, dif);
+ }
+}
+
+uint32
+allreg(uint32 b, Rgn *r)
+{
+ Var *v;
+ int i;
+
+ v = var + r->varno;
+ r->regno = 0;
+ switch(v->etype) {
+
+ default:
+ fatal("unknown etype %d/%E", bitno(b), v->etype);
+ break;
+
+ case TINT8:
+ case TUINT8:
+ case TINT16:
+ case TUINT16:
+ case TINT32:
+ case TUINT32:
+ case TINT:
+ case TUINT:
+ case TUINTPTR:
+ case TBOOL:
+ case TPTR32:
+ i = BtoR(~b);
+ if(i && r->cost >= 0) {
+ r->regno = i;
+ return RtoB(i);
+ }
+ break;
+
+ case TFLOAT32:
+ case TFLOAT64:
+ case TFLOAT:
+ i = BtoF(~b);
+ if(i && r->cost >= 0) {
+ r->regno = i+NREG;
+ return FtoB(i);
+ }
+ break;
+
+ case TINT64:
+ case TUINT64:
+ case TPTR64:
+ case TINTER:
+ case TSTRUCT:
+ case TARRAY:
+ break;
+ }
+ return 0;
+}
+
+void
+paint1(Reg *r, int bn)
+{
+ Reg *r1;
+ Prog *p;
+ int z;
+ uint32 bb;
+
+ z = bn/32;
+ bb = 1L<<(bn%32);
+ if(r->act.b[z] & bb)
+ return;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = r->p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(r1->act.b[z] & bb)
+ break;
+ r = r1;
+ }
+
+ if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
+ change -= CLOAD * r->loop;
+ if(debug['R'] > 1)
+ print("%d%P\td %Q $%d\n", r->loop,
+ r->prog, blsh(bn), change);
+ }
+ for(;;) {
+ r->act.b[z] |= bb;
+ p = r->prog;
+
+ if(r->use1.b[z] & bb) {
+ change += CREF * r->loop;
+ if(debug['R'] > 1)
+ print("%d%P\tu1 %Q $%d\n", r->loop,
+ p, blsh(bn), change);
+ }
+
+ if((r->use2.b[z]|r->set.b[z]) & bb) {
+ change += CREF * r->loop;
+ if(debug['R'] > 1)
+ print("%d%P\tu2 %Q $%d\n", r->loop,
+ p, blsh(bn), change);
+ }
+
+ if(STORE(r) & r->regdiff.b[z] & bb) {
+ change -= CLOAD * r->loop;
+ if(debug['R'] > 1)
+ print("%d%P\tst %Q $%d\n", r->loop,
+ p, blsh(bn), change);
+ }
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+ if(r1->refahead.b[z] & bb)
+ paint1(r1, bn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = r->s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ paint1(r1, bn);
+ r = r->s1;
+ if(r == R)
+ break;
+ if(r->act.b[z] & bb)
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+}
+
+uint32
+paint2(Reg *r, int bn)
+{
+ Reg *r1;
+ int z;
+ uint32 bb, vreg;
+
+ z = bn/32;
+ bb = 1L << (bn%32);
+ vreg = regbits;
+ if(!(r->act.b[z] & bb))
+ return vreg;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = r->p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(!(r1->act.b[z] & bb))
+ break;
+ r = r1;
+ }
+ for(;;) {
+ r->act.b[z] &= ~bb;
+
+ vreg |= r->regu;
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+ if(r1->refahead.b[z] & bb)
+ vreg |= paint2(r1, bn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = r->s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ vreg |= paint2(r1, bn);
+ r = r->s1;
+ if(r == R)
+ break;
+ if(!(r->act.b[z] & bb))
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+ return vreg;
+}
+
+void
+paint3(Reg *r, int bn, int32 rb, int rn)
+{
+ Reg *r1;
+ Prog *p;
+ int z;
+ uint32 bb;
+
+ z = bn/32;
+ bb = 1L << (bn%32);
+ if(r->act.b[z] & bb)
+ return;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = r->p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(r1->act.b[z] & bb)
+ break;
+ r = r1;
+ }
+
+ if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
+ addmove(r, bn, rn, 0);
+ for(;;) {
+ r->act.b[z] |= bb;
+ p = r->prog;
+
+ if(r->use1.b[z] & bb) {
+ if(debug['R'])
+ print("%P", p);
+ addreg(&p->from, rn);
+ if(debug['R'])
+ print("\t.c%P\n", p);
+ }
+ if((r->use2.b[z]|r->set.b[z]) & bb) {
+ if(debug['R'])
+ print("%P", p);
+ addreg(&p->to, rn);
+ if(debug['R'])
+ print("\t.c%P\n", p);
+ }
+
+ if(STORE(r) & r->regdiff.b[z] & bb)
+ addmove(r, bn, rn, 1);
+ r->regu |= rb;
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+ if(r1->refahead.b[z] & bb)
+ paint3(r1, bn, rb, rn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = r->s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ paint3(r1, bn, rb, rn);
+ r = r->s1;
+ if(r == R)
+ break;
+ if(r->act.b[z] & bb)
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+}
+
+void
+addreg(Adr *a, int rn)
+{
+
+ a->sym = 0;
+ a->name = D_NONE;
+ a->type = D_REG;
+ a->reg = rn;
+ if(rn >= NREG) {
+ a->type = D_FREG;
+ a->reg = rn-NREG;
+ }
+}
+
+/*
+ * bit reg
+ * 0 R0
+ * 1 R1
+ * ... ...
+ * 10 R10
+ */
+int32
+RtoB(int r)
+{
+
+ if(r < 2 || r >= REGTMP-2) // excluded R9 and R10 for m and g
+ return 0;
+ return 1L << r;
+}
+
+int
+BtoR(int32 b)
+{
+ b &= 0x01fcL; // excluded R9 and R10 for m and g
+ if(b == 0)
+ return 0;
+ return bitno(b);
+}
+
+/*
+ * bit reg
+ * 18 F2
+ * 19 F3
+ * ... ...
+ * 23 F7
+ */
+int32
+FtoB(int f)
+{
+
+ if(f < 2 || f > NFREG-1)
+ return 0;
+ return 1L << (f + 16);
+}
+
+int
+BtoF(int32 b)
+{
+
+ b &= 0xfc0000L;
+ if(b == 0)
+ return 0;
+ return bitno(b) - 16;
+}
+
+static Sym* symlist[10];
+
+int
+noreturn(Prog *p)
+{
+ Sym *s;
+ int i;
+
+ if(symlist[0] == S) {
+ symlist[0] = pkglookup("panicindex", runtimepkg);
+ symlist[1] = pkglookup("panicslice", runtimepkg);
+ symlist[2] = pkglookup("throwinit", runtimepkg);
+ symlist[3] = pkglookup("panic", runtimepkg);
+ }
+
+ s = p->to.sym;
+ if(s == S)
+ return 0;
+ for(i=0; symlist[i]!=S; i++)
+ if(s == symlist[i])
+ return 1;
+ return 0;
+}