summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRuss Cox <rsc@golang.org>2009-05-28 15:48:47 -0700
committerRuss Cox <rsc@golang.org>2009-05-28 15:48:47 -0700
commit627363e8a8fddda42af80877cfed1546f44d87cb (patch)
tree5a18fd0ec502eec4183934fa52ef107a2d3eb20e /src
parenta3e3696086d7f8bae5edc2e853259638fc01dd62 (diff)
downloadgolang-627363e8a8fddda42af80877cfed1546f44d87cb.tar.gz
better 64-bit handling in 8g.
fewer moves, fewer stupid LEALs. powser1 runs (with evaln commented out). beginnings of floating point. R=ken OCL=29540 CL=29543
Diffstat (limited to 'src')
-rw-r--r--src/cmd/6g/gsubr.c2
-rw-r--r--src/cmd/8c/list.c24
-rw-r--r--src/cmd/8g/cgen.c319
-rw-r--r--src/cmd/8g/gg.h12
-rw-r--r--src/cmd/8g/ggen.c2
-rwxr-xr-xsrc/cmd/8g/gsubr.c639
-rw-r--r--src/cmd/gc/const.c2
7 files changed, 695 insertions, 305 deletions
diff --git a/src/cmd/6g/gsubr.c b/src/cmd/6g/gsubr.c
index d8bd0767f..f9b092039 100644
--- a/src/cmd/6g/gsubr.c
+++ b/src/cmd/6g/gsubr.c
@@ -505,7 +505,7 @@ gmove(Node *f, Node *t)
switch(CASE(ft, tt)) {
default:
- fatal("gmove %T -> %T", f, t);
+ fatal("gmove %lT -> %lT", f->type, t->type);
/*
* integer copy and truncate
diff --git a/src/cmd/8c/list.c b/src/cmd/8c/list.c
index ec5ac9d60..c2ce5b295 100644
--- a/src/cmd/8c/list.c
+++ b/src/cmd/8c/list.c
@@ -57,7 +57,7 @@ Bconv(Fmt *fp)
if(str[0])
strcat(str, " ");
if(var[i].sym == S) {
- sprint(ss, "$%ld", var[i].offset);
+ sprint(ss, "$%d", var[i].offset);
s = ss;
} else
s = var[i].sym->name;
@@ -108,7 +108,7 @@ Dconv(Fmt *fp)
i = a->type;
if(i >= D_INDIR) {
if(a->offset)
- sprint(str, "%ld(%R)", a->offset, i-D_INDIR);
+ sprint(str, "%d(%R)", a->offset, i-D_INDIR);
else
sprint(str, "(%R)", i-D_INDIR);
goto brk;
@@ -117,7 +117,7 @@ Dconv(Fmt *fp)
default:
if(a->offset)
- sprint(str, "$%ld,%R", a->offset, i);
+ sprint(str, "$%d,%R", a->offset, i);
else
sprint(str, "%R", i);
break;
@@ -127,35 +127,35 @@ Dconv(Fmt *fp)
break;
case D_BRANCH:
- sprint(str, "%ld(PC)", a->offset-pc);
+ sprint(str, "%d(PC)", a->offset-pc);
break;
case D_EXTERN:
- sprint(str, "%s+%ld(SB)", a->sym->name, a->offset);
+ sprint(str, "%s+%d(SB)", a->sym->name, a->offset);
break;
case D_STATIC:
- sprint(str, "%s<>+%ld(SB)", a->sym->name,
+ sprint(str, "%s<>+%d(SB)", a->sym->name,
a->offset);
break;
case D_AUTO:
- sprint(str, "%s+%ld(SP)", a->sym->name, a->offset);
+ sprint(str, "%s+%d(SP)", a->sym->name, a->offset);
break;
case D_PARAM:
if(a->sym)
- sprint(str, "%s+%ld(FP)", a->sym->name, a->offset);
+ sprint(str, "%s+%d(FP)", a->sym->name, a->offset);
else
- sprint(str, "%ld(FP)", a->offset);
+ sprint(str, "%d(FP)", a->offset);
break;
case D_CONST:
- sprint(str, "$%ld", a->offset);
+ sprint(str, "$%d", a->offset);
break;
case D_CONST2:
- sprint(str, "$%ld-%ld", a->offset, a->offset2);
+ sprint(str, "$%d-%d", a->offset, a->offset2);
break;
case D_FCONST:
@@ -185,7 +185,7 @@ conv:
char* regstr[] =
{
- "AL", /*[D_AL]*/
+ "AL", /*[D_AL]*/
"CL",
"DL",
"BL",
diff --git a/src/cmd/8g/cgen.c b/src/cmd/8g/cgen.c
index 609d900b0..faa81d330 100644
--- a/src/cmd/8g/cgen.c
+++ b/src/cmd/8g/cgen.c
@@ -27,6 +27,42 @@ is64(Type *t)
return 0;
}
+int
+noconv(Type *t1, Type *t2)
+{
+ int e1, e2;
+
+ e1 = simtype[t1->etype];
+ e2 = simtype[t2->etype];
+
+ switch(e1) {
+ case TINT8:
+ case TUINT8:
+ return e2 == TINT8 || e2 == TUINT8;
+
+ case TINT16:
+ case TUINT16:
+ return e2 == TINT16 || e2 == TUINT16;
+
+ case TINT32:
+ case TUINT32:
+ case TPTR32:
+ return e2 == TINT32 || e2 == TUINT32 || e2 == TPTR32;
+
+ case TINT64:
+ case TUINT64:
+ case TPTR64:
+ return e2 == TINT64 || e2 == TUINT64 || e2 == TPTR64;
+
+ case TFLOAT32:
+ return e2 == TFLOAT32;
+
+ case TFLOAT64:
+ return e2 == TFLOAT64;
+ }
+ return 0;
+}
+
/*
* generate:
* res = n;
@@ -38,7 +74,7 @@ is64(Type *t)
void
cgen(Node *n, Node *res)
{
- Node *nl, *nr, *r, n1, n2, rr;
+ Node *nl, *nr, *r, n1, n2, rr, f0, f1;
Prog *p1, *p2, *p3;
int a;
@@ -65,13 +101,13 @@ cgen(Node *n, Node *res)
sgen(n, res, n->type->width);
return;
}
-
+
// if both are addressable, move
if(n->addable && res->addable) {
gmove(n, res);
return;
}
-
+
// if both are not addressable, use a temporary.
if(!n->addable && !res->addable) {
tempalloc(&n1, n->type);
@@ -96,7 +132,7 @@ cgen(Node *n, Node *res)
// 64-bit ops are hard on 32-bit machine.
if(is64(n->type) && cancgen64(n, res))
return;
-
+
// use ullman to pick operand to eval first.
nl = n->left;
nr = n->right;
@@ -112,12 +148,15 @@ cgen(Node *n, Node *res)
return;
}
+ if(isfloat[n->type->etype] && isfloat[nl->type->etype])
+ goto flt;
+
switch(n->op) {
default:
dump("cgen", n);
fatal("cgen %O", n->op);
break;
-
+
// these call bgen to get a bool value
case OOROR:
case OANDAND:
@@ -162,7 +201,7 @@ cgen(Node *n, Node *res)
goto abop;
case OCONV:
- if(eqtype(n->type, nl->type)) {
+ if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
cgen(nl, res);
break;
}
@@ -236,7 +275,7 @@ cgen(Node *n, Node *res)
case OADDR:
agen(nl, res);
break;
-
+
case OCALLMETH:
cgen_callmeth(n, 0);
cgen_callret(n, res);
@@ -303,6 +342,29 @@ uop: // unary
gmove(&n1, res);
tempfree(&n1);
return;
+
+flt: // floating-point. 387 (not SSE2) to interoperate with 6c
+ nodreg(&f0, n->type, D_F0);
+ nodreg(&f1, n->type, D_F0+1);
+ if(nl->ullman >= nr->ullman) {
+ cgen(nl, &f0);
+ if(nr->addable)
+ gins(foptoas(n->op, n->type, 0), nr, &f0);
+ else {
+ cgen(nr, &f0);
+ gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
+ }
+ } else {
+ cgen(nr, &f0);
+ if(nl->addable)
+ gins(foptoas(n->op, n->type, Frev), nl, &f0);
+ else {
+ cgen(nl, &f0);
+ gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
+ }
+ }
+ gmove(&f0, res);
+ return;
}
/*
@@ -334,21 +396,21 @@ agen(Node *n, Node *res)
regfree(&n1);
return;
}
-
+
// let's compute
nl = n->left;
nr = n->right;
-
+
switch(n->op) {
default:
fatal("agen %O", n->op);
-
+
case OCONV:
if(!eqtype(n->type, nl->type))
fatal("agen: non-trivial OCONV");
agen(nl, res);
break;
-
+
case OCALLMETH:
cgen_callmeth(n, 0);
cgen_aret(n, res);
@@ -506,11 +568,11 @@ agen(Node *n, Node *res)
gins(optoas(OADD, types[tptr]), &n1, res);
}
break;
-
+
case OIND:
cgen(nl, res);
break;
-
+
case ODOT:
t = nl->type;
agen(nl, res);
@@ -719,7 +781,7 @@ bgen(Node *n, int true, Prog *to)
regfree(&n1);
break;
}
-
+
if(is64(nr->type)) {
if(!nl->addable) {
tempalloc(&n1, nl->type);
@@ -916,7 +978,8 @@ sgen(Node *n, Node *res, int w)
static int
cancgen64(Node *n, Node *res)
{
- Node adr1, adr2, t1, t2, r1, r2, r3, r4, r5, nod, *l, *r;
+ Node t1, t2, ax, dx, cx, ex, fx, zero, *l, *r;
+ Node lo1, lo2, hi1, hi2;
Prog *p1, *p2;
if(n->op == OCALL)
@@ -936,14 +999,13 @@ cancgen64(Node *n, Node *res)
return 1;
case OMINUS:
+ nodconst(&zero, types[TINT32], 0);
cgen(n->left, res);
- gins(ANEGL, N, res);
- res->xoffset += 4;
- regalloc(&nod, types[TINT32], N);
- gins(AXORL, &nod, &nod);
- gins(ASBBL, res, &nod);
- gins(AMOVL, &nod, res);
- regfree(&nod);
+ split64(res, &lo1, &hi1);
+ gins(ANEGL, N, &lo1);
+ gins(AADCL, &zero, &hi1);
+ gins(ANEGL, N, &hi1);
+ splitclean();
return 1;
case OADD:
@@ -951,7 +1013,7 @@ cancgen64(Node *n, Node *res)
case OMUL:
break;
}
-
+
l = n->left;
r = n->right;
if(!l->addable) {
@@ -963,97 +1025,73 @@ cancgen64(Node *n, Node *res)
tempalloc(&t2, r->type);
cgen(r, &t2);
r = &t2;
- }
+ }
// Setup for binary operation.
- tempalloc(&adr1, types[TPTR32]);
- agen(l, &adr1);
- tempalloc(&adr2, types[TPTR32]);
- agen(r, &adr2);
+ split64(l, &lo1, &hi1);
+ split64(r, &lo2, &hi2);
- nodreg(&r1, types[TPTR32], D_AX);
- nodreg(&r2, types[TPTR32], D_DX);
- nodreg(&r3, types[TPTR32], D_CX);
+ nodreg(&ax, types[TPTR32], D_AX);
+ nodreg(&cx, types[TPTR32], D_CX);
+ nodreg(&dx, types[TPTR32], D_DX);
+ // Do op. Leave result in DX:AX.
switch(n->op) {
case OADD:
+ gins(AMOVL, &lo1, &ax);
+ gins(AMOVL, &hi1, &dx);
+ gins(AADDL, &lo2, &ax);
+ gins(AADCL, &hi2, &dx);
+ break;
+
case OSUB:
- gmove(&adr1, &r3);
- r3.op = OINDREG;
- r3.xoffset = 0;
- gins(AMOVL, &r3, &r1);
- r3.xoffset = 4;
- gins(AMOVL, &r3, &r2);
-
- r3.xoffset = 0;
- r3.op = OREGISTER;
- gmove(&adr2, &r3);
- r3.op = OINDREG;
- if(n->op == OADD)
- gins(AADDL, &r3, &r1);
- else
- gins(ASUBL, &r3, &r1);
- r3.xoffset = 4;
- if(n->op == OADD)
- gins(AADCL, &r3, &r2);
- else
- gins(ASBBL, &r3, &r2);
+ gins(AMOVL, &lo1, &ax);
+ gins(AMOVL, &hi1, &dx);
+ gins(ASUBL, &lo2, &ax);
+ gins(ASBBL, &hi2, &dx);
break;
- case OMUL:
- regalloc(&r4, types[TPTR32], N);
- regalloc(&r5, types[TPTR32], N);
-
- // load args into r2:r1 and r4:r3.
- // leave result in r2:r1 (DX:AX)
- gmove(&adr1, &r5);
- r5.op = OINDREG;
- r5.xoffset = 0;
- gmove(&r5, &r1);
- r5.xoffset = 4;
- gmove(&r5, &r2);
- r5.xoffset = 0;
- r5.op = OREGISTER;
- gmove(&adr2, &r5);
- r5.op = OINDREG;
- gmove(&r5, &r3);
- r5.xoffset = 4;
- gmove(&r5, &r4);
- r5.xoffset = 0;
- r5.op = OREGISTER;
-
- // if r2|r4 == 0, use one 32 x 32 -> 64 unsigned multiply
- gmove(&r2, &r5);
- gins(AORL, &r4, &r5);
+ case OMUL:
+ // let's call the next two EX and FX.
+ regalloc(&ex, types[TPTR32], N);
+ regalloc(&fx, types[TPTR32], N);
+
+ // load args into DX:AX and EX:CX.
+ gins(AMOVL, &lo1, &ax);
+ gins(AMOVL, &hi1, &dx);
+ gins(AMOVL, &lo2, &cx);
+ gins(AMOVL, &hi2, &ex);
+
+ // if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply.
+ gins(AMOVL, &dx, &fx);
+ gins(AORL, &ex, &fx);
p1 = gbranch(AJNE, T);
- gins(AMULL, &r3, N); // AX (=r1) is implied
+ gins(AMULL, &cx, N); // implicit &ax
p2 = gbranch(AJMP, T);
patch(p1, pc);
-
- // full 64x64 -> 64, from 32 x 32 -> 64.
- gins(AIMULL, &r3, &r2);
- gins(AMOVL, &r1, &r5);
- gins(AIMULL, &r4, &r5);
- gins(AADDL, &r2, &r5);
- gins(AMOVL, &r3, &r2);
- gins(AMULL, &r2, N); // AX (=r1) is implied
- gins(AADDL, &r5, &r2);
+
+ // full 64x64 -> 64, from 32x32 -> 64.
+ gins(AIMULL, &cx, &dx);
+ gins(AMOVL, &ax, &fx);
+ gins(AIMULL, &ex, &fx);
+ gins(AADDL, &dx, &fx);
+ gins(AMOVL, &cx, &dx);
+ gins(AMULL, &dx, N); // implicit &ax
+ gins(AADDL, &fx, &dx);
patch(p2, pc);
- regfree(&r4);
- regfree(&r5);
+
+ regfree(&ex);
+ regfree(&fx);
break;
-
}
-
- tempfree(&adr2);
- tempfree(&adr1);
-
- // Store result.
- gins(AMOVL, &r1, res);
- res->xoffset += 4;
- gins(AMOVL, &r2, res);
- res->xoffset -= 4;
-
+ splitclean();
+ splitclean();
+
+ split64(res, &lo1, &hi1);
+ gins(AMOVL, &ax, &lo1);
+ gins(AMOVL, &dx, &hi1);
+ splitclean();
+
if(r == &t2)
tempfree(&t2);
if(l == &t1)
@@ -1068,47 +1106,23 @@ cancgen64(Node *n, Node *res)
void
cmp64(Node *nl, Node *nr, int op, Prog *to)
{
- int64 x;
- Node adr1, adr2, rr;
- Prog *br, *p;
+ Node lo1, hi1, lo2, hi2, rr;
+ Prog *br;
Type *t;
-
- t = nr->type;
-
- memset(&adr1, 0, sizeof adr1);
- memset(&adr2, 0, sizeof adr2);
-
- regalloc(&adr1, types[TPTR32], N);
- agen(nl, &adr1);
- adr1.op = OINDREG;
- nl = &adr1;
-
- x = 0;
- if(nr->op == OLITERAL) {
- if(!isconst(nr, CTINT))
- fatal("bad const in cmp64");
- x = mpgetfix(nr->val.u.xval);
- } else {
- regalloc(&adr2, types[TPTR32], N);
- agen(nr, &adr2);
- adr2.op = OINDREG;
- nr = &adr2;
- }
-
+
+ split64(nl, &lo1, &hi1);
+ split64(nr, &lo2, &hi2);
+
// compare most significant word
- nl->xoffset += 4;
- if(nr->op == OLITERAL) {
- p = gins(ACMPL, nl, nodintconst((uint32)(x>>32)));
- } else {
- regalloc(&rr, types[TUINT32], N);
- nr->xoffset += 4;
- gins(AMOVL, nr, &rr);
- gins(ACMPL, nl, &rr);
- nr->xoffset -= 4;
+ t = hi1.type;
+ if(nl->op == OLITERAL || nr->op == OLITERAL)
+ gins(ACMPL, &hi1, &hi2);
+ else {
+ regalloc(&rr, types[TINT32], N);
+ gins(AMOVL, &hi1, &rr);
+ gins(ACMPL, &rr, &hi2);
regfree(&rr);
}
- nl->xoffset -= 4;
-
br = P;
switch(op) {
default:
@@ -1149,39 +1163,28 @@ cmp64(Node *nl, Node *nr, int op, Prog *to)
// L:
patch(gbranch(optoas(OLT, t), T), to);
br = gbranch(optoas(OGT, t), T);
- break;
+ break;
}
// compare least significant word
- if(nr->op == OLITERAL) {
- p = gins(ACMPL, nl, nodintconst((uint32)x));
- } else {
- regalloc(&rr, types[TUINT32], N);
- gins(AMOVL, nr, &rr);
- gins(ACMPL, nl, &rr);
+ t = lo1.type;
+ if(nl->op == OLITERAL || nr->op == OLITERAL)
+ gins(ACMPL, &lo1, &lo2);
+ else {
+ regalloc(&rr, types[TINT32], N);
+ gins(AMOVL, &lo1, &rr);
+ gins(ACMPL, &rr, &lo2);
regfree(&rr);
}
// jump again
- switch(op) {
- default:
- fatal("cmp64 %O %T", op, nr->type);
- case OEQ:
- case ONE:
- case OGE:
- case OGT:
- case OLE:
- case OLT:
- patch(gbranch(optoas(op, t), T), to);
- break;
- }
+ patch(gbranch(optoas(op, t), T), to);
// point first branch down here if appropriate
if(br != P)
patch(br, pc);
- regfree(&adr1);
- if(nr == &adr2)
- regfree(&adr2);
+ splitclean();
+ splitclean();
}
diff --git a/src/cmd/8g/gg.h b/src/cmd/8g/gg.h
index d7a9851f4..9943c2b60 100644
--- a/src/cmd/8g/gg.h
+++ b/src/cmd/8g/gg.h
@@ -43,6 +43,14 @@ struct Prog
void* reg; // pointer to containing Reg struct
};
+// foptoas flags
+enum
+{
+ Frev = 1<<0,
+ Fpop = 1<<1,
+ Fpop2 = 1<<2,
+};
+
EXTERN Biobuf* bout;
EXTERN int32 dynloc;
EXTERN uchar reg[D_NONE];
@@ -114,6 +122,7 @@ Prog* gop(int, Node*, Node*, Node*);
void setconst(Addr*, vlong);
void setaddr(Addr*, Node*);
int optoas(int, Type*);
+int foptoas(int, Type*, int);
void ginit(void);
void gclean(void);
void regalloc(Node*, Type*, Node*);
@@ -131,7 +140,10 @@ Plist* newplist(void);
int isfat(Type*);
void sudoclean(void);
int sudoaddable(int, Node*, Addr*);
+int dotaddable(Node*, Node*);
void afunclit(Addr*);
+void split64(Node*, Node*, Node*);
+void splitclean(void);
/*
* list.c
diff --git a/src/cmd/8g/ggen.c b/src/cmd/8g/ggen.c
index 47f21bcb6..249c9fe8c 100644
--- a/src/cmd/8g/ggen.c
+++ b/src/cmd/8g/ggen.c
@@ -468,7 +468,7 @@ cgen_asop(Node *n)
hard:
if(nr->ullman > nl->ullman) {
- regalloc(&n2, nr->type, N);
+ tempalloc(&n2, nr->type);
cgen(nr, &n2);
igen(nl, &n1, N);
} else {
diff --git a/src/cmd/8g/gsubr.c b/src/cmd/8g/gsubr.c
index 99c2b8af1..6e82890d3 100755
--- a/src/cmd/8g/gsubr.c
+++ b/src/cmd/8g/gsubr.c
@@ -571,6 +571,81 @@ optoas(int op, Type *t)
return a;
}
+#define FCASE(a, b, c) (((a)<<16)|((b)<<8)|(c))
+int
+foptoas(int op, Type *t, int flg)
+{
+ int et;
+
+ et = t->etype;
+
+ // clear Frev if unneeded
+ switch(op) {
+ case OADD:
+ case OMUL:
+ flg &= ~Frev;
+ break;
+ }
+
+ switch(FCASE(op, et, flg)) {
+ case FCASE(OADD, TFLOAT32, 0):
+ return AFADDF;
+ case FCASE(OADD, TFLOAT64, 0):
+ return AFADDD;
+ case FCASE(OADD, TFLOAT64, Fpop):
+ return AFADDDP;
+
+ case FCASE(OSUB, TFLOAT32, 0):
+ return AFSUBF;
+ case FCASE(OSUB, TFLOAT32, Frev):
+ return AFSUBRF;
+
+ case FCASE(OSUB, TFLOAT64, 0):
+ return AFSUBD;
+ case FCASE(OSUB, TFLOAT64, Frev):
+ return AFSUBRD;
+ case FCASE(OSUB, TFLOAT64, Fpop):
+ return AFSUBDP;
+ case FCASE(OSUB, TFLOAT64, Fpop|Frev):
+ return AFSUBRDP;
+
+ case FCASE(OMUL, TFLOAT32, 0):
+ return AFMULF;
+ case FCASE(OMUL, TFLOAT64, 0):
+ return AFMULD;
+ case FCASE(OMUL, TFLOAT64, Fpop):
+ return AFMULDP;
+
+ case FCASE(ODIV, TFLOAT32, 0):
+ return AFDIVF;
+ case FCASE(ODIV, TFLOAT32, Frev):
+ return AFDIVRF;
+
+ case FCASE(ODIV, TFLOAT64, 0):
+ return AFDIVD;
+ case FCASE(ODIV, TFLOAT64, Frev):
+ return AFDIVRD;
+ case FCASE(ODIV, TFLOAT64, Fpop):
+ return AFDIVDP;
+ case FCASE(ODIV, TFLOAT64, Fpop|Frev):
+ return AFDIVRDP;
+
+ case FCASE(OCMP, TFLOAT32, 0):
+ return AFCOMF;
+ case FCASE(OCMP, TFLOAT32, Fpop):
+ return AFCOMFP;
+ case FCASE(OCMP, TFLOAT64, 0):
+ return AFCOMD;
+ case FCASE(OCMP, TFLOAT64, Fpop):
+ return AFCOMDP;
+ case FCASE(OCMP, TFLOAT64, Fpop2):
+ return AFCOMDPP;
+ }
+
+ fatal("foptoas %O %T %#x", op, t, flg);
+ return 0;
+}
+
static int resvd[] =
{
// D_DI, // for movstring
@@ -600,6 +675,8 @@ ginit(void)
reg[resvd[i]]++;
}
+ulong regpc[D_NONE];
+
void
gclean(void)
{
@@ -610,14 +687,12 @@ gclean(void)
for(i=D_AX; i<=D_DI; i++)
if(reg[i])
- yyerror("reg %R left allocated\n", i);
+ yyerror("reg %R left allocated at %lux\n", i, regpc[i]);
for(i=D_F0; i<=D_F7; i++)
if(reg[i])
yyerror("reg %R left allocated\n", i);
}
-ulong regpc[D_NONE];
-
/*
* allocate register of type t, leave in n.
* if o != N, o is desired fixed register.
@@ -681,7 +756,7 @@ err:
out:
if(reg[i] == 0) {
- regpc[i] = getcallerpc(&n);
+ regpc[i] = (ulong)__builtin_return_address(0);
if(i == D_AX || i == D_CX || i == D_DX || i == D_SP) {
dump("regalloc-o", o);
fatal("regalloc %R", i);
@@ -837,168 +912,445 @@ gconreg(int as, vlong c, int reg)
gins(as, &n1, &n2);
}
+
/*
- * generate move:
- * t = f
- * f may be in memory,
- * t is known to be a 32-bit register.
+ * Is this node a memory operand?
+ */
+int
+ismem(Node *n)
+{
+ switch(n->op) {
+ case OINDREG:
+ case ONAME:
+ case OPARAM:
+ return 1;
+ }
+ return 0;
+}
+
+Node sclean[10];
+int nsclean;
+
+/*
+ * n is a 64-bit value. fill in lo and hi to refer to its 32-bit halves.
*/
void
-gload(Node *f, Node *t)
+split64(Node *n, Node *lo, Node *hi)
{
- int a, ft;
+ Node n1;
+ int64 i;
- ft = simtype[f->type->etype];
+ if(!is64(n->type))
+ fatal("split64 %T", n->type);
- switch(ft) {
+ sclean[nsclean].op = OEMPTY;
+ if(nsclean >= nelem(sclean))
+ fatal("split64 clean");
+ nsclean++;
+ switch(n->op) {
default:
- fatal("gload %T", f->type);
- case TINT8:
- a = AMOVBLSX;
- if(isconst(f, CTINT) || isconst(f, CTBOOL))
- a = AMOVL;
- break;
- case TBOOL:
- case TUINT8:
- a = AMOVBLZX;
- if(isconst(f, CTINT) || isconst(f, CTBOOL))
- a = AMOVL;
- break;
- case TINT16:
- a = AMOVWLSX;
- if(isconst(f, CTINT) || isconst(f, CTBOOL))
- a = AMOVL;
- break;
- case TUINT16:
- a = AMOVWLZX;
- if(isconst(f, CTINT))
- a = AMOVL;
- break;
- case TINT32:
- case TUINT32:
- case TPTR32:
- a = AMOVL;
+ if(!dotaddable(n, &n1)) {
+ igen(n, &n1, N);
+ sclean[nsclean-1] = n1;
+ }
+ n = &n1;
+ // fall through
+ case ONAME:
+ case OINDREG:
+ *lo = *n;
+ *hi = *n;
+ lo->type = types[TUINT32];
+ if(n->type->etype == TINT64)
+ hi->type = types[TINT32];
+ else
+ hi->type = types[TUINT32];
+ hi->xoffset += 4;
break;
- case TINT64:
- case TUINT64:
- a = AMOVL; // truncating
+
+ case OLITERAL:
+ convconst(&n1, n->type, &n->val);
+ i = mpgetfix(n1.val.u.xval);
+ nodconst(lo, types[TUINT32], (uint32)i);
+ i >>= 32;
+ if(n->type->etype == TINT64)
+ nodconst(hi, types[TINT32], (int32)i);
+ else
+ nodconst(hi, types[TUINT32], (uint32)i);
break;
}
+}
- gins(a, f, t);
+void
+splitclean(void)
+{
+ if(nsclean <= 0)
+ fatal("splitclean");
+ nsclean--;
+ if(sclean[nsclean].op != OEMPTY)
+ regfree(&sclean[nsclean]);
}
-/*
- * generate move:
- * t = f
- * f is known to be a 32-bit register.
- * t may be in memory.
- */
void
-gstore(Node *f, Node *t)
+gmove(Node *f, Node *t)
{
int a, ft, tt;
- Node nod, adr;
+ Type *cvt;
+ Node r1, r2, flo, fhi, tlo, thi, con;
+
+ if(debug['M'])
+ print("gmove %N -> %N\n", f, t);
+
+ ft = simsimtype(f->type);
+ tt = simsimtype(t->type);
+ cvt = t->type;
+
+ // cannot have two memory operands;
+ // except 64-bit, which always copies via registers anyway.
+ if(ismem(f) && ismem(t) && !is64(f->type) && !is64(t->type))
+ goto hard;
+
+ // convert constant to desired type
+ if(f->op == OLITERAL) {
+ convconst(&con, t->type, &f->val);
+ f = &con;
+ ft = tt; // so big switch will choose a simple mov
+
+ // some constants can't move directly to memory.
+ if(ismem(t)) {
+ // float constants come from memory.
+ if(isfloat[tt])
+ goto hard;
+ }
+ }
- ft = simtype[f->type->etype];
- tt = simtype[t->type->etype];
+ // value -> value copy, only one memory operand.
+ // figure out the instruction to use.
+ // break out of switch for one-instruction gins.
+ // goto rdst for "destination must be register".
+ // goto hard for "convert to cvt type first".
+ // otherwise handle and return.
- switch(tt) {
+ switch(CASE(ft, tt)) {
default:
- fatal("gstore %T", t->type);
- case TINT8:
- case TBOOL:
- case TUINT8:
+ fatal("gmove %N -> %N", f, t);
+
+ /*
+ * integer copy and truncate
+ */
+ case CASE(TINT8, TINT8): // same size
+ case CASE(TINT8, TUINT8):
+ case CASE(TUINT8, TINT8):
+ case CASE(TUINT8, TUINT8):
+ case CASE(TINT16, TINT8): // truncate
+ case CASE(TUINT16, TINT8):
+ case CASE(TINT32, TINT8):
+ case CASE(TUINT32, TINT8):
+// case CASE(TINT64, TINT8):
+// case CASE(TUINT64, TINT8):
+ case CASE(TINT16, TUINT8):
+ case CASE(TUINT16, TUINT8):
+ case CASE(TINT32, TUINT8):
+ case CASE(TUINT32, TUINT8):
+// case CASE(TINT64, TUINT8):
+// case CASE(TUINT64, TUINT8):
a = AMOVB;
break;
- case TINT16:
- case TUINT16:
+
+ case CASE(TINT16, TINT16): // same size
+ case CASE(TINT16, TUINT16):
+ case CASE(TUINT16, TINT16):
+ case CASE(TUINT16, TUINT16):
+ case CASE(TINT32, TINT16): // truncate
+ case CASE(TUINT32, TINT16):
+// case CASE(TINT64, TINT16):
+// case CASE(TUINT64, TINT16):
+ case CASE(TINT32, TUINT16):
+ case CASE(TUINT32, TUINT16):
+// case CASE(TINT64, TUINT16):
+// case CASE(TUINT64, TUINT16):
a = AMOVW;
break;
- case TINT32:
- case TUINT32:
- case TPTR32:
+
+ case CASE(TINT32, TINT32): // same size
+ case CASE(TINT32, TUINT32):
+ case CASE(TUINT32, TINT32):
+ case CASE(TUINT32, TUINT32):
+// case CASE(TINT64, TINT32): // truncate
+// case CASE(TUINT64, TINT32):
+// case CASE(TINT64, TUINT32):
+// case CASE(TUINT64, TUINT32):
a = AMOVL;
break;
- case TINT64:
- case TUINT64:
- if(t->op == OREGISTER)
- fatal("gstore %T %O", t->type, t->op);
- memset(&adr, 0, sizeof adr);
- igen(t, &adr, N);
- t = &adr;
- t->xoffset += 4;
- switch(ft) {
- default:
- fatal("gstore %T -> %T", f, t);
- break;
- case TINT32:
- nodreg(&nod, types[TINT32], D_AX);
- gins(AMOVL, f, &nod);
- gins(ACDQ, N, N);
- nodreg(&nod, types[TINT32], D_DX);
- gins(AMOVL, &nod, t);
- break;
- case TUINT32:
- gins(AMOVL, nodintconst(0), t);
- break;
+
+ case CASE(TINT64, TINT64): // same size
+ case CASE(TINT64, TUINT64):
+ case CASE(TUINT64, TINT64):
+ case CASE(TUINT64, TUINT64):
+ split64(f, &flo, &fhi);
+ split64(t, &tlo, &thi);
+ if(f->op == OLITERAL) {
+ gins(AMOVL, &flo, &tlo);
+ gins(AMOVL, &fhi, &thi);
+ } else {
+ regalloc(&r1, types[TUINT32], N);
+ regalloc(&r2, types[TUINT32], N);
+ gins(AMOVL, &flo, &r1);
+ gins(AMOVL, &fhi, &r2);
+ gins(AMOVL, &r1, &tlo);
+ gins(AMOVL, &r2, &thi);
+ regfree(&r2);
+ regfree(&r1);
}
- t->xoffset -= 4;
- a = AMOVL;
- }
+ splitclean();
+ splitclean();
+ return;
- gins(a, f, t);
- if(t == &adr)
- regfree(&adr);
-}
+ /*
+ * integer up-conversions
+ */
+ case CASE(TINT8, TINT16): // sign extend int8
+ case CASE(TINT8, TUINT16):
+ a = AMOVBWSX;
+ goto rdst;
+ case CASE(TINT8, TINT32):
+ case CASE(TINT8, TUINT32):
+ a = AMOVBLSX;
+ goto rdst;
+// case CASE(TINT8, TINT64):
+// case CASE(TINT8, TUINT64):
+// a = AMOVBQSX;
+// goto rdst;
+
+ case CASE(TUINT8, TINT16): // zero extend uint8
+ case CASE(TUINT8, TUINT16):
+ a = AMOVBWZX;
+ goto rdst;
+ case CASE(TUINT8, TINT32):
+ case CASE(TUINT8, TUINT32):
+ a = AMOVBLZX;
+ goto rdst;
+// case CASE(TUINT8, TINT64):
+// case CASE(TUINT8, TUINT64):
+// a = AMOVBQZX;
+// goto rdst;
+
+ case CASE(TINT16, TINT32): // sign extend int16
+ case CASE(TINT16, TUINT32):
+ a = AMOVWLSX;
+ goto rdst;
+// case CASE(TINT16, TINT64):
+// case CASE(TINT16, TUINT64):
+// a = AMOVWQSX;
+// goto rdst;
+
+ case CASE(TUINT16, TINT32): // zero extend uint16
+ case CASE(TUINT16, TUINT32):
+ a = AMOVWLZX;
+ goto rdst;
+// case CASE(TUINT16, TINT64):
+// case CASE(TUINT16, TUINT64):
+// a = AMOVWQZX;
+// goto rdst;
+
+ case CASE(TINT32, TINT64): // sign extend int32
+ case CASE(TINT32, TUINT64):
+ split64(t, &tlo, &thi);
+ nodreg(&flo, tlo.type, D_AX);
+ nodreg(&fhi, thi.type, D_DX);
+ gmove(f, &flo);
+ gins(ACDQ, N, N);
+ gins(AMOVL, &flo, &tlo);
+ gins(AMOVL, &fhi, &thi);
+ return;
-void
-gmove(Node *f, Node *t)
-{
- int ft, tt, t64, a;
- Node nod;
+// case CASE(TUINT32, TINT64): // zero extend uint32
+// case CASE(TUINT32, TUINT64):
+// // AMOVL into a register zeros the top of the register,
+// // so this is not always necessary, but if we rely on AMOVL
+// // the optimizer is almost certain to screw with us.
+// a = AMOVLQZX;
+// goto rdst;
- ft = simtype[f->type->etype];
- tt = simtype[t->type->etype];
+ /*
+ * float to integer
+ *
+ case CASE(TFLOAT32, TINT16):
+ case CASE(TFLOAT32, TINT32):
+ case CASE(TFLOAT32, TINT64):
+ case CASE(TFLOAT64, TINT16):
+ case CASE(TFLOAT64, TINT32):
+ case CASE(TFLOAT64, TINT64):
+ if(ft == TFLOAT32)
+ gins(AFMOVF, f, &f0);
+ else
+ gins(AFMOVD, f, &f0);
+ if(tt == TINT16)
+ gins(AFMOVWP, &f0, t);
+ else if(tt == TINT32)
+ gins(AFMOVLP, &f0, t);
+ else
+ gins(AFMOVVP, &f0, t);
+ return;
- a = AGOK;
+ case CASE(TFLOAT32, TINT8):
+ case CASE(TFLOAT32, TUINT16):
+ case CASE(TFLOAT32, TUINT8):
+ case CASE(TFLOAT64, TINT8):
+ case CASE(TFLOAT64, TUINT16):
+ case CASE(TFLOAT64, TUINT8):
+ // convert via int32.
+ cvt = types[TINT32];
+ goto hard;
+
+ case CASE(TFLOAT32, TUINT32):
+ case CASE(TFLOAT64, TUINT32):
+ // could potentially convert via int64.
+ cvt = types[TINT64];
+ goto hard;
+
+ case CASE(TFLOAT32, TUINT64):
+ case CASE(TFLOAT64, TUINT64):
+ if(ft == TFLOAT32)
+ gins(AFMOVF, f, &f0);
+ else
+ gins(AFMOVD, f, &f0);
+ // algorithm is:
+ // if small enough, use native float64 -> int64 conversion.
+ // otherwise, subtract 2^63, convert, and add it back.
+ bignodes();
+ regalloc(&r1, types[ft], N);
+ regalloc(&r2, types[ft], N);
+ gins(optoas(OCMP, f->type), &bigf, &r1);
+ p1 = gbranch(optoas(OLE, f->type), T);
+ gins(a, &r1, &r2);
+ p2 = gbranch(AJMP, T);
+ patch(p1, pc);
+ gins(optoas(OAS, f->type), &bigf, &r3);
+ gins(optoas(OSUB, f->type), &r3, &r1);
+ gins(a, &r1, &r2);
+ gins(AMOVQ, &bigi, &r4);
+ gins(AXORQ, &r4, &r2);
+ patch(p2, pc);
+ gmove(&r2, t);
+ regfree(&r4);
+ regfree(&r3);
+ regfree(&r2);
+ regfree(&r1);
+ fatal("lazy");
+ return;
+ */
+ /*
+ * integer to float
+ *
+ case CASE(TINT32, TFLOAT32):
+ a = ACVTSL2SS;
+ goto rdst;
+
+
+ case CASE(TINT32, TFLOAT64):
+ a = ACVTSL2SD;
+ goto rdst;
+
+ case CASE(TINT64, TFLOAT32):
+ a = ACVTSQ2SS;
+ goto rdst;
+
+ case CASE(TINT64, TFLOAT64):
+ a = ACVTSQ2SD;
+ goto rdst;
+
+ case CASE(TINT16, TFLOAT32):
+ case CASE(TINT16, TFLOAT64):
+ case CASE(TINT8, TFLOAT32):
+ case CASE(TINT8, TFLOAT64):
+ case CASE(TUINT16, TFLOAT32):
+ case CASE(TUINT16, TFLOAT64):
+ case CASE(TUINT8, TFLOAT32):
+ case CASE(TUINT8, TFLOAT64):
+ // convert via int32
+ cvt = types[TINT32];
+ goto hard;
+
+ case CASE(TUINT32, TFLOAT32):
+ case CASE(TUINT32, TFLOAT64):
+ // convert via int64.
+ cvt = types[TINT64];
+ goto hard;
+
+ case CASE(TUINT64, TFLOAT32):
+ case CASE(TUINT64, TFLOAT64):
+ // algorithm is:
+ // if small enough, use native int64 -> uint64 conversion.
+ // otherwise, halve (rounding to odd?), convert, and double.
+ a = ACVTSQ2SS;
+ if(tt == TFLOAT64)
+ a = ACVTSQ2SD;
+ nodconst(&zero, types[TUINT64], 0);
+ nodconst(&one, types[TUINT64], 1);
+ regalloc(&r1, f->type, f);
+ regalloc(&r2, t->type, t);
+ regalloc(&r3, f->type, N);
+ regalloc(&r4, f->type, N);
+ gmove(f, &r1);
+ gins(ACMPQ, &r1, &zero);
+ p1 = gbranch(AJLT, T);
+ gins(a, &r1, &r2);
+ p2 = gbranch(AJMP, T);
+ patch(p1, pc);
+ gmove(&r1, &r3);
+ gins(ASHRQ, &one, &r3);
+ gmove(&r1, &r4);
+ gins(AANDL, &one, &r4);
+ gins(AORQ, &r4, &r3);
+ gins(a, &r3, &r2);
+ gins(optoas(OADD, t->type), &r2, &r2);
+ patch(p2, pc);
+ gmove(&r2, t);
+ regfree(&r4);
+ regfree(&r3);
+ regfree(&r2);
+ regfree(&r1);
+ return;
+ */
+ /*
+ * float to float
+ *
+ case CASE(TFLOAT32, TFLOAT32):
+ a = AMOVSS;
+ break;
- t64 = 0;
- if(tt == TINT64 || tt == TUINT64 || tt == TPTR64)
- t64 = 1;
+ case CASE(TFLOAT64, TFLOAT64):
+ a = AMOVSD;
+ break;
- if(debug['M'])
- print("gop: %O %O[%E],%O[%E]\n", OAS,
- f->op, ft, t->op, tt);
- if(isfloat[ft] && f->op == OCONST) {
- fatal("fp");
- /* TO DO: pick up special constants, possibly preloaded */
- //F
- /*
- if(mpgetflt(f->val.u.fval) == 0.0) {
- regalloc(&nod, t->type, t);
- gins(AXORPD, &nod, &nod);
- gmove(&nod, t);
- regfree(&nod);
- return;
- }
+ case CASE(TFLOAT32, TFLOAT64):
+ a = ACVTSS2SD;
+ goto rdst;
+
+ case CASE(TFLOAT64, TFLOAT32):
+ a = ACVTSD2SS;
+ goto rdst;
*/
}
- if(is64(types[ft]) && isconst(f, CTINT)) {
- f->type = types[TINT32]; // XXX check constant value, choose correct type
- ft = TINT32;
- }
+ gins(a, f, t);
+ return;
- if(is64(types[ft]) && is64(types[tt])) {
- sgen(f, t, 8);
- return;
- }
+rdst:
+ // requires register destination
+ regalloc(&r1, t->type, t);
+ gins(a, f, &r1);
+ gmove(&r1, t);
+ regfree(&r1);
+ return;
- regalloc(&nod, types[TINT32], t);
- gload(f, &nod);
- gstore(&nod, t);
- regfree(&nod);
+hard:
+ // requires register intermediate
+ regalloc(&r1, cvt, t);
+ gmove(f, &r1);
+ gmove(&r1, t);
+ regfree(&r1);
+ return;
}
int
@@ -1025,9 +1377,13 @@ gins(int as, Node *f, Node *t)
{
Prog *p;
- // generating AMOVL BX, BX is just dumb.
- if(f != N && t != N && samaddr(f, t) && as == AMOVL)
- return nil;
+ switch(as) {
+ case AMOVB:
+ case AMOVW:
+ case AMOVL:
+ if(f != N && t != N && samaddr(f, t))
+ return nil;
+ }
p = prog(as);
if(f != N)
@@ -1173,6 +1529,25 @@ naddr(Node *n, Addr *a)
}
}
+int
+dotaddable(Node *n, Node *n1)
+{
+ int o, oary[10];
+ Node *nn;
+
+ if(n->op != ODOT)
+ return 0;
+
+ o = dotoffset(n, oary, &nn);
+ if(nn != N && nn->addable && o == 1 && oary[0] >= 0) {
+ *n1 = *nn;
+ n1->type = n->type;
+ n1->xoffset += oary[0];
+ return 1;
+ }
+ return 0;
+}
+
void
sudoclean(void)
{
diff --git a/src/cmd/gc/const.c b/src/cmd/gc/const.c
index 72cf684ad..d672ec9f0 100644
--- a/src/cmd/gc/const.c
+++ b/src/cmd/gc/const.c
@@ -857,7 +857,7 @@ convconst(Node *con, Type *t, Val *val)
con->val.u.xval = mal(sizeof *con->val.u.xval);
switch(val->ctype) {
default:
- fatal("convconst ctype=%d %lT", val->ctype, t->type);
+ fatal("convconst ctype=%d %lT", val->ctype, t);
case CTINT:
i = mpgetfix(val->u.xval);
break;