summaryrefslogtreecommitdiff
path: root/src/cmd/8g/ggen.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/8g/ggen.c')
-rw-r--r--src/cmd/8g/ggen.c841
1 files changed, 407 insertions, 434 deletions
diff --git a/src/cmd/8g/ggen.c b/src/cmd/8g/ggen.c
index 6a4570199..70148106c 100644
--- a/src/cmd/8g/ggen.c
+++ b/src/cmd/8g/ggen.c
@@ -27,6 +27,9 @@ void
markautoused(Prog* p)
{
for (; p; p = p->link) {
+ if (p->as == ATYPE)
+ continue;
+
if (p->from.type == D_AUTO && p->from.node)
p->from.node->used = 1;
@@ -39,12 +42,21 @@ markautoused(Prog* p)
void
fixautoused(Prog* p)
{
- for (; p; p = p->link) {
+ Prog **lp;
+
+ for (lp=&p; (p=*lp) != P; ) {
+ if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
+ *lp = p->link;
+ continue;
+ }
+
if (p->from.type == D_AUTO && p->from.node)
p->from.offset += p->from.node->stkdelta;
if (p->to.type == D_AUTO && p->to.node)
p->to.offset += p->to.node->stkdelta;
+
+ lp = &p->link;
}
}
@@ -59,6 +71,10 @@ clearfat(Node *nl)
dump("\nclearfat", nl);
w = nl->type->width;
+ // Avoid taking the address for simple enough types.
+ if(componentgen(N, nl))
+ return;
+
c = w % 4; // bytes
q = w / 4; // quads
@@ -90,15 +106,17 @@ clearfat(Node *nl)
/*
* generate:
* call f
+ * proc=-1 normal call but no return
* proc=0 normal call
* proc=1 goroutine run in new proc
* proc=2 defer call save away stack
+ * proc=3 normal call to C pointer (not Go func value)
*/
void
ginscall(Node *f, int proc)
{
Prog *p;
- Node reg, con;
+ Node reg, r1, con;
switch(proc) {
default:
@@ -106,8 +124,25 @@ ginscall(Node *f, int proc)
break;
case 0: // normal call
- p = gins(ACALL, N, f);
- afunclit(&p->to);
+ case -1: // normal call but no return
+ if(f->op == ONAME && f->class == PFUNC) {
+ p = gins(ACALL, N, f);
+ afunclit(&p->to, f);
+ if(proc == -1 || noreturn(p))
+ gins(AUNDEF, N, N);
+ break;
+ }
+ nodreg(&reg, types[tptr], D_DX);
+ nodreg(&r1, types[tptr], D_BX);
+ gmove(f, &reg);
+ reg.op = OINDREG;
+ gmove(&reg, &r1);
+ reg.op = OREGISTER;
+ gins(ACALL, &reg, &r1);
+ break;
+
+ case 3: // normal call of c function pointer
+ gins(ACALL, N, f);
break;
case 1: // call in new proc (go)
@@ -125,7 +160,7 @@ ginscall(Node *f, int proc)
if(proc == 2) {
nodreg(&reg, types[TINT64], D_AX);
gins(ATESTL, &reg, &reg);
- patch(gbranch(AJNE, T), retpc);
+ patch(gbranch(AJNE, T, -1), retpc);
}
break;
}
@@ -139,7 +174,7 @@ void
cgen_callinter(Node *n, Node *res, int proc)
{
Node *i, *f;
- Node tmpi, nodo, nodr, nodsp;
+ Node tmpi, nodi, nodo, nodr, nodsp;
i = n->left;
if(i->op != ODOTINTER)
@@ -159,25 +194,35 @@ cgen_callinter(Node *n, Node *res, int proc)
genlist(n->list); // assign the args
- // Can regalloc now; i is known to be addable,
- // so the agen will be easy.
- regalloc(&nodr, types[tptr], res);
- regalloc(&nodo, types[tptr], &nodr);
- nodo.op = OINDREG;
-
- agen(i, &nodr); // REG = &inter
+ // i is now addable, prepare an indirected
+ // register to hold its address.
+ igen(i, &nodi, res); // REG = &inter
nodindreg(&nodsp, types[tptr], D_SP);
- nodo.xoffset += widthptr;
- cgen(&nodo, &nodsp); // 0(SP) = 4(REG) -- i.data
+ nodi.type = types[tptr];
+ nodi.xoffset += widthptr;
+ cgen(&nodi, &nodsp); // 0(SP) = 4(REG) -- i.data
- nodo.xoffset -= widthptr;
- cgen(&nodo, &nodr); // REG = 0(REG) -- i.tab
+ regalloc(&nodo, types[tptr], res);
+ nodi.type = types[tptr];
+ nodi.xoffset -= widthptr;
+ cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab
+ regfree(&nodi);
+ regalloc(&nodr, types[tptr], &nodo);
if(n->left->xoffset == BADWIDTH)
fatal("cgen_callinter: badwidth");
+ nodo.op = OINDREG;
nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
- cgen(&nodo, &nodr); // REG = 20+offset(REG) -- i.tab->fun[f]
+
+ if(proc == 0) {
+ // plain call: use direct c function pointer - more efficient
+ cgen(&nodo, &nodr); // REG = 20+offset(REG) -- i.tab->fun[f]
+ proc = 3;
+ } else {
+ // go/defer. generate go func value.
+ gins(ALEAL, &nodo, &nodr); // REG = &(20+offset(REG)) -- i.tab->fun[f]
+ }
// BOTCH nodr.type = fntype;
nodr.type = n->left->type;
@@ -486,7 +531,7 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
int check;
Node n1, t1, t2, t3, t4, n4, nz;
Type *t, *t0;
- Prog *p1, *p2, *p3;
+ Prog *p1, *p2;
// Have to be careful about handling
// most negative int divided by -1 correctly.
@@ -535,23 +580,22 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
regalloc(&n1, t, N);
gmove(&t2, &n1);
gmove(&t1, ax);
- p3 = P;
+ p2 = P;
if(check) {
nodconst(&n4, t, -1);
gins(optoas(OCMP, t), &n1, &n4);
- p1 = gbranch(optoas(ONE, t), T);
- nodconst(&n4, t, -1LL<<(t->width*8-1));
- gins(optoas(OCMP, t), ax, &n4);
- p2 = gbranch(optoas(ONE, t), T);
- if(op == ODIV)
- gmove(&n4, res);
- if(op == OMOD) {
+ p1 = gbranch(optoas(ONE, t), T, +1);
+ if(op == ODIV) {
+ // a / (-1) is -a.
+ gins(optoas(OMINUS, t), N, ax);
+ gmove(ax, res);
+ } else {
+ // a % (-1) is 0.
nodconst(&n4, t, 0);
gmove(&n4, res);
}
- p3 = gbranch(AJMP, T);
+ p2 = gbranch(AJMP, T, 0);
patch(p1, pc);
- patch(p2, pc);
}
if(!issigned[t->etype]) {
nodconst(&nz, t, 0);
@@ -566,7 +610,7 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
else
gmove(dx, res);
if(check)
- patch(p3, pc);
+ patch(p2, pc);
}
static void
@@ -630,7 +674,7 @@ cgen_div(int op, Node *nl, Node *nr, Node *res)
* res = nl >> nr
*/
void
-cgen_shift(int op, Node *nl, Node *nr, Node *res)
+cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
{
Node n1, n2, nt, cx, oldcx, hi, lo;
int a, w;
@@ -651,7 +695,7 @@ cgen_shift(int op, Node *nl, Node *nr, Node *res)
gmove(&n2, &n1);
sc = mpgetfix(nr->val.u.xval);
if(sc >= nl->type->width*8) {
- // large shift gets 2 shifts by width
+ // large shift gets 2 shifts by width-1
gins(a, ncon(w-1), &n1);
gins(a, ncon(w-1), &n1);
} else
@@ -689,27 +733,39 @@ cgen_shift(int op, Node *nl, Node *nr, Node *res)
}
// test and fix up large shifts
- if(nr->type->width > 4) {
- // delayed reg alloc
- nodreg(&n1, types[TUINT32], D_CX);
- regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
- split64(&nt, &lo, &hi);
- gmove(&lo, &n1);
- gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
- p2 = gbranch(optoas(ONE, types[TUINT32]), T);
- gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
- p1 = gbranch(optoas(OLT, types[TUINT32]), T);
- patch(p2, pc);
- } else {
- gins(optoas(OCMP, nr->type), &n1, ncon(w));
- p1 = gbranch(optoas(OLT, types[TUINT32]), T);
- }
- if(op == ORSH && issigned[nl->type->etype]) {
- gins(a, ncon(w-1), &n2);
+ if(bounded) {
+ if(nr->type->width > 4) {
+ // delayed reg alloc
+ nodreg(&n1, types[TUINT32], D_CX);
+ regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
+ split64(&nt, &lo, &hi);
+ gmove(&lo, &n1);
+ splitclean();
+ }
} else {
- gmove(ncon(0), &n2);
+ if(nr->type->width > 4) {
+ // delayed reg alloc
+ nodreg(&n1, types[TUINT32], D_CX);
+ regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
+ split64(&nt, &lo, &hi);
+ gmove(&lo, &n1);
+ gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
+ p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1);
+ gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
+ p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
+ splitclean();
+ patch(p2, pc);
+ } else {
+ gins(optoas(OCMP, nr->type), &n1, ncon(w));
+ p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
+ }
+ if(op == ORSH && issigned[nl->type->etype]) {
+ gins(a, ncon(w-1), &n2);
+ } else {
+ gmove(ncon(0), &n2);
+ }
+ patch(p1, pc);
}
- patch(p1, pc);
gins(a, &n1, &n2);
if(oldcx.op != 0)
@@ -724,444 +780,361 @@ cgen_shift(int op, Node *nl, Node *nr, Node *res)
/*
* generate byte multiply:
* res = nl * nr
- * no byte multiply instruction so have to do
- * 16-bit multiply and take bottom half.
+ * there is no 2-operand byte multiply instruction so
+ * we do a full-width multiplication and truncate afterwards.
*/
void
cgen_bmul(int op, Node *nl, Node *nr, Node *res)
{
- Node n1b, n2b, n1w, n2w;
+ Node n1, n2, nt, *tmp;
Type *t;
int a;
- if(nl->ullman >= nr->ullman) {
- regalloc(&n1b, nl->type, res);
- cgen(nl, &n1b);
- regalloc(&n2b, nr->type, N);
- cgen(nr, &n2b);
- } else {
- regalloc(&n2b, nr->type, N);
- cgen(nr, &n2b);
- regalloc(&n1b, nl->type, res);
- cgen(nl, &n1b);
- }
-
- // copy from byte to short registers
- t = types[TUINT16];
+ // copy from byte to full registers
+ t = types[TUINT32];
if(issigned[nl->type->etype])
- t = types[TINT16];
-
- regalloc(&n2w, t, &n2b);
- cgen(&n2b, &n2w);
+ t = types[TINT32];
- regalloc(&n1w, t, &n1b);
- cgen(&n1b, &n1w);
+ // largest ullman on left.
+ if(nl->ullman < nr->ullman) {
+ tmp = nl;
+ nl = nr;
+ nr = tmp;
+ }
+ tempname(&nt, nl->type);
+ cgen(nl, &nt);
+ regalloc(&n1, t, res);
+ cgen(nr, &n1);
+ regalloc(&n2, t, N);
+ gmove(&nt, &n2);
a = optoas(op, t);
- gins(a, &n2w, &n1w);
- cgen(&n1w, &n1b);
- cgen(&n1b, res);
-
- regfree(&n1w);
- regfree(&n2w);
- regfree(&n1b);
- regfree(&n2b);
+ gins(a, &n2, &n1);
+ regfree(&n2);
+ gmove(&n1, res);
+ regfree(&n1);
}
-static int
-regcmp(const void *va, const void *vb)
+/*
+ * generate high multiply:
+ * res = (nl*nr) >> width
+ */
+void
+cgen_hmul(Node *nl, Node *nr, Node *res)
{
- Node *ra, *rb;
-
- ra = (Node*)va;
- rb = (Node*)vb;
- return ra->local - rb->local;
-}
+ Type *t;
+ int a;
+ Node n1, n2, ax, dx;
-static Prog* throwpc;
+ t = nl->type;
+ a = optoas(OHMUL, t);
+ // gen nl in n1.
+ tempname(&n1, t);
+ cgen(nl, &n1);
+ // gen nr in n2.
+ regalloc(&n2, t, res);
+ cgen(nr, &n2);
+
+ // multiply.
+ nodreg(&ax, t, D_AX);
+ gmove(&n2, &ax);
+ gins(a, &n1, N);
+ regfree(&n2);
-// We're only going to bother inlining if we can
-// convert all the arguments to 32 bits safely. Can we?
-static int
-fix64(NodeList *nn, int n)
-{
- NodeList *l;
- Node *r;
- int i;
-
- l = nn;
- for(i=0; i<n; i++) {
- r = l->n->right;
- if(is64(r->type) && !smallintconst(r)) {
- if(r->op == OCONV)
- r = r->left;
- if(is64(r->type))
- return 0;
- }
- l = l->next;
+ if(t->width == 1) {
+ // byte multiply behaves differently.
+ nodreg(&ax, t, D_AH);
+ nodreg(&dx, t, D_DL);
+ gmove(&ax, &dx);
}
- return 1;
+ nodreg(&dx, t, D_DX);
+ gmove(&dx, res);
}
+static void cgen_float387(Node *n, Node *res);
+static void cgen_floatsse(Node *n, Node *res);
+
+/*
+ * generate floating-point operation.
+ */
void
-getargs(NodeList *nn, Node *reg, int n)
+cgen_float(Node *n, Node *res)
{
- NodeList *l;
- Node *r;
- int i;
-
- throwpc = nil;
-
- l = nn;
- for(i=0; i<n; i++) {
- r = l->n->right;
- if(is64(r->type)) {
- if(r->op == OCONV)
- r = r->left;
- else if(smallintconst(r))
- r->type = types[TUINT32];
- if(is64(r->type))
- fatal("getargs");
+ Node *nl;
+ Node n1, n2;
+ Prog *p1, *p2, *p3;
+
+ nl = n->left;
+ switch(n->op) {
+ case OEQ:
+ case ONE:
+ case OLT:
+ case OLE:
+ case OGE:
+ p1 = gbranch(AJMP, T, 0);
+ p2 = pc;
+ gmove(nodbool(1), res);
+ p3 = gbranch(AJMP, T, 0);
+ patch(p1, pc);
+ bgen(n, 1, 0, p2);
+ gmove(nodbool(0), res);
+ patch(p3, pc);
+ return;
+
+ case OPLUS:
+ cgen(nl, res);
+ return;
+
+ case OCONV:
+ if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
+ cgen(nl, res);
+ return;
}
- if(!smallintconst(r) && !isslice(r->type)) {
- if(i < 3) // AX CX DX
- nodreg(reg+i, r->type, D_AX+i);
- else
- reg[i].op = OXXX;
- regalloc(reg+i, r->type, reg+i);
- cgen(r, reg+i);
- } else
- reg[i] = *r;
- if(reg[i].local != 0)
- yyerror("local used");
- reg[i].local = l->n->left->xoffset;
- l = l->next;
+
+ tempname(&n2, n->type);
+ mgen(nl, &n1, res);
+ gmove(&n1, &n2);
+ gmove(&n2, res);
+ mfree(&n1);
+ return;
}
- qsort((void*)reg, n, sizeof(*reg), regcmp);
- for(i=0; i<n; i++)
- reg[i].local = 0;
+
+ if(use_sse)
+ cgen_floatsse(n, res);
+ else
+ cgen_float387(n, res);
}
-void
-cmpandthrow(Node *nl, Node *nr)
+// floating-point. 387 (not SSE2)
+static void
+cgen_float387(Node *n, Node *res)
{
- vlong cl;
- Prog *p1;
- int op;
- Node *c, n1;
- Type *t;
+ Node f0, f1;
+ Node *nl, *nr;
- op = OLE;
- if(smallintconst(nl)) {
- cl = mpgetfix(nl->val.u.xval);
- if(cl == 0)
- return;
- if(smallintconst(nr))
- return;
- // put the constant on the right
- op = brrev(op);
- c = nl;
- nl = nr;
- nr = c;
- }
-
- // Arguments are known not to be 64-bit,
- // but they might be smaller than 32 bits.
- // Check if we need to use a temporary.
- // At least one of the arguments is 32 bits
- // (the len or cap) so one temporary suffices.
- n1.op = OXXX;
- t = types[TUINT32];
- if(nl->type->width != t->width) {
- regalloc(&n1, t, nl);
- gmove(nl, &n1);
- nl = &n1;
- } else if(nr->type->width != t->width) {
- regalloc(&n1, t, nr);
- gmove(nr, &n1);
- nr = &n1;
- }
- gins(optoas(OCMP, t), nl, nr);
- if(n1.op != OXXX)
- regfree(&n1);
- if(throwpc == nil) {
- p1 = gbranch(optoas(op, t), T);
- throwpc = pc;
- ginscall(panicslice, 0);
- patch(p1, pc);
+ nl = n->left;
+ nr = n->right;
+ nodreg(&f0, nl->type, D_F0);
+ nodreg(&f1, n->type, D_F0+1);
+ if(nr != N)
+ goto flt2;
+
+ // unary
+ cgen(nl, &f0);
+ if(n->op != OCONV && n->op != OPLUS)
+ gins(foptoas(n->op, n->type, 0), N, N);
+ gmove(&f0, res);
+ return;
+
+flt2: // binary
+ if(nl->ullman >= nr->ullman) {
+ cgen(nl, &f0);
+ if(nr->addable)
+ gins(foptoas(n->op, n->type, 0), nr, &f0);
+ else {
+ cgen(nr, &f0);
+ gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
+ }
} else {
- op = brcom(op);
- p1 = gbranch(optoas(op, t), T);
- patch(p1, throwpc);
+ cgen(nr, &f0);
+ if(nl->addable)
+ gins(foptoas(n->op, n->type, Frev), nl, &f0);
+ else {
+ cgen(nl, &f0);
+ gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
+ }
}
-}
+ gmove(&f0, res);
+ return;
-int
-sleasy(Node *n)
-{
- if(n->op != ONAME)
- return 0;
- if(!n->addable)
- return 0;
- return 1;
}
-// generate inline code for
-// slicearray
-// sliceslice
-// arraytoslice
-int
-cgen_inline(Node *n, Node *res)
+static void
+cgen_floatsse(Node *n, Node *res)
{
- Node nodes[5];
- Node n1, n2, nres, ntemp;
- vlong v;
- int i, narg, nochk;
-
- if(n->op != OCALLFUNC)
- goto no;
- if(!n->left->addable)
- goto no;
- if(n->left->sym == S)
- goto no;
- if(n->left->sym->pkg != runtimepkg)
- goto no;
- if(strcmp(n->left->sym->name, "slicearray") == 0)
- goto slicearray;
- if(strcmp(n->left->sym->name, "sliceslice") == 0) {
- narg = 4;
- goto sliceslice;
- }
- if(strcmp(n->left->sym->name, "sliceslice1") == 0) {
- narg = 3;
- goto sliceslice;
- }
- goto no;
-
-slicearray:
- if(!sleasy(res))
- goto no;
- if(!fix64(n->list, 5))
- goto no;
- getargs(n->list, nodes, 5);
-
- // if(hb[3] > nel[1]) goto throw
- cmpandthrow(&nodes[3], &nodes[1]);
-
- // if(lb[2] > hb[3]) goto throw
- cmpandthrow(&nodes[2], &nodes[3]);
-
- // len = hb[3] - lb[2] (destroys hb)
- n2 = *res;
- n2.xoffset += Array_nel;
- n2.type = types[TUINT32];
-
- if(smallintconst(&nodes[3]) && smallintconst(&nodes[2])) {
- v = mpgetfix(nodes[3].val.u.xval) -
- mpgetfix(nodes[2].val.u.xval);
- nodconst(&n1, types[TUINT32], v);
- gins(optoas(OAS, types[TUINT32]), &n1, &n2);
- } else {
- regalloc(&n1, types[TUINT32], &nodes[3]);
- gmove(&nodes[3], &n1);
- if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0)
- gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1);
- gins(optoas(OAS, types[TUINT32]), &n1, &n2);
- regfree(&n1);
- }
+ Node *nl, *nr, *r;
+ Node n1, n2, nt;
+ int a;
- // cap = nel[1] - lb[2] (destroys nel)
- n2 = *res;
- n2.xoffset += Array_cap;
- n2.type = types[TUINT32];
+ nl = n->left;
+ nr = n->right;
+ switch(n->op) {
+ default:
+ dump("cgen_floatsse", n);
+ fatal("cgen_floatsse %O", n->op);
+ return;
- if(smallintconst(&nodes[1]) && smallintconst(&nodes[2])) {
- v = mpgetfix(nodes[1].val.u.xval) -
- mpgetfix(nodes[2].val.u.xval);
- nodconst(&n1, types[TUINT32], v);
- gins(optoas(OAS, types[TUINT32]), &n1, &n2);
- } else {
- regalloc(&n1, types[TUINT32], &nodes[1]);
- gmove(&nodes[1], &n1);
- if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0)
- gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1);
- gins(optoas(OAS, types[TUINT32]), &n1, &n2);
- regfree(&n1);
+ case OMINUS:
+ case OCOM:
+ nr = nodintconst(-1);
+ convlit(&nr, n->type);
+ a = foptoas(OMUL, nl->type, 0);
+ goto sbop;
+
+ // symmetric binary
+ case OADD:
+ case OMUL:
+ a = foptoas(n->op, nl->type, 0);
+ goto sbop;
+
+ // asymmetric binary
+ case OSUB:
+ case OMOD:
+ case ODIV:
+ a = foptoas(n->op, nl->type, 0);
+ goto abop;
}
- // if slice could be too big, dereference to
- // catch nil array pointer.
- if(nodes[0].op == OREGISTER && nodes[0].type->type->width >= unmappedzero) {
- n2 = nodes[0];
- n2.xoffset = 0;
- n2.op = OINDREG;
- n2.type = types[TUINT8];
- gins(ATESTB, nodintconst(0), &n2);
+sbop: // symmetric binary
+ if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
+ r = nl;
+ nl = nr;
+ nr = r;
}
- // ary = old[0] + (lb[2] * width[4]) (destroys old)
- n2 = *res;
- n2.xoffset += Array_array;
- n2.type = types[tptr];
-
- if(smallintconst(&nodes[2]) && smallintconst(&nodes[4])) {
- v = mpgetfix(nodes[2].val.u.xval) *
- mpgetfix(nodes[4].val.u.xval);
- if(v != 0) {
- nodconst(&n1, types[tptr], v);
- gins(optoas(OADD, types[tptr]), &n1, &nodes[0]);
- }
+abop: // asymmetric binary
+ if(nl->ullman >= nr->ullman) {
+ tempname(&nt, nl->type);
+ cgen(nl, &nt);
+ mgen(nr, &n2, N);
+ regalloc(&n1, nl->type, res);
+ gmove(&nt, &n1);
+ gins(a, &n2, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ mfree(&n2);
} else {
- regalloc(&n1, types[tptr], &nodes[2]);
- gmove(&nodes[2], &n1);
- if(!smallintconst(&nodes[4]) || mpgetfix(nodes[4].val.u.xval) != 1)
- gins(optoas(OMUL, types[tptr]), &nodes[4], &n1);
- gins(optoas(OADD, types[tptr]), &n1, &nodes[0]);
+ regalloc(&n2, nr->type, res);
+ cgen(nr, &n2);
+ regalloc(&n1, nl->type, N);
+ cgen(nl, &n1);
+ gins(a, &n2, &n1);
+ regfree(&n2);
+ gmove(&n1, res);
regfree(&n1);
}
- gins(optoas(OAS, types[tptr]), &nodes[0], &n2);
-
- for(i=0; i<5; i++) {
- if(nodes[i].op == OREGISTER)
- regfree(&nodes[i]);
- }
- return 1;
+ return;
+}
-sliceslice:
- if(!fix64(n->list, narg))
- goto no;
- nochk = n->etype; // skip bounds checking
- ntemp.op = OXXX;
- if(!sleasy(n->list->n->right)) {
- Node *n0;
-
- n0 = n->list->n->right;
- tempname(&ntemp, res->type);
- cgen(n0, &ntemp);
- n->list->n->right = &ntemp;
- getargs(n->list, nodes, narg);
- n->list->n->right = n0;
- } else
- getargs(n->list, nodes, narg);
+void
+bgen_float(Node *n, int true, int likely, Prog *to)
+{
+ int et, a;
+ Node *nl, *nr, *r;
+ Node n1, n2, n3, tmp, t1, t2, ax;
+ Prog *p1, *p2;
- nres = *res; // result
- if(!sleasy(res)) {
- if(ntemp.op == OXXX)
- tempname(&ntemp, res->type);
- nres = ntemp;
+ nl = n->left;
+ nr = n->right;
+ a = n->op;
+ if(!true) {
+ // brcom is not valid on floats when NaN is involved.
+ p1 = gbranch(AJMP, T, 0);
+ p2 = gbranch(AJMP, T, 0);
+ patch(p1, pc);
+ // No need to avoid re-genning ninit.
+ bgen_float(n, 1, -likely, p2);
+ patch(gbranch(AJMP, T, 0), to);
+ patch(p2, pc);
+ return;
}
- if(narg == 3) { // old[lb:]
- // move width to where it would be for old[lb:hb]
- nodes[3] = nodes[2];
- nodes[2].op = OXXX;
-
- // if(lb[1] > old.nel[0]) goto throw;
- n2 = nodes[0];
- n2.xoffset += Array_nel;
- n2.type = types[TUINT32];
- if(!nochk)
- cmpandthrow(&nodes[1], &n2);
-
- // ret.nel = old.nel[0]-lb[1];
- n2 = nodes[0];
- n2.xoffset += Array_nel;
- n2.type = types[TUINT32];
-
- regalloc(&n1, types[TUINT32], N);
- gins(optoas(OAS, types[TUINT32]), &n2, &n1);
- if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
- gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
-
- n2 = nres;
- n2.xoffset += Array_nel;
- n2.type = types[TUINT32];
- gins(optoas(OAS, types[TUINT32]), &n1, &n2);
- regfree(&n1);
- } else { // old[lb:hb]
- n2 = nodes[0];
- n2.xoffset += Array_cap;
- n2.type = types[TUINT32];
- if (!nochk) {
- // if(hb[2] > old.cap[0]) goto throw;
- cmpandthrow(&nodes[2], &n2);
- // if(lb[1] > hb[2]) goto throw;
- cmpandthrow(&nodes[1], &nodes[2]);
- }
-
- // ret.len = hb[2]-lb[1]; (destroys hb[2])
- n2 = nres;
- n2.xoffset += Array_nel;
- n2.type = types[TUINT32];
-
- if(smallintconst(&nodes[2]) && smallintconst(&nodes[1])) {
- v = mpgetfix(nodes[2].val.u.xval) -
- mpgetfix(nodes[1].val.u.xval);
- nodconst(&n1, types[TUINT32], v);
- gins(optoas(OAS, types[TUINT32]), &n1, &n2);
- } else {
- regalloc(&n1, types[TUINT32], &nodes[2]);
- gmove(&nodes[2], &n1);
- if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
- gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
- gins(optoas(OAS, types[TUINT32]), &n1, &n2);
- regfree(&n1);
- }
+ if(use_sse)
+ goto sse;
+ else
+ goto x87;
+
+x87:
+ a = brrev(a); // because the args are stacked
+ if(a == OGE || a == OGT) {
+ // only < and <= work right with NaN; reverse if needed
+ r = nr;
+ nr = nl;
+ nl = r;
+ a = brrev(a);
}
- // ret.cap = old.cap[0]-lb[1]; (uses hb[2])
- n2 = nodes[0];
- n2.xoffset += Array_cap;
- n2.type = types[TUINT32];
-
- regalloc(&n1, types[TUINT32], &nodes[2]);
- gins(optoas(OAS, types[TUINT32]), &n2, &n1);
- if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
- gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
-
- n2 = nres;
- n2.xoffset += Array_cap;
- n2.type = types[TUINT32];
- gins(optoas(OAS, types[TUINT32]), &n1, &n2);
- regfree(&n1);
-
- // ret.array = old.array[0]+lb[1]*width[3]; (uses lb[1])
- n2 = nodes[0];
- n2.xoffset += Array_array;
- n2.type = types[tptr];
-
- regalloc(&n1, types[tptr], &nodes[1]);
- if(smallintconst(&nodes[1]) && smallintconst(&nodes[3])) {
- gins(optoas(OAS, types[tptr]), &n2, &n1);
- v = mpgetfix(nodes[1].val.u.xval) *
- mpgetfix(nodes[3].val.u.xval);
- if(v != 0) {
- nodconst(&n2, types[tptr], v);
- gins(optoas(OADD, types[tptr]), &n2, &n1);
+ nodreg(&tmp, nr->type, D_F0);
+ nodreg(&n2, nr->type, D_F0 + 1);
+ nodreg(&ax, types[TUINT16], D_AX);
+ et = simsimtype(nr->type);
+ if(et == TFLOAT64) {
+ if(nl->ullman > nr->ullman) {
+ cgen(nl, &tmp);
+ cgen(nr, &tmp);
+ gins(AFXCHD, &tmp, &n2);
+ } else {
+ cgen(nr, &tmp);
+ cgen(nl, &tmp);
}
+ gins(AFUCOMIP, &tmp, &n2);
+ gins(AFMOVDP, &tmp, &tmp); // annoying pop but still better than STSW+SAHF
} else {
- gmove(&nodes[1], &n1);
- if(!smallintconst(&nodes[3]) || mpgetfix(nodes[3].val.u.xval) != 1)
- gins(optoas(OMUL, types[tptr]), &nodes[3], &n1);
- gins(optoas(OADD, types[tptr]), &n2, &n1);
+ // TODO(rsc): The moves back and forth to memory
+ // here are for truncating the value to 32 bits.
+ // This handles 32-bit comparison but presumably
+ // all the other ops have the same problem.
+ // We need to figure out what the right general
+ // solution is, besides telling people to use float64.
+ tempname(&t1, types[TFLOAT32]);
+ tempname(&t2, types[TFLOAT32]);
+ cgen(nr, &t1);
+ cgen(nl, &t2);
+ gmove(&t2, &tmp);
+ gins(AFCOMFP, &t1, &tmp);
+ gins(AFSTSW, N, &ax);
+ gins(ASAHF, N, N);
}
- n2 = nres;
- n2.xoffset += Array_array;
- n2.type = types[tptr];
- gins(optoas(OAS, types[tptr]), &n1, &n2);
- regfree(&n1);
+ goto ret;
- for(i=0; i<4; i++) {
- if(nodes[i].op == OREGISTER)
- regfree(&nodes[i]);
+sse:
+ if(!nl->addable) {
+ tempname(&n1, nl->type);
+ cgen(nl, &n1);
+ nl = &n1;
+ }
+ if(!nr->addable) {
+ tempname(&tmp, nr->type);
+ cgen(nr, &tmp);
+ nr = &tmp;
+ }
+ regalloc(&n2, nr->type, N);
+ gmove(nr, &n2);
+ nr = &n2;
+
+ if(nl->op != OREGISTER) {
+ regalloc(&n3, nl->type, N);
+ gmove(nl, &n3);
+ nl = &n3;
}
- if(!sleasy(res)) {
- cgen(&nres, res);
+ if(a == OGE || a == OGT) {
+ // only < and <= work right with NaN; reverse if needed
+ r = nr;
+ nr = nl;
+ nl = r;
+ a = brrev(a);
}
- return 1;
-no:
- return 0;
+ gins(foptoas(OCMP, nr->type, 0), nl, nr);
+ if(nl->op == OREGISTER)
+ regfree(nl);
+ regfree(nr);
+
+ret:
+ if(a == OEQ) {
+ // neither NE nor P
+ p1 = gbranch(AJNE, T, -likely);
+ p2 = gbranch(AJPS, T, -likely);
+ patch(gbranch(AJMP, T, 0), to);
+ patch(p1, pc);
+ patch(p2, pc);
+ } else if(a == ONE) {
+ // either NE or P
+ patch(gbranch(AJNE, T, likely), to);
+ patch(gbranch(AJPS, T, likely), to);
+ } else
+ patch(gbranch(optoas(a, nr->type), T, likely), to);
+
}