summaryrefslogtreecommitdiff
path: root/src/cmd/6g/ggen.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/6g/ggen.c')
-rw-r--r--src/cmd/6g/ggen.c1412
1 files changed, 1412 insertions, 0 deletions
diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c
new file mode 100644
index 000000000..a5f278384
--- /dev/null
+++ b/src/cmd/6g/ggen.c
@@ -0,0 +1,1412 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#undef EXTERN
+#define EXTERN
+#include "gg.h"
+#include "opt.h"
+
+void
+defframe(Prog *ptxt)
+{
+ // fill in argument size
+ ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
+
+ // fill in final stack size
+ ptxt->to.offset <<= 32;
+ ptxt->to.offset |= rnd(stksize+maxarg, widthptr);
+}
+
+// Sweep the prog list to mark any used nodes.
+void
+markautoused(Prog* p)
+{
+ for (; p; p = p->link) {
+ if (p->from.type == D_AUTO && p->from.node)
+ p->from.node->used++;
+
+ if (p->to.type == D_AUTO && p->to.node)
+ p->to.node->used++;
+ }
+}
+
+// Fixup instructions after compactframe has moved all autos around.
+void
+fixautoused(Prog* p)
+{
+ for (; p; p = p->link) {
+ if (p->from.type == D_AUTO && p->from.node)
+ p->from.offset += p->from.node->stkdelta;
+
+ if (p->to.type == D_AUTO && p->to.node)
+ p->to.offset += p->to.node->stkdelta;
+ }
+}
+
+
+/*
+ * generate:
+ * call f
+ * proc=0 normal call
+ * proc=1 goroutine run in new proc
+ * proc=2 defer call save away stack
+ */
+void
+ginscall(Node *f, int proc)
+{
+ Prog *p;
+ Node reg, con;
+
+ switch(proc) {
+ default:
+ fatal("ginscall: bad proc %d", proc);
+ break;
+
+ case 0: // normal call
+ p = gins(ACALL, N, f);
+ afunclit(&p->to);
+ break;
+
+ case 1: // call in new proc (go)
+ case 2: // deferred call (defer)
+ nodreg(&reg, types[TINT64], D_CX);
+ gins(APUSHQ, f, N);
+ nodconst(&con, types[TINT32], argsize(f->type));
+ gins(APUSHQ, &con, N);
+ if(proc == 1)
+ ginscall(newproc, 0);
+ else {
+ if(!hasdefer)
+ fatal("hasdefer=0 but has defer");
+ ginscall(deferproc, 0);
+ }
+ gins(APOPQ, N, &reg);
+ gins(APOPQ, N, &reg);
+ if(proc == 2) {
+ nodreg(&reg, types[TINT64], D_AX);
+ gins(ATESTQ, &reg, &reg);
+ patch(gbranch(AJNE, T), retpc);
+ }
+ break;
+ }
+}
+
+/*
+ * n is call to interface method.
+ * generate res = n.
+ */
+void
+cgen_callinter(Node *n, Node *res, int proc)
+{
+ Node *i, *f;
+ Node tmpi, nodo, nodr, nodsp;
+
+ i = n->left;
+ if(i->op != ODOTINTER)
+ fatal("cgen_callinter: not ODOTINTER %O", i->op);
+
+ f = i->right; // field
+ if(f->op != ONAME)
+ fatal("cgen_callinter: not ONAME %O", f->op);
+
+ i = i->left; // interface
+
+ if(!i->addable) {
+ tempname(&tmpi, i->type);
+ cgen(i, &tmpi);
+ i = &tmpi;
+ }
+
+ genlist(n->list); // assign the args
+
+ regalloc(&nodr, types[tptr], res);
+ regalloc(&nodo, types[tptr], &nodr);
+ nodo.op = OINDREG;
+
+ agen(i, &nodr); // REG = &inter
+
+ nodindreg(&nodsp, types[tptr], D_SP);
+ nodo.xoffset += widthptr;
+ cgen(&nodo, &nodsp); // 0(SP) = 8(REG) -- i.data
+
+ nodo.xoffset -= widthptr;
+ cgen(&nodo, &nodr); // REG = 0(REG) -- i.tab
+
+ nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
+ cgen(&nodo, &nodr); // REG = 32+offset(REG) -- i.tab->fun[f]
+
+ // BOTCH nodr.type = fntype;
+ nodr.type = n->left->type;
+ ginscall(&nodr, proc);
+
+ regfree(&nodr);
+ regfree(&nodo);
+
+ setmaxarg(n->left->type);
+}
+
+/*
+ * generate function call;
+ * proc=0 normal call
+ * proc=1 goroutine run in new proc
+ * proc=2 defer call save away stack
+ */
+void
+cgen_call(Node *n, int proc)
+{
+ Type *t;
+ Node nod, afun;
+
+ if(n == N)
+ return;
+
+ if(n->left->ullman >= UINF) {
+ // if name involves a fn call
+ // precompute the address of the fn
+ tempname(&afun, types[tptr]);
+ cgen(n->left, &afun);
+ }
+
+ genlist(n->list); // assign the args
+ t = n->left->type;
+
+ setmaxarg(t);
+
+ // call tempname pointer
+ if(n->left->ullman >= UINF) {
+ regalloc(&nod, types[tptr], N);
+ cgen_as(&nod, &afun);
+ nod.type = t;
+ ginscall(&nod, proc);
+ regfree(&nod);
+ goto ret;
+ }
+
+ // call pointer
+ if(n->left->op != ONAME || n->left->class != PFUNC) {
+ regalloc(&nod, types[tptr], N);
+ cgen_as(&nod, n->left);
+ nod.type = t;
+ ginscall(&nod, proc);
+ regfree(&nod);
+ goto ret;
+ }
+
+ // call direct
+ n->left->method = 1;
+ ginscall(n->left, proc);
+
+
+ret:
+ ;
+}
+
+/*
+ * call to n has already been generated.
+ * generate:
+ * res = return value from call.
+ */
+void
+cgen_callret(Node *n, Node *res)
+{
+ Node nod;
+ Type *fp, *t;
+ Iter flist;
+
+ t = n->left->type;
+ if(t->etype == TPTR32 || t->etype == TPTR64)
+ t = t->type;
+
+ fp = structfirst(&flist, getoutarg(t));
+ if(fp == T)
+ fatal("cgen_callret: nil");
+
+ memset(&nod, 0, sizeof(nod));
+ nod.op = OINDREG;
+ nod.val.u.reg = D_SP;
+ nod.addable = 1;
+
+ nod.xoffset = fp->width;
+ nod.type = fp->type;
+ cgen_as(res, &nod);
+}
+
+/*
+ * call to n has already been generated.
+ * generate:
+ * res = &return value from call.
+ */
+void
+cgen_aret(Node *n, Node *res)
+{
+ Node nod1, nod2;
+ Type *fp, *t;
+ Iter flist;
+
+ t = n->left->type;
+ if(isptr[t->etype])
+ t = t->type;
+
+ fp = structfirst(&flist, getoutarg(t));
+ if(fp == T)
+ fatal("cgen_aret: nil");
+
+ memset(&nod1, 0, sizeof(nod1));
+ nod1.op = OINDREG;
+ nod1.val.u.reg = D_SP;
+ nod1.addable = 1;
+
+ nod1.xoffset = fp->width;
+ nod1.type = fp->type;
+
+ if(res->op != OREGISTER) {
+ regalloc(&nod2, types[tptr], res);
+ gins(ALEAQ, &nod1, &nod2);
+ gins(AMOVQ, &nod2, res);
+ regfree(&nod2);
+ } else
+ gins(ALEAQ, &nod1, res);
+}
+
+/*
+ * generate return.
+ * n->left is assignments to return values.
+ */
+void
+cgen_ret(Node *n)
+{
+ genlist(n->list); // copy out args
+ if(hasdefer || curfn->exit)
+ gjmp(retpc);
+ else
+ gins(ARET, N, N);
+}
+
+/*
+ * generate += *= etc.
+ */
+void
+cgen_asop(Node *n)
+{
+ Node n1, n2, n3, n4;
+ Node *nl, *nr;
+ Prog *p1;
+ Addr addr;
+ int a;
+
+ nl = n->left;
+ nr = n->right;
+
+ if(nr->ullman >= UINF && nl->ullman >= UINF) {
+ tempname(&n1, nr->type);
+ cgen(nr, &n1);
+ n2 = *n;
+ n2.right = &n1;
+ cgen_asop(&n2);
+ goto ret;
+ }
+
+ if(!isint[nl->type->etype])
+ goto hard;
+ if(!isint[nr->type->etype])
+ goto hard;
+
+ switch(n->etype) {
+ case OADD:
+ if(smallintconst(nr))
+ if(mpgetfix(nr->val.u.xval) == 1) {
+ a = optoas(OINC, nl->type);
+ if(nl->addable) {
+ gins(a, N, nl);
+ goto ret;
+ }
+ if(sudoaddable(a, nl, &addr)) {
+ p1 = gins(a, N, N);
+ p1->to = addr;
+ sudoclean();
+ goto ret;
+ }
+ }
+ break;
+
+ case OSUB:
+ if(smallintconst(nr))
+ if(mpgetfix(nr->val.u.xval) == 1) {
+ a = optoas(ODEC, nl->type);
+ if(nl->addable) {
+ gins(a, N, nl);
+ goto ret;
+ }
+ if(sudoaddable(a, nl, &addr)) {
+ p1 = gins(a, N, N);
+ p1->to = addr;
+ sudoclean();
+ goto ret;
+ }
+ }
+ break;
+ }
+
+ switch(n->etype) {
+ case OADD:
+ case OSUB:
+ case OXOR:
+ case OAND:
+ case OOR:
+ a = optoas(n->etype, nl->type);
+ if(nl->addable) {
+ if(smallintconst(nr)) {
+ gins(a, nr, nl);
+ goto ret;
+ }
+ regalloc(&n2, nr->type, N);
+ cgen(nr, &n2);
+ gins(a, &n2, nl);
+ regfree(&n2);
+ goto ret;
+ }
+ if(nr->ullman < UINF)
+ if(sudoaddable(a, nl, &addr)) {
+ if(smallintconst(nr)) {
+ p1 = gins(a, nr, N);
+ p1->to = addr;
+ sudoclean();
+ goto ret;
+ }
+ regalloc(&n2, nr->type, N);
+ cgen(nr, &n2);
+ p1 = gins(a, &n2, N);
+ p1->to = addr;
+ regfree(&n2);
+ sudoclean();
+ goto ret;
+ }
+ }
+
+hard:
+ n2.op = 0;
+ n1.op = 0;
+ if(nr->ullman >= nl->ullman || nl->addable) {
+ regalloc(&n2, nr->type, N);
+ cgen(nr, &n2);
+ nr = &n2;
+ } else {
+ tempname(&n2, nr->type);
+ cgen(nr, &n2);
+ nr = &n2;
+ }
+ if(!nl->addable) {
+ igen(nl, &n1, N);
+ nl = &n1;
+ }
+
+ n3 = *n;
+ n3.left = nl;
+ n3.right = nr;
+ n3.op = n->etype;
+
+ regalloc(&n4, nl->type, N);
+ cgen(&n3, &n4);
+ gmove(&n4, nl);
+
+ if(n1.op)
+ regfree(&n1);
+ if(n2.op == OREGISTER)
+ regfree(&n2);
+ regfree(&n4);
+
+ret:
+ ;
+}
+
+int
+samereg(Node *a, Node *b)
+{
+ if(a == N || b == N)
+ return 0;
+ if(a->op != OREGISTER)
+ return 0;
+ if(b->op != OREGISTER)
+ return 0;
+ if(a->val.u.reg != b->val.u.reg)
+ return 0;
+ return 1;
+}
+
+/*
+ * generate division.
+ * generates one of:
+ * res = nl / nr
+ * res = nl % nr
+ * according to op.
+ */
+void
+dodiv(int op, Node *nl, Node *nr, Node *res)
+{
+ int a, check;
+ Node n3, n4, n5;
+ Type *t;
+ Node ax, dx, oldax, olddx;
+ Prog *p1, *p2, *p3;
+
+ // Have to be careful about handling
+ // most negative int divided by -1 correctly.
+ // The hardware will trap.
+ // Also the byte divide instruction needs AH,
+ // which we otherwise don't have to deal with.
+ // Easiest way to avoid for int8, int16: use int32.
+ // For int32 and int64, use explicit test.
+ // Could use int64 hw for int32.
+ t = nl->type;
+ check = 0;
+ if(issigned[t->etype]) {
+ check = 1;
+ if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
+ check = 0;
+ else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
+ check = 0;
+ }
+ if(t->width < 4) {
+ if(issigned[t->etype])
+ t = types[TINT32];
+ else
+ t = types[TUINT32];
+ check = 0;
+ }
+ a = optoas(op, t);
+
+ regalloc(&n3, t, N);
+ if(nl->ullman >= nr->ullman) {
+ savex(D_AX, &ax, &oldax, res, t);
+ cgen(nl, &ax);
+ regalloc(&ax, t, &ax); // mark ax live during cgen
+ cgen(nr, &n3);
+ regfree(&ax);
+ } else {
+ cgen(nr, &n3);
+ savex(D_AX, &ax, &oldax, res, t);
+ cgen(nl, &ax);
+ }
+ p3 = P;
+ if(check) {
+ nodconst(&n4, t, -1);
+ gins(optoas(OCMP, t), &n3, &n4);
+ p1 = gbranch(optoas(ONE, t), T);
+ nodconst(&n4, t, -1LL<<(t->width*8-1));
+ if(t->width == 8) {
+ n5 = n4;
+ regalloc(&n4, t, N);
+ gins(AMOVQ, &n5, &n4);
+ }
+ gins(optoas(OCMP, t), &ax, &n4);
+ p2 = gbranch(optoas(ONE, t), T);
+ if(op == ODIV)
+ gmove(&n4, res);
+ if(t->width == 8)
+ regfree(&n4);
+ if(op == OMOD) {
+ nodconst(&n4, t, 0);
+ gmove(&n4, res);
+ }
+ p3 = gbranch(AJMP, T);
+ patch(p1, pc);
+ patch(p2, pc);
+ }
+ savex(D_DX, &dx, &olddx, res, t);
+ if(!issigned[t->etype]) {
+ nodconst(&n4, t, 0);
+ gmove(&n4, &dx);
+ } else
+ gins(optoas(OEXTEND, t), N, N);
+ gins(a, &n3, N);
+ regfree(&n3);
+ if(op == ODIV)
+ gmove(&ax, res);
+ else
+ gmove(&dx, res);
+ restx(&dx, &olddx);
+ if(check)
+ patch(p3, pc);
+ restx(&ax, &oldax);
+}
+
+/*
+ * register dr is one of the special ones (AX, CX, DI, SI, etc.).
+ * we need to use it. if it is already allocated as a temporary
+ * (r > 1; can only happen if a routine like sgen passed a
+ * special as cgen's res and then cgen used regalloc to reuse
+ * it as its own temporary), then move it for now to another
+ * register. caller must call restx to move it back.
+ * the move is not necessary if dr == res, because res is
+ * known to be dead.
+ */
+void
+savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
+{
+ int r;
+
+ r = reg[dr];
+
+ // save current ax and dx if they are live
+ // and not the destination
+ memset(oldx, 0, sizeof *oldx);
+ nodreg(x, t, dr);
+ if(r > 1 && !samereg(x, res)) {
+ regalloc(oldx, types[TINT64], N);
+ x->type = types[TINT64];
+ gmove(x, oldx);
+ x->type = t;
+ oldx->ostk = r; // squirrel away old r value
+ reg[dr] = 1;
+ }
+}
+
+void
+restx(Node *x, Node *oldx)
+{
+ if(oldx->op != 0) {
+ x->type = types[TINT64];
+ reg[x->val.u.reg] = oldx->ostk;
+ gmove(oldx, x);
+ regfree(oldx);
+ }
+}
+
+/*
+ * generate division according to op, one of:
+ * res = nl / nr
+ * res = nl % nr
+ */
+void
+cgen_div(int op, Node *nl, Node *nr, Node *res)
+{
+ Node n1, n2, n3, savl, savr;
+ Node ax, dx, oldax, olddx;
+ int n, w, s, a;
+ Magic m;
+
+ if(nl->ullman >= UINF) {
+ tempname(&savl, nl->type);
+ cgen(nl, &savl);
+ nl = &savl;
+ }
+ if(nr->ullman >= UINF) {
+ tempname(&savr, nr->type);
+ cgen(nr, &savr);
+ nr = &savr;
+ }
+
+ if(nr->op != OLITERAL)
+ goto longdiv;
+
+ // special cases of mod/div
+ // by a constant
+ w = nl->type->width*8;
+ s = 0;
+ n = powtwo(nr);
+ if(n >= 1000) {
+ // negative power of 2
+ s = 1;
+ n -= 1000;
+ }
+
+ if(n+1 >= w) {
+ // just sign bit
+ goto longdiv;
+ }
+
+ if(n < 0)
+ goto divbymul;
+ switch(n) {
+ case 0:
+ // divide by 1
+ regalloc(&n1, nl->type, res);
+ cgen(nl, &n1);
+ if(op == OMOD) {
+ gins(optoas(OXOR, nl->type), &n1, &n1);
+ } else
+ if(s)
+ gins(optoas(OMINUS, nl->type), N, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ return;
+ case 1:
+ // divide by 2
+ if(op == OMOD) {
+ if(issigned[nl->type->etype])
+ goto longmod;
+ regalloc(&n1, nl->type, res);
+ cgen(nl, &n1);
+ nodconst(&n2, nl->type, 1);
+ gins(optoas(OAND, nl->type), &n2, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ return;
+ }
+ regalloc(&n1, nl->type, res);
+ cgen(nl, &n1);
+ if(!issigned[nl->type->etype])
+ break;
+
+ // develop -1 iff nl is negative
+ regalloc(&n2, nl->type, N);
+ gmove(&n1, &n2);
+ nodconst(&n3, nl->type, w-1);
+ gins(optoas(ORSH, nl->type), &n3, &n2);
+ gins(optoas(OSUB, nl->type), &n2, &n1);
+ regfree(&n2);
+ break;
+ default:
+ if(op == OMOD) {
+ if(issigned[nl->type->etype])
+ goto longmod;
+ regalloc(&n1, nl->type, res);
+ cgen(nl, &n1);
+ nodconst(&n2, nl->type, mpgetfix(nr->val.u.xval)-1);
+ if(!smallintconst(&n2)) {
+ regalloc(&n3, nl->type, N);
+ gmove(&n2, &n3);
+ gins(optoas(OAND, nl->type), &n3, &n1);
+ regfree(&n3);
+ } else
+ gins(optoas(OAND, nl->type), &n2, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ return;
+ }
+ regalloc(&n1, nl->type, res);
+ cgen(nl, &n1);
+ if(!issigned[nl->type->etype])
+ break;
+
+ // develop (2^k)-1 iff nl is negative
+ regalloc(&n2, nl->type, N);
+ gmove(&n1, &n2);
+ nodconst(&n3, nl->type, w-1);
+ gins(optoas(ORSH, nl->type), &n3, &n2);
+ nodconst(&n3, nl->type, w-n);
+ gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2);
+ gins(optoas(OADD, nl->type), &n2, &n1);
+ regfree(&n2);
+ break;
+ }
+ nodconst(&n2, nl->type, n);
+ gins(optoas(ORSH, nl->type), &n2, &n1);
+ if(s)
+ gins(optoas(OMINUS, nl->type), N, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ return;
+
+divbymul:
+ // try to do division by multiply by (2^w)/d
+ // see hacker's delight chapter 10
+ switch(simtype[nl->type->etype]) {
+ default:
+ goto longdiv;
+
+ case TUINT8:
+ case TUINT16:
+ case TUINT32:
+ case TUINT64:
+ m.w = w;
+ m.ud = mpgetfix(nr->val.u.xval);
+ umagic(&m);
+ if(m.bad)
+ break;
+ if(op == OMOD)
+ goto longmod;
+
+ regalloc(&n1, nl->type, N);
+ cgen(nl, &n1); // num -> reg(n1)
+
+ savex(D_AX, &ax, &oldax, res, nl->type);
+ savex(D_DX, &dx, &olddx, res, nl->type);
+
+ nodconst(&n2, nl->type, m.um);
+ gmove(&n2, &ax); // const->ax
+
+ gins(optoas(OHMUL, nl->type), &n1, N); // imul reg
+ if(w == 8) {
+ // fix up 8-bit multiply
+ Node ah, dl;
+ nodreg(&ah, types[TUINT8], D_AH);
+ nodreg(&dl, types[TUINT8], D_DL);
+ gins(AMOVB, &ah, &dl);
+ }
+
+ if(m.ua) {
+ // need to add numerator accounting for overflow
+ gins(optoas(OADD, nl->type), &n1, &dx);
+ nodconst(&n2, nl->type, 1);
+ gins(optoas(ORRC, nl->type), &n2, &dx);
+ nodconst(&n2, nl->type, m.s-1);
+ gins(optoas(ORSH, nl->type), &n2, &dx);
+ } else {
+ nodconst(&n2, nl->type, m.s);
+ gins(optoas(ORSH, nl->type), &n2, &dx); // shift dx
+ }
+
+
+ regfree(&n1);
+ gmove(&dx, res);
+
+ restx(&ax, &oldax);
+ restx(&dx, &olddx);
+ return;
+
+ case TINT8:
+ case TINT16:
+ case TINT32:
+ case TINT64:
+ m.w = w;
+ m.sd = mpgetfix(nr->val.u.xval);
+ smagic(&m);
+ if(m.bad)
+ break;
+ if(op == OMOD)
+ goto longmod;
+
+ regalloc(&n1, nl->type, N);
+ cgen(nl, &n1); // num -> reg(n1)
+
+ savex(D_AX, &ax, &oldax, res, nl->type);
+ savex(D_DX, &dx, &olddx, res, nl->type);
+
+ nodconst(&n2, nl->type, m.sm);
+ gmove(&n2, &ax); // const->ax
+
+ gins(optoas(OHMUL, nl->type), &n1, N); // imul reg
+ if(w == 8) {
+ // fix up 8-bit multiply
+ Node ah, dl;
+ nodreg(&ah, types[TUINT8], D_AH);
+ nodreg(&dl, types[TUINT8], D_DL);
+ gins(AMOVB, &ah, &dl);
+ }
+
+ if(m.sm < 0) {
+ // need to add numerator
+ gins(optoas(OADD, nl->type), &n1, &dx);
+ }
+
+ nodconst(&n2, nl->type, m.s);
+ gins(optoas(ORSH, nl->type), &n2, &dx); // shift dx
+
+ nodconst(&n2, nl->type, w-1);
+ gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg
+ gins(optoas(OSUB, nl->type), &n1, &dx); // added
+
+ if(m.sd < 0) {
+ // this could probably be removed
+ // by factoring it into the multiplier
+ gins(optoas(OMINUS, nl->type), N, &dx);
+ }
+
+ regfree(&n1);
+ gmove(&dx, res);
+
+ restx(&ax, &oldax);
+ restx(&dx, &olddx);
+ return;
+ }
+ goto longdiv;
+
+longdiv:
+ // division and mod using (slow) hardware instruction
+ dodiv(op, nl, nr, res);
+ return;
+
+longmod:
+ // mod using formula A%B = A-(A/B*B) but
+ // we know that there is a fast algorithm for A/B
+ regalloc(&n1, nl->type, res);
+ cgen(nl, &n1);
+ regalloc(&n2, nl->type, N);
+ cgen_div(ODIV, &n1, nr, &n2);
+ a = optoas(OMUL, nl->type);
+ if(w == 8) {
+ // use 2-operand 16-bit multiply
+ // because there is no 2-operand 8-bit multiply
+ a = AIMULW;
+ }
+ if(!smallintconst(nr)) {
+ regalloc(&n3, nl->type, N);
+ cgen(nr, &n3);
+ gins(a, &n3, &n2);
+ regfree(&n3);
+ } else
+ gins(a, nr, &n2);
+ gins(optoas(OSUB, nl->type), &n2, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ regfree(&n2);
+}
+
+/*
+ * generate shift according to op, one of:
+ * res = nl << nr
+ * res = nl >> nr
+ */
+void
+cgen_shift(int op, Node *nl, Node *nr, Node *res)
+{
+ Node n1, n2, n3, n4, n5, cx, oldcx;
+ int a, rcx;
+ Prog *p1;
+ uvlong sc;
+ Type *tcount;
+
+ a = optoas(op, nl->type);
+
+ if(nr->op == OLITERAL) {
+ regalloc(&n1, nl->type, res);
+ cgen(nl, &n1);
+ sc = mpgetfix(nr->val.u.xval);
+ if(sc >= nl->type->width*8) {
+ // large shift gets 2 shifts by width
+ nodconst(&n3, types[TUINT32], nl->type->width*8-1);
+ gins(a, &n3, &n1);
+ gins(a, &n3, &n1);
+ } else
+ gins(a, nr, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ goto ret;
+ }
+
+ if(nl->ullman >= UINF) {
+ tempname(&n4, nl->type);
+ cgen(nl, &n4);
+ nl = &n4;
+ }
+ if(nr->ullman >= UINF) {
+ tempname(&n5, nr->type);
+ cgen(nr, &n5);
+ nr = &n5;
+ }
+
+ rcx = reg[D_CX];
+ nodreg(&n1, types[TUINT32], D_CX);
+
+ // Allow either uint32 or uint64 as shift type,
+ // to avoid unnecessary conversion from uint32 to uint64
+ // just to do the comparison.
+ tcount = types[simtype[nr->type->etype]];
+ if(tcount->etype < TUINT32)
+ tcount = types[TUINT32];
+
+ regalloc(&n1, nr->type, &n1); // to hold the shift type in CX
+ regalloc(&n3, tcount, &n1); // to clear high bits of CX
+
+ nodreg(&cx, types[TUINT64], D_CX);
+ memset(&oldcx, 0, sizeof oldcx);
+ if(rcx > 0 && !samereg(&cx, res)) {
+ regalloc(&oldcx, types[TUINT64], N);
+ gmove(&cx, &oldcx);
+ }
+ cx.type = tcount;
+
+ if(samereg(&cx, res))
+ regalloc(&n2, nl->type, N);
+ else
+ regalloc(&n2, nl->type, res);
+ if(nl->ullman >= nr->ullman) {
+ cgen(nl, &n2);
+ cgen(nr, &n1);
+ gmove(&n1, &n3);
+ } else {
+ cgen(nr, &n1);
+ gmove(&n1, &n3);
+ cgen(nl, &n2);
+ }
+ regfree(&n3);
+
+ // test and fix up large shifts
+ nodconst(&n3, tcount, nl->type->width*8);
+ gins(optoas(OCMP, tcount), &n1, &n3);
+ p1 = gbranch(optoas(OLT, tcount), T);
+ if(op == ORSH && issigned[nl->type->etype]) {
+ nodconst(&n3, types[TUINT32], nl->type->width*8-1);
+ gins(a, &n3, &n2);
+ } else {
+ nodconst(&n3, nl->type, 0);
+ gmove(&n3, &n2);
+ }
+ patch(p1, pc);
+ gins(a, &n1, &n2);
+
+ if(oldcx.op != 0) {
+ cx.type = types[TUINT64];
+ gmove(&oldcx, &cx);
+ regfree(&oldcx);
+ }
+
+ gmove(&n2, res);
+
+ regfree(&n1);
+ regfree(&n2);
+
+ret:
+ ;
+}
+
+/*
+ * generate byte multiply:
+ * res = nl * nr
+ * no 2-operand byte multiply instruction so have to do
+ * 16-bit multiply and take bottom half.
+ */
+void
+cgen_bmul(int op, Node *nl, Node *nr, Node *res)
+{
+ Node n1b, n2b, n1w, n2w;
+ Type *t;
+ int a;
+
+ if(nl->ullman >= nr->ullman) {
+ regalloc(&n1b, nl->type, res);
+ cgen(nl, &n1b);
+ regalloc(&n2b, nr->type, N);
+ cgen(nr, &n2b);
+ } else {
+ regalloc(&n2b, nr->type, N);
+ cgen(nr, &n2b);
+ regalloc(&n1b, nl->type, res);
+ cgen(nl, &n1b);
+ }
+
+ // copy from byte to short registers
+ t = types[TUINT16];
+ if(issigned[nl->type->etype])
+ t = types[TINT16];
+
+ regalloc(&n2w, t, &n2b);
+ cgen(&n2b, &n2w);
+
+ regalloc(&n1w, t, &n1b);
+ cgen(&n1b, &n1w);
+
+ a = optoas(op, t);
+ gins(a, &n2w, &n1w);
+ cgen(&n1w, &n1b);
+ cgen(&n1b, res);
+
+ regfree(&n1w);
+ regfree(&n2w);
+ regfree(&n1b);
+ regfree(&n2b);
+}
+
+void
+clearfat(Node *nl)
+{
+ uint32 w, c, q;
+ Node n1, oldn1, ax, oldax;
+
+ /* clear a fat object */
+ if(debug['g'])
+ dump("\nclearfat", nl);
+
+
+ w = nl->type->width;
+ if(w == 16)
+ if(componentgen(N, nl))
+ return;
+
+ c = w % 8; // bytes
+ q = w / 8; // quads
+
+ savex(D_DI, &n1, &oldn1, N, types[tptr]);
+ agen(nl, &n1);
+
+ savex(D_AX, &ax, &oldax, N, types[tptr]);
+ gconreg(AMOVQ, 0, D_AX);
+
+ if(q >= 4) {
+ gconreg(AMOVQ, q, D_CX);
+ gins(AREP, N, N); // repeat
+ gins(ASTOSQ, N, N); // STOQ AL,*(DI)+
+ } else
+ while(q > 0) {
+ gins(ASTOSQ, N, N); // STOQ AL,*(DI)+
+ q--;
+ }
+
+ if(c >= 4) {
+ gconreg(AMOVQ, c, D_CX);
+ gins(AREP, N, N); // repeat
+ gins(ASTOSB, N, N); // STOB AL,*(DI)+
+ } else
+ while(c > 0) {
+ gins(ASTOSB, N, N); // STOB AL,*(DI)+
+ c--;
+ }
+
+ restx(&n1, &oldn1);
+ restx(&ax, &oldax);
+}
+
+static int
+regcmp(const void *va, const void *vb)
+{
+ Node *ra, *rb;
+
+ ra = (Node*)va;
+ rb = (Node*)vb;
+ return ra->local - rb->local;
+}
+
+static Prog* throwpc;
+
+void
+getargs(NodeList *nn, Node *reg, int n)
+{
+ NodeList *l;
+ int i;
+
+ throwpc = nil;
+
+ l = nn;
+ for(i=0; i<n; i++) {
+ if(!smallintconst(l->n->right) && !isslice(l->n->right->type)) {
+ regalloc(reg+i, l->n->right->type, N);
+ cgen(l->n->right, reg+i);
+ } else
+ reg[i] = *l->n->right;
+ if(reg[i].local != 0)
+ yyerror("local used");
+ reg[i].local = l->n->left->xoffset;
+ l = l->next;
+ }
+ qsort((void*)reg, n, sizeof(*reg), regcmp);
+ for(i=0; i<n; i++)
+ reg[i].local = 0;
+}
+
+void
+cmpandthrow(Node *nl, Node *nr)
+{
+ vlong cl;
+ Prog *p1;
+ int op;
+ Node *c;
+ Type *t;
+ Node n1;
+
+ if(nl->op == OCONV && is64(nl->type))
+ nl = nl->left;
+ if(nr->op == OCONV && is64(nr->type))
+ nr = nr->left;
+
+ op = OLE;
+ if(smallintconst(nl)) {
+ cl = mpgetfix(nl->val.u.xval);
+ if(cl == 0)
+ return;
+ if(smallintconst(nr))
+ return;
+ // put the constant on the right
+ op = brrev(op);
+ c = nl;
+ nl = nr;
+ nr = c;
+ }
+ if(is64(nr->type) && smallintconst(nr))
+ nr->type = types[TUINT32];
+
+ n1.op = OXXX;
+ t = types[TUINT32];
+ if(nl->type->width != t->width || nr->type->width != t->width) {
+ if((is64(nl->type) && nl->op != OLITERAL) || (is64(nr->type) && nr->op != OLITERAL))
+ t = types[TUINT64];
+
+ // Check if we need to use a temporary.
+ // At least one of the arguments is 32 bits
+ // (the len or cap) so one temporary suffices.
+ if(nl->type->width != t->width && nl->op != OLITERAL) {
+ regalloc(&n1, t, nl);
+ gmove(nl, &n1);
+ nl = &n1;
+ } else if(nr->type->width != t->width && nr->op != OLITERAL) {
+ regalloc(&n1, t, nr);
+ gmove(nr, &n1);
+ nr = &n1;
+ }
+ }
+ gins(optoas(OCMP, t), nl, nr);
+ if(n1.op != OXXX)
+ regfree(&n1);
+ if(throwpc == nil) {
+ p1 = gbranch(optoas(op, t), T);
+ throwpc = pc;
+ ginscall(panicslice, 0);
+ patch(p1, pc);
+ } else {
+ op = brcom(op);
+ p1 = gbranch(optoas(op, t), T);
+ patch(p1, throwpc);
+ }
+}
+
+int
+sleasy(Node *n)
+{
+ if(n->op != ONAME)
+ return 0;
+ if(!n->addable)
+ return 0;
+ return 1;
+}
+
+// generate inline code for
+// slicearray
+// sliceslice
+// arraytoslice
+int
+cgen_inline(Node *n, Node *res)
+{
+ Node nodes[5];
+ Node n1, n2, nres, ntemp;
+ vlong v;
+ int i, narg, nochk;
+
+ if(n->op != OCALLFUNC)
+ goto no;
+ if(!n->left->addable)
+ goto no;
+ if(n->left->sym == S)
+ goto no;
+ if(n->left->sym->pkg != runtimepkg)
+ goto no;
+ if(strcmp(n->left->sym->name, "slicearray") == 0)
+ goto slicearray;
+ if(strcmp(n->left->sym->name, "sliceslice") == 0) {
+ narg = 4;
+ goto sliceslice;
+ }
+ if(strcmp(n->left->sym->name, "sliceslice1") == 0) {
+ narg = 3;
+ goto sliceslice;
+ }
+ goto no;
+
+slicearray:
+ if(!sleasy(res))
+ goto no;
+ getargs(n->list, nodes, 5);
+
+ // if(hb[3] > nel[1]) goto throw
+ cmpandthrow(&nodes[3], &nodes[1]);
+
+ // if(lb[2] > hb[3]) goto throw
+ cmpandthrow(&nodes[2], &nodes[3]);
+
+ // len = hb[3] - lb[2] (destroys hb)
+ n2 = *res;
+ n2.xoffset += Array_nel;
+ n2.type = types[TUINT32];
+
+ if(smallintconst(&nodes[3]) && smallintconst(&nodes[2])) {
+ v = mpgetfix(nodes[3].val.u.xval) -
+ mpgetfix(nodes[2].val.u.xval);
+ nodconst(&n1, types[TUINT32], v);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ } else {
+ regalloc(&n1, types[TUINT32], &nodes[3]);
+ gmove(&nodes[3], &n1);
+ if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0)
+ gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ regfree(&n1);
+ }
+
+ // cap = nel[1] - lb[2] (destroys nel)
+ n2 = *res;
+ n2.xoffset += Array_cap;
+ n2.type = types[TUINT32];
+
+ if(smallintconst(&nodes[1]) && smallintconst(&nodes[2])) {
+ v = mpgetfix(nodes[1].val.u.xval) -
+ mpgetfix(nodes[2].val.u.xval);
+ nodconst(&n1, types[TUINT32], v);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ } else {
+ regalloc(&n1, types[TUINT32], &nodes[1]);
+ gmove(&nodes[1], &n1);
+ if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0)
+ gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ regfree(&n1);
+ }
+
+ // if slice could be too big, dereference to
+ // catch nil array pointer.
+ if(nodes[0].op == OREGISTER && nodes[0].type->type->width >= unmappedzero) {
+ n2 = nodes[0];
+ n2.xoffset = 0;
+ n2.op = OINDREG;
+ n2.type = types[TUINT8];
+ gins(ATESTB, nodintconst(0), &n2);
+ }
+
+ // ary = old[0] + (lb[2] * width[4]) (destroys old)
+ n2 = *res;
+ n2.xoffset += Array_array;
+ n2.type = types[tptr];
+
+ if(smallintconst(&nodes[2]) && smallintconst(&nodes[4])) {
+ v = mpgetfix(nodes[2].val.u.xval) *
+ mpgetfix(nodes[4].val.u.xval);
+ if(v != 0)
+ ginscon(optoas(OADD, types[tptr]), v, &nodes[0]);
+ } else {
+ regalloc(&n1, types[tptr], &nodes[2]);
+ gmove(&nodes[2], &n1);
+ if(!smallintconst(&nodes[4]) || mpgetfix(nodes[4].val.u.xval) != 1)
+ gins(optoas(OMUL, types[tptr]), &nodes[4], &n1);
+ gins(optoas(OADD, types[tptr]), &n1, &nodes[0]);
+ regfree(&n1);
+ }
+ gins(optoas(OAS, types[tptr]), &nodes[0], &n2);
+
+ for(i=0; i<5; i++) {
+ if(nodes[i].op == OREGISTER)
+ regfree(&nodes[i]);
+ }
+ return 1;
+
+sliceslice:
+ nochk = n->etype; // skip bounds checking
+ ntemp.op = OXXX;
+ if(!sleasy(n->list->n->right)) {
+ Node *n0;
+
+ n0 = n->list->n->right;
+ tempname(&ntemp, res->type);
+ cgen(n0, &ntemp);
+ n->list->n->right = &ntemp;
+ getargs(n->list, nodes, narg);
+ n->list->n->right = n0;
+ } else
+ getargs(n->list, nodes, narg);
+
+ nres = *res; // result
+ if(!sleasy(res)) {
+ if(ntemp.op == OXXX)
+ tempname(&ntemp, res->type);
+ nres = ntemp;
+ }
+
+ if(narg == 3) { // old[lb:]
+ // move width to where it would be for old[lb:hb]
+ nodes[3] = nodes[2];
+ nodes[2].op = OXXX;
+
+ // if(lb[1] > old.nel[0]) goto throw;
+ n2 = nodes[0];
+ n2.xoffset += Array_nel;
+ n2.type = types[TUINT32];
+ if(!nochk)
+ cmpandthrow(&nodes[1], &n2);
+
+ // ret.nel = old.nel[0]-lb[1];
+ n2 = nodes[0];
+ n2.xoffset += Array_nel;
+ n2.type = types[TUINT32];
+
+ regalloc(&n1, types[TUINT32], N);
+ gins(optoas(OAS, types[TUINT32]), &n2, &n1);
+ if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
+ gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
+
+ n2 = nres;
+ n2.xoffset += Array_nel;
+ n2.type = types[TUINT32];
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ regfree(&n1);
+ } else { // old[lb:hb]
+ n2 = nodes[0];
+ n2.xoffset += Array_cap;
+ n2.type = types[TUINT32];
+ if(!nochk) {
+ // if(hb[2] > old.cap[0]) goto throw;
+ cmpandthrow(&nodes[2], &n2);
+ // if(lb[1] > hb[2]) goto throw;
+ cmpandthrow(&nodes[1], &nodes[2]);
+ }
+ // ret.len = hb[2]-lb[1]; (destroys hb[2])
+ n2 = nres;
+ n2.xoffset += Array_nel;
+ n2.type = types[TUINT32];
+
+ if(smallintconst(&nodes[2]) && smallintconst(&nodes[1])) {
+ v = mpgetfix(nodes[2].val.u.xval) -
+ mpgetfix(nodes[1].val.u.xval);
+ nodconst(&n1, types[TUINT32], v);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ } else {
+ regalloc(&n1, types[TUINT32], &nodes[2]);
+ gmove(&nodes[2], &n1);
+ if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
+ gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ regfree(&n1);
+ }
+ }
+
+ // ret.cap = old.cap[0]-lb[1]; (uses hb[2])
+ n2 = nodes[0];
+ n2.xoffset += Array_cap;
+ n2.type = types[TUINT32];
+
+ regalloc(&n1, types[TUINT32], &nodes[2]);
+ gins(optoas(OAS, types[TUINT32]), &n2, &n1);
+ if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
+ gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
+
+ n2 = nres;
+ n2.xoffset += Array_cap;
+ n2.type = types[TUINT32];
+
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ regfree(&n1);
+
+ // ret.array = old.array[0]+lb[1]*width[3]; (uses lb[1])
+ n2 = nodes[0];
+ n2.xoffset += Array_array;
+ n2.type = types[tptr];
+ regalloc(&n1, types[tptr], &nodes[1]);
+ if(smallintconst(&nodes[1]) && smallintconst(&nodes[3])) {
+ gins(optoas(OAS, types[tptr]), &n2, &n1);
+ v = mpgetfix(nodes[1].val.u.xval) *
+ mpgetfix(nodes[3].val.u.xval);
+ if(v != 0)
+ ginscon(optoas(OADD, types[tptr]), v, &n1);
+ } else {
+ gmove(&nodes[1], &n1);
+ if(!smallintconst(&nodes[3]) || mpgetfix(nodes[3].val.u.xval) != 1)
+ gins(optoas(OMUL, types[tptr]), &nodes[3], &n1);
+ gins(optoas(OADD, types[tptr]), &n2, &n1);
+ }
+
+ n2 = nres;
+ n2.xoffset += Array_array;
+ n2.type = types[tptr];
+ gins(optoas(OAS, types[tptr]), &n1, &n2);
+ regfree(&n1);
+
+ for(i=0; i<4; i++) {
+ if(nodes[i].op == OREGISTER)
+ regfree(&nodes[i]);
+ }
+
+ if(!sleasy(res)) {
+ cgen(&nres, res);
+ }
+ return 1;
+
+no:
+ return 0;
+}