author    Ondřej Surý <ondrej@sury.org>  2011-09-13 13:13:40 +0200
committer Ondřej Surý <ondrej@sury.org>  2011-09-13 13:13:40 +0200
commit    5ff4c17907d5b19510a62e08fd8d3b11e62b431d (patch)
tree      c0650497e988f47be9c6f2324fa692a52dea82e1 /src/cmd/8g
parent    80f18fc933cf3f3e829c5455a1023d69f7b86e52 (diff)
download  golang-5ff4c17907d5b19510a62e08fd8d3b11e62b431d.tar.gz
Imported Upstream version 60 (upstream/60)
Diffstat (limited to 'src/cmd/8g')
-rw-r--r--   src/cmd/8g/Makefile     36
-rw-r--r--   src/cmd/8g/cgen.c     1233
-rw-r--r--   src/cmd/8g/cgen64.c    512
-rw-r--r--   src/cmd/8g/doc.go       15
-rw-r--r--   src/cmd/8g/galign.c     37
-rw-r--r--   src/cmd/8g/gg.h        187
-rw-r--r--   src/cmd/8g/ggen.c     1155
-rw-r--r--   src/cmd/8g/gobj.c      651
-rw-r--r--   src/cmd/8g/gsubr.c    1959
-rw-r--r--   src/cmd/8g/list.c      302
-rw-r--r--   src/cmd/8g/opt.h       164
-rw-r--r--   src/cmd/8g/peep.c      890
-rw-r--r--   src/cmd/8g/reg.c      1544
13 files changed, 8685 insertions, 0 deletions
diff --git a/src/cmd/8g/Makefile b/src/cmd/8g/Makefile
new file mode 100644
index 000000000..b459782a3
--- /dev/null
+++ b/src/cmd/8g/Makefile
@@ -0,0 +1,36 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../Make.inc
+O:=$(HOST_O)
+
+TARG=8g
+
+HFILES=\
+ ../gc/go.h\
+ ../8l/8.out.h\
+ gg.h\
+ opt.h\
+
+OFILES=\
+ ../8l/enam.$O\
+ cgen.$O\
+ cgen64.$O\
+ cplx.$O\
+ galign.$O\
+ ggen.$O\
+ gobj.$O\
+ gsubr.$O\
+ list.$O\
+ peep.$O\
+ pgen.$O\
+ reg.$O\
+
+LIB=\
+ ../gc/gc.a\
+
+include ../../Make.ccmd
+
+%.$O: ../gc/%.c
+ $(HOST_CC) $(HOST_CFLAGS) -c -I. -o $@ ../gc/$*.c
diff --git a/src/cmd/8g/cgen.c b/src/cmd/8g/cgen.c
new file mode 100644
index 000000000..b316e6e34
--- /dev/null
+++ b/src/cmd/8g/cgen.c
@@ -0,0 +1,1233 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO(rsc):
+// assume CLD?
+
+#include "gg.h"
+
+void
+mgen(Node *n, Node *n1, Node *rg)
+{
+ Node n2;
+
+ n1->op = OEMPTY;
+
+ if(n->addable) {
+ *n1 = *n;
+ if(n1->op == OREGISTER || n1->op == OINDREG)
+ reg[n->val.u.reg]++;
+ return;
+ }
+ tempname(n1, n->type);
+ cgen(n, n1);
+ if(n->type->width <= widthptr || isfloat[n->type->etype]) {
+ n2 = *n1;
+ regalloc(n1, n->type, rg);
+ gmove(&n2, n1);
+ }
+}
+
+void
+mfree(Node *n)
+{
+ if(n->op == OREGISTER)
+ regfree(n);
+}
+
+/*
+ * generate:
+ * res = n;
+ * simplifies and calls gmove.
+ *
+ * TODO:
+ * sudoaddable
+ */
+void
+cgen(Node *n, Node *res)
+{
+ Node *nl, *nr, *r, n1, n2, nt, f0, f1;
+ Prog *p1, *p2, *p3;
+ int a;
+
+ if(debug['g']) {
+ dump("\ncgen-n", n);
+ dump("cgen-res", res);
+ }
+
+ if(n == N || n->type == T)
+ fatal("cgen: n nil");
+ if(res == N || res->type == T)
+ fatal("cgen: res nil");
+
+ // inline slices
+ if(cgen_inline(n, res))
+ return;
+
+ while(n->op == OCONVNOP)
+ n = n->left;
+
+ // function calls on both sides? introduce temporary
+ if(n->ullman >= UINF && res->ullman >= UINF) {
+ tempname(&n1, n->type);
+ cgen(n, &n1);
+ cgen(&n1, res);
+ return;
+ }
+
+ // structs etc get handled specially
+ if(isfat(n->type)) {
+ if(n->type->width < 0)
+ fatal("forgot to compute width for %T", n->type);
+ sgen(n, res, n->type->width);
+ return;
+ }
+
+ // update addressability for string, slice
+ // can't do in walk because n->left->addable
+ // changes if n->left is an escaping local variable.
+ switch(n->op) {
+ case OLEN:
+ if(isslice(n->left->type) || istype(n->left->type, TSTRING))
+ n->addable = n->left->addable;
+ break;
+ case OCAP:
+ if(isslice(n->left->type))
+ n->addable = n->left->addable;
+ break;
+ }
+
+ // if both are addressable, move
+ if(n->addable && res->addable) {
+ gmove(n, res);
+ return;
+ }
+
+ // if both are not addressable, use a temporary.
+ if(!n->addable && !res->addable) {
+ // could use regalloc here sometimes,
+ // but have to check for ullman >= UINF.
+ tempname(&n1, n->type);
+ cgen(n, &n1);
+ cgen(&n1, res);
+ return;
+ }
+
+ // if result is not addressable directly but n is,
+ // compute its address and then store via the address.
+ if(!res->addable) {
+ igen(res, &n1, N);
+ cgen(n, &n1);
+ regfree(&n1);
+ return;
+ }
+
+ // complex types
+ if(complexop(n, res)) {
+ complexgen(n, res);
+ return;
+ }
+
+ // otherwise, the result is addressable but n is not.
+ // let's do some computation.
+
+ // use ullman to pick operand to eval first.
+ nl = n->left;
+ nr = n->right;
+ if(nl != N && nl->ullman >= UINF)
+ if(nr != N && nr->ullman >= UINF) {
+ // both are hard
+ tempname(&n1, nl->type);
+ cgen(nl, &n1);
+ n2 = *n;
+ n2.left = &n1;
+ cgen(&n2, res);
+ return;
+ }
+
+ // 64-bit ops are hard on 32-bit machine.
+ if(is64(n->type) || is64(res->type) || n->left != N && is64(n->left->type)) {
+ switch(n->op) {
+ // math goes to cgen64.
+ case OMINUS:
+ case OCOM:
+ case OADD:
+ case OSUB:
+ case OMUL:
+ case OLSH:
+ case ORSH:
+ case OAND:
+ case OOR:
+ case OXOR:
+ cgen64(n, res);
+ return;
+ }
+ }
+
+ if(nl != N && isfloat[n->type->etype] && isfloat[nl->type->etype])
+ goto flt;
+
+ switch(n->op) {
+ default:
+ dump("cgen", n);
+ fatal("cgen %O", n->op);
+ break;
+
+ case OREAL:
+ case OIMAG:
+ case OCOMPLEX:
+ fatal("unexpected complex");
+ return;
+
+ // these call bgen to get a bool value
+ case OOROR:
+ case OANDAND:
+ case OEQ:
+ case ONE:
+ case OLT:
+ case OLE:
+ case OGE:
+ case OGT:
+ case ONOT:
+ p1 = gbranch(AJMP, T);
+ p2 = pc;
+ gmove(nodbool(1), res);
+ p3 = gbranch(AJMP, T);
+ patch(p1, pc);
+ bgen(n, 1, p2);
+ gmove(nodbool(0), res);
+ patch(p3, pc);
+ return;
+
+ case OPLUS:
+ cgen(nl, res);
+ return;
+
+ case OMINUS:
+ case OCOM:
+ a = optoas(n->op, nl->type);
+ goto uop;
+
+ // symmetric binary
+ case OAND:
+ case OOR:
+ case OXOR:
+ case OADD:
+ case OMUL:
+ a = optoas(n->op, nl->type);
+ if(a == AIMULB) {
+ cgen_bmul(n->op, nl, nr, res);
+ break;
+ }
+ goto sbop;
+
+ // asymmetric binary
+ case OSUB:
+ a = optoas(n->op, nl->type);
+ goto abop;
+
+ case OCONV:
+ if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
+ cgen(nl, res);
+ break;
+ }
+
+ tempname(&n2, n->type);
+ mgen(nl, &n1, res);
+ gmove(&n1, &n2);
+ gmove(&n2, res);
+ mfree(&n1);
+ break;
+
+ case ODOT:
+ case ODOTPTR:
+ case OINDEX:
+ case OIND:
+ case ONAME: // PHEAP or PPARAMREF var
+ igen(n, &n1, res);
+ gmove(&n1, res);
+ regfree(&n1);
+ break;
+
+ case OLEN:
+ if(istype(nl->type, TMAP) || istype(nl->type, TCHAN)) {
+ // map has len in the first 32-bit word.
+ // a zero pointer means zero length
+ tempname(&n1, types[tptr]);
+ cgen(nl, &n1);
+ regalloc(&n2, types[tptr], N);
+ gmove(&n1, &n2);
+ n1 = n2;
+
+ nodconst(&n2, types[tptr], 0);
+ gins(optoas(OCMP, types[tptr]), &n1, &n2);
+ p1 = gbranch(optoas(OEQ, types[tptr]), T);
+
+ n2 = n1;
+ n2.op = OINDREG;
+ n2.type = types[TINT32];
+ gmove(&n2, &n1);
+
+ patch(p1, pc);
+
+ gmove(&n1, res);
+ regfree(&n1);
+ break;
+ }
+ if(istype(nl->type, TSTRING) || isslice(nl->type)) {
+ // both slice and string have len one pointer into the struct.
+ igen(nl, &n1, res);
+ n1.type = types[TUINT32];
+ n1.xoffset += Array_nel;
+ gmove(&n1, res);
+ regfree(&n1);
+ break;
+ }
+ fatal("cgen: OLEN: unknown type %lT", nl->type);
+ break;
+
+ case OCAP:
+ if(istype(nl->type, TCHAN)) {
+ // chan has cap in the second 32-bit word.
+ // a zero pointer means zero length
+ regalloc(&n1, types[tptr], res);
+ cgen(nl, &n1);
+
+ nodconst(&n2, types[tptr], 0);
+ gins(optoas(OCMP, types[tptr]), &n1, &n2);
+ p1 = gbranch(optoas(OEQ, types[tptr]), T);
+
+ n2 = n1;
+ n2.op = OINDREG;
+ n2.xoffset = 4;
+ n2.type = types[TINT32];
+ gmove(&n2, &n1);
+
+ patch(p1, pc);
+
+ gmove(&n1, res);
+ regfree(&n1);
+ break;
+ }
+ if(isslice(nl->type)) {
+ igen(nl, &n1, res);
+ n1.op = OINDREG;
+ n1.type = types[TUINT32];
+ n1.xoffset = Array_cap;
+ gmove(&n1, res);
+ regfree(&n1);
+ break;
+ }
+ fatal("cgen: OCAP: unknown type %lT", nl->type);
+ break;
+
+ case OADDR:
+ agen(nl, res);
+ break;
+
+ case OCALLMETH:
+ cgen_callmeth(n, 0);
+ cgen_callret(n, res);
+ break;
+
+ case OCALLINTER:
+ cgen_callinter(n, res, 0);
+ cgen_callret(n, res);
+ break;
+
+ case OCALLFUNC:
+ cgen_call(n, 0);
+ cgen_callret(n, res);
+ break;
+
+ case OMOD:
+ case ODIV:
+ cgen_div(n->op, nl, nr, res);
+ break;
+
+ case OLSH:
+ case ORSH:
+ cgen_shift(n->op, nl, nr, res);
+ break;
+ }
+ return;
+
+sbop: // symmetric binary
+ if(nl->ullman < nr->ullman) {
+ r = nl;
+ nl = nr;
+ nr = r;
+ }
+
+abop: // asymmetric binary
+ if(nl->ullman >= nr->ullman) {
+ tempname(&nt, nl->type);
+ cgen(nl, &nt);
+ mgen(nr, &n2, N);
+ regalloc(&n1, nl->type, res);
+ gmove(&nt, &n1);
+ gins(a, &n2, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ mfree(&n2);
+ } else {
+ regalloc(&n2, nr->type, res);
+ cgen(nr, &n2);
+ regalloc(&n1, nl->type, N);
+ cgen(nl, &n1);
+ gins(a, &n2, &n1);
+ regfree(&n2);
+ gmove(&n1, res);
+ regfree(&n1);
+ }
+ return;
+
+uop: // unary
+ tempname(&n1, nl->type);
+ cgen(nl, &n1);
+ gins(a, N, &n1);
+ gmove(&n1, res);
+ return;
+
+flt: // floating-point. 387 (not SSE2) to interoperate with 8c
+ nodreg(&f0, nl->type, D_F0);
+ nodreg(&f1, n->type, D_F0+1);
+ if(nr != N)
+ goto flt2;
+
+ // unary
+ cgen(nl, &f0);
+ if(n->op != OCONV && n->op != OPLUS)
+ gins(foptoas(n->op, n->type, 0), N, N);
+ gmove(&f0, res);
+ return;
+
+flt2: // binary
+ if(nl->ullman >= nr->ullman) {
+ cgen(nl, &f0);
+ if(nr->addable)
+ gins(foptoas(n->op, n->type, 0), nr, &f0);
+ else {
+ cgen(nr, &f0);
+ gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
+ }
+ } else {
+ cgen(nr, &f0);
+ if(nl->addable)
+ gins(foptoas(n->op, n->type, Frev), nl, &f0);
+ else {
+ cgen(nl, &f0);
+ gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
+ }
+ }
+ gmove(&f0, res);
+ return;
+}
+
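cgen above resolves binary operations through the sbop/abop paths, ordering operand evaluation by Ullman number so the subtree that needs more registers is generated first. A minimal standalone sketch of that register-need idea (Expr, regneed, and the driver are illustrative names, not part of this source):

#include <stdio.h>

/* Illustrative expression node; not the compiler's Node type. */
typedef struct Expr Expr;
struct Expr {
	char	op;	/* 0 for a leaf */
	Expr	*left;
	Expr	*right;
};

/* Sethi-Ullman style register need: a leaf takes one register;
 * an interior node needs an extra one only when both children
 * need the same amount. */
static int
regneed(Expr *e)
{
	int l, r;

	if(e->op == 0)
		return 1;
	l = regneed(e->left);
	r = regneed(e->right);
	if(l == r)
		return l + 1;
	return l > r ? l : r;
}

int
main(void)
{
	Expr a = {0}, b = {0}, c = {0};
	Expr add = {'+', &a, &b};	/* a+b needs two registers */
	Expr mul = {'*', &add, &c};	/* (a+b)*c: generate a+b first */

	printf("need(a+b)=%d need((a+b)*c)=%d\n", regneed(&add), regneed(&mul));
	return 0;
}

Generating the more demanding operand first means the cheaper side does not tie up a register while the expensive side is being computed, which is exactly the swap done under the sbop label.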
+/*
+ * generate array index into res.
+ * n might be any size; res is 32-bit.
+ * returns Prog* to patch to panic call.
+ */
+Prog*
+cgenindex(Node *n, Node *res)
+{
+ Node tmp, lo, hi, zero;
+
+ if(!is64(n->type)) {
+ cgen(n, res);
+ return nil;
+ }
+
+ tempname(&tmp, types[TINT64]);
+ cgen(n, &tmp);
+ split64(&tmp, &lo, &hi);
+ gmove(&lo, res);
+ if(debug['B']) {
+ splitclean();
+ return nil;
+ }
+ nodconst(&zero, types[TINT32], 0);
+ gins(ACMPL, &hi, &zero);
+ splitclean();
+ return gbranch(AJNE, T);
+}
+
+/*
+ * address gen
+ * res = &n;
+ */
+void
+agen(Node *n, Node *res)
+{
+ Node *nl, *nr;
+ Node n1, n2, n3, n4, tmp;
+ Type *t;
+ uint32 w;
+ uint64 v;
+ Prog *p1, *p2;
+
+ if(debug['g']) {
+ dump("\nagen-res", res);
+ dump("agen-r", n);
+ }
+ if(n == N || n->type == T || res == N || res->type == T)
+ fatal("agen");
+
+ while(n->op == OCONVNOP)
+ n = n->left;
+
+ // addressable var is easy
+ if(n->addable) {
+ if(n->op == OREGISTER)
+ fatal("agen OREGISTER");
+ regalloc(&n1, types[tptr], res);
+ gins(ALEAL, n, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ return;
+ }
+
+ // let's compute
+ nl = n->left;
+ nr = n->right;
+
+ switch(n->op) {
+ default:
+ fatal("agen %O", n->op);
+
+ case OCALLMETH:
+ cgen_callmeth(n, 0);
+ cgen_aret(n, res);
+ break;
+
+ case OCALLINTER:
+ cgen_callinter(n, res, 0);
+ cgen_aret(n, res);
+ break;
+
+ case OCALLFUNC:
+ cgen_call(n, 0);
+ cgen_aret(n, res);
+ break;
+
+ case OINDEX:
+ p2 = nil; // to be patched to panicindex.
+ w = n->type->width;
+ if(nr->addable) {
+ if(!isconst(nr, CTINT))
+ tempname(&tmp, types[TINT32]);
+ if(!isconst(nl, CTSTR))
+ agenr(nl, &n3, res);
+ if(!isconst(nr, CTINT)) {
+ p2 = cgenindex(nr, &tmp);
+ regalloc(&n1, tmp.type, N);
+ gmove(&tmp, &n1);
+ }
+ } else if(nl->addable) {
+ if(!isconst(nr, CTINT)) {
+ tempname(&tmp, types[TINT32]);
+ p2 = cgenindex(nr, &tmp);
+ regalloc(&n1, tmp.type, N);
+ gmove(&tmp, &n1);
+ }
+ if(!isconst(nl, CTSTR)) {
+ regalloc(&n3, types[tptr], res);
+ agen(nl, &n3);
+ }
+ } else {
+ tempname(&tmp, types[TINT32]);
+ p2 = cgenindex(nr, &tmp);
+ nr = &tmp;
+ if(!isconst(nl, CTSTR))
+ agenr(nl, &n3, res);
+ regalloc(&n1, tmp.type, N);
+ gins(optoas(OAS, tmp.type), &tmp, &n1);
+ }
+
+ // &a is in &n3 (allocated in res)
+ // i is in &n1 (if not constant)
+ // w is width
+
+ // explicit check for nil if array is large enough
+ // that we might derive too big a pointer.
+ if(isfixedarray(nl->type) && nl->type->width >= unmappedzero) {
+ regalloc(&n4, types[tptr], &n3);
+ gmove(&n3, &n4);
+ n4.op = OINDREG;
+ n4.type = types[TUINT8];
+ n4.xoffset = 0;
+ gins(ATESTB, nodintconst(0), &n4);
+ regfree(&n4);
+ }
+
+ // constant index
+ if(isconst(nr, CTINT)) {
+ if(isconst(nl, CTSTR))
+ fatal("constant string constant index");
+ v = mpgetfix(nr->val.u.xval);
+ if(isslice(nl->type) || nl->type->etype == TSTRING) {
+ if(!debug['B'] && !n->etype) {
+ n1 = n3;
+ n1.op = OINDREG;
+ n1.type = types[tptr];
+ n1.xoffset = Array_nel;
+ nodconst(&n2, types[TUINT32], v);
+ gins(optoas(OCMP, types[TUINT32]), &n1, &n2);
+ p1 = gbranch(optoas(OGT, types[TUINT32]), T);
+ ginscall(panicindex, 0);
+ patch(p1, pc);
+ }
+
+ n1 = n3;
+ n1.op = OINDREG;
+ n1.type = types[tptr];
+ n1.xoffset = Array_array;
+ gmove(&n1, &n3);
+ }
+
+ if (v*w != 0) {
+ nodconst(&n2, types[tptr], v*w);
+ gins(optoas(OADD, types[tptr]), &n2, &n3);
+ }
+ gmove(&n3, res);
+ regfree(&n3);
+ break;
+ }
+
+ regalloc(&n2, types[TINT32], &n1); // i
+ gmove(&n1, &n2);
+ regfree(&n1);
+
+ if(!debug['B'] && !n->etype) {
+ // check bounds
+ if(isconst(nl, CTSTR))
+ nodconst(&n1, types[TUINT32], nl->val.u.sval->len);
+ else if(isslice(nl->type) || nl->type->etype == TSTRING) {
+ n1 = n3;
+ n1.op = OINDREG;
+ n1.type = types[tptr];
+ n1.xoffset = Array_nel;
+ } else
+ nodconst(&n1, types[TUINT32], nl->type->bound);
+ gins(optoas(OCMP, types[TUINT32]), &n2, &n1);
+ p1 = gbranch(optoas(OLT, types[TUINT32]), T);
+ if(p2)
+ patch(p2, pc);
+ ginscall(panicindex, 0);
+ patch(p1, pc);
+ }
+
+ if(isconst(nl, CTSTR)) {
+ regalloc(&n3, types[tptr], res);
+ p1 = gins(ALEAL, N, &n3);
+ datastring(nl->val.u.sval->s, nl->val.u.sval->len, &p1->from);
+ p1->from.scale = 1;
+ p1->from.index = n2.val.u.reg;
+ goto indexdone;
+ }
+
+ if(isslice(nl->type) || nl->type->etype == TSTRING) {
+ n1 = n3;
+ n1.op = OINDREG;
+ n1.type = types[tptr];
+ n1.xoffset = Array_array;
+ gmove(&n1, &n3);
+ }
+
+ if(w == 0) {
+ // nothing to do
+ } else if(w == 1 || w == 2 || w == 4 || w == 8) {
+ p1 = gins(ALEAL, &n2, &n3);
+ p1->from.scale = w;
+ p1->from.index = p1->from.type;
+ p1->from.type = p1->to.type + D_INDIR;
+ } else {
+ nodconst(&n1, types[TUINT32], w);
+ gins(optoas(OMUL, types[TUINT32]), &n1, &n2);
+ gins(optoas(OADD, types[tptr]), &n2, &n3);
+ }
+
+ indexdone:
+ gmove(&n3, res);
+ regfree(&n2);
+ regfree(&n3);
+ break;
+
+ case ONAME:
+ // should only get here with names in this func.
+ if(n->funcdepth > 0 && n->funcdepth != funcdepth) {
+ dump("bad agen", n);
+ fatal("agen: bad ONAME funcdepth %d != %d",
+ n->funcdepth, funcdepth);
+ }
+
+ // should only get here for heap vars or paramref
+ if(!(n->class & PHEAP) && n->class != PPARAMREF) {
+ dump("bad agen", n);
+ fatal("agen: bad ONAME class %#x", n->class);
+ }
+ cgen(n->heapaddr, res);
+ if(n->xoffset != 0) {
+ nodconst(&n1, types[tptr], n->xoffset);
+ gins(optoas(OADD, types[tptr]), &n1, res);
+ }
+ break;
+
+ case OIND:
+ cgen(nl, res);
+ break;
+
+ case ODOT:
+ t = nl->type;
+ agen(nl, res);
+ if(n->xoffset != 0) {
+ nodconst(&n1, types[tptr], n->xoffset);
+ gins(optoas(OADD, types[tptr]), &n1, res);
+ }
+ break;
+
+ case ODOTPTR:
+ t = nl->type;
+ if(!isptr[t->etype])
+ fatal("agen: not ptr %N", n);
+ cgen(nl, res);
+ if(n->xoffset != 0) {
+ // explicit check for nil if struct is large enough
+ // that we might derive too big a pointer.
+ if(nl->type->type->width >= unmappedzero) {
+ regalloc(&n1, types[tptr], res);
+ gmove(res, &n1);
+ n1.op = OINDREG;
+ n1.type = types[TUINT8];
+ n1.xoffset = 0;
+ gins(ATESTB, nodintconst(0), &n1);
+ regfree(&n1);
+ }
+ nodconst(&n1, types[tptr], n->xoffset);
+ gins(optoas(OADD, types[tptr]), &n1, res);
+ }
+ break;
+ }
+}
+
+/*
+ * generate:
+ * newreg = &n;
+ * res = newreg
+ *
+ * on exit, a has been changed to be *newreg.
+ * caller must regfree(a).
+ */
+void
+igen(Node *n, Node *a, Node *res)
+{
+ Node n1;
+ Type *fp;
+ Iter flist;
+
+ switch(n->op) {
+ case ONAME:
+ if((n->class&PHEAP) || n->class == PPARAMREF)
+ break;
+ *a = *n;
+ return;
+
+ case OCALLFUNC:
+ fp = structfirst(&flist, getoutarg(n->left->type));
+ cgen_call(n, 0);
+ memset(a, 0, sizeof *a);
+ a->op = OINDREG;
+ a->val.u.reg = D_SP;
+ a->addable = 1;
+ a->xoffset = fp->width;
+ a->type = n->type;
+ return;
+ }
+ // release register for now, to avoid
+ // confusing tempname.
+ if(res != N && res->op == OREGISTER)
+ reg[res->val.u.reg]--;
+ tempname(&n1, types[tptr]);
+ agen(n, &n1);
+ if(res != N && res->op == OREGISTER)
+ reg[res->val.u.reg]++;
+ regalloc(a, types[tptr], res);
+ gmove(&n1, a);
+ a->op = OINDREG;
+ a->type = n->type;
+}
+
+/*
+ * generate:
+ * newreg = &n;
+ *
+ * caller must regfree(a).
+ */
+void
+agenr(Node *n, Node *a, Node *res)
+{
+ Node n1;
+
+ tempname(&n1, types[tptr]);
+ agen(n, &n1);
+ regalloc(a, types[tptr], res);
+ gmove(&n1, a);
+}
+
+/*
+ * branch gen
+ * if(n == true) goto to;
+ */
+void
+bgen(Node *n, int true, Prog *to)
+{
+ int et, a;
+ Node *nl, *nr, *r;
+ Node n1, n2, tmp, t1, t2, ax;
+ Prog *p1, *p2;
+
+ if(debug['g']) {
+ dump("\nbgen", n);
+ }
+
+ if(n == N)
+ n = nodbool(1);
+
+ if(n->ninit != nil)
+ genlist(n->ninit);
+
+ nl = n->left;
+ nr = n->right;
+
+ if(n->type == T) {
+ convlit(&n, types[TBOOL]);
+ if(n->type == T)
+ return;
+ }
+
+ et = n->type->etype;
+ if(et != TBOOL) {
+ yyerror("cgen: bad type %T for %O", n->type, n->op);
+ patch(gins(AEND, N, N), to);
+ return;
+ }
+ nl = N;
+ nr = N;
+
+ switch(n->op) {
+ default:
+ def:
+ regalloc(&n1, n->type, N);
+ cgen(n, &n1);
+ nodconst(&n2, n->type, 0);
+ gins(optoas(OCMP, n->type), &n1, &n2);
+ a = AJNE;
+ if(!true)
+ a = AJEQ;
+ patch(gbranch(a, n->type), to);
+ regfree(&n1);
+ return;
+
+ case OLITERAL:
+ // need to ask if it is bool?
+ if(!true == !n->val.u.bval)
+ patch(gbranch(AJMP, T), to);
+ return;
+
+ case ONAME:
+ if(!n->addable)
+ goto def;
+ nodconst(&n1, n->type, 0);
+ gins(optoas(OCMP, n->type), n, &n1);
+ a = AJNE;
+ if(!true)
+ a = AJEQ;
+ patch(gbranch(a, n->type), to);
+ return;
+
+ case OANDAND:
+ if(!true)
+ goto caseor;
+
+ caseand:
+ p1 = gbranch(AJMP, T);
+ p2 = gbranch(AJMP, T);
+ patch(p1, pc);
+ bgen(n->left, !true, p2);
+ bgen(n->right, !true, p2);
+ p1 = gbranch(AJMP, T);
+ patch(p1, to);
+ patch(p2, pc);
+ return;
+
+ case OOROR:
+ if(!true)
+ goto caseand;
+
+ caseor:
+ bgen(n->left, true, to);
+ bgen(n->right, true, to);
+ return;
+
+ case OEQ:
+ case ONE:
+ case OLT:
+ case OGT:
+ case OLE:
+ case OGE:
+ nr = n->right;
+ if(nr == N || nr->type == T)
+ return;
+
+ case ONOT: // unary
+ nl = n->left;
+ if(nl == N || nl->type == T)
+ return;
+ }
+
+ switch(n->op) {
+ case ONOT:
+ bgen(nl, !true, to);
+ break;
+
+ case OEQ:
+ case ONE:
+ case OLT:
+ case OGT:
+ case OLE:
+ case OGE:
+ a = n->op;
+ if(!true) {
+ if(isfloat[nl->type->etype]) {
+ // brcom is not valid on floats when NaN is involved.
+ p1 = gbranch(AJMP, T);
+ p2 = gbranch(AJMP, T);
+ patch(p1, pc);
+ bgen(n, 1, p2);
+ patch(gbranch(AJMP, T), to);
+ patch(p2, pc);
+ break;
+ }
+ a = brcom(a);
+ true = !true;
+ }
+
+ // make simplest on right
+ if(nl->op == OLITERAL || (nl->ullman < nr->ullman && nl->ullman < UINF)) {
+ a = brrev(a);
+ r = nl;
+ nl = nr;
+ nr = r;
+ }
+
+ if(isslice(nl->type)) {
+ // front end should only leave cmp to literal nil
+ if((a != OEQ && a != ONE) || nr->op != OLITERAL) {
+ yyerror("illegal array comparison");
+ break;
+ }
+ a = optoas(a, types[tptr]);
+ regalloc(&n1, types[tptr], N);
+ agen(nl, &n1);
+ n2 = n1;
+ n2.op = OINDREG;
+ n2.xoffset = Array_array;
+ n2.type = types[tptr];
+ nodconst(&tmp, types[tptr], 0);
+ gins(optoas(OCMP, types[tptr]), &n2, &tmp);
+ patch(gbranch(a, types[tptr]), to);
+ regfree(&n1);
+ break;
+ }
+
+ if(isinter(nl->type)) {
+ // front end should only leave cmp to literal nil
+ if((a != OEQ && a != ONE) || nr->op != OLITERAL) {
+ yyerror("illegal interface comparison");
+ break;
+ }
+ a = optoas(a, types[tptr]);
+ regalloc(&n1, types[tptr], N);
+ agen(nl, &n1);
+ n2 = n1;
+ n2.op = OINDREG;
+ n2.xoffset = 0;
+ nodconst(&tmp, types[tptr], 0);
+ gins(optoas(OCMP, types[tptr]), &n2, &tmp);
+ patch(gbranch(a, types[tptr]), to);
+ regfree(&n1);
+ break;
+ }
+
+ if(isfloat[nr->type->etype]) {
+ a = brrev(a); // because the args are stacked
+ if(a == OGE || a == OGT) {
+ // only < and <= work right with NaN; reverse if needed
+ r = nr;
+ nr = nl;
+ nl = r;
+ a = brrev(a);
+ }
+ nodreg(&tmp, nr->type, D_F0);
+ nodreg(&n2, nr->type, D_F0 + 1);
+ nodreg(&ax, types[TUINT16], D_AX);
+ et = simsimtype(nr->type);
+ if(et == TFLOAT64) {
+ if(nl->ullman > nr->ullman) {
+ cgen(nl, &tmp);
+ cgen(nr, &tmp);
+ gins(AFXCHD, &tmp, &n2);
+ } else {
+ cgen(nr, &tmp);
+ cgen(nl, &tmp);
+ }
+ gins(AFUCOMIP, &tmp, &n2);
+ gins(AFMOVDP, &tmp, &tmp); // annoying pop but still better than STSW+SAHF
+ } else {
+ // TODO(rsc): The moves back and forth to memory
+ // here are for truncating the value to 32 bits.
+ // This handles 32-bit comparison but presumably
+ // all the other ops have the same problem.
+ // We need to figure out what the right general
+ // solution is, besides telling people to use float64.
+ tempname(&t1, types[TFLOAT32]);
+ tempname(&t2, types[TFLOAT32]);
+ cgen(nr, &t1);
+ cgen(nl, &t2);
+ gmove(&t2, &tmp);
+ gins(AFCOMFP, &t1, &tmp);
+ gins(AFSTSW, N, &ax);
+ gins(ASAHF, N, N);
+ }
+ if(a == OEQ) {
+ // neither NE nor P
+ p1 = gbranch(AJNE, T);
+ p2 = gbranch(AJPS, T);
+ patch(gbranch(AJMP, T), to);
+ patch(p1, pc);
+ patch(p2, pc);
+ } else if(a == ONE) {
+ // either NE or P
+ patch(gbranch(AJNE, T), to);
+ patch(gbranch(AJPS, T), to);
+ } else
+ patch(gbranch(optoas(a, nr->type), T), to);
+ break;
+ }
+ if(iscomplex[nl->type->etype]) {
+ complexbool(a, nl, nr, true, to);
+ break;
+ }
+
+ if(is64(nr->type)) {
+ if(!nl->addable) {
+ tempname(&n1, nl->type);
+ cgen(nl, &n1);
+ nl = &n1;
+ }
+ if(!nr->addable) {
+ tempname(&n2, nr->type);
+ cgen(nr, &n2);
+ nr = &n2;
+ }
+ cmp64(nl, nr, a, to);
+ break;
+ }
+
+ a = optoas(a, nr->type);
+
+ if(nr->ullman >= UINF) {
+ tempname(&n1, nl->type);
+ tempname(&tmp, nr->type);
+ cgen(nl, &n1);
+ cgen(nr, &tmp);
+ regalloc(&n2, nr->type, N);
+ cgen(&tmp, &n2);
+ goto cmp;
+ }
+
+ tempname(&n1, nl->type);
+ cgen(nl, &n1);
+
+ if(smallintconst(nr)) {
+ gins(optoas(OCMP, nr->type), &n1, nr);
+ patch(gbranch(a, nr->type), to);
+ break;
+ }
+
+ tempname(&tmp, nr->type);
+ cgen(nr, &tmp);
+ regalloc(&n2, nr->type, N);
+ gmove(&tmp, &n2);
+
+cmp:
+ gins(optoas(OCMP, nr->type), &n1, &n2);
+ patch(gbranch(a, nr->type), to);
+ regfree(&n2);
+ break;
+ }
+}
+
+/*
+ * n is on stack, either local variable
+ * or return value from function call.
+ * return n's offset from SP.
+ */
+int32
+stkof(Node *n)
+{
+ Type *t;
+ Iter flist;
+ int32 off;
+
+ switch(n->op) {
+ case OINDREG:
+ return n->xoffset;
+
+ case ODOT:
+ t = n->left->type;
+ if(isptr[t->etype])
+ break;
+ off = stkof(n->left);
+ if(off == -1000 || off == 1000)
+ return off;
+ return off + n->xoffset;
+
+ case OINDEX:
+ t = n->left->type;
+ if(!isfixedarray(t))
+ break;
+ off = stkof(n->left);
+ if(off == -1000 || off == 1000)
+ return off;
+ if(isconst(n->right, CTINT))
+ return off + t->type->width * mpgetfix(n->right->val.u.xval);
+ return 1000;
+
+ case OCALLMETH:
+ case OCALLINTER:
+ case OCALLFUNC:
+ t = n->left->type;
+ if(isptr[t->etype])
+ t = t->type;
+
+ t = structfirst(&flist, getoutarg(t));
+ if(t != T)
+ return t->width;
+ break;
+ }
+
+ // botch - probably failing to recognize address
+ // arithmetic on the above. eg INDEX and DOT
+ return -1000;
+}
+
+/*
+ * struct gen
+ * memmove(&res, &n, w);
+ */
+void
+sgen(Node *n, Node *res, int32 w)
+{
+ Node dst, src, tdst, tsrc;
+ int32 c, q, odst, osrc;
+
+ if(debug['g']) {
+ print("\nsgen w=%d\n", w);
+ dump("r", n);
+ dump("res", res);
+ }
+ if(w == 0)
+ return;
+ if(n->ullman >= UINF && res->ullman >= UINF) {
+ fatal("sgen UINF");
+ }
+
+ if(w < 0)
+ fatal("sgen copy %d", w);
+
+ // offset on the stack
+ osrc = stkof(n);
+ odst = stkof(res);
+
+ if(osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000)) {
+ // osrc and odst both on stack, and at least one is in
+ // an unknown position. Could generate code to test
+ // for forward/backward copy, but instead just copy
+ // to a temporary location first.
+ tempname(&tsrc, n->type);
+ sgen(n, &tsrc, w);
+ sgen(&tsrc, res, w);
+ return;
+ }
+
+ nodreg(&dst, types[tptr], D_DI);
+ nodreg(&src, types[tptr], D_SI);
+
+ tempname(&tsrc, types[tptr]);
+ tempname(&tdst, types[tptr]);
+ if(!n->addable)
+ agen(n, &tsrc);
+ if(!res->addable)
+ agen(res, &tdst);
+ if(n->addable)
+ agen(n, &src);
+ else
+ gmove(&tsrc, &src);
+ if(res->addable)
+ agen(res, &dst);
+ else
+ gmove(&tdst, &dst);
+
+ c = w % 4; // bytes
+ q = w / 4; // doublewords
+
+ // if we are copying forward on the stack and
+ // the src and dst overlap, then reverse direction
+ if(osrc < odst && odst < osrc+w) {
+ // reverse direction
+ gins(ASTD, N, N); // set direction flag
+ if(c > 0) {
+ gconreg(AADDL, w-1, D_SI);
+ gconreg(AADDL, w-1, D_DI);
+
+ gconreg(AMOVL, c, D_CX);
+ gins(AREP, N, N); // repeat
+ gins(AMOVSB, N, N); // MOVB *(SI)-,*(DI)-
+ }
+
+ if(q > 0) {
+ if(c > 0) {
+ gconreg(AADDL, -3, D_SI);
+ gconreg(AADDL, -3, D_DI);
+ } else {
+ gconreg(AADDL, w-4, D_SI);
+ gconreg(AADDL, w-4, D_DI);
+ }
+ gconreg(AMOVL, q, D_CX);
+ gins(AREP, N, N); // repeat
+ gins(AMOVSL, N, N); // MOVL *(SI)-,*(DI)-
+ }
+ // we leave with the flag clear
+ gins(ACLD, N, N);
+ } else {
+ gins(ACLD, N, N); // paranoia. TODO(rsc): remove?
+ // normal direction
+ if(q >= 4) {
+ gconreg(AMOVL, q, D_CX);
+ gins(AREP, N, N); // repeat
+ gins(AMOVSL, N, N); // MOVL *(SI)+,*(DI)+
+ } else
+ while(q > 0) {
+ gins(AMOVSL, N, N); // MOVL *(SI)+,*(DI)+
+ q--;
+ }
+ while(c > 0) {
+ gins(AMOVSB, N, N); // MOVB *(SI)+,*(DI)+
+ c--;
+ }
+ }
+}
+
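sgen above decides between a forward and a backward REP MOVS copy (CLD vs STD) when source and destination may overlap on the stack. A small standalone C sketch of why the direction matters (copy_fwd and copy_back are hypothetical helpers, not compiler code):

#include <stdio.h>

/* Forward copy: corrupts the data when dst overlaps src at a
 * higher address, because bytes are overwritten before they move. */
static void
copy_fwd(char *dst, char *src, int n)
{
	int i;

	for(i = 0; i < n; i++)
		dst[i] = src[i];
}

/* Backward copy: safe for that overlap; STD + REP MOVS gives the
 * same effect in the generated code. */
static void
copy_back(char *dst, char *src, int n)
{
	int i;

	for(i = n - 1; i >= 0; i--)
		dst[i] = src[i];
}

int
main(void)
{
	char a[8] = "abcdef", b[8] = "abcdef";

	copy_fwd(a + 1, a, 5);	/* overlapping forward copy: "aaaaaa" */
	copy_back(b + 1, b, 5);	/* overlapping backward copy: "aabcde" */
	printf("%s %s\n", a, b);
	return 0;
}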
diff --git a/src/cmd/8g/cgen64.c b/src/cmd/8g/cgen64.c
new file mode 100644
index 000000000..ba99cec74
--- /dev/null
+++ b/src/cmd/8g/cgen64.c
@@ -0,0 +1,512 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "gg.h"
+
+/*
+ * generate 64-bit
+ *	res = n
+ * fatals if the op is not handled.
+ */
+void
+cgen64(Node *n, Node *res)
+{
+ Node t1, t2, ax, dx, cx, ex, fx, *l, *r;
+ Node lo1, lo2, hi1, hi2;
+ Prog *p1, *p2;
+ uint64 v;
+ uint32 lv, hv;
+
+ if(res->op != OINDREG && res->op != ONAME) {
+ dump("n", n);
+ dump("res", res);
+ fatal("cgen64 %O of %O", n->op, res->op);
+ }
+ switch(n->op) {
+ default:
+ fatal("cgen64 %O", n->op);
+
+ case OMINUS:
+ cgen(n->left, res);
+ split64(res, &lo1, &hi1);
+ gins(ANEGL, N, &lo1);
+ gins(AADCL, ncon(0), &hi1);
+ gins(ANEGL, N, &hi1);
+ splitclean();
+ return;
+
+ case OCOM:
+ cgen(n->left, res);
+ split64(res, &lo1, &hi1);
+ gins(ANOTL, N, &lo1);
+ gins(ANOTL, N, &hi1);
+ splitclean();
+ return;
+
+ case OADD:
+ case OSUB:
+ case OMUL:
+ case OLSH:
+ case ORSH:
+ case OAND:
+ case OOR:
+ case OXOR:
+ // binary operators.
+ // common setup below.
+ break;
+ }
+
+ l = n->left;
+ r = n->right;
+ if(!l->addable) {
+ tempname(&t1, l->type);
+ cgen(l, &t1);
+ l = &t1;
+ }
+ if(r != N && !r->addable) {
+ tempname(&t2, r->type);
+ cgen(r, &t2);
+ r = &t2;
+ }
+
+ nodreg(&ax, types[TINT32], D_AX);
+ nodreg(&cx, types[TINT32], D_CX);
+ nodreg(&dx, types[TINT32], D_DX);
+
+ // Setup for binary operation.
+ split64(l, &lo1, &hi1);
+ if(is64(r->type))
+ split64(r, &lo2, &hi2);
+
+ // Do op. Leave result in DX:AX.
+ switch(n->op) {
+ case OADD:
+ // TODO: Constants
+ gins(AMOVL, &lo1, &ax);
+ gins(AMOVL, &hi1, &dx);
+ gins(AADDL, &lo2, &ax);
+ gins(AADCL, &hi2, &dx);
+ break;
+
+ case OSUB:
+ // TODO: Constants.
+ gins(AMOVL, &lo1, &ax);
+ gins(AMOVL, &hi1, &dx);
+ gins(ASUBL, &lo2, &ax);
+ gins(ASBBL, &hi2, &dx);
+ break;
+
+ case OMUL:
+ // let's call the next two EX and FX.
+ regalloc(&ex, types[TPTR32], N);
+ regalloc(&fx, types[TPTR32], N);
+
+ // load args into DX:AX and EX:CX.
+ gins(AMOVL, &lo1, &ax);
+ gins(AMOVL, &hi1, &dx);
+ gins(AMOVL, &lo2, &cx);
+ gins(AMOVL, &hi2, &ex);
+
+ // if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply.
+ gins(AMOVL, &dx, &fx);
+ gins(AORL, &ex, &fx);
+ p1 = gbranch(AJNE, T);
+ gins(AMULL, &cx, N); // implicit &ax
+ p2 = gbranch(AJMP, T);
+ patch(p1, pc);
+
+ // full 64x64 -> 64, from 32x32 -> 64.
+ gins(AIMULL, &cx, &dx);
+ gins(AMOVL, &ax, &fx);
+ gins(AIMULL, &ex, &fx);
+ gins(AADDL, &dx, &fx);
+ gins(AMOVL, &cx, &dx);
+ gins(AMULL, &dx, N); // implicit &ax
+ gins(AADDL, &fx, &dx);
+ patch(p2, pc);
+
+ regfree(&ex);
+ regfree(&fx);
+ break;
+
+ case OLSH:
+ if(r->op == OLITERAL) {
+ v = mpgetfix(r->val.u.xval);
+ if(v >= 64) {
+ if(is64(r->type))
+ splitclean();
+ splitclean();
+ split64(res, &lo2, &hi2);
+ gins(AMOVL, ncon(0), &lo2);
+ gins(AMOVL, ncon(0), &hi2);
+ splitclean();
+ goto out;
+ }
+ if(v >= 32) {
+ if(is64(r->type))
+ splitclean();
+ split64(res, &lo2, &hi2);
+ gmove(&lo1, &hi2);
+ if(v > 32) {
+ gins(ASHLL, ncon(v - 32), &hi2);
+ }
+ gins(AMOVL, ncon(0), &lo2);
+ splitclean();
+ splitclean();
+ goto out;
+ }
+
+ // general shift
+ gins(AMOVL, &lo1, &ax);
+ gins(AMOVL, &hi1, &dx);
+ p1 = gins(ASHLL, ncon(v), &dx);
+ p1->from.index = D_AX; // double-width shift
+ p1->from.scale = 0;
+ gins(ASHLL, ncon(v), &ax);
+ break;
+ }
+
+ // load value into DX:AX.
+ gins(AMOVL, &lo1, &ax);
+ gins(AMOVL, &hi1, &dx);
+
+ // load shift value into register.
+ // if high bits are set, zero value.
+ p1 = P;
+ if(is64(r->type)) {
+ gins(ACMPL, &hi2, ncon(0));
+ p1 = gbranch(AJNE, T);
+ gins(AMOVL, &lo2, &cx);
+ } else {
+ cx.type = types[TUINT32];
+ gmove(r, &cx);
+ }
+
+ // if shift count is >=64, zero value
+ gins(ACMPL, &cx, ncon(64));
+ p2 = gbranch(optoas(OLT, types[TUINT32]), T);
+ if(p1 != P)
+ patch(p1, pc);
+ gins(AXORL, &dx, &dx);
+ gins(AXORL, &ax, &ax);
+ patch(p2, pc);
+
+ // if shift count is >= 32, zero low.
+ gins(ACMPL, &cx, ncon(32));
+ p1 = gbranch(optoas(OLT, types[TUINT32]), T);
+ gins(AMOVL, &ax, &dx);
+ gins(ASHLL, &cx, &dx); // SHLL only uses bottom 5 bits of count
+ gins(AXORL, &ax, &ax);
+ p2 = gbranch(AJMP, T);
+ patch(p1, pc);
+
+ // general shift
+ p1 = gins(ASHLL, &cx, &dx);
+ p1->from.index = D_AX; // double-width shift
+ p1->from.scale = 0;
+ gins(ASHLL, &cx, &ax);
+ patch(p2, pc);
+ break;
+
+ case ORSH:
+ if(r->op == OLITERAL) {
+ v = mpgetfix(r->val.u.xval);
+ if(v >= 64) {
+ if(is64(r->type))
+ splitclean();
+ splitclean();
+ split64(res, &lo2, &hi2);
+ if(hi1.type->etype == TINT32) {
+ gmove(&hi1, &lo2);
+ gins(ASARL, ncon(31), &lo2);
+ gmove(&hi1, &hi2);
+ gins(ASARL, ncon(31), &hi2);
+ } else {
+ gins(AMOVL, ncon(0), &lo2);
+ gins(AMOVL, ncon(0), &hi2);
+ }
+ splitclean();
+ goto out;
+ }
+ if(v >= 32) {
+ if(is64(r->type))
+ splitclean();
+ split64(res, &lo2, &hi2);
+ gmove(&hi1, &lo2);
+ if(v > 32)
+ gins(optoas(ORSH, hi1.type), ncon(v-32), &lo2);
+ if(hi1.type->etype == TINT32) {
+ gmove(&hi1, &hi2);
+ gins(ASARL, ncon(31), &hi2);
+ } else
+ gins(AMOVL, ncon(0), &hi2);
+ splitclean();
+ splitclean();
+ goto out;
+ }
+
+ // general shift
+ gins(AMOVL, &lo1, &ax);
+ gins(AMOVL, &hi1, &dx);
+ p1 = gins(ASHRL, ncon(v), &ax);
+ p1->from.index = D_DX; // double-width shift
+ p1->from.scale = 0;
+ gins(optoas(ORSH, hi1.type), ncon(v), &dx);
+ break;
+ }
+
+ // load value into DX:AX.
+ gins(AMOVL, &lo1, &ax);
+ gins(AMOVL, &hi1, &dx);
+
+ // load shift value into register.
+ // if high bits are set, zero value.
+ p1 = P;
+ if(is64(r->type)) {
+ gins(ACMPL, &hi2, ncon(0));
+ p1 = gbranch(AJNE, T);
+ gins(AMOVL, &lo2, &cx);
+ } else {
+ cx.type = types[TUINT32];
+ gmove(r, &cx);
+ }
+
+ // if shift count is >=64, zero or sign-extend value
+ gins(ACMPL, &cx, ncon(64));
+ p2 = gbranch(optoas(OLT, types[TUINT32]), T);
+ if(p1 != P)
+ patch(p1, pc);
+ if(hi1.type->etype == TINT32) {
+ gins(ASARL, ncon(31), &dx);
+ gins(AMOVL, &dx, &ax);
+ } else {
+ gins(AXORL, &dx, &dx);
+ gins(AXORL, &ax, &ax);
+ }
+ patch(p2, pc);
+
+ // if shift count is >= 32, sign-extend hi.
+ gins(ACMPL, &cx, ncon(32));
+ p1 = gbranch(optoas(OLT, types[TUINT32]), T);
+ gins(AMOVL, &dx, &ax);
+ if(hi1.type->etype == TINT32) {
+ gins(ASARL, &cx, &ax); // SARL only uses bottom 5 bits of count
+ gins(ASARL, ncon(31), &dx);
+ } else {
+ gins(ASHRL, &cx, &ax);
+ gins(AXORL, &dx, &dx);
+ }
+ p2 = gbranch(AJMP, T);
+ patch(p1, pc);
+
+ // general shift
+ p1 = gins(ASHRL, &cx, &ax);
+ p1->from.index = D_DX; // double-width shift
+ p1->from.scale = 0;
+ gins(optoas(ORSH, hi1.type), &cx, &dx);
+ patch(p2, pc);
+ break;
+
+ case OXOR:
+ case OAND:
+ case OOR:
+ // make constant the right side (it usually is anyway).
+ if(lo1.op == OLITERAL) {
+ nswap(&lo1, &lo2);
+ nswap(&hi1, &hi2);
+ }
+ if(lo2.op == OLITERAL) {
+ // special cases for constants.
+ lv = mpgetfix(lo2.val.u.xval);
+ hv = mpgetfix(hi2.val.u.xval);
+ splitclean(); // right side
+ split64(res, &lo2, &hi2);
+ switch(n->op) {
+ case OXOR:
+ gmove(&lo1, &lo2);
+ gmove(&hi1, &hi2);
+ switch(lv) {
+ case 0:
+ break;
+ case 0xffffffffu:
+ gins(ANOTL, N, &lo2);
+ break;
+ default:
+ gins(AXORL, ncon(lv), &lo2);
+ break;
+ }
+ switch(hv) {
+ case 0:
+ break;
+ case 0xffffffffu:
+ gins(ANOTL, N, &hi2);
+ break;
+ default:
+ gins(AXORL, ncon(hv), &hi2);
+ break;
+ }
+ break;
+
+ case OAND:
+ switch(lv) {
+ case 0:
+ gins(AMOVL, ncon(0), &lo2);
+ break;
+ default:
+ gmove(&lo1, &lo2);
+ if(lv != 0xffffffffu)
+ gins(AANDL, ncon(lv), &lo2);
+ break;
+ }
+ switch(hv) {
+ case 0:
+ gins(AMOVL, ncon(0), &hi2);
+ break;
+ default:
+ gmove(&hi1, &hi2);
+ if(hv != 0xffffffffu)
+ gins(AANDL, ncon(hv), &hi2);
+ break;
+ }
+ break;
+
+ case OOR:
+ switch(lv) {
+ case 0:
+ gmove(&lo1, &lo2);
+ break;
+ case 0xffffffffu:
+ gins(AMOVL, ncon(0xffffffffu), &lo2);
+ break;
+ default:
+ gmove(&lo1, &lo2);
+ gins(AORL, ncon(lv), &lo2);
+ break;
+ }
+ switch(hv) {
+ case 0:
+ gmove(&hi1, &hi2);
+ break;
+ case 0xffffffffu:
+ gins(AMOVL, ncon(0xffffffffu), &hi2);
+ break;
+ default:
+ gmove(&hi1, &hi2);
+ gins(AORL, ncon(hv), &hi2);
+ break;
+ }
+ break;
+ }
+ splitclean();
+ splitclean();
+ goto out;
+ }
+ gins(AMOVL, &lo1, &ax);
+ gins(AMOVL, &hi1, &dx);
+ gins(optoas(n->op, lo1.type), &lo2, &ax);
+ gins(optoas(n->op, lo1.type), &hi2, &dx);
+ break;
+ }
+ if(is64(r->type))
+ splitclean();
+ splitclean();
+
+ split64(res, &lo1, &hi1);
+ gins(AMOVL, &ax, &lo1);
+ gins(AMOVL, &dx, &hi1);
+ splitclean();
+
+out:;
+}
+
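The constant-shift cases in cgen64 split a 64-bit shift into 32-bit pieces: counts of 64 or more clear the result, counts of 32 or more move the low word into the high word, and smaller counts use a double-width shift. A standalone C sketch of the same decomposition for a left shift (shl64 is an illustrative helper, not compiler code):

#include <stdint.h>
#include <stdio.h>

/* Left-shift a 64-bit value held as two 32-bit halves, mirroring
 * the v >= 64, v >= 32, and general cases of cgen64's OLSH path. */
static void
shl64(uint32_t *lo, uint32_t *hi, unsigned v)
{
	if(v >= 64) {			/* everything shifted out */
		*hi = 0;
		*lo = 0;
	} else if(v >= 32) {		/* low word moves into high word */
		*hi = *lo << (v - 32);
		*lo = 0;
	} else if(v > 0) {		/* double-width shift */
		*hi = (*hi << v) | (*lo >> (32 - v));
		*lo <<= v;
	}
}

int
main(void)
{
	uint32_t lo = 0x89abcdefu, hi = 0x01234567u;

	shl64(&lo, &hi, 8);
	printf("%08x%08x\n", hi, lo);	/* prints 23456789abcdef00 */
	return 0;
}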
+/*
+ * generate comparison of nl, nr, both 64-bit.
+ * nl is memory; nr is constant or memory.
+ */
+void
+cmp64(Node *nl, Node *nr, int op, Prog *to)
+{
+ Node lo1, hi1, lo2, hi2, rr;
+ Prog *br;
+ Type *t;
+
+ split64(nl, &lo1, &hi1);
+ split64(nr, &lo2, &hi2);
+
+ // compare most significant word;
+ // if they differ, we're done.
+ t = hi1.type;
+ if(nl->op == OLITERAL || nr->op == OLITERAL)
+ gins(ACMPL, &hi1, &hi2);
+ else {
+ regalloc(&rr, types[TINT32], N);
+ gins(AMOVL, &hi1, &rr);
+ gins(ACMPL, &rr, &hi2);
+ regfree(&rr);
+ }
+ br = P;
+ switch(op) {
+ default:
+ fatal("cmp64 %O %T", op, t);
+ case OEQ:
+ // cmp hi
+ // jne L
+ // cmp lo
+ // jeq to
+ // L:
+ br = gbranch(AJNE, T);
+ break;
+ case ONE:
+ // cmp hi
+ // jne to
+ // cmp lo
+ // jne to
+ patch(gbranch(AJNE, T), to);
+ break;
+ case OGE:
+ case OGT:
+ // cmp hi
+ // jgt to
+ // jlt L
+ // cmp lo
+ // jge to (or jgt to)
+ // L:
+ patch(gbranch(optoas(OGT, t), T), to);
+ br = gbranch(optoas(OLT, t), T);
+ break;
+ case OLE:
+ case OLT:
+ // cmp hi
+ // jlt to
+ // jgt L
+ // cmp lo
+ // jle to (or jlt to)
+ // L:
+ patch(gbranch(optoas(OLT, t), T), to);
+ br = gbranch(optoas(OGT, t), T);
+ break;
+ }
+
+ // compare least significant word
+ t = lo1.type;
+ if(nl->op == OLITERAL || nr->op == OLITERAL)
+ gins(ACMPL, &lo1, &lo2);
+ else {
+ regalloc(&rr, types[TINT32], N);
+ gins(AMOVL, &lo1, &rr);
+ gins(ACMPL, &rr, &lo2);
+ regfree(&rr);
+ }
+
+ // jump again
+ patch(gbranch(optoas(op, t), T), to);
+
+ // point first branch down here if appropriate
+ if(br != P)
+ patch(br, pc);
+
+ splitclean();
+ splitclean();
+}
+
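cmp64 compares the high 32-bit words first and only examines the low words when the high words are equal, as the commented jump sequences show. The equivalent decision in plain C, for a signed less-than (lt64 is a hypothetical helper):

#include <stdint.h>
#include <stdio.h>

/* Signed 64-bit a < b using only 32-bit comparisons: compare the
 * high words signed; if they are equal, compare the low words
 * unsigned. */
static int
lt64(int32_t ahi, uint32_t alo, int32_t bhi, uint32_t blo)
{
	if(ahi != bhi)
		return ahi < bhi;
	return alo < blo;
}

int
main(void)
{
	/* -1 < 0 */
	printf("%d\n", lt64(-1, 0xffffffffu, 0, 0));
	/* 0x100000000 < 0xffffffff is false */
	printf("%d\n", lt64(1, 0, 0, 0xffffffffu));
	return 0;
}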
diff --git a/src/cmd/8g/doc.go b/src/cmd/8g/doc.go
new file mode 100644
index 000000000..2d9ff9a42
--- /dev/null
+++ b/src/cmd/8g/doc.go
@@ -0,0 +1,15 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+
+8g is the version of the gc compiler for the x86.
+The $GOARCH for these tools is 386.
+
+It reads .go files and outputs .8 files. The flags are documented in ../gc/doc.go.
+
+There is no instruction optimizer, so the -N flag is a no-op.
+
+*/
+package documentation
diff --git a/src/cmd/8g/galign.c b/src/cmd/8g/galign.c
new file mode 100644
index 000000000..7734603c4
--- /dev/null
+++ b/src/cmd/8g/galign.c
@@ -0,0 +1,37 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "gg.h"
+
+int thechar = '8';
+char* thestring = "386";
+
+vlong MAXWIDTH = (1LL<<32) - 1;
+
+/*
+ * go declares several platform-specific type aliases:
+ * int, uint, float, and uintptr
+ */
+Typedef typedefs[] =
+{
+ "int", TINT, TINT32,
+ "uint", TUINT, TUINT32,
+ "uintptr", TUINTPTR, TUINT32,
+ 0
+};
+
+void
+betypeinit(void)
+{
+ widthptr = 4;
+
+ zprog.link = P;
+ zprog.as = AGOK;
+ zprog.from.type = D_NONE;
+ zprog.from.index = D_NONE;
+ zprog.from.scale = 0;
+ zprog.to = zprog.from;
+
+ listinit();
+}
diff --git a/src/cmd/8g/gg.h b/src/cmd/8g/gg.h
new file mode 100644
index 000000000..9f7a66a29
--- /dev/null
+++ b/src/cmd/8g/gg.h
@@ -0,0 +1,187 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <u.h>
+#include <libc.h>
+
+#include "../gc/go.h"
+#include "../8l/8.out.h"
+
+#ifndef EXTERN
+#define EXTERN extern
+#endif
+
+typedef struct Addr Addr;
+
+struct Addr
+{
+ int32 offset;
+ int32 offset2;
+
+ double dval;
+ Prog* branch;
+ char sval[NSNAME];
+
+ Sym* gotype;
+ Sym* sym;
+ Node* node;
+ int width;
+ uchar type;
+ uchar index;
+ uchar etype;
+ uchar scale; /* doubles as width in DATA op */
+ uchar pun; /* dont register variable */
+};
+#define A ((Addr*)0)
+
+struct Prog
+{
+ short as; // opcode
+ uint32 loc; // pc offset in this func
+ uint32 lineno; // source line that generated this
+ Addr from; // src address
+ Addr to; // dst address
+ Prog* link; // next instruction in this func
+ void* reg; // pointer to containing Reg struct
+};
+
+// foptoas flags
+enum
+{
+ Frev = 1<<0,
+ Fpop = 1<<1,
+ Fpop2 = 1<<2,
+};
+
+EXTERN Biobuf* bout;
+EXTERN int32 dynloc;
+EXTERN uchar reg[D_NONE];
+EXTERN int32 pcloc; // instruction counter
+EXTERN Strlit emptystring;
+extern char* anames[];
+EXTERN Hist* hist;
+EXTERN Prog zprog;
+EXTERN Node* curfn;
+EXTERN Node* newproc;
+EXTERN Node* deferproc;
+EXTERN Node* deferreturn;
+EXTERN Node* panicindex;
+EXTERN Node* panicslice;
+EXTERN Node* throwreturn;
+EXTERN int maxstksize;
+extern uint32 unmappedzero;
+
+
+/*
+ * ggen.c
+ */
+void compile(Node*);
+void proglist(void);
+void gen(Node*);
+Node* lookdot(Node*, Node*, int);
+void cgen_as(Node*, Node*);
+void cgen_callmeth(Node*, int);
+void cgen_callinter(Node*, Node*, int);
+void cgen_proc(Node*, int);
+void cgen_callret(Node*, Node*);
+void cgen_div(int, Node*, Node*, Node*);
+void cgen_bmul(int, Node*, Node*, Node*);
+void cgen_shift(int, Node*, Node*, Node*);
+void cgen_dcl(Node*);
+int needconvert(Type*, Type*);
+void genconv(Type*, Type*);
+void allocparams(void);
+void checklabels();
+void ginscall(Node*, int);
+
+/*
+ * cgen.c
+ */
+void agen(Node*, Node*);
+void agenr(Node *n, Node *a, Node *res);
+void igen(Node*, Node*, Node*);
+vlong fieldoffset(Type*, Node*);
+void bgen(Node*, int, Prog*);
+void sgen(Node*, Node*, int32);
+void gmove(Node*, Node*);
+Prog* gins(int, Node*, Node*);
+int samaddr(Node*, Node*);
+void naddr(Node*, Addr*, int);
+void cgen_aret(Node*, Node*);
+int cgen_inline(Node*, Node*);
+Node* ncon(uint32);
+void mgen(Node*, Node*, Node*);
+void mfree(Node*);
+
+/*
+ * cgen64.c
+ */
+void cmp64(Node*, Node*, int, Prog*);
+void cgen64(Node*, Node*);
+
+/*
+ * gsubr.c
+ */
+void clearp(Prog*);
+void proglist(void);
+Prog* gbranch(int, Type*);
+Prog* prog(int);
+void gaddoffset(Node*);
+void gconv(int, int);
+int conv2pt(Type*);
+vlong convvtox(vlong, int);
+void fnparam(Type*, int, int);
+Prog* gop(int, Node*, Node*, Node*);
+void setconst(Addr*, vlong);
+void setaddr(Addr*, Node*);
+int optoas(int, Type*);
+int foptoas(int, Type*, int);
+void ginit(void);
+void gclean(void);
+void regalloc(Node*, Type*, Node*);
+void regfree(Node*);
+Node* nodarg(Type*, int);
+void nodreg(Node*, Type*, int);
+void nodindreg(Node*, Type*, int);
+void nodconst(Node*, Type*, int64);
+void gconreg(int, vlong, int);
+void datagostring(Strlit*, Addr*);
+void datastring(char*, int, Addr*);
+void buildtxt(void);
+Plist* newplist(void);
+int isfat(Type*);
+void sudoclean(void);
+int sudoaddable(int, Node*, Addr*);
+int dotaddable(Node*, Node*);
+void afunclit(Addr*);
+void split64(Node*, Node*, Node*);
+void splitclean(void);
+void nswap(Node*, Node*);
+
+/*
+ * cplx.c
+ */
+int complexop(Node*, Node*);
+void complexmove(Node*, Node*);
+void complexgen(Node*, Node*);
+void complexbool(int, Node*, Node*, int, Prog*);
+
+/*
+ * gobj.c
+ */
+void data(void);
+void text(void);
+
+/*
+ * list.c
+ */
+int Aconv(Fmt*);
+int Dconv(Fmt*);
+int Pconv(Fmt*);
+int Rconv(Fmt*);
+int Yconv(Fmt*);
+void listinit(void);
+
+void zaddr(Biobuf*, Addr*, int, int);
+
diff --git a/src/cmd/8g/ggen.c b/src/cmd/8g/ggen.c
new file mode 100644
index 000000000..108c493aa
--- /dev/null
+++ b/src/cmd/8g/ggen.c
@@ -0,0 +1,1155 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#undef EXTERN
+#define EXTERN
+#include "gg.h"
+#include "opt.h"
+
+void
+defframe(Prog *ptxt)
+{
+ // fill in argument size
+ ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
+
+ // fill in final stack size
+ if(stksize > maxstksize)
+ maxstksize = stksize;
+ ptxt->to.offset = rnd(maxstksize+maxarg, widthptr);
+ maxstksize = 0;
+}
+
+// Sweep the prog list to mark any used nodes.
+void
+markautoused(Prog* p)
+{
+ for (; p; p = p->link) {
+ if (p->from.type == D_AUTO && p->from.node)
+ p->from.node->used++;
+
+ if (p->to.type == D_AUTO && p->to.node)
+ p->to.node->used++;
+ }
+}
+
+// Fixup instructions after compactframe has moved all autos around.
+void
+fixautoused(Prog* p)
+{
+ for (; p; p = p->link) {
+ if (p->from.type == D_AUTO && p->from.node)
+ p->from.offset += p->from.node->stkdelta;
+
+ if (p->to.type == D_AUTO && p->to.node)
+ p->to.offset += p->to.node->stkdelta;
+ }
+}
+
+void
+clearfat(Node *nl)
+{
+ uint32 w, c, q;
+ Node n1;
+
+ /* clear a fat object */
+ if(debug['g'])
+ dump("\nclearfat", nl);
+
+ w = nl->type->width;
+ c = w % 4; // bytes
+ q = w / 4; // quads
+
+ gconreg(AMOVL, 0, D_AX);
+ nodreg(&n1, types[tptr], D_DI);
+ agen(nl, &n1);
+
+ if(q >= 4) {
+ gconreg(AMOVL, q, D_CX);
+ gins(AREP, N, N); // repeat
+ gins(ASTOSL, N, N); // STOL AL,*(DI)+
+ } else
+ while(q > 0) {
+ gins(ASTOSL, N, N); // STOL AL,*(DI)+
+ q--;
+ }
+
+ if(c >= 4) {
+ gconreg(AMOVL, c, D_CX);
+ gins(AREP, N, N); // repeat
+ gins(ASTOSB, N, N); // STOB AL,*(DI)+
+ } else
+ while(c > 0) {
+ gins(ASTOSB, N, N); // STOB AL,*(DI)+
+ c--;
+ }
+}
+
+/*
+ * generate:
+ * call f
+ * proc=0 normal call
+ * proc=1 goroutine run in new proc
+ * proc=2 defer call save away stack
+ */
+void
+ginscall(Node *f, int proc)
+{
+ Prog *p;
+ Node reg, con;
+
+ switch(proc) {
+ default:
+ fatal("ginscall: bad proc %d", proc);
+ break;
+
+ case 0: // normal call
+ p = gins(ACALL, N, f);
+ afunclit(&p->to);
+ break;
+
+ case 1: // call in new proc (go)
+ case 2: // deferred call (defer)
+ nodreg(&reg, types[TINT32], D_CX);
+ gins(APUSHL, f, N);
+ nodconst(&con, types[TINT32], argsize(f->type));
+ gins(APUSHL, &con, N);
+ if(proc == 1)
+ ginscall(newproc, 0);
+ else
+ ginscall(deferproc, 0);
+ gins(APOPL, N, &reg);
+ gins(APOPL, N, &reg);
+ if(proc == 2) {
+ nodreg(&reg, types[TINT64], D_AX);
+ gins(ATESTL, &reg, &reg);
+ patch(gbranch(AJNE, T), retpc);
+ }
+ break;
+ }
+}
+
+/*
+ * n is call to interface method.
+ * generate res = n.
+ */
+void
+cgen_callinter(Node *n, Node *res, int proc)
+{
+ Node *i, *f;
+ Node tmpi, nodo, nodr, nodsp;
+
+ i = n->left;
+ if(i->op != ODOTINTER)
+ fatal("cgen_callinter: not ODOTINTER %O", i->op);
+
+ f = i->right; // field
+ if(f->op != ONAME)
+ fatal("cgen_callinter: not ONAME %O", f->op);
+
+ i = i->left; // interface
+
+ if(!i->addable) {
+ tempname(&tmpi, i->type);
+ cgen(i, &tmpi);
+ i = &tmpi;
+ }
+
+ genlist(n->list); // assign the args
+
+ // Can regalloc now; i is known to be addable,
+ // so the agen will be easy.
+ regalloc(&nodr, types[tptr], res);
+ regalloc(&nodo, types[tptr], &nodr);
+ nodo.op = OINDREG;
+
+ agen(i, &nodr); // REG = &inter
+
+ nodindreg(&nodsp, types[tptr], D_SP);
+ nodo.xoffset += widthptr;
+ cgen(&nodo, &nodsp); // 0(SP) = 4(REG) -- i.data
+
+ nodo.xoffset -= widthptr;
+ cgen(&nodo, &nodr); // REG = 0(REG) -- i.tab
+
+ if(n->left->xoffset == BADWIDTH)
+ fatal("cgen_callinter: badwidth");
+ nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
+ cgen(&nodo, &nodr); // REG = 20+offset(REG) -- i.tab->fun[f]
+
+ // BOTCH nodr.type = fntype;
+ nodr.type = n->left->type;
+ ginscall(&nodr, proc);
+
+ regfree(&nodr);
+ regfree(&nodo);
+
+ setmaxarg(n->left->type);
+}
+
+/*
+ * generate function call;
+ * proc=0 normal call
+ * proc=1 goroutine run in new proc
+ * proc=2 defer call save away stack
+ */
+void
+cgen_call(Node *n, int proc)
+{
+ Type *t;
+ Node nod, afun;
+
+ if(n == N)
+ return;
+
+ if(n->left->ullman >= UINF) {
+ // if name involves a fn call
+ // precompute the address of the fn
+ tempname(&afun, types[tptr]);
+ cgen(n->left, &afun);
+ }
+
+ genlist(n->list); // assign the args
+ t = n->left->type;
+
+ setmaxarg(t);
+
+ // call tempname pointer
+ if(n->left->ullman >= UINF) {
+ regalloc(&nod, types[tptr], N);
+ cgen_as(&nod, &afun);
+ nod.type = t;
+ ginscall(&nod, proc);
+ regfree(&nod);
+ return;
+ }
+
+ // call pointer
+ if(n->left->op != ONAME || n->left->class != PFUNC) {
+ regalloc(&nod, types[tptr], N);
+ cgen_as(&nod, n->left);
+ nod.type = t;
+ ginscall(&nod, proc);
+ regfree(&nod);
+ return;
+ }
+
+ // call direct
+ n->left->method = 1;
+ ginscall(n->left, proc);
+}
+
+/*
+ * call to n has already been generated.
+ * generate:
+ * res = return value from call.
+ */
+void
+cgen_callret(Node *n, Node *res)
+{
+ Node nod;
+ Type *fp, *t;
+ Iter flist;
+
+ t = n->left->type;
+ if(t->etype == TPTR32 || t->etype == TPTR64)
+ t = t->type;
+
+ fp = structfirst(&flist, getoutarg(t));
+ if(fp == T)
+ fatal("cgen_callret: nil");
+
+ memset(&nod, 0, sizeof(nod));
+ nod.op = OINDREG;
+ nod.val.u.reg = D_SP;
+ nod.addable = 1;
+
+ nod.xoffset = fp->width;
+ nod.type = fp->type;
+ cgen_as(res, &nod);
+}
+
+/*
+ * call to n has already been generated.
+ * generate:
+ * res = &return value from call.
+ */
+void
+cgen_aret(Node *n, Node *res)
+{
+ Node nod1, nod2;
+ Type *fp, *t;
+ Iter flist;
+
+ t = n->left->type;
+ if(isptr[t->etype])
+ t = t->type;
+
+ fp = structfirst(&flist, getoutarg(t));
+ if(fp == T)
+ fatal("cgen_aret: nil");
+
+ memset(&nod1, 0, sizeof(nod1));
+ nod1.op = OINDREG;
+ nod1.val.u.reg = D_SP;
+ nod1.addable = 1;
+
+ nod1.xoffset = fp->width;
+ nod1.type = fp->type;
+
+ if(res->op != OREGISTER) {
+ regalloc(&nod2, types[tptr], res);
+ gins(ALEAL, &nod1, &nod2);
+ gins(AMOVL, &nod2, res);
+ regfree(&nod2);
+ } else
+ gins(ALEAL, &nod1, res);
+}
+
+/*
+ * generate return.
+ * n->left is assignments to return values.
+ */
+void
+cgen_ret(Node *n)
+{
+ genlist(n->list); // copy out args
+ if(retpc)
+ gjmp(retpc);
+ else
+ gins(ARET, N, N);
+}
+
+/*
+ * generate += *= etc.
+ */
+void
+cgen_asop(Node *n)
+{
+ Node n1, n2, n3, n4;
+ Node *nl, *nr;
+ Prog *p1;
+ Addr addr;
+ int a;
+
+ nl = n->left;
+ nr = n->right;
+
+ if(nr->ullman >= UINF && nl->ullman >= UINF) {
+ tempname(&n1, nr->type);
+ cgen(nr, &n1);
+ n2 = *n;
+ n2.right = &n1;
+ cgen_asop(&n2);
+ goto ret;
+ }
+
+ if(!isint[nl->type->etype])
+ goto hard;
+ if(!isint[nr->type->etype])
+ goto hard;
+ if(is64(nl->type) || is64(nr->type))
+ goto hard;
+
+ switch(n->etype) {
+ case OADD:
+ if(smallintconst(nr))
+ if(mpgetfix(nr->val.u.xval) == 1) {
+ a = optoas(OINC, nl->type);
+ if(nl->addable) {
+ gins(a, N, nl);
+ goto ret;
+ }
+ if(sudoaddable(a, nl, &addr)) {
+ p1 = gins(a, N, N);
+ p1->to = addr;
+ sudoclean();
+ goto ret;
+ }
+ }
+ break;
+
+ case OSUB:
+ if(smallintconst(nr))
+ if(mpgetfix(nr->val.u.xval) == 1) {
+ a = optoas(ODEC, nl->type);
+ if(nl->addable) {
+ gins(a, N, nl);
+ goto ret;
+ }
+ if(sudoaddable(a, nl, &addr)) {
+ p1 = gins(a, N, N);
+ p1->to = addr;
+ sudoclean();
+ goto ret;
+ }
+ }
+ break;
+ }
+
+ switch(n->etype) {
+ case OADD:
+ case OSUB:
+ case OXOR:
+ case OAND:
+ case OOR:
+ a = optoas(n->etype, nl->type);
+ if(nl->addable) {
+ if(smallintconst(nr)) {
+ gins(a, nr, nl);
+ goto ret;
+ }
+ regalloc(&n2, nr->type, N);
+ cgen(nr, &n2);
+ gins(a, &n2, nl);
+ regfree(&n2);
+ goto ret;
+ }
+ if(nr->ullman < UINF)
+ if(sudoaddable(a, nl, &addr)) {
+ if(smallintconst(nr)) {
+ p1 = gins(a, nr, N);
+ p1->to = addr;
+ sudoclean();
+ goto ret;
+ }
+ regalloc(&n2, nr->type, N);
+ cgen(nr, &n2);
+ p1 = gins(a, &n2, N);
+ p1->to = addr;
+ regfree(&n2);
+ sudoclean();
+ goto ret;
+ }
+ }
+
+hard:
+ n2.op = 0;
+ n1.op = 0;
+ if(nr->ullman >= nl->ullman || nl->addable) {
+ mgen(nr, &n2, N);
+ nr = &n2;
+ } else {
+ tempname(&n2, nr->type);
+ cgen(nr, &n2);
+ nr = &n2;
+ }
+ if(!nl->addable) {
+ igen(nl, &n1, N);
+ nl = &n1;
+ }
+
+ n3 = *n;
+ n3.left = nl;
+ n3.right = nr;
+ n3.op = n->etype;
+
+ mgen(&n3, &n4, N);
+ gmove(&n4, nl);
+
+ if(n1.op)
+ regfree(&n1);
+ mfree(&n2);
+ mfree(&n4);
+
+ret:
+ ;
+}
+
+int
+samereg(Node *a, Node *b)
+{
+ if(a->op != OREGISTER)
+ return 0;
+ if(b->op != OREGISTER)
+ return 0;
+ if(a->val.u.reg != b->val.u.reg)
+ return 0;
+ return 1;
+}
+
+/*
+ * generate division.
+ * caller must set:
+ * ax = allocated AX register
+ * dx = allocated DX register
+ * generates one of:
+ * res = nl / nr
+ * res = nl % nr
+ * according to op.
+ */
+void
+dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
+{
+ int check;
+ Node n1, t1, t2, n4, nz;
+ Type *t;
+ Prog *p1, *p2, *p3;
+
+ // Have to be careful about handling
+ // most negative int divided by -1 correctly.
+ // The hardware will trap.
+ // Also the byte divide instruction needs AH,
+ // which we otherwise don't have to deal with.
+ // Easiest way to avoid for int8, int16: use int32.
+ // For int32 and int64, use explicit test.
+ // Could use int64 hw for int32.
+ t = nl->type;
+ check = 0;
+ if(issigned[t->etype]) {
+ check = 1;
+ if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
+ check = 0;
+ else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
+ check = 0;
+ }
+ if(t->width < 4) {
+ if(issigned[t->etype])
+ t = types[TINT32];
+ else
+ t = types[TUINT32];
+ check = 0;
+ }
+
+ tempname(&t1, t);
+ tempname(&t2, t);
+ cgen(nl, &t1);
+ cgen(nr, &t2);
+
+ if(!samereg(ax, res) && !samereg(dx, res))
+ regalloc(&n1, t, res);
+ else
+ regalloc(&n1, t, N);
+ gmove(&t2, &n1);
+ gmove(&t1, ax);
+ p3 = P;
+ if(check) {
+ nodconst(&n4, t, -1);
+ gins(optoas(OCMP, t), &n1, &n4);
+ p1 = gbranch(optoas(ONE, t), T);
+ nodconst(&n4, t, -1LL<<(t->width*8-1));
+ gins(optoas(OCMP, t), ax, &n4);
+ p2 = gbranch(optoas(ONE, t), T);
+ if(op == ODIV)
+ gmove(&n4, res);
+ if(op == OMOD) {
+ nodconst(&n4, t, 0);
+ gmove(&n4, res);
+ }
+ p3 = gbranch(AJMP, T);
+ patch(p1, pc);
+ patch(p2, pc);
+ }
+ if(!issigned[t->etype]) {
+ nodconst(&nz, t, 0);
+ gmove(&nz, dx);
+ } else
+ gins(optoas(OEXTEND, t), N, N);
+ gins(optoas(op, t), &n1, N);
+ regfree(&n1);
+
+ if(op == ODIV)
+ gmove(ax, res);
+ else
+ gmove(dx, res);
+ if(check)
+ patch(p3, pc);
+}
+
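dodiv's check guards the one case where IDIV would trap: the most negative value divided by -1. A brief standalone sketch of the substituted results (safediv only illustrates the intent; the real code works in AX/DX):

#include <stdint.h>
#include <stdio.h>

/* x86 IDIV faults on INT32_MIN / -1 because the quotient does not
 * fit; the generated code skips the instruction in that case and
 * substitutes INT32_MIN for the quotient and 0 for the remainder. */
static int32_t
safediv(int32_t a, int32_t b, int32_t *rem)
{
	if(b == -1 && a == INT32_MIN) {
		*rem = 0;
		return INT32_MIN;
	}
	*rem = a % b;
	return a / b;
}

int
main(void)
{
	int32_t q, r;

	q = safediv(INT32_MIN, -1, &r);
	printf("%d %d\n", q, r);	/* -2147483648 0 */
	return 0;
}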
+static void
+savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
+{
+ int r;
+
+ r = reg[dr];
+ nodreg(x, types[TINT32], dr);
+
+ // save current ax and dx if they are live
+ // and not the destination
+ memset(oldx, 0, sizeof *oldx);
+ if(r > 0 && !samereg(x, res)) {
+ tempname(oldx, types[TINT32]);
+ gmove(x, oldx);
+ }
+
+ regalloc(x, t, x);
+}
+
+static void
+restx(Node *x, Node *oldx)
+{
+ regfree(x);
+
+ if(oldx->op != 0) {
+ x->type = types[TINT32];
+ gmove(oldx, x);
+ }
+}
+
+/*
+ * generate division according to op, one of:
+ * res = nl / nr
+ * res = nl % nr
+ */
+void
+cgen_div(int op, Node *nl, Node *nr, Node *res)
+{
+ Node ax, dx, oldax, olddx;
+ Type *t;
+
+ if(is64(nl->type))
+ fatal("cgen_div %T", nl->type);
+
+ if(issigned[nl->type->etype])
+ t = types[TINT32];
+ else
+ t = types[TUINT32];
+ savex(D_AX, &ax, &oldax, res, t);
+ savex(D_DX, &dx, &olddx, res, t);
+ dodiv(op, nl, nr, res, &ax, &dx);
+ restx(&dx, &olddx);
+ restx(&ax, &oldax);
+}
+
+/*
+ * generate shift according to op, one of:
+ * res = nl << nr
+ * res = nl >> nr
+ */
+void
+cgen_shift(int op, Node *nl, Node *nr, Node *res)
+{
+ Node n1, n2, nt, cx, oldcx, hi, lo;
+ int a, w;
+ Prog *p1, *p2;
+ uvlong sc;
+
+ if(nl->type->width > 4)
+ fatal("cgen_shift %T", nl->type);
+
+ w = nl->type->width * 8;
+
+ a = optoas(op, nl->type);
+
+ if(nr->op == OLITERAL) {
+ tempname(&n2, nl->type);
+ cgen(nl, &n2);
+ regalloc(&n1, nl->type, res);
+ gmove(&n2, &n1);
+ sc = mpgetfix(nr->val.u.xval);
+ if(sc >= nl->type->width*8) {
+ // large shift gets 2 shifts by width
+ gins(a, ncon(w-1), &n1);
+ gins(a, ncon(w-1), &n1);
+ } else
+ gins(a, nr, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ return;
+ }
+
+ memset(&oldcx, 0, sizeof oldcx);
+ nodreg(&cx, types[TUINT32], D_CX);
+ if(reg[D_CX] > 1 && !samereg(&cx, res)) {
+ tempname(&oldcx, types[TUINT32]);
+ gmove(&cx, &oldcx);
+ }
+
+ if(nr->type->width > 4) {
+ tempname(&nt, nr->type);
+ n1 = nt;
+ } else {
+ nodreg(&n1, types[TUINT32], D_CX);
+ regalloc(&n1, nr->type, &n1); // to hold the shift type in CX
+ }
+
+ if(samereg(&cx, res))
+ regalloc(&n2, nl->type, N);
+ else
+ regalloc(&n2, nl->type, res);
+ if(nl->ullman >= nr->ullman) {
+ cgen(nl, &n2);
+ cgen(nr, &n1);
+ } else {
+ cgen(nr, &n1);
+ cgen(nl, &n2);
+ }
+
+ // test and fix up large shifts
+ if(nr->type->width > 4) {
+ // delayed reg alloc
+ nodreg(&n1, types[TUINT32], D_CX);
+ regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
+ split64(&nt, &lo, &hi);
+ gmove(&lo, &n1);
+ gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
+ p2 = gbranch(optoas(ONE, types[TUINT32]), T);
+ gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
+ p1 = gbranch(optoas(OLT, types[TUINT32]), T);
+ patch(p2, pc);
+ } else {
+ gins(optoas(OCMP, nr->type), &n1, ncon(w));
+ p1 = gbranch(optoas(OLT, types[TUINT32]), T);
+ }
+ if(op == ORSH && issigned[nl->type->etype]) {
+ gins(a, ncon(w-1), &n2);
+ } else {
+ gmove(ncon(0), &n2);
+ }
+ patch(p1, pc);
+ gins(a, &n1, &n2);
+
+ if(oldcx.op != 0)
+ gmove(&oldcx, &cx);
+
+ gmove(&n2, res);
+
+ regfree(&n1);
+ regfree(&n2);
+}
+
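The compare-and-fix-up in cgen_shift exists because the x86 shift instructions use only the low five bits of the count, while the generated code must make counts of the operand width or more produce 0 (or the sign fill, for a signed right shift). A compact standalone illustration of the difference (shl_hw and shl_go are hypothetical helpers):

#include <stdint.h>
#include <stdio.h>

/* Hardware-like shift: SHLL uses only the low 5 bits of the count. */
static uint32_t
shl_hw(uint32_t x, uint32_t n)
{
	return x << (n & 31);
}

/* Required semantics: a count of 32 or more shifts everything out. */
static uint32_t
shl_go(uint32_t x, uint32_t n)
{
	if(n >= 32)
		return 0;
	return x << n;
}

int
main(void)
{
	printf("%u %u\n", shl_hw(1, 32), shl_go(1, 32));	/* 1 0 */
	return 0;
}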
+/*
+ * generate byte multiply:
+ * res = nl * nr
+ * no byte multiply instruction so have to do
+ * 16-bit multiply and take bottom half.
+ */
+void
+cgen_bmul(int op, Node *nl, Node *nr, Node *res)
+{
+ Node n1b, n2b, n1w, n2w;
+ Type *t;
+ int a;
+
+ if(nl->ullman >= nr->ullman) {
+ regalloc(&n1b, nl->type, res);
+ cgen(nl, &n1b);
+ regalloc(&n2b, nr->type, N);
+ cgen(nr, &n2b);
+ } else {
+ regalloc(&n2b, nr->type, N);
+ cgen(nr, &n2b);
+ regalloc(&n1b, nl->type, res);
+ cgen(nl, &n1b);
+ }
+
+ // copy from byte to short registers
+ t = types[TUINT16];
+ if(issigned[nl->type->etype])
+ t = types[TINT16];
+
+ regalloc(&n2w, t, &n2b);
+ cgen(&n2b, &n2w);
+
+ regalloc(&n1w, t, &n1b);
+ cgen(&n1b, &n1w);
+
+ a = optoas(op, t);
+ gins(a, &n2w, &n1w);
+ cgen(&n1w, &n1b);
+ cgen(&n1b, res);
+
+ regfree(&n1w);
+ regfree(&n2w);
+ regfree(&n1b);
+ regfree(&n2b);
+}
+
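cgen_bmul widens both byte operands to 16 bits, multiplies, and keeps the bottom half, since there is no suitable byte multiply form. The low byte of the wider product equals the truncated byte product, as this small standalone check shows:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint8_t a = 200, b = 3;
	uint16_t wide = (uint16_t)(a * b);	/* 600 = 0x0258 */
	uint8_t narrow = (uint8_t)wide;		/* bottom half: 0x58 = 88 */

	printf("%u %u\n", wide, narrow);	/* 600 88 */
	return 0;
}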
+static int
+regcmp(const void *va, const void *vb)
+{
+ Node *ra, *rb;
+
+ ra = (Node*)va;
+ rb = (Node*)vb;
+ return ra->local - rb->local;
+}
+
+static Prog* throwpc;
+
+// We're only going to bother inlining if we can
+// convert all the arguments to 32 bits safely. Can we?
+static int
+fix64(NodeList *nn, int n)
+{
+ NodeList *l;
+ Node *r;
+ int i;
+
+ l = nn;
+ for(i=0; i<n; i++) {
+ r = l->n->right;
+ if(is64(r->type) && !smallintconst(r)) {
+ if(r->op == OCONV)
+ r = r->left;
+ if(is64(r->type))
+ return 0;
+ }
+ l = l->next;
+ }
+ return 1;
+}
+
+void
+getargs(NodeList *nn, Node *reg, int n)
+{
+ NodeList *l;
+ Node *r;
+ int i;
+
+ throwpc = nil;
+
+ l = nn;
+ for(i=0; i<n; i++) {
+ r = l->n->right;
+ if(is64(r->type)) {
+ if(r->op == OCONV)
+ r = r->left;
+ else if(smallintconst(r))
+ r->type = types[TUINT32];
+ if(is64(r->type))
+ fatal("getargs");
+ }
+ if(!smallintconst(r) && !isslice(r->type)) {
+ if(i < 3) // AX CX DX
+ nodreg(reg+i, r->type, D_AX+i);
+ else
+ reg[i].op = OXXX;
+ regalloc(reg+i, r->type, reg+i);
+ cgen(r, reg+i);
+ } else
+ reg[i] = *r;
+ if(reg[i].local != 0)
+ yyerror("local used");
+ reg[i].local = l->n->left->xoffset;
+ l = l->next;
+ }
+ qsort((void*)reg, n, sizeof(*reg), regcmp);
+ for(i=0; i<n; i++)
+ reg[i].local = 0;
+}
+
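+// cmpandthrow compares nl and nr as unsigned 32-bit values and, when
+// nl > nr, branches to a call of panicslice. the call site is emitted
+// lazily and remembered in throwpc so later checks can branch to it.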
+void
+cmpandthrow(Node *nl, Node *nr)
+{
+ vlong cl;
+ Prog *p1;
+ int op;
+ Node *c, n1;
+ Type *t;
+
+ op = OLE;
+ if(smallintconst(nl)) {
+ cl = mpgetfix(nl->val.u.xval);
+ if(cl == 0)
+ return;
+ if(smallintconst(nr))
+ return;
+ // put the constant on the right
+ op = brrev(op);
+ c = nl;
+ nl = nr;
+ nr = c;
+ }
+
+ // Arguments are known not to be 64-bit,
+ // but they might be smaller than 32 bits.
+ // Check if we need to use a temporary.
+ // At least one of the arguments is 32 bits
+ // (the len or cap) so one temporary suffices.
+ n1.op = OXXX;
+ t = types[TUINT32];
+ if(nl->type->width != t->width) {
+ regalloc(&n1, t, nl);
+ gmove(nl, &n1);
+ nl = &n1;
+ } else if(nr->type->width != t->width) {
+ regalloc(&n1, t, nr);
+ gmove(nr, &n1);
+ nr = &n1;
+ }
+ gins(optoas(OCMP, t), nl, nr);
+ if(n1.op != OXXX)
+ regfree(&n1);
+ if(throwpc == nil) {
+ p1 = gbranch(optoas(op, t), T);
+ throwpc = pc;
+ ginscall(panicslice, 0);
+ patch(p1, pc);
+ } else {
+ op = brcom(op);
+ p1 = gbranch(optoas(op, t), T);
+ patch(p1, throwpc);
+ }
+}
+
+int
+sleasy(Node *n)
+{
+ if(n->op != ONAME)
+ return 0;
+ if(!n->addable)
+ return 0;
+ return 1;
+}
+
+// generate inline code for
+// slicearray
+// sliceslice
+// sliceslice1
+int
+cgen_inline(Node *n, Node *res)
+{
+ Node nodes[5];
+ Node n1, n2, nres, ntemp;
+ vlong v;
+ int i, narg, nochk;
+
+ if(n->op != OCALLFUNC)
+ goto no;
+ if(!n->left->addable)
+ goto no;
+ if(n->left->sym == S)
+ goto no;
+ if(n->left->sym->pkg != runtimepkg)
+ goto no;
+ if(strcmp(n->left->sym->name, "slicearray") == 0)
+ goto slicearray;
+ if(strcmp(n->left->sym->name, "sliceslice") == 0) {
+ narg = 4;
+ goto sliceslice;
+ }
+ if(strcmp(n->left->sym->name, "sliceslice1") == 0) {
+ narg = 3;
+ goto sliceslice;
+ }
+ goto no;
+
+slicearray:
+ if(!sleasy(res))
+ goto no;
+ if(!fix64(n->list, 5))
+ goto no;
+ getargs(n->list, nodes, 5);
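+ // argument layout: nodes[0]=old array pointer, nodes[1]=nel,
+ // nodes[2]=lb, nodes[3]=hb, nodes[4]=width. the bracketed indices
+ // in the comments below refer to these slots.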
+
+ // if(hb[3] > nel[1]) goto throw
+ cmpandthrow(&nodes[3], &nodes[1]);
+
+ // if(lb[2] > hb[3]) goto throw
+ cmpandthrow(&nodes[2], &nodes[3]);
+
+ // len = hb[3] - lb[2] (destroys hb)
+ n2 = *res;
+ n2.xoffset += Array_nel;
+ n2.type = types[TUINT32];
+
+ if(smallintconst(&nodes[3]) && smallintconst(&nodes[2])) {
+ v = mpgetfix(nodes[3].val.u.xval) -
+ mpgetfix(nodes[2].val.u.xval);
+ nodconst(&n1, types[TUINT32], v);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ } else {
+ regalloc(&n1, types[TUINT32], &nodes[3]);
+ gmove(&nodes[3], &n1);
+ if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0)
+ gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ regfree(&n1);
+ }
+
+ // cap = nel[1] - lb[2] (destroys nel)
+ n2 = *res;
+ n2.xoffset += Array_cap;
+ n2.type = types[TUINT32];
+
+ if(smallintconst(&nodes[1]) && smallintconst(&nodes[2])) {
+ v = mpgetfix(nodes[1].val.u.xval) -
+ mpgetfix(nodes[2].val.u.xval);
+ nodconst(&n1, types[TUINT32], v);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ } else {
+ regalloc(&n1, types[TUINT32], &nodes[1]);
+ gmove(&nodes[1], &n1);
+ if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0)
+ gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ regfree(&n1);
+ }
+
+ // if slice could be too big, dereference to
+ // catch nil array pointer.
+ if(nodes[0].op == OREGISTER && nodes[0].type->type->width >= unmappedzero) {
+ n2 = nodes[0];
+ n2.xoffset = 0;
+ n2.op = OINDREG;
+ n2.type = types[TUINT8];
+ gins(ATESTB, nodintconst(0), &n2);
+ }
+
+ // ary = old[0] + (lb[2] * width[4]) (destroys old)
+ n2 = *res;
+ n2.xoffset += Array_array;
+ n2.type = types[tptr];
+
+ if(smallintconst(&nodes[2]) && smallintconst(&nodes[4])) {
+ v = mpgetfix(nodes[2].val.u.xval) *
+ mpgetfix(nodes[4].val.u.xval);
+ if(v != 0) {
+ nodconst(&n1, types[tptr], v);
+ gins(optoas(OADD, types[tptr]), &n1, &nodes[0]);
+ }
+ } else {
+ regalloc(&n1, types[tptr], &nodes[2]);
+ gmove(&nodes[2], &n1);
+ if(!smallintconst(&nodes[4]) || mpgetfix(nodes[4].val.u.xval) != 1)
+ gins(optoas(OMUL, types[tptr]), &nodes[4], &n1);
+ gins(optoas(OADD, types[tptr]), &n1, &nodes[0]);
+ regfree(&n1);
+ }
+ gins(optoas(OAS, types[tptr]), &nodes[0], &n2);
+
+ for(i=0; i<5; i++) {
+ if(nodes[i].op == OREGISTER)
+ regfree(&nodes[i]);
+ }
+ return 1;
+
+sliceslice:
+ if(!fix64(n->list, narg))
+ goto no;
+ nochk = n->etype; // skip bounds checking
+ ntemp.op = OXXX;
+ if(!sleasy(n->list->n->right)) {
+ Node *n0;
+
+ n0 = n->list->n->right;
+ tempname(&ntemp, res->type);
+ cgen(n0, &ntemp);
+ n->list->n->right = &ntemp;
+ getargs(n->list, nodes, narg);
+ n->list->n->right = n0;
+ } else
+ getargs(n->list, nodes, narg);
+
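+ // argument layout: nodes[0]=old slice, nodes[1]=lb, nodes[2]=hb,
+ // nodes[3]=width for sliceslice (old[lb:hb]); for sliceslice1
+ // (old[lb:]) the width arrives in nodes[2] and is moved into
+ // nodes[3] below.
+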
+ nres = *res; // result
+ if(!sleasy(res)) {
+ if(ntemp.op == OXXX)
+ tempname(&ntemp, res->type);
+ nres = ntemp;
+ }
+
+ if(narg == 3) { // old[lb:]
+ // move width to where it would be for old[lb:hb]
+ nodes[3] = nodes[2];
+ nodes[2].op = OXXX;
+
+ // if(lb[1] > old.nel[0]) goto throw;
+ n2 = nodes[0];
+ n2.xoffset += Array_nel;
+ n2.type = types[TUINT32];
+ if(!nochk)
+ cmpandthrow(&nodes[1], &n2);
+
+ // ret.nel = old.nel[0]-lb[1];
+ n2 = nodes[0];
+ n2.xoffset += Array_nel;
+ n2.type = types[TUINT32];
+
+ regalloc(&n1, types[TUINT32], N);
+ gins(optoas(OAS, types[TUINT32]), &n2, &n1);
+ if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
+ gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
+
+ n2 = nres;
+ n2.xoffset += Array_nel;
+ n2.type = types[TUINT32];
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ regfree(&n1);
+ } else { // old[lb:hb]
+ n2 = nodes[0];
+ n2.xoffset += Array_cap;
+ n2.type = types[TUINT32];
+ if (!nochk) {
+ // if(hb[2] > old.cap[0]) goto throw;
+ cmpandthrow(&nodes[2], &n2);
+ // if(lb[1] > hb[2]) goto throw;
+ cmpandthrow(&nodes[1], &nodes[2]);
+ }
+
+ // ret.len = hb[2]-lb[1]; (destroys hb[2])
+ n2 = nres;
+ n2.xoffset += Array_nel;
+ n2.type = types[TUINT32];
+
+ if(smallintconst(&nodes[2]) && smallintconst(&nodes[1])) {
+ v = mpgetfix(nodes[2].val.u.xval) -
+ mpgetfix(nodes[1].val.u.xval);
+ nodconst(&n1, types[TUINT32], v);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ } else {
+ regalloc(&n1, types[TUINT32], &nodes[2]);
+ gmove(&nodes[2], &n1);
+ if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
+ gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ regfree(&n1);
+ }
+ }
+
+ // ret.cap = old.cap[0]-lb[1]; (uses hb[2])
+ n2 = nodes[0];
+ n2.xoffset += Array_cap;
+ n2.type = types[TUINT32];
+
+ regalloc(&n1, types[TUINT32], &nodes[2]);
+ gins(optoas(OAS, types[TUINT32]), &n2, &n1);
+ if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
+ gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
+
+ n2 = nres;
+ n2.xoffset += Array_cap;
+ n2.type = types[TUINT32];
+ gins(optoas(OAS, types[TUINT32]), &n1, &n2);
+ regfree(&n1);
+
+ // ret.array = old.array[0]+lb[1]*width[3]; (uses lb[1])
+ n2 = nodes[0];
+ n2.xoffset += Array_array;
+ n2.type = types[tptr];
+
+ regalloc(&n1, types[tptr], &nodes[1]);
+ if(smallintconst(&nodes[1]) && smallintconst(&nodes[3])) {
+ gins(optoas(OAS, types[tptr]), &n2, &n1);
+ v = mpgetfix(nodes[1].val.u.xval) *
+ mpgetfix(nodes[3].val.u.xval);
+ if(v != 0) {
+ nodconst(&n2, types[tptr], v);
+ gins(optoas(OADD, types[tptr]), &n2, &n1);
+ }
+ } else {
+ gmove(&nodes[1], &n1);
+ if(!smallintconst(&nodes[3]) || mpgetfix(nodes[3].val.u.xval) != 1)
+ gins(optoas(OMUL, types[tptr]), &nodes[3], &n1);
+ gins(optoas(OADD, types[tptr]), &n2, &n1);
+ }
+
+ n2 = nres;
+ n2.xoffset += Array_array;
+ n2.type = types[tptr];
+ gins(optoas(OAS, types[tptr]), &n1, &n2);
+ regfree(&n1);
+
+ for(i=0; i<4; i++) {
+ if(nodes[i].op == OREGISTER)
+ regfree(&nodes[i]);
+ }
+
+ if(!sleasy(res)) {
+ cgen(&nres, res);
+ }
+ return 1;
+
+no:
+ return 0;
+}
diff --git a/src/cmd/8g/gobj.c b/src/cmd/8g/gobj.c
new file mode 100644
index 000000000..31c42a3f2
--- /dev/null
+++ b/src/cmd/8g/gobj.c
@@ -0,0 +1,651 @@
+// Derived from Inferno utils/8c/swt.c
+// http://code.google.com/p/inferno-os/source/browse/utils/8c/swt.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "gg.h"
+
+void
+zname(Biobuf *b, Sym *s, int t)
+{
+ Bputc(b, ANAME); /* as */
+ Bputc(b, ANAME>>8); /* as */
+ Bputc(b, t); /* type */
+ Bputc(b, s->sym); /* sym */
+
+ Bputname(b, s);
+}
+
+void
+zfile(Biobuf *b, char *p, int n)
+{
+ Bputc(b, ANAME);
+ Bputc(b, ANAME>>8);
+ Bputc(b, D_FILE);
+ Bputc(b, 1);
+ Bputc(b, '<');
+ Bwrite(b, p, n);
+ Bputc(b, 0);
+}
+
+void
+zhist(Biobuf *b, int line, vlong offset)
+{
+ Addr a;
+
+ Bputc(b, AHISTORY);
+ Bputc(b, AHISTORY>>8);
+ Bputc(b, line);
+ Bputc(b, line>>8);
+ Bputc(b, line>>16);
+ Bputc(b, line>>24);
+ zaddr(b, &zprog.from, 0, 0);
+ a = zprog.to;
+ if(offset != 0) {
+ a.offset = offset;
+ a.type = D_CONST;
+ }
+ zaddr(b, &a, 0, 0);
+}
+
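+/*
+ * write address a to the object file. the first byte is a bit mask
+ * describing which optional fields follow (index/scale, offset,
+ * offset2, sym, float or string constant, type, gotype); the fields
+ * themselves are then written in that fixed order.
+ */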
+void
+zaddr(Biobuf *b, Addr *a, int s, int gotype)
+{
+ int32 l;
+ uint64 e;
+ int i, t;
+ char *n;
+
+ t = 0;
+ if(a->index != D_NONE || a->scale != 0)
+ t |= T_INDEX;
+ if(s != 0)
+ t |= T_SYM;
+ if(gotype != 0)
+ t |= T_GOTYPE;
+
+ switch(a->type) {
+
+ case D_BRANCH:
+ if(a->branch == nil)
+ fatal("unpatched branch");
+ a->offset = a->branch->loc;
+
+ default:
+ t |= T_TYPE;
+
+ case D_NONE:
+ if(a->offset != 0)
+ t |= T_OFFSET;
+ if(a->offset2 != 0)
+ t |= T_OFFSET2;
+ break;
+ case D_FCONST:
+ t |= T_FCONST;
+ break;
+ case D_SCONST:
+ t |= T_SCONST;
+ break;
+ }
+ Bputc(b, t);
+
+ if(t & T_INDEX) { /* implies index, scale */
+ Bputc(b, a->index);
+ Bputc(b, a->scale);
+ }
+ if(t & T_OFFSET) { /* implies offset */
+ l = a->offset;
+ Bputc(b, l);
+ Bputc(b, l>>8);
+ Bputc(b, l>>16);
+ Bputc(b, l>>24);
+ }
+ if(t & T_OFFSET2) { /* implies offset */
+ l = a->offset2;
+ Bputc(b, l);
+ Bputc(b, l>>8);
+ Bputc(b, l>>16);
+ Bputc(b, l>>24);
+ }
+ if(t & T_SYM) /* implies sym */
+ Bputc(b, s);
+ if(t & T_FCONST) {
+ ieeedtod(&e, a->dval);
+ l = e;
+ Bputc(b, l);
+ Bputc(b, l>>8);
+ Bputc(b, l>>16);
+ Bputc(b, l>>24);
+ l = e >> 32;
+ Bputc(b, l);
+ Bputc(b, l>>8);
+ Bputc(b, l>>16);
+ Bputc(b, l>>24);
+ return;
+ }
+ if(t & T_SCONST) {
+ n = a->sval;
+ for(i=0; i<NSNAME; i++) {
+ Bputc(b, *n);
+ n++;
+ }
+ return;
+ }
+ if(t & T_TYPE)
+ Bputc(b, a->type);
+ if(t & T_GOTYPE)
+ Bputc(b, gotype);
+}
+
+static struct {
+ struct { Sym *sym; short type; } h[NSYM];
+ int sym;
+} z;
+
+static void
+zsymreset(void)
+{
+ for(z.sym=0; z.sym<NSYM; z.sym++) {
+ z.h[z.sym].sym = S;
+ z.h[z.sym].type = 0;
+ }
+ z.sym = 1;
+}
+
+static int
+zsym(Sym *s, int t, int *new)
+{
+ int i;
+
+ *new = 0;
+ if(s == S)
+ return 0;
+
+ i = s->sym;
+ if(i < 0 || i >= NSYM)
+ i = 0;
+ if(z.h[i].type == t && z.h[i].sym == s)
+ return i;
+ i = z.sym;
+ s->sym = i;
+ zname(bout, s, t);
+ z.h[i].sym = s;
+ z.h[i].type = t;
+ if(++z.sym >= NSYM)
+ z.sym = 1;
+ *new = 1;
+ return i;
+}
+
+static int
+zsymaddr(Addr *a, int *new)
+{
+ int t;
+
+ t = a->type;
+ if(t == D_ADDR)
+ t = a->index;
+ return zsym(a->sym, t, new);
+}
+
+void
+dumpfuncs(void)
+{
+ Plist *pl;
+ int sf, st, gf, gt, new;
+ Sym *s;
+ Prog *p;
+
+ zsymreset();
+
+ // fix up pc
+ pcloc = 0;
+ for(pl=plist; pl!=nil; pl=pl->link) {
+ if(isblank(pl->name))
+ continue;
+ for(p=pl->firstpc; p!=P; p=p->link) {
+ p->loc = pcloc;
+ if(p->as != ADATA && p->as != AGLOBL)
+ pcloc++;
+ }
+ }
+
+ // put out functions
+ for(pl=plist; pl!=nil; pl=pl->link) {
+ if(isblank(pl->name))
+ continue;
+
+ if(debug['S']) {
+ s = S;
+ if(pl->name != N)
+ s = pl->name->sym;
+ print("\n--- prog list \"%S\" ---\n", s);
+ for(p=pl->firstpc; p!=P; p=p->link)
+ print("%P\n", p);
+ }
+
+ for(p=pl->firstpc; p!=P; p=p->link) {
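+ // zsym assigns symbol slots from a small ring (NSYM), so
+ // defining a new symbol may reuse a slot handed out earlier in
+ // this iteration; when that happens, redo the lookups until all
+ // four indices are valid at the same time.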
+ for(;;) {
+ sf = zsymaddr(&p->from, &new);
+ gf = zsym(p->from.gotype, D_EXTERN, &new);
+ if(new && sf == gf)
+ continue;
+ st = zsymaddr(&p->to, &new);
+ if(new && (st == sf || st == gf))
+ continue;
+ gt = zsym(p->to.gotype, D_EXTERN, &new);
+ if(new && (gt == sf || gt == gf || gt == st))
+ continue;
+ break;
+ }
+
+ Bputc(bout, p->as);
+ Bputc(bout, p->as>>8);
+ Bputc(bout, p->lineno);
+ Bputc(bout, p->lineno>>8);
+ Bputc(bout, p->lineno>>16);
+ Bputc(bout, p->lineno>>24);
+ zaddr(bout, &p->from, sf, gf);
+ zaddr(bout, &p->to, st, gt);
+ }
+ }
+}
+
+/* deferred DATA output */
+static Prog *strdat;
+static Prog *estrdat;
+static int gflag;
+static Prog *savepc;
+
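+// data() redirects code generation onto a private Prog list used to
+// accumulate string DATA, text() restores the saved pc, and dumpdata()
+// later splices the accumulated instructions onto the end of the main
+// list.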
+void
+data(void)
+{
+ gflag = debug['g'];
+ debug['g'] = 0;
+
+ if(estrdat == nil) {
+ strdat = mal(sizeof(*pc));
+ clearp(strdat);
+ estrdat = strdat;
+ }
+ if(savepc)
+ fatal("data phase error");
+ savepc = pc;
+ pc = estrdat;
+}
+
+void
+text(void)
+{
+ if(!savepc)
+ fatal("text phase error");
+ debug['g'] = gflag;
+ estrdat = pc;
+ pc = savepc;
+ savepc = nil;
+}
+
+void
+dumpdata(void)
+{
+ Prog *p;
+
+ if(estrdat == nil)
+ return;
+ *pc = *strdat;
+ if(gflag)
+ for(p=pc; p!=estrdat; p=p->link)
+ print("%P\n", p);
+ pc = estrdat;
+}
+
+int
+dsname(Sym *s, int off, char *t, int n)
+{
+ Prog *p;
+
+ p = gins(ADATA, N, N);
+ p->from.type = D_EXTERN;
+ p->from.index = D_NONE;
+ p->from.offset = off;
+ p->from.scale = n;
+ p->from.sym = s;
+
+ p->to.type = D_SCONST;
+ p->to.index = D_NONE;
+ memmove(p->to.sval, t, n);
+ return off + n;
+}
+
+/*
+ * make a reference to the data s, s+len,
+ * emitting DATA if needed.
+ */
+void
+datastring(char *s, int len, Addr *a)
+{
+ Sym *sym;
+
+ sym = stringsym(s, len);
+ a->type = D_EXTERN;
+ a->sym = sym;
+ a->offset = widthptr+4; // skip header
+ a->etype = TINT32;
+}
+
+/*
+ * make a reference to the string sval,
+ * emitting DATA if needed.
+ */
+void
+datagostring(Strlit *sval, Addr *a)
+{
+ Sym *sym;
+
+ sym = stringsym(sval->s, sval->len);
+ a->type = D_EXTERN;
+ a->sym = sym;
+ a->offset = 0; // header
+ a->etype = TINT32;
+}
+
+void
+gdata(Node *nam, Node *nr, int wid)
+{
+ Prog *p;
+ vlong v;
+
+ if(wid == 8 && is64(nr->type)) {
+ v = mpgetfix(nr->val.u.xval);
+ p = gins(ADATA, nam, nodintconst(v));
+ p->from.scale = 4;
+ p = gins(ADATA, nam, nodintconst(v>>32));
+ p->from.scale = 4;
+ p->from.offset += 4;
+ return;
+ }
+ p = gins(ADATA, nam, nr);
+ p->from.scale = wid;
+}
+
+void
+gdatacomplex(Node *nam, Mpcplx *cval)
+{
+ Prog *p;
+ int w;
+
+ w = cplxsubtype(nam->type->etype);
+ w = types[w]->width;
+
+ p = gins(ADATA, nam, N);
+ p->from.scale = w;
+ p->to.type = D_FCONST;
+ p->to.dval = mpgetflt(&cval->real);
+
+ p = gins(ADATA, nam, N);
+ p->from.scale = w;
+ p->from.offset += w;
+ p->to.type = D_FCONST;
+ p->to.dval = mpgetflt(&cval->imag);
+}
+
+void
+gdatastring(Node *nam, Strlit *sval)
+{
+ Prog *p;
+ Node nod1;
+
+ p = gins(ADATA, nam, N);
+ datastring(sval->s, sval->len, &p->to);
+ p->from.scale = types[tptr]->width;
+ p->to.index = p->to.type;
+ p->to.type = D_ADDR;
+//print("%P\n", p);
+
+ nodconst(&nod1, types[TINT32], sval->len);
+ p = gins(ADATA, nam, &nod1);
+ p->from.scale = types[TINT32]->width;
+ p->from.offset += types[tptr]->width;
+}
+
+int
+dstringptr(Sym *s, int off, char *str)
+{
+ Prog *p;
+
+ off = rnd(off, widthptr);
+ p = gins(ADATA, N, N);
+ p->from.type = D_EXTERN;
+ p->from.index = D_NONE;
+ p->from.sym = s;
+ p->from.offset = off;
+ p->from.scale = widthptr;
+
+ datastring(str, strlen(str)+1, &p->to);
+ p->to.index = p->to.type;
+ p->to.type = D_ADDR;
+ p->to.etype = TINT32;
+ off += widthptr;
+
+ return off;
+}
+
+int
+dgostrlitptr(Sym *s, int off, Strlit *lit)
+{
+ Prog *p;
+
+ if(lit == nil)
+ return duintptr(s, off, 0);
+
+ off = rnd(off, widthptr);
+ p = gins(ADATA, N, N);
+ p->from.type = D_EXTERN;
+ p->from.index = D_NONE;
+ p->from.sym = s;
+ p->from.offset = off;
+ p->from.scale = widthptr;
+ datagostring(lit, &p->to);
+ p->to.index = p->to.type;
+ p->to.type = D_ADDR;
+ p->to.etype = TINT32;
+ off += widthptr;
+
+ return off;
+}
+
+int
+dgostringptr(Sym *s, int off, char *str)
+{
+ int n;
+ Strlit *lit;
+
+ if(str == nil)
+ return duintptr(s, off, 0);
+
+ n = strlen(str);
+ lit = mal(sizeof *lit + n);
+ strcpy(lit->s, str);
+ lit->len = n;
+ return dgostrlitptr(s, off, lit);
+}
+
+
+int
+duintxx(Sym *s, int off, uint64 v, int wid)
+{
+ Prog *p;
+
+ off = rnd(off, wid);
+
+ p = gins(ADATA, N, N);
+ p->from.type = D_EXTERN;
+ p->from.index = D_NONE;
+ p->from.sym = s;
+ p->from.offset = off;
+ p->from.scale = wid;
+ p->to.type = D_CONST;
+ p->to.index = D_NONE;
+ p->to.offset = v;
+ off += wid;
+
+ return off;
+}
+
+int
+dsymptr(Sym *s, int off, Sym *x, int xoff)
+{
+ Prog *p;
+
+ off = rnd(off, widthptr);
+
+ p = gins(ADATA, N, N);
+ p->from.type = D_EXTERN;
+ p->from.index = D_NONE;
+ p->from.sym = s;
+ p->from.offset = off;
+ p->from.scale = widthptr;
+ p->to.type = D_ADDR;
+ p->to.index = D_EXTERN;
+ p->to.sym = x;
+ p->to.offset = xoff;
+ off += widthptr;
+
+ return off;
+}
+
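+/*
+ * generate the trampoline for a method promoted from an embedded type:
+ * adjust the receiver pointer in its stack slot at 4(SP), following
+ * any embedded pointer fields, and jump to the method on the embedded
+ * type.
+ */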
+void
+genembedtramp(Type *rcvr, Type *method, Sym *newnam, int iface)
+{
+ Sym *e;
+ int c, d, o, mov, add, loaded;
+ Prog *p;
+ Type *f;
+
+ e = method->sym;
+ for(d=0; d<nelem(dotlist); d++) {
+ c = adddot1(e, rcvr, d, nil, 0);
+ if(c == 1)
+ goto out;
+ }
+ fatal("genembedtramp %T.%S", rcvr, method->sym);
+
+out:
+ newplist()->name = newname(newnam);
+
+ //TEXT main·S_test2(SB),7,$0
+ p = pc;
+ gins(ATEXT, N, N);
+ p->from.type = D_EXTERN;
+ p->from.sym = newnam;
+ p->to.type = D_CONST;
+ p->to.offset = 0;
+ p->from.scale = 7;
+//print("1. %P\n", p);
+
+ mov = AMOVL;
+ add = AADDL;
+
+ loaded = 0;
+ o = 0;
+ for(c=d-1; c>=0; c--) {
+ f = dotlist[c].field;
+ o += f->width;
+ if(!isptr[f->type->etype])
+ continue;
+ if(!loaded) {
+ loaded = 1;
+ //MOVL 4(SP), AX
+ p = pc;
+ gins(mov, N, N);
+ p->from.type = D_INDIR+D_SP;
+ p->from.offset = widthptr;
+ p->to.type = D_AX;
+//print("2. %P\n", p);
+ }
+
+ //MOVL o(AX), AX
+ p = pc;
+ gins(mov, N, N);
+ p->from.type = D_INDIR+D_AX;
+ p->from.offset = o;
+ p->to.type = D_AX;
+//print("3. %P\n", p);
+ o = 0;
+ }
+ if(o != 0) {
+ //ADDL $XX, AX
+ p = pc;
+ gins(add, N, N);
+ p->from.type = D_CONST;
+ p->from.offset = o;
+ if(loaded)
+ p->to.type = D_AX;
+ else {
+ p->to.type = D_INDIR+D_SP;
+ p->to.offset = widthptr;
+ }
+//print("4. %P\n", p);
+ }
+
+ //MOVL AX, 4(SP)
+ if(loaded) {
+ p = pc;
+ gins(mov, N, N);
+ p->from.type = D_AX;
+ p->to.type = D_INDIR+D_SP;
+ p->to.offset = widthptr;
+//print("5. %P\n", p);
+ } else {
+ // TODO(rsc): obviously this is unnecessary,
+ // but 6l has a bug, and it can't handle
+ // JMP instructions too close to the top of
+ // a new function.
+ p = pc;
+ gins(ANOP, N, N);
+ }
+
+ f = dotlist[0].field;
+ //JMP main·*Sub_test2(SB)
+ if(isptr[f->type->etype])
+ f = f->type;
+ p = pc;
+ gins(AJMP, N, N);
+ p->to.type = D_EXTERN;
+ p->to.sym = methodsym(method->sym, ptrto(f->type), 0);
+//print("6. %P\n", p);
+
+ pc->as = ARET; // overwrite AEND
+}
+
+void
+nopout(Prog *p)
+{
+ p->as = ANOP;
+}
+
diff --git a/src/cmd/8g/gsubr.c b/src/cmd/8g/gsubr.c
new file mode 100644
index 000000000..a35c81eb1
--- /dev/null
+++ b/src/cmd/8g/gsubr.c
@@ -0,0 +1,1959 @@
+// Derived from Inferno utils/8c/txt.c
+// http://code.google.com/p/inferno-os/source/browse/utils/8c/txt.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "gg.h"
+
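+// Loads and stores at offsets below unmappedzero can rely on the
+// unmapped page at address 0 to turn a nil pointer dereference into a
+// fault; larger offsets need the explicit test emitted by checkoffset.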
+// TODO(rsc): Can make this bigger if we move
+// the text segment up higher in 8l for all GOOS.
+uint32 unmappedzero = 4096;
+
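+// CASE packs an operation and a type (or, in gmove, two types) into a
+// single value so that the switches below can dispatch on the pair.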
+#define CASE(a,b) (((a)<<16)|((b)<<0))
+
+void
+clearp(Prog *p)
+{
+ p->as = AEND;
+ p->from.type = D_NONE;
+ p->from.index = D_NONE;
+ p->to.type = D_NONE;
+ p->to.index = D_NONE;
+ p->loc = pcloc;
+ pcloc++;
+}
+
+/*
+ * generate and return a new Prog with p->as = as,
+ * linked into the program. pc is the next instruction.
+ */
+Prog*
+prog(int as)
+{
+ Prog *p;
+
+ p = pc;
+ pc = mal(sizeof(*pc));
+
+ clearp(pc);
+
+ if(lineno == 0) {
+ if(debug['K'])
+ warn("prog: line 0");
+ }
+
+ p->as = as;
+ p->lineno = lineno;
+ p->link = pc;
+ return p;
+}
+
+/*
+ * generate a branch.
+ * t is ignored.
+ */
+Prog*
+gbranch(int as, Type *t)
+{
+ Prog *p;
+
+ p = prog(as);
+ p->to.type = D_BRANCH;
+ p->to.branch = P;
+ return p;
+}
+
+/*
+ * patch the branch instruction p to jump to to.
+ */
+void
+patch(Prog *p, Prog *to)
+{
+ if(p->to.type != D_BRANCH)
+ fatal("patch: not a branch");
+ p->to.branch = to;
+ p->to.offset = to->loc;
+}
+
+Prog*
+unpatch(Prog *p)
+{
+ Prog *q;
+
+ if(p->to.type != D_BRANCH)
+ fatal("unpatch: not a branch");
+ q = p->to.branch;
+ p->to.branch = P;
+ p->to.offset = 0;
+ return q;
+}
+
+/*
+ * start a new Prog list.
+ */
+Plist*
+newplist(void)
+{
+ Plist *pl;
+
+ pl = mal(sizeof(*pl));
+ if(plist == nil)
+ plist = pl;
+ else
+ plast->link = pl;
+ plast = pl;
+
+ pc = mal(sizeof(*pc));
+ clearp(pc);
+ pl->firstpc = pc;
+
+ return pl;
+}
+
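+/*
+ * insert code at the start of the current function to zero its stack
+ * frame: CLD; MOVL SP, DI; MOVL $(framesize/4), CX; MOVL $0, AX;
+ * REP; STOSL.
+ */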
+void
+clearstk(void)
+{
+ Plist *pl;
+ Prog *p1, *p2;
+ Node sp, di, cx, con, ax;
+
+ if(plast->firstpc->to.offset <= 0)
+ return;
+
+ // reestablish context for inserting code
+ // at beginning of function.
+ pl = plast;
+ p1 = pl->firstpc;
+ p2 = p1->link;
+ pc = mal(sizeof(*pc));
+ clearp(pc);
+ p1->link = pc;
+
+ // zero stack frame
+ nodreg(&sp, types[tptr], D_SP);
+ nodreg(&di, types[tptr], D_DI);
+ nodreg(&cx, types[TUINT32], D_CX);
+ nodconst(&con, types[TUINT32], p1->to.offset / widthptr);
+ gins(ACLD, N, N);
+ gins(AMOVL, &sp, &di);
+ gins(AMOVL, &con, &cx);
+ nodconst(&con, types[TUINT32], 0);
+ nodreg(&ax, types[TUINT32], D_AX);
+ gins(AMOVL, &con, &ax);
+ gins(AREP, N, N);
+ gins(ASTOSL, N, N);
+
+ // continue with original code.
+ gins(ANOP, N, N)->link = p2;
+ pc = P;
+}
+
+void
+gused(Node *n)
+{
+ gins(ANOP, n, N); // used
+}
+
+Prog*
+gjmp(Prog *to)
+{
+ Prog *p;
+
+ p = gbranch(AJMP, T);
+ if(to != P)
+ patch(p, to);
+ return p;
+}
+
+void
+ggloblnod(Node *nam, int32 width)
+{
+ Prog *p;
+
+ p = gins(AGLOBL, nam, N);
+ p->lineno = nam->lineno;
+ p->to.sym = S;
+ p->to.type = D_CONST;
+ p->to.offset = width;
+ if(nam->readonly)
+ p->from.scale = RODATA;
+}
+
+void
+ggloblsym(Sym *s, int32 width, int dupok)
+{
+ Prog *p;
+
+ p = gins(AGLOBL, N, N);
+ p->from.type = D_EXTERN;
+ p->from.index = D_NONE;
+ p->from.sym = s;
+ p->to.type = D_CONST;
+ p->to.index = D_NONE;
+ p->to.offset = width;
+ if(dupok)
+ p->from.scale = DUPOK;
+ p->from.scale |= RODATA;
+}
+
+int
+isfat(Type *t)
+{
+ if(t != T)
+ switch(t->etype) {
+ case TSTRUCT:
+ case TARRAY:
+ case TSTRING:
+ case TINTER: // maybe remove later
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * naddr of a func node generates the address of the func.
+ * if using an opcode that can take the address implicitly,
+ * call afunclit to fix up the argument.
+ */
+void
+afunclit(Addr *a)
+{
+ if(a->type == D_ADDR && a->index == D_EXTERN) {
+ a->type = D_EXTERN;
+ a->index = D_NONE;
+ }
+}
+
+/*
+ * return Axxx for Oxxx on type t.
+ */
+int
+optoas(int op, Type *t)
+{
+ int a;
+
+ if(t == T)
+ fatal("optoas: t is nil");
+
+ a = AGOK;
+ switch(CASE(op, simtype[t->etype])) {
+ default:
+ fatal("optoas: no entry %O-%T", op, t);
+ break;
+
+ case CASE(OADDR, TPTR32):
+ a = ALEAL;
+ break;
+
+ case CASE(OEQ, TBOOL):
+ case CASE(OEQ, TINT8):
+ case CASE(OEQ, TUINT8):
+ case CASE(OEQ, TINT16):
+ case CASE(OEQ, TUINT16):
+ case CASE(OEQ, TINT32):
+ case CASE(OEQ, TUINT32):
+ case CASE(OEQ, TINT64):
+ case CASE(OEQ, TUINT64):
+ case CASE(OEQ, TPTR32):
+ case CASE(OEQ, TPTR64):
+ case CASE(OEQ, TFLOAT32):
+ case CASE(OEQ, TFLOAT64):
+ a = AJEQ;
+ break;
+
+ case CASE(ONE, TBOOL):
+ case CASE(ONE, TINT8):
+ case CASE(ONE, TUINT8):
+ case CASE(ONE, TINT16):
+ case CASE(ONE, TUINT16):
+ case CASE(ONE, TINT32):
+ case CASE(ONE, TUINT32):
+ case CASE(ONE, TINT64):
+ case CASE(ONE, TUINT64):
+ case CASE(ONE, TPTR32):
+ case CASE(ONE, TPTR64):
+ case CASE(ONE, TFLOAT32):
+ case CASE(ONE, TFLOAT64):
+ a = AJNE;
+ break;
+
+ case CASE(OLT, TINT8):
+ case CASE(OLT, TINT16):
+ case CASE(OLT, TINT32):
+ case CASE(OLT, TINT64):
+ a = AJLT;
+ break;
+
+ case CASE(OLT, TUINT8):
+ case CASE(OLT, TUINT16):
+ case CASE(OLT, TUINT32):
+ case CASE(OLT, TUINT64):
+ a = AJCS;
+ break;
+
+ case CASE(OLE, TINT8):
+ case CASE(OLE, TINT16):
+ case CASE(OLE, TINT32):
+ case CASE(OLE, TINT64):
+ a = AJLE;
+ break;
+
+ case CASE(OLE, TUINT8):
+ case CASE(OLE, TUINT16):
+ case CASE(OLE, TUINT32):
+ case CASE(OLE, TUINT64):
+ a = AJLS;
+ break;
+
+ case CASE(OGT, TINT8):
+ case CASE(OGT, TINT16):
+ case CASE(OGT, TINT32):
+ case CASE(OGT, TINT64):
+ a = AJGT;
+ break;
+
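+ // floating-point comparisons (FUCOMIP/FCOMIP) set the flags the way
+ // an unsigned integer compare does, which is why the float cases of
+ // OLT and OLE appear among the unsigned branch cases below.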
+ case CASE(OGT, TUINT8):
+ case CASE(OGT, TUINT16):
+ case CASE(OGT, TUINT32):
+ case CASE(OGT, TUINT64):
+ case CASE(OLT, TFLOAT32):
+ case CASE(OLT, TFLOAT64):
+ a = AJHI;
+ break;
+
+ case CASE(OGE, TINT8):
+ case CASE(OGE, TINT16):
+ case CASE(OGE, TINT32):
+ case CASE(OGE, TINT64):
+ a = AJGE;
+ break;
+
+ case CASE(OGE, TUINT8):
+ case CASE(OGE, TUINT16):
+ case CASE(OGE, TUINT32):
+ case CASE(OGE, TUINT64):
+ case CASE(OLE, TFLOAT32):
+ case CASE(OLE, TFLOAT64):
+ a = AJCC;
+ break;
+
+ case CASE(OCMP, TBOOL):
+ case CASE(OCMP, TINT8):
+ case CASE(OCMP, TUINT8):
+ a = ACMPB;
+ break;
+
+ case CASE(OCMP, TINT16):
+ case CASE(OCMP, TUINT16):
+ a = ACMPW;
+ break;
+
+ case CASE(OCMP, TINT32):
+ case CASE(OCMP, TUINT32):
+ case CASE(OCMP, TPTR32):
+ a = ACMPL;
+ break;
+
+ case CASE(OAS, TBOOL):
+ case CASE(OAS, TINT8):
+ case CASE(OAS, TUINT8):
+ a = AMOVB;
+ break;
+
+ case CASE(OAS, TINT16):
+ case CASE(OAS, TUINT16):
+ a = AMOVW;
+ break;
+
+ case CASE(OAS, TINT32):
+ case CASE(OAS, TUINT32):
+ case CASE(OAS, TPTR32):
+ a = AMOVL;
+ break;
+
+ case CASE(OADD, TINT8):
+ case CASE(OADD, TUINT8):
+ a = AADDB;
+ break;
+
+ case CASE(OADD, TINT16):
+ case CASE(OADD, TUINT16):
+ a = AADDW;
+ break;
+
+ case CASE(OADD, TINT32):
+ case CASE(OADD, TUINT32):
+ case CASE(OADD, TPTR32):
+ a = AADDL;
+ break;
+
+ case CASE(OSUB, TINT8):
+ case CASE(OSUB, TUINT8):
+ a = ASUBB;
+ break;
+
+ case CASE(OSUB, TINT16):
+ case CASE(OSUB, TUINT16):
+ a = ASUBW;
+ break;
+
+ case CASE(OSUB, TINT32):
+ case CASE(OSUB, TUINT32):
+ case CASE(OSUB, TPTR32):
+ a = ASUBL;
+ break;
+
+ case CASE(OINC, TINT8):
+ case CASE(OINC, TUINT8):
+ a = AINCB;
+ break;
+
+ case CASE(OINC, TINT16):
+ case CASE(OINC, TUINT16):
+ a = AINCW;
+ break;
+
+ case CASE(OINC, TINT32):
+ case CASE(OINC, TUINT32):
+ case CASE(OINC, TPTR32):
+ a = AINCL;
+ break;
+
+ case CASE(ODEC, TINT8):
+ case CASE(ODEC, TUINT8):
+ a = ADECB;
+ break;
+
+ case CASE(ODEC, TINT16):
+ case CASE(ODEC, TUINT16):
+ a = ADECW;
+ break;
+
+ case CASE(ODEC, TINT32):
+ case CASE(ODEC, TUINT32):
+ case CASE(ODEC, TPTR32):
+ a = ADECL;
+ break;
+
+ case CASE(OCOM, TINT8):
+ case CASE(OCOM, TUINT8):
+ a = ANOTB;
+ break;
+
+ case CASE(OCOM, TINT16):
+ case CASE(OCOM, TUINT16):
+ a = ANOTW;
+ break;
+
+ case CASE(OCOM, TINT32):
+ case CASE(OCOM, TUINT32):
+ case CASE(OCOM, TPTR32):
+ a = ANOTL;
+ break;
+
+ case CASE(OMINUS, TINT8):
+ case CASE(OMINUS, TUINT8):
+ a = ANEGB;
+ break;
+
+ case CASE(OMINUS, TINT16):
+ case CASE(OMINUS, TUINT16):
+ a = ANEGW;
+ break;
+
+ case CASE(OMINUS, TINT32):
+ case CASE(OMINUS, TUINT32):
+ case CASE(OMINUS, TPTR32):
+ a = ANEGL;
+ break;
+
+ case CASE(OAND, TINT8):
+ case CASE(OAND, TUINT8):
+ a = AANDB;
+ break;
+
+ case CASE(OAND, TINT16):
+ case CASE(OAND, TUINT16):
+ a = AANDW;
+ break;
+
+ case CASE(OAND, TINT32):
+ case CASE(OAND, TUINT32):
+ case CASE(OAND, TPTR32):
+ a = AANDL;
+ break;
+
+ case CASE(OOR, TINT8):
+ case CASE(OOR, TUINT8):
+ a = AORB;
+ break;
+
+ case CASE(OOR, TINT16):
+ case CASE(OOR, TUINT16):
+ a = AORW;
+ break;
+
+ case CASE(OOR, TINT32):
+ case CASE(OOR, TUINT32):
+ case CASE(OOR, TPTR32):
+ a = AORL;
+ break;
+
+ case CASE(OXOR, TINT8):
+ case CASE(OXOR, TUINT8):
+ a = AXORB;
+ break;
+
+ case CASE(OXOR, TINT16):
+ case CASE(OXOR, TUINT16):
+ a = AXORW;
+ break;
+
+ case CASE(OXOR, TINT32):
+ case CASE(OXOR, TUINT32):
+ case CASE(OXOR, TPTR32):
+ a = AXORL;
+ break;
+
+ case CASE(OLSH, TINT8):
+ case CASE(OLSH, TUINT8):
+ a = ASHLB;
+ break;
+
+ case CASE(OLSH, TINT16):
+ case CASE(OLSH, TUINT16):
+ a = ASHLW;
+ break;
+
+ case CASE(OLSH, TINT32):
+ case CASE(OLSH, TUINT32):
+ case CASE(OLSH, TPTR32):
+ a = ASHLL;
+ break;
+
+ case CASE(ORSH, TUINT8):
+ a = ASHRB;
+ break;
+
+ case CASE(ORSH, TUINT16):
+ a = ASHRW;
+ break;
+
+ case CASE(ORSH, TUINT32):
+ case CASE(ORSH, TPTR32):
+ a = ASHRL;
+ break;
+
+ case CASE(ORSH, TINT8):
+ a = ASARB;
+ break;
+
+ case CASE(ORSH, TINT16):
+ a = ASARW;
+ break;
+
+ case CASE(ORSH, TINT32):
+ a = ASARL;
+ break;
+
+ case CASE(OMUL, TINT8):
+ case CASE(OMUL, TUINT8):
+ a = AIMULB;
+ break;
+
+ case CASE(OMUL, TINT16):
+ case CASE(OMUL, TUINT16):
+ a = AIMULW;
+ break;
+
+ case CASE(OMUL, TINT32):
+ case CASE(OMUL, TUINT32):
+ case CASE(OMUL, TPTR32):
+ a = AIMULL;
+ break;
+
+ case CASE(ODIV, TINT8):
+ case CASE(OMOD, TINT8):
+ a = AIDIVB;
+ break;
+
+ case CASE(ODIV, TUINT8):
+ case CASE(OMOD, TUINT8):
+ a = ADIVB;
+ break;
+
+ case CASE(ODIV, TINT16):
+ case CASE(OMOD, TINT16):
+ a = AIDIVW;
+ break;
+
+ case CASE(ODIV, TUINT16):
+ case CASE(OMOD, TUINT16):
+ a = ADIVW;
+ break;
+
+ case CASE(ODIV, TINT32):
+ case CASE(OMOD, TINT32):
+ a = AIDIVL;
+ break;
+
+ case CASE(ODIV, TUINT32):
+ case CASE(ODIV, TPTR32):
+ case CASE(OMOD, TUINT32):
+ case CASE(OMOD, TPTR32):
+ a = ADIVL;
+ break;
+
+ case CASE(OEXTEND, TINT16):
+ a = ACWD;
+ break;
+
+ case CASE(OEXTEND, TINT32):
+ a = ACDQ;
+ break;
+ }
+ return a;
+}
+
+#define FCASE(a, b, c) (((a)<<16)|((b)<<8)|(c))
+int
+foptoas(int op, Type *t, int flg)
+{
+ int et;
+
+ et = simtype[t->etype];
+
+ // If we need Fpop, it means we're working on
+ // two different floating-point registers, not memory.
+ // In that case the instruction only has a float64 form.
+ if(flg & Fpop)
+ et = TFLOAT64;
+
+ // clear Frev if unneeded
+ switch(op) {
+ case OADD:
+ case OMUL:
+ flg &= ~Frev;
+ break;
+ }
+
+ switch(FCASE(op, et, flg)) {
+ case FCASE(OADD, TFLOAT32, 0):
+ return AFADDF;
+ case FCASE(OADD, TFLOAT64, 0):
+ return AFADDD;
+ case FCASE(OADD, TFLOAT64, Fpop):
+ return AFADDDP;
+
+ case FCASE(OSUB, TFLOAT32, 0):
+ return AFSUBF;
+ case FCASE(OSUB, TFLOAT32, Frev):
+ return AFSUBRF;
+
+ case FCASE(OSUB, TFLOAT64, 0):
+ return AFSUBD;
+ case FCASE(OSUB, TFLOAT64, Frev):
+ return AFSUBRD;
+ case FCASE(OSUB, TFLOAT64, Fpop):
+ return AFSUBDP;
+ case FCASE(OSUB, TFLOAT64, Fpop|Frev):
+ return AFSUBRDP;
+
+ case FCASE(OMUL, TFLOAT32, 0):
+ return AFMULF;
+ case FCASE(OMUL, TFLOAT64, 0):
+ return AFMULD;
+ case FCASE(OMUL, TFLOAT64, Fpop):
+ return AFMULDP;
+
+ case FCASE(ODIV, TFLOAT32, 0):
+ return AFDIVF;
+ case FCASE(ODIV, TFLOAT32, Frev):
+ return AFDIVRF;
+
+ case FCASE(ODIV, TFLOAT64, 0):
+ return AFDIVD;
+ case FCASE(ODIV, TFLOAT64, Frev):
+ return AFDIVRD;
+ case FCASE(ODIV, TFLOAT64, Fpop):
+ return AFDIVDP;
+ case FCASE(ODIV, TFLOAT64, Fpop|Frev):
+ return AFDIVRDP;
+
+ case FCASE(OCMP, TFLOAT32, 0):
+ return AFCOMF;
+ case FCASE(OCMP, TFLOAT32, Fpop):
+ return AFCOMFP;
+ case FCASE(OCMP, TFLOAT64, 0):
+ return AFCOMD;
+ case FCASE(OCMP, TFLOAT64, Fpop):
+ return AFCOMDP;
+ case FCASE(OCMP, TFLOAT64, Fpop2):
+ return AFCOMDPP;
+
+ case FCASE(OMINUS, TFLOAT32, 0):
+ return AFCHS;
+ case FCASE(OMINUS, TFLOAT64, 0):
+ return AFCHS;
+ }
+
+ fatal("foptoas %O %T %#x", op, t, flg);
+ return 0;
+}
+
+static int resvd[] =
+{
+// D_DI, // for movstring
+// D_SI, // for movstring
+
+ D_AX, // for divide
+ D_CX, // for shift
+ D_DX, // for divide
+ D_SP, // for stack
+
+ D_BL, // because D_BX can be allocated
+ D_BH,
+};
+
+void
+ginit(void)
+{
+ int i;
+
+ for(i=0; i<nelem(reg); i++)
+ reg[i] = 1;
+ for(i=D_AL; i<=D_DI; i++)
+ reg[i] = 0;
+ for(i=0; i<nelem(resvd); i++)
+ reg[resvd[i]]++;
+}
+
+ulong regpc[D_NONE];
+
+void
+gclean(void)
+{
+ int i;
+
+ for(i=0; i<nelem(resvd); i++)
+ reg[resvd[i]]--;
+
+ for(i=D_AL; i<=D_DI; i++)
+ if(reg[i])
+ yyerror("reg %R left allocated at %ux", i, regpc[i]);
+}
+
+int32
+anyregalloc(void)
+{
+ int i, j;
+
+ for(i=D_AL; i<=D_DI; i++) {
+ if(reg[i] == 0)
+ goto ok;
+ for(j=0; j<nelem(resvd); j++)
+ if(resvd[j] == i)
+ goto ok;
+ return 1;
+ ok:;
+ }
+ return 0;
+}
+
+/*
+ * allocate a register of type t, leaving it in n.
+ * if o != N, o is the desired fixed register.
+ * caller must regfree(n).
+ */
+void
+regalloc(Node *n, Type *t, Node *o)
+{
+ int i, et;
+
+ if(t == T)
+ fatal("regalloc: t nil");
+ et = simtype[t->etype];
+
+ switch(et) {
+ case TINT8:
+ case TUINT8:
+ case TINT16:
+ case TUINT16:
+ case TINT32:
+ case TUINT32:
+ case TINT64:
+ case TUINT64:
+ case TPTR32:
+ case TPTR64:
+ case TBOOL:
+ if(o != N && o->op == OREGISTER) {
+ i = o->val.u.reg;
+ if(i >= D_AX && i <= D_DI)
+ goto out;
+ }
+ for(i=D_AX; i<=D_DI; i++)
+ if(reg[i] == 0)
+ goto out;
+
+ fprint(2, "registers allocated at\n");
+ for(i=D_AX; i<=D_DI; i++)
+ fprint(2, "\t%R\t%#ux\n", i, regpc[i]);
+ yyerror("out of fixed registers");
+ goto err;
+
+ case TFLOAT32:
+ case TFLOAT64:
+ i = D_F0;
+ goto out;
+ }
+ yyerror("regalloc: unknown type %T", t);
+ i = 0;
+
+err:
+ nodreg(n, t, 0);
+ return;
+
+out:
+ if (i == D_SP)
+ print("alloc SP\n");
+ if(reg[i] == 0) {
+ regpc[i] = (ulong)__builtin_return_address(0);
+ if(i == D_AX || i == D_CX || i == D_DX || i == D_SP) {
+ dump("regalloc-o", o);
+ fatal("regalloc %R", i);
+ }
+ }
+ reg[i]++;
+ nodreg(n, t, i);
+}
+
+void
+regfree(Node *n)
+{
+ int i;
+
+ if(n->op == ONAME)
+ return;
+ if(n->op != OREGISTER && n->op != OINDREG)
+ fatal("regfree: not a register");
+ i = n->val.u.reg;
+ if(i == D_SP)
+ return;
+ if(i < 0 || i >= sizeof(reg))
+ fatal("regfree: reg out of range");
+ if(reg[i] <= 0)
+ fatal("regfree: reg not allocated");
+ reg[i]--;
+ if(reg[i] == 0 && (i == D_AX || i == D_CX || i == D_DX || i == D_SP))
+ fatal("regfree %R", i);
+}
+
+/*
+ * initialize n to be register r of type t.
+ */
+void
+nodreg(Node *n, Type *t, int r)
+{
+ if(t == T)
+ fatal("nodreg: t nil");
+
+ memset(n, 0, sizeof(*n));
+ n->op = OREGISTER;
+ n->addable = 1;
+ ullmancalc(n);
+ n->val.u.reg = r;
+ n->type = t;
+}
+
+/*
+ * initialize n to be indirect of register r; n is type t.
+ */
+void
+nodindreg(Node *n, Type *t, int r)
+{
+ nodreg(n, t, r);
+ n->op = OINDREG;
+}
+
+Node*
+nodarg(Type *t, int fp)
+{
+ Node *n;
+ Type *first;
+ Iter savet;
+
+ // entire argument struct, not just one arg
+ switch(t->etype) {
+ default:
+ fatal("nodarg %T", t);
+
+ case TSTRUCT:
+ if(!t->funarg)
+ fatal("nodarg: TSTRUCT but not funarg");
+ n = nod(ONAME, N, N);
+ n->sym = lookup(".args");
+ n->type = t;
+ first = structfirst(&savet, &t);
+ if(first == nil)
+ fatal("nodarg: bad struct");
+ if(first->width == BADWIDTH)
+ fatal("nodarg: offset not computed for %T", t);
+ n->xoffset = first->width;
+ n->addable = 1;
+ break;
+
+ case TFIELD:
+ n = nod(ONAME, N, N);
+ n->type = t->type;
+ n->sym = t->sym;
+ if(t->width == BADWIDTH)
+ fatal("nodarg: offset not computed for %T", t);
+ n->xoffset = t->width;
+ n->addable = 1;
+ break;
+ }
+
+ switch(fp) {
+ default:
+ fatal("nodarg %T %d", t, fp);
+
+ case 0: // output arg
+ n->op = OINDREG;
+ n->val.u.reg = D_SP;
+ break;
+
+ case 1: // input arg
+ n->class = PPARAM;
+ break;
+ }
+
+ n->typecheck = 1;
+ return n;
+}
+
+/*
+ * generate
+ * as $c, reg
+ */
+void
+gconreg(int as, vlong c, int reg)
+{
+ Node n1, n2;
+
+ nodconst(&n1, types[TINT64], c);
+ nodreg(&n2, types[TINT64], reg);
+ gins(as, &n1, &n2);
+}
+
+/*
+ * swap node contents
+ */
+void
+nswap(Node *a, Node *b)
+{
+ Node t;
+
+ t = *a;
+ *a = *b;
+ *b = t;
+}
+
+/*
+ * return constant i node.
+ * overwritten by next call, but useful in calls to gins.
+ */
+Node*
+ncon(uint32 i)
+{
+ static Node n;
+
+ if(n.type == T)
+ nodconst(&n, types[TUINT32], 0);
+ mpmovecfix(n.val.u.xval, i);
+ return &n;
+}
+
+/*
+ * Is this node a memory operand?
+ */
+int
+ismem(Node *n)
+{
+ switch(n->op) {
+ case OLEN:
+ case OCAP:
+ case OINDREG:
+ case ONAME:
+ case OPARAM:
+ return 1;
+ }
+ return 0;
+}
+
+Node sclean[10];
+int nsclean;
+
+/*
+ * n is a 64-bit value. fill in lo and hi to refer to its 32-bit halves.
+ */
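+// split64 may allocate a register to hold an address; every call must
+// be paired with a later splitclean, and calls may nest up to
+// nelem(sclean) deep.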
+void
+split64(Node *n, Node *lo, Node *hi)
+{
+ Node n1;
+ int64 i;
+
+ if(!is64(n->type))
+ fatal("split64 %T", n->type);
+
+ sclean[nsclean].op = OEMPTY;
+ if(nsclean >= nelem(sclean))
+ fatal("split64 clean");
+ nsclean++;
+ switch(n->op) {
+ default:
+ if(!dotaddable(n, &n1)) {
+ igen(n, &n1, N);
+ sclean[nsclean-1] = n1;
+ }
+ n = &n1;
+ goto common;
+ case ONAME:
+ if(n->class == PPARAMREF) {
+ cgen(n->heapaddr, &n1);
+ sclean[nsclean-1] = n1;
+ // fall through.
+ n = &n1;
+ }
+ goto common;
+ case OINDREG:
+ common:
+ *lo = *n;
+ *hi = *n;
+ lo->type = types[TUINT32];
+ if(n->type->etype == TINT64)
+ hi->type = types[TINT32];
+ else
+ hi->type = types[TUINT32];
+ hi->xoffset += 4;
+ break;
+
+ case OLITERAL:
+ convconst(&n1, n->type, &n->val);
+ i = mpgetfix(n1.val.u.xval);
+ nodconst(lo, types[TUINT32], (uint32)i);
+ i >>= 32;
+ if(n->type->etype == TINT64)
+ nodconst(hi, types[TINT32], (int32)i);
+ else
+ nodconst(hi, types[TUINT32], (uint32)i);
+ break;
+ }
+}
+
+void
+splitclean(void)
+{
+ if(nsclean <= 0)
+ fatal("splitclean");
+ nsclean--;
+ if(sclean[nsclean].op != OEMPTY)
+ regfree(&sclean[nsclean]);
+}
+
+/*
+ * set up nodes representing fp constants
+ */
+Node zerof;
+Node two64f;
+Node two63f;
+
+void
+bignodes(void)
+{
+ static int did;
+
+ if(did)
+ return;
+ did = 1;
+
+ two64f = *ncon(0);
+ two64f.type = types[TFLOAT64];
+ two64f.val.ctype = CTFLT;
+ two64f.val.u.fval = mal(sizeof *two64f.val.u.fval);
+ mpmovecflt(two64f.val.u.fval, 18446744073709551616.);
+
+ two63f = two64f;
+ two63f.val.u.fval = mal(sizeof *two63f.val.u.fval);
+ mpmovecflt(two63f.val.u.fval, 9223372036854775808.);
+
+ zerof = two64f;
+ zerof.val.u.fval = mal(sizeof *zerof.val.u.fval);
+ mpmovecflt(zerof.val.u.fval, 0);
+}
+
+void
+memname(Node *n, Type *t)
+{
+ tempname(n, t);
+ strcpy(namebuf, n->sym->name);
+ namebuf[0] = '.'; // keep optimizer from registerizing
+ n->sym = lookup(namebuf);
+}
+
+void
+gmove(Node *f, Node *t)
+{
+ int a, ft, tt;
+ Type *cvt;
+ Node r1, r2, t1, t2, flo, fhi, tlo, thi, con, f0, f1, ax, dx, cx;
+ Prog *p1, *p2, *p3;
+
+ if(debug['M'])
+ print("gmove %N -> %N\n", f, t);
+
+ ft = simsimtype(f->type);
+ tt = simsimtype(t->type);
+ cvt = t->type;
+
+ if(iscomplex[ft] || iscomplex[tt]) {
+ complexmove(f, t);
+ return;
+ }
+
+ // cannot have two integer memory operands;
+ // except 64-bit, which always copies via registers anyway.
+ if(isint[ft] && isint[tt] && !is64(f->type) && !is64(t->type) && ismem(f) && ismem(t))
+ goto hard;
+
+ // convert constant to desired type
+ if(f->op == OLITERAL) {
+ if(tt == TFLOAT32)
+ convconst(&con, types[TFLOAT64], &f->val);
+ else
+ convconst(&con, t->type, &f->val);
+ f = &con;
+ ft = simsimtype(con.type);
+
+ // some constants can't move directly to memory.
+ if(ismem(t)) {
+ // float constants come from memory.
+ if(isfloat[tt])
+ goto hard;
+ }
+ }
+
+ // value -> value copy, only one memory operand.
+ // figure out the instruction to use.
+ // break out of switch for one-instruction gins.
+ // goto rdst for "destination must be register".
+ // goto hard for "convert to cvt type first".
+ // otherwise handle and return.
+
+ switch(CASE(ft, tt)) {
+ default:
+ goto fatal;
+
+ /*
+ * integer copy and truncate
+ */
+ case CASE(TINT8, TINT8): // same size
+ case CASE(TINT8, TUINT8):
+ case CASE(TUINT8, TINT8):
+ case CASE(TUINT8, TUINT8):
+ a = AMOVB;
+ break;
+
+ case CASE(TINT16, TINT8): // truncate
+ case CASE(TUINT16, TINT8):
+ case CASE(TINT32, TINT8):
+ case CASE(TUINT32, TINT8):
+ case CASE(TINT16, TUINT8):
+ case CASE(TUINT16, TUINT8):
+ case CASE(TINT32, TUINT8):
+ case CASE(TUINT32, TUINT8):
+ a = AMOVB;
+ goto rsrc;
+
+ case CASE(TINT64, TINT8): // truncate low word
+ case CASE(TUINT64, TINT8):
+ case CASE(TINT64, TUINT8):
+ case CASE(TUINT64, TUINT8):
+ split64(f, &flo, &fhi);
+ nodreg(&r1, t->type, D_AX);
+ gmove(&flo, &r1);
+ gins(AMOVB, &r1, t);
+ splitclean();
+ return;
+
+ case CASE(TINT16, TINT16): // same size
+ case CASE(TINT16, TUINT16):
+ case CASE(TUINT16, TINT16):
+ case CASE(TUINT16, TUINT16):
+ a = AMOVW;
+ break;
+
+ case CASE(TINT32, TINT16): // truncate
+ case CASE(TUINT32, TINT16):
+ case CASE(TINT32, TUINT16):
+ case CASE(TUINT32, TUINT16):
+ a = AMOVW;
+ goto rsrc;
+
+ case CASE(TINT64, TINT16): // truncate low word
+ case CASE(TUINT64, TINT16):
+ case CASE(TINT64, TUINT16):
+ case CASE(TUINT64, TUINT16):
+ split64(f, &flo, &fhi);
+ nodreg(&r1, t->type, D_AX);
+ gmove(&flo, &r1);
+ gins(AMOVW, &r1, t);
+ splitclean();
+ return;
+
+ case CASE(TINT32, TINT32): // same size
+ case CASE(TINT32, TUINT32):
+ case CASE(TUINT32, TINT32):
+ case CASE(TUINT32, TUINT32):
+ a = AMOVL;
+ break;
+
+ case CASE(TINT64, TINT32): // truncate
+ case CASE(TUINT64, TINT32):
+ case CASE(TINT64, TUINT32):
+ case CASE(TUINT64, TUINT32):
+ split64(f, &flo, &fhi);
+ nodreg(&r1, t->type, D_AX);
+ gmove(&flo, &r1);
+ gins(AMOVL, &r1, t);
+ splitclean();
+ return;
+
+ case CASE(TINT64, TINT64): // same size
+ case CASE(TINT64, TUINT64):
+ case CASE(TUINT64, TINT64):
+ case CASE(TUINT64, TUINT64):
+ split64(f, &flo, &fhi);
+ split64(t, &tlo, &thi);
+ if(f->op == OLITERAL) {
+ gins(AMOVL, &flo, &tlo);
+ gins(AMOVL, &fhi, &thi);
+ } else {
+ nodreg(&r1, t->type, D_AX);
+ nodreg(&r2, t->type, D_DX);
+ gins(AMOVL, &flo, &r1);
+ gins(AMOVL, &fhi, &r2);
+ gins(AMOVL, &r1, &tlo);
+ gins(AMOVL, &r2, &thi);
+ }
+ splitclean();
+ splitclean();
+ return;
+
+ /*
+ * integer up-conversions
+ */
+ case CASE(TINT8, TINT16): // sign extend int8
+ case CASE(TINT8, TUINT16):
+ a = AMOVBWSX;
+ goto rdst;
+ case CASE(TINT8, TINT32):
+ case CASE(TINT8, TUINT32):
+ a = AMOVBLSX;
+ goto rdst;
+ case CASE(TINT8, TINT64): // convert via int32
+ case CASE(TINT8, TUINT64):
+ cvt = types[TINT32];
+ goto hard;
+
+ case CASE(TUINT8, TINT16): // zero extend uint8
+ case CASE(TUINT8, TUINT16):
+ a = AMOVBWZX;
+ goto rdst;
+ case CASE(TUINT8, TINT32):
+ case CASE(TUINT8, TUINT32):
+ a = AMOVBLZX;
+ goto rdst;
+ case CASE(TUINT8, TINT64): // convert via uint32
+ case CASE(TUINT8, TUINT64):
+ cvt = types[TUINT32];
+ goto hard;
+
+ case CASE(TINT16, TINT32): // sign extend int16
+ case CASE(TINT16, TUINT32):
+ a = AMOVWLSX;
+ goto rdst;
+ case CASE(TINT16, TINT64): // convert via int32
+ case CASE(TINT16, TUINT64):
+ cvt = types[TINT32];
+ goto hard;
+
+ case CASE(TUINT16, TINT32): // zero extend uint16
+ case CASE(TUINT16, TUINT32):
+ a = AMOVWLZX;
+ goto rdst;
+ case CASE(TUINT16, TINT64): // convert via uint32
+ case CASE(TUINT16, TUINT64):
+ cvt = types[TUINT32];
+ goto hard;
+
+ case CASE(TINT32, TINT64): // sign extend int32
+ case CASE(TINT32, TUINT64):
+ split64(t, &tlo, &thi);
+ nodreg(&flo, tlo.type, D_AX);
+ nodreg(&fhi, thi.type, D_DX);
+ gmove(f, &flo);
+ gins(ACDQ, N, N);
+ gins(AMOVL, &flo, &tlo);
+ gins(AMOVL, &fhi, &thi);
+ splitclean();
+ return;
+
+ case CASE(TUINT32, TINT64): // zero extend uint32
+ case CASE(TUINT32, TUINT64):
+ split64(t, &tlo, &thi);
+ gmove(f, &tlo);
+ gins(AMOVL, ncon(0), &thi);
+ splitclean();
+ return;
+
+ /*
+ * float to integer
+ */
+ case CASE(TFLOAT32, TINT16):
+ case CASE(TFLOAT32, TINT32):
+ case CASE(TFLOAT32, TINT64):
+ case CASE(TFLOAT64, TINT16):
+ case CASE(TFLOAT64, TINT32):
+ case CASE(TFLOAT64, TINT64):
+ if(t->op == OREGISTER)
+ goto hardmem;
+ nodreg(&r1, types[ft], D_F0);
+ if(f->op != OREGISTER) {
+ if(ft == TFLOAT32)
+ gins(AFMOVF, f, &r1);
+ else
+ gins(AFMOVD, f, &r1);
+ }
+
+ // set round to zero mode during conversion
+ memname(&t1, types[TUINT16]);
+ memname(&t2, types[TUINT16]);
+ gins(AFSTCW, N, &t1);
+ gins(AMOVW, ncon(0xf7f), &t2);
+ gins(AFLDCW, &t2, N);
+ if(tt == TINT16)
+ gins(AFMOVWP, &r1, t);
+ else if(tt == TINT32)
+ gins(AFMOVLP, &r1, t);
+ else
+ gins(AFMOVVP, &r1, t);
+ gins(AFLDCW, &t1, N);
+ return;
+
+ case CASE(TFLOAT32, TINT8):
+ case CASE(TFLOAT32, TUINT16):
+ case CASE(TFLOAT32, TUINT8):
+ case CASE(TFLOAT64, TINT8):
+ case CASE(TFLOAT64, TUINT16):
+ case CASE(TFLOAT64, TUINT8):
+ // convert via int32.
+ tempname(&t1, types[TINT32]);
+ gmove(f, &t1);
+ switch(tt) {
+ default:
+ fatal("gmove %T", t);
+ case TINT8:
+ gins(ACMPL, &t1, ncon(-0x80));
+ p1 = gbranch(optoas(OLT, types[TINT32]), T);
+ gins(ACMPL, &t1, ncon(0x7f));
+ p2 = gbranch(optoas(OGT, types[TINT32]), T);
+ p3 = gbranch(AJMP, T);
+ patch(p1, pc);
+ patch(p2, pc);
+ gmove(ncon(-0x80), &t1);
+ patch(p3, pc);
+ gmove(&t1, t);
+ break;
+ case TUINT8:
+ gins(ATESTL, ncon(0xffffff00), &t1);
+ p1 = gbranch(AJEQ, T);
+ gins(AMOVL, ncon(0), &t1);
+ patch(p1, pc);
+ gmove(&t1, t);
+ break;
+ case TUINT16:
+ gins(ATESTL, ncon(0xffff0000), &t1);
+ p1 = gbranch(AJEQ, T);
+ gins(AMOVL, ncon(0), &t1);
+ patch(p1, pc);
+ gmove(&t1, t);
+ break;
+ }
+ return;
+
+ case CASE(TFLOAT32, TUINT32):
+ case CASE(TFLOAT64, TUINT32):
+ // convert via int64.
+ tempname(&t1, types[TINT64]);
+ gmove(f, &t1);
+ split64(&t1, &tlo, &thi);
+ gins(ACMPL, &thi, ncon(0));
+ p1 = gbranch(AJEQ, T);
+ gins(AMOVL, ncon(0), &tlo);
+ patch(p1, pc);
+ gmove(&tlo, t);
+ splitclean();
+ return;
+
+ case CASE(TFLOAT32, TUINT64):
+ case CASE(TFLOAT64, TUINT64):
+ bignodes();
+ nodreg(&f0, types[ft], D_F0);
+ nodreg(&f1, types[ft], D_F0 + 1);
+ nodreg(&ax, types[TUINT16], D_AX);
+
+ gmove(f, &f0);
+
+ // if 0 > v { answer = 0 }
+ gmove(&zerof, &f0);
+ gins(AFUCOMIP, &f0, &f1);
+ p1 = gbranch(optoas(OGT, types[tt]), T);
+ // if 1<<64 <= v { answer = 0 too }
+ gmove(&two64f, &f0);
+ gins(AFUCOMIP, &f0, &f1);
+ p2 = gbranch(optoas(OGT, types[tt]), T);
+ patch(p1, pc);
+ gins(AFMOVVP, &f0, t); // don't care about t, but will pop the stack
+ split64(t, &tlo, &thi);
+ gins(AMOVL, ncon(0), &tlo);
+ gins(AMOVL, ncon(0), &thi);
+ splitclean();
+ p1 = gbranch(AJMP, T);
+ patch(p2, pc);
+
+ // in range; algorithm is:
+ // if small enough, use native float64 -> int64 conversion.
+ // otherwise, subtract 2^63, convert, and add it back.
+
+ // set round to zero mode during conversion
+ memname(&t1, types[TUINT16]);
+ memname(&t2, types[TUINT16]);
+ gins(AFSTCW, N, &t1);
+ gins(AMOVW, ncon(0xf7f), &t2);
+ gins(AFLDCW, &t2, N);
+
+ // actual work
+ gmove(&two63f, &f0);
+ gins(AFUCOMIP, &f0, &f1);
+ p2 = gbranch(optoas(OLE, types[tt]), T);
+ gins(AFMOVVP, &f0, t);
+ p3 = gbranch(AJMP, T);
+ patch(p2, pc);
+ gmove(&two63f, &f0);
+ gins(AFSUBDP, &f0, &f1);
+ gins(AFMOVVP, &f0, t);
+ split64(t, &tlo, &thi);
+ gins(AXORL, ncon(0x80000000), &thi); // + 2^63
+ patch(p3, pc);
+ splitclean();
+ // restore rounding mode
+ gins(AFLDCW, &t1, N);
+
+ patch(p1, pc);
+ return;
+
+ /*
+ * integer to float
+ */
+ case CASE(TINT16, TFLOAT32):
+ case CASE(TINT16, TFLOAT64):
+ case CASE(TINT32, TFLOAT32):
+ case CASE(TINT32, TFLOAT64):
+ case CASE(TINT64, TFLOAT32):
+ case CASE(TINT64, TFLOAT64):
+ if(t->op != OREGISTER)
+ goto hard;
+ if(f->op == OREGISTER) {
+ cvt = f->type;
+ goto hardmem;
+ }
+ switch(ft) {
+ case TINT16:
+ a = AFMOVW;
+ break;
+ case TINT32:
+ a = AFMOVL;
+ break;
+ default:
+ a = AFMOVV;
+ break;
+ }
+ break;
+
+ case CASE(TINT8, TFLOAT32):
+ case CASE(TINT8, TFLOAT64):
+ case CASE(TUINT16, TFLOAT32):
+ case CASE(TUINT16, TFLOAT64):
+ case CASE(TUINT8, TFLOAT32):
+ case CASE(TUINT8, TFLOAT64):
+ // convert via int32 memory
+ cvt = types[TINT32];
+ goto hardmem;
+
+ case CASE(TUINT32, TFLOAT32):
+ case CASE(TUINT32, TFLOAT64):
+ // convert via int64 memory
+ cvt = types[TINT64];
+ goto hardmem;
+
+ case CASE(TUINT64, TFLOAT32):
+ case CASE(TUINT64, TFLOAT64):
+ // algorithm is:
+ // if small enough, use native int64 -> float conversion.
+ // otherwise, halve (rounding to odd?), convert, and double.
+ nodreg(&ax, types[TUINT32], D_AX);
+ nodreg(&dx, types[TUINT32], D_DX);
+ nodreg(&cx, types[TUINT32], D_CX);
+ tempname(&t1, f->type);
+ split64(&t1, &tlo, &thi);
+ gmove(f, &t1);
+ gins(ACMPL, &thi, ncon(0));
+ p1 = gbranch(AJLT, T);
+ // native
+ t1.type = types[TINT64];
+ gmove(&t1, t);
+ p2 = gbranch(AJMP, T);
+ // simulated
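+ // halve the value, folding the shifted-out low bit back in
+ // (the rounding to odd mentioned above), convert, and then
+ // double by adding the converted value to itself.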
+ patch(p1, pc);
+ gmove(&tlo, &ax);
+ gmove(&thi, &dx);
+ p1 = gins(ASHRL, ncon(1), &ax);
+ p1->from.index = D_DX; // double-width shift DX -> AX
+ p1->from.scale = 0;
+ gins(ASETCC, N, &cx);
+ gins(AORB, &cx, &ax);
+ gins(ASHRL, ncon(1), &dx);
+ gmove(&dx, &thi);
+ gmove(&ax, &tlo);
+ nodreg(&r1, types[tt], D_F0);
+ nodreg(&r2, types[tt], D_F0 + 1);
+ gmove(&t1, &r1); // t1.type is TINT64 now, set above
+ gins(AFMOVD, &r1, &r1);
+ gins(AFADDDP, &r1, &r2);
+ gmove(&r1, t);
+ patch(p2, pc);
+ splitclean();
+ return;
+
+ /*
+ * float to float
+ */
+ case CASE(TFLOAT32, TFLOAT32):
+ case CASE(TFLOAT64, TFLOAT64):
+ // The way the code generator uses floating-point
+ // registers, a move from F0 to F0 is intended as a no-op.
+ // On the x86, it's not: it pushes a second copy of F0
+ // on the floating point stack. So toss it away here.
+ // Also, F0 is the *only* register we ever evaluate
+ // into, so we should only see register/register as F0/F0.
+ if(ismem(f) && ismem(t))
+ goto hard;
+ if(f->op == OREGISTER && t->op == OREGISTER) {
+ if(f->val.u.reg != D_F0 || t->val.u.reg != D_F0)
+ goto fatal;
+ return;
+ }
+ a = AFMOVF;
+ if(ft == TFLOAT64)
+ a = AFMOVD;
+ if(ismem(t)) {
+ if(f->op != OREGISTER || f->val.u.reg != D_F0)
+ fatal("gmove %N", f);
+ a = AFMOVFP;
+ if(ft == TFLOAT64)
+ a = AFMOVDP;
+ }
+ break;
+
+ case CASE(TFLOAT32, TFLOAT64):
+ if(ismem(f) && ismem(t))
+ goto hard;
+ if(f->op == OREGISTER && t->op == OREGISTER) {
+ if(f->val.u.reg != D_F0 || t->val.u.reg != D_F0)
+ goto fatal;
+ return;
+ }
+ if(f->op == OREGISTER)
+ gins(AFMOVDP, f, t);
+ else
+ gins(AFMOVF, f, t);
+ return;
+
+ case CASE(TFLOAT64, TFLOAT32):
+ if(ismem(f) && ismem(t))
+ goto hard;
+ if(f->op == OREGISTER && t->op == OREGISTER) {
+ tempname(&r1, types[TFLOAT32]);
+ gins(AFMOVFP, f, &r1);
+ gins(AFMOVF, &r1, t);
+ return;
+ }
+ if(f->op == OREGISTER)
+ gins(AFMOVFP, f, t);
+ else
+ gins(AFMOVD, f, t);
+ return;
+ }
+
+ gins(a, f, t);
+ return;
+
+rsrc:
+ // requires register source
+ regalloc(&r1, f->type, t);
+ gmove(f, &r1);
+ gins(a, &r1, t);
+ regfree(&r1);
+ return;
+
+rdst:
+ // requires register destination
+ regalloc(&r1, t->type, t);
+ gins(a, f, &r1);
+ gmove(&r1, t);
+ regfree(&r1);
+ return;
+
+hard:
+ // requires register intermediate
+ regalloc(&r1, cvt, t);
+ gmove(f, &r1);
+ gmove(&r1, t);
+ regfree(&r1);
+ return;
+
+hardmem:
+ // requires memory intermediate
+ tempname(&r1, cvt);
+ gmove(f, &r1);
+ gmove(&r1, t);
+ return;
+
+fatal:
+ // should not happen
+ fatal("gmove %N -> %N", f, t);
+}
+
+int
+samaddr(Node *f, Node *t)
+{
+
+ if(f->op != t->op)
+ return 0;
+
+ switch(f->op) {
+ case OREGISTER:
+ if(f->val.u.reg != t->val.u.reg)
+ break;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * generate one instruction:
+ * as f, t
+ */
+Prog*
+gins(int as, Node *f, Node *t)
+{
+ Prog *p;
+ Addr af, at;
+ int w;
+
+ if(as == AFMOVF && f && f->op == OREGISTER && t && t->op == OREGISTER)
+ fatal("gins MOVF reg, reg");
+
+ switch(as) {
+ case AMOVB:
+ case AMOVW:
+ case AMOVL:
+ if(f != N && t != N && samaddr(f, t))
+ return nil;
+ }
+
+ memset(&af, 0, sizeof af);
+ memset(&at, 0, sizeof at);
+ if(f != N)
+ naddr(f, &af, 1);
+ if(t != N)
+ naddr(t, &at, 1);
+ p = prog(as);
+ if(f != N)
+ p->from = af;
+ if(t != N)
+ p->to = at;
+ if(debug['g'])
+ print("%P\n", p);
+
+ w = 0;
+ switch(as) {
+ case AMOVB:
+ w = 1;
+ break;
+ case AMOVW:
+ w = 2;
+ break;
+ case AMOVL:
+ w = 4;
+ break;
+ }
+
+ if(1 && w != 0 && f != N && (af.width > w || at.width > w)) {
+ dump("bad width from:", f);
+ dump("bad width to:", t);
+ fatal("bad width: %P (%d, %d)\n", p, af.width, at.width);
+ }
+
+ return p;
+}
+
+static void
+checkoffset(Addr *a, int canemitcode)
+{
+ Prog *p;
+
+ if(a->offset < unmappedzero)
+ return;
+ if(!canemitcode)
+ fatal("checkoffset %#x, cannot emit code", a->offset);
+
+ // cannot rely on unmapped nil page at 0 to catch
+ // reference with large offset. instead, emit explicit
+ // test of 0(reg).
+ p = gins(ATESTB, nodintconst(0), N);
+ p->to = *a;
+ p->to.offset = 0;
+}
+
+/*
+ * generate code to compute n;
+ * fill in a to refer to the result.
+ */
+void
+naddr(Node *n, Addr *a, int canemitcode)
+{
+ a->scale = 0;
+ a->index = D_NONE;
+ a->type = D_NONE;
+ a->gotype = S;
+ a->node = N;
+ if(n == N)
+ return;
+
+ switch(n->op) {
+ default:
+ fatal("naddr: bad %O %D", n->op, a);
+ break;
+
+ case OREGISTER:
+ a->type = n->val.u.reg;
+ a->sym = S;
+ break;
+
+ case OINDREG:
+ a->type = n->val.u.reg+D_INDIR;
+ a->sym = n->sym;
+ a->offset = n->xoffset;
+ break;
+
+ case OPARAM:
+ // n->left is PHEAP ONAME for stack parameter.
+ // compute address of actual parameter on stack.
+ a->etype = n->left->type->etype;
+ a->width = n->left->type->width;
+ a->offset = n->xoffset;
+ a->sym = n->left->sym;
+ a->type = D_PARAM;
+ break;
+
+ case ONAME:
+ a->etype = 0;
+ a->width = 0;
+ if(n->type != T) {
+ a->etype = simtype[n->type->etype];
+ a->width = n->type->width;
+ a->gotype = ngotype(n);
+ }
+ a->pun = n->pun;
+ a->offset = n->xoffset;
+ a->sym = n->sym;
+ if(a->sym == S)
+ a->sym = lookup(".noname");
+ if(n->method) {
+ if(n->type != T)
+ if(n->type->sym != S)
+ if(n->type->sym->pkg != nil)
+ a->sym = pkglookup(a->sym->name, n->type->sym->pkg);
+ }
+
+ switch(n->class) {
+ default:
+ fatal("naddr: ONAME class %S %d\n", n->sym, n->class);
+ case PEXTERN:
+ a->type = D_EXTERN;
+ break;
+ case PAUTO:
+ a->type = D_AUTO;
+ if (n->sym)
+ a->node = n->orig;
+ break;
+ case PPARAM:
+ case PPARAMOUT:
+ a->type = D_PARAM;
+ break;
+ case PFUNC:
+ a->index = D_EXTERN;
+ a->type = D_ADDR;
+ break;
+ }
+ break;
+
+ case OLITERAL:
+ switch(n->val.ctype) {
+ default:
+ fatal("naddr: const %lT", n->type);
+ break;
+ case CTFLT:
+ a->type = D_FCONST;
+ a->dval = mpgetflt(n->val.u.fval);
+ break;
+ case CTINT:
+ a->sym = S;
+ a->type = D_CONST;
+ a->offset = mpgetfix(n->val.u.xval);
+ break;
+ case CTSTR:
+ datagostring(n->val.u.sval, a);
+ break;
+ case CTBOOL:
+ a->sym = S;
+ a->type = D_CONST;
+ a->offset = n->val.u.bval;
+ break;
+ case CTNIL:
+ a->sym = S;
+ a->type = D_CONST;
+ a->offset = 0;
+ break;
+ }
+ break;
+
+ case OADDR:
+ naddr(n->left, a, canemitcode);
+ if(a->type >= D_INDIR) {
+ a->type -= D_INDIR;
+ break;
+ }
+ if(a->type == D_EXTERN || a->type == D_STATIC ||
+ a->type == D_AUTO || a->type == D_PARAM)
+ if(a->index == D_NONE) {
+ a->index = a->type;
+ a->type = D_ADDR;
+ break;
+ }
+ fatal("naddr: OADDR\n");
+
+ case OLEN:
+ // len of string or slice
+ naddr(n->left, a, canemitcode);
+ if(a->type == D_CONST && a->offset == 0)
+ break; // len(nil)
+ a->etype = TUINT32;
+ a->offset += Array_nel;
+ a->width = 4;
+ if(a->offset >= unmappedzero && a->offset-Array_nel < unmappedzero)
+ checkoffset(a, canemitcode);
+ break;
+
+ case OCAP:
+ // cap of string or slice
+ naddr(n->left, a, canemitcode);
+ if(a->type == D_CONST && a->offset == 0)
+ break; // cap(nil)
+ a->etype = TUINT32;
+ a->offset += Array_cap;
+ a->width = 4;
+ if(a->offset >= unmappedzero && a->offset-Array_cap < unmappedzero)
+ checkoffset(a, canemitcode);
+ break;
+
+// case OADD:
+// if(n->right->op == OLITERAL) {
+// v = n->right->vconst;
+// naddr(n->left, a, canemitcode);
+// } else
+// if(n->left->op == OLITERAL) {
+// v = n->left->vconst;
+// naddr(n->right, a, canemitcode);
+// } else
+// goto bad;
+// a->offset += v;
+// break;
+
+ }
+}
+
+int
+dotaddable(Node *n, Node *n1)
+{
+ int o, oary[10];
+ Node *nn;
+
+ if(n->op != ODOT)
+ return 0;
+
+ o = dotoffset(n, oary, &nn);
+ if(nn != N && nn->addable && o == 1 && oary[0] >= 0) {
+ *n1 = *nn;
+ n1->type = n->type;
+ n1->xoffset += oary[0];
+ return 1;
+ }
+ return 0;
+}
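
For illustration: given a local struct variable s whose field f lives at offset 4 (a hypothetical layout), dotaddable rewrites the ODOT node for s.f into a copy of the node for s with f's type and the field offset folded into xoffset, so the field is addressed directly instead of materializing &s first:

	// s.f  ==>  copy of node(s), type = type of f, xoffset += 4
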
+
+void
+sudoclean(void)
+{
+}
+
+int
+sudoaddable(int as, Node *n, Addr *a)
+{
+ return 0;
+}
diff --git a/src/cmd/8g/list.c b/src/cmd/8g/list.c
new file mode 100644
index 000000000..edb1ece84
--- /dev/null
+++ b/src/cmd/8g/list.c
@@ -0,0 +1,302 @@
+// Derived from Inferno utils/8c/list.c
+// http://code.google.com/p/inferno-os/source/browse/utils/8c/list.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "gg.h"
+
+static int sconsize;
+void
+listinit(void)
+{
+
+ fmtinstall('A', Aconv); // as
+ fmtinstall('P', Pconv); // Prog*
+ fmtinstall('D', Dconv); // Addr*
+ fmtinstall('R', Rconv); // reg
+ fmtinstall('Y', Yconv); // sconst
+}
+
+int
+Pconv(Fmt *fp)
+{
+ char str[STRINGSZ];
+ Prog *p;
+ char scale[40];
+
+ p = va_arg(fp->args, Prog*);
+ sconsize = 8;
+ scale[0] = '\0';
+ if(p->from.scale != 0 && (p->as == AGLOBL || p->as == ATEXT))
+ snprint(scale, sizeof scale, "%d,", p->from.scale);
+ switch(p->as) {
+ default:
+ snprint(str, sizeof(str), "%.4d (%L) %-7A %D,%s%D",
+ p->loc, p->lineno, p->as, &p->from, scale, &p->to);
+ break;
+
+ case ADATA:
+ sconsize = p->from.scale;
+ snprint(str, sizeof(str), "%.4d (%L) %-7A %D/%d,%D",
+ p->loc, p->lineno, p->as, &p->from, sconsize, &p->to);
+ break;
+
+ case ATEXT:
+ snprint(str, sizeof(str), "%.4d (%L) %-7A %D,%s%lD",
+ p->loc, p->lineno, p->as, &p->from, scale, &p->to);
+ break;
+ }
+ return fmtstrcpy(fp, str);
+}
+
+int
+Dconv(Fmt *fp)
+{
+ char str[STRINGSZ], s[STRINGSZ];
+ Addr *a;
+ int i;
+ uint32 d1, d2;
+
+ a = va_arg(fp->args, Addr*);
+ i = a->type;
+ if(i >= D_INDIR) {
+ if(a->offset)
+ snprint(str, sizeof(str), "%d(%R)", a->offset, i-D_INDIR);
+ else
+ snprint(str, sizeof(str), "(%R)", i-D_INDIR);
+ goto brk;
+ }
+ switch(i) {
+
+ default:
+ if(a->offset)
+ snprint(str, sizeof(str), "$%d,%R", a->offset, i);
+ else
+ snprint(str, sizeof(str), "%R", i);
+ break;
+
+ case D_NONE:
+ str[0] = 0;
+ break;
+
+ case D_BRANCH:
+ snprint(str, sizeof(str), "%d", a->branch->loc);
+ break;
+
+ case D_EXTERN:
+ snprint(str, sizeof(str), "%S+%d(SB)", a->sym, a->offset);
+ break;
+
+ case D_STATIC:
+ snprint(str, sizeof(str), "%S<>+%d(SB)", a->sym, a->offset);
+ break;
+
+ case D_AUTO:
+ snprint(str, sizeof(str), "%S+%d(SP)", a->sym, a->offset);
+ break;
+
+ case D_PARAM:
+ snprint(str, sizeof(str), "%S+%d(FP)", a->sym, a->offset);
+ break;
+
+ case D_CONST:
+ if(fp->flags & FmtLong) {
+ d1 = a->offset;
+ d2 = a->offset2;
+ snprint(str, sizeof(str), "$%ud-%ud", (ulong)d1, (ulong)d2);
+ break;
+ }
+ snprint(str, sizeof(str), "$%d", a->offset);
+ break;
+
+ case D_FCONST:
+ snprint(str, sizeof(str), "$(%.17e)", a->dval);
+ break;
+
+ case D_SCONST:
+ snprint(str, sizeof(str), "$\"%Y\"", a->sval);
+ break;
+
+ case D_ADDR:
+ a->type = a->index;
+ a->index = D_NONE;
+ snprint(str, sizeof(str), "$%D", a);
+ a->index = a->type;
+ a->type = D_ADDR;
+ goto conv;
+ }
+brk:
+ if(a->index != D_NONE) {
+ snprint(s, sizeof(s), "(%R*%d)", (int)a->index, (int)a->scale);
+ strcat(str, s);
+ }
+conv:
+ return fmtstrcpy(fp, str);
+}
+
+static char* regstr[] =
+{
+ "AL", /* [D_AL] */
+ "CL",
+ "DL",
+ "BL",
+
+ "AH", /* [D_AH] */
+ "CH",
+ "DH",
+ "BH",
+
+ "AX", /* [D_AX] */
+ "CX",
+ "DX",
+ "BX",
+ "SP",
+ "BP",
+ "SI",
+ "DI",
+
+ "F0", /* [D_F0] */
+ "F1",
+ "F2",
+ "F3",
+ "F4",
+ "F5",
+ "F6",
+ "F7",
+
+ "CS", /* [D_CS] */
+ "SS",
+ "DS",
+ "ES",
+ "FS",
+ "GS",
+
+ "GDTR", /* [D_GDTR] */
+ "IDTR", /* [D_IDTR] */
+ "LDTR", /* [D_LDTR] */
+ "MSW", /* [D_MSW] */
+ "TASK", /* [D_TASK] */
+
+ "CR0", /* [D_CR] */
+ "CR1",
+ "CR2",
+ "CR3",
+ "CR4",
+ "CR5",
+ "CR6",
+ "CR7",
+
+ "DR0", /* [D_DR] */
+ "DR1",
+ "DR2",
+ "DR3",
+ "DR4",
+ "DR5",
+ "DR6",
+ "DR7",
+
+ "TR0", /* [D_TR] */
+ "TR1",
+ "TR2",
+ "TR3",
+ "TR4",
+ "TR5",
+ "TR6",
+ "TR7",
+
+ "NONE", /* [D_NONE] */
+};
+
+int
+Rconv(Fmt *fp)
+{
+ char str[STRINGSZ];
+ int r;
+
+ r = va_arg(fp->args, int);
+ if(r < 0 || r >= nelem(regstr) || regstr[r] == nil) {
+ snprint(str, sizeof(str), "BAD_R(%d)", r);
+ return fmtstrcpy(fp, str);
+ }
+ return fmtstrcpy(fp, regstr[r]);
+}
+
+int
+Aconv(Fmt *fp)
+{
+ int i;
+
+ i = va_arg(fp->args, int);
+ return fmtstrcpy(fp, anames[i]);
+}
+
+
+int
+Yconv(Fmt *fp)
+{
+ int i, c;
+ char str[STRINGSZ], *p, *a;
+
+ a = va_arg(fp->args, char*);
+ p = str;
+ for(i=0; i<sconsize; i++) {
+ c = a[i] & 0xff;
+ if((c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9')) {
+ *p++ = c;
+ continue;
+ }
+ *p++ = '\\';
+ switch(c) {
+ default:
+ if(c < 040 || c >= 0177)
+ break; /* not portable */
+ p[-1] = c;
+ continue;
+ case 0:
+ *p++ = 'z';
+ continue;
+ case '\\':
+ case '"':
+ *p++ = c;
+ continue;
+ case '\n':
+ *p++ = 'n';
+ continue;
+ case '\t':
+ *p++ = 't';
+ continue;
+ }
+ *p++ = (c>>6) + '0';
+ *p++ = ((c>>3) & 7) + '0';
+ *p++ = (c & 7) + '0';
+ }
+ *p = 0;
+ return fmtstrcpy(fp, str);
+}
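
For reference, the Pconv/Dconv verbs above render a Prog roughly as follows (a sketch; the %L verb prints the source position, e.g. file:line, and the offsets shown are hypothetical):

	0017 (main.go:12) MOVL    $1,AX
	0018 (main.go:12) LEAL    x+8(SP),BX
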
diff --git a/src/cmd/8g/opt.h b/src/cmd/8g/opt.h
new file mode 100644
index 000000000..8f31dec3b
--- /dev/null
+++ b/src/cmd/8g/opt.h
@@ -0,0 +1,164 @@
+// Derived from Inferno utils/6c/gc.h
+// http://code.google.com/p/inferno-os/source/browse/utils/6c/gc.h
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#define Z N
+#define Adr Addr
+
+#define D_HI D_NONE
+#define D_LO D_NONE
+
+#define BLOAD(r) band(bnot(r->refbehind), r->refahead)
+#define BSTORE(r) band(bnot(r->calbehind), r->calahead)
+#define LOAD(r) (~r->refbehind.b[z] & r->refahead.b[z])
+#define STORE(r) (~r->calbehind.b[z] & r->calahead.b[z])
+
+#define CLOAD 5
+#define CREF 5
+#define CINF 1000
+#define LOOP 3
+
+typedef struct Reg Reg;
+typedef struct Rgn Rgn;
+
+struct Reg
+{
+
+ Bits set;
+ Bits use1;
+ Bits use2;
+
+ Bits refbehind;
+ Bits refahead;
+ Bits calbehind;
+ Bits calahead;
+ Bits regdiff;
+ Bits act;
+
+ int32 regu; // register used bitmap
+ int32 rpo; // reverse post ordering
+ int32 active;
+
+ uint16 loop; // x5 for every loop
+ uchar refset; // diagnostic generated
+
+ Reg* p1;
+ Reg* p2;
+ Reg* p2link;
+ Reg* s1;
+ Reg* s2;
+ Reg* link;
+ Prog* prog;
+};
+#define R ((Reg*)0)
+
+#define NRGN 600
+struct Rgn
+{
+ Reg* enter;
+ short cost;
+ short varno;
+ short regno;
+};
+
+EXTERN int32 exregoffset; // not set
+EXTERN int32 exfregoffset; // not set
+EXTERN Reg* firstr;
+EXTERN Reg* lastr;
+EXTERN Reg zreg;
+EXTERN Reg* freer;
+EXTERN Reg** rpo2r;
+EXTERN Rgn region[NRGN];
+EXTERN Rgn* rgp;
+EXTERN int nregion;
+EXTERN int nvar;
+EXTERN int32 regbits;
+EXTERN int32 exregbits;
+EXTERN Bits externs;
+EXTERN Bits params;
+EXTERN Bits consts;
+EXTERN Bits addrs;
+EXTERN Bits ovar;
+EXTERN int change;
+EXTERN int32 maxnr;
+EXTERN int32* idom;
+
+EXTERN struct
+{
+ int32 ncvtreg;
+ int32 nspill;
+ int32 nreload;
+ int32 ndelmov;
+ int32 nvar;
+ int32 naddr;
+} ostats;
+
+/*
+ * reg.c
+ */
+Reg* rega(void);
+int rcmp(const void*, const void*);
+void regopt(Prog*);
+void addmove(Reg*, int, int, int);
+Bits mkvar(Reg*, Adr*);
+void prop(Reg*, Bits, Bits);
+void loopit(Reg*, int32);
+void synch(Reg*, Bits);
+uint32 allreg(uint32, Rgn*);
+void paint1(Reg*, int);
+uint32 paint2(Reg*, int);
+void paint3(Reg*, int, int32, int);
+void addreg(Adr*, int);
+void dumpone(Reg*);
+void dumpit(char*, Reg*);
+int noreturn(Prog *p);
+
+/*
+ * peep.c
+ */
+void peep(void);
+void excise(Reg*);
+Reg* uniqp(Reg*);
+Reg* uniqs(Reg*);
+int regtyp(Adr*);
+int anyvar(Adr*);
+int subprop(Reg*);
+int copyprop(Reg*);
+int copy1(Adr*, Adr*, Reg*, int);
+int copyu(Prog*, Adr*, Adr*);
+
+int copyas(Adr*, Adr*);
+int copyau(Adr*, Adr*);
+int copysub(Adr*, Adr*, Adr*, int);
+int copysub1(Prog*, Adr*, Adr*, int);
+
+int32 RtoB(int);
+int32 FtoB(int);
+int BtoR(int32);
+int BtoF(int32);
diff --git a/src/cmd/8g/peep.c b/src/cmd/8g/peep.c
new file mode 100644
index 000000000..5ad29e1b2
--- /dev/null
+++ b/src/cmd/8g/peep.c
@@ -0,0 +1,890 @@
+// Derived from Inferno utils/6c/peep.c
+// http://code.google.com/p/inferno-os/source/browse/utils/6c/peep.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "gg.h"
+#include "opt.h"
+
+#define REGEXT 0
+
+static void conprop(Reg *r);
+
+// do we need the carry bit
+static int
+needc(Prog *p)
+{
+ while(p != P) {
+ switch(p->as) {
+ case AADCL:
+ case ASBBL:
+ case ARCRL:
+ return 1;
+ case AADDL:
+ case ASUBL:
+ case AJMP:
+ case ARET:
+ case ACALL:
+ return 0;
+ default:
+ if(p->to.type == D_BRANCH)
+ return 0;
+ }
+ p = p->link;
+ }
+ return 0;
+}
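
Context for the $1/-1 rewrites later in peep: on x86, INCL/DECL do not update the carry flag while ADDL/SUBL do, so the substitution is only legal when needc finds no downstream ADCL, SBBL, or RCRL consuming the carry. A small before/after sketch, assuming the carry is dead:

	ADDL	$1, AX		// rewritten to:  INCL AX
	SUBL	$1, CX		// rewritten to:  DECL CX

If an ADCL follows before the next ADDL/SUBL/branch, needc returns 1 and the instruction is left alone.
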
+
+static Reg*
+rnops(Reg *r)
+{
+ Prog *p;
+ Reg *r1;
+
+ if(r != R)
+ for(;;) {
+ p = r->prog;
+ if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE)
+ break;
+ r1 = uniqs(r);
+ if(r1 == R)
+ break;
+ r = r1;
+ }
+ return r;
+}
+
+void
+peep(void)
+{
+ Reg *r, *r1, *r2;
+ Prog *p, *p1;
+ int t;
+
+ /*
+ * complete R structure
+ */
+ t = 0;
+ for(r=firstr; r!=R; r=r1) {
+ r1 = r->link;
+ if(r1 == R)
+ break;
+ p = r->prog->link;
+ while(p != r1->prog)
+ switch(p->as) {
+ default:
+ r2 = rega();
+ r->link = r2;
+ r2->link = r1;
+
+ r2->prog = p;
+ p->reg = r2;
+
+ r2->p1 = r;
+ r->s1 = r2;
+ r2->s1 = r1;
+ r1->p1 = r2;
+
+ r = r2;
+ t++;
+
+ case ADATA:
+ case AGLOBL:
+ case ANAME:
+ case ASIGNAME:
+ p = p->link;
+ }
+ }
+
+ // movb elimination.
+ // movb is simulated by the linker
+ // when a register other than ax, bx, cx, dx
+ // is used, so rewrite to other instructions
+ // when possible. a movb into a register
+ // can smash the entire 32-bit register without
+ // causing any trouble.
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ if(p->as == AMOVB && regtyp(&p->to)) {
+ // movb into register.
+ // from another register or constant can be movl.
+ if(regtyp(&p->from) || p->from.type == D_CONST)
+ p->as = AMOVL;
+ else
+ p->as = AMOVBLZX;
+ }
+ }
+
+ // constant propagation
+ // find MOV $con,R followed by
+ // another MOV $con,R without
+ // setting R in the interim
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ switch(p->as) {
+ case ALEAL:
+ if(regtyp(&p->to))
+ if(p->from.sym != S)
+ conprop(r);
+ break;
+
+ case AMOVB:
+ case AMOVW:
+ case AMOVL:
+ if(regtyp(&p->to))
+ if(p->from.type == D_CONST)
+ conprop(r);
+ break;
+ }
+ }
+
+loop1:
+ if(debug['P'] && debug['v'])
+ dumpit("loop1", firstr);
+
+ t = 0;
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ switch(p->as) {
+ case AMOVB:
+ case AMOVW:
+ case AMOVL:
+ if(regtyp(&p->to))
+ if(regtyp(&p->from)) {
+ if(copyprop(r)) {
+ excise(r);
+ t++;
+ } else
+ if(subprop(r) && copyprop(r)) {
+ excise(r);
+ t++;
+ }
+ }
+ break;
+
+ case AMOVBLZX:
+ case AMOVWLZX:
+ case AMOVBLSX:
+ case AMOVWLSX:
+ if(regtyp(&p->to)) {
+ r1 = rnops(uniqs(r));
+ if(r1 != R) {
+ p1 = r1->prog;
+ if(p->as == p1->as && p->to.type == p1->from.type){
+ p1->as = AMOVL;
+ t++;
+ }
+ }
+ }
+ break;
+
+ case AADDB:
+ case AADDL:
+ case AADDW:
+ if(p->from.type != D_CONST || needc(p->link))
+ break;
+ if(p->from.offset == -1){
+ if(p->as == AADDL)
+ p->as = ADECL;
+ else
+ p->as = ADECW;
+ p->from = zprog.from;
+ break;
+ }
+ if(p->from.offset == 1){
+ if(p->as == AADDL)
+ p->as = AINCL;
+ else
+ p->as = AINCW;
+ p->from = zprog.from;
+ break;
+ }
+ break;
+
+ case ASUBB:
+ case ASUBL:
+ case ASUBW:
+ if(p->from.type != D_CONST || needc(p->link))
+ break;
+ if(p->from.offset == -1) {
+ if(p->as == ASUBL)
+ p->as = AINCL;
+ else
+ p->as = AINCW;
+ p->from = zprog.from;
+ break;
+ }
+ if(p->from.offset == 1){
+ if(p->as == ASUBL)
+ p->as = ADECL;
+ else
+ p->as = ADECW;
+ p->from = zprog.from;
+ break;
+ }
+ break;
+ }
+ }
+ if(t)
+ goto loop1;
+}
+
+void
+excise(Reg *r)
+{
+ Prog *p;
+
+ p = r->prog;
+ if(debug['P'] && debug['v'])
+ print("%P ===delete===\n", p);
+
+ p->as = ANOP;
+ p->from = zprog.from;
+ p->to = zprog.to;
+
+ ostats.ndelmov++;
+}
+
+Reg*
+uniqp(Reg *r)
+{
+ Reg *r1;
+
+ r1 = r->p1;
+ if(r1 == R) {
+ r1 = r->p2;
+ if(r1 == R || r1->p2link != R)
+ return R;
+ } else
+ if(r->p2 != R)
+ return R;
+ return r1;
+}
+
+Reg*
+uniqs(Reg *r)
+{
+ Reg *r1;
+
+ r1 = r->s1;
+ if(r1 == R) {
+ r1 = r->s2;
+ if(r1 == R)
+ return R;
+ } else
+ if(r->s2 != R)
+ return R;
+ return r1;
+}
+
+int
+regtyp(Adr *a)
+{
+ int t;
+
+ t = a->type;
+ if(t >= D_AX && t <= D_DI)
+ return 1;
+ return 0;
+}
+
+/*
+ * the idea is to substitute
+ * one register for another
+ * from one MOV to another
+ * MOV a, R0
+ * ADD b, R0 / no use of R1
+ * MOV R0, R1
+ * would be converted to
+ * MOV a, R1
+ * ADD b, R1
+ * MOV R1, R0
+ * hopefully, then the former or latter MOV
+ * will be eliminated by copy propagation.
+ */
+int
+subprop(Reg *r0)
+{
+ Prog *p;
+ Adr *v1, *v2;
+ Reg *r;
+ int t;
+
+ p = r0->prog;
+ v1 = &p->from;
+ if(!regtyp(v1))
+ return 0;
+ v2 = &p->to;
+ if(!regtyp(v2))
+ return 0;
+ for(r=uniqp(r0); r!=R; r=uniqp(r)) {
+ if(uniqs(r) == R)
+ break;
+ p = r->prog;
+ switch(p->as) {
+ case ACALL:
+ return 0;
+
+ case AIMULL:
+ case AIMULW:
+ if(p->to.type != D_NONE)
+ break;
+
+ case ADIVB:
+ case ADIVL:
+ case ADIVW:
+ case AIDIVB:
+ case AIDIVL:
+ case AIDIVW:
+ case AIMULB:
+ case AMULB:
+ case AMULL:
+ case AMULW:
+
+ case ARCLB:
+ case ARCLL:
+ case ARCLW:
+ case ARCRB:
+ case ARCRL:
+ case ARCRW:
+ case AROLB:
+ case AROLL:
+ case AROLW:
+ case ARORB:
+ case ARORL:
+ case ARORW:
+ case ASALB:
+ case ASALL:
+ case ASALW:
+ case ASARB:
+ case ASARL:
+ case ASARW:
+ case ASHLB:
+ case ASHLL:
+ case ASHLW:
+ case ASHRB:
+ case ASHRL:
+ case ASHRW:
+
+ case AREP:
+ case AREPN:
+
+ case ACWD:
+ case ACDQ:
+
+ case ASTOSB:
+ case ASTOSL:
+ case AMOVSB:
+ case AMOVSL:
+ return 0;
+
+ case AMOVB:
+ case AMOVW:
+ case AMOVL:
+ if(p->to.type == v1->type)
+ goto gotit;
+ break;
+ }
+ if(copyau(&p->from, v2) ||
+ copyau(&p->to, v2))
+ break;
+ if(copysub(&p->from, v1, v2, 0) ||
+ copysub(&p->to, v1, v2, 0))
+ break;
+ }
+ return 0;
+
+gotit:
+ copysub(&p->to, v1, v2, 1);
+ if(debug['P']) {
+ print("gotit: %D->%D\n%P", v1, v2, r->prog);
+ if(p->from.type == v2->type)
+ print(" excise");
+ print("\n");
+ }
+ for(r=uniqs(r); r!=r0; r=uniqs(r)) {
+ p = r->prog;
+ copysub(&p->from, v1, v2, 1);
+ copysub(&p->to, v1, v2, 1);
+ if(debug['P'])
+ print("%P\n", r->prog);
+ }
+ t = v1->type;
+ v1->type = v2->type;
+ v2->type = t;
+ if(debug['P'])
+ print("%P last\n", r->prog);
+ return 1;
+}
+
+/*
+ * The idea is to remove redundant copies.
+ * v1->v2 F=0
+ * (use v2 s/v2/v1/)*
+ * set v1 F=1
+ * use v2 return fail
+ * -----------------
+ * v1->v2 F=0
+ * (use v2 s/v2/v1/)*
+ * set v1 F=1
+ * set v2 return success
+ */
+int
+copyprop(Reg *r0)
+{
+ Prog *p;
+ Adr *v1, *v2;
+ Reg *r;
+
+ p = r0->prog;
+ v1 = &p->from;
+ v2 = &p->to;
+ if(copyas(v1, v2))
+ return 1;
+ for(r=firstr; r!=R; r=r->link)
+ r->active = 0;
+ return copy1(v1, v2, r0->s1, 0);
+}
+
+int
+copy1(Adr *v1, Adr *v2, Reg *r, int f)
+{
+ int t;
+ Prog *p;
+
+ if(r->active) {
+ if(debug['P'])
+ print("act set; return 1\n");
+ return 1;
+ }
+ r->active = 1;
+ if(debug['P'])
+ print("copy %D->%D f=%d\n", v1, v2, f);
+ for(; r != R; r = r->s1) {
+ p = r->prog;
+ if(debug['P'])
+ print("%P", p);
+ if(!f && uniqp(r) == R) {
+ f = 1;
+ if(debug['P'])
+ print("; merge; f=%d", f);
+ }
+ t = copyu(p, v2, A);
+ switch(t) {
+ case 2: /* rar, can't split */
+ if(debug['P'])
+ print("; %D rar; return 0\n", v2);
+ return 0;
+
+ case 3: /* set */
+ if(debug['P'])
+ print("; %D set; return 1\n", v2);
+ return 1;
+
+ case 1: /* used, substitute */
+ case 4: /* use and set */
+ if(f) {
+ if(!debug['P'])
+ return 0;
+ if(t == 4)
+ print("; %D used+set and f=%d; return 0\n", v2, f);
+ else
+ print("; %D used and f=%d; return 0\n", v2, f);
+ return 0;
+ }
+ if(copyu(p, v2, v1)) {
+ if(debug['P'])
+ print("; sub fail; return 0\n");
+ return 0;
+ }
+ if(debug['P'])
+ print("; sub %D/%D", v2, v1);
+ if(t == 4) {
+ if(debug['P'])
+ print("; %D used+set; return 1\n", v2);
+ return 1;
+ }
+ break;
+ }
+ if(!f) {
+ t = copyu(p, v1, A);
+ if(!f && (t == 2 || t == 3 || t == 4)) {
+ f = 1;
+ if(debug['P'])
+ print("; %D set and !f; f=%d", v1, f);
+ }
+ }
+ if(debug['P'])
+ print("\n");
+ if(r->s2)
+ if(!copy1(v1, v2, r->s2, f))
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * return
+ * 1 if v only used (and substitute),
+ * 2 if read-alter-rewrite
+ * 3 if set
+ * 4 if set and used
+ * 0 otherwise (not touched)
+ */
+int
+copyu(Prog *p, Adr *v, Adr *s)
+{
+
+ switch(p->as) {
+
+ default:
+ if(debug['P'])
+ print("unknown op %A\n", p->as);
+ /* SBBL; ADCL; FLD1; SAHF */
+ return 2;
+
+
+ case ANEGB:
+ case ANEGW:
+ case ANEGL:
+ case ANOTB:
+ case ANOTW:
+ case ANOTL:
+ if(copyas(&p->to, v))
+ return 2;
+ break;
+
+ case ALEAL: /* lhs addr, rhs store */
+ if(copyas(&p->from, v))
+ return 2;
+
+
+ case ANOP: /* rhs store */
+ case AMOVB:
+ case AMOVW:
+ case AMOVL:
+ case AMOVBLSX:
+ case AMOVBLZX:
+ case AMOVWLSX:
+ case AMOVWLZX:
+ if(copyas(&p->to, v)) {
+ if(s != A)
+ return copysub(&p->from, v, s, 1);
+ if(copyau(&p->from, v))
+ return 4;
+ return 3;
+ }
+ goto caseread;
+
+ case ARCLB:
+ case ARCLL:
+ case ARCLW:
+ case ARCRB:
+ case ARCRL:
+ case ARCRW:
+ case AROLB:
+ case AROLL:
+ case AROLW:
+ case ARORB:
+ case ARORL:
+ case ARORW:
+ case ASALB:
+ case ASALL:
+ case ASALW:
+ case ASARB:
+ case ASARL:
+ case ASARW:
+ case ASHLB:
+ case ASHLL:
+ case ASHLW:
+ case ASHRB:
+ case ASHRL:
+ case ASHRW:
+ if(copyas(&p->to, v))
+ return 2;
+ if(copyas(&p->from, v))
+ if(p->from.type == D_CX)
+ return 2;
+ goto caseread;
+
+ case AADDB: /* rhs rar */
+ case AADDL:
+ case AADDW:
+ case AANDB:
+ case AANDL:
+ case AANDW:
+ case ADECL:
+ case ADECW:
+ case AINCL:
+ case AINCW:
+ case ASUBB:
+ case ASUBL:
+ case ASUBW:
+ case AORB:
+ case AORL:
+ case AORW:
+ case AXORB:
+ case AXORL:
+ case AXORW:
+ if(copyas(&p->to, v))
+ return 2;
+ goto caseread;
+
+ case ACMPL: /* read only */
+ case ACMPW:
+ case ACMPB:
+ caseread:
+ if(s != A) {
+ if(copysub(&p->from, v, s, 1))
+ return 1;
+ return copysub(&p->to, v, s, 1);
+ }
+ if(copyau(&p->from, v))
+ return 1;
+ if(copyau(&p->to, v))
+ return 1;
+ break;
+
+ case AJGE: /* no reference */
+ case AJNE:
+ case AJLE:
+ case AJEQ:
+ case AJHI:
+ case AJLS:
+ case AJMI:
+ case AJPL:
+ case AJGT:
+ case AJLT:
+ case AJCC:
+ case AJCS:
+
+ case AADJSP:
+ case AWAIT:
+ case ACLD:
+ break;
+
+ case AIMULL:
+ case AIMULW:
+ if(p->to.type != D_NONE) {
+ if(copyas(&p->to, v))
+ return 2;
+ goto caseread;
+ }
+
+ case ADIVB:
+ case ADIVL:
+ case ADIVW:
+ case AIDIVB:
+ case AIDIVL:
+ case AIDIVW:
+ case AIMULB:
+ case AMULB:
+ case AMULL:
+ case AMULW:
+
+ case ACWD:
+ case ACDQ:
+ if(v->type == D_AX || v->type == D_DX)
+ return 2;
+ goto caseread;
+
+ case AREP:
+ case AREPN:
+ if(v->type == D_CX)
+ return 2;
+ goto caseread;
+
+ case AMOVSB:
+ case AMOVSL:
+ if(v->type == D_DI || v->type == D_SI)
+ return 2;
+ goto caseread;
+
+ case ASTOSB:
+ case ASTOSL:
+ if(v->type == D_AX || v->type == D_DI)
+ return 2;
+ goto caseread;
+
+ case AJMP: /* funny */
+ if(s != A) {
+ if(copysub(&p->to, v, s, 1))
+ return 1;
+ return 0;
+ }
+ if(copyau(&p->to, v))
+ return 1;
+ return 0;
+
+ case ARET: /* funny */
+ if(v->type == REGRET || v->type == FREGRET)
+ return 2;
+ if(s != A)
+ return 1;
+ return 3;
+
+ case ACALL: /* funny */
+ if(REGEXT && v->type <= REGEXT && v->type > exregoffset)
+ return 2;
+ if(REGARG >= 0 && v->type == (uchar)REGARG)
+ return 2;
+
+ if(s != A) {
+ if(copysub(&p->to, v, s, 1))
+ return 1;
+ return 0;
+ }
+ if(copyau(&p->to, v))
+ return 4;
+ return 3;
+
+ case ATEXT: /* funny */
+ if(REGARG >= 0 && v->type == (uchar)REGARG)
+ return 3;
+ return 0;
+ }
+ return 0;
+}
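
A small example of how these return codes drive copy propagation (copy1 above), for the candidate copy MOVL AX, BX with v1=AX and v2=BX:

	MOVL	AX, BX		// candidate copy
	MOVL	BX, CX		// copyu(p, BX, A) == 1: plain use, so BX is substituted by AX
	MOVL	$0, BX		// copyu(p, BX, A) == 3: BX is set, copy1 succeeds,
				// and peep then excises the original MOVL AX, BX
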
+
+/*
+ * direct reference,
+ * could be set/use depending on
+ * semantics
+ */
+int
+copyas(Adr *a, Adr *v)
+{
+ if(a->type != v->type)
+ return 0;
+ if(regtyp(v))
+ return 1;
+ if(v->type == D_AUTO || v->type == D_PARAM)
+ if(v->offset == a->offset)
+ return 1;
+ return 0;
+}
+
+/*
+ * either direct or indirect
+ */
+int
+copyau(Adr *a, Adr *v)
+{
+
+ if(copyas(a, v))
+ return 1;
+ if(regtyp(v)) {
+ if(a->type-D_INDIR == v->type)
+ return 1;
+ if(a->index == v->type)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * substitute s for v in a
+ * return failure to substitute
+ */
+int
+copysub(Adr *a, Adr *v, Adr *s, int f)
+{
+ int t;
+
+ if(copyas(a, v)) {
+ t = s->type;
+ if(t >= D_AX && t <= D_DI) {
+ if(f)
+ a->type = t;
+ }
+ return 0;
+ }
+ if(regtyp(v)) {
+ t = v->type;
+ if(a->type == t+D_INDIR) {
+ if((s->type == D_BP) && a->index != D_NONE)
+ return 1; /* can't use BP-base with index */
+ if(f)
+ a->type = s->type+D_INDIR;
+// return 0;
+ }
+ if(a->index == t) {
+ if(f)
+ a->index = s->type;
+ return 0;
+ }
+ return 0;
+ }
+ return 0;
+}
+
+static void
+conprop(Reg *r0)
+{
+ Reg *r;
+ Prog *p, *p0;
+ int t;
+ Adr *v0;
+
+ p0 = r0->prog;
+ v0 = &p0->to;
+ r = r0;
+
+loop:
+ r = uniqs(r);
+ if(r == R || r == r0)
+ return;
+ if(uniqp(r) == R)
+ return;
+
+ p = r->prog;
+ t = copyu(p, v0, A);
+ switch(t) {
+ case 0: // miss
+ case 1: // use
+ goto loop;
+
+ case 2: // rar
+ case 4: // use and set
+ break;
+
+ case 3: // set
+ if(p->as == p0->as)
+ if(p->from.type == p0->from.type)
+ if(p->from.sym == p0->from.sym)
+ if(p->from.offset == p0->from.offset)
+ if(p->from.scale == p0->from.scale)
+ if(p->from.dval == p0->from.dval)
+ if(p->from.index == p0->from.index) {
+ excise(r);
+ t++;
+ goto loop;
+ }
+ break;
+ }
+}
diff --git a/src/cmd/8g/reg.c b/src/cmd/8g/reg.c
new file mode 100644
index 000000000..2b878f62a
--- /dev/null
+++ b/src/cmd/8g/reg.c
@@ -0,0 +1,1544 @@
+// Derived from Inferno utils/6c/reg.c
+// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "gg.h"
+#undef EXTERN
+#define EXTERN
+#include "opt.h"
+
+#define NREGVAR 8
+#define REGBITS ((uint32)0xff)
+#define P2R(p) (Reg*)(p->reg)
+
+static int first = 1;
+
+Reg*
+rega(void)
+{
+ Reg *r;
+
+ r = freer;
+ if(r == R) {
+ r = mal(sizeof(*r));
+ } else
+ freer = r->link;
+
+ *r = zreg;
+ return r;
+}
+
+int
+rcmp(const void *a1, const void *a2)
+{
+ Rgn *p1, *p2;
+ int c1, c2;
+
+ p1 = (Rgn*)a1;
+ p2 = (Rgn*)a2;
+ c1 = p2->cost;
+ c2 = p1->cost;
+ if(c1 -= c2)
+ return c1;
+ return p2->varno - p1->varno;
+}
+
+static void
+setoutvar(void)
+{
+ Type *t;
+ Node *n;
+ Addr a;
+ Iter save;
+ Bits bit;
+ int z;
+
+ t = structfirst(&save, getoutarg(curfn->type));
+ while(t != T) {
+ n = nodarg(t, 1);
+ a = zprog.from;
+ naddr(n, &a, 0);
+ bit = mkvar(R, &a);
+ for(z=0; z<BITS; z++)
+ ovar.b[z] |= bit.b[z];
+ t = structnext(&save);
+ }
+//if(bany(b))
+//print("ovars = %Q\n", &ovar);
+}
+
+static void
+setaddrs(Bits bit)
+{
+ int i, n;
+ Var *v;
+ Sym *s;
+
+ while(bany(&bit)) {
+ // convert each bit to a variable
+ i = bnum(bit);
+ s = var[i].sym;
+ n = var[i].name;
+ bit.b[i/32] &= ~(1L<<(i%32));
+
+ // disable all pieces of that variable
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->sym == s && v->name == n)
+ v->addr = 2;
+ }
+ }
+}
+
+static char* regname[] = { ".ax", ".cx", ".dx", ".bx", ".sp", ".bp", ".si", ".di" };
+
+void
+regopt(Prog *firstp)
+{
+ Reg *r, *r1;
+ Prog *p;
+ int i, z, nr;
+ uint32 vreg;
+ Bits bit;
+
+ if(first) {
+ fmtinstall('Q', Qconv);
+ exregoffset = D_DI; // no externals
+ first = 0;
+ }
+
+ // count instructions
+ nr = 0;
+ for(p=firstp; p!=P; p=p->link)
+ nr++;
+ // if too big dont bother
+ if(nr >= 10000) {
+// print("********** %S is too big (%d)\n", curfn->nname->sym, nr);
+ return;
+ }
+
+ r1 = R;
+ firstr = R;
+ lastr = R;
+
+ /*
+ * control flow is more complicated in generated go code
+ * than in generated c code. define pseudo-variables for
+ * registers, so we have complete register usage information.
+ */
+ nvar = NREGVAR;
+ memset(var, 0, NREGVAR*sizeof var[0]);
+ for(i=0; i<NREGVAR; i++)
+ var[i].sym = lookup(regname[i]);
+
+ regbits = RtoB(D_SP);
+ for(z=0; z<BITS; z++) {
+ externs.b[z] = 0;
+ params.b[z] = 0;
+ consts.b[z] = 0;
+ addrs.b[z] = 0;
+ ovar.b[z] = 0;
+ }
+
+ // build list of return variables
+ setoutvar();
+
+ /*
+ * pass 1
+ * build aux data structure
+ * allocate pcs
+ * find use and set of variables
+ */
+ nr = 0;
+ for(p=firstp; p!=P; p=p->link) {
+ switch(p->as) {
+ case ADATA:
+ case AGLOBL:
+ case ANAME:
+ case ASIGNAME:
+ continue;
+ }
+ r = rega();
+ nr++;
+ if(firstr == R) {
+ firstr = r;
+ lastr = r;
+ } else {
+ lastr->link = r;
+ r->p1 = lastr;
+ lastr->s1 = r;
+ lastr = r;
+ }
+ r->prog = p;
+ p->reg = r;
+
+ r1 = r->p1;
+ if(r1 != R) {
+ switch(r1->prog->as) {
+ case ARET:
+ case AJMP:
+ case AIRETL:
+ r->p1 = R;
+ r1->s1 = R;
+ }
+ }
+
+ bit = mkvar(r, &p->from);
+ if(bany(&bit))
+ switch(p->as) {
+ /*
+ * funny
+ */
+ case ALEAL:
+ setaddrs(bit);
+ break;
+
+ /*
+ * left side read
+ */
+ default:
+ for(z=0; z<BITS; z++)
+ r->use1.b[z] |= bit.b[z];
+ break;
+
+ /*
+ * left side read+write
+ */
+ case AXCHGB:
+ case AXCHGW:
+ case AXCHGL:
+ for(z=0; z<BITS; z++) {
+ r->use1.b[z] |= bit.b[z];
+ r->set.b[z] |= bit.b[z];
+ }
+ break;
+ }
+
+ bit = mkvar(r, &p->to);
+ if(bany(&bit))
+ switch(p->as) {
+ default:
+ yyerror("reg: unknown op: %A", p->as);
+ break;
+
+ /*
+ * right side read
+ */
+ case ACMPB:
+ case ACMPL:
+ case ACMPW:
+ case ATESTB:
+ case ATESTL:
+ case ATESTW:
+ for(z=0; z<BITS; z++)
+ r->use2.b[z] |= bit.b[z];
+ break;
+
+ /*
+ * right side write
+ */
+ case AFSTSW:
+ case ALEAL:
+ case ANOP:
+ case AMOVL:
+ case AMOVB:
+ case AMOVW:
+ case AMOVBLSX:
+ case AMOVBLZX:
+ case AMOVBWSX:
+ case AMOVBWZX:
+ case AMOVWLSX:
+ case AMOVWLZX:
+ case APOPL:
+ for(z=0; z<BITS; z++)
+ r->set.b[z] |= bit.b[z];
+ break;
+
+ /*
+ * right side read+write
+ */
+ case AINCB:
+ case AINCL:
+ case AINCW:
+ case ADECB:
+ case ADECL:
+ case ADECW:
+
+ case AADDB:
+ case AADDL:
+ case AADDW:
+ case AANDB:
+ case AANDL:
+ case AANDW:
+ case ASUBB:
+ case ASUBL:
+ case ASUBW:
+ case AORB:
+ case AORL:
+ case AORW:
+ case AXORB:
+ case AXORL:
+ case AXORW:
+ case ASALB:
+ case ASALL:
+ case ASALW:
+ case ASARB:
+ case ASARL:
+ case ASARW:
+ case ARCLB:
+ case ARCLL:
+ case ARCLW:
+ case ARCRB:
+ case ARCRL:
+ case ARCRW:
+ case AROLB:
+ case AROLL:
+ case AROLW:
+ case ARORB:
+ case ARORL:
+ case ARORW:
+ case ASHLB:
+ case ASHLL:
+ case ASHLW:
+ case ASHRB:
+ case ASHRL:
+ case ASHRW:
+ case AIMULL:
+ case AIMULW:
+ case ANEGB:
+ case ANEGL:
+ case ANEGW:
+ case ANOTB:
+ case ANOTL:
+ case ANOTW:
+ case AADCL:
+ case ASBBL:
+
+ case ASETCC:
+ case ASETCS:
+ case ASETEQ:
+ case ASETGE:
+ case ASETGT:
+ case ASETHI:
+ case ASETLE:
+ case ASETLS:
+ case ASETLT:
+ case ASETMI:
+ case ASETNE:
+ case ASETOC:
+ case ASETOS:
+ case ASETPC:
+ case ASETPL:
+ case ASETPS:
+
+ case AXCHGB:
+ case AXCHGW:
+ case AXCHGL:
+ for(z=0; z<BITS; z++) {
+ r->set.b[z] |= bit.b[z];
+ r->use2.b[z] |= bit.b[z];
+ }
+ break;
+
+ /*
+ * funny
+ */
+ case AFMOVDP:
+ case AFMOVFP:
+ case AFMOVLP:
+ case AFMOVVP:
+ case AFMOVWP:
+ case ACALL:
+ setaddrs(bit);
+ break;
+ }
+
+ switch(p->as) {
+ case AIMULL:
+ case AIMULW:
+ if(p->to.type != D_NONE)
+ break;
+
+ case AIDIVL:
+ case AIDIVW:
+ case ADIVL:
+ case ADIVW:
+ case AMULL:
+ case AMULW:
+ r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
+ r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX);
+ break;
+
+ case AIDIVB:
+ case AIMULB:
+ case ADIVB:
+ case AMULB:
+ r->set.b[0] |= RtoB(D_AX);
+ r->use1.b[0] |= RtoB(D_AX);
+ break;
+
+ case ACWD:
+ r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
+ r->use1.b[0] |= RtoB(D_AX);
+ break;
+
+ case ACDQ:
+ r->set.b[0] |= RtoB(D_DX);
+ r->use1.b[0] |= RtoB(D_AX);
+ break;
+
+ case AREP:
+ case AREPN:
+ case ALOOP:
+ case ALOOPEQ:
+ case ALOOPNE:
+ r->set.b[0] |= RtoB(D_CX);
+ r->use1.b[0] |= RtoB(D_CX);
+ break;
+
+ case AMOVSB:
+ case AMOVSL:
+ case AMOVSW:
+ case ACMPSB:
+ case ACMPSL:
+ case ACMPSW:
+ r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI);
+ break;
+
+ case ASTOSB:
+ case ASTOSL:
+ case ASTOSW:
+ case ASCASB:
+ case ASCASL:
+ case ASCASW:
+ r->set.b[0] |= RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI);
+ break;
+
+ case AINSB:
+ case AINSL:
+ case AINSW:
+ r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_DI);
+ break;
+
+ case AOUTSB:
+ case AOUTSL:
+ case AOUTSW:
+ r->set.b[0] |= RtoB(D_DI);
+ r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI);
+ break;
+ }
+ }
+ if(firstr == R)
+ return;
+
+ for(i=0; i<nvar; i++) {
+ Var *v = var+i;
+ if(v->addr) {
+ bit = blsh(i);
+ for(z=0; z<BITS; z++)
+ addrs.b[z] |= bit.b[z];
+ }
+
+// print("bit=%2d addr=%d et=%-6E w=%-2d s=%S + %lld\n",
+// i, v->addr, v->etype, v->width, v->sym, v->offset);
+ }
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass1", firstr);
+
+ /*
+ * pass 2
+ * turn branch references to pointers
+ * build back pointers
+ */
+ for(r=firstr; r!=R; r=r->link) {
+ p = r->prog;
+ if(p->to.type == D_BRANCH) {
+ if(p->to.branch == P)
+ fatal("pnil %P", p);
+ r1 = p->to.branch->reg;
+ if(r1 == R)
+ fatal("rnil %P", p);
+ if(r1 == r) {
+ //fatal("ref to self %P", p);
+ continue;
+ }
+ r->s2 = r1;
+ r->p2link = r1->p2;
+ r1->p2 = r;
+ }
+ }
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass2", firstr);
+
+ /*
+ * pass 2.5
+ * find looping structure
+ */
+ for(r = firstr; r != R; r = r->link)
+ r->active = 0;
+ change = 0;
+ loopit(firstr, nr);
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass2.5", firstr);
+
+ /*
+ * pass 3
+ * iterate propagating usage
+ * back until flow graph is complete
+ */
+loop1:
+ change = 0;
+ for(r = firstr; r != R; r = r->link)
+ r->active = 0;
+ for(r = firstr; r != R; r = r->link)
+ if(r->prog->as == ARET)
+ prop(r, zbits, zbits);
+loop11:
+ /* pick up unreachable code */
+ i = 0;
+ for(r = firstr; r != R; r = r1) {
+ r1 = r->link;
+ if(r1 && r1->active && !r->active) {
+ prop(r, zbits, zbits);
+ i = 1;
+ }
+ }
+ if(i)
+ goto loop11;
+ if(change)
+ goto loop1;
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass3", firstr);
+
+ /*
+ * pass 4
+ * iterate propagating register/variable synchrony
+ * forward until graph is complete
+ */
+loop2:
+ change = 0;
+ for(r = firstr; r != R; r = r->link)
+ r->active = 0;
+ synch(firstr, zbits);
+ if(change)
+ goto loop2;
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass4", firstr);
+
+ /*
+ * pass 4.5
+ * move register pseudo-variables into regu.
+ */
+ for(r = firstr; r != R; r = r->link) {
+ r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
+
+ r->set.b[0] &= ~REGBITS;
+ r->use1.b[0] &= ~REGBITS;
+ r->use2.b[0] &= ~REGBITS;
+ r->refbehind.b[0] &= ~REGBITS;
+ r->refahead.b[0] &= ~REGBITS;
+ r->calbehind.b[0] &= ~REGBITS;
+ r->calahead.b[0] &= ~REGBITS;
+ r->regdiff.b[0] &= ~REGBITS;
+ r->act.b[0] &= ~REGBITS;
+ }
+
+ /*
+ * pass 5
+ * isolate regions
+ * calculate costs (paint1)
+ */
+ r = firstr;
+ if(r) {
+ for(z=0; z<BITS; z++)
+ bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
+ ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
+ if(bany(&bit) && !r->refset) {
+ // should never happen - all variables are preset
+ if(debug['w'])
+ print("%L: used and not set: %Q\n", r->prog->lineno, bit);
+ r->refset = 1;
+ }
+ }
+ for(r = firstr; r != R; r = r->link)
+ r->act = zbits;
+ rgp = region;
+ nregion = 0;
+ for(r = firstr; r != R; r = r->link) {
+ for(z=0; z<BITS; z++)
+ bit.b[z] = r->set.b[z] &
+ ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
+ if(bany(&bit) && !r->refset) {
+ if(debug['w'])
+ print("%L: set and not used: %Q\n", r->prog->lineno, bit);
+ r->refset = 1;
+ excise(r);
+ }
+ for(z=0; z<BITS; z++)
+ bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
+ while(bany(&bit)) {
+ i = bnum(bit);
+ rgp->enter = r;
+ rgp->varno = i;
+ change = 0;
+ paint1(r, i);
+ bit.b[i/32] &= ~(1L<<(i%32));
+ if(change <= 0)
+ continue;
+ rgp->cost = change;
+ nregion++;
+ if(nregion >= NRGN) {
+ if(debug['R'] && debug['v'])
+ print("too many regions\n");
+ goto brk;
+ }
+ rgp++;
+ }
+ }
+brk:
+ qsort(region, nregion, sizeof(region[0]), rcmp);
+
+ /*
+ * pass 6
+ * determine used registers (paint2)
+ * replace code (paint3)
+ */
+ rgp = region;
+ for(i=0; i<nregion; i++) {
+ bit = blsh(rgp->varno);
+ vreg = paint2(rgp->enter, rgp->varno);
+ vreg = allreg(vreg, rgp);
+ if(rgp->regno != 0)
+ paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
+ rgp++;
+ }
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass6", firstr);
+
+ /*
+ * pass 7
+ * peep-hole on basic block
+ */
+ if(!debug['R'] || debug['P']) {
+ peep();
+ }
+
+ /*
+ * eliminate nops
+ * free aux structures
+ */
+ for(p=firstp; p!=P; p=p->link) {
+ while(p->link != P && p->link->as == ANOP)
+ p->link = p->link->link;
+ if(p->to.type == D_BRANCH)
+ while(p->to.branch != P && p->to.branch->as == ANOP)
+ p->to.branch = p->to.branch->link;
+ }
+
+ if(r1 != R) {
+ r1->link = freer;
+ freer = firstr;
+ }
+
+ if(debug['R']) {
+ if(ostats.ncvtreg ||
+ ostats.nspill ||
+ ostats.nreload ||
+ ostats.ndelmov ||
+ ostats.nvar ||
+ ostats.naddr ||
+ 0)
+ print("\nstats\n");
+
+ if(ostats.ncvtreg)
+ print(" %4d cvtreg\n", ostats.ncvtreg);
+ if(ostats.nspill)
+ print(" %4d spill\n", ostats.nspill);
+ if(ostats.nreload)
+ print(" %4d reload\n", ostats.nreload);
+ if(ostats.ndelmov)
+ print(" %4d delmov\n", ostats.ndelmov);
+ if(ostats.nvar)
+ print(" %4d var\n", ostats.nvar);
+ if(ostats.naddr)
+ print(" %4d addr\n", ostats.naddr);
+
+ memset(&ostats, 0, sizeof(ostats));
+ }
+}
+
+/*
+ * add mov b,rn
+ * just after r
+ */
+void
+addmove(Reg *r, int bn, int rn, int f)
+{
+ Prog *p, *p1;
+ Adr *a;
+ Var *v;
+
+ p1 = mal(sizeof(*p1));
+ clearp(p1);
+ p1->loc = 9999;
+
+ p = r->prog;
+ p1->link = p->link;
+ p->link = p1;
+ p1->lineno = p->lineno;
+
+ v = var + bn;
+
+ a = &p1->to;
+ a->sym = v->sym;
+ a->offset = v->offset;
+ a->etype = v->etype;
+ a->type = v->name;
+ a->gotype = v->gotype;
+ a->node = v->node;
+
+ // need to clean this up with wptr and
+ // some of the defaults
+ p1->as = AMOVL;
+ switch(v->etype) {
+ default:
+ fatal("unknown type\n");
+ case TINT8:
+ case TUINT8:
+ case TBOOL:
+ p1->as = AMOVB;
+ break;
+ case TINT16:
+ case TUINT16:
+ p1->as = AMOVW;
+ break;
+ case TINT:
+ case TUINT:
+ case TINT32:
+ case TUINT32:
+ case TPTR32:
+ break;
+ }
+
+ p1->from.type = rn;
+ if(!f) {
+ p1->from = *a;
+ *a = zprog.from;
+ a->type = rn;
+ if(v->etype == TUINT8)
+ p1->as = AMOVB;
+ if(v->etype == TUINT16)
+ p1->as = AMOVW;
+ }
+ if(debug['R'] && debug['v'])
+ print("%P ===add=== %P\n", p, p1);
+ ostats.nspill++;
+}
+
+uint32
+doregbits(int r)
+{
+ uint32 b;
+
+ b = 0;
+ if(r >= D_INDIR)
+ r -= D_INDIR;
+ if(r >= D_AX && r <= D_DI)
+ b |= RtoB(r);
+ else
+ if(r >= D_AL && r <= D_BL)
+ b |= RtoB(r-D_AL+D_AX);
+ else
+ if(r >= D_AH && r <= D_BH)
+ b |= RtoB(r-D_AH+D_AX);
+ return b;
+}
+
+static int
+overlap(int32 o1, int w1, int32 o2, int w2)
+{
+ int32 t1, t2;
+
+ t1 = o1+w1;
+ t2 = o2+w2;
+
+ if(!(t1 > o2 && t2 > o1))
+ return 0;
+
+ return 1;
+}
+
+Bits
+mkvar(Reg *r, Adr *a)
+{
+ Var *v;
+ int i, t, n, et, z, w, flag, regu;
+ int32 o;
+ Bits bit;
+ Sym *s;
+
+ /*
+ * mark registers used
+ */
+ t = a->type;
+ if(t == D_NONE)
+ goto none;
+
+ if(r != R)
+ r->use1.b[0] |= doregbits(a->index);
+
+ switch(t) {
+ default:
+ regu = doregbits(t);
+ if(regu == 0)
+ goto none;
+ bit = zbits;
+ bit.b[0] = regu;
+ return bit;
+
+ case D_ADDR:
+ a->type = a->index;
+ bit = mkvar(r, a);
+ setaddrs(bit);
+ a->type = t;
+ ostats.naddr++;
+ goto none;
+
+ case D_EXTERN:
+ case D_STATIC:
+ case D_PARAM:
+ case D_AUTO:
+ n = t;
+ break;
+ }
+
+ s = a->sym;
+ if(s == S)
+ goto none;
+ if(s->name[0] == '.')
+ goto none;
+ et = a->etype;
+ o = a->offset;
+ w = a->width;
+
+ flag = 0;
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->sym == s && v->name == n) {
+ if(v->offset == o)
+ if(v->etype == et)
+ if(v->width == w)
+ return blsh(i);
+
+ // if they overlap, disable both
+ if(overlap(v->offset, v->width, o, w)) {
+ if(debug['R'])
+ print("disable %s\n", v->sym->name);
+ v->addr = 1;
+ flag = 1;
+ }
+ }
+ }
+
+ switch(et) {
+ case 0:
+ case TFUNC:
+ goto none;
+ }
+
+ if(nvar >= NVAR) {
+ if(debug['w'] > 1 && s)
+ fatal("variable not optimized: %D", a);
+ goto none;
+ }
+
+ i = nvar;
+ nvar++;
+ v = var+i;
+ v->sym = s;
+ v->offset = o;
+ v->name = n;
+ v->gotype = a->gotype;
+ v->etype = et;
+ v->width = w;
+ v->addr = flag; // funny punning
+ v->node = a->node;
+
+ if(debug['R'])
+ print("bit=%2d et=%2d w=%d+%d %S %D flag=%d\n", i, et, o, w, s, a, v->addr);
+ ostats.nvar++;
+
+ bit = blsh(i);
+ if(n == D_EXTERN || n == D_STATIC)
+ for(z=0; z<BITS; z++)
+ externs.b[z] |= bit.b[z];
+ if(n == D_PARAM)
+ for(z=0; z<BITS; z++)
+ params.b[z] |= bit.b[z];
+
+ return bit;
+
+none:
+ return zbits;
+}
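
A sketch of the overlap rule above, with hypothetical widths: if the same symbol is referenced under two incompatible layouts, both pseudo-variables are disabled rather than tracked.

	// x read as a 4-byte TUINT32 at x+0 and later as a 2-byte TUINT16 at x+0:
	// the widths disagree, overlap() is true, both entries get v->addr set,
	// and regopt then leaves x in memory instead of assigning it a register.
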
+
+void
+prop(Reg *r, Bits ref, Bits cal)
+{
+ Reg *r1, *r2;
+ int z;
+
+ for(r1 = r; r1 != R; r1 = r1->p1) {
+ for(z=0; z<BITS; z++) {
+ ref.b[z] |= r1->refahead.b[z];
+ if(ref.b[z] != r1->refahead.b[z]) {
+ r1->refahead.b[z] = ref.b[z];
+ change++;
+ }
+ cal.b[z] |= r1->calahead.b[z];
+ if(cal.b[z] != r1->calahead.b[z]) {
+ r1->calahead.b[z] = cal.b[z];
+ change++;
+ }
+ }
+ switch(r1->prog->as) {
+ case ACALL:
+ if(noreturn(r1->prog))
+ break;
+ for(z=0; z<BITS; z++) {
+ cal.b[z] |= ref.b[z] | externs.b[z];
+ ref.b[z] = 0;
+ }
+ break;
+
+ case ATEXT:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = 0;
+ ref.b[z] = 0;
+ }
+ break;
+
+ case ARET:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = externs.b[z] | ovar.b[z];
+ ref.b[z] = 0;
+ }
+ break;
+ }
+ for(z=0; z<BITS; z++) {
+ ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
+ r1->use1.b[z] | r1->use2.b[z];
+ cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
+ r1->refbehind.b[z] = ref.b[z];
+ r1->calbehind.b[z] = cal.b[z];
+ }
+ if(r1->active)
+ break;
+ r1->active = 1;
+ }
+ for(; r != r1; r = r->p1)
+ for(r2 = r->p2; r2 != R; r2 = r2->p2link)
+ prop(r2, r->refbehind, r->calbehind);
+}
+
+/*
+ * find looping structure
+ *
+ * 1) find reverse postordering
+ * 2) find approximate dominators,
+ * the actual dominators if the flow graph is reducible
+ * otherwise, dominators plus some other non-dominators.
+ * See Matthew S. Hecht and Jeffrey D. Ullman,
+ * "Analysis of a Simple Algorithm for Global Data Flow Problems",
+ * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
+ * Oct. 1-3, 1973, pp. 207-217.
+ * 3) find all nodes with a predecessor dominated by the current node.
+ * such a node is a loop head.
+ * recursively, all preds with a greater rpo number are in the loop
+ */
+int32
+postorder(Reg *r, Reg **rpo2r, int32 n)
+{
+ Reg *r1;
+
+ r->rpo = 1;
+ r1 = r->s1;
+ if(r1 && !r1->rpo)
+ n = postorder(r1, rpo2r, n);
+ r1 = r->s2;
+ if(r1 && !r1->rpo)
+ n = postorder(r1, rpo2r, n);
+ rpo2r[n] = r;
+ n++;
+ return n;
+}
+
+int32
+rpolca(int32 *idom, int32 rpo1, int32 rpo2)
+{
+ int32 t;
+
+ if(rpo1 == -1)
+ return rpo2;
+ while(rpo1 != rpo2){
+ if(rpo1 > rpo2){
+ t = rpo2;
+ rpo2 = rpo1;
+ rpo1 = t;
+ }
+ while(rpo1 < rpo2){
+ t = idom[rpo2];
+ if(t >= rpo2)
+ fatal("bad idom");
+ rpo2 = t;
+ }
+ }
+ return rpo1;
+}
+
+int
+doms(int32 *idom, int32 r, int32 s)
+{
+ while(s > r)
+ s = idom[s];
+ return s == r;
+}
+
+int
+loophead(int32 *idom, Reg *r)
+{
+ int32 src;
+
+ src = r->rpo;
+ if(r->p1 != R && doms(idom, src, r->p1->rpo))
+ return 1;
+ for(r = r->p2; r != R; r = r->p2link)
+ if(doms(idom, src, r->rpo))
+ return 1;
+ return 0;
+}
+
+void
+loopmark(Reg **rpo2r, int32 head, Reg *r)
+{
+ if(r->rpo < head || r->active == head)
+ return;
+ r->active = head;
+ r->loop += LOOP;
+ if(r->p1 != R)
+ loopmark(rpo2r, head, r->p1);
+ for(r = r->p2; r != R; r = r->p2link)
+ loopmark(rpo2r, head, r);
+}
+
+void
+loopit(Reg *r, int32 nr)
+{
+ Reg *r1;
+ int32 i, d, me;
+
+ if(nr > maxnr) {
+ rpo2r = mal(nr * sizeof(Reg*));
+ idom = mal(nr * sizeof(int32));
+ maxnr = nr;
+ }
+
+ d = postorder(r, rpo2r, 0);
+ if(d > nr)
+ fatal("too many reg nodes %d %d", d, nr);
+ nr = d;
+ for(i = 0; i < nr / 2; i++) {
+ r1 = rpo2r[i];
+ rpo2r[i] = rpo2r[nr - 1 - i];
+ rpo2r[nr - 1 - i] = r1;
+ }
+ for(i = 0; i < nr; i++)
+ rpo2r[i]->rpo = i;
+
+ idom[0] = 0;
+ for(i = 0; i < nr; i++) {
+ r1 = rpo2r[i];
+ me = r1->rpo;
+ d = -1;
+ if(r1->p1 != R && r1->p1->rpo < me)
+ d = r1->p1->rpo;
+ for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
+ if(r1->rpo < me)
+ d = rpolca(idom, d, r1->rpo);
+ idom[i] = d;
+ }
+
+ for(i = 0; i < nr; i++) {
+ r1 = rpo2r[i];
+ r1->loop++;
+ if(r1->p2 != R && loophead(idom, r1))
+ loopmark(rpo2r, i, r1);
+ }
+}
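
A worked example of the loop detection above, for a four-block flow graph A -> B -> C -> D with a back edge C -> B:

	// rpo (after reversing the postorder):  A=0  B=1  C=2  D=3
	// idom: B=0 (pred A), C=1 (pred B), D=2 (pred C); the back edge C->B is
	//       ignored for idom because C's rpo is larger than B's.
	// loophead(B): predecessor C has doms(idom, 1, 2) == 1, so B heads a loop;
	// loopmark adds LOOP (3) to the loop counts of B and C, which paint1 later
	// multiplies into the CREF/CLOAD costs so in-loop variables win registers.
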
+
+void
+synch(Reg *r, Bits dif)
+{
+ Reg *r1;
+ int z;
+
+ for(r1 = r; r1 != R; r1 = r1->s1) {
+ for(z=0; z<BITS; z++) {
+ dif.b[z] = (dif.b[z] &
+ ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
+ r1->set.b[z] | r1->regdiff.b[z];
+ if(dif.b[z] != r1->regdiff.b[z]) {
+ r1->regdiff.b[z] = dif.b[z];
+ change++;
+ }
+ }
+ if(r1->active)
+ break;
+ r1->active = 1;
+ for(z=0; z<BITS; z++)
+ dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
+ if(r1->s2 != R)
+ synch(r1->s2, dif);
+ }
+}
+
+uint32
+allreg(uint32 b, Rgn *r)
+{
+ Var *v;
+ int i;
+
+ v = var + r->varno;
+ r->regno = 0;
+ switch(v->etype) {
+
+ default:
+ fatal("unknown etype %d/%E", bitno(b), v->etype);
+ break;
+
+ case TINT8:
+ case TUINT8:
+ case TINT16:
+ case TUINT16:
+ case TINT32:
+ case TUINT32:
+ case TINT64:
+ case TINT:
+ case TUINT:
+ case TUINTPTR:
+ case TBOOL:
+ case TPTR32:
+ i = BtoR(~b);
+ if(i && r->cost > 0) {
+ r->regno = i;
+ return RtoB(i);
+ }
+ break;
+
+ case TFLOAT32:
+ case TFLOAT64:
+ break;
+ }
+ return 0;
+}
+
+void
+paint1(Reg *r, int bn)
+{
+ Reg *r1;
+ Prog *p;
+ int z;
+ uint32 bb;
+
+ z = bn/32;
+ bb = 1L<<(bn%32);
+ if(r->act.b[z] & bb)
+ return;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = r->p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(r1->act.b[z] & bb)
+ break;
+ r = r1;
+ }
+
+ if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
+ change -= CLOAD * r->loop;
+ }
+ for(;;) {
+ r->act.b[z] |= bb;
+ p = r->prog;
+
+ if(r->use1.b[z] & bb) {
+ change += CREF * r->loop;
+ if(p->as == AFMOVL || p->as == AFMOVW)
+ if(BtoR(bb) != D_F0)
+ change = -CINF;
+ }
+
+ if((r->use2.b[z]|r->set.b[z]) & bb) {
+ change += CREF * r->loop;
+ if(p->as == AFMOVL || p->as == AFMOVW)
+ if(BtoR(bb) != D_F0)
+ change = -CINF;
+ }
+
+ if(STORE(r) & r->regdiff.b[z] & bb) {
+ change -= CLOAD * r->loop;
+ if(p->as == AFMOVL || p->as == AFMOVW)
+ if(BtoR(bb) != D_F0)
+ change = -CINF;
+ }
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+ if(r1->refahead.b[z] & bb)
+ paint1(r1, bn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = r->s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ paint1(r1, bn);
+ r = r->s1;
+ if(r == R)
+ break;
+ if(r->act.b[z] & bb)
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+}
+
+uint32
+regset(Reg *r, uint32 bb)
+{
+ uint32 b, set;
+ Adr v;
+ int c;
+
+ set = 0;
+ v = zprog.from;
+ while(b = bb & ~(bb-1)) {
+ v.type = BtoR(b);
+ c = copyu(r->prog, &v, A);
+ if(c == 3)
+ set |= b;
+ bb &= ~b;
+ }
+ return set;
+}
+
+uint32
+reguse(Reg *r, uint32 bb)
+{
+ uint32 b, set;
+ Adr v;
+ int c;
+
+ set = 0;
+ v = zprog.from;
+ while(b = bb & ~(bb-1)) {
+ v.type = BtoR(b);
+ c = copyu(r->prog, &v, A);
+ if(c == 1 || c == 2 || c == 4)
+ set |= b;
+ bb &= ~b;
+ }
+ return set;
+}
+
+uint32
+paint2(Reg *r, int bn)
+{
+ Reg *r1;
+ int z;
+ uint32 bb, vreg, x;
+
+ z = bn/32;
+ bb = 1L << (bn%32);
+ vreg = regbits;
+ if(!(r->act.b[z] & bb))
+ return vreg;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = r->p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(!(r1->act.b[z] & bb))
+ break;
+ r = r1;
+ }
+ for(;;) {
+ r->act.b[z] &= ~bb;
+
+ vreg |= r->regu;
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+ if(r1->refahead.b[z] & bb)
+ vreg |= paint2(r1, bn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = r->s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ vreg |= paint2(r1, bn);
+ r = r->s1;
+ if(r == R)
+ break;
+ if(!(r->act.b[z] & bb))
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+
+ bb = vreg;
+ for(; r; r=r->s1) {
+ x = r->regu & ~bb;
+ if(x) {
+ vreg |= reguse(r, x);
+ bb |= regset(r, x);
+ }
+ }
+ return vreg;
+}
+
+void
+paint3(Reg *r, int bn, int32 rb, int rn)
+{
+ Reg *r1;
+ Prog *p;
+ int z;
+ uint32 bb;
+
+ z = bn/32;
+ bb = 1L << (bn%32);
+ if(r->act.b[z] & bb)
+ return;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = r->p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(r1->act.b[z] & bb)
+ break;
+ r = r1;
+ }
+
+ if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
+ addmove(r, bn, rn, 0);
+ for(;;) {
+ r->act.b[z] |= bb;
+ p = r->prog;
+
+ if(r->use1.b[z] & bb) {
+ if(debug['R'] && debug['v'])
+ print("%P", p);
+ addreg(&p->from, rn);
+ if(debug['R'] && debug['v'])
+ print(" ===change== %P\n", p);
+ }
+ if((r->use2.b[z]|r->set.b[z]) & bb) {
+ if(debug['R'] && debug['v'])
+ print("%P", p);
+ addreg(&p->to, rn);
+ if(debug['R'] && debug['v'])
+ print(" ===change== %P\n", p);
+ }
+
+ if(STORE(r) & r->regdiff.b[z] & bb)
+ addmove(r, bn, rn, 1);
+ r->regu |= rb;
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = r->p2; r1 != R; r1 = r1->p2link)
+ if(r1->refahead.b[z] & bb)
+ paint3(r1, bn, rb, rn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = r->s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ paint3(r1, bn, rb, rn);
+ r = r->s1;
+ if(r == R)
+ break;
+ if(r->act.b[z] & bb)
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+}
+
+void
+addreg(Adr *a, int rn)
+{
+
+ a->sym = 0;
+ a->offset = 0;
+ a->type = rn;
+
+ ostats.ncvtreg++;
+}
+
+int32
+RtoB(int r)
+{
+
+ if(r < D_AX || r > D_DI)
+ return 0;
+ return 1L << (r-D_AX);
+}
+
+int
+BtoR(int32 b)
+{
+
+ b &= 0xffL;
+ if(b == 0)
+ return 0;
+ return bitno(b) + D_AX;
+}
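
A few concrete values for these bit mappings (register order AX, CX, DX, BX, SP, BP, SI, DI, so AX is bit 0 and DI is bit 7):

	// RtoB(D_AX) == 0x01   RtoB(D_DX) == 0x04   RtoB(D_DI) == 0x80
	// BtoR(0x04) == D_DX    BtoR(0) == 0 (no register)
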
+
+void
+dumpone(Reg *r)
+{
+ int z;
+ Bits bit;
+
+ print("%d:%P", r->loop, r->prog);
+ for(z=0; z<BITS; z++)
+ bit.b[z] =
+ r->set.b[z] |
+ r->use1.b[z] |
+ r->use2.b[z] |
+ r->refbehind.b[z] |
+ r->refahead.b[z] |
+ r->calbehind.b[z] |
+ r->calahead.b[z] |
+ r->regdiff.b[z] |
+ r->act.b[z] |
+ 0;
+ if(bany(&bit)) {
+ print("\t");
+ if(bany(&r->set))
+ print(" s:%Q", r->set);
+ if(bany(&r->use1))
+ print(" u1:%Q", r->use1);
+ if(bany(&r->use2))
+ print(" u2:%Q", r->use2);
+ if(bany(&r->refbehind))
+ print(" rb:%Q ", r->refbehind);
+ if(bany(&r->refahead))
+ print(" ra:%Q ", r->refahead);
+ if(bany(&r->calbehind))
+ print("cb:%Q ", r->calbehind);
+ if(bany(&r->calahead))
+ print(" ca:%Q ", r->calahead);
+ if(bany(&r->regdiff))
+ print(" d:%Q ", r->regdiff);
+ if(bany(&r->act))
+ print(" a:%Q ", r->act);
+ }
+ print("\n");
+}
+
+void
+dumpit(char *str, Reg *r0)
+{
+ Reg *r, *r1;
+
+ print("\n%s\n", str);
+ for(r = r0; r != R; r = r->link) {
+ dumpone(r);
+ r1 = r->p2;
+ if(r1 != R) {
+ print(" pred:");
+ for(; r1 != R; r1 = r1->p2link)
+ print(" %.4ud", r1->prog->loc);
+ print("\n");
+ }
+// r1 = r->s1;
+// if(r1 != R) {
+// print(" succ:");
+// for(; r1 != R; r1 = r1->s1)
+// print(" %.4ud", r1->prog->loc);
+// print("\n");
+// }
+ }
+}
+
+static Sym* symlist[10];
+
+int
+noreturn(Prog *p)
+{
+ Sym *s;
+ int i;
+
+ if(symlist[0] == S) {
+ symlist[0] = pkglookup("panicindex", runtimepkg);
+ symlist[1] = pkglookup("panicslice", runtimepkg);
+ symlist[2] = pkglookup("throwinit", runtimepkg);
+ symlist[3] = pkglookup("panic", runtimepkg);
+ symlist[4] = pkglookup("panicwrap", runtimepkg);
+ }
+
+ s = p->to.sym;
+ if(s == S)
+ return 0;
+ for(i=0; symlist[i]!=S; i++)
+ if(s == symlist[i])
+ return 1;
+ return 0;
+}