diff options
Diffstat (limited to 'src/cmd/6g')
-rw-r--r-- | src/cmd/6g/cgen.c | 746 | ||||
-rw-r--r-- | src/cmd/6g/doc.go | 4 | ||||
-rw-r--r-- | src/cmd/6g/galign.c | 5 | ||||
-rw-r--r-- | src/cmd/6g/gg.h | 36 | ||||
-rw-r--r-- | src/cmd/6g/ggen.c | 804 | ||||
-rw-r--r-- | src/cmd/6g/gobj.c | 30 | ||||
-rw-r--r-- | src/cmd/6g/gsubr.c | 297 | ||||
-rw-r--r-- | src/cmd/6g/list.c | 13 | ||||
-rw-r--r-- | src/cmd/6g/opt.h | 22 | ||||
-rw-r--r-- | src/cmd/6g/peep.c | 368 | ||||
-rw-r--r-- | src/cmd/6g/reg.c | 100 |
11 files changed, 1389 insertions, 1036 deletions
diff --git a/src/cmd/6g/cgen.c b/src/cmd/6g/cgen.c index 00334e71b..a51c0ca58 100644 --- a/src/cmd/6g/cgen.c +++ b/src/cmd/6g/cgen.c @@ -33,9 +33,26 @@ cgen(Node *n, Node *res) while(n->op == OCONVNOP) n = n->left; - // inline slices - if(cgen_inline(n, res)) + switch(n->op) { + case OSLICE: + case OSLICEARR: + case OSLICESTR: + if (res->op != ONAME || !res->addable) { + tempname(&n1, n->type); + cgen_slice(n, &n1); + cgen(&n1, res); + } else + cgen_slice(n, res); goto ret; + case OEFACE: + if (res->op != ONAME || !res->addable) { + tempname(&n1, n->type); + cgen_eface(n, &n1); + cgen(&n1, res); + } else + cgen_eface(n, res); + goto ret; + } if(n->ullman >= UINF) { if(n->op == OINDREG) @@ -174,7 +191,7 @@ cgen(Node *n, Node *res) switch(n->op) { default: dump("cgen", n); - fatal("cgen: unknown op %N", n); + fatal("cgen: unknown op %+hN", n); break; // these call bgen to get a bool value @@ -187,12 +204,12 @@ cgen(Node *n, Node *res) case OGE: case OGT: case ONOT: - p1 = gbranch(AJMP, T); + p1 = gbranch(AJMP, T, 0); p2 = pc; gmove(nodbool(1), res); - p3 = gbranch(AJMP, T); + p3 = gbranch(AJMP, T, 0); patch(p1, pc); - bgen(n, 1, p2); + bgen(n, 1, 0, p2); gmove(nodbool(0), res); patch(p3, pc); goto ret; @@ -229,17 +246,41 @@ cgen(Node *n, Node *res) case OADD: case OMUL: a = optoas(n->op, nl->type); - if(a != AIMULB) - goto sbop; - cgen_bmul(n->op, nl, nr, res); - break; + if(a == AIMULB) { + cgen_bmul(n->op, nl, nr, res); + break; + } + goto sbop; // asymmetric binary case OSUB: a = optoas(n->op, nl->type); goto abop; + case OHMUL: + cgen_hmul(nl, nr, res); + break; + case OCONV: + if(n->type->width > nl->type->width) { + // If loading from memory, do conversion during load, + // so as to avoid use of 8-bit register in, say, int(*byteptr). + switch(nl->op) { + case ODOT: + case ODOTPTR: + case OINDEX: + case OIND: + case ONAME: + igen(nl, &n1, res); + regalloc(&n2, n->type, res); + gmove(&n1, &n2); + gmove(&n2, res); + regfree(&n2); + regfree(&n1); + goto ret; + } + } + regalloc(&n1, nl->type, res); regalloc(&n2, n->type, &n1); cgen(nl, &n1); @@ -273,18 +314,18 @@ cgen(Node *n, Node *res) case OLEN: if(istype(nl->type, TMAP) || istype(nl->type, TCHAN)) { - // map and chan have len in the first 32-bit word. + // map and chan have len in the first int-sized word. // a zero pointer means zero length regalloc(&n1, types[tptr], res); cgen(nl, &n1); nodconst(&n2, types[tptr], 0); gins(optoas(OCMP, types[tptr]), &n1, &n2); - p1 = gbranch(optoas(OEQ, types[tptr]), T); + p1 = gbranch(optoas(OEQ, types[tptr]), T, 0); n2 = n1; n2.op = OINDREG; - n2.type = types[TINT32]; + n2.type = types[simtype[TINT]]; gmove(&n2, &n1); patch(p1, pc); @@ -297,7 +338,7 @@ cgen(Node *n, Node *res) // both slice and string have len one pointer into the struct. // a zero pointer means zero length igen(nl, &n1, res); - n1.type = types[TUINT32]; + n1.type = types[simtype[TUINT]]; n1.xoffset += Array_nel; gmove(&n1, res); regfree(&n1); @@ -308,19 +349,19 @@ cgen(Node *n, Node *res) case OCAP: if(istype(nl->type, TCHAN)) { - // chan has cap in the second 32-bit word. + // chan has cap in the second int-sized word. // a zero pointer means zero length regalloc(&n1, types[tptr], res); cgen(nl, &n1); nodconst(&n2, types[tptr], 0); gins(optoas(OCMP, types[tptr]), &n1, &n2); - p1 = gbranch(optoas(OEQ, types[tptr]), T); + p1 = gbranch(optoas(OEQ, types[tptr]), T, 0); n2 = n1; n2.op = OINDREG; - n2.xoffset = 4; - n2.type = types[TINT32]; + n2.xoffset = widthint; + n2.type = types[simtype[TINT]]; gmove(&n2, &n1); patch(p1, pc); @@ -331,7 +372,7 @@ cgen(Node *n, Node *res) } if(isslice(nl->type)) { igen(nl, &n1, res); - n1.type = types[TUINT32]; + n1.type = types[simtype[TUINT]]; n1.xoffset += Array_cap; gmove(&n1, res); regfree(&n1); @@ -365,18 +406,53 @@ cgen(Node *n, Node *res) a = optoas(n->op, nl->type); goto abop; } - cgen_div(n->op, nl, nr, res); + + if(nl->ullman >= nr->ullman) { + regalloc(&n1, nl->type, res); + cgen(nl, &n1); + cgen_div(n->op, &n1, nr, res); + regfree(&n1); + } else { + if(!smallintconst(nr)) { + regalloc(&n2, nr->type, res); + cgen(nr, &n2); + } else { + n2 = *nr; + } + cgen_div(n->op, nl, &n2, res); + if(n2.op != OLITERAL) + regfree(&n2); + } break; case OLSH: case ORSH: - cgen_shift(n->op, nl, nr, res); + case OLROT: + cgen_shift(n->op, n->bounded, nl, nr, res); break; } goto ret; sbop: // symmetric binary - if(nl->ullman < nr->ullman) { + /* + * put simplest on right - we'll generate into left + * and then adjust it using the computation of right. + * constants and variables have the same ullman + * count, so look for constants specially. + * + * an integer constant we can use as an immediate + * is simpler than a variable - we can use the immediate + * in the adjustment instruction directly - so it goes + * on the right. + * + * other constants, like big integers or floating point + * constants, require a mov into a register, so those + * might as well go on the left, so we can reuse that + * register for the computation. + */ + if(nl->ullman < nr->ullman || + (nl->ullman == nr->ullman && + (smallintconst(nl) || (nr->op == OLITERAL && !smallintconst(nr))))) { r = nl; nl = nr; nr = r; @@ -386,7 +462,13 @@ abop: // asymmetric binary if(nl->ullman >= nr->ullman) { regalloc(&n1, nl->type, res); cgen(nl, &n1); - + /* + * This generates smaller code - it avoids a MOV - but it's + * easily 10% slower due to not being able to + * optimize/manipulate the move. + * To see, run: go test -bench . crypto/md5 + * with and without. + * if(sudoaddable(a, nr, &addr)) { p1 = gins(a, N, &n1); p1->from = addr; @@ -395,18 +477,30 @@ abop: // asymmetric binary regfree(&n1); goto ret; } - regalloc(&n2, nr->type, N); - cgen(nr, &n2); + * + */ + + if(smallintconst(nr)) + n2 = *nr; + else { + regalloc(&n2, nr->type, N); + cgen(nr, &n2); + } } else { - regalloc(&n2, nr->type, res); - cgen(nr, &n2); + if(smallintconst(nr)) + n2 = *nr; + else { + regalloc(&n2, nr->type, res); + cgen(nr, &n2); + } regalloc(&n1, nl->type, N); cgen(nl, &n1); } gins(a, &n2, &n1); gmove(&n1, res); regfree(&n1); - regfree(&n2); + if(n2.op != OLITERAL) + regfree(&n2); goto ret; uop: // unary @@ -422,93 +516,142 @@ ret: } /* - * generate: - * res = &n; + * allocate a register in res and generate + * newreg = &n + * The caller must call regfree(a). */ void -agen(Node *n, Node *res) +cgenr(Node *n, Node *a, Node *res) +{ + Node n1; + + if(debug['g']) + dump("cgenr-n", n); + + if(isfat(n->type)) + fatal("cgenr on fat node"); + + if(n->addable) { + regalloc(a, n->type, res); + gmove(n, a); + return; + } + + switch(n->op) { + case ONAME: + case ODOT: + case ODOTPTR: + case OINDEX: + case OCALLFUNC: + case OCALLMETH: + case OCALLINTER: + igen(n, &n1, res); + regalloc(a, types[tptr], &n1); + gmove(&n1, a); + regfree(&n1); + break; + default: + regalloc(a, n->type, res); + cgen(n, a); + break; + } +} + +/* + * allocate a register in res and generate + * res = &n + */ +void +agenr(Node *n, Node *a, Node *res) { Node *nl, *nr; - Node n1, n2, n3, tmp, n4, n5; + Node n1, n2, n3, n4, n5, tmp, tmp2, nlen; Prog *p1; + Type *t; uint32 w; uint64 v; - Type *t; + int freelen; if(debug['g']) { - dump("\nagen-res", res); - dump("agen-r", n); - } - if(n == N || n->type == T) - return; - - while(n->op == OCONVNOP) - n = n->left; - - if(n->addable) { - regalloc(&n1, types[tptr], res); - gins(ALEAQ, n, &n1); - gmove(&n1, res); - regfree(&n1); - goto ret; + dump("\nagenr-n", n); } nl = n->left; nr = n->right; switch(n->op) { - default: - fatal("agen: unknown op %N", n); - break; - + case ODOT: + case ODOTPTR: + case OCALLFUNC: case OCALLMETH: - cgen_callmeth(n, 0); - cgen_aret(n, res); - break; - case OCALLINTER: - cgen_callinter(n, res, 0); - cgen_aret(n, res); + igen(n, &n1, res); + regalloc(a, types[tptr], &n1); + agen(&n1, a); + regfree(&n1); break; - case OCALLFUNC: - cgen_call(n, 0); - cgen_aret(n, res); + case OIND: + cgenr(n->left, a, res); break; case OINDEX: + freelen = 0; w = n->type->width; + // Generate the non-addressable child first. if(nr->addable) goto irad; if(nl->addable) { - if(!isconst(nr, CTINT)) { - regalloc(&n1, nr->type, N); - cgen(nr, &n1); - } + cgenr(nr, &n1, N); if(!isconst(nl, CTSTR)) { - regalloc(&n3, types[tptr], res); - agen(nl, &n3); + if(isfixedarray(nl->type)) { + agenr(nl, &n3, res); + } else { + igen(nl, &nlen, res); + freelen = 1; + nlen.type = types[tptr]; + nlen.xoffset += Array_array; + regalloc(&n3, types[tptr], res); + gmove(&nlen, &n3); + nlen.type = types[simtype[TUINT]]; + nlen.xoffset += Array_nel-Array_array; + } } goto index; } tempname(&tmp, nr->type); cgen(nr, &tmp); nr = &tmp; - irad: if(!isconst(nl, CTSTR)) { - regalloc(&n3, types[tptr], res); - agen(nl, &n3); + if(isfixedarray(nl->type)) { + agenr(nl, &n3, res); + } else { + if(!nl->addable) { + // igen will need an addressable node. + tempname(&tmp2, nl->type); + cgen(nl, &tmp2); + nl = &tmp2; + } + igen(nl, &nlen, res); + freelen = 1; + nlen.type = types[tptr]; + nlen.xoffset += Array_array; + regalloc(&n3, types[tptr], res); + gmove(&nlen, &n3); + nlen.type = types[simtype[TUINT]]; + nlen.xoffset += Array_nel-Array_array; + } } if(!isconst(nr, CTINT)) { - regalloc(&n1, nr->type, N); - cgen(nr, &n1); + cgenr(nr, &n1, N); } goto index; index: // &a is in &n3 (allocated in res) // i is in &n1 (if not constant) + // len(a) is in nlen (if needed) // w is width // explicit check for nil if array is large enough @@ -529,29 +672,26 @@ agen(Node *n, Node *res) fatal("constant string constant index"); // front end should handle v = mpgetfix(nr->val.u.xval); if(isslice(nl->type) || nl->type->etype == TSTRING) { - if(!debug['B'] && !n->etype) { - n1 = n3; - n1.op = OINDREG; - n1.type = types[tptr]; - n1.xoffset = Array_nel; - nodconst(&n2, types[TUINT32], v); - gins(optoas(OCMP, types[TUINT32]), &n1, &n2); - p1 = gbranch(optoas(OGT, types[TUINT32]), T); - ginscall(panicindex, 0); + if(!debug['B'] && !n->bounded) { + nodconst(&n2, types[simtype[TUINT]], v); + if(smallintconst(nr)) { + gins(optoas(OCMP, types[simtype[TUINT]]), &nlen, &n2); + } else { + regalloc(&tmp, types[simtype[TUINT]], N); + gmove(&n2, &tmp); + gins(optoas(OCMP, types[simtype[TUINT]]), &nlen, &tmp); + regfree(&tmp); + } + p1 = gbranch(optoas(OGT, types[simtype[TUINT]]), T, +1); + ginscall(panicindex, -1); patch(p1, pc); } - - n1 = n3; - n1.op = OINDREG; - n1.type = types[tptr]; - n1.xoffset = Array_array; - gmove(&n1, &n3); + regfree(&nlen); } if (v*w != 0) ginscon(optoas(OADD, types[tptr]), v*w, &n3); - gmove(&n3, res); - regfree(&n3); + *a = n3; break; } @@ -564,32 +704,32 @@ agen(Node *n, Node *res) gmove(&n1, &n2); regfree(&n1); - if(!debug['B'] && !n->etype) { + if(!debug['B'] && !n->bounded) { // check bounds - n5.op = OXXX; - t = types[TUINT32]; + t = types[simtype[TUINT]]; if(is64(nr->type)) t = types[TUINT64]; if(isconst(nl, CTSTR)) { - nodconst(&n1, t, nl->val.u.sval->len); + nodconst(&nlen, t, nl->val.u.sval->len); } else if(isslice(nl->type) || nl->type->etype == TSTRING) { - n1 = n3; - n1.op = OINDREG; - n1.type = types[TUINT32]; - n1.xoffset = Array_nel; if(is64(nr->type)) { regalloc(&n5, t, N); - gmove(&n1, &n5); - n1 = n5; + gmove(&nlen, &n5); + regfree(&nlen); + nlen = n5; } } else { - nodconst(&n1, t, nl->type->bound); + nodconst(&nlen, t, nl->type->bound); + if(!smallintconst(&nlen)) { + regalloc(&n5, t, N); + gmove(&nlen, &n5); + nlen = n5; + freelen = 1; + } } - gins(optoas(OCMP, t), &n2, &n1); - p1 = gbranch(optoas(OLT, t), T); - if(n5.op != OXXX) - regfree(&n5); - ginscall(panicindex, 0); + gins(optoas(OCMP, t), &n2, &nlen); + p1 = gbranch(optoas(OLT, t), T, +1); + ginscall(panicindex, -1); patch(p1, pc); } @@ -597,19 +737,15 @@ agen(Node *n, Node *res) regalloc(&n3, types[tptr], res); p1 = gins(ALEAQ, N, &n3); datastring(nl->val.u.sval->s, nl->val.u.sval->len, &p1->from); - p1->from.scale = 1; - p1->from.index = n2.val.u.reg; + if(flag_largemodel) { + gins(AADDQ, &n2, &n3); + } else { + p1->from.scale = 1; + p1->from.index = n2.val.u.reg; + } goto indexdone; } - if(isslice(nl->type) || nl->type->etype == TSTRING) { - n1 = n3; - n1.op = OINDREG; - n1.type = types[tptr]; - n1.xoffset = Array_array; - gmove(&n1, &n3); - } - if(w == 0) { // nothing to do } else if(w == 1 || w == 2 || w == 4 || w == 8) { @@ -623,9 +759,103 @@ agen(Node *n, Node *res) } indexdone: - gmove(&n3, res); + *a = n3; regfree(&n2); - regfree(&n3); + if(freelen) + regfree(&nlen); + break; + + default: + regalloc(a, types[tptr], res); + agen(n, a); + break; + } +} + +/* + * generate: + * res = &n; + */ +void +agen(Node *n, Node *res) +{ + Node *nl, *nr; + Node n1, n2; + + if(debug['g']) { + dump("\nagen-res", res); + dump("agen-r", n); + } + if(n == N || n->type == T) + return; + + while(n->op == OCONVNOP) + n = n->left; + + if(isconst(n, CTNIL) && n->type->width > widthptr) { + // Use of a nil interface or nil slice. + // Create a temporary we can take the address of and read. + // The generated code is just going to panic, so it need not + // be terribly efficient. See issue 3670. + tempname(&n1, n->type); + clearfat(&n1); + regalloc(&n2, types[tptr], res); + gins(ALEAQ, &n1, &n2); + gmove(&n2, res); + regfree(&n2); + goto ret; + } + + if(n->addable) { + regalloc(&n1, types[tptr], res); + gins(ALEAQ, n, &n1); + gmove(&n1, res); + regfree(&n1); + goto ret; + } + + nl = n->left; + nr = n->right; + USED(nr); + + switch(n->op) { + default: + fatal("agen: unknown op %+hN", n); + break; + + case OCALLMETH: + cgen_callmeth(n, 0); + cgen_aret(n, res); + break; + + case OCALLINTER: + cgen_callinter(n, res, 0); + cgen_aret(n, res); + break; + + case OCALLFUNC: + cgen_call(n, 0); + cgen_aret(n, res); + break; + + case OSLICE: + case OSLICEARR: + case OSLICESTR: + tempname(&n1, n->type); + cgen_slice(n, &n1); + agen(&n1, res); + break; + + case OEFACE: + tempname(&n1, n->type); + cgen_eface(n, &n1); + agen(&n1, res); + break; + + case OINDEX: + agenr(n, &n1, res); + gmove(&n1, res); + regfree(&n1); break; case ONAME: @@ -692,7 +922,11 @@ igen(Node *n, Node *a, Node *res) { Type *fp; Iter flist; - + Node n1; + + if(debug['g']) { + dump("\nigen-n", n); + } switch(n->op) { case ONAME: if((n->class&PHEAP) || n->class == PPARAMREF) @@ -700,9 +934,53 @@ igen(Node *n, Node *a, Node *res) *a = *n; return; + case OINDREG: + // Increase the refcount of the register so that igen's caller + // has to call regfree. + if(n->val.u.reg != D_SP) + reg[n->val.u.reg]++; + *a = *n; + return; + + case ODOT: + igen(n->left, a, res); + a->xoffset += n->xoffset; + a->type = n->type; + return; + + case ODOTPTR: + cgenr(n->left, a, res); + if(n->xoffset != 0) { + // explicit check for nil if struct is large enough + // that we might derive too big a pointer. + if(n->left->type->type->width >= unmappedzero) { + n1 = *a; + n1.op = OINDREG; + n1.type = types[TUINT8]; + n1.xoffset = 0; + gins(ATESTB, nodintconst(0), &n1); + } + } + a->op = OINDREG; + a->xoffset += n->xoffset; + a->type = n->type; + return; + case OCALLFUNC: + case OCALLMETH: + case OCALLINTER: + switch(n->op) { + case OCALLFUNC: + cgen_call(n, 0); + break; + case OCALLMETH: + cgen_callmeth(n, 0); + break; + case OCALLINTER: + cgen_callinter(n, N, 0); + break; + } fp = structfirst(&flist, getoutarg(n->left->type)); - cgen_call(n, 0); memset(a, 0, sizeof *a); a->op = OINDREG; a->val.u.reg = D_SP; @@ -710,10 +988,34 @@ igen(Node *n, Node *a, Node *res) a->xoffset = fp->width; a->type = n->type; return; + + case OINDEX: + // Index of fixed-size array by constant can + // put the offset in the addressing. + // Could do the same for slice except that we need + // to use the real index for the bounds checking. + if(isfixedarray(n->left->type) || + (isptr[n->left->type->etype] && isfixedarray(n->left->left->type))) + if(isconst(n->right, CTINT)) { + // Compute &a. + if(!isptr[n->left->type->etype]) + igen(n->left, a, res); + else { + igen(n->left, &n1, res); + regalloc(a, types[tptr], res); + gmove(&n1, a); + regfree(&n1); + a->op = OINDREG; + } + + // Compute &a[i] as &a + i*width. + a->type = n->type; + a->xoffset += mpgetfix(n->right->val.u.xval)*n->type->width; + return; + } } - - regalloc(a, types[tptr], res); - agen(n, a); + + agenr(n, a, res); a->op = OINDREG; a->type = n->type; } @@ -723,7 +1025,7 @@ igen(Node *n, Node *a, Node *res) * if(n == true) goto to; */ void -bgen(Node *n, int true, Prog *to) +bgen(Node *n, int true, int likely, Prog *to) { int et, a; Node *nl, *nr, *l, *r; @@ -765,14 +1067,14 @@ bgen(Node *n, int true, Prog *to) a = AJNE; if(!true) a = AJEQ; - patch(gbranch(a, n->type), to); + patch(gbranch(a, n->type, likely), to); regfree(&n1); goto ret; case OLITERAL: // need to ask if it is bool? if(!true == !n->val.u.bval) - patch(gbranch(AJMP, T), to); + patch(gbranch(AJMP, T, likely), to); goto ret; case ONAME: @@ -783,7 +1085,7 @@ bgen(Node *n, int true, Prog *to) a = AJNE; if(!true) a = AJEQ; - patch(gbranch(a, n->type), to); + patch(gbranch(a, n->type, likely), to); goto ret; case OANDAND: @@ -791,12 +1093,12 @@ bgen(Node *n, int true, Prog *to) goto caseor; caseand: - p1 = gbranch(AJMP, T); - p2 = gbranch(AJMP, T); + p1 = gbranch(AJMP, T, 0); + p2 = gbranch(AJMP, T, 0); patch(p1, pc); - bgen(n->left, !true, p2); - bgen(n->right, !true, p2); - p1 = gbranch(AJMP, T); + bgen(n->left, !true, -likely, p2); + bgen(n->right, !true, -likely, p2); + p1 = gbranch(AJMP, T, 0); patch(p1, to); patch(p2, pc); goto ret; @@ -806,8 +1108,8 @@ bgen(Node *n, int true, Prog *to) goto caseand; caseor: - bgen(n->left, true, to); - bgen(n->right, true, to); + bgen(n->left, true, likely, to); + bgen(n->right, true, likely, to); goto ret; case OEQ: @@ -830,7 +1132,7 @@ bgen(Node *n, int true, Prog *to) switch(n->op) { case ONOT: - bgen(nl, !true, to); + bgen(nl, !true, likely, to); goto ret; case OEQ: @@ -843,14 +1145,14 @@ bgen(Node *n, int true, Prog *to) if(!true) { if(isfloat[nr->type->etype]) { // brcom is not valid on floats when NaN is involved. - p1 = gbranch(AJMP, T); - p2 = gbranch(AJMP, T); + p1 = gbranch(AJMP, T, 0); + p2 = gbranch(AJMP, T, 0); patch(p1, pc); ll = n->ninit; // avoid re-genning ninit n->ninit = nil; - bgen(n, 1, p2); + bgen(n, 1, -likely, p2); n->ninit = ll; - patch(gbranch(AJMP, T), to); + patch(gbranch(AJMP, T, 0), to); patch(p2, pc); goto ret; } @@ -865,47 +1167,41 @@ bgen(Node *n, int true, Prog *to) nl = nr; nr = r; } - + if(isslice(nl->type)) { - // only valid to cmp darray to literal nil + // front end should only leave cmp to literal nil if((a != OEQ && a != ONE) || nr->op != OLITERAL) { - yyerror("illegal array comparison"); + yyerror("illegal slice comparison"); break; } a = optoas(a, types[tptr]); - regalloc(&n1, types[tptr], N); - agen(nl, &n1); - n2 = n1; - n2.op = OINDREG; - n2.xoffset = Array_array; - n2.type = types[tptr]; + igen(nl, &n1, N); + n1.xoffset += Array_array; + n1.type = types[tptr]; nodconst(&tmp, types[tptr], 0); - gins(optoas(OCMP, types[tptr]), &n2, &tmp); - patch(gbranch(a, types[tptr]), to); + gins(optoas(OCMP, types[tptr]), &n1, &tmp); + patch(gbranch(a, types[tptr], likely), to); regfree(&n1); break; } if(isinter(nl->type)) { - // front end shold only leave cmp to literal nil + // front end should only leave cmp to literal nil if((a != OEQ && a != ONE) || nr->op != OLITERAL) { yyerror("illegal interface comparison"); break; } a = optoas(a, types[tptr]); - regalloc(&n1, types[tptr], N); - agen(nl, &n1); - n2 = n1; - n2.op = OINDREG; - n2.xoffset = 0; + igen(nl, &n1, N); + n1.type = types[tptr]; nodconst(&tmp, types[tptr], 0); - gins(optoas(OCMP, types[tptr]), &n2, &tmp); - patch(gbranch(a, types[tptr]), to); + gins(optoas(OCMP, types[tptr]), &n1, &tmp); + patch(gbranch(a, types[tptr], likely), to); regfree(&n1); break; } if(iscomplex[nl->type->etype]) { - complexbool(a, nl, nr, true, to); + complexbool(a, nl, nr, true, likely, to); break; } @@ -931,7 +1227,7 @@ bgen(Node *n, int true, Prog *to) if(smallintconst(nr)) { gins(optoas(OCMP, nr->type), &n1, nr); - patch(gbranch(optoas(a, nr->type), nr->type), to); + patch(gbranch(optoas(a, nr->type), nr->type, likely), to); regfree(&n1); break; } @@ -953,18 +1249,18 @@ bgen(Node *n, int true, Prog *to) if(isfloat[nr->type->etype] && (n->op == OEQ || n->op == ONE)) { if(n->op == OEQ) { // neither NE nor P - p1 = gbranch(AJNE, T); - p2 = gbranch(AJPS, T); - patch(gbranch(AJMP, T), to); + p1 = gbranch(AJNE, T, -likely); + p2 = gbranch(AJPS, T, -likely); + patch(gbranch(AJMP, T, 0), to); patch(p1, pc); patch(p2, pc); } else { // either NE or P - patch(gbranch(AJNE, T), to); - patch(gbranch(AJPS, T), to); + patch(gbranch(AJNE, T, likely), to); + patch(gbranch(AJPS, T, likely), to); } } else - patch(gbranch(optoas(a, nr->type), nr->type), to); + patch(gbranch(optoas(a, nr->type), nr->type, likely), to); regfree(&n1); regfree(&n2); break; @@ -1036,8 +1332,8 @@ stkof(Node *n) void sgen(Node *n, Node *ns, int64 w) { - Node nodl, nodr, oldl, oldr, cx, oldcx, tmp; - int32 c, q, odst, osrc; + Node nodl, nodr, nodsi, noddi, cx, oldcx, tmp; + vlong c, q, odst, osrc; if(debug['g']) { print("\nsgen w=%lld\n", w); @@ -1051,9 +1347,9 @@ sgen(Node *n, Node *ns, int64 w) if(w < 0) fatal("sgen copy %lld", w); - if(w == 16) - if(componentgen(n, ns)) - return; + // Avoid taking the address for simple enough types. + if(componentgen(n, ns)) + return; if(w == 0) { // evaluate side effects only @@ -1080,22 +1376,18 @@ sgen(Node *n, Node *ns, int64 w) } if(n->ullman >= ns->ullman) { - savex(D_SI, &nodr, &oldr, N, types[tptr]); - agen(n, &nodr); - - regalloc(&nodr, types[tptr], &nodr); // mark nodr as live - savex(D_DI, &nodl, &oldl, N, types[tptr]); - agen(ns, &nodl); - regfree(&nodr); + agenr(n, &nodr, N); + agenr(ns, &nodl, N); } else { - savex(D_DI, &nodl, &oldl, N, types[tptr]); - agen(ns, &nodl); - - regalloc(&nodl, types[tptr], &nodl); // mark nodl as live - savex(D_SI, &nodr, &oldr, N, types[tptr]); - agen(n, &nodr); - regfree(&nodl); + agenr(ns, &nodl, N); + agenr(n, &nodr, N); } + nodreg(&noddi, types[tptr], D_DI); + nodreg(&nodsi, types[tptr], D_SI); + gmove(&nodl, &noddi); + gmove(&nodr, &nodsi); + regfree(&nodl); + regfree(&nodr); c = w % 8; // bytes q = w / 8; // quads @@ -1152,9 +1444,6 @@ sgen(Node *n, Node *ns, int64 w) } } - - restx(&nodl, &oldl); - restx(&nodr, &oldr); restx(&cx, &oldcx); } @@ -1175,15 +1464,21 @@ cadable(Node *n) } /* - * copy a structure component by component + * copy a composite value by moving its individual components. + * Slices, strings and interfaces are supported. + * Small structs or arrays with elements of basic type are + * also supported. + * nr is N when assigning a zero value. * return 1 if can do, 0 if cant. - * nr is N for copy zero */ int componentgen(Node *nr, Node *nl) { Node nodl, nodr; + Type *t; int freel, freer; + vlong fldcount; + vlong loffset, roffset; freel = 0; freer = 0; @@ -1193,8 +1488,33 @@ componentgen(Node *nr, Node *nl) goto no; case TARRAY: - if(!isslice(nl->type)) + t = nl->type; + + // Slices are ok. + if(isslice(t)) + break; + // Small arrays are ok. + if(t->bound > 0 && t->bound <= 3 && !isfat(t->type)) + break; + + goto no; + + case TSTRUCT: + // Small structs with non-fat types are ok. + // Zero-sized structs are treated separately elsewhere. + fldcount = 0; + for(t=nl->type->type; t; t=t->down) { + if(isfat(t->type)) + goto no; + if(t->etype != TFIELD) + fatal("componentgen: not a TFIELD: %lT", t); + fldcount++; + } + if(fldcount == 0 || fldcount > 3) goto no; + + break; + case TSTRING: case TINTER: break; @@ -1218,9 +1538,23 @@ componentgen(Node *nr, Node *nl) switch(nl->type->etype) { case TARRAY: - if(!isslice(nl->type)) - goto no; + // componentgen for arrays. + t = nl->type; + if(!isslice(t)) { + nodl.type = t->type; + nodr.type = nodl.type; + for(fldcount=0; fldcount < t->bound; fldcount++) { + if(nr == N) + clearslim(&nodl); + else + gmove(&nodr, &nodl); + nodl.xoffset += t->type->width; + nodr.xoffset += t->type->width; + } + goto yes; + } + // componentgen for slices. nodl.xoffset += Array_array; nodl.type = ptrto(nl->type->type); @@ -1232,7 +1566,7 @@ componentgen(Node *nr, Node *nl) gmove(&nodr, &nodl); nodl.xoffset += Array_nel-Array_array; - nodl.type = types[TUINT32]; + nodl.type = types[simtype[TUINT]]; if(nr != N) { nodr.xoffset += Array_nel-Array_array; @@ -1242,7 +1576,7 @@ componentgen(Node *nr, Node *nl) gmove(&nodr, &nodl); nodl.xoffset += Array_cap-Array_nel; - nodl.type = types[TUINT32]; + nodl.type = types[simtype[TUINT]]; if(nr != N) { nodr.xoffset += Array_cap-Array_nel; @@ -1265,7 +1599,7 @@ componentgen(Node *nr, Node *nl) gmove(&nodr, &nodl); nodl.xoffset += Array_nel-Array_array; - nodl.type = types[TUINT32]; + nodl.type = types[simtype[TUINT]]; if(nr != N) { nodr.xoffset += Array_nel-Array_array; @@ -1300,7 +1634,27 @@ componentgen(Node *nr, Node *nl) goto yes; case TSTRUCT: - goto no; + loffset = nodl.xoffset; + roffset = nodr.xoffset; + // funarg structs may not begin at offset zero. + if(nl->type->etype == TSTRUCT && nl->type->funarg && nl->type->type) + loffset -= nl->type->type->width; + if(nr != N && nr->type->etype == TSTRUCT && nr->type->funarg && nr->type->type) + roffset -= nr->type->type->width; + + for(t=nl->type->type; t; t=t->down) { + nodl.xoffset = loffset + t->width; + nodl.type = t->type; + + if(nr == N) + clearslim(&nodl); + else { + nodr.xoffset = roffset + t->width; + nodr.type = nodl.type; + gmove(&nodr, &nodl); + } + } + goto yes; } no: diff --git a/src/cmd/6g/doc.go b/src/cmd/6g/doc.go index 64f1d2ba9..07b2818da 100644 --- a/src/cmd/6g/doc.go +++ b/src/cmd/6g/doc.go @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// +build ignore + /* 6g is the version of the gc compiler for the x86-64. @@ -10,4 +12,4 @@ The $GOARCH for these tools is amd64. It reads .go files and outputs .6 files. The flags are documented in ../gc/doc.go. */ -package documentation +package main diff --git a/src/cmd/6g/galign.c b/src/cmd/6g/galign.c index b03ac1ed6..526c04c06 100644 --- a/src/cmd/6g/galign.c +++ b/src/cmd/6g/galign.c @@ -17,8 +17,8 @@ vlong MAXWIDTH = 1LL<<50; */ Typedef typedefs[] = { - "int", TINT, TINT32, - "uint", TUINT, TUINT32, + "int", TINT, TINT64, + "uint", TUINT, TUINT64, "uintptr", TUINTPTR, TUINT64, 0 }; @@ -27,6 +27,7 @@ void betypeinit(void) { widthptr = 8; + widthint = 8; zprog.link = P; zprog.as = AGOK; diff --git a/src/cmd/6g/gg.h b/src/cmd/6g/gg.h index 47a540082..ceb6a2caa 100644 --- a/src/cmd/6g/gg.h +++ b/src/cmd/6g/gg.h @@ -14,19 +14,22 @@ typedef struct Addr Addr; struct Addr { vlong offset; - double dval; - Prog* branch; - char sval[NSNAME]; + + union { + double dval; + vlong vval; + Prog* branch; + char sval[NSNAME]; + } u; Sym* gotype; Sym* sym; Node* node; - int width; + int64 width; uchar type; uchar index; uchar etype; uchar scale; /* doubles as width in DATA op */ - uchar pun; /* dont register variable */ }; #define A ((Addr*)0) @@ -58,7 +61,7 @@ EXTERN Node* throwreturn; extern vlong unmappedzero; /* - * gen.c + * ggen.c */ void compile(Node*); void proglist(void); @@ -71,29 +74,31 @@ void cgen_proc(Node*, int); void cgen_callret(Node*, Node*); void cgen_div(int, Node*, Node*, Node*); void cgen_bmul(int, Node*, Node*, Node*); -void cgen_shift(int, Node*, Node*, Node*); +void cgen_hmul(Node*, Node*, Node*); +void cgen_shift(int, int, Node*, Node*, Node*); void cgen_dcl(Node*); int needconvert(Type*, Type*); void genconv(Type*, Type*); void allocparams(void); -void checklabels(); +void checklabels(void); void ginscall(Node*, int); int gen_as_init(Node*); +void clearslim(Node*); /* - * cgen + * cgen.c */ void agen(Node*, Node*); +void agenr(Node*, Node*, Node*); +void cgenr(Node*, Node*, Node*); void igen(Node*, Node*, Node*); vlong fieldoffset(Type*, Node*); -void bgen(Node*, int, Prog*); void sgen(Node*, Node*, int64); void gmove(Node*, Node*); Prog* gins(int, Node*, Node*); int samaddr(Node*, Node*); void naddr(Node*, Addr*, int); void cgen_aret(Node*, Node*); -int cgen_inline(Node*, Node*); void restx(Node*, Node*); void savex(int, Node*, Node*, Node*, Type*); int componentgen(Node*, Node*); @@ -103,9 +108,8 @@ int componentgen(Node*, Node*); */ void clearp(Prog*); void proglist(void); -Prog* gbranch(int, Type*); +Prog* gbranch(int, Type*, int); Prog* prog(int); -void gaddoffset(Node*); void gconv(int, int); int conv2pt(Type*); vlong convvtox(vlong, int); @@ -126,9 +130,9 @@ Plist* newplist(void); int isfat(Type*); void sudoclean(void); int sudoaddable(int, Node*, Addr*); -void afunclit(Addr*); -void datagostring(Strlit*, Addr*); +void afunclit(Addr*, Node*); void nodfconst(Node*, Type*, Mpflt*); +void gtrack(Sym*); /* * cplx.c @@ -136,12 +140,12 @@ void nodfconst(Node*, Type*, Mpflt*); int complexop(Node*, Node*); void complexmove(Node*, Node*); void complexgen(Node*, Node*); -void complexbool(int, Node*, Node*, int, Prog*); /* * gobj.c */ void datastring(char*, int, Addr*); +void datagostring(Strlit*, Addr*); /* * list.c diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c index 02e67d6d4..23bb5093f 100644 --- a/src/cmd/6g/ggen.c +++ b/src/cmd/6g/ggen.c @@ -25,6 +25,9 @@ void markautoused(Prog* p) { for (; p; p = p->link) { + if (p->as == ATYPE) + continue; + if (p->from.type == D_AUTO && p->from.node) p->from.node->used = 1; @@ -35,14 +38,22 @@ markautoused(Prog* p) // Fixup instructions after compactframe has moved all autos around. void -fixautoused(Prog* p) +fixautoused(Prog *p) { - for (; p; p = p->link) { + Prog **lp; + + for (lp=&p; (p=*lp) != P; ) { + if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) { + *lp = p->link; + continue; + } if (p->from.type == D_AUTO && p->from.node) p->from.offset += p->from.node->stkdelta; if (p->to.type == D_AUTO && p->to.node) p->to.offset += p->to.node->stkdelta; + + lp = &p->link; } } @@ -50,15 +61,18 @@ fixautoused(Prog* p) /* * generate: * call f + * proc=-1 normal call but no return * proc=0 normal call * proc=1 goroutine run in new proc * proc=2 defer call save away stack + * proc=3 normal call to C pointer (not Go func value) */ void ginscall(Node *f, int proc) { Prog *p; Node reg, con; + Node r1; switch(proc) { default: @@ -66,14 +80,38 @@ ginscall(Node *f, int proc) break; case 0: // normal call - p = gins(ACALL, N, f); - afunclit(&p->to); + case -1: // normal call but no return + if(f->op == ONAME && f->class == PFUNC) { + p = gins(ACALL, N, f); + afunclit(&p->to, f); + if(proc == -1 || noreturn(p)) + gins(AUNDEF, N, N); + break; + } + nodreg(®, types[tptr], D_DX); + nodreg(&r1, types[tptr], D_BX); + gmove(f, ®); + reg.op = OINDREG; + gmove(®, &r1); + reg.op = OREGISTER; + gins(ACALL, ®, &r1); + break; + + case 3: // normal call of c function pointer + gins(ACALL, N, f); break; case 1: // call in new proc (go) case 2: // deferred call (defer) nodreg(®, types[TINT64], D_CX); - gins(APUSHQ, f, N); + if(flag_largemodel) { + regalloc(&r1, f->type, f); + gmove(f, &r1); + gins(APUSHQ, &r1, N); + regfree(&r1); + } else { + gins(APUSHQ, f, N); + } nodconst(&con, types[TINT32], argsize(f->type)); gins(APUSHQ, &con, N); if(proc == 1) @@ -88,7 +126,7 @@ ginscall(Node *f, int proc) if(proc == 2) { nodreg(®, types[TINT64], D_AX); gins(ATESTQ, ®, ®); - patch(gbranch(AJNE, T), retpc); + patch(gbranch(AJNE, T, -1), retpc); } break; } @@ -102,7 +140,7 @@ void cgen_callinter(Node *n, Node *res, int proc) { Node *i, *f; - Node tmpi, nodo, nodr, nodsp; + Node tmpi, nodi, nodo, nodr, nodsp; i = n->left; if(i->op != ODOTINTER) @@ -122,21 +160,34 @@ cgen_callinter(Node *n, Node *res, int proc) genlist(n->list); // assign the args - regalloc(&nodr, types[tptr], res); - regalloc(&nodo, types[tptr], &nodr); - nodo.op = OINDREG; - - agen(i, &nodr); // REG = &inter + // i is now addable, prepare an indirected + // register to hold its address. + igen(i, &nodi, res); // REG = &inter nodindreg(&nodsp, types[tptr], D_SP); - nodo.xoffset += widthptr; - cgen(&nodo, &nodsp); // 0(SP) = 8(REG) -- i.data - - nodo.xoffset -= widthptr; - cgen(&nodo, &nodr); // REG = 0(REG) -- i.tab - + nodi.type = types[tptr]; + nodi.xoffset += widthptr; + cgen(&nodi, &nodsp); // 0(SP) = 8(REG) -- i.data + + regalloc(&nodo, types[tptr], res); + nodi.type = types[tptr]; + nodi.xoffset -= widthptr; + cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab + regfree(&nodi); + + regalloc(&nodr, types[tptr], &nodo); + if(n->left->xoffset == BADWIDTH) + fatal("cgen_callinter: badwidth"); + nodo.op = OINDREG; nodo.xoffset = n->left->xoffset + 3*widthptr + 8; - cgen(&nodo, &nodr); // REG = 32+offset(REG) -- i.tab->fun[f] + if(proc == 0) { + // plain call: use direct c function pointer - more efficient + cgen(&nodo, &nodr); // REG = 32+offset(REG) -- i.tab->fun[f] + proc = 3; + } else { + // go/defer. generate go func value. + gins(ALEAQ, &nodo, &nodr); // REG = &(32+offset(REG)) -- i.tab->fun[f] + } // BOTCH nodr.type = fntype; nodr.type = n->left->type; @@ -182,7 +233,7 @@ cgen_call(Node *n, int proc) nod.type = t; ginscall(&nod, proc); regfree(&nod); - goto ret; + return; } // call pointer @@ -192,16 +243,12 @@ cgen_call(Node *n, int proc) nod.type = t; ginscall(&nod, proc); regfree(&nod); - goto ret; + return; } // call direct n->left->method = 1; ginscall(n->left, proc); - - -ret: - ; } /* @@ -389,7 +436,9 @@ cgen_asop(Node *n) hard: n2.op = 0; n1.op = 0; - if(nr->ullman >= nl->ullman || nl->addable) { + if(nr->op == OLITERAL) { + // don't allocate a register for literals. + } else if(nr->ullman >= nl->ullman || nl->addable) { regalloc(&n2, nr->type, N); cgen(nr, &n2); nr = &n2; @@ -447,10 +496,10 @@ void dodiv(int op, Node *nl, Node *nr, Node *res) { int a, check; - Node n3, n4, n5; + Node n3, n4; Type *t, *t0; Node ax, dx, ax1, n31, oldax, olddx; - Prog *p1, *p2, *p3; + Prog *p1, *p2; // Have to be careful about handling // most negative int divided by -1 correctly. @@ -501,30 +550,22 @@ dodiv(int op, Node *nl, Node *nr, Node *res) gmove(&n31, &n3); } - p3 = P; + p2 = P; if(check) { nodconst(&n4, t, -1); gins(optoas(OCMP, t), &n3, &n4); - p1 = gbranch(optoas(ONE, t), T); - nodconst(&n4, t, -1LL<<(t->width*8-1)); - if(t->width == 8) { - n5 = n4; - regalloc(&n4, t, N); - gins(AMOVQ, &n5, &n4); - } - gins(optoas(OCMP, t), &ax, &n4); - p2 = gbranch(optoas(ONE, t), T); - if(op == ODIV) - gmove(&n4, res); - if(t->width == 8) - regfree(&n4); - if(op == OMOD) { + p1 = gbranch(optoas(ONE, t), T, +1); + if(op == ODIV) { + // a / (-1) is -a. + gins(optoas(OMINUS, t), N, &ax); + gmove(&ax, res); + } else { + // a % (-1) is 0. nodconst(&n4, t, 0); gmove(&n4, res); } - p3 = gbranch(AJMP, T); + p2 = gbranch(AJMP, T, 0); patch(p1, pc); - patch(p2, pc); } savex(D_DX, &dx, &olddx, res, t); if(!issigned[t->etype]) { @@ -540,7 +581,7 @@ dodiv(int op, Node *nl, Node *nr, Node *res) gmove(&dx, res); restx(&dx, &olddx); if(check) - patch(p3, pc); + patch(p2, pc); restx(&ax, &oldax); } @@ -594,134 +635,21 @@ restx(Node *x, Node *oldx) void cgen_div(int op, Node *nl, Node *nr, Node *res) { - Node n1, n2, n3, savl, savr; - Node ax, dx, oldax, olddx; - int n, w, s, a; + Node n1, n2, n3; + int w, a; Magic m; - if(nl->ullman >= UINF) { - tempname(&savl, nl->type); - cgen(nl, &savl); - nl = &savl; - } - if(nr->ullman >= UINF) { - tempname(&savr, nr->type); - cgen(nr, &savr); - nr = &savr; - } - if(nr->op != OLITERAL) goto longdiv; - - // special cases of mod/div - // by a constant w = nl->type->width*8; - s = 0; - n = powtwo(nr); - if(n >= 1000) { - // negative power of 2 - s = 1; - n -= 1000; - } - - if(n+1 >= w) { - // just sign bit - goto longdiv; - } - if(n < 0) - goto divbymul; - switch(n) { - case 0: - // divide by 1 - regalloc(&n1, nl->type, res); - cgen(nl, &n1); - if(op == OMOD) { - gins(optoas(OXOR, nl->type), &n1, &n1); - } else - if(s) - gins(optoas(OMINUS, nl->type), N, &n1); - gmove(&n1, res); - regfree(&n1); - return; - case 1: - // divide by 2 - if(op == OMOD) { - if(issigned[nl->type->etype]) - goto longmod; - regalloc(&n1, nl->type, res); - cgen(nl, &n1); - nodconst(&n2, nl->type, 1); - gins(optoas(OAND, nl->type), &n2, &n1); - gmove(&n1, res); - regfree(&n1); - return; - } - regalloc(&n1, nl->type, res); - cgen(nl, &n1); - if(!issigned[nl->type->etype]) - break; - - // develop -1 iff nl is negative - regalloc(&n2, nl->type, N); - gmove(&n1, &n2); - nodconst(&n3, nl->type, w-1); - gins(optoas(ORSH, nl->type), &n3, &n2); - gins(optoas(OSUB, nl->type), &n2, &n1); - regfree(&n2); - break; - default: - if(op == OMOD) { - if(issigned[nl->type->etype]) - goto longmod; - regalloc(&n1, nl->type, res); - cgen(nl, &n1); - nodconst(&n2, nl->type, mpgetfix(nr->val.u.xval)-1); - if(!smallintconst(&n2)) { - regalloc(&n3, nl->type, N); - gmove(&n2, &n3); - gins(optoas(OAND, nl->type), &n3, &n1); - regfree(&n3); - } else - gins(optoas(OAND, nl->type), &n2, &n1); - gmove(&n1, res); - regfree(&n1); - return; - } - regalloc(&n1, nl->type, res); - cgen(nl, &n1); - if(!issigned[nl->type->etype]) - break; - - // develop (2^k)-1 iff nl is negative - regalloc(&n2, nl->type, N); - gmove(&n1, &n2); - nodconst(&n3, nl->type, w-1); - gins(optoas(ORSH, nl->type), &n3, &n2); - nodconst(&n3, nl->type, w-n); - gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2); - gins(optoas(OADD, nl->type), &n2, &n1); - regfree(&n2); - break; - } - nodconst(&n2, nl->type, n); - gins(optoas(ORSH, nl->type), &n2, &n1); - if(s) - gins(optoas(OMINUS, nl->type), N, &n1); - gmove(&n1, res); - regfree(&n1); - return; - -divbymul: + // Front end handled 32-bit division. We only need to handle 64-bit. // try to do division by multiply by (2^w)/d // see hacker's delight chapter 10 switch(simtype[nl->type->etype]) { default: goto longdiv; - case TUINT8: - case TUINT16: - case TUINT32: case TUINT64: m.w = w; m.ud = mpgetfix(nr->val.u.xval); @@ -731,47 +659,28 @@ divbymul: if(op == OMOD) goto longmod; - regalloc(&n1, nl->type, N); - cgen(nl, &n1); // num -> reg(n1) - - savex(D_AX, &ax, &oldax, res, nl->type); - savex(D_DX, &dx, &olddx, res, nl->type); - + cgenr(nl, &n1, N); nodconst(&n2, nl->type, m.um); - gmove(&n2, &ax); // const->ax - - gins(optoas(OHMUL, nl->type), &n1, N); // imul reg - if(w == 8) { - // fix up 8-bit multiply - Node ah, dl; - nodreg(&ah, types[TUINT8], D_AH); - nodreg(&dl, types[TUINT8], D_DL); - gins(AMOVB, &ah, &dl); - } + regalloc(&n3, nl->type, res); + cgen_hmul(&n1, &n2, &n3); if(m.ua) { // need to add numerator accounting for overflow - gins(optoas(OADD, nl->type), &n1, &dx); + gins(optoas(OADD, nl->type), &n1, &n3); nodconst(&n2, nl->type, 1); - gins(optoas(ORRC, nl->type), &n2, &dx); + gins(optoas(ORROTC, nl->type), &n2, &n3); nodconst(&n2, nl->type, m.s-1); - gins(optoas(ORSH, nl->type), &n2, &dx); + gins(optoas(ORSH, nl->type), &n2, &n3); } else { nodconst(&n2, nl->type, m.s); - gins(optoas(ORSH, nl->type), &n2, &dx); // shift dx + gins(optoas(ORSH, nl->type), &n2, &n3); // shift dx } - + gmove(&n3, res); regfree(&n1); - gmove(&dx, res); - - restx(&ax, &oldax); - restx(&dx, &olddx); + regfree(&n3); return; - case TINT8: - case TINT16: - case TINT32: case TINT64: m.w = w; m.sd = mpgetfix(nr->val.u.xval); @@ -781,47 +690,32 @@ divbymul: if(op == OMOD) goto longmod; - regalloc(&n1, nl->type, N); - cgen(nl, &n1); // num -> reg(n1) - - savex(D_AX, &ax, &oldax, res, nl->type); - savex(D_DX, &dx, &olddx, res, nl->type); - + cgenr(nl, &n1, res); nodconst(&n2, nl->type, m.sm); - gmove(&n2, &ax); // const->ax - - gins(optoas(OHMUL, nl->type), &n1, N); // imul reg - if(w == 8) { - // fix up 8-bit multiply - Node ah, dl; - nodreg(&ah, types[TUINT8], D_AH); - nodreg(&dl, types[TUINT8], D_DL); - gins(AMOVB, &ah, &dl); - } + regalloc(&n3, nl->type, N); + cgen_hmul(&n1, &n2, &n3); if(m.sm < 0) { // need to add numerator - gins(optoas(OADD, nl->type), &n1, &dx); + gins(optoas(OADD, nl->type), &n1, &n3); } nodconst(&n2, nl->type, m.s); - gins(optoas(ORSH, nl->type), &n2, &dx); // shift dx + gins(optoas(ORSH, nl->type), &n2, &n3); // shift n3 nodconst(&n2, nl->type, w-1); gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg - gins(optoas(OSUB, nl->type), &n1, &dx); // added + gins(optoas(OSUB, nl->type), &n1, &n3); // added if(m.sd < 0) { // this could probably be removed // by factoring it into the multiplier - gins(optoas(OMINUS, nl->type), N, &dx); + gins(optoas(OMINUS, nl->type), N, &n3); } + gmove(&n3, res); regfree(&n1); - gmove(&dx, res); - - restx(&ax, &oldax); - restx(&dx, &olddx); + regfree(&n3); return; } goto longdiv; @@ -858,12 +752,48 @@ longmod: } /* + * generate high multiply: + * res = (nl*nr) >> width + */ +void +cgen_hmul(Node *nl, Node *nr, Node *res) +{ + Type *t; + int a; + Node n1, n2, ax, dx, *tmp; + + t = nl->type; + a = optoas(OHMUL, t); + if(nl->ullman < nr->ullman) { + tmp = nl; + nl = nr; + nr = tmp; + } + cgenr(nl, &n1, res); + cgenr(nr, &n2, N); + nodreg(&ax, t, D_AX); + gmove(&n1, &ax); + gins(a, &n2, N); + regfree(&n2); + regfree(&n1); + + if(t->width == 1) { + // byte multiply behaves differently. + nodreg(&ax, t, D_AH); + nodreg(&dx, t, D_DL); + gmove(&ax, &dx); + } + nodreg(&dx, t, D_DX); + gmove(&dx, res); +} + +/* * generate shift according to op, one of: * res = nl << nr * res = nl >> nr */ void -cgen_shift(int op, Node *nl, Node *nr, Node *res) +cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) { Node n1, n2, n3, n4, n5, cx, oldcx; int a, rcx; @@ -878,7 +808,7 @@ cgen_shift(int op, Node *nl, Node *nr, Node *res) cgen(nl, &n1); sc = mpgetfix(nr->val.u.xval); if(sc >= nl->type->width*8) { - // large shift gets 2 shifts by width + // large shift gets 2 shifts by width-1 nodconst(&n3, types[TUINT32], nl->type->width*8-1); gins(a, &n3, &n1); gins(a, &n3, &n1); @@ -937,17 +867,20 @@ cgen_shift(int op, Node *nl, Node *nr, Node *res) regfree(&n3); // test and fix up large shifts - nodconst(&n3, tcount, nl->type->width*8); - gins(optoas(OCMP, tcount), &n1, &n3); - p1 = gbranch(optoas(OLT, tcount), T); - if(op == ORSH && issigned[nl->type->etype]) { - nodconst(&n3, types[TUINT32], nl->type->width*8-1); - gins(a, &n3, &n2); - } else { - nodconst(&n3, nl->type, 0); - gmove(&n3, &n2); + if(!bounded) { + nodconst(&n3, tcount, nl->type->width*8); + gins(optoas(OCMP, tcount), &n1, &n3); + p1 = gbranch(optoas(OLT, tcount), T, +1); + if(op == ORSH && issigned[nl->type->etype]) { + nodconst(&n3, types[TUINT32], nl->type->width*8-1); + gins(a, &n3, &n2); + } else { + nodconst(&n3, nl->type, 0); + gmove(&n3, &n2); + } + patch(p1, pc); } - patch(p1, pc); + gins(a, &n1, &n2); if(oldcx.op != 0) { @@ -968,46 +901,40 @@ ret: /* * generate byte multiply: * res = nl * nr - * no 2-operand byte multiply instruction so have to do - * 16-bit multiply and take bottom half. + * there is no 2-operand byte multiply instruction so + * we do a full-width multiplication and truncate afterwards. */ void cgen_bmul(int op, Node *nl, Node *nr, Node *res) { - Node n1b, n2b, n1w, n2w; + Node n1, n2, n1b, n2b, *tmp; Type *t; int a; - if(nl->ullman >= nr->ullman) { - regalloc(&n1b, nl->type, res); - cgen(nl, &n1b); - regalloc(&n2b, nr->type, N); - cgen(nr, &n2b); - } else { - regalloc(&n2b, nr->type, N); - cgen(nr, &n2b); - regalloc(&n1b, nl->type, res); - cgen(nl, &n1b); + // largest ullman on left. + if(nl->ullman < nr->ullman) { + tmp = nl; + nl = nr; + nr = tmp; } - // copy from byte to short registers - t = types[TUINT16]; - if(issigned[nl->type->etype]) - t = types[TINT16]; - - regalloc(&n2w, t, &n2b); - cgen(&n2b, &n2w); - - regalloc(&n1w, t, &n1b); - cgen(&n1b, &n1w); + // generate operands in "8-bit" registers. + regalloc(&n1b, nl->type, res); + cgen(nl, &n1b); + regalloc(&n2b, nr->type, N); + cgen(nr, &n2b); + // perform full-width multiplication. + t = types[TUINT64]; + if(issigned[nl->type->etype]) + t = types[TINT64]; + nodreg(&n1, t, n1b.val.u.reg); + nodreg(&n2, t, n2b.val.u.reg); a = optoas(op, t); - gins(a, &n2w, &n1w); - cgen(&n1w, &n1b); - cgen(&n1b, res); + gins(a, &n2, &n1); - regfree(&n1w); - regfree(&n2w); + // truncate. + gmove(&n1, res); regfree(&n1b); regfree(&n2b); } @@ -1024,9 +951,9 @@ clearfat(Node *nl) w = nl->type->width; - if(w == 16) - if(componentgen(N, nl)) - return; + // Avoid taking the address for simple enough types. + if(componentgen(N, nl)) + return; c = w % 8; // bytes q = w / 8; // quads @@ -1060,366 +987,3 @@ clearfat(Node *nl) restx(&n1, &oldn1); restx(&ax, &oldax); } - -static int -regcmp(const void *va, const void *vb) -{ - Node *ra, *rb; - - ra = (Node*)va; - rb = (Node*)vb; - return ra->local - rb->local; -} - -static Prog* throwpc; - -void -getargs(NodeList *nn, Node *reg, int n) -{ - NodeList *l; - int i; - - throwpc = nil; - - l = nn; - for(i=0; i<n; i++) { - if(!smallintconst(l->n->right) && !isslice(l->n->right->type)) { - regalloc(reg+i, l->n->right->type, N); - cgen(l->n->right, reg+i); - } else - reg[i] = *l->n->right; - if(reg[i].local != 0) - yyerror("local used"); - reg[i].local = l->n->left->xoffset; - l = l->next; - } - qsort((void*)reg, n, sizeof(*reg), regcmp); - for(i=0; i<n; i++) - reg[i].local = 0; -} - -void -cmpandthrow(Node *nl, Node *nr) -{ - vlong cl; - Prog *p1; - int op; - Node *c; - Type *t; - Node n1; - - if(nl->op == OCONV && is64(nl->type)) - nl = nl->left; - if(nr->op == OCONV && is64(nr->type)) - nr = nr->left; - - op = OLE; - if(smallintconst(nl)) { - cl = mpgetfix(nl->val.u.xval); - if(cl == 0) - return; - if(smallintconst(nr)) - return; - // put the constant on the right - op = brrev(op); - c = nl; - nl = nr; - nr = c; - } - if(is64(nr->type) && smallintconst(nr)) - nr->type = types[TUINT32]; - - n1.op = OXXX; - t = types[TUINT32]; - if(nl->type->width != t->width || nr->type->width != t->width) { - if((is64(nl->type) && nl->op != OLITERAL) || (is64(nr->type) && nr->op != OLITERAL)) - t = types[TUINT64]; - - // Check if we need to use a temporary. - // At least one of the arguments is 32 bits - // (the len or cap) so one temporary suffices. - if(nl->type->width != t->width && nl->op != OLITERAL) { - regalloc(&n1, t, nl); - gmove(nl, &n1); - nl = &n1; - } else if(nr->type->width != t->width && nr->op != OLITERAL) { - regalloc(&n1, t, nr); - gmove(nr, &n1); - nr = &n1; - } - } - gins(optoas(OCMP, t), nl, nr); - if(n1.op != OXXX) - regfree(&n1); - if(throwpc == nil) { - p1 = gbranch(optoas(op, t), T); - throwpc = pc; - ginscall(panicslice, 0); - patch(p1, pc); - } else { - op = brcom(op); - p1 = gbranch(optoas(op, t), T); - patch(p1, throwpc); - } -} - -int -sleasy(Node *n) -{ - if(n->op != ONAME) - return 0; - if(!n->addable) - return 0; - return 1; -} - -// generate inline code for -// slicearray -// sliceslice -// arraytoslice -int -cgen_inline(Node *n, Node *res) -{ - Node nodes[5]; - Node n1, n2, nres, ntemp; - vlong v; - int i, narg, nochk; - - if(n->op != OCALLFUNC) - goto no; - if(!n->left->addable) - goto no; - if(n->left->sym == S) - goto no; - if(n->left->sym->pkg != runtimepkg) - goto no; - if(strcmp(n->left->sym->name, "slicearray") == 0) - goto slicearray; - if(strcmp(n->left->sym->name, "sliceslice") == 0) { - narg = 4; - goto sliceslice; - } - if(strcmp(n->left->sym->name, "sliceslice1") == 0) { - narg = 3; - goto sliceslice; - } - goto no; - -slicearray: - if(!sleasy(res)) - goto no; - getargs(n->list, nodes, 5); - - // if(hb[3] > nel[1]) goto throw - cmpandthrow(&nodes[3], &nodes[1]); - - // if(lb[2] > hb[3]) goto throw - cmpandthrow(&nodes[2], &nodes[3]); - - // len = hb[3] - lb[2] (destroys hb) - n2 = *res; - n2.xoffset += Array_nel; - n2.type = types[TUINT32]; - - if(smallintconst(&nodes[3]) && smallintconst(&nodes[2])) { - v = mpgetfix(nodes[3].val.u.xval) - - mpgetfix(nodes[2].val.u.xval); - nodconst(&n1, types[TUINT32], v); - gins(optoas(OAS, types[TUINT32]), &n1, &n2); - } else { - regalloc(&n1, types[TUINT32], &nodes[3]); - gmove(&nodes[3], &n1); - if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0) - gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1); - gins(optoas(OAS, types[TUINT32]), &n1, &n2); - regfree(&n1); - } - - // cap = nel[1] - lb[2] (destroys nel) - n2 = *res; - n2.xoffset += Array_cap; - n2.type = types[TUINT32]; - - if(smallintconst(&nodes[1]) && smallintconst(&nodes[2])) { - v = mpgetfix(nodes[1].val.u.xval) - - mpgetfix(nodes[2].val.u.xval); - nodconst(&n1, types[TUINT32], v); - gins(optoas(OAS, types[TUINT32]), &n1, &n2); - } else { - regalloc(&n1, types[TUINT32], &nodes[1]); - gmove(&nodes[1], &n1); - if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0) - gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1); - gins(optoas(OAS, types[TUINT32]), &n1, &n2); - regfree(&n1); - } - - // if slice could be too big, dereference to - // catch nil array pointer. - if(nodes[0].op == OREGISTER && nodes[0].type->type->width >= unmappedzero) { - n2 = nodes[0]; - n2.xoffset = 0; - n2.op = OINDREG; - n2.type = types[TUINT8]; - gins(ATESTB, nodintconst(0), &n2); - } - - // ary = old[0] + (lb[2] * width[4]) (destroys old) - n2 = *res; - n2.xoffset += Array_array; - n2.type = types[tptr]; - - if(smallintconst(&nodes[2]) && smallintconst(&nodes[4])) { - v = mpgetfix(nodes[2].val.u.xval) * - mpgetfix(nodes[4].val.u.xval); - if(v != 0) - ginscon(optoas(OADD, types[tptr]), v, &nodes[0]); - } else { - regalloc(&n1, types[tptr], &nodes[2]); - gmove(&nodes[2], &n1); - if(!smallintconst(&nodes[4]) || mpgetfix(nodes[4].val.u.xval) != 1) - gins(optoas(OMUL, types[tptr]), &nodes[4], &n1); - gins(optoas(OADD, types[tptr]), &n1, &nodes[0]); - regfree(&n1); - } - gins(optoas(OAS, types[tptr]), &nodes[0], &n2); - - for(i=0; i<5; i++) { - if(nodes[i].op == OREGISTER) - regfree(&nodes[i]); - } - return 1; - -sliceslice: - nochk = n->etype; // skip bounds checking - ntemp.op = OXXX; - if(!sleasy(n->list->n->right)) { - Node *n0; - - n0 = n->list->n->right; - tempname(&ntemp, res->type); - cgen(n0, &ntemp); - n->list->n->right = &ntemp; - getargs(n->list, nodes, narg); - n->list->n->right = n0; - } else - getargs(n->list, nodes, narg); - - nres = *res; // result - if(!sleasy(res)) { - if(ntemp.op == OXXX) - tempname(&ntemp, res->type); - nres = ntemp; - } - - if(narg == 3) { // old[lb:] - // move width to where it would be for old[lb:hb] - nodes[3] = nodes[2]; - nodes[2].op = OXXX; - - // if(lb[1] > old.nel[0]) goto throw; - n2 = nodes[0]; - n2.xoffset += Array_nel; - n2.type = types[TUINT32]; - if(!nochk) - cmpandthrow(&nodes[1], &n2); - - // ret.nel = old.nel[0]-lb[1]; - n2 = nodes[0]; - n2.xoffset += Array_nel; - n2.type = types[TUINT32]; - - regalloc(&n1, types[TUINT32], N); - gins(optoas(OAS, types[TUINT32]), &n2, &n1); - if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0) - gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1); - - n2 = nres; - n2.xoffset += Array_nel; - n2.type = types[TUINT32]; - gins(optoas(OAS, types[TUINT32]), &n1, &n2); - regfree(&n1); - } else { // old[lb:hb] - n2 = nodes[0]; - n2.xoffset += Array_cap; - n2.type = types[TUINT32]; - if(!nochk) { - // if(hb[2] > old.cap[0]) goto throw; - cmpandthrow(&nodes[2], &n2); - // if(lb[1] > hb[2]) goto throw; - cmpandthrow(&nodes[1], &nodes[2]); - } - // ret.len = hb[2]-lb[1]; (destroys hb[2]) - n2 = nres; - n2.xoffset += Array_nel; - n2.type = types[TUINT32]; - - if(smallintconst(&nodes[2]) && smallintconst(&nodes[1])) { - v = mpgetfix(nodes[2].val.u.xval) - - mpgetfix(nodes[1].val.u.xval); - nodconst(&n1, types[TUINT32], v); - gins(optoas(OAS, types[TUINT32]), &n1, &n2); - } else { - regalloc(&n1, types[TUINT32], &nodes[2]); - gmove(&nodes[2], &n1); - if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0) - gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1); - gins(optoas(OAS, types[TUINT32]), &n1, &n2); - regfree(&n1); - } - } - - // ret.cap = old.cap[0]-lb[1]; (uses hb[2]) - n2 = nodes[0]; - n2.xoffset += Array_cap; - n2.type = types[TUINT32]; - - regalloc(&n1, types[TUINT32], &nodes[2]); - gins(optoas(OAS, types[TUINT32]), &n2, &n1); - if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0) - gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1); - - n2 = nres; - n2.xoffset += Array_cap; - n2.type = types[TUINT32]; - - gins(optoas(OAS, types[TUINT32]), &n1, &n2); - regfree(&n1); - - // ret.array = old.array[0]+lb[1]*width[3]; (uses lb[1]) - n2 = nodes[0]; - n2.xoffset += Array_array; - n2.type = types[tptr]; - regalloc(&n1, types[tptr], &nodes[1]); - if(smallintconst(&nodes[1]) && smallintconst(&nodes[3])) { - gins(optoas(OAS, types[tptr]), &n2, &n1); - v = mpgetfix(nodes[1].val.u.xval) * - mpgetfix(nodes[3].val.u.xval); - if(v != 0) - ginscon(optoas(OADD, types[tptr]), v, &n1); - } else { - gmove(&nodes[1], &n1); - if(!smallintconst(&nodes[3]) || mpgetfix(nodes[3].val.u.xval) != 1) - gins(optoas(OMUL, types[tptr]), &nodes[3], &n1); - gins(optoas(OADD, types[tptr]), &n2, &n1); - } - - n2 = nres; - n2.xoffset += Array_array; - n2.type = types[tptr]; - gins(optoas(OAS, types[tptr]), &n1, &n2); - regfree(&n1); - - for(i=0; i<4; i++) { - if(nodes[i].op == OREGISTER) - regfree(&nodes[i]); - } - - if(!sleasy(res)) { - cgen(&nres, res); - } - return 1; - -no: - return 0; -} diff --git a/src/cmd/6g/gobj.c b/src/cmd/6g/gobj.c index 8c9208374..508a3548f 100644 --- a/src/cmd/6g/gobj.c +++ b/src/cmd/6g/gobj.c @@ -94,9 +94,9 @@ zaddr(Biobuf *b, Addr *a, int s, int gotype) switch(a->type) { case D_BRANCH: - if(a->branch == nil) + if(a->u.branch == nil) fatal("unpatched branch"); - a->offset = a->branch->loc; + a->offset = a->u.branch->loc; default: t |= T_TYPE; @@ -139,7 +139,7 @@ zaddr(Biobuf *b, Addr *a, int s, int gotype) if(t & T_SYM) /* implies sym */ Bputc(b, s); if(t & T_FCONST) { - ieeedtod(&e, a->dval); + ieeedtod(&e, a->u.dval); l = e; Bputc(b, l); Bputc(b, l>>8); @@ -153,7 +153,7 @@ zaddr(Biobuf *b, Addr *a, int s, int gotype) return; } if(t & T_SCONST) { - n = a->sval; + n = a->u.sval; for(i=0; i<NSNAME; i++) { Bputc(b, *n); n++; @@ -295,7 +295,7 @@ dsname(Sym *s, int off, char *t, int n) p->to.type = D_SCONST; p->to.index = D_NONE; - memmove(p->to.sval, t, n); + memmove(p->to.u.sval, t, n); return off + n; } @@ -312,8 +312,8 @@ datastring(char *s, int len, Addr *a) a->type = D_EXTERN; a->sym = sym; a->node = sym->def; - a->offset = widthptr+4; // skip header - a->etype = TINT32; + a->offset = widthptr+widthint; // skip header + a->etype = simtype[TINT]; } /* @@ -324,7 +324,7 @@ void datagostring(Strlit *sval, Addr *a) { Sym *sym; - + sym = stringsym(sval->s, sval->len); a->type = D_EXTERN; a->sym = sym; @@ -364,13 +364,13 @@ gdatacomplex(Node *nam, Mpcplx *cval) p = gins(ADATA, nam, N); p->from.scale = w; p->to.type = D_FCONST; - p->to.dval = mpgetflt(&cval->real); + p->to.u.dval = mpgetflt(&cval->real); p = gins(ADATA, nam, N); p->from.scale = w; p->from.offset += w; p->to.type = D_FCONST; - p->to.dval = mpgetflt(&cval->imag); + p->to.u.dval = mpgetflt(&cval->imag); } void @@ -386,10 +386,10 @@ gdatastring(Node *nam, Strlit *sval) p->to.type = D_ADDR; //print("%P\n", p); - nodconst(&nod1, types[TINT32], sval->len); + nodconst(&nod1, types[TINT], sval->len); p = gins(ADATA, nam, &nod1); - p->from.scale = types[TINT32]->width; - p->from.offset += types[tptr]->width; + p->from.scale = widthint; + p->from.offset += widthptr; } int @@ -408,7 +408,7 @@ dstringptr(Sym *s, int off, char *str) datastring(str, strlen(str)+1, &p->to); p->to.index = p->to.type; p->to.type = D_ADDR; - p->to.etype = TINT32; + p->to.etype = simtype[TINT]; off += widthptr; return off; @@ -432,7 +432,7 @@ dgostrlitptr(Sym *s, int off, Strlit *lit) datagostring(lit, &p->to); p->to.index = p->to.type; p->to.type = D_ADDR; - p->to.etype = TINT32; + p->to.etype = simtype[TINT]; off += widthptr; return off; diff --git a/src/cmd/6g/gsubr.c b/src/cmd/6g/gsubr.c index ededcf673..fc5407a1f 100644 --- a/src/cmd/6g/gsubr.c +++ b/src/cmd/6g/gsubr.c @@ -103,9 +103,13 @@ dumpdata(void) /* * generate a branch. * t is ignored. + * likely values are for branch prediction: + * -1 unlikely + * 0 no opinion + * +1 likely */ Prog* -gbranch(int as, Type *t) +gbranch(int as, Type *t, int likely) { Prog *p; @@ -113,7 +117,11 @@ gbranch(int as, Type *t) p = prog(as); p->to.type = D_BRANCH; - p->to.branch = P; + p->to.u.branch = P; + if(as != AJMP && likely != 0) { + p->from.type = D_CONST; + p->from.offset = likely > 0; + } return p; } @@ -125,7 +133,7 @@ patch(Prog *p, Prog *to) { if(p->to.type != D_BRANCH) fatal("patch: not a branch"); - p->to.branch = to; + p->to.u.branch = to; p->to.offset = to->loc; } @@ -136,8 +144,8 @@ unpatch(Prog *p) if(p->to.type != D_BRANCH) fatal("unpatch: not a branch"); - q = p->to.branch; - p->to.branch = P; + q = p->to.u.branch; + p->to.u.branch = P; p->to.offset = 0; return q; } @@ -165,44 +173,6 @@ newplist(void) } void -clearstk(void) -{ - Plist *pl; - Prog *p1, *p2; - Node sp, di, cx, con, ax; - - if((uint32)plast->firstpc->to.offset <= 0) - return; - - // reestablish context for inserting code - // at beginning of function. - pl = plast; - p1 = pl->firstpc; - p2 = p1->link; - pc = mal(sizeof(*pc)); - clearp(pc); - p1->link = pc; - - // zero stack frame - nodreg(&sp, types[tptr], D_SP); - nodreg(&di, types[tptr], D_DI); - nodreg(&cx, types[TUINT64], D_CX); - nodconst(&con, types[TUINT64], (uint32)p1->to.offset / widthptr); - gins(ACLD, N, N); - gins(AMOVQ, &sp, &di); - gins(AMOVQ, &con, &cx); - nodconst(&con, types[TUINT64], 0); - nodreg(&ax, types[TUINT64], D_AX); - gins(AMOVQ, &con, &ax); - gins(AREP, N, N); - gins(ASTOSQ, N, N); - - // continue with original code. - gins(ANOP, N, N)->link = p2; - pc = P; -} - -void gused(Node *n) { gins(ANOP, n, N); // used @@ -213,22 +183,23 @@ gjmp(Prog *to) { Prog *p; - p = gbranch(AJMP, T); + p = gbranch(AJMP, T, 0); if(to != P) patch(p, to); return p; } void -ggloblnod(Node *nam, int32 width) +ggloblnod(Node *nam) { Prog *p; p = gins(AGLOBL, nam, N); p->lineno = nam->lineno; + p->from.gotype = ngotype(nam); p->to.sym = S; p->to.type = D_CONST; - p->to.offset = width; + p->to.offset = nam->type->width; if(nam->readonly) p->from.scale = RODATA; if(nam->type != T && !haspointers(nam->type)) @@ -236,7 +207,18 @@ ggloblnod(Node *nam, int32 width) } void -ggloblsym(Sym *s, int32 width, int dupok) +gtrack(Sym *s) +{ + Prog *p; + + p = gins(AUSEFIELD, N, N); + p->from.type = D_EXTERN; + p->from.index = D_NONE; + p->from.sym = s; +} + +void +ggloblsym(Sym *s, int32 width, int dupok, int rodata) { Prog *p; @@ -248,8 +230,9 @@ ggloblsym(Sym *s, int32 width, int dupok) p->to.index = D_NONE; p->to.offset = width; if(dupok) - p->from.scale = DUPOK; - p->from.scale |= RODATA; + p->from.scale |= DUPOK; + if(rodata) + p->from.scale |= RODATA; } int @@ -272,11 +255,12 @@ isfat(Type *t) * call afunclit to fix up the argument. */ void -afunclit(Addr *a) +afunclit(Addr *a, Node *n) { if(a->type == D_ADDR && a->index == D_EXTERN) { a->type = D_EXTERN; a->index = D_NONE; + a->sym = n->sym; } } @@ -300,7 +284,7 @@ ginit(void) reg[i] = 1; for(i=D_AX; i<=D_R15; i++) reg[i] = 0; - for(i=D_X0; i<=D_X7; i++) + for(i=D_X0; i<=D_X15; i++) reg[i] = 0; for(i=0; i<nelem(resvd); i++) @@ -318,7 +302,7 @@ gclean(void) for(i=D_AX; i<=D_R15; i++) if(reg[i]) yyerror("reg %R left allocated\n", i); - for(i=D_X0; i<=D_X7; i++) + for(i=D_X0; i<=D_X15; i++) if(reg[i]) yyerror("reg %R left allocated\n", i); } @@ -388,10 +372,10 @@ regalloc(Node *n, Type *t, Node *o) case TFLOAT64: if(o != N && o->op == OREGISTER) { i = o->val.u.reg; - if(i >= D_X0 && i <= D_X7) + if(i >= D_X0 && i <= D_X15) goto out; } - for(i=D_X0; i<=D_X7; i++) + for(i=D_X0; i<=D_X15; i++) if(reg[i] == 0) goto out; fatal("out of floating registers"); @@ -572,6 +556,10 @@ ismem(Node *n) case ONAME: case OPARAM: return 1; + case OADDR: + if(flag_largemodel) + return 1; + break; } return 0; } @@ -616,7 +604,7 @@ gmove(Node *f, Node *t) Prog *p1, *p2; if(debug['M']) - print("gmove %N -> %N\n", f, t); + print("gmove %lN -> %lN\n", f, t); ft = simsimtype(f->type); tt = simsimtype(t->type); @@ -706,11 +694,14 @@ gmove(Node *f, Node *t) case CASE(TINT32, TUINT32): case CASE(TUINT32, TINT32): case CASE(TUINT32, TUINT32): + a = AMOVL; + break; + case CASE(TINT64, TINT32): // truncate case CASE(TUINT64, TINT32): case CASE(TINT64, TUINT32): case CASE(TUINT64, TUINT32): - a = AMOVL; + a = AMOVQL; break; case CASE(TINT64, TINT64): // same size @@ -822,9 +813,9 @@ gmove(Node *f, Node *t) // algorithm is: // if small enough, use native float64 -> int64 conversion. // otherwise, subtract 2^63, convert, and add it back. - a = ACVTSS2SQ; + a = ACVTTSS2SQ; if(ft == TFLOAT64) - a = ACVTSD2SQ; + a = ACVTTSD2SQ; bignodes(); regalloc(&r1, types[ft], N); regalloc(&r2, types[tt], t); @@ -832,9 +823,9 @@ gmove(Node *f, Node *t) regalloc(&r4, types[tt], N); gins(optoas(OAS, f->type), f, &r1); gins(optoas(OCMP, f->type), &bigf, &r1); - p1 = gbranch(optoas(OLE, f->type), T); + p1 = gbranch(optoas(OLE, f->type), T, +1); gins(a, &r1, &r2); - p2 = gbranch(AJMP, T); + p2 = gbranch(AJMP, T, 0); patch(p1, pc); gins(optoas(OAS, f->type), &bigf, &r3); gins(optoas(OSUB, f->type), &r3, &r1); @@ -903,9 +894,9 @@ gmove(Node *f, Node *t) regalloc(&r4, f->type, N); gmove(f, &r1); gins(ACMPQ, &r1, &zero); - p1 = gbranch(AJLT, T); + p1 = gbranch(AJLT, T, +1); gins(a, &r1, &r2); - p2 = gbranch(AJMP, T); + p2 = gbranch(AJMP, T, 0); patch(p1, pc); gmove(&r1, &r3); gins(ASHRQ, &one, &r3); @@ -1016,6 +1007,13 @@ gins(int as, Node *f, Node *t) case AMOVSD: if(f != N && t != N && samaddr(f, t)) return nil; + break; + + case ALEAQ: + if(f != N && isconst(f, CTNIL)) { + fatal("gins LEAQ nil %T", f->type); + } + break; } memset(&af, 0, sizeof af); @@ -1047,13 +1045,34 @@ gins(int as, Node *f, Node *t) w = 8; break; } - if(w != 0 && f != N && (af.width > w || at.width > w)) { + if(w != 0 && ((f != N && af.width < w) || (t != N && at.width > w))) { + dump("f", f); + dump("t", t); fatal("bad width: %P (%d, %d)\n", p, af.width, at.width); } return p; } +// Generate an instruction referencing *n +// to force segv on nil pointer dereference. +void +checkref(Node *n) +{ + Node m; + + if(n->type->type->width < unmappedzero) + return; + + regalloc(&m, types[TUINTPTR], n); + cgen(n, &m); + m.xoffset = 0; + m.op = OINDREG; + m.type = types[TUINT8]; + gins(ATESTB, nodintconst(0), &m); + regfree(&m); +} + static void checkoffset(Addr *a, int canemitcode) { @@ -1079,14 +1098,22 @@ checkoffset(Addr *a, int canemitcode) void naddr(Node *n, Addr *a, int canemitcode) { + Prog *p; + a->scale = 0; a->index = D_NONE; a->type = D_NONE; a->gotype = S; a->node = N; + a->width = 0; if(n == N) return; + if(n->type != T && n->type->etype != TIDEAL) { + dowidth(n->type); + a->width = n->type->width; + } + switch(n->op) { default: fatal("naddr: bad %O %D", n->op, a); @@ -1134,15 +1161,27 @@ naddr(Node *n, Addr *a, int canemitcode) a->type = D_PARAM; a->node = n->left->orig; break; + + case OCLOSUREVAR: + if(!canemitcode) + fatal("naddr OCLOSUREVAR cannot emit code"); + p = gins(AMOVQ, N, N); + p->from.type = D_DX+D_INDIR; + p->from.offset = n->xoffset; + p->to.type = D_BX; + a->type = D_BX; + a->sym = S; + break; + + case OCFUNC: + naddr(n->left, a, canemitcode); + a->sym = n->left->sym; + break; case ONAME: a->etype = 0; - a->width = 0; - if(n->type != T) { + if(n->type != T) a->etype = simtype[n->type->etype]; - a->width = n->type->width; - a->gotype = ngotype(n); - } a->offset = n->xoffset; a->sym = n->sym; a->node = n->orig; @@ -1173,6 +1212,8 @@ naddr(Node *n, Addr *a, int canemitcode) case PFUNC: a->index = D_EXTERN; a->type = D_ADDR; + a->width = widthptr; + a->sym = funcsym(a->sym); break; } break; @@ -1184,7 +1225,7 @@ naddr(Node *n, Addr *a, int canemitcode) break; case CTFLT: a->type = D_FCONST; - a->dval = mpgetflt(n->val.u.fval); + a->u.dval = mpgetflt(n->val.u.fval); break; case CTINT: case CTRUNE: @@ -1210,6 +1251,7 @@ naddr(Node *n, Addr *a, int canemitcode) case OADDR: naddr(n->left, a, canemitcode); + a->width = widthptr; if(a->type >= D_INDIR) { a->type -= D_INDIR; break; @@ -1239,9 +1281,9 @@ naddr(Node *n, Addr *a, int canemitcode) naddr(n->left, a, canemitcode); if(a->type == D_CONST && a->offset == 0) break; // len(nil) - a->etype = TUINT32; + a->etype = simtype[TUINT]; a->offset += Array_nel; - a->width = 4; + a->width = widthint; if(a->offset >= unmappedzero && a->offset-Array_nel < unmappedzero) checkoffset(a, canemitcode); break; @@ -1251,9 +1293,9 @@ naddr(Node *n, Addr *a, int canemitcode) naddr(n->left, a, canemitcode); if(a->type == D_CONST && a->offset == 0) break; // cap(nil) - a->etype = TUINT32; + a->etype = simtype[TUINT]; a->offset += Array_cap; - a->width = 4; + a->width = widthint; if(a->offset >= unmappedzero && a->offset-Array_cap < unmappedzero) checkoffset(a, canemitcode); break; @@ -1645,6 +1687,28 @@ optoas(int op, Type *t) a = AXORQ; break; + case CASE(OLROT, TINT8): + case CASE(OLROT, TUINT8): + a = AROLB; + break; + + case CASE(OLROT, TINT16): + case CASE(OLROT, TUINT16): + a = AROLW; + break; + + case CASE(OLROT, TINT32): + case CASE(OLROT, TUINT32): + case CASE(OLROT, TPTR32): + a = AROLL; + break; + + case CASE(OLROT, TINT64): + case CASE(OLROT, TUINT64): + case CASE(OLROT, TPTR64): + a = AROLQ; + break; + case CASE(OLSH, TINT8): case CASE(OLSH, TUINT8): a = ASHLB; @@ -1701,23 +1765,23 @@ optoas(int op, Type *t) a = ASARQ; break; - case CASE(ORRC, TINT8): - case CASE(ORRC, TUINT8): + case CASE(ORROTC, TINT8): + case CASE(ORROTC, TUINT8): a = ARCRB; break; - case CASE(ORRC, TINT16): - case CASE(ORRC, TUINT16): + case CASE(ORROTC, TINT16): + case CASE(ORROTC, TUINT16): a = ARCRW; break; - case CASE(ORRC, TINT32): - case CASE(ORRC, TUINT32): + case CASE(ORROTC, TINT32): + case CASE(ORROTC, TUINT32): a = ARCRL; break; - case CASE(ORRC, TINT64): - case CASE(ORRC, TUINT64): + case CASE(ORROTC, TINT64): + case CASE(ORROTC, TUINT64): a = ARCRQ; break; @@ -1919,6 +1983,9 @@ sudoaddable(int as, Node *n, Addr *a) goto odot; case OINDEX: + return 0; + // disabled: OINDEX case is now covered by agenr + // for a more suitable register allocation pattern. if(n->left->type->etype == TSTRING) return 0; goto oindex; @@ -2053,32 +2120,20 @@ oindex: } // check bounds - if(!debug['B'] && !n->etype) { + if(!debug['B'] && !n->bounded) { // check bounds n4.op = OXXX; - t = types[TUINT32]; + t = types[simtype[TUINT]]; if(o & ODynam) { if(o & OAddable) { n2 = *l; n2.xoffset += Array_nel; - n2.type = types[TUINT32]; - if(is64(r->type)) { - t = types[TUINT64]; - regalloc(&n4, t, N); - gmove(&n2, &n4); - n2 = n4; - } + n2.type = types[simtype[TUINT]]; } else { n2 = *reg; n2.xoffset = Array_nel; n2.op = OINDREG; - n2.type = types[TUINT32]; - if(is64(r->type)) { - t = types[TUINT64]; - regalloc(&n4, t, N); - gmove(&n2, &n4); - n2 = n4; - } + n2.type = types[simtype[TUINT]]; } } else { if(is64(r->type)) @@ -2086,10 +2141,10 @@ oindex: nodconst(&n2, types[TUINT64], l->type->bound); } gins(optoas(OCMP, t), reg1, &n2); - p1 = gbranch(optoas(OLT, t), T); + p1 = gbranch(optoas(OLT, t), T, +1); if(n4.op != OXXX) regfree(&n4); - ginscall(panicindex, 0); + ginscall(panicindex, -1); patch(p1, pc); } @@ -2140,19 +2195,19 @@ oindex_const: reg->op = OEMPTY; reg1->op = OEMPTY; - regalloc(reg, types[tptr], N); - agen(l, reg); - if(o & ODynam) { - if(!debug['B'] && !n->etype) { + regalloc(reg, types[tptr], N); + agen(l, reg); + + if(!debug['B'] && !n->bounded) { n1 = *reg; n1.op = OINDREG; n1.type = types[tptr]; n1.xoffset = Array_nel; nodconst(&n2, types[TUINT64], v); - gins(optoas(OCMP, types[TUINT32]), &n1, &n2); - p1 = gbranch(optoas(OGT, types[TUINT32]), T); - ginscall(panicindex, 0); + gins(optoas(OCMP, types[simtype[TUINT]]), &n1, &n2); + p1 = gbranch(optoas(OGT, types[simtype[TUINT]]), T, +1); + ginscall(panicindex, -1); patch(p1, pc); } @@ -2162,14 +2217,24 @@ oindex_const: n1.xoffset = Array_array; gmove(&n1, reg); + n2 = *reg; + n2.op = OINDREG; + n2.xoffset = v*w; + a->type = D_NONE; + a->index = D_NONE; + naddr(&n2, a, 1); + goto yes; } - - n2 = *reg; - n2.op = OINDREG; - n2.xoffset = v*w; + + igen(l, &n1, N); + if(n1.op == OINDREG) { + *reg = n1; + reg->op = OREGISTER; + } + n1.xoffset += v*w; a->type = D_NONE; - a->index = D_NONE; - naddr(&n2, a, 1); + a->index= D_NONE; + naddr(&n1, a, 1); goto yes; oindex_const_sudo: @@ -2180,13 +2245,13 @@ oindex_const_sudo: } // slice indexed by a constant - if(!debug['B'] && !n->etype) { + if(!debug['B'] && !n->bounded) { a->offset += Array_nel; nodconst(&n2, types[TUINT64], v); - p1 = gins(optoas(OCMP, types[TUINT32]), N, &n2); + p1 = gins(optoas(OCMP, types[simtype[TUINT]]), N, &n2); p1->from = *a; - p1 = gbranch(optoas(OGT, types[TUINT32]), T); - ginscall(panicindex, 0); + p1 = gbranch(optoas(OGT, types[simtype[TUINT]]), T, +1); + ginscall(panicindex, -1); patch(p1, pc); a->offset -= Array_nel; } diff --git a/src/cmd/6g/list.c b/src/cmd/6g/list.c index ad63f7d29..9d27a6a09 100644 --- a/src/cmd/6g/list.c +++ b/src/cmd/6g/list.c @@ -107,10 +107,10 @@ Dconv(Fmt *fp) break; case D_BRANCH: - if(a->branch == nil) + if(a->u.branch == nil) snprint(str, sizeof(str), "<nil>"); else - snprint(str, sizeof(str), "%d", a->branch->loc); + snprint(str, sizeof(str), "%d", a->u.branch->loc); break; case D_EXTERN: @@ -140,11 +140,11 @@ Dconv(Fmt *fp) break; case D_FCONST: - snprint(str, sizeof(str), "$(%.17e)", a->dval); + snprint(str, sizeof(str), "$(%.17e)", a->u.dval); break; case D_SCONST: - snprint(str, sizeof(str), "$\"%Y\"", a->sval); + snprint(str, sizeof(str), "$\"%Y\"", a->u.sval); break; case D_ADDR: @@ -161,7 +161,10 @@ brk: strcat(str, s); } conv: - return fmtstrcpy(fp, str); + fmtstrcpy(fp, str); + if(a->gotype) + fmtprint(fp, "{%s}", a->gotype->name); + return 0; } static char* regstr[] = diff --git a/src/cmd/6g/opt.h b/src/cmd/6g/opt.h index 9a8866b8d..9b0ea1b5a 100644 --- a/src/cmd/6g/opt.h +++ b/src/cmd/6g/opt.h @@ -34,8 +34,6 @@ #define D_HI D_NONE #define D_LO D_NONE -#define isregtype(t) ((t)>= D_AX && (t)<=D_R15) - #define BLOAD(r) band(bnot(r->refbehind), r->refahead) #define BSTORE(r) band(bnot(r->calbehind), r->calahead) #define LOAD(r) (~r->refbehind.b[z] & r->refahead.b[z]) @@ -49,12 +47,16 @@ typedef struct Reg Reg; typedef struct Rgn Rgn; +// A Reg is a wrapper around a single Prog (one instruction) that holds +// register optimization information while the optimizer runs. +// r->prog is the instruction. +// r->prog->regp points back to r. struct Reg { - Bits set; - Bits use1; - Bits use2; + Bits set; // variables written by this instruction. + Bits use1; // variables read by prog->from. + Bits use2; // variables read by prog->to. Bits refbehind; Bits refahead; @@ -70,13 +72,13 @@ struct Reg uint16 loop; // x5 for every loop uchar refset; // diagnostic generated - Reg* p1; - Reg* p2; + Reg* p1; // predecessors of this instruction: p1, + Reg* p2; // and then p2 linked though p2link. Reg* p2link; - Reg* s1; + Reg* s1; // successors of this instruction (at most two: s1 and s2). Reg* s2; - Reg* link; - Prog* prog; + Reg* link; // next instruction in function code + Prog* prog; // actual instruction }; #define R ((Reg*)0) diff --git a/src/cmd/6g/peep.c b/src/cmd/6g/peep.c index 3710033b2..569655786 100644 --- a/src/cmd/6g/peep.c +++ b/src/cmd/6g/peep.c @@ -34,6 +34,10 @@ #include "opt.h" static void conprop(Reg *r); +static void elimshortmov(Reg *r); +static int prevl(Reg *r, int reg); +static void pushback(Reg *r); +static int regconsttyp(Adr*); // do we need the carry bit static int @@ -45,11 +49,17 @@ needc(Prog *p) case AADCQ: case ASBBL: case ASBBQ: + case ARCRB: + case ARCRW: case ARCRL: case ARCRQ: return 1; + case AADDB: + case AADDW: case AADDL: case AADDQ: + case ASUBB: + case ASUBW: case ASUBL: case ASUBQ: case AJMP: @@ -122,9 +132,14 @@ peep(void) case AGLOBL: case ANAME: case ASIGNAME: + case ALOCALS: + case ATYPE: p = p->link; } } + + // byte, word arithmetic elimination. + elimshortmov(r); // constant propagation // find MOV $con,R followed by @@ -200,6 +215,7 @@ loop1: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: + case AMOVQL: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { @@ -272,6 +288,115 @@ loop1: } if(t) goto loop1; + + // MOVLQZX removal. + // The MOVLQZX exists to avoid being confused for a + // MOVL that is just copying 32-bit data around during + // copyprop. Now that copyprop is done, remov MOVLQZX R1, R2 + // if it is dominated by an earlier ADDL/MOVL/etc into R1 that + // will have already cleared the high bits. + // + // MOVSD removal. + // We never use packed registers, so a MOVSD between registers + // can be replaced by MOVAPD, which moves the pair of float64s + // instead of just the lower one. We only use the lower one, but + // the processor can do better if we do moves using both. + for(r=firstr; r!=R; r=r->link) { + p = r->prog; + if(p->as == AMOVLQZX) + if(regtyp(&p->from)) + if(p->from.type == p->to.type) + if(prevl(r, p->from.type)) + excise(r); + + if(p->as == AMOVSD) + if(regtyp(&p->from)) + if(regtyp(&p->to)) + p->as = AMOVAPD; + } + + // load pipelining + // push any load from memory as early as possible + // to give it time to complete before use. + for(r=firstr; r!=R; r=r->link) { + p = r->prog; + switch(p->as) { + case AMOVB: + case AMOVW: + case AMOVL: + case AMOVQ: + case AMOVLQZX: + if(regtyp(&p->to) && !regconsttyp(&p->from)) + pushback(r); + } + } +} + +static void +pushback(Reg *r0) +{ + Reg *r, *b; + Prog *p0, *p, t; + + b = R; + p0 = r0->prog; + for(r=uniqp(r0); r!=R && uniqs(r)!=R; r=uniqp(r)) { + p = r->prog; + if(p->as != ANOP) { + if(!regconsttyp(&p->from) || !regtyp(&p->to)) + break; + if(copyu(p, &p0->to, A) || copyu(p0, &p->to, A)) + break; + } + if(p->as == ACALL) + break; + b = r; + } + + if(b == R) { + if(debug['v']) { + print("no pushback: %P\n", r0->prog); + if(r) + print("\t%P [%d]\n", r->prog, uniqs(r)!=R); + } + return; + } + + if(debug['v']) { + print("pushback\n"); + for(r=b;; r=r->link) { + print("\t%P\n", r->prog); + if(r == r0) + break; + } + } + + t = *r0->prog; + for(r=uniqp(r0);; r=uniqp(r)) { + p0 = r->link->prog; + p = r->prog; + p0->as = p->as; + p0->lineno = p->lineno; + p0->from = p->from; + p0->to = p->to; + + if(r == b) + break; + } + p0 = r->prog; + p0->as = t.as; + p0->lineno = t.lineno; + p0->from = t.from; + p0->to = t.to; + + if(debug['v']) { + print("\tafter\n"); + for(r=b;; r=r->link) { + print("\t%P\n", r->prog); + if(r == r0) + break; + } + } } void @@ -335,6 +460,155 @@ regtyp(Adr *a) return 0; } +// movb elimination. +// movb is simulated by the linker +// when a register other than ax, bx, cx, dx +// is used, so rewrite to other instructions +// when possible. a movb into a register +// can smash the entire 32-bit register without +// causing any trouble. +static void +elimshortmov(Reg *r) +{ + Prog *p; + + for(r=firstr; r!=R; r=r->link) { + p = r->prog; + if(regtyp(&p->to)) { + switch(p->as) { + case AINCB: + case AINCW: + p->as = AINCQ; + break; + case ADECB: + case ADECW: + p->as = ADECQ; + break; + case ANEGB: + case ANEGW: + p->as = ANEGQ; + break; + case ANOTB: + case ANOTW: + p->as = ANOTQ; + break; + } + if(regtyp(&p->from) || p->from.type == D_CONST) { + // move or artihmetic into partial register. + // from another register or constant can be movl. + // we don't switch to 64-bit arithmetic if it can + // change how the carry bit is set (and the carry bit is needed). + switch(p->as) { + case AMOVB: + case AMOVW: + p->as = AMOVQ; + break; + case AADDB: + case AADDW: + if(!needc(p->link)) + p->as = AADDQ; + break; + case ASUBB: + case ASUBW: + if(!needc(p->link)) + p->as = ASUBQ; + break; + case AMULB: + case AMULW: + p->as = AMULQ; + break; + case AIMULB: + case AIMULW: + p->as = AIMULQ; + break; + case AANDB: + case AANDW: + p->as = AANDQ; + break; + case AORB: + case AORW: + p->as = AORQ; + break; + case AXORB: + case AXORW: + p->as = AXORQ; + break; + case ASHLB: + case ASHLW: + p->as = ASHLQ; + break; + } + } else if(p->from.type >= D_NONE) { + // explicit zero extension, but don't + // do that if source is a byte register + // (only AH can occur and it's forbidden). + switch(p->as) { + case AMOVB: + p->as = AMOVBQZX; + break; + case AMOVW: + p->as = AMOVWQZX; + break; + } + } + } + } +} + +int +regconsttyp(Adr *a) +{ + if(regtyp(a)) + return 1; + switch(a->type) { + case D_CONST: + case D_FCONST: + case D_SCONST: + case D_ADDR: + return 1; + } + return 0; +} + +// is reg guaranteed to be truncated by a previous L instruction? +static int +prevl(Reg *r0, int reg) +{ + Prog *p; + Reg *r; + + for(r=uniqp(r0); r!=R; r=uniqp(r)) { + p = r->prog; + if(p->to.type == reg) { + switch(p->as) { + case AADDL: + case AANDL: + case ADECL: + case ADIVL: + case AIDIVL: + case AIMULL: + case AINCL: + case AMOVL: + case AMULL: + case AORL: + case ARCLL: + case ARCRL: + case AROLL: + case ARORL: + case ASALL: + case ASARL: + case ASHLL: + case ASHRL: + case ASUBL: + case AXORL: + return 1; + } + return 0; + } + } + return 0; +} + /* * the idea is to substitute * one register for another @@ -357,19 +631,34 @@ subprop(Reg *r0) Reg *r; int t; + if(debug['P'] && debug['v']) + print("subprop %P\n", r0->prog); p = r0->prog; v1 = &p->from; - if(!regtyp(v1)) + if(!regtyp(v1)) { + if(debug['P'] && debug['v']) + print("\tnot regtype %D; return 0\n", v1); return 0; + } v2 = &p->to; - if(!regtyp(v2)) + if(!regtyp(v2)) { + if(debug['P'] && debug['v']) + print("\tnot regtype %D; return 0\n", v2); return 0; + } for(r=uniqp(r0); r!=R; r=uniqp(r)) { - if(uniqs(r) == R) + if(debug['P'] && debug['v']) + print("\t? %P\n", r->prog); + if(uniqs(r) == R) { + if(debug['P'] && debug['v']) + print("\tno unique successor\n"); break; + } p = r->prog; switch(p->as) { case ACALL: + if(debug['P'] && debug['v']) + print("\tfound %P; return 0\n", p); return 0; case AIMULL: @@ -377,20 +666,7 @@ subprop(Reg *r0) case AIMULW: if(p->to.type != D_NONE) break; - - case ADIVB: - case ADIVL: - case ADIVQ: - case ADIVW: - case AIDIVB: - case AIDIVL: - case AIDIVQ: - case AIDIVW: - case AIMULB: - case AMULB: - case AMULL: - case AMULQ: - case AMULW: + goto giveup; case ARCLB: case ARCLL: @@ -424,6 +700,23 @@ subprop(Reg *r0) case ASHRL: case ASHRQ: case ASHRW: + if(p->from.type == D_CONST) + break; + goto giveup; + + case ADIVB: + case ADIVL: + case ADIVQ: + case ADIVW: + case AIDIVB: + case AIDIVL: + case AIDIVQ: + case AIDIVW: + case AIMULB: + case AMULB: + case AMULL: + case AMULQ: + case AMULW: case AREP: case AREPN: @@ -438,21 +731,34 @@ subprop(Reg *r0) case AMOVSB: case AMOVSL: case AMOVSQ: + giveup: + if(debug['P'] && debug['v']) + print("\tfound %P; return 0\n", p); return 0; case AMOVL: case AMOVQ: + case AMOVSS: + case AMOVSD: if(p->to.type == v1->type) goto gotit; break; } if(copyau(&p->from, v2) || - copyau(&p->to, v2)) + copyau(&p->to, v2)) { + if(debug['P'] && debug['v']) + print("\tcopyau %D failed\n", v2); break; + } if(copysub(&p->from, v1, v2, 0) || - copysub(&p->to, v1, v2, 0)) + copysub(&p->to, v1, v2, 0)) { + if(debug['P'] && debug['v']) + print("\tcopysub failed\n"); break; + } } + if(debug['P'] && debug['v']) + print("\tran off end; return 0\n", p); return 0; gotit: @@ -497,6 +803,8 @@ copyprop(Reg *r0) Adr *v1, *v2; Reg *r; + if(debug['P'] && debug['v']) + print("copyprop %P\n", r0->prog); p = r0->prog; v1 = &p->from; v2 = &p->to; @@ -636,6 +944,7 @@ copyu(Prog *p, Adr *v, Adr *s) case AMOVWLZX: case AMOVWQSX: case AMOVWQZX: + case AMOVQL: case AMOVSS: case AMOVSD: @@ -853,8 +1162,6 @@ copyu(Prog *p, Adr *v, Adr *s) return 0; case ARET: /* funny */ - if(v->type == REGRET || v->type == FREGRET) - return 2; if(s != A) return 1; return 3; @@ -864,6 +1171,8 @@ copyu(Prog *p, Adr *v, Adr *s) return 2; if(REGARG >= 0 && v->type == (uchar)REGARG) return 2; + if(v->type == p->from.type) + return 2; if(s != A) { if(copysub(&p->to, v, s, 1)) @@ -907,13 +1216,22 @@ int copyau(Adr *a, Adr *v) { - if(copyas(a, v)) + if(copyas(a, v)) { + if(debug['P'] && debug['v']) + print("\tcopyau: copyas returned 1\n"); return 1; + } if(regtyp(v)) { - if(a->type-D_INDIR == v->type) + if(a->type-D_INDIR == v->type) { + if(debug['P'] && debug['v']) + print("\tcopyau: found indir use - return 1\n"); return 1; - if(a->index == v->type) + } + if(a->index == v->type) { + if(debug['P'] && debug['v']) + print("\tcopyau: found index use - return 1\n"); return 1; + } } return 0; } @@ -990,7 +1308,7 @@ loop: if(p->from.node == p0->from.node) if(p->from.offset == p0->from.offset) if(p->from.scale == p0->from.scale) - if(p->from.dval == p0->from.dval) + if(p->from.u.vval == p0->from.u.vval) if(p->from.index == p0->from.index) { excise(r); goto loop; diff --git a/src/cmd/6g/reg.c b/src/cmd/6g/reg.c index 049c63f17..c56d71678 100644 --- a/src/cmd/6g/reg.c +++ b/src/cmd/6g/reg.c @@ -151,6 +151,8 @@ static char* regname[] = { ".X15", }; +static Node* regnodes[NREGVAR]; + static void fixjmp(Prog*); void @@ -164,7 +166,7 @@ regopt(Prog *firstp) if(first) { fmtinstall('Q', Qconv); - exregoffset = D_R13; // R14,R15 are external + exregoffset = D_R15; first = 0; } @@ -191,8 +193,11 @@ regopt(Prog *firstp) */ nvar = NREGVAR; memset(var, 0, NREGVAR*sizeof var[0]); - for(i=0; i<NREGVAR; i++) - var[i].node = newname(lookup(regname[i])); + for(i=0; i<NREGVAR; i++) { + if(regnodes[i] == N) + regnodes[i] = newname(lookup(regname[i])); + var[i].node = regnodes[i]; + } regbits = RtoB(D_SP); for(z=0; z<BITS; z++) { @@ -219,6 +224,8 @@ regopt(Prog *firstp) case AGLOBL: case ANAME: case ASIGNAME: + case ALOCALS: + case ATYPE: continue; } r = rega(); @@ -247,6 +254,20 @@ regopt(Prog *firstp) } } + // Avoid making variables for direct-called functions. + if(p->as == ACALL && p->to.type == D_EXTERN) + continue; + + // Addressing makes some registers used. + if(p->from.type >= D_INDIR) + r->use1.b[0] |= RtoB(p->from.type-D_INDIR); + if(p->from.index != D_NONE) + r->use1.b[0] |= RtoB(p->from.index); + if(p->to.type >= D_INDIR) + r->use2.b[0] |= RtoB(p->to.type-D_INDIR); + if(p->to.index != D_NONE) + r->use2.b[0] |= RtoB(p->to.index); + bit = mkvar(r, &p->from); if(bany(&bit)) switch(p->as) { @@ -326,6 +347,7 @@ regopt(Prog *firstp) case AMOVWLZX: case AMOVWQSX: case AMOVWQZX: + case AMOVQL: case APOPQ: case AMOVSS: @@ -578,8 +600,9 @@ regopt(Prog *firstp) addrs.b[z] |= bit.b[z]; } -// print("bit=%2d addr=%d et=%-6E w=%-2d s=%S + %lld\n", -// i, v->addr, v->etype, v->width, v->sym, v->offset); + if(debug['R'] && debug['v']) + print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", + i, v->addr, v->etype, v->width, v->node, v->offset); } if(debug['R'] && debug['v']) @@ -593,9 +616,9 @@ regopt(Prog *firstp) for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->to.type == D_BRANCH) { - if(p->to.branch == P) + if(p->to.u.branch == P) fatal("pnil %P", p); - r1 = p->to.branch->reg; + r1 = p->to.u.branch->reg; if(r1 == R) fatal("rnil %P", p); if(r1 == r) { @@ -742,6 +765,9 @@ loop2: brk: qsort(region, nregion, sizeof(region[0]), rcmp); + if(debug['R'] && debug['v']) + dumpit("pass5", firstr); + /* * pass 6 * determine used registers (paint2) @@ -752,8 +778,16 @@ brk: bit = blsh(rgp->varno); vreg = paint2(rgp->enter, rgp->varno); vreg = allreg(vreg, rgp); - if(rgp->regno != 0) + if(rgp->regno != 0) { + if(debug['R'] && debug['v']) { + Var *v; + + v = var + rgp->varno; + print("registerize %N+%d (bit=%2d et=%2E) in %R\n", + v->node, v->offset, rgp->varno, v->etype, rgp->regno); + } paint3(rgp->enter, rgp->varno, vreg, rgp->regno); + } rgp++; } @@ -776,8 +810,8 @@ brk: while(p->link != P && p->link->as == ANOP) p->link = p->link->link; if(p->to.type == D_BRANCH) - while(p->to.branch != P && p->to.branch->as == ANOP) - p->to.branch = p->to.branch->link; + while(p->to.u.branch != P && p->to.u.branch->as == ANOP) + p->to.u.branch = p->to.u.branch->link; } if(lastr != R) { @@ -838,7 +872,6 @@ addmove(Reg *r, int bn, int rn, int f) a->offset = v->offset; a->etype = v->etype; a->type = v->name; - a->gotype = v->gotype; a->node = v->node; a->sym = v->node->sym; @@ -847,7 +880,7 @@ addmove(Reg *r, int bn, int rn, int f) p1->as = AMOVL; switch(v->etype) { default: - fatal("unknown type\n"); + fatal("unknown type %E", v->etype); case TINT8: case TUINT8: case TBOOL: @@ -932,7 +965,8 @@ Bits mkvar(Reg *r, Adr *a) { Var *v; - int i, t, n, et, z, w, flag; + int i, t, n, et, z, flag; + int64 w; uint32 regu; int32 o; Bits bit; @@ -984,6 +1018,8 @@ mkvar(Reg *r, Adr *a) et = a->etype; o = a->offset; w = a->width; + if(w < 0) + fatal("bad width %lld for %D", w, a); flag = 0; for(i=0; i<nvar; i++) { @@ -1019,14 +1055,14 @@ mkvar(Reg *r, Adr *a) v = var+i; v->offset = o; v->name = n; - v->gotype = a->gotype; v->etype = et; v->width = w; v->addr = flag; // funny punning v->node = node; if(debug['R']) - print("bit=%2d et=%2d w=%d %#N %D\n", i, et, w, node, a); + print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); + ostats.nvar++; bit = blsh(i); @@ -1089,8 +1125,12 @@ prop(Reg *r, Bits ref, Bits cal) default: // Work around for issue 1304: // flush modified globals before each instruction. - for(z=0; z<BITS; z++) + for(z=0; z<BITS; z++) { cal.b[z] |= externs.b[z]; + // issue 4066: flush modified return variables in case of panic + if(hasdefer) + cal.b[z] |= ovar.b[z]; + } break; } for(z=0; z<BITS; z++) { @@ -1576,7 +1616,7 @@ RtoB(int r) int BtoR(int32 b) { - b &= 0x3fffL; // no R14 or R15 + b &= 0xffffL; if(b == 0) return 0; return bitno(b) + D_AX; @@ -1584,26 +1624,26 @@ BtoR(int32 b) /* * bit reg - * 16 X5 (FREGMIN) + * 16 X0 * ... - * 26 X15 (FREGEXT) + * 31 X15 */ int32 FtoB(int f) { - if(f < FREGMIN || f > FREGEXT) + if(f < D_X0 || f > D_X15) return 0; - return 1L << (f - FREGMIN + 16); + return 1L << (f - D_X0 + 16); } int BtoF(int32 b) { - b &= 0xFF0000L; + b &= 0xFFFF0000L; if(b == 0) return 0; - return bitno(b) - 16 + FREGMIN; + return bitno(b) - 16 + D_X0; } void @@ -1719,7 +1759,7 @@ chasejmp(Prog *p, int *jmploop) *jmploop = 1; break; } - p = p->to.branch; + p = p->to.u.branch; } return p; } @@ -1741,9 +1781,9 @@ mark(Prog *firstp) if(p->reg != dead) break; p->reg = alive; - if(p->as != ACALL && p->to.type == D_BRANCH && p->to.branch) - mark(p->to.branch); - if(p->as == AJMP || p->as == ARET || (p->as == ACALL && noreturn(p))) + if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch) + mark(p->to.u.branch); + if(p->as == AJMP || p->as == ARET || p->as == AUNDEF) break; } } @@ -1762,8 +1802,8 @@ fixjmp(Prog *firstp) for(p=firstp; p; p=p->link) { if(debug['R'] && debug['v']) print("%P\n", p); - if(p->as != ACALL && p->to.type == D_BRANCH && p->to.branch && p->to.branch->as == AJMP) { - p->to.branch = chasejmp(p->to.branch, &jmploop); + if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) { + p->to.u.branch = chasejmp(p->to.u.branch, &jmploop); if(debug['R'] && debug['v']) print("->%P\n", p); } @@ -1799,7 +1839,7 @@ fixjmp(Prog *firstp) if(!jmploop) { last = nil; for(p=firstp; p; p=p->link) { - if(p->as == AJMP && p->to.type == D_BRANCH && p->to.branch == p->link) { + if(p->as == AJMP && p->to.type == D_BRANCH && p->to.u.branch == p->link) { if(debug['R'] && debug['v']) print("del %P\n", p); continue; |