// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #include #include #include "gg.h" /* * attempt to generate 64-bit * res = n * return 1 on success, 0 if op not handled. */ void cgen64(Node *n, Node *res) { Node t1, t2, *l, *r; Node lo1, lo2, hi1, hi2; Node al, ah, bl, bh, cl, ch, s, n1, creg; Prog *p1, *p2, *p3, *p4, *p5, *p6; uint64 v; if(res->op != OINDREG && res->op != ONAME) { dump("n", n); dump("res", res); fatal("cgen64 %O of %O", n->op, res->op); } l = n->left; if(!l->addable) { tempname(&t1, l->type); cgen(l, &t1); l = &t1; } split64(l, &lo1, &hi1); switch(n->op) { default: fatal("cgen64 %O", n->op); case OMINUS: split64(res, &lo2, &hi2); regalloc(&t1, lo1.type, N); regalloc(&al, lo1.type, N); regalloc(&ah, hi1.type, N); gins(AMOVW, &lo1, &al); gins(AMOVW, &hi1, &ah); gmove(ncon(0), &t1); p1 = gins(ASUB, &al, &t1); p1->scond |= C_SBIT; gins(AMOVW, &t1, &lo2); gmove(ncon(0), &t1); gins(ASBC, &ah, &t1); gins(AMOVW, &t1, &hi2); regfree(&t1); regfree(&al); regfree(&ah); splitclean(); splitclean(); return; case OCOM: regalloc(&t1, lo1.type, N); gmove(ncon(-1), &t1); split64(res, &lo2, &hi2); regalloc(&n1, lo1.type, N); gins(AMOVW, &lo1, &n1); gins(AEOR, &t1, &n1); gins(AMOVW, &n1, &lo2); gins(AMOVW, &hi1, &n1); gins(AEOR, &t1, &n1); gins(AMOVW, &n1, &hi2); regfree(&t1); regfree(&n1); splitclean(); splitclean(); return; case OADD: case OSUB: case OMUL: case OLSH: case ORSH: case OAND: case OOR: case OXOR: case OLROT: // binary operators. // common setup below. break; } // setup for binary operators r = n->right; if(r != N && !r->addable) { tempname(&t2, r->type); cgen(r, &t2); r = &t2; } if(is64(r->type)) split64(r, &lo2, &hi2); regalloc(&al, lo1.type, N); regalloc(&ah, hi1.type, N); // Do op. Leave result in ah:al. switch(n->op) { default: fatal("cgen64: not implemented: %N\n", n); case OADD: // TODO: Constants regalloc(&bl, types[TPTR32], N); regalloc(&bh, types[TPTR32], N); gins(AMOVW, &hi1, &ah); gins(AMOVW, &lo1, &al); gins(AMOVW, &hi2, &bh); gins(AMOVW, &lo2, &bl); p1 = gins(AADD, &bl, &al); p1->scond |= C_SBIT; gins(AADC, &bh, &ah); regfree(&bl); regfree(&bh); break; case OSUB: // TODO: Constants. regalloc(&bl, types[TPTR32], N); regalloc(&bh, types[TPTR32], N); gins(AMOVW, &lo1, &al); gins(AMOVW, &hi1, &ah); gins(AMOVW, &lo2, &bl); gins(AMOVW, &hi2, &bh); p1 = gins(ASUB, &bl, &al); p1->scond |= C_SBIT; gins(ASBC, &bh, &ah); regfree(&bl); regfree(&bh); break; case OMUL: // TODO(kaib): this can be done with 4 regs and does not need 6 regalloc(&bl, types[TPTR32], N); regalloc(&bh, types[TPTR32], N); regalloc(&cl, types[TPTR32], N); regalloc(&ch, types[TPTR32], N); // load args into bh:bl and bh:bl. gins(AMOVW, &hi1, &bh); gins(AMOVW, &lo1, &bl); gins(AMOVW, &hi2, &ch); gins(AMOVW, &lo2, &cl); // bl * cl -> ah al p1 = gins(AMULLU, N, N); p1->from.type = D_REG; p1->from.reg = bl.val.u.reg; p1->reg = cl.val.u.reg; p1->to.type = D_REGREG; p1->to.reg = ah.val.u.reg; p1->to.offset = al.val.u.reg; //print("%P\n", p1); // bl * ch + ah -> ah p1 = gins(AMULA, N, N); p1->from.type = D_REG; p1->from.reg = bl.val.u.reg; p1->reg = ch.val.u.reg; p1->to.type = D_REGREG2; p1->to.reg = ah.val.u.reg; p1->to.offset = ah.val.u.reg; //print("%P\n", p1); // bh * cl + ah -> ah p1 = gins(AMULA, N, N); p1->from.type = D_REG; p1->from.reg = bh.val.u.reg; p1->reg = cl.val.u.reg; p1->to.type = D_REGREG2; p1->to.reg = ah.val.u.reg; p1->to.offset = ah.val.u.reg; //print("%P\n", p1); regfree(&bh); regfree(&bl); regfree(&ch); regfree(&cl); break; case OLROT: // We only rotate by a constant c in [0,64). // if c >= 32: // lo, hi = hi, lo // c -= 32 // if c == 0: // no-op // else: // t = hi // shld hi:lo, c // shld lo:t, c v = mpgetfix(r->val.u.xval); regalloc(&bl, lo1.type, N); regalloc(&bh, hi1.type, N); if(v >= 32) { // reverse during load to do the first 32 bits of rotate v -= 32; gins(AMOVW, &hi1, &bl); gins(AMOVW, &lo1, &bh); } else { gins(AMOVW, &hi1, &bh); gins(AMOVW, &lo1, &bl); } if(v == 0) { gins(AMOVW, &bh, &ah); gins(AMOVW, &bl, &al); } else { // rotate by 1 <= v <= 31 // MOVW bl<>(32-v), ah // OR bh>>(32-v), al gshift(AMOVW, &bl, SHIFT_LL, v, &al); gshift(AMOVW, &bh, SHIFT_LL, v, &ah); gshift(AORR, &bl, SHIFT_LR, 32-v, &ah); gshift(AORR, &bh, SHIFT_LR, 32-v, &al); } regfree(&bl); regfree(&bh); break; case OLSH: regalloc(&bl, lo1.type, N); regalloc(&bh, hi1.type, N); gins(AMOVW, &hi1, &bh); gins(AMOVW, &lo1, &bl); if(r->op == OLITERAL) { v = mpgetfix(r->val.u.xval); if(v >= 64) { // TODO(kaib): replace with gins(AMOVW, nodintconst(0), &al) // here and below (verify it optimizes to EOR) gins(AEOR, &al, &al); gins(AEOR, &ah, &ah); } else if(v > 32) { gins(AEOR, &al, &al); // MOVW bl<<(v-32), ah gshift(AMOVW, &bl, SHIFT_LL, (v-32), &ah); } else if(v == 32) { gins(AEOR, &al, &al); gins(AMOVW, &bl, &ah); } else if(v > 0) { // MOVW bl<>(32-v), ah gshift(AORR, &bl, SHIFT_LR, 32-v, &ah); } else { gins(AMOVW, &bl, &al); gins(AMOVW, &bh, &ah); } goto olsh_break; } regalloc(&s, types[TUINT32], N); regalloc(&creg, types[TUINT32], N); if (is64(r->type)) { // shift is >= 1<<32 split64(r, &cl, &ch); gmove(&ch, &s); gins(ATST, &s, N); p6 = gbranch(ABNE, T, 0); gmove(&cl, &s); splitclean(); } else { gmove(r, &s); p6 = P; } gins(ATST, &s, N); // shift == 0 p1 = gins(AMOVW, &bl, &al); p1->scond = C_SCOND_EQ; p1 = gins(AMOVW, &bh, &ah); p1->scond = C_SCOND_EQ; p2 = gbranch(ABEQ, T, 0); // shift is < 32 nodconst(&n1, types[TUINT32], 32); gmove(&n1, &creg); gcmp(ACMP, &s, &creg); // MOVW.LO bl<scond = C_SCOND_LO; // MOVW.LO bh<scond = C_SCOND_LO; // SUB.LO s, creg p1 = gins(ASUB, &s, &creg); p1->scond = C_SCOND_LO; // OR.LO bl>>creg, ah p1 = gregshift(AORR, &bl, SHIFT_LR, &creg, &ah); p1->scond = C_SCOND_LO; // BLO end p3 = gbranch(ABLO, T, 0); // shift == 32 p1 = gins(AEOR, &al, &al); p1->scond = C_SCOND_EQ; p1 = gins(AMOVW, &bl, &ah); p1->scond = C_SCOND_EQ; p4 = gbranch(ABEQ, T, 0); // shift is < 64 nodconst(&n1, types[TUINT32], 64); gmove(&n1, &creg); gcmp(ACMP, &s, &creg); // EOR.LO al, al p1 = gins(AEOR, &al, &al); p1->scond = C_SCOND_LO; // MOVW.LO creg>>1, creg p1 = gshift(AMOVW, &creg, SHIFT_LR, 1, &creg); p1->scond = C_SCOND_LO; // SUB.LO creg, s p1 = gins(ASUB, &creg, &s); p1->scond = C_SCOND_LO; // MOVW bl<scond = C_SCOND_LO; p5 = gbranch(ABLO, T, 0); // shift >= 64 if (p6 != P) patch(p6, pc); gins(AEOR, &al, &al); gins(AEOR, &ah, &ah); patch(p2, pc); patch(p3, pc); patch(p4, pc); patch(p5, pc); regfree(&s); regfree(&creg); olsh_break: regfree(&bl); regfree(&bh); break; case ORSH: regalloc(&bl, lo1.type, N); regalloc(&bh, hi1.type, N); gins(AMOVW, &hi1, &bh); gins(AMOVW, &lo1, &bl); if(r->op == OLITERAL) { v = mpgetfix(r->val.u.xval); if(v >= 64) { if(bh.type->etype == TINT32) { // MOVW bh->31, al gshift(AMOVW, &bh, SHIFT_AR, 31, &al); // MOVW bh->31, ah gshift(AMOVW, &bh, SHIFT_AR, 31, &ah); } else { gins(AEOR, &al, &al); gins(AEOR, &ah, &ah); } } else if(v > 32) { if(bh.type->etype == TINT32) { // MOVW bh->(v-32), al gshift(AMOVW, &bh, SHIFT_AR, v-32, &al); // MOVW bh->31, ah gshift(AMOVW, &bh, SHIFT_AR, 31, &ah); } else { // MOVW bh>>(v-32), al gshift(AMOVW, &bh, SHIFT_LR, v-32, &al); gins(AEOR, &ah, &ah); } } else if(v == 32) { gins(AMOVW, &bh, &al); if(bh.type->etype == TINT32) { // MOVW bh->31, ah gshift(AMOVW, &bh, SHIFT_AR, 31, &ah); } else { gins(AEOR, &ah, &ah); } } else if( v > 0) { // MOVW bl>>v, al gshift(AMOVW, &bl, SHIFT_LR, v, &al); // OR bh<<(32-v), al gshift(AORR, &bh, SHIFT_LL, 32-v, &al); if(bh.type->etype == TINT32) { // MOVW bh->v, ah gshift(AMOVW, &bh, SHIFT_AR, v, &ah); } else { // MOVW bh>>v, ah gshift(AMOVW, &bh, SHIFT_LR, v, &ah); } } else { gins(AMOVW, &bl, &al); gins(AMOVW, &bh, &ah); } goto orsh_break; } regalloc(&s, types[TUINT32], N); regalloc(&creg, types[TUINT32], N); if(is64(r->type)) { // shift is >= 1<<32 split64(r, &cl, &ch); gmove(&ch, &s); gins(ATST, &s, N); if(bh.type->etype == TINT32) p1 = gshift(AMOVW, &bh, SHIFT_AR, 31, &ah); else p1 = gins(AEOR, &ah, &ah); p1->scond = C_SCOND_NE; p6 = gbranch(ABNE, T, 0); gmove(&cl, &s); splitclean(); } else { gmove(r, &s); p6 = P; } gins(ATST, &s, N); // shift == 0 p1 = gins(AMOVW, &bl, &al); p1->scond = C_SCOND_EQ; p1 = gins(AMOVW, &bh, &ah); p1->scond = C_SCOND_EQ; p2 = gbranch(ABEQ, T, 0); // check if shift is < 32 nodconst(&n1, types[TUINT32], 32); gmove(&n1, &creg); gcmp(ACMP, &s, &creg); // MOVW.LO bl>>s, al p1 = gregshift(AMOVW, &bl, SHIFT_LR, &s, &al); p1->scond = C_SCOND_LO; // SUB.LO s,creg p1 = gins(ASUB, &s, &creg); p1->scond = C_SCOND_LO; // OR.LO bh<<(32-s), al p1 = gregshift(AORR, &bh, SHIFT_LL, &creg, &al); p1->scond = C_SCOND_LO; if(bh.type->etype == TINT32) { // MOVW bh->s, ah p1 = gregshift(AMOVW, &bh, SHIFT_AR, &s, &ah); } else { // MOVW bh>>s, ah p1 = gregshift(AMOVW, &bh, SHIFT_LR, &s, &ah); } p1->scond = C_SCOND_LO; // BLO end p3 = gbranch(ABLO, T, 0); // shift == 32 p1 = gins(AMOVW, &bh, &al); p1->scond = C_SCOND_EQ; if(bh.type->etype == TINT32) gshift(AMOVW, &bh, SHIFT_AR, 31, &ah); else gins(AEOR, &ah, &ah); p4 = gbranch(ABEQ, T, 0); // check if shift is < 64 nodconst(&n1, types[TUINT32], 64); gmove(&n1, &creg); gcmp(ACMP, &s, &creg); // MOVW.LO creg>>1, creg p1 = gshift(AMOVW, &creg, SHIFT_LR, 1, &creg); p1->scond = C_SCOND_LO; // SUB.LO creg, s p1 = gins(ASUB, &creg, &s); p1->scond = C_SCOND_LO; if(bh.type->etype == TINT32) { // MOVW bh->(s-32), al p1 = gregshift(AMOVW, &bh, SHIFT_AR, &s, &al); p1->scond = C_SCOND_LO; } else { // MOVW bh>>(v-32), al p1 = gregshift(AMOVW, &bh, SHIFT_LR, &s, &al); p1->scond = C_SCOND_LO; } // BLO end p5 = gbranch(ABLO, T, 0); // s >= 64 if(p6 != P) patch(p6, pc); if(bh.type->etype == TINT32) { // MOVW bh->31, al gshift(AMOVW, &bh, SHIFT_AR, 31, &al); } else { gins(AEOR, &al, &al); } patch(p2, pc); patch(p3, pc); patch(p4, pc); patch(p5, pc); regfree(&s); regfree(&creg); orsh_break: regfree(&bl); regfree(&bh); break; case OXOR: case OAND: case OOR: // TODO(kaib): literal optimizations // make constant the right side (it usually is anyway). // if(lo1.op == OLITERAL) { // nswap(&lo1, &lo2); // nswap(&hi1, &hi2); // } // if(lo2.op == OLITERAL) { // // special cases for constants. // lv = mpgetfix(lo2.val.u.xval); // hv = mpgetfix(hi2.val.u.xval); // splitclean(); // right side // split64(res, &lo2, &hi2); // switch(n->op) { // case OXOR: // gmove(&lo1, &lo2); // gmove(&hi1, &hi2); // switch(lv) { // case 0: // break; // case 0xffffffffu: // gins(ANOTL, N, &lo2); // break; // default: // gins(AXORL, ncon(lv), &lo2); // break; // } // switch(hv) { // case 0: // break; // case 0xffffffffu: // gins(ANOTL, N, &hi2); // break; // default: // gins(AXORL, ncon(hv), &hi2); // break; // } // break; // case OAND: // switch(lv) { // case 0: // gins(AMOVL, ncon(0), &lo2); // break; // default: // gmove(&lo1, &lo2); // if(lv != 0xffffffffu) // gins(AANDL, ncon(lv), &lo2); // break; // } // switch(hv) { // case 0: // gins(AMOVL, ncon(0), &hi2); // break; // default: // gmove(&hi1, &hi2); // if(hv != 0xffffffffu) // gins(AANDL, ncon(hv), &hi2); // break; // } // break; // case OOR: // switch(lv) { // case 0: // gmove(&lo1, &lo2); // break; // case 0xffffffffu: // gins(AMOVL, ncon(0xffffffffu), &lo2); // break; // default: // gmove(&lo1, &lo2); // gins(AORL, ncon(lv), &lo2); // break; // } // switch(hv) { // case 0: // gmove(&hi1, &hi2); // break; // case 0xffffffffu: // gins(AMOVL, ncon(0xffffffffu), &hi2); // break; // default: // gmove(&hi1, &hi2); // gins(AORL, ncon(hv), &hi2); // break; // } // break; // } // splitclean(); // splitclean(); // goto out; // } regalloc(&n1, lo1.type, N); gins(AMOVW, &lo1, &al); gins(AMOVW, &hi1, &ah); gins(AMOVW, &lo2, &n1); gins(optoas(n->op, lo1.type), &n1, &al); gins(AMOVW, &hi2, &n1); gins(optoas(n->op, lo1.type), &n1, &ah); regfree(&n1); break; } if(is64(r->type)) splitclean(); splitclean(); split64(res, &lo1, &hi1); gins(AMOVW, &al, &lo1); gins(AMOVW, &ah, &hi1); splitclean(); //out: regfree(&al); regfree(&ah); } /* * generate comparison of nl, nr, both 64-bit. * nl is memory; nr is constant or memory. */ void cmp64(Node *nl, Node *nr, int op, int likely, Prog *to) { Node lo1, hi1, lo2, hi2, r1, r2; Prog *br; Type *t; split64(nl, &lo1, &hi1); split64(nr, &lo2, &hi2); // compare most significant word; // if they differ, we're done. t = hi1.type; regalloc(&r1, types[TINT32], N); regalloc(&r2, types[TINT32], N); gins(AMOVW, &hi1, &r1); gins(AMOVW, &hi2, &r2); gcmp(ACMP, &r1, &r2); regfree(&r1); regfree(&r2); br = P; switch(op) { default: fatal("cmp64 %O %T", op, t); case OEQ: // cmp hi // bne L // cmp lo // beq to // L: br = gbranch(ABNE, T, -likely); break; case ONE: // cmp hi // bne to // cmp lo // bne to patch(gbranch(ABNE, T, likely), to); break; case OGE: case OGT: // cmp hi // bgt to // blt L // cmp lo // bge to (or bgt to) // L: patch(gbranch(optoas(OGT, t), T, likely), to); br = gbranch(optoas(OLT, t), T, -likely); break; case OLE: case OLT: // cmp hi // blt to // bgt L // cmp lo // ble to (or jlt to) // L: patch(gbranch(optoas(OLT, t), T, likely), to); br = gbranch(optoas(OGT, t), T, -likely); break; } // compare least significant word t = lo1.type; regalloc(&r1, types[TINT32], N); regalloc(&r2, types[TINT32], N); gins(AMOVW, &lo1, &r1); gins(AMOVW, &lo2, &r2); gcmp(ACMP, &r1, &r2); regfree(&r1); regfree(&r2); // jump again patch(gbranch(optoas(op, t), T, likely), to); // point first branch down here if appropriate if(br != P) patch(br, pc); splitclean(); splitclean(); }