// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #undef EXTERN #define EXTERN #include #include #include "gg.h" #include "opt.h" static Prog* appendp(Prog*, int, int, int32, int, int32); void defframe(Prog *ptxt, Bvec *bv) { uint32 frame; Prog *p; int i, j; // fill in argument size ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr); // fill in final stack size if(stksize > maxstksize) maxstksize = stksize; frame = rnd(maxstksize+maxarg, widthptr); ptxt->to.offset = frame; maxstksize = 0; // insert code to clear pointered part of the frame, // so that garbage collector only sees initialized values // when it looks for pointers. p = ptxt; if(stkzerosize >= 8*widthptr) { p = appendp(p, AMOVL, D_CONST, 0, D_AX, 0); p = appendp(p, AMOVL, D_CONST, stkzerosize/widthptr, D_CX, 0); p = appendp(p, ALEAL, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0); p = appendp(p, AREP, D_NONE, 0, D_NONE, 0); appendp(p, ASTOSL, D_NONE, 0, D_NONE, 0); } else { j = (stkptrsize - stkzerosize)/widthptr * 2; for(i=0; ias = as; q->lineno = p->lineno; q->from.type = ftype; q->from.offset = foffset; q->to.type = ttype; q->to.offset = toffset; q->link = p->link; p->link = q; return q; } // Sweep the prog list to mark any used nodes. void markautoused(Prog* p) { for (; p; p = p->link) { if (p->as == ATYPE) continue; if (p->from.type == D_AUTO && p->from.node) p->from.node->used = 1; if (p->to.type == D_AUTO && p->to.node) p->to.node->used = 1; } } // Fixup instructions after allocauto (formerly compactframe) has moved all autos around. 
void fixautoused(Prog* p) { Prog **lp; for (lp=&p; (p=*lp) != P; ) { if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) { *lp = p->link; continue; } if (p->from.type == D_AUTO && p->from.node) p->from.offset += p->from.node->stkdelta; if (p->to.type == D_AUTO && p->to.node) p->to.offset += p->to.node->stkdelta; lp = &p->link; } } void clearfat(Node *nl) { uint32 w, c, q; Node n1; /* clear a fat object */ if(debug['g']) dump("\nclearfat", nl); w = nl->type->width; // Avoid taking the address for simple enough types. if(componentgen(N, nl)) return; c = w % 4; // bytes q = w / 4; // quads nodreg(&n1, types[tptr], D_DI); agen(nl, &n1); gconreg(AMOVL, 0, D_AX); if(q >= 4) { gconreg(AMOVL, q, D_CX); gins(AREP, N, N); // repeat gins(ASTOSL, N, N); // STOL AL,*(DI)+ } else while(q > 0) { gins(ASTOSL, N, N); // STOL AL,*(DI)+ q--; } if(c >= 4) { gconreg(AMOVL, c, D_CX); gins(AREP, N, N); // repeat gins(ASTOSB, N, N); // STOB AL,*(DI)+ } else while(c > 0) { gins(ASTOSB, N, N); // STOB AL,*(DI)+ c--; } } /* * generate: * call f * proc=-1 normal call but no return * proc=0 normal call * proc=1 goroutine run in new proc * proc=2 defer call save away stack * proc=3 normal call to C pointer (not Go func value) */ void ginscall(Node *f, int proc) { int32 arg; Prog *p; Node reg, r1, con; if(f->type != T) setmaxarg(f->type); arg = -1; // Most functions have a fixed-size argument block, so traceback uses that during unwind. // Not all, though: there are some variadic functions in package runtime, // and for those we emit call-specific metadata recorded by caller. // Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub), // so we do this for all indirect calls as well. 
if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) { arg = f->type->argwid; if(proc == 1 || proc == 2) arg += 2*widthptr; } if(arg != -1) gargsize(arg); switch(proc) { default: fatal("ginscall: bad proc %d", proc); break; case 0: // normal call case -1: // normal call but no return if(f->op == ONAME && f->class == PFUNC) { if(f == deferreturn) { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an x86 NOP that we will have the right line number. // x86 NOP 0x90 is really XCHG AX, AX; use that description // because the NOP pseudo-instruction will be removed by // the linker. nodreg(®, types[TINT], D_AX); gins(AXCHGL, ®, ®); } p = gins(ACALL, N, f); afunclit(&p->to, f); if(proc == -1 || noreturn(p)) gins(AUNDEF, N, N); break; } nodreg(®, types[tptr], D_DX); nodreg(&r1, types[tptr], D_BX); gmove(f, ®); reg.op = OINDREG; gmove(®, &r1); reg.op = OREGISTER; gins(ACALL, ®, &r1); break; case 3: // normal call of c function pointer gins(ACALL, N, f); break; case 1: // call in new proc (go) case 2: // deferred call (defer) nodreg(®, types[TINT32], D_CX); gins(APUSHL, f, N); nodconst(&con, types[TINT32], argsize(f->type)); gins(APUSHL, &con, N); if(proc == 1) ginscall(newproc, 0); else ginscall(deferproc, 0); gins(APOPL, N, ®); gins(APOPL, N, ®); if(proc == 2) { nodreg(®, types[TINT64], D_AX); gins(ATESTL, ®, ®); patch(gbranch(AJNE, T, -1), retpc); } break; } if(arg != -1) gargsize(-1); } /* * n is call to interface method. * generate res = n. 
 */
void
cgen_callinter(Node *n, Node *res, int proc)
{
	Node *i, *f;
	Node tmpi, nodi, nodo, nodr, nodsp;

	i = n->left;
	if(i->op != ODOTINTER)
		fatal("cgen_callinter: not ODOTINTER %O", i->op);

	f = i->right;		// field
	if(f->op != ONAME)
		fatal("cgen_callinter: not ONAME %O", f->op);

	i = i->left;		// interface

	// Materialize the interface value in a temporary if it is not
	// directly addressable.
	if(!i->addable) {
		tempname(&tmpi, i->type);
		cgen(i, &tmpi);
		i = &tmpi;
	}

	genlist(n->list);		// assign the args

	// i is now addable, prepare an indirected
	// register to hold its address.
	igen(i, &nodi, res);		// REG = &inter

	nodindreg(&nodsp, types[tptr], D_SP);
	nodi.type = types[tptr];
	nodi.xoffset += widthptr;
	cgen(&nodi, &nodsp);	// 0(SP) = 4(REG) -- i.data

	nodi.type = types[tptr];
	regalloc(&nodo, types[tptr], res);
	nodi.xoffset -= widthptr;
	cgen(&nodi, &nodo);	// REG = 0(REG) -- i.tab
	regfree(&nodi);

	regalloc(&nodr, types[tptr], &nodo);
	if(n->left->xoffset == BADWIDTH)
		fatal("cgen_callinter: badwidth");
	cgen_checknil(&nodo);
	nodo.op = OINDREG;
	// Offset of the method's entry in the itab's function table.
	nodo.xoffset = n->left->xoffset + 3*widthptr + 8;

	if(proc == 0) {
		// plain call: use direct c function pointer - more efficient
		cgen(&nodo, &nodr);	// REG = 20+offset(REG) -- i.tab->fun[f]
		proc = 3;
	} else {
		// go/defer. generate go func value.
		gins(ALEAL, &nodo, &nodr);	// REG = &(20+offset(REG)) -- i.tab->fun[f]
	}

	nodr.type = n->left->type;
	ginscall(&nodr, proc);

	regfree(&nodr);
	regfree(&nodo);
}

/*
 * generate function call;
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 */
void
cgen_call(Node *n, int proc)
{
	Type *t;
	Node nod, afun;

	if(n == N)
		return;

	if(n->left->ullman >= UINF) {
		// if name involves a fn call
		// precompute the address of the fn
		tempname(&afun, types[tptr]);
		cgen(n->left, &afun);
	}

	genlist(n->list);		// assign the args
	t = n->left->type;

	// call tempname pointer
	if(n->left->ullman >= UINF) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, &afun);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call pointer
	if(n->left->op != ONAME || n->left->class != PFUNC) {
		regalloc(&nod, types[tptr], N);
		cgen_as(&nod, n->left);
		nod.type = t;
		ginscall(&nod, proc);
		regfree(&nod);
		return;
	}

	// call direct
	n->left->method = 1;
	ginscall(n->left, proc);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = return value from call.
 */
void
cgen_callret(Node *n, Node *res)
{
	Node nod;
	Type *fp, *t;
	Iter flist;

	t = n->left->type;
	if(t->etype == TPTR32 || t->etype == TPTR64)
		t = t->type;

	fp = structfirst(&flist, getoutarg(t));
	if(fp == T)
		fatal("cgen_callret: nil");

	// The result lives in the callee's out-args area, addressed
	// relative to SP.
	memset(&nod, 0, sizeof(nod));
	nod.op = OINDREG;
	nod.val.u.reg = D_SP;
	nod.addable = 1;

	nod.xoffset = fp->width;
	nod.type = fp->type;
	cgen_as(res, &nod);
}

/*
 * call to n has already been generated.
 * generate:
 *	res = &return value from call.
 */
void
cgen_aret(Node *n, Node *res)
{
	Node nod1, nod2;
	Type *fp, *t;
	Iter flist;

	t = n->left->type;
	if(isptr[t->etype])
		t = t->type;

	fp = structfirst(&flist, getoutarg(t));
	if(fp == T)
		fatal("cgen_aret: nil");

	// Address of the first result in the SP-relative out-args area.
	memset(&nod1, 0, sizeof(nod1));
	nod1.op = OINDREG;
	nod1.val.u.reg = D_SP;
	nod1.addable = 1;

	nod1.xoffset = fp->width;
	nod1.type = fp->type;

	if(res->op != OREGISTER) {
		// res is in memory: compute the address into a scratch
		// register first, then store it.
		regalloc(&nod2, types[tptr], res);
		gins(ALEAL, &nod1, &nod2);
		gins(AMOVL, &nod2, res);
		regfree(&nod2);
	} else
		gins(ALEAL, &nod1, res);
}

/*
 * generate return.
 * n->left is assignments to return values.
 */
void
cgen_ret(Node *n)
{
	Prog *p;

	genlist(n->list);		// copy out args
	if(retpc) {
		// Inside a function with defer: jump to the common
		// deferreturn epilogue instead of returning directly.
		gjmp(retpc);
		return;
	}
	p = gins(ARET, N, N);
	if(n->op == ORETJMP) {
		// Tail call: RET becomes a jump to the named function.
		p->to.type = D_EXTERN;
		p->to.sym = n->left->sym;
	}
}

/*
 * generate += *= etc.
 */
void
cgen_asop(Node *n)
{
	Node n1, n2, n3, n4;
	Node *nl, *nr;
	Prog *p1;
	Addr addr;
	int a;

	nl = n->left;
	nr = n->right;

	// Both sides involve function calls: evaluate the right side into
	// a temporary first to keep call ordering well-defined.
	if(nr->ullman >= UINF && nl->ullman >= UINF) {
		tempname(&n1, nr->type);
		cgen(nr, &n1);
		n2 = *n;
		n2.right = &n1;
		cgen_asop(&n2);
		goto ret;
	}

	// The fast paths below only apply to small integer operands.
	if(!isint[nl->type->etype])
		goto hard;
	if(!isint[nr->type->etype])
		goto hard;
	if(is64(nl->type) || is64(nr->type))
		goto hard;

	// x += 1 / x -= 1 become INC/DEC, directly on nl or through a
	// sudoaddable addressing mode.
	switch(n->etype) {
	case OADD:
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(OINC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;

	case OSUB:
		if(smallintconst(nr))
		if(mpgetfix(nr->val.u.xval) == 1) {
			a = optoas(ODEC, nl->type);
			if(nl->addable) {
				gins(a, N, nl);
				goto ret;
			}
			if(sudoaddable(a, nl, &addr)) {
				p1 = gins(a, N, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
		}
		break;
	}

	// Other read-modify-write ops that x86 can apply directly to a
	// memory operand.
	switch(n->etype) {
	case OADD:
	case OSUB:
	case OXOR:
	case OAND:
	case OOR:
		a = optoas(n->etype, nl->type);
		if(nl->addable) {
			if(smallintconst(nr)) {
				gins(a, nr, nl);
				goto ret;
			}
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			gins(a, &n2, nl);
			regfree(&n2);
			goto ret;
		}
		if(nr->ullman < UINF)
		if(sudoaddable(a, nl, &addr)) {
			if(smallintconst(nr)) {
				p1 = gins(a, nr, N);
				p1->to = addr;
				sudoclean();
				goto ret;
			}
			regalloc(&n2, nr->type, N);
			cgen(nr, &n2);
			p1 = gins(a, &n2, N);
			p1->to = addr;
			regfree(&n2);
			sudoclean();
			goto ret;
		}
	}

hard:
	// General case: compute nl op nr into a temporary and store back.
	n2.op = 0;
	n1.op = 0;
	if(nr->ullman >= nl->ullman || nl->addable) {
		mgen(nr, &n2, N);
		nr = &n2;
	} else {
		tempname(&n2, nr->type);
		cgen(nr, &n2);
		nr = &n2;
	}
	if(!nl->addable) {
		igen(nl, &n1, N);
		nl = &n1;
	}

	n3 = *n;
	n3.left = nl;
	n3.right = nr;
	n3.op = n->etype;

	mgen(&n3, &n4, N);
	gmove(&n4, nl);

	if(n1.op)
		regfree(&n1);
	mfree(&n2);
	mfree(&n4);

ret:
	;
}

// samereg reports whether a and b are the same machine register.
int
samereg(Node *a, Node *b)
{
	if(a->op != OREGISTER)
		return 0;
	if(b->op != OREGISTER)
		return 0;
	if(a->val.u.reg != b->val.u.reg)
		return 0;
	return 1;
}

/*
 * generate division.
 * caller must set:
 *	ax = allocated AX register
 *	dx = allocated DX register
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
void
dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
{
	int check;
	Node n1, t1, t2, t3, t4, n4, nz;
	Type *t, *t0;
	Prog *p1, *p2;

	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t = nl->type;
	t0 = t;
	check = 0;
	if(issigned[t->etype]) {
		check = 1;
		// Skip the runtime check when a constant operand proves the
		// MININT / -1 case cannot occur.
		if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -1LL<<(t->width*8-1))
			check = 0;
		else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
			check = 0;
	}
	if(t->width < 4) {
		// Widen int8/int16 operands to 32 bits to avoid the byte
		// divide and the overflow case entirely.
		if(issigned[t->etype])
			t = types[TINT32];
		else
			t = types[TUINT32];
		check = 0;
	}

	tempname(&t1, t);
	tempname(&t2, t);
	if(t0 != t) {
		tempname(&t3, t0);
		tempname(&t4, t0);
		cgen(nl, &t3);
		cgen(nr, &t4);
		// Convert.
		gmove(&t3, &t1);
		gmove(&t4, &t2);
	} else {
		cgen(nl, &t1);
		cgen(nr, &t2);
	}

	if(!samereg(ax, res) && !samereg(dx, res))
		regalloc(&n1, t, res);
	else
		regalloc(&n1, t, N);
	gmove(&t2, &n1);
	gmove(&t1, ax);
	p2 = P;
	if(check) {
		nodconst(&n4, t, -1);
		gins(optoas(OCMP, t), &n1, &n4);
		p1 = gbranch(optoas(ONE, t), T, +1);
		if(op == ODIV) {
			// a / (-1) is -a.
			gins(optoas(OMINUS, t), N, ax);
			gmove(ax, res);
		} else {
			// a % (-1) is 0.
			nodconst(&n4, t, 0);
			gmove(&n4, res);
		}
		p2 = gbranch(AJMP, T, 0);
		patch(p1, pc);
	}
	if(!issigned[t->etype]) {
		// Unsigned divide: zero-extend into DX.
		nodconst(&nz, t, 0);
		gmove(&nz, dx);
	} else
		// Signed divide: sign-extend AX into DX (CDQ).
		gins(optoas(OEXTEND, t), N, N);
	gins(optoas(op, t), &n1, N);
	regfree(&n1);

	// DIV leaves the quotient in AX and the remainder in DX.
	if(op == ODIV)
		gmove(ax, res);
	else
		gmove(dx, res);
	if(check)
		patch(p2, pc);
}

// savex reserves machine register dr for use as *x, first spilling its
// current contents to a temporary in *oldx if the register is live and
// is not the destination.
static void
savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
{
	int r;

	r = reg[dr];
	nodreg(x, types[TINT32], dr);

	// save current ax and dx if they are live
	// and not the destination
	memset(oldx, 0, sizeof *oldx);
	if(r > 0 && !samereg(x, res)) {
		tempname(oldx, types[TINT32]);
		gmove(x, oldx);
	}

	regalloc(x, t, x);
}

// restx releases the register *x and restores its previous contents
// from *oldx if savex spilled them.
static void
restx(Node *x, Node *oldx)
{
	regfree(x);

	if(oldx->op != 0) {
		x->type = types[TINT32];
		gmove(oldx, x);
	}
}

/*
 * generate division according to op, one of:
 *	res = nl / nr
 *	res = nl % nr
 */
void
cgen_div(int op, Node *nl, Node *nr, Node *res)
{
	Node ax, dx, oldax, olddx;
	Type *t;

	if(is64(nl->type))
		fatal("cgen_div %T", nl->type);

	if(issigned[nl->type->etype])
		t = types[TINT32];
	else
		t = types[TUINT32];
	// x86 divide requires AX and DX; claim them for the duration.
	savex(D_AX, &ax, &oldax, res, t);
	savex(D_DX, &dx, &olddx, res, t);
	dodiv(op, nl, nr, res, &ax, &dx);
	restx(&dx, &olddx);
	restx(&ax, &oldax);
}

/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
void
cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, nt, cx, oldcx, hi, lo;
	int a, w;
	Prog *p1, *p2;
	uvlong sc;

	if(nl->type->width > 4)
		fatal("cgen_shift %T", nl->type);

	w = nl->type->width * 8;
	a = optoas(op, nl->type);

	if(nr->op == OLITERAL) {
		// Constant shift count: no CX needed, no bounds check.
		tempname(&n2, nl->type);
		cgen(nl, &n2);
		regalloc(&n1, nl->type, res);
		gmove(&n2, &n1);
		sc = mpgetfix(nr->val.u.xval);
		if(sc >= nl->type->width*8) {
			// large shift gets 2 shifts by width-1
			gins(a, ncon(w-1), &n1);
			gins(a, ncon(w-1), &n1);
		} else
			gins(a, nr, &n1);
		gmove(&n1, res);
		regfree(&n1);
		return;
	}

	// Variable shift count must be in CX; spill CX if it is live.
	memset(&oldcx, 0, sizeof oldcx);
	nodreg(&cx, types[TUINT32], D_CX);
	if(reg[D_CX] > 1 && !samereg(&cx, res)) {
		tempname(&oldcx, types[TUINT32]);
		gmove(&cx, &oldcx);
	}

	if(nr->type->width > 4) {
		// 64-bit shift count: stage it in a temporary; the low word
		// is moved into CX later.
		tempname(&nt, nr->type);
		n1 = nt;
	} else {
		nodreg(&n1, types[TUINT32], D_CX);
		regalloc(&n1, nr->type, &n1); // to hold the shift type in CX
	}

	if(samereg(&cx, res))
		regalloc(&n2, nl->type, N);
	else
		regalloc(&n2, nl->type, res);
	// Evaluate the side with more register pressure first.
	if(nl->ullman >= nr->ullman) {
		cgen(nl, &n2);
		cgen(nr, &n1);
	} else {
		cgen(nr, &n1);
		cgen(nl, &n2);
	}

	// test and fix up large shifts
	if(bounded) {
		if(nr->type->width > 4) {
			// delayed reg alloc
			nodreg(&n1, types[TUINT32], D_CX);
			regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
			split64(&nt, &lo, &hi);
			gmove(&lo, &n1);
			splitclean();
		}
	} else {
		if(nr->type->width > 4) {
			// delayed reg alloc
			nodreg(&n1, types[TUINT32], D_CX);
			regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
			split64(&nt, &lo, &hi);
			gmove(&lo, &n1);
			// Count >= width if the high word is nonzero or the low
			// word is >= w.
			gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
			p2 = gbranch(optoas(ONE, types[TUINT32]), T, +1);
			gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
			splitclean();
			patch(p2, pc);
		} else {
			gins(optoas(OCMP, nr->type), &n1, ncon(w));
			p1 = gbranch(optoas(OLT, types[TUINT32]), T, +1);
		}
		// Out-of-range count: arithmetic right shift saturates to the
		// sign bit; other shifts produce zero.
		if(op == ORSH && issigned[nl->type->etype]) {
			gins(a, ncon(w-1), &n2);
		} else {
			gmove(ncon(0), &n2);
		}
		patch(p1, pc);
	}

	gins(a, &n1, &n2);

	if(oldcx.op != 0)
		gmove(&oldcx, &cx);

	gmove(&n2, res);

	regfree(&n1);
	regfree(&n2);
}

/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
void
cgen_bmul(int op, Node *nl, Node *nr, Node *res)
{
	Node n1, n2, nt, *tmp;
	Type *t;
	int a;

	// copy from byte to full registers
	t = types[TUINT32];
	if(issigned[nl->type->etype])
		t = types[TINT32];

	// largest ullman on left.
	if(nl->ullman < nr->ullman) {
		tmp = nl;
		nl = nr;
		nr = tmp;
	}

	tempname(&nt, nl->type);
	cgen(nl, &nt);
	regalloc(&n1, t, res);
	cgen(nr, &n1);
	regalloc(&n2, t, N);
	gmove(&nt, &n2);
	a = optoas(op, t);
	gins(a, &n2, &n1);
	regfree(&n2);
	// gmove truncates the full-width product back to the byte result.
	gmove(&n1, res);
	regfree(&n1);
}

/*
 * generate high multiply:
 *	res = (nl*nr) >> width
 */
void
cgen_hmul(Node *nl, Node *nr, Node *res)
{
	Type *t;
	int a;
	Node n1, n2, ax, dx;

	t = nl->type;
	a = optoas(OHMUL, t);
	// gen nl in n1.
	tempname(&n1, t);
	cgen(nl, &n1);
	// gen nr in n2.
	regalloc(&n2, t, res);
	cgen(nr, &n2);

	// multiply.
	nodreg(&ax, t, D_AX);
	gmove(&n2, &ax);
	gins(a, &n1, N);
	regfree(&n2);

	if(t->width == 1) {
		// byte multiply behaves differently.
		// The high byte lands in AH; move it down to DL.
		nodreg(&ax, t, D_AH);
		nodreg(&dx, t, D_DL);
		gmove(&ax, &dx);
	}
	// For wider operands the high word is left in DX by the multiply.
	nodreg(&dx, t, D_DX);
	gmove(&dx, res);
}

static void cgen_float387(Node *n, Node *res);
static void cgen_floatsse(Node *n, Node *res);

/*
 * generate floating-point operation.
 */
void
cgen_float(Node *n, Node *res)
{
	Node *nl;
	Node n1, n2;
	Prog *p1, *p2, *p3;

	nl = n->left;
	switch(n->op) {
	case OEQ:
	case ONE:
	case OLT:
	case OLE:
	case OGE:
		// Comparison producing a bool: branch-and-set via bgen.
		p1 = gbranch(AJMP, T, 0);
		p2 = pc;
		gmove(nodbool(1), res);
		p3 = gbranch(AJMP, T, 0);
		patch(p1, pc);
		bgen(n, 1, 0, p2);
		gmove(nodbool(0), res);
		patch(p3, pc);
		return;

	case OPLUS:
		// Unary plus is the identity.
		cgen(nl, res);
		return;

	case OCONV:
		if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
			cgen(nl, res);
			return;
		}

		// Route the conversion through a temporary of the target type.
		tempname(&n2, n->type);
		mgen(nl, &n1, res);
		gmove(&n1, &n2);
		gmove(&n2, res);
		mfree(&n1);
		return;
	}

	// Dispatch on the selected FPU: SSE2 or the x87 stack.
	if(use_sse)
		cgen_floatsse(n, res);
	else
		cgen_float387(n, res);
}

// floating-point.
// 387 (not SSE2)
static void
cgen_float387(Node *n, Node *res)
{
	Node f0, f1;
	Node *nl, *nr;

	nl = n->left;
	nr = n->right;
	// f0 and f1 name the top two slots of the x87 register stack.
	nodreg(&f0, nl->type, D_F0);
	nodreg(&f1, n->type, D_F0+1);
	if(nr != N)
		goto flt2;

	// unary
	cgen(nl, &f0);
	if(n->op != OCONV && n->op != OPLUS)
		gins(foptoas(n->op, n->type, 0), N, N);
	gmove(&f0, res);
	return;

flt2:	// binary
	// Evaluate the heavier operand first; use Frev/Fpop variants to
	// match operand order and keep the x87 stack balanced.
	if(nl->ullman >= nr->ullman) {
		cgen(nl, &f0);
		if(nr->addable)
			gins(foptoas(n->op, n->type, 0), nr, &f0);
		else {
			cgen(nr, &f0);
			gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
		}
	} else {
		cgen(nr, &f0);
		if(nl->addable)
			gins(foptoas(n->op, n->type, Frev), nl, &f0);
		else {
			cgen(nl, &f0);
			gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
		}
	}
	gmove(&f0, res);
	return;
}

// SSE2 code generation for floating-point expressions.
static void
cgen_floatsse(Node *n, Node *res)
{
	Node *nl, *nr, *r;
	Node n1, n2, nt;
	int a;

	nl = n->left;
	nr = n->right;
	switch(n->op) {
	default:
		dump("cgen_floatsse", n);
		fatal("cgen_floatsse %O", n->op);
		return;

	case OMINUS:
	case OCOM:
		// Negation is implemented as multiplication by -1.
		nr = nodintconst(-1);
		convlit(&nr, n->type);
		a = foptoas(OMUL, nl->type, 0);
		goto sbop;

	// symmetric binary
	case OADD:
	case OMUL:
		a = foptoas(n->op, nl->type, 0);
		goto sbop;

	// asymmetric binary
	case OSUB:
	case OMOD:
	case ODIV:
		a = foptoas(n->op, nl->type, 0);
		goto abop;
	}

sbop:	// symmetric binary
	// For commutative ops, put the heavier operand on the left.
	if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
		r = nl;
		nl = nr;
		nr = r;
	}

abop:	// asymmetric binary
	if(nl->ullman >= nr->ullman) {
		tempname(&nt, nl->type);
		cgen(nl, &nt);
		mgen(nr, &n2, N);
		regalloc(&n1, nl->type, res);
		gmove(&nt, &n1);
		gins(a, &n2, &n1);
		gmove(&n1, res);
		regfree(&n1);
		mfree(&n2);
	} else {
		regalloc(&n2, nr->type, res);
		cgen(nr, &n2);
		regalloc(&n1, nl->type, N);
		cgen(nl, &n1);
		gins(a, &n2, &n1);
		regfree(&n2);
		gmove(&n1, res);
		regfree(&n1);
	}
	return;
}

// bgen_float generates a conditional branch to `to` for the
// floating-point comparison n, taken when the comparison's truth
// matches `true`. likely is the branch-prediction hint.
void
bgen_float(Node *n, int true, int likely, Prog *to)
{
	int et, a;
	Node *nl, *nr, *r;
	Node n1, n2, n3, tmp, t1, t2, ax;
	Prog *p1, *p2;

	nl = n->left;
	nr = n->right;
	a = n->op;
	if(!true) {
		// brcom is not valid on floats when NaN is involved.
		// Instead, branch around an unconditional jump to `to`.
		p1 = gbranch(AJMP, T, 0);
		p2 = gbranch(AJMP, T, 0);
		patch(p1, pc);
		// No need to avoid re-genning ninit.
		bgen_float(n, 1, -likely, p2);
		patch(gbranch(AJMP, T, 0), to);
		patch(p2, pc);
		return;
	}

	if(use_sse)
		goto sse;
	else
		goto x87;

x87:
	a = brrev(a);	// because the args are stacked
	if(a == OGE || a == OGT) {
		// only < and <= work right with NaN; reverse if needed
		r = nr;
		nr = nl;
		nl = r;
		a = brrev(a);
	}

	nodreg(&tmp, nr->type, D_F0);
	nodreg(&n2, nr->type, D_F0 + 1);
	nodreg(&ax, types[TUINT16], D_AX);
	et = simsimtype(nr->type);
	if(et == TFLOAT64) {
		if(nl->ullman > nr->ullman) {
			cgen(nl, &tmp);
			cgen(nr, &tmp);
			gins(AFXCHD, &tmp, &n2);
		} else {
			cgen(nr, &tmp);
			cgen(nl, &tmp);
		}

		gins(AFUCOMIP, &tmp, &n2);
		gins(AFMOVDP, &tmp, &tmp);	// annoying pop but still better than STSW+SAHF
	} else {
		// TODO(rsc): The moves back and forth to memory
		// here are for truncating the value to 32 bits.
		// This handles 32-bit comparison but presumably
		// all the other ops have the same problem.
		// We need to figure out what the right general
		// solution is, besides telling people to use float64.
		tempname(&t1, types[TFLOAT32]);
		tempname(&t2, types[TFLOAT32]);
		cgen(nr, &t1);
		cgen(nl, &t2);
		gmove(&t2, &tmp);
		gins(AFCOMFP, &t1, &tmp);
		gins(AFSTSW, N, &ax);
		gins(ASAHF, N, N);
	}

	goto ret;

sse:
	if(!nl->addable) {
		tempname(&n1, nl->type);
		cgen(nl, &n1);
		nl = &n1;
	}
	if(!nr->addable) {
		tempname(&tmp, nr->type);
		cgen(nr, &tmp);
		nr = &tmp;
	}
	regalloc(&n2, nr->type, N);
	gmove(nr, &n2);
	nr = &n2;

	if(nl->op != OREGISTER) {
		regalloc(&n3, nl->type, N);
		gmove(nl, &n3);
		nl = &n3;
	}

	if(a == OGE || a == OGT) {
		// only < and <= work right with NaN; reverse if needed
		r = nr;
		nr = nl;
		nl = r;
		a = brrev(a);
	}

	gins(foptoas(OCMP, nr->type, 0), nl, nr);
	if(nl->op == OREGISTER)
		regfree(nl);
	regfree(nr);

ret:
	// Emit the branch, with extra parity checks so NaN compares
	// unordered the way Go requires.
	if(a == OEQ) {
		// neither NE nor P
		p1 = gbranch(AJNE, T, -likely);
		p2 = gbranch(AJPS, T, -likely);
		patch(gbranch(AJMP, T, 0), to);
		patch(p1, pc);
		patch(p2, pc);
	} else if(a == ONE) {
		// either NE or P
		patch(gbranch(AJNE, T, likely), to);
		patch(gbranch(AJPS, T, likely), to);
	} else
		patch(gbranch(optoas(a, nr->type), T, likely), to);
}

// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
void
expandchecks(Prog *firstp)
{
	Prog *p, *p1, *p2;

	for(p = firstp; p != P; p = p->link) {
		if(p->as != ACHECKNIL)
			continue;
		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
			warnl(p->lineno, "generated nil check");
		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = mal(sizeof *p1);
		p2 = mal(sizeof *p2);
		clearp(p1);
		clearp(p2);
		// Splice the two new instructions in after p.
		p1->link = p2;
		p2->link = p->link;
		p->link = p1;
		p1->lineno = p->lineno;
		p2->lineno = p->lineno;
		p1->loc = 9999;
		p2->loc = 9999;
		p->as = ACMPL;
		p->to.type = D_CONST;
		p->to.offset = 0;
		p1->as = AJNE;
		p1->from.type = D_CONST;
		p1->from.offset = 1; // likely
		p1->to.type = D_BRANCH;
		p1->to.u.branch = p2->link;
		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2->as = AMOVL;
		p2->from.type = D_AX;
		if(regtyp(&p->from))
			p2->to.type = p->from.type + D_INDIR;
		else
			p2->to.type = D_INDIR+D_NONE;
		p2->to.offset = 0;
	}
}