diff options
author | Michael Stapelberg <stapelberg@debian.org> | 2014-06-19 09:22:53 +0200 |
---|---|---|
committer | Michael Stapelberg <stapelberg@debian.org> | 2014-06-19 09:22:53 +0200 |
commit | 8a39ee361feb9bf46d728ff1ba4f07ca1d9610b1 (patch) | |
tree | 4449f2036cccf162e8417cc5841a35815b3e7ac5 /src/liblink/asm6.c | |
parent | c8bf49ef8a92e2337b69c14b9b88396efe498600 (diff) | |
download | golang-upstream/1.3.tar.gz |
Imported Upstream version 1.3 (upstream/1.3)
Diffstat (limited to 'src/liblink/asm6.c')
-rw-r--r-- | src/liblink/asm6.c | 3585 |
1 files changed, 3585 insertions, 0 deletions
diff --git a/src/liblink/asm6.c b/src/liblink/asm6.c new file mode 100644 index 000000000..66afc7a12 --- /dev/null +++ b/src/liblink/asm6.c @@ -0,0 +1,3585 @@ +// Inferno utils/6l/span.c +// http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// Instruction layout. 
+ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <link.h> +#include "../cmd/6l/6.out.h" +#include "../pkg/runtime/stack.h" + +enum +{ + MaxAlign = 32, // max data alignment + + // Loop alignment constants: + // want to align loop entry to LoopAlign-byte boundary, + // and willing to insert at most MaxLoopPad bytes of NOP to do so. + // We define a loop entry as the target of a backward jump. + // + // gcc uses MaxLoopPad = 10 for its 'generic x86-64' config, + // and it aligns all jump targets, not just backward jump targets. + // + // As of 6/1/2012, the effect of setting MaxLoopPad = 10 here + // is very slight but negative, so the alignment is disabled by + // setting MaxLoopPad = 0. The code is here for reference and + // for future experiments. + // + LoopAlign = 16, + MaxLoopPad = 0, + + FuncAlign = 16 +}; + +extern char *anames6[]; + +typedef struct Optab Optab; +typedef struct Movtab Movtab; + +struct Optab +{ + short as; + uchar* ytab; + uchar prefix; + uchar op[23]; +}; +struct Movtab +{ + short as; + uchar ft; + uchar tt; + uchar code; + uchar op[4]; +}; + +enum +{ + Yxxx = 0, + Ynone, + Yi0, + Yi1, + Yi8, + Ys32, + Yi32, + Yi64, + Yiauto, + Yal, + Ycl, + Yax, + Ycx, + Yrb, + Yrl, + Yrf, + Yf0, + Yrx, + Ymb, + Yml, + Ym, + Ybr, + Ycol, + + Ycs, Yss, Yds, Yes, Yfs, Ygs, + Ygdtr, Yidtr, Yldtr, Ymsw, Ytask, + Ycr0, Ycr1, Ycr2, Ycr3, Ycr4, Ycr5, Ycr6, Ycr7, Ycr8, + Ydr0, Ydr1, Ydr2, Ydr3, Ydr4, Ydr5, Ydr6, Ydr7, + Ytr0, Ytr1, Ytr2, Ytr3, Ytr4, Ytr5, Ytr6, Ytr7, Yrl32, Yrl64, + Ymr, Ymm, + Yxr, Yxm, + Ytls, + Ymax, + + Zxxx = 0, + + Zlit, + Zlitm_r, + Z_rp, + Zbr, + Zcall, + Zcallindreg, + Zib_, + Zib_rp, + Zibo_m, + Zibo_m_xm, + Zil_, + Zil_rp, + Ziq_rp, + Zilo_m, + Ziqo_m, + Zjmp, + Zloop, + Zo_iw, + Zm_o, + Zm_r, + Zm2_r, + Zm_r_xm, + Zm_r_i_xm, + Zm_r_3d, + Zm_r_xm_nr, + Zr_m_xm_nr, + Zibm_r, /* mmx1,mmx2/mem64,imm8 */ + Zmb_r, + Zaut_r, + Zo_m, + Zo_m64, + Zpseudo, + Zr_m, + Zr_m_xm, + Zr_m_i_xm, + Zrp_, + Z_ib, + Z_il, + Zm_ibo, + 
Zm_ilo, + Zib_rr, + Zil_rr, + Zclr, + Zbyte, + Zmax, + + Px = 0, + P32 = 0x32, /* 32-bit only */ + Pe = 0x66, /* operand escape */ + Pm = 0x0f, /* 2byte opcode escape */ + Pq = 0xff, /* both escapes: 66 0f */ + Pb = 0xfe, /* byte operands */ + Pf2 = 0xf2, /* xmm escape 1: f2 0f */ + Pf3 = 0xf3, /* xmm escape 2: f3 0f */ + Pq3 = 0x67, /* xmm escape 3: 66 48 0f */ + Pw = 0x48, /* Rex.w */ + Py = 0x80, /* defaults to 64-bit mode */ + + Rxf = 1<<9, /* internal flag for Rxr on from */ + Rxt = 1<<8, /* internal flag for Rxr on to */ + Rxw = 1<<3, /* =1, 64-bit operand size */ + Rxr = 1<<2, /* extend modrm reg */ + Rxx = 1<<1, /* extend sib index */ + Rxb = 1<<0, /* extend modrm r/m, sib base, or opcode reg */ + + Maxand = 10, /* in -a output width of the byte codes */ +}; + +static char ycover[Ymax*Ymax]; +static int reg[D_NONE]; +static int regrex[D_NONE+1]; +static void asmins(Link *ctxt, Prog *p); + +static uchar ynone[] = +{ + Ynone, Ynone, Zlit, 1, + 0 +}; +static uchar ytext[] = +{ + Ymb, Yi64, Zpseudo,1, + 0 +}; +static uchar ynop[] = +{ + Ynone, Ynone, Zpseudo,0, + Ynone, Yiauto, Zpseudo,0, + Ynone, Yml, Zpseudo,0, + Ynone, Yrf, Zpseudo,0, + Ynone, Yxr, Zpseudo,0, + Yiauto, Ynone, Zpseudo,0, + Yml, Ynone, Zpseudo,0, + Yrf, Ynone, Zpseudo,0, + Yxr, Ynone, Zpseudo,1, + 0 +}; +static uchar yfuncdata[] = +{ + Yi32, Ym, Zpseudo, 0, + 0 +}; +static uchar ypcdata[] = +{ + Yi32, Yi32, Zpseudo, 0, + 0 +}; +static uchar yxorb[] = +{ + Yi32, Yal, Zib_, 1, + Yi32, Ymb, Zibo_m, 2, + Yrb, Ymb, Zr_m, 1, + Ymb, Yrb, Zm_r, 1, + 0 +}; +static uchar yxorl[] = +{ + Yi8, Yml, Zibo_m, 2, + Yi32, Yax, Zil_, 1, + Yi32, Yml, Zilo_m, 2, + Yrl, Yml, Zr_m, 1, + Yml, Yrl, Zm_r, 1, + 0 +}; +static uchar yaddl[] = +{ + Yi8, Yml, Zibo_m, 2, + Yi32, Yax, Zil_, 1, + Yi32, Yml, Zilo_m, 2, + Yrl, Yml, Zr_m, 1, + Yml, Yrl, Zm_r, 1, + 0 +}; +static uchar yincb[] = +{ + Ynone, Ymb, Zo_m, 2, + 0 +}; +static uchar yincw[] = +{ + Ynone, Yml, Zo_m, 2, + 0 +}; +static uchar yincl[] = +{ + Ynone, Yml, Zo_m, 
2, + 0 +}; +static uchar ycmpb[] = +{ + Yal, Yi32, Z_ib, 1, + Ymb, Yi32, Zm_ibo, 2, + Ymb, Yrb, Zm_r, 1, + Yrb, Ymb, Zr_m, 1, + 0 +}; +static uchar ycmpl[] = +{ + Yml, Yi8, Zm_ibo, 2, + Yax, Yi32, Z_il, 1, + Yml, Yi32, Zm_ilo, 2, + Yml, Yrl, Zm_r, 1, + Yrl, Yml, Zr_m, 1, + 0 +}; +static uchar yshb[] = +{ + Yi1, Ymb, Zo_m, 2, + Yi32, Ymb, Zibo_m, 2, + Ycx, Ymb, Zo_m, 2, + 0 +}; +static uchar yshl[] = +{ + Yi1, Yml, Zo_m, 2, + Yi32, Yml, Zibo_m, 2, + Ycl, Yml, Zo_m, 2, + Ycx, Yml, Zo_m, 2, + 0 +}; +static uchar ytestb[] = +{ + Yi32, Yal, Zib_, 1, + Yi32, Ymb, Zibo_m, 2, + Yrb, Ymb, Zr_m, 1, + Ymb, Yrb, Zm_r, 1, + 0 +}; +static uchar ytestl[] = +{ + Yi32, Yax, Zil_, 1, + Yi32, Yml, Zilo_m, 2, + Yrl, Yml, Zr_m, 1, + Yml, Yrl, Zm_r, 1, + 0 +}; +static uchar ymovb[] = +{ + Yrb, Ymb, Zr_m, 1, + Ymb, Yrb, Zm_r, 1, + Yi32, Yrb, Zib_rp, 1, + Yi32, Ymb, Zibo_m, 2, + 0 +}; +static uchar ymbs[] = +{ + Ymb, Ynone, Zm_o, 2, + 0 +}; +static uchar ybtl[] = +{ + Yi8, Yml, Zibo_m, 2, + Yrl, Yml, Zr_m, 1, + 0 +}; +static uchar ymovw[] = +{ + Yrl, Yml, Zr_m, 1, + Yml, Yrl, Zm_r, 1, + Yi0, Yrl, Zclr, 1, + Yi32, Yrl, Zil_rp, 1, + Yi32, Yml, Zilo_m, 2, + Yiauto, Yrl, Zaut_r, 2, + 0 +}; +static uchar ymovl[] = +{ + Yrl, Yml, Zr_m, 1, + Yml, Yrl, Zm_r, 1, + Yi0, Yrl, Zclr, 1, + Yi32, Yrl, Zil_rp, 1, + Yi32, Yml, Zilo_m, 2, + Yml, Ymr, Zm_r_xm, 1, // MMX MOVD + Ymr, Yml, Zr_m_xm, 1, // MMX MOVD + Yml, Yxr, Zm_r_xm, 2, // XMM MOVD (32 bit) + Yxr, Yml, Zr_m_xm, 2, // XMM MOVD (32 bit) + Yiauto, Yrl, Zaut_r, 2, + 0 +}; +static uchar yret[] = +{ + Ynone, Ynone, Zo_iw, 1, + Yi32, Ynone, Zo_iw, 1, + 0 +}; +static uchar ymovq[] = +{ + Yrl, Yml, Zr_m, 1, // 0x89 + Yml, Yrl, Zm_r, 1, // 0x8b + Yi0, Yrl, Zclr, 1, // 0x31 + Ys32, Yrl, Zilo_m, 2, // 32 bit signed 0xc7,(0) + Yi64, Yrl, Ziq_rp, 1, // 0xb8 -- 32/64 bit immediate + Yi32, Yml, Zilo_m, 2, // 0xc7,(0) + Ym, Ymr, Zm_r_xm_nr, 1, // MMX MOVQ (shorter encoding) + Ymr, Ym, Zr_m_xm_nr, 1, // MMX MOVQ + Ymm, Ymr, Zm_r_xm, 1, // MMX MOVD + Ymr, Ymm, 
Zr_m_xm, 1, // MMX MOVD + Yxr, Ymr, Zm_r_xm_nr, 2, // MOVDQ2Q + Yxm, Yxr, Zm_r_xm_nr, 2, // MOVQ xmm1/m64 -> xmm2 + Yxr, Yxm, Zr_m_xm_nr, 2, // MOVQ xmm1 -> xmm2/m64 + Yml, Yxr, Zm_r_xm, 2, // MOVD xmm load + Yxr, Yml, Zr_m_xm, 2, // MOVD xmm store + Yiauto, Yrl, Zaut_r, 2, // built-in LEAQ + 0 +}; +static uchar ym_rl[] = +{ + Ym, Yrl, Zm_r, 1, + 0 +}; +static uchar yrl_m[] = +{ + Yrl, Ym, Zr_m, 1, + 0 +}; +static uchar ymb_rl[] = +{ + Ymb, Yrl, Zmb_r, 1, + 0 +}; +static uchar yml_rl[] = +{ + Yml, Yrl, Zm_r, 1, + 0 +}; +static uchar yrl_ml[] = +{ + Yrl, Yml, Zr_m, 1, + 0 +}; +static uchar yml_mb[] = +{ + Yrb, Ymb, Zr_m, 1, + Ymb, Yrb, Zm_r, 1, + 0 +}; +static uchar yrb_mb[] = +{ + Yrb, Ymb, Zr_m, 1, + 0 +}; +static uchar yxchg[] = +{ + Yax, Yrl, Z_rp, 1, + Yrl, Yax, Zrp_, 1, + Yrl, Yml, Zr_m, 1, + Yml, Yrl, Zm_r, 1, + 0 +}; +static uchar ydivl[] = +{ + Yml, Ynone, Zm_o, 2, + 0 +}; +static uchar ydivb[] = +{ + Ymb, Ynone, Zm_o, 2, + 0 +}; +static uchar yimul[] = +{ + Yml, Ynone, Zm_o, 2, + Yi8, Yrl, Zib_rr, 1, + Yi32, Yrl, Zil_rr, 1, + Yml, Yrl, Zm_r, 2, + 0 +}; +static uchar yimul3[] = +{ + Yml, Yrl, Zibm_r, 2, + 0 +}; +static uchar ybyte[] = +{ + Yi64, Ynone, Zbyte, 1, + 0 +}; +static uchar yin[] = +{ + Yi32, Ynone, Zib_, 1, + Ynone, Ynone, Zlit, 1, + 0 +}; +static uchar yint[] = +{ + Yi32, Ynone, Zib_, 1, + 0 +}; +static uchar ypushl[] = +{ + Yrl, Ynone, Zrp_, 1, + Ym, Ynone, Zm_o, 2, + Yi8, Ynone, Zib_, 1, + Yi32, Ynone, Zil_, 1, + 0 +}; +static uchar ypopl[] = +{ + Ynone, Yrl, Z_rp, 1, + Ynone, Ym, Zo_m, 2, + 0 +}; +static uchar ybswap[] = +{ + Ynone, Yrl, Z_rp, 2, + 0, +}; +static uchar yscond[] = +{ + Ynone, Ymb, Zo_m, 2, + 0 +}; +static uchar yjcond[] = +{ + Ynone, Ybr, Zbr, 0, + Yi0, Ybr, Zbr, 0, + Yi1, Ybr, Zbr, 1, + 0 +}; +static uchar yloop[] = +{ + Ynone, Ybr, Zloop, 1, + 0 +}; +static uchar ycall[] = +{ + Ynone, Yml, Zcallindreg, 0, + Yrx, Yrx, Zcallindreg, 2, + Ynone, Ybr, Zcall, 1, + 0 +}; +static uchar yduff[] = +{ + Ynone, Yi32, Zcall, 1, + 0 +}; 
+static uchar yjmp[] = +{ + Ynone, Yml, Zo_m64, 2, + Ynone, Ybr, Zjmp, 1, + 0 +}; + +static uchar yfmvd[] = +{ + Ym, Yf0, Zm_o, 2, + Yf0, Ym, Zo_m, 2, + Yrf, Yf0, Zm_o, 2, + Yf0, Yrf, Zo_m, 2, + 0 +}; +static uchar yfmvdp[] = +{ + Yf0, Ym, Zo_m, 2, + Yf0, Yrf, Zo_m, 2, + 0 +}; +static uchar yfmvf[] = +{ + Ym, Yf0, Zm_o, 2, + Yf0, Ym, Zo_m, 2, + 0 +}; +static uchar yfmvx[] = +{ + Ym, Yf0, Zm_o, 2, + 0 +}; +static uchar yfmvp[] = +{ + Yf0, Ym, Zo_m, 2, + 0 +}; +static uchar yfadd[] = +{ + Ym, Yf0, Zm_o, 2, + Yrf, Yf0, Zm_o, 2, + Yf0, Yrf, Zo_m, 2, + 0 +}; +static uchar yfaddp[] = +{ + Yf0, Yrf, Zo_m, 2, + 0 +}; +static uchar yfxch[] = +{ + Yf0, Yrf, Zo_m, 2, + Yrf, Yf0, Zm_o, 2, + 0 +}; +static uchar ycompp[] = +{ + Yf0, Yrf, Zo_m, 2, /* botch is really f0,f1 */ + 0 +}; +static uchar ystsw[] = +{ + Ynone, Ym, Zo_m, 2, + Ynone, Yax, Zlit, 1, + 0 +}; +static uchar ystcw[] = +{ + Ynone, Ym, Zo_m, 2, + Ym, Ynone, Zm_o, 2, + 0 +}; +static uchar ysvrs[] = +{ + Ynone, Ym, Zo_m, 2, + Ym, Ynone, Zm_o, 2, + 0 +}; +static uchar ymm[] = +{ + Ymm, Ymr, Zm_r_xm, 1, + Yxm, Yxr, Zm_r_xm, 2, + 0 +}; +static uchar yxm[] = +{ + Yxm, Yxr, Zm_r_xm, 1, + 0 +}; +static uchar yxcvm1[] = +{ + Yxm, Yxr, Zm_r_xm, 2, + Yxm, Ymr, Zm_r_xm, 2, + 0 +}; +static uchar yxcvm2[] = +{ + Yxm, Yxr, Zm_r_xm, 2, + Ymm, Yxr, Zm_r_xm, 2, + 0 +}; +/* +static uchar yxmq[] = +{ + Yxm, Yxr, Zm_r_xm, 2, + 0 +}; +*/ +static uchar yxr[] = +{ + Yxr, Yxr, Zm_r_xm, 1, + 0 +}; +static uchar yxr_ml[] = +{ + Yxr, Yml, Zr_m_xm, 1, + 0 +}; +static uchar ymr[] = +{ + Ymr, Ymr, Zm_r, 1, + 0 +}; +static uchar ymr_ml[] = +{ + Ymr, Yml, Zr_m_xm, 1, + 0 +}; +static uchar yxcmp[] = +{ + Yxm, Yxr, Zm_r_xm, 1, + 0 +}; +static uchar yxcmpi[] = +{ + Yxm, Yxr, Zm_r_i_xm, 2, + 0 +}; +static uchar yxmov[] = +{ + Yxm, Yxr, Zm_r_xm, 1, + Yxr, Yxm, Zr_m_xm, 1, + 0 +}; +static uchar yxcvfl[] = +{ + Yxm, Yrl, Zm_r_xm, 1, + 0 +}; +static uchar yxcvlf[] = +{ + Yml, Yxr, Zm_r_xm, 1, + 0 +}; +static uchar yxcvfq[] = +{ + Yxm, Yrl, Zm_r_xm, 2, + 0 
+}; +static uchar yxcvqf[] = +{ + Yml, Yxr, Zm_r_xm, 2, + 0 +}; +static uchar yps[] = +{ + Ymm, Ymr, Zm_r_xm, 1, + Yi8, Ymr, Zibo_m_xm, 2, + Yxm, Yxr, Zm_r_xm, 2, + Yi8, Yxr, Zibo_m_xm, 3, + 0 +}; +static uchar yxrrl[] = +{ + Yxr, Yrl, Zm_r, 1, + 0 +}; +static uchar ymfp[] = +{ + Ymm, Ymr, Zm_r_3d, 1, + 0, +}; +static uchar ymrxr[] = +{ + Ymr, Yxr, Zm_r, 1, + Yxm, Yxr, Zm_r_xm, 1, + 0 +}; +static uchar ymshuf[] = +{ + Ymm, Ymr, Zibm_r, 2, + 0 +}; +static uchar ymshufb[] = +{ + Yxm, Yxr, Zm2_r, 2, + 0 +}; +static uchar yxshuf[] = +{ + Yxm, Yxr, Zibm_r, 2, + 0 +}; +static uchar yextrw[] = +{ + Yxr, Yrl, Zibm_r, 2, + 0 +}; +static uchar yinsrw[] = +{ + Yml, Yxr, Zibm_r, 2, + 0 +}; +static uchar yinsr[] = +{ + Ymm, Yxr, Zibm_r, 3, + 0 +}; +static uchar ypsdq[] = +{ + Yi8, Yxr, Zibo_m, 2, + 0 +}; +static uchar ymskb[] = +{ + Yxr, Yrl, Zm_r_xm, 2, + Ymr, Yrl, Zm_r_xm, 1, + 0 +}; +static uchar ycrc32l[] = +{ + Yml, Yrl, Zlitm_r, 0, +}; +static uchar yprefetch[] = +{ + Ym, Ynone, Zm_o, 2, + 0, +}; +static uchar yaes[] = +{ + Yxm, Yxr, Zlitm_r, 2, + 0 +}; +static uchar yaes2[] = +{ + Yxm, Yxr, Zibm_r, 2, + 0 +}; + +/* + * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32, + * and p->from and p->to as operands (Addr*). The linker scans optab to find + * the entry with the given p->as and then looks through the ytable for that + * instruction (the second field in the optab struct) for a line whose first + * two values match the Ytypes of the p->from and p->to operands. The function + * oclass in span.c computes the specific Ytype of an operand and then the set + * of more general Ytypes that it satisfies is implied by the ycover table, set + * up in instinit. 
For example, oclass distinguishes the constants 0 and 1 + * from the more general 8-bit constants, but instinit says + * + * ycover[Yi0*Ymax + Ys32] = 1; + * ycover[Yi1*Ymax + Ys32] = 1; + * ycover[Yi8*Ymax + Ys32] = 1; + * + * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32) + * if that's what an instruction can handle. + * + * In parallel with the scan through the ytable for the appropriate line, there + * is a z pointer that starts out pointing at the strange magic byte list in + * the Optab struct. With each step past a non-matching ytable line, z + * advances by the 4th entry in the line. When a matching line is found, that + * z pointer has the extra data to use in laying down the instruction bytes. + * The actual bytes laid down are a function of the 3rd entry in the line (that + * is, the Ztype) and the z bytes. + * + * For example, let's look at AADDL. The optab line says: + * { AADDL, yaddl, Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 }, + * + * and yaddl says + * uchar yaddl[] = + * { + * Yi8, Yml, Zibo_m, 2, + * Yi32, Yax, Zil_, 1, + * Yi32, Yml, Zilo_m, 2, + * Yrl, Yml, Zr_m, 1, + * Yml, Yrl, Zm_r, 1, + * 0 + * }; + * + * so there are 5 possible types of ADDL instruction that can be laid down, and + * possible states used to lay them down (Ztype and z pointer, assuming z + * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are: + * + * Yi8, Yml -> Zibo_m, z (0x83, 00) + * Yi32, Yax -> Zil_, z+2 (0x05) + * Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00) + * Yrl, Yml -> Zr_m, z+2+1+2 (0x01) + * Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03) + * + * The Pconstant in the optab line controls the prefix bytes to emit. That's + * relatively straightforward as this program goes. + * + * The switch on t[2] in doasm implements the various Z cases. Zibo_m, for + * example, is an opcode byte (z[0]) then an asmando (which is some kind of + * encoded addressing mode for the Yml arg), and then a single immediate byte. + * Zilo_m is the same but a long (32-bit) immediate. 
+ */ +Optab optab[] = +/* as, ytab, andproto, opcode */ +{ + { AXXX }, + { AAAA, ynone, P32, 0x37 }, + { AAAD, ynone, P32, 0xd5,0x0a }, + { AAAM, ynone, P32, 0xd4,0x0a }, + { AAAS, ynone, P32, 0x3f }, + { AADCB, yxorb, Pb, 0x14,0x80,(02),0x10,0x10 }, + { AADCL, yxorl, Px, 0x83,(02),0x15,0x81,(02),0x11,0x13 }, + { AADCQ, yxorl, Pw, 0x83,(02),0x15,0x81,(02),0x11,0x13 }, + { AADCW, yxorl, Pe, 0x83,(02),0x15,0x81,(02),0x11,0x13 }, + { AADDB, yxorb, Pb, 0x04,0x80,(00),0x00,0x02 }, + { AADDL, yaddl, Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 }, + { AADDPD, yxm, Pq, 0x58 }, + { AADDPS, yxm, Pm, 0x58 }, + { AADDQ, yaddl, Pw, 0x83,(00),0x05,0x81,(00),0x01,0x03 }, + { AADDSD, yxm, Pf2, 0x58 }, + { AADDSS, yxm, Pf3, 0x58 }, + { AADDW, yaddl, Pe, 0x83,(00),0x05,0x81,(00),0x01,0x03 }, + { AADJSP }, + { AANDB, yxorb, Pb, 0x24,0x80,(04),0x20,0x22 }, + { AANDL, yxorl, Px, 0x83,(04),0x25,0x81,(04),0x21,0x23 }, + { AANDNPD, yxm, Pq, 0x55 }, + { AANDNPS, yxm, Pm, 0x55 }, + { AANDPD, yxm, Pq, 0x54 }, + { AANDPS, yxm, Pq, 0x54 }, + { AANDQ, yxorl, Pw, 0x83,(04),0x25,0x81,(04),0x21,0x23 }, + { AANDW, yxorl, Pe, 0x83,(04),0x25,0x81,(04),0x21,0x23 }, + { AARPL, yrl_ml, P32, 0x63 }, + { ABOUNDL, yrl_m, P32, 0x62 }, + { ABOUNDW, yrl_m, Pe, 0x62 }, + { ABSFL, yml_rl, Pm, 0xbc }, + { ABSFQ, yml_rl, Pw, 0x0f,0xbc }, + { ABSFW, yml_rl, Pq, 0xbc }, + { ABSRL, yml_rl, Pm, 0xbd }, + { ABSRQ, yml_rl, Pw, 0x0f,0xbd }, + { ABSRW, yml_rl, Pq, 0xbd }, + { ABSWAPL, ybswap, Px, 0x0f,0xc8 }, + { ABSWAPQ, ybswap, Pw, 0x0f,0xc8 }, + { ABTCL, ybtl, Pm, 0xba,(07),0xbb }, + { ABTCQ, ybtl, Pw, 0x0f,0xba,(07),0x0f,0xbb }, + { ABTCW, ybtl, Pq, 0xba,(07),0xbb }, + { ABTL, ybtl, Pm, 0xba,(04),0xa3 }, + { ABTQ, ybtl, Pw, 0x0f,0xba,(04),0x0f,0xa3}, + { ABTRL, ybtl, Pm, 0xba,(06),0xb3 }, + { ABTRQ, ybtl, Pw, 0x0f,0xba,(06),0x0f,0xb3 }, + { ABTRW, ybtl, Pq, 0xba,(06),0xb3 }, + { ABTSL, ybtl, Pm, 0xba,(05),0xab }, + { ABTSQ, ybtl, Pw, 0x0f,0xba,(05),0x0f,0xab }, + { ABTSW, ybtl, Pq, 0xba,(05),0xab }, + { ABTW, ybtl, Pq, 
0xba,(04),0xa3 }, + { ABYTE, ybyte, Px, 1 }, + { ACALL, ycall, Px, 0xff,(02),0xe8 }, + { ACDQ, ynone, Px, 0x99 }, + { ACLC, ynone, Px, 0xf8 }, + { ACLD, ynone, Px, 0xfc }, + { ACLI, ynone, Px, 0xfa }, + { ACLTS, ynone, Pm, 0x06 }, + { ACMC, ynone, Px, 0xf5 }, + { ACMOVLCC, yml_rl, Pm, 0x43 }, + { ACMOVLCS, yml_rl, Pm, 0x42 }, + { ACMOVLEQ, yml_rl, Pm, 0x44 }, + { ACMOVLGE, yml_rl, Pm, 0x4d }, + { ACMOVLGT, yml_rl, Pm, 0x4f }, + { ACMOVLHI, yml_rl, Pm, 0x47 }, + { ACMOVLLE, yml_rl, Pm, 0x4e }, + { ACMOVLLS, yml_rl, Pm, 0x46 }, + { ACMOVLLT, yml_rl, Pm, 0x4c }, + { ACMOVLMI, yml_rl, Pm, 0x48 }, + { ACMOVLNE, yml_rl, Pm, 0x45 }, + { ACMOVLOC, yml_rl, Pm, 0x41 }, + { ACMOVLOS, yml_rl, Pm, 0x40 }, + { ACMOVLPC, yml_rl, Pm, 0x4b }, + { ACMOVLPL, yml_rl, Pm, 0x49 }, + { ACMOVLPS, yml_rl, Pm, 0x4a }, + { ACMOVQCC, yml_rl, Pw, 0x0f,0x43 }, + { ACMOVQCS, yml_rl, Pw, 0x0f,0x42 }, + { ACMOVQEQ, yml_rl, Pw, 0x0f,0x44 }, + { ACMOVQGE, yml_rl, Pw, 0x0f,0x4d }, + { ACMOVQGT, yml_rl, Pw, 0x0f,0x4f }, + { ACMOVQHI, yml_rl, Pw, 0x0f,0x47 }, + { ACMOVQLE, yml_rl, Pw, 0x0f,0x4e }, + { ACMOVQLS, yml_rl, Pw, 0x0f,0x46 }, + { ACMOVQLT, yml_rl, Pw, 0x0f,0x4c }, + { ACMOVQMI, yml_rl, Pw, 0x0f,0x48 }, + { ACMOVQNE, yml_rl, Pw, 0x0f,0x45 }, + { ACMOVQOC, yml_rl, Pw, 0x0f,0x41 }, + { ACMOVQOS, yml_rl, Pw, 0x0f,0x40 }, + { ACMOVQPC, yml_rl, Pw, 0x0f,0x4b }, + { ACMOVQPL, yml_rl, Pw, 0x0f,0x49 }, + { ACMOVQPS, yml_rl, Pw, 0x0f,0x4a }, + { ACMOVWCC, yml_rl, Pq, 0x43 }, + { ACMOVWCS, yml_rl, Pq, 0x42 }, + { ACMOVWEQ, yml_rl, Pq, 0x44 }, + { ACMOVWGE, yml_rl, Pq, 0x4d }, + { ACMOVWGT, yml_rl, Pq, 0x4f }, + { ACMOVWHI, yml_rl, Pq, 0x47 }, + { ACMOVWLE, yml_rl, Pq, 0x4e }, + { ACMOVWLS, yml_rl, Pq, 0x46 }, + { ACMOVWLT, yml_rl, Pq, 0x4c }, + { ACMOVWMI, yml_rl, Pq, 0x48 }, + { ACMOVWNE, yml_rl, Pq, 0x45 }, + { ACMOVWOC, yml_rl, Pq, 0x41 }, + { ACMOVWOS, yml_rl, Pq, 0x40 }, + { ACMOVWPC, yml_rl, Pq, 0x4b }, + { ACMOVWPL, yml_rl, Pq, 0x49 }, + { ACMOVWPS, yml_rl, Pq, 0x4a }, + { ACMPB, ycmpb, Pb, 
0x3c,0x80,(07),0x38,0x3a }, + { ACMPL, ycmpl, Px, 0x83,(07),0x3d,0x81,(07),0x39,0x3b }, + { ACMPPD, yxcmpi, Px, Pe,0xc2 }, + { ACMPPS, yxcmpi, Pm, 0xc2,0 }, + { ACMPQ, ycmpl, Pw, 0x83,(07),0x3d,0x81,(07),0x39,0x3b }, + { ACMPSB, ynone, Pb, 0xa6 }, + { ACMPSD, yxcmpi, Px, Pf2,0xc2 }, + { ACMPSL, ynone, Px, 0xa7 }, + { ACMPSQ, ynone, Pw, 0xa7 }, + { ACMPSS, yxcmpi, Px, Pf3,0xc2 }, + { ACMPSW, ynone, Pe, 0xa7 }, + { ACMPW, ycmpl, Pe, 0x83,(07),0x3d,0x81,(07),0x39,0x3b }, + { ACOMISD, yxcmp, Pe, 0x2f }, + { ACOMISS, yxcmp, Pm, 0x2f }, + { ACPUID, ynone, Pm, 0xa2 }, + { ACVTPL2PD, yxcvm2, Px, Pf3,0xe6,Pe,0x2a }, + { ACVTPL2PS, yxcvm2, Pm, 0x5b,0,0x2a,0, }, + { ACVTPD2PL, yxcvm1, Px, Pf2,0xe6,Pe,0x2d }, + { ACVTPD2PS, yxm, Pe, 0x5a }, + { ACVTPS2PL, yxcvm1, Px, Pe,0x5b,Pm,0x2d }, + { ACVTPS2PD, yxm, Pm, 0x5a }, + { API2FW, ymfp, Px, 0x0c }, + { ACVTSD2SL, yxcvfl, Pf2, 0x2d }, + { ACVTSD2SQ, yxcvfq, Pw, Pf2,0x2d }, + { ACVTSD2SS, yxm, Pf2, 0x5a }, + { ACVTSL2SD, yxcvlf, Pf2, 0x2a }, + { ACVTSQ2SD, yxcvqf, Pw, Pf2,0x2a }, + { ACVTSL2SS, yxcvlf, Pf3, 0x2a }, + { ACVTSQ2SS, yxcvqf, Pw, Pf3,0x2a }, + { ACVTSS2SD, yxm, Pf3, 0x5a }, + { ACVTSS2SL, yxcvfl, Pf3, 0x2d }, + { ACVTSS2SQ, yxcvfq, Pw, Pf3,0x2d }, + { ACVTTPD2PL, yxcvm1, Px, Pe,0xe6,Pe,0x2c }, + { ACVTTPS2PL, yxcvm1, Px, Pf3,0x5b,Pm,0x2c }, + { ACVTTSD2SL, yxcvfl, Pf2, 0x2c }, + { ACVTTSD2SQ, yxcvfq, Pw, Pf2,0x2c }, + { ACVTTSS2SL, yxcvfl, Pf3, 0x2c }, + { ACVTTSS2SQ, yxcvfq, Pw, Pf3,0x2c }, + { ACWD, ynone, Pe, 0x99 }, + { ACQO, ynone, Pw, 0x99 }, + { ADAA, ynone, P32, 0x27 }, + { ADAS, ynone, P32, 0x2f }, + { ADATA }, + { ADECB, yincb, Pb, 0xfe,(01) }, + { ADECL, yincl, Px, 0xff,(01) }, + { ADECQ, yincl, Pw, 0xff,(01) }, + { ADECW, yincw, Pe, 0xff,(01) }, + { ADIVB, ydivb, Pb, 0xf6,(06) }, + { ADIVL, ydivl, Px, 0xf7,(06) }, + { ADIVPD, yxm, Pe, 0x5e }, + { ADIVPS, yxm, Pm, 0x5e }, + { ADIVQ, ydivl, Pw, 0xf7,(06) }, + { ADIVSD, yxm, Pf2, 0x5e }, + { ADIVSS, yxm, Pf3, 0x5e }, + { ADIVW, ydivl, Pe, 0xf7,(06) }, + { 
AEMMS, ynone, Pm, 0x77 }, + { AENTER }, /* botch */ + { AFXRSTOR, ysvrs, Pm, 0xae,(01),0xae,(01) }, + { AFXSAVE, ysvrs, Pm, 0xae,(00),0xae,(00) }, + { AFXRSTOR64, ysvrs, Pw, 0x0f,0xae,(01),0x0f,0xae,(01) }, + { AFXSAVE64, ysvrs, Pw, 0x0f,0xae,(00),0x0f,0xae,(00) }, + { AGLOBL }, + { AGOK }, + { AHISTORY }, + { AHLT, ynone, Px, 0xf4 }, + { AIDIVB, ydivb, Pb, 0xf6,(07) }, + { AIDIVL, ydivl, Px, 0xf7,(07) }, + { AIDIVQ, ydivl, Pw, 0xf7,(07) }, + { AIDIVW, ydivl, Pe, 0xf7,(07) }, + { AIMULB, ydivb, Pb, 0xf6,(05) }, + { AIMULL, yimul, Px, 0xf7,(05),0x6b,0x69,Pm,0xaf }, + { AIMULQ, yimul, Pw, 0xf7,(05),0x6b,0x69,Pm,0xaf }, + { AIMULW, yimul, Pe, 0xf7,(05),0x6b,0x69,Pm,0xaf }, + { AIMUL3Q, yimul3, Pw, 0x6b,(00) }, + { AINB, yin, Pb, 0xe4,0xec }, + { AINCB, yincb, Pb, 0xfe,(00) }, + { AINCL, yincl, Px, 0xff,(00) }, + { AINCQ, yincl, Pw, 0xff,(00) }, + { AINCW, yincw, Pe, 0xff,(00) }, + { AINL, yin, Px, 0xe5,0xed }, + { AINSB, ynone, Pb, 0x6c }, + { AINSL, ynone, Px, 0x6d }, + { AINSW, ynone, Pe, 0x6d }, + { AINT, yint, Px, 0xcd }, + { AINTO, ynone, P32, 0xce }, + { AINW, yin, Pe, 0xe5,0xed }, + { AIRETL, ynone, Px, 0xcf }, + { AIRETQ, ynone, Pw, 0xcf }, + { AIRETW, ynone, Pe, 0xcf }, + { AJCC, yjcond, Px, 0x73,0x83,(00) }, + { AJCS, yjcond, Px, 0x72,0x82 }, + { AJCXZL, yloop, Px, 0xe3 }, + { AJCXZQ, yloop, Px, 0xe3 }, + { AJEQ, yjcond, Px, 0x74,0x84 }, + { AJGE, yjcond, Px, 0x7d,0x8d }, + { AJGT, yjcond, Px, 0x7f,0x8f }, + { AJHI, yjcond, Px, 0x77,0x87 }, + { AJLE, yjcond, Px, 0x7e,0x8e }, + { AJLS, yjcond, Px, 0x76,0x86 }, + { AJLT, yjcond, Px, 0x7c,0x8c }, + { AJMI, yjcond, Px, 0x78,0x88 }, + { AJMP, yjmp, Px, 0xff,(04),0xeb,0xe9 }, + { AJNE, yjcond, Px, 0x75,0x85 }, + { AJOC, yjcond, Px, 0x71,0x81,(00) }, + { AJOS, yjcond, Px, 0x70,0x80,(00) }, + { AJPC, yjcond, Px, 0x7b,0x8b }, + { AJPL, yjcond, Px, 0x79,0x89 }, + { AJPS, yjcond, Px, 0x7a,0x8a }, + { ALAHF, ynone, Px, 0x9f }, + { ALARL, yml_rl, Pm, 0x02 }, + { ALARW, yml_rl, Pq, 0x02 }, + { ALDMXCSR, ysvrs, Pm, 
0xae,(02),0xae,(02) }, + { ALEAL, ym_rl, Px, 0x8d }, + { ALEAQ, ym_rl, Pw, 0x8d }, + { ALEAVEL, ynone, P32, 0xc9 }, + { ALEAVEQ, ynone, Py, 0xc9 }, + { ALEAVEW, ynone, Pe, 0xc9 }, + { ALEAW, ym_rl, Pe, 0x8d }, + { ALOCK, ynone, Px, 0xf0 }, + { ALODSB, ynone, Pb, 0xac }, + { ALODSL, ynone, Px, 0xad }, + { ALODSQ, ynone, Pw, 0xad }, + { ALODSW, ynone, Pe, 0xad }, + { ALONG, ybyte, Px, 4 }, + { ALOOP, yloop, Px, 0xe2 }, + { ALOOPEQ, yloop, Px, 0xe1 }, + { ALOOPNE, yloop, Px, 0xe0 }, + { ALSLL, yml_rl, Pm, 0x03 }, + { ALSLW, yml_rl, Pq, 0x03 }, + { AMASKMOVOU, yxr, Pe, 0xf7 }, + { AMASKMOVQ, ymr, Pm, 0xf7 }, + { AMAXPD, yxm, Pe, 0x5f }, + { AMAXPS, yxm, Pm, 0x5f }, + { AMAXSD, yxm, Pf2, 0x5f }, + { AMAXSS, yxm, Pf3, 0x5f }, + { AMINPD, yxm, Pe, 0x5d }, + { AMINPS, yxm, Pm, 0x5d }, + { AMINSD, yxm, Pf2, 0x5d }, + { AMINSS, yxm, Pf3, 0x5d }, + { AMOVAPD, yxmov, Pe, 0x28,0x29 }, + { AMOVAPS, yxmov, Pm, 0x28,0x29 }, + { AMOVB, ymovb, Pb, 0x88,0x8a,0xb0,0xc6,(00) }, + { AMOVBLSX, ymb_rl, Pm, 0xbe }, + { AMOVBLZX, ymb_rl, Pm, 0xb6 }, + { AMOVBQSX, ymb_rl, Pw, 0x0f,0xbe }, + { AMOVBQZX, ymb_rl, Pw, 0x0f,0xb6 }, + { AMOVBWSX, ymb_rl, Pq, 0xbe }, + { AMOVBWZX, ymb_rl, Pq, 0xb6 }, + { AMOVO, yxmov, Pe, 0x6f,0x7f }, + { AMOVOU, yxmov, Pf3, 0x6f,0x7f }, + { AMOVHLPS, yxr, Pm, 0x12 }, + { AMOVHPD, yxmov, Pe, 0x16,0x17 }, + { AMOVHPS, yxmov, Pm, 0x16,0x17 }, + { AMOVL, ymovl, Px, 0x89,0x8b,0x31,0xb8,0xc7,(00),0x6e,0x7e,Pe,0x6e,Pe,0x7e,0 }, + { AMOVLHPS, yxr, Pm, 0x16 }, + { AMOVLPD, yxmov, Pe, 0x12,0x13 }, + { AMOVLPS, yxmov, Pm, 0x12,0x13 }, + { AMOVLQSX, yml_rl, Pw, 0x63 }, + { AMOVLQZX, yml_rl, Px, 0x8b }, + { AMOVMSKPD, yxrrl, Pq, 0x50 }, + { AMOVMSKPS, yxrrl, Pm, 0x50 }, + { AMOVNTO, yxr_ml, Pe, 0xe7 }, + { AMOVNTPD, yxr_ml, Pe, 0x2b }, + { AMOVNTPS, yxr_ml, Pm, 0x2b }, + { AMOVNTQ, ymr_ml, Pm, 0xe7 }, + { AMOVQ, ymovq, Pw, 0x89, 0x8b, 0x31, 0xc7,(00), 0xb8, 0xc7,(00), 0x6f, 0x7f, 0x6e, 0x7e, Pf2,0xd6, Pf3,0x7e, Pe,0xd6, Pe,0x6e, Pe,0x7e,0 }, + { AMOVQOZX, ymrxr, Pf3, 0xd6,0x7e 
}, + { AMOVSB, ynone, Pb, 0xa4 }, + { AMOVSD, yxmov, Pf2, 0x10,0x11 }, + { AMOVSL, ynone, Px, 0xa5 }, + { AMOVSQ, ynone, Pw, 0xa5 }, + { AMOVSS, yxmov, Pf3, 0x10,0x11 }, + { AMOVSW, ynone, Pe, 0xa5 }, + { AMOVUPD, yxmov, Pe, 0x10,0x11 }, + { AMOVUPS, yxmov, Pm, 0x10,0x11 }, + { AMOVW, ymovw, Pe, 0x89,0x8b,0x31,0xb8,0xc7,(00),0 }, + { AMOVWLSX, yml_rl, Pm, 0xbf }, + { AMOVWLZX, yml_rl, Pm, 0xb7 }, + { AMOVWQSX, yml_rl, Pw, 0x0f,0xbf }, + { AMOVWQZX, yml_rl, Pw, 0x0f,0xb7 }, + { AMULB, ydivb, Pb, 0xf6,(04) }, + { AMULL, ydivl, Px, 0xf7,(04) }, + { AMULPD, yxm, Pe, 0x59 }, + { AMULPS, yxm, Ym, 0x59 }, + { AMULQ, ydivl, Pw, 0xf7,(04) }, + { AMULSD, yxm, Pf2, 0x59 }, + { AMULSS, yxm, Pf3, 0x59 }, + { AMULW, ydivl, Pe, 0xf7,(04) }, + { ANAME }, + { ANEGB, yscond, Pb, 0xf6,(03) }, + { ANEGL, yscond, Px, 0xf7,(03) }, + { ANEGQ, yscond, Pw, 0xf7,(03) }, + { ANEGW, yscond, Pe, 0xf7,(03) }, + { ANOP, ynop, Px, 0,0 }, + { ANOTB, yscond, Pb, 0xf6,(02) }, + { ANOTL, yscond, Px, 0xf7,(02) }, + { ANOTQ, yscond, Pw, 0xf7,(02) }, + { ANOTW, yscond, Pe, 0xf7,(02) }, + { AORB, yxorb, Pb, 0x0c,0x80,(01),0x08,0x0a }, + { AORL, yxorl, Px, 0x83,(01),0x0d,0x81,(01),0x09,0x0b }, + { AORPD, yxm, Pq, 0x56 }, + { AORPS, yxm, Pm, 0x56 }, + { AORQ, yxorl, Pw, 0x83,(01),0x0d,0x81,(01),0x09,0x0b }, + { AORW, yxorl, Pe, 0x83,(01),0x0d,0x81,(01),0x09,0x0b }, + { AOUTB, yin, Pb, 0xe6,0xee }, + { AOUTL, yin, Px, 0xe7,0xef }, + { AOUTSB, ynone, Pb, 0x6e }, + { AOUTSL, ynone, Px, 0x6f }, + { AOUTSW, ynone, Pe, 0x6f }, + { AOUTW, yin, Pe, 0xe7,0xef }, + { APACKSSLW, ymm, Py, 0x6b,Pe,0x6b }, + { APACKSSWB, ymm, Py, 0x63,Pe,0x63 }, + { APACKUSWB, ymm, Py, 0x67,Pe,0x67 }, + { APADDB, ymm, Py, 0xfc,Pe,0xfc }, + { APADDL, ymm, Py, 0xfe,Pe,0xfe }, + { APADDQ, yxm, Pe, 0xd4 }, + { APADDSB, ymm, Py, 0xec,Pe,0xec }, + { APADDSW, ymm, Py, 0xed,Pe,0xed }, + { APADDUSB, ymm, Py, 0xdc,Pe,0xdc }, + { APADDUSW, ymm, Py, 0xdd,Pe,0xdd }, + { APADDW, ymm, Py, 0xfd,Pe,0xfd }, + { APAND, ymm, Py, 0xdb,Pe,0xdb }, + { APANDN, 
ymm, Py, 0xdf,Pe,0xdf }, + { APAUSE, ynone, Px, 0xf3,0x90 }, + { APAVGB, ymm, Py, 0xe0,Pe,0xe0 }, + { APAVGW, ymm, Py, 0xe3,Pe,0xe3 }, + { APCMPEQB, ymm, Py, 0x74,Pe,0x74 }, + { APCMPEQL, ymm, Py, 0x76,Pe,0x76 }, + { APCMPEQW, ymm, Py, 0x75,Pe,0x75 }, + { APCMPGTB, ymm, Py, 0x64,Pe,0x64 }, + { APCMPGTL, ymm, Py, 0x66,Pe,0x66 }, + { APCMPGTW, ymm, Py, 0x65,Pe,0x65 }, + { APEXTRW, yextrw, Pq, 0xc5,(00) }, + { APF2IL, ymfp, Px, 0x1d }, + { APF2IW, ymfp, Px, 0x1c }, + { API2FL, ymfp, Px, 0x0d }, + { APFACC, ymfp, Px, 0xae }, + { APFADD, ymfp, Px, 0x9e }, + { APFCMPEQ, ymfp, Px, 0xb0 }, + { APFCMPGE, ymfp, Px, 0x90 }, + { APFCMPGT, ymfp, Px, 0xa0 }, + { APFMAX, ymfp, Px, 0xa4 }, + { APFMIN, ymfp, Px, 0x94 }, + { APFMUL, ymfp, Px, 0xb4 }, + { APFNACC, ymfp, Px, 0x8a }, + { APFPNACC, ymfp, Px, 0x8e }, + { APFRCP, ymfp, Px, 0x96 }, + { APFRCPIT1, ymfp, Px, 0xa6 }, + { APFRCPI2T, ymfp, Px, 0xb6 }, + { APFRSQIT1, ymfp, Px, 0xa7 }, + { APFRSQRT, ymfp, Px, 0x97 }, + { APFSUB, ymfp, Px, 0x9a }, + { APFSUBR, ymfp, Px, 0xaa }, + { APINSRW, yinsrw, Pq, 0xc4,(00) }, + { APINSRD, yinsr, Pq, 0x3a, 0x22, (00) }, + { APINSRQ, yinsr, Pq3, 0x3a, 0x22, (00) }, + { APMADDWL, ymm, Py, 0xf5,Pe,0xf5 }, + { APMAXSW, yxm, Pe, 0xee }, + { APMAXUB, yxm, Pe, 0xde }, + { APMINSW, yxm, Pe, 0xea }, + { APMINUB, yxm, Pe, 0xda }, + { APMOVMSKB, ymskb, Px, Pe,0xd7,0xd7 }, + { APMULHRW, ymfp, Px, 0xb7 }, + { APMULHUW, ymm, Py, 0xe4,Pe,0xe4 }, + { APMULHW, ymm, Py, 0xe5,Pe,0xe5 }, + { APMULLW, ymm, Py, 0xd5,Pe,0xd5 }, + { APMULULQ, ymm, Py, 0xf4,Pe,0xf4 }, + { APOPAL, ynone, P32, 0x61 }, + { APOPAW, ynone, Pe, 0x61 }, + { APOPFL, ynone, P32, 0x9d }, + { APOPFQ, ynone, Py, 0x9d }, + { APOPFW, ynone, Pe, 0x9d }, + { APOPL, ypopl, P32, 0x58,0x8f,(00) }, + { APOPQ, ypopl, Py, 0x58,0x8f,(00) }, + { APOPW, ypopl, Pe, 0x58,0x8f,(00) }, + { APOR, ymm, Py, 0xeb,Pe,0xeb }, + { APSADBW, yxm, Pq, 0xf6 }, + { APSHUFHW, yxshuf, Pf3, 0x70,(00) }, + { APSHUFL, yxshuf, Pq, 0x70,(00) }, + { APSHUFLW, yxshuf, Pf2, 0x70,(00) 
},
	{ APSHUFW,	ymshuf,	Pm, 0x70,(00) },
	{ APSHUFB,	ymshufb,Pq, 0x38, 0x00 },
	{ APSLLO,	ypsdq,	Pq, 0x73,(07) },
	{ APSLLL,	yps,	Py, 0xf2, 0x72,(06), Pe,0xf2, Pe,0x72,(06) },
	{ APSLLQ,	yps,	Py, 0xf3, 0x73,(06), Pe,0xf3, Pe,0x73,(06) },
	{ APSLLW,	yps,	Py, 0xf1, 0x71,(06), Pe,0xf1, Pe,0x71,(06) },
	{ APSRAL,	yps,	Py, 0xe2, 0x72,(04), Pe,0xe2, Pe,0x72,(04) },
	{ APSRAW,	yps,	Py, 0xe1, 0x71,(04), Pe,0xe1, Pe,0x71,(04) },
	{ APSRLO,	ypsdq,	Pq, 0x73,(03) },
	{ APSRLL,	yps,	Py, 0xd2, 0x72,(02), Pe,0xd2, Pe,0x72,(02) },
	{ APSRLQ,	yps,	Py, 0xd3, 0x73,(02), Pe,0xd3, Pe,0x73,(02) },
	/* XMM register form is 66 0F D1, same opcode byte as the MMX form; 0xe1 is PSRAW */
	{ APSRLW,	yps,	Py, 0xd1, 0x71,(02), Pe,0xd1, Pe,0x71,(02) },
	{ APSUBB,	yxm,	Pe, 0xf8 },
	{ APSUBL,	yxm,	Pe, 0xfa },
	{ APSUBQ,	yxm,	Pe, 0xfb },
	{ APSUBSB,	yxm,	Pe, 0xe8 },
	{ APSUBSW,	yxm,	Pe, 0xe9 },
	{ APSUBUSB,	yxm,	Pe, 0xd8 },
	{ APSUBUSW,	yxm,	Pe, 0xd9 },
	{ APSUBW,	yxm,	Pe, 0xf9 },
	{ APSWAPL,	ymfp,	Px, 0xbb },
	{ APUNPCKHBW,	ymm,	Py, 0x68,Pe,0x68 },
	{ APUNPCKHLQ,	ymm,	Py, 0x6a,Pe,0x6a },
	{ APUNPCKHQDQ,	yxm,	Pe, 0x6d },
	{ APUNPCKHWL,	ymm,	Py, 0x69,Pe,0x69 },
	{ APUNPCKLBW,	ymm,	Py, 0x60,Pe,0x60 },
	{ APUNPCKLLQ,	ymm,	Py, 0x62,Pe,0x62 },
	{ APUNPCKLQDQ,	yxm,	Pe, 0x6c },
	{ APUNPCKLWL,	ymm,	Py, 0x61,Pe,0x61 },
	{ APUSHAL,	ynone,	P32, 0x60 },
	{ APUSHAW,	ynone,	Pe, 0x60 },
	{ APUSHFL,	ynone,	P32, 0x9c },
	{ APUSHFQ,	ynone,	Py, 0x9c },
	{ APUSHFW,	ynone,	Pe, 0x9c },
	{ APUSHL,	ypushl,	P32, 0x50,0xff,(06),0x6a,0x68 },
	{ APUSHQ,	ypushl,	Py, 0x50,0xff,(06),0x6a,0x68 },
	{ APUSHW,	ypushl,	Pe, 0x50,0xff,(06),0x6a,0x68 },
	{ APXOR,	ymm,	Py, 0xef,Pe,0xef },
	{ AQUAD,	ybyte,	Px, 8 },
	{ ARCLB,	yshb,	Pb, 0xd0,(02),0xc0,(02),0xd2,(02) },
	{ ARCLL,	yshl,	Px, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) },
	{ ARCLQ,	yshl,	Pw, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) },
	{ ARCLW,	yshl,	Pe, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) },
	{ ARCPPS,	yxm,	Pm, 0x53 },
	{ ARCPSS,	yxm,	Pf3, 0x53 },
	{ ARCRB,	yshb,	Pb, 0xd0,(03),0xc0,(03),0xd2,(03) },
	{ ARCRL,	yshl,
Px, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) }, + { ARCRQ, yshl, Pw, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) }, + { ARCRW, yshl, Pe, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) }, + { AREP, ynone, Px, 0xf3 }, + { AREPN, ynone, Px, 0xf2 }, + { ARET, ynone, Px, 0xc3 }, + { ARETFW, yret, Pe, 0xcb,0xca }, + { ARETFL, yret, Px, 0xcb,0xca }, + { ARETFQ, yret, Pw, 0xcb,0xca }, + { AROLB, yshb, Pb, 0xd0,(00),0xc0,(00),0xd2,(00) }, + { AROLL, yshl, Px, 0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00) }, + { AROLQ, yshl, Pw, 0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00) }, + { AROLW, yshl, Pe, 0xd1,(00),0xc1,(00),0xd3,(00),0xd3,(00) }, + { ARORB, yshb, Pb, 0xd0,(01),0xc0,(01),0xd2,(01) }, + { ARORL, yshl, Px, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) }, + { ARORQ, yshl, Pw, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) }, + { ARORW, yshl, Pe, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) }, + { ARSQRTPS, yxm, Pm, 0x52 }, + { ARSQRTSS, yxm, Pf3, 0x52 }, + { ASAHF, ynone, Px, 0x86,0xe0,0x50,0x9d }, /* XCHGB AH,AL; PUSH AX; POPFL */ + { ASALB, yshb, Pb, 0xd0,(04),0xc0,(04),0xd2,(04) }, + { ASALL, yshl, Px, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) }, + { ASALQ, yshl, Pw, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) }, + { ASALW, yshl, Pe, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) }, + { ASARB, yshb, Pb, 0xd0,(07),0xc0,(07),0xd2,(07) }, + { ASARL, yshl, Px, 0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07) }, + { ASARQ, yshl, Pw, 0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07) }, + { ASARW, yshl, Pe, 0xd1,(07),0xc1,(07),0xd3,(07),0xd3,(07) }, + { ASBBB, yxorb, Pb, 0x1c,0x80,(03),0x18,0x1a }, + { ASBBL, yxorl, Px, 0x83,(03),0x1d,0x81,(03),0x19,0x1b }, + { ASBBQ, yxorl, Pw, 0x83,(03),0x1d,0x81,(03),0x19,0x1b }, + { ASBBW, yxorl, Pe, 0x83,(03),0x1d,0x81,(03),0x19,0x1b }, + { ASCASB, ynone, Pb, 0xae }, + { ASCASL, ynone, Px, 0xaf }, + { ASCASQ, ynone, Pw, 0xaf }, + { ASCASW, ynone, Pe, 0xaf }, + { ASETCC, yscond, Pm, 0x93,(00) }, + { ASETCS, yscond, Pm, 0x92,(00) }, + { ASETEQ, yscond, Pm, 0x94,(00) }, + { ASETGE, yscond, Pm, 0x9d,(00) }, + { 
ASETGT, yscond, Pm, 0x9f,(00) }, + { ASETHI, yscond, Pm, 0x97,(00) }, + { ASETLE, yscond, Pm, 0x9e,(00) }, + { ASETLS, yscond, Pm, 0x96,(00) }, + { ASETLT, yscond, Pm, 0x9c,(00) }, + { ASETMI, yscond, Pm, 0x98,(00) }, + { ASETNE, yscond, Pm, 0x95,(00) }, + { ASETOC, yscond, Pm, 0x91,(00) }, + { ASETOS, yscond, Pm, 0x90,(00) }, + { ASETPC, yscond, Pm, 0x96,(00) }, + { ASETPL, yscond, Pm, 0x99,(00) }, + { ASETPS, yscond, Pm, 0x9a,(00) }, + { ASHLB, yshb, Pb, 0xd0,(04),0xc0,(04),0xd2,(04) }, + { ASHLL, yshl, Px, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) }, + { ASHLQ, yshl, Pw, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) }, + { ASHLW, yshl, Pe, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) }, + { ASHRB, yshb, Pb, 0xd0,(05),0xc0,(05),0xd2,(05) }, + { ASHRL, yshl, Px, 0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05) }, + { ASHRQ, yshl, Pw, 0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05) }, + { ASHRW, yshl, Pe, 0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05) }, + { ASHUFPD, yxshuf, Pq, 0xc6,(00) }, + { ASHUFPS, yxshuf, Pm, 0xc6,(00) }, + { ASQRTPD, yxm, Pe, 0x51 }, + { ASQRTPS, yxm, Pm, 0x51 }, + { ASQRTSD, yxm, Pf2, 0x51 }, + { ASQRTSS, yxm, Pf3, 0x51 }, + { ASTC, ynone, Px, 0xf9 }, + { ASTD, ynone, Px, 0xfd }, + { ASTI, ynone, Px, 0xfb }, + { ASTMXCSR, ysvrs, Pm, 0xae,(03),0xae,(03) }, + { ASTOSB, ynone, Pb, 0xaa }, + { ASTOSL, ynone, Px, 0xab }, + { ASTOSQ, ynone, Pw, 0xab }, + { ASTOSW, ynone, Pe, 0xab }, + { ASUBB, yxorb, Pb, 0x2c,0x80,(05),0x28,0x2a }, + { ASUBL, yaddl, Px, 0x83,(05),0x2d,0x81,(05),0x29,0x2b }, + { ASUBPD, yxm, Pe, 0x5c }, + { ASUBPS, yxm, Pm, 0x5c }, + { ASUBQ, yaddl, Pw, 0x83,(05),0x2d,0x81,(05),0x29,0x2b }, + { ASUBSD, yxm, Pf2, 0x5c }, + { ASUBSS, yxm, Pf3, 0x5c }, + { ASUBW, yaddl, Pe, 0x83,(05),0x2d,0x81,(05),0x29,0x2b }, + { ASWAPGS, ynone, Pm, 0x01,0xf8 }, + { ASYSCALL, ynone, Px, 0x0f,0x05 }, /* fast syscall */ + { ATESTB, ytestb, Pb, 0xa8,0xf6,(00),0x84,0x84 }, + { ATESTL, ytestl, Px, 0xa9,0xf7,(00),0x85,0x85 }, + { ATESTQ, ytestl, Pw, 0xa9,0xf7,(00),0x85,0x85 }, + { ATESTW, 
ytestl, Pe, 0xa9,0xf7,(00),0x85,0x85 }, + { ATEXT, ytext, Px }, + { AUCOMISD, yxcmp, Pe, 0x2e }, + { AUCOMISS, yxcmp, Pm, 0x2e }, + { AUNPCKHPD, yxm, Pe, 0x15 }, + { AUNPCKHPS, yxm, Pm, 0x15 }, + { AUNPCKLPD, yxm, Pe, 0x14 }, + { AUNPCKLPS, yxm, Pm, 0x14 }, + { AVERR, ydivl, Pm, 0x00,(04) }, + { AVERW, ydivl, Pm, 0x00,(05) }, + { AWAIT, ynone, Px, 0x9b }, + { AWORD, ybyte, Px, 2 }, + { AXCHGB, yml_mb, Pb, 0x86,0x86 }, + { AXCHGL, yxchg, Px, 0x90,0x90,0x87,0x87 }, + { AXCHGQ, yxchg, Pw, 0x90,0x90,0x87,0x87 }, + { AXCHGW, yxchg, Pe, 0x90,0x90,0x87,0x87 }, + { AXLAT, ynone, Px, 0xd7 }, + { AXORB, yxorb, Pb, 0x34,0x80,(06),0x30,0x32 }, + { AXORL, yxorl, Px, 0x83,(06),0x35,0x81,(06),0x31,0x33 }, + { AXORPD, yxm, Pe, 0x57 }, + { AXORPS, yxm, Pm, 0x57 }, + { AXORQ, yxorl, Pw, 0x83,(06),0x35,0x81,(06),0x31,0x33 }, + { AXORW, yxorl, Pe, 0x83,(06),0x35,0x81,(06),0x31,0x33 }, + + { AFMOVB, yfmvx, Px, 0xdf,(04) }, + { AFMOVBP, yfmvp, Px, 0xdf,(06) }, + { AFMOVD, yfmvd, Px, 0xdd,(00),0xdd,(02),0xd9,(00),0xdd,(02) }, + { AFMOVDP, yfmvdp, Px, 0xdd,(03),0xdd,(03) }, + { AFMOVF, yfmvf, Px, 0xd9,(00),0xd9,(02) }, + { AFMOVFP, yfmvp, Px, 0xd9,(03) }, + { AFMOVL, yfmvf, Px, 0xdb,(00),0xdb,(02) }, + { AFMOVLP, yfmvp, Px, 0xdb,(03) }, + { AFMOVV, yfmvx, Px, 0xdf,(05) }, + { AFMOVVP, yfmvp, Px, 0xdf,(07) }, + { AFMOVW, yfmvf, Px, 0xdf,(00),0xdf,(02) }, + { AFMOVWP, yfmvp, Px, 0xdf,(03) }, + { AFMOVX, yfmvx, Px, 0xdb,(05) }, + { AFMOVXP, yfmvp, Px, 0xdb,(07) }, + + { AFCOMB }, + { AFCOMBP }, + { AFCOMD, yfadd, Px, 0xdc,(02),0xd8,(02),0xdc,(02) }, /* botch */ + { AFCOMDP, yfadd, Px, 0xdc,(03),0xd8,(03),0xdc,(03) }, /* botch */ + { AFCOMDPP, ycompp, Px, 0xde,(03) }, + { AFCOMF, yfmvx, Px, 0xd8,(02) }, + { AFCOMFP, yfmvx, Px, 0xd8,(03) }, + { AFCOML, yfmvx, Px, 0xda,(02) }, + { AFCOMLP, yfmvx, Px, 0xda,(03) }, + { AFCOMW, yfmvx, Px, 0xde,(02) }, + { AFCOMWP, yfmvx, Px, 0xde,(03) }, + + { AFUCOM, ycompp, Px, 0xdd,(04) }, + { AFUCOMP, ycompp, Px, 0xdd,(05) }, + { AFUCOMPP, ycompp, Px, 
0xda,(13) }, + + { AFADDDP, yfaddp, Px, 0xde,(00) }, + { AFADDW, yfmvx, Px, 0xde,(00) }, + { AFADDL, yfmvx, Px, 0xda,(00) }, + { AFADDF, yfmvx, Px, 0xd8,(00) }, + { AFADDD, yfadd, Px, 0xdc,(00),0xd8,(00),0xdc,(00) }, + + { AFMULDP, yfaddp, Px, 0xde,(01) }, + { AFMULW, yfmvx, Px, 0xde,(01) }, + { AFMULL, yfmvx, Px, 0xda,(01) }, + { AFMULF, yfmvx, Px, 0xd8,(01) }, + { AFMULD, yfadd, Px, 0xdc,(01),0xd8,(01),0xdc,(01) }, + + { AFSUBDP, yfaddp, Px, 0xde,(05) }, + { AFSUBW, yfmvx, Px, 0xde,(04) }, + { AFSUBL, yfmvx, Px, 0xda,(04) }, + { AFSUBF, yfmvx, Px, 0xd8,(04) }, + { AFSUBD, yfadd, Px, 0xdc,(04),0xd8,(04),0xdc,(05) }, + + { AFSUBRDP, yfaddp, Px, 0xde,(04) }, + { AFSUBRW, yfmvx, Px, 0xde,(05) }, + { AFSUBRL, yfmvx, Px, 0xda,(05) }, + { AFSUBRF, yfmvx, Px, 0xd8,(05) }, + { AFSUBRD, yfadd, Px, 0xdc,(05),0xd8,(05),0xdc,(04) }, + + { AFDIVDP, yfaddp, Px, 0xde,(07) }, + { AFDIVW, yfmvx, Px, 0xde,(06) }, + { AFDIVL, yfmvx, Px, 0xda,(06) }, + { AFDIVF, yfmvx, Px, 0xd8,(06) }, + { AFDIVD, yfadd, Px, 0xdc,(06),0xd8,(06),0xdc,(07) }, + + { AFDIVRDP, yfaddp, Px, 0xde,(06) }, + { AFDIVRW, yfmvx, Px, 0xde,(07) }, + { AFDIVRL, yfmvx, Px, 0xda,(07) }, + { AFDIVRF, yfmvx, Px, 0xd8,(07) }, + { AFDIVRD, yfadd, Px, 0xdc,(07),0xd8,(07),0xdc,(06) }, + + { AFXCHD, yfxch, Px, 0xd9,(01),0xd9,(01) }, + { AFFREE }, + { AFLDCW, ystcw, Px, 0xd9,(05),0xd9,(05) }, + { AFLDENV, ystcw, Px, 0xd9,(04),0xd9,(04) }, + { AFRSTOR, ysvrs, Px, 0xdd,(04),0xdd,(04) }, + { AFSAVE, ysvrs, Px, 0xdd,(06),0xdd,(06) }, + { AFSTCW, ystcw, Px, 0xd9,(07),0xd9,(07) }, + { AFSTENV, ystcw, Px, 0xd9,(06),0xd9,(06) }, + { AFSTSW, ystsw, Px, 0xdd,(07),0xdf,0xe0 }, + { AF2XM1, ynone, Px, 0xd9, 0xf0 }, + { AFABS, ynone, Px, 0xd9, 0xe1 }, + { AFCHS, ynone, Px, 0xd9, 0xe0 }, + { AFCLEX, ynone, Px, 0xdb, 0xe2 }, + { AFCOS, ynone, Px, 0xd9, 0xff }, + { AFDECSTP, ynone, Px, 0xd9, 0xf6 }, + { AFINCSTP, ynone, Px, 0xd9, 0xf7 }, + { AFINIT, ynone, Px, 0xdb, 0xe3 }, + { AFLD1, ynone, Px, 0xd9, 0xe8 }, + { AFLDL2E, ynone, Px, 0xd9, 
0xea }, + { AFLDL2T, ynone, Px, 0xd9, 0xe9 }, + { AFLDLG2, ynone, Px, 0xd9, 0xec }, + { AFLDLN2, ynone, Px, 0xd9, 0xed }, + { AFLDPI, ynone, Px, 0xd9, 0xeb }, + { AFLDZ, ynone, Px, 0xd9, 0xee }, + { AFNOP, ynone, Px, 0xd9, 0xd0 }, + { AFPATAN, ynone, Px, 0xd9, 0xf3 }, + { AFPREM, ynone, Px, 0xd9, 0xf8 }, + { AFPREM1, ynone, Px, 0xd9, 0xf5 }, + { AFPTAN, ynone, Px, 0xd9, 0xf2 }, + { AFRNDINT, ynone, Px, 0xd9, 0xfc }, + { AFSCALE, ynone, Px, 0xd9, 0xfd }, + { AFSIN, ynone, Px, 0xd9, 0xfe }, + { AFSINCOS, ynone, Px, 0xd9, 0xfb }, + { AFSQRT, ynone, Px, 0xd9, 0xfa }, + { AFTST, ynone, Px, 0xd9, 0xe4 }, + { AFXAM, ynone, Px, 0xd9, 0xe5 }, + { AFXTRACT, ynone, Px, 0xd9, 0xf4 }, + { AFYL2X, ynone, Px, 0xd9, 0xf1 }, + { AFYL2XP1, ynone, Px, 0xd9, 0xf9 }, + + { ACMPXCHGB, yrb_mb, Pb, 0x0f,0xb0 }, + { ACMPXCHGL, yrl_ml, Px, 0x0f,0xb1 }, + { ACMPXCHGW, yrl_ml, Pe, 0x0f,0xb1 }, + { ACMPXCHGQ, yrl_ml, Pw, 0x0f,0xb1 }, + { ACMPXCHG8B, yscond, Pm, 0xc7,(01) }, + { AINVD, ynone, Pm, 0x08 }, + { AINVLPG, ymbs, Pm, 0x01,(07) }, + { ALFENCE, ynone, Pm, 0xae,0xe8 }, + { AMFENCE, ynone, Pm, 0xae,0xf0 }, + { AMOVNTIL, yrl_ml, Pm, 0xc3 }, + { AMOVNTIQ, yrl_ml, Pw, 0x0f,0xc3 }, + { ARDMSR, ynone, Pm, 0x32 }, + { ARDPMC, ynone, Pm, 0x33 }, + { ARDTSC, ynone, Pm, 0x31 }, + { ARSM, ynone, Pm, 0xaa }, + { ASFENCE, ynone, Pm, 0xae,0xf8 }, + { ASYSRET, ynone, Pm, 0x07 }, + { AWBINVD, ynone, Pm, 0x09 }, + { AWRMSR, ynone, Pm, 0x30 }, + + { AXADDB, yrb_mb, Pb, 0x0f,0xc0 }, + { AXADDL, yrl_ml, Px, 0x0f,0xc1 }, + { AXADDQ, yrl_ml, Pw, 0x0f,0xc1 }, + { AXADDW, yrl_ml, Pe, 0x0f,0xc1 }, + + { ACRC32B, ycrc32l,Px, 0xf2,0x0f,0x38,0xf0,0 }, + { ACRC32Q, ycrc32l,Pw, 0xf2,0x0f,0x38,0xf1,0 }, + + { APREFETCHT0, yprefetch, Pm, 0x18,(01) }, + { APREFETCHT1, yprefetch, Pm, 0x18,(02) }, + { APREFETCHT2, yprefetch, Pm, 0x18,(03) }, + { APREFETCHNTA, yprefetch, Pm, 0x18,(00) }, + + { AMOVQL, yrl_ml, Px, 0x89 }, + + { AUNDEF, ynone, Px, 0x0f, 0x0b }, + + { AAESENC, yaes, Pq, 0x38,0xdc,(0) }, + { AAESENCLAST, yaes, 
Pq, 0x38,0xdd,(0) }, + { AAESDEC, yaes, Pq, 0x38,0xde,(0) }, + { AAESDECLAST, yaes, Pq, 0x38,0xdf,(0) }, + { AAESIMC, yaes, Pq, 0x38,0xdb,(0) }, + { AAESKEYGENASSIST, yaes2, Pq, 0x3a,0xdf,(0) }, + + { APSHUFD, yaes2, Pq, 0x70,(0) }, + { APCLMULQDQ, yxshuf, Pq, 0x3a,0x44,0 }, + + { AUSEFIELD, ynop, Px, 0,0 }, + { ATYPE }, + { AFUNCDATA, yfuncdata, Px, 0,0 }, + { APCDATA, ypcdata, Px, 0,0 }, + { ACHECKNIL }, + { AVARDEF }, + { AVARKILL }, + { ADUFFCOPY, yduff, Px, 0xe8 }, + { ADUFFZERO, yduff, Px, 0xe8 }, + + { AEND }, + 0 +}; + +static Optab* opindex[ALAST+1]; +static vlong vaddr(Link*, Addr*, Reloc*); + +// single-instruction no-ops of various lengths. +// constructed by hand and disassembled with gdb to verify. +// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion. +static uchar nop[][16] = { + {0x90}, + {0x66, 0x90}, + {0x0F, 0x1F, 0x00}, + {0x0F, 0x1F, 0x40, 0x00}, + {0x0F, 0x1F, 0x44, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, + {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, + {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // Native Client rejects the repeated 0x66 prefix. 
+ // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, +}; + +static void +fillnop(uchar *p, int n) +{ + int m; + + while(n > 0) { + m = n; + if(m > nelem(nop)) + m = nelem(nop); + memmove(p, nop[m-1], m); + p += m; + n -= m; + } +} + +static void instinit(void); + +static int32 +naclpad(Link *ctxt, LSym *s, int32 c, int32 pad) +{ + symgrow(ctxt, s, c+pad); + fillnop(s->p+c, pad); + return c+pad; +} + +static int +spadjop(Link *ctxt, Prog *p, int l, int q) +{ + if(p->mode != 64 || ctxt->arch->ptrsize == 4) + return l; + return q; +} + +void +span6(Link *ctxt, LSym *s) +{ + Prog *p, *q; + int32 c, v, loop; + uchar *bp; + int n, m, i; + + ctxt->cursym = s; + + if(s->p != nil) + return; + + if(ycover[0] == 0) + instinit(); + + for(p = ctxt->cursym->text; p != nil; p = p->link) { + n = 0; + if(p->to.type == D_BRANCH) + if(p->pcond == nil) + p->pcond = p; + if((q = p->pcond) != nil) + if(q->back != 2) + n = 1; + p->back = n; + if(p->as == AADJSP) { + p->to.type = D_SP; + v = -p->from.offset; + p->from.offset = v; + p->as = spadjop(ctxt, p, AADDL, AADDQ); + if(v < 0) { + p->as = spadjop(ctxt, p, ASUBL, ASUBQ); + v = -v; + p->from.offset = v; + } + if(v == 0) + p->as = ANOP; + } + } + + for(p = s->text; p != nil; p = p->link) { + p->back = 2; // use short branches first time through + if((q = p->pcond) != nil && (q->back & 2)) { + p->back |= 1; // backward jump + q->back |= 4; // loop head + } + + if(p->as == AADJSP) { + p->to.type = D_SP; + v = -p->from.offset; + p->from.offset = v; + p->as = spadjop(ctxt, p, AADDL, AADDQ); + if(v < 0) { + p->as = spadjop(ctxt, p, ASUBL, ASUBQ); + v = -v; + p->from.offset = v; + } + if(v == 0) + p->as = ANOP; + } + } + + n = 0; + do { + loop = 0; + memset(s->r, 0, s->nr*sizeof s->r[0]); + s->nr = 0; + s->np = 0; + c = 0; + for(p = s->text; p != nil; p = p->link) { + if(ctxt->headtype == Hnacl && p->isize > 0) { + static LSym *deferreturn; + + if(deferreturn == nil) + deferreturn = linklookup(ctxt, "runtime.deferreturn", 0); + 
+ // pad everything to avoid crossing 32-byte boundary + if((c>>5) != ((c+p->isize-1)>>5)) + c = naclpad(ctxt, s, c, -c&31); + // pad call deferreturn to start at 32-byte boundary + // so that subtracting 5 in jmpdefer will jump back + // to that boundary and rerun the call. + if(p->as == ACALL && p->to.sym == deferreturn) + c = naclpad(ctxt, s, c, -c&31); + // pad call to end at 32-byte boundary + if(p->as == ACALL) + c = naclpad(ctxt, s, c, -(c+p->isize)&31); + + // the linker treats REP and STOSQ as different instructions + // but in fact the REP is a prefix on the STOSQ. + // make sure REP has room for 2 more bytes, so that + // padding will not be inserted before the next instruction. + if((p->as == AREP || p->as == AREPN) && (c>>5) != ((c+3-1)>>5)) + c = naclpad(ctxt, s, c, -c&31); + + // same for LOCK. + // various instructions follow; the longest is 4 bytes. + // give ourselves 8 bytes so as to avoid surprises. + if(p->as == ALOCK && (c>>5) != ((c+8-1)>>5)) + c = naclpad(ctxt, s, c, -c&31); + } + + if((p->back & 4) && (c&(LoopAlign-1)) != 0) { + // pad with NOPs + v = -c&(LoopAlign-1); + if(v <= MaxLoopPad) { + symgrow(ctxt, s, c+v); + fillnop(s->p+c, v); + c += v; + } + } + + p->pc = c; + + // process forward jumps to p + for(q = p->comefrom; q != nil; q = q->forwd) { + v = p->pc - (q->pc + q->mark); + if(q->back & 2) { // short + if(v > 127) { + loop++; + q->back ^= 2; + } + if(q->as == AJCXZL) + s->p[q->pc+2] = v; + else + s->p[q->pc+1] = v; + } else { + bp = s->p + q->pc + q->mark - 4; + *bp++ = v; + *bp++ = v>>8; + *bp++ = v>>16; + *bp = v>>24; + } + } + p->comefrom = nil; + + p->pc = c; + asmins(ctxt, p); + m = ctxt->andptr-ctxt->and; + if(p->isize != m) { + p->isize = m; + loop++; + } + symgrow(ctxt, s, p->pc+m); + memmove(s->p+p->pc, ctxt->and, m); + p->mark = m; + c += m; + } + if(++n > 20) { + ctxt->diag("span must be looping"); + sysfatal("loop"); + } + } while(loop); + + if(ctxt->headtype == Hnacl) + c = naclpad(ctxt, s, c, -c&31); + + c += 
-c&(FuncAlign-1); + s->size = c; + + if(0 /* debug['a'] > 1 */) { + print("span1 %s %lld (%d tries)\n %.6ux", s->name, s->size, n, 0); + for(i=0; i<s->np; i++) { + print(" %.2ux", s->p[i]); + if(i%16 == 15) + print("\n %.6ux", i+1); + } + if(i%16) + print("\n"); + + for(i=0; i<s->nr; i++) { + Reloc *r; + + r = &s->r[i]; + print(" rel %#.4ux/%d %s%+lld\n", r->off, r->siz, r->sym->name, r->add); + } + } +} + +static void +instinit(void) +{ + int c, i; + + for(i=1; optab[i].as; i++) { + c = optab[i].as; + if(opindex[c] != nil) + sysfatal("phase error in optab: %d (%A)", i, c); + opindex[c] = &optab[i]; + } + + for(i=0; i<Ymax; i++) + ycover[i*Ymax + i] = 1; + + ycover[Yi0*Ymax + Yi8] = 1; + ycover[Yi1*Ymax + Yi8] = 1; + + ycover[Yi0*Ymax + Ys32] = 1; + ycover[Yi1*Ymax + Ys32] = 1; + ycover[Yi8*Ymax + Ys32] = 1; + + ycover[Yi0*Ymax + Yi32] = 1; + ycover[Yi1*Ymax + Yi32] = 1; + ycover[Yi8*Ymax + Yi32] = 1; + ycover[Ys32*Ymax + Yi32] = 1; + + ycover[Yi0*Ymax + Yi64] = 1; + ycover[Yi1*Ymax + Yi64] = 1; + ycover[Yi8*Ymax + Yi64] = 1; + ycover[Ys32*Ymax + Yi64] = 1; + ycover[Yi32*Ymax + Yi64] = 1; + + ycover[Yal*Ymax + Yrb] = 1; + ycover[Ycl*Ymax + Yrb] = 1; + ycover[Yax*Ymax + Yrb] = 1; + ycover[Ycx*Ymax + Yrb] = 1; + ycover[Yrx*Ymax + Yrb] = 1; + ycover[Yrl*Ymax + Yrb] = 1; + + ycover[Ycl*Ymax + Ycx] = 1; + + ycover[Yax*Ymax + Yrx] = 1; + ycover[Ycx*Ymax + Yrx] = 1; + + ycover[Yax*Ymax + Yrl] = 1; + ycover[Ycx*Ymax + Yrl] = 1; + ycover[Yrx*Ymax + Yrl] = 1; + + ycover[Yf0*Ymax + Yrf] = 1; + + ycover[Yal*Ymax + Ymb] = 1; + ycover[Ycl*Ymax + Ymb] = 1; + ycover[Yax*Ymax + Ymb] = 1; + ycover[Ycx*Ymax + Ymb] = 1; + ycover[Yrx*Ymax + Ymb] = 1; + ycover[Yrb*Ymax + Ymb] = 1; + ycover[Yrl*Ymax + Ymb] = 1; + ycover[Ym*Ymax + Ymb] = 1; + + ycover[Yax*Ymax + Yml] = 1; + ycover[Ycx*Ymax + Yml] = 1; + ycover[Yrx*Ymax + Yml] = 1; + ycover[Yrl*Ymax + Yml] = 1; + ycover[Ym*Ymax + Yml] = 1; + + ycover[Yax*Ymax + Ymm] = 1; + ycover[Ycx*Ymax + Ymm] = 1; + ycover[Yrx*Ymax + Ymm] = 1; + 
ycover[Yrl*Ymax + Ymm] = 1; + ycover[Ym*Ymax + Ymm] = 1; + ycover[Ymr*Ymax + Ymm] = 1; + + ycover[Ym*Ymax + Yxm] = 1; + ycover[Yxr*Ymax + Yxm] = 1; + + for(i=0; i<D_NONE; i++) { + reg[i] = -1; + if(i >= D_AL && i <= D_R15B) { + reg[i] = (i-D_AL) & 7; + if(i >= D_SPB && i <= D_DIB) + regrex[i] = 0x40; + if(i >= D_R8B && i <= D_R15B) + regrex[i] = Rxr | Rxx | Rxb; + } + if(i >= D_AH && i<= D_BH) + reg[i] = 4 + ((i-D_AH) & 7); + if(i >= D_AX && i <= D_R15) { + reg[i] = (i-D_AX) & 7; + if(i >= D_R8) + regrex[i] = Rxr | Rxx | Rxb; + } + if(i >= D_F0 && i <= D_F0+7) + reg[i] = (i-D_F0) & 7; + if(i >= D_M0 && i <= D_M0+7) + reg[i] = (i-D_M0) & 7; + if(i >= D_X0 && i <= D_X0+15) { + reg[i] = (i-D_X0) & 7; + if(i >= D_X0+8) + regrex[i] = Rxr | Rxx | Rxb; + } + if(i >= D_CR+8 && i <= D_CR+15) + regrex[i] = Rxr; + } +} + +static int +prefixof(Link *ctxt, Addr *a) +{ + switch(a->type) { + case D_INDIR+D_CS: + return 0x2e; + case D_INDIR+D_DS: + return 0x3e; + case D_INDIR+D_ES: + return 0x26; + case D_INDIR+D_FS: + return 0x64; + case D_INDIR+D_GS: + return 0x65; + case D_INDIR+D_TLS: + // NOTE: Systems listed here should be only systems that + // support direct TLS references like 8(TLS) implemented as + // direct references from FS or GS. Systems that require + // the initial-exec model, where you load the TLS base into + // a register and then index from that register, do not reach + // this code and should not be listed. 
+ switch(ctxt->headtype) { + default: + sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype)); + case Hdragonfly: + case Hfreebsd: + case Hlinux: + case Hnetbsd: + case Hopenbsd: + case Hplan9: + case Hsolaris: + return 0x64; // FS + case Hdarwin: + return 0x65; // GS + } + } + switch(a->index) { + case D_CS: + return 0x2e; + case D_DS: + return 0x3e; + case D_ES: + return 0x26; + case D_FS: + return 0x64; + case D_GS: + return 0x65; + } + return 0; +} + +static int +oclass(Link *ctxt, Addr *a) +{ + vlong v; + int32 l; + + if(a->type >= D_INDIR || a->index != D_NONE) { + if(a->index != D_NONE && a->scale == 0) { + if(a->type == D_ADDR) { + switch(a->index) { + case D_EXTERN: + case D_STATIC: + if(ctxt->flag_shared || ctxt->headtype == Hnacl) + return Yiauto; + else + return Yi32; /* TO DO: Yi64 */ + case D_AUTO: + case D_PARAM: + return Yiauto; + } + return Yxxx; + } + return Ycol; + } + return Ym; + } + switch(a->type) + { + case D_AL: + return Yal; + + case D_AX: + return Yax; + +/* + case D_SPB: +*/ + case D_BPB: + case D_SIB: + case D_DIB: + case D_R8B: + case D_R9B: + case D_R10B: + case D_R11B: + case D_R12B: + case D_R13B: + case D_R14B: + case D_R15B: + if(ctxt->asmode != 64) + return Yxxx; + case D_DL: + case D_BL: + case D_AH: + case D_CH: + case D_DH: + case D_BH: + return Yrb; + + case D_CL: + return Ycl; + + case D_CX: + return Ycx; + + case D_DX: + case D_BX: + return Yrx; + + case D_R8: /* not really Yrl */ + case D_R9: + case D_R10: + case D_R11: + case D_R12: + case D_R13: + case D_R14: + case D_R15: + if(ctxt->asmode != 64) + return Yxxx; + case D_SP: + case D_BP: + case D_SI: + case D_DI: + return Yrl; + + case D_F0+0: + return Yf0; + + case D_F0+1: + case D_F0+2: + case D_F0+3: + case D_F0+4: + case D_F0+5: + case D_F0+6: + case D_F0+7: + return Yrf; + + case D_M0+0: + case D_M0+1: + case D_M0+2: + case D_M0+3: + case D_M0+4: + case D_M0+5: + case D_M0+6: + case D_M0+7: + return Ymr; + + case D_X0+0: + case D_X0+1: + case D_X0+2: 
+ case D_X0+3: + case D_X0+4: + case D_X0+5: + case D_X0+6: + case D_X0+7: + case D_X0+8: + case D_X0+9: + case D_X0+10: + case D_X0+11: + case D_X0+12: + case D_X0+13: + case D_X0+14: + case D_X0+15: + return Yxr; + + case D_NONE: + return Ynone; + + case D_CS: return Ycs; + case D_SS: return Yss; + case D_DS: return Yds; + case D_ES: return Yes; + case D_FS: return Yfs; + case D_GS: return Ygs; + case D_TLS: return Ytls; + + case D_GDTR: return Ygdtr; + case D_IDTR: return Yidtr; + case D_LDTR: return Yldtr; + case D_MSW: return Ymsw; + case D_TASK: return Ytask; + + case D_CR+0: return Ycr0; + case D_CR+1: return Ycr1; + case D_CR+2: return Ycr2; + case D_CR+3: return Ycr3; + case D_CR+4: return Ycr4; + case D_CR+5: return Ycr5; + case D_CR+6: return Ycr6; + case D_CR+7: return Ycr7; + case D_CR+8: return Ycr8; + + case D_DR+0: return Ydr0; + case D_DR+1: return Ydr1; + case D_DR+2: return Ydr2; + case D_DR+3: return Ydr3; + case D_DR+4: return Ydr4; + case D_DR+5: return Ydr5; + case D_DR+6: return Ydr6; + case D_DR+7: return Ydr7; + + case D_TR+0: return Ytr0; + case D_TR+1: return Ytr1; + case D_TR+2: return Ytr2; + case D_TR+3: return Ytr3; + case D_TR+4: return Ytr4; + case D_TR+5: return Ytr5; + case D_TR+6: return Ytr6; + case D_TR+7: return Ytr7; + + case D_EXTERN: + case D_STATIC: + case D_AUTO: + case D_PARAM: + return Ym; + + case D_CONST: + case D_ADDR: + if(a->sym == nil) { + v = a->offset; + if(v == 0) + return Yi0; + if(v == 1) + return Yi1; + if(v >= -128 && v <= 127) + return Yi8; + l = v; + if((vlong)l == v) + return Ys32; /* can sign extend */ + if((v>>32) == 0) + return Yi32; /* unsigned */ + return Yi64; + } + return Yi32; /* TO DO: D_ADDR as Yi64 */ + + case D_BRANCH: + return Ybr; + } + return Yxxx; +} + +static void +asmidx(Link *ctxt, int scale, int index, int base) +{ + int i; + + switch(index) { + default: + goto bad; + + case D_NONE: + i = 4 << 3; + goto bas; + + case D_R8: + case D_R9: + case D_R10: + case D_R11: + case D_R12: + case 
D_R13: + case D_R14: + case D_R15: + if(ctxt->asmode != 64) + goto bad; + case D_AX: + case D_CX: + case D_DX: + case D_BX: + case D_BP: + case D_SI: + case D_DI: + i = reg[index] << 3; + break; + } + switch(scale) { + default: + goto bad; + case 1: + break; + case 2: + i |= (1<<6); + break; + case 4: + i |= (2<<6); + break; + case 8: + i |= (3<<6); + break; + } +bas: + switch(base) { + default: + goto bad; + case D_NONE: /* must be mod=00 */ + i |= 5; + break; + case D_R8: + case D_R9: + case D_R10: + case D_R11: + case D_R12: + case D_R13: + case D_R14: + case D_R15: + if(ctxt->asmode != 64) + goto bad; + case D_AX: + case D_CX: + case D_DX: + case D_BX: + case D_SP: + case D_BP: + case D_SI: + case D_DI: + i |= reg[base]; + break; + } + *ctxt->andptr++ = i; + return; +bad: + ctxt->diag("asmidx: bad address %d/%d/%d", scale, index, base); + *ctxt->andptr++ = 0; + return; +} + +static void +put4(Link *ctxt, int32 v) +{ + ctxt->andptr[0] = v; + ctxt->andptr[1] = v>>8; + ctxt->andptr[2] = v>>16; + ctxt->andptr[3] = v>>24; + ctxt->andptr += 4; +} + +static void +relput4(Link *ctxt, Prog *p, Addr *a) +{ + vlong v; + Reloc rel, *r; + + v = vaddr(ctxt, a, &rel); + if(rel.siz != 0) { + if(rel.siz != 4) + ctxt->diag("bad reloc"); + r = addrel(ctxt->cursym); + *r = rel; + r->off = p->pc + ctxt->andptr - ctxt->and; + } + put4(ctxt, v); +} + +static void +put8(Link *ctxt, vlong v) +{ + ctxt->andptr[0] = v; + ctxt->andptr[1] = v>>8; + ctxt->andptr[2] = v>>16; + ctxt->andptr[3] = v>>24; + ctxt->andptr[4] = v>>32; + ctxt->andptr[5] = v>>40; + ctxt->andptr[6] = v>>48; + ctxt->andptr[7] = v>>56; + ctxt->andptr += 8; +} + +/* +static void +relput8(Prog *p, Addr *a) +{ + vlong v; + Reloc rel, *r; + + v = vaddr(ctxt, a, &rel); + if(rel.siz != 0) { + r = addrel(ctxt->cursym); + *r = rel; + r->siz = 8; + r->off = p->pc + ctxt->andptr - ctxt->and; + } + put8(ctxt, v); +} +*/ + +static vlong +vaddr(Link *ctxt, Addr *a, Reloc *r) +{ + int t; + vlong v; + LSym *s; + + if(r != nil) + 
memset(r, 0, sizeof *r); + + t = a->type; + v = a->offset; + if(t == D_ADDR) + t = a->index; + switch(t) { + case D_STATIC: + case D_EXTERN: + s = a->sym; + if(r == nil) { + ctxt->diag("need reloc for %D", a); + sysfatal("reloc"); + } + r->siz = 4; // TODO: 8 for external symbols + r->off = -1; // caller must fill in + r->sym = s; + r->add = v; + v = 0; + if(ctxt->flag_shared || ctxt->headtype == Hnacl) { + if(s->type == STLSBSS) { + r->xadd = r->add - r->siz; + r->type = R_TLS; + r->xsym = s; + } else + r->type = R_PCREL; + } else + r->type = R_ADDR; + break; + + case D_INDIR+D_TLS: + if(r == nil) { + ctxt->diag("need reloc for %D", a); + sysfatal("reloc"); + } + r->type = R_TLS_LE; + r->siz = 4; + r->off = -1; // caller must fill in + r->add = v; + v = 0; + break; + } + return v; +} + +static void +asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64) +{ + int32 v; + int t, scale; + Reloc rel; + + USED(m64); + rex &= (0x40 | Rxr); + v = a->offset; + t = a->type; + rel.siz = 0; + if(a->index != D_NONE && a->index != D_TLS) { + if(t < D_INDIR) { + switch(t) { + default: + goto bad; + case D_STATIC: + case D_EXTERN: + if(ctxt->flag_shared || ctxt->headtype == Hnacl) + goto bad; + t = D_NONE; + v = vaddr(ctxt, a, &rel); + break; + case D_AUTO: + case D_PARAM: + t = D_SP; + break; + } + } else + t -= D_INDIR; + ctxt->rexflag |= (regrex[(int)a->index] & Rxx) | (regrex[t] & Rxb) | rex; + if(t == D_NONE) { + *ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3); + asmidx(ctxt, a->scale, a->index, t); + goto putrelv; + } + if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) { + *ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3); + asmidx(ctxt, a->scale, a->index, t); + return; + } + if(v >= -128 && v < 128 && rel.siz == 0) { + *ctxt->andptr++ = (1 << 6) | (4 << 0) | (r << 3); + asmidx(ctxt, a->scale, a->index, t); + *ctxt->andptr++ = v; + return; + } + *ctxt->andptr++ = (2 << 6) | (4 << 0) | (r << 3); + asmidx(ctxt, a->scale, a->index, t); + goto putrelv; + } + if(t >= D_AL 
&& t <= D_X0+15) { + if(v) + goto bad; + *ctxt->andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3); + ctxt->rexflag |= (regrex[t] & (0x40 | Rxb)) | rex; + return; + } + + scale = a->scale; + if(t < D_INDIR) { + switch(a->type) { + default: + goto bad; + case D_STATIC: + case D_EXTERN: + t = D_NONE; + v = vaddr(ctxt, a, &rel); + break; + case D_AUTO: + case D_PARAM: + t = D_SP; + break; + } + scale = 1; + } else + t -= D_INDIR; + if(t == D_TLS) + v = vaddr(ctxt, a, &rel); + + ctxt->rexflag |= (regrex[t] & Rxb) | rex; + if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) { + if((ctxt->flag_shared || ctxt->headtype == Hnacl) && t == D_NONE && (a->type == D_STATIC || a->type == D_EXTERN) || ctxt->asmode != 64) { + *ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3); + goto putrelv; + } + /* temporary */ + *ctxt->andptr++ = (0 << 6) | (4 << 0) | (r << 3); /* sib present */ + *ctxt->andptr++ = (0 << 6) | (4 << 3) | (5 << 0); /* DS:d32 */ + goto putrelv; + } + if(t == D_SP || t == D_R12) { + if(v == 0) { + *ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3); + asmidx(ctxt, scale, D_NONE, t); + return; + } + if(v >= -128 && v < 128) { + *ctxt->andptr++ = (1 << 6) | (reg[t] << 0) | (r << 3); + asmidx(ctxt, scale, D_NONE, t); + *ctxt->andptr++ = v; + return; + } + *ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3); + asmidx(ctxt, scale, D_NONE, t); + goto putrelv; + } + if(t >= D_AX && t <= D_R15) { + if(a->index == D_TLS) { + memset(&rel, 0, sizeof rel); + rel.type = R_TLS_IE; + rel.siz = 4; + rel.sym = nil; + rel.add = v; + v = 0; + } + if(v == 0 && rel.siz == 0 && t != D_BP && t != D_R13) { + *ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3); + return; + } + if(v >= -128 && v < 128 && rel.siz == 0) { + ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3); + ctxt->andptr[1] = v; + ctxt->andptr += 2; + return; + } + *ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3); + goto putrelv; + } + goto bad; + +putrelv: + if(rel.siz != 0) { + Reloc *r; + + if(rel.siz != 
4) { + ctxt->diag("bad rel"); + goto bad; + } + r = addrel(ctxt->cursym); + *r = rel; + r->off = ctxt->curp->pc + ctxt->andptr - ctxt->and; + } + + put4(ctxt, v); + return; + +bad: + ctxt->diag("asmand: bad address %D", a); + return; +} + +static void +asmand(Link *ctxt, Addr *a, Addr *ra) +{ + asmandsz(ctxt, a, reg[ra->type], regrex[ra->type], 0); +} + +static void +asmando(Link *ctxt, Addr *a, int o) +{ + asmandsz(ctxt, a, o, 0, 0); +} + +static void +bytereg(Addr *a, char *t) +{ + if(a->index == D_NONE && (a->type >= D_AX && a->type <= D_R15)) { + a->type = D_AL + (a->type-D_AX); + *t = 0; + } +} + +#define E 0xff +static Movtab ymovtab[] = +{ +/* push */ + {APUSHL, Ycs, Ynone, 0, 0x0e,E,0,0}, + {APUSHL, Yss, Ynone, 0, 0x16,E,0,0}, + {APUSHL, Yds, Ynone, 0, 0x1e,E,0,0}, + {APUSHL, Yes, Ynone, 0, 0x06,E,0,0}, + {APUSHL, Yfs, Ynone, 0, 0x0f,0xa0,E,0}, + {APUSHL, Ygs, Ynone, 0, 0x0f,0xa8,E,0}, + {APUSHQ, Yfs, Ynone, 0, 0x0f,0xa0,E,0}, + {APUSHQ, Ygs, Ynone, 0, 0x0f,0xa8,E,0}, + + {APUSHW, Ycs, Ynone, 0, Pe,0x0e,E,0}, + {APUSHW, Yss, Ynone, 0, Pe,0x16,E,0}, + {APUSHW, Yds, Ynone, 0, Pe,0x1e,E,0}, + {APUSHW, Yes, Ynone, 0, Pe,0x06,E,0}, + {APUSHW, Yfs, Ynone, 0, Pe,0x0f,0xa0,E}, + {APUSHW, Ygs, Ynone, 0, Pe,0x0f,0xa8,E}, + +/* pop */ + {APOPL, Ynone, Yds, 0, 0x1f,E,0,0}, + {APOPL, Ynone, Yes, 0, 0x07,E,0,0}, + {APOPL, Ynone, Yss, 0, 0x17,E,0,0}, + {APOPL, Ynone, Yfs, 0, 0x0f,0xa1,E,0}, + {APOPL, Ynone, Ygs, 0, 0x0f,0xa9,E,0}, + {APOPQ, Ynone, Yfs, 0, 0x0f,0xa1,E,0}, + {APOPQ, Ynone, Ygs, 0, 0x0f,0xa9,E,0}, + + {APOPW, Ynone, Yds, 0, Pe,0x1f,E,0}, + {APOPW, Ynone, Yes, 0, Pe,0x07,E,0}, + {APOPW, Ynone, Yss, 0, Pe,0x17,E,0}, + {APOPW, Ynone, Yfs, 0, Pe,0x0f,0xa1,E}, + {APOPW, Ynone, Ygs, 0, Pe,0x0f,0xa9,E}, + +/* mov seg */ + {AMOVW, Yes, Yml, 1, 0x8c,0,0,0}, + {AMOVW, Ycs, Yml, 1, 0x8c,1,0,0}, + {AMOVW, Yss, Yml, 1, 0x8c,2,0,0}, + {AMOVW, Yds, Yml, 1, 0x8c,3,0,0}, + {AMOVW, Yfs, Yml, 1, 0x8c,4,0,0}, + {AMOVW, Ygs, Yml, 1, 0x8c,5,0,0}, + + {AMOVW, Yml, Yes, 2, 
0x8e,0,0,0}, + {AMOVW, Yml, Ycs, 2, 0x8e,1,0,0}, + {AMOVW, Yml, Yss, 2, 0x8e,2,0,0}, + {AMOVW, Yml, Yds, 2, 0x8e,3,0,0}, + {AMOVW, Yml, Yfs, 2, 0x8e,4,0,0}, + {AMOVW, Yml, Ygs, 2, 0x8e,5,0,0}, + +/* mov cr */ + {AMOVL, Ycr0, Yml, 3, 0x0f,0x20,0,0}, + {AMOVL, Ycr2, Yml, 3, 0x0f,0x20,2,0}, + {AMOVL, Ycr3, Yml, 3, 0x0f,0x20,3,0}, + {AMOVL, Ycr4, Yml, 3, 0x0f,0x20,4,0}, + {AMOVL, Ycr8, Yml, 3, 0x0f,0x20,8,0}, + {AMOVQ, Ycr0, Yml, 3, 0x0f,0x20,0,0}, + {AMOVQ, Ycr2, Yml, 3, 0x0f,0x20,2,0}, + {AMOVQ, Ycr3, Yml, 3, 0x0f,0x20,3,0}, + {AMOVQ, Ycr4, Yml, 3, 0x0f,0x20,4,0}, + {AMOVQ, Ycr8, Yml, 3, 0x0f,0x20,8,0}, + + {AMOVL, Yml, Ycr0, 4, 0x0f,0x22,0,0}, + {AMOVL, Yml, Ycr2, 4, 0x0f,0x22,2,0}, + {AMOVL, Yml, Ycr3, 4, 0x0f,0x22,3,0}, + {AMOVL, Yml, Ycr4, 4, 0x0f,0x22,4,0}, + {AMOVL, Yml, Ycr8, 4, 0x0f,0x22,8,0}, + {AMOVQ, Yml, Ycr0, 4, 0x0f,0x22,0,0}, + {AMOVQ, Yml, Ycr2, 4, 0x0f,0x22,2,0}, + {AMOVQ, Yml, Ycr3, 4, 0x0f,0x22,3,0}, + {AMOVQ, Yml, Ycr4, 4, 0x0f,0x22,4,0}, + {AMOVQ, Yml, Ycr8, 4, 0x0f,0x22,8,0}, + +/* mov dr */ + {AMOVL, Ydr0, Yml, 3, 0x0f,0x21,0,0}, + {AMOVL, Ydr6, Yml, 3, 0x0f,0x21,6,0}, + {AMOVL, Ydr7, Yml, 3, 0x0f,0x21,7,0}, + {AMOVQ, Ydr0, Yml, 3, 0x0f,0x21,0,0}, + {AMOVQ, Ydr6, Yml, 3, 0x0f,0x21,6,0}, + {AMOVQ, Ydr7, Yml, 3, 0x0f,0x21,7,0}, + + {AMOVL, Yml, Ydr0, 4, 0x0f,0x23,0,0}, + {AMOVL, Yml, Ydr6, 4, 0x0f,0x23,6,0}, + {AMOVL, Yml, Ydr7, 4, 0x0f,0x23,7,0}, + {AMOVQ, Yml, Ydr0, 4, 0x0f,0x23,0,0}, + {AMOVQ, Yml, Ydr6, 4, 0x0f,0x23,6,0}, + {AMOVQ, Yml, Ydr7, 4, 0x0f,0x23,7,0}, + +/* mov tr */ + {AMOVL, Ytr6, Yml, 3, 0x0f,0x24,6,0}, + {AMOVL, Ytr7, Yml, 3, 0x0f,0x24,7,0}, + + {AMOVL, Yml, Ytr6, 4, 0x0f,0x26,6,E}, + {AMOVL, Yml, Ytr7, 4, 0x0f,0x26,7,E}, + +/* lgdt, sgdt, lidt, sidt */ + {AMOVL, Ym, Ygdtr, 4, 0x0f,0x01,2,0}, + {AMOVL, Ygdtr, Ym, 3, 0x0f,0x01,0,0}, + {AMOVL, Ym, Yidtr, 4, 0x0f,0x01,3,0}, + {AMOVL, Yidtr, Ym, 3, 0x0f,0x01,1,0}, + {AMOVQ, Ym, Ygdtr, 4, 0x0f,0x01,2,0}, + {AMOVQ, Ygdtr, Ym, 3, 0x0f,0x01,0,0}, + {AMOVQ, Ym, Yidtr, 4, 0x0f,0x01,3,0}, 
+ {AMOVQ, Yidtr, Ym, 3, 0x0f,0x01,1,0}, + +/* lldt, sldt */ + {AMOVW, Yml, Yldtr, 4, 0x0f,0x00,2,0}, + {AMOVW, Yldtr, Yml, 3, 0x0f,0x00,0,0}, + +/* lmsw, smsw */ + {AMOVW, Yml, Ymsw, 4, 0x0f,0x01,6,0}, + {AMOVW, Ymsw, Yml, 3, 0x0f,0x01,4,0}, + +/* ltr, str */ + {AMOVW, Yml, Ytask, 4, 0x0f,0x00,3,0}, + {AMOVW, Ytask, Yml, 3, 0x0f,0x00,1,0}, + +/* load full pointer */ + {AMOVL, Yml, Ycol, 5, 0,0,0,0}, + {AMOVW, Yml, Ycol, 5, Pe,0,0,0}, + +/* double shift */ + {ASHLL, Ycol, Yml, 6, 0xa4,0xa5,0,0}, + {ASHRL, Ycol, Yml, 6, 0xac,0xad,0,0}, + {ASHLQ, Ycol, Yml, 6, Pw,0xa4,0xa5,0}, + {ASHRQ, Ycol, Yml, 6, Pw,0xac,0xad,0}, + {ASHLW, Ycol, Yml, 6, Pe,0xa4,0xa5,0}, + {ASHRW, Ycol, Yml, 6, Pe,0xac,0xad,0}, + +/* load TLS base */ + {AMOVQ, Ytls, Yrl, 7, 0,0,0,0}, + + 0 +}; + +static int +isax(Addr *a) +{ + + switch(a->type) { + case D_AX: + case D_AL: + case D_AH: + case D_INDIR+D_AX: + return 1; + } + if(a->index == D_AX) + return 1; + return 0; +} + +static void +subreg(Prog *p, int from, int to) +{ + + if(0 /*debug['Q']*/) + print("\n%P s/%R/%R/\n", p, from, to); + + if(p->from.type == from) + p->from.type = to; + if(p->to.type == from) + p->to.type = to; + + if(p->from.index == from) + p->from.index = to; + if(p->to.index == from) + p->to.index = to; + + from += D_INDIR; + if(p->from.type == from) + p->from.type = to+D_INDIR; + if(p->to.type == from) + p->to.type = to+D_INDIR; + + if(0 /*debug['Q']*/) + print("%P\n", p); +} + +static int +mediaop(Link *ctxt, Optab *o, int op, int osize, int z) +{ + switch(op){ + case Pm: + case Pe: + case Pf2: + case Pf3: + if(osize != 1){ + if(op != Pm) + *ctxt->andptr++ = op; + *ctxt->andptr++ = Pm; + op = o->op[++z]; + break; + } + default: + if(ctxt->andptr == ctxt->and || ctxt->andptr[-1] != Pm) + *ctxt->andptr++ = Pm; + break; + } + *ctxt->andptr++ = op; + return z; +} + +static void +doasm(Link *ctxt, Prog *p) +{ + Optab *o; + Prog *q, pp; + uchar *t; + Movtab *mo; + int z, op, ft, tt, xo, l, pre; + vlong v; + Reloc rel, *r; + Addr 
*a; + + ctxt->curp = p; // TODO + + o = opindex[p->as]; + if(o == nil) { + ctxt->diag("asmins: missing op %P", p); + return; + } + + pre = prefixof(ctxt, &p->from); + if(pre) + *ctxt->andptr++ = pre; + pre = prefixof(ctxt, &p->to); + if(pre) + *ctxt->andptr++ = pre; + + if(p->ft == 0) + p->ft = oclass(ctxt, &p->from); + if(p->tt == 0) + p->tt = oclass(ctxt, &p->to); + + ft = p->ft * Ymax; + tt = p->tt * Ymax; + + t = o->ytab; + if(t == 0) { + ctxt->diag("asmins: noproto %P", p); + return; + } + xo = o->op[0] == 0x0f; + for(z=0; *t; z+=t[3]+xo,t+=4) + if(ycover[ft+t[0]]) + if(ycover[tt+t[1]]) + goto found; + goto domov; + +found: + switch(o->prefix) { + case Pq: /* 16 bit escape and opcode escape */ + *ctxt->andptr++ = Pe; + *ctxt->andptr++ = Pm; + break; + case Pq3: /* 16 bit escape, Rex.w, and opcode escape */ + *ctxt->andptr++ = Pe; + *ctxt->andptr++ = Pw; + *ctxt->andptr++ = Pm; + break; + + case Pf2: /* xmm opcode escape */ + case Pf3: + *ctxt->andptr++ = o->prefix; + *ctxt->andptr++ = Pm; + break; + + case Pm: /* opcode escape */ + *ctxt->andptr++ = Pm; + break; + + case Pe: /* 16 bit escape */ + *ctxt->andptr++ = Pe; + break; + + case Pw: /* 64-bit escape */ + if(p->mode != 64) + ctxt->diag("asmins: illegal 64: %P", p); + ctxt->rexflag |= Pw; + break; + + case Pb: /* botch */ + bytereg(&p->from, &p->ft); + bytereg(&p->to, &p->tt); + break; + + case P32: /* 32 bit but illegal if 64-bit mode */ + if(p->mode == 64) + ctxt->diag("asmins: illegal in 64-bit mode: %P", p); + break; + + case Py: /* 64-bit only, no prefix */ + if(p->mode != 64) + ctxt->diag("asmins: illegal in %d-bit mode: %P", p->mode, p); + break; + } + + if(z >= nelem(o->op)) + sysfatal("asmins bad table %P", p); + op = o->op[z]; + if(op == 0x0f) { + *ctxt->andptr++ = op; + op = o->op[++z]; + } + switch(t[2]) { + default: + ctxt->diag("asmins: unknown z %d %P", t[2], p); + return; + + case Zpseudo: + break; + + case Zlit: + for(; op = o->op[z]; z++) + *ctxt->andptr++ = op; + break; + + case 
Zlitm_r: + for(; op = o->op[z]; z++) + *ctxt->andptr++ = op; + asmand(ctxt, &p->from, &p->to); + break; + + case Zmb_r: + bytereg(&p->from, &p->ft); + /* fall through */ + case Zm_r: + *ctxt->andptr++ = op; + asmand(ctxt, &p->from, &p->to); + break; + case Zm2_r: + *ctxt->andptr++ = op; + *ctxt->andptr++ = o->op[z+1]; + asmand(ctxt, &p->from, &p->to); + break; + + case Zm_r_xm: + mediaop(ctxt, o, op, t[3], z); + asmand(ctxt, &p->from, &p->to); + break; + + case Zm_r_xm_nr: + ctxt->rexflag = 0; + mediaop(ctxt, o, op, t[3], z); + asmand(ctxt, &p->from, &p->to); + break; + + case Zm_r_i_xm: + mediaop(ctxt, o, op, t[3], z); + asmand(ctxt, &p->from, &p->to); + *ctxt->andptr++ = p->to.offset; + break; + + case Zm_r_3d: + *ctxt->andptr++ = 0x0f; + *ctxt->andptr++ = 0x0f; + asmand(ctxt, &p->from, &p->to); + *ctxt->andptr++ = op; + break; + + case Zibm_r: + while ((op = o->op[z++]) != 0) + *ctxt->andptr++ = op; + asmand(ctxt, &p->from, &p->to); + *ctxt->andptr++ = p->to.offset; + break; + + case Zaut_r: + *ctxt->andptr++ = 0x8d; /* leal */ + if(p->from.type != D_ADDR) + ctxt->diag("asmins: Zaut sb type ADDR"); + p->from.type = p->from.index; + p->from.index = D_NONE; + asmand(ctxt, &p->from, &p->to); + p->from.index = p->from.type; + p->from.type = D_ADDR; + break; + + case Zm_o: + *ctxt->andptr++ = op; + asmando(ctxt, &p->from, o->op[z+1]); + break; + + case Zr_m: + *ctxt->andptr++ = op; + asmand(ctxt, &p->to, &p->from); + break; + + case Zr_m_xm: + mediaop(ctxt, o, op, t[3], z); + asmand(ctxt, &p->to, &p->from); + break; + + case Zr_m_xm_nr: + ctxt->rexflag = 0; + mediaop(ctxt, o, op, t[3], z); + asmand(ctxt, &p->to, &p->from); + break; + + case Zr_m_i_xm: + mediaop(ctxt, o, op, t[3], z); + asmand(ctxt, &p->to, &p->from); + *ctxt->andptr++ = p->from.offset; + break; + + case Zo_m: + *ctxt->andptr++ = op; + asmando(ctxt, &p->to, o->op[z+1]); + break; + + case Zo_m64: + case_Zo_m64: + *ctxt->andptr++ = op; + asmandsz(ctxt, &p->to, o->op[z+1], 0, 1); + break; + + case 
Zm_ibo: + *ctxt->andptr++ = op; + asmando(ctxt, &p->from, o->op[z+1]); + *ctxt->andptr++ = vaddr(ctxt, &p->to, nil); + break; + + case Zibo_m: + *ctxt->andptr++ = op; + asmando(ctxt, &p->to, o->op[z+1]); + *ctxt->andptr++ = vaddr(ctxt, &p->from, nil); + break; + + case Zibo_m_xm: + z = mediaop(ctxt, o, op, t[3], z); + asmando(ctxt, &p->to, o->op[z+1]); + *ctxt->andptr++ = vaddr(ctxt, &p->from, nil); + break; + + case Z_ib: + case Zib_: + if(t[2] == Zib_) + a = &p->from; + else + a = &p->to; + *ctxt->andptr++ = op; + *ctxt->andptr++ = vaddr(ctxt, a, nil); + break; + + case Zib_rp: + ctxt->rexflag |= regrex[p->to.type] & (Rxb|0x40); + *ctxt->andptr++ = op + reg[p->to.type]; + *ctxt->andptr++ = vaddr(ctxt, &p->from, nil); + break; + + case Zil_rp: + ctxt->rexflag |= regrex[p->to.type] & Rxb; + *ctxt->andptr++ = op + reg[p->to.type]; + if(o->prefix == Pe) { + v = vaddr(ctxt, &p->from, nil); + *ctxt->andptr++ = v; + *ctxt->andptr++ = v>>8; + } + else + relput4(ctxt, p, &p->from); + break; + + case Zo_iw: + *ctxt->andptr++ = op; + if(p->from.type != D_NONE){ + v = vaddr(ctxt, &p->from, nil); + *ctxt->andptr++ = v; + *ctxt->andptr++ = v>>8; + } + break; + + case Ziq_rp: + v = vaddr(ctxt, &p->from, &rel); + l = v>>32; + if(l == 0 && rel.siz != 8){ + //p->mark |= 0100; + //print("zero: %llux %P\n", v, p); + ctxt->rexflag &= ~(0x40|Rxw); + ctxt->rexflag |= regrex[p->to.type] & Rxb; + *ctxt->andptr++ = 0xb8 + reg[p->to.type]; + if(rel.type != 0) { + r = addrel(ctxt->cursym); + *r = rel; + r->off = p->pc + ctxt->andptr - ctxt->and; + } + put4(ctxt, v); + }else if(l == -1 && (v&((uvlong)1<<31))!=0){ /* sign extend */ + //p->mark |= 0100; + //print("sign: %llux %P\n", v, p); + *ctxt->andptr ++ = 0xc7; + asmando(ctxt, &p->to, 0); + put4(ctxt, v); + }else{ /* need all 8 */ + //print("all: %llux %P\n", v, p); + ctxt->rexflag |= regrex[p->to.type] & Rxb; + *ctxt->andptr++ = op + reg[p->to.type]; + if(rel.type != 0) { + r = addrel(ctxt->cursym); + *r = rel; + r->off = p->pc + 
ctxt->andptr - ctxt->and; + } + put8(ctxt, v); + } + break; + + case Zib_rr: + *ctxt->andptr++ = op; + asmand(ctxt, &p->to, &p->to); + *ctxt->andptr++ = vaddr(ctxt, &p->from, nil); + break; + + case Z_il: + case Zil_: + if(t[2] == Zil_) + a = &p->from; + else + a = &p->to; + *ctxt->andptr++ = op; + if(o->prefix == Pe) { + v = vaddr(ctxt, a, nil); + *ctxt->andptr++ = v; + *ctxt->andptr++ = v>>8; + } + else + relput4(ctxt, p, a); + break; + + case Zm_ilo: + case Zilo_m: + *ctxt->andptr++ = op; + if(t[2] == Zilo_m) { + a = &p->from; + asmando(ctxt, &p->to, o->op[z+1]); + } else { + a = &p->to; + asmando(ctxt, &p->from, o->op[z+1]); + } + if(o->prefix == Pe) { + v = vaddr(ctxt, a, nil); + *ctxt->andptr++ = v; + *ctxt->andptr++ = v>>8; + } + else + relput4(ctxt, p, a); + break; + + case Zil_rr: + *ctxt->andptr++ = op; + asmand(ctxt, &p->to, &p->to); + if(o->prefix == Pe) { + v = vaddr(ctxt, &p->from, nil); + *ctxt->andptr++ = v; + *ctxt->andptr++ = v>>8; + } + else + relput4(ctxt, p, &p->from); + break; + + case Z_rp: + ctxt->rexflag |= regrex[p->to.type] & (Rxb|0x40); + *ctxt->andptr++ = op + reg[p->to.type]; + break; + + case Zrp_: + ctxt->rexflag |= regrex[p->from.type] & (Rxb|0x40); + *ctxt->andptr++ = op + reg[p->from.type]; + break; + + case Zclr: + *ctxt->andptr++ = op; + asmand(ctxt, &p->to, &p->to); + break; + + case Zcall: + if(p->to.sym == nil) { + ctxt->diag("call without target"); + sysfatal("bad code"); + } + *ctxt->andptr++ = op; + r = addrel(ctxt->cursym); + r->off = p->pc + ctxt->andptr - ctxt->and; + r->sym = p->to.sym; + r->add = p->to.offset; + r->type = R_CALL; + r->siz = 4; + put4(ctxt, 0); + break; + + case Zcallindreg: + r = addrel(ctxt->cursym); + r->off = p->pc; + r->type = R_CALLIND; + r->siz = 0; + goto case_Zo_m64; + + case Zbr: + case Zjmp: + case Zloop: + // TODO: jump across functions needs reloc + if(p->to.sym != nil) { + if(t[2] != Zjmp) { + ctxt->diag("branch to ATEXT"); + sysfatal("bad code"); + } + *ctxt->andptr++ = o->op[z+1]; + r = 
addrel(ctxt->cursym); + r->off = p->pc + ctxt->andptr - ctxt->and; + r->sym = p->to.sym; + r->type = R_PCREL; + r->siz = 4; + put4(ctxt, 0); + break; + } + // Assumes q is in this function. + // TODO: Check in input, preserve in brchain. + + // Fill in backward jump now. + q = p->pcond; + if(q == nil) { + ctxt->diag("jmp/branch/loop without target"); + sysfatal("bad code"); + } + if(p->back & 1) { + v = q->pc - (p->pc + 2); + if(v >= -128) { + if(p->as == AJCXZL) + *ctxt->andptr++ = 0x67; + *ctxt->andptr++ = op; + *ctxt->andptr++ = v; + } else if(t[2] == Zloop) { + ctxt->diag("loop too far: %P", p); + } else { + v -= 5-2; + if(t[2] == Zbr) { + *ctxt->andptr++ = 0x0f; + v--; + } + *ctxt->andptr++ = o->op[z+1]; + *ctxt->andptr++ = v; + *ctxt->andptr++ = v>>8; + *ctxt->andptr++ = v>>16; + *ctxt->andptr++ = v>>24; + } + break; + } + + // Annotate target; will fill in later. + p->forwd = q->comefrom; + q->comefrom = p; + if(p->back & 2) { // short + if(p->as == AJCXZL) + *ctxt->andptr++ = 0x67; + *ctxt->andptr++ = op; + *ctxt->andptr++ = 0; + } else if(t[2] == Zloop) { + ctxt->diag("loop too far: %P", p); + } else { + if(t[2] == Zbr) + *ctxt->andptr++ = 0x0f; + *ctxt->andptr++ = o->op[z+1]; + *ctxt->andptr++ = 0; + *ctxt->andptr++ = 0; + *ctxt->andptr++ = 0; + *ctxt->andptr++ = 0; + } + break; + +/* + v = q->pc - p->pc - 2; + if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) { + *ctxt->andptr++ = op; + *ctxt->andptr++ = v; + } else { + v -= 5-2; + if(t[2] == Zbr) { + *ctxt->andptr++ = 0x0f; + v--; + } + *ctxt->andptr++ = o->op[z+1]; + *ctxt->andptr++ = v; + *ctxt->andptr++ = v>>8; + *ctxt->andptr++ = v>>16; + *ctxt->andptr++ = v>>24; + } +*/ + break; + + case Zbyte: + v = vaddr(ctxt, &p->from, &rel); + if(rel.siz != 0) { + rel.siz = op; + r = addrel(ctxt->cursym); + *r = rel; + r->off = p->pc + ctxt->andptr - ctxt->and; + } + *ctxt->andptr++ = v; + if(op > 1) { + *ctxt->andptr++ = v>>8; + if(op > 2) { + *ctxt->andptr++ = v>>16; + *ctxt->andptr++ = v>>24; + if(op 
> 4) { + *ctxt->andptr++ = v>>32; + *ctxt->andptr++ = v>>40; + *ctxt->andptr++ = v>>48; + *ctxt->andptr++ = v>>56; + } + } + } + break; + } + return; + +domov: + for(mo=ymovtab; mo->as; mo++) + if(p->as == mo->as) + if(ycover[ft+mo->ft]) + if(ycover[tt+mo->tt]){ + t = mo->op; + goto mfound; + } +bad: + if(p->mode != 64){ + /* + * here, the assembly has failed. + * if its a byte instruction that has + * unaddressable registers, try to + * exchange registers and reissue the + * instruction with the operands renamed. + */ + pp = *p; + z = p->from.type; + if(z >= D_BP && z <= D_DI) { + if(isax(&p->to) || p->to.type == D_NONE) { + // We certainly don't want to exchange + // with AX if the op is MUL or DIV. + *ctxt->andptr++ = 0x87; /* xchg lhs,bx */ + asmando(ctxt, &p->from, reg[D_BX]); + subreg(&pp, z, D_BX); + doasm(ctxt, &pp); + *ctxt->andptr++ = 0x87; /* xchg lhs,bx */ + asmando(ctxt, &p->from, reg[D_BX]); + } else { + *ctxt->andptr++ = 0x90 + reg[z]; /* xchg lsh,ax */ + subreg(&pp, z, D_AX); + doasm(ctxt, &pp); + *ctxt->andptr++ = 0x90 + reg[z]; /* xchg lsh,ax */ + } + return; + } + z = p->to.type; + if(z >= D_BP && z <= D_DI) { + if(isax(&p->from)) { + *ctxt->andptr++ = 0x87; /* xchg rhs,bx */ + asmando(ctxt, &p->to, reg[D_BX]); + subreg(&pp, z, D_BX); + doasm(ctxt, &pp); + *ctxt->andptr++ = 0x87; /* xchg rhs,bx */ + asmando(ctxt, &p->to, reg[D_BX]); + } else { + *ctxt->andptr++ = 0x90 + reg[z]; /* xchg rsh,ax */ + subreg(&pp, z, D_AX); + doasm(ctxt, &pp); + *ctxt->andptr++ = 0x90 + reg[z]; /* xchg rsh,ax */ + } + return; + } + } + ctxt->diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p); + return; + +mfound: + switch(mo->code) { + default: + ctxt->diag("asmins: unknown mov %d %P", mo->code, p); + break; + + case 0: /* lit */ + for(z=0; t[z]!=E; z++) + *ctxt->andptr++ = t[z]; + break; + + case 1: /* r,m */ + *ctxt->andptr++ = t[0]; + asmando(ctxt, &p->to, t[1]); + break; + + case 2: /* m,r */ + *ctxt->andptr++ = t[0]; + asmando(ctxt, &p->from, 
t[1]); + break; + + case 3: /* r,m - 2op */ + *ctxt->andptr++ = t[0]; + *ctxt->andptr++ = t[1]; + asmando(ctxt, &p->to, t[2]); + ctxt->rexflag |= regrex[p->from.type] & (Rxr|0x40); + break; + + case 4: /* m,r - 2op */ + *ctxt->andptr++ = t[0]; + *ctxt->andptr++ = t[1]; + asmando(ctxt, &p->from, t[2]); + ctxt->rexflag |= regrex[p->to.type] & (Rxr|0x40); + break; + + case 5: /* load full pointer, trash heap */ + if(t[0]) + *ctxt->andptr++ = t[0]; + switch(p->to.index) { + default: + goto bad; + case D_DS: + *ctxt->andptr++ = 0xc5; + break; + case D_SS: + *ctxt->andptr++ = 0x0f; + *ctxt->andptr++ = 0xb2; + break; + case D_ES: + *ctxt->andptr++ = 0xc4; + break; + case D_FS: + *ctxt->andptr++ = 0x0f; + *ctxt->andptr++ = 0xb4; + break; + case D_GS: + *ctxt->andptr++ = 0x0f; + *ctxt->andptr++ = 0xb5; + break; + } + asmand(ctxt, &p->from, &p->to); + break; + + case 6: /* double shift */ + if(t[0] == Pw){ + if(p->mode != 64) + ctxt->diag("asmins: illegal 64: %P", p); + ctxt->rexflag |= Pw; + t++; + }else if(t[0] == Pe){ + *ctxt->andptr++ = Pe; + t++; + } + z = p->from.type; + switch(z) { + default: + goto bad; + case D_CONST: + *ctxt->andptr++ = 0x0f; + *ctxt->andptr++ = t[0]; + asmandsz(ctxt, &p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0); + *ctxt->andptr++ = p->from.offset; + break; + case D_CL: + case D_CX: + *ctxt->andptr++ = 0x0f; + *ctxt->andptr++ = t[1]; + asmandsz(ctxt, &p->to, reg[(int)p->from.index], regrex[(int)p->from.index], 0); + break; + } + break; + + case 7: /* mov tls, r */ + // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, + // where you load the TLS base register into a register and then index off that + // register to access the actual TLS variables. Systems that allow direct TLS access + // are handled in prefixof above and should not be listed here. 
+ switch(ctxt->headtype) { + default: + sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype)); + + case Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c. + // TLS base is 0(FS). + pp.from = p->from; + pp.from.type = D_INDIR+D_NONE; + pp.from.offset = 0; + pp.from.index = D_NONE; + pp.from.scale = 0; + ctxt->rexflag |= Pw; + *ctxt->andptr++ = 0x64; // FS + *ctxt->andptr++ = 0x8B; + asmand(ctxt, &pp.from, &p->to); + break; + + case Hwindows: + // Windows TLS base is always 0x28(GS). + pp.from = p->from; + pp.from.type = D_INDIR+D_GS; + pp.from.offset = 0x28; + pp.from.index = D_NONE; + pp.from.scale = 0; + ctxt->rexflag |= Pw; + *ctxt->andptr++ = 0x65; // GS + *ctxt->andptr++ = 0x8B; + asmand(ctxt, &pp.from, &p->to); + break; + } + break; + } +} + +static uchar naclret[] = { + 0x5e, // POPL SI + // 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging + 0x83, 0xe6, 0xe0, // ANDL $~31, SI + 0x4c, 0x01, 0xfe, // ADDQ R15, SI + 0xff, 0xe6, // JMP SI +}; + +static uchar naclspfix[] = { + 0x4c, 0x01, 0xfc, // ADDQ R15, SP +}; + +static uchar naclbpfix[] = { + 0x4c, 0x01, 0xfd, // ADDQ R15, BP +}; + +static uchar naclmovs[] = { + 0x89, 0xf6, // MOVL SI, SI + 0x49, 0x8d, 0x34, 0x37, // LEAQ (R15)(SI*1), SI + 0x89, 0xff, // MOVL DI, DI + 0x49, 0x8d, 0x3c, 0x3f, // LEAQ (R15)(DI*1), DI +}; + +static uchar naclstos[] = { + 0x89, 0xff, // MOVL DI, DI + 0x49, 0x8d, 0x3c, 0x3f, // LEAQ (R15)(DI*1), DI +}; + +static void +nacltrunc(Link *ctxt, int reg) +{ + if(reg >= D_R8) + *ctxt->andptr++ = 0x45; + reg = (reg - D_AX) & 7; + *ctxt->andptr++ = 0x89; + *ctxt->andptr++ = (3<<6) | (reg<<3) | reg; +} + +static void +asmins(Link *ctxt, Prog *p) +{ + int n, np, c; + uchar *and0; + Reloc *r; + + ctxt->andptr = ctxt->and; + ctxt->asmode = p->mode; + + if(p->as == AUSEFIELD) { + r = addrel(ctxt->cursym); + r->off = 0; + r->siz = 0; + r->sym = p->from.sym; + r->type = R_USEFIELD; + return; + } + 
+ if(ctxt->headtype == Hnacl) { + if(p->as == AREP) { + ctxt->rep++; + return; + } + if(p->as == AREPN) { + ctxt->repn++; + return; + } + if(p->as == ALOCK) { + ctxt->lock++; + return; + } + if(p->as != ALEAQ && p->as != ALEAL) { + if(p->from.index != D_NONE && p->from.scale > 0) + nacltrunc(ctxt, p->from.index); + if(p->to.index != D_NONE && p->to.scale > 0) + nacltrunc(ctxt, p->to.index); + } + switch(p->as) { + case ARET: + memmove(ctxt->andptr, naclret, sizeof naclret); + ctxt->andptr += sizeof naclret; + return; + case ACALL: + case AJMP: + if(D_AX <= p->to.type && p->to.type <= D_DI) { + // ANDL $~31, reg + *ctxt->andptr++ = 0x83; + *ctxt->andptr++ = 0xe0 | (p->to.type - D_AX); + *ctxt->andptr++ = 0xe0; + // ADDQ R15, reg + *ctxt->andptr++ = 0x4c; + *ctxt->andptr++ = 0x01; + *ctxt->andptr++ = 0xf8 | (p->to.type - D_AX); + } + if(D_R8 <= p->to.type && p->to.type <= D_R15) { + // ANDL $~31, reg + *ctxt->andptr++ = 0x41; + *ctxt->andptr++ = 0x83; + *ctxt->andptr++ = 0xe0 | (p->to.type - D_R8); + *ctxt->andptr++ = 0xe0; + // ADDQ R15, reg + *ctxt->andptr++ = 0x4d; + *ctxt->andptr++ = 0x01; + *ctxt->andptr++ = 0xf8 | (p->to.type - D_R8); + } + break; + case AINT: + *ctxt->andptr++ = 0xf4; + return; + case ASCASB: + case ASCASW: + case ASCASL: + case ASCASQ: + case ASTOSB: + case ASTOSW: + case ASTOSL: + case ASTOSQ: + memmove(ctxt->andptr, naclstos, sizeof naclstos); + ctxt->andptr += sizeof naclstos; + break; + case AMOVSB: + case AMOVSW: + case AMOVSL: + case AMOVSQ: + memmove(ctxt->andptr, naclmovs, sizeof naclmovs); + ctxt->andptr += sizeof naclmovs; + break; + } + if(ctxt->rep) { + *ctxt->andptr++ = 0xf3; + ctxt->rep = 0; + } + if(ctxt->repn) { + *ctxt->andptr++ = 0xf2; + ctxt->repn = 0; + } + if(ctxt->lock) { + *ctxt->andptr++ = 0xf0; + ctxt->lock = 0; + } + } + + ctxt->rexflag = 0; + and0 = ctxt->andptr; + ctxt->asmode = p->mode; + doasm(ctxt, p); + if(ctxt->rexflag){ + /* + * as befits the whole approach of the architecture, + * the rex prefix must appear 
before the first opcode byte + * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but + * before the 0f opcode escape!), or it might be ignored. + * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. + */ + if(p->mode != 64) + ctxt->diag("asmins: illegal in mode %d: %P", p->mode, p); + n = ctxt->andptr - and0; + for(np = 0; np < n; np++) { + c = and0[np]; + if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26) + break; + } + memmove(and0+np+1, and0+np, n-np); + and0[np] = 0x40 | ctxt->rexflag; + ctxt->andptr++; + } + n = ctxt->andptr - ctxt->and; + for(r=ctxt->cursym->r+ctxt->cursym->nr; r-- > ctxt->cursym->r; ) { + if(r->off < p->pc) + break; + if(ctxt->rexflag) + r->off++; + if(r->type == R_PCREL || r->type == R_CALL) + r->add -= p->pc + n - (r->off + r->siz); + } + + if(ctxt->headtype == Hnacl && p->as != ACMPL && p->as != ACMPQ) { + switch(p->to.type) { + case D_SP: + memmove(ctxt->andptr, naclspfix, sizeof naclspfix); + ctxt->andptr += sizeof naclspfix; + break; + case D_BP: + memmove(ctxt->andptr, naclbpfix, sizeof naclbpfix); + ctxt->andptr += sizeof naclbpfix; + break; + } + } +} |