summaryrefslogtreecommitdiff
path: root/src/liblink/obj6.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/liblink/obj6.c')
-rw-r--r--src/liblink/obj6.c1171
1 files changed, 1171 insertions, 0 deletions
diff --git a/src/liblink/obj6.c b/src/liblink/obj6.c
new file mode 100644
index 000000000..b1bcd0dc0
--- /dev/null
+++ b/src/liblink/obj6.c
@@ -0,0 +1,1171 @@
+// Inferno utils/6l/pass.c
+// http://code.google.com/p/inferno-os/source/browse/utils/6l/pass.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <link.h>
+#include "../cmd/6l/6.out.h"
+#include "../pkg/runtime/stack.h"
+
+// zprg is the template instruction: prg() copies it into every
+// freshly allocated Prog, so new instructions start out as AGOK
+// with empty (D_NONE) operands.
+static Prog zprg = {
+	.back = 2,
+	.as = AGOK,
+	.from = { .type = D_NONE, .index = D_NONE },
+	.to = { .type = D_NONE, .index = D_NONE },
+};
+
+// nopout turns p into a NOP in place by clearing both operand
+// types and replacing the opcode.
+static void
+nopout(Prog *p)
+{
+	p->to.type = D_NONE;
+	p->from.type = D_NONE;
+	p->as = ANOP;
+}
+
+// symtype returns the operand type of a.  For a D_ADDR operand
+// the type is taken from the index field instead.
+static int
+symtype(Addr *a)
+{
+	return a->type == D_ADDR ? a->index : a->type;
+}
+
+// isdata reports whether p is an ADATA or AGLOBL instruction.
+static int
+isdata(Prog *p)
+{
+	switch(p->as) {
+	case ADATA:
+	case AGLOBL:
+		return 1;
+	}
+	return 0;
+}
+
+// iscall reports whether p is an ACALL instruction.
+static int
+iscall(Prog *p)
+{
+	if(p->as == ACALL)
+		return 1;
+	return 0;
+}
+
+// datasize returns the value stored in p->from.scale, which
+// this architecture uses to carry the size on data instructions.
+static int
+datasize(Prog *p)
+{
+	int size;
+
+	size = p->from.scale;
+	return size;
+}
+
+// textflag returns the flag bits kept in p->from.scale
+// (the same field settextflag writes).
+static int
+textflag(Prog *p)
+{
+	int flags;
+
+	flags = p->from.scale;
+	return flags;
+}
+
+// settextflag stores the flag bits f in p->from.scale
+// (the field textflag reads back).
+static void
+settextflag(Prog *p, int f)
+{
+	Addr *from;
+
+	from = &p->from;
+	from->scale = f;
+}
+
+static void nacladdr(Link*, Prog*, Addr*);
+
+// canuselocaltls reports whether progedit may reduce the
+// two-instruction TLS access to the direct off(TLS) form for the
+// target selected by ctxt->headtype.  Only Hwindows is excluded;
+// an exclusion for Hlinux exists only as disabled code:
+//	case Hlinux:
+static int
+canuselocaltls(Link *ctxt)
+{
+	if(ctxt->headtype == Hwindows)
+		return 0;
+	return 1;
+}
+
+// progedit rewrites the single instruction p in place; in one case
+// (TLS load on systems without local TLS) it also appends a
+// follow-up instruction after p.  In order, it:
+//   - normalizes thread-local storage references for the target OS,
+//   - applies the Hnacl address rewrite to both operands,
+//   - records the current code generation mode in p->mode and
+//     consumes AMODE pseudo-instructions,
+//   - retargets CALL/JMP/RET to a symbol as D_BRANCH,
+//   - replaces floating-point constant source operands with references
+//     to "$f32.*" / "$f64.*" SRODATA symbols holding the constant bits.
+static void
+progedit(Link *ctxt, Prog *p)
+{
+ char literal[64];
+ LSym *s;
+ Prog *q;
+
+ // Thread-local storage references use the TLS pseudo-register.
+ // As a register, TLS refers to the thread-local storage base, and it
+ // can only be loaded into another register:
+ //
+ // MOVQ TLS, AX
+ //
+ // An offset from the thread-local storage base is written off(reg)(TLS*1).
+ // Semantically it is off(reg), but the (TLS*1) annotation marks this as
+ // indexing from the loaded TLS base. This emits a relocation so that
+ // if the linker needs to adjust the offset, it can. For example:
+ //
+ // MOVQ TLS, AX
+ // MOVQ 8(AX)(TLS*1), CX // load m into CX
+ //
+ // On systems that support direct access to the TLS memory, this
+ // pair of instructions can be reduced to a direct TLS memory reference:
+ //
+ // MOVQ 8(TLS), CX // load m into CX
+ //
+ // The 2-instruction and 1-instruction forms correspond roughly to
+ // ELF TLS initial exec mode and ELF TLS local exec mode, respectively.
+ //
+ // We apply this rewrite on systems that support the 1-instruction form.
+ // The decision is made using only the operating system (and probably
+ // the -shared flag, eventually), not the link mode. If some link modes
+ // on a particular operating system require the 2-instruction form,
+ // then all builds for that operating system will use the 2-instruction
+ // form, so that the link mode decision can be delayed to link time.
+ //
+ // In this way, all supported systems use identical instructions to
+ // access TLS, and they are rewritten appropriately first here in
+ // liblink and then finally using relocations in the linker.
+
+ if(canuselocaltls(ctxt)) {
+ // Reduce TLS initial exec model to TLS local exec model.
+ // Sequences like
+ // MOVQ TLS, BX
+ // ... off(BX)(TLS*1) ...
+ // become
+ // NOP
+ // ... off(TLS) ...
+ //
+ // TODO(rsc): Remove the Hsolaris special case. It exists only to
+ // guarantee we are producing byte-identical binaries as before this code.
+ // But it should be unnecessary.
+ if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_R15 && ctxt->headtype != Hsolaris)
+ nopout(p);
+ if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_R15) {
+ p->from.type = D_INDIR+D_TLS;
+ p->from.scale = 0;
+ p->from.index = D_NONE;
+ }
+ if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_R15) {
+ p->to.type = D_INDIR+D_TLS;
+ p->to.scale = 0;
+ p->to.index = D_NONE;
+ }
+ } else {
+ // As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load.
+ // The instruction
+ // MOVQ off(TLS), BX
+ // becomes the sequence
+ // MOVQ TLS, BX
+ // MOVQ off(BX)(TLS*1), BX
+ // This allows the C compilers to emit references to m and g using the direct off(TLS) form.
+ if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_R15) {
+ // q becomes the second instruction (the indexed load);
+ // p itself is turned into the plain "MOVQ TLS, reg".
+ q = appendp(ctxt, p);
+ q->as = p->as;
+ q->from = p->from;
+ q->from.type = D_INDIR + p->to.type;
+ q->from.index = D_TLS;
+ q->from.scale = 2; // TODO: use 1
+ q->to = p->to;
+ p->from.type = D_TLS;
+ p->from.index = D_NONE;
+ p->from.offset = 0;
+ }
+ }
+
+ // TODO: Remove.
+ // On Hwindows and Hplan9 a TLS index with scale 1 is rewritten to scale 2.
+ if(ctxt->headtype == Hwindows || ctxt->headtype == Hplan9) {
+ if(p->from.scale == 1 && p->from.index == D_TLS)
+ p->from.scale = 2;
+ if(p->to.scale == 1 && p->to.index == D_TLS)
+ p->to.scale = 2;
+ }
+
+ if(ctxt->headtype == Hnacl) {
+ nacladdr(ctxt, p, &p->from);
+ nacladdr(ctxt, p, &p->to);
+ }
+
+ // Maintain information about code generation mode.
+ // The mode defaults to 64 and is stamped on every instruction.
+ if(ctxt->mode == 0)
+ ctxt->mode = 64;
+ p->mode = ctxt->mode;
+
+ switch(p->as) {
+ case AMODE:
+ // AMODE with a constant 16, 32 or 64 switches the mode for the
+ // instructions that follow; the AMODE itself becomes a NOP.
+ if(p->from.type == D_CONST || p->from.type == D_INDIR+D_NONE) {
+ switch((int)p->from.offset) {
+ case 16:
+ case 32:
+ case 64:
+ ctxt->mode = p->from.offset;
+ break;
+ }
+ }
+ nopout(p);
+ break;
+ }
+
+ // Rewrite CALL/JMP/RET to symbol as D_BRANCH.
+ switch(p->as) {
+ case ACALL:
+ case AJMP:
+ case ARET:
+ if((p->to.type == D_EXTERN || p->to.type == D_STATIC) && p->to.sym != nil)
+ p->to.type = D_BRANCH;
+ break;
+ }
+
+ // Rewrite float constants to values stored in memory.
+ switch(p->as) {
+ case AFMOVF:
+ case AFADDF:
+ case AFSUBF:
+ case AFSUBRF:
+ case AFMULF:
+ case AFDIVF:
+ case AFDIVRF:
+ case AFCOMF:
+ case AFCOMFP:
+ case AMOVSS:
+ case AADDSS:
+ case ASUBSS:
+ case AMULSS:
+ case ADIVSS:
+ case ACOMISS:
+ case AUCOMISS:
+ if(p->from.type == D_FCONST) {
+ int32 i32;
+ float32 f32;
+ // Narrow to float32 and name the symbol after its bit pattern,
+ // so equal constants share one symbol.
+ f32 = p->from.u.dval;
+ memmove(&i32, &f32, 4);
+ sprint(literal, "$f32.%08ux", (uint32)i32);
+ s = linklookup(ctxt, literal, 0);
+ if(s->type == 0) {
+ // First use of this constant: give the symbol its contents.
+ s->type = SRODATA;
+ adduint32(ctxt, s, i32);
+ s->reachable = 0;
+ }
+ p->from.type = D_EXTERN;
+ p->from.sym = s;
+ p->from.offset = 0;
+ }
+ break;
+
+ case AFMOVD:
+ case AFADDD:
+ case AFSUBD:
+ case AFSUBRD:
+ case AFMULD:
+ case AFDIVD:
+ case AFDIVRD:
+ case AFCOMD:
+ case AFCOMDP:
+ case AMOVSD:
+ case AADDSD:
+ case ASUBSD:
+ case AMULSD:
+ case ADIVSD:
+ case ACOMISD:
+ case AUCOMISD:
+ if(p->from.type == D_FCONST) {
+ int64 i64;
+ // Same scheme as above, using the full 8-byte bit pattern.
+ memmove(&i64, &p->from.u.dval, 8);
+ sprint(literal, "$f64.%016llux", (uvlong)i64);
+ s = linklookup(ctxt, literal, 0);
+ if(s->type == 0) {
+ s->type = SRODATA;
+ adduint64(ctxt, s, i64);
+ s->reachable = 0;
+ }
+ p->from.type = D_EXTERN;
+ p->from.sym = s;
+ p->from.offset = 0;
+ }
+ break;
+ }
+}
+
+// nacladdr rewrites operand a of instruction p for Hnacl targets
+// (progedit calls it for both operands).
+//   - LEAL/LEAQ operands are left untouched.
+//   - A direct use of BP or (BP) is diagnosed as invalid.
+//   - TLS and (TLS) are replaced by BP and (BP).
+//   - Any other indirect operand whose base is not BP, SP or R15 is
+//     rebased on R15, with the old base moved into the index slot
+//     (scale 1); it is diagnosed if an index was already present.
+static void
+nacladdr(Link *ctxt, Prog *p, Addr *a)
+{
+	if(p->as == ALEAL || p->as == ALEAQ)
+		return;
+
+	if(a->type == D_BP || a->type == D_INDIR+D_BP) {
+		ctxt->diag("invalid address: %P", p);
+		return;
+	}
+
+	switch(a->type) {
+	case D_INDIR+D_TLS:
+		a->type = D_INDIR+D_BP;
+		break;
+	case D_TLS:
+		a->type = D_BP;
+		break;
+	}
+
+	// Only indirect operands need further treatment.
+	if(a->type < D_INDIR || a->type > D_INDIR+D_INDIR)
+		return;
+
+	// These bases are all ok as they are.
+	if(a->type == D_INDIR+D_BP || a->type == D_INDIR+D_SP || a->type == D_INDIR+D_R15)
+		return;
+
+	if(a->index != D_NONE)
+		ctxt->diag("invalid address %P", p);
+	a->index = a->type - D_INDIR;
+	if(a->index != D_NONE)
+		a->scale = 1;
+	a->type = D_INDIR+D_R15;
+}
+
+// morename lists the runtime stack-growth entry points that
+// addstacksplit looks up into ctxt->symmorestack (same indices).
+// Entries come in pairs: index 2*k is the plain routine and 2*k+1
+// is its _noctxt variant; stacksplit selects an entry as k*2+noctxt.
+// k = 0..3 are the 00/10/01/11 routines, and k = argsize/8 + 3
+// (k = 4..9) are morestack8 through morestack48.
+// The order of the entries therefore must not change.
+static char*
+morename[] =
+{
+ "runtime.morestack00",
+ "runtime.morestack00_noctxt",
+ "runtime.morestack10",
+ "runtime.morestack10_noctxt",
+ "runtime.morestack01",
+ "runtime.morestack01_noctxt",
+ "runtime.morestack11",
+ "runtime.morestack11_noctxt",
+
+ "runtime.morestack8",
+ "runtime.morestack8_noctxt",
+ "runtime.morestack16",
+ "runtime.morestack16_noctxt",
+ "runtime.morestack24",
+ "runtime.morestack24_noctxt",
+ "runtime.morestack32",
+ "runtime.morestack32_noctxt",
+ "runtime.morestack40",
+ "runtime.morestack40_noctxt",
+ "runtime.morestack48",
+ "runtime.morestack48_noctxt",
+};
+
+static Prog* load_g_cx(Link*, Prog*);
+static Prog* stacksplit(Link*, Prog*, int32, int32, int, Prog**);
+static void indir_cx(Link*, Addr*);
+
+// parsetextconst splits the packed constant arg into its two halves.
+// The low 32 bits are sign-extended and stored in *textstksiz.
+// The high 32 bits are stored in *textarg after being replaced by 0
+// when their top bit is set and then rounded up to a multiple of 8.
+static void
+parsetextconst(vlong arg, vlong *textstksiz, vlong *textarg)
+{
+	vlong lo, hi;
+
+	lo = arg & 0xffffffffLL;
+	if(lo & 0x80000000LL)
+		lo = -(-lo & 0xffffffffLL);	// sign-extend from 32 bits
+	*textstksiz = lo;
+
+	hi = (arg >> 32) & 0xffffffffLL;
+	if(hi & 0x80000000LL)
+		hi = 0;
+	*textarg = (hi+7) & ~7LL;
+}
+
+// addstacksplit inserts the function prologue and epilogue into the
+// instruction list of cursym:
+//   - looks up runtime.tlsgm and the morestack routines on first use,
+//   - decodes the frame and argument sizes from the TEXT instruction,
+//   - marks small functions that make no calls as NOSPLIT,
+//   - emits the load of g and the stack split check (unless NOSPLIT),
+//   - emits the frame allocation (AADJSP), the WRAPPER panicwrap
+//     adjustment and the optional -K -K / -Z debugging sequences,
+//   - adjusts D_AUTO/D_PARAM offsets and records spadj for PUSH/POP,
+//   - rewrites every RET to undo the frame (and the panicwrap
+//     adjustment) before returning.
+// Functions with no text, or with only the TEXT instruction, are left alone.
+static void
+addstacksplit(Link *ctxt, LSym *cursym)
+{
+	Prog *p, *q, *q1;
+	Addr retto;
+	int32 autoffset, deltasp;
+	int a, pcsize;
+	uint32 i;
+	vlong textstksiz, textarg;
+
+	if(ctxt->gmsym == nil)
+		ctxt->gmsym = linklookup(ctxt, "runtime.tlsgm", 0);
+	if(ctxt->symmorestack[0] == nil) {
+		if(nelem(morename) > nelem(ctxt->symmorestack))
+			sysfatal("Link.symmorestack needs at least %d elements", nelem(morename));
+		for(i=0; i<nelem(morename); i++)
+			ctxt->symmorestack[i] = linklookup(ctxt, morename[i], 0);
+	}
+	ctxt->cursym = cursym;
+
+	if(cursym->text == nil || cursym->text->link == nil)
+		return;
+
+	p = cursym->text;
+	parsetextconst(p->to.offset, &textstksiz, &textarg);
+	autoffset = textstksiz;
+	if(autoffset < 0)
+		autoffset = 0;
+
+	cursym->args = p->to.offset>>32;
+	cursym->locals = textstksiz;
+
+	// A function with a small frame that contains no CALL (and no
+	// DUFFCOPY/DUFFZERO with a frame close to StackSmall) is marked
+	// NOSPLIT, which suppresses the split check below.
+	if(autoffset < StackSmall && !(p->from.scale & NOSPLIT)) {
+		for(q = p; q != nil; q = q->link) {
+			if(q->as == ACALL)
+				goto noleaf;
+			if((q->as == ADUFFCOPY || q->as == ADUFFZERO) && autoffset >= StackSmall - 8)
+				goto noleaf;
+		}
+		p->from.scale |= NOSPLIT;
+	noleaf:;
+	}
+
+	q = nil;
+	if(!(p->from.scale & NOSPLIT) || (p->from.scale & WRAPPER)) {
+		p = appendp(ctxt, p);
+		p = load_g_cx(ctxt, p); // load g into CX
+	}
+	if(!(cursym->text->from.scale & NOSPLIT))
+		p = stacksplit(ctxt, p, autoffset, textarg, !(cursym->text->from.scale&NEEDCTXT), &q); // emit split check
+
+	if(autoffset) {
+		if(autoffset%ctxt->arch->regsize != 0)
+			ctxt->diag("unaligned stack size %d", autoffset);
+		p = appendp(ctxt, p);
+		p->as = AADJSP;
+		p->from.type = D_CONST;
+		p->from.offset = autoffset;
+		p->spadj = autoffset;
+	} else {
+		// zero-byte stack adjustment.
+		// Insert a fake non-zero adjustment so that stkcheck can
+		// recognize the end of the stack-splitting prolog.
+		p = appendp(ctxt, p);
+		p->as = ANOP;
+		p->spadj = -ctxt->arch->ptrsize;
+		p = appendp(ctxt, p);
+		p->as = ANOP;
+		p->spadj = ctxt->arch->ptrsize;
+	}
+	// q is the "no split needed" branch from stacksplit, if any;
+	// it lands on the frame allocation just emitted.
+	if(q != nil)
+		q->pcond = p;
+	deltasp = autoffset;
+
+	if(cursym->text->from.scale & WRAPPER) {
+		// g->panicwrap += autoffset + ctxt->arch->regsize;
+		p = appendp(ctxt, p);
+		p->as = AADDL;
+		p->from.type = D_CONST;
+		p->from.offset = autoffset + ctxt->arch->regsize;
+		indir_cx(ctxt, &p->to);
+		p->to.offset = 2*ctxt->arch->ptrsize;
+	}
+
+	if(ctxt->debugstack > 1 && autoffset) {
+		// 6l -K -K means double-check for stack overflow
+		// even after calling morestack and even if the
+		// function is marked as nosplit.
+		p = appendp(ctxt, p);
+		p->as = AMOVQ;
+		indir_cx(ctxt, &p->from);
+		p->from.offset = 0;
+		p->to.type = D_BX;
+
+		p = appendp(ctxt, p);
+		p->as = ASUBQ;
+		p->from.type = D_CONST;
+		p->from.offset = StackSmall+32;
+		p->to.type = D_BX;
+
+		p = appendp(ctxt, p);
+		p->as = ACMPQ;
+		p->from.type = D_SP;
+		p->to.type = D_BX;
+
+		p = appendp(ctxt, p);
+		p->as = AJHI;
+		p->to.type = D_BRANCH;
+		q1 = p;
+
+		p = appendp(ctxt, p);
+		p->as = AINT;
+		p->from.type = D_CONST;
+		p->from.offset = 3;
+
+		p = appendp(ctxt, p);
+		p->as = ANOP;
+		q1->pcond = p;
+	}
+
+	if(ctxt->debugzerostack && autoffset && !(cursym->text->from.scale&NOSPLIT)) {
+		// 6l -Z means zero the stack frame on entry.
+		// This slows down function calls but can help avoid
+		// false positives in garbage collection.
+		p = appendp(ctxt, p);
+		p->as = AMOVQ;
+		p->from.type = D_SP;
+		p->to.type = D_DI;
+
+		p = appendp(ctxt, p);
+		p->as = AMOVQ;
+		p->from.type = D_CONST;
+		p->from.offset = autoffset/8;
+		p->to.type = D_CX;
+
+		p = appendp(ctxt, p);
+		p->as = AMOVQ;
+		p->from.type = D_CONST;
+		p->from.offset = 0;
+		p->to.type = D_AX;
+
+		p = appendp(ctxt, p);
+		p->as = AREP;
+
+		p = appendp(ctxt, p);
+		p->as = ASTOSQ;
+	}
+
+	// Walk the rest of the function: rebase stack-relative operands
+	// on the current SP displacement, track PUSH/POP, and expand RET.
+	for(; p != nil; p = p->link) {
+		pcsize = p->mode/8;
+		a = p->from.type;
+		if(a == D_AUTO)
+			p->from.offset += deltasp;
+		if(a == D_PARAM)
+			p->from.offset += deltasp + pcsize;
+		a = p->to.type;
+		if(a == D_AUTO)
+			p->to.offset += deltasp;
+		if(a == D_PARAM)
+			p->to.offset += deltasp + pcsize;
+
+		switch(p->as) {
+		default:
+			continue;
+		case APUSHL:
+		case APUSHFL:
+			deltasp += 4;
+			p->spadj = 4;
+			continue;
+		case APUSHQ:
+		case APUSHFQ:
+			deltasp += 8;
+			p->spadj = 8;
+			continue;
+		case APUSHW:
+		case APUSHFW:
+			deltasp += 2;
+			p->spadj = 2;
+			continue;
+		case APOPL:
+		case APOPFL:
+			deltasp -= 4;
+			p->spadj = -4;
+			continue;
+		case APOPQ:
+		case APOPFQ:
+			deltasp -= 8;
+			p->spadj = -8;
+			continue;
+		case APOPW:
+		case APOPFW:
+			deltasp -= 2;
+			p->spadj = -2;
+			continue;
+		case ARET:
+			break;
+		}
+
+		if(autoffset != deltasp)
+			ctxt->diag("unbalanced PUSH/POP");
+
+		// The code below overwrites the original RET with other
+		// instructions and appends a fresh RET.  Detach the RET's
+		// operand first and reattach it to the final RET, so that a
+		// return-to-symbol (retjmp) is still recognized below and the
+		// overwritten instruction does not inherit a stray operand.
+		retto = p->to;
+		p->to = zprg.to;
+
+		if(cursym->text->from.scale & WRAPPER) {
+			p = load_g_cx(ctxt, p);
+			p = appendp(ctxt, p);
+			// g->panicwrap -= autoffset + ctxt->arch->regsize;
+			p->as = ASUBL;
+			p->from.type = D_CONST;
+			p->from.offset = autoffset + ctxt->arch->regsize;
+			indir_cx(ctxt, &p->to);
+			p->to.offset = 2*ctxt->arch->ptrsize;
+			p = appendp(ctxt, p);
+			p->as = ARET;
+		}
+
+		if(autoffset) {
+			p->as = AADJSP;
+			p->from.type = D_CONST;
+			p->from.offset = -autoffset;
+			p->spadj = -autoffset;
+			p = appendp(ctxt, p);
+			p->as = ARET;
+			// If there are instructions following
+			// this ARET, they come from a branch
+			// with the same stackframe, so undo
+			// the cleanup.
+			p->spadj = +autoffset;
+		}
+
+		// p is now the final RET; give it back the original operand.
+		p->to = retto;
+		if(p->to.sym) // retjmp
+			p->as = AJMP;
+	}
+}
+
+// indir_cx sets a to address memory through CX.  Normally that is
+// the plain (CX) form; on Hnacl it is (R15)(CX*1) instead.
+// Only type, index and scale are written; the caller sets the offset.
+static void
+indir_cx(Link *ctxt, Addr *a)
+{
+	if(ctxt->headtype != Hnacl) {
+		a->type = D_INDIR+D_CX;
+		return;
+	}
+
+	a->type = D_INDIR + D_R15;
+	a->index = D_CX;
+	a->scale = 1;
+}
+
+// load_g_cx emits the code that loads g into CX.
+// The first instruction is written over p itself (there is no
+// initial appendp).  Overwriting p is unusual, but it lets us use
+// this helper both in the prologue (where the caller must call
+// appendp first) and in the epilogue.
+// The instruction is passed through progedit, which may expand it;
+// the return value is the last instruction of the result.
+static Prog*
+load_g_cx(Link *ctxt, Prog *p)
+{
+	Prog *after;
+
+	// Pointer-sized move: MOVL when pointers are 4 bytes, else MOVQ.
+	if(ctxt->arch->ptrsize == 4)
+		p->as = AMOVL;
+	else
+		p->as = AMOVQ;
+	p->from.type = D_INDIR+D_TLS;
+	p->from.offset = 0;
+	p->to.type = D_CX;
+
+	// Remember what followed p so that anything progedit inserts
+	// can be skipped over.
+	after = p->link;
+	progedit(ctxt, p);
+	for(; p->link != after; p = p->link)
+		;
+
+	if(p->from.index == D_TLS)
+		p->from.scale = 2;
+
+	return p;
+}
+
+// Append code to p to check for stack split.
+// Appends to (does not overwrite) p.
+// Assumes g is in CX.
+// Returns last new instruction.
+// On return, *jmpok is the instruction that should jump
+// to the stack frame allocation if no split is needed.
+//
+// framesize is the size of the function's frame and textarg the
+// size of its arguments; both choose the form of the comparison and
+// which morestack routine is called.  noctxt is 0 or 1 and selects
+// the plain or the _noctxt routine of each pair in ctxt->symmorestack.
+// On Hnacl the 32-bit forms of CMP/LEA/MOV/SUB are used.
+static Prog*
+stacksplit(Link *ctxt, Prog *p, int32 framesize, int32 textarg, int noctxt, Prog **jmpok)
+{
+ Prog *q, *q1;
+ uint32 moreconst1, moreconst2, i;
+ int cmp, lea, mov, sub;
+
+ cmp = ACMPQ;
+ lea = ALEAQ;
+ mov = AMOVQ;
+ sub = ASUBQ;
+
+ if(ctxt->headtype == Hnacl) {
+ cmp = ACMPL;
+ lea = ALEAL;
+ mov = AMOVL;
+ sub = ASUBL;
+ }
+
+ if(ctxt->debugstack) {
+ // 6l -K means check not only for stack
+ // overflow but stack underflow.
+ // On underflow, INT 3 (breakpoint).
+ // Underflow itself is rare but this also
+ // catches out-of-sync stack guard info
+
+ p = appendp(ctxt, p);
+ p->as = cmp;
+ indir_cx(ctxt, &p->from);
+ p->from.offset = 8;
+ p->to.type = D_SP;
+
+ p = appendp(ctxt, p);
+ p->as = AJHI;
+ p->to.type = D_BRANCH;
+ p->to.offset = 4;
+ q1 = p;
+
+ p = appendp(ctxt, p);
+ p->as = AINT;
+ p->from.type = D_CONST;
+ p->from.offset = 3;
+
+ p = appendp(ctxt, p);
+ p->as = ANOP;
+ q1->pcond = p;
+ }
+
+ // q1 is reused from here on: it stays nil unless the huge-frame
+ // case below emits the StackPreempt check.
+ q1 = nil;
+ if(framesize <= StackSmall) {
+ // small stack: SP <= stackguard
+ // CMPQ SP, stackguard
+ p = appendp(ctxt, p);
+ p->as = cmp;
+ p->from.type = D_SP;
+ indir_cx(ctxt, &p->to);
+ } else if(framesize <= StackBig) {
+ // large stack: SP-framesize <= stackguard-StackSmall
+ // LEAQ -xxx(SP), AX
+ // CMPQ AX, stackguard
+ p = appendp(ctxt, p);
+ p->as = lea;
+ p->from.type = D_INDIR+D_SP;
+ p->from.offset = -(framesize-StackSmall);
+ p->to.type = D_AX;
+
+ p = appendp(ctxt, p);
+ p->as = cmp;
+ p->from.type = D_AX;
+ indir_cx(ctxt, &p->to);
+ } else {
+ // Such a large stack we need to protect against wraparound.
+ // If SP is close to zero:
+ // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
+ // The +StackGuard on both sides is required to keep the left side positive:
+ // SP is allowed to be slightly below stackguard. See stack.h.
+ //
+ // Preemption sets stackguard to StackPreempt, a very large value.
+ // That breaks the math above, so we have to check for that explicitly.
+ // MOVQ stackguard, CX
+ // CMPQ CX, $StackPreempt
+ // JEQ label-of-call-to-morestack
+ // LEAQ StackGuard(SP), AX
+ // SUBQ CX, AX
+ // CMPQ AX, $(framesize+(StackGuard-StackSmall))
+ // NOTE(review): the sketch above names CX, but the code below
+ // loads stackguard into SI and compares/subtracts SI.
+
+ p = appendp(ctxt, p);
+ p->as = mov;
+ indir_cx(ctxt, &p->from);
+ p->from.offset = 0;
+ p->to.type = D_SI;
+
+ p = appendp(ctxt, p);
+ p->as = cmp;
+ p->from.type = D_SI;
+ p->to.type = D_CONST;
+ p->to.offset = StackPreempt;
+
+ p = appendp(ctxt, p);
+ p->as = AJEQ;
+ p->to.type = D_BRANCH;
+ q1 = p;
+
+ p = appendp(ctxt, p);
+ p->as = lea;
+ p->from.type = D_INDIR+D_SP;
+ p->from.offset = StackGuard;
+ p->to.type = D_AX;
+
+ p = appendp(ctxt, p);
+ p->as = sub;
+ p->from.type = D_SI;
+ p->to.type = D_AX;
+
+ p = appendp(ctxt, p);
+ p->as = cmp;
+ p->from.type = D_AX;
+ p->to.type = D_CONST;
+ p->to.offset = framesize+(StackGuard-StackSmall);
+ }
+
+ // common
+ // q is the branch taken when no split is needed; it is returned
+ // through *jmpok and retargeted by the caller.
+ p = appendp(ctxt, p);
+ p->as = AJHI;
+ p->to.type = D_BRANCH;
+ q = p;
+
+ // If we ask for more stack, we'll get a minimum of StackMin bytes.
+ // We need a stack frame large enough to hold the top-of-stack data,
+ // the function arguments+results, our caller's PC, our frame,
+ // a word for the return PC of the next call, and then the StackLimit bytes
+ // that must be available on entry to any function called from a function
+ // that did a stack check. If StackMin is enough, don't ask for a specific
+ // amount: then we can use the custom functions and save a few
+ // instructions.
+ moreconst1 = 0;
+ if(StackTop + textarg + ctxt->arch->ptrsize + framesize + ctxt->arch->ptrsize + StackLimit >= StackMin)
+ moreconst1 = framesize;
+ moreconst2 = textarg;
+ if(moreconst2 == 1) // special marker
+ moreconst2 = 0;
+ if((moreconst2&7) != 0)
+ ctxt->diag("misaligned argument size in stack split");
+ // 4 varieties (const1==0 cross const2==0)
+ // and 6 subvarieties of (const1==0 and const2!=0)
+ p = appendp(ctxt, p);
+ if(moreconst1 == 0 && moreconst2 == 0) {
+ p->as = ACALL;
+ p->to.type = D_BRANCH;
+ p->to.sym = ctxt->symmorestack[0*2+noctxt];
+ } else
+ if(moreconst1 != 0 && moreconst2 == 0) {
+ p->as = AMOVL;
+ p->from.type = D_CONST;
+ p->from.offset = moreconst1;
+ p->to.type = D_AX;
+
+ p = appendp(ctxt, p);
+ p->as = ACALL;
+ p->to.type = D_BRANCH;
+ p->to.sym = ctxt->symmorestack[1*2+noctxt];
+ } else
+ if(moreconst1 == 0 && moreconst2 <= 48 && moreconst2%8 == 0) {
+ // Argument sizes 8..48 have dedicated routines: pairs 4..9.
+ i = moreconst2/8 + 3;
+ p->as = ACALL;
+ p->to.type = D_BRANCH;
+ p->to.sym = ctxt->symmorestack[i*2+noctxt];
+ } else
+ if(moreconst1 == 0 && moreconst2 != 0) {
+ p->as = AMOVL;
+ p->from.type = D_CONST;
+ p->from.offset = moreconst2;
+ p->to.type = D_AX;
+
+ p = appendp(ctxt, p);
+ p->as = ACALL;
+ p->to.type = D_BRANCH;
+ p->to.sym = ctxt->symmorestack[2*2+noctxt];
+ } else {
+ // Pass framesize and argsize.
+ // argsize goes in the high 32 bits of AX, framesize in the low 32.
+ p->as = AMOVQ;
+ p->from.type = D_CONST;
+ p->from.offset = (uint64)moreconst2 << 32;
+ p->from.offset |= moreconst1;
+ p->to.type = D_AX;
+
+ p = appendp(ctxt, p);
+ p->as = ACALL;
+ p->to.type = D_BRANCH;
+ p->to.sym = ctxt->symmorestack[3*2+noctxt];
+ }
+
+ // After the morestack call, jump back to the instruction that
+ // follows the function's TEXT instruction.
+ p = appendp(ctxt, p);
+ p->as = AJMP;
+ p->to.type = D_BRANCH;
+ p->pcond = ctxt->cursym->text->link;
+
+ if(q != nil)
+ q->pcond = p->link;
+ // The StackPreempt branch, if emitted, goes to the instruction
+ // right after q, i.e. the start of the morestack call sequence.
+ if(q1 != nil)
+ q1->pcond = q->link;
+
+ *jmpok = q;
+ return p;
+}
+
+static void xfol(Link*, Prog*, Prog**);
+
+// follow re-lays-out the instruction list of s.  It builds a new
+// list behind a dummy head instruction by running xfol from
+// s->text, terminates it, and installs it as s->text.
+static void
+follow(Link *ctxt, LSym *s)
+{
+	Prog *head, *tail;
+
+	ctxt->cursym = s;
+
+	// head is a placeholder; the real list starts at head->link.
+	head = ctxt->arch->prg();
+	tail = head;
+	xfol(ctxt, s->text, &tail);
+	tail->link = nil;
+	s->text = head->link;
+}
+
+// nofollow reports whether opcode a is one of the opcodes after
+// which xfol stops laying out the instructions that follow
+// (JMP, the RET/IRET/RETF family and UNDEF).
+static int
+nofollow(int a)
+{
+	int stop;
+
+	stop = 0;
+	switch(a) {
+	case AJMP:
+	case ARET:
+	case AIRETL:
+	case AIRETQ:
+	case AIRETW:
+	case ARETFL:
+	case ARETFQ:
+	case ARETFW:
+	case AUNDEF:
+		stop = 1;
+		break;
+	}
+	return stop;
+}
+
+// pushpop reports whether opcode a is one of the PUSH/POP
+// opcodes (including the PUSHF/POPF forms) in any operand size.
+static int
+pushpop(int a)
+{
+	int yes;
+
+	yes = 0;
+	switch(a) {
+	case APUSHL:
+	case APUSHFL:
+	case APUSHQ:
+	case APUSHFQ:
+	case APUSHW:
+	case APUSHFW:
+	case APOPL:
+	case APOPFL:
+	case APOPQ:
+	case APOPFQ:
+	case APOPW:
+	case APOPFW:
+		yes = 1;
+		break;
+	}
+	return yes;
+}
+
+// relinv returns the conditional jump opcode that tests the
+// opposite condition of a.  Any opcode not listed is reported
+// through sysfatal.
+static int
+relinv(int a)
+{
+	int inv;
+
+	inv = 0;
+	switch(a) {
+	case AJEQ:
+		inv = AJNE;
+		break;
+	case AJNE:
+		inv = AJEQ;
+		break;
+	case AJLE:
+		inv = AJGT;
+		break;
+	case AJLS:
+		inv = AJHI;
+		break;
+	case AJLT:
+		inv = AJGE;
+		break;
+	case AJMI:
+		inv = AJPL;
+		break;
+	case AJGE:
+		inv = AJLT;
+		break;
+	case AJPL:
+		inv = AJMI;
+		break;
+	case AJGT:
+		inv = AJLE;
+		break;
+	case AJHI:
+		inv = AJLS;
+		break;
+	case AJCS:
+		inv = AJCC;
+		break;
+	case AJCC:
+		inv = AJCS;
+		break;
+	case AJPS:
+		inv = AJPC;
+		break;
+	case AJPC:
+		inv = AJPS;
+		break;
+	case AJOS:
+		inv = AJOC;
+		break;
+	case AJOC:
+		inv = AJOS;
+		break;
+	default:
+		sysfatal("unknown relation: %s", anames6[a]);
+		return 0;
+	}
+	return inv;
+}
+
+// xfol lays out the instructions reachable from p, appending each
+// emitted instruction to the list whose current tail is *last and
+// updating *last.  An instruction's mark field records that it has
+// already been placed.  Unconditional jumps are followed to their
+// target instead of being emitted; when the next instruction has
+// already been placed, a short run of it is copied (see below) or an
+// explicit JMP to the existing copy is emitted.  For conditional
+// branches one path is laid out by recursion and the other by
+// continuing the loop, inverting the condition where that makes the
+// preferred path the fall-through.
+static void
+xfol(Link *ctxt, Prog *p, Prog **last)
+{
+ Prog *q;
+ int i;
+ enum as a;
+
+loop:
+ if(p == nil)
+ return;
+ if(p->as == AJMP)
+ if((q = p->pcond) != nil && q->as != ATEXT) {
+ /* mark instruction as done and continue layout at target of jump */
+ p->mark = 1;
+ p = q;
+ if(p->mark == 0)
+ goto loop;
+ }
+ if(p->mark) {
+ /*
+ * p goes here, but already used it elsewhere.
+ * copy up to 4 instructions or else branch to other copy.
+ */
+ for(i=0,q=p; i<4; i++,q=q->link) {
+ if(q == nil)
+ break;
+ if(q == *last)
+ break;
+ a = q->as;
+ if(a == ANOP) {
+ /* NOPs do not count toward the limit of 4 */
+ i--;
+ continue;
+ }
+ if(nofollow(a) || pushpop(a))
+ break; // NOTE(rsc): arm does goto copy
+ if(q->pcond == nil || q->pcond->mark)
+ continue;
+ if(a == ACALL || a == ALOOP)
+ continue;
+ /*
+ * q is a branch (opcode a) to a target not yet placed:
+ * copy the instructions from p up to and including
+ * such a branch, skipping NOPs.
+ */
+ for(;;) {
+ if(p->as == ANOP) {
+ p = p->link;
+ continue;
+ }
+ q = copyp(ctxt, p);
+ p = p->link;
+ q->mark = 1;
+ (*last)->link = q;
+ *last = q;
+ if(q->as != a || q->pcond == nil || q->pcond->mark)
+ continue;
+
+ /*
+ * invert the copied branch so that its old target
+ * becomes the fall-through path, and lay that out.
+ */
+ q->as = relinv(q->as);
+ p = q->pcond;
+ q->pcond = q->link;
+ q->link = p;
+ xfol(ctxt, q->link, last);
+ p = q->link;
+ if(p->mark)
+ return;
+ goto loop;
+ }
+ } /* */
+ /* no copy was made: emit a jump to the existing copy of p */
+ q = ctxt->arch->prg();
+ q->as = AJMP;
+ q->lineno = p->lineno;
+ q->to.type = D_BRANCH;
+ q->to.offset = p->pc;
+ q->pcond = p;
+ p = q;
+ }
+
+ /* emit p */
+ p->mark = 1;
+ (*last)->link = p;
+ *last = p;
+ a = p->as;
+
+ /* continue loop with what comes after p */
+ if(nofollow(a))
+ return;
+ if(p->pcond != nil && a != ACALL) {
+ /*
+ * some kind of conditional branch.
+ * recurse to follow one path.
+ * continue loop on the other.
+ */
+ if((q = brchain(ctxt, p->pcond)) != nil)
+ p->pcond = q;
+ if((q = brchain(ctxt, p->link)) != nil)
+ p->link = q;
+ if(p->from.type == D_CONST) {
+ if(p->from.offset == 1) {
+ /*
+ * expect conditional jump to be taken.
+ * rewrite so that's the fall-through case.
+ */
+ p->as = relinv(a);
+ q = p->link;
+ p->link = p->pcond;
+ p->pcond = q;
+ }
+ } else {
+ /*
+ * fall-through instruction already placed:
+ * invert so the unplaced target falls through instead
+ * (not done for LOOP).
+ */
+ q = p->link;
+ if(q->mark)
+ if(a != ALOOP) {
+ p->as = relinv(a);
+ p->link = p->pcond;
+ p->pcond = q;
+ }
+ }
+ xfol(ctxt, p->link, last);
+ if(p->pcond->mark)
+ return;
+ p = p->pcond;
+ goto loop;
+ }
+ p = p->link;
+ goto loop;
+}
+
+// prg allocates a new instruction and initializes it as a copy
+// of the zprg template.
+static Prog*
+prg(void)
+{
+	Prog *fresh;
+
+	fresh = emallocz(sizeof(*fresh));
+	*fresh = zprg;
+	return fresh;
+}
+
+// linkamd64 is the LinkArch description for amd64: it publishes this
+// file's per-architecture hooks (plus span6 as the assembler),
+// the pointer and register sizes (both 8), and the architecture's
+// values for the operand-type (D_*) and opcode (A*) constants that
+// the LinkArch structure exposes.
+LinkArch linkamd64 = {
+ .name = "amd64",
+ .thechar = '6',
+
+ .addstacksplit = addstacksplit,
+ .assemble = span6,
+ .datasize = datasize,
+ .follow = follow,
+ .iscall = iscall,
+ .isdata = isdata,
+ .prg = prg,
+ .progedit = progedit,
+ .settextflag = settextflag,
+ .symtype = symtype,
+ .textflag = textflag,
+
+ .minlc = 1,
+ .ptrsize = 8,
+ .regsize = 8,
+
+ .D_ADDR = D_ADDR,
+ .D_AUTO = D_AUTO,
+ .D_BRANCH = D_BRANCH,
+ .D_CONST = D_CONST,
+ .D_EXTERN = D_EXTERN,
+ .D_FCONST = D_FCONST,
+ .D_NONE = D_NONE,
+ .D_PARAM = D_PARAM,
+ .D_SCONST = D_SCONST,
+ .D_STATIC = D_STATIC,
+
+ .ACALL = ACALL,
+ .ADATA = ADATA,
+ .AEND = AEND,
+ .AFUNCDATA = AFUNCDATA,
+ .AGLOBL = AGLOBL,
+ .AJMP = AJMP,
+ .ANOP = ANOP,
+ .APCDATA = APCDATA,
+ .ARET = ARET,
+ .ATEXT = ATEXT,
+ .ATYPE = ATYPE,
+ .AUSEFIELD = AUSEFIELD,
+};
+
+// linkamd64p32 is the LinkArch description for amd64p32.  It is
+// identical to linkamd64 except for the name and the pointer size:
+// pointers are 4 bytes while registers remain 8 bytes.
+LinkArch linkamd64p32 = {
+ .name = "amd64p32",
+ .thechar = '6',
+
+ .addstacksplit = addstacksplit,
+ .assemble = span6,
+ .datasize = datasize,
+ .follow = follow,
+ .iscall = iscall,
+ .isdata = isdata,
+ .prg = prg,
+ .progedit = progedit,
+ .settextflag = settextflag,
+ .symtype = symtype,
+ .textflag = textflag,
+
+ .minlc = 1,
+ .ptrsize = 4,
+ .regsize = 8,
+
+ .D_ADDR = D_ADDR,
+ .D_AUTO = D_AUTO,
+ .D_BRANCH = D_BRANCH,
+ .D_CONST = D_CONST,
+ .D_EXTERN = D_EXTERN,
+ .D_FCONST = D_FCONST,
+ .D_NONE = D_NONE,
+ .D_PARAM = D_PARAM,
+ .D_SCONST = D_SCONST,
+ .D_STATIC = D_STATIC,
+
+ .ACALL = ACALL,
+ .ADATA = ADATA,
+ .AEND = AEND,
+ .AFUNCDATA = AFUNCDATA,
+ .AGLOBL = AGLOBL,
+ .AJMP = AJMP,
+ .ANOP = ANOP,
+ .APCDATA = APCDATA,
+ .ARET = ARET,
+ .ATEXT = ATEXT,
+ .ATYPE = ATYPE,
+ .AUSEFIELD = AUSEFIELD,
+};