summary refs log tree commit diff
path: root/src/cmd/6l/span.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/6l/span.c')
-rw-r--r-- src/cmd/6l/span.c 80
1 files changed, 69 insertions, 11 deletions
diff --git a/src/cmd/6l/span.c b/src/cmd/6l/span.c
index 28eb38f40..283a0e349 100644
--- a/src/cmd/6l/span.c
+++ b/src/cmd/6l/span.c
@@ -37,6 +37,37 @@ static int rexflag;
static int asmode;
static vlong vaddr(Adr*, Reloc*);
+// single-instruction no-ops of various lengths.
+// constructed by hand and disassembled with gdb to verify.
+// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
+static uchar nop[][16] = { // row i lists the (i+1)-byte encoding; unlisted trailing bytes of each 16-byte row are zero-initialized
+ {0x90}, // 1 byte
+ {0x66, 0x90}, // 2 bytes
+ {0x0F, 0x1F, 0x00}, // 3 bytes
+ {0x0F, 0x1F, 0x40, 0x00}, // 4 bytes
+ {0x0F, 0x1F, 0x44, 0x00, 0x00}, // 5 bytes
+ {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, // 6 bytes
+ {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, // 7 bytes
+ {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, // 8 bytes
+ {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, // 9 bytes
+ {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, // 10 bytes (longest entry)
+};
+
+static void
+fillnop(uchar *p, int n) // fill the n bytes starting at p with entries from the nop table; does nothing when n <= 0
+{
+ int m; // size in bytes of the next table entry to emit
+
+ while(n > 0) {
+ m = n;
+ if(m > nelem(nop)) // cap at the table size (nelem is presumably the project's element-count macro; defined outside this view)
+ m = nelem(nop);
+ memmove(p, nop[m-1], m); // row m-1 holds the m-byte encoding; only its first m bytes are copied
+ p += m;
+ n -= m;
+ }
+}
+
void
span1(Sym *s)
{
@@ -52,8 +83,10 @@ span1(Sym *s)
for(p = s->text; p != P; p = p->link) {
p->back = 2; // use short branches first time through
- if((q = p->pcond) != P && (q->back & 2))
+ if((q = p->pcond) != P && (q->back & 2)) {
p->back |= 1; // backward jump
+ q->back |= 4; // loop head
+ }
if(p->as == AADJSP) {
p->to.type = D_SP;
@@ -78,6 +111,16 @@ span1(Sym *s)
s->np = 0;
c = 0;
for(p = s->text; p != P; p = p->link) {
+ if((p->back & 4) && (c&(LoopAlign-1)) != 0) {
+ // pad with NOPs
+ v = -c&(LoopAlign-1);
+ if(v <= MaxLoopPad) {
+ symgrow(s, c+v);
+ fillnop(s->p+c, v);
+ c += v;
+ }
+ }
+
p->pc = c;
// process forward jumps to p
@@ -329,7 +372,10 @@ oclass(Adr *a)
switch(a->index) {
case D_EXTERN:
case D_STATIC:
- return Yi32; /* TO DO: Yi64 */
+ if(flag_shared)
+ return Yiauto;
+ else
+ return Yi32; /* TO DO: Yi64 */
case D_AUTO:
case D_PARAM:
return Yiauto;
@@ -688,7 +734,10 @@ vaddr(Adr *a, Reloc *r)
diag("need reloc for %D", a);
errorexit();
}
- r->type = D_ADDR;
+ if(flag_shared)
+ r->type = D_PCREL;
+ else
+ r->type = D_ADDR;
r->siz = 4; // TODO: 8 for external symbols
r->off = -1; // caller must fill in
r->sym = s;
@@ -717,6 +766,8 @@ asmandsz(Adr *a, int r, int rex, int m64)
goto bad;
case D_STATIC:
case D_EXTERN:
+ if(flag_shared)
+ goto bad;
t = D_NONE;
v = vaddr(a, &rel);
break;
@@ -777,7 +828,7 @@ asmandsz(Adr *a, int r, int rex, int m64)
rexflag |= (regrex[t] & Rxb) | rex;
if(t == D_NONE || (D_CS <= t && t <= D_GS)) {
- if(asmode != 64){
+ if(flag_shared && t == D_NONE && (a->type == D_STATIC || a->type == D_EXTERN) || asmode != 64) {
*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
goto putrelv;
}
@@ -1204,7 +1255,8 @@ found:
break;
case Zibm_r:
- *andptr++ = op;
+ while ((op = o->op[z++]) != 0)
+ *andptr++ = op;
asmand(&p->from, &p->to);
*andptr++ = p->to.offset;
break;
@@ -1574,7 +1626,9 @@ bad:
pp = *p;
z = p->from.type;
if(z >= D_BP && z <= D_DI) {
- if(isax(&p->to)) {
+ if(isax(&p->to) || p->to.type == D_NONE) {
+ // We certainly don't want to exchange
+ // with AX if the op is MUL or DIV.
*andptr++ = 0x87; /* xchg lhs,bx */
asmando(&p->from, reg[D_BX]);
subreg(&pp, z, D_BX);
@@ -1730,13 +1784,17 @@ asmins(Prog *p)
if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
break;
}
- for(r=cursym->r+cursym->nr; r-- > cursym->r; ) {
- if(r->off < p->pc)
- break;
- r->off++;
- }
memmove(and+np+1, and+np, n-np);
and[np] = 0x40 | rexflag;
andptr++;
}
+ n = andptr - and;
+ for(r=cursym->r+cursym->nr; r-- > cursym->r; ) {
+ if(r->off < p->pc)
+ break;
+ if(rexflag)
+ r->off++;
+ if(r->type == D_PCREL)
+ r->add -= p->pc + n - (r->off + r->siz);
+ }
}