summaryrefslogtreecommitdiff
path: root/src/cmd/5g/cgen.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/5g/cgen.c')
-rw-r--r--src/cmd/5g/cgen.c217
1 files changed, 83 insertions, 134 deletions
diff --git a/src/cmd/5g/cgen.c b/src/cmd/5g/cgen.c
index 4e5f7ebcd..76e2707fa 100644
--- a/src/cmd/5g/cgen.c
+++ b/src/cmd/5g/cgen.c
@@ -1190,7 +1190,8 @@ void
sgen(Node *n, Node *res, int32 w)
{
Node dst, src, tmp, nend;
- int32 c, q, odst, osrc;
+ int32 c, odst, osrc;
+ int dir, align, op;
Prog *p, *ploop;
if(debug['g']) {
@@ -1200,17 +1201,39 @@ sgen(Node *n, Node *res, int32 w)
}
if(w == 0)
return;
- if(n->ullman >= UINF && res->ullman >= UINF) {
- fatal("sgen UINF");
- }
-
if(w < 0)
fatal("sgen copy %d", w);
+ if(n->ullman >= UINF && res->ullman >= UINF)
+ fatal("sgen UINF");
+ if(n->type == T)
+ fatal("sgen: missing type");
+
+ // determine alignment.
+ // want to avoid unaligned access, so have to use
+ // smaller operations for less aligned types.
+ // for example moving [4]byte must use 4 MOVB not 1 MOVW.
+ align = n->type->align;
+ op = 0;
+ switch(align) {
+ default:
+ fatal("sgen: invalid alignment %d for %T", align, n->type);
+ case 1:
+ op = AMOVB;
+ break;
+ case 2:
+ op = AMOVH;
+ break;
+ case 4:
+ op = AMOVW;
+ break;
+ }
+ if(w%align)
+ fatal("sgen: unaligned size %d (align=%d) for %T", w, align, n->type);
+ c = w / align;
// offset on the stack
osrc = stkof(n);
odst = stkof(res);
-
if(osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000)) {
// osrc and odst both on stack, and at least one is in
// an unknown position. Could generate code to test
@@ -1221,12 +1244,15 @@ sgen(Node *n, Node *res, int32 w)
sgen(&tmp, res, w);
return;
}
-
- if(osrc % 4 != 0 || odst %4 != 0)
- fatal("sgen: non word(4) aligned offset src %d or dst %d", osrc, odst);
+ if(osrc%align != 0 || odst%align != 0)
+ fatal("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align);
+ // if we are copying forward on the stack and
+ // the src and dst overlap, then reverse direction
+ dir = align;
+ if(osrc < odst && odst < osrc+w)
+ dir = -dir;
regalloc(&dst, types[tptr], res);
-
if(n->ullman >= res->ullman) {
agen(n, &dst); // temporarily use dst
regalloc(&src, types[tptr], N);
@@ -1240,141 +1266,64 @@ sgen(Node *n, Node *res, int32 w)
regalloc(&tmp, types[TUINT32], N);
- c = w % 4; // bytes
- q = w / 4; // quads
-
- // if we are copying forward on the stack and
- // the src and dst overlap, then reverse direction
- if(osrc < odst && odst < osrc+w) {
- if(c != 0)
- fatal("sgen: reverse character copy not implemented");
- if(q >= 4) {
- regalloc(&nend, types[TUINT32], N);
- // set up end marker to 4 bytes before source
- p = gins(AMOVW, &src, &nend);
- p->from.type = D_CONST;
- p->from.offset = -4;
-
- // move src and dest to the end of block
- p = gins(AMOVW, &src, &src);
- p->from.type = D_CONST;
- p->from.offset = (q-1)*4;
-
- p = gins(AMOVW, &dst, &dst);
- p->from.type = D_CONST;
- p->from.offset = (q-1)*4;
-
- p = gins(AMOVW, &src, &tmp);
- p->from.type = D_OREG;
- p->from.offset = -4;
- p->scond |= C_PBIT;
- ploop = p;
+ // set up end marker
+ memset(&nend, 0, sizeof nend);
+ if(c >= 4) {
+ regalloc(&nend, types[TUINT32], N);
- p = gins(AMOVW, &tmp, &dst);
- p->to.type = D_OREG;
- p->to.offset = -4;
- p->scond |= C_PBIT;
-
- p = gins(ACMP, &src, N);
- raddr(&nend, p);
+ p = gins(AMOVW, &src, &nend);
+ p->from.type = D_CONST;
+ if(dir < 0)
+ p->from.offset = dir;
+ else
+ p->from.offset = w;
+ }
- patch(gbranch(ABNE, T), ploop);
+ // move src and dest to the end of block if necessary
+ if(dir < 0) {
+ p = gins(AMOVW, &src, &src);
+ p->from.type = D_CONST;
+ p->from.offset = w + dir;
- regfree(&nend);
- } else {
- // move src and dest to the end of block
- p = gins(AMOVW, &src, &src);
- p->from.type = D_CONST;
- p->from.offset = (q-1)*4;
-
- p = gins(AMOVW, &dst, &dst);
- p->from.type = D_CONST;
- p->from.offset = (q-1)*4;
-
- while(q > 0) {
- p = gins(AMOVW, &src, &tmp);
- p->from.type = D_OREG;
- p->from.offset = -4;
- p->scond |= C_PBIT;
-
- p = gins(AMOVW, &tmp, &dst);
- p->to.type = D_OREG;
- p->to.offset = -4;
- p->scond |= C_PBIT;
-
- q--;
- }
- }
+ p = gins(AMOVW, &dst, &dst);
+ p->from.type = D_CONST;
+ p->from.offset = w + dir;
+ }
+
+ // move
+ if(c >= 4) {
+ p = gins(op, &src, &tmp);
+ p->from.type = D_OREG;
+ p->from.offset = dir;
+ p->scond |= C_PBIT;
+ ploop = p;
+
+ p = gins(op, &tmp, &dst);
+ p->to.type = D_OREG;
+ p->to.offset = dir;
+ p->scond |= C_PBIT;
+
+ p = gins(ACMP, &src, N);
+ raddr(&nend, p);
+
+ patch(gbranch(ABNE, T), ploop);
+ regfree(&nend);
} else {
- // normal direction
- if(q >= 4) {
- regalloc(&nend, types[TUINT32], N);
- p = gins(AMOVW, &src, &nend);
- p->from.type = D_CONST;
- p->from.offset = q*4;
-
- p = gins(AMOVW, &src, &tmp);
+ while(c-- > 0) {
+ p = gins(op, &src, &tmp);
p->from.type = D_OREG;
- p->from.offset = 4;
+ p->from.offset = dir;
p->scond |= C_PBIT;
ploop = p;
-
- p = gins(AMOVW, &tmp, &dst);
+
+ p = gins(op, &tmp, &dst);
p->to.type = D_OREG;
- p->to.offset = 4;
+ p->to.offset = dir;
p->scond |= C_PBIT;
-
- p = gins(ACMP, &src, N);
- raddr(&nend, p);
-
- patch(gbranch(ABNE, T), ploop);
-
- regfree(&nend);
- } else
- while(q > 0) {
- p = gins(AMOVW, &src, &tmp);
- p->from.type = D_OREG;
- p->from.offset = 4;
- p->scond |= C_PBIT;
-
- p = gins(AMOVW, &tmp, &dst);
- p->to.type = D_OREG;
- p->to.offset = 4;
- p->scond |= C_PBIT;
-
- q--;
- }
-
- if (c != 0) {
- // MOVW (src), tmp
- p = gins(AMOVW, &src, &tmp);
- p->from.type = D_OREG;
-
- // MOVW tmp<<((4-c)*8),src
- gshift(AMOVW, &tmp, SHIFT_LL, ((4-c)*8), &src);
-
- // MOVW src>>((4-c)*8),src
- gshift(AMOVW, &src, SHIFT_LR, ((4-c)*8), &src);
-
- // MOVW (dst), tmp
- p = gins(AMOVW, &dst, &tmp);
- p->from.type = D_OREG;
-
- // MOVW tmp>>(c*8),tmp
- gshift(AMOVW, &tmp, SHIFT_LR, (c*8), &tmp);
-
- // MOVW tmp<<(c*8),tmp
- gshift(AMOVW, &tmp, SHIFT_LL, c*8, &tmp);
-
- // ORR src, tmp
- gins(AORR, &src, &tmp);
-
- // MOVW tmp, (dst)
- p = gins(AMOVW, &tmp, &dst);
- p->to.type = D_OREG;
}
}
- regfree(&dst);
+
+ regfree(&dst);
regfree(&src);
regfree(&tmp);
}