diff options
Diffstat (limited to 'src/pkg/crypto/sha1/sha1block_arm.s')
-rw-r--r-- | src/pkg/crypto/sha1/sha1block_arm.s | 217 |
1 files changed, 217 insertions, 0 deletions
diff --git a/src/pkg/crypto/sha1/sha1block_arm.s b/src/pkg/crypto/sha1/sha1block_arm.s new file mode 100644 index 000000000..5917e8b24 --- /dev/null +++ b/src/pkg/crypto/sha1/sha1block_arm.s @@ -0,0 +1,217 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// ARM version of sha1block.go + +#include "../../../cmd/ld/textflag.h" + +// SHA1 block routine. See sha1block.go for Go equivalent. +// +// There are 80 rounds of 4 types: +// - rounds 0-15 are type 1 and load data (ROUND1 macro). +// - rounds 16-19 are type 1 and do not load data (ROUND1x macro). +// - rounds 20-39 are type 2 and do not load data (ROUND2 macro). +// - rounds 40-59 are type 3 and do not load data (ROUND3 macro). +// - rounds 60-79 are type 4 and do not load data (ROUND4 macro). +// +// Each round loads or shuffles the data, then computes a per-round +// function of b, c, d, and then mixes the result into and rotates the +// five registers a, b, c, d, e holding the intermediate results. +// +// The register rotation is implemented by rotating the arguments to +// the round macros instead of by explicit move instructions. 
+ +// Register definitions +data = 0 // Pointer to incoming data +const = 1 // Current constant for SHA round +a = 2 // SHA1 accumulator +b = 3 // SHA1 accumulator +c = 4 // SHA1 accumulator +d = 5 // SHA1 accumulator +e = 6 // SHA1 accumulator +t0 = 7 // Temporary +t1 = 8 // Temporary +// r9, r10 are forbidden +// r11 is OK provided you check the assembler that no synthetic instructions use it +t2 = 11 // Temporary +ctr = 12 // loop counter +w = 14 // pointer into the w buffer (advanced by LOAD and SHUFFLE) + +// func block(dig *digest, p []byte) +// 0(FP) is *digest +// 4(FP) is p.array (struct Slice) +// 8(FP) is p.len +//12(FP) is p.cap +// +// Stack frame +p_end = -4 // -4(SP) pointer to the end of data +p_data = p_end - 4 // -8(SP) current data pointer (slot is reserved but not read or written by the code shown here) +w_buf = p_data - 4*80 // -328(SP) 80 words temporary buffer w uint32[80] +saved = w_buf - 4*5 // -348(SP) saved sha1 registers a,b,c,d,e - these must be last +// Total size +4 for saved LR is 352 + + // w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3] + // e += w[i] + // (one big-endian word is assembled from 4 bytes; the .P forms step data and w forward by 4) +#define LOAD(e) \ + MOVBU 2(R(data)), R(t0) ; \ + MOVBU 3(R(data)), R(t1) ; \ + MOVBU 1(R(data)), R(t2) ; \ + ORR R(t0)<<8, R(t1), R(t0) ; \ + MOVBU.P 4(R(data)), R(t1) ; \ + ORR R(t2)<<16, R(t0), R(t0) ; \ + ORR R(t1)<<24, R(t0), R(t0) ; \ + MOVW.P R(t0), 4(R(w)) ; \ + ADD R(t0), R(e), R(e) + + // tmp := w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16] + // w[i] = tmp<<1 | tmp>>(32-1) + // e += w[i] + // (w is indexed linearly through the 80-word buffer here, using fixed negative offsets from R(w), so there is no &0xf wrap-around) +#define SHUFFLE(e) \ + MOVW (-16*4)(R(w)), R(t0) ; \ + MOVW (-14*4)(R(w)), R(t1) ; \ + MOVW (-8*4)(R(w)), R(t2) ; \ + EOR R(t0), R(t1), R(t0) ; \ + MOVW (-3*4)(R(w)), R(t1) ; \ + EOR R(t2), R(t0), R(t0) ; \ + EOR R(t0), R(t1), R(t0) ; \ + MOVW R(t0)@>(32-1), R(t0) ; \ + MOVW.P R(t0), 4(R(w)) ; \ + ADD R(t0), R(e), R(e) + + // t1 = (b & c) | ((~b) & d) +#define FUNC1(a, b, c, d, e) \ + MVN R(b), R(t1) ; \ + AND R(b), R(c), R(t0) ; \ + AND R(d), R(t1), R(t1) ; \ + ORR R(t0), R(t1), R(t1) + + // t1 = b ^ c ^ d +#define FUNC2(a, b, c, d, e) \ + EOR R(b), R(c), R(t1) ; \ + EOR 
R(d), R(t1), R(t1) + + // t1 = (b & c) | (b & d) | (c & d) = + // t1 = (b & c) | ((b | c) & d) +#define FUNC3(a, b, c, d, e) \ + ORR R(b), R(c), R(t0) ; \ + AND R(b), R(c), R(t1) ; \ + AND R(d), R(t0), R(t0) ; \ + ORR R(t0), R(t1), R(t1) + + // Round type 4 uses the same b ^ c ^ d function as round type 2 +#define FUNC4 FUNC2 + + // a5 := a<<5 | a>>(32-5) + // b = b<<30 | b>>(32-30) + // e = a5 + t1 + e + const + // (t1 is the FUNCn result; the left rotations are written as right rotations by 32-n using @>) +#define MIX(a, b, c, d, e) \ + ADD R(t1), R(e), R(e) ; \ + MOVW R(b)@>(32-30), R(b) ; \ + ADD R(a)@>(32-5), R(e), R(e) ; \ + ADD R(const), R(e), R(e) + + // Type 1 round that loads a fresh data word +#define ROUND1(a, b, c, d, e) \ + LOAD(e) ; \ + FUNC1(a, b, c, d, e) ; \ + MIX(a, b, c, d, e) + + // Type 1 round that derives its word from earlier w entries +#define ROUND1x(a, b, c, d, e) \ + SHUFFLE(e) ; \ + FUNC1(a, b, c, d, e) ; \ + MIX(a, b, c, d, e) + +#define ROUND2(a, b, c, d, e) \ + SHUFFLE(e) ; \ + FUNC2(a, b, c, d, e) ; \ + MIX(a, b, c, d, e) + +#define ROUND3(a, b, c, d, e) \ + SHUFFLE(e) ; \ + FUNC3(a, b, c, d, e) ; \ + MIX(a, b, c, d, e) + +#define ROUND4(a, b, c, d, e) \ + SHUFFLE(e) ; \ + FUNC4(a, b, c, d, e) ; \ + MIX(a, b, c, d, e) + + +// func block(dig *digest, p []byte) +// block loads five words from dig into a..e, runs the 80 rounds once per 64 bytes of p +// (16 LOADed words per pass through loop), and stores a..e back to dig when data reaches p_end. +TEXT ·block(SB), 0, $352-16 + MOVW p+4(FP), R(data) // pointer to the data + MOVW p_len+8(FP), R(t0) // number of bytes + ADD R(data), R(t0) + MOVW R(t0), p_end(SP) // pointer to end of data + + // Load up initial SHA1 accumulator + MOVW dig+0(FP), R(t0) + MOVM.IA (R(t0)), [R(a),R(b),R(c),R(d),R(e)] + + // NOTE(review): the loop body runs before the data/p_end compare at the bottom, + // so at least one 64-byte block is read even if p is empty - confirm callers never pass len(p) == 0 +loop: + // Save registers at SP+4 onwards + MOVM.IB [R(a),R(b),R(c),R(d),R(e)], (R13) + + // loop1 runs 3 x 5 = rounds 0-14, the five unrolled rounds after it are 15-19, + // and loop2, loop3, loop4 each run 4 x 5 = 20 rounds with their own constant + MOVW $w_buf(SP), R(w) + MOVW $0x5A827999, R(const) + MOVW $3, R(ctr) +loop1: ROUND1(a, b, c, d, e) + ROUND1(e, a, b, c, d) + ROUND1(d, e, a, b, c) + ROUND1(c, d, e, a, b) + ROUND1(b, c, d, e, a) + SUB.S $1, R(ctr) + BNE loop1 + + ROUND1(a, b, c, d, e) + ROUND1x(e, a, b, c, d) + ROUND1x(d, e, a, b, c) + ROUND1x(c, d, e, a, b) + ROUND1x(b, c, d, e, a) + + MOVW $0x6ED9EBA1, R(const) + MOVW $4, R(ctr) +loop2: ROUND2(a, b, c, d, e) + ROUND2(e, a, b, c, d) + ROUND2(d, e, a, b, c) + ROUND2(c, d, e, a, b) + ROUND2(b, c, d, e, a) + SUB.S $1, R(ctr) + BNE loop2 + + 
MOVW $0x8F1BBCDC, R(const) + MOVW $4, R(ctr) +loop3: ROUND3(a, b, c, d, e) + ROUND3(e, a, b, c, d) + ROUND3(d, e, a, b, c) + ROUND3(c, d, e, a, b) + ROUND3(b, c, d, e, a) + SUB.S $1, R(ctr) + BNE loop3 + + MOVW $0xCA62C1D6, R(const) + MOVW $4, R(ctr) +loop4: ROUND4(a, b, c, d, e) + ROUND4(e, a, b, c, d) + ROUND4(d, e, a, b, c) + ROUND4(c, d, e, a, b) + ROUND4(b, c, d, e, a) + SUB.S $1, R(ctr) + BNE loop4 + + // Accumulate - reload the a..e values saved at SP+4 into scratch registers + // (t0, t1, t2, ctr, w are all free at this point) and add them to the new a..e + MOVM.IB (R13), [R(t0),R(t1),R(t2),R(ctr),R(w)] + ADD R(t0), R(a) + ADD R(t1), R(b) + ADD R(t2), R(c) + ADD R(ctr), R(d) + ADD R(w), R(e) + + MOVW p_end(SP), R(t0) + CMP R(t0), R(data) + BLO loop // go round again while data has not reached p_end + + // Save final SHA1 accumulator + MOVW dig+0(FP), R(t0) + MOVM.IA [R(a),R(b),R(c),R(d),R(e)], (R(t0)) + + RET |