Diffstat (limited to 'src/pkg/crypto/sha1/sha1block_amd64p32.s')
-rw-r--r-- | src/pkg/crypto/sha1/sha1block_amd64p32.s | 216
1 file changed, 216 insertions, 0 deletions
diff --git a/src/pkg/crypto/sha1/sha1block_amd64p32.s b/src/pkg/crypto/sha1/sha1block_amd64p32.s
new file mode 100644
index 000000000..3c589d94f
--- /dev/null
+++ b/src/pkg/crypto/sha1/sha1block_amd64p32.s
@@ -0,0 +1,216 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "../../../cmd/ld/textflag.h"
+
+// SHA1 block routine. See sha1block.go for Go equivalent.
+//
+// There are 80 rounds of 4 types:
+//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
+//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
+//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
+//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
+//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
+//
+// Each round loads or shuffles the data, then computes a per-round
+// function of b, c, d, and then mixes the result into and rotates the
+// five registers a, b, c, d, e holding the intermediate results.
+//
+// The register rotation is implemented by rotating the arguments to
+// the round macros instead of by explicit move instructions.
+//
+// amd64p32 version.
+// To ensure safety for Native Client, avoids use of BP and R15
+// as well as two-register addressing modes.
+
+#define LOAD(index) \
+	MOVL	(index*4)(SI), R10; \
+	BSWAPL	R10; \
+	MOVL	R10, (index*4)(SP)
+
+#define SHUFFLE(index) \
+	MOVL	(((index)&0xf)*4)(SP), R10; \
+	XORL	(((index-3)&0xf)*4)(SP), R10; \
+	XORL	(((index-8)&0xf)*4)(SP), R10; \
+	XORL	(((index-14)&0xf)*4)(SP), R10; \
+	ROLL	$1, R10; \
+	MOVL	R10, (((index)&0xf)*4)(SP)
+
+#define FUNC1(a, b, c, d, e) \
+	MOVL	d, R9; \
+	XORL	c, R9; \
+	ANDL	b, R9; \
+	XORL	d, R9
+
+#define FUNC2(a, b, c, d, e) \
+	MOVL	b, R9; \
+	XORL	c, R9; \
+	XORL	d, R9
+
+#define FUNC3(a, b, c, d, e) \
+	MOVL	b, R8; \
+	ORL	c, R8; \
+	ANDL	d, R8; \
+	MOVL	b, R9; \
+	ANDL	c, R9; \
+	ORL	R8, R9
+
+#define FUNC4 FUNC2
+
+#define MIX(a, b, c, d, e, const) \
+	ROLL	$30, b; \
+	ADDL	R9, e; \
+	MOVL	a, R8; \
+	ROLL	$5, R8; \
+	LEAL	const(e)(R10*1), e; \
+	ADDL	R8, e
+
+#define ROUND1(a, b, c, d, e, index) \
+	LOAD(index); \
+	FUNC1(a, b, c, d, e); \
+	MIX(a, b, c, d, e, 0x5A827999)
+
+#define ROUND1x(a, b, c, d, e, index) \
+	SHUFFLE(index); \
+	FUNC1(a, b, c, d, e); \
+	MIX(a, b, c, d, e, 0x5A827999)
+
+#define ROUND2(a, b, c, d, e, index) \
+	SHUFFLE(index); \
+	FUNC2(a, b, c, d, e); \
+	MIX(a, b, c, d, e, 0x6ED9EBA1)
+
+#define ROUND3(a, b, c, d, e, index) \
+	SHUFFLE(index); \
+	FUNC3(a, b, c, d, e); \
+	MIX(a, b, c, d, e, 0x8F1BBCDC)
+
+#define ROUND4(a, b, c, d, e, index) \
+	SHUFFLE(index); \
+	FUNC4(a, b, c, d, e); \
+	MIX(a, b, c, d, e, 0xCA62C1D6)
+
+TEXT ·block(SB),NOSPLIT,$64-32
+	MOVL	dig+0(FP), R14
+	MOVL	p_base+4(FP), SI
+	MOVL	p_len+8(FP), DX
+	SHRQ	$6, DX
+	SHLQ	$6, DX
+
+	LEAQ	(SI)(DX*1), DI
+	MOVL	(0*4)(R14), AX
+	MOVL	(1*4)(R14), BX
+	MOVL	(2*4)(R14), CX
+	MOVL	(3*4)(R14), DX
+	MOVL	(4*4)(R14), R13
+
+	CMPQ	SI, DI
+	JEQ	end
+
+loop:
+#define BP R13 /* keep diff from sha1block_amd64.s small */
+	ROUND1(AX, BX, CX, DX, BP, 0)
+	ROUND1(BP, AX, BX, CX, DX, 1)
+	ROUND1(DX, BP, AX, BX, CX, 2)
+	ROUND1(CX, DX, BP, AX, BX, 3)
+	ROUND1(BX, CX, DX, BP, AX, 4)
+	ROUND1(AX, BX, CX, DX, BP, 5)
+	ROUND1(BP, AX, BX, CX, DX, 6)
+	ROUND1(DX, BP, AX, BX, CX, 7)
+	ROUND1(CX, DX, BP, AX, BX, 8)
+	ROUND1(BX, CX, DX, BP, AX, 9)
+	ROUND1(AX, BX, CX, DX, BP, 10)
+	ROUND1(BP, AX, BX, CX, DX, 11)
+	ROUND1(DX, BP, AX, BX, CX, 12)
+	ROUND1(CX, DX, BP, AX, BX, 13)
+	ROUND1(BX, CX, DX, BP, AX, 14)
+	ROUND1(AX, BX, CX, DX, BP, 15)
+
+	ROUND1x(BP, AX, BX, CX, DX, 16)
+	ROUND1x(DX, BP, AX, BX, CX, 17)
+	ROUND1x(CX, DX, BP, AX, BX, 18)
+	ROUND1x(BX, CX, DX, BP, AX, 19)
+
+	ROUND2(AX, BX, CX, DX, BP, 20)
+	ROUND2(BP, AX, BX, CX, DX, 21)
+	ROUND2(DX, BP, AX, BX, CX, 22)
+	ROUND2(CX, DX, BP, AX, BX, 23)
+	ROUND2(BX, CX, DX, BP, AX, 24)
+	ROUND2(AX, BX, CX, DX, BP, 25)
+	ROUND2(BP, AX, BX, CX, DX, 26)
+	ROUND2(DX, BP, AX, BX, CX, 27)
+	ROUND2(CX, DX, BP, AX, BX, 28)
+	ROUND2(BX, CX, DX, BP, AX, 29)
+	ROUND2(AX, BX, CX, DX, BP, 30)
+	ROUND2(BP, AX, BX, CX, DX, 31)
+	ROUND2(DX, BP, AX, BX, CX, 32)
+	ROUND2(CX, DX, BP, AX, BX, 33)
+	ROUND2(BX, CX, DX, BP, AX, 34)
+	ROUND2(AX, BX, CX, DX, BP, 35)
+	ROUND2(BP, AX, BX, CX, DX, 36)
+	ROUND2(DX, BP, AX, BX, CX, 37)
+	ROUND2(CX, DX, BP, AX, BX, 38)
+	ROUND2(BX, CX, DX, BP, AX, 39)
+
+	ROUND3(AX, BX, CX, DX, BP, 40)
+	ROUND3(BP, AX, BX, CX, DX, 41)
+	ROUND3(DX, BP, AX, BX, CX, 42)
+	ROUND3(CX, DX, BP, AX, BX, 43)
+	ROUND3(BX, CX, DX, BP, AX, 44)
+	ROUND3(AX, BX, CX, DX, BP, 45)
+	ROUND3(BP, AX, BX, CX, DX, 46)
+	ROUND3(DX, BP, AX, BX, CX, 47)
+	ROUND3(CX, DX, BP, AX, BX, 48)
+	ROUND3(BX, CX, DX, BP, AX, 49)
+	ROUND3(AX, BX, CX, DX, BP, 50)
+	ROUND3(BP, AX, BX, CX, DX, 51)
+	ROUND3(DX, BP, AX, BX, CX, 52)
+	ROUND3(CX, DX, BP, AX, BX, 53)
+	ROUND3(BX, CX, DX, BP, AX, 54)
+	ROUND3(AX, BX, CX, DX, BP, 55)
+	ROUND3(BP, AX, BX, CX, DX, 56)
+	ROUND3(DX, BP, AX, BX, CX, 57)
+	ROUND3(CX, DX, BP, AX, BX, 58)
+	ROUND3(BX, CX, DX, BP, AX, 59)
+
+	ROUND4(AX, BX, CX, DX, BP, 60)
+	ROUND4(BP, AX, BX, CX, DX, 61)
+	ROUND4(DX, BP, AX, BX, CX, 62)
+	ROUND4(CX, DX, BP, AX, BX, 63)
+	ROUND4(BX, CX, DX, BP, AX, 64)
+	ROUND4(AX, BX, CX, DX, BP, 65)
+	ROUND4(BP, AX, BX, CX, DX, 66)
+	ROUND4(DX, BP, AX, BX, CX, 67)
+	ROUND4(CX, DX, BP, AX, BX, 68)
+	ROUND4(BX, CX, DX, BP, AX, 69)
+	ROUND4(AX, BX, CX, DX, BP, 70)
+	ROUND4(BP, AX, BX, CX, DX, 71)
+	ROUND4(DX, BP, AX, BX, CX, 72)
+	ROUND4(CX, DX, BP, AX, BX, 73)
+	ROUND4(BX, CX, DX, BP, AX, 74)
+	ROUND4(AX, BX, CX, DX, BP, 75)
+	ROUND4(BP, AX, BX, CX, DX, 76)
+	ROUND4(DX, BP, AX, BX, CX, 77)
+	ROUND4(CX, DX, BP, AX, BX, 78)
+	ROUND4(BX, CX, DX, BP, AX, 79)
+#undef BP
+
+	ADDL	(0*4)(R14), AX
+	ADDL	(1*4)(R14), BX
+	ADDL	(2*4)(R14), CX
+	ADDL	(3*4)(R14), DX
+	ADDL	(4*4)(R14), R13
+
+	MOVL	AX, (0*4)(R14)
+	MOVL	BX, (1*4)(R14)
+	MOVL	CX, (2*4)(R14)
+	MOVL	DX, (3*4)(R14)
+	MOVL	R13, (4*4)(R14)
+
+	ADDQ	$64, SI
+	CMPQ	SI, DI
+	JB	loop
+
+end:
+	RET
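
For readers comparing this against the Go reference, here is a minimal, self-contained Go sketch of the same 80-round structure: the i < 16 branch corresponds to the LOAD macro, the else branch to SHUFFLE, the switch to FUNC1-FUNC4 with the four round constants, and the closing tuple assignment to MIX plus the argument rotation performed by the round macros. The names sha1Block, k1..k4 and the use of math/bits are choices made for this sketch, not taken from sha1block.go; see that file in this package for the real Go equivalent.

// sha1sketch.go: an illustrative Go transcription of the block
// routine above; names here are invented for the sketch.
package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// The four round constants, matching the MIX immediates above.
const (
	k1 = 0x5A827999 // rounds 0-19  (FUNC1)
	k2 = 0x6ED9EBA1 // rounds 20-39 (FUNC2)
	k3 = 0x8F1BBCDC // rounds 40-59 (FUNC3)
	k4 = 0xCA62C1D6 // rounds 60-79 (FUNC4 = FUNC2)
)

// sha1Block mixes one 64-byte chunk into the five-word state h.
func sha1Block(h *[5]uint32, p []byte) {
	// w is the 16-word circular message schedule; the assembly keeps
	// it in the stack frame and indexes it with the same
	// (index&0xf)*4 arithmetic used by SHUFFLE.
	var w [16]uint32
	a, b, c, d, e := h[0], h[1], h[2], h[3], h[4]

	for i := 0; i < 80; i++ {
		if i < 16 {
			// LOAD: fetch big-endian data (MOVL + BSWAPL).
			w[i] = binary.BigEndian.Uint32(p[i*4:])
		} else {
			// SHUFFLE: xor four window words, rotate left by 1.
			t := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[i&0xf]
			w[i&0xf] = bits.RotateLeft32(t, 1)
		}
		var f, k uint32
		switch {
		case i < 20:
			f, k = d^(b&(c^d)), k1 // FUNC1: b selects c or d
		case i < 40:
			f, k = b^c^d, k2 // FUNC2: parity
		case i < 60:
			f, k = (b|c)&d|b&c, k3 // FUNC3: majority
		default:
			f, k = b^c^d, k4 // FUNC4 = FUNC2
		}
		// MIX: e += f + w + k + rol5(a). The tuple assignment plays
		// the role of the rotated macro arguments, and rol30(b)
		// matches the ROLL $30, b at the top of MIX.
		t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + k
		a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d
	}

	// The ADDL/MOVL epilogue: fold the block into the digest state.
	h[0] += a
	h[1] += b
	h[2] += c
	h[3] += d
	h[4] += e
}

func main() {
	// One padded block holding the message "abc": 0x80 terminator,
	// zero fill, and the 64-bit message bit length (24) at the end.
	var block [64]byte
	copy(block[:], "abc")
	block[3] = 0x80
	block[63] = 24
	h := [5]uint32{0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0}
	sha1Block(&h, block[:])
	fmt.Printf("%08x%08x%08x%08x%08x\n", h[0], h[1], h[2], h[3], h[4])
}

Running the sketch prints a9993e364706816aba3e25717850c26c9cd0d89d, the standard SHA-1 test vector for "abc", which is a quick way to confirm that FUNC1-FUNC3 and the MIX ordering were transcribed correctly.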