diff options
author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
---|---|---|
committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
commit | f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch) | |
tree | f8255d51e10c6f1e0ed69702200b966c9556a431 /src/crypto/sha1 | |
parent | 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff) | |
download | golang-upstream/1.4.tar.gz |
Imported Upstream version 1.4upstream/1.4
Diffstat (limited to 'src/crypto/sha1')
-rw-r--r-- | src/crypto/sha1/example_test.go | 25 | ||||
-rw-r--r-- | src/crypto/sha1/sha1.go | 130 | ||||
-rw-r--r-- | src/crypto/sha1/sha1_test.go | 129 | ||||
-rw-r--r-- | src/crypto/sha1/sha1block.go | 90 | ||||
-rw-r--r-- | src/crypto/sha1/sha1block_386.s | 233 | ||||
-rw-r--r-- | src/crypto/sha1/sha1block_amd64.s | 216 | ||||
-rw-r--r-- | src/crypto/sha1/sha1block_amd64p32.s | 216 | ||||
-rw-r--r-- | src/crypto/sha1/sha1block_arm.s | 217 | ||||
-rw-r--r-- | src/crypto/sha1/sha1block_decl.go | 11 | ||||
-rw-r--r-- | src/crypto/sha1/sha1block_generic.go | 9 |
10 files changed, 1276 insertions, 0 deletions
diff --git a/src/crypto/sha1/example_test.go b/src/crypto/sha1/example_test.go new file mode 100644 index 000000000..42aec8afa --- /dev/null +++ b/src/crypto/sha1/example_test.go @@ -0,0 +1,25 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sha1_test + +import ( + "crypto/sha1" + "fmt" + "io" +) + +func ExampleNew() { + h := sha1.New() + io.WriteString(h, "His money is twice tainted:") + io.WriteString(h, " 'taint yours and 'taint mine.") + fmt.Printf("% x", h.Sum(nil)) + // Output: 59 7f 6a 54 00 10 f9 4c 15 d7 18 06 a9 9a 2c 87 10 e7 47 bd +} + +func ExampleSum() { + data := []byte("This page intentionally left blank.") + fmt.Printf("% x", sha1.Sum(data)) + // Output: af 06 49 23 bb f2 30 15 96 aa c4 c2 73 ba 32 17 8e bc 4a 96 +} diff --git a/src/crypto/sha1/sha1.go b/src/crypto/sha1/sha1.go new file mode 100644 index 000000000..9f1a96e36 --- /dev/null +++ b/src/crypto/sha1/sha1.go @@ -0,0 +1,130 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package sha1 implements the SHA1 hash algorithm as defined in RFC 3174. +package sha1 + +import ( + "crypto" + "hash" +) + +func init() { + crypto.RegisterHash(crypto.SHA1, New) +} + +// The size of a SHA1 checksum in bytes. +const Size = 20 + +// The blocksize of SHA1 in bytes. +const BlockSize = 64 + +const ( + chunk = 64 + init0 = 0x67452301 + init1 = 0xEFCDAB89 + init2 = 0x98BADCFE + init3 = 0x10325476 + init4 = 0xC3D2E1F0 +) + +// digest represents the partial evaluation of a checksum. +type digest struct { + h [5]uint32 + x [chunk]byte + nx int + len uint64 +} + +func (d *digest) Reset() { + d.h[0] = init0 + d.h[1] = init1 + d.h[2] = init2 + d.h[3] = init3 + d.h[4] = init4 + d.nx = 0 + d.len = 0 +} + +// New returns a new hash.Hash computing the SHA1 checksum. +func New() hash.Hash { + d := new(digest) + d.Reset() + return d +} + +func (d *digest) Size() int { return Size } + +func (d *digest) BlockSize() int { return BlockSize } + +func (d *digest) Write(p []byte) (nn int, err error) { + nn = len(p) + d.len += uint64(nn) + if d.nx > 0 { + n := copy(d.x[d.nx:], p) + d.nx += n + if d.nx == chunk { + block(d, d.x[:]) + d.nx = 0 + } + p = p[n:] + } + if len(p) >= chunk { + n := len(p) &^ (chunk - 1) + block(d, p[:n]) + p = p[n:] + } + if len(p) > 0 { + d.nx = copy(d.x[:], p) + } + return +} + +func (d0 *digest) Sum(in []byte) []byte { + // Make a copy of d0 so that caller can keep writing and summing. + d := *d0 + hash := d.checkSum() + return append(in, hash[:]...) +} + +func (d *digest) checkSum() [Size]byte { + len := d.len + // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. + var tmp [64]byte + tmp[0] = 0x80 + if len%64 < 56 { + d.Write(tmp[0 : 56-len%64]) + } else { + d.Write(tmp[0 : 64+56-len%64]) + } + + // Length in bits. + len <<= 3 + for i := uint(0); i < 8; i++ { + tmp[i] = byte(len >> (56 - 8*i)) + } + d.Write(tmp[0:8]) + + if d.nx != 0 { + panic("d.nx != 0") + } + + var digest [Size]byte + for i, s := range d.h { + digest[i*4] = byte(s >> 24) + digest[i*4+1] = byte(s >> 16) + digest[i*4+2] = byte(s >> 8) + digest[i*4+3] = byte(s) + } + + return digest +} + +// Sum returns the SHA1 checksum of the data. +func Sum(data []byte) [Size]byte { + var d digest + d.Reset() + d.Write(data) + return d.checkSum() +} diff --git a/src/crypto/sha1/sha1_test.go b/src/crypto/sha1/sha1_test.go new file mode 100644 index 000000000..4a629518b --- /dev/null +++ b/src/crypto/sha1/sha1_test.go @@ -0,0 +1,129 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// SHA1 hash algorithm. See RFC 3174. + +package sha1 + +import ( + "crypto/rand" + "fmt" + "io" + "testing" +) + +type sha1Test struct { + out string + in string +} + +var golden = []sha1Test{ + {"da39a3ee5e6b4b0d3255bfef95601890afd80709", ""}, + {"86f7e437faa5a7fce15d1ddcb9eaeaea377667b8", "a"}, + {"da23614e02469a0d7c7bd1bdab5c9c474b1904dc", "ab"}, + {"a9993e364706816aba3e25717850c26c9cd0d89d", "abc"}, + {"81fe8bfe87576c3ecb22426f8e57847382917acf", "abcd"}, + {"03de6c570bfe24bfc328ccd7ca46b76eadaf4334", "abcde"}, + {"1f8ac10f23c5b5bc1167bda84b833e5c057a77d2", "abcdef"}, + {"2fb5e13419fc89246865e7a324f476ec624e8740", "abcdefg"}, + {"425af12a0743502b322e93a015bcf868e324d56a", "abcdefgh"}, + {"c63b19f1e4c8b5f76b25c49b8b87f57d8e4872a1", "abcdefghi"}, + {"d68c19a0a345b7eab78d5e11e991c026ec60db63", "abcdefghij"}, + {"ebf81ddcbe5bf13aaabdc4d65354fdf2044f38a7", "Discard medicine more than two years old."}, + {"e5dea09392dd886ca63531aaa00571dc07554bb6", "He who has a shady past knows that nice guys finish last."}, + {"45988f7234467b94e3e9494434c96ee3609d8f8f", "I wouldn't marry him with a ten foot pole."}, + {"55dee037eb7460d5a692d1ce11330b260e40c988", "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"}, + {"b7bc5fb91080c7de6b582ea281f8a396d7c0aee8", "The days of the digital watch are numbered. -Tom Stoppard"}, + {"c3aed9358f7c77f523afe86135f06b95b3999797", "Nepal premier won't resign."}, + {"6e29d302bf6e3a5e4305ff318d983197d6906bb9", "For every action there is an equal and opposite government program."}, + {"597f6a540010f94c15d71806a99a2c8710e747bd", "His money is twice tainted: 'taint yours and 'taint mine."}, + {"6859733b2590a8a091cecf50086febc5ceef1e80", "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, + {"514b2630ec089b8aee18795fc0cf1f4860cdacad", "It's a tiny change to the code and not completely disgusting. - Bob Manchek"}, + {"c5ca0d4a7b6676fc7aa72caa41cc3d5df567ed69", "size: a.out: bad magic"}, + {"74c51fa9a04eadc8c1bbeaa7fc442f834b90a00a", "The major problem is with sendmail. -Mark Horton"}, + {"0b4c4ce5f52c3ad2821852a8dc00217fa18b8b66", "Give me a rock, paper and scissors and I will move the world. CCFestoon"}, + {"3ae7937dd790315beb0f48330e8642237c61550a", "If the enemy is within range, then so are you."}, + {"410a2b296df92b9a47412b13281df8f830a9f44b", "It's well we cannot hear the screams/That we create in others' dreams."}, + {"841e7c85ca1adcddbdd0187f1289acb5c642f7f5", "You remind me of a TV show, but that's all right: I watch it anyway."}, + {"163173b825d03b952601376b25212df66763e1db", "C is as portable as Stonehedge!!"}, + {"32b0377f2687eb88e22106f133c586ab314d5279", "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"}, + {"0885aaf99b569542fd165fa44e322718f4a984e0", "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"}, + {"6627d6904d71420b0bf3886ab629623538689f45", "How can you write a big system without C++? -Paul Glick"}, +} + +func TestGolden(t *testing.T) { + for i := 0; i < len(golden); i++ { + g := golden[i] + s := fmt.Sprintf("%x", Sum([]byte(g.in))) + if s != g.out { + t.Fatalf("Sum function: sha1(%s) = %s want %s", g.in, s, g.out) + } + c := New() + for j := 0; j < 3; j++ { + if j < 2 { + io.WriteString(c, g.in) + } else { + io.WriteString(c, g.in[0:len(g.in)/2]) + c.Sum(nil) + io.WriteString(c, g.in[len(g.in)/2:]) + } + s := fmt.Sprintf("%x", c.Sum(nil)) + if s != g.out { + t.Fatalf("sha1[%d](%s) = %s want %s", j, g.in, s, g.out) + } + c.Reset() + } + } +} + +func TestSize(t *testing.T) { + c := New() + if got := c.Size(); got != Size { + t.Errorf("Size = %d; want %d", got, Size) + } +} + +func TestBlockSize(t *testing.T) { + c := New() + if got := c.BlockSize(); got != BlockSize { + t.Errorf("BlockSize = %d; want %d", got, BlockSize) + } +} + +// Tests that blockGeneric (pure Go) and block (in assembly for amd64, 386, arm) match. +func TestBlockGeneric(t *testing.T) { + gen, asm := New().(*digest), New().(*digest) + buf := make([]byte, BlockSize*20) // arbitrary factor + rand.Read(buf) + blockGeneric(gen, buf) + block(asm, buf) + if *gen != *asm { + t.Error("block and blockGeneric resulted in different states") + } +} + +var bench = New() +var buf = make([]byte, 8192) + +func benchmarkSize(b *testing.B, size int) { + b.SetBytes(int64(size)) + sum := make([]byte, bench.Size()) + for i := 0; i < b.N; i++ { + bench.Reset() + bench.Write(buf[:size]) + bench.Sum(sum[:0]) + } +} + +func BenchmarkHash8Bytes(b *testing.B) { + benchmarkSize(b, 8) +} + +func BenchmarkHash1K(b *testing.B) { + benchmarkSize(b, 1024) +} + +func BenchmarkHash8K(b *testing.B) { + benchmarkSize(b, 8192) +} diff --git a/src/crypto/sha1/sha1block.go b/src/crypto/sha1/sha1block.go new file mode 100644 index 000000000..fde3c981c --- /dev/null +++ b/src/crypto/sha1/sha1block.go @@ -0,0 +1,90 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sha1 + +const ( + _K0 = 0x5A827999 + _K1 = 0x6ED9EBA1 + _K2 = 0x8F1BBCDC + _K3 = 0xCA62C1D6 +) + +// blockGeneric is a portable, pure Go version of the SHA1 block step. +// It's used by sha1block_generic.go and tests. +func blockGeneric(dig *digest, p []byte) { + var w [16]uint32 + + h0, h1, h2, h3, h4 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4] + for len(p) >= chunk { + // Can interlace the computation of w with the + // rounds below if needed for speed. + for i := 0; i < 16; i++ { + j := i * 4 + w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3]) + } + + a, b, c, d, e := h0, h1, h2, h3, h4 + + // Each of the four 20-iteration rounds + // differs only in the computation of f and + // the choice of K (_K0, _K1, etc). + i := 0 + for ; i < 16; i++ { + f := b&c | (^b)&d + a5 := a<<5 | a>>(32-5) + b30 := b<<30 | b>>(32-30) + t := a5 + f + e + w[i&0xf] + _K0 + a, b, c, d, e = t, a, b30, c, d + } + for ; i < 20; i++ { + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + + f := b&c | (^b)&d + a5 := a<<5 | a>>(32-5) + b30 := b<<30 | b>>(32-30) + t := a5 + f + e + w[i&0xf] + _K0 + a, b, c, d, e = t, a, b30, c, d + } + for ; i < 40; i++ { + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + f := b ^ c ^ d + a5 := a<<5 | a>>(32-5) + b30 := b<<30 | b>>(32-30) + t := a5 + f + e + w[i&0xf] + _K1 + a, b, c, d, e = t, a, b30, c, d + } + for ; i < 60; i++ { + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + f := ((b | c) & d) | (b & c) + + a5 := a<<5 | a>>(32-5) + b30 := b<<30 | b>>(32-30) + t := a5 + f + e + w[i&0xf] + _K2 + a, b, c, d, e = t, a, b30, c, d + } + for ; i < 80; i++ { + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + f := b ^ c ^ d + a5 := a<<5 | a>>(32-5) + b30 := b<<30 | b>>(32-30) + t := a5 + f + e + w[i&0xf] + _K3 + a, b, c, d, e = t, a, b30, c, d + } + + h0 += a + h1 += b + h2 += c + h3 += d + h4 += e + + p = p[chunk:] + } + + dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4] = h0, h1, h2, h3, h4 +} diff --git a/src/crypto/sha1/sha1block_386.s b/src/crypto/sha1/sha1block_386.s new file mode 100644 index 000000000..a0adabc3c --- /dev/null +++ b/src/crypto/sha1/sha1block_386.s @@ -0,0 +1,233 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// SHA1 block routine. See sha1block.go for Go equivalent. +// +// There are 80 rounds of 4 types: +// - rounds 0-15 are type 1 and load data (ROUND1 macro). +// - rounds 16-19 are type 1 and do not load data (ROUND1x macro). +// - rounds 20-39 are type 2 and do not load data (ROUND2 macro). +// - rounds 40-59 are type 3 and do not load data (ROUND3 macro). +// - rounds 60-79 are type 4 and do not load data (ROUND4 macro). +// +// Each round loads or shuffles the data, then computes a per-round +// function of b, c, d, and then mixes the result into and rotates the +// five registers a, b, c, d, e holding the intermediate results. +// +// The register rotation is implemented by rotating the arguments to +// the round macros instead of by explicit move instructions. + +// Like sha1block_amd64.s, but we keep the data and limit pointers on the stack. +// To free up the word pointer (R10 on amd64, DI here), we add it to e during +// LOAD/SHUFFLE instead of during MIX. +// +// The stack holds the intermediate word array - 16 uint32s - at 0(SP) up to 64(SP). +// The saved a, b, c, d, e (R11 through R15 on amd64) are at 64(SP) up to 84(SP). +// The saved limit pointer (DI on amd64) is at 84(SP). +// The saved data pointer (SI on amd64) is at 88(SP). + +#define LOAD(index, e) \ + MOVL 88(SP), SI; \ + MOVL (index*4)(SI), DI; \ + BSWAPL DI; \ + MOVL DI, (index*4)(SP); \ + ADDL DI, e + +#define SHUFFLE(index, e) \ + MOVL (((index)&0xf)*4)(SP), DI; \ + XORL (((index-3)&0xf)*4)(SP), DI; \ + XORL (((index-8)&0xf)*4)(SP), DI; \ + XORL (((index-14)&0xf)*4)(SP), DI; \ + ROLL $1, DI; \ + MOVL DI, (((index)&0xf)*4)(SP); \ + ADDL DI, e + +#define FUNC1(a, b, c, d, e) \ + MOVL d, DI; \ + XORL c, DI; \ + ANDL b, DI; \ + XORL d, DI + +#define FUNC2(a, b, c, d, e) \ + MOVL b, DI; \ + XORL c, DI; \ + XORL d, DI + +#define FUNC3(a, b, c, d, e) \ + MOVL b, SI; \ + ORL c, SI; \ + ANDL d, SI; \ + MOVL b, DI; \ + ANDL c, DI; \ + ORL SI, DI + +#define FUNC4 FUNC2 + +#define MIX(a, b, c, d, e, const) \ + ROLL $30, b; \ + ADDL DI, e; \ + MOVL a, SI; \ + ROLL $5, SI; \ + LEAL const(e)(SI*1), e + +#define ROUND1(a, b, c, d, e, index) \ + LOAD(index, e); \ + FUNC1(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x5A827999) + +#define ROUND1x(a, b, c, d, e, index) \ + SHUFFLE(index, e); \ + FUNC1(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x5A827999) + +#define ROUND2(a, b, c, d, e, index) \ + SHUFFLE(index, e); \ + FUNC2(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x6ED9EBA1) + +#define ROUND3(a, b, c, d, e, index) \ + SHUFFLE(index, e); \ + FUNC3(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x8F1BBCDC) + +#define ROUND4(a, b, c, d, e, index) \ + SHUFFLE(index, e); \ + FUNC4(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0xCA62C1D6) + +// func block(dig *digest, p []byte) +TEXT ·block(SB),NOSPLIT,$92-16 + MOVL dig+0(FP), BP + MOVL p+4(FP), SI + MOVL p_len+8(FP), DX + SHRL $6, DX + SHLL $6, DX + + LEAL (SI)(DX*1), DI + MOVL (0*4)(BP), AX + MOVL (1*4)(BP), BX + MOVL (2*4)(BP), CX + MOVL (3*4)(BP), DX + MOVL (4*4)(BP), BP + + CMPL SI, DI + JEQ end + + MOVL DI, 84(SP) + +loop: + MOVL SI, 88(SP) + + MOVL AX, 64(SP) + MOVL BX, 68(SP) + MOVL CX, 72(SP) + MOVL DX, 76(SP) + MOVL BP, 80(SP) + + ROUND1(AX, BX, CX, DX, BP, 0) + ROUND1(BP, AX, BX, CX, DX, 1) + ROUND1(DX, BP, AX, BX, CX, 2) + ROUND1(CX, DX, BP, AX, BX, 3) + ROUND1(BX, CX, DX, BP, AX, 4) + ROUND1(AX, BX, CX, DX, BP, 5) + ROUND1(BP, AX, BX, CX, DX, 6) + ROUND1(DX, BP, AX, BX, CX, 7) + ROUND1(CX, DX, BP, AX, BX, 8) + ROUND1(BX, CX, DX, BP, AX, 9) + ROUND1(AX, BX, CX, DX, BP, 10) + ROUND1(BP, AX, BX, CX, DX, 11) + ROUND1(DX, BP, AX, BX, CX, 12) + ROUND1(CX, DX, BP, AX, BX, 13) + ROUND1(BX, CX, DX, BP, AX, 14) + ROUND1(AX, BX, CX, DX, BP, 15) + + ROUND1x(BP, AX, BX, CX, DX, 16) + ROUND1x(DX, BP, AX, BX, CX, 17) + ROUND1x(CX, DX, BP, AX, BX, 18) + ROUND1x(BX, CX, DX, BP, AX, 19) + + ROUND2(AX, BX, CX, DX, BP, 20) + ROUND2(BP, AX, BX, CX, DX, 21) + ROUND2(DX, BP, AX, BX, CX, 22) + ROUND2(CX, DX, BP, AX, BX, 23) + ROUND2(BX, CX, DX, BP, AX, 24) + ROUND2(AX, BX, CX, DX, BP, 25) + ROUND2(BP, AX, BX, CX, DX, 26) + ROUND2(DX, BP, AX, BX, CX, 27) + ROUND2(CX, DX, BP, AX, BX, 28) + ROUND2(BX, CX, DX, BP, AX, 29) + ROUND2(AX, BX, CX, DX, BP, 30) + ROUND2(BP, AX, BX, CX, DX, 31) + ROUND2(DX, BP, AX, BX, CX, 32) + ROUND2(CX, DX, BP, AX, BX, 33) + ROUND2(BX, CX, DX, BP, AX, 34) + ROUND2(AX, BX, CX, DX, BP, 35) + ROUND2(BP, AX, BX, CX, DX, 36) + ROUND2(DX, BP, AX, BX, CX, 37) + ROUND2(CX, DX, BP, AX, BX, 38) + ROUND2(BX, CX, DX, BP, AX, 39) + + ROUND3(AX, BX, CX, DX, BP, 40) + ROUND3(BP, AX, BX, CX, DX, 41) + ROUND3(DX, BP, AX, BX, CX, 42) + ROUND3(CX, DX, BP, AX, BX, 43) + ROUND3(BX, CX, DX, BP, AX, 44) + ROUND3(AX, BX, CX, DX, BP, 45) + ROUND3(BP, AX, BX, CX, DX, 46) + ROUND3(DX, BP, AX, BX, CX, 47) + ROUND3(CX, DX, BP, AX, BX, 48) + ROUND3(BX, CX, DX, BP, AX, 49) + ROUND3(AX, BX, CX, DX, BP, 50) + ROUND3(BP, AX, BX, CX, DX, 51) + ROUND3(DX, BP, AX, BX, CX, 52) + ROUND3(CX, DX, BP, AX, BX, 53) + ROUND3(BX, CX, DX, BP, AX, 54) + ROUND3(AX, BX, CX, DX, BP, 55) + ROUND3(BP, AX, BX, CX, DX, 56) + ROUND3(DX, BP, AX, BX, CX, 57) + ROUND3(CX, DX, BP, AX, BX, 58) + ROUND3(BX, CX, DX, BP, AX, 59) + + ROUND4(AX, BX, CX, DX, BP, 60) + ROUND4(BP, AX, BX, CX, DX, 61) + ROUND4(DX, BP, AX, BX, CX, 62) + ROUND4(CX, DX, BP, AX, BX, 63) + ROUND4(BX, CX, DX, BP, AX, 64) + ROUND4(AX, BX, CX, DX, BP, 65) + ROUND4(BP, AX, BX, CX, DX, 66) + ROUND4(DX, BP, AX, BX, CX, 67) + ROUND4(CX, DX, BP, AX, BX, 68) + ROUND4(BX, CX, DX, BP, AX, 69) + ROUND4(AX, BX, CX, DX, BP, 70) + ROUND4(BP, AX, BX, CX, DX, 71) + ROUND4(DX, BP, AX, BX, CX, 72) + ROUND4(CX, DX, BP, AX, BX, 73) + ROUND4(BX, CX, DX, BP, AX, 74) + ROUND4(AX, BX, CX, DX, BP, 75) + ROUND4(BP, AX, BX, CX, DX, 76) + ROUND4(DX, BP, AX, BX, CX, 77) + ROUND4(CX, DX, BP, AX, BX, 78) + ROUND4(BX, CX, DX, BP, AX, 79) + + ADDL 64(SP), AX + ADDL 68(SP), BX + ADDL 72(SP), CX + ADDL 76(SP), DX + ADDL 80(SP), BP + + MOVL 88(SP), SI + ADDL $64, SI + CMPL SI, 84(SP) + JB loop + +end: + MOVL dig+0(FP), DI + MOVL AX, (0*4)(DI) + MOVL BX, (1*4)(DI) + MOVL CX, (2*4)(DI) + MOVL DX, (3*4)(DI) + MOVL BP, (4*4)(DI) + RET diff --git a/src/crypto/sha1/sha1block_amd64.s b/src/crypto/sha1/sha1block_amd64.s new file mode 100644 index 000000000..4319df63e --- /dev/null +++ b/src/crypto/sha1/sha1block_amd64.s @@ -0,0 +1,216 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// SHA1 block routine. See sha1block.go for Go equivalent. +// +// There are 80 rounds of 4 types: +// - rounds 0-15 are type 1 and load data (ROUND1 macro). +// - rounds 16-19 are type 1 and do not load data (ROUND1x macro). +// - rounds 20-39 are type 2 and do not load data (ROUND2 macro). +// - rounds 40-59 are type 3 and do not load data (ROUND3 macro). +// - rounds 60-79 are type 4 and do not load data (ROUND4 macro). +// +// Each round loads or shuffles the data, then computes a per-round +// function of b, c, d, and then mixes the result into and rotates the +// five registers a, b, c, d, e holding the intermediate results. +// +// The register rotation is implemented by rotating the arguments to +// the round macros instead of by explicit move instructions. + +#define LOAD(index) \ + MOVL (index*4)(SI), R10; \ + BSWAPL R10; \ + MOVL R10, (index*4)(SP) + +#define SHUFFLE(index) \ + MOVL (((index)&0xf)*4)(SP), R10; \ + XORL (((index-3)&0xf)*4)(SP), R10; \ + XORL (((index-8)&0xf)*4)(SP), R10; \ + XORL (((index-14)&0xf)*4)(SP), R10; \ + ROLL $1, R10; \ + MOVL R10, (((index)&0xf)*4)(SP) + +#define FUNC1(a, b, c, d, e) \ + MOVL d, R9; \ + XORL c, R9; \ + ANDL b, R9; \ + XORL d, R9 + +#define FUNC2(a, b, c, d, e) \ + MOVL b, R9; \ + XORL c, R9; \ + XORL d, R9 + +#define FUNC3(a, b, c, d, e) \ + MOVL b, R8; \ + ORL c, R8; \ + ANDL d, R8; \ + MOVL b, R9; \ + ANDL c, R9; \ + ORL R8, R9 + +#define FUNC4 FUNC2 + +#define MIX(a, b, c, d, e, const) \ + ROLL $30, b; \ + ADDL R9, e; \ + MOVL a, R8; \ + ROLL $5, R8; \ + LEAL const(e)(R10*1), e; \ + ADDL R8, e + +#define ROUND1(a, b, c, d, e, index) \ + LOAD(index); \ + FUNC1(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x5A827999) + +#define ROUND1x(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC1(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x5A827999) + +#define ROUND2(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC2(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x6ED9EBA1) + +#define ROUND3(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC3(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x8F1BBCDC) + +#define ROUND4(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC4(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0xCA62C1D6) + +TEXT ·block(SB),NOSPLIT,$64-32 + MOVQ dig+0(FP), BP + MOVQ p_base+8(FP), SI + MOVQ p_len+16(FP), DX + SHRQ $6, DX + SHLQ $6, DX + + LEAQ (SI)(DX*1), DI + MOVL (0*4)(BP), AX + MOVL (1*4)(BP), BX + MOVL (2*4)(BP), CX + MOVL (3*4)(BP), DX + MOVL (4*4)(BP), BP + + CMPQ SI, DI + JEQ end + +loop: + MOVL AX, R11 + MOVL BX, R12 + MOVL CX, R13 + MOVL DX, R14 + MOVL BP, R15 + + ROUND1(AX, BX, CX, DX, BP, 0) + ROUND1(BP, AX, BX, CX, DX, 1) + ROUND1(DX, BP, AX, BX, CX, 2) + ROUND1(CX, DX, BP, AX, BX, 3) + ROUND1(BX, CX, DX, BP, AX, 4) + ROUND1(AX, BX, CX, DX, BP, 5) + ROUND1(BP, AX, BX, CX, DX, 6) + ROUND1(DX, BP, AX, BX, CX, 7) + ROUND1(CX, DX, BP, AX, BX, 8) + ROUND1(BX, CX, DX, BP, AX, 9) + ROUND1(AX, BX, CX, DX, BP, 10) + ROUND1(BP, AX, BX, CX, DX, 11) + ROUND1(DX, BP, AX, BX, CX, 12) + ROUND1(CX, DX, BP, AX, BX, 13) + ROUND1(BX, CX, DX, BP, AX, 14) + ROUND1(AX, BX, CX, DX, BP, 15) + + ROUND1x(BP, AX, BX, CX, DX, 16) + ROUND1x(DX, BP, AX, BX, CX, 17) + ROUND1x(CX, DX, BP, AX, BX, 18) + ROUND1x(BX, CX, DX, BP, AX, 19) + + ROUND2(AX, BX, CX, DX, BP, 20) + ROUND2(BP, AX, BX, CX, DX, 21) + ROUND2(DX, BP, AX, BX, CX, 22) + ROUND2(CX, DX, BP, AX, BX, 23) + ROUND2(BX, CX, DX, BP, AX, 24) + ROUND2(AX, BX, CX, DX, BP, 25) + ROUND2(BP, AX, BX, CX, DX, 26) + ROUND2(DX, BP, AX, BX, CX, 27) + ROUND2(CX, DX, BP, AX, BX, 28) + ROUND2(BX, CX, DX, BP, AX, 29) + ROUND2(AX, BX, CX, DX, BP, 30) + ROUND2(BP, AX, BX, CX, DX, 31) + ROUND2(DX, BP, AX, BX, CX, 32) + ROUND2(CX, DX, BP, AX, BX, 33) + ROUND2(BX, CX, DX, BP, AX, 34) + ROUND2(AX, BX, CX, DX, BP, 35) + ROUND2(BP, AX, BX, CX, DX, 36) + ROUND2(DX, BP, AX, BX, CX, 37) + ROUND2(CX, DX, BP, AX, BX, 38) + ROUND2(BX, CX, DX, BP, AX, 39) + + ROUND3(AX, BX, CX, DX, BP, 40) + ROUND3(BP, AX, BX, CX, DX, 41) + ROUND3(DX, BP, AX, BX, CX, 42) + ROUND3(CX, DX, BP, AX, BX, 43) + ROUND3(BX, CX, DX, BP, AX, 44) + ROUND3(AX, BX, CX, DX, BP, 45) + ROUND3(BP, AX, BX, CX, DX, 46) + ROUND3(DX, BP, AX, BX, CX, 47) + ROUND3(CX, DX, BP, AX, BX, 48) + ROUND3(BX, CX, DX, BP, AX, 49) + ROUND3(AX, BX, CX, DX, BP, 50) + ROUND3(BP, AX, BX, CX, DX, 51) + ROUND3(DX, BP, AX, BX, CX, 52) + ROUND3(CX, DX, BP, AX, BX, 53) + ROUND3(BX, CX, DX, BP, AX, 54) + ROUND3(AX, BX, CX, DX, BP, 55) + ROUND3(BP, AX, BX, CX, DX, 56) + ROUND3(DX, BP, AX, BX, CX, 57) + ROUND3(CX, DX, BP, AX, BX, 58) + ROUND3(BX, CX, DX, BP, AX, 59) + + ROUND4(AX, BX, CX, DX, BP, 60) + ROUND4(BP, AX, BX, CX, DX, 61) + ROUND4(DX, BP, AX, BX, CX, 62) + ROUND4(CX, DX, BP, AX, BX, 63) + ROUND4(BX, CX, DX, BP, AX, 64) + ROUND4(AX, BX, CX, DX, BP, 65) + ROUND4(BP, AX, BX, CX, DX, 66) + ROUND4(DX, BP, AX, BX, CX, 67) + ROUND4(CX, DX, BP, AX, BX, 68) + ROUND4(BX, CX, DX, BP, AX, 69) + ROUND4(AX, BX, CX, DX, BP, 70) + ROUND4(BP, AX, BX, CX, DX, 71) + ROUND4(DX, BP, AX, BX, CX, 72) + ROUND4(CX, DX, BP, AX, BX, 73) + ROUND4(BX, CX, DX, BP, AX, 74) + ROUND4(AX, BX, CX, DX, BP, 75) + ROUND4(BP, AX, BX, CX, DX, 76) + ROUND4(DX, BP, AX, BX, CX, 77) + ROUND4(CX, DX, BP, AX, BX, 78) + ROUND4(BX, CX, DX, BP, AX, 79) + + ADDL R11, AX + ADDL R12, BX + ADDL R13, CX + ADDL R14, DX + ADDL R15, BP + + ADDQ $64, SI + CMPQ SI, DI + JB loop + +end: + MOVQ dig+0(FP), DI + MOVL AX, (0*4)(DI) + MOVL BX, (1*4)(DI) + MOVL CX, (2*4)(DI) + MOVL DX, (3*4)(DI) + MOVL BP, (4*4)(DI) + RET diff --git a/src/crypto/sha1/sha1block_amd64p32.s b/src/crypto/sha1/sha1block_amd64p32.s new file mode 100644 index 000000000..d93fbf1ed --- /dev/null +++ b/src/crypto/sha1/sha1block_amd64p32.s @@ -0,0 +1,216 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// SHA1 block routine. See sha1block.go for Go equivalent. +// +// There are 80 rounds of 4 types: +// - rounds 0-15 are type 1 and load data (ROUND1 macro). +// - rounds 16-19 are type 1 and do not load data (ROUND1x macro). +// - rounds 20-39 are type 2 and do not load data (ROUND2 macro). +// - rounds 40-59 are type 3 and do not load data (ROUND3 macro). +// - rounds 60-79 are type 4 and do not load data (ROUND4 macro). +// +// Each round loads or shuffles the data, then computes a per-round +// function of b, c, d, and then mixes the result into and rotates the +// five registers a, b, c, d, e holding the intermediate results. +// +// The register rotation is implemented by rotating the arguments to +// the round macros instead of by explicit move instructions. +// +// amd64p32 version. +// To ensure safety for Native Client, avoids use of BP and R15 +// as well as two-register addressing modes. + +#define LOAD(index) \ + MOVL (index*4)(SI), R10; \ + BSWAPL R10; \ + MOVL R10, (index*4)(SP) + +#define SHUFFLE(index) \ + MOVL (((index)&0xf)*4)(SP), R10; \ + XORL (((index-3)&0xf)*4)(SP), R10; \ + XORL (((index-8)&0xf)*4)(SP), R10; \ + XORL (((index-14)&0xf)*4)(SP), R10; \ + ROLL $1, R10; \ + MOVL R10, (((index)&0xf)*4)(SP) + +#define FUNC1(a, b, c, d, e) \ + MOVL d, R9; \ + XORL c, R9; \ + ANDL b, R9; \ + XORL d, R9 + +#define FUNC2(a, b, c, d, e) \ + MOVL b, R9; \ + XORL c, R9; \ + XORL d, R9 + +#define FUNC3(a, b, c, d, e) \ + MOVL b, R8; \ + ORL c, R8; \ + ANDL d, R8; \ + MOVL b, R9; \ + ANDL c, R9; \ + ORL R8, R9 + +#define FUNC4 FUNC2 + +#define MIX(a, b, c, d, e, const) \ + ROLL $30, b; \ + ADDL R9, e; \ + MOVL a, R8; \ + ROLL $5, R8; \ + LEAL const(e)(R10*1), e; \ + ADDL R8, e + +#define ROUND1(a, b, c, d, e, index) \ + LOAD(index); \ + FUNC1(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x5A827999) + +#define ROUND1x(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC1(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x5A827999) + +#define ROUND2(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC2(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x6ED9EBA1) + +#define ROUND3(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC3(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x8F1BBCDC) + +#define ROUND4(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC4(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0xCA62C1D6) + +TEXT ·block(SB),NOSPLIT,$64-32 + MOVL dig+0(FP), R14 + MOVL p_base+4(FP), SI + MOVL p_len+8(FP), DX + SHRQ $6, DX + SHLQ $6, DX + + LEAQ (SI)(DX*1), DI + MOVL (0*4)(R14), AX + MOVL (1*4)(R14), BX + MOVL (2*4)(R14), CX + MOVL (3*4)(R14), DX + MOVL (4*4)(R14), R13 + + CMPQ SI, DI + JEQ end + +loop: +#define BP R13 /* keep diff from sha1block_amd64.s small */ + ROUND1(AX, BX, CX, DX, BP, 0) + ROUND1(BP, AX, BX, CX, DX, 1) + ROUND1(DX, BP, AX, BX, CX, 2) + ROUND1(CX, DX, BP, AX, BX, 3) + ROUND1(BX, CX, DX, BP, AX, 4) + ROUND1(AX, BX, CX, DX, BP, 5) + ROUND1(BP, AX, BX, CX, DX, 6) + ROUND1(DX, BP, AX, BX, CX, 7) + ROUND1(CX, DX, BP, AX, BX, 8) + ROUND1(BX, CX, DX, BP, AX, 9) + ROUND1(AX, BX, CX, DX, BP, 10) + ROUND1(BP, AX, BX, CX, DX, 11) + ROUND1(DX, BP, AX, BX, CX, 12) + ROUND1(CX, DX, BP, AX, BX, 13) + ROUND1(BX, CX, DX, BP, AX, 14) + ROUND1(AX, BX, CX, DX, BP, 15) + + ROUND1x(BP, AX, BX, CX, DX, 16) + ROUND1x(DX, BP, AX, BX, CX, 17) + ROUND1x(CX, DX, BP, AX, BX, 18) + ROUND1x(BX, CX, DX, BP, AX, 19) + + ROUND2(AX, BX, CX, DX, BP, 20) + ROUND2(BP, AX, BX, CX, DX, 21) + ROUND2(DX, BP, AX, BX, CX, 22) + ROUND2(CX, DX, BP, AX, BX, 23) + ROUND2(BX, CX, DX, BP, AX, 24) + ROUND2(AX, BX, CX, DX, BP, 25) + ROUND2(BP, AX, BX, CX, DX, 26) + ROUND2(DX, BP, AX, BX, CX, 27) + ROUND2(CX, DX, BP, AX, BX, 28) + ROUND2(BX, CX, DX, BP, AX, 29) + ROUND2(AX, BX, CX, DX, BP, 30) + ROUND2(BP, AX, BX, CX, DX, 31) + ROUND2(DX, BP, AX, BX, CX, 32) + ROUND2(CX, DX, BP, AX, BX, 33) + ROUND2(BX, CX, DX, BP, AX, 34) + ROUND2(AX, BX, CX, DX, BP, 35) + ROUND2(BP, AX, BX, CX, DX, 36) + ROUND2(DX, BP, AX, BX, CX, 37) + ROUND2(CX, DX, BP, AX, BX, 38) + ROUND2(BX, CX, DX, BP, AX, 39) + + ROUND3(AX, BX, CX, DX, BP, 40) + ROUND3(BP, AX, BX, CX, DX, 41) + ROUND3(DX, BP, AX, BX, CX, 42) + ROUND3(CX, DX, BP, AX, BX, 43) + ROUND3(BX, CX, DX, BP, AX, 44) + ROUND3(AX, BX, CX, DX, BP, 45) + ROUND3(BP, AX, BX, CX, DX, 46) + ROUND3(DX, BP, AX, BX, CX, 47) + ROUND3(CX, DX, BP, AX, BX, 48) + ROUND3(BX, CX, DX, BP, AX, 49) + ROUND3(AX, BX, CX, DX, BP, 50) + ROUND3(BP, AX, BX, CX, DX, 51) + ROUND3(DX, BP, AX, BX, CX, 52) + ROUND3(CX, DX, BP, AX, BX, 53) + ROUND3(BX, CX, DX, BP, AX, 54) + ROUND3(AX, BX, CX, DX, BP, 55) + ROUND3(BP, AX, BX, CX, DX, 56) + ROUND3(DX, BP, AX, BX, CX, 57) + ROUND3(CX, DX, BP, AX, BX, 58) + ROUND3(BX, CX, DX, BP, AX, 59) + + ROUND4(AX, BX, CX, DX, BP, 60) + ROUND4(BP, AX, BX, CX, DX, 61) + ROUND4(DX, BP, AX, BX, CX, 62) + ROUND4(CX, DX, BP, AX, BX, 63) + ROUND4(BX, CX, DX, BP, AX, 64) + ROUND4(AX, BX, CX, DX, BP, 65) + ROUND4(BP, AX, BX, CX, DX, 66) + ROUND4(DX, BP, AX, BX, CX, 67) + ROUND4(CX, DX, BP, AX, BX, 68) + ROUND4(BX, CX, DX, BP, AX, 69) + ROUND4(AX, BX, CX, DX, BP, 70) + ROUND4(BP, AX, BX, CX, DX, 71) + ROUND4(DX, BP, AX, BX, CX, 72) + ROUND4(CX, DX, BP, AX, BX, 73) + ROUND4(BX, CX, DX, BP, AX, 74) + ROUND4(AX, BX, CX, DX, BP, 75) + ROUND4(BP, AX, BX, CX, DX, 76) + ROUND4(DX, BP, AX, BX, CX, 77) + ROUND4(CX, DX, BP, AX, BX, 78) + ROUND4(BX, CX, DX, BP, AX, 79) +#undef BP + + ADDL (0*4)(R14), AX + ADDL (1*4)(R14), BX + ADDL (2*4)(R14), CX + ADDL (3*4)(R14), DX + ADDL (4*4)(R14), R13 + + MOVL AX, (0*4)(R14) + MOVL BX, (1*4)(R14) + MOVL CX, (2*4)(R14) + MOVL DX, (3*4)(R14) + MOVL R13, (4*4)(R14) + + ADDQ $64, SI + CMPQ SI, DI + JB loop + +end: + RET diff --git a/src/crypto/sha1/sha1block_arm.s b/src/crypto/sha1/sha1block_arm.s new file mode 100644 index 000000000..f11f33dc3 --- /dev/null +++ b/src/crypto/sha1/sha1block_arm.s @@ -0,0 +1,217 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// ARM version of md5block.go + +#include "textflag.h" + +// SHA1 block routine. See sha1block.go for Go equivalent. +// +// There are 80 rounds of 4 types: +// - rounds 0-15 are type 1 and load data (ROUND1 macro). +// - rounds 16-19 are type 1 and do not load data (ROUND1x macro). +// - rounds 20-39 are type 2 and do not load data (ROUND2 macro). +// - rounds 40-59 are type 3 and do not load data (ROUND3 macro). +// - rounds 60-79 are type 4 and do not load data (ROUND4 macro). +// +// Each round loads or shuffles the data, then computes a per-round +// function of b, c, d, and then mixes the result into and rotates the +// five registers a, b, c, d, e holding the intermediate results. +// +// The register rotation is implemented by rotating the arguments to +// the round macros instead of by explicit move instructions. + +// Register definitions +data = 0 // Pointer to incoming data +const = 1 // Current constant for SHA round +a = 2 // SHA1 accumulator +b = 3 // SHA1 accumulator +c = 4 // SHA1 accumulator +d = 5 // SHA1 accumulator +e = 6 // SHA1 accumulator +t0 = 7 // Temporary +t1 = 8 // Temporary +// r9, r10 are forbidden +// r11 is OK provided you check the assembler that no synthetic instructions use it +t2 = 11 // Temporary +ctr = 12 // loop counter +w = 14 // point to w buffer + +// func block(dig *digest, p []byte) +// 0(FP) is *digest +// 4(FP) is p.array (struct Slice) +// 8(FP) is p.len +//12(FP) is p.cap +// +// Stack frame +p_end = -4 // -4(SP) pointer to the end of data +p_data = p_end - 4 // -8(SP) current data pointer +w_buf = p_data - 4*80 // -328(SP) 80 words temporary buffer w uint32[80] +saved = w_buf - 4*5 // -348(SP) saved sha1 registers a,b,c,d,e - these must be last +// Total size +4 for saved LR is 352 + + // w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3] + // e += w[i] +#define LOAD(e) \ + MOVBU 2(R(data)), R(t0) ; \ + MOVBU 3(R(data)), R(t1) ; \ + MOVBU 1(R(data)), R(t2) ; \ + ORR R(t0)<<8, R(t1), R(t0) ; \ + MOVBU.P 4(R(data)), R(t1) ; \ + ORR R(t2)<<16, R(t0), R(t0) ; \ + ORR R(t1)<<24, R(t0), R(t0) ; \ + MOVW.P R(t0), 4(R(w)) ; \ + ADD R(t0), R(e), R(e) + + // tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + // w[i&0xf] = tmp<<1 | tmp>>(32-1) + // e += w[i&0xf] +#define SHUFFLE(e) \ + MOVW (-16*4)(R(w)), R(t0) ; \ + MOVW (-14*4)(R(w)), R(t1) ; \ + MOVW (-8*4)(R(w)), R(t2) ; \ + EOR R(t0), R(t1), R(t0) ; \ + MOVW (-3*4)(R(w)), R(t1) ; \ + EOR R(t2), R(t0), R(t0) ; \ + EOR R(t0), R(t1), R(t0) ; \ + MOVW R(t0)@>(32-1), R(t0) ; \ + MOVW.P R(t0), 4(R(w)) ; \ + ADD R(t0), R(e), R(e) + + // t1 = (b & c) | ((~b) & d) +#define FUNC1(a, b, c, d, e) \ + MVN R(b), R(t1) ; \ + AND R(b), R(c), R(t0) ; \ + AND R(d), R(t1), R(t1) ; \ + ORR R(t0), R(t1), R(t1) + + // t1 = b ^ c ^ d +#define FUNC2(a, b, c, d, e) \ + EOR R(b), R(c), R(t1) ; \ + EOR R(d), R(t1), R(t1) + + // t1 = (b & c) | (b & d) | (c & d) = + // t1 = (b & c) | ((b | c) & d) +#define FUNC3(a, b, c, d, e) \ + ORR R(b), R(c), R(t0) ; \ + AND R(b), R(c), R(t1) ; \ + AND R(d), R(t0), R(t0) ; \ + ORR R(t0), R(t1), R(t1) + +#define FUNC4 FUNC2 + + // a5 := a<<5 | a>>(32-5) + // b = b<<30 | b>>(32-30) + // e = a5 + t1 + e + const +#define MIX(a, b, c, d, e) \ + ADD R(t1), R(e), R(e) ; \ + MOVW R(b)@>(32-30), R(b) ; \ + ADD R(a)@>(32-5), R(e), R(e) ; \ + ADD R(const), R(e), R(e) + +#define ROUND1(a, b, c, d, e) \ + LOAD(e) ; \ + FUNC1(a, b, c, d, e) ; \ + MIX(a, b, c, d, e) + +#define ROUND1x(a, b, c, d, e) \ + SHUFFLE(e) ; \ + FUNC1(a, b, c, d, e) ; \ + MIX(a, b, c, d, e) + +#define ROUND2(a, b, c, d, e) \ + SHUFFLE(e) ; \ + FUNC2(a, b, c, d, e) ; \ + MIX(a, b, c, d, e) + +#define ROUND3(a, b, c, d, e) \ + SHUFFLE(e) ; \ + FUNC3(a, b, c, d, e) ; \ + MIX(a, b, c, d, e) + +#define ROUND4(a, b, c, d, e) \ + SHUFFLE(e) ; \ + FUNC4(a, b, c, d, e) ; \ + MIX(a, b, c, d, e) + + +// func block(dig *digest, p []byte) +TEXT ·block(SB), 0, $352-16 + MOVW p+4(FP), R(data) // pointer to the data + MOVW p_len+8(FP), R(t0) // number of bytes + ADD R(data), R(t0) + MOVW R(t0), p_end(SP) // pointer to end of data + + // Load up initial SHA1 accumulator + MOVW dig+0(FP), R(t0) + MOVM.IA (R(t0)), [R(a),R(b),R(c),R(d),R(e)] + +loop: + // Save registers at SP+4 onwards + MOVM.IB [R(a),R(b),R(c),R(d),R(e)], (R13) + + MOVW $w_buf(SP), R(w) + MOVW $0x5A827999, R(const) + MOVW $3, R(ctr) +loop1: ROUND1(a, b, c, d, e) + ROUND1(e, a, b, c, d) + ROUND1(d, e, a, b, c) + ROUND1(c, d, e, a, b) + ROUND1(b, c, d, e, a) + SUB.S $1, R(ctr) + BNE loop1 + + ROUND1(a, b, c, d, e) + ROUND1x(e, a, b, c, d) + ROUND1x(d, e, a, b, c) + ROUND1x(c, d, e, a, b) + ROUND1x(b, c, d, e, a) + + MOVW $0x6ED9EBA1, R(const) + MOVW $4, R(ctr) +loop2: ROUND2(a, b, c, d, e) + ROUND2(e, a, b, c, d) + ROUND2(d, e, a, b, c) + ROUND2(c, d, e, a, b) + ROUND2(b, c, d, e, a) + SUB.S $1, R(ctr) + BNE loop2 + + MOVW $0x8F1BBCDC, R(const) + MOVW $4, R(ctr) +loop3: ROUND3(a, b, c, d, e) + ROUND3(e, a, b, c, d) + ROUND3(d, e, a, b, c) + ROUND3(c, d, e, a, b) + ROUND3(b, c, d, e, a) + SUB.S $1, R(ctr) + BNE loop3 + + MOVW $0xCA62C1D6, R(const) + MOVW $4, R(ctr) +loop4: ROUND4(a, b, c, d, e) + ROUND4(e, a, b, c, d) + ROUND4(d, e, a, b, c) + ROUND4(c, d, e, a, b) + ROUND4(b, c, d, e, a) + SUB.S $1, R(ctr) + BNE loop4 + + // Accumulate - restoring registers from SP+4 + MOVM.IB (R13), [R(t0),R(t1),R(t2),R(ctr),R(w)] + ADD R(t0), R(a) + ADD R(t1), R(b) + ADD R(t2), R(c) + ADD R(ctr), R(d) + ADD R(w), R(e) + + MOVW p_end(SP), R(t0) + CMP R(t0), R(data) + BLO loop + + // Save final SHA1 accumulator + MOVW dig+0(FP), R(t0) + MOVM.IA [R(a),R(b),R(c),R(d),R(e)], (R(t0)) + + RET diff --git a/src/crypto/sha1/sha1block_decl.go b/src/crypto/sha1/sha1block_decl.go new file mode 100644 index 000000000..24e521af1 --- /dev/null +++ b/src/crypto/sha1/sha1block_decl.go @@ -0,0 +1,11 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64 amd64p32 arm 386 + +package sha1 + +//go:noescape + +func block(dig *digest, p []byte) diff --git a/src/crypto/sha1/sha1block_generic.go b/src/crypto/sha1/sha1block_generic.go new file mode 100644 index 000000000..696e26b62 --- /dev/null +++ b/src/crypto/sha1/sha1block_generic.go @@ -0,0 +1,9 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64,!amd64p32,!386,!arm + +package sha1 + +var block = blockGeneric |