diff options
Diffstat (limited to 'src/crypto/md5')
-rw-r--r-- | src/crypto/md5/example_test.go | 25 | ||||
-rw-r--r-- | src/crypto/md5/gen.go | 331 | ||||
-rw-r--r-- | src/crypto/md5/md5.go | 136 | ||||
-rw-r--r-- | src/crypto/md5/md5_test.go | 163 | ||||
-rw-r--r-- | src/crypto/md5/md5block.go | 265 | ||||
-rw-r--r-- | src/crypto/md5/md5block_386.s | 182 | ||||
-rw-r--r-- | src/crypto/md5/md5block_amd64.s | 179 | ||||
-rw-r--r-- | src/crypto/md5/md5block_amd64p32.s | 184 | ||||
-rw-r--r-- | src/crypto/md5/md5block_arm.s | 299 | ||||
-rw-r--r-- | src/crypto/md5/md5block_decl.go | 11 | ||||
-rw-r--r-- | src/crypto/md5/md5block_generic.go | 9 |
11 files changed, 1784 insertions, 0 deletions
diff --git a/src/crypto/md5/example_test.go b/src/crypto/md5/example_test.go new file mode 100644 index 000000000..d47bb4570 --- /dev/null +++ b/src/crypto/md5/example_test.go @@ -0,0 +1,25 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package md5_test + +import ( + "crypto/md5" + "fmt" + "io" +) + +func ExampleNew() { + h := md5.New() + io.WriteString(h, "The fog is getting thicker!") + io.WriteString(h, "And Leon's getting laaarger!") + fmt.Printf("%x", h.Sum(nil)) + // Output: e2c569be17396eca2a2e3c11578123ed +} + +func ExampleSum() { + data := []byte("These pretzels are making me thirsty.") + fmt.Printf("%x", md5.Sum(data)) + // Output: b0804ec967f48520697662a204f5fe72 +} diff --git a/src/crypto/md5/gen.go b/src/crypto/md5/gen.go new file mode 100644 index 000000000..8cd0a6358 --- /dev/null +++ b/src/crypto/md5/gen.go @@ -0,0 +1,331 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// This program generates md5block.go +// Invoke as +// +// go run gen.go [-full] -output md5block.go +// +// The -full flag causes the generated code to do a full +// (16x) unrolling instead of a 4x unrolling. + +package main + +import ( + "bytes" + "flag" + "go/format" + "io/ioutil" + "log" + "strings" + "text/template" +) + +var filename = flag.String("output", "md5block.go", "output file name") + +func main() { + flag.Parse() + + var buf bytes.Buffer + + t := template.Must(template.New("main").Funcs(funcs).Parse(program)) + if err := t.Execute(&buf, data); err != nil { + log.Fatal(err) + } + + data, err := format.Source(buf.Bytes()) + if err != nil { + log.Fatal(err) + } + err = ioutil.WriteFile(*filename, data, 0644) + if err != nil { + log.Fatal(err) + } +} + +type Data struct { + a, b, c, d string + Shift1 []int + Shift2 []int + Shift3 []int + Shift4 []int + Table1 []uint32 + Table2 []uint32 + Table3 []uint32 + Table4 []uint32 + Full bool +} + +var funcs = template.FuncMap{ + "dup": dup, + "relabel": relabel, + "rotate": rotate, +} + +func dup(count int, x []int) []int { + var out []int + for i := 0; i < count; i++ { + out = append(out, x...) + } + return out +} + +func relabel(s string) string { + return strings.NewReplacer("a", data.a, "b", data.b, "c", data.c, "d", data.d).Replace(s) +} + +func rotate() string { + data.a, data.b, data.c, data.d = data.d, data.a, data.b, data.c + return "" // no output +} + +func init() { + flag.BoolVar(&data.Full, "full", false, "complete unrolling") +} + +var data = Data{ + a: "a", + b: "b", + c: "c", + d: "d", + Shift1: []int{7, 12, 17, 22}, + Shift2: []int{5, 9, 14, 20}, + Shift3: []int{4, 11, 16, 23}, + Shift4: []int{6, 10, 15, 21}, + + // table[i] = int((1<<32) * abs(sin(i+1 radians))). + Table1: []uint32{ + // round 1 + 0xd76aa478, + 0xe8c7b756, + 0x242070db, + 0xc1bdceee, + 0xf57c0faf, + 0x4787c62a, + 0xa8304613, + 0xfd469501, + 0x698098d8, + 0x8b44f7af, + 0xffff5bb1, + 0x895cd7be, + 0x6b901122, + 0xfd987193, + 0xa679438e, + 0x49b40821, + }, + Table2: []uint32{ + // round 2 + 0xf61e2562, + 0xc040b340, + 0x265e5a51, + 0xe9b6c7aa, + 0xd62f105d, + 0x2441453, + 0xd8a1e681, + 0xe7d3fbc8, + 0x21e1cde6, + 0xc33707d6, + 0xf4d50d87, + 0x455a14ed, + 0xa9e3e905, + 0xfcefa3f8, + 0x676f02d9, + 0x8d2a4c8a, + }, + Table3: []uint32{ + // round3 + 0xfffa3942, + 0x8771f681, + 0x6d9d6122, + 0xfde5380c, + 0xa4beea44, + 0x4bdecfa9, + 0xf6bb4b60, + 0xbebfbc70, + 0x289b7ec6, + 0xeaa127fa, + 0xd4ef3085, + 0x4881d05, + 0xd9d4d039, + 0xe6db99e5, + 0x1fa27cf8, + 0xc4ac5665, + }, + Table4: []uint32{ + // round 4 + 0xf4292244, + 0x432aff97, + 0xab9423a7, + 0xfc93a039, + 0x655b59c3, + 0x8f0ccc92, + 0xffeff47d, + 0x85845dd1, + 0x6fa87e4f, + 0xfe2ce6e0, + 0xa3014314, + 0x4e0811a1, + 0xf7537e82, + 0xbd3af235, + 0x2ad7d2bb, + 0xeb86d391, + }, +} + +var program = `// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DO NOT EDIT. +// Generate with: go run gen.go{{if .Full}} -full{{end}} -output md5block.go + +package md5 + +import ( + "unsafe" + "runtime" +) + +{{if not .Full}} + var t1 = [...]uint32{ + {{range .Table1}}{{printf "\t%#x,\n" .}}{{end}} + } + + var t2 = [...]uint32{ + {{range .Table2}}{{printf "\t%#x,\n" .}}{{end}} + } + + var t3 = [...]uint32{ + {{range .Table3}}{{printf "\t%#x,\n" .}}{{end}} + } + + var t4 = [...]uint32{ + {{range .Table4}}{{printf "\t%#x,\n" .}}{{end}} + } +{{end}} + +const x86 = runtime.GOARCH == "amd64" || runtime.GOARCH == "386" + +var littleEndian bool + +func init() { + x := uint32(0x04030201) + y := [4]byte{0x1, 0x2, 0x3, 0x4} + littleEndian = *(*[4]byte)(unsafe.Pointer(&x)) == y +} + +func blockGeneric(dig *digest, p []byte) { + a := dig.s[0] + b := dig.s[1] + c := dig.s[2] + d := dig.s[3] + var X *[16]uint32 + var xbuf [16]uint32 + for len(p) >= chunk { + aa, bb, cc, dd := a, b, c, d + + // This is a constant condition - it is not evaluated on each iteration. + if x86 { + // MD5 was designed so that x86 processors can just iterate + // over the block data directly as uint32s, and we generate + // less code and run 1.3x faster if we take advantage of that. + // My apologies. + X = (*[16]uint32)(unsafe.Pointer(&p[0])) + } else if littleEndian && uintptr(unsafe.Pointer(&p[0]))&(unsafe.Alignof(uint32(0))-1) == 0 { + X = (*[16]uint32)(unsafe.Pointer(&p[0])) + } else { + X = &xbuf + j := 0 + for i := 0; i < 16; i++ { + X[i&15] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24 + j += 4 + } + } + + {{if .Full}} + // Round 1. + {{range $i, $s := dup 4 .Shift1}} + {{index $.Table1 $i | printf "a += (((c^d)&b)^d) + X[%d] + %d" $i | relabel}} + {{printf "a = a<<%d | a>>(32-%d) + b" $s $s | relabel}} + {{rotate}} + {{end}} + + // Round 2. + {{range $i, $s := dup 4 .Shift2}} + {{index $.Table2 $i | printf "a += (((b^c)&d)^c) + X[(1+5*%d)&15] + %d" $i | relabel}} + {{printf "a = a<<%d | a>>(32-%d) + b" $s $s | relabel}} + {{rotate}} + {{end}} + + // Round 3. + {{range $i, $s := dup 4 .Shift3}} + {{index $.Table3 $i | printf "a += (b^c^d) + X[(5+3*%d)&15] + %d" $i | relabel}} + {{printf "a = a<<%d | a>>(32-%d) + b" $s $s | relabel}} + {{rotate}} + {{end}} + + // Round 4. + {{range $i, $s := dup 4 .Shift4}} + {{index $.Table4 $i | printf "a += (c^(b|^d)) + X[(7*%d)&15] + %d" $i | relabel}} + {{printf "a = a<<%d | a>>(32-%d) + b" $s $s | relabel}} + {{rotate}} + {{end}} + {{else}} + // Round 1. + for i := uint(0); i < 16; { + {{range $s := .Shift1}} + {{printf "a += (((c^d)&b)^d) + X[i&15] + t1[i&15]" | relabel}} + {{printf "a = a<<%d | a>>(32-%d) + b" $s $s | relabel}} + i++ + {{rotate}} + {{end}} + } + + // Round 2. + for i := uint(0); i < 16; { + {{range $s := .Shift2}} + {{printf "a += (((b^c)&d)^c) + X[(1+5*i)&15] + t2[i&15]" | relabel}} + {{printf "a = a<<%d | a>>(32-%d) + b" $s $s | relabel}} + i++ + {{rotate}} + {{end}} + } + + // Round 3. + for i := uint(0); i < 16; { + {{range $s := .Shift3}} + {{printf "a += (b^c^d) + X[(5+3*i)&15] + t3[i&15]" | relabel}} + {{printf "a = a<<%d | a>>(32-%d) + b" $s $s | relabel}} + i++ + {{rotate}} + {{end}} + } + + // Round 4. + for i := uint(0); i < 16; { + {{range $s := .Shift4}} + {{printf "a += (c^(b|^d)) + X[(7*i)&15] + t4[i&15]" | relabel}} + {{printf "a = a<<%d | a>>(32-%d) + b" $s $s | relabel}} + i++ + {{rotate}} + {{end}} + } + {{end}} + + a += aa + b += bb + c += cc + d += dd + + p = p[chunk:] + } + + dig.s[0] = a + dig.s[1] = b + dig.s[2] = c + dig.s[3] = d +} +` diff --git a/src/crypto/md5/md5.go b/src/crypto/md5/md5.go new file mode 100644 index 000000000..8c50c6d0b --- /dev/null +++ b/src/crypto/md5/md5.go @@ -0,0 +1,136 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run gen.go -full -output md5block.go + +// Package md5 implements the MD5 hash algorithm as defined in RFC 1321. +package md5 + +import ( + "crypto" + "hash" +) + +func init() { + crypto.RegisterHash(crypto.MD5, New) +} + +// The size of an MD5 checksum in bytes. +const Size = 16 + +// The blocksize of MD5 in bytes. +const BlockSize = 64 + +const ( + chunk = 64 + init0 = 0x67452301 + init1 = 0xEFCDAB89 + init2 = 0x98BADCFE + init3 = 0x10325476 +) + +// digest represents the partial evaluation of a checksum. +type digest struct { + s [4]uint32 + x [chunk]byte + nx int + len uint64 +} + +func (d *digest) Reset() { + d.s[0] = init0 + d.s[1] = init1 + d.s[2] = init2 + d.s[3] = init3 + d.nx = 0 + d.len = 0 +} + +// New returns a new hash.Hash computing the MD5 checksum. +func New() hash.Hash { + d := new(digest) + d.Reset() + return d +} + +func (d *digest) Size() int { return Size } + +func (d *digest) BlockSize() int { return BlockSize } + +func (d *digest) Write(p []byte) (nn int, err error) { + nn = len(p) + d.len += uint64(nn) + if d.nx > 0 { + n := len(p) + if n > chunk-d.nx { + n = chunk - d.nx + } + for i := 0; i < n; i++ { + d.x[d.nx+i] = p[i] + } + d.nx += n + if d.nx == chunk { + block(d, d.x[0:chunk]) + d.nx = 0 + } + p = p[n:] + } + if len(p) >= chunk { + n := len(p) &^ (chunk - 1) + block(d, p[:n]) + p = p[n:] + } + if len(p) > 0 { + d.nx = copy(d.x[:], p) + } + return +} + +func (d0 *digest) Sum(in []byte) []byte { + // Make a copy of d0 so that caller can keep writing and summing. + d := *d0 + hash := d.checkSum() + return append(in, hash[:]...) +} + +func (d *digest) checkSum() [Size]byte { + // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. + len := d.len + var tmp [64]byte + tmp[0] = 0x80 + if len%64 < 56 { + d.Write(tmp[0 : 56-len%64]) + } else { + d.Write(tmp[0 : 64+56-len%64]) + } + + // Length in bits. + len <<= 3 + for i := uint(0); i < 8; i++ { + tmp[i] = byte(len >> (8 * i)) + } + d.Write(tmp[0:8]) + + if d.nx != 0 { + panic("d.nx != 0") + } + + var digest [Size]byte + for i, s := range d.s { + digest[i*4] = byte(s) + digest[i*4+1] = byte(s >> 8) + digest[i*4+2] = byte(s >> 16) + digest[i*4+3] = byte(s >> 24) + } + + return digest +} + +// Sum returns the MD5 checksum of the data. +func Sum(data []byte) [Size]byte { + var d digest + d.Reset() + d.Write(data) + return d.checkSum() +} diff --git a/src/crypto/md5/md5_test.go b/src/crypto/md5/md5_test.go new file mode 100644 index 000000000..e7faf4961 --- /dev/null +++ b/src/crypto/md5/md5_test.go @@ -0,0 +1,163 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package md5 + +import ( + "crypto/rand" + "fmt" + "io" + "testing" + "unsafe" +) + +type md5Test struct { + out string + in string +} + +var golden = []md5Test{ + {"d41d8cd98f00b204e9800998ecf8427e", ""}, + {"0cc175b9c0f1b6a831c399e269772661", "a"}, + {"187ef4436122d1cc2f40dc2b92f0eba0", "ab"}, + {"900150983cd24fb0d6963f7d28e17f72", "abc"}, + {"e2fc714c4727ee9395f324cd2e7f331f", "abcd"}, + {"ab56b4d92b40713acc5af89985d4b786", "abcde"}, + {"e80b5017098950fc58aad83c8c14978e", "abcdef"}, + {"7ac66c0f148de9519b8bd264312c4d64", "abcdefg"}, + {"e8dc4081b13434b45189a720b77b6818", "abcdefgh"}, + {"8aa99b1f439ff71293e95357bac6fd94", "abcdefghi"}, + {"a925576942e94b2ef57a066101b48876", "abcdefghij"}, + {"d747fc1719c7eacb84058196cfe56d57", "Discard medicine more than two years old."}, + {"bff2dcb37ef3a44ba43ab144768ca837", "He who has a shady past knows that nice guys finish last."}, + {"0441015ecb54a7342d017ed1bcfdbea5", "I wouldn't marry him with a ten foot pole."}, + {"9e3cac8e9e9757a60c3ea391130d3689", "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"}, + {"a0f04459b031f916a59a35cc482dc039", "The days of the digital watch are numbered. -Tom Stoppard"}, + {"e7a48e0fe884faf31475d2a04b1362cc", "Nepal premier won't resign."}, + {"637d2fe925c07c113800509964fb0e06", "For every action there is an equal and opposite government program."}, + {"834a8d18d5c6562119cf4c7f5086cb71", "His money is twice tainted: 'taint yours and 'taint mine."}, + {"de3a4d2fd6c73ec2db2abad23b444281", "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"}, + {"acf203f997e2cf74ea3aff86985aefaf", "It's a tiny change to the code and not completely disgusting. - Bob Manchek"}, + {"e1c1384cb4d2221dfdd7c795a4222c9a", "size: a.out: bad magic"}, + {"c90f3ddecc54f34228c063d7525bf644", "The major problem is with sendmail. -Mark Horton"}, + {"cdf7ab6c1fd49bd9933c43f3ea5af185", "Give me a rock, paper and scissors and I will move the world. CCFestoon"}, + {"83bc85234942fc883c063cbd7f0ad5d0", "If the enemy is within range, then so are you."}, + {"277cbe255686b48dd7e8f389394d9299", "It's well we cannot hear the screams/That we create in others' dreams."}, + {"fd3fb0a7ffb8af16603f3d3af98f8e1f", "You remind me of a TV show, but that's all right: I watch it anyway."}, + {"469b13a78ebf297ecda64d4723655154", "C is as portable as Stonehedge!!"}, + {"63eb3a2f466410104731c4b037600110", "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"}, + {"72c2ed7592debca1c90fc0100f931a2f", "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"}, + {"132f7619d33b523b1d9e5bd8e0928355", "How can you write a big system without C++? -Paul Glick"}, +} + +func TestGolden(t *testing.T) { + for i := 0; i < len(golden); i++ { + g := golden[i] + s := fmt.Sprintf("%x", Sum([]byte(g.in))) + if s != g.out { + t.Fatalf("Sum function: md5(%s) = %s want %s", g.in, s, g.out) + } + c := New() + buf := make([]byte, len(g.in)+4) + for j := 0; j < 3+4; j++ { + if j < 2 { + io.WriteString(c, g.in) + } else if j == 2 { + io.WriteString(c, g.in[0:len(g.in)/2]) + c.Sum(nil) + io.WriteString(c, g.in[len(g.in)/2:]) + } else if j > 2 { + // test unaligned write + buf = buf[1:] + copy(buf, g.in) + c.Write(buf[:len(g.in)]) + } + s := fmt.Sprintf("%x", c.Sum(nil)) + if s != g.out { + t.Fatalf("md5[%d](%s) = %s want %s", j, g.in, s, g.out) + } + c.Reset() + } + } +} + +func TestLarge(t *testing.T) { + const N = 10000 + ok := "2bb571599a4180e1d542f76904adc3df" // md5sum of "0123456789" * 1000 + block := make([]byte, 10004) + c := New() + for offset := 0; offset < 4; offset++ { + for i := 0; i < N; i++ { + block[offset+i] = '0' + byte(i%10) + } + for blockSize := 10; blockSize <= N; blockSize *= 10 { + blocks := N / blockSize + b := block[offset : offset+blockSize] + c.Reset() + for i := 0; i < blocks; i++ { + c.Write(b) + } + s := fmt.Sprintf("%x", c.Sum(nil)) + if s != ok { + t.Fatalf("md5 TestLarge offset=%d, blockSize=%d = %s want %s", offset, blockSize, s, ok) + } + } + } +} + +// Tests that blockGeneric (pure Go) and block (in assembly for amd64, 386, arm) match. +func TestBlockGeneric(t *testing.T) { + gen, asm := New().(*digest), New().(*digest) + buf := make([]byte, BlockSize*20) // arbitrary factor + rand.Read(buf) + blockGeneric(gen, buf) + block(asm, buf) + if *gen != *asm { + t.Error("block and blockGeneric resulted in different states") + } +} + +var bench = New() +var buf = make([]byte, 8192+1) +var sum = make([]byte, bench.Size()) + +func benchmarkSize(b *testing.B, size int, unaligned bool) { + b.SetBytes(int64(size)) + buf := buf + if unaligned { + if uintptr(unsafe.Pointer(&buf[0]))&(unsafe.Alignof(uint32(0))-1) == 0 { + buf = buf[1:] + } + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + bench.Reset() + bench.Write(buf[:size]) + bench.Sum(sum[:0]) + } +} + +func BenchmarkHash8Bytes(b *testing.B) { + benchmarkSize(b, 8, false) +} + +func BenchmarkHash1K(b *testing.B) { + benchmarkSize(b, 1024, false) +} + +func BenchmarkHash8K(b *testing.B) { + benchmarkSize(b, 8192, false) +} + +func BenchmarkHash8BytesUnaligned(b *testing.B) { + benchmarkSize(b, 8, true) +} + +func BenchmarkHash1KUnaligned(b *testing.B) { + benchmarkSize(b, 1024, true) +} + +func BenchmarkHash8KUnaligned(b *testing.B) { + benchmarkSize(b, 8192, true) +} diff --git a/src/crypto/md5/md5block.go b/src/crypto/md5/md5block.go new file mode 100644 index 000000000..64e1e7c1e --- /dev/null +++ b/src/crypto/md5/md5block.go @@ -0,0 +1,265 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DO NOT EDIT. +// Generate with: go run gen.go -full -output md5block.go + +package md5 + +import ( + "runtime" + "unsafe" +) + +const x86 = runtime.GOARCH == "amd64" || runtime.GOARCH == "386" + +var littleEndian bool + +func init() { + x := uint32(0x04030201) + y := [4]byte{0x1, 0x2, 0x3, 0x4} + littleEndian = *(*[4]byte)(unsafe.Pointer(&x)) == y +} + +func blockGeneric(dig *digest, p []byte) { + a := dig.s[0] + b := dig.s[1] + c := dig.s[2] + d := dig.s[3] + var X *[16]uint32 + var xbuf [16]uint32 + for len(p) >= chunk { + aa, bb, cc, dd := a, b, c, d + + // This is a constant condition - it is not evaluated on each iteration. + if x86 { + // MD5 was designed so that x86 processors can just iterate + // over the block data directly as uint32s, and we generate + // less code and run 1.3x faster if we take advantage of that. + // My apologies. + X = (*[16]uint32)(unsafe.Pointer(&p[0])) + } else if littleEndian && uintptr(unsafe.Pointer(&p[0]))&(unsafe.Alignof(uint32(0))-1) == 0 { + X = (*[16]uint32)(unsafe.Pointer(&p[0])) + } else { + X = &xbuf + j := 0 + for i := 0; i < 16; i++ { + X[i&15] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24 + j += 4 + } + } + + // Round 1. + + a += (((c ^ d) & b) ^ d) + X[0] + 3614090360 + a = a<<7 | a>>(32-7) + b + + d += (((b ^ c) & a) ^ c) + X[1] + 3905402710 + d = d<<12 | d>>(32-12) + a + + c += (((a ^ b) & d) ^ b) + X[2] + 606105819 + c = c<<17 | c>>(32-17) + d + + b += (((d ^ a) & c) ^ a) + X[3] + 3250441966 + b = b<<22 | b>>(32-22) + c + + a += (((c ^ d) & b) ^ d) + X[4] + 4118548399 + a = a<<7 | a>>(32-7) + b + + d += (((b ^ c) & a) ^ c) + X[5] + 1200080426 + d = d<<12 | d>>(32-12) + a + + c += (((a ^ b) & d) ^ b) + X[6] + 2821735955 + c = c<<17 | c>>(32-17) + d + + b += (((d ^ a) & c) ^ a) + X[7] + 4249261313 + b = b<<22 | b>>(32-22) + c + + a += (((c ^ d) & b) ^ d) + X[8] + 1770035416 + a = a<<7 | a>>(32-7) + b + + d += (((b ^ c) & a) ^ c) + X[9] + 2336552879 + d = d<<12 | d>>(32-12) + a + + c += (((a ^ b) & d) ^ b) + X[10] + 4294925233 + c = c<<17 | c>>(32-17) + d + + b += (((d ^ a) & c) ^ a) + X[11] + 2304563134 + b = b<<22 | b>>(32-22) + c + + a += (((c ^ d) & b) ^ d) + X[12] + 1804603682 + a = a<<7 | a>>(32-7) + b + + d += (((b ^ c) & a) ^ c) + X[13] + 4254626195 + d = d<<12 | d>>(32-12) + a + + c += (((a ^ b) & d) ^ b) + X[14] + 2792965006 + c = c<<17 | c>>(32-17) + d + + b += (((d ^ a) & c) ^ a) + X[15] + 1236535329 + b = b<<22 | b>>(32-22) + c + + // Round 2. + + a += (((b ^ c) & d) ^ c) + X[(1+5*0)&15] + 4129170786 + a = a<<5 | a>>(32-5) + b + + d += (((a ^ b) & c) ^ b) + X[(1+5*1)&15] + 3225465664 + d = d<<9 | d>>(32-9) + a + + c += (((d ^ a) & b) ^ a) + X[(1+5*2)&15] + 643717713 + c = c<<14 | c>>(32-14) + d + + b += (((c ^ d) & a) ^ d) + X[(1+5*3)&15] + 3921069994 + b = b<<20 | b>>(32-20) + c + + a += (((b ^ c) & d) ^ c) + X[(1+5*4)&15] + 3593408605 + a = a<<5 | a>>(32-5) + b + + d += (((a ^ b) & c) ^ b) + X[(1+5*5)&15] + 38016083 + d = d<<9 | d>>(32-9) + a + + c += (((d ^ a) & b) ^ a) + X[(1+5*6)&15] + 3634488961 + c = c<<14 | c>>(32-14) + d + + b += (((c ^ d) & a) ^ d) + X[(1+5*7)&15] + 3889429448 + b = b<<20 | b>>(32-20) + c + + a += (((b ^ c) & d) ^ c) + X[(1+5*8)&15] + 568446438 + a = a<<5 | a>>(32-5) + b + + d += (((a ^ b) & c) ^ b) + X[(1+5*9)&15] + 3275163606 + d = d<<9 | d>>(32-9) + a + + c += (((d ^ a) & b) ^ a) + X[(1+5*10)&15] + 4107603335 + c = c<<14 | c>>(32-14) + d + + b += (((c ^ d) & a) ^ d) + X[(1+5*11)&15] + 1163531501 + b = b<<20 | b>>(32-20) + c + + a += (((b ^ c) & d) ^ c) + X[(1+5*12)&15] + 2850285829 + a = a<<5 | a>>(32-5) + b + + d += (((a ^ b) & c) ^ b) + X[(1+5*13)&15] + 4243563512 + d = d<<9 | d>>(32-9) + a + + c += (((d ^ a) & b) ^ a) + X[(1+5*14)&15] + 1735328473 + c = c<<14 | c>>(32-14) + d + + b += (((c ^ d) & a) ^ d) + X[(1+5*15)&15] + 2368359562 + b = b<<20 | b>>(32-20) + c + + // Round 3. + + a += (b ^ c ^ d) + X[(5+3*0)&15] + 4294588738 + a = a<<4 | a>>(32-4) + b + + d += (a ^ b ^ c) + X[(5+3*1)&15] + 2272392833 + d = d<<11 | d>>(32-11) + a + + c += (d ^ a ^ b) + X[(5+3*2)&15] + 1839030562 + c = c<<16 | c>>(32-16) + d + + b += (c ^ d ^ a) + X[(5+3*3)&15] + 4259657740 + b = b<<23 | b>>(32-23) + c + + a += (b ^ c ^ d) + X[(5+3*4)&15] + 2763975236 + a = a<<4 | a>>(32-4) + b + + d += (a ^ b ^ c) + X[(5+3*5)&15] + 1272893353 + d = d<<11 | d>>(32-11) + a + + c += (d ^ a ^ b) + X[(5+3*6)&15] + 4139469664 + c = c<<16 | c>>(32-16) + d + + b += (c ^ d ^ a) + X[(5+3*7)&15] + 3200236656 + b = b<<23 | b>>(32-23) + c + + a += (b ^ c ^ d) + X[(5+3*8)&15] + 681279174 + a = a<<4 | a>>(32-4) + b + + d += (a ^ b ^ c) + X[(5+3*9)&15] + 3936430074 + d = d<<11 | d>>(32-11) + a + + c += (d ^ a ^ b) + X[(5+3*10)&15] + 3572445317 + c = c<<16 | c>>(32-16) + d + + b += (c ^ d ^ a) + X[(5+3*11)&15] + 76029189 + b = b<<23 | b>>(32-23) + c + + a += (b ^ c ^ d) + X[(5+3*12)&15] + 3654602809 + a = a<<4 | a>>(32-4) + b + + d += (a ^ b ^ c) + X[(5+3*13)&15] + 3873151461 + d = d<<11 | d>>(32-11) + a + + c += (d ^ a ^ b) + X[(5+3*14)&15] + 530742520 + c = c<<16 | c>>(32-16) + d + + b += (c ^ d ^ a) + X[(5+3*15)&15] + 3299628645 + b = b<<23 | b>>(32-23) + c + + // Round 4. + + a += (c ^ (b | ^d)) + X[(7*0)&15] + 4096336452 + a = a<<6 | a>>(32-6) + b + + d += (b ^ (a | ^c)) + X[(7*1)&15] + 1126891415 + d = d<<10 | d>>(32-10) + a + + c += (a ^ (d | ^b)) + X[(7*2)&15] + 2878612391 + c = c<<15 | c>>(32-15) + d + + b += (d ^ (c | ^a)) + X[(7*3)&15] + 4237533241 + b = b<<21 | b>>(32-21) + c + + a += (c ^ (b | ^d)) + X[(7*4)&15] + 1700485571 + a = a<<6 | a>>(32-6) + b + + d += (b ^ (a | ^c)) + X[(7*5)&15] + 2399980690 + d = d<<10 | d>>(32-10) + a + + c += (a ^ (d | ^b)) + X[(7*6)&15] + 4293915773 + c = c<<15 | c>>(32-15) + d + + b += (d ^ (c | ^a)) + X[(7*7)&15] + 2240044497 + b = b<<21 | b>>(32-21) + c + + a += (c ^ (b | ^d)) + X[(7*8)&15] + 1873313359 + a = a<<6 | a>>(32-6) + b + + d += (b ^ (a | ^c)) + X[(7*9)&15] + 4264355552 + d = d<<10 | d>>(32-10) + a + + c += (a ^ (d | ^b)) + X[(7*10)&15] + 2734768916 + c = c<<15 | c>>(32-15) + d + + b += (d ^ (c | ^a)) + X[(7*11)&15] + 1309151649 + b = b<<21 | b>>(32-21) + c + + a += (c ^ (b | ^d)) + X[(7*12)&15] + 4149444226 + a = a<<6 | a>>(32-6) + b + + d += (b ^ (a | ^c)) + X[(7*13)&15] + 3174756917 + d = d<<10 | d>>(32-10) + a + + c += (a ^ (d | ^b)) + X[(7*14)&15] + 718787259 + c = c<<15 | c>>(32-15) + d + + b += (d ^ (c | ^a)) + X[(7*15)&15] + 3951481745 + b = b<<21 | b>>(32-21) + c + + a += aa + b += bb + c += cc + d += dd + + p = p[chunk:] + } + + dig.s[0] = a + dig.s[1] = b + dig.s[2] = c + dig.s[3] = d +} diff --git a/src/crypto/md5/md5block_386.s b/src/crypto/md5/md5block_386.s new file mode 100644 index 000000000..8e426d148 --- /dev/null +++ b/src/crypto/md5/md5block_386.s @@ -0,0 +1,182 @@ +// Original source: +// http://www.zorinaq.com/papers/md5-amd64.html +// http://www.zorinaq.com/papers/md5-amd64.tar.bz2 +// +// Translated from Perl generating GNU assembly into +// #defines generating 8a assembly, and adjusted for 386, +// by the Go Authors. + +#include "textflag.h" + +// MD5 optimized for AMD64. +// +// Author: Marc Bevand <bevand_m (at) epita.fr> +// Licence: I hereby disclaim the copyright on this code and place it +// in the public domain. + +#define ROUND1(a, b, c, d, index, const, shift) \ + XORL c, BP; \ + LEAL const(a)(DI*1), a; \ + ANDL b, BP; \ + XORL d, BP; \ + MOVL (index*4)(SI), DI; \ + ADDL BP, a; \ + ROLL $shift, a; \ + MOVL c, BP; \ + ADDL b, a + +#define ROUND2(a, b, c, d, index, const, shift) \ + LEAL const(a)(DI*1),a; \ + MOVL d, DI; \ + ANDL b, DI; \ + MOVL d, BP; \ + NOTL BP; \ + ANDL c, BP; \ + ORL DI, BP; \ + MOVL (index*4)(SI),DI; \ + ADDL BP, a; \ + ROLL $shift, a; \ + ADDL b, a + +#define ROUND3(a, b, c, d, index, const, shift) \ + LEAL const(a)(DI*1),a; \ + MOVL (index*4)(SI),DI; \ + XORL d, BP; \ + XORL b, BP; \ + ADDL BP, a; \ + ROLL $shift, a; \ + MOVL b, BP; \ + ADDL b, a + +#define ROUND4(a, b, c, d, index, const, shift) \ + LEAL const(a)(DI*1),a; \ + ORL b, BP; \ + XORL c, BP; \ + ADDL BP, a; \ + MOVL (index*4)(SI),DI; \ + MOVL $0xffffffff, BP; \ + ROLL $shift, a; \ + XORL c, BP; \ + ADDL b, a + +TEXT ·block(SB),NOSPLIT,$24-16 + MOVL dig+0(FP), BP + MOVL p+4(FP), SI + MOVL p_len+8(FP), DX + SHRL $6, DX + SHLL $6, DX + + LEAL (SI)(DX*1), DI + MOVL (0*4)(BP), AX + MOVL (1*4)(BP), BX + MOVL (2*4)(BP), CX + MOVL (3*4)(BP), DX + + CMPL SI, DI + JEQ end + + MOVL DI, 16(SP) + +loop: + MOVL AX, 0(SP) + MOVL BX, 4(SP) + MOVL CX, 8(SP) + MOVL DX, 12(SP) + + MOVL (0*4)(SI), DI + MOVL DX, BP + + ROUND1(AX,BX,CX,DX, 1,0xd76aa478, 7); + ROUND1(DX,AX,BX,CX, 2,0xe8c7b756,12); + ROUND1(CX,DX,AX,BX, 3,0x242070db,17); + ROUND1(BX,CX,DX,AX, 4,0xc1bdceee,22); + ROUND1(AX,BX,CX,DX, 5,0xf57c0faf, 7); + ROUND1(DX,AX,BX,CX, 6,0x4787c62a,12); + ROUND1(CX,DX,AX,BX, 7,0xa8304613,17); + ROUND1(BX,CX,DX,AX, 8,0xfd469501,22); + ROUND1(AX,BX,CX,DX, 9,0x698098d8, 7); + ROUND1(DX,AX,BX,CX,10,0x8b44f7af,12); + ROUND1(CX,DX,AX,BX,11,0xffff5bb1,17); + ROUND1(BX,CX,DX,AX,12,0x895cd7be,22); + ROUND1(AX,BX,CX,DX,13,0x6b901122, 7); + ROUND1(DX,AX,BX,CX,14,0xfd987193,12); + ROUND1(CX,DX,AX,BX,15,0xa679438e,17); + ROUND1(BX,CX,DX,AX, 0,0x49b40821,22); + + MOVL (1*4)(SI), DI + MOVL DX, BP + + ROUND2(AX,BX,CX,DX, 6,0xf61e2562, 5); + ROUND2(DX,AX,BX,CX,11,0xc040b340, 9); + ROUND2(CX,DX,AX,BX, 0,0x265e5a51,14); + ROUND2(BX,CX,DX,AX, 5,0xe9b6c7aa,20); + ROUND2(AX,BX,CX,DX,10,0xd62f105d, 5); + ROUND2(DX,AX,BX,CX,15, 0x2441453, 9); + ROUND2(CX,DX,AX,BX, 4,0xd8a1e681,14); + ROUND2(BX,CX,DX,AX, 9,0xe7d3fbc8,20); + ROUND2(AX,BX,CX,DX,14,0x21e1cde6, 5); + ROUND2(DX,AX,BX,CX, 3,0xc33707d6, 9); + ROUND2(CX,DX,AX,BX, 8,0xf4d50d87,14); + ROUND2(BX,CX,DX,AX,13,0x455a14ed,20); + ROUND2(AX,BX,CX,DX, 2,0xa9e3e905, 5); + ROUND2(DX,AX,BX,CX, 7,0xfcefa3f8, 9); + ROUND2(CX,DX,AX,BX,12,0x676f02d9,14); + ROUND2(BX,CX,DX,AX, 0,0x8d2a4c8a,20); + + MOVL (5*4)(SI), DI + MOVL CX, BP + + ROUND3(AX,BX,CX,DX, 8,0xfffa3942, 4); + ROUND3(DX,AX,BX,CX,11,0x8771f681,11); + ROUND3(CX,DX,AX,BX,14,0x6d9d6122,16); + ROUND3(BX,CX,DX,AX, 1,0xfde5380c,23); + ROUND3(AX,BX,CX,DX, 4,0xa4beea44, 4); + ROUND3(DX,AX,BX,CX, 7,0x4bdecfa9,11); + ROUND3(CX,DX,AX,BX,10,0xf6bb4b60,16); + ROUND3(BX,CX,DX,AX,13,0xbebfbc70,23); + ROUND3(AX,BX,CX,DX, 0,0x289b7ec6, 4); + ROUND3(DX,AX,BX,CX, 3,0xeaa127fa,11); + ROUND3(CX,DX,AX,BX, 6,0xd4ef3085,16); + ROUND3(BX,CX,DX,AX, 9, 0x4881d05,23); + ROUND3(AX,BX,CX,DX,12,0xd9d4d039, 4); + ROUND3(DX,AX,BX,CX,15,0xe6db99e5,11); + ROUND3(CX,DX,AX,BX, 2,0x1fa27cf8,16); + ROUND3(BX,CX,DX,AX, 0,0xc4ac5665,23); + + MOVL (0*4)(SI), DI + MOVL $0xffffffff, BP + XORL DX, BP + + ROUND4(AX,BX,CX,DX, 7,0xf4292244, 6); + ROUND4(DX,AX,BX,CX,14,0x432aff97,10); + ROUND4(CX,DX,AX,BX, 5,0xab9423a7,15); + ROUND4(BX,CX,DX,AX,12,0xfc93a039,21); + ROUND4(AX,BX,CX,DX, 3,0x655b59c3, 6); + ROUND4(DX,AX,BX,CX,10,0x8f0ccc92,10); + ROUND4(CX,DX,AX,BX, 1,0xffeff47d,15); + ROUND4(BX,CX,DX,AX, 8,0x85845dd1,21); + ROUND4(AX,BX,CX,DX,15,0x6fa87e4f, 6); + ROUND4(DX,AX,BX,CX, 6,0xfe2ce6e0,10); + ROUND4(CX,DX,AX,BX,13,0xa3014314,15); + ROUND4(BX,CX,DX,AX, 4,0x4e0811a1,21); + ROUND4(AX,BX,CX,DX,11,0xf7537e82, 6); + ROUND4(DX,AX,BX,CX, 2,0xbd3af235,10); + ROUND4(CX,DX,AX,BX, 9,0x2ad7d2bb,15); + ROUND4(BX,CX,DX,AX, 0,0xeb86d391,21); + + ADDL 0(SP), AX + ADDL 4(SP), BX + ADDL 8(SP), CX + ADDL 12(SP), DX + + ADDL $64, SI + CMPL SI, 16(SP) + JB loop + +end: + MOVL dig+0(FP), BP + MOVL AX, (0*4)(BP) + MOVL BX, (1*4)(BP) + MOVL CX, (2*4)(BP) + MOVL DX, (3*4)(BP) + RET diff --git a/src/crypto/md5/md5block_amd64.s b/src/crypto/md5/md5block_amd64.s new file mode 100644 index 000000000..a3ae7d97b --- /dev/null +++ b/src/crypto/md5/md5block_amd64.s @@ -0,0 +1,179 @@ +// Original source: +// http://www.zorinaq.com/papers/md5-amd64.html +// http://www.zorinaq.com/papers/md5-amd64.tar.bz2 +// +// Translated from Perl generating GNU assembly into +// #defines generating 6a assembly by the Go Authors. + +#include "textflag.h" + +// MD5 optimized for AMD64. +// +// Author: Marc Bevand <bevand_m (at) epita.fr> +// Licence: I hereby disclaim the copyright on this code and place it +// in the public domain. + +TEXT ·block(SB),NOSPLIT,$0-32 + MOVQ dig+0(FP), BP + MOVQ p+8(FP), SI + MOVQ p_len+16(FP), DX + SHRQ $6, DX + SHLQ $6, DX + + LEAQ (SI)(DX*1), DI + MOVL (0*4)(BP), AX + MOVL (1*4)(BP), BX + MOVL (2*4)(BP), CX + MOVL (3*4)(BP), DX + + CMPQ SI, DI + JEQ end + +loop: + MOVL AX, R12 + MOVL BX, R13 + MOVL CX, R14 + MOVL DX, R15 + + MOVL (0*4)(SI), R8 + MOVL DX, R9 + +#define ROUND1(a, b, c, d, index, const, shift) \ + XORL c, R9; \ + LEAL const(a)(R8*1), a; \ + ANDL b, R9; \ + XORL d, R9; \ + MOVL (index*4)(SI), R8; \ + ADDL R9, a; \ + ROLL $shift, a; \ + MOVL c, R9; \ + ADDL b, a + + ROUND1(AX,BX,CX,DX, 1,0xd76aa478, 7); + ROUND1(DX,AX,BX,CX, 2,0xe8c7b756,12); + ROUND1(CX,DX,AX,BX, 3,0x242070db,17); + ROUND1(BX,CX,DX,AX, 4,0xc1bdceee,22); + ROUND1(AX,BX,CX,DX, 5,0xf57c0faf, 7); + ROUND1(DX,AX,BX,CX, 6,0x4787c62a,12); + ROUND1(CX,DX,AX,BX, 7,0xa8304613,17); + ROUND1(BX,CX,DX,AX, 8,0xfd469501,22); + ROUND1(AX,BX,CX,DX, 9,0x698098d8, 7); + ROUND1(DX,AX,BX,CX,10,0x8b44f7af,12); + ROUND1(CX,DX,AX,BX,11,0xffff5bb1,17); + ROUND1(BX,CX,DX,AX,12,0x895cd7be,22); + ROUND1(AX,BX,CX,DX,13,0x6b901122, 7); + ROUND1(DX,AX,BX,CX,14,0xfd987193,12); + ROUND1(CX,DX,AX,BX,15,0xa679438e,17); + ROUND1(BX,CX,DX,AX, 0,0x49b40821,22); + + MOVL (1*4)(SI), R8 + MOVL DX, R9 + MOVL DX, R10 + +#define ROUND2(a, b, c, d, index, const, shift) \ + NOTL R9; \ + LEAL const(a)(R8*1),a; \ + ANDL b, R10; \ + ANDL c, R9; \ + MOVL (index*4)(SI),R8; \ + ORL R9, R10; \ + MOVL c, R9; \ + ADDL R10, a; \ + MOVL c, R10; \ + ROLL $shift, a; \ + ADDL b, a + + ROUND2(AX,BX,CX,DX, 6,0xf61e2562, 5); + ROUND2(DX,AX,BX,CX,11,0xc040b340, 9); + ROUND2(CX,DX,AX,BX, 0,0x265e5a51,14); + ROUND2(BX,CX,DX,AX, 5,0xe9b6c7aa,20); + ROUND2(AX,BX,CX,DX,10,0xd62f105d, 5); + ROUND2(DX,AX,BX,CX,15, 0x2441453, 9); + ROUND2(CX,DX,AX,BX, 4,0xd8a1e681,14); + ROUND2(BX,CX,DX,AX, 9,0xe7d3fbc8,20); + ROUND2(AX,BX,CX,DX,14,0x21e1cde6, 5); + ROUND2(DX,AX,BX,CX, 3,0xc33707d6, 9); + ROUND2(CX,DX,AX,BX, 8,0xf4d50d87,14); + ROUND2(BX,CX,DX,AX,13,0x455a14ed,20); + ROUND2(AX,BX,CX,DX, 2,0xa9e3e905, 5); + ROUND2(DX,AX,BX,CX, 7,0xfcefa3f8, 9); + ROUND2(CX,DX,AX,BX,12,0x676f02d9,14); + ROUND2(BX,CX,DX,AX, 0,0x8d2a4c8a,20); + + MOVL (5*4)(SI), R8 + MOVL CX, R9 + +#define ROUND3(a, b, c, d, index, const, shift) \ + LEAL const(a)(R8*1),a; \ + MOVL (index*4)(SI),R8; \ + XORL d, R9; \ + XORL b, R9; \ + ADDL R9, a; \ + ROLL $shift, a; \ + MOVL b, R9; \ + ADDL b, a + + ROUND3(AX,BX,CX,DX, 8,0xfffa3942, 4); + ROUND3(DX,AX,BX,CX,11,0x8771f681,11); + ROUND3(CX,DX,AX,BX,14,0x6d9d6122,16); + ROUND3(BX,CX,DX,AX, 1,0xfde5380c,23); + ROUND3(AX,BX,CX,DX, 4,0xa4beea44, 4); + ROUND3(DX,AX,BX,CX, 7,0x4bdecfa9,11); + ROUND3(CX,DX,AX,BX,10,0xf6bb4b60,16); + ROUND3(BX,CX,DX,AX,13,0xbebfbc70,23); + ROUND3(AX,BX,CX,DX, 0,0x289b7ec6, 4); + ROUND3(DX,AX,BX,CX, 3,0xeaa127fa,11); + ROUND3(CX,DX,AX,BX, 6,0xd4ef3085,16); + ROUND3(BX,CX,DX,AX, 9, 0x4881d05,23); + ROUND3(AX,BX,CX,DX,12,0xd9d4d039, 4); + ROUND3(DX,AX,BX,CX,15,0xe6db99e5,11); + ROUND3(CX,DX,AX,BX, 2,0x1fa27cf8,16); + ROUND3(BX,CX,DX,AX, 0,0xc4ac5665,23); + + MOVL (0*4)(SI), R8 + MOVL $0xffffffff, R9 + XORL DX, R9 + +#define ROUND4(a, b, c, d, index, const, shift) \ + LEAL const(a)(R8*1),a; \ + ORL b, R9; \ + XORL c, R9; \ + ADDL R9, a; \ + MOVL (index*4)(SI),R8; \ + MOVL $0xffffffff, R9; \ + ROLL $shift, a; \ + XORL c, R9; \ + ADDL b, a + + ROUND4(AX,BX,CX,DX, 7,0xf4292244, 6); + ROUND4(DX,AX,BX,CX,14,0x432aff97,10); + ROUND4(CX,DX,AX,BX, 5,0xab9423a7,15); + ROUND4(BX,CX,DX,AX,12,0xfc93a039,21); + ROUND4(AX,BX,CX,DX, 3,0x655b59c3, 6); + ROUND4(DX,AX,BX,CX,10,0x8f0ccc92,10); + ROUND4(CX,DX,AX,BX, 1,0xffeff47d,15); + ROUND4(BX,CX,DX,AX, 8,0x85845dd1,21); + ROUND4(AX,BX,CX,DX,15,0x6fa87e4f, 6); + ROUND4(DX,AX,BX,CX, 6,0xfe2ce6e0,10); + ROUND4(CX,DX,AX,BX,13,0xa3014314,15); + ROUND4(BX,CX,DX,AX, 4,0x4e0811a1,21); + ROUND4(AX,BX,CX,DX,11,0xf7537e82, 6); + ROUND4(DX,AX,BX,CX, 2,0xbd3af235,10); + ROUND4(CX,DX,AX,BX, 9,0x2ad7d2bb,15); + ROUND4(BX,CX,DX,AX, 0,0xeb86d391,21); + + ADDL R12, AX + ADDL R13, BX + ADDL R14, CX + ADDL R15, DX + + ADDQ $64, SI + CMPQ SI, DI + JB loop + +end: + MOVL AX, (0*4)(BP) + MOVL BX, (1*4)(BP) + MOVL CX, (2*4)(BP) + MOVL DX, (3*4)(BP) + RET diff --git a/src/crypto/md5/md5block_amd64p32.s b/src/crypto/md5/md5block_amd64p32.s new file mode 100644 index 000000000..d918a67c5 --- /dev/null +++ b/src/crypto/md5/md5block_amd64p32.s @@ -0,0 +1,184 @@ +// Original source: +// http://www.zorinaq.com/papers/md5-amd64.html +// http://www.zorinaq.com/papers/md5-amd64.tar.bz2 +// +// Translated from Perl generating GNU assembly into +// #defines generating 6a assembly by the Go Authors. +// +// Restrictions to make code safe for Native Client: +// replace BP with R11, reloaded before use at return. +// replace R15 with R11. + +#include "textflag.h" + +// MD5 optimized for AMD64. +// +// Author: Marc Bevand <bevand_m (at) epita.fr> +// Licence: I hereby disclaim the copyright on this code and place it +// in the public domain. + +TEXT ·block(SB),NOSPLIT,$0-32 + MOVL dig+0(FP), R11 + MOVL p+4(FP), SI + MOVL p_len+8(FP), DX + SHRQ $6, DX + SHLQ $6, DX + + LEAQ (SI)(DX*1), DI + MOVL (0*4)(R11), AX + MOVL (1*4)(R11), BX + MOVL (2*4)(R11), CX + MOVL (3*4)(R11), DX + + CMPQ SI, DI + JEQ end + +loop: + MOVL AX, R12 + MOVL BX, R13 + MOVL CX, R14 + MOVL DX, R11 + + MOVL (0*4)(SI), R8 + MOVL DX, R9 + +#define ROUND1(a, b, c, d, index, const, shift) \ + XORL c, R9; \ + LEAL const(a)(R8*1), a; \ + ANDL b, R9; \ + XORL d, R9; \ + MOVL (index*4)(SI), R8; \ + ADDL R9, a; \ + ROLL $shift, a; \ + MOVL c, R9; \ + ADDL b, a + + ROUND1(AX,BX,CX,DX, 1,0xd76aa478, 7); + ROUND1(DX,AX,BX,CX, 2,0xe8c7b756,12); + ROUND1(CX,DX,AX,BX, 3,0x242070db,17); + ROUND1(BX,CX,DX,AX, 4,0xc1bdceee,22); + ROUND1(AX,BX,CX,DX, 5,0xf57c0faf, 7); + ROUND1(DX,AX,BX,CX, 6,0x4787c62a,12); + ROUND1(CX,DX,AX,BX, 7,0xa8304613,17); + ROUND1(BX,CX,DX,AX, 8,0xfd469501,22); + ROUND1(AX,BX,CX,DX, 9,0x698098d8, 7); + ROUND1(DX,AX,BX,CX,10,0x8b44f7af,12); + ROUND1(CX,DX,AX,BX,11,0xffff5bb1,17); + ROUND1(BX,CX,DX,AX,12,0x895cd7be,22); + ROUND1(AX,BX,CX,DX,13,0x6b901122, 7); + ROUND1(DX,AX,BX,CX,14,0xfd987193,12); + ROUND1(CX,DX,AX,BX,15,0xa679438e,17); + ROUND1(BX,CX,DX,AX, 0,0x49b40821,22); + + MOVL (1*4)(SI), R8 + MOVL DX, R9 + MOVL DX, R10 + +#define ROUND2(a, b, c, d, index, const, shift) \ + NOTL R9; \ + LEAL const(a)(R8*1),a; \ + ANDL b, R10; \ + ANDL c, R9; \ + MOVL (index*4)(SI),R8; \ + ORL R9, R10; \ + MOVL c, R9; \ + ADDL R10, a; \ + MOVL c, R10; \ + ROLL $shift, a; \ + ADDL b, a + + ROUND2(AX,BX,CX,DX, 6,0xf61e2562, 5); + ROUND2(DX,AX,BX,CX,11,0xc040b340, 9); + ROUND2(CX,DX,AX,BX, 0,0x265e5a51,14); + ROUND2(BX,CX,DX,AX, 5,0xe9b6c7aa,20); + ROUND2(AX,BX,CX,DX,10,0xd62f105d, 5); + ROUND2(DX,AX,BX,CX,15, 0x2441453, 9); + ROUND2(CX,DX,AX,BX, 4,0xd8a1e681,14); + ROUND2(BX,CX,DX,AX, 9,0xe7d3fbc8,20); + ROUND2(AX,BX,CX,DX,14,0x21e1cde6, 5); + ROUND2(DX,AX,BX,CX, 3,0xc33707d6, 9); + ROUND2(CX,DX,AX,BX, 8,0xf4d50d87,14); + ROUND2(BX,CX,DX,AX,13,0x455a14ed,20); + ROUND2(AX,BX,CX,DX, 2,0xa9e3e905, 5); + ROUND2(DX,AX,BX,CX, 7,0xfcefa3f8, 9); + ROUND2(CX,DX,AX,BX,12,0x676f02d9,14); + ROUND2(BX,CX,DX,AX, 0,0x8d2a4c8a,20); + + MOVL (5*4)(SI), R8 + MOVL CX, R9 + +#define ROUND3(a, b, c, d, index, const, shift) \ + LEAL const(a)(R8*1),a; \ + MOVL (index*4)(SI),R8; \ + XORL d, R9; \ + XORL b, R9; \ + ADDL R9, a; \ + ROLL $shift, a; \ + MOVL b, R9; \ + ADDL b, a + + ROUND3(AX,BX,CX,DX, 8,0xfffa3942, 4); + ROUND3(DX,AX,BX,CX,11,0x8771f681,11); + ROUND3(CX,DX,AX,BX,14,0x6d9d6122,16); + ROUND3(BX,CX,DX,AX, 1,0xfde5380c,23); + ROUND3(AX,BX,CX,DX, 4,0xa4beea44, 4); + ROUND3(DX,AX,BX,CX, 7,0x4bdecfa9,11); + ROUND3(CX,DX,AX,BX,10,0xf6bb4b60,16); + ROUND3(BX,CX,DX,AX,13,0xbebfbc70,23); + ROUND3(AX,BX,CX,DX, 0,0x289b7ec6, 4); + ROUND3(DX,AX,BX,CX, 3,0xeaa127fa,11); + ROUND3(CX,DX,AX,BX, 6,0xd4ef3085,16); + ROUND3(BX,CX,DX,AX, 9, 0x4881d05,23); + ROUND3(AX,BX,CX,DX,12,0xd9d4d039, 4); + ROUND3(DX,AX,BX,CX,15,0xe6db99e5,11); + ROUND3(CX,DX,AX,BX, 2,0x1fa27cf8,16); + ROUND3(BX,CX,DX,AX, 0,0xc4ac5665,23); + + MOVL (0*4)(SI), R8 + MOVL $0xffffffff, R9 + XORL DX, R9 + +#define ROUND4(a, b, c, d, index, const, shift) \ + LEAL const(a)(R8*1),a; \ + ORL b, R9; \ + XORL c, R9; \ + ADDL R9, a; \ + MOVL (index*4)(SI),R8; \ + MOVL $0xffffffff, R9; \ + ROLL $shift, a; \ + XORL c, R9; \ + ADDL b, a + + ROUND4(AX,BX,CX,DX, 7,0xf4292244, 6); + ROUND4(DX,AX,BX,CX,14,0x432aff97,10); + ROUND4(CX,DX,AX,BX, 5,0xab9423a7,15); + ROUND4(BX,CX,DX,AX,12,0xfc93a039,21); + ROUND4(AX,BX,CX,DX, 3,0x655b59c3, 6); + ROUND4(DX,AX,BX,CX,10,0x8f0ccc92,10); + ROUND4(CX,DX,AX,BX, 1,0xffeff47d,15); + ROUND4(BX,CX,DX,AX, 8,0x85845dd1,21); + ROUND4(AX,BX,CX,DX,15,0x6fa87e4f, 6); + ROUND4(DX,AX,BX,CX, 6,0xfe2ce6e0,10); + ROUND4(CX,DX,AX,BX,13,0xa3014314,15); + ROUND4(BX,CX,DX,AX, 4,0x4e0811a1,21); + ROUND4(AX,BX,CX,DX,11,0xf7537e82, 6); + ROUND4(DX,AX,BX,CX, 2,0xbd3af235,10); + ROUND4(CX,DX,AX,BX, 9,0x2ad7d2bb,15); + ROUND4(BX,CX,DX,AX, 0,0xeb86d391,21); + + ADDL R12, AX + ADDL R13, BX + ADDL R14, CX + ADDL R11, DX + + ADDQ $64, SI + CMPQ SI, DI + JB loop + +end: + MOVL dig+0(FP), R11 + MOVL AX, (0*4)(R11) + MOVL BX, (1*4)(R11) + MOVL CX, (2*4)(R11) + MOVL DX, (3*4)(R11) + RET diff --git a/src/crypto/md5/md5block_arm.s b/src/crypto/md5/md5block_arm.s new file mode 100644 index 000000000..3b26e549b --- /dev/null +++ b/src/crypto/md5/md5block_arm.s @@ -0,0 +1,299 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// ARM version of md5block.go + +#include "textflag.h" + +// Register definitions +table = 0 // Pointer to MD5 constants table +data = 1 // Pointer to data to hash +a = 2 // MD5 accumulator +b = 3 // MD5 accumulator +c = 4 // MD5 accumulator +d = 5 // MD5 accumulator +c0 = 6 // MD5 constant +c1 = 7 // MD5 constant +c2 = 8 // MD5 constant +// r9, r10 are forbidden +// r11 is OK provided you check the assembler that no synthetic instructions use it +c3 = 11 // MD5 constant +t0 = 12 // temporary +t1 = 14 // temporary + +// func block(dig *digest, p []byte) +// 0(FP) is *digest +// 4(FP) is p.array (struct Slice) +// 8(FP) is p.len +//12(FP) is p.cap +// +// Stack frame +p_end = -4 // -4(SP) pointer to the end of data +p_data = -8 // -8(SP) current data pointer +buf = -8-4*16 //-72(SP) 16 words temporary buffer + // 3 words at 4..12(R13) for called routine parameters + +TEXT ·block(SB), NOSPLIT, $84-16 + MOVW p+4(FP), R(data) // pointer to the data + MOVW p_len+8(FP), R(t0) // number of bytes + ADD R(data), R(t0) + MOVW R(t0), p_end(SP) // pointer to end of data + +loop: + MOVW R(data), p_data(SP) // Save R(data) + AND.S $3, R(data), R(t0) // TST $3, R(data) not working see issue 5921 + BEQ aligned // aligned detected - skip copy + + // Copy the unaligned source data into the aligned temporary buffer + // memove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers + MOVW $buf(SP), R(table) // to + MOVW $64, R(c0) // n + MOVM.IB [R(table),R(data),R(c0)], (R13) + BL runtime·memmove(SB) + + // Point to the local aligned copy of the data + MOVW $buf(SP), R(data) + +aligned: + // Point to the table of constants + // A PC relative add would be cheaper than this + MOVW $·table(SB), R(table) + + // Load up initial MD5 accumulator + MOVW dig+0(FP), R(c0) + MOVM.IA (R(c0)), [R(a),R(b),R(c),R(d)] + +// a += (((c^d)&b)^d) + X[index] + const +// a = a<<shift | a>>(32-shift) + b +#define ROUND1(a, b, c, d, index, shift, const) \ + EOR R(c), R(d), R(t0) ; \ + AND R(b), R(t0) ; \ + EOR R(d), R(t0) ; \ + MOVW (index<<2)(R(data)), R(t1) ; \ + ADD R(t1), R(t0) ; \ + ADD R(const), R(t0) ; \ + ADD R(t0), R(a) ; \ + ADD R(a)@>(32-shift), R(b), R(a) ; + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND1(a, b, c, d, 0, 7, c0) + ROUND1(d, a, b, c, 1, 12, c1) + ROUND1(c, d, a, b, 2, 17, c2) + ROUND1(b, c, d, a, 3, 22, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND1(a, b, c, d, 4, 7, c0) + ROUND1(d, a, b, c, 5, 12, c1) + ROUND1(c, d, a, b, 6, 17, c2) + ROUND1(b, c, d, a, 7, 22, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND1(a, b, c, d, 8, 7, c0) + ROUND1(d, a, b, c, 9, 12, c1) + ROUND1(c, d, a, b, 10, 17, c2) + ROUND1(b, c, d, a, 11, 22, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND1(a, b, c, d, 12, 7, c0) + ROUND1(d, a, b, c, 13, 12, c1) + ROUND1(c, d, a, b, 14, 17, c2) + ROUND1(b, c, d, a, 15, 22, c3) + +// a += (((b^c)&d)^c) + X[index] + const +// a = a<<shift | a>>(32-shift) + b +#define ROUND2(a, b, c, d, index, shift, const) \ + EOR R(b), R(c), R(t0) ; \ + AND R(d), R(t0) ; \ + EOR R(c), R(t0) ; \ + MOVW (index<<2)(R(data)), R(t1) ; \ + ADD R(t1), R(t0) ; \ + ADD R(const), R(t0) ; \ + ADD R(t0), R(a) ; \ + ADD R(a)@>(32-shift), R(b), R(a) ; + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND2(a, b, c, d, 1, 5, c0) + ROUND2(d, a, b, c, 6, 9, c1) + ROUND2(c, d, a, b, 11, 14, c2) + ROUND2(b, c, d, a, 0, 20, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND2(a, b, c, d, 5, 5, c0) + ROUND2(d, a, b, c, 10, 9, c1) + ROUND2(c, d, a, b, 15, 14, c2) + ROUND2(b, c, d, a, 4, 20, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND2(a, b, c, d, 9, 5, c0) + ROUND2(d, a, b, c, 14, 9, c1) + ROUND2(c, d, a, b, 3, 14, c2) + ROUND2(b, c, d, a, 8, 20, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND2(a, b, c, d, 13, 5, c0) + ROUND2(d, a, b, c, 2, 9, c1) + ROUND2(c, d, a, b, 7, 14, c2) + ROUND2(b, c, d, a, 12, 20, c3) + +// a += (b^c^d) + X[index] + const +// a = a<<shift | a>>(32-shift) + b +#define ROUND3(a, b, c, d, index, shift, const) \ + EOR R(b), R(c), R(t0) ; \ + EOR R(d), R(t0) ; \ + MOVW (index<<2)(R(data)), R(t1) ; \ + ADD R(t1), R(t0) ; \ + ADD R(const), R(t0) ; \ + ADD R(t0), R(a) ; \ + ADD R(a)@>(32-shift), R(b), R(a) ; + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND3(a, b, c, d, 5, 4, c0) + ROUND3(d, a, b, c, 8, 11, c1) + ROUND3(c, d, a, b, 11, 16, c2) + ROUND3(b, c, d, a, 14, 23, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND3(a, b, c, d, 1, 4, c0) + ROUND3(d, a, b, c, 4, 11, c1) + ROUND3(c, d, a, b, 7, 16, c2) + ROUND3(b, c, d, a, 10, 23, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND3(a, b, c, d, 13, 4, c0) + ROUND3(d, a, b, c, 0, 11, c1) + ROUND3(c, d, a, b, 3, 16, c2) + ROUND3(b, c, d, a, 6, 23, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND3(a, b, c, d, 9, 4, c0) + ROUND3(d, a, b, c, 12, 11, c1) + ROUND3(c, d, a, b, 15, 16, c2) + ROUND3(b, c, d, a, 2, 23, c3) + +// a += (c^(b|^d)) + X[index] + const +// a = a<<shift | a>>(32-shift) + b +#define ROUND4(a, b, c, d, index, shift, const) \ + MVN R(d), R(t0) ; \ + ORR R(b), R(t0) ; \ + EOR R(c), R(t0) ; \ + MOVW (index<<2)(R(data)), R(t1) ; \ + ADD R(t1), R(t0) ; \ + ADD R(const), R(t0) ; \ + ADD R(t0), R(a) ; \ + ADD R(a)@>(32-shift), R(b), R(a) ; + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND4(a, b, c, d, 0, 6, c0) + ROUND4(d, a, b, c, 7, 10, c1) + ROUND4(c, d, a, b, 14, 15, c2) + ROUND4(b, c, d, a, 5, 21, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND4(a, b, c, d, 12, 6, c0) + ROUND4(d, a, b, c, 3, 10, c1) + ROUND4(c, d, a, b, 10, 15, c2) + ROUND4(b, c, d, a, 1, 21, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND4(a, b, c, d, 8, 6, c0) + ROUND4(d, a, b, c, 15, 10, c1) + ROUND4(c, d, a, b, 6, 15, c2) + ROUND4(b, c, d, a, 13, 21, c3) + + MOVM.IA.W (R(table)), [R(c0),R(c1),R(c2),R(c3)] + ROUND4(a, b, c, d, 4, 6, c0) + ROUND4(d, a, b, c, 11, 10, c1) + ROUND4(c, d, a, b, 2, 15, c2) + ROUND4(b, c, d, a, 9, 21, c3) + + MOVW dig+0(FP), R(t0) + MOVM.IA (R(t0)), [R(c0),R(c1),R(c2),R(c3)] + + ADD R(c0), R(a) + ADD R(c1), R(b) + ADD R(c2), R(c) + ADD R(c3), R(d) + + MOVM.IA [R(a),R(b),R(c),R(d)], (R(t0)) + + MOVW p_data(SP), R(data) + MOVW p_end(SP), R(t0) + ADD $64, R(data) + CMP R(t0), R(data) + BLO loop + + RET + +// MD5 constants table + + // Round 1 + DATA ·table+0x00(SB)/4, $0xd76aa478 + DATA ·table+0x04(SB)/4, $0xe8c7b756 + DATA ·table+0x08(SB)/4, $0x242070db + DATA ·table+0x0c(SB)/4, $0xc1bdceee + DATA ·table+0x10(SB)/4, $0xf57c0faf + DATA ·table+0x14(SB)/4, $0x4787c62a + DATA ·table+0x18(SB)/4, $0xa8304613 + DATA ·table+0x1c(SB)/4, $0xfd469501 + DATA ·table+0x20(SB)/4, $0x698098d8 + DATA ·table+0x24(SB)/4, $0x8b44f7af + DATA ·table+0x28(SB)/4, $0xffff5bb1 + DATA ·table+0x2c(SB)/4, $0x895cd7be + DATA ·table+0x30(SB)/4, $0x6b901122 + DATA ·table+0x34(SB)/4, $0xfd987193 + DATA ·table+0x38(SB)/4, $0xa679438e + DATA ·table+0x3c(SB)/4, $0x49b40821 + // Round 2 + DATA ·table+0x40(SB)/4, $0xf61e2562 + DATA ·table+0x44(SB)/4, $0xc040b340 + DATA ·table+0x48(SB)/4, $0x265e5a51 + DATA ·table+0x4c(SB)/4, $0xe9b6c7aa + DATA ·table+0x50(SB)/4, $0xd62f105d + DATA ·table+0x54(SB)/4, $0x02441453 + DATA ·table+0x58(SB)/4, $0xd8a1e681 + DATA ·table+0x5c(SB)/4, $0xe7d3fbc8 + DATA ·table+0x60(SB)/4, $0x21e1cde6 + DATA ·table+0x64(SB)/4, $0xc33707d6 + DATA ·table+0x68(SB)/4, $0xf4d50d87 + DATA ·table+0x6c(SB)/4, $0x455a14ed + DATA ·table+0x70(SB)/4, $0xa9e3e905 + DATA ·table+0x74(SB)/4, $0xfcefa3f8 + DATA ·table+0x78(SB)/4, $0x676f02d9 + DATA ·table+0x7c(SB)/4, $0x8d2a4c8a + // Round 3 + DATA ·table+0x80(SB)/4, $0xfffa3942 + DATA ·table+0x84(SB)/4, $0x8771f681 + DATA ·table+0x88(SB)/4, $0x6d9d6122 + DATA ·table+0x8c(SB)/4, $0xfde5380c + DATA ·table+0x90(SB)/4, $0xa4beea44 + DATA ·table+0x94(SB)/4, $0x4bdecfa9 + DATA ·table+0x98(SB)/4, $0xf6bb4b60 + DATA ·table+0x9c(SB)/4, $0xbebfbc70 + DATA ·table+0xa0(SB)/4, $0x289b7ec6 + DATA ·table+0xa4(SB)/4, $0xeaa127fa + DATA ·table+0xa8(SB)/4, $0xd4ef3085 + DATA ·table+0xac(SB)/4, $0x04881d05 + DATA ·table+0xb0(SB)/4, $0xd9d4d039 + DATA ·table+0xb4(SB)/4, $0xe6db99e5 + DATA ·table+0xb8(SB)/4, $0x1fa27cf8 + DATA ·table+0xbc(SB)/4, $0xc4ac5665 + // Round 4 + DATA ·table+0xc0(SB)/4, $0xf4292244 + DATA ·table+0xc4(SB)/4, $0x432aff97 + DATA ·table+0xc8(SB)/4, $0xab9423a7 + DATA ·table+0xcc(SB)/4, $0xfc93a039 + DATA ·table+0xd0(SB)/4, $0x655b59c3 + DATA ·table+0xd4(SB)/4, $0x8f0ccc92 + DATA ·table+0xd8(SB)/4, $0xffeff47d + DATA ·table+0xdc(SB)/4, $0x85845dd1 + DATA ·table+0xe0(SB)/4, $0x6fa87e4f + DATA ·table+0xe4(SB)/4, $0xfe2ce6e0 + DATA ·table+0xe8(SB)/4, $0xa3014314 + DATA ·table+0xec(SB)/4, $0x4e0811a1 + DATA ·table+0xf0(SB)/4, $0xf7537e82 + DATA ·table+0xf4(SB)/4, $0xbd3af235 + DATA ·table+0xf8(SB)/4, $0x2ad7d2bb + DATA ·table+0xfc(SB)/4, $0xeb86d391 + // Global definition + GLOBL ·table(SB),8,$256 diff --git a/src/crypto/md5/md5block_decl.go b/src/crypto/md5/md5block_decl.go new file mode 100644 index 000000000..d7956a6d2 --- /dev/null +++ b/src/crypto/md5/md5block_decl.go @@ -0,0 +1,11 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64 amd64p32 386 arm + +package md5 + +//go:noescape + +func block(dig *digest, p []byte) diff --git a/src/crypto/md5/md5block_generic.go b/src/crypto/md5/md5block_generic.go new file mode 100644 index 000000000..263463e51 --- /dev/null +++ b/src/crypto/md5/md5block_generic.go @@ -0,0 +1,9 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64,!amd64p32,!386,!arm + +package md5 + +var block = blockGeneric |