diff options
Diffstat (limited to 'src/pkg/crypto')
27 files changed, 1165 insertions, 97 deletions
diff --git a/src/pkg/crypto/cipher/example_test.go b/src/pkg/crypto/cipher/example_test.go index e0027cac2..373f6791b 100644 --- a/src/pkg/crypto/cipher/example_test.go +++ b/src/pkg/crypto/cipher/example_test.go @@ -233,7 +233,7 @@ func ExampleStreamReader() { } defer outFile.Close() - reader := &cipher.StreamReader{stream, inFile} + reader := &cipher.StreamReader{S: stream, R: inFile} // Copy the input file to the output file, decrypting as we go. if _, err := io.Copy(outFile, reader); err != nil { panic(err) @@ -270,7 +270,7 @@ func ExampleStreamWriter() { } defer outFile.Close() - writer := &cipher.StreamWriter{stream, outFile, nil} + writer := &cipher.StreamWriter{S: stream, W: outFile} // Copy the input file to the output file, encrypting as we go. if _, err := io.Copy(writer, inFile); err != nil { panic(err) diff --git a/src/pkg/crypto/dsa/dsa.go b/src/pkg/crypto/dsa/dsa.go index 05766a2f1..5a2a65744 100644 --- a/src/pkg/crypto/dsa/dsa.go +++ b/src/pkg/crypto/dsa/dsa.go @@ -144,8 +144,6 @@ GeneratePrimes: params.G = g return } - - panic("unreachable") } // GenerateKey generates a public&private key pair. The Parameters of the diff --git a/src/pkg/crypto/dsa/dsa_test.go b/src/pkg/crypto/dsa/dsa_test.go index 177aa444d..568416d0d 100644 --- a/src/pkg/crypto/dsa/dsa_test.go +++ b/src/pkg/crypto/dsa/dsa_test.go @@ -63,8 +63,9 @@ func testParameterGeneration(t *testing.T, sizes ParameterSizes, L, N int) { } func TestParameterGeneration(t *testing.T) { - // This test is too slow to run all the time. - return + if testing.Short() { + t.Skip("skipping parameter generation test in short mode") + } testParameterGeneration(t, L1024N160, 1024, 160) testParameterGeneration(t, L2048N224, 2048, 224) diff --git a/src/pkg/crypto/ecdsa/ecdsa.go b/src/pkg/crypto/ecdsa/ecdsa.go index 512d20c63..255000229 100644 --- a/src/pkg/crypto/ecdsa/ecdsa.go +++ b/src/pkg/crypto/ecdsa/ecdsa.go @@ -49,7 +49,7 @@ func randFieldElement(c elliptic.Curve, rand io.Reader) (k *big.Int, err error) return } -// GenerateKey generates a public&private key pair. +// GenerateKey generates a public and private key pair. func GenerateKey(c elliptic.Curve, rand io.Reader) (priv *PrivateKey, err error) { k, err := randFieldElement(c, rand) if err != nil { diff --git a/src/pkg/crypto/md5/gen.go b/src/pkg/crypto/md5/gen.go index 966bdae26..275b4aeea 100644 --- a/src/pkg/crypto/md5/gen.go +++ b/src/pkg/crypto/md5/gen.go @@ -161,6 +161,11 @@ var data = Data{ } var program = ` +// DO NOT EDIT. +// Generate with: go run gen.go{{if .Full}} -full{{end}} | gofmt >md5block.go + +// +build !amd64 + package md5 import ( @@ -186,6 +191,16 @@ import ( } {{end}} +const x86 = runtime.GOARCH == "amd64" || runtime.GOARCH == "386" + +var littleEndian bool + +func init() { + x := uint32(0x04030201) + y := [4]byte{0x1, 0x2, 0x3, 0x4} + littleEndian = *(*[4]byte)(unsafe.Pointer(&x)) == y +} + func block(dig *digest, p []byte) { a := dig.s[0] b := dig.s[1] @@ -197,13 +212,13 @@ func block(dig *digest, p []byte) { aa, bb, cc, dd := a, b, c, d // This is a constant condition - it is not evaluated on each iteration. - if runtime.GOARCH == "amd64" || runtime.GOARCH == "386" { + if x86 { // MD5 was designed so that x86 processors can just iterate // over the block data directly as uint32s, and we generate // less code and run 1.3x faster if we take advantage of that. // My apologies. X = (*[16]uint32)(unsafe.Pointer(&p[0])) - } else if uintptr(unsafe.Pointer(&p[0]))&(unsafe.Alignof(uint32(0))-1) == 0 { + } else if littleEndian && uintptr(unsafe.Pointer(&p[0]))&(unsafe.Alignof(uint32(0))-1) == 0 { X = (*[16]uint32)(unsafe.Pointer(&p[0])) } else { X = &xbuf diff --git a/src/pkg/crypto/md5/md5block.go b/src/pkg/crypto/md5/md5block.go index 0ca421774..a376fbee9 100644 --- a/src/pkg/crypto/md5/md5block.go +++ b/src/pkg/crypto/md5/md5block.go @@ -1,3 +1,8 @@ +// DO NOT EDIT. +// Generate with: go run gen.go -full | gofmt >md5block.go + +// +build !amd64,!386 + package md5 import ( diff --git a/src/pkg/crypto/md5/md5block_386.s b/src/pkg/crypto/md5/md5block_386.s new file mode 100644 index 000000000..3ce15e37f --- /dev/null +++ b/src/pkg/crypto/md5/md5block_386.s @@ -0,0 +1,180 @@ +// Original source: +// http://www.zorinaq.com/papers/md5-amd64.html +// http://www.zorinaq.com/papers/md5-amd64.tar.bz2 +// +// Translated from Perl generating GNU assembly into +// #defines generating 8a assembly, and adjusted for 386, +// by the Go Authors. + +// MD5 optimized for AMD64. +// +// Author: Marc Bevand <bevand_m (at) epita.fr> +// Licence: I hereby disclaim the copyright on this code and place it +// in the public domain. + +#define ROUND1(a, b, c, d, index, const, shift) \ + XORL c, BP; \ + LEAL const(a)(DI*1), a; \ + ANDL b, BP; \ + XORL d, BP; \ + MOVL (index*4)(SI), DI; \ + ADDL BP, a; \ + ROLL $shift, a; \ + MOVL c, BP; \ + ADDL b, a + +#define ROUND2(a, b, c, d, index, const, shift) \ + LEAL const(a)(DI*1),a; \ + MOVL d, DI; \ + ANDL b, DI; \ + MOVL d, BP; \ + NOTL BP; \ + ANDL c, BP; \ + ORL DI, BP; \ + MOVL (index*4)(SI),DI; \ + ADDL BP, a; \ + ROLL $shift, a; \ + ADDL b, a + +#define ROUND3(a, b, c, d, index, const, shift) \ + LEAL const(a)(DI*1),a; \ + MOVL (index*4)(SI),DI; \ + XORL d, BP; \ + XORL b, BP; \ + ADDL BP, a; \ + ROLL $shift, a; \ + MOVL b, BP; \ + ADDL b, a + +#define ROUND4(a, b, c, d, index, const, shift) \ + LEAL const(a)(DI*1),a; \ + ORL b, BP; \ + XORL c, BP; \ + ADDL BP, a; \ + MOVL (index*4)(SI),DI; \ + MOVL $0xffffffff, BP; \ + ROLL $shift, a; \ + XORL c, BP; \ + ADDL b, a + +TEXT ·block(SB),7,$24-16 + MOVL dig+0(FP), BP + MOVL p+4(FP), SI + MOVL p_len+8(FP), DX + SHRL $6, DX + SHLL $6, DX + + LEAL (SI)(DX*1), DI + MOVL (0*4)(BP), AX + MOVL (1*4)(BP), BX + MOVL (2*4)(BP), CX + MOVL (3*4)(BP), DX + + CMPL SI, DI + JEQ end + + MOVL DI, 16(SP) + +loop: + MOVL AX, 0(SP) + MOVL BX, 4(SP) + MOVL CX, 8(SP) + MOVL DX, 12(SP) + + MOVL (0*4)(SI), DI + MOVL DX, BP + + ROUND1(AX,BX,CX,DX, 1,0xd76aa478, 7); + ROUND1(DX,AX,BX,CX, 2,0xe8c7b756,12); + ROUND1(CX,DX,AX,BX, 3,0x242070db,17); + ROUND1(BX,CX,DX,AX, 4,0xc1bdceee,22); + ROUND1(AX,BX,CX,DX, 5,0xf57c0faf, 7); + ROUND1(DX,AX,BX,CX, 6,0x4787c62a,12); + ROUND1(CX,DX,AX,BX, 7,0xa8304613,17); + ROUND1(BX,CX,DX,AX, 8,0xfd469501,22); + ROUND1(AX,BX,CX,DX, 9,0x698098d8, 7); + ROUND1(DX,AX,BX,CX,10,0x8b44f7af,12); + ROUND1(CX,DX,AX,BX,11,0xffff5bb1,17); + ROUND1(BX,CX,DX,AX,12,0x895cd7be,22); + ROUND1(AX,BX,CX,DX,13,0x6b901122, 7); + ROUND1(DX,AX,BX,CX,14,0xfd987193,12); + ROUND1(CX,DX,AX,BX,15,0xa679438e,17); + ROUND1(BX,CX,DX,AX, 0,0x49b40821,22); + + MOVL (1*4)(SI), DI + MOVL DX, BP + + ROUND2(AX,BX,CX,DX, 6,0xf61e2562, 5); + ROUND2(DX,AX,BX,CX,11,0xc040b340, 9); + ROUND2(CX,DX,AX,BX, 0,0x265e5a51,14); + ROUND2(BX,CX,DX,AX, 5,0xe9b6c7aa,20); + ROUND2(AX,BX,CX,DX,10,0xd62f105d, 5); + ROUND2(DX,AX,BX,CX,15, 0x2441453, 9); + ROUND2(CX,DX,AX,BX, 4,0xd8a1e681,14); + ROUND2(BX,CX,DX,AX, 9,0xe7d3fbc8,20); + ROUND2(AX,BX,CX,DX,14,0x21e1cde6, 5); + ROUND2(DX,AX,BX,CX, 3,0xc33707d6, 9); + ROUND2(CX,DX,AX,BX, 8,0xf4d50d87,14); + ROUND2(BX,CX,DX,AX,13,0x455a14ed,20); + ROUND2(AX,BX,CX,DX, 2,0xa9e3e905, 5); + ROUND2(DX,AX,BX,CX, 7,0xfcefa3f8, 9); + ROUND2(CX,DX,AX,BX,12,0x676f02d9,14); + ROUND2(BX,CX,DX,AX, 0,0x8d2a4c8a,20); + + MOVL (5*4)(SI), DI + MOVL CX, BP + + ROUND3(AX,BX,CX,DX, 8,0xfffa3942, 4); + ROUND3(DX,AX,BX,CX,11,0x8771f681,11); + ROUND3(CX,DX,AX,BX,14,0x6d9d6122,16); + ROUND3(BX,CX,DX,AX, 1,0xfde5380c,23); + ROUND3(AX,BX,CX,DX, 4,0xa4beea44, 4); + ROUND3(DX,AX,BX,CX, 7,0x4bdecfa9,11); + ROUND3(CX,DX,AX,BX,10,0xf6bb4b60,16); + ROUND3(BX,CX,DX,AX,13,0xbebfbc70,23); + ROUND3(AX,BX,CX,DX, 0,0x289b7ec6, 4); + ROUND3(DX,AX,BX,CX, 3,0xeaa127fa,11); + ROUND3(CX,DX,AX,BX, 6,0xd4ef3085,16); + ROUND3(BX,CX,DX,AX, 9, 0x4881d05,23); + ROUND3(AX,BX,CX,DX,12,0xd9d4d039, 4); + ROUND3(DX,AX,BX,CX,15,0xe6db99e5,11); + ROUND3(CX,DX,AX,BX, 2,0x1fa27cf8,16); + ROUND3(BX,CX,DX,AX, 0,0xc4ac5665,23); + + MOVL (0*4)(SI), DI + MOVL $0xffffffff, BP + XORL DX, BP + + ROUND4(AX,BX,CX,DX, 7,0xf4292244, 6); + ROUND4(DX,AX,BX,CX,14,0x432aff97,10); + ROUND4(CX,DX,AX,BX, 5,0xab9423a7,15); + ROUND4(BX,CX,DX,AX,12,0xfc93a039,21); + ROUND4(AX,BX,CX,DX, 3,0x655b59c3, 6); + ROUND4(DX,AX,BX,CX,10,0x8f0ccc92,10); + ROUND4(CX,DX,AX,BX, 1,0xffeff47d,15); + ROUND4(BX,CX,DX,AX, 8,0x85845dd1,21); + ROUND4(AX,BX,CX,DX,15,0x6fa87e4f, 6); + ROUND4(DX,AX,BX,CX, 6,0xfe2ce6e0,10); + ROUND4(CX,DX,AX,BX,13,0xa3014314,15); + ROUND4(BX,CX,DX,AX, 4,0x4e0811a1,21); + ROUND4(AX,BX,CX,DX,11,0xf7537e82, 6); + ROUND4(DX,AX,BX,CX, 2,0xbd3af235,10); + ROUND4(CX,DX,AX,BX, 9,0x2ad7d2bb,15); + ROUND4(BX,CX,DX,AX, 0,0xeb86d391,21); + + ADDL 0(SP), AX + ADDL 4(SP), BX + ADDL 8(SP), CX + ADDL 12(SP), DX + + ADDL $64, SI + CMPL SI, 16(SP) + JB loop + +end: + MOVL dig+0(FP), BP + MOVL AX, (0*4)(BP) + MOVL BX, (1*4)(BP) + MOVL CX, (2*4)(BP) + MOVL DX, (3*4)(BP) + RET diff --git a/src/pkg/crypto/md5/md5block_amd64.s b/src/pkg/crypto/md5/md5block_amd64.s new file mode 100644 index 000000000..e6420a28a --- /dev/null +++ b/src/pkg/crypto/md5/md5block_amd64.s @@ -0,0 +1,177 @@ +// Original source: +// http://www.zorinaq.com/papers/md5-amd64.html +// http://www.zorinaq.com/papers/md5-amd64.tar.bz2 +// +// Translated from Perl generating GNU assembly into +// #defines generating 6a assembly by the Go Authors. + +// MD5 optimized for AMD64. +// +// Author: Marc Bevand <bevand_m (at) epita.fr> +// Licence: I hereby disclaim the copyright on this code and place it +// in the public domain. + +TEXT ·block(SB),7,$0-32 + MOVQ dig+0(FP), BP + MOVQ p+8(FP), SI + MOVQ p_len+16(FP), DX + SHRQ $6, DX + SHLQ $6, DX + + LEAQ (SI)(DX*1), DI + MOVL (0*4)(BP), AX + MOVL (1*4)(BP), BX + MOVL (2*4)(BP), CX + MOVL (3*4)(BP), DX + + CMPQ SI, DI + JEQ end + +loop: + MOVL AX, R12 + MOVL BX, R13 + MOVL CX, R14 + MOVL DX, R15 + + MOVL (0*4)(SI), R8 + MOVL DX, R9 + +#define ROUND1(a, b, c, d, index, const, shift) \ + XORL c, R9; \ + LEAL const(a)(R8*1), a; \ + ANDL b, R9; \ + XORL d, R9; \ + MOVL (index*4)(SI), R8; \ + ADDL R9, a; \ + ROLL $shift, a; \ + MOVL c, R9; \ + ADDL b, a + + ROUND1(AX,BX,CX,DX, 1,0xd76aa478, 7); + ROUND1(DX,AX,BX,CX, 2,0xe8c7b756,12); + ROUND1(CX,DX,AX,BX, 3,0x242070db,17); + ROUND1(BX,CX,DX,AX, 4,0xc1bdceee,22); + ROUND1(AX,BX,CX,DX, 5,0xf57c0faf, 7); + ROUND1(DX,AX,BX,CX, 6,0x4787c62a,12); + ROUND1(CX,DX,AX,BX, 7,0xa8304613,17); + ROUND1(BX,CX,DX,AX, 8,0xfd469501,22); + ROUND1(AX,BX,CX,DX, 9,0x698098d8, 7); + ROUND1(DX,AX,BX,CX,10,0x8b44f7af,12); + ROUND1(CX,DX,AX,BX,11,0xffff5bb1,17); + ROUND1(BX,CX,DX,AX,12,0x895cd7be,22); + ROUND1(AX,BX,CX,DX,13,0x6b901122, 7); + ROUND1(DX,AX,BX,CX,14,0xfd987193,12); + ROUND1(CX,DX,AX,BX,15,0xa679438e,17); + ROUND1(BX,CX,DX,AX, 0,0x49b40821,22); + + MOVL (1*4)(SI), R8 + MOVL DX, R9 + MOVL DX, R10 + +#define ROUND2(a, b, c, d, index, const, shift) \ + NOTL R9; \ + LEAL const(a)(R8*1),a; \ + ANDL b, R10; \ + ANDL c, R9; \ + MOVL (index*4)(SI),R8; \ + ORL R9, R10; \ + MOVL c, R9; \ + ADDL R10, a; \ + MOVL c, R10; \ + ROLL $shift, a; \ + ADDL b, a + + ROUND2(AX,BX,CX,DX, 6,0xf61e2562, 5); + ROUND2(DX,AX,BX,CX,11,0xc040b340, 9); + ROUND2(CX,DX,AX,BX, 0,0x265e5a51,14); + ROUND2(BX,CX,DX,AX, 5,0xe9b6c7aa,20); + ROUND2(AX,BX,CX,DX,10,0xd62f105d, 5); + ROUND2(DX,AX,BX,CX,15, 0x2441453, 9); + ROUND2(CX,DX,AX,BX, 4,0xd8a1e681,14); + ROUND2(BX,CX,DX,AX, 9,0xe7d3fbc8,20); + ROUND2(AX,BX,CX,DX,14,0x21e1cde6, 5); + ROUND2(DX,AX,BX,CX, 3,0xc33707d6, 9); + ROUND2(CX,DX,AX,BX, 8,0xf4d50d87,14); + ROUND2(BX,CX,DX,AX,13,0x455a14ed,20); + ROUND2(AX,BX,CX,DX, 2,0xa9e3e905, 5); + ROUND2(DX,AX,BX,CX, 7,0xfcefa3f8, 9); + ROUND2(CX,DX,AX,BX,12,0x676f02d9,14); + ROUND2(BX,CX,DX,AX, 0,0x8d2a4c8a,20); + + MOVL (5*4)(SI), R8 + MOVL CX, R9 + +#define ROUND3(a, b, c, d, index, const, shift) \ + LEAL const(a)(R8*1),a; \ + MOVL (index*4)(SI),R8; \ + XORL d, R9; \ + XORL b, R9; \ + ADDL R9, a; \ + ROLL $shift, a; \ + MOVL b, R9; \ + ADDL b, a + + ROUND3(AX,BX,CX,DX, 8,0xfffa3942, 4); + ROUND3(DX,AX,BX,CX,11,0x8771f681,11); + ROUND3(CX,DX,AX,BX,14,0x6d9d6122,16); + ROUND3(BX,CX,DX,AX, 1,0xfde5380c,23); + ROUND3(AX,BX,CX,DX, 4,0xa4beea44, 4); + ROUND3(DX,AX,BX,CX, 7,0x4bdecfa9,11); + ROUND3(CX,DX,AX,BX,10,0xf6bb4b60,16); + ROUND3(BX,CX,DX,AX,13,0xbebfbc70,23); + ROUND3(AX,BX,CX,DX, 0,0x289b7ec6, 4); + ROUND3(DX,AX,BX,CX, 3,0xeaa127fa,11); + ROUND3(CX,DX,AX,BX, 6,0xd4ef3085,16); + ROUND3(BX,CX,DX,AX, 9, 0x4881d05,23); + ROUND3(AX,BX,CX,DX,12,0xd9d4d039, 4); + ROUND3(DX,AX,BX,CX,15,0xe6db99e5,11); + ROUND3(CX,DX,AX,BX, 2,0x1fa27cf8,16); + ROUND3(BX,CX,DX,AX, 0,0xc4ac5665,23); + + MOVL (0*4)(SI), R8 + MOVL $0xffffffff, R9 + XORL DX, R9 + +#define ROUND4(a, b, c, d, index, const, shift) \ + LEAL const(a)(R8*1),a; \ + ORL b, R9; \ + XORL c, R9; \ + ADDL R9, a; \ + MOVL (index*4)(SI),R8; \ + MOVL $0xffffffff, R9; \ + ROLL $shift, a; \ + XORL c, R9; \ + ADDL b, a + + ROUND4(AX,BX,CX,DX, 7,0xf4292244, 6); + ROUND4(DX,AX,BX,CX,14,0x432aff97,10); + ROUND4(CX,DX,AX,BX, 5,0xab9423a7,15); + ROUND4(BX,CX,DX,AX,12,0xfc93a039,21); + ROUND4(AX,BX,CX,DX, 3,0x655b59c3, 6); + ROUND4(DX,AX,BX,CX,10,0x8f0ccc92,10); + ROUND4(CX,DX,AX,BX, 1,0xffeff47d,15); + ROUND4(BX,CX,DX,AX, 8,0x85845dd1,21); + ROUND4(AX,BX,CX,DX,15,0x6fa87e4f, 6); + ROUND4(DX,AX,BX,CX, 6,0xfe2ce6e0,10); + ROUND4(CX,DX,AX,BX,13,0xa3014314,15); + ROUND4(BX,CX,DX,AX, 4,0x4e0811a1,21); + ROUND4(AX,BX,CX,DX,11,0xf7537e82, 6); + ROUND4(DX,AX,BX,CX, 2,0xbd3af235,10); + ROUND4(CX,DX,AX,BX, 9,0x2ad7d2bb,15); + ROUND4(BX,CX,DX,AX, 0,0xeb86d391,21); + + ADDL R12, AX + ADDL R13, BX + ADDL R14, CX + ADDL R15, DX + + ADDQ $64, SI + CMPQ SI, DI + JB loop + +end: + MOVL AX, (0*4)(BP) + MOVL BX, (1*4)(BP) + MOVL CX, (2*4)(BP) + MOVL DX, (3*4)(BP) + RET diff --git a/src/pkg/crypto/md5/md5block_decl.go b/src/pkg/crypto/md5/md5block_decl.go new file mode 100644 index 000000000..14190c6ff --- /dev/null +++ b/src/pkg/crypto/md5/md5block_decl.go @@ -0,0 +1,9 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64 386 + +package md5 + +func block(dig *digest, p []byte) diff --git a/src/pkg/crypto/rand/util.go b/src/pkg/crypto/rand/util.go index 50e5b162b..0cd5e0e02 100644 --- a/src/pkg/crypto/rand/util.go +++ b/src/pkg/crypto/rand/util.go @@ -98,12 +98,13 @@ func Prime(rand io.Reader, bits int) (p *big.Int, err error) { return } } - - return } -// Int returns a uniform random value in [0, max). +// Int returns a uniform random value in [0, max). It panics if max <= 0. func Int(rand io.Reader, max *big.Int) (n *big.Int, err error) { + if max.Sign() <= 0 { + panic("crypto/rand: argument to Int is <= 0") + } k := (max.BitLen() + 7) / 8 // b is the number of bits in the most significant byte of max. @@ -130,6 +131,4 @@ func Int(rand io.Reader, max *big.Int) (n *big.Int, err error) { return } } - - return } diff --git a/src/pkg/crypto/rc4/rc4.go b/src/pkg/crypto/rc4/rc4.go index e0c33fa4b..3d717c63b 100644 --- a/src/pkg/crypto/rc4/rc4.go +++ b/src/pkg/crypto/rc4/rc4.go @@ -13,7 +13,7 @@ import "strconv" // A Cipher is an instance of RC4 using a particular key. type Cipher struct { - s [256]byte + s [256]uint32 i, j uint8 } @@ -32,11 +32,11 @@ func NewCipher(key []byte) (*Cipher, error) { } var c Cipher for i := 0; i < 256; i++ { - c.s[i] = uint8(i) + c.s[i] = uint32(i) } var j uint8 = 0 for i := 0; i < 256; i++ { - j += c.s[i] + key[i%k] + j += uint8(c.s[i]) + key[i%k] c.s[i], c.s[j] = c.s[j], c.s[i] } return &c, nil diff --git a/src/pkg/crypto/rc4/rc4_386.s b/src/pkg/crypto/rc4/rc4_386.s new file mode 100644 index 000000000..c80ef2a3a --- /dev/null +++ b/src/pkg/crypto/rc4/rc4_386.s @@ -0,0 +1,51 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8) +TEXT ·xorKeyStream(SB),7,$0 + MOVL dst+0(FP), DI + MOVL src+4(FP), SI + MOVL state+12(FP), BP + + MOVL i+16(FP), AX + MOVBLZX (AX), AX + MOVL j+20(FP), BX + MOVBLZX (BX), BX + CMPL n+8(FP), $0 + JEQ done + +loop: + // i += 1 + INCB AX + + // j += c.s[i] + MOVBLZX (BP)(AX*4), DX + ADDB DX, BX + MOVBLZX BX, BX + + // c.s[i], c.s[j] = c.s[j], c.s[i] + MOVBLZX (BP)(BX*4), CX + MOVB CX, (BP)(AX*4) + MOVB DX, (BP)(BX*4) + + // *dst = *src ^ c.s[c.s[i]+c.s[j]] + ADDB DX, CX + MOVBLZX CX, CX + MOVB (BP)(CX*4), CX + XORB (SI), CX + MOVBLZX CX, CX + MOVB CX, (DI) + + INCL SI + INCL DI + DECL n+8(FP) + JNE loop + +done: + MOVL i+16(FP), CX + MOVB AX, (CX) + MOVL j+20(FP), CX + MOVB BX, (CX) + + RET diff --git a/src/pkg/crypto/rc4/rc4_amd64.s b/src/pkg/crypto/rc4/rc4_amd64.s index ffe9ada85..353fe3720 100644 --- a/src/pkg/crypto/rc4/rc4_amd64.s +++ b/src/pkg/crypto/rc4/rc4_amd64.s @@ -1,53 +1,177 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Original source: +// http://www.zorinaq.com/papers/rc4-amd64.html +// http://www.zorinaq.com/papers/rc4-amd64.tar.bz2 + +// Local modifications: +// +// Transliterated from GNU to 6a assembly syntax by the Go authors. +// The comments and spacing are from the original. +// +// The new EXTEND macros avoid a bad stall on some systems after 8-bit math. +// +// The original code accumulated 64 bits of key stream in an integer +// register and then XOR'ed the key stream into the data 8 bytes at a time. +// Modified to accumulate 128 bits of key stream into an XMM register +// and then XOR the key stream into the data 16 bytes at a time. +// Approximately doubles throughput. + +// NOTE: Changing EXTEND to a no-op makes the code run 1.2x faster on Core i5 +// but makes the code run 2.0x slower on Xeon. +#define EXTEND(r) MOVBLZX r, r + +/* +** RC4 implementation optimized for AMD64. +** +** Author: Marc Bevand <bevand_m (at) epita.fr> +** Licence: I hereby disclaim the copyright on this code and place it +** in the public domain. +** +** The code has been designed to be easily integrated into openssl: +** the exported RC4() function can replace the actual implementations +** openssl already contains. Please note that when linking with openssl, +** it requires that sizeof(RC4_INT) == 8. So openssl must be compiled +** with -DRC4_INT='unsigned long'. +** +** The throughput achieved by this code is about 320 MBytes/sec, on +** a 1.8 GHz AMD Opteron (rev C0) processor. +*/ -// func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8) TEXT ·xorKeyStream(SB),7,$0 - MOVQ dst+0(FP), DI - MOVQ src+8(FP), SI - MOVQ n+16(FP), CX - MOVQ state+24(FP), R8 + MOVQ n+16(FP), BX // rbx = ARG(len) + MOVQ src+8(FP), SI // in = ARG(in) + MOVQ dst+0(FP), DI // out = ARG(out) + MOVQ state+24(FP), BP // d = ARG(data) + MOVQ i+32(FP), AX + MOVBQZX 0(AX), CX // x = *xp + MOVQ j+40(FP), AX + MOVBQZX 0(AX), DX // y = *yp + + LEAQ (SI)(BX*1), R9 // limit = in+len + +l1: CMPQ SI, R9 // cmp in with in+len + JGE finished // jump if (in >= in+len) + + INCB CX + EXTEND(CX) + TESTL $15, CX + JZ wordloop + + MOVBLZX (BP)(CX*4), AX + + ADDB AX, DX // y += tx + EXTEND(DX) + MOVBLZX (BP)(DX*4), BX // ty = d[y] + MOVB BX, (BP)(CX*4) // d[x] = ty + ADDB AX, BX // val = ty+tx + EXTEND(BX) + MOVB AX, (BP)(DX*4) // d[y] = tx + MOVBLZX (BP)(BX*4), R8 // val = d[val] + XORB (SI), R8 // xor 1 byte + MOVB R8, (DI) + INCQ SI // in++ + INCQ DI // out++ + JMP l1 + +wordloop: + SUBQ $16, R9 + CMPQ SI, R9 + JGT end + +start: + ADDQ $16, SI // increment in + ADDQ $16, DI // increment out + + // Each KEYROUND generates one byte of key and + // inserts it into an XMM register at the given 16-bit index. + // The key state array is uint32 words only using the bottom + // byte of each word, so the 16-bit OR only copies 8 useful bits. + // We accumulate alternating bytes into X0 and X1, and then at + // the end we OR X1<<8 into X0 to produce the actual key. + // + // At the beginning of the loop, CX%16 == 0, so the 16 loads + // at state[CX], state[CX+1], ..., state[CX+15] can precompute + // (state+CX) as R12 and then become R12[0], R12[1], ... R12[15], + // without fear of the byte computation CX+15 wrapping around. + // + // The first round needs R12[0], the second needs R12[1], and so on. + // We can avoid memory stalls by starting the load for round n+1 + // before the end of round n, using the LOAD macro. + LEAQ (BP)(CX*4), R12 - MOVQ xPtr+32(FP), AX - MOVBQZX (AX), AX - MOVQ yPtr+40(FP), BX - MOVBQZX (BX), BX +#define KEYROUND(xmm, load, off, r1, r2, index) \ + MOVBLZX (BP)(DX*4), R8; \ + MOVB r1, (BP)(DX*4); \ + load((off+1), r2); \ + MOVB R8, (off*4)(R12); \ + ADDB r1, R8; \ + EXTEND(R8); \ + PINSRW $index, (BP)(R8*4), xmm -loop: - CMPQ CX, $0 - JE done +#define LOAD(off, reg) \ + MOVBLZX (off*4)(R12), reg; \ + ADDB reg, DX; \ + EXTEND(DX) - // c.i += 1 - INCB AX +#define SKIP(off, reg) - // c.j += c.s[c.i] - MOVB (R8)(AX*1), R9 - ADDB R9, BX + LOAD(0, AX) + KEYROUND(X0, LOAD, 0, AX, BX, 0) + KEYROUND(X1, LOAD, 1, BX, AX, 0) + KEYROUND(X0, LOAD, 2, AX, BX, 1) + KEYROUND(X1, LOAD, 3, BX, AX, 1) + KEYROUND(X0, LOAD, 4, AX, BX, 2) + KEYROUND(X1, LOAD, 5, BX, AX, 2) + KEYROUND(X0, LOAD, 6, AX, BX, 3) + KEYROUND(X1, LOAD, 7, BX, AX, 3) + KEYROUND(X0, LOAD, 8, AX, BX, 4) + KEYROUND(X1, LOAD, 9, BX, AX, 4) + KEYROUND(X0, LOAD, 10, AX, BX, 5) + KEYROUND(X1, LOAD, 11, BX, AX, 5) + KEYROUND(X0, LOAD, 12, AX, BX, 6) + KEYROUND(X1, LOAD, 13, BX, AX, 6) + KEYROUND(X0, LOAD, 14, AX, BX, 7) + KEYROUND(X1, SKIP, 15, BX, AX, 7) + + ADDB $16, CX - MOVBQZX (R8)(BX*1), R10 + PSLLQ $8, X1 + PXOR X1, X0 + MOVOU -16(SI), X2 + PXOR X0, X2 + MOVOU X2, -16(DI) - MOVB R10, (R8)(AX*1) - MOVB R9, (R8)(BX*1) + CMPQ SI, R9 // cmp in with in+len-16 + JLE start // jump if (in <= in+len-16) - // R11 = c.s[c.i]+c.s[c.j] - MOVQ R10, R11 - ADDB R9, R11 +end: + DECB CX + ADDQ $16, R9 // tmp = in+len - MOVB (R8)(R11*1), R11 - MOVB (SI), R12 - XORB R11, R12 - MOVB R12, (DI) + // handle the last bytes, one by one +l2: CMPQ SI, R9 // cmp in with in+len + JGE finished // jump if (in >= in+len) - INCQ SI - INCQ DI - DECQ CX + INCB CX + EXTEND(CX) + MOVBLZX (BP)(CX*4), AX - JMP loop -done: - MOVQ xPtr+32(FP), R8 - MOVB AX, (R8) - MOVQ yPtr+40(FP), R8 - MOVB BX, (R8) + ADDB AX, DX // y += tx + EXTEND(DX) + MOVBLZX (BP)(DX*4), BX // ty = d[y] + MOVB BX, (BP)(CX*4) // d[x] = ty + ADDB AX, BX // val = ty+tx + EXTEND(BX) + MOVB AX, (BP)(DX*4) // d[y] = tx + MOVBLZX (BP)(BX*4), R8 // val = d[val] + XORB (SI), R8 // xor 1 byte + MOVB R8, (DI) + INCQ SI // in++ + INCQ DI // out++ + JMP l2 +finished: + MOVQ j+40(FP), BX + MOVB DX, 0(BX) + MOVQ i+32(FP), AX + MOVB CX, 0(AX) RET diff --git a/src/pkg/crypto/rc4/rc4_arm.s b/src/pkg/crypto/rc4/rc4_arm.s index 51a332f62..307cb7148 100644 --- a/src/pkg/crypto/rc4/rc4_arm.s +++ b/src/pkg/crypto/rc4/rc4_arm.s @@ -31,19 +31,19 @@ loop: // i += 1; j += state[i] ADD $1, R(i) AND $0xff, R(i) - MOVBU R(i)<<0(R(state)), R(t) + MOVBU R(i)<<2(R(state)), R(t) ADD R(t), R(j) AND $0xff, R(j) // swap state[i] <-> state[j] - MOVBU R(j)<<0(R(state)), R(t2) - MOVB R(t2), R(i)<<0(R(state)) - MOVB R(t), R(j)<<0(R(state)) + MOVBU R(j)<<2(R(state)), R(t2) + MOVB R(t2), R(i)<<2(R(state)) + MOVB R(t), R(j)<<2(R(state)) // dst[k] = src[k] ^ state[state[i] + state[j]] ADD R(t2), R(t) AND $0xff, R(t) - MOVBU R(t)<<0(R(state)), R(t) + MOVBU R(t)<<2(R(state)), R(t) MOVBU R(k)<<0(R(src)), R(t2) EOR R(t), R(t2) MOVB R(t2), R(k)<<0(R(dst)) diff --git a/src/pkg/crypto/rc4/rc4_asm.go b/src/pkg/crypto/rc4/rc4_asm.go index 0b66e4a9e..c582a4488 100644 --- a/src/pkg/crypto/rc4/rc4_asm.go +++ b/src/pkg/crypto/rc4/rc4_asm.go @@ -2,11 +2,11 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 arm +// +build amd64 arm 386 package rc4 -func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8) +func xorKeyStream(dst, src *byte, n int, state *[256]uint32, i, j *uint8) // XORKeyStream sets dst to the result of XORing src with the key stream. // Dst and src may be the same slice but otherwise should not overlap. diff --git a/src/pkg/crypto/rc4/rc4_ref.go b/src/pkg/crypto/rc4/rc4_ref.go index 1018548c2..44d380436 100644 --- a/src/pkg/crypto/rc4/rc4_ref.go +++ b/src/pkg/crypto/rc4/rc4_ref.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64,!arm +// +build !amd64,!arm,!386 package rc4 diff --git a/src/pkg/crypto/rc4/rc4_test.go b/src/pkg/crypto/rc4/rc4_test.go index 9e12789f7..7b4df6791 100644 --- a/src/pkg/crypto/rc4/rc4_test.go +++ b/src/pkg/crypto/rc4/rc4_test.go @@ -5,6 +5,8 @@ package rc4 import ( + "bytes" + "fmt" "testing" ) @@ -72,25 +74,68 @@ var golden = []rc4Test{ }, } +func testEncrypt(t *testing.T, desc string, c *Cipher, src, expect []byte) { + dst := make([]byte, len(src)) + c.XORKeyStream(dst, src) + for i, v := range dst { + if v != expect[i] { + t.Fatalf("%s: mismatch at byte %d:\nhave %x\nwant %x", desc, i, dst, expect) + } + } +} + func TestGolden(t *testing.T) { - for i := 0; i < len(golden); i++ { - g := golden[i] - c, err := NewCipher(g.key) - if err != nil { - t.Errorf("Failed to create cipher at golden index %d", i) - return + for gi, g := range golden { + data := make([]byte, len(g.keystream)) + for i := range data { + data[i] = byte(i) } - keystream := make([]byte, len(g.keystream)) - c.XORKeyStream(keystream, keystream) - for j, v := range keystream { - if g.keystream[j] != v { - t.Errorf("Failed at golden index %d:\n%x\nvs\n%x", i, keystream, g.keystream) - break + + expect := make([]byte, len(g.keystream)) + for i := range expect { + expect[i] = byte(i) ^ g.keystream[i] + } + + for size := 1; size <= len(g.keystream); size++ { + c, err := NewCipher(g.key) + if err != nil { + t.Fatalf("#%d: NewCipher: %v", gi, err) + } + + off := 0 + for off < len(g.keystream) { + n := len(g.keystream) - off + if n > size { + n = size + } + desc := fmt.Sprintf("#%d@[%d:%d]", gi, off, off+n) + testEncrypt(t, desc, c, data[off:off+n], expect[off:off+n]) + off += n } } } } +func TestBlock(t *testing.T) { + c1a, _ := NewCipher(golden[0].key) + c1b, _ := NewCipher(golden[1].key) + data1 := make([]byte, 1<<20) + for i := range data1 { + c1a.XORKeyStream(data1[i:i+1], data1[i:i+1]) + c1b.XORKeyStream(data1[i:i+1], data1[i:i+1]) + } + + c2a, _ := NewCipher(golden[0].key) + c2b, _ := NewCipher(golden[1].key) + data2 := make([]byte, 1<<20) + c2a.XORKeyStream(data2, data2) + c2b.XORKeyStream(data2, data2) + + if !bytes.Equal(data1, data2) { + t.Fatalf("bad block") + } +} + func benchmark(b *testing.B, size int64) { buf := make([]byte, size) c, err := NewCipher(golden[0].key) diff --git a/src/pkg/crypto/rsa/rsa.go b/src/pkg/crypto/rsa/rsa.go index 35a5f7c3c..f56fb37ee 100644 --- a/src/pkg/crypto/rsa/rsa.go +++ b/src/pkg/crypto/rsa/rsa.go @@ -203,7 +203,9 @@ NextSetOfPrimes: g.GCD(priv.D, y, e, totient) if g.Cmp(bigOne) == 0 { - priv.D.Add(priv.D, totient) + if priv.D.Sign() < 0 { + priv.D.Add(priv.D, totient) + } priv.Primes = primes priv.N = n diff --git a/src/pkg/crypto/rsa/rsa_test.go b/src/pkg/crypto/rsa/rsa_test.go index f08cfe73c..ffd96e62f 100644 --- a/src/pkg/crypto/rsa/rsa_test.go +++ b/src/pkg/crypto/rsa/rsa_test.go @@ -93,6 +93,9 @@ func testKeyBasics(t *testing.T, priv *PrivateKey) { if err := priv.Validate(); err != nil { t.Errorf("Validate() failed: %s", err) } + if priv.D.Cmp(priv.N) > 0 { + t.Errorf("private exponent too large") + } pub := &priv.PublicKey m := big.NewInt(42) diff --git a/src/pkg/crypto/sha1/sha1block.go b/src/pkg/crypto/sha1/sha1block.go index 1c9507c68..92224fc0e 100644 --- a/src/pkg/crypto/sha1/sha1block.go +++ b/src/pkg/crypto/sha1/sha1block.go @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// +build !amd64,!386 + // SHA1 block step. // In its own file so that a faster assembly or C version // can be substituted easily. diff --git a/src/pkg/crypto/sha1/sha1block_386.s b/src/pkg/crypto/sha1/sha1block_386.s new file mode 100644 index 000000000..e60a7b9b0 --- /dev/null +++ b/src/pkg/crypto/sha1/sha1block_386.s @@ -0,0 +1,233 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// SHA1 block routine. See sha1block.go for Go equivalent. +// +// There are 80 rounds of 4 types: +// - rounds 0-15 are type 1 and load data (ROUND1 macro). +// - rounds 16-19 are type 1 and do not load data (ROUND1x macro). +// - rounds 20-39 are type 2 and do not load data (ROUND2 macro). +// - rounds 40-59 are type 3 and do not load data (ROUND3 macro). +// - rounds 60-79 are type 4 and do not load data (ROUND4 macro). +// +// Each round loads or shuffles the data, then computes a per-round +// function of b, c, d, and then mixes the result into and rotates the +// five registers a, b, c, d, e holding the intermediate results. +// +// The register rotation is implemented by rotating the arguments to +// the round macros instead of by explicit move instructions. + +// Like sha1block_amd64.s, but we keep the data and limit pointers on the stack. +// To free up the word pointer (R10 on amd64, DI here), we add it to e during +// LOAD/SHUFFLE instead of during MIX. +// +// The stack holds the intermediate word array - 16 uint32s - at 0(SP) up to 64(SP). +// The saved a, b, c, d, e (R11 through R15 on amd64) are at 64(SP) up to 84(SP). +// The saved limit pointer (DI on amd64) is at 84(SP). +// The saved data pointer (SI on amd64) is at 88(SP). + +#define LOAD(index, e) \ + MOVL 88(SP), SI; \ + MOVL (index*4)(SI), DI; \ + BSWAPL DI; \ + MOVL DI, (index*4)(SP); \ + ADDL DI, e + +#define SHUFFLE(index, e) \ + MOVL (((index)&0xf)*4)(SP), DI; \ + XORL (((index-3)&0xf)*4)(SP), DI; \ + XORL (((index-8)&0xf)*4)(SP), DI; \ + XORL (((index-14)&0xf)*4)(SP), DI; \ + ROLL $1, DI; \ + MOVL DI, (((index)&0xf)*4)(SP); \ + ADDL DI, e + +#define FUNC1(a, b, c, d, e) \ + MOVL b, SI; \ + ANDL c, SI; \ + MOVL b, DI; \ + NOTL DI; \ + ANDL d, DI; \ + ORL SI, DI + +#define FUNC2(a, b, c, d, e) \ + MOVL b, DI; \ + XORL c, DI; \ + XORL d, DI + +#define FUNC3(a, b, c, d, e) \ + MOVL b, SI; \ + ORL c, SI; \ + ANDL d, SI; \ + MOVL b, DI; \ + ANDL c, DI; \ + ORL SI, DI + +#define FUNC4 FUNC2 + +#define MIX(a, b, c, d, e, const) \ + ROLL $30, b; \ + ADDL DI, e; \ + MOVL a, SI; \ + ROLL $5, SI; \ + LEAL const(e)(SI*1), e + +#define ROUND1(a, b, c, d, e, index) \ + LOAD(index, e); \ + FUNC1(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x5A827999) + +#define ROUND1x(a, b, c, d, e, index) \ + SHUFFLE(index, e); \ + FUNC1(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x5A827999) + +#define ROUND2(a, b, c, d, e, index) \ + SHUFFLE(index, e); \ + FUNC2(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x6ED9EBA1) + +#define ROUND3(a, b, c, d, e, index) \ + SHUFFLE(index, e); \ + FUNC3(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x8F1BBCDC) + +#define ROUND4(a, b, c, d, e, index) \ + SHUFFLE(index, e); \ + FUNC4(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0xCA62C1D6) + +// func block(dig *digest, p []byte) +TEXT ·block(SB),7,$92-16 + MOVL dig+0(FP), BP + MOVL p+4(FP), SI + MOVL p_len+8(FP), DX + SHRL $6, DX + SHLL $6, DX + + LEAL (SI)(DX*1), DI + MOVL (0*4)(BP), AX + MOVL (1*4)(BP), BX + MOVL (2*4)(BP), CX + MOVL (3*4)(BP), DX + MOVL (4*4)(BP), BP + + CMPL SI, DI + JEQ end + + MOVL DI, 84(SP) + +loop: + MOVL SI, 88(SP) + + MOVL AX, 64(SP) + MOVL BX, 68(SP) + MOVL CX, 72(SP) + MOVL DX, 76(SP) + MOVL BP, 80(SP) + + ROUND1(AX, BX, CX, DX, BP, 0) + ROUND1(BP, AX, BX, CX, DX, 1) + ROUND1(DX, BP, AX, BX, CX, 2) + ROUND1(CX, DX, BP, AX, BX, 3) + ROUND1(BX, CX, DX, BP, AX, 4) + ROUND1(AX, BX, CX, DX, BP, 5) + ROUND1(BP, AX, BX, CX, DX, 6) + ROUND1(DX, BP, AX, BX, CX, 7) + ROUND1(CX, DX, BP, AX, BX, 8) + ROUND1(BX, CX, DX, BP, AX, 9) + ROUND1(AX, BX, CX, DX, BP, 10) + ROUND1(BP, AX, BX, CX, DX, 11) + ROUND1(DX, BP, AX, BX, CX, 12) + ROUND1(CX, DX, BP, AX, BX, 13) + ROUND1(BX, CX, DX, BP, AX, 14) + ROUND1(AX, BX, CX, DX, BP, 15) + + ROUND1x(BP, AX, BX, CX, DX, 16) + ROUND1x(DX, BP, AX, BX, CX, 17) + ROUND1x(CX, DX, BP, AX, BX, 18) + ROUND1x(BX, CX, DX, BP, AX, 19) + + ROUND2(AX, BX, CX, DX, BP, 20) + ROUND2(BP, AX, BX, CX, DX, 21) + ROUND2(DX, BP, AX, BX, CX, 22) + ROUND2(CX, DX, BP, AX, BX, 23) + ROUND2(BX, CX, DX, BP, AX, 24) + ROUND2(AX, BX, CX, DX, BP, 25) + ROUND2(BP, AX, BX, CX, DX, 26) + ROUND2(DX, BP, AX, BX, CX, 27) + ROUND2(CX, DX, BP, AX, BX, 28) + ROUND2(BX, CX, DX, BP, AX, 29) + ROUND2(AX, BX, CX, DX, BP, 30) + ROUND2(BP, AX, BX, CX, DX, 31) + ROUND2(DX, BP, AX, BX, CX, 32) + ROUND2(CX, DX, BP, AX, BX, 33) + ROUND2(BX, CX, DX, BP, AX, 34) + ROUND2(AX, BX, CX, DX, BP, 35) + ROUND2(BP, AX, BX, CX, DX, 36) + ROUND2(DX, BP, AX, BX, CX, 37) + ROUND2(CX, DX, BP, AX, BX, 38) + ROUND2(BX, CX, DX, BP, AX, 39) + + ROUND3(AX, BX, CX, DX, BP, 40) + ROUND3(BP, AX, BX, CX, DX, 41) + ROUND3(DX, BP, AX, BX, CX, 42) + ROUND3(CX, DX, BP, AX, BX, 43) + ROUND3(BX, CX, DX, BP, AX, 44) + ROUND3(AX, BX, CX, DX, BP, 45) + ROUND3(BP, AX, BX, CX, DX, 46) + ROUND3(DX, BP, AX, BX, CX, 47) + ROUND3(CX, DX, BP, AX, BX, 48) + ROUND3(BX, CX, DX, BP, AX, 49) + ROUND3(AX, BX, CX, DX, BP, 50) + ROUND3(BP, AX, BX, CX, DX, 51) + ROUND3(DX, BP, AX, BX, CX, 52) + ROUND3(CX, DX, BP, AX, BX, 53) + ROUND3(BX, CX, DX, BP, AX, 54) + ROUND3(AX, BX, CX, DX, BP, 55) + ROUND3(BP, AX, BX, CX, DX, 56) + ROUND3(DX, BP, AX, BX, CX, 57) + ROUND3(CX, DX, BP, AX, BX, 58) + ROUND3(BX, CX, DX, BP, AX, 59) + + ROUND4(AX, BX, CX, DX, BP, 60) + ROUND4(BP, AX, BX, CX, DX, 61) + ROUND4(DX, BP, AX, BX, CX, 62) + ROUND4(CX, DX, BP, AX, BX, 63) + ROUND4(BX, CX, DX, BP, AX, 64) + ROUND4(AX, BX, CX, DX, BP, 65) + ROUND4(BP, AX, BX, CX, DX, 66) + ROUND4(DX, BP, AX, BX, CX, 67) + ROUND4(CX, DX, BP, AX, BX, 68) + ROUND4(BX, CX, DX, BP, AX, 69) + ROUND4(AX, BX, CX, DX, BP, 70) + ROUND4(BP, AX, BX, CX, DX, 71) + ROUND4(DX, BP, AX, BX, CX, 72) + ROUND4(CX, DX, BP, AX, BX, 73) + ROUND4(BX, CX, DX, BP, AX, 74) + ROUND4(AX, BX, CX, DX, BP, 75) + ROUND4(BP, AX, BX, CX, DX, 76) + ROUND4(DX, BP, AX, BX, CX, 77) + ROUND4(CX, DX, BP, AX, BX, 78) + ROUND4(BX, CX, DX, BP, AX, 79) + + ADDL 64(SP), AX + ADDL 68(SP), BX + ADDL 72(SP), CX + ADDL 76(SP), DX + ADDL 80(SP), BP + + MOVL 88(SP), SI + ADDL $64, SI + CMPL SI, 84(SP) + JB loop + +end: + MOVL dig+0(FP), DI + MOVL AX, (0*4)(DI) + MOVL BX, (1*4)(DI) + MOVL CX, (2*4)(DI) + MOVL DX, (3*4)(DI) + MOVL BP, (4*4)(DI) + RET diff --git a/src/pkg/crypto/sha1/sha1block_amd64.s b/src/pkg/crypto/sha1/sha1block_amd64.s new file mode 100644 index 000000000..452578aa4 --- /dev/null +++ b/src/pkg/crypto/sha1/sha1block_amd64.s @@ -0,0 +1,216 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// SHA1 block routine. See sha1block.go for Go equivalent. +// +// There are 80 rounds of 4 types: +// - rounds 0-15 are type 1 and load data (ROUND1 macro). +// - rounds 16-19 are type 1 and do not load data (ROUND1x macro). +// - rounds 20-39 are type 2 and do not load data (ROUND2 macro). +// - rounds 40-59 are type 3 and do not load data (ROUND3 macro). +// - rounds 60-79 are type 4 and do not load data (ROUND4 macro). +// +// Each round loads or shuffles the data, then computes a per-round +// function of b, c, d, and then mixes the result into and rotates the +// five registers a, b, c, d, e holding the intermediate results. +// +// The register rotation is implemented by rotating the arguments to +// the round macros instead of by explicit move instructions. + +#define LOAD(index) \ + MOVL (index*4)(SI), R10; \ + BSWAPL R10; \ + MOVL R10, (index*4)(SP) + +#define SHUFFLE(index) \ + MOVL (((index)&0xf)*4)(SP), R10; \ + XORL (((index-3)&0xf)*4)(SP), R10; \ + XORL (((index-8)&0xf)*4)(SP), R10; \ + XORL (((index-14)&0xf)*4)(SP), R10; \ + ROLL $1, R10; \ + MOVL R10, (((index)&0xf)*4)(SP) + +#define FUNC1(a, b, c, d, e) \ + MOVL b, R8; \ + ANDL c, R8; \ + MOVL b, R9; \ + NOTL R9; \ + ANDL d, R9; \ + ORL R8, R9 + +#define FUNC2(a, b, c, d, e) \ + MOVL b, R9; \ + XORL c, R9; \ + XORL d, R9 + +#define FUNC3(a, b, c, d, e) \ + MOVL b, R8; \ + ORL c, R8; \ + ANDL d, R8; \ + MOVL b, R9; \ + ANDL c, R9; \ + ORL R8, R9 + +#define FUNC4 FUNC2 + +#define MIX(a, b, c, d, e, const) \ + ROLL $30, b; \ + ADDL R9, e; \ + MOVL a, R8; \ + ROLL $5, R8; \ + LEAL const(e)(R10*1), e; \ + ADDL R8, e + +#define ROUND1(a, b, c, d, e, index) \ + LOAD(index); \ + FUNC1(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x5A827999) + +#define ROUND1x(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC1(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x5A827999) + +#define ROUND2(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC2(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x6ED9EBA1) + +#define ROUND3(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC3(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0x8F1BBCDC) + +#define ROUND4(a, b, c, d, e, index) \ + SHUFFLE(index); \ + FUNC4(a, b, c, d, e); \ + MIX(a, b, c, d, e, 0xCA62C1D6) + +TEXT ·block(SB),7,$64-32 + MOVQ dig+0(FP), BP + MOVQ p_base+8(FP), SI + MOVQ p_len+16(FP), DX + SHRQ $6, DX + SHLQ $6, DX + + LEAQ (SI)(DX*1), DI + MOVL (0*4)(BP), AX + MOVL (1*4)(BP), BX + MOVL (2*4)(BP), CX + MOVL (3*4)(BP), DX + MOVL (4*4)(BP), BP + + CMPQ SI, DI + JEQ end + +loop: + MOVL AX, R11 + MOVL BX, R12 + MOVL CX, R13 + MOVL DX, R14 + MOVL BP, R15 + + ROUND1(AX, BX, CX, DX, BP, 0) + ROUND1(BP, AX, BX, CX, DX, 1) + ROUND1(DX, BP, AX, BX, CX, 2) + ROUND1(CX, DX, BP, AX, BX, 3) + ROUND1(BX, CX, DX, BP, AX, 4) + ROUND1(AX, BX, CX, DX, BP, 5) + ROUND1(BP, AX, BX, CX, DX, 6) + ROUND1(DX, BP, AX, BX, CX, 7) + ROUND1(CX, DX, BP, AX, BX, 8) + ROUND1(BX, CX, DX, BP, AX, 9) + ROUND1(AX, BX, CX, DX, BP, 10) + ROUND1(BP, AX, BX, CX, DX, 11) + ROUND1(DX, BP, AX, BX, CX, 12) + ROUND1(CX, DX, BP, AX, BX, 13) + ROUND1(BX, CX, DX, BP, AX, 14) + ROUND1(AX, BX, CX, DX, BP, 15) + + ROUND1x(BP, AX, BX, CX, DX, 16) + ROUND1x(DX, BP, AX, BX, CX, 17) + ROUND1x(CX, DX, BP, AX, BX, 18) + ROUND1x(BX, CX, DX, BP, AX, 19) + + ROUND2(AX, BX, CX, DX, BP, 20) + ROUND2(BP, AX, BX, CX, DX, 21) + ROUND2(DX, BP, AX, BX, CX, 22) + ROUND2(CX, DX, BP, AX, BX, 23) + ROUND2(BX, CX, DX, BP, AX, 24) + ROUND2(AX, BX, CX, DX, BP, 25) + ROUND2(BP, AX, BX, CX, DX, 26) + ROUND2(DX, BP, AX, BX, CX, 27) + ROUND2(CX, DX, BP, AX, BX, 28) + ROUND2(BX, CX, DX, BP, AX, 29) + ROUND2(AX, BX, CX, DX, BP, 30) + ROUND2(BP, AX, BX, CX, DX, 31) + ROUND2(DX, BP, AX, BX, CX, 32) + ROUND2(CX, DX, BP, AX, BX, 33) + ROUND2(BX, CX, DX, BP, AX, 34) + ROUND2(AX, BX, CX, DX, BP, 35) + ROUND2(BP, AX, BX, CX, DX, 36) + ROUND2(DX, BP, AX, BX, CX, 37) + ROUND2(CX, DX, BP, AX, BX, 38) + ROUND2(BX, CX, DX, BP, AX, 39) + + ROUND3(AX, BX, CX, DX, BP, 40) + ROUND3(BP, AX, BX, CX, DX, 41) + ROUND3(DX, BP, AX, BX, CX, 42) + ROUND3(CX, DX, BP, AX, BX, 43) + ROUND3(BX, CX, DX, BP, AX, 44) + ROUND3(AX, BX, CX, DX, BP, 45) + ROUND3(BP, AX, BX, CX, DX, 46) + ROUND3(DX, BP, AX, BX, CX, 47) + ROUND3(CX, DX, BP, AX, BX, 48) + ROUND3(BX, CX, DX, BP, AX, 49) + ROUND3(AX, BX, CX, DX, BP, 50) + ROUND3(BP, AX, BX, CX, DX, 51) + ROUND3(DX, BP, AX, BX, CX, 52) + ROUND3(CX, DX, BP, AX, BX, 53) + ROUND3(BX, CX, DX, BP, AX, 54) + ROUND3(AX, BX, CX, DX, BP, 55) + ROUND3(BP, AX, BX, CX, DX, 56) + ROUND3(DX, BP, AX, BX, CX, 57) + ROUND3(CX, DX, BP, AX, BX, 58) + ROUND3(BX, CX, DX, BP, AX, 59) + + ROUND4(AX, BX, CX, DX, BP, 60) + ROUND4(BP, AX, BX, CX, DX, 61) + ROUND4(DX, BP, AX, BX, CX, 62) + ROUND4(CX, DX, BP, AX, BX, 63) + ROUND4(BX, CX, DX, BP, AX, 64) + ROUND4(AX, BX, CX, DX, BP, 65) + ROUND4(BP, AX, BX, CX, DX, 66) + ROUND4(DX, BP, AX, BX, CX, 67) + ROUND4(CX, DX, BP, AX, BX, 68) + ROUND4(BX, CX, DX, BP, AX, 69) + ROUND4(AX, BX, CX, DX, BP, 70) + ROUND4(BP, AX, BX, CX, DX, 71) + ROUND4(DX, BP, AX, BX, CX, 72) + ROUND4(CX, DX, BP, AX, BX, 73) + ROUND4(BX, CX, DX, BP, AX, 74) + ROUND4(AX, BX, CX, DX, BP, 75) + ROUND4(BP, AX, BX, CX, DX, 76) + ROUND4(DX, BP, AX, BX, CX, 77) + ROUND4(CX, DX, BP, AX, BX, 78) + ROUND4(BX, CX, DX, BP, AX, 79) + + ADDL R11, AX + ADDL R12, BX + ADDL R13, CX + ADDL R14, DX + ADDL R15, BP + + ADDQ $64, SI + CMPQ SI, DI + JB loop + +end: + MOVQ dig+0(FP), DI + MOVL AX, (0*4)(DI) + MOVL BX, (1*4)(DI) + MOVL CX, (2*4)(DI) + MOVL DX, (3*4)(DI) + MOVL BP, (4*4)(DI) + RET diff --git a/src/pkg/crypto/sha1/sha1block_decl.go b/src/pkg/crypto/sha1/sha1block_decl.go new file mode 100644 index 000000000..3512a5829 --- /dev/null +++ b/src/pkg/crypto/sha1/sha1block_decl.go @@ -0,0 +1,9 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64 386 + +package sha1 + +func block(dig *digest, p []byte) diff --git a/src/pkg/crypto/tls/common.go b/src/pkg/crypto/tls/common.go index a888df762..f86c90de7 100644 --- a/src/pkg/crypto/tls/common.go +++ b/src/pkg/crypto/tls/common.go @@ -204,7 +204,24 @@ type Config struct { // connections using that key are compromised. SessionTicketKey [32]byte - serverInitOnce sync.Once + serverInitOnce sync.Once // guards calling (*Config).serverInit +} + +func (c *Config) serverInit() { + if c.SessionTicketsDisabled { + return + } + + // If the key has already been set then we have nothing to do. + for _, b := range c.SessionTicketKey { + if b != 0 { + return + } + } + + if _, err := io.ReadFull(c.rand(), c.SessionTicketKey[:]); err != nil { + c.SessionTicketsDisabled = true + } } func (c *Config) rand() io.Reader { diff --git a/src/pkg/crypto/tls/handshake_server.go b/src/pkg/crypto/tls/handshake_server.go index 730991016..823730c60 100644 --- a/src/pkg/crypto/tls/handshake_server.go +++ b/src/pkg/crypto/tls/handshake_server.go @@ -33,22 +33,7 @@ func (c *Conn) serverHandshake() error { // If this is the first server handshake, we generate a random key to // encrypt the tickets with. - config.serverInitOnce.Do(func() { - if config.SessionTicketsDisabled { - return - } - - // If the key has already been set then we have nothing to do. - for _, b := range config.SessionTicketKey { - if b != 0 { - return - } - } - - if _, err := io.ReadFull(config.rand(), config.SessionTicketKey[:]); err != nil { - config.SessionTicketsDisabled = true - } - }) + config.serverInitOnce.Do(config.serverInit) hs := serverHandshakeState{ c: c, diff --git a/src/pkg/crypto/x509/pkcs8.go b/src/pkg/crypto/x509/pkcs8.go index 30caacb3c..8e1585e15 100644 --- a/src/pkg/crypto/x509/pkcs8.go +++ b/src/pkg/crypto/x509/pkcs8.go @@ -51,6 +51,4 @@ func ParsePKCS8PrivateKey(der []byte) (key interface{}, err error) { default: return nil, fmt.Errorf("crypto/x509: PKCS#8 wrapping contained private key with unknown algorithm: %v", privKey.Algo.Algorithm) } - - panic("unreachable") } diff --git a/src/pkg/crypto/x509/x509.go b/src/pkg/crypto/x509/x509.go index b802bf4eb..4dfea2c94 100644 --- a/src/pkg/crypto/x509/x509.go +++ b/src/pkg/crypto/x509/x509.go @@ -729,7 +729,6 @@ func parsePublicKey(algo PublicKeyAlgorithm, keyData *publicKeyInfo) (interface{ default: return nil, nil } - panic("unreachable") } func parseCertificate(in *certificate) (*Certificate, error) { |