diff options
Diffstat (limited to 'src/crypto/rc4')
-rw-r--r--  src/crypto/rc4/rc4.go           69
-rw-r--r--  src/crypto/rc4/rc4_386.s        53
-rw-r--r--  src/crypto/rc4/rc4_amd64.s     179
-rw-r--r--  src/crypto/rc4/rc4_amd64p32.s  192
-rw-r--r--  src/crypto/rc4/rc4_arm.s        62
-rw-r--r--  src/crypto/rc4/rc4_asm.go       18
-rw-r--r--  src/crypto/rc4/rc4_ref.go       13
-rw-r--r--  src/crypto/rc4/rc4_test.go     173
8 files changed, 759 insertions, 0 deletions
diff --git a/src/crypto/rc4/rc4.go b/src/crypto/rc4/rc4.go
new file mode 100644
index 000000000..9acb681bf
--- /dev/null
+++ b/src/crypto/rc4/rc4.go
@@ -0,0 +1,69 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package rc4 implements RC4 encryption, as defined in Bruce Schneier's
+// Applied Cryptography.
+package rc4
+
+// BUG(agl): RC4 is in common use but has design weaknesses that make
+// it a poor choice for new protocols.
+
+import "strconv"
+
+// A Cipher is an instance of RC4 using a particular key.
+type Cipher struct {
+	s    [256]uint32
+	i, j uint8
+}
+
+type KeySizeError int
+
+func (k KeySizeError) Error() string {
+	return "crypto/rc4: invalid key size " + strconv.Itoa(int(k))
+}
+
+// NewCipher creates and returns a new Cipher. The key argument should be the
+// RC4 key, at least 1 byte and at most 256 bytes.
+func NewCipher(key []byte) (*Cipher, error) {
+	k := len(key)
+	if k < 1 || k > 256 {
+		return nil, KeySizeError(k)
+	}
+	var c Cipher
+	for i := 0; i < 256; i++ {
+		c.s[i] = uint32(i)
+	}
+	var j uint8 = 0
+	for i := 0; i < 256; i++ {
+		j += uint8(c.s[i]) + key[i%k]
+		c.s[i], c.s[j] = c.s[j], c.s[i]
+	}
+	return &c, nil
+}
+
+// Reset zeros the key data so that it will no longer appear in the
+// process's memory.
+func (c *Cipher) Reset() {
+	for i := range c.s {
+		c.s[i] = 0
+	}
+	c.i, c.j = 0, 0
+}
+
+// xorKeyStreamGeneric sets dst to the result of XORing src with the
+// key stream. Dst and src may be the same slice but otherwise should
+// not overlap.
+//
+// This is the pure Go version. rc4_{amd64,386,arm}* contain assembly
+// implementations. This is here for tests and to prevent bitrot.
+func (c *Cipher) xorKeyStreamGeneric(dst, src []byte) {
+	i, j := c.i, c.j
+	for k, v := range src {
+		i += 1
+		j += uint8(c.s[i])
+		c.s[i], c.s[j] = c.s[j], c.s[i]
+		dst[k] = v ^ uint8(c.s[uint8(c.s[i]+c.s[j])])
+	}
+	c.i, c.j = i, j
+}
diff --git a/src/crypto/rc4/rc4_386.s b/src/crypto/rc4/rc4_386.s
new file mode 100644
index 000000000..54221036b
--- /dev/null
+++ b/src/crypto/rc4/rc4_386.s
@@ -0,0 +1,53 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8)
+TEXT ·xorKeyStream(SB),NOSPLIT,$0
+	MOVL dst+0(FP), DI
+	MOVL src+4(FP), SI
+	MOVL state+12(FP), BP
+
+	MOVL i+16(FP), AX
+	MOVBLZX (AX), AX
+	MOVL j+20(FP), BX
+	MOVBLZX (BX), BX
+	CMPL n+8(FP), $0
+	JEQ done
+
+loop:
+	// i += 1
+	INCB AX
+
+	// j += c.s[i]
+	MOVBLZX (BP)(AX*4), DX
+	ADDB DX, BX
+	MOVBLZX BX, BX
+
+	// c.s[i], c.s[j] = c.s[j], c.s[i]
+	MOVBLZX (BP)(BX*4), CX
+	MOVB CX, (BP)(AX*4)
+	MOVB DX, (BP)(BX*4)
+
+	// *dst = *src ^ c.s[c.s[i]+c.s[j]]
+	ADDB DX, CX
+	MOVBLZX CX, CX
+	MOVB (BP)(CX*4), CX
+	XORB (SI), CX
+	MOVBLZX CX, CX
+	MOVB CX, (DI)
+
+	INCL SI
+	INCL DI
+	DECL n+8(FP)
+	JNE loop
+
+done:
+	MOVL i+16(FP), CX
+	MOVB AX, (CX)
+	MOVL j+20(FP), CX
+	MOVB BX, (CX)
+
+	RET
diff --git a/src/crypto/rc4/rc4_amd64.s b/src/crypto/rc4/rc4_amd64.s
new file mode 100644
index 000000000..57d941c8f
--- /dev/null
+++ b/src/crypto/rc4/rc4_amd64.s
@@ -0,0 +1,179 @@
+// Original source:
+//	http://www.zorinaq.com/papers/rc4-amd64.html
+//	http://www.zorinaq.com/papers/rc4-amd64.tar.bz2
+
+#include "textflag.h"
+
+// Local modifications:
+//
+// Transliterated from GNU to 6a assembly syntax by the Go authors.
+// The comments and spacing are from the original.
+//
+// The new EXTEND macros avoid a bad stall on some systems after 8-bit math.
+//
+// The original code accumulated 64 bits of key stream in an integer
+// register and then XOR'ed the key stream into the data 8 bytes at a time.
+// Modified to accumulate 128 bits of key stream into an XMM register
+// and then XOR the key stream into the data 16 bytes at a time.
+// Approximately doubles throughput.
+
+// NOTE: Changing EXTEND to a no-op makes the code run 1.2x faster on Core i5
+// but makes the code run 2.0x slower on Xeon.
+#define EXTEND(r) MOVBLZX r, r
+
+/*
+** RC4 implementation optimized for AMD64.
+**
+** Author: Marc Bevand <bevand_m (at) epita.fr>
+** Licence: I hereby disclaim the copyright on this code and place it
+** in the public domain.
+**
+** The code has been designed to be easily integrated into openssl:
+** the exported RC4() function can replace the actual implementations
+** openssl already contains. Please note that when linking with openssl,
+** it requires that sizeof(RC4_INT) == 8. So openssl must be compiled
+** with -DRC4_INT='unsigned long'.
+**
+** The throughput achieved by this code is about 320 MBytes/sec, on
+** a 1.8 GHz AMD Opteron (rev C0) processor.
+*/
+
+TEXT ·xorKeyStream(SB),NOSPLIT,$0
+	MOVQ n+16(FP), BX		// rbx = ARG(len)
+	MOVQ src+8(FP), SI		// in = ARG(in)
+	MOVQ dst+0(FP), DI		// out = ARG(out)
+	MOVQ state+24(FP), BP		// d = ARG(data)
+	MOVQ i+32(FP), AX
+	MOVBQZX 0(AX), CX		// x = *xp
+	MOVQ j+40(FP), AX
+	MOVBQZX 0(AX), DX		// y = *yp
+
+	LEAQ (SI)(BX*1), R9		// limit = in+len
+
+l1:	CMPQ SI, R9			// cmp in with in+len
+	JGE finished			// jump if (in >= in+len)
+
+	INCB CX
+	EXTEND(CX)
+	TESTL $15, CX
+	JZ wordloop
+
+	MOVBLZX (BP)(CX*4), AX
+
+	ADDB AX, DX			// y += tx
+	EXTEND(DX)
+	MOVBLZX (BP)(DX*4), BX		// ty = d[y]
+	MOVB BX, (BP)(CX*4)		// d[x] = ty
+	ADDB AX, BX			// val = ty+tx
+	EXTEND(BX)
+	MOVB AX, (BP)(DX*4)		// d[y] = tx
+	MOVBLZX (BP)(BX*4), R8		// val = d[val]
+	XORB (SI), R8			// xor 1 byte
+	MOVB R8, (DI)
+	INCQ SI				// in++
+	INCQ DI				// out++
+	JMP l1
+
+wordloop:
+	SUBQ $16, R9
+	CMPQ SI, R9
+	JGT end
+
+start:
+	ADDQ $16, SI			// increment in
+	ADDQ $16, DI			// increment out
+
+	// Each KEYROUND generates one byte of key and
+	// inserts it into an XMM register at the given 16-bit index.
+	// The key state array is uint32 words only using the bottom
+	// byte of each word, so the 16-bit OR only copies 8 useful bits.
+	// We accumulate alternating bytes into X0 and X1, and then at
+	// the end we OR X1<<8 into X0 to produce the actual key.
+	//
+	// At the beginning of the loop, CX%16 == 0, so the 16 loads
+	// at state[CX], state[CX+1], ..., state[CX+15] can precompute
+	// (state+CX) as R12 and then become R12[0], R12[1], ... R12[15],
+	// without fear of the byte computation CX+15 wrapping around.
+	//
+	// The first round needs R12[0], the second needs R12[1], and so on.
+	// We can avoid memory stalls by starting the load for round n+1
+	// before the end of round n, using the LOAD macro.
+	LEAQ (BP)(CX*4), R12
+
+#define KEYROUND(xmm, load, off, r1, r2, index) \
+	MOVBLZX	(BP)(DX*4), R8; \
+	MOVB	r1, (BP)(DX*4); \
+	load((off+1), r2); \
+	MOVB	R8, (off*4)(R12); \
+	ADDB	r1, R8; \
+	EXTEND(R8); \
+	PINSRW	$index, (BP)(R8*4), xmm
+
+#define LOAD(off, reg) \
+	MOVBLZX	(off*4)(R12), reg; \
+	ADDB	reg, DX; \
+	EXTEND(DX)
+
+#define SKIP(off, reg)
+
+	LOAD(0, AX)
+	KEYROUND(X0, LOAD, 0, AX, BX, 0)
+	KEYROUND(X1, LOAD, 1, BX, AX, 0)
+	KEYROUND(X0, LOAD, 2, AX, BX, 1)
+	KEYROUND(X1, LOAD, 3, BX, AX, 1)
+	KEYROUND(X0, LOAD, 4, AX, BX, 2)
+	KEYROUND(X1, LOAD, 5, BX, AX, 2)
+	KEYROUND(X0, LOAD, 6, AX, BX, 3)
+	KEYROUND(X1, LOAD, 7, BX, AX, 3)
+	KEYROUND(X0, LOAD, 8, AX, BX, 4)
+	KEYROUND(X1, LOAD, 9, BX, AX, 4)
+	KEYROUND(X0, LOAD, 10, AX, BX, 5)
+	KEYROUND(X1, LOAD, 11, BX, AX, 5)
+	KEYROUND(X0, LOAD, 12, AX, BX, 6)
+	KEYROUND(X1, LOAD, 13, BX, AX, 6)
+	KEYROUND(X0, LOAD, 14, AX, BX, 7)
+	KEYROUND(X1, SKIP, 15, BX, AX, 7)
+
+	ADDB $16, CX
+
+	PSLLQ $8, X1
+	PXOR X1, X0
+	MOVOU -16(SI), X2
+	PXOR X0, X2
+	MOVOU X2, -16(DI)
+
+	CMPQ SI, R9			// cmp in with in+len-16
+	JLE start			// jump if (in <= in+len-16)
+
+end:
+	DECB CX
+	ADDQ $16, R9			// tmp = in+len
+
+	// handle the last bytes, one by one
+l2:	CMPQ SI, R9			// cmp in with in+len
+	JGE finished			// jump if (in >= in+len)
+
+	INCB CX
+	EXTEND(CX)
+	MOVBLZX (BP)(CX*4), AX
+
+	ADDB AX, DX			// y += tx
+	EXTEND(DX)
+	MOVBLZX (BP)(DX*4), BX		// ty = d[y]
+	MOVB BX, (BP)(CX*4)		// d[x] = ty
+	ADDB AX, BX			// val = ty+tx
+	EXTEND(BX)
+	MOVB AX, (BP)(DX*4)		// d[y] = tx
+	MOVBLZX (BP)(BX*4), R8		// val = d[val]
+	XORB (SI), R8			// xor 1 byte
+	MOVB R8, (DI)
+	INCQ SI				// in++
+	INCQ DI				// out++
+	JMP l2
+
+finished:
+	MOVQ j+40(FP), BX
+	MOVB DX, 0(BX)
+	MOVQ i+32(FP), AX
+	MOVB CX, 0(AX)
+	RET
diff --git a/src/crypto/rc4/rc4_amd64p32.s b/src/crypto/rc4/rc4_amd64p32.s
new file mode 100644
index 000000000..970b34e08
--- /dev/null
+++ b/src/crypto/rc4/rc4_amd64p32.s
@@ -0,0 +1,192 @@
+// Original source:
+//	http://www.zorinaq.com/papers/rc4-amd64.html
+//	http://www.zorinaq.com/papers/rc4-amd64.tar.bz2
+
+#include "textflag.h"
+
+// Local modifications:
+//
+// Transliterated from GNU to 6a assembly syntax by the Go authors.
+// The comments and spacing are from the original.
+//
+// The new EXTEND macros avoid a bad stall on some systems after 8-bit math.
+//
+// The original code accumulated 64 bits of key stream in an integer
+// register and then XOR'ed the key stream into the data 8 bytes at a time.
+// Modified to accumulate 128 bits of key stream into an XMM register
+// and then XOR the key stream into the data 16 bytes at a time.
+// Approximately doubles throughput.
+//
+// Converted to amd64p32.
+//
+// To make safe for Native Client, avoid use of BP, R15,
+// and two-register addressing modes.
+
+// NOTE: Changing EXTEND to a no-op makes the code run 1.2x faster on Core i5
+// but makes the code run 2.0x slower on Xeon.
+#define EXTEND(r) MOVBLZX r, r
+
+/*
+** RC4 implementation optimized for AMD64.
+**
+** Author: Marc Bevand <bevand_m (at) epita.fr>
+** Licence: I hereby disclaim the copyright on this code and place it
+** in the public domain.
+**
+** The code has been designed to be easily integrated into openssl:
+** the exported RC4() function can replace the actual implementations
+** openssl already contains. Please note that when linking with openssl,
+** it requires that sizeof(RC4_INT) == 8. So openssl must be compiled
+** with -DRC4_INT='unsigned long'.
+**
+** The throughput achieved by this code is about 320 MBytes/sec, on
+** a 1.8 GHz AMD Opteron (rev C0) processor.
+*/
+
+TEXT ·xorKeyStream(SB),NOSPLIT,$0
+	MOVL n+8(FP), BX		// rbx = ARG(len)
+	MOVL src+4(FP), SI		// in = ARG(in)
+	MOVL dst+0(FP), DI		// out = ARG(out)
+	MOVL state+12(FP), R10		// d = ARG(data)
+	MOVL i+16(FP), AX
+	MOVBQZX 0(AX), CX		// x = *xp
+	MOVL j+20(FP), AX
+	MOVBQZX 0(AX), DX		// y = *yp
+
+	LEAQ (SI)(BX*1), R9		// limit = in+len
+
+l1:	CMPQ SI, R9			// cmp in with in+len
+	JGE finished			// jump if (in >= in+len)
+
+	INCB CX
+	EXTEND(CX)
+	TESTL $15, CX
+	JZ wordloop
+	LEAL (R10)(CX*4), R12
+
+	MOVBLZX (R12), AX
+
+	ADDB AX, DX			// y += tx
+	EXTEND(DX)
+	LEAL (R10)(DX*4), R11
+	MOVBLZX (R11), BX		// ty = d[y]
+	MOVB BX, (R12)			// d[x] = ty
+	ADDB AX, BX			// val = ty+tx
+	EXTEND(BX)
+	LEAL (R10)(BX*4), R13
+	MOVB AX, (R11)			// d[y] = tx
+	MOVBLZX (R13), R8		// val = d[val]
+	XORB (SI), R8			// xor 1 byte
+	MOVB R8, (DI)
+	INCQ SI				// in++
+	INCQ DI				// out++
+	JMP l1
+
+wordloop:
+	SUBQ $16, R9
+	CMPQ SI, R9
+	JGT end
+
+start:
+	ADDQ $16, SI			// increment in
+	ADDQ $16, DI			// increment out
+
+	// Each KEYROUND generates one byte of key and
+	// inserts it into an XMM register at the given 16-bit index.
+	// The key state array is uint32 words only using the bottom
+	// byte of each word, so the 16-bit OR only copies 8 useful bits.
+	// We accumulate alternating bytes into X0 and X1, and then at
+	// the end we OR X1<<8 into X0 to produce the actual key.
+	//
+	// At the beginning of the loop, CX%16 == 0, so the 16 loads
+	// at state[CX], state[CX+1], ..., state[CX+15] can precompute
+	// (state+CX) as R12 and then become R12[0], R12[1], ... R12[15],
+	// without fear of the byte computation CX+15 wrapping around.
+	//
+	// The first round needs R12[0], the second needs R12[1], and so on.
+	// We can avoid memory stalls by starting the load for round n+1
+	// before the end of round n, using the LOAD macro.
+	LEAQ (R10)(CX*4), R12
+
+#define KEYROUND(xmm, load, off, r1, r2, index) \
+	LEAL	(R10)(DX*4), R11; \
+	MOVBLZX	(R11), R8; \
+	MOVB	r1, (R11); \
+	load((off+1), r2); \
+	MOVB	R8, (off*4)(R12); \
+	ADDB	r1, R8; \
+	EXTEND(R8); \
+	LEAL	(R10)(R8*4), R14; \
+	PINSRW	$index, (R14), xmm
+
+#define LOAD(off, reg) \
+	MOVBLZX	(off*4)(R12), reg; \
+	ADDB	reg, DX; \
+	EXTEND(DX)
+
+#define SKIP(off, reg)
+
+	LOAD(0, AX)
+	KEYROUND(X0, LOAD, 0, AX, BX, 0)
+	KEYROUND(X1, LOAD, 1, BX, AX, 0)
+	KEYROUND(X0, LOAD, 2, AX, BX, 1)
+	KEYROUND(X1, LOAD, 3, BX, AX, 1)
+	KEYROUND(X0, LOAD, 4, AX, BX, 2)
+	KEYROUND(X1, LOAD, 5, BX, AX, 2)
+	KEYROUND(X0, LOAD, 6, AX, BX, 3)
+	KEYROUND(X1, LOAD, 7, BX, AX, 3)
+	KEYROUND(X0, LOAD, 8, AX, BX, 4)
+	KEYROUND(X1, LOAD, 9, BX, AX, 4)
+	KEYROUND(X0, LOAD, 10, AX, BX, 5)
+	KEYROUND(X1, LOAD, 11, BX, AX, 5)
+	KEYROUND(X0, LOAD, 12, AX, BX, 6)
+	KEYROUND(X1, LOAD, 13, BX, AX, 6)
+	KEYROUND(X0, LOAD, 14, AX, BX, 7)
+	KEYROUND(X1, SKIP, 15, BX, AX, 7)
+
+	ADDB $16, CX
+
+	PSLLQ $8, X1
+	PXOR X1, X0
+	MOVOU -16(SI), X2
+	PXOR X0, X2
+	MOVOU X2, -16(DI)
+
+	CMPQ SI, R9			// cmp in with in+len-16
+	JLE start			// jump if (in <= in+len-16)
+
+end:
+	DECB CX
+	ADDQ $16, R9			// tmp = in+len
+
+	// handle the last bytes, one by one
+l2:	CMPQ SI, R9			// cmp in with in+len
+	JGE finished			// jump if (in >= in+len)
+
+	INCB CX
+	EXTEND(CX)
+	LEAL (R10)(CX*4), R12
+	MOVBLZX (R12), AX
+
+	ADDB AX, DX			// y += tx
+	EXTEND(DX)
+	LEAL (R10)(DX*4), R11
+	MOVBLZX (R11), BX		// ty = d[y]
+	MOVB BX, (R12)			// d[x] = ty
+	ADDB AX, BX			// val = ty+tx
+	EXTEND(BX)
+	LEAL (R10)(BX*4), R13
+	MOVB AX, (R11)			// d[y] = tx
+	MOVBLZX (R13), R8		// val = d[val]
+	XORB (SI), R8			// xor 1 byte
+	MOVB R8, (DI)
+	INCQ SI				// in++
+	INCQ DI				// out++
+	JMP l2
+
+finished:
+	MOVL j+20(FP), BX
+	MOVB DX, 0(BX)
+	MOVL i+16(FP), AX
+	MOVB CX, 0(AX)
+	RET
diff --git a/src/crypto/rc4/rc4_arm.s b/src/crypto/rc4/rc4_arm.s
new file mode 100644
index 000000000..51be3bf95
--- /dev/null
+++ b/src/crypto/rc4/rc4_arm.s
@@ -0,0 +1,62 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !nacl
+
+#include "textflag.h"
+
+// Registers
+dst = 0
+src = 1
+n = 2
+state = 3
+pi = 4
+pj = 5
+i = 6
+j = 7
+k = 8
+t = 11
+t2 = 12
+
+// func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8)
+TEXT ·xorKeyStream(SB),NOSPLIT,$0
+	MOVW 0(FP), R(dst)
+	MOVW 4(FP), R(src)
+	MOVW 8(FP), R(n)
+	MOVW 12(FP), R(state)
+	MOVW 16(FP), R(pi)
+	MOVW 20(FP), R(pj)
+	MOVBU (R(pi)), R(i)
+	MOVBU (R(pj)), R(j)
+	MOVW $0, R(k)
+
+loop:
+	// i += 1; j += state[i]
+	ADD $1, R(i)
+	AND $0xff, R(i)
+	MOVBU R(i)<<2(R(state)), R(t)
+	ADD R(t), R(j)
+	AND $0xff, R(j)
+
+	// swap state[i] <-> state[j]
+	MOVBU R(j)<<2(R(state)), R(t2)
+	MOVB R(t2), R(i)<<2(R(state))
+	MOVB R(t), R(j)<<2(R(state))
+
+	// dst[k] = src[k] ^ state[state[i] + state[j]]
+	ADD R(t2), R(t)
+	AND $0xff, R(t)
+	MOVBU R(t)<<2(R(state)), R(t)
+	MOVBU R(k)<<0(R(src)), R(t2)
+	EOR R(t), R(t2)
+	MOVB R(t2), R(k)<<0(R(dst))
+
+	ADD $1, R(k)
+	CMP R(k), R(n)
+	BNE loop
+
+done:
+	MOVB R(i), (R(pi))
+	MOVB R(j), (R(pj))
+	RET
diff --git a/src/crypto/rc4/rc4_asm.go b/src/crypto/rc4/rc4_asm.go
new file mode 100644
index 000000000..02e5b67d5
--- /dev/null
+++ b/src/crypto/rc4/rc4_asm.go
@@ -0,0 +1,18 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32 arm,!nacl 386
+
+package rc4
+
+func xorKeyStream(dst, src *byte, n int, state *[256]uint32, i, j *uint8)
+
+// XORKeyStream sets dst to the result of XORing src with the key stream.
+// Dst and src may be the same slice but otherwise should not overlap.
+func (c *Cipher) XORKeyStream(dst, src []byte) {
+	if len(src) == 0 {
+		return
+	}
+	xorKeyStream(&dst[0], &src[0], len(src), &c.s, &c.i, &c.j)
+}
diff --git a/src/crypto/rc4/rc4_ref.go b/src/crypto/rc4/rc4_ref.go
new file mode 100644
index 000000000..e34bd34cf
--- /dev/null
+++ b/src/crypto/rc4/rc4_ref.go
@@ -0,0 +1,13 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64,!amd64p32,!arm,!386 arm,nacl
+
+package rc4
+
+// XORKeyStream sets dst to the result of XORing src with the key stream.
+// Dst and src may be the same slice but otherwise should not overlap.
+func (c *Cipher) XORKeyStream(dst, src []byte) {
+	c.xorKeyStreamGeneric(dst, src)
+}
diff --git a/src/crypto/rc4/rc4_test.go b/src/crypto/rc4/rc4_test.go
new file mode 100644
index 000000000..af7988246
--- /dev/null
+++ b/src/crypto/rc4/rc4_test.go
@@ -0,0 +1,173 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rc4
+
+import (
+	"bytes"
+	"fmt"
+	"testing"
+)
+
+type rc4Test struct {
+	key, keystream []byte
+}
+
+var golden = []rc4Test{
+	// Test vectors from the original cypherpunk posting of ARC4:
+	// http://groups.google.com/group/sci.crypt/msg/10a300c9d21afca0?pli=1
+	{
+		[]byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef},
+		[]byte{0x74, 0x94, 0xc2, 0xe7, 0x10, 0x4b, 0x08, 0x79},
+	},
+	{
+		[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		[]byte{0xde, 0x18, 0x89, 0x41, 0xa3, 0x37, 0x5d, 0x3a},
+	},
+	{
+		[]byte{0xef, 0x01, 0x23, 0x45},
+		[]byte{0xd6, 0xa1, 0x41, 0xa7, 0xec, 0x3c, 0x38, 0xdf, 0xbd, 0x61},
+	},
+
+	// Test vectors from the Wikipedia page: http://en.wikipedia.org/wiki/RC4
+	{
+		[]byte{0x4b, 0x65, 0x79},
+		[]byte{0xeb, 0x9f, 0x77, 0x81, 0xb7, 0x34, 0xca, 0x72, 0xa7, 0x19},
+	},
+	{
+		[]byte{0x57, 0x69, 0x6b, 0x69},
+		[]byte{0x60, 0x44, 0xdb, 0x6d, 0x41, 0xb7},
+	},
+	{
+		[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		[]byte{
+			0xde, 0x18, 0x89, 0x41, 0xa3, 0x37, 0x5d, 0x3a,
+			0x8a, 0x06, 0x1e, 0x67, 0x57, 0x6e, 0x92, 0x6d,
+			0xc7, 0x1a, 0x7f, 0xa3, 0xf0, 0xcc, 0xeb, 0x97,
+			0x45, 0x2b, 0x4d, 0x32, 0x27, 0x96, 0x5f, 0x9e,
+			0xa8, 0xcc, 0x75, 0x07, 0x6d, 0x9f, 0xb9, 0xc5,
+			0x41, 0x7a, 0xa5, 0xcb, 0x30, 0xfc, 0x22, 0x19,
+			0x8b, 0x34, 0x98, 0x2d, 0xbb, 0x62, 0x9e, 0xc0,
+			0x4b, 0x4f, 0x8b, 0x05, 0xa0, 0x71, 0x08, 0x50,
+			0x92, 0xa0, 0xc3, 0x58, 0x4a, 0x48, 0xe4, 0xa3,
+			0x0a, 0x39, 0x7b, 0x8a, 0xcd, 0x1d, 0x00, 0x9e,
+			0xc8, 0x7d, 0x68, 0x11, 0xf2, 0x2c, 0xf4, 0x9c,
+			0xa3, 0xe5, 0x93, 0x54, 0xb9, 0x45, 0x15, 0x35,
+			0xa2, 0x18, 0x7a, 0x86, 0x42, 0x6c, 0xca, 0x7d,
+			0x5e, 0x82, 0x3e, 0xba, 0x00, 0x44, 0x12, 0x67,
+			0x12, 0x57, 0xb8, 0xd8, 0x60, 0xae, 0x4c, 0xbd,
+			0x4c, 0x49, 0x06, 0xbb, 0xc5, 0x35, 0xef, 0xe1,
+			0x58, 0x7f, 0x08, 0xdb, 0x33, 0x95, 0x5c, 0xdb,
+			0xcb, 0xad, 0x9b, 0x10, 0xf5, 0x3f, 0xc4, 0xe5,
+			0x2c, 0x59, 0x15, 0x65, 0x51, 0x84, 0x87, 0xfe,
+			0x08, 0x4d, 0x0e, 0x3f, 0x03, 0xde, 0xbc, 0xc9,
+			0xda, 0x1c, 0xe9, 0x0d, 0x08, 0x5c, 0x2d, 0x8a,
+			0x19, 0xd8, 0x37, 0x30, 0x86, 0x16, 0x36, 0x92,
+			0x14, 0x2b, 0xd8, 0xfc, 0x5d, 0x7a, 0x73, 0x49,
+			0x6a, 0x8e, 0x59, 0xee, 0x7e, 0xcf, 0x6b, 0x94,
+			0x06, 0x63, 0xf4, 0xa6, 0xbe, 0xe6, 0x5b, 0xd2,
+			0xc8, 0x5c, 0x46, 0x98, 0x6c, 0x1b, 0xef, 0x34,
+			0x90, 0xd3, 0x7b, 0x38, 0xda, 0x85, 0xd3, 0x2e,
+			0x97, 0x39, 0xcb, 0x23, 0x4a, 0x2b, 0xe7, 0x40,
+		},
+	},
+}
+
+func testEncrypt(t *testing.T, desc string, c *Cipher, src, expect []byte) {
+	dst := make([]byte, len(src))
+	c.XORKeyStream(dst, src)
+	for i, v := range dst {
+		if v != expect[i] {
+			t.Fatalf("%s: mismatch at byte %d:\nhave %x\nwant %x", desc, i, dst, expect)
+		}
+	}
+}
+
+func TestGolden(t *testing.T) {
+	for gi, g := range golden {
+		data := make([]byte, len(g.keystream))
+		for i := range data {
+			data[i] = byte(i)
+		}
+
+		expect := make([]byte, len(g.keystream))
+		for i := range expect {
+			expect[i] = byte(i) ^ g.keystream[i]
+		}
+
+		for size := 1; size <= len(g.keystream); size++ {
+			c, err := NewCipher(g.key)
+			if err != nil {
+				t.Fatalf("#%d: NewCipher: %v", gi, err)
+			}
+
+			off := 0
+			for off < len(g.keystream) {
+				n := len(g.keystream) - off
+				if n > size {
+					n = size
+				}
+				desc := fmt.Sprintf("#%d@[%d:%d]", gi, off, off+n)
+				testEncrypt(t, desc, c, data[off:off+n], expect[off:off+n])
+				off += n
+			}
+		}
+	}
+}
+
+func TestBlock(t *testing.T) {
+	testBlock(t, (*Cipher).XORKeyStream)
+}
+
+// Test the pure Go version.
+// Because we have assembly for amd64, 386, and arm, this prevents
+// bitrot of the reference implementations.
+func TestBlockGeneric(t *testing.T) {
+	testBlock(t, (*Cipher).xorKeyStreamGeneric)
+}
+
+func testBlock(t *testing.T, xor func(c *Cipher, dst, src []byte)) {
+	c1a, _ := NewCipher(golden[0].key)
+	c1b, _ := NewCipher(golden[1].key)
+	data1 := make([]byte, 1<<20)
+	for i := range data1 {
+		xor(c1a, data1[i:i+1], data1[i:i+1])
+		xor(c1b, data1[i:i+1], data1[i:i+1])
+	}
+
+	c2a, _ := NewCipher(golden[0].key)
+	c2b, _ := NewCipher(golden[1].key)
+	data2 := make([]byte, 1<<20)
+	xor(c2a, data2, data2)
+	xor(c2b, data2, data2)
+
+	if !bytes.Equal(data1, data2) {
+		t.Fatalf("bad block")
+	}
+}
+
+func benchmark(b *testing.B, size int64) {
+	buf := make([]byte, size)
+	c, err := NewCipher(golden[0].key)
+	if err != nil {
+		panic(err)
+	}
+	b.SetBytes(size)
+
+	for i := 0; i < b.N; i++ {
+		c.XORKeyStream(buf, buf)
+	}
+}
+
+func BenchmarkRC4_128(b *testing.B) {
+	benchmark(b, 128)
+}
+
+func BenchmarkRC4_1K(b *testing.B) {
+	benchmark(b, 1024)
+}
+
+func BenchmarkRC4_8K(b *testing.B) {
+	benchmark(b, 8096)
+}