diff options
Diffstat (limited to 'src/pkg/compress/bzip2')
-rw-r--r-- | src/pkg/compress/bzip2/bit_reader.go | 8 | ||||
-rw-r--r-- | src/pkg/compress/bzip2/bzip2.go | 167 | ||||
-rw-r--r-- | src/pkg/compress/bzip2/bzip2_test.go | 206 | ||||
-rw-r--r-- | src/pkg/compress/bzip2/huffman.go | 9 | ||||
-rw-r--r-- | src/pkg/compress/bzip2/move_to_front.go | 35 | ||||
-rw-r--r-- | src/pkg/compress/bzip2/testdata/Mark.Twain-Tom.Sawyer.txt.bz2 | bin | 0 -> 124744 bytes | |||
-rw-r--r-- | src/pkg/compress/bzip2/testdata/e.txt.bz2 | bin | 0 -> 43149 bytes |
7 files changed, 366 insertions, 59 deletions
diff --git a/src/pkg/compress/bzip2/bit_reader.go b/src/pkg/compress/bzip2/bit_reader.go index ab1d60651..32d1036ae 100644 --- a/src/pkg/compress/bzip2/bit_reader.go +++ b/src/pkg/compress/bzip2/bit_reader.go @@ -77,6 +77,14 @@ func (br *bitReader) ReadBit() bool { return n != 0 } +func (br *bitReader) TryReadBit() (bit byte, ok bool) { + if br.bits > 0 { + br.bits-- + return byte(br.n>>br.bits) & 1, true + } + return 0, false +} + func (br *bitReader) Err() error { return br.err } diff --git a/src/pkg/compress/bzip2/bzip2.go b/src/pkg/compress/bzip2/bzip2.go index 3dc8c6206..82e30c7c9 100644 --- a/src/pkg/compress/bzip2/bzip2.go +++ b/src/pkg/compress/bzip2/bzip2.go @@ -22,14 +22,17 @@ func (s StructuralError) Error() string { // A reader decompresses bzip2 compressed data. type reader struct { - br bitReader - setupDone bool // true if we have parsed the bzip2 header. - blockSize int // blockSize in bytes, i.e. 900 * 1024. - eof bool - buf []byte // stores Burrows-Wheeler transformed data. - c [256]uint // the `C' array for the inverse BWT. - tt []uint32 // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits. - tPos uint32 // Index of the next output byte in tt. + br bitReader + fileCRC uint32 + blockCRC uint32 + wantBlockCRC uint32 + setupDone bool // true if we have parsed the bzip2 header. + blockSize int // blockSize in bytes, i.e. 900 * 1024. + eof bool + buf []byte // stores Burrows-Wheeler transformed data. + c [256]uint // the `C' array for the inverse BWT. + tt []uint32 // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits. + tPos uint32 // Index of the next output byte in tt. preRLE []uint32 // contains the RLE data still to be processed. preRLEUsed int // number of entries of preRLE used. @@ -50,12 +53,14 @@ const bzip2BlockMagic = 0x314159265359 const bzip2FinalMagic = 0x177245385090 // setup parses the bzip2 header. -func (bz2 *reader) setup() error { +func (bz2 *reader) setup(needMagic bool) error { br := &bz2.br - magic := br.ReadBits(16) - if magic != bzip2FileMagic { - return StructuralError("bad magic value") + if needMagic { + magic := br.ReadBits(16) + if magic != bzip2FileMagic { + return StructuralError("bad magic value") + } } t := br.ReadBits(8) @@ -68,8 +73,11 @@ func (bz2 *reader) setup() error { return StructuralError("invalid compression level") } + bz2.fileCRC = 0 bz2.blockSize = 100 * 1024 * (int(level) - '0') - bz2.tt = make([]uint32, bz2.blockSize) + if bz2.blockSize > len(bz2.tt) { + bz2.tt = make([]uint32, bz2.blockSize) + } return nil } @@ -79,7 +87,7 @@ func (bz2 *reader) Read(buf []byte) (n int, err error) { } if !bz2.setupDone { - err = bz2.setup() + err = bz2.setup(true) brErr := bz2.br.Err() if brErr != nil { err = brErr @@ -98,14 +106,14 @@ func (bz2 *reader) Read(buf []byte) (n int, err error) { return } -func (bz2 *reader) read(buf []byte) (n int, err error) { +func (bz2 *reader) readFromBlock(buf []byte) int { // bzip2 is a block based compressor, except that it has a run-length // preprocessing step. The block based nature means that we can // preallocate fixed-size buffers and reuse them. However, the RLE // preprocessing would require allocating huge buffers to store the // maximum expansion. Thus we process blocks all at once, except for // the RLE which we decompress as required. - + n := 0 for (bz2.repeats > 0 || bz2.preRLEUsed < len(bz2.preRLE)) && n < len(buf) { // We have RLE data pending. @@ -148,34 +156,87 @@ func (bz2 *reader) read(buf []byte) (n int, err error) { n++ } - if n > 0 { - return - } + return n +} - // No RLE data is pending so we need to read a block. +func (bz2 *reader) read(buf []byte) (int, error) { + for { + n := bz2.readFromBlock(buf) + if n > 0 { + bz2.blockCRC = updateCRC(bz2.blockCRC, buf[:n]) + return n, nil + } - br := &bz2.br - magic := br.ReadBits64(48) - if magic == bzip2FinalMagic { - br.ReadBits64(32) // ignored CRC - bz2.eof = true - return 0, io.EOF - } else if magic != bzip2BlockMagic { - return 0, StructuralError("bad magic value found") - } + // End of block. Check CRC. + if bz2.blockCRC != bz2.wantBlockCRC { + bz2.br.err = StructuralError("block checksum mismatch") + return 0, bz2.br.err + } - err = bz2.readBlock() - if err != nil { - return 0, err - } + // Find next block. + br := &bz2.br + switch br.ReadBits64(48) { + default: + return 0, StructuralError("bad magic value found") + + case bzip2BlockMagic: + // Start of block. + err := bz2.readBlock() + if err != nil { + return 0, err + } - return bz2.read(buf) + case bzip2FinalMagic: + // Check end-of-file CRC. + wantFileCRC := uint32(br.ReadBits64(32)) + if br.err != nil { + return 0, br.err + } + if bz2.fileCRC != wantFileCRC { + br.err = StructuralError("file checksum mismatch") + return 0, br.err + } + + // Skip ahead to byte boundary. + // Is there a file concatenated to this one? + // It would start with BZ. + if br.bits%8 != 0 { + br.ReadBits(br.bits % 8) + } + b, err := br.r.ReadByte() + if err == io.EOF { + br.err = io.EOF + bz2.eof = true + return 0, io.EOF + } + if err != nil { + br.err = err + return 0, err + } + z, err := br.r.ReadByte() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + br.err = err + return 0, err + } + if b != 'B' || z != 'Z' { + return 0, StructuralError("bad magic value in continuation file") + } + if err := bz2.setup(false); err != nil { + return 0, err + } + } + } } // readBlock reads a bzip2 block. The magic number should already have been consumed. func (bz2 *reader) readBlock() (err error) { br := &bz2.br - br.ReadBits64(32) // skip checksum. TODO: check it if we can figure out what it is. + bz2.wantBlockCRC = uint32(br.ReadBits64(32)) // skip checksum. TODO: check it if we can figure out what it is. + bz2.blockCRC = 0 + bz2.fileCRC = (bz2.fileCRC<<1 | bz2.fileCRC>>31) ^ bz2.wantBlockCRC randomized := br.ReadBits(1) if randomized != 0 { return StructuralError("deprecated randomized files") @@ -316,6 +377,9 @@ func (bz2 *reader) readBlock() (err error) { if repeat > 0 { // We have decoded a complete run-length so we need to // replicate the last output symbol. + if repeat > bz2.blockSize-bufIndex { + return StructuralError("repeats past end of block") + } for i := 0; i < repeat; i++ { b := byte(mtf.First()) bz2.tt[bufIndex] = uint32(b) @@ -339,6 +403,9 @@ func (bz2 *reader) readBlock() (err error) { // doesn't need to be encoded and we have |v-1| in the next // line. b := byte(mtf.Decode(int(v - 1))) + if bufIndex >= bz2.blockSize { + return StructuralError("data exceeds block size") + } bz2.tt[bufIndex] = uint32(b) bz2.c[b]++ bufIndex++ @@ -385,3 +452,33 @@ func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32 { return tt[origPtr] >> 8 } + +// This is a standard CRC32 like in hash/crc32 except that all the shifts are reversed, +// causing the bits in the input to be processed in the reverse of the usual order. + +var crctab [256]uint32 + +func init() { + const poly = 0x04C11DB7 + for i := range crctab { + crc := uint32(i) << 24 + for j := 0; j < 8; j++ { + if crc&0x80000000 != 0 { + crc = (crc << 1) ^ poly + } else { + crc <<= 1 + } + } + crctab[i] = crc + } +} + +// updateCRC updates the crc value to incorporate the data in b. +// The initial value is 0. +func updateCRC(val uint32, b []byte) uint32 { + crc := ^val + for _, v := range b { + crc = crctab[byte(crc>>24)^v] ^ (crc << 8) + } + return ^crc +} diff --git a/src/pkg/compress/bzip2/bzip2_test.go b/src/pkg/compress/bzip2/bzip2_test.go index 7b227ac9f..ada1f9a00 100644 --- a/src/pkg/compress/bzip2/bzip2_test.go +++ b/src/pkg/compress/bzip2/bzip2_test.go @@ -6,6 +6,7 @@ package bzip2 import ( "bytes" + "encoding/base64" "encoding/hex" "io" "io/ioutil" @@ -62,6 +63,19 @@ func TestHelloWorldBZ2(t *testing.T) { } } +func TestConcat(t *testing.T) { + out, err := decompressHex(helloWorldBZ2Hex + helloWorldBZ2Hex) + if err != nil { + t.Errorf("error from Read: %s", err) + return + } + + hello2 := bytes.Repeat(helloWorld, 2) + if !bytes.Equal(hello2, out) { + t.Errorf("got %x, want %x", out, hello2) + } +} + func testZeros(t *testing.T, inHex string, n int) { out, err := decompressHex(inHex) if err != nil { @@ -155,3 +169,195 @@ const rand2Hex = "92d5652616ac444a4a04af1a8a3964aca0450d43d6cf233bd03233f4ba92f8 const rand3BZ2Hex = "425a68393141592653593be669d00000327ffffffffffffffffffffffffffffffffffff7ffffffffffffffffffffffffffffffc002b3b2b1b6e2bae400004c00132300004c0d268c004c08c0130026001a008683234c0684c34008c230261a04c0260064d07a8d00034000d27a1268c9931a8d327a3427a41faa69ea0da264c1a34219326869b51b49a6469a3268c689fa53269a62794687a9a68f5189994c9e487a8f534fd49a3d34043629e8c93d04da4f4648d30d4f44d3234c4d3023d0840680984d309934c234d3131a000640984f536a6132601300130130c8d00d04d1841ea7a8d31a02609b40023460010c01a34d4c1a0d04d3069306810034d0d0d4c0046130d034d0131a9a64d321804c68003400098344c13000991808c0001a00000000098004d3d4da4604c47a13012140aadf8d673c922c607ef6212a8c0403adea4b28aee578900e653b9cdeb8d11e6b838815f3ebaad5a01c5408d84a332170aff8734d4e06612d3c2889f31925fb89e33561f5100ae89b1f7047102e729373d3667e58d73aaa80fa7be368a1cc2dadd81d81ec8e1b504bd772ca31d03649269b01ceddaca07bf3d4eba24de141be3f86f93601e03714c0f64654671684f9f9528626fd4e1b76753dc0c54b842486b8d59d8ab314e86ca818e7a1f079463cbbd70d9b79b283c7edc419406311022e4be98c2c1374df9cdde2d008ce1d00e5f06ad1024baf555631f70831fc1023034e62be7c4bcb648caf276963ffa20e96bb50377fe1c113da0db4625b50741c35a058edb009c6ee5dbf93b8a6b060eec568180e8db791b82aab96cbf4326ca98361461379425ba8dcc347be670bdba7641883e5526ae3d833f6e9cb9bac9557747c79e206151072f7f0071dff3880411846f66bf4075c7462f302b53cb3400a74cf35652ad5641ed33572fd54e7ed7f85f58a0acba89327e7c6be5c58cb71528b99df2431f1d0358f8d28d81d95292da631fb06701decabb205fac59ff0fb1df536afc681eece6ea658c4d9eaa45f1342aa1ff70bdaff2ddaf25ec88c22f12829a0553db1ec2505554cb17d7b282e213a5a2aa30431ded2bce665bb199d023840832fedb2c0c350a27291407ff77440792872137df281592e82076a05c64c345ffb058c64f7f7c207ef78420b7010520610f17e302cc4dfcfaef72a0ed091aab4b541eb0531bbe941ca2f792bf7b31ca6162882b68054a8470115bc2c19f2df2023f7800432b39b04d3a304e8085ba3f1f0ca5b1ba4d38d339e6084de979cdea6d0e244c6c9fa0366bd890621e3d30846f5e8497e21597b8f29bbf52c961a485dfbea647600da0fc1f25ce4d203a8352ece310c39073525044e7ac46acf2ed9120bae1b4f6f02364abfe343f80b290983160c103557af1c68416480d024cc31b6c06cfec011456f1e95c420a12b48b1c3fe220c2879a982fb099948ac440db844b9a112a5188c7783fd3b19593290785f908d95c9db4b280bafe89c1313aeec24772046d9bc089645f0d182a21184e143823c5f52de50e5d7e98d3d7ab56f5413bbccd1415c9bcff707def475b643fb7f29842582104d4cc1dbaaca8f10a2f44273c339e0984f2b1e06ab2f0771db01fafa8142298345f3196f23e5847bda024034b6f59b11c29e981c881456e40d211929fd4f766200258aad8212016322bd5c605790dcfdf1bd2a93d99c9b8f498722d311d7eae7ff420496a31804c55f4759a7b13aaaf5f7ce006c3a8a998897d5e0a504398c2b627852545baf440798bcc5cc049357cf3f17d9771e4528a1af3d77dc794a11346e1bdf5efe37a405b127b4c43b616d61fbc5dc914e14240ef99a7400" const rand3Hex = "1744b384d68c042371244e13500d4bfb98c6244e3d71a5b700224420b59c593553f33bd786e3d0ce31626f511bc985f59d1a88aa38ba8ad6218d306abee60dd9172540232b95be1af146c69e72e5fde667a090dc3f93bdc5c5af0ab80acdbaa7a505f628c59dc0247b31a439cacf5010a94376d71521df08c178b02fb96fdb1809144ea38c68536187c53201fea8631fb0a880b4451ccdca7cc61f6aafca21cc7449d920599db61789ac3b1e164b3390124f95022aeea39ccca3ec1053f4fa10de2978e2861ea58e477085c2220021a0927aa94c5d0006b5055abba340e4f9eba22e969978dfd18e278a8b89d877328ae34268bc0174cfe211954c0036f078025217d1269fac1932a03b05a0b616012271bbe1fb554171c7a59b196d8a4479f45a77931b5d97aaf6c0c673cbe597b79b96e2a0c1eae2e66e46ccc8c85798e23ffe972ebdaa3f6caea243c004e60321eb47cd79137d78fd0613be606feacc5b3637bdc96a89c13746db8cad886f3ccf912b2178c823bcac395f06d28080269bdca2debf3419c66c690fd1adcfbd53e32e79443d7a42511a84cb22ca94fffad9149275a075b2f8ae0b021dcde9bf62b102db920733b897560518b06e1ad7f4b03458493ddaa7f4fa2c1609f7a1735aeeb1b3e2cea3ab45fc376323cc91873b7e9c90d07c192e38d3f5dfc9bfab1fd821c854da9e607ea596c391c7ec4161c6c4493929a8176badaa5a5af7211c623f29643a937677d3df0da9266181b7c4da5dd40376db677fe8f4a1dc456adf6f33c1e37cec471dd318c2647644fe52f93707a77da7d1702380a80e14cc0fdce7bf2eed48a529090bae0388ee277ce6c7018c5fb00b88362554362205c641f0d0fab94fd5b8357b5ff08b207fee023709bc126ec90cfb17c006754638f8186aaeb1265e80be0c1189ec07d01d5f6f96cb9ce82744147d18490de7dc72862f42f024a16968891a356f5e7e0e695d8c933ba5b5e43ad4c4ade5399bc2cae9bb6189b7870d7f22956194d277f28b10e01c10c6ffe3e065f7e2d6d056aa790db5649ca84dc64c35566c0af1b68c32b5b7874aaa66467afa44f40e9a0846a07ae75360a641dd2acc69d93219b2891f190621511e62a27f5e4fbe641ece1fa234fc7e9a74f48d2a760d82160d9540f649256b169d1fed6fbefdc491126530f3cbad7913e19fbd7aa53b1e243fbf28d5f38c10ebd77c8b986775975cc1d619efb27cdcd733fa1ca36cffe9c0a33cc9f02463c91a886601fd349efee85ef1462065ef9bd2c8f533220ad93138b8382d5938103ab25b2d9af8ae106e1211eb9b18793fba033900c809c02cd6d17e2f3e6fc84dae873411f8e87c3f0a8f1765b7825d185ce3730f299c3028d4a62da9ee95c2b870fb70c79370d485f9d5d9acb78926d20444033d960524d2776dc31988ec7c0dbf23b9905d" + +const ( + digits = iota + twain +) + +var testfiles = []string{ + // Digits is the digits of the irrational number e. Its decimal representation + // does not repeat, but there are only 10 posible digits, so it should be + // reasonably compressible. + digits: "testdata/e.txt.bz2", + // Twain is Project Gutenberg's edition of Mark Twain's classic English novel. + twain: "testdata/Mark.Twain-Tom.Sawyer.txt.bz2", +} + +func benchmarkDecode(b *testing.B, testfile int) { + compressed, err := ioutil.ReadFile(testfiles[testfile]) + if err != nil { + b.Fatal(err) + } + b.SetBytes(int64(len(compressed))) + for i := 0; i < b.N; i++ { + r := bytes.NewBuffer(compressed) + io.Copy(ioutil.Discard, NewReader(r)) + } +} + +func BenchmarkDecodeDigits(b *testing.B) { benchmarkDecode(b, digits) } +func BenchmarkDecodeTwain(b *testing.B) { benchmarkDecode(b, twain) } + +func TestBufferOverrun(t *testing.T) { + // Tests https://code.google.com/p/go/issues/detail?id=5747. + buffer := bytes.NewBuffer([]byte(bufferOverrunBase64)) + decoder := base64.NewDecoder(base64.StdEncoding, buffer) + decompressor := NewReader(decoder) + // This shouldn't panic. + ioutil.ReadAll(decompressor) +} + +var bufferOverrunBase64 string = ` +QlpoNTFBWSZTWTzyiGcACMP/////////////////////////////////3/7f3/// +////4N/fCZODak2Xo44GIHZgkGzDRbFAuwAAKoFV7T6AO6qwA6APb6s2rOoAkAAD +oACUoDtndh0iQAPkAAAAaPWihQoCgr5t97Obju21ChQB0NBm3RbA7apXrRoBooAA +AhA+IAHWl2Us3O7t9yieb3udvd76+4+fd33nd3HO1bVvfcGRne6+3vfPvfc++995 +w7k973eJhasLVec970tzDNXdX28LoPXZ3H3K9z0s5ufWAfes49d5594c3dUYtI+2 ++h1dvtpRa+uvrVEAG9bl893RVEN7cWvroSqWjPMGgAQi7Gq8TJSgKKdjKFBIB9Ae +LqWxleu715eXe7ml9e5098Z6G1vr7t1QZ6ot76YzPd3j7333t2ql2Chm7XrA9ICQ +VF77z3rVBWqkSXtlfb099hyezAr6USbGpICTSCFAaqHrKo+tUnm32rpE4Ue+t2mj +bKUeipEqwc93EdhhTwmQpOhhesC9iqDSPNTWYNSnUtBdm1nsA0nqqNd7OWwDXtFL +ONmmA6Ubke26I9UblvWIPR5VOWOnctai443URunnDy77uVC59OfRvezlDu33Z7Ly +3NNuuHW63088xu3t3NHZhkZbG7tXRlj00qOtbaXTJUUdspTbABR9R6EUwQAEAAAA +EMEwRpoAAAABMmhoAAjBNNAaCMhponpoGpgJpk9TEyp6niGKZkAaAEfqMQ09U80p ++pMGSCKngIAAAAgAAg0AAJhGgABGCEaaTyTKeNI1PE0wkj01GajMSNPSZGnqbU9T +anlPUNAHqGQ0DQAMg9TamgAAYRU/IAAICAmjQJgjQBMEwp5DTSaaYmhTeqfplPID +U1T9TynoU82pT1NPU/VP0j1NHqRpk9TTR7SnqaNNGmmQAaAD1Aeo0PSAAAAaaBiK +eBAQBGgIABGQA0AmBNNBoaAgaJmpglPEyYap6npiTT0agGjJjUaaDTQAAAAAAM1A +9QAaAAAADU8iEAQAEyAJk0NNNJgIZTJ5E00YSemiaZNGm1MpGNJ+lPU9qm9U2RDM +oY0EzJB6h6nqDID1NMBDDRpo1AGNAjCMmhkMgaYSJIgAAAQyAAEyBoATECCNhTT0 +U/IZAmCM1DSTxkzUE8p6NDaGiZGJqntTFHvUyU9qPQp7Kn5GgKNPU9QAGg9QAAA3 +wz0Pk/g/m/m9P9H4vxv2+dH3gCS8nhbbbbbYxtgNsBsG0m2MbG0NNtsbYNsaY0wb +bBibGmm22mxptNpsaGNDTY02JsG0MY0xg2MaYNNDbGwG0L5vsK/F9DO+EAA447Kq +p7Wdf6Y+5c20T7DfHyMXIzRKrZexw72uiQI+y55vOe52xpqbCLC2uR20JdER7Zvr +7ufuKb6zhiBxLuj0eA27v8RpMLucw9Ohwcizi2wrpt+yU1FdpM7ZYPcwS3XTef+A +Wzjxwhdrgw3aH1LeC1eZW900x8V9Nv4hTPXp4l067P/4ANVZFF/imOe/d5bdueam +/DFFokQWnFaU+ZqLBCM+d0PialJQWnLqRQZk/KhfbbYc2pCUTgffcSYbrCM1N+8l +HU6gSz+h2GJXs+tbrNviL83M97X0vcTn/F82P8wen8/3/h3sHY+sf9CSej9ThYTV +3lQ+FUHpfpGD4kv7dYMV995dpDX/y3xR8FoXx1bjUxBTNxuutwQ/h/Eedn9wpn6w +E3+ND8YhN1HSriIxRE/6uFyMv6/oC6Elarw3aHMMqHJkGiiz6tejmvnYLQa+Qm6G +deZ7jXTZV6NlpocgDnRdimS06bTYSkvPAL/xoWNLkX6N6VljU0dfKSBmm2uZE/xu +sutQ1EdP7GdjhglIq4xlOFUFEQpmX+xx7R8y6c0GSAaqusOjNZwxZRudOvmXm1tZ +T+YnbeB2ir9eiHNrtJNSLD/J/WDyuQpwBUtLKo0krccY/wIILP7f86teb9Z/9oyz +OX05qEWbObfhpRw+9+rCvp/35ML8KX3aHaI0n+tudbFRsV5FLW+Oa8ruLN4peyVL +DWjTHrXNthq/s7zAJYMeFJZkZt5mT9rfpH+5g3nc+piOSZ+J5nHtOnKI7Ff8Xl+j +0t76XTNucCHQ6whav1OHdF53TY5wuv5OzvrdnxoId8fTyUvERr0ERINu/8XxZZ5f +B5/kTZ8bBO0wv54Jp+ED/GQI8lZHzIQCP3vfQhwnCTj9TvITic7P4mYLDbH3fyzR +i+6EajCcpXLWSGf+ZXkOrWspDWDhXtEKas0v3UqWksqgY1rTj45krX4KihN+daXs +pZl5WPlta5p06CX6Xm2SfzqkMw12/3ix1bpnnZ+kFeBNX7A+E9zzG6OZaN78GOpl +9Ht/eZn9PqWdav852zr0zqkDK2H5IjdvNah+b1YVGdQGzwR4Nw+f13yEKnV+y66W +djfq7zWp7m5w+hzfv+Ly8O7oet5Vvd8/wQvO7qzOZ2vjf9X8Tj8PnMb/nc/nKqRR ++ml4UEhOOwfCeJEEI109CMYSh91iAJqPjMyH6KjrPD7W25llZVcREYNCTg6htbQt +M38wYoquCWP6tdKYlVIv14xTNUeUf4El/FunCf6csZkmv+9tfWx7t59wuKIa3saU +tZs9M+3HFOZtz3OLg/Unoaj9BYazYqA78xBU9tZzrtmF/rQL9CGJt90o/oYnSfcS +SL3haaw351LXWQ1XOsv1SmH3v6ymuxEpPPnEDmBELaTYsvvMIWJsmPZFFww++Kd7 +s/Jo0JFeUU7uNtI+gVosAIpVVuWfI/9tOIycz7I5Z7zjV+NR2OuZbYtW5F08KX4o +2k/xuJIchcNFPtxPfw9dkDgscRbMckyFMrzuZ3IvrcGzk0J6iI5ytrv37bGpAXMz +WK9mMMPebepNevmLjjo/QWoM968Sjv7ldlPS5AinHcXwsFv6dmmh8lJt7UOJWoKu +lMD1cB2ksIGpMdv8iuqR42Rn/kn+17BhhUZcwDBaUXVdX6bKW7fxlUYbq+mlqIcf +a9v8HF87M9ANbi9bq9onf9TD7nQ6Xf6vZci8TBPX+/GI0He6j31fTVQYW+NsQxvO +J8xrx+e58CCLQNjxeIyPt+F+qk/QMiXw+LyxGVkV/XcGQT9X03jSDP6beJ5QG1JW +9Q3qLv/YixWI7gPV9Mrhf2oRYTc/9KLFRhkE3SjKOTKuSSBKQ24fI+hEznamH71D +66Hwez8/0et7AtTv9zvamv2OD5He6fMV4k+ePl6+qPfO5CdHtK+eCDZL5+4f5yrl +gTcRFiq8fXbc5IaI5fbbc1KMM/2T0Mr7+Hwaco6FtXm0fmhCgTZRqY4pKiEIfmaz +QwHNOOCrtMJ2VwsyMumt7xsOolGnizRev6lILH43qPcczQM7Gc5zRin80YvFt1Qm +h/57Z0auR2h0fuX50MBO4XQ+26y5l6v4j902R66c0j3z2KHstKQ04J/h6LbuNQE4 +D6cu/lyfK69DxxX8wb8XaQkMUcJdo1LzqUGDAb3Kfn/A3P/JYc99MO9qv67+SxWb +wYTyqKdWTd+1KbR/Rcn0Io5zI/QquX7FA1bxfMytjQ/X+l0fh0Pf+Hx97meH4fQL +7/T8/sdTm9Tn8nELvedyhydLlPPTScINdXyLIq9wgIJr4fWPbp9ZhFh/56fdSgOG +HDXg+gkXsN2Rddr4HQ5P3u+RhLzmSjhzoqY5EsPC4QvRlX9JXjB84rPV5USR66qa +/kjw4156GJnzoXtydKJE53t6PHfZWO+3ujsfI6iAdshc7OFzGXiZB9PtItKodhYq +nABkTKdcpu4+TOpf9h5piX5slsaBjkeTnj/Ba02ilboQfcDVigxrYn/iTH5ySWUW +/lHtg78s5UZM8sErwhNe3N3w+6ZOMnU+5i86/xFNtqZfDdXTGy1H3PzGbdtZXYT+ +Ixx2vpwBYzbPVYHxKosM5rPiVmcTllI9nuoSfeh9ib4foFWauOpvdmhBDqpTpKTX +u8EO2l2Z195G2RIV7TlKSxGWjR5sl/nALu1uzBeLd9zpSujzMTd1uTX9Qk/Q1S+r +vaW6bm8qqPO4jb6Wx6XIkm321nrIF6Ae25d1+Dpv/P5G4NoLd2j6/EtENC3FeR5z +oo7bA+tI8yEQRhiF0z1FlJXLD5ZbhNNWQm/j/IbzRfh8JtOFZU7ruShLvHXysW9S +9V909tr9jn8/E/Hb5N/1NVNHnZu2HIUvJvHJiHd2ucmeI9PWUMnppmE65GQ5E9xV +ZRlGEH0X85EvmHyEupkMrCC0oMv9RCq+/H8gcfpe00Hs/S+regT5p58cyYomh93v +qvuw/A06BE/wzJESuYbN9pqYpoXqXFemW1NksHEJ2w+PYMJ27WJyD5FpaXB85VaW +qMOhDfO8E3QdH8ybyKt/UgI8/tDGpFbyOlaVdIv1FXJhoLp8soAA4Djg6/KZ066N +ZFYuS8WdjpSZGP4/Lw+1yaXlzNznc/k2uHe2uXP3uFuPcHx+Dm44utxldoO1uBPy ++jzOs14+MIgOjOHMVNqAbMd8fUedLlhJMCfMtm4uz01enLNKcMrtLlPIR37Yukh1 +YEMXYpm7eU4XU+j+Jj3pDyaXtXs+p1fWfTN/cy9/Oxs4umUXQ4uHh1kObtayDJ56 +/QMxiHobjHNKuKfMxsrYEwN+QVIyVjAwMDYuMjQ1AAA9IwJniiBLRkZDAAAXt0Ja +aDQxQVkmU1lZtwytAACLf///////////////////+//////v//////////bv78// +/+AXO133uwO2xB2UxIvbKXrCqCoURUBL2ytFI82AFdcOwMhVTHtk5rD3szEVNYD4 +aIQINCaMRoTaSn7SbSMJiYmEwieTEp+psqbMCp+VNPaFNpqbBNR7UmanlPUeKfqm +j1PU0/VPU08o9Q9EeKHlPJtKbYqeTCYhN6U9T1NH6mp+lPyoGNTI/Knkyg1MggAg +CaMEyQnqZoaaRtRtJpppppoDaTR6hpphGh6mmgHpMQBpkGTTEAAaAAAA00AZDag0 +ADIBkGgABqemiRNTI0k8aU0PRGRoAZlP0UAAAGgAAAyAADQaAAAaAAAAAAAAAAAA +AaAAAAM0kgRBJ5MlPFP1Gj0jTTTUaekxNAbUGjTQMgaZANNAAAAaAADTQAAAAAAA +ANAA0AAANADQ0QAAAAAAAAAaGgAAAAAAABoA0AAA0AAAAAAAAAAAAANAAAAAkSEI +aTRpomp5DUxNNDTJPTKaep6T09Kemmo2JG0aTQ9ENogaaGhkABo0NHqaBoDTI0DC +Gj0gNAMhoDQ9QMQNAGQAaDDwyMPIMlbG1vhRBTFo6JksSupgpAjPbY0ec02IGXjb +eS+FBsh01+O4ZOaD+srUZCFaT4DRjVDLx7uKIsFtESIDUg1ZkhyCSYov05C00MtR +BdNNa/AYPGOQZWcs+VegXOPrkushFbZ3mBoRD6WamClkpBaHZrUhUl02bIfRXX4w +b3/9cW9nHDVxh2qFBxqgRKfmq7/Jc/tdJk05nVrGbckGVy2PnIy30CDhpWmqrSot +K2bOnX0NbP1iy2cd0Na0ZmbRstm4MzMzbbMySTd35F7f+zPP8DC+NJLYcakkkkRd +NZlupJt3OMFoDAD2g+N3FAMCydhIpoRHRQAdFI5nNg4ugEXHCYxkMyGCwtaJmial +y0IMlpSYYM/weXNJAhFqS0GNmvaPEtYGjbvaucMdklOTmBX1vfVAkTYB1uXCSK64 +UNIixOqRKLuRCFtqIQtgwqaFrCkIYbbewErWABa+VGADWsJXJjfx5SJViLuwiGXq +Ru6vCuwmU5CJiJz3UiBpmLv0r2wskxUhY4tzPVGQ9RMXJl65eLSNwZVwaSyGZ9Cm +A3jztQUUpFeUryBTskW95iVwRMFrhBCwZBAFJBZvhMEMNoDJJlUoIhQkAkjbExp2 +YZio+ZYeAZUwmH1qUbdQixmxf0+61+aVgJ1hwxsO1yG3hFx4pfjc09ITVht0pG8u +FtVFhPa1KE0gTRUSVXywkITucqk0Waz5Fs6qJpVHYdNrbYRFxnFsQGY1qmsTLjK6 +4QX5Rddo6krM/Bx9CqIAKq4CzVQYHrmIAd2EBhYmwVYwLvhzKIUrc2EirnGIvyuD +O4YZDSwsVTA0BpVvUOjDErkCraBoSutcKwUSSLGhVvNYHLz3klgZD++wWsa/swLw +gvNDY2De+sncOv8X2lq4HD95ZdwPuTIMXCwSbg4RrIqv+L0y6F17pqDecyQYPEj3 +iN/0BBeWZlJAyBMi5U3Q1zAlsK8IlDhaXGmvZrgISq5CfNjmUgxDeMggOKqxu4sI +OrilS49Lkl1J3u3GjXTuH+rX+4ccyFAQnizCpPClcY77F59j63S6fr5vr+y99tuO +7Ox7Wg/ljwhdyaK4xMmXczeJbx7x07htJNtC4xcQfAtvzeznLrN6MN/ILIBOI65I +qIA2D5fHHj1XN4aN6TvOjWDaSbSWqxCSCvXUpzkNJAkWXAuTwF8k5uSJvQj/rVo0 +hAhEMEIYkCRGx9AX+byIuXWlLMbbVeliHNUL5AQYmNwLFu4SkmGD+UWtBMyVHQOQ +ss0ggoVKSKOBUgnVS6ljt7WE1qXqJJ4QA1pEwYNLEaguEE1LtPNoVr5WzjbSbWPk +V9OW3y9IneUDLoIV5pAkEFTEFGFVjeTFxtpzBBfGgycBxVCdz8eESBIzsamRchAa +TQunQH8DHnpfod9QuAuRvc7JBlKUCYmCjMvynLcxIFohxCaYrDvGw4QbXZB7oWQ7 +hpoGlz23ayDfB8NrRRzdilsEQyQniu9ASLQg7RrGZnoTr1ai12IbCEUCGdFq03P5 +nBnRFAGmisQGcyykV9gKtcVMWLhCuVmXg86dndn7slUpRNSSEAU20oaWIm1maFTu +E0DT4gTbg0nuhjtz3kNOz+i7sBm0bkXjxQWuLqlZEmp60ZTyRZJDUqKSEKg6hqcy +ERxdU22CSNOO10RYUUiDVpKhPNdKTOIE1thp02sBNoNTFSht8WJtaBQ09qN3jd5r +dOLX4IA5fevRyCCzDgRXfV4wzik4KROjmxmTMglBySlIMEzcXehnDXCRiZSlvwA2 +0YsIOROcm4UrIRFxJHctJH7OdN5u1aHVHb5UaLHpv48NgmFRE56KTSoaWunqm2st +S0mrAdOiqcR12PWVbdVRJKcQ0DQuhwlAPcRtpxN3D4kbXJjToSYJIFw406G2CSaK +jQMIJPZGlQmgyFhoCSzeGS1VSq5SKKQQxs5RqKUcVUNY57YUETb4mXzV84SPngKi +nsce0mXByZq5BKUA9puHZWLNwQIYuDaJUNgG+E01E3pDYVNLKYQ0hsVesgV5gZY0 +htDsRdGtm0+iGnkN6+Ea9YJtUZNAkx2GgSoix12nTW0avTUfxR3oYcpvZ7IdtABE +UhBcjG4qZtDZsS1JQHys243vhLaDTSvvTeBiJA2tmokqECTBcSOCAGkAxMKlVAva +4IsLRaBBqhxDbcGtgdw03mFcLUaFuhtKuuEIEkUleJQwby/zwu9uvvZK4xTV+ECM +a8lmzxKmqkBggYK1+xPdbmJclm6tSZhE/OSJtCEjs+unJIQkT9hCWgBJqGMS07Eh +AJNmBiuVEVdTyjkIJkavuZmx2sJF13htgEZUCC23lZFOE6gWbM9WyYNJTM8yCQrb +0Sx3OQvBML5cRATAQkSQkAJOAhoxpQkNi4ZiEVDbdtJAME0RXNDXGHA3M3Q0mm1o +IEwbWpaM1DQCSMbGRCAu3iRIQiT6RlBpT1n3tfwvUXz3gIVlx3mEximY/kZW1kNG +sgEJIrBisaEoGYPJ+1CQUYFBw+eGEHJQBpNHjErXUJY2iWHQ30hXwFBuMSxQ2lB5 +bg+/LX3euG6HsHUB1lFvBvaiaBrITVwkCTa1d0s9CHZCiDZjbWReKyrpPE2oSa7o +LPrR4BJvys9ttjUpzETSSMxh8vsr9dXTwKBtK+1xCTGDQmNIaE29HmHdS5GSxpya +MismcAUSEgSxHBrKtgsZzduG7vHZn16l3kFkVITtENIzS2JsiBwFTDlhgexsjBHv +5HXOYxHBzoSDCcPZ0ctvkY9aS5XpoQuFYkGJgCsqjJZeUMNUEpDSbKcnUc1PifIA +CbR2UoXawBlspkEBr9HBfvUi/MUakZVOf1WKYrqSaIXce62JOyhJLq3qJBloTA0F +VbILEtM+heFmNRCFt70GJrExVJri0ArYbCRbADSGDBpBXxxb/6fo+s3C7uaL7RjM +LV2IQBNrAJrKFeJwTsPnxbAsemirUx2lk1kaxschzdK4TQNJN5wQnolIFg401OZ4 +2na11LnT3lR+1k1TMJhiAjXMk0F1ooHnYlt9LKfJ3ZIOmeY+2l9bUQHWFNGyEyfj +EAcu3kpGLq0Ez7XOS+EpAASRQTAYMATfVQibHLTT30zG732+pNe9za1JNt8sNJYn +RjWuJ6jL5ILV0rcd9vT7X9fObvcXitpvJ2XBJE+PhX2HaTkyWeF9pwnlQNrTe9hV +tzhA+ihZrDrHNmLcQjZbnv/IMubqq8egxY80t5n6vZ6U5TR6U9uZJvai1xtqAyCR +NWkW52m00rDTEuO6BA4q2RHDWwbETF55rRsWLIgNW9qJCyMHPbTM/dMBmWMQSMxz +4M2pRzt47SICxA327UqSCEERqMFybmYi3nUxePtLgHYplqRiw4ynMbXd/kiQ0LE0 +PKJSSCXA42ymziCpAxNWflzpzQdJZusahRFr6t6m+4p273/Taj7k+hZyNgBAgXAY +8F7pTts6orLb8IA6o4TOwkwQYmKvKu9VwMrE7+GUhVIAgY9a8DyQMiDBkEAwh7S1 +KgCBfao8DK1CwSS8Z3WjL5MEgt93z2koUQCD/YxMBppiCMp7SDVSmkkIHptfGpeh +t+M13Ccv1tavIASFiaQl6rBz3K4N3DSGwNkCibrvEAC0fQirOWnc4NVbcLKpFG1l +NQXF/eqdT79wq1Mvlap3QSCLhcD2D3fCkKVWid4aSjtp9FOX1Uaf7P9eT93zd9Sv +mj2yNLRUGzyI/0oONNSzmmkvJ5Cq2X2CdldIWMGZO57RJ8oyATAWTQmRmNkfh0Sx +uuR/J9oUsomVy1AEntc0dlPivkqBkBqrxU3j5PnWkaI3ZRGc0gg9spCQEISh4xEU +pMhVrnmDQLfLP8Ouqpx917MAw7hkjQk6BJFTAbXDsz3LSHIxo/gB8qrA1vbvdZZh +LtR0frJdfdppX8nAQX/TAxOQ8+H6yw8a9i7/zJEfSYIhop59N/fhcWW2F14cj2Xc +fyHaZ04lTO4uPnly91jwuFPaREuZVp8AxImIhlkxkAN61tWdWG7tEbaCgszh6VIz +ThFnHo2Vi8SQXPrXCN7J9Tc9ZYiAYqoThV/u6SYsea5aZL8deOvKBQCgZZuIxX1z +4EnfcqG176vY4VqMBIC4pMJz0WcHJYqN+j7BiwGoMBwExrIdTB7q4XIFLotcIpS0 +1MqyVsesvoQq7WObmGQXdMliMirSLcDuSx8Qy+4pIBgGDIyMp1qbonnGdcHYvU8S +O0A8s/iua5oFdNZTWvbVI4FUH9sKcLiB3/fIAF+sB4n8q6L+UCfmbPcAo/crQ6b3 +HqhDBMY9J0q/jdz9GNYZ/1fbXdkUqAQKFePhtzJDRBZba27+LPQNMCcrHMq06F1T +4QmLmkHt7LxB2pAczUO+T2O9bHEw/HWw+dYf2MoRDUw= +` diff --git a/src/pkg/compress/bzip2/huffman.go b/src/pkg/compress/bzip2/huffman.go index f755019bb..8f6b0c9ca 100644 --- a/src/pkg/compress/bzip2/huffman.go +++ b/src/pkg/compress/bzip2/huffman.go @@ -33,14 +33,17 @@ const invalidNodeValue = 0xffff // Decode reads bits from the given bitReader and navigates the tree until a // symbol is found. -func (t huffmanTree) Decode(br *bitReader) (v uint16) { +func (t *huffmanTree) Decode(br *bitReader) (v uint16) { nodeIndex := uint16(0) // node 0 is the root of the tree. for { node := &t.nodes[nodeIndex] - bit := br.ReadBit() + bit, ok := br.TryReadBit() + if !ok && br.ReadBit() { + bit = 1 + } // bzip2 encodes left as a true bit. - if bit { + if bit != 0 { // left if node.left == invalidNodeValue { return node.leftValue diff --git a/src/pkg/compress/bzip2/move_to_front.go b/src/pkg/compress/bzip2/move_to_front.go index 0ed19dec3..b7e75a700 100644 --- a/src/pkg/compress/bzip2/move_to_front.go +++ b/src/pkg/compress/bzip2/move_to_front.go @@ -15,10 +15,11 @@ type moveToFrontDecoder struct { // Rather than actually keep the list in memory, the symbols are stored // as a circular, double linked list with the symbol indexed by head // at the front of the list. - symbols []byte - next []uint8 - prev []uint8 + symbols [256]byte + next [256]uint8 + prev [256]uint8 head uint8 + len int } // newMTFDecoder creates a move-to-front decoder with an explicit initial list @@ -28,12 +29,9 @@ func newMTFDecoder(symbols []byte) *moveToFrontDecoder { panic("too many symbols") } - m := &moveToFrontDecoder{ - symbols: symbols, - next: make([]uint8, len(symbols)), - prev: make([]uint8, len(symbols)), - } - + m := new(moveToFrontDecoder) + copy(m.symbols[:], symbols) + m.len = len(symbols) m.threadLinkedList() return m } @@ -45,34 +43,29 @@ func newMTFDecoderWithRange(n int) *moveToFrontDecoder { panic("newMTFDecoderWithRange: cannot have > 256 symbols") } - m := &moveToFrontDecoder{ - symbols: make([]uint8, n), - next: make([]uint8, n), - prev: make([]uint8, n), - } - + m := new(moveToFrontDecoder) for i := 0; i < n; i++ { - m.symbols[i] = byte(i) + m.symbols[byte(i)] = byte(i) } - + m.len = n m.threadLinkedList() return m } // threadLinkedList creates the initial linked-list pointers. func (m *moveToFrontDecoder) threadLinkedList() { - if len(m.symbols) == 0 { + if m.len == 0 { return } - m.prev[0] = uint8(len(m.symbols) - 1) + m.prev[0] = uint8(m.len - 1) - for i := 0; i < len(m.symbols)-1; i++ { + for i := byte(0); int(i) < m.len-1; i++ { m.next[i] = uint8(i + 1) m.prev[i+1] = uint8(i) } - m.next[len(m.symbols)-1] = 0 + m.next[m.len-1] = 0 } func (m *moveToFrontDecoder) Decode(n int) (b byte) { diff --git a/src/pkg/compress/bzip2/testdata/Mark.Twain-Tom.Sawyer.txt.bz2 b/src/pkg/compress/bzip2/testdata/Mark.Twain-Tom.Sawyer.txt.bz2 Binary files differnew file mode 100644 index 000000000..0bd61a6d4 --- /dev/null +++ b/src/pkg/compress/bzip2/testdata/Mark.Twain-Tom.Sawyer.txt.bz2 diff --git a/src/pkg/compress/bzip2/testdata/e.txt.bz2 b/src/pkg/compress/bzip2/testdata/e.txt.bz2 Binary files differnew file mode 100644 index 000000000..65bf3b4c3 --- /dev/null +++ b/src/pkg/compress/bzip2/testdata/e.txt.bz2 |