summaryrefslogtreecommitdiff
path: root/src/pkg/compress/bzip2
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/compress/bzip2')
-rw-r--r--src/pkg/compress/bzip2/bit_reader.go8
-rw-r--r--src/pkg/compress/bzip2/bzip2.go167
-rw-r--r--src/pkg/compress/bzip2/bzip2_test.go206
-rw-r--r--src/pkg/compress/bzip2/huffman.go9
-rw-r--r--src/pkg/compress/bzip2/move_to_front.go35
-rw-r--r--src/pkg/compress/bzip2/testdata/Mark.Twain-Tom.Sawyer.txt.bz2bin0 -> 124744 bytes
-rw-r--r--src/pkg/compress/bzip2/testdata/e.txt.bz2bin0 -> 43149 bytes
7 files changed, 366 insertions, 59 deletions
diff --git a/src/pkg/compress/bzip2/bit_reader.go b/src/pkg/compress/bzip2/bit_reader.go
index ab1d60651..32d1036ae 100644
--- a/src/pkg/compress/bzip2/bit_reader.go
+++ b/src/pkg/compress/bzip2/bit_reader.go
@@ -77,6 +77,14 @@ func (br *bitReader) ReadBit() bool {
return n != 0
}
+func (br *bitReader) TryReadBit() (bit byte, ok bool) {
+ if br.bits > 0 {
+ br.bits--
+ return byte(br.n>>br.bits) & 1, true
+ }
+ return 0, false
+}
+
func (br *bitReader) Err() error {
return br.err
}
diff --git a/src/pkg/compress/bzip2/bzip2.go b/src/pkg/compress/bzip2/bzip2.go
index 3dc8c6206..82e30c7c9 100644
--- a/src/pkg/compress/bzip2/bzip2.go
+++ b/src/pkg/compress/bzip2/bzip2.go
@@ -22,14 +22,17 @@ func (s StructuralError) Error() string {
// A reader decompresses bzip2 compressed data.
type reader struct {
- br bitReader
- setupDone bool // true if we have parsed the bzip2 header.
- blockSize int // blockSize in bytes, i.e. 900 * 1024.
- eof bool
- buf []byte // stores Burrows-Wheeler transformed data.
- c [256]uint // the `C' array for the inverse BWT.
- tt []uint32 // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits.
- tPos uint32 // Index of the next output byte in tt.
+ br bitReader
+ fileCRC uint32
+ blockCRC uint32
+ wantBlockCRC uint32
+ setupDone bool // true if we have parsed the bzip2 header.
+ blockSize int // blockSize in bytes, i.e. 900 * 1024.
+ eof bool
+ buf []byte // stores Burrows-Wheeler transformed data.
+ c [256]uint // the `C' array for the inverse BWT.
+ tt []uint32 // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits.
+ tPos uint32 // Index of the next output byte in tt.
preRLE []uint32 // contains the RLE data still to be processed.
preRLEUsed int // number of entries of preRLE used.
@@ -50,12 +53,14 @@ const bzip2BlockMagic = 0x314159265359
const bzip2FinalMagic = 0x177245385090
// setup parses the bzip2 header.
-func (bz2 *reader) setup() error {
+func (bz2 *reader) setup(needMagic bool) error {
br := &bz2.br
- magic := br.ReadBits(16)
- if magic != bzip2FileMagic {
- return StructuralError("bad magic value")
+ if needMagic {
+ magic := br.ReadBits(16)
+ if magic != bzip2FileMagic {
+ return StructuralError("bad magic value")
+ }
}
t := br.ReadBits(8)
@@ -68,8 +73,11 @@ func (bz2 *reader) setup() error {
return StructuralError("invalid compression level")
}
+ bz2.fileCRC = 0
bz2.blockSize = 100 * 1024 * (int(level) - '0')
- bz2.tt = make([]uint32, bz2.blockSize)
+ if bz2.blockSize > len(bz2.tt) {
+ bz2.tt = make([]uint32, bz2.blockSize)
+ }
return nil
}
@@ -79,7 +87,7 @@ func (bz2 *reader) Read(buf []byte) (n int, err error) {
}
if !bz2.setupDone {
- err = bz2.setup()
+ err = bz2.setup(true)
brErr := bz2.br.Err()
if brErr != nil {
err = brErr
@@ -98,14 +106,14 @@ func (bz2 *reader) Read(buf []byte) (n int, err error) {
return
}
-func (bz2 *reader) read(buf []byte) (n int, err error) {
+func (bz2 *reader) readFromBlock(buf []byte) int {
// bzip2 is a block based compressor, except that it has a run-length
// preprocessing step. The block based nature means that we can
// preallocate fixed-size buffers and reuse them. However, the RLE
// preprocessing would require allocating huge buffers to store the
// maximum expansion. Thus we process blocks all at once, except for
// the RLE which we decompress as required.
-
+ n := 0
for (bz2.repeats > 0 || bz2.preRLEUsed < len(bz2.preRLE)) && n < len(buf) {
// We have RLE data pending.
@@ -148,34 +156,87 @@ func (bz2 *reader) read(buf []byte) (n int, err error) {
n++
}
- if n > 0 {
- return
- }
+ return n
+}
- // No RLE data is pending so we need to read a block.
+func (bz2 *reader) read(buf []byte) (int, error) {
+ for {
+ n := bz2.readFromBlock(buf)
+ if n > 0 {
+ bz2.blockCRC = updateCRC(bz2.blockCRC, buf[:n])
+ return n, nil
+ }
- br := &bz2.br
- magic := br.ReadBits64(48)
- if magic == bzip2FinalMagic {
- br.ReadBits64(32) // ignored CRC
- bz2.eof = true
- return 0, io.EOF
- } else if magic != bzip2BlockMagic {
- return 0, StructuralError("bad magic value found")
- }
+ // End of block. Check CRC.
+ if bz2.blockCRC != bz2.wantBlockCRC {
+ bz2.br.err = StructuralError("block checksum mismatch")
+ return 0, bz2.br.err
+ }
- err = bz2.readBlock()
- if err != nil {
- return 0, err
- }
+ // Find next block.
+ br := &bz2.br
+ switch br.ReadBits64(48) {
+ default:
+ return 0, StructuralError("bad magic value found")
+
+ case bzip2BlockMagic:
+ // Start of block.
+ err := bz2.readBlock()
+ if err != nil {
+ return 0, err
+ }
- return bz2.read(buf)
+ case bzip2FinalMagic:
+ // Check end-of-file CRC.
+ wantFileCRC := uint32(br.ReadBits64(32))
+ if br.err != nil {
+ return 0, br.err
+ }
+ if bz2.fileCRC != wantFileCRC {
+ br.err = StructuralError("file checksum mismatch")
+ return 0, br.err
+ }
+
+ // Skip ahead to byte boundary.
+ // Is there a file concatenated to this one?
+ // It would start with BZ.
+ if br.bits%8 != 0 {
+ br.ReadBits(br.bits % 8)
+ }
+ b, err := br.r.ReadByte()
+ if err == io.EOF {
+ br.err = io.EOF
+ bz2.eof = true
+ return 0, io.EOF
+ }
+ if err != nil {
+ br.err = err
+ return 0, err
+ }
+ z, err := br.r.ReadByte()
+ if err != nil {
+ if err == io.EOF {
+ err = io.ErrUnexpectedEOF
+ }
+ br.err = err
+ return 0, err
+ }
+ if b != 'B' || z != 'Z' {
+ return 0, StructuralError("bad magic value in continuation file")
+ }
+ if err := bz2.setup(false); err != nil {
+ return 0, err
+ }
+ }
+ }
}
// readBlock reads a bzip2 block. The magic number should already have been consumed.
func (bz2 *reader) readBlock() (err error) {
br := &bz2.br
- br.ReadBits64(32) // skip checksum. TODO: check it if we can figure out what it is.
+ bz2.wantBlockCRC = uint32(br.ReadBits64(32)) // skip checksum. TODO: check it if we can figure out what it is.
+ bz2.blockCRC = 0
+ bz2.fileCRC = (bz2.fileCRC<<1 | bz2.fileCRC>>31) ^ bz2.wantBlockCRC
randomized := br.ReadBits(1)
if randomized != 0 {
return StructuralError("deprecated randomized files")
@@ -316,6 +377,9 @@ func (bz2 *reader) readBlock() (err error) {
if repeat > 0 {
// We have decoded a complete run-length so we need to
// replicate the last output symbol.
+ if repeat > bz2.blockSize-bufIndex {
+ return StructuralError("repeats past end of block")
+ }
for i := 0; i < repeat; i++ {
b := byte(mtf.First())
bz2.tt[bufIndex] = uint32(b)
@@ -339,6 +403,9 @@ func (bz2 *reader) readBlock() (err error) {
// doesn't need to be encoded and we have |v-1| in the next
// line.
b := byte(mtf.Decode(int(v - 1)))
+ if bufIndex >= bz2.blockSize {
+ return StructuralError("data exceeds block size")
+ }
bz2.tt[bufIndex] = uint32(b)
bz2.c[b]++
bufIndex++
@@ -385,3 +452,33 @@ func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32 {
return tt[origPtr] >> 8
}
+
+// This is a standard CRC32 like in hash/crc32 except that all the shifts are reversed,
+// causing the bits in the input to be processed in the reverse of the usual order.
+
+var crctab [256]uint32
+
+func init() {
+ const poly = 0x04C11DB7
+ for i := range crctab {
+ crc := uint32(i) << 24
+ for j := 0; j < 8; j++ {
+ if crc&0x80000000 != 0 {
+ crc = (crc << 1) ^ poly
+ } else {
+ crc <<= 1
+ }
+ }
+ crctab[i] = crc
+ }
+}
+
+// updateCRC updates the crc value to incorporate the data in b.
+// The initial value is 0.
+func updateCRC(val uint32, b []byte) uint32 {
+ crc := ^val
+ for _, v := range b {
+ crc = crctab[byte(crc>>24)^v] ^ (crc << 8)
+ }
+ return ^crc
+}
diff --git a/src/pkg/compress/bzip2/bzip2_test.go b/src/pkg/compress/bzip2/bzip2_test.go
index 7b227ac9f..ada1f9a00 100644
--- a/src/pkg/compress/bzip2/bzip2_test.go
+++ b/src/pkg/compress/bzip2/bzip2_test.go
@@ -6,6 +6,7 @@ package bzip2
import (
"bytes"
+ "encoding/base64"
"encoding/hex"
"io"
"io/ioutil"
@@ -62,6 +63,19 @@ func TestHelloWorldBZ2(t *testing.T) {
}
}
+func TestConcat(t *testing.T) {
+ out, err := decompressHex(helloWorldBZ2Hex + helloWorldBZ2Hex)
+ if err != nil {
+ t.Errorf("error from Read: %s", err)
+ return
+ }
+
+ hello2 := bytes.Repeat(helloWorld, 2)
+ if !bytes.Equal(hello2, out) {
+ t.Errorf("got %x, want %x", out, hello2)
+ }
+}
+
func testZeros(t *testing.T, inHex string, n int) {
out, err := decompressHex(inHex)
if err != nil {
@@ -155,3 +169,195 @@ const rand2Hex = "92d5652616ac444a4a04af1a8a3964aca0450d43d6cf233bd03233f4ba92f8
const rand3BZ2Hex = "425a68393141592653593be669d00000327ffffffffffffffffffffffffffffffffffff7ffffffffffffffffffffffffffffffc002b3b2b1b6e2bae400004c00132300004c0d268c004c08c0130026001a008683234c0684c34008c230261a04c0260064d07a8d00034000d27a1268c9931a8d327a3427a41faa69ea0da264c1a34219326869b51b49a6469a3268c689fa53269a62794687a9a68f5189994c9e487a8f534fd49a3d34043629e8c93d04da4f4648d30d4f44d3234c4d3023d0840680984d309934c234d3131a000640984f536a6132601300130130c8d00d04d1841ea7a8d31a02609b40023460010c01a34d4c1a0d04d3069306810034d0d0d4c0046130d034d0131a9a64d321804c68003400098344c13000991808c0001a00000000098004d3d4da4604c47a13012140aadf8d673c922c607ef6212a8c0403adea4b28aee578900e653b9cdeb8d11e6b838815f3ebaad5a01c5408d84a332170aff8734d4e06612d3c2889f31925fb89e33561f5100ae89b1f7047102e729373d3667e58d73aaa80fa7be368a1cc2dadd81d81ec8e1b504bd772ca31d03649269b01ceddaca07bf3d4eba24de141be3f86f93601e03714c0f64654671684f9f9528626fd4e1b76753dc0c54b842486b8d59d8ab314e86ca818e7a1f079463cbbd70d9b79b283c7edc419406311022e4be98c2c1374df9cdde2d008ce1d00e5f06ad1024baf555631f70831fc1023034e62be7c4bcb648caf276963ffa20e96bb50377fe1c113da0db4625b50741c35a058edb009c6ee5dbf93b8a6b060eec568180e8db791b82aab96cbf4326ca98361461379425ba8dcc347be670bdba7641883e5526ae3d833f6e9cb9bac9557747c79e206151072f7f0071dff3880411846f66bf4075c7462f302b53cb3400a74cf35652ad5641ed33572fd54e7ed7f85f58a0acba89327e7c6be5c58cb71528b99df2431f1d0358f8d28d81d95292da631fb06701decabb205fac59ff0fb1df536afc681eece6ea658c4d9eaa45f1342aa1ff70bdaff2ddaf25ec88c22f12829a0553db1ec2505554cb17d7b282e213a5a2aa30431ded2bce665bb199d023840832fedb2c0c350a27291407ff77440792872137df281592e82076a05c64c345ffb058c64f7f7c207ef78420b7010520610f17e302cc4dfcfaef72a0ed091aab4b541eb0531bbe941ca2f792bf7b31ca6162882b68054a8470115bc2c19f2df2023f7800432b39b04d3a304e8085ba3f1f0ca5b1ba4d38d339e6084de979cdea6d0e244c6c9fa0366bd890621e3d30846f5e8497e21597b8f29bbf52c961a485dfbea647600da0fc1f25ce4d203a8352ece310c39073525044e7ac46acf2ed9120bae1b4f6f02364abfe343f80b290983160c103557af1c68416480d024cc31b6c06cfec011456f1e95c420a12b48b1c3fe220c2879a982fb099948ac440db844b9a112a5188c7783fd3b19593290785f908d95c9db4b280bafe89c1313aeec24772046d9bc089645f0d182a21184e143823c5f52de50e5d7e98d3d7ab56f5413bbccd1415c9bcff707def475b643fb7f29842582104d4cc1dbaaca8f10a2f44273c339e0984f2b1e06ab2f0771db01fafa8142298345f3196f23e5847bda024034b6f59b11c29e981c881456e40d211929fd4f766200258aad8212016322bd5c605790dcfdf1bd2a93d99c9b8f498722d311d7eae7ff420496a31804c55f4759a7b13aaaf5f7ce006c3a8a998897d5e0a504398c2b627852545baf440798bcc5cc049357cf3f17d9771e4528a1af3d77dc794a11346e1bdf5efe37a405b127b4c43b616d61fbc5dc914e14240ef99a7400"
const rand3Hex = "1744b384d68c042371244e13500d4bfb98c6244e3d71a5b700224420b59c593553f33bd786e3d0ce31626f511bc985f59d1a88aa38ba8ad6218d306abee60dd9172540232b95be1af146c69e72e5fde667a090dc3f93bdc5c5af0ab80acdbaa7a505f628c59dc0247b31a439cacf5010a94376d71521df08c178b02fb96fdb1809144ea38c68536187c53201fea8631fb0a880b4451ccdca7cc61f6aafca21cc7449d920599db61789ac3b1e164b3390124f95022aeea39ccca3ec1053f4fa10de2978e2861ea58e477085c2220021a0927aa94c5d0006b5055abba340e4f9eba22e969978dfd18e278a8b89d877328ae34268bc0174cfe211954c0036f078025217d1269fac1932a03b05a0b616012271bbe1fb554171c7a59b196d8a4479f45a77931b5d97aaf6c0c673cbe597b79b96e2a0c1eae2e66e46ccc8c85798e23ffe972ebdaa3f6caea243c004e60321eb47cd79137d78fd0613be606feacc5b3637bdc96a89c13746db8cad886f3ccf912b2178c823bcac395f06d28080269bdca2debf3419c66c690fd1adcfbd53e32e79443d7a42511a84cb22ca94fffad9149275a075b2f8ae0b021dcde9bf62b102db920733b897560518b06e1ad7f4b03458493ddaa7f4fa2c1609f7a1735aeeb1b3e2cea3ab45fc376323cc91873b7e9c90d07c192e38d3f5dfc9bfab1fd821c854da9e607ea596c391c7ec4161c6c4493929a8176badaa5a5af7211c623f29643a937677d3df0da9266181b7c4da5dd40376db677fe8f4a1dc456adf6f33c1e37cec471dd318c2647644fe52f93707a77da7d1702380a80e14cc0fdce7bf2eed48a529090bae0388ee277ce6c7018c5fb00b88362554362205c641f0d0fab94fd5b8357b5ff08b207fee023709bc126ec90cfb17c006754638f8186aaeb1265e80be0c1189ec07d01d5f6f96cb9ce82744147d18490de7dc72862f42f024a16968891a356f5e7e0e695d8c933ba5b5e43ad4c4ade5399bc2cae9bb6189b7870d7f22956194d277f28b10e01c10c6ffe3e065f7e2d6d056aa790db5649ca84dc64c35566c0af1b68c32b5b7874aaa66467afa44f40e9a0846a07ae75360a641dd2acc69d93219b2891f190621511e62a27f5e4fbe641ece1fa234fc7e9a74f48d2a760d82160d9540f649256b169d1fed6fbefdc491126530f3cbad7913e19fbd7aa53b1e243fbf28d5f38c10ebd77c8b986775975cc1d619efb27cdcd733fa1ca36cffe9c0a33cc9f02463c91a886601fd349efee85ef1462065ef9bd2c8f533220ad93138b8382d5938103ab25b2d9af8ae106e1211eb9b18793fba033900c809c02cd6d17e2f3e6fc84dae873411f8e87c3f0a8f1765b7825d185ce3730f299c3028d4a62da9ee95c2b870fb70c79370d485f9d5d9acb78926d20444033d960524d2776dc31988ec7c0dbf23b9905d"
+
+const (
+ digits = iota
+ twain
+)
+
+var testfiles = []string{
+ // Digits is the digits of the irrational number e. Its decimal representation
+ // does not repeat, but there are only 10 posible digits, so it should be
+ // reasonably compressible.
+ digits: "testdata/e.txt.bz2",
+ // Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
+ twain: "testdata/Mark.Twain-Tom.Sawyer.txt.bz2",
+}
+
+func benchmarkDecode(b *testing.B, testfile int) {
+ compressed, err := ioutil.ReadFile(testfiles[testfile])
+ if err != nil {
+ b.Fatal(err)
+ }
+ b.SetBytes(int64(len(compressed)))
+ for i := 0; i < b.N; i++ {
+ r := bytes.NewBuffer(compressed)
+ io.Copy(ioutil.Discard, NewReader(r))
+ }
+}
+
+func BenchmarkDecodeDigits(b *testing.B) { benchmarkDecode(b, digits) }
+func BenchmarkDecodeTwain(b *testing.B) { benchmarkDecode(b, twain) }
+
+func TestBufferOverrun(t *testing.T) {
+ // Tests https://code.google.com/p/go/issues/detail?id=5747.
+ buffer := bytes.NewBuffer([]byte(bufferOverrunBase64))
+ decoder := base64.NewDecoder(base64.StdEncoding, buffer)
+ decompressor := NewReader(decoder)
+ // This shouldn't panic.
+ ioutil.ReadAll(decompressor)
+}
+
+var bufferOverrunBase64 string = `
+QlpoNTFBWSZTWTzyiGcACMP/////////////////////////////////3/7f3///
+////4N/fCZODak2Xo44GIHZgkGzDRbFAuwAAKoFV7T6AO6qwA6APb6s2rOoAkAAD
+oACUoDtndh0iQAPkAAAAaPWihQoCgr5t97Obju21ChQB0NBm3RbA7apXrRoBooAA
+AhA+IAHWl2Us3O7t9yieb3udvd76+4+fd33nd3HO1bVvfcGRne6+3vfPvfc++995
+w7k973eJhasLVec970tzDNXdX28LoPXZ3H3K9z0s5ufWAfes49d5594c3dUYtI+2
++h1dvtpRa+uvrVEAG9bl893RVEN7cWvroSqWjPMGgAQi7Gq8TJSgKKdjKFBIB9Ae
+LqWxleu715eXe7ml9e5098Z6G1vr7t1QZ6ot76YzPd3j7333t2ql2Chm7XrA9ICQ
+VF77z3rVBWqkSXtlfb099hyezAr6USbGpICTSCFAaqHrKo+tUnm32rpE4Ue+t2mj
+bKUeipEqwc93EdhhTwmQpOhhesC9iqDSPNTWYNSnUtBdm1nsA0nqqNd7OWwDXtFL
+ONmmA6Ubke26I9UblvWIPR5VOWOnctai443URunnDy77uVC59OfRvezlDu33Z7Ly
+3NNuuHW63088xu3t3NHZhkZbG7tXRlj00qOtbaXTJUUdspTbABR9R6EUwQAEAAAA
+EMEwRpoAAAABMmhoAAjBNNAaCMhponpoGpgJpk9TEyp6niGKZkAaAEfqMQ09U80p
++pMGSCKngIAAAAgAAg0AAJhGgABGCEaaTyTKeNI1PE0wkj01GajMSNPSZGnqbU9T
+anlPUNAHqGQ0DQAMg9TamgAAYRU/IAAICAmjQJgjQBMEwp5DTSaaYmhTeqfplPID
+U1T9TynoU82pT1NPU/VP0j1NHqRpk9TTR7SnqaNNGmmQAaAD1Aeo0PSAAAAaaBiK
+eBAQBGgIABGQA0AmBNNBoaAgaJmpglPEyYap6npiTT0agGjJjUaaDTQAAAAAAM1A
+9QAaAAAADU8iEAQAEyAJk0NNNJgIZTJ5E00YSemiaZNGm1MpGNJ+lPU9qm9U2RDM
+oY0EzJB6h6nqDID1NMBDDRpo1AGNAjCMmhkMgaYSJIgAAAQyAAEyBoATECCNhTT0
+U/IZAmCM1DSTxkzUE8p6NDaGiZGJqntTFHvUyU9qPQp7Kn5GgKNPU9QAGg9QAAA3
+wz0Pk/g/m/m9P9H4vxv2+dH3gCS8nhbbbbbYxtgNsBsG0m2MbG0NNtsbYNsaY0wb
+bBibGmm22mxptNpsaGNDTY02JsG0MY0xg2MaYNNDbGwG0L5vsK/F9DO+EAA447Kq
+p7Wdf6Y+5c20T7DfHyMXIzRKrZexw72uiQI+y55vOe52xpqbCLC2uR20JdER7Zvr
+7ufuKb6zhiBxLuj0eA27v8RpMLucw9Ohwcizi2wrpt+yU1FdpM7ZYPcwS3XTef+A
+Wzjxwhdrgw3aH1LeC1eZW900x8V9Nv4hTPXp4l067P/4ANVZFF/imOe/d5bdueam
+/DFFokQWnFaU+ZqLBCM+d0PialJQWnLqRQZk/KhfbbYc2pCUTgffcSYbrCM1N+8l
+HU6gSz+h2GJXs+tbrNviL83M97X0vcTn/F82P8wen8/3/h3sHY+sf9CSej9ThYTV
+3lQ+FUHpfpGD4kv7dYMV995dpDX/y3xR8FoXx1bjUxBTNxuutwQ/h/Eedn9wpn6w
+E3+ND8YhN1HSriIxRE/6uFyMv6/oC6Elarw3aHMMqHJkGiiz6tejmvnYLQa+Qm6G
+deZ7jXTZV6NlpocgDnRdimS06bTYSkvPAL/xoWNLkX6N6VljU0dfKSBmm2uZE/xu
+sutQ1EdP7GdjhglIq4xlOFUFEQpmX+xx7R8y6c0GSAaqusOjNZwxZRudOvmXm1tZ
+T+YnbeB2ir9eiHNrtJNSLD/J/WDyuQpwBUtLKo0krccY/wIILP7f86teb9Z/9oyz
+OX05qEWbObfhpRw+9+rCvp/35ML8KX3aHaI0n+tudbFRsV5FLW+Oa8ruLN4peyVL
+DWjTHrXNthq/s7zAJYMeFJZkZt5mT9rfpH+5g3nc+piOSZ+J5nHtOnKI7Ff8Xl+j
+0t76XTNucCHQ6whav1OHdF53TY5wuv5OzvrdnxoId8fTyUvERr0ERINu/8XxZZ5f
+B5/kTZ8bBO0wv54Jp+ED/GQI8lZHzIQCP3vfQhwnCTj9TvITic7P4mYLDbH3fyzR
+i+6EajCcpXLWSGf+ZXkOrWspDWDhXtEKas0v3UqWksqgY1rTj45krX4KihN+daXs
+pZl5WPlta5p06CX6Xm2SfzqkMw12/3ix1bpnnZ+kFeBNX7A+E9zzG6OZaN78GOpl
+9Ht/eZn9PqWdav852zr0zqkDK2H5IjdvNah+b1YVGdQGzwR4Nw+f13yEKnV+y66W
+djfq7zWp7m5w+hzfv+Ly8O7oet5Vvd8/wQvO7qzOZ2vjf9X8Tj8PnMb/nc/nKqRR
++ml4UEhOOwfCeJEEI109CMYSh91iAJqPjMyH6KjrPD7W25llZVcREYNCTg6htbQt
+M38wYoquCWP6tdKYlVIv14xTNUeUf4El/FunCf6csZkmv+9tfWx7t59wuKIa3saU
+tZs9M+3HFOZtz3OLg/Unoaj9BYazYqA78xBU9tZzrtmF/rQL9CGJt90o/oYnSfcS
+SL3haaw351LXWQ1XOsv1SmH3v6ymuxEpPPnEDmBELaTYsvvMIWJsmPZFFww++Kd7
+s/Jo0JFeUU7uNtI+gVosAIpVVuWfI/9tOIycz7I5Z7zjV+NR2OuZbYtW5F08KX4o
+2k/xuJIchcNFPtxPfw9dkDgscRbMckyFMrzuZ3IvrcGzk0J6iI5ytrv37bGpAXMz
+WK9mMMPebepNevmLjjo/QWoM968Sjv7ldlPS5AinHcXwsFv6dmmh8lJt7UOJWoKu
+lMD1cB2ksIGpMdv8iuqR42Rn/kn+17BhhUZcwDBaUXVdX6bKW7fxlUYbq+mlqIcf
+a9v8HF87M9ANbi9bq9onf9TD7nQ6Xf6vZci8TBPX+/GI0He6j31fTVQYW+NsQxvO
+J8xrx+e58CCLQNjxeIyPt+F+qk/QMiXw+LyxGVkV/XcGQT9X03jSDP6beJ5QG1JW
+9Q3qLv/YixWI7gPV9Mrhf2oRYTc/9KLFRhkE3SjKOTKuSSBKQ24fI+hEznamH71D
+66Hwez8/0et7AtTv9zvamv2OD5He6fMV4k+ePl6+qPfO5CdHtK+eCDZL5+4f5yrl
+gTcRFiq8fXbc5IaI5fbbc1KMM/2T0Mr7+Hwaco6FtXm0fmhCgTZRqY4pKiEIfmaz
+QwHNOOCrtMJ2VwsyMumt7xsOolGnizRev6lILH43qPcczQM7Gc5zRin80YvFt1Qm
+h/57Z0auR2h0fuX50MBO4XQ+26y5l6v4j902R66c0j3z2KHstKQ04J/h6LbuNQE4
+D6cu/lyfK69DxxX8wb8XaQkMUcJdo1LzqUGDAb3Kfn/A3P/JYc99MO9qv67+SxWb
+wYTyqKdWTd+1KbR/Rcn0Io5zI/QquX7FA1bxfMytjQ/X+l0fh0Pf+Hx97meH4fQL
+7/T8/sdTm9Tn8nELvedyhydLlPPTScINdXyLIq9wgIJr4fWPbp9ZhFh/56fdSgOG
+HDXg+gkXsN2Rddr4HQ5P3u+RhLzmSjhzoqY5EsPC4QvRlX9JXjB84rPV5USR66qa
+/kjw4156GJnzoXtydKJE53t6PHfZWO+3ujsfI6iAdshc7OFzGXiZB9PtItKodhYq
+nABkTKdcpu4+TOpf9h5piX5slsaBjkeTnj/Ba02ilboQfcDVigxrYn/iTH5ySWUW
+/lHtg78s5UZM8sErwhNe3N3w+6ZOMnU+5i86/xFNtqZfDdXTGy1H3PzGbdtZXYT+
+Ixx2vpwBYzbPVYHxKosM5rPiVmcTllI9nuoSfeh9ib4foFWauOpvdmhBDqpTpKTX
+u8EO2l2Z195G2RIV7TlKSxGWjR5sl/nALu1uzBeLd9zpSujzMTd1uTX9Qk/Q1S+r
+vaW6bm8qqPO4jb6Wx6XIkm321nrIF6Ae25d1+Dpv/P5G4NoLd2j6/EtENC3FeR5z
+oo7bA+tI8yEQRhiF0z1FlJXLD5ZbhNNWQm/j/IbzRfh8JtOFZU7ruShLvHXysW9S
+9V909tr9jn8/E/Hb5N/1NVNHnZu2HIUvJvHJiHd2ucmeI9PWUMnppmE65GQ5E9xV
+ZRlGEH0X85EvmHyEupkMrCC0oMv9RCq+/H8gcfpe00Hs/S+regT5p58cyYomh93v
+qvuw/A06BE/wzJESuYbN9pqYpoXqXFemW1NksHEJ2w+PYMJ27WJyD5FpaXB85VaW
+qMOhDfO8E3QdH8ybyKt/UgI8/tDGpFbyOlaVdIv1FXJhoLp8soAA4Djg6/KZ066N
+ZFYuS8WdjpSZGP4/Lw+1yaXlzNznc/k2uHe2uXP3uFuPcHx+Dm44utxldoO1uBPy
++jzOs14+MIgOjOHMVNqAbMd8fUedLlhJMCfMtm4uz01enLNKcMrtLlPIR37Yukh1
+YEMXYpm7eU4XU+j+Jj3pDyaXtXs+p1fWfTN/cy9/Oxs4umUXQ4uHh1kObtayDJ56
+/QMxiHobjHNKuKfMxsrYEwN+QVIyVjAwMDYuMjQ1AAA9IwJniiBLRkZDAAAXt0Ja
+aDQxQVkmU1lZtwytAACLf///////////////////+//////v//////////bv78//
+/+AXO133uwO2xB2UxIvbKXrCqCoURUBL2ytFI82AFdcOwMhVTHtk5rD3szEVNYD4
+aIQINCaMRoTaSn7SbSMJiYmEwieTEp+psqbMCp+VNPaFNpqbBNR7UmanlPUeKfqm
+j1PU0/VPU08o9Q9EeKHlPJtKbYqeTCYhN6U9T1NH6mp+lPyoGNTI/Knkyg1MggAg
+CaMEyQnqZoaaRtRtJpppppoDaTR6hpphGh6mmgHpMQBpkGTTEAAaAAAA00AZDag0
+ADIBkGgABqemiRNTI0k8aU0PRGRoAZlP0UAAAGgAAAyAADQaAAAaAAAAAAAAAAAA
+AaAAAAM0kgRBJ5MlPFP1Gj0jTTTUaekxNAbUGjTQMgaZANNAAAAaAADTQAAAAAAA
+ANAA0AAANADQ0QAAAAAAAAAaGgAAAAAAABoA0AAA0AAAAAAAAAAAAANAAAAAkSEI
+aTRpomp5DUxNNDTJPTKaep6T09Kemmo2JG0aTQ9ENogaaGhkABo0NHqaBoDTI0DC
+Gj0gNAMhoDQ9QMQNAGQAaDDwyMPIMlbG1vhRBTFo6JksSupgpAjPbY0ec02IGXjb
+eS+FBsh01+O4ZOaD+srUZCFaT4DRjVDLx7uKIsFtESIDUg1ZkhyCSYov05C00MtR
+BdNNa/AYPGOQZWcs+VegXOPrkushFbZ3mBoRD6WamClkpBaHZrUhUl02bIfRXX4w
+b3/9cW9nHDVxh2qFBxqgRKfmq7/Jc/tdJk05nVrGbckGVy2PnIy30CDhpWmqrSot
+K2bOnX0NbP1iy2cd0Na0ZmbRstm4MzMzbbMySTd35F7f+zPP8DC+NJLYcakkkkRd
+NZlupJt3OMFoDAD2g+N3FAMCydhIpoRHRQAdFI5nNg4ugEXHCYxkMyGCwtaJmial
+y0IMlpSYYM/weXNJAhFqS0GNmvaPEtYGjbvaucMdklOTmBX1vfVAkTYB1uXCSK64
+UNIixOqRKLuRCFtqIQtgwqaFrCkIYbbewErWABa+VGADWsJXJjfx5SJViLuwiGXq
+Ru6vCuwmU5CJiJz3UiBpmLv0r2wskxUhY4tzPVGQ9RMXJl65eLSNwZVwaSyGZ9Cm
+A3jztQUUpFeUryBTskW95iVwRMFrhBCwZBAFJBZvhMEMNoDJJlUoIhQkAkjbExp2
+YZio+ZYeAZUwmH1qUbdQixmxf0+61+aVgJ1hwxsO1yG3hFx4pfjc09ITVht0pG8u
+FtVFhPa1KE0gTRUSVXywkITucqk0Waz5Fs6qJpVHYdNrbYRFxnFsQGY1qmsTLjK6
+4QX5Rddo6krM/Bx9CqIAKq4CzVQYHrmIAd2EBhYmwVYwLvhzKIUrc2EirnGIvyuD
+O4YZDSwsVTA0BpVvUOjDErkCraBoSutcKwUSSLGhVvNYHLz3klgZD++wWsa/swLw
+gvNDY2De+sncOv8X2lq4HD95ZdwPuTIMXCwSbg4RrIqv+L0y6F17pqDecyQYPEj3
+iN/0BBeWZlJAyBMi5U3Q1zAlsK8IlDhaXGmvZrgISq5CfNjmUgxDeMggOKqxu4sI
+OrilS49Lkl1J3u3GjXTuH+rX+4ccyFAQnizCpPClcY77F59j63S6fr5vr+y99tuO
+7Ox7Wg/ljwhdyaK4xMmXczeJbx7x07htJNtC4xcQfAtvzeznLrN6MN/ILIBOI65I
+qIA2D5fHHj1XN4aN6TvOjWDaSbSWqxCSCvXUpzkNJAkWXAuTwF8k5uSJvQj/rVo0
+hAhEMEIYkCRGx9AX+byIuXWlLMbbVeliHNUL5AQYmNwLFu4SkmGD+UWtBMyVHQOQ
+ss0ggoVKSKOBUgnVS6ljt7WE1qXqJJ4QA1pEwYNLEaguEE1LtPNoVr5WzjbSbWPk
+V9OW3y9IneUDLoIV5pAkEFTEFGFVjeTFxtpzBBfGgycBxVCdz8eESBIzsamRchAa
+TQunQH8DHnpfod9QuAuRvc7JBlKUCYmCjMvynLcxIFohxCaYrDvGw4QbXZB7oWQ7
+hpoGlz23ayDfB8NrRRzdilsEQyQniu9ASLQg7RrGZnoTr1ai12IbCEUCGdFq03P5
+nBnRFAGmisQGcyykV9gKtcVMWLhCuVmXg86dndn7slUpRNSSEAU20oaWIm1maFTu
+E0DT4gTbg0nuhjtz3kNOz+i7sBm0bkXjxQWuLqlZEmp60ZTyRZJDUqKSEKg6hqcy
+ERxdU22CSNOO10RYUUiDVpKhPNdKTOIE1thp02sBNoNTFSht8WJtaBQ09qN3jd5r
+dOLX4IA5fevRyCCzDgRXfV4wzik4KROjmxmTMglBySlIMEzcXehnDXCRiZSlvwA2
+0YsIOROcm4UrIRFxJHctJH7OdN5u1aHVHb5UaLHpv48NgmFRE56KTSoaWunqm2st
+S0mrAdOiqcR12PWVbdVRJKcQ0DQuhwlAPcRtpxN3D4kbXJjToSYJIFw406G2CSaK
+jQMIJPZGlQmgyFhoCSzeGS1VSq5SKKQQxs5RqKUcVUNY57YUETb4mXzV84SPngKi
+nsce0mXByZq5BKUA9puHZWLNwQIYuDaJUNgG+E01E3pDYVNLKYQ0hsVesgV5gZY0
+htDsRdGtm0+iGnkN6+Ea9YJtUZNAkx2GgSoix12nTW0avTUfxR3oYcpvZ7IdtABE
+UhBcjG4qZtDZsS1JQHys243vhLaDTSvvTeBiJA2tmokqECTBcSOCAGkAxMKlVAva
+4IsLRaBBqhxDbcGtgdw03mFcLUaFuhtKuuEIEkUleJQwby/zwu9uvvZK4xTV+ECM
+a8lmzxKmqkBggYK1+xPdbmJclm6tSZhE/OSJtCEjs+unJIQkT9hCWgBJqGMS07Eh
+AJNmBiuVEVdTyjkIJkavuZmx2sJF13htgEZUCC23lZFOE6gWbM9WyYNJTM8yCQrb
+0Sx3OQvBML5cRATAQkSQkAJOAhoxpQkNi4ZiEVDbdtJAME0RXNDXGHA3M3Q0mm1o
+IEwbWpaM1DQCSMbGRCAu3iRIQiT6RlBpT1n3tfwvUXz3gIVlx3mEximY/kZW1kNG
+sgEJIrBisaEoGYPJ+1CQUYFBw+eGEHJQBpNHjErXUJY2iWHQ30hXwFBuMSxQ2lB5
+bg+/LX3euG6HsHUB1lFvBvaiaBrITVwkCTa1d0s9CHZCiDZjbWReKyrpPE2oSa7o
+LPrR4BJvys9ttjUpzETSSMxh8vsr9dXTwKBtK+1xCTGDQmNIaE29HmHdS5GSxpya
+MismcAUSEgSxHBrKtgsZzduG7vHZn16l3kFkVITtENIzS2JsiBwFTDlhgexsjBHv
+5HXOYxHBzoSDCcPZ0ctvkY9aS5XpoQuFYkGJgCsqjJZeUMNUEpDSbKcnUc1PifIA
+CbR2UoXawBlspkEBr9HBfvUi/MUakZVOf1WKYrqSaIXce62JOyhJLq3qJBloTA0F
+VbILEtM+heFmNRCFt70GJrExVJri0ArYbCRbADSGDBpBXxxb/6fo+s3C7uaL7RjM
+LV2IQBNrAJrKFeJwTsPnxbAsemirUx2lk1kaxschzdK4TQNJN5wQnolIFg401OZ4
+2na11LnT3lR+1k1TMJhiAjXMk0F1ooHnYlt9LKfJ3ZIOmeY+2l9bUQHWFNGyEyfj
+EAcu3kpGLq0Ez7XOS+EpAASRQTAYMATfVQibHLTT30zG732+pNe9za1JNt8sNJYn
+RjWuJ6jL5ILV0rcd9vT7X9fObvcXitpvJ2XBJE+PhX2HaTkyWeF9pwnlQNrTe9hV
+tzhA+ihZrDrHNmLcQjZbnv/IMubqq8egxY80t5n6vZ6U5TR6U9uZJvai1xtqAyCR
+NWkW52m00rDTEuO6BA4q2RHDWwbETF55rRsWLIgNW9qJCyMHPbTM/dMBmWMQSMxz
+4M2pRzt47SICxA327UqSCEERqMFybmYi3nUxePtLgHYplqRiw4ynMbXd/kiQ0LE0
+PKJSSCXA42ymziCpAxNWflzpzQdJZusahRFr6t6m+4p273/Taj7k+hZyNgBAgXAY
+8F7pTts6orLb8IA6o4TOwkwQYmKvKu9VwMrE7+GUhVIAgY9a8DyQMiDBkEAwh7S1
+KgCBfao8DK1CwSS8Z3WjL5MEgt93z2koUQCD/YxMBppiCMp7SDVSmkkIHptfGpeh
+t+M13Ccv1tavIASFiaQl6rBz3K4N3DSGwNkCibrvEAC0fQirOWnc4NVbcLKpFG1l
+NQXF/eqdT79wq1Mvlap3QSCLhcD2D3fCkKVWid4aSjtp9FOX1Uaf7P9eT93zd9Sv
+mj2yNLRUGzyI/0oONNSzmmkvJ5Cq2X2CdldIWMGZO57RJ8oyATAWTQmRmNkfh0Sx
+uuR/J9oUsomVy1AEntc0dlPivkqBkBqrxU3j5PnWkaI3ZRGc0gg9spCQEISh4xEU
+pMhVrnmDQLfLP8Ouqpx917MAw7hkjQk6BJFTAbXDsz3LSHIxo/gB8qrA1vbvdZZh
+LtR0frJdfdppX8nAQX/TAxOQ8+H6yw8a9i7/zJEfSYIhop59N/fhcWW2F14cj2Xc
+fyHaZ04lTO4uPnly91jwuFPaREuZVp8AxImIhlkxkAN61tWdWG7tEbaCgszh6VIz
+ThFnHo2Vi8SQXPrXCN7J9Tc9ZYiAYqoThV/u6SYsea5aZL8deOvKBQCgZZuIxX1z
+4EnfcqG176vY4VqMBIC4pMJz0WcHJYqN+j7BiwGoMBwExrIdTB7q4XIFLotcIpS0
+1MqyVsesvoQq7WObmGQXdMliMirSLcDuSx8Qy+4pIBgGDIyMp1qbonnGdcHYvU8S
+O0A8s/iua5oFdNZTWvbVI4FUH9sKcLiB3/fIAF+sB4n8q6L+UCfmbPcAo/crQ6b3
+HqhDBMY9J0q/jdz9GNYZ/1fbXdkUqAQKFePhtzJDRBZba27+LPQNMCcrHMq06F1T
+4QmLmkHt7LxB2pAczUO+T2O9bHEw/HWw+dYf2MoRDUw=
+`
diff --git a/src/pkg/compress/bzip2/huffman.go b/src/pkg/compress/bzip2/huffman.go
index f755019bb..8f6b0c9ca 100644
--- a/src/pkg/compress/bzip2/huffman.go
+++ b/src/pkg/compress/bzip2/huffman.go
@@ -33,14 +33,17 @@ const invalidNodeValue = 0xffff
// Decode reads bits from the given bitReader and navigates the tree until a
// symbol is found.
-func (t huffmanTree) Decode(br *bitReader) (v uint16) {
+func (t *huffmanTree) Decode(br *bitReader) (v uint16) {
nodeIndex := uint16(0) // node 0 is the root of the tree.
for {
node := &t.nodes[nodeIndex]
- bit := br.ReadBit()
+ bit, ok := br.TryReadBit()
+ if !ok && br.ReadBit() {
+ bit = 1
+ }
// bzip2 encodes left as a true bit.
- if bit {
+ if bit != 0 {
// left
if node.left == invalidNodeValue {
return node.leftValue
diff --git a/src/pkg/compress/bzip2/move_to_front.go b/src/pkg/compress/bzip2/move_to_front.go
index 0ed19dec3..b7e75a700 100644
--- a/src/pkg/compress/bzip2/move_to_front.go
+++ b/src/pkg/compress/bzip2/move_to_front.go
@@ -15,10 +15,11 @@ type moveToFrontDecoder struct {
// Rather than actually keep the list in memory, the symbols are stored
// as a circular, double linked list with the symbol indexed by head
// at the front of the list.
- symbols []byte
- next []uint8
- prev []uint8
+ symbols [256]byte
+ next [256]uint8
+ prev [256]uint8
head uint8
+ len int
}
// newMTFDecoder creates a move-to-front decoder with an explicit initial list
@@ -28,12 +29,9 @@ func newMTFDecoder(symbols []byte) *moveToFrontDecoder {
panic("too many symbols")
}
- m := &moveToFrontDecoder{
- symbols: symbols,
- next: make([]uint8, len(symbols)),
- prev: make([]uint8, len(symbols)),
- }
-
+ m := new(moveToFrontDecoder)
+ copy(m.symbols[:], symbols)
+ m.len = len(symbols)
m.threadLinkedList()
return m
}
@@ -45,34 +43,29 @@ func newMTFDecoderWithRange(n int) *moveToFrontDecoder {
panic("newMTFDecoderWithRange: cannot have > 256 symbols")
}
- m := &moveToFrontDecoder{
- symbols: make([]uint8, n),
- next: make([]uint8, n),
- prev: make([]uint8, n),
- }
-
+ m := new(moveToFrontDecoder)
for i := 0; i < n; i++ {
- m.symbols[i] = byte(i)
+ m.symbols[byte(i)] = byte(i)
}
-
+ m.len = n
m.threadLinkedList()
return m
}
// threadLinkedList creates the initial linked-list pointers.
func (m *moveToFrontDecoder) threadLinkedList() {
- if len(m.symbols) == 0 {
+ if m.len == 0 {
return
}
- m.prev[0] = uint8(len(m.symbols) - 1)
+ m.prev[0] = uint8(m.len - 1)
- for i := 0; i < len(m.symbols)-1; i++ {
+ for i := byte(0); int(i) < m.len-1; i++ {
m.next[i] = uint8(i + 1)
m.prev[i+1] = uint8(i)
}
- m.next[len(m.symbols)-1] = 0
+ m.next[m.len-1] = 0
}
func (m *moveToFrontDecoder) Decode(n int) (b byte) {
diff --git a/src/pkg/compress/bzip2/testdata/Mark.Twain-Tom.Sawyer.txt.bz2 b/src/pkg/compress/bzip2/testdata/Mark.Twain-Tom.Sawyer.txt.bz2
new file mode 100644
index 000000000..0bd61a6d4
--- /dev/null
+++ b/src/pkg/compress/bzip2/testdata/Mark.Twain-Tom.Sawyer.txt.bz2
Binary files differ
diff --git a/src/pkg/compress/bzip2/testdata/e.txt.bz2 b/src/pkg/compress/bzip2/testdata/e.txt.bz2
new file mode 100644
index 000000000..65bf3b4c3
--- /dev/null
+++ b/src/pkg/compress/bzip2/testdata/e.txt.bz2
Binary files differ