author     Tianon Gravi <admwiggin@gmail.com>   2015-01-15 11:54:00 -0700
committer  Tianon Gravi <admwiggin@gmail.com>   2015-01-15 11:54:00 -0700
commit     f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch)
tree       f8255d51e10c6f1e0ed69702200b966c9556a431 /src/compress/flate
parent     8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff)
download   golang-upstream/1.4.tar.gz
Imported Upstream version 1.4 (tag: upstream/1.4)
Diffstat (limited to 'src/compress/flate')
-rw-r--r--  src/compress/flate/copy.go                  32
-rw-r--r--  src/compress/flate/copy_test.go             54
-rw-r--r--  src/compress/flate/deflate.go              571
-rw-r--r--  src/compress/flate/deflate_test.go         490
-rw-r--r--  src/compress/flate/fixedhuff.go             78
-rw-r--r--  src/compress/flate/flate_test.go            62
-rw-r--r--  src/compress/flate/gen.go                  190
-rw-r--r--  src/compress/flate/huffman_bit_writer.go   517
-rw-r--r--  src/compress/flate/huffman_code.go         323
-rw-r--r--  src/compress/flate/inflate.go              739
-rw-r--r--  src/compress/flate/inflate_test.go          39
-rw-r--r--  src/compress/flate/reader_test.go           96
-rw-r--r--  src/compress/flate/reverse_bits.go          48
-rw-r--r--  src/compress/flate/token.go                102
-rw-r--r--  src/compress/flate/writer_test.go           60
15 files changed, 3401 insertions, 0 deletions
diff --git a/src/compress/flate/copy.go b/src/compress/flate/copy.go new file mode 100644 index 000000000..a3200a8f4 --- /dev/null +++ b/src/compress/flate/copy.go @@ -0,0 +1,32 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// forwardCopy is like the built-in copy function except that it always goes +// forward from the start, even if the dst and src overlap. +// It is equivalent to: +// for i := 0; i < n; i++ { +// mem[dst+i] = mem[src+i] +// } +func forwardCopy(mem []byte, dst, src, n int) { + if dst <= src { + copy(mem[dst:dst+n], mem[src:src+n]) + return + } + for { + if dst >= src+n { + copy(mem[dst:dst+n], mem[src:src+n]) + return + } + // There is some forward overlap. The destination + // will be filled with a repeated pattern of mem[src:src+k]. + // We copy one instance of the pattern here, then repeat. + // Each time around this loop k will double. + k := dst - src + copy(mem[dst:dst+k], mem[src:src+k]) + n -= k + dst += k + } +} diff --git a/src/compress/flate/copy_test.go b/src/compress/flate/copy_test.go new file mode 100644 index 000000000..2011b1547 --- /dev/null +++ b/src/compress/flate/copy_test.go @@ -0,0 +1,54 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "testing" +) + +func TestForwardCopy(t *testing.T) { + testCases := []struct { + dst0, dst1 int + src0, src1 int + want string + }{ + {0, 9, 0, 9, "012345678"}, + {0, 5, 4, 9, "45678"}, + {4, 9, 0, 5, "01230"}, + {1, 6, 3, 8, "34567"}, + {3, 8, 1, 6, "12121"}, + {0, 9, 3, 6, "345"}, + {3, 6, 0, 9, "012"}, + {1, 6, 0, 9, "00000"}, + {0, 4, 7, 8, "7"}, + {0, 1, 6, 8, "6"}, + {4, 4, 6, 9, ""}, + {2, 8, 6, 6, ""}, + {0, 0, 0, 0, ""}, + } + for _, tc := range testCases { + b := []byte("0123456789") + n := tc.dst1 - tc.dst0 + if tc.src1-tc.src0 < n { + n = tc.src1 - tc.src0 + } + forwardCopy(b, tc.dst0, tc.src0, n) + got := string(b[tc.dst0 : tc.dst0+n]) + if got != tc.want { + t.Errorf("dst=b[%d:%d], src=b[%d:%d]: got %q, want %q", + tc.dst0, tc.dst1, tc.src0, tc.src1, got, tc.want) + } + // Check that the bytes outside of dst[:n] were not modified. + for i, x := range b { + if i >= tc.dst0 && i < tc.dst0+n { + continue + } + if int(x) != '0'+i { + t.Errorf("dst=b[%d:%d], src=b[%d:%d]: copy overrun at b[%d]: got '%c', want '%c'", + tc.dst0, tc.dst1, tc.src0, tc.src1, i, x, '0'+i) + } + } + } +} diff --git a/src/compress/flate/deflate.go b/src/compress/flate/deflate.go new file mode 100644 index 000000000..8c79df0c6 --- /dev/null +++ b/src/compress/flate/deflate.go @@ -0,0 +1,571 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
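+// An illustrative walk-through of forwardCopy above (not from the original
+// source): when dst overlaps the data still being written, each pass doubles
+// the copied pattern, e.g.
+//
+//	mem := []byte("abc......")
+//	forwardCopy(mem, 3, 0, 6)
+//	// pass 1: k = 3, copies "abc" into mem[3:6]   -> "abcabc..."
+//	// pass 2: dst >= src+n, one final copy        -> "abcabcabc"
+//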
+ +package flate + +import ( + "fmt" + "io" + "math" +) + +const ( + NoCompression = 0 + BestSpeed = 1 + fastCompression = 3 + BestCompression = 9 + DefaultCompression = -1 + logWindowSize = 15 + windowSize = 1 << logWindowSize + windowMask = windowSize - 1 + logMaxOffsetSize = 15 // Standard DEFLATE + minMatchLength = 3 // The smallest match that the compressor looks for + maxMatchLength = 258 // The longest match for the compressor + minOffsetSize = 1 // The shortest offset that makes any sense + + // The maximum number of tokens we put into a single flat block, just too + // stop things from getting too large. + maxFlateBlockTokens = 1 << 14 + maxStoreBlockSize = 65535 + hashBits = 17 + hashSize = 1 << hashBits + hashMask = (1 << hashBits) - 1 + hashShift = (hashBits + minMatchLength - 1) / minMatchLength + maxHashOffset = 1 << 24 + + skipNever = math.MaxInt32 +) + +type compressionLevel struct { + good, lazy, nice, chain, fastSkipHashing int +} + +var levels = []compressionLevel{ + {}, // 0 + // For levels 1-3 we don't bother trying with lazy matches + {3, 0, 8, 4, 4}, + {3, 0, 16, 8, 5}, + {3, 0, 32, 32, 6}, + // Levels 4-9 use increasingly more lazy matching + // and increasingly stringent conditions for "good enough". + {4, 4, 16, 16, skipNever}, + {8, 16, 32, 32, skipNever}, + {8, 16, 128, 128, skipNever}, + {8, 32, 128, 256, skipNever}, + {32, 128, 258, 1024, skipNever}, + {32, 258, 258, 4096, skipNever}, +} + +type compressor struct { + compressionLevel + + w *huffmanBitWriter + + // compression algorithm + fill func(*compressor, []byte) int // copy data to window + step func(*compressor) // process window + sync bool // requesting flush + + // Input hash chains + // hashHead[hashValue] contains the largest inputIndex with the specified hash value + // If hashHead[hashValue] is within the current window, then + // hashPrev[hashHead[hashValue] & windowMask] contains the previous index + // with the same hash value. + chainHead int + hashHead []int + hashPrev []int + hashOffset int + + // input window: unprocessed data is window[index:windowEnd] + index int + window []byte + windowEnd int + blockStart int // window index where current tokens start + byteAvailable bool // if true, still need to process window[index-1]. 
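+ // An illustrative sketch (not in the original) of how the chains above are
+ // walked: positions are stored biased by hashOffset so that zero can mean
+ // "no entry".
+ //
+ //	pos := d.hashHead[h] - d.hashOffset          // most recent position for hash h
+ //	older := d.hashPrev[pos&windowMask] - d.hashOffset
+ //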
+ + // queued output tokens + tokens []token + + // deflate state + length int + offset int + hash int + maxInsertIndex int + err error +} + +func (d *compressor) fillDeflate(b []byte) int { + if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) { + // shift the window by windowSize + copy(d.window, d.window[windowSize:2*windowSize]) + d.index -= windowSize + d.windowEnd -= windowSize + if d.blockStart >= windowSize { + d.blockStart -= windowSize + } else { + d.blockStart = math.MaxInt32 + } + d.hashOffset += windowSize + if d.hashOffset > maxHashOffset { + delta := d.hashOffset - 1 + d.hashOffset -= delta + d.chainHead -= delta + for i, v := range d.hashPrev { + if v > delta { + d.hashPrev[i] -= delta + } else { + d.hashPrev[i] = 0 + } + } + for i, v := range d.hashHead { + if v > delta { + d.hashHead[i] -= delta + } else { + d.hashHead[i] = 0 + } + } + } + } + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) writeBlock(tokens []token, index int, eof bool) error { + if index > 0 || eof { + var window []byte + if d.blockStart <= index { + window = d.window[d.blockStart:index] + } + d.blockStart = index + d.w.writeBlock(tokens, eof, window) + return d.w.err + } + return nil +} + +// Try to find a match starting at index whose length is greater than prevSize. +// We only look at chainCount possibilities before giving up. +func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { + minMatchLook := maxMatchLength + if lookahead < minMatchLook { + minMatchLook = lookahead + } + + win := d.window[0 : pos+minMatchLook] + + // We quit when we get a match that's at least nice long + nice := len(win) - pos + if d.nice < nice { + nice = d.nice + } + + // If we've got a match that's good enough, only look in 1/4 the chain. + tries := d.chain + length = prevLength + if length >= d.good { + tries >>= 2 + } + + w0 := win[pos] + w1 := win[pos+1] + wEnd := win[pos+length] + minIndex := pos - windowSize + + for i := prevHead; tries > 0; tries-- { + if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] { + // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches + + n := 3 + for pos+n < len(win) && win[i+n] == win[pos+n] { + n++ + } + if n > length && (n > 3 || pos-i <= 4096) { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i == minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. 
+ break + } + if i = d.hashPrev[i&windowMask] - d.hashOffset; i < minIndex || i < 0 { + break + } + } + return +} + +func (d *compressor) writeStoredBlock(buf []byte) error { + if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { + return d.w.err + } + d.w.writeBytes(buf) + return d.w.err +} + +func (d *compressor) initDeflate() { + d.hashHead = make([]int, hashSize) + d.hashPrev = make([]int, windowSize) + d.window = make([]byte, 2*windowSize) + d.hashOffset = 1 + d.tokens = make([]token, 0, maxFlateBlockTokens+1) + d.length = minMatchLength - 1 + d.offset = 0 + d.byteAvailable = false + d.index = 0 + d.hash = 0 + d.chainHead = -1 +} + +func (d *compressor) deflate() { + if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync { + return + } + + d.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + if d.index < d.maxInsertIndex { + d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1]) + } + +Loop: + for { + if d.index > d.windowEnd { + panic("index > windowEnd") + } + lookahead := d.windowEnd - d.index + if lookahead < minMatchLength+maxMatchLength { + if !d.sync { + break Loop + } + if d.index > d.windowEnd { + panic("index > windowEnd") + } + if lookahead == 0 { + // Flush current output block if any. + if d.byteAvailable { + // There is still one pending token that needs to be flushed + d.tokens = append(d.tokens, literalToken(uint32(d.window[d.index-1]))) + d.byteAvailable = false + } + if len(d.tokens) > 0 { + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens = d.tokens[:0] + } + break Loop + } + } + if d.index < d.maxInsertIndex { + // Update the hash + d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask + d.chainHead = d.hashHead[d.hash] + d.hashPrev[d.index&windowMask] = d.chainHead + d.hashHead[d.hash] = d.index + d.hashOffset + } + prevLength := d.length + prevOffset := d.offset + d.length = minMatchLength - 1 + d.offset = 0 + minIndex := d.index - windowSize + if minIndex < 0 { + minIndex = 0 + } + + if d.chainHead-d.hashOffset >= minIndex && + (d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 || + d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) { + if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok { + d.length = newLength + d.offset = newOffset + } + } + if d.fastSkipHashing != skipNever && d.length >= minMatchLength || + d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength { + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + if d.fastSkipHashing != skipNever { + d.tokens = append(d.tokens, matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize))) + } else { + d.tokens = append(d.tokens, matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize))) + } + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. + if d.length <= d.fastSkipHashing { + var newIndex int + if d.fastSkipHashing != skipNever { + newIndex = d.index + d.length + } else { + newIndex = d.index + prevLength - 1 + } + for d.index++; d.index < newIndex; d.index++ { + if d.index < d.maxInsertIndex { + d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask + // Get previous value with the same hash. 
+ // Our chain should point to the previous value. + d.hashPrev[d.index&windowMask] = d.hashHead[d.hash] + // Set the head of the hash chain to us. + d.hashHead[d.hash] = d.index + d.hashOffset + } + } + if d.fastSkipHashing == skipNever { + d.byteAvailable = false + d.length = minMatchLength - 1 + } + } else { + // For matches this long, we don't bother inserting each individual + // item into the table. + d.index += d.length + if d.index < d.maxInsertIndex { + d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1])) + } + } + if len(d.tokens) == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.tokens = d.tokens[:0] + } + } else { + if d.fastSkipHashing != skipNever || d.byteAvailable { + i := d.index - 1 + if d.fastSkipHashing != skipNever { + i = d.index + } + d.tokens = append(d.tokens, literalToken(uint32(d.window[i]))) + if len(d.tokens) == maxFlateBlockTokens { + if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil { + return + } + d.tokens = d.tokens[:0] + } + } + d.index++ + if d.fastSkipHashing == skipNever { + d.byteAvailable = true + } + } + } +} + +func (d *compressor) fillStore(b []byte) int { + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) store() { + if d.windowEnd > 0 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + } + d.windowEnd = 0 +} + +func (d *compressor) write(b []byte) (n int, err error) { + n = len(b) + b = b[d.fill(d, b):] + for len(b) > 0 { + d.step(d) + b = b[d.fill(d, b):] + } + return n, d.err +} + +func (d *compressor) syncFlush() error { + d.sync = true + d.step(d) + if d.err == nil { + d.w.writeStoredHeader(0, false) + d.w.flush() + d.err = d.w.err + } + d.sync = false + return d.err +} + +func (d *compressor) init(w io.Writer, level int) (err error) { + d.w = newHuffmanBitWriter(w) + + switch { + case level == NoCompression: + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillStore + d.step = (*compressor).store + case level == DefaultCompression: + level = 6 + fallthrough + case 1 <= level && level <= 9: + d.compressionLevel = levels[level] + d.initDeflate() + d.fill = (*compressor).fillDeflate + d.step = (*compressor).deflate + default: + return fmt.Errorf("flate: invalid compression level %d: want value in range [-1, 9]", level) + } + return nil +} + +var zeroes [32]int +var bzeroes [256]byte + +func (d *compressor) reset(w io.Writer) { + d.w.reset(w) + d.sync = false + d.err = nil + switch d.compressionLevel.chain { + case 0: + // level was NoCompression. 
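+ // (The default case below clears its much larger tables by block-copying
+ // from the package-level zeroes/bzeroes arrays, e.g.
+ //
+ //	for s := d.hashHead; len(s) > 0; {
+ //		s = s[copy(s, zeroes[:]):]
+ //	}
+ //
+ // presumably because copy lowered to an efficient memory move on compilers
+ // of this era; this NoCompression case only has the window to reset, so a
+ // plain loop is used.)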
+ for i := range d.window { + d.window[i] = 0 + } + d.windowEnd = 0 + default: + d.chainHead = -1 + for s := d.hashHead; len(s) > 0; { + n := copy(s, zeroes[:]) + s = s[n:] + } + for s := d.hashPrev; len(s) > 0; s = s[len(zeroes):] { + copy(s, zeroes[:]) + } + d.hashOffset = 1 + + d.index, d.windowEnd = 0, 0 + for s := d.window; len(s) > 0; { + n := copy(s, bzeroes[:]) + s = s[n:] + } + d.blockStart, d.byteAvailable = 0, false + + d.tokens = d.tokens[:maxFlateBlockTokens+1] + for i := 0; i <= maxFlateBlockTokens; i++ { + d.tokens[i] = 0 + } + d.tokens = d.tokens[:0] + d.length = minMatchLength - 1 + d.offset = 0 + d.hash = 0 + d.maxInsertIndex = 0 + } +} + +func (d *compressor) close() error { + d.sync = true + d.step(d) + if d.err != nil { + return d.err + } + if d.w.writeStoredHeader(0, true); d.w.err != nil { + return d.w.err + } + d.w.flush() + return d.w.err +} + +// NewWriter returns a new Writer compressing data at the given level. +// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression); +// higher levels typically run slower but compress more. Level 0 +// (NoCompression) does not attempt any compression; it only adds the +// necessary DEFLATE framing. Level -1 (DefaultCompression) uses the default +// compression level. +// +// If level is in the range [-1, 9] then the error returned will be nil. +// Otherwise the error returned will be non-nil. +func NewWriter(w io.Writer, level int) (*Writer, error) { + var dw Writer + if err := dw.d.init(w, level); err != nil { + return nil, err + } + return &dw, nil +} + +// NewWriterDict is like NewWriter but initializes the new +// Writer with a preset dictionary. The returned Writer behaves +// as if the dictionary had been written to it without producing +// any compressed output. The compressed data written to w +// can only be decompressed by a Reader initialized with the +// same dictionary. +func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { + dw := &dictWriter{w, false} + zw, err := NewWriter(dw, level) + if err != nil { + return nil, err + } + zw.Write(dict) + zw.Flush() + dw.enabled = true + zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method. + return zw, err +} + +type dictWriter struct { + w io.Writer + enabled bool +} + +func (w *dictWriter) Write(b []byte) (n int, err error) { + if w.enabled { + return w.w.Write(b) + } + return len(b), nil +} + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + d compressor + dict []byte +} + +// Write writes data to w, which will eventually write the +// compressed form of data to its underlying writer. +func (w *Writer) Write(data []byte) (n int, err error) { + return w.d.write(data) +} + +// Flush flushes any pending compressed data to the underlying writer. +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. +// Flush does not return until the data has been written. +// If the underlying writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (w *Writer) Flush() error { + // For more about flushing: + // http://www.bolet.org/~pornin/deflate-flush.html + return w.d.syncFlush() +} + +// Close flushes and closes the writer. 
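+// A minimal usage sketch of the Writer API (hypothetical names, not part of
+// the package):
+//
+//	var buf bytes.Buffer
+//	zw, err := NewWriter(&buf, BestSpeed)
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	zw.Write([]byte("hello, world"))
+//	zw.Flush() // a reader of buf can now recover "hello, world"
+//	zw.Close() // writes the final (empty, stored) block with the EOF bit set
+//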
+func (w *Writer) Close() error { + return w.d.close() +} + +// Reset discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level and dictionary. +func (w *Writer) Reset(dst io.Writer) { + if dw, ok := w.d.w.w.(*dictWriter); ok { + // w was created with NewWriterDict + dw.w = dst + w.d.reset(dw) + dw.enabled = false + w.Write(w.dict) + w.Flush() + dw.enabled = true + } else { + // w was created with NewWriter + w.d.reset(dst) + } +} diff --git a/src/compress/flate/deflate_test.go b/src/compress/flate/deflate_test.go new file mode 100644 index 000000000..730234c38 --- /dev/null +++ b/src/compress/flate/deflate_test.go @@ -0,0 +1,490 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "reflect" + "sync" + "testing" +) + +type deflateTest struct { + in []byte + level int + out []byte +} + +type deflateInflateTest struct { + in []byte +} + +type reverseBitsTest struct { + in uint16 + bitCount uint8 + out uint16 +} + +var deflateTests = []*deflateTest{ + {[]byte{}, 0, []byte{1, 0, 0, 255, 255}}, + {[]byte{0x11}, -1, []byte{18, 4, 4, 0, 0, 255, 255}}, + {[]byte{0x11}, DefaultCompression, []byte{18, 4, 4, 0, 0, 255, 255}}, + {[]byte{0x11}, 4, []byte{18, 4, 4, 0, 0, 255, 255}}, + + {[]byte{0x11}, 0, []byte{0, 1, 0, 254, 255, 17, 1, 0, 0, 255, 255}}, + {[]byte{0x11, 0x12}, 0, []byte{0, 2, 0, 253, 255, 17, 18, 1, 0, 0, 255, 255}}, + {[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 0, + []byte{0, 8, 0, 247, 255, 17, 17, 17, 17, 17, 17, 17, 17, 1, 0, 0, 255, 255}, + }, + {[]byte{}, 1, []byte{1, 0, 0, 255, 255}}, + {[]byte{0x11}, 1, []byte{18, 4, 4, 0, 0, 255, 255}}, + {[]byte{0x11, 0x12}, 1, []byte{18, 20, 2, 4, 0, 0, 255, 255}}, + {[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 1, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}}, + {[]byte{}, 9, []byte{1, 0, 0, 255, 255}}, + {[]byte{0x11}, 9, []byte{18, 4, 4, 0, 0, 255, 255}}, + {[]byte{0x11, 0x12}, 9, []byte{18, 20, 2, 4, 0, 0, 255, 255}}, + {[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 9, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}}, +} + +var deflateInflateTests = []*deflateInflateTest{ + {[]byte{}}, + {[]byte{0x11}}, + {[]byte{0x11, 0x12}}, + {[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}}, + {[]byte{0x11, 0x10, 0x13, 0x41, 0x21, 0x21, 0x41, 0x13, 0x87, 0x78, 0x13}}, + {largeDataChunk()}, +} + +var reverseBitsTests = []*reverseBitsTest{ + {1, 1, 1}, + {1, 2, 2}, + {1, 3, 4}, + {1, 4, 8}, + {1, 5, 16}, + {17, 5, 17}, + {257, 9, 257}, + {29, 5, 23}, +} + +func largeDataChunk() []byte { + result := make([]byte, 100000) + for i := range result { + result[i] = byte(i * i & 0xFF) + } + return result +} + +func TestDeflate(t *testing.T) { + for _, h := range deflateTests { + var buf bytes.Buffer + w, err := NewWriter(&buf, h.level) + if err != nil { + t.Errorf("NewWriter: %v", err) + continue + } + w.Write(h.in) + w.Close() + if !bytes.Equal(buf.Bytes(), h.out) { + t.Errorf("Deflate(%d, %x) = %x, want %x", h.level, h.in, buf.Bytes(), h.out) + } + } +} + +// A sparseReader returns a stream consisting of 0s followed by 1<<16 1s. +// This tests missing hash references in a very large input. 
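+// Roughly (an illustration, not from the source): the first l-(1<<16) bytes
+// read as zeros and the final 1<<16 bytes as ones, without ever allocating
+// the whole stream, e.g.
+//
+//	io.Copy(ioutil.Discard, &sparseReader{l: 23e8}) // ~2.3 GB of input
+//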
+type sparseReader struct { + l int64 + cur int64 +} + +func (r *sparseReader) Read(b []byte) (n int, err error) { + if r.cur >= r.l { + return 0, io.EOF + } + n = len(b) + cur := r.cur + int64(n) + if cur > r.l { + n -= int(cur - r.l) + cur = r.l + } + for i := range b[0:n] { + if r.cur+int64(i) >= r.l-1<<16 { + b[i] = 1 + } else { + b[i] = 0 + } + } + r.cur = cur + return +} + +func TestVeryLongSparseChunk(t *testing.T) { + if testing.Short() { + t.Skip("skipping sparse chunk during short test") + } + w, err := NewWriter(ioutil.Discard, 1) + if err != nil { + t.Errorf("NewWriter: %v", err) + return + } + if _, err = io.Copy(w, &sparseReader{l: 23E8}); err != nil { + t.Errorf("Compress failed: %v", err) + return + } +} + +type syncBuffer struct { + buf bytes.Buffer + mu sync.RWMutex + closed bool + ready chan bool +} + +func newSyncBuffer() *syncBuffer { + return &syncBuffer{ready: make(chan bool, 1)} +} + +func (b *syncBuffer) Read(p []byte) (n int, err error) { + for { + b.mu.RLock() + n, err = b.buf.Read(p) + b.mu.RUnlock() + if n > 0 || b.closed { + return + } + <-b.ready + } +} + +func (b *syncBuffer) signal() { + select { + case b.ready <- true: + default: + } +} + +func (b *syncBuffer) Write(p []byte) (n int, err error) { + n, err = b.buf.Write(p) + b.signal() + return +} + +func (b *syncBuffer) WriteMode() { + b.mu.Lock() +} + +func (b *syncBuffer) ReadMode() { + b.mu.Unlock() + b.signal() +} + +func (b *syncBuffer) Close() error { + b.closed = true + b.signal() + return nil +} + +func testSync(t *testing.T, level int, input []byte, name string) { + if len(input) == 0 { + return + } + + t.Logf("--testSync %d, %d, %s", level, len(input), name) + buf := newSyncBuffer() + buf1 := new(bytes.Buffer) + buf.WriteMode() + w, err := NewWriter(io.MultiWriter(buf, buf1), level) + if err != nil { + t.Errorf("NewWriter: %v", err) + return + } + r := NewReader(buf) + + // Write half the input and read back. + for i := 0; i < 2; i++ { + var lo, hi int + if i == 0 { + lo, hi = 0, (len(input)+1)/2 + } else { + lo, hi = (len(input)+1)/2, len(input) + } + t.Logf("#%d: write %d-%d", i, lo, hi) + if _, err := w.Write(input[lo:hi]); err != nil { + t.Errorf("testSync: write: %v", err) + return + } + if i == 0 { + if err := w.Flush(); err != nil { + t.Errorf("testSync: flush: %v", err) + return + } + } else { + if err := w.Close(); err != nil { + t.Errorf("testSync: close: %v", err) + } + } + buf.ReadMode() + out := make([]byte, hi-lo+1) + m, err := io.ReadAtLeast(r, out, hi-lo) + t.Logf("#%d: read %d", i, m) + if m != hi-lo || err != nil { + t.Errorf("testSync/%d (%d, %d, %s): read %d: %d, %v (%d left)", i, level, len(input), name, hi-lo, m, err, buf.buf.Len()) + return + } + if !bytes.Equal(input[lo:hi], out[:hi-lo]) { + t.Errorf("testSync/%d: read wrong bytes: %x vs %x", i, input[lo:hi], out[:hi-lo]) + return + } + // This test originally checked that after reading + // the first half of the input, there was nothing left + // in the read buffer (buf.buf.Len() != 0) but that is + // not necessarily the case: the write Flush may emit + // some extra framing bits that are not necessary + // to process to obtain the first half of the uncompressed + // data. The test ran correctly most of the time, because + // the background goroutine had usually read even + // those extra bits by now, but it's not a useful thing to + // check. 
+ buf.WriteMode() + } + buf.ReadMode() + out := make([]byte, 10) + if n, err := r.Read(out); n > 0 || err != io.EOF { + t.Errorf("testSync (%d, %d, %s): final Read: %d, %v (hex: %x)", level, len(input), name, n, err, out[0:n]) + } + if buf.buf.Len() != 0 { + t.Errorf("testSync (%d, %d, %s): extra data at end", level, len(input), name) + } + r.Close() + + // stream should work for ordinary reader too + r = NewReader(buf1) + out, err = ioutil.ReadAll(r) + if err != nil { + t.Errorf("testSync: read: %s", err) + return + } + r.Close() + if !bytes.Equal(input, out) { + t.Errorf("testSync: decompress(compress(data)) != data: level=%d input=%s", level, name) + } +} + +func testToFromWithLevelAndLimit(t *testing.T, level int, input []byte, name string, limit int) { + var buffer bytes.Buffer + w, err := NewWriter(&buffer, level) + if err != nil { + t.Errorf("NewWriter: %v", err) + return + } + w.Write(input) + w.Close() + if limit > 0 && buffer.Len() > limit { + t.Errorf("level: %d, len(compress(data)) = %d > limit = %d", level, buffer.Len(), limit) + return + } + r := NewReader(&buffer) + out, err := ioutil.ReadAll(r) + if err != nil { + t.Errorf("read: %s", err) + return + } + r.Close() + if !bytes.Equal(input, out) { + t.Errorf("decompress(compress(data)) != data: level=%d input=%s", level, name) + return + } + testSync(t, level, input, name) +} + +func testToFromWithLimit(t *testing.T, input []byte, name string, limit [10]int) { + for i := 0; i < 10; i++ { + testToFromWithLevelAndLimit(t, i, input, name, limit[i]) + } +} + +func TestDeflateInflate(t *testing.T) { + for i, h := range deflateInflateTests { + testToFromWithLimit(t, h.in, fmt.Sprintf("#%d", i), [10]int{}) + } +} + +func TestReverseBits(t *testing.T) { + for _, h := range reverseBitsTests { + if v := reverseBits(h.in, h.bitCount); v != h.out { + t.Errorf("reverseBits(%v,%v) = %v, want %v", + h.in, h.bitCount, v, h.out) + } + } +} + +type deflateInflateStringTest struct { + filename string + label string + limit [10]int +} + +var deflateInflateStringTests = []deflateInflateStringTest{ + { + "../testdata/e.txt", + "2.718281828...", + [...]int{100018, 50650, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790}, + }, + { + "../testdata/Mark.Twain-Tom.Sawyer.txt", + "Mark.Twain-Tom.Sawyer", + [...]int{407330, 187598, 180361, 172974, 169160, 163476, 160936, 160506, 160295, 160295}, + }, +} + +func TestDeflateInflateString(t *testing.T) { + for _, test := range deflateInflateStringTests { + gold, err := ioutil.ReadFile(test.filename) + if err != nil { + t.Error(err) + } + testToFromWithLimit(t, gold, test.label, test.limit) + if testing.Short() { + break + } + } +} + +func TestReaderDict(t *testing.T) { + const ( + dict = "hello world" + text = "hello again world" + ) + var b bytes.Buffer + w, err := NewWriter(&b, 5) + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + w.Write([]byte(dict)) + w.Flush() + b.Reset() + w.Write([]byte(text)) + w.Close() + + r := NewReaderDict(&b, []byte(dict)) + data, err := ioutil.ReadAll(r) + if err != nil { + t.Fatal(err) + } + if string(data) != "hello again world" { + t.Fatalf("read returned %q want %q", string(data), text) + } +} + +func TestWriterDict(t *testing.T) { + const ( + dict = "hello world" + text = "hello again world" + ) + var b bytes.Buffer + w, err := NewWriter(&b, 5) + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + w.Write([]byte(dict)) + w.Flush() + b.Reset() + w.Write([]byte(text)) + w.Close() + + var b1 bytes.Buffer + w, _ = NewWriterDict(&b1, 5, []byte(dict)) + 
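+	// b1 should now match b byte for byte: NewWriterDict already fed the
+	// dictionary through the compressor (its dictWriter discarded that
+	// output), exactly as the manual write-flush-reset sequence above did.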
w.Write([]byte(text)) + w.Close() + + if !bytes.Equal(b1.Bytes(), b.Bytes()) { + t.Fatalf("writer wrote %q want %q", b1.Bytes(), b.Bytes()) + } +} + +// See http://code.google.com/p/go/issues/detail?id=2508 +func TestRegression2508(t *testing.T) { + if testing.Short() { + t.Logf("test disabled with -short") + return + } + w, err := NewWriter(ioutil.Discard, 1) + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + buf := make([]byte, 1024) + for i := 0; i < 131072; i++ { + if _, err := w.Write(buf); err != nil { + t.Fatalf("writer failed: %v", err) + } + } + w.Close() +} + +func TestWriterReset(t *testing.T) { + for level := 0; level <= 9; level++ { + if testing.Short() && level > 1 { + break + } + w, err := NewWriter(ioutil.Discard, level) + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + buf := []byte("hello world") + for i := 0; i < 1024; i++ { + w.Write(buf) + } + w.Reset(ioutil.Discard) + + wref, err := NewWriter(ioutil.Discard, level) + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + + // DeepEqual doesn't compare functions. + w.d.fill, wref.d.fill = nil, nil + w.d.step, wref.d.step = nil, nil + if !reflect.DeepEqual(w, wref) { + t.Errorf("level %d Writer not reset after Reset", level) + } + } + testResetOutput(t, func(w io.Writer) (*Writer, error) { return NewWriter(w, NoCompression) }) + testResetOutput(t, func(w io.Writer) (*Writer, error) { return NewWriter(w, DefaultCompression) }) + testResetOutput(t, func(w io.Writer) (*Writer, error) { return NewWriter(w, BestCompression) }) + dict := []byte("we are the world") + testResetOutput(t, func(w io.Writer) (*Writer, error) { return NewWriterDict(w, NoCompression, dict) }) + testResetOutput(t, func(w io.Writer) (*Writer, error) { return NewWriterDict(w, DefaultCompression, dict) }) + testResetOutput(t, func(w io.Writer) (*Writer, error) { return NewWriterDict(w, BestCompression, dict) }) +} + +func testResetOutput(t *testing.T, newWriter func(w io.Writer) (*Writer, error)) { + buf := new(bytes.Buffer) + w, err := newWriter(buf) + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + b := []byte("hello world") + for i := 0; i < 1024; i++ { + w.Write(b) + } + w.Close() + out1 := buf.String() + + buf2 := new(bytes.Buffer) + w.Reset(buf2) + for i := 0; i < 1024; i++ { + w.Write(b) + } + w.Close() + out2 := buf2.String() + + if out1 != out2 { + t.Errorf("got %q, expected %q", out2, out1) + } + t.Logf("got %d bytes", len(out1)) +} diff --git a/src/compress/flate/fixedhuff.go b/src/compress/flate/fixedhuff.go new file mode 100644 index 000000000..7df8b9a29 --- /dev/null +++ b/src/compress/flate/fixedhuff.go @@ -0,0 +1,78 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
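+// Each entry packs a decode result as described in inflate.go: chunk&15 is
+// the code length in bits and chunk>>4 is the decoded value. For example,
+// taking the first entry of the table below:
+//
+//	chunk := uint32(0x1007)
+//	length := chunk & 15 // 7-bit code
+//	value := chunk >> 4  // 0x100 = 256, the end-of-block symbol
+//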
+ +package flate + +// autogenerated by go run gen.go -output fixedhuff.go, DO NOT EDIT + +var fixedHuffmanDecoder = huffmanDecoder{ + 7, + [huffmanNumChunks]uint32{ + 0x1007, 0x0508, 0x0108, 0x1188, 0x1107, 0x0708, 0x0308, 0x0c09, + 0x1087, 0x0608, 0x0208, 0x0a09, 0x0008, 0x0808, 0x0408, 0x0e09, + 0x1047, 0x0588, 0x0188, 0x0909, 0x1147, 0x0788, 0x0388, 0x0d09, + 0x10c7, 0x0688, 0x0288, 0x0b09, 0x0088, 0x0888, 0x0488, 0x0f09, + 0x1027, 0x0548, 0x0148, 0x11c8, 0x1127, 0x0748, 0x0348, 0x0c89, + 0x10a7, 0x0648, 0x0248, 0x0a89, 0x0048, 0x0848, 0x0448, 0x0e89, + 0x1067, 0x05c8, 0x01c8, 0x0989, 0x1167, 0x07c8, 0x03c8, 0x0d89, + 0x10e7, 0x06c8, 0x02c8, 0x0b89, 0x00c8, 0x08c8, 0x04c8, 0x0f89, + 0x1017, 0x0528, 0x0128, 0x11a8, 0x1117, 0x0728, 0x0328, 0x0c49, + 0x1097, 0x0628, 0x0228, 0x0a49, 0x0028, 0x0828, 0x0428, 0x0e49, + 0x1057, 0x05a8, 0x01a8, 0x0949, 0x1157, 0x07a8, 0x03a8, 0x0d49, + 0x10d7, 0x06a8, 0x02a8, 0x0b49, 0x00a8, 0x08a8, 0x04a8, 0x0f49, + 0x1037, 0x0568, 0x0168, 0x11e8, 0x1137, 0x0768, 0x0368, 0x0cc9, + 0x10b7, 0x0668, 0x0268, 0x0ac9, 0x0068, 0x0868, 0x0468, 0x0ec9, + 0x1077, 0x05e8, 0x01e8, 0x09c9, 0x1177, 0x07e8, 0x03e8, 0x0dc9, + 0x10f7, 0x06e8, 0x02e8, 0x0bc9, 0x00e8, 0x08e8, 0x04e8, 0x0fc9, + 0x1007, 0x0518, 0x0118, 0x1198, 0x1107, 0x0718, 0x0318, 0x0c29, + 0x1087, 0x0618, 0x0218, 0x0a29, 0x0018, 0x0818, 0x0418, 0x0e29, + 0x1047, 0x0598, 0x0198, 0x0929, 0x1147, 0x0798, 0x0398, 0x0d29, + 0x10c7, 0x0698, 0x0298, 0x0b29, 0x0098, 0x0898, 0x0498, 0x0f29, + 0x1027, 0x0558, 0x0158, 0x11d8, 0x1127, 0x0758, 0x0358, 0x0ca9, + 0x10a7, 0x0658, 0x0258, 0x0aa9, 0x0058, 0x0858, 0x0458, 0x0ea9, + 0x1067, 0x05d8, 0x01d8, 0x09a9, 0x1167, 0x07d8, 0x03d8, 0x0da9, + 0x10e7, 0x06d8, 0x02d8, 0x0ba9, 0x00d8, 0x08d8, 0x04d8, 0x0fa9, + 0x1017, 0x0538, 0x0138, 0x11b8, 0x1117, 0x0738, 0x0338, 0x0c69, + 0x1097, 0x0638, 0x0238, 0x0a69, 0x0038, 0x0838, 0x0438, 0x0e69, + 0x1057, 0x05b8, 0x01b8, 0x0969, 0x1157, 0x07b8, 0x03b8, 0x0d69, + 0x10d7, 0x06b8, 0x02b8, 0x0b69, 0x00b8, 0x08b8, 0x04b8, 0x0f69, + 0x1037, 0x0578, 0x0178, 0x11f8, 0x1137, 0x0778, 0x0378, 0x0ce9, + 0x10b7, 0x0678, 0x0278, 0x0ae9, 0x0078, 0x0878, 0x0478, 0x0ee9, + 0x1077, 0x05f8, 0x01f8, 0x09e9, 0x1177, 0x07f8, 0x03f8, 0x0de9, + 0x10f7, 0x06f8, 0x02f8, 0x0be9, 0x00f8, 0x08f8, 0x04f8, 0x0fe9, + 0x1007, 0x0508, 0x0108, 0x1188, 0x1107, 0x0708, 0x0308, 0x0c19, + 0x1087, 0x0608, 0x0208, 0x0a19, 0x0008, 0x0808, 0x0408, 0x0e19, + 0x1047, 0x0588, 0x0188, 0x0919, 0x1147, 0x0788, 0x0388, 0x0d19, + 0x10c7, 0x0688, 0x0288, 0x0b19, 0x0088, 0x0888, 0x0488, 0x0f19, + 0x1027, 0x0548, 0x0148, 0x11c8, 0x1127, 0x0748, 0x0348, 0x0c99, + 0x10a7, 0x0648, 0x0248, 0x0a99, 0x0048, 0x0848, 0x0448, 0x0e99, + 0x1067, 0x05c8, 0x01c8, 0x0999, 0x1167, 0x07c8, 0x03c8, 0x0d99, + 0x10e7, 0x06c8, 0x02c8, 0x0b99, 0x00c8, 0x08c8, 0x04c8, 0x0f99, + 0x1017, 0x0528, 0x0128, 0x11a8, 0x1117, 0x0728, 0x0328, 0x0c59, + 0x1097, 0x0628, 0x0228, 0x0a59, 0x0028, 0x0828, 0x0428, 0x0e59, + 0x1057, 0x05a8, 0x01a8, 0x0959, 0x1157, 0x07a8, 0x03a8, 0x0d59, + 0x10d7, 0x06a8, 0x02a8, 0x0b59, 0x00a8, 0x08a8, 0x04a8, 0x0f59, + 0x1037, 0x0568, 0x0168, 0x11e8, 0x1137, 0x0768, 0x0368, 0x0cd9, + 0x10b7, 0x0668, 0x0268, 0x0ad9, 0x0068, 0x0868, 0x0468, 0x0ed9, + 0x1077, 0x05e8, 0x01e8, 0x09d9, 0x1177, 0x07e8, 0x03e8, 0x0dd9, + 0x10f7, 0x06e8, 0x02e8, 0x0bd9, 0x00e8, 0x08e8, 0x04e8, 0x0fd9, + 0x1007, 0x0518, 0x0118, 0x1198, 0x1107, 0x0718, 0x0318, 0x0c39, + 0x1087, 0x0618, 0x0218, 0x0a39, 0x0018, 0x0818, 0x0418, 0x0e39, + 0x1047, 0x0598, 0x0198, 0x0939, 0x1147, 0x0798, 0x0398, 0x0d39, + 0x10c7, 0x0698, 
0x0298, 0x0b39, 0x0098, 0x0898, 0x0498, 0x0f39, + 0x1027, 0x0558, 0x0158, 0x11d8, 0x1127, 0x0758, 0x0358, 0x0cb9, + 0x10a7, 0x0658, 0x0258, 0x0ab9, 0x0058, 0x0858, 0x0458, 0x0eb9, + 0x1067, 0x05d8, 0x01d8, 0x09b9, 0x1167, 0x07d8, 0x03d8, 0x0db9, + 0x10e7, 0x06d8, 0x02d8, 0x0bb9, 0x00d8, 0x08d8, 0x04d8, 0x0fb9, + 0x1017, 0x0538, 0x0138, 0x11b8, 0x1117, 0x0738, 0x0338, 0x0c79, + 0x1097, 0x0638, 0x0238, 0x0a79, 0x0038, 0x0838, 0x0438, 0x0e79, + 0x1057, 0x05b8, 0x01b8, 0x0979, 0x1157, 0x07b8, 0x03b8, 0x0d79, + 0x10d7, 0x06b8, 0x02b8, 0x0b79, 0x00b8, 0x08b8, 0x04b8, 0x0f79, + 0x1037, 0x0578, 0x0178, 0x11f8, 0x1137, 0x0778, 0x0378, 0x0cf9, + 0x10b7, 0x0678, 0x0278, 0x0af9, 0x0078, 0x0878, 0x0478, 0x0ef9, + 0x1077, 0x05f8, 0x01f8, 0x09f9, 0x1177, 0x07f8, 0x03f8, 0x0df9, + 0x10f7, 0x06f8, 0x02f8, 0x0bf9, 0x00f8, 0x08f8, 0x04f8, 0x0ff9, + }, + nil, 0, +} diff --git a/src/compress/flate/flate_test.go b/src/compress/flate/flate_test.go new file mode 100644 index 000000000..068766323 --- /dev/null +++ b/src/compress/flate/flate_test.go @@ -0,0 +1,62 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This test tests some internals of the flate package. +// The tests in package compress/gzip serve as the +// end-to-end test of the decompressor. + +package flate + +import ( + "bytes" + "testing" +) + +func TestUncompressedSource(t *testing.T) { + decoder := NewReader(bytes.NewReader([]byte{0x01, 0x01, 0x00, 0xfe, 0xff, 0x11})) + output := make([]byte, 1) + n, error := decoder.Read(output) + if n != 1 || error != nil { + t.Fatalf("decoder.Read() = %d, %v, want 1, nil", n, error) + } + if output[0] != 0x11 { + t.Errorf("output[0] = %x, want 0x11", output[0]) + } +} + +// The following test should not panic. +func TestIssue5915(t *testing.T) { + bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 6, 0, 11, 0, 8, 0, 6, 6, 10, 8} + h := new(huffmanDecoder) + ok := h.init(bits) + if ok == true { + t.Fatalf("Given sequence of bits is bad, and should not succeed.") + } +} + +// The following test should not panic. +func TestIssue5962(t *testing.T) { + bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, + 5, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11} + h := new(huffmanDecoder) + ok := h.init(bits) + if ok == true { + t.Fatalf("Given sequence of bits is bad, and should not succeed.") + } +} + +// The following test should not panic. +func TestIssue6255(t *testing.T) { + bits1 := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11} + bits2 := []int{11, 13} + h := new(huffmanDecoder) + if !h.init(bits1) { + t.Fatalf("Given sequence of bits is good and should succeed.") + } + if h.init(bits2) { + t.Fatalf("Given sequence of bits is bad and should not succeed.") + } +} diff --git a/src/compress/flate/gen.go b/src/compress/flate/gen.go new file mode 100644 index 000000000..6288ecddd --- /dev/null +++ b/src/compress/flate/gen.go @@ -0,0 +1,190 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
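+// The code lengths assigned in main below are the fixed Huffman code of
+// RFC 1951 section 3.2.6:
+//
+//	literal/length symbols   0-143: 8 bits
+//	literal/length symbols 144-255: 9 bits
+//	literal/length symbols 256-279: 7 bits
+//	literal/length symbols 280-287: 8 bits
+//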
+ +// +build ignore + +// This program generates fixedhuff.go +// Invoke as +// +// go run gen.go -output fixedhuff.go + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "io/ioutil" + "log" +) + +var filename = flag.String("output", "fixedhuff.go", "output file name") + +const maxCodeLen = 16 + +// Note: the definition of the huffmanDecoder struct is copied from +// inflate.go, as it is private to the implementation. + +// chunk & 15 is number of bits +// chunk >> 4 is value, including table link + +const ( + huffmanChunkBits = 9 + huffmanNumChunks = 1 << huffmanChunkBits + huffmanCountMask = 15 + huffmanValueShift = 4 +) + +type huffmanDecoder struct { + min int // the minimum code length + chunks [huffmanNumChunks]uint32 // chunks as described above + links [][]uint32 // overflow links + linkMask uint32 // mask the width of the link table +} + +// Initialize Huffman decoding tables from array of code lengths. +func (h *huffmanDecoder) init(bits []int) bool { + // Count number of codes of each length, + // compute min and max length. + var count [maxCodeLen]int + var min, max int + for _, n := range bits { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n]++ + } + if max == 0 { + return false + } + + h.min = min + var linkBits uint + var numLinks int + if max > huffmanChunkBits { + linkBits = uint(max) - huffmanChunkBits + numLinks = 1 << linkBits + h.linkMask = uint32(numLinks - 1) + } + code := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + if i == huffmanChunkBits+1 { + // create link tables + link := code >> 1 + h.links = make([][]uint32, huffmanNumChunks-link) + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i)) + h.links[off] = make([]uint32, 1<<linkBits) + } + } + n := count[i] + nextcode[i] = code + code += n + code <<= 1 + } + + for i, n := range bits { + if n == 0 { + continue + } + code := nextcode[n] + nextcode[n]++ + chunk := uint32(i<<huffmanValueShift | n) + reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8 + reverse >>= uint(16 - n) + if n <= huffmanChunkBits { + for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { + h.chunks[off] = chunk + } + } else { + linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift] + reverse >>= huffmanChunkBits + for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { + linktab[off] = chunk + } + } + } + return true +} + +func main() { + flag.Parse() + + var h huffmanDecoder + var bits [288]int + initReverseByte() + for i := 0; i < 144; i++ { + bits[i] = 8 + } + for i := 144; i < 256; i++ { + bits[i] = 9 + } + for i := 256; i < 280; i++ { + bits[i] = 7 + } + for i := 280; i < 288; i++ { + bits[i] = 8 + } + h.init(bits[:]) + + var buf bytes.Buffer + + fmt.Fprintf(&buf, `// Copyright 2013 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.`+"\n\n") + + fmt.Fprintln(&buf, "package flate") + fmt.Fprintln(&buf) + fmt.Fprintln(&buf, "// autogenerated by go run gen.go -output fixedhuff.go, DO NOT EDIT") + fmt.Fprintln(&buf) + fmt.Fprintln(&buf, "var fixedHuffmanDecoder = huffmanDecoder{") + fmt.Fprintf(&buf, "\t%d,\n", h.min) + fmt.Fprintln(&buf, "\t[huffmanNumChunks]uint32{") + for i := 0; i < huffmanNumChunks; i++ { + if i&7 == 0 { + fmt.Fprintf(&buf, "\t\t") + } else { + fmt.Fprintf(&buf, " ") + } + fmt.Fprintf(&buf, "0x%04x,", h.chunks[i]) + if i&7 == 7 { + fmt.Fprintln(&buf) + } + } + fmt.Fprintln(&buf, "\t},") + fmt.Fprintln(&buf, "\tnil, 0,") + fmt.Fprintln(&buf, "}") + + data, err := format.Source(buf.Bytes()) + if err != nil { + log.Fatal(err) + } + err = ioutil.WriteFile(*filename, data, 0644) + if err != nil { + log.Fatal(err) + } +} + +var reverseByte [256]byte + +func initReverseByte() { + for x := 0; x < 256; x++ { + var result byte + for i := uint(0); i < 8; i++ { + result |= byte(((x >> i) & 1) << (7 - i)) + } + reverseByte[x] = result + } +} diff --git a/src/compress/flate/huffman_bit_writer.go b/src/compress/flate/huffman_bit_writer.go new file mode 100644 index 000000000..b182a710b --- /dev/null +++ b/src/compress/flate/huffman_bit_writer.go @@ -0,0 +1,517 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "io" + "math" +) + +const ( + // The largest offset code. + offsetCodeCount = 30 + + // The special code used to mark the end of a block. + endBlockMarker = 256 + + // The first length code. + lengthCodesStart = 257 + + // The number of codegen codes. + codegenCodeCount = 19 + badCode = 255 +) + +// The number of extra bits needed by length code X - LENGTH_CODES_START. +var lengthExtraBits = []int8{ + /* 257 */ 0, 0, 0, + /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, + /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, + /* 280 */ 4, 5, 5, 5, 5, 0, +} + +// The length indicated by length code X - LENGTH_CODES_START. +var lengthBase = []uint32{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, + 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, + 64, 80, 96, 112, 128, 160, 192, 224, 255, +} + +// offset code word extra bits. +var offsetExtraBits = []int8{ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + /* extended window */ + 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, +} + +var offsetBase = []uint32{ + /* normal deflate */ + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + + /* extended window */ + 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, + 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, + 0x100000, 0x180000, 0x200000, 0x300000, +} + +// The odd order in which the codegen code sizes are written. +var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +type huffmanBitWriter struct { + w io.Writer + // Data waiting to be written is bytes[0:nbytes] + // and then the low nbits of bits. 
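+	// An illustrative trace (not in the original): writeBits ORs new bits in
+	// above those already buffered, LSB first, flushing two bytes once 16 or
+	// more accumulate.
+	//
+	//	w.writeBits(0x5, 3) // bits = 0b101,   nbits = 3
+	//	w.writeBits(0x3, 2) // bits = 0b11101, nbits = 5
+	//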
+ bits uint32 + nbits uint32 + bytes [64]byte + nbytes int + literalFreq []int32 + offsetFreq []int32 + codegen []uint8 + codegenFreq []int32 + literalEncoding *huffmanEncoder + offsetEncoding *huffmanEncoder + codegenEncoding *huffmanEncoder + err error +} + +func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { + return &huffmanBitWriter{ + w: w, + literalFreq: make([]int32, maxLit), + offsetFreq: make([]int32, offsetCodeCount), + codegen: make([]uint8, maxLit+offsetCodeCount+1), + codegenFreq: make([]int32, codegenCodeCount), + literalEncoding: newHuffmanEncoder(maxLit), + offsetEncoding: newHuffmanEncoder(offsetCodeCount), + codegenEncoding: newHuffmanEncoder(codegenCodeCount), + } +} + +func (w *huffmanBitWriter) reset(writer io.Writer) { + w.w = writer + w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil + w.bytes = [64]byte{} + for i := range w.codegen { + w.codegen[i] = 0 + } + for _, s := range [...][]int32{w.literalFreq, w.offsetFreq, w.codegenFreq} { + for i := range s { + s[i] = 0 + } + } + for _, enc := range [...]*huffmanEncoder{ + w.literalEncoding, + w.offsetEncoding, + w.codegenEncoding} { + for i := range enc.code { + enc.code[i] = 0 + } + for i := range enc.codeBits { + enc.codeBits[i] = 0 + } + } +} + +func (w *huffmanBitWriter) flushBits() { + if w.err != nil { + w.nbits = 0 + return + } + bits := w.bits + w.bits >>= 16 + w.nbits -= 16 + n := w.nbytes + w.bytes[n] = byte(bits) + w.bytes[n+1] = byte(bits >> 8) + if n += 2; n >= len(w.bytes) { + _, w.err = w.w.Write(w.bytes[0:]) + n = 0 + } + w.nbytes = n +} + +func (w *huffmanBitWriter) flush() { + if w.err != nil { + w.nbits = 0 + return + } + n := w.nbytes + if w.nbits > 8 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + w.nbits -= 8 + n++ + } + if w.nbits > 0 { + w.bytes[n] = byte(w.bits) + w.nbits = 0 + n++ + } + w.bits = 0 + _, w.err = w.w.Write(w.bytes[0:n]) + w.nbytes = 0 +} + +func (w *huffmanBitWriter) writeBits(b, nb int32) { + w.bits |= uint32(b) << w.nbits + if w.nbits += uint32(nb); w.nbits >= 16 { + w.flushBits() + } +} + +func (w *huffmanBitWriter) writeBytes(bytes []byte) { + if w.err != nil { + return + } + n := w.nbytes + if w.nbits == 8 { + w.bytes[n] = byte(w.bits) + w.nbits = 0 + n++ + } + if w.nbits != 0 { + w.err = InternalError("writeBytes with unfinished bits") + return + } + if n != 0 { + _, w.err = w.w.Write(w.bytes[0:n]) + if w.err != nil { + return + } + } + w.nbytes = 0 + _, w.err = w.w.Write(bytes) +} + +// RFC 1951 3.2.7 specifies a special run-length encoding for specifying +// the literal and offset lengths arrays (which are concatenated into a single +// array). This method generates that run-length encoding. +// +// The result is written into the codegen array, and the frequencies +// of each code is written into the codegenFreq array. +// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional +// information. Code badCode is an end marker +// +// numLiterals The number of literals in literalEncoding +// numOffsets The number of offsets in offsetEncoding +func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int) { + for i := range w.codegenFreq { + w.codegenFreq[i] = 0 + } + // Note that we are using codegen both as a temporary variable for holding + // a copy of the frequencies, and as the place where we put the result. + // This is fine because the output is always shorter than the input used + // so far. + codegen := w.codegen // cache + // Copy the concatenated code sizes to codegen. Put a marker at the end. 
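+	// A worked example of the RFC 1951 3.2.7 run-length coding produced
+	// below (hypothetical lengths): the sequence
+	//
+	//	8 8 8 8 8 0 0 0 0 0 0 0 0 0 0 3
+	//
+	// becomes
+	//
+	//	8, 16 (repeat previous 4 more times), 17 (run of 10 zeros), 3
+	//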
+ copy(codegen[0:numLiterals], w.literalEncoding.codeBits) + copy(codegen[numLiterals:numLiterals+numOffsets], w.offsetEncoding.codeBits) + codegen[numLiterals+numOffsets] = badCode + + size := codegen[0] + count := 1 + outIndex := 0 + for inIndex := 1; size != badCode; inIndex++ { + // INVARIANT: We have seen "count" copies of size that have not yet + // had output generated for them. + nextSize := codegen[inIndex] + if nextSize == size { + count++ + continue + } + // We need to generate codegen indicating "count" of size. + if size != 0 { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + count-- + for count >= 3 { + n := 6 + if n > count { + n = count + } + codegen[outIndex] = 16 + outIndex++ + codegen[outIndex] = uint8(n - 3) + outIndex++ + w.codegenFreq[16]++ + count -= n + } + } else { + for count >= 11 { + n := 138 + if n > count { + n = count + } + codegen[outIndex] = 18 + outIndex++ + codegen[outIndex] = uint8(n - 11) + outIndex++ + w.codegenFreq[18]++ + count -= n + } + if count >= 3 { + // count >= 3 && count <= 10 + codegen[outIndex] = 17 + outIndex++ + codegen[outIndex] = uint8(count - 3) + outIndex++ + w.codegenFreq[17]++ + count = 0 + } + } + count-- + for ; count >= 0; count-- { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + } + // Set up invariant for next time through the loop. + size = nextSize + count = 1 + } + // Marker indicating the end of the codegen. + codegen[outIndex] = badCode +} + +func (w *huffmanBitWriter) writeCode(code *huffmanEncoder, literal uint32) { + if w.err != nil { + return + } + w.writeBits(int32(code.code[literal]), int32(code.codeBits[literal])) +} + +// Write the header of a dynamic Huffman block to the output stream. +// +// numLiterals The number of literals specified in codegen +// numOffsets The number of offsets specified in codegen +// numCodegens The number of codegens used in codegen +func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { + if w.err != nil { + return + } + var firstBits int32 = 4 + if isEof { + firstBits = 5 + } + w.writeBits(firstBits, 3) + w.writeBits(int32(numLiterals-257), 5) + w.writeBits(int32(numOffsets-1), 5) + w.writeBits(int32(numCodegens-4), 4) + + for i := 0; i < numCodegens; i++ { + value := w.codegenEncoding.codeBits[codegenOrder[i]] + w.writeBits(int32(value), 3) + } + + i := 0 + for { + var codeWord int = int(w.codegen[i]) + i++ + if codeWord == badCode { + break + } + // The low byte contains the actual code to generate. 
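+		// The repeat counts that follow codes 16, 17 and 18 were stored
+		// already biased (by -3, -3 and -11) in generateCodegen, so the
+		// switch below can write them verbatim in 2, 3 and 7 bits.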
+ w.writeCode(w.codegenEncoding, uint32(codeWord)) + + switch codeWord { + case 16: + w.writeBits(int32(w.codegen[i]), 2) + i++ + break + case 17: + w.writeBits(int32(w.codegen[i]), 3) + i++ + break + case 18: + w.writeBits(int32(w.codegen[i]), 7) + i++ + break + } + } +} + +func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { + if w.err != nil { + return + } + var flag int32 + if isEof { + flag = 1 + } + w.writeBits(flag, 3) + w.flush() + w.writeBits(int32(length), 16) + w.writeBits(int32(^uint16(length)), 16) +} + +func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { + if w.err != nil { + return + } + // Indicate that we are a fixed Huffman block + var value int32 = 2 + if isEof { + value = 3 + } + w.writeBits(value, 3) +} + +func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { + if w.err != nil { + return + } + for i := range w.literalFreq { + w.literalFreq[i] = 0 + } + for i := range w.offsetFreq { + w.offsetFreq[i] = 0 + } + + n := len(tokens) + tokens = tokens[0 : n+1] + tokens[n] = endBlockMarker + + for _, t := range tokens { + switch t.typ() { + case literalType: + w.literalFreq[t.literal()]++ + case matchType: + length := t.length() + offset := t.offset() + w.literalFreq[lengthCodesStart+lengthCode(length)]++ + w.offsetFreq[offsetCode(offset)]++ + } + } + + // get the number of literals + numLiterals := len(w.literalFreq) + for w.literalFreq[numLiterals-1] == 0 { + numLiterals-- + } + // get the number of offsets + numOffsets := len(w.offsetFreq) + for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { + numOffsets-- + } + if numOffsets == 0 { + // We haven't found a single match. If we want to go with the dynamic encoding, + // we should count at least one offset to be sure that the offset huffman tree could be encoded. + w.offsetFreq[0] = 1 + numOffsets = 1 + } + + w.literalEncoding.generate(w.literalFreq, 15) + w.offsetEncoding.generate(w.offsetFreq, 15) + + storedBytes := 0 + if input != nil { + storedBytes = len(input) + } + var extraBits int64 + var storedSize int64 = math.MaxInt64 + if storedBytes <= maxStoreBlockSize && input != nil { + storedSize = int64((storedBytes + 5) * 8) + // We only bother calculating the costs of the extra bits required by + // the length of offset fields (which will be the same for both fixed + // and dynamic encoding), if we need to compare those two encodings + // against stored encoding. + for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ { + // First eight length codes have extra size = 0. + extraBits += int64(w.literalFreq[lengthCode]) * int64(lengthExtraBits[lengthCode-lengthCodesStart]) + } + for offsetCode := 4; offsetCode < numOffsets; offsetCode++ { + // First four offset codes have extra size = 0. + extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode]) + } + } + + // Figure out smallest code. + // Fixed Huffman baseline. + var size = int64(3) + + fixedLiteralEncoding.bitLength(w.literalFreq) + + fixedOffsetEncoding.bitLength(w.offsetFreq) + + extraBits + var literalEncoding = fixedLiteralEncoding + var offsetEncoding = fixedOffsetEncoding + + // Dynamic Huffman? + var numCodegens int + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. 
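+	// The block is then emitted in whichever of the three encodings is
+	// cheapest, much as zlib does; roughly:
+	//
+	//	storedSize  = (len(input) + 5) * 8          // only if it fits a stored block
+	//	fixedSize   = 3 + fixedTreeBits + extraBits
+	//	dynamicSize = dynamicHeader + dynamicTreeBits
+	//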
+ w.generateCodegen(numLiterals, numOffsets) + w.codegenEncoding.generate(w.codegenFreq, 7) + numCodegens = len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + dynamicHeader := int64(3+5+5+4+(3*numCodegens)) + + w.codegenEncoding.bitLength(w.codegenFreq) + + int64(extraBits) + + int64(w.codegenFreq[16]*2) + + int64(w.codegenFreq[17]*3) + + int64(w.codegenFreq[18]*7) + dynamicSize := dynamicHeader + + w.literalEncoding.bitLength(w.literalFreq) + + w.offsetEncoding.bitLength(w.offsetFreq) + + if dynamicSize < size { + size = dynamicSize + literalEncoding = w.literalEncoding + offsetEncoding = w.offsetEncoding + } + + // Stored bytes? + if storedSize < size { + w.writeStoredHeader(storedBytes, eof) + w.writeBytes(input[0:storedBytes]) + return + } + + // Huffman. + if literalEncoding == fixedLiteralEncoding { + w.writeFixedHeader(eof) + } else { + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + } + for _, t := range tokens { + switch t.typ() { + case literalType: + w.writeCode(literalEncoding, t.literal()) + break + case matchType: + // Write the length + length := t.length() + lengthCode := lengthCode(length) + w.writeCode(literalEncoding, lengthCode+lengthCodesStart) + extraLengthBits := int32(lengthExtraBits[lengthCode]) + if extraLengthBits > 0 { + extraLength := int32(length - lengthBase[lengthCode]) + w.writeBits(extraLength, extraLengthBits) + } + // Write the offset + offset := t.offset() + offsetCode := offsetCode(offset) + w.writeCode(offsetEncoding, offsetCode) + extraOffsetBits := int32(offsetExtraBits[offsetCode]) + if extraOffsetBits > 0 { + extraOffset := int32(offset - offsetBase[offsetCode]) + w.writeBits(extraOffset, extraOffsetBits) + } + break + default: + panic("unknown token type: " + string(t)) + } + } +} diff --git a/src/compress/flate/huffman_code.go b/src/compress/flate/huffman_code.go new file mode 100644 index 000000000..3b9fce466 --- /dev/null +++ b/src/compress/flate/huffman_code.go @@ -0,0 +1,323 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "math" + "sort" +) + +type huffmanEncoder struct { + codeBits []uint8 + code []uint16 +} + +type literalNode struct { + literal uint16 + freq int32 +} + +// A levelInfo describes the state of the constructed tree for a given depth. +type levelInfo struct { + // Our level. for better printing + level int32 + + // The frequency of the last node at this level + lastFreq int32 + + // The frequency of the next character to add to this level + nextCharFreq int32 + + // The frequency of the next pair (from level below) to add to this level. + // Only valid if the "needed" value of the next lower level is 0. + nextPairFreq int32 + + // The number of chains remaining to generate for this level before moving + // up to the next level + needed int32 +} + +func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} } + +func newHuffmanEncoder(size int) *huffmanEncoder { + return &huffmanEncoder{make([]uint8, size), make([]uint16, size)} +} + +// Generates a HuffmanCode corresponding to the fixed literal table +func generateFixedLiteralEncoding() *huffmanEncoder { + h := newHuffmanEncoder(maxLit) + codeBits := h.codeBits + code := h.code + var ch uint16 + for ch = 0; ch < maxLit; ch++ { + var bits uint16 + var size uint8 + switch { + case ch < 144: + // size 8, 000110000 .. 
10111111 + bits = ch + 48 + size = 8 + break + case ch < 256: + // size 9, 110010000 .. 111111111 + bits = ch + 400 - 144 + size = 9 + break + case ch < 280: + // size 7, 0000000 .. 0010111 + bits = ch - 256 + size = 7 + break + default: + // size 8, 11000000 .. 11000111 + bits = ch + 192 - 280 + size = 8 + } + codeBits[ch] = size + code[ch] = reverseBits(bits, size) + } + return h +} + +func generateFixedOffsetEncoding() *huffmanEncoder { + h := newHuffmanEncoder(30) + codeBits := h.codeBits + code := h.code + for ch := uint16(0); ch < 30; ch++ { + codeBits[ch] = 5 + code[ch] = reverseBits(ch, 5) + } + return h +} + +var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding() +var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding() + +func (h *huffmanEncoder) bitLength(freq []int32) int64 { + var total int64 + for i, f := range freq { + if f != 0 { + total += int64(f) * int64(h.codeBits[i]) + } + } + return total +} + +const maxBitsLimit = 16 + +// Return the number of literals assigned to each bit size in the Huffman encoding +// +// This method is only called when list.length >= 3 +// The cases of 0, 1, and 2 literals are handled by special case code. +// +// list An array of the literals with non-zero frequencies +// and their associated frequencies. The array is in order of increasing +// frequency, and has as its last element a special element with frequency +// MaxInt32 +// maxBits The maximum number of bits that should be used to encode any literal. +// Must be less than 16. +// return An integer array in which array[i] indicates the number of literals +// that should be encoded in i bits. +func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { + if maxBits >= maxBitsLimit { + panic("flate: maxBits too large") + } + n := int32(len(list)) + list = list[0 : n+1] + list[n] = maxNode() + + // The tree can't have greater depth than n - 1, no matter what. This + // saves a little bit of work in some small cases + if maxBits > n-1 { + maxBits = n - 1 + } + + // Create information about each of the levels. + // A bogus "Level 0" whose sole purpose is so that + // level1.prev.needed==0. This makes level1.nextPairFreq + // be a legitimate value that never gets chosen. + var levels [maxBitsLimit]levelInfo + // leafCounts[i] counts the number of literals at the left + // of ancestors of the rightmost node at level i. + // leafCounts[i][j] is the number of literals at the left + // of the level j ancestor. + var leafCounts [maxBitsLimit][maxBitsLimit]int32 + + for level := int32(1); level <= maxBits; level++ { + // For every level, the first two items are the first two characters. + // We initialize the levels as if we had already figured this out. + levels[level] = levelInfo{ + level: level, + lastFreq: list[1].freq, + nextCharFreq: list[2].freq, + nextPairFreq: list[0].freq + list[1].freq, + } + leafCounts[level][level] = 2 + if level == 1 { + levels[level].nextPairFreq = math.MaxInt32 + } + } + + // We need a total of 2*n - 2 items at top level and have already generated 2. + levels[maxBits].needed = 2*n - 4 + + level := maxBits + for { + l := &levels[level] + if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To make sure we never come back to this level or any lower level, + // set nextPairFreq impossibly large. 
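+			// From this point on, the level above can grow only by
+			// taking leaves, never pairs from this level.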
+ l.needed = 0 + levels[level+1].nextPairFreq = math.MaxInt32 + level++ + continue + } + + prevFreq := l.lastFreq + if l.nextCharFreq < l.nextPairFreq { + // The next item on this row is a leaf node. + n := leafCounts[level][level] + 1 + l.lastFreq = l.nextCharFreq + // Lower leafCounts are the same of the previous node. + leafCounts[level][level] = n + l.nextCharFreq = list[n].freq + } else { + // The next item on this row is a pair from the previous row. + // nextPairFreq isn't valid until we generate two + // more values in the level below + l.lastFreq = l.nextPairFreq + // Take leaf counts from the lower level, except counts[level] remains the same. + copy(leafCounts[level][:level], leafCounts[level-1][:level]) + levels[l.level-1].needed = 2 + } + + if l.needed--; l.needed == 0 { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in nextPairFreq + // of that level with the sum of the two nodes we've just calculated on + // this level. + if l.level == maxBits { + // All done! + break + } + levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq + level++ + } else { + // If we stole from below, move down temporarily to replenish it. + for levels[level-1].needed > 0 { + level-- + } + } + } + + // Somethings is wrong if at the end, the top level is null or hasn't used + // all of the leaves. + if leafCounts[maxBits][maxBits] != n { + panic("leafCounts[maxBits][maxBits] != n") + } + + bitCount := make([]int32, maxBits+1) + bits := 1 + counts := &leafCounts[maxBits] + for level := maxBits; level > 0; level-- { + // chain.leafCount gives the number of literals requiring at least "bits" + // bits to encode. + bitCount[bits] = counts[level] - counts[level-1] + bits++ + } + return bitCount +} + +// Look at the leaves and assign them a bit count and an encoding as specified +// in RFC 1951 3.2.2 +func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { + code := uint16(0) + for n, bits := range bitCount { + code <<= 1 + if n == 0 || bits == 0 { + continue + } + // The literals list[len(list)-bits] .. list[len(list)-bits] + // are encoded using "bits" bits, and get the values + // code, code + 1, .... The code values are + // assigned in literal order (not frequency order). + chunk := list[len(list)-int(bits):] + sortByLiteral(chunk) + for _, node := range chunk { + h.codeBits[node.literal] = uint8(n) + h.code[node.literal] = reverseBits(code, uint8(n)) + code++ + } + list = list[0 : len(list)-int(bits)] + } +} + +// Update this Huffman Code object to be the minimum code for the specified frequency count. +// +// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. +// maxBits The maximum number of bits to use for any literal. +func (h *huffmanEncoder) generate(freq []int32, maxBits int32) { + list := make([]literalNode, len(freq)+1) + // Number of non-zero literals + count := 0 + // Set list to be the set of all non-zero literals and their frequencies + for i, f := range freq { + if f != 0 { + list[count] = literalNode{uint16(i), f} + count++ + } else { + h.codeBits[i] = 0 + } + } + // If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros + h.codeBits = h.codeBits[0:len(freq)] + list = list[0:count] + if count <= 2 { + // Handle the small cases here, because they are awkward for the general case code. With + // two or fewer literals, everything has bit length 1. + for i, node := range list { + // "list" is in order of increasing literal value. 
+ h.codeBits[node.literal] = 1 + h.code[node.literal] = uint16(i) + } + return + } + sortByFreq(list) + + // Get the number of literals for each bit count + bitCount := h.bitCounts(list, maxBits) + // And do the assignment + h.assignEncodingAndSize(bitCount, list) +} + +type literalNodeSorter struct { + a []literalNode + less func(i, j int) bool +} + +func (s literalNodeSorter) Len() int { return len(s.a) } + +func (s literalNodeSorter) Less(i, j int) bool { + return s.less(i, j) +} + +func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] } + +func sortByFreq(a []literalNode) { + s := &literalNodeSorter{a, func(i, j int) bool { + if a[i].freq == a[j].freq { + return a[i].literal < a[j].literal + } + return a[i].freq < a[j].freq + }} + sort.Sort(s) +} + +func sortByLiteral(a []literalNode) { + s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }} + sort.Sort(s) +} diff --git a/src/compress/flate/inflate.go b/src/compress/flate/inflate.go new file mode 100644 index 000000000..76519bbf4 --- /dev/null +++ b/src/compress/flate/inflate.go @@ -0,0 +1,739 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run gen.go -output fixedhuff.go + +// Package flate implements the DEFLATE compressed data format, described in +// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file +// formats. +package flate + +import ( + "bufio" + "io" + "strconv" +) + +const ( + maxCodeLen = 16 // max length of Huffman code + maxHist = 32768 // max history required + // The next three numbers come from the RFC, section 3.2.7. + maxLit = 286 + maxDist = 32 + numCodes = 19 // number of codes in Huffman meta-code +) + +// A CorruptInputError reports the presence of corrupt input at a given offset. +type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) +} + +// An InternalError reports an error in the flate code itself. +type InternalError string + +func (e InternalError) Error() string { return "flate: internal error: " + string(e) } + +// A ReadError reports an error encountered while reading input. +type ReadError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Read +} + +func (e *ReadError) Error() string { + return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// A WriteError reports an error encountered while writing output. +type WriteError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Write +} + +func (e *WriteError) Error() string { + return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to +// to switch to a new underlying Reader. This permits reusing a ReadCloser +// instead of allocating a new one. +type Resetter interface { + // Reset discards any buffered data and resets the Resetter as if it was + // newly initialized with the given reader. + Reset(r io.Reader, dict []byte) error +} + +// Note that much of the implementation of huffmanDecoder is also copied +// into gen.go (in package main) for the purpose of precomputing the +// fixed huffman tables so they can be included statically. 
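+// (See gen.go for that generator and fixedhuff.go for its output.)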
+ +// The data structure for decoding Huffman tables is based on that of +// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), +// For codes smaller than the table width, there are multiple entries +// (each combination of trailing bits has the same value). For codes +// larger than the table width, the table contains a link to an overflow +// table. The width of each entry in the link table is the maximum code +// size minus the chunk width. + +// Note that you can do a lookup in the table even without all bits +// filled. Since the extra bits are zero, and the DEFLATE Huffman codes +// have the property that shorter codes come before longer ones, the +// bit length estimate in the result is a lower bound on the actual +// number of bits. + +// chunk & 15 is number of bits +// chunk >> 4 is value, including table link + +const ( + huffmanChunkBits = 9 + huffmanNumChunks = 1 << huffmanChunkBits + huffmanCountMask = 15 + huffmanValueShift = 4 +) + +type huffmanDecoder struct { + min int // the minimum code length + chunks [huffmanNumChunks]uint32 // chunks as described above + links [][]uint32 // overflow links + linkMask uint32 // mask the width of the link table +} + +// Initialize Huffman decoding tables from array of code lengths. +func (h *huffmanDecoder) init(bits []int) bool { + if h.min != 0 { + *h = huffmanDecoder{} + } + + // Count number of codes of each length, + // compute min and max length. + var count [maxCodeLen]int + var min, max int + for _, n := range bits { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n]++ + } + if max == 0 { + return false + } + + h.min = min + var linkBits uint + var numLinks int + if max > huffmanChunkBits { + linkBits = uint(max) - huffmanChunkBits + numLinks = 1 << linkBits + h.linkMask = uint32(numLinks - 1) + } + code := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + if i == huffmanChunkBits+1 { + // create link tables + link := code >> 1 + if huffmanNumChunks < link { + return false + } + h.links = make([][]uint32, huffmanNumChunks-link) + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i)) + h.links[off] = make([]uint32, 1<<linkBits) + } + } + n := count[i] + nextcode[i] = code + code += n + code <<= 1 + } + + for i, n := range bits { + if n == 0 { + continue + } + code := nextcode[n] + nextcode[n]++ + chunk := uint32(i<<huffmanValueShift | n) + reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8 + reverse >>= uint(16 - n) + if n <= huffmanChunkBits { + for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { + h.chunks[off] = chunk + } + } else { + value := h.chunks[reverse&(huffmanNumChunks-1)] >> huffmanValueShift + if value >= uint32(len(h.links)) { + return false + } + linktab := h.links[value] + reverse >>= huffmanChunkBits + for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { + linktab[off] = chunk + } + } + } + return true +} + +// The actual read interface needed by NewReader. +// If the passed in io.Reader does not also have ReadByte, +// the NewReader will introduce its own buffering. +type Reader interface { + io.Reader + io.ByteReader +} + +// Decompress state. +type decompressor struct { + // Input source. + r Reader + roffset int64 + woffset int64 + + // Input bits, in top of b. 
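+	// New bytes are shifted in above the nb valid bits, and the
+	// decoder consumes bits from the bottom (see moreBits and huffSym).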
+ b uint32 + nb uint + + // Huffman decoders for literal/length, distance. + h1, h2 huffmanDecoder + + // Length arrays used to define Huffman codes. + bits *[maxLit + maxDist]int + codebits *[numCodes]int + + // Output history, buffer. + hist *[maxHist]byte + hp int // current output position in buffer + hw int // have written hist[0:hw] already + hfull bool // buffer has filled at least once + + // Temporary buffer (avoids repeated allocation). + buf [4]byte + + // Next step in the decompression, + // and decompression state. + step func(*decompressor) + final bool + err error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int +} + +func (f *decompressor) nextBlock() { + if f.final { + if f.hw != f.hp { + f.flush((*decompressor).nextBlock) + return + } + f.err = io.EOF + return + } + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlock() + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlock() + default: + // 3 is reserved. + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + return n, nil + } + if f.err != nil { + return 0, f.err + } + f.step(f) + } +} + +func (f *decompressor) Close() error { + if f.err == io.EOF { + return nil + } + return f.err +} + +// RFC 1951 section 3.2.7. +// Compression with dynamic Huffman codes + +var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +func (f *decompressor) readHuffman() error { + // HLIT[5], HDIST[5], HCLEN[4]. + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + if nlit > maxLit { + return CorruptInputError(f.roffset) + } + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + // maxDist is 32, so ndist is always valid. + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + // numCodes is 19, so nclen is always valid. + f.b >>= 4 + f.nb -= 5 + 5 + 4 + + // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. + for i := 0; i < nclen; i++ { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + f.codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + for i := nclen; i < len(codeOrder); i++ { + f.codebits[codeOrder[i]] = 0 + } + if !f.h1.init(f.codebits[0:]) { + return CorruptInputError(f.roffset) + } + + // HLIT + 257 code lengths, HDIST + 1 code lengths, + // using the code length Huffman code. + for i, n := 0, nlit+ndist; i < n; { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + if x < 16 { + // Actual length. + f.bits[i] = x + i++ + continue + } + // Repeat previous length or zero. 
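+		// Code 16 repeats the previous length 3-6 times, code 17
+		// repeats zero 3-10 times, and code 18 repeats zero 11-138
+		// times (RFC 1951, section 3.2.7).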
+ var rep int + var nb uint + var b int + switch x { + default: + return InternalError("unexpected length code") + case 16: + rep = 3 + nb = 2 + if i == 0 { + return CorruptInputError(f.roffset) + } + b = f.bits[i-1] + case 17: + rep = 3 + nb = 3 + b = 0 + case 18: + rep = 11 + nb = 7 + b = 0 + } + for f.nb < nb { + if err := f.moreBits(); err != nil { + return err + } + } + rep += int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + if i+rep > n { + return CorruptInputError(f.roffset) + } + for j := 0; j < rep; j++ { + f.bits[i] = b + i++ + } + } + + if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { + return CorruptInputError(f.roffset) + } + + return nil +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBlock() { + for { + v, err := f.huffSym(f.hl) + if err != nil { + f.err = err + return + } + var n uint // number of bits extra + var length int + switch { + case v < 256: + f.hist[f.hp] = byte(v) + f.hp++ + if f.hp == len(f.hist) { + // After the flush, continue this loop. + f.flush((*decompressor).huffmanBlock) + return + } + continue + case v == 256: + // Done with huffman block; read next block. + f.step = (*decompressor).nextBlock + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + default: + length = 258 + n = 0 + } + if n > 0 { + for f.nb < n { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + length += int(f.b & uint32(1<<n-1)) + f.b >>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + dist = int(reverseByte[(f.b&0x1F)<<3]) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist >= 30: + f.err = CorruptInputError(f.roffset) + return + default: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + extra |= int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + } + + // Copy history[-dist:-dist+length] into output. + if dist > len(f.hist) { + f.err = InternalError("bad history distance") + return + } + + // No check on length; encoding can be prescient. + if !f.hfull && dist > f.hp { + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + if f.copyHist() { + return + } + } +} + +// copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself. +// It reports whether the f.hist buffer is full. +func (f *decompressor) copyHist() bool { + p := f.hp - f.copyDist + if p < 0 { + p += len(f.hist) + } + for f.copyLen > 0 { + n := f.copyLen + if x := len(f.hist) - f.hp; n > x { + n = x + } + if x := len(f.hist) - p; n > x { + n = x + } + forwardCopy(f.hist[:], f.hp, p, n) + p += n + f.hp += n + f.copyLen -= n + if f.hp == len(f.hist) { + // After flush continue copying out of history. 
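+			// (copyHuff re-enters copyHist to finish the remaining
+			// copyLen bytes once the reader has drained the buffer.)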
+ f.flush((*decompressor).copyHuff) + return true + } + if p == len(f.hist) { + p = 0 + } + } + return false +} + +func (f *decompressor) copyHuff() { + if f.copyHist() { + return + } + f.huffmanBlock() +} + +// Copy a single uncompressed data block from input to output. +func (f *decompressor) dataBlock() { + // Uncompressed. + // Discard current half-byte. + f.nb = 0 + f.b = 0 + + // Length then ones-complement of length. + nr, err := io.ReadFull(f.r, f.buf[0:4]) + f.roffset += int64(nr) + if err != nil { + f.err = &ReadError{f.roffset, err} + return + } + n := int(f.buf[0]) | int(f.buf[1])<<8 + nn := int(f.buf[2]) | int(f.buf[3])<<8 + if uint16(nn) != uint16(^n) { + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + // 0-length block means sync + f.flush((*decompressor).nextBlock) + return + } + + f.copyLen = n + f.copyData() +} + +// copyData copies f.copyLen bytes from the underlying reader into f.hist. +// It pauses for reads when f.hist is full. +func (f *decompressor) copyData() { + n := f.copyLen + for n > 0 { + m := len(f.hist) - f.hp + if m > n { + m = n + } + m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) + f.roffset += int64(m) + if err != nil { + f.err = &ReadError{f.roffset, err} + return + } + n -= m + f.hp += m + if f.hp == len(f.hist) { + f.copyLen = n + f.flush((*decompressor).copyData) + return + } + } + f.step = (*decompressor).nextBlock +} + +func (f *decompressor) setDict(dict []byte) { + if len(dict) > len(f.hist) { + // Will only remember the tail. + dict = dict[len(dict)-len(f.hist):] + } + + f.hp = copy(f.hist[:], dict) + if f.hp == len(f.hist) { + f.hp = 0 + f.hfull = true + } + f.hw = f.hp +} + +func (f *decompressor) moreBits() error { + c, err := f.r.ReadByte() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil +} + +// Read the next Huffman-encoded symbol from f according to h. +func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { + n := uint(h.min) + for { + for f.nb < n { + if err := f.moreBits(); err != nil { + return 0, err + } + } + chunk := h.chunks[f.b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + if n == 0 { + f.err = CorruptInputError(f.roffset) + return 0, f.err + } + } + if n <= f.nb { + f.b >>= n + f.nb -= n + return int(chunk >> huffmanValueShift), nil + } + } +} + +// Flush any buffered output to the underlying writer. +func (f *decompressor) flush(step func(*decompressor)) { + f.toRead = f.hist[f.hw:f.hp] + f.woffset += int64(f.hp - f.hw) + f.hw = f.hp + if f.hp == len(f.hist) { + f.hp = 0 + f.hw = 0 + f.hfull = true + } + f.step = step +} + +func makeReader(r io.Reader) Reader { + if rr, ok := r.(Reader); ok { + return rr + } + return bufio.NewReader(r) +} + +func (f *decompressor) Reset(r io.Reader, dict []byte) error { + *f = decompressor{ + r: makeReader(r), + bits: f.bits, + codebits: f.codebits, + hist: f.hist, + step: (*decompressor).nextBlock, + } + if dict != nil { + f.setDict(dict) + } + return nil +} + +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser +// when finished reading. 
+// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) io.ReadCloser { + var f decompressor + f.bits = new([maxLit + maxDist]int) + f.codebits = new([numCodes]int) + f.r = makeReader(r) + f.hist = new([maxHist]byte) + f.step = (*decompressor).nextBlock + return &f +} + +// NewReaderDict is like NewReader but initializes the reader +// with a preset dictionary. The returned Reader behaves as if +// the uncompressed data stream started with the given dictionary, +// which has already been read. NewReaderDict is typically used +// to read data compressed by NewWriterDict. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { + var f decompressor + f.r = makeReader(r) + f.hist = new([maxHist]byte) + f.bits = new([maxLit + maxDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.setDict(dict) + return &f +} diff --git a/src/compress/flate/inflate_test.go b/src/compress/flate/inflate_test.go new file mode 100644 index 000000000..9f25d30b3 --- /dev/null +++ b/src/compress/flate/inflate_test.go @@ -0,0 +1,39 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "io" + "testing" +) + +func TestReset(t *testing.T) { + ss := []string{ + "lorem ipsum izzle fo rizzle", + "the quick brown fox jumped over", + } + + deflated := make([]bytes.Buffer, 2) + for i, s := range ss { + w, _ := NewWriter(&deflated[i], 1) + w.Write([]byte(s)) + w.Close() + } + + inflated := make([]bytes.Buffer, 2) + + f := NewReader(&deflated[0]) + io.Copy(&inflated[0], f) + f.(Resetter).Reset(&deflated[1], nil) + io.Copy(&inflated[1], f) + f.Close() + + for i, s := range ss { + if s != inflated[i].String() { + t.Errorf("inflated[%d]:\ngot %q\nwant %q", i, inflated[i], s) + } + } +} diff --git a/src/compress/flate/reader_test.go b/src/compress/flate/reader_test.go new file mode 100644 index 000000000..a62ef741d --- /dev/null +++ b/src/compress/flate/reader_test.go @@ -0,0 +1,96 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "io" + "io/ioutil" + "runtime" + "strings" + "testing" +) + +func TestNlitOutOfRange(t *testing.T) { + // Trying to decode this bogus flate data, which has a Huffman table + // with nlit=288, should not panic. + io.Copy(ioutil.Discard, NewReader(strings.NewReader( + "\xfc\xfe\x36\xe7\x5e\x1c\xef\xb3\x55\x58\x77\xb6\x56\xb5\x43\xf4"+ + "\x6f\xf2\xd2\xe6\x3d\x99\xa0\x85\x8c\x48\xeb\xf8\xda\x83\x04\x2a"+ + "\x75\xc4\xf8\x0f\x12\x11\xb9\xb4\x4b\x09\xa0\xbe\x8b\x91\x4c"))) +} + +const ( + digits = iota + twain +) + +var testfiles = []string{ + // Digits is the digits of the irrational number e. Its decimal representation + // does not repeat, but there are only 10 possible digits, so it should be + // reasonably compressible. + digits: "../testdata/e.txt", + // Twain is Project Gutenberg's edition of Mark Twain's classic English novel. 
+ twain: "../testdata/Mark.Twain-Tom.Sawyer.txt", +} + +func benchmarkDecode(b *testing.B, testfile, level, n int) { + b.ReportAllocs() + b.StopTimer() + b.SetBytes(int64(n)) + buf0, err := ioutil.ReadFile(testfiles[testfile]) + if err != nil { + b.Fatal(err) + } + if len(buf0) == 0 { + b.Fatalf("test file %q has no data", testfiles[testfile]) + } + compressed := new(bytes.Buffer) + w, err := NewWriter(compressed, level) + if err != nil { + b.Fatal(err) + } + for i := 0; i < n; i += len(buf0) { + if len(buf0) > n-i { + buf0 = buf0[:n-i] + } + io.Copy(w, bytes.NewReader(buf0)) + } + w.Close() + buf1 := compressed.Bytes() + buf0, compressed, w = nil, nil, nil + runtime.GC() + b.StartTimer() + for i := 0; i < b.N; i++ { + io.Copy(ioutil.Discard, NewReader(bytes.NewReader(buf1))) + } +} + +// These short names are so that gofmt doesn't break the BenchmarkXxx function +// bodies below over multiple lines. +const ( + speed = BestSpeed + default_ = DefaultCompression + compress = BestCompression +) + +func BenchmarkDecodeDigitsSpeed1e4(b *testing.B) { benchmarkDecode(b, digits, speed, 1e4) } +func BenchmarkDecodeDigitsSpeed1e5(b *testing.B) { benchmarkDecode(b, digits, speed, 1e5) } +func BenchmarkDecodeDigitsSpeed1e6(b *testing.B) { benchmarkDecode(b, digits, speed, 1e6) } +func BenchmarkDecodeDigitsDefault1e4(b *testing.B) { benchmarkDecode(b, digits, default_, 1e4) } +func BenchmarkDecodeDigitsDefault1e5(b *testing.B) { benchmarkDecode(b, digits, default_, 1e5) } +func BenchmarkDecodeDigitsDefault1e6(b *testing.B) { benchmarkDecode(b, digits, default_, 1e6) } +func BenchmarkDecodeDigitsCompress1e4(b *testing.B) { benchmarkDecode(b, digits, compress, 1e4) } +func BenchmarkDecodeDigitsCompress1e5(b *testing.B) { benchmarkDecode(b, digits, compress, 1e5) } +func BenchmarkDecodeDigitsCompress1e6(b *testing.B) { benchmarkDecode(b, digits, compress, 1e6) } +func BenchmarkDecodeTwainSpeed1e4(b *testing.B) { benchmarkDecode(b, twain, speed, 1e4) } +func BenchmarkDecodeTwainSpeed1e5(b *testing.B) { benchmarkDecode(b, twain, speed, 1e5) } +func BenchmarkDecodeTwainSpeed1e6(b *testing.B) { benchmarkDecode(b, twain, speed, 1e6) } +func BenchmarkDecodeTwainDefault1e4(b *testing.B) { benchmarkDecode(b, twain, default_, 1e4) } +func BenchmarkDecodeTwainDefault1e5(b *testing.B) { benchmarkDecode(b, twain, default_, 1e5) } +func BenchmarkDecodeTwainDefault1e6(b *testing.B) { benchmarkDecode(b, twain, default_, 1e6) } +func BenchmarkDecodeTwainCompress1e4(b *testing.B) { benchmarkDecode(b, twain, compress, 1e4) } +func BenchmarkDecodeTwainCompress1e5(b *testing.B) { benchmarkDecode(b, twain, compress, 1e5) } +func BenchmarkDecodeTwainCompress1e6(b *testing.B) { benchmarkDecode(b, twain, compress, 1e6) } diff --git a/src/compress/flate/reverse_bits.go b/src/compress/flate/reverse_bits.go new file mode 100644 index 000000000..c1a02720d --- /dev/null +++ b/src/compress/flate/reverse_bits.go @@ -0,0 +1,48 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package flate + +var reverseByte = [256]byte{ + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, + 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, + 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, + 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, + 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, + 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, + 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, + 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, + 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, + 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, + 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, + 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, + 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, + 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, + 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, + 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, + 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, +} + +func reverseUint16(v uint16) uint16 { + return uint16(reverseByte[v>>8]) | uint16(reverseByte[v&0xFF])<<8 +} + +func reverseBits(number uint16, bitLength byte) uint16 { + return reverseUint16(number << uint8(16-bitLength)) +} diff --git a/src/compress/flate/token.go b/src/compress/flate/token.go new file mode 100644 index 000000000..4d4917687 --- /dev/null +++ b/src/compress/flate/token.go @@ -0,0 +1,102 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
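+
+// token.go packs literals and <length, offset> matches into single
+// 32-bit tokens using the bit layout documented below.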
+ +package flate + +const ( + // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused + // 8 bits: xlength = length - MIN_MATCH_LENGTH + // 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal + lengthShift = 22 + offsetMask = 1<<lengthShift - 1 + typeMask = 3 << 30 + literalType = 0 << 30 + matchType = 1 << 30 +) + +// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH) +// is lengthCodes[length - MIN_MATCH_LENGTH] +var lengthCodes = [...]uint32{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, + 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, + 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, + 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 28, +} + +var offsetCodes = [...]uint32{ + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +} + +type token uint32 + +// Convert a literal into a literal token. +func literalToken(literal uint32) token { return token(literalType + literal) } + +// Convert a < xlength, xoffset > pair into a match token. 
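+// The shift and add place xlength in bits 22-29 and xoffset in bits
+// 0-21, per the constants above.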
+func matchToken(xlength uint32, xoffset uint32) token { + return token(matchType + xlength<<lengthShift + xoffset) +} + +// Returns the type of a token +func (t token) typ() uint32 { return uint32(t) & typeMask } + +// Returns the literal of a literal token +func (t token) literal() uint32 { return uint32(t - literalType) } + +// Returns the extra offset of a match token +func (t token) offset() uint32 { return uint32(t) & offsetMask } + +func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) } + +func lengthCode(len uint32) uint32 { return lengthCodes[len] } + +// Returns the offset code corresponding to a specific offset +func offsetCode(off uint32) uint32 { + const n = uint32(len(offsetCodes)) + switch { + case off < n: + return offsetCodes[off] + case off>>7 < n: + return offsetCodes[off>>7] + 14 + default: + return offsetCodes[off>>14] + 28 + } +} diff --git a/src/compress/flate/writer_test.go b/src/compress/flate/writer_test.go new file mode 100644 index 000000000..58431774e --- /dev/null +++ b/src/compress/flate/writer_test.go @@ -0,0 +1,60 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "io/ioutil" + "runtime" + "testing" +) + +func benchmarkEncoder(b *testing.B, testfile, level, n int) { + b.StopTimer() + b.SetBytes(int64(n)) + buf0, err := ioutil.ReadFile(testfiles[testfile]) + if err != nil { + b.Fatal(err) + } + if len(buf0) == 0 { + b.Fatalf("test file %q has no data", testfiles[testfile]) + } + buf1 := make([]byte, n) + for i := 0; i < n; i += len(buf0) { + if len(buf0) > n-i { + buf0 = buf0[:n-i] + } + copy(buf1[i:], buf0) + } + buf0 = nil + runtime.GC() + b.StartTimer() + for i := 0; i < b.N; i++ { + w, err := NewWriter(ioutil.Discard, level) + if err != nil { + b.Fatal(err) + } + w.Write(buf1) + w.Close() + } +} + +func BenchmarkEncodeDigitsSpeed1e4(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e4) } +func BenchmarkEncodeDigitsSpeed1e5(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e5) } +func BenchmarkEncodeDigitsSpeed1e6(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e6) } +func BenchmarkEncodeDigitsDefault1e4(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e4) } +func BenchmarkEncodeDigitsDefault1e5(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e5) } +func BenchmarkEncodeDigitsDefault1e6(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e6) } +func BenchmarkEncodeDigitsCompress1e4(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e4) } +func BenchmarkEncodeDigitsCompress1e5(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e5) } +func BenchmarkEncodeDigitsCompress1e6(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e6) } +func BenchmarkEncodeTwainSpeed1e4(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e4) } +func BenchmarkEncodeTwainSpeed1e5(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e5) } +func BenchmarkEncodeTwainSpeed1e6(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e6) } +func BenchmarkEncodeTwainDefault1e4(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e4) } +func BenchmarkEncodeTwainDefault1e5(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e5) } +func BenchmarkEncodeTwainDefault1e6(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e6) } +func BenchmarkEncodeTwainCompress1e4(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e4) } +func BenchmarkEncodeTwainCompress1e5(b *testing.B) { benchmarkEncoder(b, 
twain, compress, 1e5) }
+func BenchmarkEncodeTwainCompress1e6(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e6) }
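For orientation, here is a minimal round trip through the package imported above: compress two payloads with NewWriter at BestSpeed, then decompress both with a single NewReader reused via the Resetter interface, mirroring TestReset. This sketch is illustrative only and is not part of the imported sources; the payload strings are arbitrary.

package main

import (
	"bytes"
	"compress/flate"
	"fmt"
	"io"
	"log"
)

func main() {
	payloads := []string{
		"lorem ipsum izzle fo rizzle",
		"the quick brown fox jumped over",
	}

	// Compress each payload into its own buffer at BestSpeed (level 1).
	compressed := make([]bytes.Buffer, len(payloads))
	for i, s := range payloads {
		w, err := flate.NewWriter(&compressed[i], flate.BestSpeed)
		if err != nil {
			log.Fatal(err)
		}
		w.Write([]byte(s))
		w.Close()
	}

	// Decompress the first stream, then reuse the same decompressor for
	// the second via Resetter instead of allocating a new one.
	r := flate.NewReader(&compressed[0])
	var out bytes.Buffer
	io.Copy(&out, r)
	r.(flate.Resetter).Reset(&compressed[1], nil)
	io.Copy(&out, r)
	r.Close()

	fmt.Println(out.String()) // both payloads, concatenated
}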