summaryrefslogtreecommitdiff
path: root/src/pkg/compress/flate/inflate.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/compress/flate/inflate.go')
-rw-r--r--src/pkg/compress/flate/inflate.go255
1 files changed, 122 insertions, 133 deletions
diff --git a/src/pkg/compress/flate/inflate.go b/src/pkg/compress/flate/inflate.go
index 3f2042bfe..a8d646019 100644
--- a/src/pkg/compress/flate/inflate.go
+++ b/src/pkg/compress/flate/inflate.go
@@ -16,9 +16,10 @@ import (
const (
maxCodeLen = 16 // max length of Huffman code
maxHist = 32768 // max history required
- maxLit = 286
- maxDist = 32
- numCodes = 19 // number of codes in Huffman meta-code
+ // The next three numbers come from the RFC, section 3.2.7.
+ maxLit = 286
+ maxDist = 32
+ numCodes = 19 // number of codes in Huffman meta-code
)
// A CorruptInputError reports the presence of corrupt input at a given offset.
@@ -53,32 +54,46 @@ func (e *WriteError) Error() string {
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
-// Huffman decoder is based on
-// J. Brian Connell, ``A Huffman-Shannon-Fano Code,''
-// Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047.
-type huffmanDecoder struct {
- // min, max code length
- min, max int
+// Note that much of the implemenation of huffmanDecoder is also copied
+// into gen.go (in package main) for the purpose of precomputing the
+// fixed huffman tables so they can be included statically.
+
+// The data structure for decoding Huffman tables is based on that of
+// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
+// For codes smaller than the table width, there are multiple entries
+// (each combination of trailing bits has the same value). For codes
+// larger than the table width, the table contains a link to an overflow
+// table. The width of each entry in the link table is the maximum code
+// size minus the chunk width.
+
+// Note that you can do a lookup in the table even without all bits
+// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
+// have the property that shorter codes come before longer ones, the
+// bit length estimate in the result is a lower bound on the actual
+// number of bits.
- // limit[i] = largest code word of length i
- // Given code v of length n,
- // need more bits if v > limit[n].
- limit [maxCodeLen + 1]int
+// chunk & 15 is number of bits
+// chunk >> 4 is value, including table link
- // base[i] = smallest code word of length i - seq number
- base [maxCodeLen + 1]int
+const (
+ huffmanChunkBits = 9
+ huffmanNumChunks = 1 << huffmanChunkBits
+ huffmanCountMask = 15
+ huffmanValueShift = 4
+)
- // codes[seq number] = output code.
- // Given code v of length n, value is
- // codes[v - base[n]].
- codes []int
+type huffmanDecoder struct {
+ min int // the minimum code length
+ chunks [huffmanNumChunks]uint32 // chunks as described above
+ links [][]uint32 // overflow links
+ linkMask uint32 // mask the width of the link table
}
// Initialize Huffman decoding tables from array of code lengths.
func (h *huffmanDecoder) init(bits []int) bool {
// Count number of codes of each length,
// compute min and max length.
- var count [maxCodeLen + 1]int
+ var count [maxCodeLen]int
var min, max int
for _, n := range bits {
if n == 0 {
@@ -97,93 +112,58 @@ func (h *huffmanDecoder) init(bits []int) bool {
}
h.min = min
- h.max = max
-
- // For each code range, compute
- // nextcode (first code of that length),
- // limit (last code of that length), and
- // base (offset from first code to sequence number).
+ var linkBits uint
+ var numLinks int
+ if max > huffmanChunkBits {
+ linkBits = uint(max) - huffmanChunkBits
+ numLinks = 1 << linkBits
+ h.linkMask = uint32(numLinks - 1)
+ }
code := 0
- seq := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
+ if i == huffmanChunkBits+1 {
+ // create link tables
+ link := code >> 1
+ h.links = make([][]uint32, huffmanNumChunks-link)
+ for j := uint(link); j < huffmanNumChunks; j++ {
+ reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
+ reverse >>= uint(16 - huffmanChunkBits)
+ off := j - uint(link)
+ h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
+ h.links[off] = make([]uint32, 1<<linkBits)
+ }
+ }
n := count[i]
nextcode[i] = code
- h.base[i] = code - seq
code += n
- seq += n
- h.limit[i] = code - 1
code <<= 1
}
- // Make array mapping sequence numbers to codes.
- if len(h.codes) < len(bits) {
- h.codes = make([]int, len(bits))
- }
for i, n := range bits {
if n == 0 {
continue
}
code := nextcode[n]
nextcode[n]++
- seq := code - h.base[n]
- h.codes[seq] = i
+ chunk := uint32(i<<huffmanValueShift | n)
+ reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
+ reverse >>= uint(16 - n)
+ if n <= huffmanChunkBits {
+ for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
+ h.chunks[off] = chunk
+ }
+ } else {
+ linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift]
+ reverse >>= huffmanChunkBits
+ for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
+ linktab[off] = chunk
+ }
+ }
}
return true
}
-// Hard-coded Huffman tables for DEFLATE algorithm.
-// See RFC 1951, section 3.2.6.
-var fixedHuffmanDecoder = huffmanDecoder{
- 7, 9,
- [maxCodeLen + 1]int{7: 23, 199, 511},
- [maxCodeLen + 1]int{7: 0, 24, 224},
- []int{
- // length 7: 256-279
- 256, 257, 258, 259, 260, 261, 262,
- 263, 264, 265, 266, 267, 268, 269,
- 270, 271, 272, 273, 274, 275, 276,
- 277, 278, 279,
-
- // length 8: 0-143
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
- 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
- 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
- 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
- 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
- 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
- 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
- 92, 93, 94, 95, 96, 97, 98, 99, 100,
- 101, 102, 103, 104, 105, 106, 107, 108,
- 109, 110, 111, 112, 113, 114, 115, 116,
- 117, 118, 119, 120, 121, 122, 123, 124,
- 125, 126, 127, 128, 129, 130, 131, 132,
- 133, 134, 135, 136, 137, 138, 139, 140,
- 141, 142, 143,
-
- // length 8: 280-287
- 280, 281, 282, 283, 284, 285, 286, 287,
-
- // length 9: 144-255
- 144, 145, 146, 147, 148, 149, 150, 151,
- 152, 153, 154, 155, 156, 157, 158, 159,
- 160, 161, 162, 163, 164, 165, 166, 167,
- 168, 169, 170, 171, 172, 173, 174, 175,
- 176, 177, 178, 179, 180, 181, 182, 183,
- 184, 185, 186, 187, 188, 189, 190, 191,
- 192, 193, 194, 195, 196, 197, 198, 199,
- 200, 201, 202, 203, 204, 205, 206, 207,
- 208, 209, 210, 211, 212, 213, 214, 215,
- 216, 217, 218, 219, 220, 221, 222, 223,
- 224, 225, 226, 227, 228, 229, 230, 231,
- 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247,
- 248, 249, 250, 251, 252, 253, 254, 255,
- },
-}
-
// The actual read interface needed by NewReader.
// If the passed in io.Reader does not also have ReadByte,
// the NewReader will introduce its own buffering.
@@ -207,11 +187,11 @@ type decompressor struct {
h1, h2 huffmanDecoder
// Length arrays used to define Huffman codes.
- bits [maxLit + maxDist]int
- codebits [numCodes]int
+ bits *[maxLit + maxDist]int
+ codebits *[numCodes]int
// Output history, buffer.
- hist [maxHist]byte
+ hist *[maxHist]byte
hp int // current output position in buffer
hw int // have written hist[0:hw] already
hfull bool // buffer has filled at least once
@@ -306,10 +286,15 @@ func (f *decompressor) readHuffman() error {
}
}
nlit := int(f.b&0x1F) + 257
+ if nlit > maxLit {
+ return CorruptInputError(f.roffset)
+ }
f.b >>= 5
ndist := int(f.b&0x1F) + 1
+ // maxDist is 32, so ndist is always valid.
f.b >>= 5
nclen := int(f.b&0xF) + 4
+ // numCodes is 19, so nclen is always valid.
f.b >>= 4
f.nb -= 5 + 5 + 4
@@ -505,51 +490,49 @@ func (f *decompressor) huffmanBlock() {
return
}
- p := f.hp - dist
- if p < 0 {
- p += len(f.hist)
- }
- for i := 0; i < length; i++ {
- f.hist[f.hp] = f.hist[p]
- f.hp++
- p++
- if f.hp == len(f.hist) {
- // After flush continue copying out of history.
- f.copyLen = length - (i + 1)
- f.copyDist = dist
- f.flush((*decompressor).copyHuff)
- return
- }
- if p == len(f.hist) {
- p = 0
- }
+ f.copyLen, f.copyDist = length, dist
+ if f.copyHist() {
+ return
}
}
panic("unreached")
}
-func (f *decompressor) copyHuff() {
- length := f.copyLen
- dist := f.copyDist
- p := f.hp - dist
+// copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself.
+// It reports whether the f.hist buffer is full.
+func (f *decompressor) copyHist() bool {
+ p := f.hp - f.copyDist
if p < 0 {
p += len(f.hist)
}
- for i := 0; i < length; i++ {
- f.hist[f.hp] = f.hist[p]
- f.hp++
- p++
+ for f.copyLen > 0 {
+ n := f.copyLen
+ if x := len(f.hist) - f.hp; n > x {
+ n = x
+ }
+ if x := len(f.hist) - p; n > x {
+ n = x
+ }
+ forwardCopy(f.hist[f.hp:f.hp+n], f.hist[p:p+n])
+ p += n
+ f.hp += n
+ f.copyLen -= n
if f.hp == len(f.hist) {
- f.copyLen = length - (i + 1)
+ // After flush continue copying out of history.
f.flush((*decompressor).copyHuff)
- return
+ return true
}
if p == len(f.hist) {
p = 0
}
}
+ return false
+}
- // Continue processing Huffman block.
+func (f *decompressor) copyHuff() {
+ if f.copyHist() {
+ return
+ }
f.huffmanBlock()
}
@@ -584,9 +567,9 @@ func (f *decompressor) dataBlock() {
f.copyData()
}
+// copyData copies f.copyLen bytes from the underlying reader into f.hist.
+// It pauses for reads when f.hist is full.
func (f *decompressor) copyData() {
- // Read f.dataLen bytes into history,
- // pausing for reads as history fills.
n := f.copyLen
for n > 0 {
m := len(f.hist) - f.hp
@@ -640,23 +623,23 @@ func (f *decompressor) moreBits() error {
// Read the next Huffman-encoded symbol from f according to h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
- for n := uint(h.min); n <= uint(h.max); n++ {
- lim := h.limit[n]
- if lim == -1 {
- continue
- }
+ n := uint(h.min)
+ for {
for f.nb < n {
if err := f.moreBits(); err != nil {
return 0, err
}
}
- v := int(f.b & uint32(1<<n-1))
- v <<= 16 - n
- v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits
- if v <= lim {
+ chunk := h.chunks[f.b&(huffmanNumChunks-1)]
+ n = uint(chunk & huffmanCountMask)
+ if n > huffmanChunkBits {
+ chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
+ n = uint(chunk & huffmanCountMask)
+ }
+ if n <= f.nb {
f.b >>= n
f.nb -= n
- return h.codes[v-h.base[n]], nil
+ return int(chunk >> huffmanValueShift), nil
}
}
return 0, CorruptInputError(f.roffset)
@@ -688,7 +671,10 @@ func makeReader(r io.Reader) Reader {
// finished reading.
func NewReader(r io.Reader) io.ReadCloser {
var f decompressor
+ f.bits = new([maxLit + maxDist]int)
+ f.codebits = new([numCodes]int)
f.r = makeReader(r)
+ f.hist = new([maxHist]byte)
f.step = (*decompressor).nextBlock
return &f
}
@@ -700,8 +686,11 @@ func NewReader(r io.Reader) io.ReadCloser {
// to read data compressed by NewWriterDict.
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
var f decompressor
- f.setDict(dict)
f.r = makeReader(r)
+ f.hist = new([maxHist]byte)
+ f.bits = new([maxLit + maxDist]int)
+ f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
+ f.setDict(dict)
return &f
}