1 files changed, 122 insertions, 133 deletions
diff --git a/src/pkg/compress/flate/inflate.go b/src/pkg/compress/flate/inflate.go
index 3f2042bfe..a8d646019 100644
--- a/src/pkg/compress/flate/inflate.go
+++ b/src/pkg/compress/flate/inflate.go
@@ -16,9 +16,10 @@ import (
 const (
 	maxCodeLen = 16    // max length of Huffman code
 	maxHist    = 32768 // max history required
-	maxLit     = 286
-	maxDist    = 32
-	numCodes   = 19 // number of codes in Huffman meta-code
+	// The next three numbers come from the RFC, section 3.2.7.
+	maxLit   = 286
+	maxDist  = 32
+	numCodes = 19 // number of codes in Huffman meta-code
 )
 
 // A CorruptInputError reports the presence of corrupt input at a given offset.
@@ -53,32 +54,46 @@ func (e *WriteError) Error() string {
 	return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
 }
 
-// Huffman decoder is based on
-// J. Brian Connell, ``A Huffman-Shannon-Fano Code,''
-// Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047.
-type huffmanDecoder struct {
-	// min, max code length
-	min, max int
+// Note that much of the implemenation of huffmanDecoder is also copied
+// into gen.go (in package main) for the purpose of precomputing the
+// fixed huffman tables so they can be included statically.
+
+// The data structure for decoding Huffman tables is based on that of
+// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
+// For codes smaller than the table width, there are multiple entries
+// (each combination of trailing bits has the same value). For codes
+// larger than the table width, the table contains a link to an overflow
+// table. The width of each entry in the link table is the maximum code
+// size minus the chunk width.
+
+// Note that you can do a lookup in the table even without all bits
+// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
+// have the property that shorter codes come before longer ones, the
+// bit length estimate in the result is a lower bound on the actual
+// number of bits.
 
-	// limit[i] = largest code word of length i
-	// Given code v of length n,
-	// need more bits if v > limit[n].
-	limit [maxCodeLen + 1]int
+// chunk & 15 is number of bits
+// chunk >> 4 is value, including table link
 
-	// base[i] = smallest code word of length i - seq number
-	base [maxCodeLen + 1]int
+const (
+	huffmanChunkBits  = 9
+	huffmanNumChunks  = 1 << huffmanChunkBits
+	huffmanCountMask  = 15
+	huffmanValueShift = 4
+)
 
-	// codes[seq number] = output code.
-	// Given code v of length n, value is
-	// codes[v - base[n]].
-	codes []int
+type huffmanDecoder struct {
+	min      int                      // the minimum code length
+	chunks   [huffmanNumChunks]uint32 // chunks as described above
+	links    [][]uint32               // overflow links
+	linkMask uint32                   // mask the width of the link table
 }
 
 // Initialize Huffman decoding tables from array of code lengths.
 func (h *huffmanDecoder) init(bits []int) bool {
 	// Count number of codes of each length,
 	// compute min and max length.
-	var count [maxCodeLen + 1]int
+	var count [maxCodeLen]int
 	var min, max int
 	for _, n := range bits {
 		if n == 0 {
@@ -97,93 +112,58 @@ func (h *huffmanDecoder) init(bits []int) bool {
 	}
 
 	h.min = min
-	h.max = max
-
-	// For each code range, compute
-	// nextcode (first code of that length),
-	// limit (last code of that length), and
-	// base (offset from first code to sequence number).
+	var linkBits uint
+	var numLinks int
+	if max > huffmanChunkBits {
+		linkBits = uint(max) - huffmanChunkBits
+		numLinks = 1 << linkBits
+		h.linkMask = uint32(numLinks - 1)
+	}
 	code := 0
-	seq := 0
 	var nextcode [maxCodeLen]int
 	for i := min; i <= max; i++ {
+		if i == huffmanChunkBits+1 {
+			// create link tables
+			link := code >> 1
+			h.links = make([][]uint32, huffmanNumChunks-link)
+			for j := uint(link); j < huffmanNumChunks; j++ {
+				reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
+				reverse >>= uint(16 - huffmanChunkBits)
+				off := j - uint(link)
+				h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
+				h.links[off] = make([]uint32, 1<<linkBits)
+			}
+		}
 		n := count[i]
 		nextcode[i] = code
-		h.base[i] = code - seq
 		code += n
-		seq += n
-		h.limit[i] = code - 1
 		code <<= 1
 	}
 
-	// Make array mapping sequence numbers to codes.
-	if len(h.codes) < len(bits) {
-		h.codes = make([]int, len(bits))
-	}
 	for i, n := range bits {
 		if n == 0 {
 			continue
 		}
 		code := nextcode[n]
 		nextcode[n]++
-		seq := code - h.base[n]
-		h.codes[seq] = i
+		chunk := uint32(i<<huffmanValueShift | n)
+		reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
+		reverse >>= uint(16 - n)
+		if n <= huffmanChunkBits {
+			for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
+				h.chunks[off] = chunk
+			}
+		} else {
+			linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift]
+			reverse >>= huffmanChunkBits
+			for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
+				linktab[off] = chunk
+			}
+		}
 	}
 	return true
 }
 
-// Hard-coded Huffman tables for DEFLATE algorithm.
-// See RFC 1951, section 3.2.6.
-var fixedHuffmanDecoder = huffmanDecoder{
-	7, 9,
-	[maxCodeLen + 1]int{7: 23, 199, 511},
-	[maxCodeLen + 1]int{7: 0, 24, 224},
-	[]int{
-		// length 7: 256-279
-		256, 257, 258, 259, 260, 261, 262,
-		263, 264, 265, 266, 267, 268, 269,
-		270, 271, 272, 273, 274, 275, 276,
-		277, 278, 279,
-
-		// length 8: 0-143
-		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
-		12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
-		22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-		32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
-		42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
-		52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
-		62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
-		72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
-		82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
-		92, 93, 94, 95, 96, 97, 98, 99, 100,
-		101, 102, 103, 104, 105, 106, 107, 108,
-		109, 110, 111, 112, 113, 114, 115, 116,
-		117, 118, 119, 120, 121, 122, 123, 124,
-		125, 126, 127, 128, 129, 130, 131, 132,
-		133, 134, 135, 136, 137, 138, 139, 140,
-		141, 142, 143,
-
-		// length 8: 280-287
-		280, 281, 282, 283, 284, 285, 286, 287,
-
-		// length 9: 144-255
-		144, 145, 146, 147, 148, 149, 150, 151,
-		152, 153, 154, 155, 156, 157, 158, 159,
-		160, 161, 162, 163, 164, 165, 166, 167,
-		168, 169, 170, 171, 172, 173, 174, 175,
-		176, 177, 178, 179, 180, 181, 182, 183,
-		184, 185, 186, 187, 188, 189, 190, 191,
-		192, 193, 194, 195, 196, 197, 198, 199,
-		200, 201, 202, 203, 204, 205, 206, 207,
-		208, 209, 210, 211, 212, 213, 214, 215,
-		216, 217, 218, 219, 220, 221, 222, 223,
-		224, 225, 226, 227, 228, 229, 230, 231,
-		232, 233, 234, 235, 236, 237, 238, 239,
-		240, 241, 242, 243, 244, 245, 246, 247,
-		248, 249, 250, 251, 252, 253, 254, 255,
-	},
-}
-
 // The actual read interface needed by NewReader.
 // If the passed in io.Reader does not also have ReadByte,
 // the NewReader will introduce its own buffering.
@@ -207,11 +187,11 @@ type decompressor struct {
 	h1, h2 huffmanDecoder
 
 	// Length arrays used to define Huffman codes.
-	bits     [maxLit + maxDist]int
-	codebits [numCodes]int
+	bits     *[maxLit + maxDist]int
+	codebits *[numCodes]int
 
 	// Output history, buffer.
-	hist  [maxHist]byte
+	hist  *[maxHist]byte
 	hp    int  // current output position in buffer
 	hw    int  // have written hist[0:hw] already
 	hfull bool // buffer has filled at least once
@@ -306,10 +286,15 @@ func (f *decompressor) readHuffman() error {
 		}
 	}
 	nlit := int(f.b&0x1F) + 257
+	if nlit > maxLit {
+		return CorruptInputError(f.roffset)
+	}
 	f.b >>= 5
 	ndist := int(f.b&0x1F) + 1
+	// maxDist is 32, so ndist is always valid.
 	f.b >>= 5
 	nclen := int(f.b&0xF) + 4
+	// numCodes is 19, so nclen is always valid.
 	f.b >>= 4
 	f.nb -= 5 + 5 + 4
 
@@ -505,51 +490,49 @@ func (f *decompressor) huffmanBlock() {
 			return
 		}
 
-		p := f.hp - dist
-		if p < 0 {
-			p += len(f.hist)
-		}
-		for i := 0; i < length; i++ {
-			f.hist[f.hp] = f.hist[p]
-			f.hp++
-			p++
-			if f.hp == len(f.hist) {
-				// After flush continue copying out of history.
-				f.copyLen = length - (i + 1)
-				f.copyDist = dist
-				f.flush((*decompressor).copyHuff)
-				return
-			}
-			if p == len(f.hist) {
-				p = 0
-			}
+		f.copyLen, f.copyDist = length, dist
+		if f.copyHist() {
+			return
 		}
 	}
 	panic("unreached")
 }
 
-func (f *decompressor) copyHuff() {
-	length := f.copyLen
-	dist := f.copyDist
-	p := f.hp - dist
+// copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself.
+// It reports whether the f.hist buffer is full.
+func (f *decompressor) copyHist() bool {
+	p := f.hp - f.copyDist
 	if p < 0 {
 		p += len(f.hist)
 	}
-	for i := 0; i < length; i++ {
-		f.hist[f.hp] = f.hist[p]
-		f.hp++
-		p++
+	for f.copyLen > 0 {
+		n := f.copyLen
+		if x := len(f.hist) - f.hp; n > x {
+			n = x
+		}
+		if x := len(f.hist) - p; n > x {
+			n = x
+		}
+		forwardCopy(f.hist[f.hp:f.hp+n], f.hist[p:p+n])
+		p += n
+		f.hp += n
+		f.copyLen -= n
 		if f.hp == len(f.hist) {
-			f.copyLen = length - (i + 1)
+			// After flush continue copying out of history.
 			f.flush((*decompressor).copyHuff)
-			return
+			return true
 		}
 		if p == len(f.hist) {
 			p = 0
 		}
 	}
+	return false
+}
 
-	// Continue processing Huffman block.
+func (f *decompressor) copyHuff() {
+	if f.copyHist() {
+		return
+	}
 	f.huffmanBlock()
 }
 
@@ -584,9 +567,9 @@ func (f *decompressor) dataBlock() {
 	f.copyData()
 }
 
+// copyData copies f.copyLen bytes from the underlying reader into f.hist.
+// It pauses for reads when f.hist is full.
 func (f *decompressor) copyData() {
-	// Read f.dataLen bytes into history,
-	// pausing for reads as history fills.
 	n := f.copyLen
 	for n > 0 {
 		m := len(f.hist) - f.hp
@@ -640,23 +623,23 @@ func (f *decompressor) moreBits() error {
 
 // Read the next Huffman-encoded symbol from f according to h.
 func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
-	for n := uint(h.min); n <= uint(h.max); n++ {
-		lim := h.limit[n]
-		if lim == -1 {
-			continue
-		}
+	n := uint(h.min)
+	for {
 		for f.nb < n {
 			if err := f.moreBits(); err != nil {
 				return 0, err
 			}
 		}
-		v := int(f.b & uint32(1<<n-1))
-		v <<= 16 - n
-		v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits
-		if v <= lim {
+		chunk := h.chunks[f.b&(huffmanNumChunks-1)]
+		n = uint(chunk & huffmanCountMask)
+		if n > huffmanChunkBits {
+			chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
+			n = uint(chunk & huffmanCountMask)
+		}
+		if n <= f.nb {
 			f.b >>= n
 			f.nb -= n
-			return h.codes[v-h.base[n]], nil
+			return int(chunk >> huffmanValueShift), nil
 		}
 	}
 	return 0, CorruptInputError(f.roffset)
@@ -688,7 +671,10 @@ func makeReader(r io.Reader) Reader {
 // finished reading.
 func NewReader(r io.Reader) io.ReadCloser {
 	var f decompressor
+	f.bits = new([maxLit + maxDist]int)
+	f.codebits = new([numCodes]int)
 	f.r = makeReader(r)
+	f.hist = new([maxHist]byte)
 	f.step = (*decompressor).nextBlock
 	return &f
 }
@@ -700,8 +686,11 @@ func NewReader(r io.Reader) io.ReadCloser {
 // to read data compressed by NewWriterDict.
 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
 	var f decompressor
-	f.setDict(dict)
 	f.r = makeReader(r)
+	f.hist = new([maxHist]byte)
+	f.bits = new([maxLit + maxDist]int)
+	f.codebits = new([numCodes]int)
 	f.step = (*decompressor).nextBlock
+	f.setDict(dict)
 	return &f
 }