2 files changed, 37 insertions, 7 deletions
diff --git a/src/pkg/compress/bzip2/bzip2_test.go b/src/pkg/compress/bzip2/bzip2_test.go
index ada1f9a00..727249dc4 100644
--- a/src/pkg/compress/bzip2/bzip2_test.go
+++ b/src/pkg/compress/bzip2/bzip2_test.go
@@ -14,7 +14,7 @@ import (
 )
 
 func TestBitReader(t *testing.T) {
-	buf := bytes.NewBuffer([]byte{0xaa})
+	buf := bytes.NewReader([]byte{0xaa})
 	br := newBitReader(buf)
 	if n := br.ReadBits(1); n != 1 {
 		t.Errorf("read 1 wrong")
@@ -31,7 +31,7 @@ func TestBitReader(t *testing.T) {
 }
 
 func TestBitReaderLarge(t *testing.T) {
-	buf := bytes.NewBuffer([]byte{0x12, 0x34, 0x56, 0x78})
+	buf := bytes.NewReader([]byte{0x12, 0x34, 0x56, 0x78})
 	br := newBitReader(buf)
 	if n := br.ReadBits(32); n != 0x12345678 {
 		t.Errorf("got: %x want: %x", n, 0x12345678)
@@ -43,7 +43,7 @@ func readerFromHex(s string) io.Reader {
 	if err != nil {
 		panic("readerFromHex: bad input")
 	}
-	return bytes.NewBuffer(data)
+	return bytes.NewReader(data)
 }
 
 func decompressHex(s string) (out []byte, err error) {
@@ -177,7 +177,7 @@ const (
 
 var testfiles = []string{
 	// Digits is the digits of the irrational number e. Its decimal representation
-	// does not repeat, but there are only 10 posible digits, so it should be
+	// does not repeat, but there are only 10 possible digits, so it should be
 	// reasonably compressible.
 	digits: "testdata/e.txt.bz2",
 	// Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
@@ -191,7 +191,7 @@ func benchmarkDecode(b *testing.B, testfile int) {
 	}
 	b.SetBytes(int64(len(compressed)))
 	for i := 0; i < b.N; i++ {
-		r := bytes.NewBuffer(compressed)
+		r := bytes.NewReader(compressed)
 		io.Copy(ioutil.Discard, NewReader(r))
 	}
 }
@@ -201,7 +201,7 @@ func BenchmarkDecodeTwain(b *testing.B)  { benchmarkDecode(b, twain) }
 
 func TestBufferOverrun(t *testing.T) {
 	// Tests https://code.google.com/p/go/issues/detail?id=5747.
-	buffer := bytes.NewBuffer([]byte(bufferOverrunBase64))
+	buffer := bytes.NewReader([]byte(bufferOverrunBase64))
 	decoder := base64.NewDecoder(base64.StdEncoding, buffer)
 	decompressor := NewReader(decoder)
 	// This shouldn't panic.
diff --git a/src/pkg/compress/bzip2/huffman.go b/src/pkg/compress/bzip2/huffman.go
index 8f6b0c9ca..75a6223d8 100644
--- a/src/pkg/compress/bzip2/huffman.go
+++ b/src/pkg/compress/bzip2/huffman.go
@@ -190,7 +190,37 @@ func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIn
 	right := codes[firstRightIndex:]
 
 	if len(left) == 0 || len(right) == 0 {
-		return 0, StructuralError("superfluous level in Huffman tree")
+		// There is a superfluous level in the Huffman tree indicating
+		// a bug in the encoder. However, this bug has been observed in
+		// the wild so we handle it.
+
+		// If this function was called recursively then we know that
+		// len(codes) >= 2 because, otherwise, we would have hit the
+		// "leaf node" case, below, and not recursed.
+		//
+		// However, for the initial call it's possible that len(codes)
+		// is zero or one. Both cases are invalid because a zero length
+		// tree cannot encode anything and a length-1 tree can only
+		// encode EOF and so is superfluous. We reject both.
+		if len(codes) < 2 {
+			return 0, StructuralError("empty Huffman tree")
+		}
+
+		// In this case the recursion doesn't always reduce the length
+		// of codes so we need to ensure termination via another
+		// mechanism.
+		if level == 31 {
+			// Since len(codes) >= 2 the only way that the values
+			// can match at all 32 bits is if they are equal, which
+			// is invalid. This ensures that we never enter
+			// infinite recursion.
+			return 0, StructuralError("equal symbols in Huffman tree")
+		}
+
+		if len(left) == 0 {
+			return buildHuffmanNode(t, right, level+1)
+		}
+		return buildHuffmanNode(t, left, level+1)
 	}
 
 	nodeIndex = uint16(t.nextNode)