15 files changed, 1718 insertions, 135 deletions
diff --git a/src/pkg/compress/bzip2/Makefile b/src/pkg/compress/bzip2/Makefile
new file mode 100644
index 000000000..a4bceef16
--- /dev/null
+++ b/src/pkg/compress/bzip2/Makefile
@@ -0,0 +1,14 @@
+# Copyright 2011 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=compress/bzip2
+GOFILES=\
+	bit_reader.go\
+	bzip2.go\
+	huffman.go\
+	move_to_front.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/compress/bzip2/bit_reader.go b/src/pkg/compress/bzip2/bit_reader.go
new file mode 100644
index 000000000..50f0ec836
--- /dev/null
+++ b/src/pkg/compress/bzip2/bit_reader.go
@@ -0,0 +1,88 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bzip2
+
+import (
+	"bufio"
+	"io"
+	"os"
+)
+
+// bitReader wraps an io.Reader and provides the ability to read values,
+// bit-by-bit, from it. Its Read* methods don't return the usual os.Error
+// because the error handling was verbose. Instead, any error is kept and can
+// be checked afterwards.
+type bitReader struct {
+	r    byteReader
+	n    uint64
+	bits uint
+	err  os.Error
+}
+
+// bitReader needs to read bytes from an io.Reader. We attempt to cast the
+// given io.Reader to this interface and, if it doesn't already fit, we wrap in
+// a bufio.Reader.
+type byteReader interface {
+	ReadByte() (byte, os.Error)
+}
+
+func newBitReader(r io.Reader) bitReader {
+	byter, ok := r.(byteReader)
+	if !ok {
+		byter = bufio.NewReader(r)
+	}
+	return bitReader{r: byter}
+}
+
+// ReadBits64 reads the given number of bits and returns them in the
+// least-significant part of a uint64. In the event of an error, it returns 0
+// and the error can be obtained by calling Error().
+func (br *bitReader) ReadBits64(bits uint) (n uint64) {
+	for bits > br.bits {
+		b, err := br.r.ReadByte()
+		if err == os.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		if err != nil {
+			br.err = err
+			return 0
+		}
+		br.n <<= 8
+		br.n |= uint64(b)
+		br.bits += 8
+	}
+
+	// br.n looks like this (assuming that br.bits = 14 and bits = 6):
+	// Bit: 111111
+	//      5432109876543210
+	//
+	//         (6 bits, the desired output)
+	//        |-----|
+	//        V     V
+	//      0101101101001110
+	//        ^            ^
+	//        |------------|
+	//           br.bits (num valid bits)
+	//
+	// This the next line right shifts the desired bits into the
+	// least-significant places and masks off anything above.
+	n = (br.n >> (br.bits - bits)) & ((1 << bits) - 1)
+	br.bits -= bits
+	return
+}
+
+func (br *bitReader) ReadBits(bits uint) (n int) {
+	n64 := br.ReadBits64(bits)
+	return int(n64)
+}
+
+func (br *bitReader) ReadBit() bool {
+	n := br.ReadBits(1)
+	return n != 0
+}
+
+func (br *bitReader) Error() os.Error {
+	return br.err
+}
diff --git a/src/pkg/compress/bzip2/bzip2.go b/src/pkg/compress/bzip2/bzip2.go
new file mode 100644
index 000000000..9e97edec1
--- /dev/null
+++ b/src/pkg/compress/bzip2/bzip2.go
@@ -0,0 +1,390 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package bzip2 implements bzip2 decompression.
+package bzip2
+
+import (
+	"io"
+	"os"
+)
+
+// There's no RFC for bzip2. I used the Wikipedia page for reference and a lot
+// of guessing: http://en.wikipedia.org/wiki/Bzip2
+// The source code to pyflate was useful for debugging:
+// http://www.paul.sladen.org/projects/pyflate
+
+// A StructuralError is returned when the bzip2 data is found to be
+// syntactically invalid.
+type StructuralError string
+
+func (s StructuralError) String() string {
+	return "bzip2 data invalid: " + string(s)
+}
+
+// A reader decompresses bzip2 compressed data.
+type reader struct {
+	br        bitReader
+	setupDone bool // true if we have parsed the bzip2 header.
+	blockSize int  // blockSize in bytes, i.e. 900 * 1024.
+	eof       bool
+	buf       []byte    // stores Burrows-Wheeler transformed data.
+	c         [256]uint // the `C' array for the inverse BWT.
+	tt        []uint32  // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits.
+	tPos      uint32    // Index of the next output byte in tt.
+
+	preRLE      []uint32 // contains the RLE data still to be processed.
+	preRLEUsed  int      // number of entries of preRLE used.
+	lastByte    int      // the last byte value seen.
+	byteRepeats uint     // the number of repeats of lastByte seen.
+	repeats     uint     // the number of copies of lastByte to output.
+}
+
+// NewReader returns an io.Reader which decompresses bzip2 data from r.
+func NewReader(r io.Reader) io.Reader {
+	bz2 := new(reader)
+	bz2.br = newBitReader(r)
+	return bz2
+}
+
+const bzip2FileMagic = 0x425a // "BZ"
+const bzip2BlockMagic = 0x314159265359
+const bzip2FinalMagic = 0x177245385090
+
+// setup parses the bzip2 header.
+func (bz2 *reader) setup() os.Error {
+	br := &bz2.br
+
+	magic := br.ReadBits(16)
+	if magic != bzip2FileMagic {
+		return StructuralError("bad magic value")
+	}
+
+	t := br.ReadBits(8)
+	if t != 'h' {
+		return StructuralError("non-Huffman entropy encoding")
+	}
+
+	level := br.ReadBits(8)
+	if level < '1' || level > '9' {
+		return StructuralError("invalid compression level")
+	}
+
+	bz2.blockSize = 100 * 1024 * (int(level) - '0')
+	bz2.tt = make([]uint32, bz2.blockSize)
+	return nil
+}
+
+func (bz2 *reader) Read(buf []byte) (n int, err os.Error) {
+	if bz2.eof {
+		return 0, os.EOF
+	}
+
+	if !bz2.setupDone {
+		err = bz2.setup()
+		brErr := bz2.br.Error()
+		if brErr != nil {
+			err = brErr
+		}
+		if err != nil {
+			return 0, err
+		}
+		bz2.setupDone = true
+	}
+
+	n, err = bz2.read(buf)
+	brErr := bz2.br.Error()
+	if brErr != nil {
+		err = brErr
+	}
+	return
+}
+
+func (bz2 *reader) read(buf []byte) (n int, err os.Error) {
+	// bzip2 is a block based compressor, except that it has a run-length
+	// preprocessing step. The block based nature means that we can
+	// preallocate fixed-size buffers and reuse them. However, the RLE
+	// preprocessing would require allocating huge buffers to store the
+	// maximum expansion. Thus we process blocks all at once, except for
+	// the RLE which we decompress as required.
+
+	for (bz2.repeats > 0 || bz2.preRLEUsed < len(bz2.preRLE)) && n < len(buf) {
+		// We have RLE data pending.
+
+		// The run-length encoding works like this:
+		// Any sequence of four equal bytes is followed by a length
+		// byte which contains the number of repeats of that byte to
+		// include. (The number of repeats can be zero.) Because we are
+		// decompressing on-demand our state is kept in the reader
+		// object.
+
+		if bz2.repeats > 0 {
+			buf[n] = byte(bz2.lastByte)
+			n++
+			bz2.repeats--
+			if bz2.repeats == 0 {
+				bz2.lastByte = -1
+			}
+			continue
+		}
+
+		bz2.tPos = bz2.preRLE[bz2.tPos]
+		b := byte(bz2.tPos)
+		bz2.tPos >>= 8
+		bz2.preRLEUsed++
+
+		if bz2.byteRepeats == 3 {
+			bz2.repeats = uint(b)
+			bz2.byteRepeats = 0
+			continue
+		}
+
+		if bz2.lastByte == int(b) {
+			bz2.byteRepeats++
+		} else {
+			bz2.byteRepeats = 0
+		}
+		bz2.lastByte = int(b)
+
+		buf[n] = b
+		n++
+	}
+
+	if n > 0 {
+		return
+	}
+
+	// No RLE data is pending so we need to read a block.
+
+	br := &bz2.br
+	magic := br.ReadBits64(48)
+	if magic == bzip2FinalMagic {
+		br.ReadBits64(32) // ignored CRC
+		bz2.eof = true
+		return 0, os.EOF
+	} else if magic != bzip2BlockMagic {
+		return 0, StructuralError("bad magic value found")
+	}
+
+	err = bz2.readBlock()
+	if err != nil {
+		return 0, err
+	}
+
+	return bz2.read(buf)
+}
+
+// readBlock reads a bzip2 block. The magic number should already have been consumed.
+func (bz2 *reader) readBlock() (err os.Error) {
+	br := &bz2.br
+	br.ReadBits64(32) // skip checksum. TODO: check it if we can figure out what it is.
+	randomized := br.ReadBits(1)
+	if randomized != 0 {
+		return StructuralError("deprecated randomized files")
+	}
+	origPtr := uint(br.ReadBits(24))
+
+	// If not every byte value is used in the block (i.e., it's text) then
+	// the symbol set is reduced. The symbols used are stored as a
+	// two-level, 16x16 bitmap.
+	symbolRangeUsedBitmap := br.ReadBits(16)
+	symbolPresent := make([]bool, 256)
+	numSymbols := 0
+	for symRange := uint(0); symRange < 16; symRange++ {
+		if symbolRangeUsedBitmap&(1<<(15-symRange)) != 0 {
+			bits := br.ReadBits(16)
+			for symbol := uint(0); symbol < 16; symbol++ {
+				if bits&(1<<(15-symbol)) != 0 {
+					symbolPresent[16*symRange+symbol] = true
+					numSymbols++
+				}
+			}
+		}
+	}
+
+	// A block uses between two and six different Huffman trees.
+	numHuffmanTrees := br.ReadBits(3)
+	if numHuffmanTrees < 2 || numHuffmanTrees > 6 {
+		return StructuralError("invalid number of Huffman trees")
+	}
+
+	// The Huffman tree can switch every 50 symbols so there's a list of
+	// tree indexes telling us which tree to use for each 50 symbol block.
+	numSelectors := br.ReadBits(15)
+	treeIndexes := make([]uint8, numSelectors)
+
+	// The tree indexes are move-to-front transformed and stored as unary
+	// numbers.
+	mtfTreeDecoder := newMTFDecoderWithRange(numHuffmanTrees)
+	for i := range treeIndexes {
+		c := 0
+		for {
+			inc := br.ReadBits(1)
+			if inc == 0 {
+				break
+			}
+			c++
+		}
+		if c >= numHuffmanTrees {
+			return StructuralError("tree index too large")
+		}
+		treeIndexes[i] = uint8(mtfTreeDecoder.Decode(c))
+	}
+
+	// The list of symbols for the move-to-front transform is taken from
+	// the previously decoded symbol bitmap.
+	symbols := make([]byte, numSymbols)
+	nextSymbol := 0
+	for i := 0; i < 256; i++ {
+		if symbolPresent[i] {
+			symbols[nextSymbol] = byte(i)
+			nextSymbol++
+		}
+	}
+	mtf := newMTFDecoder(symbols)
+
+	numSymbols += 2 // to account for RUNA and RUNB symbols
+	huffmanTrees := make([]huffmanTree, numHuffmanTrees)
+
+	// Now we decode the arrays of code-lengths for each tree.
+	lengths := make([]uint8, numSymbols)
+	for i := 0; i < numHuffmanTrees; i++ {
+		// The code lengths are delta encoded from a 5-bit base value.
+		length := br.ReadBits(5)
+		for j := 0; j < numSymbols; j++ {
+			for {
+				if !br.ReadBit() {
+					break
+				}
+				if br.ReadBit() {
+					length--
+				} else {
+					length++
+				}
+			}
+			if length < 0 || length > 20 {
+				return StructuralError("Huffman length out of range")
+			}
+			lengths[j] = uint8(length)
+		}
+		huffmanTrees[i], err = newHuffmanTree(lengths)
+		if err != nil {
+			return err
+		}
+	}
+
+	selectorIndex := 1 // the next tree index to use
+	currentHuffmanTree := huffmanTrees[treeIndexes[0]]
+	bufIndex := 0 // indexes bz2.buf, the output buffer.
+	// The output of the move-to-front transform is run-length encoded and
+	// we merge the decoding into the Huffman parsing loop. These two
+	// variables accumulate the repeat count. See the Wikipedia page for
+	// details.
+	repeat := 0
+	repeat_power := 0
+
+	// The `C' array (used by the inverse BWT) needs to be zero initialised.
+	for i := range bz2.c {
+		bz2.c[i] = 0
+	}
+
+	decoded := 0 // counts the number of symbols decoded by the current tree.
+	for {
+		if decoded == 50 {
+			currentHuffmanTree = huffmanTrees[treeIndexes[selectorIndex]]
+			selectorIndex++
+			decoded = 0
+		}
+
+		v := currentHuffmanTree.Decode(br)
+		decoded++
+
+		if v < 2 {
+			// This is either the RUNA or RUNB symbol.
+			if repeat == 0 {
+				repeat_power = 1
+			}
+			repeat += repeat_power << v
+			repeat_power <<= 1
+
+			// This limit of 2 million comes from the bzip2 source
+			// code. It prevents repeat from overflowing.
+			if repeat > 2*1024*1024 {
+				return StructuralError("repeat count too large")
+			}
+			continue
+		}
+
+		if repeat > 0 {
+			// We have decoded a complete run-length so we need to
+			// replicate the last output symbol.
+			for i := 0; i < repeat; i++ {
+				b := byte(mtf.First())
+				bz2.tt[bufIndex] = uint32(b)
+				bz2.c[b]++
+				bufIndex++
+			}
+			repeat = 0
+		}
+
+		if int(v) == numSymbols-1 {
+			// This is the EOF symbol. Because it's always at the
+			// end of the move-to-front list, and nevers gets moved
+			// to the front, it has this unique value.
+			break
+		}
+
+		// Since two metasymbols (RUNA and RUNB) have values 0 and 1,
+		// one would expect |v-2| to be passed to the MTF decoder.
+		// However, the front of the MTF list is never referenced as 0,
+		// it's always referenced with a run-length of 1. Thus 0
+		// doesn't need to be encoded and we have |v-1| in the next
+		// line.
+		b := byte(mtf.Decode(int(v - 1)))
+		bz2.tt[bufIndex] = uint32(b)
+		bz2.c[b]++
+		bufIndex++
+	}
+
+	if origPtr >= uint(bufIndex) {
+		return StructuralError("origPtr out of bounds")
+	}
+
+	// We have completed the entropy decoding. Now we can perform the
+	// inverse BWT and setup the RLE buffer.
+	bz2.preRLE = bz2.tt[:bufIndex]
+	bz2.preRLEUsed = 0
+	bz2.tPos = inverseBWT(bz2.preRLE, origPtr, bz2.c[:])
+	bz2.lastByte = -1
+	bz2.byteRepeats = 0
+	bz2.repeats = 0
+
+	return nil
+}
+
+// inverseBWT implements the inverse Burrows-Wheeler transform as described in
+// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf, section 4.2.
+// In that document, origPtr is called `I' and c is the `C' array after the
+// first pass over the data. It's an argument here because we merge the first
+// pass with the Huffman decoding.
+//
+// This also implements the `single array' method from the bzip2 source code
+// which leaves the output, still shuffled, in the bottom 8 bits of tt with the
+// index of the next byte in the top 24-bits. The index of the first byte is
+// returned.
+func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32 {
+	sum := uint(0)
+	for i := 0; i < 256; i++ {
+		sum += c[i]
+		c[i] = sum - c[i]
+	}
+
+	for i := range tt {
+		b := tt[i] & 0xff
+		tt[c[b]] |= uint32(i) << 8
+		c[b]++
+	}
+
+	return tt[origPtr] >> 8
+}
diff --git a/src/pkg/compress/bzip2/bzip2_test.go b/src/pkg/compress/bzip2/bzip2_test.go
new file mode 100644
index 000000000..156eea83f
--- /dev/null
+++ b/src/pkg/compress/bzip2/bzip2_test.go
@@ -0,0 +1,158 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bzip2
+
+import (
+	"bytes"
+	"encoding/hex"
+	"io"
+	"io/ioutil"
+	"os"
+	"testing"
+)
+
+func TestBitReader(t *testing.T) {
+	buf := bytes.NewBuffer([]byte{0xaa})
+	br := newBitReader(buf)
+	if n := br.ReadBits(1); n != 1 {
+		t.Errorf("read 1 wrong")
+	}
+	if n := br.ReadBits(1); n != 0 {
+		t.Errorf("read 2 wrong")
+	}
+	if n := br.ReadBits(1); n != 1 {
+		t.Errorf("read 3 wrong")
+	}
+	if n := br.ReadBits(1); n != 0 {
+		t.Errorf("read 4 wrong")
+	}
+}
+
+func TestBitReaderLarge(t *testing.T) {
+	buf := bytes.NewBuffer([]byte{0x12, 0x34, 0x56, 0x78})
+	br := newBitReader(buf)
+	if n := br.ReadBits(32); n != 0x12345678 {
+		t.Errorf("got: %x want: %x", n, 0x12345678)
+	}
+}
+
+func readerFromHex(s string) io.Reader {
+	data, err := hex.DecodeString(s)
+	if err != nil {
+		panic("readerFromHex: bad input")
+	}
+	return bytes.NewBuffer(data)
+}
+
+func decompressHex(s string) (out []byte, err os.Error) {
+	r := NewReader(readerFromHex(s))
+	return ioutil.ReadAll(r)
+}
+
+func TestHelloWorldBZ2(t *testing.T) {
+	out, err := decompressHex(helloWorldBZ2Hex)
+	if err != nil {
+		t.Errorf("error from Read: %s", err)
+		return
+	}
+
+	if !bytes.Equal(helloWorld, out) {
+		t.Errorf("got %x, want %x", out, helloWorld)
+	}
+}
+
+func testZeros(t *testing.T, inHex string, n int) {
+	out, err := decompressHex(inHex)
+	if err != nil {
+		t.Errorf("error from Read: %s", err)
+		return
+	}
+
+	expected := make([]byte, n)
+
+	if !bytes.Equal(expected, out) {
+		allZeros := true
+		for _, b := range out {
+			if b != 0 {
+				allZeros = false
+				break
+			}
+		}
+		t.Errorf("incorrect result, got %d bytes (allZeros: %t)", len(out), allZeros)
+	}
+}
+
+func Test32Zeros(t *testing.T) {
+	testZeros(t, thirtyTwoZerosBZ2Hex, 32)
+}
+
+func Test1MBZeros(t *testing.T) {
+	testZeros(t, oneMBZerosBZ2Hex, 1024*1024)
+}
+
+func testRandomData(t *testing.T, compressedHex, uncompressedHex string) {
+	out, err := decompressHex(compressedHex)
+	if err != nil {
+		t.Errorf("error from Read: %s", err)
+		return
+	}
+
+	expected, _ := hex.DecodeString(uncompressedHex)
+
+	if !bytes.Equal(out, expected) {
+		t.Errorf("incorrect result\ngot:  %x\nwant: %x", out, expected)
+	}
+}
+
+func TestRandomData1(t *testing.T) {
+	testRandomData(t, randBZ2Hex, randHex)
+}
+
+func TestRandomData2(t *testing.T) {
+	// This test involves several repeated bytes in the output, but they
+	// should trigger RLE decoding.
+	testRandomData(t, rand2BZ2Hex, rand2Hex)
+}
+
+func TestRandomData3(t *testing.T) {
+	// This test uses the full range of symbols.
+	testRandomData(t, rand3BZ2Hex, rand3Hex)
+}
+
+func Test1MBSawtooth(t *testing.T) {
+	out, err := decompressHex(oneMBSawtoothBZ2Hex)
+	if err != nil {
+		t.Errorf("error from Read: %s", err)
+		return
+	}
+
+	expected := make([]byte, 1024*1024)
+
+	for i := range expected {
+		expected[i] = byte(i)
+	}
+
+	if !bytes.Equal(out, expected) {
+		t.Error("incorrect result")
+	}
+}
+
+const helloWorldBZ2Hex = "425a68393141592653594eece83600000251800010400006449080200031064c4101a7a9a580bb9431f8bb9229c28482776741b0"
+
+var helloWorld = []byte("hello world\n")
+
+const thirtyTwoZerosBZ2Hex = "425a6839314159265359b5aa5098000000600040000004200021008283177245385090b5aa5098"
+const oneMBZerosBZ2Hex = "425a683931415926535938571ce50008084000c0040008200030cc0529a60806c4201e2ee48a70a12070ae39ca"
+
+const randBZ2Hex = "425a6839314159265359905d990d0001957fffffffffffafffffffffffffffffbfff6fffdfffffffffffffffffffffffffffffc002b6dd75676ed5b77720098320d11a64626981323d4da47a83131a13d09e8040f534cd4f4d27a464d193008cd09804601347a980026350c9886234d36864193d1351b44c136919e90340d26127a4cd264c32023009898981310c0344c340027a8303427a99a04c00003534c230d034f5006468d268cf54d36a3009a69a62626261311b40026013d34201a6934c9a604c98ca6c8460989fa9346234d30d3469a2604fd4131a7aa6d0046043d4c62098479269e89e835190d018d4c046001a11e801a0264792321932308c43a130688c260d46686804cd01a9e80981193684c6a68c00000004c4c20c04627a4c0000260003400d04c0681a01334026009a6f48041466132581ec5212b081d96b0effc16543e2228b052fcd30f2567ee8d970e0f10aabca68dd8270591c376cfc1baae0dba00aaff2d6caf6b211322c997cc18eaee5927f75185336bf907021324c71626c1dd20e22b9b0977f05d0f901eaa51db9fbaf7c603b4c87bc82890e6dd7e61d0079e27ec050dd788fd958152061cd01e222f9547cb9efc465d775b6fc98bac7d387bffd151ae09dadf19494f7a638e2eae58e550faba5fe6820ea520eb986096de4e527d80def3ba625e71fbefdcf7e7844e0a25d29b52dcd1344fca083737d42692aab38d230485f3c8ed54c2ed31f15cf0270c8143765b10b92157233fa1dfe0d7ce8ffe70b8b8f7250071701dfe9f1c94de362c9031455951c93eb098a6b50ee45c6131fefc3b6f9643e21f4adc59497138e246f5c57d834aa67c4f10d8bd8b3908d8130dd7388409c299a268eab3664fa4907c5c31574874bd8d388a4ab22b339660804e53e1b8d05867d40e3082560608d35d5d2c6054e8bab23da28f61f83efd41d25529ad6ea15fb50505cacfabb0902166427354ca3830a2c8415f21b19e592690fbe447020d685a4bcd16ecc4ff1a1c0e572627d0ef6265c008a43fc243240541061ed7840606be466d1c0dac2c53250ed567507d926c844154560d631960c65e15157829b2c7f16859f111a3a8cb72bf24ffa57a680c3be67b1be67c8dd8aea73ac2437a78df5b686d427080ebc01bd30b71a49f6ea31dc0f08e4849e38face96717690239538bc08b6cc5aa8d467cb9c36aa83d40ac7e58bddbfa185b22065e89a86c0145569d9e23726651aec49e31588d70f40fe9a4449dcf4f89eac220171e9c938e803dc195679651004b79ad33cc0c13aeeba5941b33ffeeb8fbe16e76c7811445c67b4269c90479433ddf9e8ed1d00c166b6c17217fb22c3ef1b0c1c7e28e185446a111c37f1ea6c07a59fbcc6546ecc6968d36ba58bc5489a5640647e426b0c39350cb6f07d5dc7a717648c4ec7f841467597ae1f65f408fd2d9940a4b1b860b3c9ae351dcae0b4425f7e8538710f2e40b7f70d13b51ac05ccc6ecda8264a88cad2d721d18132a9b9110a9e759c2483c77dcefc7e464ec88588174cb0c9abff93230ea0bed8decdd8ed8bfe2b5df0a253803678df04fab44c03b9ab7cc97d6e6d6fd0c4c840ce0efc498436f453bbb181603459471f2b588724592b222ec990614db530e10cadd84705621cfdd9261fa44a5f5806a2d74b575056b3c915255c65678f9c16e6dc00a99180fef1a840aff0e842ac02731080cc92782538360a60a727991013984da4fad95f79d5030677b7528d076b2483685fca4429edf804682fdc110dfc2f7c30e23e20a72e039108a0ad6fdee2f76985a4b4be4f5afc6101bf9d5042b657a05dc914e1424241766434"
+const randHex = "c95138082bdf2b9bfa5b1072b23f729735d42c785eeb94320fb14c265b9c2ca421d01a3db986df1ac2acde5a0e6bf955d6f95e61261540905928e195f1a66644cc7f37281744fff4dc6df35566a494c41a8167151950eb74f5fc45f85ad0e5ed28b49adfe218aa7ec1707e8e1d55825f61f72beda3b4c006b8c9188d7336a5d875329b1b58c27cc4e89ecbae02c7712400c39dd131d2c6de82e2863da51d472bdfb21ecce62cc9cf769ed28aedc7583d755da45a0d90874bda269dd53283a9bdfd05f95fc8e9a304bb338ea1a2111894678c18134f17d31a15d9bfc1237894650f3e715e2548639ecbddb845cfe4a46a7b3a3c540f48629488e8c869f1e9f3f4c552243a8105b20eb8e264994214349dae83b165fd6c2a5b8e83fce09fc0a80d3281c8d53a9a08095bd19cbc1388df23975646ed259e003d39261ee68cbece8bcf32971f7fe7e588e8ba8f5e8597909abaea693836a79a1964050ed910a45a0f13a58cd2d3ae18992c5b23082407fd920d0bf01e33118a017bb5e39f44931346845af52128f7965206759433a346034ea481671f501280067567619f5ecef6cded077f92ed7f3b3ce8e308c80f34ba06939e9303f91b4318c8c1dd4cc223c1f057ac0c91211c629cd30e46ee9ec1d9fd493086b7bc2bc83e33f08749a5d430b0ed4f79d70f481940c9b0930b16321886a0df4fa5a1465d5208c7d3494a7987d9a5e42aa256f0c9523947f8318d0ef0af3d59a45cfc2418d0785c9a548b32b81e7de18be7d55a69a4c156bbb3d7579c0ac8e9c72b24646e54b0d0e8725f8f49fb44ae3c6b9d0287be118586255a90a4a83483ed0328518037e52aa959c5748ed83e13023e532306be98b8288da306bbb040bcf5d92176f84a9306dc6b274b040370b61d71fde58dd6d20e6fee348eae0c54bd0a5a487b2d005f329794f2a902c296af0a4c1f638f63292a1fa18e006c1b1838636f4de71c73635b25660d32e88a0917e1a5677f6a02ca65585b82cbd99fb4badbfa97a585da1e6cadf6737b4ec6ca33f245d66ee6a9fae6785d69b003c17b9fc6ec34fe5824ab8caae5e8e14dc6f9e116e7bf4a60c04388783c8ae929e1b46b3ef3bbe81b38f2fa6da771bf39dfba2374d3d2ed356b8e2c42081d885a91a3afb2f31986d2f9873354c48cf5448492c32e62385af423aa4f83db6d1b2669650379a1134b0a04cbca0862d6f9743c791cbb527d36cd5d1f0fc7f503831c8bd1b7a0ef8ae1a5ed1155dfdd9e32b6bb33138112d3d476b802179cb85a2a6c354ccfed2f31604fbd8d6ec4baf9f1c8454f72c6588c06a7df3178c43a6970bfa02dd6f74cb5ec3b63f9eddaa17db5cbf27fac6de8e57c384afd0954179f7b5690c3bee42abc4fa79b4b12101a9cf5f0b9aecdda945def0bd04163237247d3539850e123fe18139f316fa0256d5bd2faa8"
+
+const oneMBSawtoothBZ2Hex = "425a683931415926535971931ea00006ddffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe007de00000000000000024c00130001300000000000000000000000000000000000000000000000000000000126000980009800000000000000000000000000000000000000000000000000000000930004c0004c000000000000000000000000000000000000000000000000000000004980026000260000000000000000000000000000000000000000000000000000000009aaaaa0000000000000000000000000000000000000000000000000000000000000000498002600026000000000000000000000000000000000000000000000000000000007fc42271980d044c0a822607411304a08982d044c1a82260f411308a08984d044c2a82261741130ca08986d044c3a82261f411310a08988d044c4a822627411314a0898ad044c5a82262f411318a0898cd044c6a82263741131ca0898ed044c7a82263f411320a08990d044c8a822647411324a08992d044c9a82264f411328a08994d044caa82265741132ca08996d044cba82265f411330a08998d044cca822667411334a0899ad044cda82266f411338a0899cd044cea82267741133ca0899ed044cfa82267f411340a089a0d044d0a822687411344a089a2d044d1a82268f411348a089a4d044d2a82269741134ca089a6d044d3a82269f411350a089a8d044d4a8226a7411354a089aad044d5a8226af411358a089acd044d6a8226b741135ca089aed044d7a8226bf411360a089b0d044d8a8226c7411364a089b2d044d9a8226cf411368a089b4d044daa8226d741136ca089b6d044dba8226df411370a089b8d044dca8226e7411374a089bad044dda8226ef411378a089bcd044dea8226f741137ca089bed044dfa8226ff411380a089c0d044e0a822707411384a089c2d044e1a82270f411388a089c4d044e2a82271741138ca089c59089c69089c71089c79089c81089c89089c91089c99089ca1089ca9089cb1089cb9089cc1089cc9089cd1089cd9089ce1089ce9089cf1089cf9089d01089d09089d11089d19089d21089d29089d31089d39089d41089d49089d51089d59089d61089d69089d71089d79089d81089d89089d91089d99089da1089da9089db1089db9089dc1089dc9089dd1089dd9089de1089de9089df1089df9089e01089e09089e11089e19089e21089e29089e31089e39089e41089e49089e51089e59089e61089e69089e71089e79089e81089e89089e91089e99089ea1089ea9089eb1089eb9089ec1089ec9089ed1089ed9089ee1089ee9089ef1089ef9089f01089f09089f11089f19089f21089f29089f31089f39089f41089f49089f51089f59089f61089f69089f71089f79089f81089f89089f91089f99089fa1089fa9089fb1089fb9089fc1089fc9089fd1089fd9089fe1089fe9089ff1089ff98a0ac9329acf23ba884804fdd3ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0034f800000000000024c00130001300000000000000000000000000000000000000000000000000000000126000980009800000000000000000000000000000000000000000000000000000000930004c0004c000000000000000000000000000000000000000000000000000000004980026000260000000000000000000000000000000000000000000000000000000024c0013000130000000000000000000000000000000000000000000000000000000002955540000000000000000000000000000000000000000000000000000000000000001ff108c00846024230221181908c108460a4230621183908c20846124230a21185908c308461a4230e21187908c40846224231221189908c508462a423162118b908c60846324231a2118d908c708463a4231e2118f908c80846424232221191908c908464a4232621193908ca0846524232a21195908cb08465a4232e21197908cc0846624233221199908cd08466a423362119b908ce0846724233a2119d908cf08467a4233e2119f908d008468242342211a1908d108468a42346211a3908d20846924234a211a5908d308469a4234e211a7908d40846a242352211a9908d50846aa42356211ab908d60846b24235a211ad908d70846ba4235e211af908d80846c242362211b1908d90846ca42366211b3908da0846d24236a211b5908db0846da4236e211b7908dc0846e242372211b9908dd0846ea42376211bb908de0846f24237a211bd908df0846fa4237e211bf908e008470242382211c1908e108470a42386211c3908e20847124238a211c5908e2f8c211c6c8471d211c7c84721211c8c84725211c9c84729211cac8472d211cbc84731211ccc84735211cdc84739211cec8473d211cfc84741211d0c84745211d1c84749211d2c8474d211d3c84751211d4c84755211d5c84759211d6c8475d211d7c84761211d8c84765211d9c84769211dac8476d211dbc84771211dcc84775211ddc84779211dec8477d211dfc84781211e0c84785211e1c84789211e2c8478d211e3c84791211e4c84795211e5c84799211e6c8479d211e7c847a1211e8c847a5211e9c847a9211eac847ad211ebc847b1211ecc847b5211edc847b9211eec847bd211efc847c1211f0c847c5211f1c847c9211f2c847cd211f3c847d1211f4c847d5211f5c847d9211f6c847dd211f7c847e1211f8c847e5211f9c847e9211fac847ed211fbc847f1211fcc847f5211fdc847f9211fec847fd211ff8bb9229c284803a8b6248"
+
+const rand2BZ2Hex = "425a6839314159265359d992d0f60000137dfe84020310091c1e280e100e042801099210094806c0110002e70806402000546034000034000000f2830000032000d3403264049270eb7a9280d308ca06ad28f6981bee1bf8160727c7364510d73a1e123083421b63f031f63993a0f40051fbf177245385090d992d0f60"
+const rand2Hex = "92d5652616ac444a4a04af1a8a3964aca0450d43d6cf233bd03233f4ba92f8719e6c2a2bd4f5f88db07ecd0da3a33b263483db9b2c158786ad6363be35d17335ba"
+
+const rand3BZ2Hex = "425a68393141592653593be669d00000327ffffffffffffffffffffffffffffffffffff7ffffffffffffffffffffffffffffffc002b3b2b1b6e2bae400004c00132300004c0d268c004c08c0130026001a008683234c0684c34008c230261a04c0260064d07a8d00034000d27a1268c9931a8d327a3427a41faa69ea0da264c1a34219326869b51b49a6469a3268c689fa53269a62794687a9a68f5189994c9e487a8f534fd49a3d34043629e8c93d04da4f4648d30d4f44d3234c4d3023d0840680984d309934c234d3131a000640984f536a6132601300130130c8d00d04d1841ea7a8d31a02609b40023460010c01a34d4c1a0d04d3069306810034d0d0d4c0046130d034d0131a9a64d321804c68003400098344c13000991808c0001a00000000098004d3d4da4604c47a13012140aadf8d673c922c607ef6212a8c0403adea4b28aee578900e653b9cdeb8d11e6b838815f3ebaad5a01c5408d84a332170aff8734d4e06612d3c2889f31925fb89e33561f5100ae89b1f7047102e729373d3667e58d73aaa80fa7be368a1cc2dadd81d81ec8e1b504bd772ca31d03649269b01ceddaca07bf3d4eba24de141be3f86f93601e03714c0f64654671684f9f9528626fd4e1b76753dc0c54b842486b8d59d8ab314e86ca818e7a1f079463cbbd70d9b79b283c7edc419406311022e4be98c2c1374df9cdde2d008ce1d00e5f06ad1024baf555631f70831fc1023034e62be7c4bcb648caf276963ffa20e96bb50377fe1c113da0db4625b50741c35a058edb009c6ee5dbf93b8a6b060eec568180e8db791b82aab96cbf4326ca98361461379425ba8dcc347be670bdba7641883e5526ae3d833f6e9cb9bac9557747c79e206151072f7f0071dff3880411846f66bf4075c7462f302b53cb3400a74cf35652ad5641ed33572fd54e7ed7f85f58a0acba89327e7c6be5c58cb71528b99df2431f1d0358f8d28d81d95292da631fb06701decabb205fac59ff0fb1df536afc681eece6ea658c4d9eaa45f1342aa1ff70bdaff2ddaf25ec88c22f12829a0553db1ec2505554cb17d7b282e213a5a2aa30431ded2bce665bb199d023840832fedb2c0c350a27291407ff77440792872137df281592e82076a05c64c345ffb058c64f7f7c207ef78420b7010520610f17e302cc4dfcfaef72a0ed091aab4b541eb0531bbe941ca2f792bf7b31ca6162882b68054a8470115bc2c19f2df2023f7800432b39b04d3a304e8085ba3f1f0ca5b1ba4d38d339e6084de979cdea6d0e244c6c9fa0366bd890621e3d30846f5e8497e21597b8f29bbf52c961a485dfbea647600da0fc1f25ce4d203a8352ece310c39073525044e7ac46acf2ed9120bae1b4f6f02364abfe343f80b290983160c103557af1c68416480d024cc31b6c06cfec011456f1e95c420a12b48b1c3fe220c2879a982fb099948ac440db844b9a112a5188c7783fd3b19593290785f908d95c9db4b280bafe89c1313aeec24772046d9bc089645f0d182a21184e143823c5f52de50e5d7e98d3d7ab56f5413bbccd1415c9bcff707def475b643fb7f29842582104d4cc1dbaaca8f10a2f44273c339e0984f2b1e06ab2f0771db01fafa8142298345f3196f23e5847bda024034b6f59b11c29e981c881456e40d211929fd4f766200258aad8212016322bd5c605790dcfdf1bd2a93d99c9b8f498722d311d7eae7ff420496a31804c55f4759a7b13aaaf5f7ce006c3a8a998897d5e0a504398c2b627852545baf440798bcc5cc049357cf3f17d9771e4528a1af3d77dc794a11346e1bdf5efe37a405b127b4c43b616d61fbc5dc914e14240ef99a7400"
+const rand3Hex = "1744b384d68c042371244e13500d4bfb98c6244e3d71a5b700224420b59c593553f33bd786e3d0ce31626f511bc985f59d1a88aa38ba8ad6218d306abee60dd9172540232b95be1af146c69e72e5fde667a090dc3f93bdc5c5af0ab80acdbaa7a505f628c59dc0247b31a439cacf5010a94376d71521df08c178b02fb96fdb1809144ea38c68536187c53201fea8631fb0a880b4451ccdca7cc61f6aafca21cc7449d920599db61789ac3b1e164b3390124f95022aeea39ccca3ec1053f4fa10de2978e2861ea58e477085c2220021a0927aa94c5d0006b5055abba340e4f9eba22e969978dfd18e278a8b89d877328ae34268bc0174cfe211954c0036f078025217d1269fac1932a03b05a0b616012271bbe1fb554171c7a59b196d8a4479f45a77931b5d97aaf6c0c673cbe597b79b96e2a0c1eae2e66e46ccc8c85798e23ffe972ebdaa3f6caea243c004e60321eb47cd79137d78fd0613be606feacc5b3637bdc96a89c13746db8cad886f3ccf912b2178c823bcac395f06d28080269bdca2debf3419c66c690fd1adcfbd53e32e79443d7a42511a84cb22ca94fffad9149275a075b2f8ae0b021dcde9bf62b102db920733b897560518b06e1ad7f4b03458493ddaa7f4fa2c1609f7a1735aeeb1b3e2cea3ab45fc376323cc91873b7e9c90d07c192e38d3f5dfc9bfab1fd821c854da9e607ea596c391c7ec4161c6c4493929a8176badaa5a5af7211c623f29643a937677d3df0da9266181b7c4da5dd40376db677fe8f4a1dc456adf6f33c1e37cec471dd318c2647644fe52f93707a77da7d1702380a80e14cc0fdce7bf2eed48a529090bae0388ee277ce6c7018c5fb00b88362554362205c641f0d0fab94fd5b8357b5ff08b207fee023709bc126ec90cfb17c006754638f8186aaeb1265e80be0c1189ec07d01d5f6f96cb9ce82744147d18490de7dc72862f42f024a16968891a356f5e7e0e695d8c933ba5b5e43ad4c4ade5399bc2cae9bb6189b7870d7f22956194d277f28b10e01c10c6ffe3e065f7e2d6d056aa790db5649ca84dc64c35566c0af1b68c32b5b7874aaa66467afa44f40e9a0846a07ae75360a641dd2acc69d93219b2891f190621511e62a27f5e4fbe641ece1fa234fc7e9a74f48d2a760d82160d9540f649256b169d1fed6fbefdc491126530f3cbad7913e19fbd7aa53b1e243fbf28d5f38c10ebd77c8b986775975cc1d619efb27cdcd733fa1ca36cffe9c0a33cc9f02463c91a886601fd349efee85ef1462065ef9bd2c8f533220ad93138b8382d5938103ab25b2d9af8ae106e1211eb9b18793fba033900c809c02cd6d17e2f3e6fc84dae873411f8e87c3f0a8f1765b7825d185ce3730f299c3028d4a62da9ee95c2b870fb70c79370d485f9d5d9acb78926d20444033d960524d2776dc31988ec7c0dbf23b9905d"
diff --git a/src/pkg/compress/bzip2/huffman.go b/src/pkg/compress/bzip2/huffman.go
new file mode 100644
index 000000000..732bc4a21
--- /dev/null
+++ b/src/pkg/compress/bzip2/huffman.go
@@ -0,0 +1,223 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bzip2
+
+import (
+	"os"
+	"sort"
+)
+
+// A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a
+// symbol.
+type huffmanTree struct {
+	// nodes contains all the non-leaf nodes in the tree. nodes[0] is the
+	// root of the tree and nextNode contains the index of the next element
+	// of nodes to use when the tree is being constructed.
+	nodes    []huffmanNode
+	nextNode int
+}
+
+// A huffmanNode is a node in the tree. left and right contain indexes into the
+// nodes slice of the tree. If left or right is invalidNodeValue then the child
+// is a left node and its value is in leftValue/rightValue.
+//
+// The symbols are uint16s because bzip2 encodes not only MTF indexes in the
+// tree, but also two magic values for run-length encoding and an EOF symbol.
+// Thus there are more than 256 possible symbols.
+type huffmanNode struct {
+	left, right           uint16
+	leftValue, rightValue uint16
+}
+
+// invalidNodeValue is an invalid index which marks a leaf node in the tree.
+const invalidNodeValue = 0xffff
+
+// Decode reads bits from the given bitReader and navigates the tree until a
+// symbol is found.
+func (t huffmanTree) Decode(br *bitReader) (v uint16) {
+	nodeIndex := uint16(0) // node 0 is the root of the tree.
+
+	for {
+		node := &t.nodes[nodeIndex]
+		bit := br.ReadBit()
+		// bzip2 encodes left as a true bit.
+		if bit {
+			// left
+			if node.left == invalidNodeValue {
+				return node.leftValue
+			}
+			nodeIndex = node.left
+		} else {
+			// right
+			if node.right == invalidNodeValue {
+				return node.rightValue
+			}
+			nodeIndex = node.right
+		}
+	}
+
+	panic("unreachable")
+}
+
+// newHuffmanTree builds a Huffman tree from a slice containing the code
+// lengths of each symbol. The maximum code length is 32 bits.
+func newHuffmanTree(lengths []uint8) (huffmanTree, os.Error) {
+	// There are many possible trees that assign the same code length to
+	// each symbol (consider reflecting a tree down the middle, for
+	// example). Since the code length assignments determine the
+	// efficiency of the tree, each of these trees is equally good. In
+	// order to minimise the amount of information needed to build a tree
+	// bzip2 uses a canonical tree so that it can be reconstructed given
+	// only the code length assignments.
+
+	if len(lengths) < 2 {
+		panic("newHuffmanTree: too few symbols")
+	}
+
+	var t huffmanTree
+
+	// First we sort the code length assignments by ascending code length,
+	// using the symbol value to break ties.
+	pairs := huffmanSymbolLengthPairs(make([]huffmanSymbolLengthPair, len(lengths)))
+	for i, length := range lengths {
+		pairs[i].value = uint16(i)
+		pairs[i].length = length
+	}
+
+	sort.Sort(pairs)
+
+	// Now we assign codes to the symbols, starting with the longest code.
+	// We keep the codes packed into a uint32, at the most-significant end.
+	// So branches are taken from the MSB downwards. This makes it easy to
+	// sort them later.
+	code := uint32(0)
+	length := uint8(32)
+
+	codes := huffmanCodes(make([]huffmanCode, len(lengths)))
+	for i := len(pairs) - 1; i >= 0; i-- {
+		if length > pairs[i].length {
+			// If the code length decreases we shift in order to
+			// zero any bits beyond the end of the code.
+			length >>= 32 - pairs[i].length
+			length <<= 32 - pairs[i].length
+			length = pairs[i].length
+		}
+		codes[i].code = code
+		codes[i].codeLen = length
+		codes[i].value = pairs[i].value
+		// We need to 'increment' the code, which means treating |code|
+		// like a |length| bit number.
+		code += 1 << (32 - length)
+	}
+
+	// Now we can sort by the code so that the left half of each branch are
+	// grouped together, recursively.
+	sort.Sort(codes)
+
+	t.nodes = make([]huffmanNode, len(codes))
+	_, err := buildHuffmanNode(&t, codes, 0)
+	return t, err
+}
+
+// huffmanSymbolLengthPair contains a symbol and its code length.
+type huffmanSymbolLengthPair struct {
+	value  uint16
+	length uint8
+}
+
+// huffmanSymbolLengthPair is used to provide an interface for sorting.
+type huffmanSymbolLengthPairs []huffmanSymbolLengthPair
+
+func (h huffmanSymbolLengthPairs) Len() int {
+	return len(h)
+}
+
+func (h huffmanSymbolLengthPairs) Less(i, j int) bool {
+	if h[i].length < h[j].length {
+		return true
+	}
+	if h[i].length > h[j].length {
+		return false
+	}
+	if h[i].value < h[j].value {
+		return true
+	}
+	return false
+}
+
+func (h huffmanSymbolLengthPairs) Swap(i, j int) {
+	h[i], h[j] = h[j], h[i]
+}
+
+// huffmanCode contains a symbol, its code and code length.
+type huffmanCode struct {
+	code    uint32
+	codeLen uint8
+	value   uint16
+}
+
+// huffmanCodes is used to provide an interface for sorting.
+type huffmanCodes []huffmanCode
+
+func (n huffmanCodes) Len() int {
+	return len(n)
+}
+
+func (n huffmanCodes) Less(i, j int) bool {
+	return n[i].code < n[j].code
+}
+
+func (n huffmanCodes) Swap(i, j int) {
+	n[i], n[j] = n[j], n[i]
+}
+
+// buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in
+// the Huffman tree at the given level. It returns the index of the newly
+// constructed node.
+func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIndex uint16, err os.Error) {
+	test := uint32(1) << (31 - level)
+
+	// We have to search the list of codes to find the divide between the left and right sides.
+	firstRightIndex := len(codes)
+	for i, code := range codes {
+		if code.code&test != 0 {
+			firstRightIndex = i
+			break
+		}
+	}
+
+	left := codes[:firstRightIndex]
+	right := codes[firstRightIndex:]
+
+	if len(left) == 0 || len(right) == 0 {
+		return 0, StructuralError("superfluous level in Huffman tree")
+	}
+
+	nodeIndex = uint16(t.nextNode)
+	node := &t.nodes[t.nextNode]
+	t.nextNode++
+
+	if len(left) == 1 {
+		// leaf node
+		node.left = invalidNodeValue
+		node.leftValue = left[0].value
+	} else {
+		node.left, err = buildHuffmanNode(t, left, level+1)
+	}
+
+	if err != nil {
+		return
+	}
+
+	if len(right) == 1 {
+		// leaf node
+		node.right = invalidNodeValue
+		node.rightValue = right[0].value
+	} else {
+		node.right, err = buildHuffmanNode(t, right, level+1)
+	}
+
+	return
+}
diff --git a/src/pkg/compress/bzip2/move_to_front.go b/src/pkg/compress/bzip2/move_to_front.go
new file mode 100644
index 000000000..0ed19dec3
--- /dev/null
+++ b/src/pkg/compress/bzip2/move_to_front.go
@@ -0,0 +1,105 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bzip2
+
+// moveToFrontDecoder implements a move-to-front list. Such a list is an
+// efficient way to transform a string with repeating elements into one with
+// many small valued numbers, which is suitable for entropy encoding. It works
+// by starting with an initial list of symbols and references symbols by their
+// index into that list. When a symbol is referenced, it's moved to the front
+// of the list. Thus, a repeated symbol ends up being encoded with many zeros,
+// as the symbol will be at the front of the list after the first access.
+type moveToFrontDecoder struct {
+	// Rather than actually keep the list in memory, the symbols are stored
+	// as a circular, double linked list with the symbol indexed by head
+	// at the front of the list.
+	symbols []byte
+	next    []uint8
+	prev    []uint8
+	head    uint8
+}
+
+// newMTFDecoder creates a move-to-front decoder with an explicit initial list
+// of symbols.
+func newMTFDecoder(symbols []byte) *moveToFrontDecoder {
+	if len(symbols) > 256 {
+		panic("too many symbols")
+	}
+
+	m := &moveToFrontDecoder{
+		symbols: symbols,
+		next:    make([]uint8, len(symbols)),
+		prev:    make([]uint8, len(symbols)),
+	}
+
+	m.threadLinkedList()
+	return m
+}
+
+// newMTFDecoderWithRange creates a move-to-front decoder with an initial
+// symbol list of 0...n-1.
+func newMTFDecoderWithRange(n int) *moveToFrontDecoder {
+	if n > 256 {
+		panic("newMTFDecoderWithRange: cannot have > 256 symbols")
+	}
+
+	m := &moveToFrontDecoder{
+		symbols: make([]uint8, n),
+		next:    make([]uint8, n),
+		prev:    make([]uint8, n),
+	}
+
+	for i := 0; i < n; i++ {
+		m.symbols[i] = byte(i)
+	}
+
+	m.threadLinkedList()
+	return m
+}
+
+// threadLinkedList creates the initial linked-list pointers.
+func (m *moveToFrontDecoder) threadLinkedList() {
+	if len(m.symbols) == 0 {
+		return
+	}
+
+	m.prev[0] = uint8(len(m.symbols) - 1)
+
+	for i := 0; i < len(m.symbols)-1; i++ {
+		m.next[i] = uint8(i + 1)
+		m.prev[i+1] = uint8(i)
+	}
+
+	m.next[len(m.symbols)-1] = 0
+}
+
+func (m *moveToFrontDecoder) Decode(n int) (b byte) {
+	// Most of the time, n will be zero so it's worth dealing with this
+	// simple case.
+	if n == 0 {
+		return m.symbols[m.head]
+	}
+
+	i := m.head
+	for j := 0; j < n; j++ {
+		i = m.next[i]
+	}
+	b = m.symbols[i]
+
+	m.next[m.prev[i]] = m.next[i]
+	m.prev[m.next[i]] = m.prev[i]
+	m.next[i] = m.head
+	m.prev[i] = m.prev[m.head]
+	m.next[m.prev[m.head]] = i
+	m.prev[m.head] = i
+	m.head = i
+
+	return
+}
+
+// First returns the symbol at the front of the list.
+func (m *moveToFrontDecoder) First() byte {
+	return m.symbols[m.head]
+}
diff --git a/src/pkg/compress/flate/deflate_test.go b/src/pkg/compress/flate/deflate_test.go
index 68dcd7bcc..ed5884a4b 100644
--- a/src/pkg/compress/flate/deflate_test.go
+++ b/src/pkg/compress/flate/deflate_test.go
@@ -191,9 +191,16 @@ func testSync(t *testing.T, level int, input []byte, name string) {
 			t.Errorf("testSync/%d: read wrong bytes: %x vs %x", i, input[lo:hi], out[:hi-lo])
 			return
 		}
-		if i == 0 && buf.buf.Len() != 0 {
-			t.Errorf("testSync/%d (%d, %d, %s): extra data after %d", i, level, len(input), name, hi-lo)
-		}
+		// This test originally checked that after reading
+		// the first half of the input, there was nothing left
+		// in the read buffer (buf.buf.Len() != 0) but that is
+		// not necessarily the case: the write Flush may emit
+		// some extra framing bits that are not necessary
+		// to process to obtain the first half of the uncompressed
+		// data.  The test ran correctly most of the time, because
+		// the background goroutine had usually read even
+		// those extra bits by now, but it's not a useful thing to
+		// check.
 		buf.WriteMode()
 	}
 	buf.ReadMode()
@@ -262,135 +269,9 @@ func TestReverseBits(t *testing.T) {
 }
 
 func TestDeflateInflateString(t *testing.T) {
-	gold := bytes.NewBufferString(getEdata()).Bytes()
+	gold, err := ioutil.ReadFile("../testdata/e.txt")
+	if err != nil {
+		t.Error(err)
+	}
 	testToFromWithLevel(t, 1, gold, "2.718281828...")
 }
-
-func getEdata() string {
-	return "2.718281828459045235360287471352662497757247093699959574966967627724076630353547" +
-		"59457138217852516642742746639193200305992181741359662904357290033429526059563073" +
-		"81323286279434907632338298807531952510190115738341879307021540891499348841675092" +
-		"44761460668082264800168477411853742345442437107539077744992069551702761838606261" +
-		"33138458300075204493382656029760673711320070932870912744374704723069697720931014" +
-		"16928368190255151086574637721112523897844250569536967707854499699679468644549059" +
-		"87931636889230098793127736178215424999229576351482208269895193668033182528869398" +
-		"49646510582093923982948879332036250944311730123819706841614039701983767932068328" +
-		"23764648042953118023287825098194558153017567173613320698112509961818815930416903" +
-		"51598888519345807273866738589422879228499892086805825749279610484198444363463244" +
-		"96848756023362482704197862320900216099023530436994184914631409343173814364054625" +
-		"31520961836908887070167683964243781405927145635490613031072085103837505101157477" +
-		"04171898610687396965521267154688957035035402123407849819334321068170121005627880" +
-		"23519303322474501585390473041995777709350366041699732972508868769664035557071622" +
-		"68447162560798826517871341951246652010305921236677194325278675398558944896970964" +
-		"09754591856956380236370162112047742722836489613422516445078182442352948636372141" +
-		"74023889344124796357437026375529444833799801612549227850925778256209262264832627" +
-		"79333865664816277251640191059004916449982893150566047258027786318641551956532442" +
-		"58698294695930801915298721172556347546396447910145904090586298496791287406870504" +
-		"89585867174798546677575732056812884592054133405392200011378630094556068816674001" +
-		"69842055804033637953764520304024322566135278369511778838638744396625322498506549" +
-		"95886234281899707733276171783928034946501434558897071942586398772754710962953741" +
-		"52111513683506275260232648472870392076431005958411661205452970302364725492966693" +
-		"81151373227536450988890313602057248176585118063036442812314965507047510254465011" +
-		"72721155519486685080036853228183152196003735625279449515828418829478761085263981" +
-		"39559900673764829224437528718462457803619298197139914756448826260390338144182326" +
-		"25150974827987779964373089970388867782271383605772978824125611907176639465070633" +
-		"04527954661855096666185664709711344474016070462621568071748187784437143698821855" +
-		"96709591025968620023537185887485696522000503117343920732113908032936344797273559" +
-		"55277349071783793421637012050054513263835440001863239914907054797780566978533580" +
-		"48966906295119432473099587655236812859041383241160722602998330535370876138939639" +
-		"17795745401613722361878936526053815584158718692553860616477983402543512843961294" +
-		"60352913325942794904337299085731580290958631382683291477116396337092400316894586" +
-		"36060645845925126994655724839186564209752685082307544254599376917041977780085362" +
-		"73094171016343490769642372229435236612557250881477922315197477806056967253801718" +
-		"07763603462459278778465850656050780844211529697521890874019660906651803516501792" +
-		"50461950136658543663271254963990854914420001457476081930221206602433009641270489" +
-		"43903971771951806990869986066365832322787093765022601492910115171776359446020232" +
-		"49300280401867723910288097866605651183260043688508817157238669842242201024950551" +
-		"88169480322100251542649463981287367765892768816359831247788652014117411091360116" +
-		"49950766290779436460058519419985601626479076153210387275571269925182756879893027" +
-		"61761146162549356495903798045838182323368612016243736569846703785853305275833337" +
-		"93990752166069238053369887956513728559388349989470741618155012539706464817194670" +
-		"83481972144888987906765037959036696724949925452790337296361626589760394985767413" +
-		"97359441023744329709355477982629614591442936451428617158587339746791897571211956" +
-		"18738578364475844842355558105002561149239151889309946342841393608038309166281881" +
-		"15037152849670597416256282360921680751501777253874025642534708790891372917228286" +
-		"11515915683725241630772254406337875931059826760944203261924285317018781772960235" +
-		"41306067213604600038966109364709514141718577701418060644363681546444005331608778" +
-		"31431744408119494229755993140118886833148328027065538330046932901157441475631399" +
-		"97221703804617092894579096271662260740718749975359212756084414737823303270330168" +
-		"23719364800217328573493594756433412994302485023573221459784328264142168487872167" +
-		"33670106150942434569844018733128101079451272237378861260581656680537143961278887" +
-		"32527373890392890506865324138062796025930387727697783792868409325365880733988457" +
-		"21874602100531148335132385004782716937621800490479559795929059165547050577751430" +
-		"81751126989851884087185640260353055837378324229241856256442550226721559802740126" +
-		"17971928047139600689163828665277009752767069777036439260224372841840883251848770" +
-		"47263844037953016690546593746161932384036389313136432713768884102681121989127522" +
-		"30562567562547017250863497653672886059667527408686274079128565769963137897530346" +
-		"60616669804218267724560530660773899624218340859882071864682623215080288286359746" +
-		"83965435885668550377313129658797581050121491620765676995065971534476347032085321" +
-		"56036748286083786568030730626576334697742956346437167093971930608769634953288468" +
-		"33613038829431040800296873869117066666146800015121143442256023874474325250769387" +
-		"07777519329994213727721125884360871583483562696166198057252661220679754062106208" +
-		"06498829184543953015299820925030054982570433905535701686531205264956148572492573" +
-		"86206917403695213533732531666345466588597286659451136441370331393672118569553952" +
-		"10845840724432383558606310680696492485123263269951460359603729725319836842336390" +
-		"46321367101161928217111502828016044880588023820319814930963695967358327420249882" +
-		"45684941273860566491352526706046234450549227581151709314921879592718001940968866" +
-		"98683703730220047531433818109270803001720593553052070070607223399946399057131158" +
-		"70996357773590271962850611465148375262095653467132900259943976631145459026858989" +
-		"79115837093419370441155121920117164880566945938131183843765620627846310490346293" +
-		"95002945834116482411496975832601180073169943739350696629571241027323913874175492" +
-		"30718624545432220395527352952402459038057445028922468862853365422138157221311632" +
-		"88112052146489805180092024719391710555390113943316681515828843687606961102505171" +
-		"00739276238555338627255353883096067164466237092264680967125406186950214317621166" +
-		"81400975952814939072226011126811531083873176173232352636058381731510345957365382" +
-		"23534992935822836851007810884634349983518404451704270189381994243410090575376257" +
-		"76757111809008816418331920196262341628816652137471732547772778348877436651882875" +
-		"21566857195063719365653903894493664217640031215278702223664636357555035655769488" +
-		"86549500270853923617105502131147413744106134445544192101336172996285694899193369" +
-		"18472947858072915608851039678195942983318648075608367955149663644896559294818785" +
-		"17840387733262470519450504198477420141839477312028158868457072905440575106012852" +
-		"58056594703046836344592652552137008068752009593453607316226118728173928074623094" +
-		"68536782310609792159936001994623799343421068781349734695924646975250624695861690" +
-		"91785739765951993929939955675427146549104568607020990126068187049841780791739240" +
-		"71945996323060254707901774527513186809982284730860766536866855516467702911336827" +
-		"56310722334672611370549079536583453863719623585631261838715677411873852772292259" +
-		"47433737856955384562468010139057278710165129666367644518724656537304024436841408" +
-		"14488732957847348490003019477888020460324660842875351848364959195082888323206522" +
-		"12810419044804724794929134228495197002260131043006241071797150279343326340799596" +
-		"05314460532304885289729176598760166678119379323724538572096075822771784833616135" +
-		"82612896226118129455927462767137794487586753657544861407611931125958512655759734" +
-		"57301533364263076798544338576171533346232527057200530398828949903425956623297578" +
-		"24887350292591668258944568946559926584547626945287805165017206747854178879822768" +
-		"06536650641910973434528878338621726156269582654478205672987756426325321594294418" +
-		"03994321700009054265076309558846589517170914760743713689331946909098190450129030" +
-		"70995662266203031826493657336984195557769637876249188528656866076005660256054457" +
-		"11337286840205574416030837052312242587223438854123179481388550075689381124935386" +
-		"31863528708379984569261998179452336408742959118074745341955142035172618420084550" +
-		"91708456823682008977394558426792142734775608796442792027083121501564063413416171" +
-		"66448069815483764491573900121217041547872591998943825364950514771379399147205219" +
-		"52907939613762110723849429061635760459623125350606853765142311534966568371511660" +
-		"42207963944666211632551577290709784731562782775987881364919512574833287937715714" +
-		"59091064841642678309949723674420175862269402159407924480541255360431317992696739" +
-		"15754241929660731239376354213923061787675395871143610408940996608947141834069836" +
-		"29936753626215452472984642137528910798843813060955526227208375186298370667872244" +
-		"30195793793786072107254277289071732854874374355781966511716618330881129120245204" +
-		"04868220007234403502544820283425418788465360259150644527165770004452109773558589" +
-		"76226554849416217149895323834216001140629507184904277892585527430352213968356790" +
-		"18076406042138307308774460170842688272261177180842664333651780002171903449234264" +
-		"26629226145600433738386833555534345300426481847398921562708609565062934040526494" +
-		"32442614456659212912256488935696550091543064261342526684725949143142393988454324" +
-		"86327461842846655985332312210466259890141712103446084271616619001257195870793217" +
-		"56969854401339762209674945418540711844643394699016269835160784892451405894094639" +
-		"52678073545797003070511636825194877011897640028276484141605872061841852971891540" +
-		"19688253289309149665345753571427318482016384644832499037886069008072709327673127" +
-		"58196656394114896171683298045513972950668760474091542042842999354102582911350224" +
-		"16907694316685742425225090269390348148564513030699251995904363840284292674125734" +
-		"22447765584177886171737265462085498294498946787350929581652632072258992368768457" +
-		"01782303809656788311228930580914057261086588484587310165815116753332767488701482" +
-		"91674197015125597825727074064318086014281490241467804723275976842696339357735429" +
-		"30186739439716388611764209004068663398856841681003872389214483176070116684503887" +
-		"21236436704331409115573328018297798873659091665961240202177855885487617616198937" +
-		"07943800566633648843650891448055710397652146960276625835990519870423001794655367" +
-		"9"
-}
diff --git a/src/pkg/compress/lzw/Makefile b/src/pkg/compress/lzw/Makefile
new file mode 100644
index 000000000..28f5e6abc
--- /dev/null
+++ b/src/pkg/compress/lzw/Makefile
@@ -0,0 +1,12 @@
+# Copyright 2011 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=compress/lzw
+GOFILES=\
+	reader.go\
+	writer.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/compress/lzw/reader.go b/src/pkg/compress/lzw/reader.go
new file mode 100644
index 000000000..8a540cbe6
--- /dev/null
+++ b/src/pkg/compress/lzw/reader.go
@@ -0,0 +1,210 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The lzw package implements the Lempel-Ziv-Welch compressed data format,
+// described in T. A. Welch, ``A Technique for High-Performance Data
+// Compression'', Computer, 17(6) (June 1984), pp 8-19.
+//
+// In particular, it implements LZW as used by the GIF, TIFF and PDF file
+// formats, which means variable-width codes up to 12 bits and the first
+// two non-literal codes are a clear code and an EOF code.
+package lzw
+
+// TODO(nigeltao): check that TIFF and PDF use LZW in the same way as GIF,
+// modulo LSB/MSB packing order.
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"os"
+)
+
+// Order specifies the bit ordering in an LZW data stream.
+type Order int
+
+const (
+	// LSB means Least Significant Bits first, as used in the GIF file format.
+	LSB Order = iota
+	// MSB means Most Significant Bits first, as used in the TIFF and PDF
+	// file formats.
+	MSB
+)
+
+// decoder is the state from which the readXxx method converts a byte
+// stream into a code stream.
+type decoder struct {
+	r     io.ByteReader
+	bits  uint32
+	nBits uint
+	width uint
+}
+
+// readLSB returns the next code for "Least Significant Bits first" data.
+func (d *decoder) readLSB() (uint16, os.Error) {
+	for d.nBits < d.width {
+		x, err := d.r.ReadByte()
+		if err != nil {
+			return 0, err
+		}
+		d.bits |= uint32(x) << d.nBits
+		d.nBits += 8
+	}
+	code := uint16(d.bits & (1<<d.width - 1))
+	d.bits >>= d.width
+	d.nBits -= d.width
+	return code, nil
+}
+
+// readMSB returns the next code for "Most Significant Bits first" data.
+func (d *decoder) readMSB() (uint16, os.Error) {
+	for d.nBits < d.width {
+		x, err := d.r.ReadByte()
+		if err != nil {
+			return 0, err
+		}
+		d.bits |= uint32(x) << (24 - d.nBits)
+		d.nBits += 8
+	}
+	code := uint16(d.bits >> (32 - d.width))
+	d.bits <<= d.width
+	d.nBits -= d.width
+	return code, nil
+}
+
+// decode decompresses bytes from r and writes them to pw.
+// read specifies how to decode bytes into codes.
+// litWidth is the width in bits of literal codes.
+func decode(r io.Reader, read func(*decoder) (uint16, os.Error), litWidth int, pw *io.PipeWriter) {
+	br, ok := r.(io.ByteReader)
+	if !ok {
+		br = bufio.NewReader(r)
+	}
+	pw.CloseWithError(decode1(pw, br, read, uint(litWidth)))
+}
+
+func decode1(pw *io.PipeWriter, r io.ByteReader, read func(*decoder) (uint16, os.Error), litWidth uint) os.Error {
+	const (
+		maxWidth    = 12
+		invalidCode = 0xffff
+	)
+	d := decoder{r, 0, 0, 1 + litWidth}
+	w := bufio.NewWriter(pw)
+	// The first 1<<litWidth codes are literal codes.
+	// The next two codes mean clear and EOF.
+	// Other valid codes are in the range [lo, hi] where lo := clear + 2,
+	// with the upper bound incrementing on each code seen.
+	clear := uint16(1) << litWidth
+	eof, hi := clear+1, clear+1
+	// overflow is the code at which hi overflows the code width.
+	overflow := uint16(1) << d.width
+	var (
+		// Each code c in [lo, hi] expands to two or more bytes. For c != hi:
+		//   suffix[c] is the last of these bytes.
+		//   prefix[c] is the code for all but the last byte.
+		//   This code can either be a literal code or another code in [lo, c).
+		// The c == hi case is a special case.
+		suffix [1 << maxWidth]uint8
+		prefix [1 << maxWidth]uint16
+		// buf is a scratch buffer for reconstituting the bytes that a code expands to.
+		// Code suffixes are written right-to-left from the end of the buffer.
+		buf [1 << maxWidth]byte
+	)
+
+	// Loop over the code stream, converting codes into decompressed bytes.
+	last := uint16(invalidCode)
+	for {
+		code, err := read(&d)
+		if err != nil {
+			if err == os.EOF {
+				err = io.ErrUnexpectedEOF
+			}
+			return err
+		}
+		switch {
+		case code < clear:
+			// We have a literal code.
+			if err := w.WriteByte(uint8(code)); err != nil {
+				return err
+			}
+			if last != invalidCode {
+				// Save what the hi code expands to.
+				suffix[hi] = uint8(code)
+				prefix[hi] = last
+			}
+		case code == clear:
+			d.width = 1 + litWidth
+			hi = eof
+			overflow = 1 << d.width
+			last = invalidCode
+			continue
+		case code == eof:
+			return w.Flush()
+		case code <= hi:
+			c, i := code, len(buf)-1
+			if code == hi {
+				// code == hi is a special case which expands to the last expansion
+				// followed by the head of the last expansion. To find the head, we walk
+				// the prefix chain until we find a literal code.
+				c = last
+				for c >= clear {
+					c = prefix[c]
+				}
+				buf[i] = uint8(c)
+				i--
+				c = last
+			}
+			// Copy the suffix chain into buf and then write that to w.
+			for c >= clear {
+				buf[i] = suffix[c]
+				i--
+				c = prefix[c]
+			}
+			buf[i] = uint8(c)
+			if _, err := w.Write(buf[i:]); err != nil {
+				return err
+			}
+			// Save what the hi code expands to.
+			suffix[hi] = uint8(c)
+			prefix[hi] = last
+		default:
+			return os.NewError("lzw: invalid code")
+		}
+		last, hi = code, hi+1
+		if hi == overflow {
+			if d.width == maxWidth {
+				return os.NewError("lzw: missing clear code")
+			}
+			d.width++
+			overflow <<= 1
+		}
+	}
+	panic("unreachable")
+}
+
+// NewReader creates a new io.ReadCloser that satisfies reads by decompressing
+// the data read from r.
+// It is the caller's responsibility to call Close on the ReadCloser when
+// finished reading.
+// The number of bits to use for literal codes, litWidth, must be in the
+// range [2,8] and is typically 8.
+func NewReader(r io.Reader, order Order, litWidth int) io.ReadCloser {
+	pr, pw := io.Pipe()
+	var read func(*decoder) (uint16, os.Error)
+	switch order {
+	case LSB:
+		read = (*decoder).readLSB
+	case MSB:
+		read = (*decoder).readMSB
+	default:
+		pw.CloseWithError(os.NewError("lzw: unknown order"))
+		return pr
+	}
+	if litWidth < 2 || 8 < litWidth {
+		pw.CloseWithError(fmt.Errorf("lzw: litWidth %d out of range", litWidth))
+		return pr
+	}
+	go decode(r, read, litWidth, pw)
+	return pr
+}
diff --git a/src/pkg/compress/lzw/reader_test.go b/src/pkg/compress/lzw/reader_test.go
new file mode 100644
index 000000000..7795a4c14
--- /dev/null
+++ b/src/pkg/compress/lzw/reader_test.go
@@ -0,0 +1,132 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lzw
+
+import (
+	"bytes"
+	"io"
+	"io/ioutil"
+	"os"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+type lzwTest struct {
+	desc       string
+	raw        string
+	compressed string
+	err        os.Error
+}
+
+var lzwTests = []lzwTest{
+	{
+		"empty;LSB;8",
+		"",
+		"\x01\x01",
+		nil,
+	},
+	{
+		"empty;MSB;8",
+		"",
+		"\x80\x80",
+		nil,
+	},
+	{
+		"tobe;LSB;7",
+		"TOBEORNOTTOBEORTOBEORNOT",
+		"\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
+		nil,
+	},
+	{
+		"tobe;LSB;8",
+		"TOBEORNOTTOBEORTOBEORNOT",
+		"\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04\x12\x34\xb8\xb0\xe0\xc1\x84\x01\x01",
+		nil,
+	},
+	{
+		"tobe;MSB;7",
+		"TOBEORNOTTOBEORTOBEORNOT",
+		"\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
+		nil,
+	},
+	{
+		"tobe;MSB;8",
+		"TOBEORNOTTOBEORTOBEORNOT",
+		"\x2a\x13\xc8\x44\x52\x79\x48\x9c\x4f\x2a\x40\xa0\x90\x68\x5c\x16\x0f\x09\x80\x80",
+		nil,
+	},
+	{
+		"tobe-truncated;LSB;8",
+		"TOBEORNOTTOBEORTOBEORNOT",
+		"\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04",
+		io.ErrUnexpectedEOF,
+	},
+	// This example comes from http://en.wikipedia.org/wiki/Graphics_Interchange_Format.
+	{
+		"gif;LSB;8",
+		"\x28\xff\xff\xff\x28\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
+		"\x00\x51\xfc\x1b\x28\x70\xa0\xc1\x83\x01\x01",
+		nil,
+	},
+	// This example comes from http://compgroups.net/comp.lang.ruby/Decompressing-LZW-compression-from-PDF-file
+	{
+		"pdf;MSB;8",
+		"-----A---B",
+		"\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01",
+		nil,
+	},
+}
+
+func TestReader(t *testing.T) {
+	b := bytes.NewBuffer(nil)
+	for _, tt := range lzwTests {
+		d := strings.Split(tt.desc, ";", -1)
+		var order Order
+		switch d[1] {
+		case "LSB":
+			order = LSB
+		case "MSB":
+			order = MSB
+		default:
+			t.Errorf("%s: bad order %q", tt.desc, d[1])
+		}
+		litWidth, _ := strconv.Atoi(d[2])
+		rc := NewReader(strings.NewReader(tt.compressed), order, litWidth)
+		defer rc.Close()
+		b.Reset()
+		n, err := io.Copy(b, rc)
+		if err != nil {
+			if err != tt.err {
+				t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err)
+			}
+			continue
+		}
+		s := b.String()
+		if s != tt.raw {
+			t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw)
+		}
+	}
+}
+
+type devNull struct{}
+
+func (devNull) Write(p []byte) (int, os.Error) {
+	return len(p), nil
+}
+
+func BenchmarkDecoder(b *testing.B) {
+	b.StopTimer()
+	buf0, _ := ioutil.ReadFile("../testdata/e.txt")
+	compressed := bytes.NewBuffer(nil)
+	w := NewWriter(compressed, LSB, 8)
+	io.Copy(w, bytes.NewBuffer(buf0))
+	w.Close()
+	buf1 := compressed.Bytes()
+	b.StartTimer()
+	for i := 0; i < b.N; i++ {
+		io.Copy(devNull{}, NewReader(bytes.NewBuffer(buf1), LSB, 8))
+	}
+}
diff --git a/src/pkg/compress/lzw/writer.go b/src/pkg/compress/lzw/writer.go
new file mode 100644
index 000000000..87143b7aa
--- /dev/null
+++ b/src/pkg/compress/lzw/writer.go
@@ -0,0 +1,259 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lzw
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"os"
+)
+
+// A writer is a buffered, flushable writer.
+type writer interface {
+	WriteByte(byte) os.Error
+	Flush() os.Error
+}
+
+// An errWriteCloser is an io.WriteCloser that always returns a given error.
+type errWriteCloser struct {
+	err os.Error
+}
+
+func (e *errWriteCloser) Write([]byte) (int, os.Error) {
+	return 0, e.err
+}
+
+func (e *errWriteCloser) Close() os.Error {
+	return e.err
+}
+
+const (
+	// A code is a 12 bit value, stored as a uint32 when encoding to avoid
+	// type conversions when shifting bits.
+	maxCode     = 1<<12 - 1
+	invalidCode = 1<<32 - 1
+	// There are 1<<12 possible codes, which is an upper bound on the number of
+	// valid hash table entries at any given point in time. tableSize is 4x that.
+	tableSize = 4 * 1 << 12
+	tableMask = tableSize - 1
+	// A hash table entry is a uint32. Zero is an invalid entry since the
+	// lower 12 bits of a valid entry must be a non-literal code.
+	invalidEntry = 0
+)
+
+// encoder is LZW compressor.
+type encoder struct {
+	// w is the writer that compressed bytes are written to.
+	w writer
+	// write, bits, nBits and width are the state for converting a code stream
+	// into a byte stream.
+	write func(*encoder, uint32) os.Error
+	bits  uint32
+	nBits uint
+	width uint
+	// litWidth is the width in bits of literal codes.
+	litWidth uint
+	// hi is the code implied by the next code emission.
+	// overflow is the code at which hi overflows the code width.
+	hi, overflow uint32
+	// savedCode is the accumulated code at the end of the most recent Write
+	// call. It is equal to invalidCode if there was no such call.
+	savedCode uint32
+	// err is the first error encountered during writing. Closing the encoder
+	// will make any future Write calls return os.EINVAL.
+	err os.Error
+	// table is the hash table from 20-bit keys to 12-bit values. Each table
+	// entry contains key<<12|val and collisions resolve by linear probing.
+	// The keys consist of a 12-bit code prefix and an 8-bit byte suffix.
+	// The values are a 12-bit code.
+	table [tableSize]uint32
+}
+
+// writeLSB writes the code c for "Least Significant Bits first" data.
+func (e *encoder) writeLSB(c uint32) os.Error {
+	e.bits |= c << e.nBits
+	e.nBits += e.width
+	for e.nBits >= 8 {
+		if err := e.w.WriteByte(uint8(e.bits)); err != nil {
+			return err
+		}
+		e.bits >>= 8
+		e.nBits -= 8
+	}
+	return nil
+}
+
+// writeMSB writes the code c for "Most Significant Bits first" data.
+func (e *encoder) writeMSB(c uint32) os.Error {
+	e.bits |= c << (32 - e.width - e.nBits)
+	e.nBits += e.width
+	for e.nBits >= 8 {
+		if err := e.w.WriteByte(uint8(e.bits >> 24)); err != nil {
+			return err
+		}
+		e.bits <<= 8
+		e.nBits -= 8
+	}
+	return nil
+}
+
+// errOutOfCodes is an internal error that means that the encoder has run out
+// of unused codes and a clear code needs to be sent next.
+var errOutOfCodes = os.NewError("lzw: out of codes")
+
+// incHi increments e.hi and checks for both overflow and running out of
+// unused codes. In the latter case, incHi sends a clear code, resets the
+// encoder state and returns errOutOfCodes.
+func (e *encoder) incHi() os.Error {
+	e.hi++
+	if e.hi == e.overflow {
+		e.width++
+		e.overflow <<= 1
+	}
+	if e.hi == maxCode {
+		clear := uint32(1) << e.litWidth
+		if err := e.write(e, clear); err != nil {
+			return err
+		}
+		e.width = uint(e.litWidth) + 1
+		e.hi = clear + 1
+		e.overflow = clear << 1
+		for i := range e.table {
+			e.table[i] = invalidEntry
+		}
+		return errOutOfCodes
+	}
+	return nil
+}
+
+// Write writes a compressed representation of p to e's underlying writer.
+func (e *encoder) Write(p []byte) (int, os.Error) {
+	if e.err != nil {
+		return 0, e.err
+	}
+	if len(p) == 0 {
+		return 0, nil
+	}
+	litMask := uint32(1<<e.litWidth - 1)
+	code := e.savedCode
+	if code == invalidCode {
+		// The first code sent is always a literal code.
+		code, p = uint32(p[0])&litMask, p[1:]
+	}
+loop:
+	for _, x := range p {
+		literal := uint32(x) & litMask
+		key := code<<8 | literal
+		// If there is a hash table hit for this key then we continue the loop
+		// and do not emit a code yet.
+		hash := (key>>12 ^ key) & tableMask
+		for h, t := hash, e.table[hash]; t != invalidEntry; {
+			if key == t>>12 {
+				code = t & maxCode
+				continue loop
+			}
+			h = (h + 1) & tableMask
+			t = e.table[h]
+		}
+		// Otherwise, write the current code, and literal becomes the start of
+		// the next emitted code.
+		if e.err = e.write(e, code); e.err != nil {
+			return 0, e.err
+		}
+		code = literal
+		// Increment e.hi, the next implied code. If we run out of codes, reset
+		// the encoder state (including clearing the hash table) and continue.
+		if err := e.incHi(); err != nil {
+			if err == errOutOfCodes {
+				continue
+			}
+			e.err = err
+			return 0, e.err
+		}
+		// Otherwise, insert key -> e.hi into the map that e.table represents.
+		for {
+			if e.table[hash] == invalidEntry {
+				e.table[hash] = (key << 12) | e.hi
+				break
+			}
+			hash = (hash + 1) & tableMask
+		}
+	}
+	e.savedCode = code
+	return len(p), nil
+}
+
+// Close closes the encoder, flushing any pending output. It does not close or
+// flush e's underlying writer.
+func (e *encoder) Close() os.Error {
+	if e.err != nil {
+		if e.err == os.EINVAL {
+			return nil
+		}
+		return e.err
+	}
+	// Make any future calls to Write return os.EINVAL.
+	e.err = os.EINVAL
+	// Write the savedCode if valid.
+	if e.savedCode != invalidCode {
+		if err := e.write(e, e.savedCode); err != nil {
+			return err
+		}
+		if err := e.incHi(); err != nil && err != errOutOfCodes {
+			return err
+		}
+	}
+	// Write the eof code.
+	eof := uint32(1)<<e.litWidth + 1
+	if err := e.write(e, eof); err != nil {
+		return err
+	}
+	// Write the final bits.
+	if e.nBits > 0 {
+		if e.write == (*encoder).writeMSB {
+			e.bits >>= 24
+		}
+		if err := e.w.WriteByte(uint8(e.bits)); err != nil {
+			return err
+		}
+	}
+	return e.w.Flush()
+}
+
+// NewWriter creates a new io.WriteCloser that satisfies writes by compressing
+// the data and writing it to w.
+// It is the caller's responsibility to call Close on the WriteCloser when
+// finished writing.
+// The number of bits to use for literal codes, litWidth, must be in the
+// range [2,8] and is typically 8.
+func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser {
+	var write func(*encoder, uint32) os.Error
+	switch order {
+	case LSB:
+		write = (*encoder).writeLSB
+	case MSB:
+		write = (*encoder).writeMSB
+	default:
+		return &errWriteCloser{os.NewError("lzw: unknown order")}
+	}
+	if litWidth < 2 || 8 < litWidth {
+		return &errWriteCloser{fmt.Errorf("lzw: litWidth %d out of range", litWidth)}
+	}
+	bw, ok := w.(writer)
+	if !ok {
+		bw = bufio.NewWriter(w)
+	}
+	lw := uint(litWidth)
+	return &encoder{
+		w:         bw,
+		write:     write,
+		width:     1 + lw,
+		litWidth:  lw,
+		hi:        1<<lw + 1,
+		overflow:  1 << (lw + 1),
+		savedCode: invalidCode,
+	}
+}
diff --git a/src/pkg/compress/lzw/writer_test.go b/src/pkg/compress/lzw/writer_test.go
new file mode 100644
index 000000000..715b974aa
--- /dev/null
+++ b/src/pkg/compress/lzw/writer_test.go
@@ -0,0 +1,111 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lzw
+
+import (
+	"io"
+	"io/ioutil"
+	"os"
+	"testing"
+)
+
+var filenames = []string{
+	"../testdata/e.txt",
+	"../testdata/pi.txt",
+}
+
+// testFile tests that compressing and then decompressing the given file with
+// the given options yields equivalent bytes to the original file.
+func testFile(t *testing.T, fn string, order Order, litWidth int) {
+	// Read the file, as golden output.
+	golden, err := os.Open(fn, os.O_RDONLY, 0400)
+	if err != nil {
+		t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err)
+		return
+	}
+	defer golden.Close()
+
+	// Read the file again, and push it through a pipe that compresses at the write end, and decompresses at the read end.
+	raw, err := os.Open(fn, os.O_RDONLY, 0400)
+	if err != nil {
+		t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err)
+		return
+	}
+
+	piper, pipew := io.Pipe()
+	defer piper.Close()
+	go func() {
+		defer raw.Close()
+		defer pipew.Close()
+		lzww := NewWriter(pipew, order, litWidth)
+		defer lzww.Close()
+		var b [4096]byte
+		for {
+			n, err0 := raw.Read(b[:])
+			if err0 != nil && err0 != os.EOF {
+				t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err0)
+				return
+			}
+			_, err1 := lzww.Write(b[:n])
+			if err1 == os.EPIPE {
+				// Fail, but do not report the error, as some other (presumably reportable) error broke the pipe.
+				return
+			}
+			if err1 != nil {
+				t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err1)
+				return
+			}
+			if err0 == os.EOF {
+				break
+			}
+		}
+	}()
+	lzwr := NewReader(piper, order, litWidth)
+	defer lzwr.Close()
+
+	// Compare the two.
+	b0, err0 := ioutil.ReadAll(golden)
+	b1, err1 := ioutil.ReadAll(lzwr)
+	if err0 != nil {
+		t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err0)
+		return
+	}
+	if err1 != nil {
+		t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err1)
+		return
+	}
+	if len(b0) != len(b1) {
+		t.Errorf("%s (order=%d litWidth=%d): length mismatch %d versus %d", fn, order, litWidth, len(b0), len(b1))
+		return
+	}
+	for i := 0; i < len(b0); i++ {
+		if b0[i] != b1[i] {
+			t.Errorf("%s (order=%d litWidth=%d): mismatch at %d, 0x%02x versus 0x%02x\n", fn, order, litWidth, i, b0[i], b1[i])
+			return
+		}
+	}
+}
+
+func TestWriter(t *testing.T) {
+	for _, filename := range filenames {
+		for _, order := range [...]Order{LSB, MSB} {
+			// The test data "2.71828 etcetera" is ASCII text requiring at least 6 bits.
+			for _, litWidth := range [...]int{6, 7, 8} {
+				testFile(t, filename, order, litWidth)
+			}
+		}
+	}
+}
+
+func BenchmarkEncoder(b *testing.B) {
+	b.StopTimer()
+	buf, _ := ioutil.ReadFile("../testdata/e.txt")
+	b.StartTimer()
+	for i := 0; i < b.N; i++ {
+		w := NewWriter(devNull{}, LSB, 8)
+		w.Write(buf)
+		w.Close()
+	}
+}
diff --git a/src/pkg/compress/zlib/testdata/e.txt b/src/pkg/compress/testdata/e.txt
index 76cf2a7b6..76cf2a7b6 100644
--- a/src/pkg/compress/zlib/testdata/e.txt
+++ b/src/pkg/compress/testdata/e.txt
diff --git a/src/pkg/compress/zlib/testdata/pi.txt b/src/pkg/compress/testdata/pi.txt
index 58d8f3b6d..58d8f3b6d 100644
--- a/src/pkg/compress/zlib/testdata/pi.txt
+++ b/src/pkg/compress/testdata/pi.txt
diff --git a/src/pkg/compress/zlib/writer_test.go b/src/pkg/compress/zlib/writer_test.go
index fa9e78e8e..5a13ba825 100644
--- a/src/pkg/compress/zlib/writer_test.go
+++ b/src/pkg/compress/zlib/writer_test.go
@@ -12,8 +12,8 @@ import (
 )
 
 var filenames = []string{
-	"testdata/e.txt",
-	"testdata/pi.txt",
+	"../testdata/e.txt",
+	"../testdata/pi.txt",
 }
 
 // Tests that compressing and then decompressing the given file at the given compression level