author     Ondřej Surý <ondrej@sury.org>  2011-09-13 13:13:40 +0200
committer  Ondřej Surý <ondrej@sury.org>  2011-09-13 13:13:40 +0200
commit     5ff4c17907d5b19510a62e08fd8d3b11e62b431d (patch)
tree       c0650497e988f47be9c6f2324fa692a52dea82e1 /src/pkg/compress
parent     80f18fc933cf3f3e829c5455a1023d69f7b86e52 (diff)

    Imported Upstream version 60 (tag: upstream/60)
Diffstat (limited to 'src/pkg/compress')
-rw-r--r--  src/pkg/compress/bzip2/Makefile                14
-rw-r--r--  src/pkg/compress/bzip2/bit_reader.go           88
-rw-r--r--  src/pkg/compress/bzip2/bzip2.go               390
-rw-r--r--  src/pkg/compress/bzip2/bzip2_test.go          158
-rw-r--r--  src/pkg/compress/bzip2/huffman.go             223
-rw-r--r--  src/pkg/compress/bzip2/move_to_front.go       105
-rw-r--r--  src/pkg/compress/flate/Makefile                17
-rw-r--r--  src/pkg/compress/flate/deflate.go             493
-rw-r--r--  src/pkg/compress/flate/deflate_test.go        321
-rw-r--r--  src/pkg/compress/flate/flate_test.go          139
-rw-r--r--  src/pkg/compress/flate/huffman_bit_writer.go  494
-rw-r--r--  src/pkg/compress/flate/huffman_code.go        378
-rw-r--r--  src/pkg/compress/flate/inflate.go             708
-rw-r--r--  src/pkg/compress/flate/reverse_bits.go         48
-rw-r--r--  src/pkg/compress/flate/token.go               103
-rw-r--r--  src/pkg/compress/flate/util.go                 72
-rw-r--r--  src/pkg/compress/gzip/Makefile                 12
-rw-r--r--  src/pkg/compress/gzip/gunzip.go               230
-rw-r--r--  src/pkg/compress/gzip/gunzip_test.go          305
-rw-r--r--  src/pkg/compress/gzip/gzip.go                 187
-rw-r--r--  src/pkg/compress/gzip/gzip_test.go             84
-rw-r--r--  src/pkg/compress/lzw/Makefile                  12
-rw-r--r--  src/pkg/compress/lzw/reader.go                253
-rw-r--r--  src/pkg/compress/lzw/reader_test.go           145
-rw-r--r--  src/pkg/compress/lzw/writer.go                259
-rw-r--r--  src/pkg/compress/lzw/writer_test.go           132
-rw-r--r--  src/pkg/compress/testdata/e.txt                 1
-rw-r--r--  src/pkg/compress/testdata/pi.txt                1
-rw-r--r--  src/pkg/compress/zlib/Makefile                 12
-rw-r--r--  src/pkg/compress/zlib/reader.go               126
-rw-r--r--  src/pkg/compress/zlib/reader_test.go          128
-rw-r--r--  src/pkg/compress/zlib/writer.go               139
-rw-r--r--  src/pkg/compress/zlib/writer_test.go          144
33 files changed, 5921 insertions, 0 deletions
diff --git a/src/pkg/compress/bzip2/Makefile b/src/pkg/compress/bzip2/Makefile
new file mode 100644
index 000000000..a4bceef16
--- /dev/null
+++ b/src/pkg/compress/bzip2/Makefile
@@ -0,0 +1,14 @@
+# Copyright 2011 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=compress/bzip2
+GOFILES=\
+ bit_reader.go\
+ bzip2.go\
+ huffman.go\
+ move_to_front.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/compress/bzip2/bit_reader.go b/src/pkg/compress/bzip2/bit_reader.go
new file mode 100644
index 000000000..50f0ec836
--- /dev/null
+++ b/src/pkg/compress/bzip2/bit_reader.go
@@ -0,0 +1,88 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bzip2
+
+import (
+ "bufio"
+ "io"
+ "os"
+)
+
+// bitReader wraps an io.Reader and provides the ability to read values,
+// bit-by-bit, from it. Its Read* methods don't return the usual os.Error
+// because that would make the error handling verbose. Instead, any error is kept and can
+// be checked afterwards.
+type bitReader struct {
+ r byteReader
+ n uint64
+ bits uint
+ err os.Error
+}
+
+// bitReader needs to read bytes from an io.Reader. We attempt a type
+// assertion on the given io.Reader and, if it doesn't already implement this
+// interface, we wrap it in a bufio.Reader.
+type byteReader interface {
+ ReadByte() (byte, os.Error)
+}
+
+func newBitReader(r io.Reader) bitReader {
+ byter, ok := r.(byteReader)
+ if !ok {
+ byter = bufio.NewReader(r)
+ }
+ return bitReader{r: byter}
+}
+
+// ReadBits64 reads the given number of bits and returns them in the
+// least-significant part of a uint64. In the event of an error, it returns 0
+// and the error can be obtained by calling Error().
+func (br *bitReader) ReadBits64(bits uint) (n uint64) {
+ for bits > br.bits {
+ b, err := br.r.ReadByte()
+ if err == os.EOF {
+ err = io.ErrUnexpectedEOF
+ }
+ if err != nil {
+ br.err = err
+ return 0
+ }
+ br.n <<= 8
+ br.n |= uint64(b)
+ br.bits += 8
+ }
+
+ // br.n looks like this (assuming that br.bits = 14 and bits = 6):
+ // Bit: 111111
+ //      5432109876543210
+ //
+ //         (6 bits, the desired output)
+ //        |-----|
+ //        V     V
+ //      0101101101001110
+ //        ^            ^
+ //        |------------|
+ //         br.bits (num valid bits)
+ //
+ // The next line right-shifts the desired bits into the
+ // least-significant places and masks off anything above.
+ n = (br.n >> (br.bits - bits)) & ((1 << bits) - 1)
+ br.bits -= bits
+ return
+}
+
+func (br *bitReader) ReadBits(bits uint) (n int) {
+ n64 := br.ReadBits64(bits)
+ return int(n64)
+}
+
+func (br *bitReader) ReadBit() bool {
+ n := br.ReadBits(1)
+ return n != 0
+}
+
+func (br *bitReader) Error() os.Error {
+ return br.err
+}
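
The extraction step at the end of ReadBits64 above shifts incoming bytes into the low end of a uint64 accumulator and then takes each request from the top of the valid region. A standalone sketch of that arithmetic (takeBits, acc, valid and want are illustrative names, not part of the package):

package main

import "fmt"

// takeBits mirrors the extraction line in ReadBits64 (illustrative sketch,
// not part of the package): the desired bits are right-shifted down from the
// top of the valid region and everything above them is masked off. acc is
// the accumulator, valid the number of valid low-order bits, want the number
// of bits requested.
func takeBits(acc uint64, valid, want uint) (out uint64, remaining uint) {
	out = (acc >> (valid - want)) & ((uint64(1) << want) - 1)
	return out, valid - want
}

func main() {
	// Two bytes 0xAB, 0xCD shifted in give acc = 0xABCD with 16 valid bits.
	acc := uint64(0xABCD)
	v, left := takeBits(acc, 16, 4)
	fmt.Printf("4 bits: %x (%d bits left)\n", v, left) // a (12 bits left)
	v, left = takeBits(acc, left, 8)
	fmt.Printf("8 bits: %x (%d bits left)\n", v, left) // bc (4 bits left)
}
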
diff --git a/src/pkg/compress/bzip2/bzip2.go b/src/pkg/compress/bzip2/bzip2.go
new file mode 100644
index 000000000..8b4572306
--- /dev/null
+++ b/src/pkg/compress/bzip2/bzip2.go
@@ -0,0 +1,390 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package bzip2 implements bzip2 decompression.
+package bzip2
+
+import (
+ "io"
+ "os"
+)
+
+// There's no RFC for bzip2. I used the Wikipedia page for reference and a lot
+// of guessing: http://en.wikipedia.org/wiki/Bzip2
+// The source code to pyflate was useful for debugging:
+// http://www.paul.sladen.org/projects/pyflate
+
+// A StructuralError is returned when the bzip2 data is found to be
+// syntactically invalid.
+type StructuralError string
+
+func (s StructuralError) String() string {
+ return "bzip2 data invalid: " + string(s)
+}
+
+// A reader decompresses bzip2 compressed data.
+type reader struct {
+ br bitReader
+ setupDone bool // true if we have parsed the bzip2 header.
+ blockSize int // blockSize in bytes, e.g. 900 * 1024 for level 9.
+ eof bool
+ buf []byte // stores Burrows-Wheeler transformed data.
+ c [256]uint // the `C' array for the inverse BWT.
+ tt []uint32 // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits.
+ tPos uint32 // Index of the next output byte in tt.
+
+ preRLE []uint32 // contains the RLE data still to be processed.
+ preRLEUsed int // number of entries of preRLE used.
+ lastByte int // the last byte value seen.
+ byteRepeats uint // the number of repeats of lastByte seen.
+ repeats uint // the number of copies of lastByte to output.
+}
+
+// NewReader returns an io.Reader which decompresses bzip2 data from r.
+func NewReader(r io.Reader) io.Reader {
+ bz2 := new(reader)
+ bz2.br = newBitReader(r)
+ return bz2
+}
+
+const bzip2FileMagic = 0x425a // "BZ"
+const bzip2BlockMagic = 0x314159265359
+const bzip2FinalMagic = 0x177245385090
+
+// setup parses the bzip2 header.
+func (bz2 *reader) setup() os.Error {
+ br := &bz2.br
+
+ magic := br.ReadBits(16)
+ if magic != bzip2FileMagic {
+ return StructuralError("bad magic value")
+ }
+
+ t := br.ReadBits(8)
+ if t != 'h' {
+ return StructuralError("non-Huffman entropy encoding")
+ }
+
+ level := br.ReadBits(8)
+ if level < '1' || level > '9' {
+ return StructuralError("invalid compression level")
+ }
+
+ bz2.blockSize = 100 * 1024 * (int(level) - '0')
+ bz2.tt = make([]uint32, bz2.blockSize)
+ return nil
+}
+
+func (bz2 *reader) Read(buf []byte) (n int, err os.Error) {
+ if bz2.eof {
+ return 0, os.EOF
+ }
+
+ if !bz2.setupDone {
+ err = bz2.setup()
+ brErr := bz2.br.Error()
+ if brErr != nil {
+ err = brErr
+ }
+ if err != nil {
+ return 0, err
+ }
+ bz2.setupDone = true
+ }
+
+ n, err = bz2.read(buf)
+ brErr := bz2.br.Error()
+ if brErr != nil {
+ err = brErr
+ }
+ return
+}
+
+func (bz2 *reader) read(buf []byte) (n int, err os.Error) {
+ // bzip2 is a block based compressor, except that it has a run-length
+ // preprocessing step. The block based nature means that we can
+ // preallocate fixed-size buffers and reuse them. However, the RLE
+ // preprocessing would require allocating huge buffers to store the
+ // maximum expansion. Thus we process blocks all at once, except for
+ // the RLE which we decompress as required.
+
+ for (bz2.repeats > 0 || bz2.preRLEUsed < len(bz2.preRLE)) && n < len(buf) {
+ // We have RLE data pending.
+
+ // The run-length encoding works like this:
+ // Any sequence of four equal bytes is followed by a length
+ // byte which contains the number of repeats of that byte to
+ // include. (The number of repeats can be zero.) Because we are
+ // decompressing on-demand our state is kept in the reader
+ // object.
+
+ if bz2.repeats > 0 {
+ buf[n] = byte(bz2.lastByte)
+ n++
+ bz2.repeats--
+ if bz2.repeats == 0 {
+ bz2.lastByte = -1
+ }
+ continue
+ }
+
+ bz2.tPos = bz2.preRLE[bz2.tPos]
+ b := byte(bz2.tPos)
+ bz2.tPos >>= 8
+ bz2.preRLEUsed++
+
+ if bz2.byteRepeats == 3 {
+ bz2.repeats = uint(b)
+ bz2.byteRepeats = 0
+ continue
+ }
+
+ if bz2.lastByte == int(b) {
+ bz2.byteRepeats++
+ } else {
+ bz2.byteRepeats = 0
+ }
+ bz2.lastByte = int(b)
+
+ buf[n] = b
+ n++
+ }
+
+ if n > 0 {
+ return
+ }
+
+ // No RLE data is pending so we need to read a block.
+
+ br := &bz2.br
+ magic := br.ReadBits64(48)
+ if magic == bzip2FinalMagic {
+ br.ReadBits64(32) // ignored CRC
+ bz2.eof = true
+ return 0, os.EOF
+ } else if magic != bzip2BlockMagic {
+ return 0, StructuralError("bad magic value found")
+ }
+
+ err = bz2.readBlock()
+ if err != nil {
+ return 0, err
+ }
+
+ return bz2.read(buf)
+}
+
+// readBlock reads a bzip2 block. The magic number should already have been consumed.
+func (bz2 *reader) readBlock() (err os.Error) {
+ br := &bz2.br
+ br.ReadBits64(32) // skip checksum. TODO: check it if we can figure out what it is.
+ randomized := br.ReadBits(1)
+ if randomized != 0 {
+ return StructuralError("deprecated randomized files")
+ }
+ origPtr := uint(br.ReadBits(24))
+
+ // If not every byte value is used in the block (i.e., it's text) then
+ // the symbol set is reduced. The symbols used are stored as a
+ // two-level, 16x16 bitmap.
+ symbolRangeUsedBitmap := br.ReadBits(16)
+ symbolPresent := make([]bool, 256)
+ numSymbols := 0
+ for symRange := uint(0); symRange < 16; symRange++ {
+ if symbolRangeUsedBitmap&(1<<(15-symRange)) != 0 {
+ bits := br.ReadBits(16)
+ for symbol := uint(0); symbol < 16; symbol++ {
+ if bits&(1<<(15-symbol)) != 0 {
+ symbolPresent[16*symRange+symbol] = true
+ numSymbols++
+ }
+ }
+ }
+ }
+
+ // A block uses between two and six different Huffman trees.
+ numHuffmanTrees := br.ReadBits(3)
+ if numHuffmanTrees < 2 || numHuffmanTrees > 6 {
+ return StructuralError("invalid number of Huffman trees")
+ }
+
+ // The Huffman tree can switch every 50 symbols so there's a list of
+ // tree indexes telling us which tree to use for each 50 symbol block.
+ numSelectors := br.ReadBits(15)
+ treeIndexes := make([]uint8, numSelectors)
+
+ // The tree indexes are move-to-front transformed and stored as unary
+ // numbers.
+ mtfTreeDecoder := newMTFDecoderWithRange(numHuffmanTrees)
+ for i := range treeIndexes {
+ c := 0
+ for {
+ inc := br.ReadBits(1)
+ if inc == 0 {
+ break
+ }
+ c++
+ }
+ if c >= numHuffmanTrees {
+ return StructuralError("tree index too large")
+ }
+ treeIndexes[i] = uint8(mtfTreeDecoder.Decode(c))
+ }
+
+ // The list of symbols for the move-to-front transform is taken from
+ // the previously decoded symbol bitmap.
+ symbols := make([]byte, numSymbols)
+ nextSymbol := 0
+ for i := 0; i < 256; i++ {
+ if symbolPresent[i] {
+ symbols[nextSymbol] = byte(i)
+ nextSymbol++
+ }
+ }
+ mtf := newMTFDecoder(symbols)
+
+ numSymbols += 2 // to account for RUNA and RUNB symbols
+ huffmanTrees := make([]huffmanTree, numHuffmanTrees)
+
+ // Now we decode the arrays of code-lengths for each tree.
+ lengths := make([]uint8, numSymbols)
+ for i := 0; i < numHuffmanTrees; i++ {
+ // The code lengths are delta encoded from a 5-bit base value.
+ length := br.ReadBits(5)
+ for j := 0; j < numSymbols; j++ {
+ for {
+ if !br.ReadBit() {
+ break
+ }
+ if br.ReadBit() {
+ length--
+ } else {
+ length++
+ }
+ }
+ if length < 0 || length > 20 {
+ return StructuralError("Huffman length out of range")
+ }
+ lengths[j] = uint8(length)
+ }
+ huffmanTrees[i], err = newHuffmanTree(lengths)
+ if err != nil {
+ return err
+ }
+ }
+
+ selectorIndex := 1 // the next tree index to use
+ currentHuffmanTree := huffmanTrees[treeIndexes[0]]
+ bufIndex := 0 // indexes bz2.buf, the output buffer.
+ // The output of the move-to-front transform is run-length encoded and
+ // we merge the decoding into the Huffman parsing loop. These two
+ // variables accumulate the repeat count. See the Wikipedia page for
+ // details.
+ repeat := 0
+ repeat_power := 0
+
+ // The `C' array (used by the inverse BWT) needs to be zero initialized.
+ for i := range bz2.c {
+ bz2.c[i] = 0
+ }
+
+ decoded := 0 // counts the number of symbols decoded by the current tree.
+ for {
+ if decoded == 50 {
+ currentHuffmanTree = huffmanTrees[treeIndexes[selectorIndex]]
+ selectorIndex++
+ decoded = 0
+ }
+
+ v := currentHuffmanTree.Decode(br)
+ decoded++
+
+ if v < 2 {
+ // This is either the RUNA or RUNB symbol.
+ if repeat == 0 {
+ repeat_power = 1
+ }
+ repeat += repeat_power << v
+ repeat_power <<= 1
+
+ // This limit of 2 million comes from the bzip2 source
+ // code. It prevents repeat from overflowing.
+ if repeat > 2*1024*1024 {
+ return StructuralError("repeat count too large")
+ }
+ continue
+ }
+
+ if repeat > 0 {
+ // We have decoded a complete run-length so we need to
+ // replicate the last output symbol.
+ for i := 0; i < repeat; i++ {
+ b := byte(mtf.First())
+ bz2.tt[bufIndex] = uint32(b)
+ bz2.c[b]++
+ bufIndex++
+ }
+ repeat = 0
+ }
+
+ if int(v) == numSymbols-1 {
+ // This is the EOF symbol. Because it's always at the
+ // end of the move-to-front list, and never gets moved
+ // to the front, it has this unique value.
+ break
+ }
+
+ // Since two metasymbols (RUNA and RUNB) have values 0 and 1,
+ // one would expect |v-2| to be passed to the MTF decoder.
+ // However, the front of the MTF list is never referenced as 0,
+ // it's always referenced with a run-length of 1. Thus 0
+ // doesn't need to be encoded and we have |v-1| in the next
+ // line.
+ b := byte(mtf.Decode(int(v - 1)))
+ bz2.tt[bufIndex] = uint32(b)
+ bz2.c[b]++
+ bufIndex++
+ }
+
+ if origPtr >= uint(bufIndex) {
+ return StructuralError("origPtr out of bounds")
+ }
+
+ // We have completed the entropy decoding. Now we can perform the
+ // inverse BWT and setup the RLE buffer.
+ bz2.preRLE = bz2.tt[:bufIndex]
+ bz2.preRLEUsed = 0
+ bz2.tPos = inverseBWT(bz2.preRLE, origPtr, bz2.c[:])
+ bz2.lastByte = -1
+ bz2.byteRepeats = 0
+ bz2.repeats = 0
+
+ return nil
+}
+
+// inverseBWT implements the inverse Burrows-Wheeler transform as described in
+// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf, section 4.2.
+// In that document, origPtr is called `I' and c is the `C' array after the
+// first pass over the data. It's an argument here because we merge the first
+// pass with the Huffman decoding.
+//
+// This also implements the `single array' method from the bzip2 source code
+// which leaves the output, still shuffled, in the bottom 8 bits of tt with the
+// index of the next byte in the top 24-bits. The index of the first byte is
+// returned.
+func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32 {
+ sum := uint(0)
+ for i := 0; i < 256; i++ {
+ sum += c[i]
+ c[i] = sum - c[i]
+ }
+
+ for i := range tt {
+ b := tt[i] & 0xff
+ tt[c[b]] |= uint32(i) << 8
+ c[b]++
+ }
+
+ return tt[origPtr] >> 8
+}
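
The final run-length step described in the read method above (after four equal bytes, the next byte is an extra repeat count, possibly zero) is easier to see applied to a whole buffer at once. An illustrative sketch, not the package's on-demand decoder:

package main

import "fmt"

// runLengthDecode applies bzip2's final RLE rule to a complete buffer
// (illustrative sketch): after four equal literal bytes, the next byte is
// an extra repeat count for that byte.
func runLengthDecode(in []byte) []byte {
	var out []byte
	last, repeats := -1, 0
	for _, b := range in {
		if repeats == 3 {
			// b is a count byte, not a literal.
			for j := 0; j < int(b); j++ {
				out = append(out, byte(last))
			}
			last, repeats = -1, 0
			continue
		}
		if int(b) == last {
			repeats++
		} else {
			repeats = 0
		}
		last = int(b)
		out = append(out, b)
	}
	return out
}

func main() {
	// "AAAA" followed by the count 3 expands to seven 'A's; "BB" stays literal.
	in := []byte{'A', 'A', 'A', 'A', 3, 'B', 'B'}
	fmt.Printf("%q\n", runLengthDecode(in)) // "AAAAAAABB"
}
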
diff --git a/src/pkg/compress/bzip2/bzip2_test.go b/src/pkg/compress/bzip2/bzip2_test.go
new file mode 100644
index 000000000..156eea83f
--- /dev/null
+++ b/src/pkg/compress/bzip2/bzip2_test.go
@@ -0,0 +1,158 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bzip2
+
+import (
+ "bytes"
+ "encoding/hex"
+ "io"
+ "io/ioutil"
+ "os"
+ "testing"
+)
+
+func TestBitReader(t *testing.T) {
+ buf := bytes.NewBuffer([]byte{0xaa})
+ br := newBitReader(buf)
+ if n := br.ReadBits(1); n != 1 {
+ t.Errorf("read 1 wrong")
+ }
+ if n := br.ReadBits(1); n != 0 {
+ t.Errorf("read 2 wrong")
+ }
+ if n := br.ReadBits(1); n != 1 {
+ t.Errorf("read 3 wrong")
+ }
+ if n := br.ReadBits(1); n != 0 {
+ t.Errorf("read 4 wrong")
+ }
+}
+
+func TestBitReaderLarge(t *testing.T) {
+ buf := bytes.NewBuffer([]byte{0x12, 0x34, 0x56, 0x78})
+ br := newBitReader(buf)
+ if n := br.ReadBits(32); n != 0x12345678 {
+ t.Errorf("got: %x want: %x", n, 0x12345678)
+ }
+}
+
+func readerFromHex(s string) io.Reader {
+ data, err := hex.DecodeString(s)
+ if err != nil {
+ panic("readerFromHex: bad input")
+ }
+ return bytes.NewBuffer(data)
+}
+
+func decompressHex(s string) (out []byte, err os.Error) {
+ r := NewReader(readerFromHex(s))
+ return ioutil.ReadAll(r)
+}
+
+func TestHelloWorldBZ2(t *testing.T) {
+ out, err := decompressHex(helloWorldBZ2Hex)
+ if err != nil {
+ t.Errorf("error from Read: %s", err)
+ return
+ }
+
+ if !bytes.Equal(helloWorld, out) {
+ t.Errorf("got %x, want %x", out, helloWorld)
+ }
+}
+
+func testZeros(t *testing.T, inHex string, n int) {
+ out, err := decompressHex(inHex)
+ if err != nil {
+ t.Errorf("error from Read: %s", err)
+ return
+ }
+
+ expected := make([]byte, n)
+
+ if !bytes.Equal(expected, out) {
+ allZeros := true
+ for _, b := range out {
+ if b != 0 {
+ allZeros = false
+ break
+ }
+ }
+ t.Errorf("incorrect result, got %d bytes (allZeros: %t)", len(out), allZeros)
+ }
+}
+
+func Test32Zeros(t *testing.T) {
+ testZeros(t, thirtyTwoZerosBZ2Hex, 32)
+}
+
+func Test1MBZeros(t *testing.T) {
+ testZeros(t, oneMBZerosBZ2Hex, 1024*1024)
+}
+
+func testRandomData(t *testing.T, compressedHex, uncompressedHex string) {
+ out, err := decompressHex(compressedHex)
+ if err != nil {
+ t.Errorf("error from Read: %s", err)
+ return
+ }
+
+ expected, _ := hex.DecodeString(uncompressedHex)
+
+ if !bytes.Equal(out, expected) {
+ t.Errorf("incorrect result\ngot: %x\nwant: %x", out, expected)
+ }
+}
+
+func TestRandomData1(t *testing.T) {
+ testRandomData(t, randBZ2Hex, randHex)
+}
+
+func TestRandomData2(t *testing.T) {
+ // This test involves several repeated bytes in the output, but they
+ // should trigger RLE decoding.
+ testRandomData(t, rand2BZ2Hex, rand2Hex)
+}
+
+func TestRandomData3(t *testing.T) {
+ // This test uses the full range of symbols.
+ testRandomData(t, rand3BZ2Hex, rand3Hex)
+}
+
+func Test1MBSawtooth(t *testing.T) {
+ out, err := decompressHex(oneMBSawtoothBZ2Hex)
+ if err != nil {
+ t.Errorf("error from Read: %s", err)
+ return
+ }
+
+ expected := make([]byte, 1024*1024)
+
+ for i := range expected {
+ expected[i] = byte(i)
+ }
+
+ if !bytes.Equal(out, expected) {
+ t.Error("incorrect result")
+ }
+}
+
+const helloWorldBZ2Hex = "425a68393141592653594eece83600000251800010400006449080200031064c4101a7a9a580bb9431f8bb9229c28482776741b0"
+
+var helloWorld = []byte("hello world\n")
+
+const thirtyTwoZerosBZ2Hex = "425a6839314159265359b5aa5098000000600040000004200021008283177245385090b5aa5098"
+const oneMBZerosBZ2Hex = "425a683931415926535938571ce50008084000c0040008200030cc0529a60806c4201e2ee48a70a12070ae39ca"
+
+const randBZ2Hex = "425a6839314159265359905d990d0001957fffffffffffafffffffffffffffffbfff6fffdfffffffffffffffffffffffffffffc002b6dd75676ed5b77720098320d11a64626981323d4da47a83131a13d09e8040f534cd4f4d27a464d193008cd09804601347a980026350c9886234d36864193d1351b44c136919e90340d26127a4cd264c32023009898981310c0344c340027a8303427a99a04c00003534c230d034f5006468d268cf54d36a3009a69a62626261311b40026013d34201a6934c9a604c98ca6c8460989fa9346234d30d3469a2604fd4131a7aa6d0046043d4c62098479269e89e835190d018d4c046001a11e801a0264792321932308c43a130688c260d46686804cd01a9e80981193684c6a68c00000004c4c20c04627a4c0000260003400d04c0681a01334026009a6f48041466132581ec5212b081d96b0effc16543e2228b052fcd30f2567ee8d970e0f10aabca68dd8270591c376cfc1baae0dba00aaff2d6caf6b211322c997cc18eaee5927f75185336bf907021324c71626c1dd20e22b9b0977f05d0f901eaa51db9fbaf7c603b4c87bc82890e6dd7e61d0079e27ec050dd788fd958152061cd01e222f9547cb9efc465d775b6fc98bac7d387bffd151ae09dadf19494f7a638e2eae58e550faba5fe6820ea520eb986096de4e527d80def3ba625e71fbefdcf7e7844e0a25d29b52dcd1344fca083737d42692aab38d230485f3c8ed54c2ed31f15cf0270c8143765b10b92157233fa1dfe0d7ce8ffe70b8b8f7250071701dfe9f1c94de362c9031455951c93eb098a6b50ee45c6131fefc3b6f9643e21f4adc59497138e246f5c57d834aa67c4f10d8bd8b3908d8130dd7388409c299a268eab3664fa4907c5c31574874bd8d388a4ab22b339660804e53e1b8d05867d40e3082560608d35d5d2c6054e8bab23da28f61f83efd41d25529ad6ea15fb50505cacfabb0902166427354ca3830a2c8415f21b19e592690fbe447020d685a4bcd16ecc4ff1a1c0e572627d0ef6265c008a43fc243240541061ed7840606be466d1c0dac2c53250ed567507d926c844154560d631960c65e15157829b2c7f16859f111a3a8cb72bf24ffa57a680c3be67b1be67c8dd8aea73ac2437a78df5b686d427080ebc01bd30b71a49f6ea31dc0f08e4849e38face96717690239538bc08b6cc5aa8d467cb9c36aa83d40ac7e58bddbfa185b22065e89a86c0145569d9e23726651aec49e31588d70f40fe9a4449dcf4f89eac220171e9c938e803dc195679651004b79ad33cc0c13aeeba5941b33ffeeb8fbe16e76c7811445c67b4269c90479433ddf9e8ed1d00c166b6c17217fb22c3ef1b0c1c7e28e185446a111c37f1ea6c07a59fbcc6546ecc6968d36ba58bc5489a5640647e426b0c39350cb6f07d5dc7a717648c4ec7f841467597ae1f65f408fd2d9940a4b1b860b3c9ae351dcae0b4425f7e8538710f2e40b7f70d13b51ac05ccc6ecda8264a88cad2d721d18132a9b9110a9e759c2483c77dcefc7e464ec88588174cb0c9abff93230ea0bed8decdd8ed8bfe2b5df0a253803678df04fab44c03b9ab7cc97d6e6d6fd0c4c840ce0efc498436f453bbb181603459471f2b588724592b222ec990614db530e10cadd84705621cfdd9261fa44a5f5806a2d74b575056b3c915255c65678f9c16e6dc00a99180fef1a840aff0e842ac02731080cc92782538360a60a727991013984da4fad95f79d5030677b7528d076b2483685fca4429edf804682fdc110dfc2f7c30e23e20a72e039108a0ad6fdee2f76985a4b4be4f5afc6101bf9d5042b657a05dc914e1424241766434"
+const randHex = "c95138082bdf2b9bfa5b1072b23f729735d42c785eeb94320fb14c265b9c2ca421d01a3db986df1ac2acde5a0e6bf955d6f95e61261540905928e195f1a66644cc7f37281744fff4dc6df35566a494c41a8167151950eb74f5fc45f85ad0e5ed28b49adfe218aa7ec1707e8e1d55825f61f72beda3b4c006b8c9188d7336a5d875329b1b58c27cc4e89ecbae02c7712400c39dd131d2c6de82e2863da51d472bdfb21ecce62cc9cf769ed28aedc7583d755da45a0d90874bda269dd53283a9bdfd05f95fc8e9a304bb338ea1a2111894678c18134f17d31a15d9bfc1237894650f3e715e2548639ecbddb845cfe4a46a7b3a3c540f48629488e8c869f1e9f3f4c552243a8105b20eb8e264994214349dae83b165fd6c2a5b8e83fce09fc0a80d3281c8d53a9a08095bd19cbc1388df23975646ed259e003d39261ee68cbece8bcf32971f7fe7e588e8ba8f5e8597909abaea693836a79a1964050ed910a45a0f13a58cd2d3ae18992c5b23082407fd920d0bf01e33118a017bb5e39f44931346845af52128f7965206759433a346034ea481671f501280067567619f5ecef6cded077f92ed7f3b3ce8e308c80f34ba06939e9303f91b4318c8c1dd4cc223c1f057ac0c91211c629cd30e46ee9ec1d9fd493086b7bc2bc83e33f08749a5d430b0ed4f79d70f481940c9b0930b16321886a0df4fa5a1465d5208c7d3494a7987d9a5e42aa256f0c9523947f8318d0ef0af3d59a45cfc2418d0785c9a548b32b81e7de18be7d55a69a4c156bbb3d7579c0ac8e9c72b24646e54b0d0e8725f8f49fb44ae3c6b9d0287be118586255a90a4a83483ed0328518037e52aa959c5748ed83e13023e532306be98b8288da306bbb040bcf5d92176f84a9306dc6b274b040370b61d71fde58dd6d20e6fee348eae0c54bd0a5a487b2d005f329794f2a902c296af0a4c1f638f63292a1fa18e006c1b1838636f4de71c73635b25660d32e88a0917e1a5677f6a02ca65585b82cbd99fb4badbfa97a585da1e6cadf6737b4ec6ca33f245d66ee6a9fae6785d69b003c17b9fc6ec34fe5824ab8caae5e8e14dc6f9e116e7bf4a60c04388783c8ae929e1b46b3ef3bbe81b38f2fa6da771bf39dfba2374d3d2ed356b8e2c42081d885a91a3afb2f31986d2f9873354c48cf5448492c32e62385af423aa4f83db6d1b2669650379a1134b0a04cbca0862d6f9743c791cbb527d36cd5d1f0fc7f503831c8bd1b7a0ef8ae1a5ed1155dfdd9e32b6bb33138112d3d476b802179cb85a2a6c354ccfed2f31604fbd8d6ec4baf9f1c8454f72c6588c06a7df3178c43a6970bfa02dd6f74cb5ec3b63f9eddaa17db5cbf27fac6de8e57c384afd0954179f7b5690c3bee42abc4fa79b4b12101a9cf5f0b9aecdda945def0bd04163237247d3539850e123fe18139f316fa0256d5bd2faa8"
+
+const oneMBSawtoothBZ2Hex = "425a683931415926535971931ea00006ddffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe007de00000000000000024c00130001300000000000000000000000000000000000000000000000000000000126000980009800000000000000000000000000000000000000000000000000000000930004c0004c000000000000000000000000000000000000000000000000000000004980026000260000000000000000000000000000000000000000000000000000000009aaaaa0000000000000000000000000000000000000000000000000000000000000000498002600026000000000000000000000000000000000000000000000000000000007fc42271980d044c0a822607411304a08982d044c1a82260f411308a08984d044c2a82261741130ca08986d044c3a82261f411310a08988d044c4a822627411314a0898ad044c5a82262f411318a0898cd044c6a82263741131ca0898ed044c7a82263f411320a08990d044c8a822647411324a08992d044c9a82264f411328a08994d044caa82265741132ca08996d044cba82265f411330a08998d044cca822667411334a0899ad044cda82266f411338a0899cd044cea82267741133ca0899ed044cfa82267f411340a089a0d044d0a822687411344a089a2d044d1a82268f411348a089a4d044d2a82269741134ca089a6d044d3a82269f411350a089a8d044d4a8226a7411354a089aad044d5a8226af411358a089acd044d6a8226b741135ca089aed044d7a8226bf411360a089b0d044d8a8226c7411364a089b2d044d9a8226cf411368a089b4d044daa8226d741136ca089b6d044dba8226df411370a089b8d044dca8226e7411374a089bad044dda8226ef411378a089bcd044dea8226f741137ca089bed044dfa8226ff411380a089c0d044e0a822707411384a089c2d044e1a82270f411388a089c4d044e2a82271741138ca089c59089c69089c71089c79089c81089c89089c91089c99089ca1089ca9089cb1089cb9089cc1089cc9089cd1089cd9089ce1089ce9089cf1089cf9089d01089d09089d11089d19089d21089d29089d31089d39089d41089d49089d51089d59089d61089d69089d71089d79089d81089d89089d91089d99089da1089da9089db1089db9089dc1089dc9089dd1089dd9089de1089de9089df1089df9089e01089e09089e11089e19089e21089e29089e31089e39089e41089e49089e51089e59089e61089e69089e71089e79089e81089e89089e91089e99089ea1089ea9089eb1089eb9089ec1089ec9089ed1089ed9089ee1089ee9089ef1089ef9089f01089f09089f11089f19089f21089f29089f31089f39089f41089f49089f51089f59089f61089f69089f71089f79089f81089f89089f91089f99089fa1089fa9089fb1089fb9089fc1089fc9089fd1089fd9089fe1089fe9089ff1089ff98a0ac9329acf23ba884804fdd3ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0034f800000000000024c00130001300000000000000000000000000000000000000000000000000000000126000980009800000000000000000000000000000000000000000000000000000000930004c0004c000000000000000000000000000000000000000000000000000000004980026000260000000000000000000000000000000000000000000000000000000024c0013000130000000000000000000000000000000000000000000000000000000002955540000000000000000000000000000000000000000000000000000000000000001ff108c00846024230221181908c108460a4230621183908c20846124230a21185908c308461a4230e21187908c40846224231221189908c508462a423162118b908c60846324231a2118d908c708463a4231e2118f908c80846424232221191908c908464a4232621193908ca0846524232a21195908cb08465a4232e21197908cc0846624233221199908cd08466a423362119b908ce0846724233a2119d908cf08467a4233e2119f908d008468242342211a1908d108468a42346211a3908d20846924234a211a5908d308469a4234e211a7908d40846a242352211a9908d50846aa42356211ab908d60846b24235a211ad908d70846ba4235e211af908d80846c242362211b1908d90846ca42366211b3908da0846d24236a211b5908db0846da4236e211b7908dc0846e242372211b9908dd0846ea42376211bb908de0846f24237a211bd908df0846fa4237e211bf908e008470242382211c1908e108470a42386211c3908e20847124238a211c5908e2f8c211c6c8471d211c7c84721211c8c84725211c9c84729211cac8472d211cbc84731211ccc84735211cdc84739211cec8473d211cfc84741211d0c84745211d1c84749211d2c8474d2
11d3c84751211d4c84755211d5c84759211d6c8475d211d7c84761211d8c84765211d9c84769211dac8476d211dbc84771211dcc84775211ddc84779211dec8477d211dfc84781211e0c84785211e1c84789211e2c8478d211e3c84791211e4c84795211e5c84799211e6c8479d211e7c847a1211e8c847a5211e9c847a9211eac847ad211ebc847b1211ecc847b5211edc847b9211eec847bd211efc847c1211f0c847c5211f1c847c9211f2c847cd211f3c847d1211f4c847d5211f5c847d9211f6c847dd211f7c847e1211f8c847e5211f9c847e9211fac847ed211fbc847f1211fcc847f5211fdc847f9211fec847fd211ff8bb9229c284803a8b6248"
+
+const rand2BZ2Hex = "425a6839314159265359d992d0f60000137dfe84020310091c1e280e100e042801099210094806c0110002e70806402000546034000034000000f2830000032000d3403264049270eb7a9280d308ca06ad28f6981bee1bf8160727c7364510d73a1e123083421b63f031f63993a0f40051fbf177245385090d992d0f60"
+const rand2Hex = "92d5652616ac444a4a04af1a8a3964aca0450d43d6cf233bd03233f4ba92f8719e6c2a2bd4f5f88db07ecd0da3a33b263483db9b2c158786ad6363be35d17335ba"
+
+const rand3BZ2Hex = "425a68393141592653593be669d00000327ffffffffffffffffffffffffffffffffffff7ffffffffffffffffffffffffffffffc002b3b2b1b6e2bae400004c00132300004c0d268c004c08c0130026001a008683234c0684c34008c230261a04c0260064d07a8d00034000d27a1268c9931a8d327a3427a41faa69ea0da264c1a34219326869b51b49a6469a3268c689fa53269a62794687a9a68f5189994c9e487a8f534fd49a3d34043629e8c93d04da4f4648d30d4f44d3234c4d3023d0840680984d309934c234d3131a000640984f536a6132601300130130c8d00d04d1841ea7a8d31a02609b40023460010c01a34d4c1a0d04d3069306810034d0d0d4c0046130d034d0131a9a64d321804c68003400098344c13000991808c0001a00000000098004d3d4da4604c47a13012140aadf8d673c922c607ef6212a8c0403adea4b28aee578900e653b9cdeb8d11e6b838815f3ebaad5a01c5408d84a332170aff8734d4e06612d3c2889f31925fb89e33561f5100ae89b1f7047102e729373d3667e58d73aaa80fa7be368a1cc2dadd81d81ec8e1b504bd772ca31d03649269b01ceddaca07bf3d4eba24de141be3f86f93601e03714c0f64654671684f9f9528626fd4e1b76753dc0c54b842486b8d59d8ab314e86ca818e7a1f079463cbbd70d9b79b283c7edc419406311022e4be98c2c1374df9cdde2d008ce1d00e5f06ad1024baf555631f70831fc1023034e62be7c4bcb648caf276963ffa20e96bb50377fe1c113da0db4625b50741c35a058edb009c6ee5dbf93b8a6b060eec568180e8db791b82aab96cbf4326ca98361461379425ba8dcc347be670bdba7641883e5526ae3d833f6e9cb9bac9557747c79e206151072f7f0071dff3880411846f66bf4075c7462f302b53cb3400a74cf35652ad5641ed33572fd54e7ed7f85f58a0acba89327e7c6be5c58cb71528b99df2431f1d0358f8d28d81d95292da631fb06701decabb205fac59ff0fb1df536afc681eece6ea658c4d9eaa45f1342aa1ff70bdaff2ddaf25ec88c22f12829a0553db1ec2505554cb17d7b282e213a5a2aa30431ded2bce665bb199d023840832fedb2c0c350a27291407ff77440792872137df281592e82076a05c64c345ffb058c64f7f7c207ef78420b7010520610f17e302cc4dfcfaef72a0ed091aab4b541eb0531bbe941ca2f792bf7b31ca6162882b68054a8470115bc2c19f2df2023f7800432b39b04d3a304e8085ba3f1f0ca5b1ba4d38d339e6084de979cdea6d0e244c6c9fa0366bd890621e3d30846f5e8497e21597b8f29bbf52c961a485dfbea647600da0fc1f25ce4d203a8352ece310c39073525044e7ac46acf2ed9120bae1b4f6f02364abfe343f80b290983160c103557af1c68416480d024cc31b6c06cfec011456f1e95c420a12b48b1c3fe220c2879a982fb099948ac440db844b9a112a5188c7783fd3b19593290785f908d95c9db4b280bafe89c1313aeec24772046d9bc089645f0d182a21184e143823c5f52de50e5d7e98d3d7ab56f5413bbccd1415c9bcff707def475b643fb7f29842582104d4cc1dbaaca8f10a2f44273c339e0984f2b1e06ab2f0771db01fafa8142298345f3196f23e5847bda024034b6f59b11c29e981c881456e40d211929fd4f766200258aad8212016322bd5c605790dcfdf1bd2a93d99c9b8f498722d311d7eae7ff420496a31804c55f4759a7b13aaaf5f7ce006c3a8a998897d5e0a504398c2b627852545baf440798bcc5cc049357cf3f17d9771e4528a1af3d77dc794a11346e1bdf5efe37a405b127b4c43b616d61fbc5dc914e14240ef99a7400"
+const rand3Hex = "1744b384d68c042371244e13500d4bfb98c6244e3d71a5b700224420b59c593553f33bd786e3d0ce31626f511bc985f59d1a88aa38ba8ad6218d306abee60dd9172540232b95be1af146c69e72e5fde667a090dc3f93bdc5c5af0ab80acdbaa7a505f628c59dc0247b31a439cacf5010a94376d71521df08c178b02fb96fdb1809144ea38c68536187c53201fea8631fb0a880b4451ccdca7cc61f6aafca21cc7449d920599db61789ac3b1e164b3390124f95022aeea39ccca3ec1053f4fa10de2978e2861ea58e477085c2220021a0927aa94c5d0006b5055abba340e4f9eba22e969978dfd18e278a8b89d877328ae34268bc0174cfe211954c0036f078025217d1269fac1932a03b05a0b616012271bbe1fb554171c7a59b196d8a4479f45a77931b5d97aaf6c0c673cbe597b79b96e2a0c1eae2e66e46ccc8c85798e23ffe972ebdaa3f6caea243c004e60321eb47cd79137d78fd0613be606feacc5b3637bdc96a89c13746db8cad886f3ccf912b2178c823bcac395f06d28080269bdca2debf3419c66c690fd1adcfbd53e32e79443d7a42511a84cb22ca94fffad9149275a075b2f8ae0b021dcde9bf62b102db920733b897560518b06e1ad7f4b03458493ddaa7f4fa2c1609f7a1735aeeb1b3e2cea3ab45fc376323cc91873b7e9c90d07c192e38d3f5dfc9bfab1fd821c854da9e607ea596c391c7ec4161c6c4493929a8176badaa5a5af7211c623f29643a937677d3df0da9266181b7c4da5dd40376db677fe8f4a1dc456adf6f33c1e37cec471dd318c2647644fe52f93707a77da7d1702380a80e14cc0fdce7bf2eed48a529090bae0388ee277ce6c7018c5fb00b88362554362205c641f0d0fab94fd5b8357b5ff08b207fee023709bc126ec90cfb17c006754638f8186aaeb1265e80be0c1189ec07d01d5f6f96cb9ce82744147d18490de7dc72862f42f024a16968891a356f5e7e0e695d8c933ba5b5e43ad4c4ade5399bc2cae9bb6189b7870d7f22956194d277f28b10e01c10c6ffe3e065f7e2d6d056aa790db5649ca84dc64c35566c0af1b68c32b5b7874aaa66467afa44f40e9a0846a07ae75360a641dd2acc69d93219b2891f190621511e62a27f5e4fbe641ece1fa234fc7e9a74f48d2a760d82160d9540f649256b169d1fed6fbefdc491126530f3cbad7913e19fbd7aa53b1e243fbf28d5f38c10ebd77c8b986775975cc1d619efb27cdcd733fa1ca36cffe9c0a33cc9f02463c91a886601fd349efee85ef1462065ef9bd2c8f533220ad93138b8382d5938103ab25b2d9af8ae106e1211eb9b18793fba033900c809c02cd6d17e2f3e6fc84dae873411f8e87c3f0a8f1765b7825d185ce3730f299c3028d4a62da9ee95c2b870fb70c79370d485f9d5d9acb78926d20444033d960524d2776dc31988ec7c0dbf23b9905d"
diff --git a/src/pkg/compress/bzip2/huffman.go b/src/pkg/compress/bzip2/huffman.go
new file mode 100644
index 000000000..dc05739c7
--- /dev/null
+++ b/src/pkg/compress/bzip2/huffman.go
@@ -0,0 +1,223 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bzip2
+
+import (
+ "os"
+ "sort"
+)
+
+// A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a
+// symbol.
+type huffmanTree struct {
+ // nodes contains all the non-leaf nodes in the tree. nodes[0] is the
+ // root of the tree and nextNode contains the index of the next element
+ // of nodes to use when the tree is being constructed.
+ nodes []huffmanNode
+ nextNode int
+}
+
+// A huffmanNode is a node in the tree. left and right contain indexes into the
+// nodes slice of the tree. If left or right is invalidNodeValue then the child
+// is a leaf node and its value is in leftValue/rightValue.
+//
+// The symbols are uint16s because bzip2 encodes not only MTF indexes in the
+// tree, but also two magic values for run-length encoding and an EOF symbol.
+// Thus there are more than 256 possible symbols.
+type huffmanNode struct {
+ left, right uint16
+ leftValue, rightValue uint16
+}
+
+// invalidNodeValue is an invalid index which marks a leaf node in the tree.
+const invalidNodeValue = 0xffff
+
+// Decode reads bits from the given bitReader and navigates the tree until a
+// symbol is found.
+func (t huffmanTree) Decode(br *bitReader) (v uint16) {
+ nodeIndex := uint16(0) // node 0 is the root of the tree.
+
+ for {
+ node := &t.nodes[nodeIndex]
+ bit := br.ReadBit()
+ // bzip2 encodes left as a true bit.
+ if bit {
+ // left
+ if node.left == invalidNodeValue {
+ return node.leftValue
+ }
+ nodeIndex = node.left
+ } else {
+ // right
+ if node.right == invalidNodeValue {
+ return node.rightValue
+ }
+ nodeIndex = node.right
+ }
+ }
+
+ panic("unreachable")
+}
+
+// newHuffmanTree builds a Huffman tree from a slice containing the code
+// lengths of each symbol. The maximum code length is 32 bits.
+func newHuffmanTree(lengths []uint8) (huffmanTree, os.Error) {
+ // There are many possible trees that assign the same code length to
+ // each symbol (consider reflecting a tree down the middle, for
+ // example). Since the code length assignments determine the
+ // efficiency of the tree, each of these trees is equally good. In
+ // order to minimize the amount of information needed to build a tree
+ // bzip2 uses a canonical tree so that it can be reconstructed given
+ // only the code length assignments.
+
+ if len(lengths) < 2 {
+ panic("newHuffmanTree: too few symbols")
+ }
+
+ var t huffmanTree
+
+ // First we sort the code length assignments by ascending code length,
+ // using the symbol value to break ties.
+ pairs := huffmanSymbolLengthPairs(make([]huffmanSymbolLengthPair, len(lengths)))
+ for i, length := range lengths {
+ pairs[i].value = uint16(i)
+ pairs[i].length = length
+ }
+
+ sort.Sort(pairs)
+
+ // Now we assign codes to the symbols, starting with the longest code.
+ // We keep the codes packed into a uint32, at the most-significant end.
+ // So branches are taken from the MSB downwards. This makes it easy to
+ // sort them later.
+ code := uint32(0)
+ length := uint8(32)
+
+ codes := huffmanCodes(make([]huffmanCode, len(lengths)))
+ for i := len(pairs) - 1; i >= 0; i-- {
+ if length > pairs[i].length {
+ // If the code length decreases we shift in order to
+ // zero any bits beyond the end of the code.
+ code >>= 32 - pairs[i].length
+ code <<= 32 - pairs[i].length
+ length = pairs[i].length
+ }
+ codes[i].code = code
+ codes[i].codeLen = length
+ codes[i].value = pairs[i].value
+ // We need to 'increment' the code, which means treating |code|
+ // like a |length| bit number.
+ code += 1 << (32 - length)
+ }
+
+ // Now we can sort by the code so that the left half of each branch are
+ // grouped together, recursively.
+ sort.Sort(codes)
+
+ t.nodes = make([]huffmanNode, len(codes))
+ _, err := buildHuffmanNode(&t, codes, 0)
+ return t, err
+}
+
+// huffmanSymbolLengthPair contains a symbol and its code length.
+type huffmanSymbolLengthPair struct {
+ value uint16
+ length uint8
+}
+
+// huffmanSymbolLengthPairs is used to provide an interface for sorting.
+type huffmanSymbolLengthPairs []huffmanSymbolLengthPair
+
+func (h huffmanSymbolLengthPairs) Len() int {
+ return len(h)
+}
+
+func (h huffmanSymbolLengthPairs) Less(i, j int) bool {
+ if h[i].length < h[j].length {
+ return true
+ }
+ if h[i].length > h[j].length {
+ return false
+ }
+ if h[i].value < h[j].value {
+ return true
+ }
+ return false
+}
+
+func (h huffmanSymbolLengthPairs) Swap(i, j int) {
+ h[i], h[j] = h[j], h[i]
+}
+
+// huffmanCode contains a symbol, its code and code length.
+type huffmanCode struct {
+ code uint32
+ codeLen uint8
+ value uint16
+}
+
+// huffmanCodes is used to provide an interface for sorting.
+type huffmanCodes []huffmanCode
+
+func (n huffmanCodes) Len() int {
+ return len(n)
+}
+
+func (n huffmanCodes) Less(i, j int) bool {
+ return n[i].code < n[j].code
+}
+
+func (n huffmanCodes) Swap(i, j int) {
+ n[i], n[j] = n[j], n[i]
+}
+
+// buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in
+// the Huffman tree at the given level. It returns the index of the newly
+// constructed node.
+func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIndex uint16, err os.Error) {
+ test := uint32(1) << (31 - level)
+
+ // We have to search the list of codes to find the divide between the left and right sides.
+ firstRightIndex := len(codes)
+ for i, code := range codes {
+ if code.code&test != 0 {
+ firstRightIndex = i
+ break
+ }
+ }
+
+ left := codes[:firstRightIndex]
+ right := codes[firstRightIndex:]
+
+ if len(left) == 0 || len(right) == 0 {
+ return 0, StructuralError("superfluous level in Huffman tree")
+ }
+
+ nodeIndex = uint16(t.nextNode)
+ node := &t.nodes[t.nextNode]
+ t.nextNode++
+
+ if len(left) == 1 {
+ // leaf node
+ node.left = invalidNodeValue
+ node.leftValue = left[0].value
+ } else {
+ node.left, err = buildHuffmanNode(t, left, level+1)
+ }
+
+ if err != nil {
+ return
+ }
+
+ if len(right) == 1 {
+ // leaf node
+ node.right = invalidNodeValue
+ node.rightValue = right[0].value
+ } else {
+ node.right, err = buildHuffmanNode(t, right, level+1)
+ }
+
+ return
+}
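
newHuffmanTree above rebuilds a canonical code from the code lengths alone. The sketch below shows the same idea in its conventional shortest-code-first form; it uses sort.Slice for brevity rather than the sort.Interface pattern in this file, and unlike the package it does not pack the codes at the most-significant end of a uint32. Illustrative only, not the package's algorithm:

package main

import (
	"fmt"
	"sort"
)

// canonicalCodes assigns canonical Huffman codes from code lengths
// (illustrative sketch): symbols are ordered by length, then value, and
// consecutive codes are handed out, appending zero bits whenever the
// length grows. The whole code is therefore recoverable from the lengths.
func canonicalCodes(lengths []uint8) []uint32 {
	if len(lengths) == 0 {
		return nil
	}
	type sym struct {
		value  int
		length uint8
	}
	syms := make([]sym, len(lengths))
	for i, l := range lengths {
		syms[i] = sym{i, l}
	}
	sort.Slice(syms, func(i, j int) bool {
		if syms[i].length != syms[j].length {
			return syms[i].length < syms[j].length
		}
		return syms[i].value < syms[j].value
	})

	codes := make([]uint32, len(lengths))
	code := uint32(0)
	prev := syms[0].length
	for _, s := range syms {
		code <<= s.length - prev // longer codes gain zero bits on the right
		prev = s.length
		codes[s.value] = code
		code++
	}
	return codes
}

func main() {
	// Lengths {1, 2, 3, 3} produce the codes 0, 10, 110 and 111.
	for v, c := range canonicalCodes([]uint8{1, 2, 3, 3}) {
		fmt.Printf("symbol %d: code %b\n", v, c)
	}
}
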
diff --git a/src/pkg/compress/bzip2/move_to_front.go b/src/pkg/compress/bzip2/move_to_front.go
new file mode 100644
index 000000000..0ed19dec3
--- /dev/null
+++ b/src/pkg/compress/bzip2/move_to_front.go
@@ -0,0 +1,105 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bzip2
+
+// moveToFrontDecoder implements a move-to-front list. Such a list is an
+// efficient way to transform a string with repeating elements into one with
+// many small valued numbers, which is suitable for entropy encoding. It works
+// by starting with an initial list of symbols and referencing symbols by their
+// index into that list. When a symbol is referenced, it's moved to the front
+// of the list. Thus, a repeated symbol ends up being encoded with many zeros,
+// as the symbol will be at the front of the list after the first access.
+type moveToFrontDecoder struct {
+ // Rather than actually keep the list in memory, the symbols are stored
+ // as a circular, doubly linked list with the symbol indexed by head
+ // at the front of the list.
+ symbols []byte
+ next []uint8
+ prev []uint8
+ head uint8
+}
+
+// newMTFDecoder creates a move-to-front decoder with an explicit initial list
+// of symbols.
+func newMTFDecoder(symbols []byte) *moveToFrontDecoder {
+ if len(symbols) > 256 {
+ panic("too many symbols")
+ }
+
+ m := &moveToFrontDecoder{
+ symbols: symbols,
+ next: make([]uint8, len(symbols)),
+ prev: make([]uint8, len(symbols)),
+ }
+
+ m.threadLinkedList()
+ return m
+}
+
+// newMTFDecoderWithRange creates a move-to-front decoder with an initial
+// symbol list of 0...n-1.
+func newMTFDecoderWithRange(n int) *moveToFrontDecoder {
+ if n > 256 {
+ panic("newMTFDecoderWithRange: cannot have > 256 symbols")
+ }
+
+ m := &moveToFrontDecoder{
+ symbols: make([]uint8, n),
+ next: make([]uint8, n),
+ prev: make([]uint8, n),
+ }
+
+ for i := 0; i < n; i++ {
+ m.symbols[i] = byte(i)
+ }
+
+ m.threadLinkedList()
+ return m
+}
+
+// threadLinkedList creates the initial linked-list pointers.
+func (m *moveToFrontDecoder) threadLinkedList() {
+ if len(m.symbols) == 0 {
+ return
+ }
+
+ m.prev[0] = uint8(len(m.symbols) - 1)
+
+ for i := 0; i < len(m.symbols)-1; i++ {
+ m.next[i] = uint8(i + 1)
+ m.prev[i+1] = uint8(i)
+ }
+
+ m.next[len(m.symbols)-1] = 0
+}
+
+func (m *moveToFrontDecoder) Decode(n int) (b byte) {
+ // Most of the time, n will be zero so it's worth dealing with this
+ // simple case.
+ if n == 0 {
+ return m.symbols[m.head]
+ }
+
+ i := m.head
+ for j := 0; j < n; j++ {
+ i = m.next[i]
+ }
+ b = m.symbols[i]
+
+ m.next[m.prev[i]] = m.next[i]
+ m.prev[m.next[i]] = m.prev[i]
+ m.next[i] = m.head
+ m.prev[i] = m.prev[m.head]
+ m.next[m.prev[m.head]] = i
+ m.prev[m.head] = i
+ m.head = i
+
+ return
+}
+
+// First returns the symbol at the front of the list.
+func (m *moveToFrontDecoder) First() byte {
+ return m.symbols[m.head]
+}
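
The decoder above keeps the symbol list as a circular, doubly linked list for speed; the transform itself is simpler to follow with a plain slice. An illustrative sketch, not the package's implementation:

package main

import "fmt"

// mtfDecode decodes move-to-front indexes against an initial symbol list
// (illustrative sketch): index n selects the n-th symbol of the current
// list, which is then moved to the front, so a run of one symbol decodes
// from a run of zeros.
func mtfDecode(symbols []byte, indexes []int) []byte {
	list := append([]byte(nil), symbols...)
	out := make([]byte, 0, len(indexes))
	for _, n := range indexes {
		b := list[n]
		copy(list[1:n+1], list[:n]) // shift the prefix right by one place
		list[0] = b
		out = append(out, b)
	}
	return out
}

func main() {
	// With the initial list "abc", the indexes 2, 0, 0, 1 decode to "ccca":
	// the repeated 'c' costs a single 2 and then zeros.
	fmt.Printf("%q\n", mtfDecode([]byte("abc"), []int{2, 0, 0, 1}))
}
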
diff --git a/src/pkg/compress/flate/Makefile b/src/pkg/compress/flate/Makefile
new file mode 100644
index 000000000..197828a92
--- /dev/null
+++ b/src/pkg/compress/flate/Makefile
@@ -0,0 +1,17 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=compress/flate
+GOFILES=\
+ deflate.go\
+ huffman_bit_writer.go\
+ huffman_code.go\
+ inflate.go\
+ reverse_bits.go\
+ token.go\
+ util.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/compress/flate/deflate.go b/src/pkg/compress/flate/deflate.go
new file mode 100644
index 000000000..b1cee0b2f
--- /dev/null
+++ b/src/pkg/compress/flate/deflate.go
@@ -0,0 +1,493 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+ "io"
+ "math"
+ "os"
+)
+
+const (
+ NoCompression = 0
+ BestSpeed = 1
+ fastCompression = 3
+ BestCompression = 9
+ DefaultCompression = -1
+ logWindowSize = 15
+ windowSize = 1 << logWindowSize
+ windowMask = windowSize - 1
+ logMaxOffsetSize = 15 // Standard DEFLATE
+ minMatchLength = 3 // The smallest match that the compressor looks for
+ maxMatchLength = 258 // The longest match for the compressor
+ minOffsetSize = 1 // The shortest offset that makes any sense
+
+ // The maximum number of tokens we put into a single flate block, just to
+ // stop things from getting too large.
+ maxFlateBlockTokens = 1 << 14
+ maxStoreBlockSize = 65535
+ hashBits = 15
+ hashSize = 1 << hashBits
+ hashMask = (1 << hashBits) - 1
+ hashShift = (hashBits + minMatchLength - 1) / minMatchLength
+)
+
+type compressionLevel struct {
+ good, lazy, nice, chain, fastSkipHashing int
+}
+
+var levels = []compressionLevel{
+ {}, // 0
+ // For levels 1-3 we don't bother trying with lazy matches
+ {3, 0, 8, 4, 4},
+ {3, 0, 16, 8, 5},
+ {3, 0, 32, 32, 6},
+ // Levels 4-9 use increasingly more lazy matching
+ // and increasingly stringent conditions for "good enough".
+ {4, 4, 16, 16, math.MaxInt32},
+ {8, 16, 32, 32, math.MaxInt32},
+ {8, 16, 128, 128, math.MaxInt32},
+ {8, 32, 128, 256, math.MaxInt32},
+ {32, 128, 258, 1024, math.MaxInt32},
+ {32, 258, 258, 4096, math.MaxInt32},
+}
+
+type compressor struct {
+ compressionLevel
+
+ w *huffmanBitWriter
+
+ // compression algorithm
+ fill func(*compressor, []byte) int // copy data to window
+ step func(*compressor) // process window
+ sync bool // requesting flush
+
+ // Input hash chains
+ // hashHead[hashValue] contains the largest inputIndex with the specified hash value
+ // If hashHead[hashValue] is within the current window, then
+ // hashPrev[hashHead[hashValue] & windowMask] contains the previous index
+ // with the same hash value.
+ chainHead int
+ hashHead []int
+ hashPrev []int
+
+ // input window: unprocessed data is window[index:windowEnd]
+ index int
+ window []byte
+ windowEnd int
+ blockStart int // window index where current tokens start
+ byteAvailable bool // if true, still need to process window[index-1].
+
+ // queued output tokens: tokens[:ti]
+ tokens []token
+ ti int
+
+ // deflate state
+ length int
+ offset int
+ hash int
+ maxInsertIndex int
+ err os.Error
+}
+
+func (d *compressor) fillDeflate(b []byte) int {
+ if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
+ // shift the window by windowSize
+ copy(d.window, d.window[windowSize:2*windowSize])
+ d.index -= windowSize
+ d.windowEnd -= windowSize
+ if d.blockStart >= windowSize {
+ d.blockStart -= windowSize
+ } else {
+ d.blockStart = math.MaxInt32
+ }
+ for i, h := range d.hashHead {
+ v := h - windowSize
+ if v < -1 {
+ v = -1
+ }
+ d.hashHead[i] = v
+ }
+ for i, h := range d.hashPrev {
+ v := -h - windowSize
+ if v < -1 {
+ v = -1
+ }
+ d.hashPrev[i] = v
+ }
+ }
+ n := copy(d.window[d.windowEnd:], b)
+ d.windowEnd += n
+ return n
+}
+
+func (d *compressor) writeBlock(tokens []token, index int, eof bool) os.Error {
+ if index > 0 || eof {
+ var window []byte
+ if d.blockStart <= index {
+ window = d.window[d.blockStart:index]
+ }
+ d.blockStart = index
+ d.w.writeBlock(tokens, eof, window)
+ return d.w.err
+ }
+ return nil
+}
+
+// Try to find a match starting at index whose length is greater than prevLength.
+// We only look at d.chain possibilities before giving up.
+func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
+ minMatchLook := maxMatchLength
+ if lookahead < minMatchLook {
+ minMatchLook = lookahead
+ }
+
+ win := d.window[0 : pos+minMatchLook]
+
+ // We quit when we get a match that's at least nice long
+ nice := len(win) - pos
+ if d.nice < nice {
+ nice = d.nice
+ }
+
+ // If we've got a match that's good enough, only look in 1/4 the chain.
+ tries := d.chain
+ length = prevLength
+ if length >= d.good {
+ tries >>= 2
+ }
+
+ w0 := win[pos]
+ w1 := win[pos+1]
+ wEnd := win[pos+length]
+ minIndex := pos - windowSize
+
+ for i := prevHead; tries > 0; tries-- {
+ if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
+ // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches
+
+ n := 3
+ for pos+n < len(win) && win[i+n] == win[pos+n] {
+ n++
+ }
+ if n > length && (n > 3 || pos-i <= 4096) {
+ length = n
+ offset = pos - i
+ ok = true
+ if n >= nice {
+ // The match is good enough that we don't try to find a better one.
+ break
+ }
+ wEnd = win[pos+n]
+ }
+ }
+ if i == minIndex {
+ // hashPrev[i & windowMask] has already been overwritten, so stop now.
+ break
+ }
+ if i = d.hashPrev[i&windowMask]; i < minIndex || i < 0 {
+ break
+ }
+ }
+ return
+}
+
+func (d *compressor) writeStoredBlock(buf []byte) os.Error {
+ if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
+ return d.w.err
+ }
+ d.w.writeBytes(buf)
+ return d.w.err
+}
+
+func (d *compressor) initDeflate() {
+ d.hashHead = make([]int, hashSize)
+ d.hashPrev = make([]int, windowSize)
+ d.window = make([]byte, 2*windowSize)
+ fillInts(d.hashHead, -1)
+ d.tokens = make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1)
+ d.length = minMatchLength - 1
+ d.offset = 0
+ d.byteAvailable = false
+ d.index = 0
+ d.ti = 0
+ d.hash = 0
+ d.chainHead = -1
+}
+
+func (d *compressor) deflate() {
+ if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
+ return
+ }
+
+ d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
+ if d.index < d.maxInsertIndex {
+ d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1])
+ }
+
+Loop:
+ for {
+ if d.index > d.windowEnd {
+ panic("index > windowEnd")
+ }
+ lookahead := d.windowEnd - d.index
+ if lookahead < minMatchLength+maxMatchLength {
+ if !d.sync {
+ break Loop
+ }
+ if d.index > d.windowEnd {
+ panic("index > windowEnd")
+ }
+ if lookahead == 0 {
+ // Flush current output block if any.
+ if d.byteAvailable {
+ // There is still one pending token that needs to be flushed
+ d.tokens[d.ti] = literalToken(uint32(d.window[d.index-1]))
+ d.ti++
+ d.byteAvailable = false
+ }
+ if d.ti > 0 {
+ if d.err = d.writeBlock(d.tokens[0:d.ti], d.index, false); d.err != nil {
+ return
+ }
+ d.ti = 0
+ }
+ break Loop
+ }
+ }
+ if d.index < d.maxInsertIndex {
+ // Update the hash
+ d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
+ d.chainHead = d.hashHead[d.hash]
+ d.hashPrev[d.index&windowMask] = d.chainHead
+ d.hashHead[d.hash] = d.index
+ }
+ prevLength := d.length
+ prevOffset := d.offset
+ d.length = minMatchLength - 1
+ d.offset = 0
+ minIndex := d.index - windowSize
+ if minIndex < 0 {
+ minIndex = 0
+ }
+
+ if d.chainHead >= minIndex &&
+ (d.fastSkipHashing != 0 && lookahead > minMatchLength-1 ||
+ d.fastSkipHashing == 0 && lookahead > prevLength && prevLength < d.lazy) {
+ if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead, minMatchLength-1, lookahead); ok {
+ d.length = newLength
+ d.offset = newOffset
+ }
+ }
+ if d.fastSkipHashing != 0 && d.length >= minMatchLength ||
+ d.fastSkipHashing == 0 && prevLength >= minMatchLength && d.length <= prevLength {
+ // There was a match at the previous step, and the current match is
+ // not better. Output the previous match.
+ if d.fastSkipHashing != 0 {
+ d.tokens[d.ti] = matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize))
+ } else {
+ d.tokens[d.ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize))
+ }
+ d.ti++
+ // Insert in the hash table all strings up to the end of the match.
+ // index and index-1 are already inserted. If there is not enough
+ // lookahead, the last two strings are not inserted into the hash
+ // table.
+ if d.length <= d.fastSkipHashing {
+ var newIndex int
+ if d.fastSkipHashing != 0 {
+ newIndex = d.index + d.length
+ } else {
+ newIndex = d.index + prevLength - 1
+ }
+ for d.index++; d.index < newIndex; d.index++ {
+ if d.index < d.maxInsertIndex {
+ d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
+ // Get previous value with the same hash.
+ // Our chain should point to the previous value.
+ d.hashPrev[d.index&windowMask] = d.hashHead[d.hash]
+ // Set the head of the hash chain to us.
+ d.hashHead[d.hash] = d.index
+ }
+ }
+ if d.fastSkipHashing == 0 {
+ d.byteAvailable = false
+ d.length = minMatchLength - 1
+ }
+ } else {
+ // For matches this long, we don't bother inserting each individual
+ // item into the table.
+ d.index += d.length
+ d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1]))
+ }
+ if d.ti == maxFlateBlockTokens {
+ // The block includes the current character
+ if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+ return
+ }
+ d.ti = 0
+ }
+ } else {
+ if d.fastSkipHashing != 0 || d.byteAvailable {
+ i := d.index - 1
+ if d.fastSkipHashing != 0 {
+ i = d.index
+ }
+ d.tokens[d.ti] = literalToken(uint32(d.window[i]))
+ d.ti++
+ if d.ti == maxFlateBlockTokens {
+ if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil {
+ return
+ }
+ d.ti = 0
+ }
+ }
+ d.index++
+ if d.fastSkipHashing == 0 {
+ d.byteAvailable = true
+ }
+ }
+ }
+}
+
+func (d *compressor) fillStore(b []byte) int {
+ n := copy(d.window[d.windowEnd:], b)
+ d.windowEnd += n
+ return n
+}
+
+func (d *compressor) store() {
+ if d.windowEnd > 0 {
+ d.err = d.writeStoredBlock(d.window[:d.windowEnd])
+ }
+ d.windowEnd = 0
+}
+
+func (d *compressor) write(b []byte) (n int, err os.Error) {
+ n = len(b)
+ b = b[d.fill(d, b):]
+ for len(b) > 0 {
+ d.step(d)
+ b = b[d.fill(d, b):]
+ }
+ return n, d.err
+}
+
+func (d *compressor) syncFlush() os.Error {
+ d.sync = true
+ d.step(d)
+ if d.err == nil {
+ d.w.writeStoredHeader(0, false)
+ d.w.flush()
+ d.err = d.w.err
+ }
+ d.sync = false
+ return d.err
+}
+
+func (d *compressor) init(w io.Writer, level int) (err os.Error) {
+ d.w = newHuffmanBitWriter(w)
+
+ switch {
+ case level == NoCompression:
+ d.window = make([]byte, maxStoreBlockSize)
+ d.fill = (*compressor).fillStore
+ d.step = (*compressor).store
+ case level == DefaultCompression:
+ level = 6
+ fallthrough
+ case 1 <= level && level <= 9:
+ d.compressionLevel = levels[level]
+ d.initDeflate()
+ d.fill = (*compressor).fillDeflate
+ d.step = (*compressor).deflate
+ default:
+ return WrongValueError{"level", 0, 9, int32(level)}
+ }
+ return nil
+}
+
+func (d *compressor) close() os.Error {
+ d.sync = true
+ d.step(d)
+ if d.err != nil {
+ return d.err
+ }
+ if d.w.writeStoredHeader(0, true); d.w.err != nil {
+ return d.w.err
+ }
+ d.w.flush()
+ return d.w.err
+}
+
+// NewWriter returns a new Writer compressing
+// data at the given level. Following zlib, levels
+// range from 1 (BestSpeed) to 9 (BestCompression);
+// higher levels typically run slower but compress more.
+// Level 0 (NoCompression) does not attempt any
+// compression; it only adds the necessary DEFLATE framing.
+func NewWriter(w io.Writer, level int) *Writer {
+ const logWindowSize = logMaxOffsetSize
+ var dw Writer
+ dw.d.init(w, level)
+ return &dw
+}
+
+// NewWriterDict is like NewWriter but initializes the new
+// Writer with a preset dictionary. The returned Writer behaves
+// as if the dictionary had been written to it without producing
+// any compressed output. The compressed data written to w
+// can only be decompressed by a Reader initialized with the
+// same dictionary.
+func NewWriterDict(w io.Writer, level int, dict []byte) *Writer {
+ dw := &dictWriter{w, false}
+ zw := NewWriter(dw, level)
+ zw.Write(dict)
+ zw.Flush()
+ dw.enabled = true
+ return zw
+}
+
+type dictWriter struct {
+ w io.Writer
+ enabled bool
+}
+
+func (w *dictWriter) Write(b []byte) (n int, err os.Error) {
+ if w.enabled {
+ return w.w.Write(b)
+ }
+ return len(b), nil
+}
+
+// A Writer takes data written to it and writes the compressed
+// form of that data to an underlying writer (see NewWriter).
+type Writer struct {
+ d compressor
+}
+
+// Write writes data to w, which will eventually write the
+// compressed form of data to its underlying writer.
+func (w *Writer) Write(data []byte) (n int, err os.Error) {
+ return w.d.write(data)
+}
+
+// Flush flushes any pending compressed data to the underlying writer.
+// It is useful mainly in compressed network protocols, to ensure that
+// a remote reader has enough data to reconstruct a packet.
+// Flush does not return until the data has been written.
+// If the underlying writer returns an error, Flush returns that error.
+//
+// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
+func (w *Writer) Flush() os.Error {
+ // For more about flushing:
+ // http://www.bolet.org/~pornin/deflate-flush.html
+ return w.d.syncFlush()
+}
+
+// Close flushes and closes the writer.
+func (w *Writer) Close() os.Error {
+ return w.d.close()
+}
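
A round-trip sketch using the Writer defined in this file together with NewReader from inflate.go (listed in the diffstat above but not shown in this excerpt). It follows the r60-era API, in which NewWriter returns only a *Writer and errors surface through Write and Close; error checks are elided for brevity:

package main

import (
	"bytes"
	"compress/flate"
	"fmt"
	"io/ioutil"
)

func main() {
	// Compress a short, repetitive string at the highest level.
	var buf bytes.Buffer
	w := flate.NewWriter(&buf, flate.BestCompression)
	w.Write([]byte("hello, hello, hello world\n"))
	w.Close()
	compressedSize := buf.Len()

	// Decompress it again; NewReader wraps a raw DEFLATE stream.
	r := flate.NewReader(&buf)
	out, _ := ioutil.ReadAll(r)
	fmt.Printf("%d compressed bytes -> %q\n", compressedSize, out)
}
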
diff --git a/src/pkg/compress/flate/deflate_test.go b/src/pkg/compress/flate/deflate_test.go
new file mode 100644
index 000000000..930823685
--- /dev/null
+++ b/src/pkg/compress/flate/deflate_test.go
@@ -0,0 +1,321 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "sync"
+ "testing"
+)
+
+type deflateTest struct {
+ in []byte
+ level int
+ out []byte
+}
+
+type deflateInflateTest struct {
+ in []byte
+}
+
+type reverseBitsTest struct {
+ in uint16
+ bitCount uint8
+ out uint16
+}
+
+var deflateTests = []*deflateTest{
+ &deflateTest{[]byte{}, 0, []byte{1, 0, 0, 255, 255}},
+ &deflateTest{[]byte{0x11}, -1, []byte{18, 4, 4, 0, 0, 255, 255}},
+ &deflateTest{[]byte{0x11}, DefaultCompression, []byte{18, 4, 4, 0, 0, 255, 255}},
+ &deflateTest{[]byte{0x11}, 4, []byte{18, 4, 4, 0, 0, 255, 255}},
+
+ &deflateTest{[]byte{0x11}, 0, []byte{0, 1, 0, 254, 255, 17, 1, 0, 0, 255, 255}},
+ &deflateTest{[]byte{0x11, 0x12}, 0, []byte{0, 2, 0, 253, 255, 17, 18, 1, 0, 0, 255, 255}},
+ &deflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 0,
+ []byte{0, 8, 0, 247, 255, 17, 17, 17, 17, 17, 17, 17, 17, 1, 0, 0, 255, 255},
+ },
+ &deflateTest{[]byte{}, 1, []byte{1, 0, 0, 255, 255}},
+ &deflateTest{[]byte{0x11}, 1, []byte{18, 4, 4, 0, 0, 255, 255}},
+ &deflateTest{[]byte{0x11, 0x12}, 1, []byte{18, 20, 2, 4, 0, 0, 255, 255}},
+ &deflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 1, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}},
+ &deflateTest{[]byte{}, 9, []byte{1, 0, 0, 255, 255}},
+ &deflateTest{[]byte{0x11}, 9, []byte{18, 4, 4, 0, 0, 255, 255}},
+ &deflateTest{[]byte{0x11, 0x12}, 9, []byte{18, 20, 2, 4, 0, 0, 255, 255}},
+ &deflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 9, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}},
+}
+
+var deflateInflateTests = []*deflateInflateTest{
+ &deflateInflateTest{[]byte{}},
+ &deflateInflateTest{[]byte{0x11}},
+ &deflateInflateTest{[]byte{0x11, 0x12}},
+ &deflateInflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}},
+ &deflateInflateTest{[]byte{0x11, 0x10, 0x13, 0x41, 0x21, 0x21, 0x41, 0x13, 0x87, 0x78, 0x13}},
+ &deflateInflateTest{largeDataChunk()},
+}
+
+var reverseBitsTests = []*reverseBitsTest{
+ &reverseBitsTest{1, 1, 1},
+ &reverseBitsTest{1, 2, 2},
+ &reverseBitsTest{1, 3, 4},
+ &reverseBitsTest{1, 4, 8},
+ &reverseBitsTest{1, 5, 16},
+ &reverseBitsTest{17, 5, 17},
+ &reverseBitsTest{257, 9, 257},
+ &reverseBitsTest{29, 5, 23},
+}
+
+func largeDataChunk() []byte {
+ result := make([]byte, 100000)
+ for i := range result {
+ result[i] = byte(i * i & 0xFF)
+ }
+ return result
+}
+
+func TestDeflate(t *testing.T) {
+ for _, h := range deflateTests {
+ var buf bytes.Buffer
+ w := NewWriter(&buf, h.level)
+ w.Write(h.in)
+ w.Close()
+ if !bytes.Equal(buf.Bytes(), h.out) {
+ t.Errorf("Deflate(%d, %x) = %x, want %x", h.level, h.in, buf.Bytes(), h.out)
+ }
+ }
+}
+
+type syncBuffer struct {
+ buf bytes.Buffer
+ mu sync.RWMutex
+ closed bool
+ ready chan bool
+}
+
+func newSyncBuffer() *syncBuffer {
+ return &syncBuffer{ready: make(chan bool, 1)}
+}
+
+func (b *syncBuffer) Read(p []byte) (n int, err os.Error) {
+ for {
+ b.mu.RLock()
+ n, err = b.buf.Read(p)
+ b.mu.RUnlock()
+ if n > 0 || b.closed {
+ return
+ }
+ <-b.ready
+ }
+ panic("unreachable")
+}
+
+func (b *syncBuffer) signal() {
+ select {
+ case b.ready <- true:
+ default:
+ }
+}
+
+func (b *syncBuffer) Write(p []byte) (n int, err os.Error) {
+ n, err = b.buf.Write(p)
+ b.signal()
+ return
+}
+
+func (b *syncBuffer) WriteMode() {
+ b.mu.Lock()
+}
+
+func (b *syncBuffer) ReadMode() {
+ b.mu.Unlock()
+ b.signal()
+}
+
+func (b *syncBuffer) Close() os.Error {
+ b.closed = true
+ b.signal()
+ return nil
+}
+
+func testSync(t *testing.T, level int, input []byte, name string) {
+ if len(input) == 0 {
+ return
+ }
+
+ t.Logf("--testSync %d, %d, %s", level, len(input), name)
+ buf := newSyncBuffer()
+ buf1 := new(bytes.Buffer)
+ buf.WriteMode()
+ w := NewWriter(io.MultiWriter(buf, buf1), level)
+ r := NewReader(buf)
+
+ // Write half the input and read back.
+ for i := 0; i < 2; i++ {
+ var lo, hi int
+ if i == 0 {
+ lo, hi = 0, (len(input)+1)/2
+ } else {
+ lo, hi = (len(input)+1)/2, len(input)
+ }
+ t.Logf("#%d: write %d-%d", i, lo, hi)
+ if _, err := w.Write(input[lo:hi]); err != nil {
+ t.Errorf("testSync: write: %v", err)
+ return
+ }
+ if i == 0 {
+ if err := w.Flush(); err != nil {
+ t.Errorf("testSync: flush: %v", err)
+ return
+ }
+ } else {
+ if err := w.Close(); err != nil {
+ t.Errorf("testSync: close: %v", err)
+ }
+ }
+ buf.ReadMode()
+ out := make([]byte, hi-lo+1)
+ m, err := io.ReadAtLeast(r, out, hi-lo)
+ t.Logf("#%d: read %d", i, m)
+ if m != hi-lo || err != nil {
+ t.Errorf("testSync/%d (%d, %d, %s): read %d: %d, %v (%d left)", i, level, len(input), name, hi-lo, m, err, buf.buf.Len())
+ return
+ }
+ if !bytes.Equal(input[lo:hi], out[:hi-lo]) {
+ t.Errorf("testSync/%d: read wrong bytes: %x vs %x", i, input[lo:hi], out[:hi-lo])
+ return
+ }
+ // This test originally checked that after reading
+ // the first half of the input, there was nothing left
+ // in the read buffer (buf.buf.Len() != 0) but that is
+ // not necessarily the case: the write Flush may emit
+ // some extra framing bits that are not necessary
+ // to process to obtain the first half of the uncompressed
+ // data. The test ran correctly most of the time, because
+ // the background goroutine had usually read even
+ // those extra bits by now, but it's not a useful thing to
+ // check.
+ buf.WriteMode()
+ }
+ buf.ReadMode()
+ out := make([]byte, 10)
+ if n, err := r.Read(out); n > 0 || err != os.EOF {
+ t.Errorf("testSync (%d, %d, %s): final Read: %d, %v (hex: %x)", level, len(input), name, n, err, out[0:n])
+ }
+ if buf.buf.Len() != 0 {
+ t.Errorf("testSync (%d, %d, %s): extra data at end", level, len(input), name)
+ }
+ r.Close()
+
+ // stream should work for ordinary reader too
+ r = NewReader(buf1)
+ out, err := ioutil.ReadAll(r)
+ if err != nil {
+ t.Errorf("testSync: read: %s", err)
+ return
+ }
+ r.Close()
+ if !bytes.Equal(input, out) {
+ t.Errorf("testSync: decompress(compress(data)) != data: level=%d input=%s", level, name)
+ }
+}
+
+func testToFromWithLevel(t *testing.T, level int, input []byte, name string) os.Error {
+ buffer := bytes.NewBuffer(nil)
+ w := NewWriter(buffer, level)
+ w.Write(input)
+ w.Close()
+ r := NewReader(buffer)
+ out, err := ioutil.ReadAll(r)
+ if err != nil {
+ t.Errorf("read: %s", err)
+ return err
+ }
+ r.Close()
+ if !bytes.Equal(input, out) {
+ t.Errorf("decompress(compress(data)) != data: level=%d input=%s", level, name)
+ }
+
+ testSync(t, level, input, name)
+ return nil
+}
+
+func testToFrom(t *testing.T, input []byte, name string) {
+ for i := 0; i < 10; i++ {
+ testToFromWithLevel(t, i, input, name)
+ }
+}
+
+func TestDeflateInflate(t *testing.T) {
+ for i, h := range deflateInflateTests {
+ testToFrom(t, h.in, fmt.Sprintf("#%d", i))
+ }
+}
+
+func TestReverseBits(t *testing.T) {
+ for _, h := range reverseBitsTests {
+ if v := reverseBits(h.in, h.bitCount); v != h.out {
+ t.Errorf("reverseBits(%v,%v) = %v, want %v",
+ h.in, h.bitCount, v, h.out)
+ }
+ }
+}
+
+func TestDeflateInflateString(t *testing.T) {
+ gold, err := ioutil.ReadFile("../testdata/e.txt")
+ if err != nil {
+ t.Error(err)
+ }
+ testToFromWithLevel(t, 1, gold, "2.718281828...")
+}
+
+func TestReaderDict(t *testing.T) {
+ const (
+ dict = "hello world"
+ text = "hello again world"
+ )
+ var b bytes.Buffer
+ w := NewWriter(&b, 5)
+ w.Write([]byte(dict))
+ w.Flush()
+ b.Reset()
+ w.Write([]byte(text))
+ w.Close()
+
+ r := NewReaderDict(&b, []byte(dict))
+ data, err := ioutil.ReadAll(r)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if string(data) != "hello again world" {
+ t.Fatalf("read returned %q want %q", string(data), text)
+ }
+}
+
+func TestWriterDict(t *testing.T) {
+ const (
+ dict = "hello world"
+ text = "hello again world"
+ )
+ var b bytes.Buffer
+ w := NewWriter(&b, 5)
+ w.Write([]byte(dict))
+ w.Flush()
+ b.Reset()
+ w.Write([]byte(text))
+ w.Close()
+
+ var b1 bytes.Buffer
+ w = NewWriterDict(&b1, 5, []byte(dict))
+ w.Write([]byte(text))
+ w.Close()
+
+ if !bytes.Equal(b1.Bytes(), b.Bytes()) {
+ t.Fatalf("writer wrote %q want %q", b1.Bytes(), b.Bytes())
+ }
+}
diff --git a/src/pkg/compress/flate/flate_test.go b/src/pkg/compress/flate/flate_test.go
new file mode 100644
index 000000000..bfd3b8381
--- /dev/null
+++ b/src/pkg/compress/flate/flate_test.go
@@ -0,0 +1,139 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This test tests some internals of the flate package.
+// The tests in package compress/gzip serve as the
+// end-to-end test of the decompressor.
+
+package flate
+
+import (
+ "bytes"
+ "reflect"
+ "testing"
+)
+
+// The Huffman code lengths used by the fixed-format Huffman blocks.
+var fixedHuffmanBits = [...]int{
+ // 0-143 length 8
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+
+ // 144-255 length 9
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+
+ // 256-279 length 7
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+
+ // 280-287 length 8
+ 8, 8, 8, 8, 8, 8, 8, 8,
+}
+
+type InitDecoderTest struct {
+ in []int
+ out huffmanDecoder
+ ok bool
+}
+
+var initDecoderTests = []*InitDecoderTest{
+ // Example from Connell 1973,
+ &InitDecoderTest{
+ []int{3, 5, 2, 4, 3, 5, 5, 4, 4, 3, 4, 5},
+ huffmanDecoder{
+ 2, 5,
+ [maxCodeLen + 1]int{2: 0, 4, 13, 31},
+ [maxCodeLen + 1]int{2: 0, 1, 6, 20},
+ // Paper used different code assignment:
+ // 2, 9, 4, 0, 10, 8, 3, 7, 1, 5, 11, 6
+ // Reordered here so that codes of same length
+ // are assigned to increasing numbers.
+ []int{2, 0, 4, 9, 3, 7, 8, 10, 1, 5, 6, 11},
+ },
+ true,
+ },
+
+ // Example from RFC 1951 section 3.2.2
+ &InitDecoderTest{
+ []int{2, 1, 3, 3},
+ huffmanDecoder{
+ 1, 3,
+ [maxCodeLen + 1]int{1: 0, 2, 7},
+ [maxCodeLen + 1]int{1: 0, 1, 4},
+ []int{1, 0, 2, 3},
+ },
+ true,
+ },
+
+ // Second example from RFC 1951 section 3.2.2
+ &InitDecoderTest{
+ []int{3, 3, 3, 3, 3, 2, 4, 4},
+ huffmanDecoder{
+ 2, 4,
+ [maxCodeLen + 1]int{2: 0, 6, 15},
+ [maxCodeLen + 1]int{2: 0, 1, 8},
+ []int{5, 0, 1, 2, 3, 4, 6, 7},
+ },
+ true,
+ },
+
+ // Static Huffman codes (RFC 1951 section 3.2.6)
+ &InitDecoderTest{
+ fixedHuffmanBits[0:],
+ fixedHuffmanDecoder,
+ true,
+ },
+
+ // Illegal input.
+ &InitDecoderTest{
+ []int{},
+ huffmanDecoder{},
+ false,
+ },
+
+ // Illegal input.
+ &InitDecoderTest{
+ []int{0, 0, 0, 0, 0, 0, 0},
+ huffmanDecoder{},
+ false,
+ },
+}
+
+func TestInitDecoder(t *testing.T) {
+ for i, tt := range initDecoderTests {
+ var h huffmanDecoder
+ if h.init(tt.in) != tt.ok {
+ t.Errorf("test %d: init = %v", i, !tt.ok)
+ continue
+ }
+ if !reflect.DeepEqual(&h, &tt.out) {
+ t.Errorf("test %d:\nhave %v\nwant %v", i, h, tt.out)
+ }
+ }
+}
+
+func TestUncompressedSource(t *testing.T) {
+ decoder := NewReader(bytes.NewBuffer([]byte{0x01, 0x01, 0x00, 0xfe, 0xff, 0x11}))
+ output := make([]byte, 1)
+ n, error := decoder.Read(output)
+ if n != 1 || error != nil {
+ t.Fatalf("decoder.Read() = %d, %v, want 1, nil", n, error)
+ }
+ if output[0] != 0x11 {
+ t.Errorf("output[0] = %x, want 0x11", output[0])
+ }
+}
diff --git a/src/pkg/compress/flate/huffman_bit_writer.go b/src/pkg/compress/flate/huffman_bit_writer.go
new file mode 100644
index 000000000..3981df5cb
--- /dev/null
+++ b/src/pkg/compress/flate/huffman_bit_writer.go
@@ -0,0 +1,494 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+ "io"
+ "math"
+ "os"
+ "strconv"
+)
+
+const (
+ // The largest offset code.
+ offsetCodeCount = 30
+
+ // The special code used to mark the end of a block.
+ endBlockMarker = 256
+
+ // The first length code.
+ lengthCodesStart = 257
+
+ // The number of codegen codes.
+ codegenCodeCount = 19
+ badCode = 255
+)
+
+// The number of extra bits needed by length code X - LENGTH_CODES_START.
+var lengthExtraBits = []int8{
+ /* 257 */ 0, 0, 0,
+ /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
+ /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
+ /* 280 */ 4, 5, 5, 5, 5, 0,
+}
+
+// The length indicated by length code X - LENGTH_CODES_START.
+var lengthBase = []uint32{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
+ 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
+ 64, 80, 96, 112, 128, 160, 192, 224, 255,
+}
+
+// offset code word extra bits.
+var offsetExtraBits = []int8{
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
+ 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
+ /* extended window */
+ 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
+}
+
+var offsetBase = []uint32{
+ /* normal deflate */
+ 0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
+ 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
+ 0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
+ 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
+ 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
+ 0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
+
+ /* extended window */
+ 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
+ 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
+ 0x100000, 0x180000, 0x200000, 0x300000,
+}
+
+// The odd order in which the codegen code sizes are written.
+var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
+
+type huffmanBitWriter struct {
+ w io.Writer
+ // Data waiting to be written is bytes[0:nbytes]
+ // and then the low nbits of bits.
+ bits uint32
+ nbits uint32
+ bytes [64]byte
+ nbytes int
+ literalFreq []int32
+ offsetFreq []int32
+ codegen []uint8
+ codegenFreq []int32
+ literalEncoding *huffmanEncoder
+ offsetEncoding *huffmanEncoder
+ codegenEncoding *huffmanEncoder
+ err os.Error
+}
+
+type WrongValueError struct {
+ name string
+ from int32
+ to int32
+ value int32
+}
+
+func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
+ return &huffmanBitWriter{
+ w: w,
+ literalFreq: make([]int32, maxLit),
+ offsetFreq: make([]int32, offsetCodeCount),
+ codegen: make([]uint8, maxLit+offsetCodeCount+1),
+ codegenFreq: make([]int32, codegenCodeCount),
+ literalEncoding: newHuffmanEncoder(maxLit),
+ offsetEncoding: newHuffmanEncoder(offsetCodeCount),
+ codegenEncoding: newHuffmanEncoder(codegenCodeCount),
+ }
+}
+
+func (err WrongValueError) String() string {
+ return "huffmanBitWriter: " + err.name + " should belong to [" + strconv.Itoa64(int64(err.from)) + ";" +
+ strconv.Itoa64(int64(err.to)) + "] but actual value is " + strconv.Itoa64(int64(err.value))
+}
+
+func (w *huffmanBitWriter) flushBits() {
+ if w.err != nil {
+ w.nbits = 0
+ return
+ }
+ bits := w.bits
+ w.bits >>= 16
+ w.nbits -= 16
+ n := w.nbytes
+ w.bytes[n] = byte(bits)
+ w.bytes[n+1] = byte(bits >> 8)
+ if n += 2; n >= len(w.bytes) {
+ _, w.err = w.w.Write(w.bytes[0:])
+ n = 0
+ }
+ w.nbytes = n
+}
+
+func (w *huffmanBitWriter) flush() {
+ if w.err != nil {
+ w.nbits = 0
+ return
+ }
+ n := w.nbytes
+ if w.nbits > 8 {
+ w.bytes[n] = byte(w.bits)
+ w.bits >>= 8
+ w.nbits -= 8
+ n++
+ }
+ if w.nbits > 0 {
+ w.bytes[n] = byte(w.bits)
+ w.nbits = 0
+ n++
+ }
+ w.bits = 0
+ _, w.err = w.w.Write(w.bytes[0:n])
+ w.nbytes = 0
+}
+
+func (w *huffmanBitWriter) writeBits(b, nb int32) {
+ w.bits |= uint32(b) << w.nbits
+ if w.nbits += uint32(nb); w.nbits >= 16 {
+ w.flushBits()
+ }
+}
+
+func (w *huffmanBitWriter) writeBytes(bytes []byte) {
+ if w.err != nil {
+ return
+ }
+ n := w.nbytes
+ if w.nbits == 8 {
+ w.bytes[n] = byte(w.bits)
+ w.nbits = 0
+ n++
+ }
+ if w.nbits != 0 {
+ w.err = InternalError("writeBytes with unfinished bits")
+ return
+ }
+ if n != 0 {
+ _, w.err = w.w.Write(w.bytes[0:n])
+ if w.err != nil {
+ return
+ }
+ }
+ w.nbytes = 0
+ _, w.err = w.w.Write(bytes)
+}
+
+// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
+// the literal and offset lengths arrays (which are concatenated into a single
+// array). This method generates that run-length encoding.
+//
+// The result is written into the codegen array, and the frequency
+// of each code is written into the codegenFreq array.
+// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
+// information. Code badCode is an end marker.
+//
+// numLiterals The number of literals in literalEncoding
+// numOffsets The number of offsets in offsetEncoding
+func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int) {
+ fillInt32s(w.codegenFreq, 0)
+ // Note that we are using codegen both as a temporary variable for holding
+ // a copy of the frequencies, and as the place where we put the result.
+	// a copy of the code lengths, and as the place where we put the result.
+ // so far.
+ codegen := w.codegen // cache
+ // Copy the concatenated code sizes to codegen. Put a marker at the end.
+ copyUint8s(codegen[0:numLiterals], w.literalEncoding.codeBits)
+ copyUint8s(codegen[numLiterals:numLiterals+numOffsets], w.offsetEncoding.codeBits)
+ codegen[numLiterals+numOffsets] = badCode
+
+ size := codegen[0]
+ count := 1
+ outIndex := 0
+ for inIndex := 1; size != badCode; inIndex++ {
+ // INVARIANT: We have seen "count" copies of size that have not yet
+ // had output generated for them.
+ nextSize := codegen[inIndex]
+ if nextSize == size {
+ count++
+ continue
+ }
+ // We need to generate codegen indicating "count" of size.
+ if size != 0 {
+ codegen[outIndex] = size
+ outIndex++
+ w.codegenFreq[size]++
+ count--
+ for count >= 3 {
+ n := min(count, 6)
+ codegen[outIndex] = 16
+ outIndex++
+ codegen[outIndex] = uint8(n - 3)
+ outIndex++
+ w.codegenFreq[16]++
+ count -= n
+ }
+ } else {
+ for count >= 11 {
+ n := min(count, 138)
+ codegen[outIndex] = 18
+ outIndex++
+ codegen[outIndex] = uint8(n - 11)
+ outIndex++
+ w.codegenFreq[18]++
+ count -= n
+ }
+ if count >= 3 {
+ // count >= 3 && count <= 10
+ codegen[outIndex] = 17
+ outIndex++
+ codegen[outIndex] = uint8(count - 3)
+ outIndex++
+ w.codegenFreq[17]++
+ count = 0
+ }
+ }
+ count--
+ for ; count >= 0; count-- {
+ codegen[outIndex] = size
+ outIndex++
+ w.codegenFreq[size]++
+ }
+ // Set up invariant for next time through the loop.
+ size = nextSize
+ count = 1
+ }
+ // Marker indicating the end of the codegen.
+ codegen[outIndex] = badCode
+}
+
+func (w *huffmanBitWriter) writeCode(code *huffmanEncoder, literal uint32) {
+ if w.err != nil {
+ return
+ }
+ w.writeBits(int32(code.code[literal]), int32(code.codeBits[literal]))
+}
+
+// Write the header of a dynamic Huffman block to the output stream.
+//
+// numLiterals The number of literals specified in codegen
+// numOffsets The number of offsets specified in codegen
+// numCodegens The number of codegens used in codegen
+func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
+ if w.err != nil {
+ return
+ }
+ var firstBits int32 = 4
+ if isEof {
+ firstBits = 5
+ }
+ w.writeBits(firstBits, 3)
+ w.writeBits(int32(numLiterals-257), 5)
+ w.writeBits(int32(numOffsets-1), 5)
+ w.writeBits(int32(numCodegens-4), 4)
+
+ for i := 0; i < numCodegens; i++ {
+ value := w.codegenEncoding.codeBits[codegenOrder[i]]
+ w.writeBits(int32(value), 3)
+ }
+
+ i := 0
+ for {
+ var codeWord int = int(w.codegen[i])
+ i++
+ if codeWord == badCode {
+ break
+ }
+ // The low byte contains the actual code to generate.
+ w.writeCode(w.codegenEncoding, uint32(codeWord))
+
+ switch codeWord {
+ case 16:
+ w.writeBits(int32(w.codegen[i]), 2)
+ i++
+ break
+ case 17:
+ w.writeBits(int32(w.codegen[i]), 3)
+ i++
+ break
+ case 18:
+ w.writeBits(int32(w.codegen[i]), 7)
+ i++
+ break
+ }
+ }
+}
+
+func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
+ if w.err != nil {
+ return
+ }
+ var flag int32
+ if isEof {
+ flag = 1
+ }
+ w.writeBits(flag, 3)
+ w.flush()
+ w.writeBits(int32(length), 16)
+ w.writeBits(int32(^uint16(length)), 16)
+}
+
+func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
+ if w.err != nil {
+ return
+ }
+ // Indicate that we are a fixed Huffman block
+ var value int32 = 2
+ if isEof {
+ value = 3
+ }
+ w.writeBits(value, 3)
+}
+
+func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
+ if w.err != nil {
+ return
+ }
+ fillInt32s(w.literalFreq, 0)
+ fillInt32s(w.offsetFreq, 0)
+
+ n := len(tokens)
+ tokens = tokens[0 : n+1]
+ tokens[n] = endBlockMarker
+
+ for _, t := range tokens {
+ switch t.typ() {
+ case literalType:
+ w.literalFreq[t.literal()]++
+ case matchType:
+ length := t.length()
+ offset := t.offset()
+ w.literalFreq[lengthCodesStart+lengthCode(length)]++
+ w.offsetFreq[offsetCode(offset)]++
+ }
+ }
+
+ // get the number of literals
+ numLiterals := len(w.literalFreq)
+ for w.literalFreq[numLiterals-1] == 0 {
+ numLiterals--
+ }
+ // get the number of offsets
+ numOffsets := len(w.offsetFreq)
+ for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
+ numOffsets--
+ }
+ if numOffsets == 0 {
+ // We haven't found a single match. If we want to go with the dynamic encoding,
+ // we should count at least one offset to be sure that the offset huffman tree could be encoded.
+ w.offsetFreq[0] = 1
+ numOffsets = 1
+ }
+
+ w.literalEncoding.generate(w.literalFreq, 15)
+ w.offsetEncoding.generate(w.offsetFreq, 15)
+
+ storedBytes := 0
+ if input != nil {
+ storedBytes = len(input)
+ }
+ var extraBits int64
+ var storedSize int64 = math.MaxInt64
+ if storedBytes <= maxStoreBlockSize && input != nil {
+ storedSize = int64((storedBytes + 5) * 8)
+ // We only bother calculating the costs of the extra bits required by
+		// the length and offset fields (which will be the same for both fixed
+ // and dynamic encoding), if we need to compare those two encodings
+ // against stored encoding.
+ for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
+ // First eight length codes have extra size = 0.
+ extraBits += int64(w.literalFreq[lengthCode]) * int64(lengthExtraBits[lengthCode-lengthCodesStart])
+ }
+ for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
+ // First four offset codes have extra size = 0.
+ extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode])
+ }
+ }
+
+ // Figure out smallest code.
+ // Fixed Huffman baseline.
+ var size = int64(3) +
+ fixedLiteralEncoding.bitLength(w.literalFreq) +
+ fixedOffsetEncoding.bitLength(w.offsetFreq) +
+ extraBits
+ var literalEncoding = fixedLiteralEncoding
+ var offsetEncoding = fixedOffsetEncoding
+
+ // Dynamic Huffman?
+ var numCodegens int
+
+ // Generate codegen and codegenFrequencies, which indicates how to encode
+ // the literalEncoding and the offsetEncoding.
+ w.generateCodegen(numLiterals, numOffsets)
+ w.codegenEncoding.generate(w.codegenFreq, 7)
+ numCodegens = len(w.codegenFreq)
+ for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
+ numCodegens--
+ }
+ dynamicHeader := int64(3+5+5+4+(3*numCodegens)) +
+ w.codegenEncoding.bitLength(w.codegenFreq) +
+ int64(extraBits) +
+ int64(w.codegenFreq[16]*2) +
+ int64(w.codegenFreq[17]*3) +
+ int64(w.codegenFreq[18]*7)
+ dynamicSize := dynamicHeader +
+ w.literalEncoding.bitLength(w.literalFreq) +
+ w.offsetEncoding.bitLength(w.offsetFreq)
+
+ if dynamicSize < size {
+ size = dynamicSize
+ literalEncoding = w.literalEncoding
+ offsetEncoding = w.offsetEncoding
+ }
+
+ // Stored bytes?
+ if storedSize < size {
+ w.writeStoredHeader(storedBytes, eof)
+ w.writeBytes(input[0:storedBytes])
+ return
+ }
+
+ // Huffman.
+ if literalEncoding == fixedLiteralEncoding {
+ w.writeFixedHeader(eof)
+ } else {
+ w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
+ }
+ for _, t := range tokens {
+ switch t.typ() {
+ case literalType:
+ w.writeCode(literalEncoding, t.literal())
+ break
+ case matchType:
+ // Write the length
+ length := t.length()
+ lengthCode := lengthCode(length)
+ w.writeCode(literalEncoding, lengthCode+lengthCodesStart)
+ extraLengthBits := int32(lengthExtraBits[lengthCode])
+ if extraLengthBits > 0 {
+ extraLength := int32(length - lengthBase[lengthCode])
+ w.writeBits(extraLength, extraLengthBits)
+ }
+ // Write the offset
+ offset := t.offset()
+ offsetCode := offsetCode(offset)
+ w.writeCode(offsetEncoding, offsetCode)
+ extraOffsetBits := int32(offsetExtraBits[offsetCode])
+ if extraOffsetBits > 0 {
+ extraOffset := int32(offset - offsetBase[offsetCode])
+ w.writeBits(extraOffset, extraOffsetBits)
+ }
+ break
+ default:
+ panic("unknown token type: " + string(t))
+ }
+ }
+}
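
For intuition about the size comparison at the end of writeBlock: a stored block costs (n+5)*8 bits for n input bytes (LEN/NLEN plus roughly a byte of header and padding), while a fixed-Huffman block of n literal bytes that all take 8-bit codes costs 3 + 8*n + 7 bits (block header, literals, end-of-block code). For n = 100 that is 840 bits stored versus 810 bits fixed, so the Huffman form wins only narrowly; once many literals need 9-bit codes or the dynamic-table header is added, the stored form can be cheaper, which is exactly what the comparison above decides. A rough sketch of that arithmetic (an illustration, not code from the patch):

// fixedVsStored is a hypothetical helper: it estimates the bit cost of a
// block of n literal bytes that all take 8-bit fixed Huffman codes, versus
// storing the same bytes uncompressed. The real writeBlock also accounts
// for 9-bit literals, match tokens, and the dynamic-table header.
func fixedVsStored(n int64) (fixedBits, storedBits int64) {
	fixedBits = 3 + 8*n + 7  // 3-bit header + literals + 7-bit end-of-block code
	storedBits = (n + 5) * 8 // ~5 bytes of stored-block framing
	return
}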
diff --git a/src/pkg/compress/flate/huffman_code.go b/src/pkg/compress/flate/huffman_code.go
new file mode 100644
index 000000000..7ed603a4f
--- /dev/null
+++ b/src/pkg/compress/flate/huffman_code.go
@@ -0,0 +1,378 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+import (
+ "math"
+ "sort"
+)
+
+type huffmanEncoder struct {
+ codeBits []uint8
+ code []uint16
+}
+
+type literalNode struct {
+ literal uint16
+ freq int32
+}
+
+type chain struct {
+ // The sum of the leaves in this tree
+ freq int32
+
+ // The number of literals to the left of this item at this level
+ leafCount int32
+
+ // The right child of this chain in the previous level.
+ up *chain
+}
+
+type levelInfo struct {
+	// Our level, for better printing.
+ level int32
+
+ // The most recent chain generated for this level
+ lastChain *chain
+
+ // The frequency of the next character to add to this level
+ nextCharFreq int32
+
+ // The frequency of the next pair (from level below) to add to this level.
+ // Only valid if the "needed" value of the next lower level is 0.
+ nextPairFreq int32
+
+ // The number of chains remaining to generate for this level before moving
+ // up to the next level
+ needed int32
+
+ // The levelInfo for level+1
+ up *levelInfo
+
+ // The levelInfo for level-1
+ down *levelInfo
+}
+
+func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
+
+func newHuffmanEncoder(size int) *huffmanEncoder {
+ return &huffmanEncoder{make([]uint8, size), make([]uint16, size)}
+}
+
+// Generates a HuffmanCode corresponding to the fixed literal table
+func generateFixedLiteralEncoding() *huffmanEncoder {
+ h := newHuffmanEncoder(maxLit)
+ codeBits := h.codeBits
+ code := h.code
+ var ch uint16
+ for ch = 0; ch < maxLit; ch++ {
+ var bits uint16
+ var size uint8
+ switch {
+ case ch < 144:
+			// size 8, 00110000 .. 10111111
+ bits = ch + 48
+ size = 8
+ break
+ case ch < 256:
+ // size 9, 110010000 .. 111111111
+ bits = ch + 400 - 144
+ size = 9
+ break
+ case ch < 280:
+ // size 7, 0000000 .. 0010111
+ bits = ch - 256
+ size = 7
+ break
+ default:
+ // size 8, 11000000 .. 11000111
+ bits = ch + 192 - 280
+ size = 8
+ }
+ codeBits[ch] = size
+ code[ch] = reverseBits(bits, size)
+ }
+ return h
+}
+
+func generateFixedOffsetEncoding() *huffmanEncoder {
+ h := newHuffmanEncoder(30)
+ codeBits := h.codeBits
+ code := h.code
+ for ch := uint16(0); ch < 30; ch++ {
+ codeBits[ch] = 5
+ code[ch] = reverseBits(ch, 5)
+ }
+ return h
+}
+
+var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
+var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
+
+func (h *huffmanEncoder) bitLength(freq []int32) int64 {
+ var total int64
+ for i, f := range freq {
+ if f != 0 {
+ total += int64(f) * int64(h.codeBits[i])
+ }
+ }
+ return total
+}
+
+// Generate elements in the chain using an iterative algorithm.
+func (h *huffmanEncoder) generateChains(top *levelInfo, list []literalNode) {
+ n := len(list)
+ list = list[0 : n+1]
+ list[n] = maxNode()
+
+ l := top
+ for {
+ if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
+			// We've run out of both leaves and pairs.
+			// End all calculations for this level.
+			// To make sure we never come back to this level or any lower level,
+ // set nextPairFreq impossibly large.
+ l.lastChain = nil
+ l.needed = 0
+ l = l.up
+ l.nextPairFreq = math.MaxInt32
+ continue
+ }
+
+ prevFreq := l.lastChain.freq
+ if l.nextCharFreq < l.nextPairFreq {
+ // The next item on this row is a leaf node.
+ n := l.lastChain.leafCount + 1
+ l.lastChain = &chain{l.nextCharFreq, n, l.lastChain.up}
+ l.nextCharFreq = list[n].freq
+ } else {
+ // The next item on this row is a pair from the previous row.
+ // nextPairFreq isn't valid until we generate two
+ // more values in the level below
+ l.lastChain = &chain{l.nextPairFreq, l.lastChain.leafCount, l.down.lastChain}
+ l.down.needed = 2
+ }
+
+ if l.needed--; l.needed == 0 {
+ // We've done everything we need to do for this level.
+ // Continue calculating one level up. Fill in nextPairFreq
+ // of that level with the sum of the two nodes we've just calculated on
+ // this level.
+ up := l.up
+ if up == nil {
+ // All done!
+ return
+ }
+ up.nextPairFreq = prevFreq + l.lastChain.freq
+ l = up
+ } else {
+ // If we stole from below, move down temporarily to replenish it.
+ for l.down.needed > 0 {
+ l = l.down
+ }
+ }
+ }
+}
+
+// Return the number of literals assigned to each bit size in the Huffman encoding
+//
+// This method is only called when list.length >= 3
+// The cases of 0, 1, and 2 literals are handled by special case code.
+//
+// list An array of the literals with non-zero frequencies
+// and their associated frequencies. The array is in order of increasing
+// frequency, and has as its last element a special element with frequency
+// MaxInt32
+// maxBits The maximum number of bits that should be used to encode any literal.
+// return An integer array in which array[i] indicates the number of literals
+// that should be encoded in i bits.
+func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
+ n := int32(len(list))
+ list = list[0 : n+1]
+ list[n] = maxNode()
+
+ // The tree can't have greater depth than n - 1, no matter what. This
+ // saves a little bit of work in some small cases
+ maxBits = minInt32(maxBits, n-1)
+
+ // Create information about each of the levels.
+ // A bogus "Level 0" whose sole purpose is so that
+ // level1.prev.needed==0. This makes level1.nextPairFreq
+ // be a legitimate value that never gets chosen.
+ top := &levelInfo{needed: 0}
+ chain2 := &chain{list[1].freq, 2, new(chain)}
+ for level := int32(1); level <= maxBits; level++ {
+ // For every level, the first two items are the first two characters.
+ // We initialize the levels as if we had already figured this out.
+ top = &levelInfo{
+ level: level,
+ lastChain: chain2,
+ nextCharFreq: list[2].freq,
+ nextPairFreq: list[0].freq + list[1].freq,
+ down: top,
+ }
+ top.down.up = top
+ if level == 1 {
+ top.nextPairFreq = math.MaxInt32
+ }
+ }
+
+ // We need a total of 2*n - 2 items at top level and have already generated 2.
+ top.needed = 2*n - 4
+
+ l := top
+ for {
+ if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
+			// We've run out of both leaves and pairs.
+			// End all calculations for this level.
+			// To make sure we never come back to this level or any lower level,
+ // set nextPairFreq impossibly large.
+ l.lastChain = nil
+ l.needed = 0
+ l = l.up
+ l.nextPairFreq = math.MaxInt32
+ continue
+ }
+
+ prevFreq := l.lastChain.freq
+ if l.nextCharFreq < l.nextPairFreq {
+ // The next item on this row is a leaf node.
+ n := l.lastChain.leafCount + 1
+ l.lastChain = &chain{l.nextCharFreq, n, l.lastChain.up}
+ l.nextCharFreq = list[n].freq
+ } else {
+ // The next item on this row is a pair from the previous row.
+ // nextPairFreq isn't valid until we generate two
+ // more values in the level below
+ l.lastChain = &chain{l.nextPairFreq, l.lastChain.leafCount, l.down.lastChain}
+ l.down.needed = 2
+ }
+
+ if l.needed--; l.needed == 0 {
+ // We've done everything we need to do for this level.
+ // Continue calculating one level up. Fill in nextPairFreq
+ // of that level with the sum of the two nodes we've just calculated on
+ // this level.
+ up := l.up
+ if up == nil {
+ // All done!
+ break
+ }
+ up.nextPairFreq = prevFreq + l.lastChain.freq
+ l = up
+ } else {
+ // If we stole from below, move down temporarily to replenish it.
+ for l.down.needed > 0 {
+ l = l.down
+ }
+ }
+ }
+
+	// Something is wrong if, at the end, the top level is nil or hasn't used
+	// all of the leaves.
+ if top.lastChain.leafCount != n {
+ panic("top.lastChain.leafCount != n")
+ }
+
+ bitCount := make([]int32, maxBits+1)
+ bits := 1
+ for chain := top.lastChain; chain.up != nil; chain = chain.up {
+ // chain.leafCount gives the number of literals requiring at least "bits"
+ // bits to encode.
+ bitCount[bits] = chain.leafCount - chain.up.leafCount
+ bits++
+ }
+ return bitCount
+}
+
+// Look at the leaves and assign them a bit count and an encoding as specified
+// in RFC 1951 3.2.2
+func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
+ code := uint16(0)
+ for n, bits := range bitCount {
+ code <<= 1
+ if n == 0 || bits == 0 {
+ continue
+ }
+		// The literals list[len(list)-bits] .. list[len(list)-1]
+ // are encoded using "bits" bits, and get the values
+ // code, code + 1, .... The code values are
+ // assigned in literal order (not frequency order).
+ chunk := list[len(list)-int(bits):]
+ sortByLiteral(chunk)
+ for _, node := range chunk {
+ h.codeBits[node.literal] = uint8(n)
+ h.code[node.literal] = reverseBits(code, uint8(n))
+ code++
+ }
+ list = list[0 : len(list)-int(bits)]
+ }
+}
+
+// Update this Huffman Code object to be the minimum code for the specified frequency count.
+//
+// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
+// maxBits The maximum number of bits to use for any literal.
+func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
+ list := make([]literalNode, len(freq)+1)
+ // Number of non-zero literals
+ count := 0
+ // Set list to be the set of all non-zero literals and their frequencies
+ for i, f := range freq {
+ if f != 0 {
+ list[count] = literalNode{uint16(i), f}
+ count++
+ } else {
+ h.codeBits[i] = 0
+ }
+ }
+ // If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros
+ h.codeBits = h.codeBits[0:len(freq)]
+ list = list[0:count]
+ if count <= 2 {
+ // Handle the small cases here, because they are awkward for the general case code. With
+ // two or fewer literals, everything has bit length 1.
+ for i, node := range list {
+ // "list" is in order of increasing literal value.
+ h.codeBits[node.literal] = 1
+ h.code[node.literal] = uint16(i)
+ }
+ return
+ }
+ sortByFreq(list)
+
+ // Get the number of literals for each bit count
+ bitCount := h.bitCounts(list, maxBits)
+ // And do the assignment
+ h.assignEncodingAndSize(bitCount, list)
+}
+
+type literalNodeSorter struct {
+ a []literalNode
+ less func(i, j int) bool
+}
+
+func (s literalNodeSorter) Len() int { return len(s.a) }
+
+func (s literalNodeSorter) Less(i, j int) bool {
+ return s.less(i, j)
+}
+
+func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] }
+
+func sortByFreq(a []literalNode) {
+ s := &literalNodeSorter{a, func(i, j int) bool {
+ if a[i].freq == a[j].freq {
+ return a[i].literal < a[j].literal
+ }
+ return a[i].freq < a[j].freq
+ }}
+ sort.Sort(s)
+}
+
+func sortByLiteral(a []literalNode) {
+ s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }}
+ sort.Sort(s)
+}
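
A hypothetical in-package sketch of how generate is driven (the function name and frequency values below are made up for illustration; huffmanEncoder is unexported, so this would have to live inside package flate with fmt imported):

// exampleGenerate builds a length-limited Huffman code for a small,
// made-up frequency table and prints the assigned code lengths.
// Note that h.code holds bit-reversed code words, ready for the
// LSB-first bit order used by huffmanBitWriter.
func exampleGenerate() {
	freq := []int32{5, 2, 1, 1, 8} // frequencies for symbols 0..4 (arbitrary)
	h := newHuffmanEncoder(len(freq))
	h.generate(freq, 15) // DEFLATE limits code lengths to 15 bits
	for sym, bits := range h.codeBits {
		fmt.Printf("symbol %d: %d bits, code %b\n", sym, bits, h.code[sym])
	}
}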
diff --git a/src/pkg/compress/flate/inflate.go b/src/pkg/compress/flate/inflate.go
new file mode 100644
index 000000000..3845f1204
--- /dev/null
+++ b/src/pkg/compress/flate/inflate.go
@@ -0,0 +1,708 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package flate implements the DEFLATE compressed data format, described in
+// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
+// formats.
+package flate
+
+import (
+ "bufio"
+ "io"
+ "os"
+ "strconv"
+)
+
+const (
+ maxCodeLen = 16 // max length of Huffman code
+ maxHist = 32768 // max history required
+ maxLit = 286
+ maxDist = 32
+ numCodes = 19 // number of codes in Huffman meta-code
+)
+
+// A CorruptInputError reports the presence of corrupt input at a given offset.
+type CorruptInputError int64
+
+func (e CorruptInputError) String() string {
+ return "flate: corrupt input before offset " + strconv.Itoa64(int64(e))
+}
+
+// An InternalError reports an error in the flate code itself.
+type InternalError string
+
+func (e InternalError) String() string { return "flate: internal error: " + string(e) }
+
+// A ReadError reports an error encountered while reading input.
+type ReadError struct {
+ Offset int64 // byte offset where error occurred
+ Error os.Error // error returned by underlying Read
+}
+
+func (e *ReadError) String() string {
+ return "flate: read error at offset " + strconv.Itoa64(e.Offset) + ": " + e.Error.String()
+}
+
+// A WriteError reports an error encountered while writing output.
+type WriteError struct {
+ Offset int64 // byte offset where error occurred
+ Error os.Error // error returned by underlying Write
+}
+
+func (e *WriteError) String() string {
+ return "flate: write error at offset " + strconv.Itoa64(e.Offset) + ": " + e.Error.String()
+}
+
+// Huffman decoder is based on
+// J. Brian Connell, ``A Huffman-Shannon-Fano Code,''
+// Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047.
+type huffmanDecoder struct {
+ // min, max code length
+ min, max int
+
+ // limit[i] = largest code word of length i
+ // Given code v of length n,
+ // need more bits if v > limit[n].
+ limit [maxCodeLen + 1]int
+
+ // base[i] = smallest code word of length i - seq number
+ base [maxCodeLen + 1]int
+
+ // codes[seq number] = output code.
+ // Given code v of length n, value is
+ // codes[v - base[n]].
+ codes []int
+}
+
+// Initialize Huffman decoding tables from array of code lengths.
+func (h *huffmanDecoder) init(bits []int) bool {
+ // Count number of codes of each length,
+ // compute min and max length.
+ var count [maxCodeLen + 1]int
+ var min, max int
+ for _, n := range bits {
+ if n == 0 {
+ continue
+ }
+ if min == 0 || n < min {
+ min = n
+ }
+ if n > max {
+ max = n
+ }
+ count[n]++
+ }
+ if max == 0 {
+ return false
+ }
+
+ h.min = min
+ h.max = max
+
+ // For each code range, compute
+ // nextcode (first code of that length),
+ // limit (last code of that length), and
+ // base (offset from first code to sequence number).
+ code := 0
+ seq := 0
+ var nextcode [maxCodeLen]int
+ for i := min; i <= max; i++ {
+ n := count[i]
+ nextcode[i] = code
+ h.base[i] = code - seq
+ code += n
+ seq += n
+ h.limit[i] = code - 1
+ code <<= 1
+ }
+
+ // Make array mapping sequence numbers to codes.
+ if len(h.codes) < len(bits) {
+ h.codes = make([]int, len(bits))
+ }
+ for i, n := range bits {
+ if n == 0 {
+ continue
+ }
+ code := nextcode[n]
+ nextcode[n]++
+ seq := code - h.base[n]
+ h.codes[seq] = i
+ }
+ return true
+}
+
+// Hard-coded Huffman tables for DEFLATE algorithm.
+// See RFC 1951, section 3.2.6.
+var fixedHuffmanDecoder = huffmanDecoder{
+ 7, 9,
+ [maxCodeLen + 1]int{7: 23, 199, 511},
+ [maxCodeLen + 1]int{7: 0, 24, 224},
+ []int{
+ // length 7: 256-279
+ 256, 257, 258, 259, 260, 261, 262,
+ 263, 264, 265, 266, 267, 268, 269,
+ 270, 271, 272, 273, 274, 275, 276,
+ 277, 278, 279,
+
+ // length 8: 0-143
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+ 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+ 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
+ 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
+ 92, 93, 94, 95, 96, 97, 98, 99, 100,
+ 101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124,
+ 125, 126, 127, 128, 129, 130, 131, 132,
+ 133, 134, 135, 136, 137, 138, 139, 140,
+ 141, 142, 143,
+
+ // length 8: 280-287
+ 280, 281, 282, 283, 284, 285, 286, 287,
+
+ // length 9: 144-255
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 161, 162, 163, 164, 165, 166, 167,
+ 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 193, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ },
+}
+
+// The actual read interface needed by NewReader.
+// If the passed-in io.Reader does not also have ReadByte,
+// NewReader will introduce its own buffering.
+type Reader interface {
+ io.Reader
+ ReadByte() (c byte, err os.Error)
+}
+
+// Decompress state.
+type decompressor struct {
+ // Input source.
+ r Reader
+ roffset int64
+ woffset int64
+
+ // Input bits, in top of b.
+ b uint32
+ nb uint
+
+ // Huffman decoders for literal/length, distance.
+ h1, h2 huffmanDecoder
+
+ // Length arrays used to define Huffman codes.
+ bits [maxLit + maxDist]int
+ codebits [numCodes]int
+
+ // Output history, buffer.
+ hist [maxHist]byte
+ hp int // current output position in buffer
+ hw int // have written hist[0:hw] already
+ hfull bool // buffer has filled at least once
+
+ // Temporary buffer (avoids repeated allocation).
+ buf [4]byte
+
+ // Next step in the decompression,
+ // and decompression state.
+ step func(*decompressor)
+ final bool
+ err os.Error
+ toRead []byte
+ hl, hd *huffmanDecoder
+ copyLen int
+ copyDist int
+}
+
+func (f *decompressor) nextBlock() {
+ if f.final {
+ if f.hw != f.hp {
+ f.flush((*decompressor).nextBlock)
+ return
+ }
+ f.err = os.EOF
+ return
+ }
+ for f.nb < 1+2 {
+ if f.err = f.moreBits(); f.err != nil {
+ return
+ }
+ }
+ f.final = f.b&1 == 1
+ f.b >>= 1
+ typ := f.b & 3
+ f.b >>= 2
+ f.nb -= 1 + 2
+ switch typ {
+ case 0:
+ f.dataBlock()
+ case 1:
+ // compressed, fixed Huffman tables
+ f.hl = &fixedHuffmanDecoder
+ f.hd = nil
+ f.huffmanBlock()
+ case 2:
+ // compressed, dynamic Huffman tables
+ if f.err = f.readHuffman(); f.err != nil {
+ break
+ }
+ f.hl = &f.h1
+ f.hd = &f.h2
+ f.huffmanBlock()
+ default:
+ // 3 is reserved.
+ f.err = CorruptInputError(f.roffset)
+ }
+}
+
+func (f *decompressor) Read(b []byte) (int, os.Error) {
+ for {
+ if len(f.toRead) > 0 {
+ n := copy(b, f.toRead)
+ f.toRead = f.toRead[n:]
+ return n, nil
+ }
+ if f.err != nil {
+ return 0, f.err
+ }
+ f.step(f)
+ }
+ panic("unreachable")
+}
+
+func (f *decompressor) Close() os.Error {
+ if f.err == os.EOF {
+ return nil
+ }
+ return f.err
+}
+
+// RFC 1951 section 3.2.7.
+// Compression with dynamic Huffman codes
+
+var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
+
+func (f *decompressor) readHuffman() os.Error {
+ // HLIT[5], HDIST[5], HCLEN[4].
+ for f.nb < 5+5+4 {
+ if err := f.moreBits(); err != nil {
+ return err
+ }
+ }
+ nlit := int(f.b&0x1F) + 257
+ f.b >>= 5
+ ndist := int(f.b&0x1F) + 1
+ f.b >>= 5
+ nclen := int(f.b&0xF) + 4
+ f.b >>= 4
+ f.nb -= 5 + 5 + 4
+
+ // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
+ for i := 0; i < nclen; i++ {
+ for f.nb < 3 {
+ if err := f.moreBits(); err != nil {
+ return err
+ }
+ }
+ f.codebits[codeOrder[i]] = int(f.b & 0x7)
+ f.b >>= 3
+ f.nb -= 3
+ }
+ for i := nclen; i < len(codeOrder); i++ {
+ f.codebits[codeOrder[i]] = 0
+ }
+ if !f.h1.init(f.codebits[0:]) {
+ return CorruptInputError(f.roffset)
+ }
+
+ // HLIT + 257 code lengths, HDIST + 1 code lengths,
+ // using the code length Huffman code.
+ for i, n := 0, nlit+ndist; i < n; {
+ x, err := f.huffSym(&f.h1)
+ if err != nil {
+ return err
+ }
+ if x < 16 {
+ // Actual length.
+ f.bits[i] = x
+ i++
+ continue
+ }
+ // Repeat previous length or zero.
+ var rep int
+ var nb uint
+ var b int
+ switch x {
+ default:
+ return InternalError("unexpected length code")
+ case 16:
+ rep = 3
+ nb = 2
+ if i == 0 {
+ return CorruptInputError(f.roffset)
+ }
+ b = f.bits[i-1]
+ case 17:
+ rep = 3
+ nb = 3
+ b = 0
+ case 18:
+ rep = 11
+ nb = 7
+ b = 0
+ }
+ for f.nb < nb {
+ if err := f.moreBits(); err != nil {
+ return err
+ }
+ }
+ rep += int(f.b & uint32(1<<nb-1))
+ f.b >>= nb
+ f.nb -= nb
+ if i+rep > n {
+ return CorruptInputError(f.roffset)
+ }
+ for j := 0; j < rep; j++ {
+ f.bits[i] = b
+ i++
+ }
+ }
+
+ if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
+ return CorruptInputError(f.roffset)
+ }
+
+ return nil
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, the fixed
+// distance encoding associated with fixed Huffman blocks is used.
+func (f *decompressor) huffmanBlock() {
+ for {
+ v, err := f.huffSym(f.hl)
+ if err != nil {
+ f.err = err
+ return
+ }
+ var n uint // number of bits extra
+ var length int
+ switch {
+ case v < 256:
+ f.hist[f.hp] = byte(v)
+ f.hp++
+ if f.hp == len(f.hist) {
+ // After the flush, continue this loop.
+ f.flush((*decompressor).huffmanBlock)
+ return
+ }
+ continue
+ case v == 256:
+ // Done with huffman block; read next block.
+ f.step = (*decompressor).nextBlock
+ return
+ // otherwise, reference to older data
+ case v < 265:
+ length = v - (257 - 3)
+ n = 0
+ case v < 269:
+ length = v*2 - (265*2 - 11)
+ n = 1
+ case v < 273:
+ length = v*4 - (269*4 - 19)
+ n = 2
+ case v < 277:
+ length = v*8 - (273*8 - 35)
+ n = 3
+ case v < 281:
+ length = v*16 - (277*16 - 67)
+ n = 4
+ case v < 285:
+ length = v*32 - (281*32 - 131)
+ n = 5
+ default:
+ length = 258
+ n = 0
+ }
+ if n > 0 {
+ for f.nb < n {
+ if err = f.moreBits(); err != nil {
+ f.err = err
+ return
+ }
+ }
+ length += int(f.b & uint32(1<<n-1))
+ f.b >>= n
+ f.nb -= n
+ }
+
+ var dist int
+ if f.hd == nil {
+ for f.nb < 5 {
+ if err = f.moreBits(); err != nil {
+ f.err = err
+ return
+ }
+ }
+ dist = int(reverseByte[(f.b&0x1F)<<3])
+ f.b >>= 5
+ f.nb -= 5
+ } else {
+ if dist, err = f.huffSym(f.hd); err != nil {
+ f.err = err
+ return
+ }
+ }
+
+ switch {
+ case dist < 4:
+ dist++
+ case dist >= 30:
+ f.err = CorruptInputError(f.roffset)
+ return
+ default:
+ nb := uint(dist-2) >> 1
+ // have 1 bit in bottom of dist, need nb more.
+ extra := (dist & 1) << nb
+ for f.nb < nb {
+ if err = f.moreBits(); err != nil {
+ f.err = err
+ return
+ }
+ }
+ extra |= int(f.b & uint32(1<<nb-1))
+ f.b >>= nb
+ f.nb -= nb
+ dist = 1<<(nb+1) + 1 + extra
+ }
+
+ // Copy history[-dist:-dist+length] into output.
+ if dist > len(f.hist) {
+ f.err = InternalError("bad history distance")
+ return
+ }
+
+ // No check on length; encoding can be prescient.
+ if !f.hfull && dist > f.hp {
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+
+ p := f.hp - dist
+ if p < 0 {
+ p += len(f.hist)
+ }
+ for i := 0; i < length; i++ {
+ f.hist[f.hp] = f.hist[p]
+ f.hp++
+ p++
+ if f.hp == len(f.hist) {
+ // After flush continue copying out of history.
+ f.copyLen = length - (i + 1)
+ f.copyDist = dist
+ f.flush((*decompressor).copyHuff)
+ return
+ }
+ if p == len(f.hist) {
+ p = 0
+ }
+ }
+ }
+ panic("unreached")
+}
+
+func (f *decompressor) copyHuff() {
+ length := f.copyLen
+ dist := f.copyDist
+ p := f.hp - dist
+ if p < 0 {
+ p += len(f.hist)
+ }
+ for i := 0; i < length; i++ {
+ f.hist[f.hp] = f.hist[p]
+ f.hp++
+ p++
+ if f.hp == len(f.hist) {
+ f.copyLen = length - (i + 1)
+ f.flush((*decompressor).copyHuff)
+ return
+ }
+ if p == len(f.hist) {
+ p = 0
+ }
+ }
+
+ // Continue processing Huffman block.
+ f.huffmanBlock()
+}
+
+// Copy a single uncompressed data block from input to output.
+func (f *decompressor) dataBlock() {
+ // Uncompressed.
+ // Discard current half-byte.
+ f.nb = 0
+ f.b = 0
+
+ // Length then ones-complement of length.
+ nr, err := io.ReadFull(f.r, f.buf[0:4])
+ f.roffset += int64(nr)
+ if err != nil {
+ f.err = &ReadError{f.roffset, err}
+ return
+ }
+ n := int(f.buf[0]) | int(f.buf[1])<<8
+ nn := int(f.buf[2]) | int(f.buf[3])<<8
+ if uint16(nn) != uint16(^n) {
+ f.err = CorruptInputError(f.roffset)
+ return
+ }
+
+ if n == 0 {
+ // 0-length block means sync
+ f.flush((*decompressor).nextBlock)
+ return
+ }
+
+ f.copyLen = n
+ f.copyData()
+}
+
+func (f *decompressor) copyData() {
+	// Read f.copyLen bytes into history,
+ // pausing for reads as history fills.
+ n := f.copyLen
+ for n > 0 {
+ m := len(f.hist) - f.hp
+ if m > n {
+ m = n
+ }
+ m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m])
+ f.roffset += int64(m)
+ if err != nil {
+ f.err = &ReadError{f.roffset, err}
+ return
+ }
+ n -= m
+ f.hp += m
+ if f.hp == len(f.hist) {
+ f.copyLen = n
+ f.flush((*decompressor).copyData)
+ return
+ }
+ }
+ f.step = (*decompressor).nextBlock
+}
+
+func (f *decompressor) setDict(dict []byte) {
+ if len(dict) > len(f.hist) {
+ // Will only remember the tail.
+ dict = dict[len(dict)-len(f.hist):]
+ }
+
+ f.hp = copy(f.hist[:], dict)
+ if f.hp == len(f.hist) {
+ f.hp = 0
+ f.hfull = true
+ }
+ f.hw = f.hp
+}
+
+func (f *decompressor) moreBits() os.Error {
+ c, err := f.r.ReadByte()
+ if err != nil {
+ if err == os.EOF {
+ err = io.ErrUnexpectedEOF
+ }
+ return err
+ }
+ f.roffset++
+ f.b |= uint32(c) << f.nb
+ f.nb += 8
+ return nil
+}
+
+// Read the next Huffman-encoded symbol from f according to h.
+func (f *decompressor) huffSym(h *huffmanDecoder) (int, os.Error) {
+ for n := uint(h.min); n <= uint(h.max); n++ {
+ lim := h.limit[n]
+ if lim == -1 {
+ continue
+ }
+ for f.nb < n {
+ if err := f.moreBits(); err != nil {
+ return 0, err
+ }
+ }
+ v := int(f.b & uint32(1<<n-1))
+ v <<= 16 - n
+ v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits
+ if v <= lim {
+ f.b >>= n
+ f.nb -= n
+ return h.codes[v-h.base[n]], nil
+ }
+ }
+ return 0, CorruptInputError(f.roffset)
+}
+
+// Make any buffered output available to Read, then continue with the given step.
+func (f *decompressor) flush(step func(*decompressor)) {
+ f.toRead = f.hist[f.hw:f.hp]
+ f.woffset += int64(f.hp - f.hw)
+ f.hw = f.hp
+ if f.hp == len(f.hist) {
+ f.hp = 0
+ f.hw = 0
+ f.hfull = true
+ }
+ f.step = step
+}
+
+func makeReader(r io.Reader) Reader {
+ if rr, ok := r.(Reader); ok {
+ return rr
+ }
+ return bufio.NewReader(r)
+}
+
+// NewReader returns a new ReadCloser that can be used
+// to read the uncompressed version of r. It is the caller's
+// responsibility to call Close on the ReadCloser when
+// finished reading.
+func NewReader(r io.Reader) io.ReadCloser {
+ var f decompressor
+ f.r = makeReader(r)
+ f.step = (*decompressor).nextBlock
+ return &f
+}
+
+// NewReaderDict is like NewReader but initializes the reader
+// with a preset dictionary. The returned Reader behaves as if
+// the uncompressed data stream started with the given dictionary,
+// which has already been read. NewReaderDict is typically used
+// to read data compressed by NewWriterDict.
+func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
+ var f decompressor
+ f.setDict(dict)
+ f.r = makeReader(r)
+ f.step = (*decompressor).nextBlock
+ return &f
+}
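
NewReaderDict above pairs with NewWriterDict from deflate.go. A minimal dictionary round trip in the r60 API (a sketch, not part of the patch; error handling omitted):

package main

import (
	"bytes"
	"compress/flate"
	"fmt"
	"io/ioutil"
)

func main() {
	dict := []byte("hello world")

	// Compress with a preset dictionary.
	var b bytes.Buffer
	w := flate.NewWriterDict(&b, flate.BestCompression, dict)
	w.Write([]byte("hello again world"))
	w.Close()

	// Decompress with the same dictionary. A plain NewReader would not,
	// in general, decode this stream, because back-references may point
	// into the dictionary.
	r := flate.NewReaderDict(&b, dict)
	out, _ := ioutil.ReadAll(r)
	r.Close()
	fmt.Printf("%s\n", out) // hello again world
}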
diff --git a/src/pkg/compress/flate/reverse_bits.go b/src/pkg/compress/flate/reverse_bits.go
new file mode 100644
index 000000000..c1a02720d
--- /dev/null
+++ b/src/pkg/compress/flate/reverse_bits.go
@@ -0,0 +1,48 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+var reverseByte = [256]byte{
+ 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
+ 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
+ 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
+ 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
+ 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
+ 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
+ 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
+ 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
+ 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
+ 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
+ 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
+ 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
+ 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
+ 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
+ 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
+ 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
+ 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
+ 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
+ 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
+ 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
+ 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
+ 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
+ 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
+ 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
+ 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
+ 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
+ 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
+ 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
+ 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
+ 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
+ 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
+ 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
+}
+
+func reverseUint16(v uint16) uint16 {
+ return uint16(reverseByte[v>>8]) | uint16(reverseByte[v&0xFF])<<8
+}
+
+func reverseBits(number uint16, bitLength byte) uint16 {
+ return reverseUint16(number << uint8(16-bitLength))
+}
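
The helper can be sanity-checked against the reverseBitsTests table in deflate_test.go; for example, reversing the low 5 bits of 1 (00001) gives 16 (10000). A throwaway sketch, as if written inside package flate with fmt imported:

func checkReverseBits() {
	// Mirrors a few entries of reverseBitsTests in deflate_test.go.
	fmt.Println(reverseBits(1, 5))  // 16: 00001 reversed is 10000
	fmt.Println(reverseBits(17, 5)) // 17: 10001 is a palindrome
	fmt.Println(reverseBits(29, 5)) // 23: 11101 reversed is 10111
}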
diff --git a/src/pkg/compress/flate/token.go b/src/pkg/compress/flate/token.go
new file mode 100644
index 000000000..38aea5fa6
--- /dev/null
+++ b/src/pkg/compress/flate/token.go
@@ -0,0 +1,103 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+const (
+	// 2 bits:  type    0 = literal, 1 = EOF, 2 = Match, 3 = Unused
+	// 8 bits:  xlength = length - MIN_MATCH_LENGTH
+	// 22 bits: xoffset = offset - MIN_OFFSET_SIZE, or literal
+ lengthShift = 22
+ offsetMask = 1<<lengthShift - 1
+ typeMask = 3 << 30
+ literalType = 0 << 30
+ matchType = 1 << 30
+)
+
+// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
+// is lengthCodes[length - MIN_MATCH_LENGTH]
+var lengthCodes = [...]uint32{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
+ 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
+ 13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
+ 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+ 17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
+ 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+ 19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+ 23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 28,
+}
+
+var offsetCodes = [...]uint32{
+ 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+}
+
+type token uint32
+
+// Convert a literal into a literal token.
+func literalToken(literal uint32) token { return token(literalType + literal) }
+
+// Convert a < xlength, xoffset > pair into a match token.
+func matchToken(xlength uint32, xoffset uint32) token {
+ return token(matchType + xlength<<lengthShift + xoffset)
+}
+
+// Returns the type of a token
+func (t token) typ() uint32 { return uint32(t) & typeMask }
+
+// Returns the literal of a literal token
+func (t token) literal() uint32 { return uint32(t - literalType) }
+
+// Returns the extra offset of a match token
+func (t token) offset() uint32 { return uint32(t) & offsetMask }
+
+func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) }
+
+func lengthCode(len uint32) uint32 { return lengthCodes[len] }
+
+// Returns the offset code corresponding to a specific offset
+func offsetCode(off uint32) uint32 {
+ const n = uint32(len(offsetCodes))
+ switch {
+ case off < n:
+ return offsetCodes[off]
+ case off>>7 < n:
+ return offsetCodes[off>>7] + 14
+ default:
+ return offsetCodes[off>>14] + 28
+ }
+ panic("unreachable")
+}
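For reference, a token packs everything into a single uint32: the top two bits carry the type, the next eight bits the extra length, and the low 22 bits the extra offset (or, for a literal token, the literal itself). A small standalone sketch of that packing, reusing the constants defined above; it is illustrative only, since the package's token type is unexported:

package main

import "fmt"

const (
	lengthShift = 22
	offsetMask  = 1<<lengthShift - 1
	matchType   = 1 << 30
)

func main() {
	// Pack a match with xlength 5 and xoffset 300, then unpack it again.
	xlength, xoffset := uint32(5), uint32(300)
	tok := uint32(matchType) + xlength<<lengthShift + xoffset
	fmt.Println((tok-matchType)>>lengthShift, tok&offsetMask) // 5 300
}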
diff --git a/src/pkg/compress/flate/util.go b/src/pkg/compress/flate/util.go
new file mode 100644
index 000000000..aca5c78b2
--- /dev/null
+++ b/src/pkg/compress/flate/util.go
@@ -0,0 +1,72 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flate
+
+func min(left int, right int) int {
+ if left < right {
+ return left
+ }
+ return right
+}
+
+func minInt32(left int32, right int32) int32 {
+ if left < right {
+ return left
+ }
+ return right
+}
+
+func max(left int, right int) int {
+ if left > right {
+ return left
+ }
+ return right
+}
+
+func fillInts(a []int, value int) {
+ for i := range a {
+ a[i] = value
+ }
+}
+
+func fillInt32s(a []int32, value int32) {
+ for i := range a {
+ a[i] = value
+ }
+}
+
+func fillBytes(a []byte, value byte) {
+ for i := range a {
+ a[i] = value
+ }
+}
+
+func fillInt8s(a []int8, value int8) {
+ for i := range a {
+ a[i] = value
+ }
+}
+
+func fillUint8s(a []uint8, value uint8) {
+ for i := range a {
+ a[i] = value
+ }
+}
+
+func copyInt8s(dst []int8, src []int8) int {
+ cnt := min(len(dst), len(src))
+ for i := 0; i < cnt; i++ {
+ dst[i] = src[i]
+ }
+ return cnt
+}
+
+func copyUint8s(dst []uint8, src []uint8) int {
+ cnt := min(len(dst), len(src))
+ for i := 0; i < cnt; i++ {
+ dst[i] = src[i]
+ }
+ return cnt
+}
diff --git a/src/pkg/compress/gzip/Makefile b/src/pkg/compress/gzip/Makefile
new file mode 100644
index 000000000..b671fc72c
--- /dev/null
+++ b/src/pkg/compress/gzip/Makefile
@@ -0,0 +1,12 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=compress/gzip
+GOFILES=\
+ gunzip.go\
+ gzip.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/compress/gzip/gunzip.go b/src/pkg/compress/gzip/gunzip.go
new file mode 100644
index 000000000..6ac9293d7
--- /dev/null
+++ b/src/pkg/compress/gzip/gunzip.go
@@ -0,0 +1,230 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package gzip implements reading and writing of gzip format compressed files,
+// as specified in RFC 1952.
+package gzip
+
+import (
+ "bufio"
+ "compress/flate"
+ "hash"
+ "hash/crc32"
+ "io"
+ "os"
+)
+
+// BUG(nigeltao): Comments and Names don't properly map UTF-8 character codes outside of
+// the 0x00-0x7f range to ISO 8859-1 (Latin-1).
+
+const (
+ gzipID1 = 0x1f
+ gzipID2 = 0x8b
+ gzipDeflate = 8
+ flagText = 1 << 0
+ flagHdrCrc = 1 << 1
+ flagExtra = 1 << 2
+ flagName = 1 << 3
+ flagComment = 1 << 4
+)
+
+func makeReader(r io.Reader) flate.Reader {
+ if rr, ok := r.(flate.Reader); ok {
+ return rr
+ }
+ return bufio.NewReader(r)
+}
+
+var HeaderError = os.NewError("invalid gzip header")
+var ChecksumError = os.NewError("gzip checksum error")
+
+// The gzip file stores a header giving metadata about the compressed file.
+// That header is exposed as the fields of the Compressor and Decompressor structs.
+type Header struct {
+ Comment string // comment
+ Extra []byte // "extra data"
+ Mtime uint32 // modification time (seconds since January 1, 1970)
+ Name string // file name
+ OS byte // operating system type
+}
+
+// A Decompressor is an io.Reader that can be read to retrieve
+// uncompressed data from a gzip-format compressed file.
+//
+// In general, a gzip file can be a concatenation of gzip files,
+// each with its own header. Reads from the Decompressor
+// return the concatenation of the uncompressed data of each.
+// Only the first header is recorded in the Decompressor fields.
+//
+// Gzip files store a length and checksum of the uncompressed data.
+// The Decompressor will return a ChecksumError when Read
+// reaches the end of the uncompressed data if it does not
+// have the expected length or checksum. Clients should treat data
+// returned by Read as tentative until they receive the successful
+// (zero length, nil error) Read marking the end of the data.
+type Decompressor struct {
+ Header
+ r flate.Reader
+ decompressor io.ReadCloser
+ digest hash.Hash32
+ size uint32
+ flg byte
+ buf [512]byte
+ err os.Error
+}
+
+// NewReader creates a new Decompressor reading the given reader.
+// The implementation buffers input and may read more data than necessary from r.
+// It is the caller's responsibility to call Close on the Decompressor when done.
+func NewReader(r io.Reader) (*Decompressor, os.Error) {
+ z := new(Decompressor)
+ z.r = makeReader(r)
+ z.digest = crc32.NewIEEE()
+ if err := z.readHeader(true); err != nil {
+ z.err = err
+ return nil, err
+ }
+ return z, nil
+}
+
+// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
+func get4(p []byte) uint32 {
+ return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
+}
+
+func (z *Decompressor) readString() (string, os.Error) {
+ var err os.Error
+ for i := 0; ; i++ {
+ if i >= len(z.buf) {
+ return "", HeaderError
+ }
+ z.buf[i], err = z.r.ReadByte()
+ if err != nil {
+ return "", err
+ }
+ if z.buf[i] == 0 {
+ // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
+ // TODO(nigeltao): Convert from ISO 8859-1 (Latin-1) to UTF-8.
+ return string(z.buf[0:i]), nil
+ }
+ }
+ panic("not reached")
+}
+
+func (z *Decompressor) read2() (uint32, os.Error) {
+ _, err := io.ReadFull(z.r, z.buf[0:2])
+ if err != nil {
+ return 0, err
+ }
+ return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil
+}
+
+func (z *Decompressor) readHeader(save bool) os.Error {
+ _, err := io.ReadFull(z.r, z.buf[0:10])
+ if err != nil {
+ return err
+ }
+ if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
+ return HeaderError
+ }
+ z.flg = z.buf[3]
+ if save {
+ z.Mtime = get4(z.buf[4:8])
+ // z.buf[8] is xfl, ignored
+ z.OS = z.buf[9]
+ }
+ z.digest.Reset()
+ z.digest.Write(z.buf[0:10])
+
+ if z.flg&flagExtra != 0 {
+ n, err := z.read2()
+ if err != nil {
+ return err
+ }
+ data := make([]byte, n)
+ if _, err = io.ReadFull(z.r, data); err != nil {
+ return err
+ }
+ if save {
+ z.Extra = data
+ }
+ }
+
+ var s string
+ if z.flg&flagName != 0 {
+ if s, err = z.readString(); err != nil {
+ return err
+ }
+ if save {
+ z.Name = s
+ }
+ }
+
+ if z.flg&flagComment != 0 {
+ if s, err = z.readString(); err != nil {
+ return err
+ }
+ if save {
+ z.Comment = s
+ }
+ }
+
+ if z.flg&flagHdrCrc != 0 {
+ n, err := z.read2()
+ if err != nil {
+ return err
+ }
+ sum := z.digest.Sum32() & 0xFFFF
+ if n != sum {
+ return HeaderError
+ }
+ }
+
+ z.digest.Reset()
+ z.decompressor = flate.NewReader(z.r)
+ return nil
+}
+
+func (z *Decompressor) Read(p []byte) (n int, err os.Error) {
+ if z.err != nil {
+ return 0, z.err
+ }
+ if len(p) == 0 {
+ return 0, nil
+ }
+
+ n, err = z.decompressor.Read(p)
+ z.digest.Write(p[0:n])
+ z.size += uint32(n)
+ if n != 0 || err != os.EOF {
+ z.err = err
+ return
+ }
+
+ // Finished file; check checksum + size.
+ if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
+ z.err = err
+ return 0, err
+ }
+ crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8])
+ sum := z.digest.Sum32()
+ if sum != crc32 || isize != z.size {
+ z.err = ChecksumError
+ return 0, z.err
+ }
+
+ // File is ok; is there another?
+ if err = z.readHeader(false); err != nil {
+ z.err = err
+ return
+ }
+
+ // Yes. Reset and read from it.
+ z.digest.Reset()
+ z.size = 0
+ return z.Read(p)
+}
+
+// Calling Close does not close the wrapped io.Reader originally passed to NewReader.
+func (z *Decompressor) Close() os.Error { return z.decompressor.Close() }
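As a usage sketch for the reader side, under the pre-Go 1 API shown above (os.Error returns, the Decompressor type), decoding a gzip stream and inspecting its header fields might look like the following. The gzipData slice is an assumption for illustration: it is expected to hold a complete gzip file, such as the hello.txt vector in the test below.

package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io/ioutil"
)

func main() {
	var gzipData []byte // assumed: a complete gzip stream, e.g. read from a .gz file
	r, err := gzip.NewReader(bytes.NewBuffer(gzipData))
	if err != nil {
		fmt.Println("bad header:", err)
		return
	}
	defer r.Close()
	body, err := ioutil.ReadAll(r) // a bad CRC-32 or size trailer surfaces here as ChecksumError
	if err != nil {
		fmt.Println("read:", err)
		return
	}
	fmt.Printf("name=%q mtime=%d payload=%q\n", r.Name, r.Mtime, body)
}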
diff --git a/src/pkg/compress/gzip/gunzip_test.go b/src/pkg/compress/gzip/gunzip_test.go
new file mode 100644
index 000000000..1c08c7374
--- /dev/null
+++ b/src/pkg/compress/gzip/gunzip_test.go
@@ -0,0 +1,305 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+ "bytes"
+ "io"
+ "os"
+ "testing"
+)
+
+type gunzipTest struct {
+ name string
+ desc string
+ raw string
+ gzip []byte
+ err os.Error
+}
+
+var gunzipTests = []gunzipTest{
+ { // has 1 empty fixed-huffman block
+ "empty.txt",
+ "empty.txt",
+ "",
+ []byte{
+ 0x1f, 0x8b, 0x08, 0x08, 0xf7, 0x5e, 0x14, 0x4a,
+ 0x00, 0x03, 0x65, 0x6d, 0x70, 0x74, 0x79, 0x2e,
+ 0x74, 0x78, 0x74, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ },
+ nil,
+ },
+ { // has 1 non-empty fixed huffman block
+ "hello.txt",
+ "hello.txt",
+ "hello world\n",
+ []byte{
+ 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
+ 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
+ 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
+ 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
+ 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
+ 0x00, 0x00,
+ },
+ nil,
+ },
+ { // concatenation
+ "hello.txt",
+ "hello.txt x2",
+ "hello world\n" +
+ "hello world\n",
+ []byte{
+ 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
+ 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
+ 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
+ 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
+ 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
+ 0x00, 0x00,
+ 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
+ 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
+ 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
+ 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
+ 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
+ 0x00, 0x00,
+ },
+ nil,
+ },
+ { // has a fixed huffman block with some length-distance pairs
+ "shesells.txt",
+ "shesells.txt",
+ "she sells seashells by the seashore\n",
+ []byte{
+ 0x1f, 0x8b, 0x08, 0x08, 0x72, 0x66, 0x8b, 0x4a,
+ 0x00, 0x03, 0x73, 0x68, 0x65, 0x73, 0x65, 0x6c,
+ 0x6c, 0x73, 0x2e, 0x74, 0x78, 0x74, 0x00, 0x2b,
+ 0xce, 0x48, 0x55, 0x28, 0x4e, 0xcd, 0xc9, 0x29,
+ 0x06, 0x92, 0x89, 0xc5, 0x19, 0x60, 0x56, 0x52,
+ 0xa5, 0x42, 0x09, 0x58, 0x18, 0x28, 0x90, 0x5f,
+ 0x94, 0xca, 0x05, 0x00, 0x76, 0xb0, 0x3b, 0xeb,
+ 0x24, 0x00, 0x00, 0x00,
+ },
+ nil,
+ },
+ { // has dynamic huffman blocks
+ "gettysburg",
+ "gettysburg",
+ " Four score and seven years ago our fathers brought forth on\n" +
+ "this continent, a new nation, conceived in Liberty, and dedicated\n" +
+ "to the proposition that all men are created equal.\n" +
+ " Now we are engaged in a great Civil War, testing whether that\n" +
+ "nation, or any nation so conceived and so dedicated, can long\n" +
+ "endure.\n" +
+ " We are met on a great battle-field of that war.\n" +
+ " We have come to dedicate a portion of that field, as a final\n" +
+ "resting place for those who here gave their lives that that\n" +
+ "nation might live. It is altogether fitting and proper that\n" +
+ "we should do this.\n" +
+ " But, in a larger sense, we can not dedicate — we can not\n" +
+ "consecrate — we can not hallow — this ground.\n" +
+ " The brave men, living and dead, who struggled here, have\n" +
+ "consecrated it, far above our poor power to add or detract.\n" +
+ "The world will little note, nor long remember what we say here,\n" +
+ "but it can never forget what they did here.\n" +
+ " It is for us the living, rather, to be dedicated here to the\n" +
+ "unfinished work which they who fought here have thus far so\n" +
+ "nobly advanced. It is rather for us to be here dedicated to\n" +
+ "the great task remaining before us — that from these honored\n" +
+ "dead we take increased devotion to that cause for which they\n" +
+ "gave the last full measure of devotion —\n" +
+ " that we here highly resolve that these dead shall not have\n" +
+ "died in vain — that this nation, under God, shall have a new\n" +
+ "birth of freedom — and that government of the people, by the\n" +
+ "people, for the people, shall not perish from this earth.\n" +
+ "\n" +
+ "Abraham Lincoln, November 19, 1863, Gettysburg, Pennsylvania\n",
+ []byte{
+ 0x1f, 0x8b, 0x08, 0x08, 0xd1, 0x12, 0x2b, 0x4a,
+ 0x00, 0x03, 0x67, 0x65, 0x74, 0x74, 0x79, 0x73,
+ 0x62, 0x75, 0x72, 0x67, 0x00, 0x65, 0x54, 0xcd,
+ 0x6e, 0xd4, 0x30, 0x10, 0xbe, 0xfb, 0x29, 0xe6,
+ 0x01, 0x42, 0xa5, 0x0a, 0x09, 0xc1, 0x11, 0x90,
+ 0x40, 0x48, 0xa8, 0xe2, 0x80, 0xd4, 0xf3, 0x24,
+ 0x9e, 0x24, 0x56, 0xbd, 0x9e, 0xc5, 0x76, 0x76,
+ 0x95, 0x1b, 0x0f, 0xc1, 0x13, 0xf2, 0x24, 0x7c,
+ 0x63, 0x77, 0x9b, 0x4a, 0x5c, 0xaa, 0x6e, 0x6c,
+ 0xcf, 0x7c, 0x7f, 0x33, 0x44, 0x5f, 0x74, 0xcb,
+ 0x54, 0x26, 0xcd, 0x42, 0x9c, 0x3c, 0x15, 0xb9,
+ 0x48, 0xa2, 0x5d, 0x38, 0x17, 0xe2, 0x45, 0xc9,
+ 0x4e, 0x67, 0xae, 0xab, 0xe0, 0xf7, 0x98, 0x75,
+ 0x5b, 0xd6, 0x4a, 0xb3, 0xe6, 0xba, 0x92, 0x26,
+ 0x57, 0xd7, 0x50, 0x68, 0xd2, 0x54, 0x43, 0x92,
+ 0x54, 0x07, 0x62, 0x4a, 0x72, 0xa5, 0xc4, 0x35,
+ 0x68, 0x1a, 0xec, 0x60, 0x92, 0x70, 0x11, 0x4f,
+ 0x21, 0xd1, 0xf7, 0x30, 0x4a, 0xae, 0xfb, 0xd0,
+ 0x9a, 0x78, 0xf1, 0x61, 0xe2, 0x2a, 0xde, 0x55,
+ 0x25, 0xd4, 0xa6, 0x73, 0xd6, 0xb3, 0x96, 0x60,
+ 0xef, 0xf0, 0x9b, 0x2b, 0x71, 0x8c, 0x74, 0x02,
+ 0x10, 0x06, 0xac, 0x29, 0x8b, 0xdd, 0x25, 0xf9,
+ 0xb5, 0x71, 0xbc, 0x73, 0x44, 0x0f, 0x7a, 0xa5,
+ 0xab, 0xb4, 0x33, 0x49, 0x0b, 0x2f, 0xbd, 0x03,
+ 0xd3, 0x62, 0x17, 0xe9, 0x73, 0xb8, 0x84, 0x48,
+ 0x8f, 0x9c, 0x07, 0xaa, 0x52, 0x00, 0x6d, 0xa1,
+ 0xeb, 0x2a, 0xc6, 0xa0, 0x95, 0x76, 0x37, 0x78,
+ 0x9a, 0x81, 0x65, 0x7f, 0x46, 0x4b, 0x45, 0x5f,
+ 0xe1, 0x6d, 0x42, 0xe8, 0x01, 0x13, 0x5c, 0x38,
+ 0x51, 0xd4, 0xb4, 0x38, 0x49, 0x7e, 0xcb, 0x62,
+ 0x28, 0x1e, 0x3b, 0x82, 0x93, 0x54, 0x48, 0xf1,
+ 0xd2, 0x7d, 0xe4, 0x5a, 0xa3, 0xbc, 0x99, 0x83,
+ 0x44, 0x4f, 0x3a, 0x77, 0x36, 0x57, 0xce, 0xcf,
+ 0x2f, 0x56, 0xbe, 0x80, 0x90, 0x9e, 0x84, 0xea,
+ 0x51, 0x1f, 0x8f, 0xcf, 0x90, 0xd4, 0x60, 0xdc,
+ 0x5e, 0xb4, 0xf7, 0x10, 0x0b, 0x26, 0xe0, 0xff,
+ 0xc4, 0xd1, 0xe5, 0x67, 0x2e, 0xe7, 0xc8, 0x93,
+ 0x98, 0x05, 0xb8, 0xa8, 0x45, 0xc0, 0x4d, 0x09,
+ 0xdc, 0x84, 0x16, 0x2b, 0x0d, 0x9a, 0x21, 0x53,
+ 0x04, 0x8b, 0xd2, 0x0b, 0xbd, 0xa2, 0x4c, 0xa7,
+ 0x60, 0xee, 0xd9, 0xe1, 0x1d, 0xd1, 0xb7, 0x4a,
+ 0x30, 0x8f, 0x63, 0xd5, 0xa5, 0x8b, 0x33, 0x87,
+ 0xda, 0x1a, 0x18, 0x79, 0xf3, 0xe3, 0xa6, 0x17,
+ 0x94, 0x2e, 0xab, 0x6e, 0xa0, 0xe3, 0xcd, 0xac,
+ 0x50, 0x8c, 0xca, 0xa7, 0x0d, 0x76, 0x37, 0xd1,
+ 0x23, 0xe7, 0x05, 0x57, 0x8b, 0xa4, 0x22, 0x83,
+ 0xd9, 0x62, 0x52, 0x25, 0xad, 0x07, 0xbb, 0xbf,
+ 0xbf, 0xff, 0xbc, 0xfa, 0xee, 0x20, 0x73, 0x91,
+ 0x29, 0xff, 0x7f, 0x02, 0x71, 0x62, 0x84, 0xb5,
+ 0xf6, 0xb5, 0x25, 0x6b, 0x41, 0xde, 0x92, 0xb7,
+ 0x76, 0x3f, 0x91, 0x91, 0x31, 0x1b, 0x41, 0x84,
+ 0x62, 0x30, 0x0a, 0x37, 0xa4, 0x5e, 0x18, 0x3a,
+ 0x99, 0x08, 0xa5, 0xe6, 0x6d, 0x59, 0x22, 0xec,
+ 0x33, 0x39, 0x86, 0x26, 0xf5, 0xab, 0x66, 0xc8,
+ 0x08, 0x20, 0xcf, 0x0c, 0xd7, 0x47, 0x45, 0x21,
+ 0x0b, 0xf6, 0x59, 0xd5, 0xfe, 0x5c, 0x8d, 0xaa,
+ 0x12, 0x7b, 0x6f, 0xa1, 0xf0, 0x52, 0x33, 0x4f,
+ 0xf5, 0xce, 0x59, 0xd3, 0xab, 0x66, 0x10, 0xbf,
+ 0x06, 0xc4, 0x31, 0x06, 0x73, 0xd6, 0x80, 0xa2,
+ 0x78, 0xc2, 0x45, 0xcb, 0x03, 0x65, 0x39, 0xc9,
+ 0x09, 0xd1, 0x06, 0x04, 0x33, 0x1a, 0x5a, 0xf1,
+ 0xde, 0x01, 0xb8, 0x71, 0x83, 0xc4, 0xb5, 0xb3,
+ 0xc3, 0x54, 0x65, 0x33, 0x0d, 0x5a, 0xf7, 0x9b,
+ 0x90, 0x7c, 0x27, 0x1f, 0x3a, 0x58, 0xa3, 0xd8,
+ 0xfd, 0x30, 0x5f, 0xb7, 0xd2, 0x66, 0xa2, 0x93,
+ 0x1c, 0x28, 0xb7, 0xe9, 0x1b, 0x0c, 0xe1, 0x28,
+ 0x47, 0x26, 0xbb, 0xe9, 0x7d, 0x7e, 0xdc, 0x96,
+ 0x10, 0x92, 0x50, 0x56, 0x7c, 0x06, 0xe2, 0x27,
+ 0xb4, 0x08, 0xd3, 0xda, 0x7b, 0x98, 0x34, 0x73,
+ 0x9f, 0xdb, 0xf6, 0x62, 0xed, 0x31, 0x41, 0x13,
+ 0xd3, 0xa2, 0xa8, 0x4b, 0x3a, 0xc6, 0x1d, 0xe4,
+ 0x2f, 0x8c, 0xf8, 0xfb, 0x97, 0x64, 0xf4, 0xb6,
+ 0x2f, 0x80, 0x5a, 0xf3, 0x56, 0xe0, 0x40, 0x50,
+ 0xd5, 0x19, 0xd0, 0x1e, 0xfc, 0xca, 0xe5, 0xc9,
+ 0xd4, 0x60, 0x00, 0x81, 0x2e, 0xa3, 0xcc, 0xb6,
+ 0x52, 0xf0, 0xb4, 0xdb, 0x69, 0x99, 0xce, 0x7a,
+ 0x32, 0x4c, 0x08, 0xed, 0xaa, 0x10, 0x10, 0xe3,
+ 0x6f, 0xee, 0x99, 0x68, 0x95, 0x9f, 0x04, 0x71,
+ 0xb2, 0x49, 0x2f, 0x62, 0xa6, 0x5e, 0xb4, 0xef,
+ 0x02, 0xed, 0x4f, 0x27, 0xde, 0x4a, 0x0f, 0xfd,
+ 0xc1, 0xcc, 0xdd, 0x02, 0x8f, 0x08, 0x16, 0x54,
+ 0xdf, 0xda, 0xca, 0xe0, 0x82, 0xf1, 0xb4, 0x31,
+ 0x7a, 0xa9, 0x81, 0xfe, 0x90, 0xb7, 0x3e, 0xdb,
+ 0xd3, 0x35, 0xc0, 0x20, 0x80, 0x33, 0x46, 0x4a,
+ 0x63, 0xab, 0xd1, 0x0d, 0x29, 0xd2, 0xe2, 0x84,
+ 0xb8, 0xdb, 0xfa, 0xe9, 0x89, 0x44, 0x86, 0x7c,
+ 0xe8, 0x0b, 0xe6, 0x02, 0x6a, 0x07, 0x9b, 0x96,
+ 0xd0, 0xdb, 0x2e, 0x41, 0x4c, 0xa1, 0xd5, 0x57,
+ 0x45, 0x14, 0xfb, 0xe3, 0xa6, 0x72, 0x5b, 0x87,
+ 0x6e, 0x0c, 0x6d, 0x5b, 0xce, 0xe0, 0x2f, 0xe2,
+ 0x21, 0x81, 0x95, 0xb0, 0xe8, 0xb6, 0x32, 0x0b,
+ 0xb2, 0x98, 0x13, 0x52, 0x5d, 0xfb, 0xec, 0x63,
+ 0x17, 0x8a, 0x9e, 0x23, 0x22, 0x36, 0xee, 0xcd,
+ 0xda, 0xdb, 0xcf, 0x3e, 0xf1, 0xc7, 0xf1, 0x01,
+ 0x12, 0x93, 0x0a, 0xeb, 0x6f, 0xf2, 0x02, 0x15,
+ 0x96, 0x77, 0x5d, 0xef, 0x9c, 0xfb, 0x88, 0x91,
+ 0x59, 0xf9, 0x84, 0xdd, 0x9b, 0x26, 0x8d, 0x80,
+ 0xf9, 0x80, 0x66, 0x2d, 0xac, 0xf7, 0x1f, 0x06,
+ 0xba, 0x7f, 0xff, 0xee, 0xed, 0x40, 0x5f, 0xa5,
+ 0xd6, 0xbd, 0x8c, 0x5b, 0x46, 0xd2, 0x7e, 0x48,
+ 0x4a, 0x65, 0x8f, 0x08, 0x42, 0x60, 0xf7, 0x0f,
+ 0xb9, 0x16, 0x0b, 0x0c, 0x1a, 0x06, 0x00, 0x00,
+ },
+ nil,
+ },
+ { // has 1 non-empty fixed huffman block then garbage
+ "hello.txt",
+ "hello.txt + garbage",
+ "hello world\n",
+ []byte{
+ 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
+ 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
+ 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
+ 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
+ 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
+ 0x00, 0x00, 'g', 'a', 'r', 'b', 'a', 'g', 'e', '!', '!', '!',
+ },
+ HeaderError,
+ },
+ { // has 1 non-empty fixed huffman block not enough header
+ "hello.txt",
+ "hello.txt + garbage",
+ "hello world\n",
+ []byte{
+ 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
+ 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
+ 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
+ 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
+ 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
+ 0x00, 0x00, gzipID1,
+ },
+ io.ErrUnexpectedEOF,
+ },
+ { // has 1 non-empty fixed huffman block but corrupt checksum
+ "hello.txt",
+ "hello.txt + corrupt checksum",
+ "hello world\n",
+ []byte{
+ 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
+ 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
+ 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
+ 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
+ 0x02, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0c, 0x00,
+ 0x00, 0x00,
+ },
+ ChecksumError,
+ },
+ { // has 1 non-empty fixed huffman block but corrupt size
+ "hello.txt",
+ "hello.txt + corrupt size",
+ "hello world\n",
+ []byte{
+ 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a,
+ 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
+ 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9,
+ 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
+ 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0xff, 0x00,
+ 0x00, 0x00,
+ },
+ ChecksumError,
+ },
+}
+
+func TestDecompressor(t *testing.T) {
+ b := new(bytes.Buffer)
+ for _, tt := range gunzipTests {
+ in := bytes.NewBuffer(tt.gzip)
+ gzip, err := NewReader(in)
+ if err != nil {
+ t.Errorf("%s: NewReader: %s", tt.name, err)
+ continue
+ }
+ defer gzip.Close()
+ if tt.name != gzip.Name {
+ t.Errorf("%s: got name %s", tt.name, gzip.Name)
+ }
+ b.Reset()
+ n, err := io.Copy(b, gzip)
+ if err != tt.err {
+ t.Errorf("%s: io.Copy: %v want %v", tt.name, err, tt.err)
+ }
+ s := b.String()
+ if s != tt.raw {
+ t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.name, n, s, len(tt.raw), tt.raw)
+ }
+ }
+}
diff --git a/src/pkg/compress/gzip/gzip.go b/src/pkg/compress/gzip/gzip.go
new file mode 100644
index 000000000..8860d10af
--- /dev/null
+++ b/src/pkg/compress/gzip/gzip.go
@@ -0,0 +1,187 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+ "compress/flate"
+ "hash"
+ "hash/crc32"
+ "io"
+ "os"
+)
+
+// These constants are copied from the flate package, so that code that imports
+// "compress/gzip" does not also have to import "compress/flate".
+const (
+ NoCompression = flate.NoCompression
+ BestSpeed = flate.BestSpeed
+ BestCompression = flate.BestCompression
+ DefaultCompression = flate.DefaultCompression
+)
+
+// A Compressor is an io.WriteCloser that satisfies writes by compressing data written
+// to its wrapped io.Writer.
+type Compressor struct {
+ Header
+ w io.Writer
+ level int
+ compressor io.WriteCloser
+ digest hash.Hash32
+ size uint32
+ closed bool
+ buf [10]byte
+ err os.Error
+}
+
+// NewWriter calls NewWriterLevel with the default compression level.
+func NewWriter(w io.Writer) (*Compressor, os.Error) {
+ return NewWriterLevel(w, DefaultCompression)
+}
+
+// NewWriterLevel creates a new Compressor writing to the given writer.
+// Writes may be buffered and not flushed until Close.
+// Callers that wish to set the fields in Compressor.Header must
+// do so before the first call to Write or Close.
+// It is the caller's responsibility to call Close on the WriteCloser when done.
+// level is the compression level, which can be DefaultCompression, NoCompression,
+// or any integer value between BestSpeed and BestCompression (inclusive).
+func NewWriterLevel(w io.Writer, level int) (*Compressor, os.Error) {
+ z := new(Compressor)
+ z.OS = 255 // unknown
+ z.w = w
+ z.level = level
+ z.digest = crc32.NewIEEE()
+ return z, nil
+}
+
+// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
+func put2(p []byte, v uint16) {
+ p[0] = uint8(v >> 0)
+ p[1] = uint8(v >> 8)
+}
+
+func put4(p []byte, v uint32) {
+ p[0] = uint8(v >> 0)
+ p[1] = uint8(v >> 8)
+ p[2] = uint8(v >> 16)
+ p[3] = uint8(v >> 24)
+}
+
+// writeBytes writes a length-prefixed byte slice to z.w.
+func (z *Compressor) writeBytes(b []byte) os.Error {
+ if len(b) > 0xffff {
+ return os.NewError("gzip.Write: Extra data is too large")
+ }
+ put2(z.buf[0:2], uint16(len(b)))
+ _, err := z.w.Write(z.buf[0:2])
+ if err != nil {
+ return err
+ }
+ _, err = z.w.Write(b)
+ return err
+}
+
+// writeString writes a string (in ISO 8859-1 (Latin-1) format) to z.w.
+func (z *Compressor) writeString(s string) os.Error {
+ // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
+ // TODO(nigeltao): Convert from UTF-8 to ISO 8859-1 (Latin-1).
+ for _, v := range s {
+ if v == 0 || v > 0x7f {
+ return os.NewError("gzip.Write: non-ASCII header string")
+ }
+ }
+ _, err := io.WriteString(z.w, s)
+ if err != nil {
+ return err
+ }
+ // GZIP strings are NUL-terminated.
+ z.buf[0] = 0
+ _, err = z.w.Write(z.buf[0:1])
+ return err
+}
+
+func (z *Compressor) Write(p []byte) (int, os.Error) {
+ if z.err != nil {
+ return 0, z.err
+ }
+ var n int
+ // Write the GZIP header lazily.
+ if z.compressor == nil {
+ z.buf[0] = gzipID1
+ z.buf[1] = gzipID2
+ z.buf[2] = gzipDeflate
+ z.buf[3] = 0
+ if z.Extra != nil {
+ z.buf[3] |= 0x04
+ }
+ if z.Name != "" {
+ z.buf[3] |= 0x08
+ }
+ if z.Comment != "" {
+ z.buf[3] |= 0x10
+ }
+ put4(z.buf[4:8], z.Mtime)
+ if z.level == BestCompression {
+ z.buf[8] = 2
+ } else if z.level == BestSpeed {
+ z.buf[8] = 4
+ } else {
+ z.buf[8] = 0
+ }
+ z.buf[9] = z.OS
+ n, z.err = z.w.Write(z.buf[0:10])
+ if z.err != nil {
+ return n, z.err
+ }
+ if z.Extra != nil {
+ z.err = z.writeBytes(z.Extra)
+ if z.err != nil {
+ return n, z.err
+ }
+ }
+ if z.Name != "" {
+ z.err = z.writeString(z.Name)
+ if z.err != nil {
+ return n, z.err
+ }
+ }
+ if z.Comment != "" {
+ z.err = z.writeString(z.Comment)
+ if z.err != nil {
+ return n, z.err
+ }
+ }
+ z.compressor = flate.NewWriter(z.w, z.level)
+ }
+ z.size += uint32(len(p))
+ z.digest.Write(p)
+ n, z.err = z.compressor.Write(p)
+ return n, z.err
+}
+
+// Calling Close does not close the wrapped io.Writer originally passed to NewWriter.
+func (z *Compressor) Close() os.Error {
+ if z.err != nil {
+ return z.err
+ }
+ if z.closed {
+ return nil
+ }
+ z.closed = true
+ if z.compressor == nil {
+ z.Write(nil)
+ if z.err != nil {
+ return z.err
+ }
+ }
+ z.err = z.compressor.Close()
+ if z.err != nil {
+ return z.err
+ }
+ put4(z.buf[0:4], z.digest.Sum32())
+ put4(z.buf[4:8], z.size)
+ _, z.err = z.w.Write(z.buf[0:8])
+ return z.err
+}
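And a matching sketch for the writer side: the header fields must be set before the first Write, because Write emits the gzip header lazily on its first call. Assuming the same pre-Go 1 API as above; the name, comment and mtime values are arbitrary illustrative choices:

package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
)

func main() {
	var buf bytes.Buffer
	w, err := gzip.NewWriter(&buf)
	if err != nil {
		fmt.Println(err)
		return
	}
	// Header fields take effect only if set before the first Write or Close.
	w.Name = "greeting.txt"
	w.Comment = "a tiny example"
	w.Mtime = 1252853522 // seconds since the Unix epoch
	if _, err := w.Write([]byte("hello world\n")); err != nil {
		fmt.Println(err)
		return
	}
	if err := w.Close(); err != nil { // Close flushes and writes the CRC-32 and size trailer
		fmt.Println(err)
		return
	}
	fmt.Printf("%d compressed bytes\n", buf.Len())
}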
diff --git a/src/pkg/compress/gzip/gzip_test.go b/src/pkg/compress/gzip/gzip_test.go
new file mode 100644
index 000000000..121e627e6
--- /dev/null
+++ b/src/pkg/compress/gzip/gzip_test.go
@@ -0,0 +1,84 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+ "io"
+ "io/ioutil"
+ "testing"
+)
+
+// pipe connects a gzip Compressor to a gunzip Decompressor through an io.Pipe,
+// running dfunc at the writer end and cfunc at the reader end.
+func pipe(t *testing.T, dfunc func(*Compressor), cfunc func(*Decompressor)) {
+ piper, pipew := io.Pipe()
+ defer piper.Close()
+ go func() {
+ defer pipew.Close()
+ compressor, err := NewWriter(pipew)
+ if err != nil {
+ t.Fatalf("%v", err)
+ }
+ defer compressor.Close()
+ dfunc(compressor)
+ }()
+ decompressor, err := NewReader(piper)
+ if err != nil {
+ t.Fatalf("%v", err)
+ }
+ defer decompressor.Close()
+ cfunc(decompressor)
+}
+
+// Tests that an empty payload still forms a valid GZIP stream.
+func TestEmpty(t *testing.T) {
+ pipe(t,
+ func(compressor *Compressor) {},
+ func(decompressor *Decompressor) {
+ b, err := ioutil.ReadAll(decompressor)
+ if err != nil {
+ t.Fatalf("%v", err)
+ }
+ if len(b) != 0 {
+ t.Fatalf("did not read an empty slice")
+ }
+ })
+}
+
+// Tests that gzipping and then gunzipping is the identity function.
+func TestWriter(t *testing.T) {
+ pipe(t,
+ func(compressor *Compressor) {
+ compressor.Comment = "comment"
+ compressor.Extra = []byte("extra")
+ compressor.Mtime = 1e8
+ compressor.Name = "name"
+ _, err := compressor.Write([]byte("payload"))
+ if err != nil {
+ t.Fatalf("%v", err)
+ }
+ },
+ func(decompressor *Decompressor) {
+ b, err := ioutil.ReadAll(decompressor)
+ if err != nil {
+ t.Fatalf("%v", err)
+ }
+ if string(b) != "payload" {
+ t.Fatalf("payload is %q, want %q", string(b), "payload")
+ }
+ if decompressor.Comment != "comment" {
+ t.Fatalf("comment is %q, want %q", decompressor.Comment, "comment")
+ }
+ if string(decompressor.Extra) != "extra" {
+ t.Fatalf("extra is %q, want %q", decompressor.Extra, "extra")
+ }
+ if decompressor.Mtime != 1e8 {
+ t.Fatalf("mtime is %d, want %d", decompressor.Mtime, uint32(1e8))
+ }
+ if decompressor.Name != "name" {
+ t.Fatalf("name is %q, want %q", decompressor.Name, "name")
+ }
+ })
+}
diff --git a/src/pkg/compress/lzw/Makefile b/src/pkg/compress/lzw/Makefile
new file mode 100644
index 000000000..28f5e6abc
--- /dev/null
+++ b/src/pkg/compress/lzw/Makefile
@@ -0,0 +1,12 @@
+# Copyright 2011 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=compress/lzw
+GOFILES=\
+ reader.go\
+ writer.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/compress/lzw/reader.go b/src/pkg/compress/lzw/reader.go
new file mode 100644
index 000000000..21231c8e5
--- /dev/null
+++ b/src/pkg/compress/lzw/reader.go
@@ -0,0 +1,253 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package lzw implements the Lempel-Ziv-Welch compressed data format,
+// described in T. A. Welch, ``A Technique for High-Performance Data
+// Compression'', Computer, 17(6) (June 1984), pp 8-19.
+//
+// In particular, it implements LZW as used by the GIF, TIFF and PDF file
+// formats, which means variable-width codes up to 12 bits and the first
+// two non-literal codes are a clear code and an EOF code.
+package lzw
+
+// TODO(nigeltao): check that TIFF and PDF use LZW in the same way as GIF,
+// modulo LSB/MSB packing order.
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "os"
+)
+
+// Order specifies the bit ordering in an LZW data stream.
+type Order int
+
+const (
+ // LSB means Least Significant Bits first, as used in the GIF file format.
+ LSB Order = iota
+ // MSB means Most Significant Bits first, as used in the TIFF and PDF
+ // file formats.
+ MSB
+)
+
+const (
+ maxWidth = 12
+ decoderInvalidCode = 0xffff
+ flushBuffer = 1 << maxWidth
+)
+
+// decoder is the state from which the readXxx method converts a byte
+// stream into a code stream.
+type decoder struct {
+ r io.ByteReader
+ bits uint32
+ nBits uint
+ width uint
+ read func(*decoder) (uint16, os.Error) // readLSB or readMSB
+ litWidth int // width in bits of literal codes
+ err os.Error
+
+ // The first 1<<litWidth codes are literal codes.
+ // The next two codes mean clear and EOF.
+ // Other valid codes are in the range [lo, hi] where lo := clear + 2,
+ // with the upper bound incrementing on each code seen.
+ // overflow is the code at which hi overflows the code width.
+ // last is the most recently seen code, or decoderInvalidCode.
+ clear, eof, hi, overflow, last uint16
+
+ // Each code c in [lo, hi] expands to two or more bytes. For c != hi:
+ // suffix[c] is the last of these bytes.
+ // prefix[c] is the code for all but the last byte.
+ // This code can either be a literal code or another code in [lo, c).
+ // The c == hi case is a special case.
+ suffix [1 << maxWidth]uint8
+ prefix [1 << maxWidth]uint16
+
+ // output is the temporary output buffer.
+ // Literal codes are accumulated from the start of the buffer.
+ // Non-literal codes decode to a sequence of suffixes that are first
+ // written right-to-left from the end of the buffer before being copied
+ // to the start of the buffer.
+ // It is flushed when it contains >= 1<<maxWidth bytes,
+ // so that there is always room to decode an entire code.
+ output [2 * 1 << maxWidth]byte
+ o int // write index into output
+ toRead []byte // bytes to return from Read
+}
+
+// readLSB returns the next code for "Least Significant Bits first" data.
+func (d *decoder) readLSB() (uint16, os.Error) {
+ for d.nBits < d.width {
+ x, err := d.r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ d.bits |= uint32(x) << d.nBits
+ d.nBits += 8
+ }
+ code := uint16(d.bits & (1<<d.width - 1))
+ d.bits >>= d.width
+ d.nBits -= d.width
+ return code, nil
+}
+
+// readMSB returns the next code for "Most Significant Bits first" data.
+func (d *decoder) readMSB() (uint16, os.Error) {
+ for d.nBits < d.width {
+ x, err := d.r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ d.bits |= uint32(x) << (24 - d.nBits)
+ d.nBits += 8
+ }
+ code := uint16(d.bits >> (32 - d.width))
+ d.bits <<= d.width
+ d.nBits -= d.width
+ return code, nil
+}
+
+func (d *decoder) Read(b []byte) (int, os.Error) {
+ for {
+ if len(d.toRead) > 0 {
+ n := copy(b, d.toRead)
+ d.toRead = d.toRead[n:]
+ return n, nil
+ }
+ if d.err != nil {
+ return 0, d.err
+ }
+ d.decode()
+ }
+ panic("unreachable")
+}
+
+// decode decompresses bytes from d.r and leaves them in d.toRead.
+// d.read specifies how to decode bytes into codes and d.litWidth is the
+// width in bits of literal codes.
+func (d *decoder) decode() {
+ // Loop over the code stream, converting codes into decompressed bytes.
+ for {
+ code, err := d.read(d)
+ if err != nil {
+ if err == os.EOF {
+ err = io.ErrUnexpectedEOF
+ }
+ d.err = err
+ return
+ }
+ switch {
+ case code < d.clear:
+ // We have a literal code.
+ d.output[d.o] = uint8(code)
+ d.o++
+ if d.last != decoderInvalidCode {
+ // Save what the hi code expands to.
+ d.suffix[d.hi] = uint8(code)
+ d.prefix[d.hi] = d.last
+ }
+ case code == d.clear:
+ d.width = 1 + uint(d.litWidth)
+ d.hi = d.eof
+ d.overflow = 1 << d.width
+ d.last = decoderInvalidCode
+ continue
+ case code == d.eof:
+ d.flush()
+ d.err = os.EOF
+ return
+ case code <= d.hi:
+ c, i := code, len(d.output)-1
+ if code == d.hi {
+ // code == hi is a special case which expands to the last expansion
+ // followed by the head of the last expansion. To find the head, we walk
+ // the prefix chain until we find a literal code.
+ c = d.last
+ for c >= d.clear {
+ c = d.prefix[c]
+ }
+ d.output[i] = uint8(c)
+ i--
+ c = d.last
+ }
+ // Copy the suffix chain into output and then write that to w.
+ for c >= d.clear {
+ d.output[i] = d.suffix[c]
+ i--
+ c = d.prefix[c]
+ }
+ d.output[i] = uint8(c)
+ d.o += copy(d.output[d.o:], d.output[i:])
+ if d.last != decoderInvalidCode {
+ // Save what the hi code expands to.
+ d.suffix[d.hi] = uint8(c)
+ d.prefix[d.hi] = d.last
+ }
+ default:
+ d.err = os.NewError("lzw: invalid code")
+ return
+ }
+ d.last, d.hi = code, d.hi+1
+ if d.hi >= d.overflow {
+ if d.width == maxWidth {
+ d.last = decoderInvalidCode
+ } else {
+ d.width++
+ d.overflow <<= 1
+ }
+ }
+ if d.o >= flushBuffer {
+ d.flush()
+ return
+ }
+ }
+ panic("unreachable")
+}
+
+func (d *decoder) flush() {
+ d.toRead = d.output[:d.o]
+ d.o = 0
+}
+
+func (d *decoder) Close() os.Error {
+ d.err = os.EINVAL // in case any Reads come along
+ return nil
+}
+
+// NewReader creates a new io.ReadCloser that satisfies reads by decompressing
+// the data read from r.
+// It is the caller's responsibility to call Close on the ReadCloser when
+// finished reading.
+// The number of bits to use for literal codes, litWidth, must be in the
+// range [2,8] and is typically 8.
+func NewReader(r io.Reader, order Order, litWidth int) io.ReadCloser {
+ d := new(decoder)
+ switch order {
+ case LSB:
+ d.read = (*decoder).readLSB
+ case MSB:
+ d.read = (*decoder).readMSB
+ default:
+ d.err = os.NewError("lzw: unknown order")
+ return d
+ }
+ if litWidth < 2 || 8 < litWidth {
+ d.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
+ return d
+ }
+ if br, ok := r.(io.ByteReader); ok {
+ d.r = br
+ } else {
+ d.r = bufio.NewReader(r)
+ }
+ d.litWidth = litWidth
+ d.width = 1 + uint(litWidth)
+ d.clear = uint16(1) << uint(litWidth)
+ d.eof, d.hi = d.clear+1, d.clear+1
+ d.overflow = uint16(1) << d.width
+ d.last = decoderInvalidCode
+
+ return d
+}
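A reader-side usage sketch, again under the API above: NewReader never returns an error directly, so a bad order or litWidth only surfaces on the first Read. The compressed bytes here are the tobe;LSB;8 vector from the test file that follows, which decodes to "TOBEORNOTTOBEORTOBEORNOT".

package main

import (
	"bytes"
	"compress/lzw"
	"fmt"
	"io/ioutil"
)

func main() {
	compressed := []byte("\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04\x12\x34\xb8\xb0\xe0\xc1\x84\x01\x01")
	r := lzw.NewReader(bytes.NewBuffer(compressed), lzw.LSB, 8)
	defer r.Close()
	out, err := ioutil.ReadAll(r)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("%s\n", out) // TOBEORNOTTOBEORTOBEORNOT
}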
diff --git a/src/pkg/compress/lzw/reader_test.go b/src/pkg/compress/lzw/reader_test.go
new file mode 100644
index 000000000..f8042b0d1
--- /dev/null
+++ b/src/pkg/compress/lzw/reader_test.go
@@ -0,0 +1,145 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lzw
+
+import (
+ "bytes"
+ "io"
+ "io/ioutil"
+ "os"
+ "runtime"
+ "strconv"
+ "strings"
+ "testing"
+)
+
+type lzwTest struct {
+ desc string
+ raw string
+ compressed string
+ err os.Error
+}
+
+var lzwTests = []lzwTest{
+ {
+ "empty;LSB;8",
+ "",
+ "\x01\x01",
+ nil,
+ },
+ {
+ "empty;MSB;8",
+ "",
+ "\x80\x80",
+ nil,
+ },
+ {
+ "tobe;LSB;7",
+ "TOBEORNOTTOBEORTOBEORNOT",
+ "\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
+ nil,
+ },
+ {
+ "tobe;LSB;8",
+ "TOBEORNOTTOBEORTOBEORNOT",
+ "\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04\x12\x34\xb8\xb0\xe0\xc1\x84\x01\x01",
+ nil,
+ },
+ {
+ "tobe;MSB;7",
+ "TOBEORNOTTOBEORTOBEORNOT",
+ "\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
+ nil,
+ },
+ {
+ "tobe;MSB;8",
+ "TOBEORNOTTOBEORTOBEORNOT",
+ "\x2a\x13\xc8\x44\x52\x79\x48\x9c\x4f\x2a\x40\xa0\x90\x68\x5c\x16\x0f\x09\x80\x80",
+ nil,
+ },
+ {
+ "tobe-truncated;LSB;8",
+ "TOBEORNOTTOBEORTOBEORNOT",
+ "\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04",
+ io.ErrUnexpectedEOF,
+ },
+ // This example comes from http://en.wikipedia.org/wiki/Graphics_Interchange_Format.
+ {
+ "gif;LSB;8",
+ "\x28\xff\xff\xff\x28\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
+ "\x00\x51\xfc\x1b\x28\x70\xa0\xc1\x83\x01\x01",
+ nil,
+ },
+ // This example comes from http://compgroups.net/comp.lang.ruby/Decompressing-LZW-compression-from-PDF-file
+ {
+ "pdf;MSB;8",
+ "-----A---B",
+ "\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01",
+ nil,
+ },
+}
+
+func TestReader(t *testing.T) {
+ b := bytes.NewBuffer(nil)
+ for _, tt := range lzwTests {
+ d := strings.Split(tt.desc, ";")
+ var order Order
+ switch d[1] {
+ case "LSB":
+ order = LSB
+ case "MSB":
+ order = MSB
+ default:
+ t.Errorf("%s: bad order %q", tt.desc, d[1])
+ }
+ litWidth, _ := strconv.Atoi(d[2])
+ rc := NewReader(strings.NewReader(tt.compressed), order, litWidth)
+ defer rc.Close()
+ b.Reset()
+ n, err := io.Copy(b, rc)
+ if err != nil {
+ if err != tt.err {
+ t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err)
+ }
+ continue
+ }
+ s := b.String()
+ if s != tt.raw {
+ t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw)
+ }
+ }
+}
+
+func benchmarkDecoder(b *testing.B, n int) {
+ b.StopTimer()
+ b.SetBytes(int64(n))
+ buf0, _ := ioutil.ReadFile("../testdata/e.txt")
+ buf0 = buf0[:10000]
+ compressed := bytes.NewBuffer(nil)
+ w := NewWriter(compressed, LSB, 8)
+ for i := 0; i < n; i += len(buf0) {
+ io.Copy(w, bytes.NewBuffer(buf0))
+ }
+ w.Close()
+ buf1 := compressed.Bytes()
+ buf0, compressed, w = nil, nil, nil
+ runtime.GC()
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ io.Copy(ioutil.Discard, NewReader(bytes.NewBuffer(buf1), LSB, 8))
+ }
+}
+
+func BenchmarkDecoder1e4(b *testing.B) {
+ benchmarkDecoder(b, 1e4)
+}
+
+func BenchmarkDecoder1e5(b *testing.B) {
+ benchmarkDecoder(b, 1e5)
+}
+
+func BenchmarkDecoder1e6(b *testing.B) {
+ benchmarkDecoder(b, 1e6)
+}
diff --git a/src/pkg/compress/lzw/writer.go b/src/pkg/compress/lzw/writer.go
new file mode 100644
index 000000000..87143b7aa
--- /dev/null
+++ b/src/pkg/compress/lzw/writer.go
@@ -0,0 +1,259 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lzw
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "os"
+)
+
+// A writer is a buffered, flushable writer.
+type writer interface {
+ WriteByte(byte) os.Error
+ Flush() os.Error
+}
+
+// An errWriteCloser is an io.WriteCloser that always returns a given error.
+type errWriteCloser struct {
+ err os.Error
+}
+
+func (e *errWriteCloser) Write([]byte) (int, os.Error) {
+ return 0, e.err
+}
+
+func (e *errWriteCloser) Close() os.Error {
+ return e.err
+}
+
+const (
+ // A code is a 12 bit value, stored as a uint32 when encoding to avoid
+ // type conversions when shifting bits.
+ maxCode = 1<<12 - 1
+ invalidCode = 1<<32 - 1
+ // There are 1<<12 possible codes, which is an upper bound on the number of
+ // valid hash table entries at any given point in time. tableSize is 4x that.
+ tableSize = 4 * 1 << 12
+ tableMask = tableSize - 1
+ // A hash table entry is a uint32. Zero is an invalid entry since the
+ // lower 12 bits of a valid entry must be a non-literal code.
+ invalidEntry = 0
+)
+
+// encoder is an LZW compressor.
+type encoder struct {
+ // w is the writer that compressed bytes are written to.
+ w writer
+ // write, bits, nBits and width are the state for converting a code stream
+ // into a byte stream.
+ write func(*encoder, uint32) os.Error
+ bits uint32
+ nBits uint
+ width uint
+ // litWidth is the width in bits of literal codes.
+ litWidth uint
+ // hi is the code implied by the next code emission.
+ // overflow is the code at which hi overflows the code width.
+ hi, overflow uint32
+ // savedCode is the accumulated code at the end of the most recent Write
+ // call. It is equal to invalidCode if there was no such call.
+ savedCode uint32
+ // err is the first error encountered during writing. Closing the encoder
+ // will make any future Write calls return os.EINVAL.
+ err os.Error
+ // table is the hash table from 20-bit keys to 12-bit values. Each table
+ // entry contains key<<12|val and collisions resolve by linear probing.
+ // The keys consist of a 12-bit code prefix and an 8-bit byte suffix.
+ // The values are a 12-bit code.
+ table [tableSize]uint32
+}
+
+// writeLSB writes the code c for "Least Significant Bits first" data.
+func (e *encoder) writeLSB(c uint32) os.Error {
+ e.bits |= c << e.nBits
+ e.nBits += e.width
+ for e.nBits >= 8 {
+ if err := e.w.WriteByte(uint8(e.bits)); err != nil {
+ return err
+ }
+ e.bits >>= 8
+ e.nBits -= 8
+ }
+ return nil
+}
+
+// writeMSB writes the code c for "Most Significant Bits first" data.
+func (e *encoder) writeMSB(c uint32) os.Error {
+ e.bits |= c << (32 - e.width - e.nBits)
+ e.nBits += e.width
+ for e.nBits >= 8 {
+ if err := e.w.WriteByte(uint8(e.bits >> 24)); err != nil {
+ return err
+ }
+ e.bits <<= 8
+ e.nBits -= 8
+ }
+ return nil
+}
+
+// errOutOfCodes is an internal error that means that the encoder has run out
+// of unused codes and a clear code needs to be sent next.
+var errOutOfCodes = os.NewError("lzw: out of codes")
+
+// incHi increments e.hi and checks for both overflow and running out of
+// unused codes. In the latter case, incHi sends a clear code, resets the
+// encoder state and returns errOutOfCodes.
+func (e *encoder) incHi() os.Error {
+ e.hi++
+ if e.hi == e.overflow {
+ e.width++
+ e.overflow <<= 1
+ }
+ if e.hi == maxCode {
+ clear := uint32(1) << e.litWidth
+ if err := e.write(e, clear); err != nil {
+ return err
+ }
+ e.width = uint(e.litWidth) + 1
+ e.hi = clear + 1
+ e.overflow = clear << 1
+ for i := range e.table {
+ e.table[i] = invalidEntry
+ }
+ return errOutOfCodes
+ }
+ return nil
+}
+
+// Write writes a compressed representation of p to e's underlying writer.
+func (e *encoder) Write(p []byte) (int, os.Error) {
+ if e.err != nil {
+ return 0, e.err
+ }
+ if len(p) == 0 {
+ return 0, nil
+ }
+ litMask := uint32(1<<e.litWidth - 1)
+ code := e.savedCode
+ if code == invalidCode {
+ // The first code sent is always a literal code.
+ code, p = uint32(p[0])&litMask, p[1:]
+ }
+loop:
+ for _, x := range p {
+ literal := uint32(x) & litMask
+ key := code<<8 | literal
+ // If there is a hash table hit for this key then we continue the loop
+ // and do not emit a code yet.
+ hash := (key>>12 ^ key) & tableMask
+ for h, t := hash, e.table[hash]; t != invalidEntry; {
+ if key == t>>12 {
+ code = t & maxCode
+ continue loop
+ }
+ h = (h + 1) & tableMask
+ t = e.table[h]
+ }
+ // Otherwise, write the current code, and literal becomes the start of
+ // the next emitted code.
+ if e.err = e.write(e, code); e.err != nil {
+ return 0, e.err
+ }
+ code = literal
+ // Increment e.hi, the next implied code. If we run out of codes, reset
+ // the encoder state (including clearing the hash table) and continue.
+ if err := e.incHi(); err != nil {
+ if err == errOutOfCodes {
+ continue
+ }
+ e.err = err
+ return 0, e.err
+ }
+ // Otherwise, insert key -> e.hi into the map that e.table represents.
+ for {
+ if e.table[hash] == invalidEntry {
+ e.table[hash] = (key << 12) | e.hi
+ break
+ }
+ hash = (hash + 1) & tableMask
+ }
+ }
+ e.savedCode = code
+ return len(p), nil
+}
+
+// Close closes the encoder, flushing any pending output. It does not close or
+// flush e's underlying writer.
+func (e *encoder) Close() os.Error {
+ if e.err != nil {
+ if e.err == os.EINVAL {
+ return nil
+ }
+ return e.err
+ }
+ // Make any future calls to Write return os.EINVAL.
+ e.err = os.EINVAL
+ // Write the savedCode if valid.
+ if e.savedCode != invalidCode {
+ if err := e.write(e, e.savedCode); err != nil {
+ return err
+ }
+ if err := e.incHi(); err != nil && err != errOutOfCodes {
+ return err
+ }
+ }
+ // Write the eof code.
+ eof := uint32(1)<<e.litWidth + 1
+ if err := e.write(e, eof); err != nil {
+ return err
+ }
+ // Write the final bits.
+ if e.nBits > 0 {
+ if e.write == (*encoder).writeMSB {
+ e.bits >>= 24
+ }
+ if err := e.w.WriteByte(uint8(e.bits)); err != nil {
+ return err
+ }
+ }
+ return e.w.Flush()
+}
+
+// NewWriter creates a new io.WriteCloser that satisfies writes by compressing
+// the data and writing it to w.
+// It is the caller's responsibility to call Close on the WriteCloser when
+// finished writing.
+// The number of bits to use for literal codes, litWidth, must be in the
+// range [2,8] and is typically 8.
+func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser {
+ var write func(*encoder, uint32) os.Error
+ switch order {
+ case LSB:
+ write = (*encoder).writeLSB
+ case MSB:
+ write = (*encoder).writeMSB
+ default:
+ return &errWriteCloser{os.NewError("lzw: unknown order")}
+ }
+ if litWidth < 2 || 8 < litWidth {
+ return &errWriteCloser{fmt.Errorf("lzw: litWidth %d out of range", litWidth)}
+ }
+ bw, ok := w.(writer)
+ if !ok {
+ bw = bufio.NewWriter(w)
+ }
+ lw := uint(litWidth)
+ return &encoder{
+ w: bw,
+ write: write,
+ width: 1 + lw,
+ litWidth: lw,
+ hi: 1<<lw + 1,
+ overflow: 1 << (lw + 1),
+ savedCode: invalidCode,
+ }
+}
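And the writer side: a round trip through NewWriter and NewReader with matching order and litWidth should reproduce the input, which is essentially what the TestWriter function below checks against the testdata files. A short sketch of such a round trip:

package main

import (
	"bytes"
	"compress/lzw"
	"fmt"
	"io/ioutil"
)

func main() {
	var buf bytes.Buffer
	w := lzw.NewWriter(&buf, lzw.MSB, 8)
	w.Write([]byte("she sells seashells by the seashore\n"))
	w.Close() // flushes the saved code, the EOF code and any final bits

	n := buf.Len() // record the compressed size before reading drains the buffer
	r := lzw.NewReader(&buf, lzw.MSB, 8)
	defer r.Close()
	out, _ := ioutil.ReadAll(r)
	fmt.Printf("%d compressed bytes -> %q\n", n, out)
}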
diff --git a/src/pkg/compress/lzw/writer_test.go b/src/pkg/compress/lzw/writer_test.go
new file mode 100644
index 000000000..4c5e522f9
--- /dev/null
+++ b/src/pkg/compress/lzw/writer_test.go
@@ -0,0 +1,132 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lzw
+
+import (
+ "io"
+ "io/ioutil"
+ "os"
+ "runtime"
+ "testing"
+)
+
+var filenames = []string{
+ "../testdata/e.txt",
+ "../testdata/pi.txt",
+}
+
+// testFile tests that compressing and then decompressing the given file with
+// the given options yields equivalent bytes to the original file.
+func testFile(t *testing.T, fn string, order Order, litWidth int) {
+ // Read the file, as golden output.
+ golden, err := os.Open(fn)
+ if err != nil {
+ t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err)
+ return
+ }
+ defer golden.Close()
+
+ // Read the file again, and push it through a pipe that compresses at the write end, and decompresses at the read end.
+ raw, err := os.Open(fn)
+ if err != nil {
+ t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err)
+ return
+ }
+
+ piper, pipew := io.Pipe()
+ defer piper.Close()
+ go func() {
+ defer raw.Close()
+ defer pipew.Close()
+ lzww := NewWriter(pipew, order, litWidth)
+ defer lzww.Close()
+ var b [4096]byte
+ for {
+ n, err0 := raw.Read(b[:])
+ if err0 != nil && err0 != os.EOF {
+ t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err0)
+ return
+ }
+ _, err1 := lzww.Write(b[:n])
+ if err1 == os.EPIPE {
+ // Fail, but do not report the error, as some other (presumably reportable) error broke the pipe.
+ return
+ }
+ if err1 != nil {
+ t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err1)
+ return
+ }
+ if err0 == os.EOF {
+ break
+ }
+ }
+ }()
+ lzwr := NewReader(piper, order, litWidth)
+ defer lzwr.Close()
+
+ // Compare the two.
+ b0, err0 := ioutil.ReadAll(golden)
+ b1, err1 := ioutil.ReadAll(lzwr)
+ if err0 != nil {
+ t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err0)
+ return
+ }
+ if err1 != nil {
+ t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err1)
+ return
+ }
+ if len(b1) != len(b0) {
+ t.Errorf("%s (order=%d litWidth=%d): length mismatch %d != %d", fn, order, litWidth, len(b1), len(b0))
+ return
+ }
+ for i := 0; i < len(b0); i++ {
+ if b1[i] != b0[i] {
+ t.Errorf("%s (order=%d litWidth=%d): mismatch at %d, 0x%02x != 0x%02x\n", fn, order, litWidth, i, b1[i], b0[i])
+ return
+ }
+ }
+}
+
+func TestWriter(t *testing.T) {
+ for _, filename := range filenames {
+ for _, order := range [...]Order{LSB, MSB} {
+ // The test data "2.71828 etcetera" is ASCII text requiring at least 6 bits.
+ for _, litWidth := range [...]int{6, 7, 8} {
+ testFile(t, filename, order, litWidth)
+ }
+ }
+ }
+}
+
+func benchmarkEncoder(b *testing.B, n int) {
+ b.StopTimer()
+ b.SetBytes(int64(n))
+ buf0, _ := ioutil.ReadFile("../testdata/e.txt")
+ buf0 = buf0[:10000]
+ buf1 := make([]byte, n)
+ for i := 0; i < n; i += len(buf0) {
+ copy(buf1[i:], buf0)
+ }
+ buf0 = nil
+ runtime.GC()
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ w := NewWriter(ioutil.Discard, LSB, 8)
+ w.Write(buf1)
+ w.Close()
+ }
+}
+
+func BenchmarkEncoder1e4(b *testing.B) {
+ benchmarkEncoder(b, 1e4)
+}
+
+func BenchmarkEncoder1e5(b *testing.B) {
+ benchmarkEncoder(b, 1e5)
+}
+
+func BenchmarkEncoder1e6(b *testing.B) {
+ benchmarkEncoder(b, 1e6)
+}
diff --git a/src/pkg/compress/testdata/e.txt b/src/pkg/compress/testdata/e.txt
new file mode 100644
index 000000000..76cf2a7b6
--- /dev/null
+++ b/src/pkg/compress/testdata/e.txt
@@ -0,0 +1 @@
+2.718281828459045235360287471352662497757247093699959574966967627724076630353547594571382178525166427427466391932003059921817413596629043572900334295260595630738132328627943490763233829880753195251019011573834187930702154089149934884167509244761460668082264800168477411853742345442437107539077744992069551702761838606261331384583000752044933826560297606737113200709328709127443747047230696977209310141692836819025515108657463772111252389784425056953696770785449969967946864454905987931636889230098793127736178215424999229576351482208269895193668033182528869398496465105820939239829488793320362509443117301238197068416140397019837679320683282376464804295311802328782509819455815301756717361332069811250996181881593041690351598888519345807273866738589422879228499892086805825749279610484198444363463244968487560233624827041978623209002160990235304369941849146314093431738143640546253152096183690888707016768396424378140592714563549061303107208510383750510115747704171898610687396965521267154688957035035402123407849819334321068170121005627880235193033224745015853904730419957777093503660416997329725088687696640355570716226844716256079882651787134195124665201030592123667719432527867539855894489697096409754591856956380236370162112047742722836489613422516445078182442352948636372141740238893441247963574370263755294448337998016125492278509257782562092622648326277933386566481627725164019105900491644998289315056604725802778631864155195653244258698294695930801915298721172556347546396447910145904090586298496791287406870504895858671747985466775757320568128845920541334053922000113786300945560688166740016984205580403363795376452030402432256613527836951177883863874439662532249850654995886234281899707733276171783928034946501434558897071942586398772754710962953741521115136835062752602326484728703920764310059584116612054529703023647254929666938115137322753645098889031360205724817658511806303644281231496550704751025446501172721155519486685080036853228183152196003735625279449515828418829478761085263981395599006737648292244375287184624578036192981971399147564488262603903381441823262515097482798777996437308997038886778227138360577297882412561190717663946507063304527954661855096666185664709711344474016070462621568071748187784437143698821855967095910259686200235371858874856965220005031173439207321139080329363447972735595527734907178379342163701205005451326383544000186323991490705479778056697853358048966906295119432473099587655236812859041383241160722602998330535370876138939639177957454016137223618789365260538155841587186925538606164779834025435128439612946035291332594279490433729908573158029095863138268329147711639633709240031689458636060645845925126994655724839186564209752685082307544254599376917041977780085362730941710163434907696423722294352366125572508814779223151974778060569672538017180776360346245927877846585065605078084421152969752189087401966090665180351650179250461950136658543663271254963990854914420001457476081930221206602433009641270489439039717719518069908699860663658323227870937650226014929101151717763594460202324930028040186772391028809786660565118326004368850881715723866984224220102495055188169480322100251542649463981287367765892768816359831247788652014117411091360116499507662907794364600585194199856016264790761532103872755712699251827568798930276176114616254935649590379804583818232336861201624373656984670378585330527583333793990752166069238053369887956513728559388349989470741618155012539706464817194670834819721448889879067650379590366967249499254527903372963616265897603949857674139735944102374432970935547798262961
459144293645142861715858733974679189757121195618738578364475844842355558105002561149239151889309946342841393608038309166281881150371528496705974162562823609216807515017772538740256425347087908913729172282861151591568372524163077225440633787593105982676094420326192428531701878177296023541306067213604600038966109364709514141718577701418060644363681546444005331608778314317444081194942297559931401188868331483280270655383300469329011574414756313999722170380461709289457909627166226074071874997535921275608441473782330327033016823719364800217328573493594756433412994302485023573221459784328264142168487872167336701061509424345698440187331281010794512722373788612605816566805371439612788873252737389039289050686532413806279602593038772769778379286840932536588073398845721874602100531148335132385004782716937621800490479559795929059165547050577751430817511269898518840871856402603530558373783242292418562564425502267215598027401261797192804713960068916382866527700975276706977703643926022437284184088325184877047263844037953016690546593746161932384036389313136432713768884102681121989127522305625675625470172508634976536728860596675274086862740791285657699631378975303466061666980421826772456053066077389962421834085988207186468262321508028828635974683965435885668550377313129658797581050121491620765676995065971534476347032085321560367482860837865680307306265763346977429563464371670939719306087696349532884683361303882943104080029687386911706666614680001512114344225602387447432525076938707777519329994213727721125884360871583483562696166198057252661220679754062106208064988291845439530152998209250300549825704339055357016865312052649561485724925738620691740369521353373253166634546658859728665945113644137033139367211856955395210845840724432383558606310680696492485123263269951460359603729725319836842336390463213671011619282171115028280160448805880238203198149309636959673583274202498824568494127386056649135252670604623445054922758115170931492187959271800194096886698683703730220047531433818109270803001720593553052070070607223399946399057131158709963577735902719628506114651483752620956534671329002599439766311454590268589897911583709341937044115512192011716488056694593813118384376562062784631049034629395002945834116482411496975832601180073169943739350696629571241027323913874175492307186245454322203955273529524024590380574450289224688628533654221381572213116328811205214648980518009202471939171055539011394331668151582884368760696110250517100739276238555338627255353883096067164466237092264680967125406186950214317621166814009759528149390722260111268115310838731761732323526360583817315103459573653822353499293582283685100781088463434998351840445170427018938199424341009057537625776757111809008816418331920196262341628816652137471732547772778348877436651882875215668571950637193656539038944936642176400312152787022236646363575550356557694888654950027085392361710550213114741374410613444554419210133617299628569489919336918472947858072915608851039678195942983318648075608367955149663644896559294818785178403877332624705194505041984774201418394773120281588684570729054405751060128525805659470304683634459265255213700806875200959345360731622611872817392807462309468536782310609792159936001994623799343421068781349734695924646975250624695861690917857397659519939299399556754271465491045686070209901260681870498417807917392407194599632306025470790177452751318680998228473086076653686685551646770291133682756310722334672611370549079536583453863719623585631261838715677411873852772292259474337378569553845624680101390572787101651296663676445187246565373040
2443684140814488732957847348490003019477888020460324660842875351848364959195082888323206522128104190448047247949291342284951970022601310430062410717971502793433263407995960531446053230488528972917659876016667811937932372453857209607582277178483361613582612896226118129455927462767137794487586753657544861407611931125958512655759734573015333642630767985443385761715333462325270572005303988289499034259566232975782488735029259166825894456894655992658454762694528780516501720674785417887982276806536650641910973434528878338621726156269582654478205672987756426325321594294418039943217000090542650763095588465895171709147607437136893319469090981904501290307099566226620303182649365733698419555776963787624918852865686607600566025605445711337286840205574416030837052312242587223438854123179481388550075689381124935386318635287083799845692619981794523364087429591180747453419551420351726184200845509170845682368200897739455842679214273477560879644279202708312150156406341341617166448069815483764491573900121217041547872591998943825364950514771379399147205219529079396137621107238494290616357604596231253506068537651423115349665683715116604220796394466621163255157729070978473156278277598788136491951257483328793771571459091064841642678309949723674420175862269402159407924480541255360431317992696739157542419296607312393763542139230617876753958711436104089409966089471418340698362993675362621545247298464213752891079884381306095552622720837518629837066787224430195793793786072107254277289071732854874374355781966511716618330881129120245204048682200072344035025448202834254187884653602591506445271657700044521097735585897622655484941621714989532383421600114062950718490427789258552743035221396835679018076406042138307308774460170842688272261177180842664333651780002171903449234264266292261456004337383868335555343453004264818473989215627086095650629340405264943244261445665921291225648893569655009154306426134252668472594914314239398845432486327461842846655985332312210466259890141712103446084271616619001257195870793217569698544013397622096749454185407118446433946990162698351607848924514058940946395267807354579700307051163682519487701189764002827648414160587206184185297189154019688253289309149665345753571427318482016384644832499037886069008072709327673127581966563941148961716832980455139729506687604740915420428429993541025829113502241690769431668574242522509026939034814856451303069925199590436384028429267412573422447765584177886171737265462085498294498946787350929581652632072258992368768457017823038096567883112289305809140572610865884845873101658151167533327674887014829167419701512559782572707406431808601428149024146780472327597684269633935773542930186739439716388611764209004068663398856841681003872389214483176070116684503887212364367043314091155733280182977988736590916659612402021778558854876176161989370794380056663364884365089144805571039765214696027662583599051987042300179465536788
diff --git a/src/pkg/compress/testdata/pi.txt b/src/pkg/compress/testdata/pi.txt
new file mode 100644
index 000000000..58d8f3b6d
--- /dev/null
+++ b/src/pkg/compress/testdata/pi.txt
@@ -0,0 +1 @@
+3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198938095257201065485863278865936153381827968230301952035301852968995773622599413891249721775283479131515574857242454150695950829533116861727855889075098381754637464939319255060400927701671139009848824012858361603563707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104752162056966024058038150193511253382430035587640247496473263914199272604269922796782354781636009341721641219924586315030286182974555706749838505494588586926995690927210797509302955321165344987202755960236480665499119881834797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548161361157352552133475741849468438523323907394143334547762416862518983569485562099219222184272550254256887671790494601653466804988627232791786085784383827967976681454100953883786360950680064225125205117392984896084128488626945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645995813390478027590099465764078951269468398352595709825822620522489407726719478268482601476990902640136394437455305068203496252451749399651431429809190659250937221696461515709858387410597885959772975498930161753928468138268683868942774155991855925245953959431049972524680845987273644695848653836736222626099124608051243884390451244136549762780797715691435997700129616089441694868555848406353422072225828488648158456028506016842739452267467678895252138522549954666727823986456596116354886230577456498035593634568174324112515076069479451096596094025228879710893145669136867228748940560101503308617928680920874760917824938589009714909675985261365549781893129784821682998948722658804857564014270477555132379641451523746234364542858444795265867821051141354735739523113427166102135969536231442952484937187110145765403590279934403742007310578539062198387447808478489683321445713868751943506430218453191048481005370614680674919278191197939952061419663428754440643745123718192179998391015919561814675142691239748940907186494231961567945208095146550225231603881930142093762137855956638937787083039069792077346722182562599661501421503068038447734549202605414665925201497442850732518666002132434088190710486331734649651453905796268561005508106658796998163574736384052571459102897064140110971206280439039759515677157700420337869936007230558763176359421873125147120532928191826186125867321579198414848829164470609575270695722091756711672291098169091528017350671274858322287183520935396572512108357915136988209144421006751033467110314126711136990865851639831501970165151168517143765761835
155650884909989859982387345528331635507647918535893226185489632132933089857064204675259070915481416549859461637180270981994309924488957571282890592323326097299712084433573265489382391193259746366730583604142813883032038249037589852437441702913276561809377344403070746921120191302033038019762110110044929321516084244485963766983895228684783123552658213144957685726243344189303968642624341077322697802807318915441101044682325271620105265227211166039666557309254711055785376346682065310989652691862056476931257058635662018558100729360659876486117910453348850346113657686753249441668039626579787718556084552965412665408530614344431858676975145661406800700237877659134401712749470420562230538994561314071127000407854733269939081454664645880797270826683063432858785698305235808933065757406795457163775254202114955761581400250126228594130216471550979259230990796547376125517656751357517829666454779174501129961489030463994713296210734043751895735961458901938971311179042978285647503203198691514028708085990480109412147221317947647772622414254854540332157185306142288137585043063321751829798662237172159160771669254748738986654949450114654062843366393790039769265672146385306736096571209180763832716641627488880078692560290228472104031721186082041900042296617119637792133757511495950156604963186294726547364252308177036751590673502350728354056704038674351362222477158915049530984448933309634087807693259939780541934144737744184263129860809988868741326047215695162396586457302163159819319516735381297416772947867242292465436680098067692823828068996400482435403701416314965897940924323789690706977942236250822168895738379862300159377647165122893578601588161755782973523344604281512627203734314653197777416031990665541876397929334419521541341899485444734567383162499341913181480927777103863877343177207545654532207770921201905166096280490926360197598828161332316663652861932668633606273567630354477628035045077723554710585954870279081435624014517180624643626794561275318134078330336254232783944975382437205835311477119926063813346776879695970309833913077109870408591337464144282277263465947047458784778720192771528073176790770715721344473060570073349243693113835049316312840425121925651798069411352801314701304781643788518529092854520116583934196562134914341595625865865570552690496520985803385072242648293972858478316305777756068887644624824685792603953527734803048029005876075825104747091643961362676044925627420420832085661190625454337213153595845068772460290161876679524061634252257719542916299193064553779914037340432875262888963995879475729174642635745525407909145135711136941091193932519107602082520261879853188770584297259167781314969900901921169717372784768472686084900337702424291651300500516832336435038951702989392233451722013812806965011784408745196012122859937162313017114448464090389064495444006198690754851602632750529834918740786680881833851022833450850486082503930213321971551843063545500766828294930413776552793975175461395398468339363830474611996653858153842056853386218672523340283087112328278921250771262946322956398989893582116745627010218356462201349671518819097303811980049734072396103685406643193950979019069963955245300545058068550195673022921913933918568034490398205955100226353536192041994745538593810234395544959778377902374216172711172364343543947822181852862408514006660443325888569867054315470696574745855033232334210730154594051655379068662733379958511562578432298827372319898757141595781119635833005940873068121602876496286744604774649159950549737425626901049037781986835938146574126804925648798556145372347867330390468838343634655
3794986419270563872931748723320837601123029911367938627089438799362016295154133714248928307220126901475466847653576164773794675200490757155527819653621323926406160136358155907422020203187277605277219005561484255518792530343513984425322341576233610642506390497500865627109535919465897514131034822769306247435363256916078154781811528436679570611086153315044521274739245449454236828860613408414863776700961207151249140430272538607648236341433462351897576645216413767969031495019108575984423919862916421939949072362346468441173940326591840443780513338945257423995082965912285085558215725031071257012668302402929525220118726767562204154205161841634847565169998116141010029960783869092916030288400269104140792886215078424516709087000699282120660418371806535567252532567532861291042487761825829765157959847035622262934860034158722980534989650226291748788202734209222245339856264766914905562842503912757710284027998066365825488926488025456610172967026640765590429099456815065265305371829412703369313785178609040708667114965583434347693385781711386455873678123014587687126603489139095620099393610310291616152881384379099042317473363948045759314931405297634757481193567091101377517210080315590248530906692037671922033229094334676851422144773793937517034436619910403375111735471918550464490263655128162288244625759163330391072253837421821408835086573917715096828874782656995995744906617583441375223970968340800535598491754173818839994469748676265516582765848358845314277568790029095170283529716344562129640435231176006651012412006597558512761785838292041974844236080071930457618932349229279650198751872127267507981255470958904556357921221033346697499235630254947802490114195212382815309114079073860251522742995818072471625916685451333123948049470791191532673430282441860414263639548000448002670496248201792896476697583183271314251702969234889627668440323260927524960357996469256504936818360900323809293459588970695365349406034021665443755890045632882250545255640564482465151875471196218443965825337543885690941130315095261793780029741207665147939425902989695946995565761218656196733786236256125216320862869222103274889218654364802296780705765615144632046927906821207388377814233562823608963208068222468012248261177185896381409183903673672220888321513755600372798394004152970028783076670944474560134556417254370906979396122571429894671543578468788614445812314593571984922528471605049221242470141214780573455105008019086996033027634787081081754501193071412233908663938339529425786905076431006383519834389341596131854347546495569781038293097164651438407007073604112373599843452251610507027056235266012764848308407611830130527932054274628654036036745328651057065874882256981579367897669742205750596834408697350201410206723585020072452256326513410559240190274216248439140359989535394590944070469120914093870012645600162374288021092764579310657922955249887275846101264836999892256959688159205600101655256375678
diff --git a/src/pkg/compress/zlib/Makefile b/src/pkg/compress/zlib/Makefile
new file mode 100644
index 000000000..791072d34
--- /dev/null
+++ b/src/pkg/compress/zlib/Makefile
@@ -0,0 +1,12 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=compress/zlib
+GOFILES=\
+ reader.go\
+ writer.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/compress/zlib/reader.go b/src/pkg/compress/zlib/reader.go
new file mode 100644
index 000000000..78dabdf4d
--- /dev/null
+++ b/src/pkg/compress/zlib/reader.go
@@ -0,0 +1,126 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package zlib implements reading and writing of zlib format compressed data,
+as specified in RFC 1950.
+
+The implementation provides filters that uncompress during reading
+and compress during writing. For example, to write compressed data
+to a buffer:
+
+ var b bytes.Buffer
+ w, err := zlib.NewWriter(&b)
+ w.Write([]byte("hello, world\n"))
+ w.Close()
+
+and to read that data back:
+
+ r, err := zlib.NewReader(&b)
+ io.Copy(os.Stdout, r)
+ r.Close()
+*/
+package zlib
+
+import (
+ "bufio"
+ "compress/flate"
+ "hash"
+ "hash/adler32"
+ "io"
+ "os"
+)
+
+const zlibDeflate = 8
+
+var ChecksumError = os.NewError("zlib checksum error")
+var HeaderError = os.NewError("invalid zlib header")
+var DictionaryError = os.NewError("invalid zlib dictionary")
+
+type reader struct {
+ r flate.Reader
+ decompressor io.ReadCloser
+ digest hash.Hash32
+ err os.Error
+ scratch [4]byte
+}
+
+// NewReader creates a new io.ReadCloser that satisfies reads by decompressing data read from r.
+// The implementation buffers input and may read more data than necessary from r.
+// It is the caller's responsibility to call Close on the ReadCloser when done.
+func NewReader(r io.Reader) (io.ReadCloser, os.Error) {
+ return NewReaderDict(r, nil)
+}
+
+// NewReaderDict is like NewReader but uses a preset dictionary.
+// NewReaderDict ignores the dictionary if the compressed data does not refer to it.
+func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, os.Error) {
+ z := new(reader)
+ if fr, ok := r.(flate.Reader); ok {
+ z.r = fr
+ } else {
+ z.r = bufio.NewReader(r)
+ }
+ _, err := io.ReadFull(z.r, z.scratch[0:2])
+ if err != nil {
+ return nil, err
+ }
+ h := uint(z.scratch[0])<<8 | uint(z.scratch[1])
+ if (z.scratch[0]&0x0f != zlibDeflate) || (h%31 != 0) {
+ return nil, HeaderError
+ }
+ if z.scratch[1]&0x20 != 0 {
+ _, err = io.ReadFull(z.r, z.scratch[0:4])
+ if err != nil {
+ return nil, err
+ }
+ checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3])
+ if checksum != adler32.Checksum(dict) {
+ return nil, DictionaryError
+ }
+ z.decompressor = flate.NewReaderDict(z.r, dict)
+ } else {
+ z.decompressor = flate.NewReader(z.r)
+ }
+ z.digest = adler32.New()
+ return z, nil
+}
+
+func (z *reader) Read(p []byte) (n int, err os.Error) {
+ if z.err != nil {
+ return 0, z.err
+ }
+ if len(p) == 0 {
+ return 0, nil
+ }
+
+ n, err = z.decompressor.Read(p)
+ z.digest.Write(p[0:n])
+ if n != 0 || err != os.EOF {
+ z.err = err
+ return
+ }
+
+ // Finished file; check checksum.
+ if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil {
+ z.err = err
+ return 0, err
+ }
+ // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
+ checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3])
+ if checksum != z.digest.Sum32() {
+ z.err = ChecksumError
+ return 0, z.err
+ }
+ return
+}
+
+// Calling Close does not close the wrapped io.Reader originally passed to NewReader.
+func (z *reader) Close() os.Error {
+ if z.err != nil {
+ return z.err
+ }
+ z.err = z.decompressor.Close()
+ return z.err
+}
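
The two header bytes that NewReaderDict reads above are the CMF and FLG bytes of
RFC 1950: the low nibble of CMF must be 8 (deflate), and the pair, taken as a
big-endian 16-bit value, must be a multiple of 31. A minimal sketch (not part of
the imported sources) of that check against the common 0x78 0x9c header:

    package main

    import "fmt"

    func main() {
            cmf, flg := byte(0x78), byte(0x9c)
            h := uint(cmf)<<8 | uint(flg)
            // Same conditions NewReaderDict enforces before handing the
            // stream to flate: CM == 8 and the 16-bit value divisible by 31.
            fmt.Println(cmf&0x0f == 8, h%31 == 0) // true true
    }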
diff --git a/src/pkg/compress/zlib/reader_test.go b/src/pkg/compress/zlib/reader_test.go
new file mode 100644
index 000000000..195db446c
--- /dev/null
+++ b/src/pkg/compress/zlib/reader_test.go
@@ -0,0 +1,128 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package zlib
+
+import (
+ "bytes"
+ "io"
+ "os"
+ "testing"
+)
+
+type zlibTest struct {
+ desc string
+ raw string
+ compressed []byte
+ dict []byte
+ err os.Error
+}
+
+// Compare-to-golden test data was generated by the ZLIB example program at
+// http://www.zlib.net/zpipe.c
+
+var zlibTests = []zlibTest{
+ {
+ "empty",
+ "",
+ []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01},
+ nil,
+ nil,
+ },
+ {
+ "goodbye",
+ "goodbye, world",
+ []byte{
+ 0x78, 0x9c, 0x4b, 0xcf, 0xcf, 0x4f, 0x49, 0xaa,
+ 0x4c, 0xd5, 0x51, 0x28, 0xcf, 0x2f, 0xca, 0x49,
+ 0x01, 0x00, 0x28, 0xa5, 0x05, 0x5e,
+ },
+ nil,
+ nil,
+ },
+ {
+ "bad header",
+ "",
+ []byte{0x78, 0x9f, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01},
+ nil,
+ HeaderError,
+ },
+ {
+ "bad checksum",
+ "",
+ []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0xff},
+ nil,
+ ChecksumError,
+ },
+ {
+ "not enough data",
+ "",
+ []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00},
+ nil,
+ io.ErrUnexpectedEOF,
+ },
+ {
+ "excess data is silently ignored",
+ "",
+ []byte{
+ 0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x78, 0x9c, 0xff,
+ },
+ nil,
+ nil,
+ },
+ {
+ "dictionary",
+ "Hello, World!\n",
+ []byte{
+ 0x78, 0xbb, 0x1c, 0x32, 0x04, 0x27, 0xf3, 0x00,
+ 0xb1, 0x75, 0x20, 0x1c, 0x45, 0x2e, 0x00, 0x24,
+ 0x12, 0x04, 0x74,
+ },
+ []byte{
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x0a,
+ },
+ nil,
+ },
+ {
+ "wrong dictionary",
+ "",
+ []byte{
+ 0x78, 0xbb, 0x1c, 0x32, 0x04, 0x27, 0xf3, 0x00,
+ 0xb1, 0x75, 0x20, 0x1c, 0x45, 0x2e, 0x00, 0x24,
+ 0x12, 0x04, 0x74,
+ },
+ []byte{
+ 0x48, 0x65, 0x6c, 0x6c,
+ },
+ DictionaryError,
+ },
+}
+
+func TestDecompressor(t *testing.T) {
+ b := new(bytes.Buffer)
+ for _, tt := range zlibTests {
+ in := bytes.NewBuffer(tt.compressed)
+ zlib, err := NewReaderDict(in, tt.dict)
+ if err != nil {
+ if err != tt.err {
+ t.Errorf("%s: NewReader: %s", tt.desc, err)
+ }
+ continue
+ }
+ defer zlib.Close()
+ b.Reset()
+ n, err := io.Copy(b, zlib)
+ if err != nil {
+ if err != tt.err {
+ t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err)
+ }
+ continue
+ }
+ s := b.String()
+ if s != tt.raw {
+ t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw)
+ }
+ }
+}
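
As a usage sketch (not part of the imported sources), the "goodbye" vector from
the table above can be decoded directly with the reader API that TestDecompressor
exercises:

    package main

    import (
            "bytes"
            "compress/zlib"
            "io"
            "os"
    )

    func main() {
            compressed := []byte{
                    0x78, 0x9c, 0x4b, 0xcf, 0xcf, 0x4f, 0x49, 0xaa,
                    0x4c, 0xd5, 0x51, 0x28, 0xcf, 0x2f, 0xca, 0x49,
                    0x01, 0x00, 0x28, 0xa5, 0x05, 0x5e,
            }
            r, err := zlib.NewReader(bytes.NewBuffer(compressed))
            if err != nil {
                    panic(err)
            }
            defer r.Close()
            io.Copy(os.Stdout, r) // prints "goodbye, world"
    }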
diff --git a/src/pkg/compress/zlib/writer.go b/src/pkg/compress/zlib/writer.go
new file mode 100644
index 000000000..8f86e9c4c
--- /dev/null
+++ b/src/pkg/compress/zlib/writer.go
@@ -0,0 +1,139 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package zlib
+
+import (
+ "compress/flate"
+ "hash"
+ "hash/adler32"
+ "io"
+ "os"
+)
+
+// These constants are copied from the flate package, so that code that imports
+// "compress/zlib" does not also have to import "compress/flate".
+const (
+ NoCompression = flate.NoCompression
+ BestSpeed = flate.BestSpeed
+ BestCompression = flate.BestCompression
+ DefaultCompression = flate.DefaultCompression
+)
+
+// A Writer takes data written to it and writes the compressed
+// form of that data to an underlying writer (see NewWriter).
+type Writer struct {
+ w io.Writer
+ compressor *flate.Writer
+ digest hash.Hash32
+ err os.Error
+ scratch [4]byte
+}
+
+// NewWriter calls NewWriterLevel with the default compression level.
+func NewWriter(w io.Writer) (*Writer, os.Error) {
+ return NewWriterLevel(w, DefaultCompression)
+}
+
+// NewWriterLevel calls NewWriterDict with no dictionary.
+func NewWriterLevel(w io.Writer, level int) (*Writer, os.Error) {
+ return NewWriterDict(w, level, nil)
+}
+
+// NewWriterDict creates a new io.WriteCloser that satisfies writes by compressing data written to w.
+// It is the caller's responsibility to call Close on the WriteCloser when done.
+// level is the compression level, which can be DefaultCompression, NoCompression,
+// or any integer value between BestSpeed and BestCompression (inclusive).
+// dict is the preset dictionary to compress with, or nil to use no dictionary.
+func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, os.Error) {
+ z := new(Writer)
+ // ZLIB has a two-byte header (as documented in RFC 1950).
+ // The first four bits are the CINFO (compression info), which is 7 for the default deflate window size.
+ // The next four bits are the CM (compression method), which is 8 for deflate.
+ z.scratch[0] = 0x78
+ // The next two bits are the FLEVEL (compression level). The four values are:
+ // 0=fastest, 1=fast, 2=default, 3=best.
+ // The next bit, FDICT, is set if a dictionary is given.
+ // The final five FCHECK bits form a mod-31 checksum.
+ switch level {
+ case 0, 1:
+ z.scratch[1] = 0 << 6
+ case 2, 3, 4, 5:
+ z.scratch[1] = 1 << 6
+ case 6, -1:
+ z.scratch[1] = 2 << 6
+ case 7, 8, 9:
+ z.scratch[1] = 3 << 6
+ default:
+ return nil, os.NewError("level out of range")
+ }
+ if dict != nil {
+ z.scratch[1] |= 1 << 5
+ }
+ z.scratch[1] += uint8(31 - (uint16(z.scratch[0])<<8+uint16(z.scratch[1]))%31)
+ _, err := w.Write(z.scratch[0:2])
+ if err != nil {
+ return nil, err
+ }
+ if dict != nil {
+ // The next four bytes are the Adler-32 checksum of the dictionary.
+ checksum := adler32.Checksum(dict)
+ z.scratch[0] = uint8(checksum >> 24)
+ z.scratch[1] = uint8(checksum >> 16)
+ z.scratch[2] = uint8(checksum >> 8)
+ z.scratch[3] = uint8(checksum >> 0)
+ _, err = w.Write(z.scratch[0:4])
+ if err != nil {
+ return nil, err
+ }
+ }
+ z.w = w
+ z.compressor = flate.NewWriterDict(w, level, dict)
+ z.digest = adler32.New()
+ return z, nil
+}
+
+func (z *Writer) Write(p []byte) (n int, err os.Error) {
+ if z.err != nil {
+ return 0, z.err
+ }
+ if len(p) == 0 {
+ return 0, nil
+ }
+ n, err = z.compressor.Write(p)
+ if err != nil {
+ z.err = err
+ return
+ }
+ z.digest.Write(p)
+ return
+}
+
+// Flush flushes the underlying compressor.
+func (z *Writer) Flush() os.Error {
+ if z.err != nil {
+ return z.err
+ }
+ z.err = z.compressor.Flush()
+ return z.err
+}
+
+// Calling Close does not close the wrapped io.Writer originally passed to NewWriter.
+func (z *Writer) Close() os.Error {
+ if z.err != nil {
+ return z.err
+ }
+ z.err = z.compressor.Close()
+ if z.err != nil {
+ return z.err
+ }
+ checksum := z.digest.Sum32()
+ // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
+ z.scratch[0] = uint8(checksum >> 24)
+ z.scratch[1] = uint8(checksum >> 16)
+ z.scratch[2] = uint8(checksum >> 8)
+ z.scratch[3] = uint8(checksum >> 0)
+ _, z.err = z.w.Write(z.scratch[0:4])
+ return z.err
+}
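
The level switch and the FCHECK adjustment above can be checked by hand. A worked
sketch (not part of the imported sources) for DefaultCompression (-1) with no
dictionary, which reproduces the 0x78 0x9c header seen throughout the reader tests:

    package main

    import "fmt"

    func main() {
            cmf := byte(0x78)   // CINFO=7, CM=8, as written by NewWriterDict
            flg := byte(2 << 6) // FLEVEL=2 for the default level, FDICT clear
            // FCHECK: round the big-endian pair up to a multiple of 31.
            flg += byte(31 - (uint16(cmf)<<8+uint16(flg))%31)
            fmt.Printf("%#x %#x\n", cmf, flg) // 0x78 0x9c
    }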
diff --git a/src/pkg/compress/zlib/writer_test.go b/src/pkg/compress/zlib/writer_test.go
new file mode 100644
index 000000000..32f05ab68
--- /dev/null
+++ b/src/pkg/compress/zlib/writer_test.go
@@ -0,0 +1,144 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package zlib
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "testing"
+)
+
+var filenames = []string{
+ "../testdata/e.txt",
+ "../testdata/pi.txt",
+}
+
+var data = []string{
+ "test a reasonable sized string that can be compressed",
+}
+
+// Tests that compressing and then decompressing the given file at the given compression level,
+// with the given dictionary, yields bytes identical to the original file.
+func testFileLevelDict(t *testing.T, fn string, level int, d string) {
+ // Read the file, as golden output.
+ golden, err := os.Open(fn)
+ if err != nil {
+ t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
+ return
+ }
+ defer golden.Close()
+ b0, err0 := ioutil.ReadAll(golden)
+ if err0 != nil {
+ t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0)
+ return
+ }
+ testLevelDict(t, fn, b0, level, d)
+}
+
+func testLevelDict(t *testing.T, fn string, b0 []byte, level int, d string) {
+ // Make dictionary, if given.
+ var dict []byte
+ if d != "" {
+ dict = []byte(d)
+ }
+
+ // Push data through a pipe that compresses at the write end, and decompresses at the read end.
+ piper, pipew := io.Pipe()
+ defer piper.Close()
+ go func() {
+ defer pipew.Close()
+ zlibw, err := NewWriterDict(pipew, level, dict)
+ if err != nil {
+ t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
+ return
+ }
+ defer zlibw.Close()
+ _, err = zlibw.Write(b0)
+ if err == os.EPIPE {
+ // Fail, but do not report the error, as some other (presumably reported) error broke the pipe.
+ return
+ }
+ if err != nil {
+ t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
+ return
+ }
+ }()
+ zlibr, err := NewReaderDict(piper, dict)
+ if err != nil {
+ t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
+ return
+ }
+ defer zlibr.Close()
+
+ // Compare the decompressed data.
+ b1, err1 := ioutil.ReadAll(zlibr)
+ if err1 != nil {
+ t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err1)
+ return
+ }
+ if len(b0) != len(b1) {
+ t.Errorf("%s (level=%d, dict=%q): length mismatch %d versus %d", fn, level, d, len(b0), len(b1))
+ return
+ }
+ for i := 0; i < len(b0); i++ {
+ if b0[i] != b1[i] {
+ t.Errorf("%s (level=%d, dict=%q): mismatch at %d, 0x%02x versus 0x%02x\n", fn, level, d, i, b0[i], b1[i])
+ return
+ }
+ }
+}
+
+func TestWriter(t *testing.T) {
+ for i, s := range data {
+ b := []byte(s)
+ tag := fmt.Sprintf("#%d", i)
+ testLevelDict(t, tag, b, DefaultCompression, "")
+ testLevelDict(t, tag, b, NoCompression, "")
+ for level := BestSpeed; level <= BestCompression; level++ {
+ testLevelDict(t, tag, b, level, "")
+ }
+ }
+}
+
+func TestWriterBig(t *testing.T) {
+ for _, fn := range filenames {
+ testFileLevelDict(t, fn, DefaultCompression, "")
+ testFileLevelDict(t, fn, NoCompression, "")
+ for level := BestSpeed; level <= BestCompression; level++ {
+ testFileLevelDict(t, fn, level, "")
+ }
+ }
+}
+
+func TestWriterDict(t *testing.T) {
+ const dictionary = "0123456789."
+ for _, fn := range filenames {
+ testFileLevelDict(t, fn, DefaultCompression, dictionary)
+ testFileLevelDict(t, fn, NoCompression, dictionary)
+ for level := BestSpeed; level <= BestCompression; level++ {
+ testFileLevelDict(t, fn, level, dictionary)
+ }
+ }
+}
+
+func TestWriterDictIsUsed(t *testing.T) {
+ var input = []byte("Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.")
+ buf := bytes.NewBuffer(nil)
+ compressor, err := NewWriterDict(buf, BestCompression, input)
+ if err != nil {
+ t.Errorf("error in NewWriterDict: %s", err)
+ return
+ }
+ compressor.Write(input)
+ compressor.Close()
+ const expectedMaxSize = 25
+ output := buf.Bytes()
+ if len(output) > expectedMaxSize {
+ t.Errorf("result too large (got %d, want <= %d bytes). Is the dictionary being used?", len(output), expectedMaxSize)
+ }
+}
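
TestWriterDictIsUsed relies on the fact that, when the preset dictionary equals the
input, flate can encode the whole body as back-references into the dictionary, so
the zlib stream stays close to header-plus-checksum size. A small sketch (not part
of the imported sources, using the NewWriterDict signature defined above) that
compares compressed sizes with and without the dictionary:

    package main

    import (
            "bytes"
            "compress/zlib"
            "fmt"
    )

    func main() {
            input := []byte("Lorem ipsum dolor sit amet, consectetur adipisicing elit, " +
                    "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.")

            size := func(dict []byte) int {
                    var b bytes.Buffer
                    w, err := zlib.NewWriterDict(&b, zlib.BestCompression, dict)
                    if err != nil {
                            panic(err)
                    }
                    w.Write(input)
                    w.Close()
                    return b.Len()
            }
            // Without the dictionary the output is roughly the size of the text;
            // with it, the body collapses to a single back-reference.
            fmt.Println(size(nil), size(input))
    }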