diff options
author | Ondřej Surý <ondrej@sury.org> | 2011-09-13 13:13:40 +0200 |
---|---|---|
committer | Ondřej Surý <ondrej@sury.org> | 2011-09-13 13:13:40 +0200 |
commit | 5ff4c17907d5b19510a62e08fd8d3b11e62b431d (patch) | |
tree | c0650497e988f47be9c6f2324fa692a52dea82e1 /src/pkg/compress | |
parent | 80f18fc933cf3f3e829c5455a1023d69f7b86e52 (diff) | |
download | golang-upstream/60.tar.gz |
Imported Upstream version 60upstream/60
Diffstat (limited to 'src/pkg/compress')
33 files changed, 5921 insertions, 0 deletions
diff --git a/src/pkg/compress/bzip2/Makefile b/src/pkg/compress/bzip2/Makefile new file mode 100644 index 000000000..a4bceef16 --- /dev/null +++ b/src/pkg/compress/bzip2/Makefile @@ -0,0 +1,14 @@ +# Copyright 2011 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=compress/bzip2 +GOFILES=\ + bit_reader.go\ + bzip2.go\ + huffman.go\ + move_to_front.go\ + +include ../../../Make.pkg diff --git a/src/pkg/compress/bzip2/bit_reader.go b/src/pkg/compress/bzip2/bit_reader.go new file mode 100644 index 000000000..50f0ec836 --- /dev/null +++ b/src/pkg/compress/bzip2/bit_reader.go @@ -0,0 +1,88 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bzip2 + +import ( + "bufio" + "io" + "os" +) + +// bitReader wraps an io.Reader and provides the ability to read values, +// bit-by-bit, from it. Its Read* methods don't return the usual os.Error +// because the error handling was verbose. Instead, any error is kept and can +// be checked afterwards. +type bitReader struct { + r byteReader + n uint64 + bits uint + err os.Error +} + +// bitReader needs to read bytes from an io.Reader. We attempt to cast the +// given io.Reader to this interface and, if it doesn't already fit, we wrap in +// a bufio.Reader. +type byteReader interface { + ReadByte() (byte, os.Error) +} + +func newBitReader(r io.Reader) bitReader { + byter, ok := r.(byteReader) + if !ok { + byter = bufio.NewReader(r) + } + return bitReader{r: byter} +} + +// ReadBits64 reads the given number of bits and returns them in the +// least-significant part of a uint64. In the event of an error, it returns 0 +// and the error can be obtained by calling Error(). +func (br *bitReader) ReadBits64(bits uint) (n uint64) { + for bits > br.bits { + b, err := br.r.ReadByte() + if err == os.EOF { + err = io.ErrUnexpectedEOF + } + if err != nil { + br.err = err + return 0 + } + br.n <<= 8 + br.n |= uint64(b) + br.bits += 8 + } + + // br.n looks like this (assuming that br.bits = 14 and bits = 6): + // Bit: 111111 + // 5432109876543210 + // + // (6 bits, the desired output) + // |-----| + // V V + // 0101101101001110 + // ^ ^ + // |------------| + // br.bits (num valid bits) + // + // This the next line right shifts the desired bits into the + // least-significant places and masks off anything above. + n = (br.n >> (br.bits - bits)) & ((1 << bits) - 1) + br.bits -= bits + return +} + +func (br *bitReader) ReadBits(bits uint) (n int) { + n64 := br.ReadBits64(bits) + return int(n64) +} + +func (br *bitReader) ReadBit() bool { + n := br.ReadBits(1) + return n != 0 +} + +func (br *bitReader) Error() os.Error { + return br.err +} diff --git a/src/pkg/compress/bzip2/bzip2.go b/src/pkg/compress/bzip2/bzip2.go new file mode 100644 index 000000000..8b4572306 --- /dev/null +++ b/src/pkg/compress/bzip2/bzip2.go @@ -0,0 +1,390 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package bzip2 implements bzip2 decompression. +package bzip2 + +import ( + "io" + "os" +) + +// There's no RFC for bzip2. I used the Wikipedia page for reference and a lot +// of guessing: http://en.wikipedia.org/wiki/Bzip2 +// The source code to pyflate was useful for debugging: +// http://www.paul.sladen.org/projects/pyflate + +// A StructuralError is returned when the bzip2 data is found to be +// syntactically invalid. +type StructuralError string + +func (s StructuralError) String() string { + return "bzip2 data invalid: " + string(s) +} + +// A reader decompresses bzip2 compressed data. +type reader struct { + br bitReader + setupDone bool // true if we have parsed the bzip2 header. + blockSize int // blockSize in bytes, i.e. 900 * 1024. + eof bool + buf []byte // stores Burrows-Wheeler transformed data. + c [256]uint // the `C' array for the inverse BWT. + tt []uint32 // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits. + tPos uint32 // Index of the next output byte in tt. + + preRLE []uint32 // contains the RLE data still to be processed. + preRLEUsed int // number of entries of preRLE used. + lastByte int // the last byte value seen. + byteRepeats uint // the number of repeats of lastByte seen. + repeats uint // the number of copies of lastByte to output. +} + +// NewReader returns an io.Reader which decompresses bzip2 data from r. +func NewReader(r io.Reader) io.Reader { + bz2 := new(reader) + bz2.br = newBitReader(r) + return bz2 +} + +const bzip2FileMagic = 0x425a // "BZ" +const bzip2BlockMagic = 0x314159265359 +const bzip2FinalMagic = 0x177245385090 + +// setup parses the bzip2 header. +func (bz2 *reader) setup() os.Error { + br := &bz2.br + + magic := br.ReadBits(16) + if magic != bzip2FileMagic { + return StructuralError("bad magic value") + } + + t := br.ReadBits(8) + if t != 'h' { + return StructuralError("non-Huffman entropy encoding") + } + + level := br.ReadBits(8) + if level < '1' || level > '9' { + return StructuralError("invalid compression level") + } + + bz2.blockSize = 100 * 1024 * (int(level) - '0') + bz2.tt = make([]uint32, bz2.blockSize) + return nil +} + +func (bz2 *reader) Read(buf []byte) (n int, err os.Error) { + if bz2.eof { + return 0, os.EOF + } + + if !bz2.setupDone { + err = bz2.setup() + brErr := bz2.br.Error() + if brErr != nil { + err = brErr + } + if err != nil { + return 0, err + } + bz2.setupDone = true + } + + n, err = bz2.read(buf) + brErr := bz2.br.Error() + if brErr != nil { + err = brErr + } + return +} + +func (bz2 *reader) read(buf []byte) (n int, err os.Error) { + // bzip2 is a block based compressor, except that it has a run-length + // preprocessing step. The block based nature means that we can + // preallocate fixed-size buffers and reuse them. However, the RLE + // preprocessing would require allocating huge buffers to store the + // maximum expansion. Thus we process blocks all at once, except for + // the RLE which we decompress as required. + + for (bz2.repeats > 0 || bz2.preRLEUsed < len(bz2.preRLE)) && n < len(buf) { + // We have RLE data pending. + + // The run-length encoding works like this: + // Any sequence of four equal bytes is followed by a length + // byte which contains the number of repeats of that byte to + // include. (The number of repeats can be zero.) Because we are + // decompressing on-demand our state is kept in the reader + // object. + + if bz2.repeats > 0 { + buf[n] = byte(bz2.lastByte) + n++ + bz2.repeats-- + if bz2.repeats == 0 { + bz2.lastByte = -1 + } + continue + } + + bz2.tPos = bz2.preRLE[bz2.tPos] + b := byte(bz2.tPos) + bz2.tPos >>= 8 + bz2.preRLEUsed++ + + if bz2.byteRepeats == 3 { + bz2.repeats = uint(b) + bz2.byteRepeats = 0 + continue + } + + if bz2.lastByte == int(b) { + bz2.byteRepeats++ + } else { + bz2.byteRepeats = 0 + } + bz2.lastByte = int(b) + + buf[n] = b + n++ + } + + if n > 0 { + return + } + + // No RLE data is pending so we need to read a block. + + br := &bz2.br + magic := br.ReadBits64(48) + if magic == bzip2FinalMagic { + br.ReadBits64(32) // ignored CRC + bz2.eof = true + return 0, os.EOF + } else if magic != bzip2BlockMagic { + return 0, StructuralError("bad magic value found") + } + + err = bz2.readBlock() + if err != nil { + return 0, err + } + + return bz2.read(buf) +} + +// readBlock reads a bzip2 block. The magic number should already have been consumed. +func (bz2 *reader) readBlock() (err os.Error) { + br := &bz2.br + br.ReadBits64(32) // skip checksum. TODO: check it if we can figure out what it is. + randomized := br.ReadBits(1) + if randomized != 0 { + return StructuralError("deprecated randomized files") + } + origPtr := uint(br.ReadBits(24)) + + // If not every byte value is used in the block (i.e., it's text) then + // the symbol set is reduced. The symbols used are stored as a + // two-level, 16x16 bitmap. + symbolRangeUsedBitmap := br.ReadBits(16) + symbolPresent := make([]bool, 256) + numSymbols := 0 + for symRange := uint(0); symRange < 16; symRange++ { + if symbolRangeUsedBitmap&(1<<(15-symRange)) != 0 { + bits := br.ReadBits(16) + for symbol := uint(0); symbol < 16; symbol++ { + if bits&(1<<(15-symbol)) != 0 { + symbolPresent[16*symRange+symbol] = true + numSymbols++ + } + } + } + } + + // A block uses between two and six different Huffman trees. + numHuffmanTrees := br.ReadBits(3) + if numHuffmanTrees < 2 || numHuffmanTrees > 6 { + return StructuralError("invalid number of Huffman trees") + } + + // The Huffman tree can switch every 50 symbols so there's a list of + // tree indexes telling us which tree to use for each 50 symbol block. + numSelectors := br.ReadBits(15) + treeIndexes := make([]uint8, numSelectors) + + // The tree indexes are move-to-front transformed and stored as unary + // numbers. + mtfTreeDecoder := newMTFDecoderWithRange(numHuffmanTrees) + for i := range treeIndexes { + c := 0 + for { + inc := br.ReadBits(1) + if inc == 0 { + break + } + c++ + } + if c >= numHuffmanTrees { + return StructuralError("tree index too large") + } + treeIndexes[i] = uint8(mtfTreeDecoder.Decode(c)) + } + + // The list of symbols for the move-to-front transform is taken from + // the previously decoded symbol bitmap. + symbols := make([]byte, numSymbols) + nextSymbol := 0 + for i := 0; i < 256; i++ { + if symbolPresent[i] { + symbols[nextSymbol] = byte(i) + nextSymbol++ + } + } + mtf := newMTFDecoder(symbols) + + numSymbols += 2 // to account for RUNA and RUNB symbols + huffmanTrees := make([]huffmanTree, numHuffmanTrees) + + // Now we decode the arrays of code-lengths for each tree. + lengths := make([]uint8, numSymbols) + for i := 0; i < numHuffmanTrees; i++ { + // The code lengths are delta encoded from a 5-bit base value. + length := br.ReadBits(5) + for j := 0; j < numSymbols; j++ { + for { + if !br.ReadBit() { + break + } + if br.ReadBit() { + length-- + } else { + length++ + } + } + if length < 0 || length > 20 { + return StructuralError("Huffman length out of range") + } + lengths[j] = uint8(length) + } + huffmanTrees[i], err = newHuffmanTree(lengths) + if err != nil { + return err + } + } + + selectorIndex := 1 // the next tree index to use + currentHuffmanTree := huffmanTrees[treeIndexes[0]] + bufIndex := 0 // indexes bz2.buf, the output buffer. + // The output of the move-to-front transform is run-length encoded and + // we merge the decoding into the Huffman parsing loop. These two + // variables accumulate the repeat count. See the Wikipedia page for + // details. + repeat := 0 + repeat_power := 0 + + // The `C' array (used by the inverse BWT) needs to be zero initialized. + for i := range bz2.c { + bz2.c[i] = 0 + } + + decoded := 0 // counts the number of symbols decoded by the current tree. + for { + if decoded == 50 { + currentHuffmanTree = huffmanTrees[treeIndexes[selectorIndex]] + selectorIndex++ + decoded = 0 + } + + v := currentHuffmanTree.Decode(br) + decoded++ + + if v < 2 { + // This is either the RUNA or RUNB symbol. + if repeat == 0 { + repeat_power = 1 + } + repeat += repeat_power << v + repeat_power <<= 1 + + // This limit of 2 million comes from the bzip2 source + // code. It prevents repeat from overflowing. + if repeat > 2*1024*1024 { + return StructuralError("repeat count too large") + } + continue + } + + if repeat > 0 { + // We have decoded a complete run-length so we need to + // replicate the last output symbol. + for i := 0; i < repeat; i++ { + b := byte(mtf.First()) + bz2.tt[bufIndex] = uint32(b) + bz2.c[b]++ + bufIndex++ + } + repeat = 0 + } + + if int(v) == numSymbols-1 { + // This is the EOF symbol. Because it's always at the + // end of the move-to-front list, and never gets moved + // to the front, it has this unique value. + break + } + + // Since two metasymbols (RUNA and RUNB) have values 0 and 1, + // one would expect |v-2| to be passed to the MTF decoder. + // However, the front of the MTF list is never referenced as 0, + // it's always referenced with a run-length of 1. Thus 0 + // doesn't need to be encoded and we have |v-1| in the next + // line. + b := byte(mtf.Decode(int(v - 1))) + bz2.tt[bufIndex] = uint32(b) + bz2.c[b]++ + bufIndex++ + } + + if origPtr >= uint(bufIndex) { + return StructuralError("origPtr out of bounds") + } + + // We have completed the entropy decoding. Now we can perform the + // inverse BWT and setup the RLE buffer. + bz2.preRLE = bz2.tt[:bufIndex] + bz2.preRLEUsed = 0 + bz2.tPos = inverseBWT(bz2.preRLE, origPtr, bz2.c[:]) + bz2.lastByte = -1 + bz2.byteRepeats = 0 + bz2.repeats = 0 + + return nil +} + +// inverseBWT implements the inverse Burrows-Wheeler transform as described in +// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf, section 4.2. +// In that document, origPtr is called `I' and c is the `C' array after the +// first pass over the data. It's an argument here because we merge the first +// pass with the Huffman decoding. +// +// This also implements the `single array' method from the bzip2 source code +// which leaves the output, still shuffled, in the bottom 8 bits of tt with the +// index of the next byte in the top 24-bits. The index of the first byte is +// returned. +func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32 { + sum := uint(0) + for i := 0; i < 256; i++ { + sum += c[i] + c[i] = sum - c[i] + } + + for i := range tt { + b := tt[i] & 0xff + tt[c[b]] |= uint32(i) << 8 + c[b]++ + } + + return tt[origPtr] >> 8 +} diff --git a/src/pkg/compress/bzip2/bzip2_test.go b/src/pkg/compress/bzip2/bzip2_test.go new file mode 100644 index 000000000..156eea83f --- /dev/null +++ b/src/pkg/compress/bzip2/bzip2_test.go @@ -0,0 +1,158 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bzip2 + +import ( + "bytes" + "encoding/hex" + "io" + "io/ioutil" + "os" + "testing" +) + +func TestBitReader(t *testing.T) { + buf := bytes.NewBuffer([]byte{0xaa}) + br := newBitReader(buf) + if n := br.ReadBits(1); n != 1 { + t.Errorf("read 1 wrong") + } + if n := br.ReadBits(1); n != 0 { + t.Errorf("read 2 wrong") + } + if n := br.ReadBits(1); n != 1 { + t.Errorf("read 3 wrong") + } + if n := br.ReadBits(1); n != 0 { + t.Errorf("read 4 wrong") + } +} + +func TestBitReaderLarge(t *testing.T) { + buf := bytes.NewBuffer([]byte{0x12, 0x34, 0x56, 0x78}) + br := newBitReader(buf) + if n := br.ReadBits(32); n != 0x12345678 { + t.Errorf("got: %x want: %x", n, 0x12345678) + } +} + +func readerFromHex(s string) io.Reader { + data, err := hex.DecodeString(s) + if err != nil { + panic("readerFromHex: bad input") + } + return bytes.NewBuffer(data) +} + +func decompressHex(s string) (out []byte, err os.Error) { + r := NewReader(readerFromHex(s)) + return ioutil.ReadAll(r) +} + +func TestHelloWorldBZ2(t *testing.T) { + out, err := decompressHex(helloWorldBZ2Hex) + if err != nil { + t.Errorf("error from Read: %s", err) + return + } + + if !bytes.Equal(helloWorld, out) { + t.Errorf("got %x, want %x", out, helloWorld) + } +} + +func testZeros(t *testing.T, inHex string, n int) { + out, err := decompressHex(inHex) + if err != nil { + t.Errorf("error from Read: %s", err) + return + } + + expected := make([]byte, n) + + if !bytes.Equal(expected, out) { + allZeros := true + for _, b := range out { + if b != 0 { + allZeros = false + break + } + } + t.Errorf("incorrect result, got %d bytes (allZeros: %t)", len(out), allZeros) + } +} + +func Test32Zeros(t *testing.T) { + testZeros(t, thirtyTwoZerosBZ2Hex, 32) +} + +func Test1MBZeros(t *testing.T) { + testZeros(t, oneMBZerosBZ2Hex, 1024*1024) +} + +func testRandomData(t *testing.T, compressedHex, uncompressedHex string) { + out, err := decompressHex(compressedHex) + if err != nil { + t.Errorf("error from Read: %s", err) + return + } + + expected, _ := hex.DecodeString(uncompressedHex) + + if !bytes.Equal(out, expected) { + t.Errorf("incorrect result\ngot: %x\nwant: %x", out, expected) + } +} + +func TestRandomData1(t *testing.T) { + testRandomData(t, randBZ2Hex, randHex) +} + +func TestRandomData2(t *testing.T) { + // This test involves several repeated bytes in the output, but they + // should trigger RLE decoding. + testRandomData(t, rand2BZ2Hex, rand2Hex) +} + +func TestRandomData3(t *testing.T) { + // This test uses the full range of symbols. + testRandomData(t, rand3BZ2Hex, rand3Hex) +} + +func Test1MBSawtooth(t *testing.T) { + out, err := decompressHex(oneMBSawtoothBZ2Hex) + if err != nil { + t.Errorf("error from Read: %s", err) + return + } + + expected := make([]byte, 1024*1024) + + for i := range expected { + expected[i] = byte(i) + } + + if !bytes.Equal(out, expected) { + t.Error("incorrect result") + } +} + +const helloWorldBZ2Hex = "425a68393141592653594eece83600000251800010400006449080200031064c4101a7a9a580bb9431f8bb9229c28482776741b0" + +var helloWorld = []byte("hello world\n") + +const thirtyTwoZerosBZ2Hex = "425a6839314159265359b5aa5098000000600040000004200021008283177245385090b5aa5098" +const oneMBZerosBZ2Hex = "425a683931415926535938571ce50008084000c0040008200030cc0529a60806c4201e2ee48a70a12070ae39ca" + +const randBZ2Hex = "425a6839314159265359905d990d0001957fffffffffffafffffffffffffffffbfff6fffdfffffffffffffffffffffffffffffc002b6dd75676ed5b77720098320d11a64626981323d4da47a83131a13d09e8040f534cd4f4d27a464d193008cd09804601347a980026350c9886234d36864193d1351b44c136919e90340d26127a4cd264c32023009898981310c0344c340027a8303427a99a04c00003534c230d034f5006468d268cf54d36a3009a69a62626261311b40026013d34201a6934c9a604c98ca6c8460989fa9346234d30d3469a2604fd4131a7aa6d0046043d4c62098479269e89e835190d018d4c046001a11e801a0264792321932308c43a130688c260d46686804cd01a9e80981193684c6a68c00000004c4c20c04627a4c0000260003400d04c0681a01334026009a6f48041466132581ec5212b081d96b0effc16543e2228b052fcd30f2567ee8d970e0f10aabca68dd8270591c376cfc1baae0dba00aaff2d6caf6b211322c997cc18eaee5927f75185336bf907021324c71626c1dd20e22b9b0977f05d0f901eaa51db9fbaf7c603b4c87bc82890e6dd7e61d0079e27ec050dd788fd958152061cd01e222f9547cb9efc465d775b6fc98bac7d387bffd151ae09dadf19494f7a638e2eae58e550faba5fe6820ea520eb986096de4e527d80def3ba625e71fbefdcf7e7844e0a25d29b52dcd1344fca083737d42692aab38d230485f3c8ed54c2ed31f15cf0270c8143765b10b92157233fa1dfe0d7ce8ffe70b8b8f7250071701dfe9f1c94de362c9031455951c93eb098a6b50ee45c6131fefc3b6f9643e21f4adc59497138e246f5c57d834aa67c4f10d8bd8b3908d8130dd7388409c299a268eab3664fa4907c5c31574874bd8d388a4ab22b339660804e53e1b8d05867d40e3082560608d35d5d2c6054e8bab23da28f61f83efd41d25529ad6ea15fb50505cacfabb0902166427354ca3830a2c8415f21b19e592690fbe447020d685a4bcd16ecc4ff1a1c0e572627d0ef6265c008a43fc243240541061ed7840606be466d1c0dac2c53250ed567507d926c844154560d631960c65e15157829b2c7f16859f111a3a8cb72bf24ffa57a680c3be67b1be67c8dd8aea73ac2437a78df5b686d427080ebc01bd30b71a49f6ea31dc0f08e4849e38face96717690239538bc08b6cc5aa8d467cb9c36aa83d40ac7e58bddbfa185b22065e89a86c0145569d9e23726651aec49e31588d70f40fe9a4449dcf4f89eac220171e9c938e803dc195679651004b79ad33cc0c13aeeba5941b33ffeeb8fbe16e76c7811445c67b4269c90479433ddf9e8ed1d00c166b6c17217fb22c3ef1b0c1c7e28e185446a111c37f1ea6c07a59fbcc6546ecc6968d36ba58bc5489a5640647e426b0c39350cb6f07d5dc7a717648c4ec7f841467597ae1f65f408fd2d9940a4b1b860b3c9ae351dcae0b4425f7e8538710f2e40b7f70d13b51ac05ccc6ecda8264a88cad2d721d18132a9b9110a9e759c2483c77dcefc7e464ec88588174cb0c9abff93230ea0bed8decdd8ed8bfe2b5df0a253803678df04fab44c03b9ab7cc97d6e6d6fd0c4c840ce0efc498436f453bbb181603459471f2b588724592b222ec990614db530e10cadd84705621cfdd9261fa44a5f5806a2d74b575056b3c915255c65678f9c16e6dc00a99180fef1a840aff0e842ac02731080cc92782538360a60a727991013984da4fad95f79d5030677b7528d076b2483685fca4429edf804682fdc110dfc2f7c30e23e20a72e039108a0ad6fdee2f76985a4b4be4f5afc6101bf9d5042b657a05dc914e1424241766434" +const randHex = "c95138082bdf2b9bfa5b1072b23f729735d42c785eeb94320fb14c265b9c2ca421d01a3db986df1ac2acde5a0e6bf955d6f95e61261540905928e195f1a66644cc7f37281744fff4dc6df35566a494c41a8167151950eb74f5fc45f85ad0e5ed28b49adfe218aa7ec1707e8e1d55825f61f72beda3b4c006b8c9188d7336a5d875329b1b58c27cc4e89ecbae02c7712400c39dd131d2c6de82e2863da51d472bdfb21ecce62cc9cf769ed28aedc7583d755da45a0d90874bda269dd53283a9bdfd05f95fc8e9a304bb338ea1a2111894678c18134f17d31a15d9bfc1237894650f3e715e2548639ecbddb845cfe4a46a7b3a3c540f48629488e8c869f1e9f3f4c552243a8105b20eb8e264994214349dae83b165fd6c2a5b8e83fce09fc0a80d3281c8d53a9a08095bd19cbc1388df23975646ed259e003d39261ee68cbece8bcf32971f7fe7e588e8ba8f5e8597909abaea693836a79a1964050ed910a45a0f13a58cd2d3ae18992c5b23082407fd920d0bf01e33118a017bb5e39f44931346845af52128f7965206759433a346034ea481671f501280067567619f5ecef6cded077f92ed7f3b3ce8e308c80f34ba06939e9303f91b4318c8c1dd4cc223c1f057ac0c91211c629cd30e46ee9ec1d9fd493086b7bc2bc83e33f08749a5d430b0ed4f79d70f481940c9b0930b16321886a0df4fa5a1465d5208c7d3494a7987d9a5e42aa256f0c9523947f8318d0ef0af3d59a45cfc2418d0785c9a548b32b81e7de18be7d55a69a4c156bbb3d7579c0ac8e9c72b24646e54b0d0e8725f8f49fb44ae3c6b9d0287be118586255a90a4a83483ed0328518037e52aa959c5748ed83e13023e532306be98b8288da306bbb040bcf5d92176f84a9306dc6b274b040370b61d71fde58dd6d20e6fee348eae0c54bd0a5a487b2d005f329794f2a902c296af0a4c1f638f63292a1fa18e006c1b1838636f4de71c73635b25660d32e88a0917e1a5677f6a02ca65585b82cbd99fb4badbfa97a585da1e6cadf6737b4ec6ca33f245d66ee6a9fae6785d69b003c17b9fc6ec34fe5824ab8caae5e8e14dc6f9e116e7bf4a60c04388783c8ae929e1b46b3ef3bbe81b38f2fa6da771bf39dfba2374d3d2ed356b8e2c42081d885a91a3afb2f31986d2f9873354c48cf5448492c32e62385af423aa4f83db6d1b2669650379a1134b0a04cbca0862d6f9743c791cbb527d36cd5d1f0fc7f503831c8bd1b7a0ef8ae1a5ed1155dfdd9e32b6bb33138112d3d476b802179cb85a2a6c354ccfed2f31604fbd8d6ec4baf9f1c8454f72c6588c06a7df3178c43a6970bfa02dd6f74cb5ec3b63f9eddaa17db5cbf27fac6de8e57c384afd0954179f7b5690c3bee42abc4fa79b4b12101a9cf5f0b9aecdda945def0bd04163237247d3539850e123fe18139f316fa0256d5bd2faa8" + +const oneMBSawtoothBZ2Hex = "425a683931415926535971931ea00006ddffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe007de00000000000000024c00130001300000000000000000000000000000000000000000000000000000000126000980009800000000000000000000000000000000000000000000000000000000930004c0004c000000000000000000000000000000000000000000000000000000004980026000260000000000000000000000000000000000000000000000000000000009aaaaa0000000000000000000000000000000000000000000000000000000000000000498002600026000000000000000000000000000000000000000000000000000000007fc42271980d044c0a822607411304a08982d044c1a82260f411308a08984d044c2a82261741130ca08986d044c3a82261f411310a08988d044c4a822627411314a0898ad044c5a82262f411318a0898cd044c6a82263741131ca0898ed044c7a82263f411320a08990d044c8a822647411324a08992d044c9a82264f411328a08994d044caa82265741132ca08996d044cba82265f411330a08998d044cca822667411334a0899ad044cda82266f411338a0899cd044cea82267741133ca0899ed044cfa82267f411340a089a0d044d0a822687411344a089a2d044d1a82268f411348a089a4d044d2a82269741134ca089a6d044d3a82269f411350a089a8d044d4a8226a7411354a089aad044d5a8226af411358a089acd044d6a8226b741135ca089aed044d7a8226bf411360a089b0d044d8a8226c7411364a089b2d044d9a8226cf411368a089b4d044daa8226d741136ca089b6d044dba8226df411370a089b8d044dca8226e7411374a089bad044dda8226ef411378a089bcd044dea8226f741137ca089bed044dfa8226ff411380a089c0d044e0a822707411384a089c2d044e1a82270f411388a089c4d044e2a82271741138ca089c59089c69089c71089c79089c81089c89089c91089c99089ca1089ca9089cb1089cb9089cc1089cc9089cd1089cd9089ce1089ce9089cf1089cf9089d01089d09089d11089d19089d21089d29089d31089d39089d41089d49089d51089d59089d61089d69089d71089d79089d81089d89089d91089d99089da1089da9089db1089db9089dc1089dc9089dd1089dd9089de1089de9089df1089df9089e01089e09089e11089e19089e21089e29089e31089e39089e41089e49089e51089e59089e61089e69089e71089e79089e81089e89089e91089e99089ea1089ea9089eb1089eb9089ec1089ec9089ed1089ed9089ee1089ee9089ef1089ef9089f01089f09089f11089f19089f21089f29089f31089f39089f41089f49089f51089f59089f61089f69089f71089f79089f81089f89089f91089f99089fa1089fa9089fb1089fb9089fc1089fc9089fd1089fd9089fe1089fe9089ff1089ff98a0ac9329acf23ba884804fdd3ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0034f800000000000024c00130001300000000000000000000000000000000000000000000000000000000126000980009800000000000000000000000000000000000000000000000000000000930004c0004c000000000000000000000000000000000000000000000000000000004980026000260000000000000000000000000000000000000000000000000000000024c0013000130000000000000000000000000000000000000000000000000000000002955540000000000000000000000000000000000000000000000000000000000000001ff108c00846024230221181908c108460a4230621183908c20846124230a21185908c308461a4230e21187908c40846224231221189908c508462a423162118b908c60846324231a2118d908c708463a4231e2118f908c80846424232221191908c908464a4232621193908ca0846524232a21195908cb08465a4232e21197908cc0846624233221199908cd08466a423362119b908ce0846724233a2119d908cf08467a4233e2119f908d008468242342211a1908d108468a42346211a3908d20846924234a211a5908d308469a4234e211a7908d40846a242352211a9908d50846aa42356211ab908d60846b24235a211ad908d70846ba4235e211af908d80846c242362211b1908d90846ca42366211b3908da0846d24236a211b5908db0846da4236e211b7908dc0846e242372211b9908dd0846ea42376211bb908de0846f24237a211bd908df0846fa4237e211bf908e008470242382211c1908e108470a42386211c3908e20847124238a211c5908e2f8c211c6c8471d211c7c84721211c8c84725211c9c84729211cac8472d211cbc84731211ccc84735211cdc84739211cec8473d211cfc84741211d0c84745211d1c84749211d2c8474d211d3c84751211d4c84755211d5c84759211d6c8475d211d7c84761211d8c84765211d9c84769211dac8476d211dbc84771211dcc84775211ddc84779211dec8477d211dfc84781211e0c84785211e1c84789211e2c8478d211e3c84791211e4c84795211e5c84799211e6c8479d211e7c847a1211e8c847a5211e9c847a9211eac847ad211ebc847b1211ecc847b5211edc847b9211eec847bd211efc847c1211f0c847c5211f1c847c9211f2c847cd211f3c847d1211f4c847d5211f5c847d9211f6c847dd211f7c847e1211f8c847e5211f9c847e9211fac847ed211fbc847f1211fcc847f5211fdc847f9211fec847fd211ff8bb9229c284803a8b6248" + +const rand2BZ2Hex = "425a6839314159265359d992d0f60000137dfe84020310091c1e280e100e042801099210094806c0110002e70806402000546034000034000000f2830000032000d3403264049270eb7a9280d308ca06ad28f6981bee1bf8160727c7364510d73a1e123083421b63f031f63993a0f40051fbf177245385090d992d0f60" +const rand2Hex = "92d5652616ac444a4a04af1a8a3964aca0450d43d6cf233bd03233f4ba92f8719e6c2a2bd4f5f88db07ecd0da3a33b263483db9b2c158786ad6363be35d17335ba" + +const rand3BZ2Hex = "425a68393141592653593be669d00000327ffffffffffffffffffffffffffffffffffff7ffffffffffffffffffffffffffffffc002b3b2b1b6e2bae400004c00132300004c0d268c004c08c0130026001a008683234c0684c34008c230261a04c0260064d07a8d00034000d27a1268c9931a8d327a3427a41faa69ea0da264c1a34219326869b51b49a6469a3268c689fa53269a62794687a9a68f5189994c9e487a8f534fd49a3d34043629e8c93d04da4f4648d30d4f44d3234c4d3023d0840680984d309934c234d3131a000640984f536a6132601300130130c8d00d04d1841ea7a8d31a02609b40023460010c01a34d4c1a0d04d3069306810034d0d0d4c0046130d034d0131a9a64d321804c68003400098344c13000991808c0001a00000000098004d3d4da4604c47a13012140aadf8d673c922c607ef6212a8c0403adea4b28aee578900e653b9cdeb8d11e6b838815f3ebaad5a01c5408d84a332170aff8734d4e06612d3c2889f31925fb89e33561f5100ae89b1f7047102e729373d3667e58d73aaa80fa7be368a1cc2dadd81d81ec8e1b504bd772ca31d03649269b01ceddaca07bf3d4eba24de141be3f86f93601e03714c0f64654671684f9f9528626fd4e1b76753dc0c54b842486b8d59d8ab314e86ca818e7a1f079463cbbd70d9b79b283c7edc419406311022e4be98c2c1374df9cdde2d008ce1d00e5f06ad1024baf555631f70831fc1023034e62be7c4bcb648caf276963ffa20e96bb50377fe1c113da0db4625b50741c35a058edb009c6ee5dbf93b8a6b060eec568180e8db791b82aab96cbf4326ca98361461379425ba8dcc347be670bdba7641883e5526ae3d833f6e9cb9bac9557747c79e206151072f7f0071dff3880411846f66bf4075c7462f302b53cb3400a74cf35652ad5641ed33572fd54e7ed7f85f58a0acba89327e7c6be5c58cb71528b99df2431f1d0358f8d28d81d95292da631fb06701decabb205fac59ff0fb1df536afc681eece6ea658c4d9eaa45f1342aa1ff70bdaff2ddaf25ec88c22f12829a0553db1ec2505554cb17d7b282e213a5a2aa30431ded2bce665bb199d023840832fedb2c0c350a27291407ff77440792872137df281592e82076a05c64c345ffb058c64f7f7c207ef78420b7010520610f17e302cc4dfcfaef72a0ed091aab4b541eb0531bbe941ca2f792bf7b31ca6162882b68054a8470115bc2c19f2df2023f7800432b39b04d3a304e8085ba3f1f0ca5b1ba4d38d339e6084de979cdea6d0e244c6c9fa0366bd890621e3d30846f5e8497e21597b8f29bbf52c961a485dfbea647600da0fc1f25ce4d203a8352ece310c39073525044e7ac46acf2ed9120bae1b4f6f02364abfe343f80b290983160c103557af1c68416480d024cc31b6c06cfec011456f1e95c420a12b48b1c3fe220c2879a982fb099948ac440db844b9a112a5188c7783fd3b19593290785f908d95c9db4b280bafe89c1313aeec24772046d9bc089645f0d182a21184e143823c5f52de50e5d7e98d3d7ab56f5413bbccd1415c9bcff707def475b643fb7f29842582104d4cc1dbaaca8f10a2f44273c339e0984f2b1e06ab2f0771db01fafa8142298345f3196f23e5847bda024034b6f59b11c29e981c881456e40d211929fd4f766200258aad8212016322bd5c605790dcfdf1bd2a93d99c9b8f498722d311d7eae7ff420496a31804c55f4759a7b13aaaf5f7ce006c3a8a998897d5e0a504398c2b627852545baf440798bcc5cc049357cf3f17d9771e4528a1af3d77dc794a11346e1bdf5efe37a405b127b4c43b616d61fbc5dc914e14240ef99a7400" +const rand3Hex = "1744b384d68c042371244e13500d4bfb98c6244e3d71a5b700224420b59c593553f33bd786e3d0ce31626f511bc985f59d1a88aa38ba8ad6218d306abee60dd9172540232b95be1af146c69e72e5fde667a090dc3f93bdc5c5af0ab80acdbaa7a505f628c59dc0247b31a439cacf5010a94376d71521df08c178b02fb96fdb1809144ea38c68536187c53201fea8631fb0a880b4451ccdca7cc61f6aafca21cc7449d920599db61789ac3b1e164b3390124f95022aeea39ccca3ec1053f4fa10de2978e2861ea58e477085c2220021a0927aa94c5d0006b5055abba340e4f9eba22e969978dfd18e278a8b89d877328ae34268bc0174cfe211954c0036f078025217d1269fac1932a03b05a0b616012271bbe1fb554171c7a59b196d8a4479f45a77931b5d97aaf6c0c673cbe597b79b96e2a0c1eae2e66e46ccc8c85798e23ffe972ebdaa3f6caea243c004e60321eb47cd79137d78fd0613be606feacc5b3637bdc96a89c13746db8cad886f3ccf912b2178c823bcac395f06d28080269bdca2debf3419c66c690fd1adcfbd53e32e79443d7a42511a84cb22ca94fffad9149275a075b2f8ae0b021dcde9bf62b102db920733b897560518b06e1ad7f4b03458493ddaa7f4fa2c1609f7a1735aeeb1b3e2cea3ab45fc376323cc91873b7e9c90d07c192e38d3f5dfc9bfab1fd821c854da9e607ea596c391c7ec4161c6c4493929a8176badaa5a5af7211c623f29643a937677d3df0da9266181b7c4da5dd40376db677fe8f4a1dc456adf6f33c1e37cec471dd318c2647644fe52f93707a77da7d1702380a80e14cc0fdce7bf2eed48a529090bae0388ee277ce6c7018c5fb00b88362554362205c641f0d0fab94fd5b8357b5ff08b207fee023709bc126ec90cfb17c006754638f8186aaeb1265e80be0c1189ec07d01d5f6f96cb9ce82744147d18490de7dc72862f42f024a16968891a356f5e7e0e695d8c933ba5b5e43ad4c4ade5399bc2cae9bb6189b7870d7f22956194d277f28b10e01c10c6ffe3e065f7e2d6d056aa790db5649ca84dc64c35566c0af1b68c32b5b7874aaa66467afa44f40e9a0846a07ae75360a641dd2acc69d93219b2891f190621511e62a27f5e4fbe641ece1fa234fc7e9a74f48d2a760d82160d9540f649256b169d1fed6fbefdc491126530f3cbad7913e19fbd7aa53b1e243fbf28d5f38c10ebd77c8b986775975cc1d619efb27cdcd733fa1ca36cffe9c0a33cc9f02463c91a886601fd349efee85ef1462065ef9bd2c8f533220ad93138b8382d5938103ab25b2d9af8ae106e1211eb9b18793fba033900c809c02cd6d17e2f3e6fc84dae873411f8e87c3f0a8f1765b7825d185ce3730f299c3028d4a62da9ee95c2b870fb70c79370d485f9d5d9acb78926d20444033d960524d2776dc31988ec7c0dbf23b9905d" diff --git a/src/pkg/compress/bzip2/huffman.go b/src/pkg/compress/bzip2/huffman.go new file mode 100644 index 000000000..dc05739c7 --- /dev/null +++ b/src/pkg/compress/bzip2/huffman.go @@ -0,0 +1,223 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bzip2 + +import ( + "os" + "sort" +) + +// A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a +// symbol. +type huffmanTree struct { + // nodes contains all the non-leaf nodes in the tree. nodes[0] is the + // root of the tree and nextNode contains the index of the next element + // of nodes to use when the tree is being constructed. + nodes []huffmanNode + nextNode int +} + +// A huffmanNode is a node in the tree. left and right contain indexes into the +// nodes slice of the tree. If left or right is invalidNodeValue then the child +// is a left node and its value is in leftValue/rightValue. +// +// The symbols are uint16s because bzip2 encodes not only MTF indexes in the +// tree, but also two magic values for run-length encoding and an EOF symbol. +// Thus there are more than 256 possible symbols. +type huffmanNode struct { + left, right uint16 + leftValue, rightValue uint16 +} + +// invalidNodeValue is an invalid index which marks a leaf node in the tree. +const invalidNodeValue = 0xffff + +// Decode reads bits from the given bitReader and navigates the tree until a +// symbol is found. +func (t huffmanTree) Decode(br *bitReader) (v uint16) { + nodeIndex := uint16(0) // node 0 is the root of the tree. + + for { + node := &t.nodes[nodeIndex] + bit := br.ReadBit() + // bzip2 encodes left as a true bit. + if bit { + // left + if node.left == invalidNodeValue { + return node.leftValue + } + nodeIndex = node.left + } else { + // right + if node.right == invalidNodeValue { + return node.rightValue + } + nodeIndex = node.right + } + } + + panic("unreachable") +} + +// newHuffmanTree builds a Huffman tree from a slice containing the code +// lengths of each symbol. The maximum code length is 32 bits. +func newHuffmanTree(lengths []uint8) (huffmanTree, os.Error) { + // There are many possible trees that assign the same code length to + // each symbol (consider reflecting a tree down the middle, for + // example). Since the code length assignments determine the + // efficiency of the tree, each of these trees is equally good. In + // order to minimize the amount of information needed to build a tree + // bzip2 uses a canonical tree so that it can be reconstructed given + // only the code length assignments. + + if len(lengths) < 2 { + panic("newHuffmanTree: too few symbols") + } + + var t huffmanTree + + // First we sort the code length assignments by ascending code length, + // using the symbol value to break ties. + pairs := huffmanSymbolLengthPairs(make([]huffmanSymbolLengthPair, len(lengths))) + for i, length := range lengths { + pairs[i].value = uint16(i) + pairs[i].length = length + } + + sort.Sort(pairs) + + // Now we assign codes to the symbols, starting with the longest code. + // We keep the codes packed into a uint32, at the most-significant end. + // So branches are taken from the MSB downwards. This makes it easy to + // sort them later. + code := uint32(0) + length := uint8(32) + + codes := huffmanCodes(make([]huffmanCode, len(lengths))) + for i := len(pairs) - 1; i >= 0; i-- { + if length > pairs[i].length { + // If the code length decreases we shift in order to + // zero any bits beyond the end of the code. + length >>= 32 - pairs[i].length + length <<= 32 - pairs[i].length + length = pairs[i].length + } + codes[i].code = code + codes[i].codeLen = length + codes[i].value = pairs[i].value + // We need to 'increment' the code, which means treating |code| + // like a |length| bit number. + code += 1 << (32 - length) + } + + // Now we can sort by the code so that the left half of each branch are + // grouped together, recursively. + sort.Sort(codes) + + t.nodes = make([]huffmanNode, len(codes)) + _, err := buildHuffmanNode(&t, codes, 0) + return t, err +} + +// huffmanSymbolLengthPair contains a symbol and its code length. +type huffmanSymbolLengthPair struct { + value uint16 + length uint8 +} + +// huffmanSymbolLengthPair is used to provide an interface for sorting. +type huffmanSymbolLengthPairs []huffmanSymbolLengthPair + +func (h huffmanSymbolLengthPairs) Len() int { + return len(h) +} + +func (h huffmanSymbolLengthPairs) Less(i, j int) bool { + if h[i].length < h[j].length { + return true + } + if h[i].length > h[j].length { + return false + } + if h[i].value < h[j].value { + return true + } + return false +} + +func (h huffmanSymbolLengthPairs) Swap(i, j int) { + h[i], h[j] = h[j], h[i] +} + +// huffmanCode contains a symbol, its code and code length. +type huffmanCode struct { + code uint32 + codeLen uint8 + value uint16 +} + +// huffmanCodes is used to provide an interface for sorting. +type huffmanCodes []huffmanCode + +func (n huffmanCodes) Len() int { + return len(n) +} + +func (n huffmanCodes) Less(i, j int) bool { + return n[i].code < n[j].code +} + +func (n huffmanCodes) Swap(i, j int) { + n[i], n[j] = n[j], n[i] +} + +// buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in +// the Huffman tree at the given level. It returns the index of the newly +// constructed node. +func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIndex uint16, err os.Error) { + test := uint32(1) << (31 - level) + + // We have to search the list of codes to find the divide between the left and right sides. + firstRightIndex := len(codes) + for i, code := range codes { + if code.code&test != 0 { + firstRightIndex = i + break + } + } + + left := codes[:firstRightIndex] + right := codes[firstRightIndex:] + + if len(left) == 0 || len(right) == 0 { + return 0, StructuralError("superfluous level in Huffman tree") + } + + nodeIndex = uint16(t.nextNode) + node := &t.nodes[t.nextNode] + t.nextNode++ + + if len(left) == 1 { + // leaf node + node.left = invalidNodeValue + node.leftValue = left[0].value + } else { + node.left, err = buildHuffmanNode(t, left, level+1) + } + + if err != nil { + return + } + + if len(right) == 1 { + // leaf node + node.right = invalidNodeValue + node.rightValue = right[0].value + } else { + node.right, err = buildHuffmanNode(t, right, level+1) + } + + return +} diff --git a/src/pkg/compress/bzip2/move_to_front.go b/src/pkg/compress/bzip2/move_to_front.go new file mode 100644 index 000000000..0ed19dec3 --- /dev/null +++ b/src/pkg/compress/bzip2/move_to_front.go @@ -0,0 +1,105 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bzip2 + +// moveToFrontDecoder implements a move-to-front list. Such a list is an +// efficient way to transform a string with repeating elements into one with +// many small valued numbers, which is suitable for entropy encoding. It works +// by starting with an initial list of symbols and references symbols by their +// index into that list. When a symbol is referenced, it's moved to the front +// of the list. Thus, a repeated symbol ends up being encoded with many zeros, +// as the symbol will be at the front of the list after the first access. +type moveToFrontDecoder struct { + // Rather than actually keep the list in memory, the symbols are stored + // as a circular, double linked list with the symbol indexed by head + // at the front of the list. + symbols []byte + next []uint8 + prev []uint8 + head uint8 +} + +// newMTFDecoder creates a move-to-front decoder with an explicit initial list +// of symbols. +func newMTFDecoder(symbols []byte) *moveToFrontDecoder { + if len(symbols) > 256 { + panic("too many symbols") + } + + m := &moveToFrontDecoder{ + symbols: symbols, + next: make([]uint8, len(symbols)), + prev: make([]uint8, len(symbols)), + } + + m.threadLinkedList() + return m +} + +// newMTFDecoderWithRange creates a move-to-front decoder with an initial +// symbol list of 0...n-1. +func newMTFDecoderWithRange(n int) *moveToFrontDecoder { + if n > 256 { + panic("newMTFDecoderWithRange: cannot have > 256 symbols") + } + + m := &moveToFrontDecoder{ + symbols: make([]uint8, n), + next: make([]uint8, n), + prev: make([]uint8, n), + } + + for i := 0; i < n; i++ { + m.symbols[i] = byte(i) + } + + m.threadLinkedList() + return m +} + +// threadLinkedList creates the initial linked-list pointers. +func (m *moveToFrontDecoder) threadLinkedList() { + if len(m.symbols) == 0 { + return + } + + m.prev[0] = uint8(len(m.symbols) - 1) + + for i := 0; i < len(m.symbols)-1; i++ { + m.next[i] = uint8(i + 1) + m.prev[i+1] = uint8(i) + } + + m.next[len(m.symbols)-1] = 0 +} + +func (m *moveToFrontDecoder) Decode(n int) (b byte) { + // Most of the time, n will be zero so it's worth dealing with this + // simple case. + if n == 0 { + return m.symbols[m.head] + } + + i := m.head + for j := 0; j < n; j++ { + i = m.next[i] + } + b = m.symbols[i] + + m.next[m.prev[i]] = m.next[i] + m.prev[m.next[i]] = m.prev[i] + m.next[i] = m.head + m.prev[i] = m.prev[m.head] + m.next[m.prev[m.head]] = i + m.prev[m.head] = i + m.head = i + + return +} + +// First returns the symbol at the front of the list. +func (m *moveToFrontDecoder) First() byte { + return m.symbols[m.head] +} diff --git a/src/pkg/compress/flate/Makefile b/src/pkg/compress/flate/Makefile new file mode 100644 index 000000000..197828a92 --- /dev/null +++ b/src/pkg/compress/flate/Makefile @@ -0,0 +1,17 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=compress/flate +GOFILES=\ + deflate.go\ + huffman_bit_writer.go\ + huffman_code.go\ + inflate.go\ + reverse_bits.go\ + token.go\ + util.go\ + +include ../../../Make.pkg diff --git a/src/pkg/compress/flate/deflate.go b/src/pkg/compress/flate/deflate.go new file mode 100644 index 000000000..b1cee0b2f --- /dev/null +++ b/src/pkg/compress/flate/deflate.go @@ -0,0 +1,493 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "io" + "math" + "os" +) + +const ( + NoCompression = 0 + BestSpeed = 1 + fastCompression = 3 + BestCompression = 9 + DefaultCompression = -1 + logWindowSize = 15 + windowSize = 1 << logWindowSize + windowMask = windowSize - 1 + logMaxOffsetSize = 15 // Standard DEFLATE + minMatchLength = 3 // The smallest match that the compressor looks for + maxMatchLength = 258 // The longest match for the compressor + minOffsetSize = 1 // The shortest offset that makes any sence + + // The maximum number of tokens we put into a single flat block, just too + // stop things from getting too large. + maxFlateBlockTokens = 1 << 14 + maxStoreBlockSize = 65535 + hashBits = 15 + hashSize = 1 << hashBits + hashMask = (1 << hashBits) - 1 + hashShift = (hashBits + minMatchLength - 1) / minMatchLength +) + +type compressionLevel struct { + good, lazy, nice, chain, fastSkipHashing int +} + +var levels = []compressionLevel{ + {}, // 0 + // For levels 1-3 we don't bother trying with lazy matches + {3, 0, 8, 4, 4}, + {3, 0, 16, 8, 5}, + {3, 0, 32, 32, 6}, + // Levels 4-9 use increasingly more lazy matching + // and increasingly stringent conditions for "good enough". + {4, 4, 16, 16, math.MaxInt32}, + {8, 16, 32, 32, math.MaxInt32}, + {8, 16, 128, 128, math.MaxInt32}, + {8, 32, 128, 256, math.MaxInt32}, + {32, 128, 258, 1024, math.MaxInt32}, + {32, 258, 258, 4096, math.MaxInt32}, +} + +type compressor struct { + compressionLevel + + w *huffmanBitWriter + + // compression algorithm + fill func(*compressor, []byte) int // copy data to window + step func(*compressor) // process window + sync bool // requesting flush + + // Input hash chains + // hashHead[hashValue] contains the largest inputIndex with the specified hash value + // If hashHead[hashValue] is within the current window, then + // hashPrev[hashHead[hashValue] & windowMask] contains the previous index + // with the same hash value. + chainHead int + hashHead []int + hashPrev []int + + // input window: unprocessed data is window[index:windowEnd] + index int + window []byte + windowEnd int + blockStart int // window index where current tokens start + byteAvailable bool // if true, still need to process window[index-1]. + + // queued output tokens: tokens[:ti] + tokens []token + ti int + + // deflate state + length int + offset int + hash int + maxInsertIndex int + err os.Error +} + +func (d *compressor) fillDeflate(b []byte) int { + if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) { + // shift the window by windowSize + copy(d.window, d.window[windowSize:2*windowSize]) + d.index -= windowSize + d.windowEnd -= windowSize + if d.blockStart >= windowSize { + d.blockStart -= windowSize + } else { + d.blockStart = math.MaxInt32 + } + for i, h := range d.hashHead { + v := h - windowSize + if v < -1 { + v = -1 + } + d.hashHead[i] = v + } + for i, h := range d.hashPrev { + v := -h - windowSize + if v < -1 { + v = -1 + } + d.hashPrev[i] = v + } + } + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) writeBlock(tokens []token, index int, eof bool) os.Error { + if index > 0 || eof { + var window []byte + if d.blockStart <= index { + window = d.window[d.blockStart:index] + } + d.blockStart = index + d.w.writeBlock(tokens, eof, window) + return d.w.err + } + return nil +} + +// Try to find a match starting at index whose length is greater than prevSize. +// We only look at chainCount possibilities before giving up. +func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { + minMatchLook := maxMatchLength + if lookahead < minMatchLook { + minMatchLook = lookahead + } + + win := d.window[0 : pos+minMatchLook] + + // We quit when we get a match that's at least nice long + nice := len(win) - pos + if d.nice < nice { + nice = d.nice + } + + // If we've got a match that's good enough, only look in 1/4 the chain. + tries := d.chain + length = prevLength + if length >= d.good { + tries >>= 2 + } + + w0 := win[pos] + w1 := win[pos+1] + wEnd := win[pos+length] + minIndex := pos - windowSize + + for i := prevHead; tries > 0; tries-- { + if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] { + // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches + + n := 3 + for pos+n < len(win) && win[i+n] == win[pos+n] { + n++ + } + if n > length && (n > 3 || pos-i <= 4096) { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i == minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + if i = d.hashPrev[i&windowMask]; i < minIndex || i < 0 { + break + } + } + return +} + +func (d *compressor) writeStoredBlock(buf []byte) os.Error { + if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { + return d.w.err + } + d.w.writeBytes(buf) + return d.w.err +} + +func (d *compressor) initDeflate() { + d.hashHead = make([]int, hashSize) + d.hashPrev = make([]int, windowSize) + d.window = make([]byte, 2*windowSize) + fillInts(d.hashHead, -1) + d.tokens = make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1) + d.length = minMatchLength - 1 + d.offset = 0 + d.byteAvailable = false + d.index = 0 + d.ti = 0 + d.hash = 0 + d.chainHead = -1 +} + +func (d *compressor) deflate() { + if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync { + return + } + + d.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + if d.index < d.maxInsertIndex { + d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1]) + } + +Loop: + for { + if d.index > d.windowEnd { + panic("index > windowEnd") + } + lookahead := d.windowEnd - d.index + if lookahead < minMatchLength+maxMatchLength { + if !d.sync { + break Loop + } + if d.index > d.windowEnd { + panic("index > windowEnd") + } + if lookahead == 0 { + // Flush current output block if any. + if d.byteAvailable { + // There is still one pending token that needs to be flushed + d.tokens[d.ti] = literalToken(uint32(d.window[d.index-1])) + d.ti++ + d.byteAvailable = false + } + if d.ti > 0 { + if d.err = d.writeBlock(d.tokens[0:d.ti], d.index, false); d.err != nil { + return + } + d.ti = 0 + } + break Loop + } + } + if d.index < d.maxInsertIndex { + // Update the hash + d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask + d.chainHead = d.hashHead[d.hash] + d.hashPrev[d.index&windowMask] = d.chainHead + d.hashHead[d.hash] = d.index + } + prevLength := d.length + prevOffset := d.offset + d.length = minMatchLength - 1 + d.offset = 0 + minIndex := d.index - windowSize + if minIndex < 0 { + minIndex = 0 + } + + if d.chainHead >= minIndex && + (d.fastSkipHashing != 0 && lookahead > minMatchLength-1 || + d.fastSkipHashing == 0 && lookahead > prevLength && prevLength < d.lazy) { + if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead, minMatchLength-1, lookahead); ok { + d.length = newLength + d.offset = newOffset + } + } + if d.fastSkipHashing != 0 && d.length >= minMatchLength || + d.fastSkipHashing == 0 && prevLength >= minMatchLength && d.length <= prevLength { + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + if d.fastSkipHashing != 0 { + d.tokens[d.ti] = matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize)) + } else { + d.tokens[d.ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize)) + } + d.ti++ + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. + if d.length <= d.fastSkipHashing { + var newIndex int + if d.fastSkipHashing != 0 { + newIndex = d.index + d.length + } else { + newIndex = prevLength - 1 + } + for d.index++; d.index < newIndex; d.index++ { + if d.index < d.maxInsertIndex { + d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + d.hashPrev[d.index&windowMask] = d.hashHead[d.hash] + // Set the head of the hash chain to us. + d.hashHead[d.hash] = d.index + } + } + if d.fastSkipHashing == 0 { + d.byteAvailable = false + d.length = minMatchLength - 1 + } + } else { + // For matches this long, we don't bother inserting each individual + // item into the table. + d.index += d.length + d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1])) + } + if d.ti == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { + return + } + d.ti = 0 + } + } else { + if d.fastSkipHashing != 0 || d.byteAvailable { + i := d.index - 1 + if d.fastSkipHashing != 0 { + i = d.index + } + d.tokens[d.ti] = literalToken(uint32(d.window[i])) + d.ti++ + if d.ti == maxFlateBlockTokens { + if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil { + return + } + d.ti = 0 + } + } + d.index++ + if d.fastSkipHashing == 0 { + d.byteAvailable = true + } + } + } +} + +func (d *compressor) fillStore(b []byte) int { + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) store() { + if d.windowEnd > 0 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + } + d.windowEnd = 0 +} + +func (d *compressor) write(b []byte) (n int, err os.Error) { + n = len(b) + b = b[d.fill(d, b):] + for len(b) > 0 { + d.step(d) + b = b[d.fill(d, b):] + } + return n, d.err +} + +func (d *compressor) syncFlush() os.Error { + d.sync = true + d.step(d) + if d.err == nil { + d.w.writeStoredHeader(0, false) + d.w.flush() + d.err = d.w.err + } + d.sync = false + return d.err +} + +func (d *compressor) init(w io.Writer, level int) (err os.Error) { + d.w = newHuffmanBitWriter(w) + + switch { + case level == NoCompression: + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillStore + d.step = (*compressor).store + case level == DefaultCompression: + level = 6 + fallthrough + case 1 <= level && level <= 9: + d.compressionLevel = levels[level] + d.initDeflate() + d.fill = (*compressor).fillDeflate + d.step = (*compressor).deflate + default: + return WrongValueError{"level", 0, 9, int32(level)} + } + return nil +} + +func (d *compressor) close() os.Error { + d.sync = true + d.step(d) + if d.err != nil { + return d.err + } + if d.w.writeStoredHeader(0, true); d.w.err != nil { + return d.w.err + } + d.w.flush() + return d.w.err +} + +// NewWriter returns a new Writer compressing +// data at the given level. Following zlib, levels +// range from 1 (BestSpeed) to 9 (BestCompression); +// higher levels typically run slower but compress more. +// Level 0 (NoCompression) does not attempt any +// compression; it only adds the necessary DEFLATE framing. +func NewWriter(w io.Writer, level int) *Writer { + const logWindowSize = logMaxOffsetSize + var dw Writer + dw.d.init(w, level) + return &dw +} + +// NewWriterDict is like NewWriter but initializes the new +// Writer with a preset dictionary. The returned Writer behaves +// as if the dictionary had been written to it without producing +// any compressed output. The compressed data written to w +// can only be decompressed by a Reader initialized with the +// same dictionary. +func NewWriterDict(w io.Writer, level int, dict []byte) *Writer { + dw := &dictWriter{w, false} + zw := NewWriter(dw, level) + zw.Write(dict) + zw.Flush() + dw.enabled = true + return zw +} + +type dictWriter struct { + w io.Writer + enabled bool +} + +func (w *dictWriter) Write(b []byte) (n int, err os.Error) { + if w.enabled { + return w.w.Write(b) + } + return len(b), nil +} + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + d compressor +} + +// Write writes data to w, which will eventually write the +// compressed form of data to its underlying writer. +func (w *Writer) Write(data []byte) (n int, err os.Error) { + return w.d.write(data) +} + +// Flush flushes any pending compressed data to the underlying writer. +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. +// Flush does not return until the data has been written. +// If the underlying writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (w *Writer) Flush() os.Error { + // For more about flushing: + // http://www.bolet.org/~pornin/deflate-flush.html + return w.d.syncFlush() +} + +// Close flushes and closes the writer. +func (w *Writer) Close() os.Error { + return w.d.close() +} diff --git a/src/pkg/compress/flate/deflate_test.go b/src/pkg/compress/flate/deflate_test.go new file mode 100644 index 000000000..930823685 --- /dev/null +++ b/src/pkg/compress/flate/deflate_test.go @@ -0,0 +1,321 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "sync" + "testing" +) + +type deflateTest struct { + in []byte + level int + out []byte +} + +type deflateInflateTest struct { + in []byte +} + +type reverseBitsTest struct { + in uint16 + bitCount uint8 + out uint16 +} + +var deflateTests = []*deflateTest{ + &deflateTest{[]byte{}, 0, []byte{1, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11}, -1, []byte{18, 4, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11}, DefaultCompression, []byte{18, 4, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11}, 4, []byte{18, 4, 4, 0, 0, 255, 255}}, + + &deflateTest{[]byte{0x11}, 0, []byte{0, 1, 0, 254, 255, 17, 1, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x12}, 0, []byte{0, 2, 0, 253, 255, 17, 18, 1, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 0, + []byte{0, 8, 0, 247, 255, 17, 17, 17, 17, 17, 17, 17, 17, 1, 0, 0, 255, 255}, + }, + &deflateTest{[]byte{}, 1, []byte{1, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11}, 1, []byte{18, 4, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x12}, 1, []byte{18, 20, 2, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 1, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}}, + &deflateTest{[]byte{}, 9, []byte{1, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11}, 9, []byte{18, 4, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x12}, 9, []byte{18, 20, 2, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 9, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}}, +} + +var deflateInflateTests = []*deflateInflateTest{ + &deflateInflateTest{[]byte{}}, + &deflateInflateTest{[]byte{0x11}}, + &deflateInflateTest{[]byte{0x11, 0x12}}, + &deflateInflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}}, + &deflateInflateTest{[]byte{0x11, 0x10, 0x13, 0x41, 0x21, 0x21, 0x41, 0x13, 0x87, 0x78, 0x13}}, + &deflateInflateTest{largeDataChunk()}, +} + +var reverseBitsTests = []*reverseBitsTest{ + &reverseBitsTest{1, 1, 1}, + &reverseBitsTest{1, 2, 2}, + &reverseBitsTest{1, 3, 4}, + &reverseBitsTest{1, 4, 8}, + &reverseBitsTest{1, 5, 16}, + &reverseBitsTest{17, 5, 17}, + &reverseBitsTest{257, 9, 257}, + &reverseBitsTest{29, 5, 23}, +} + +func largeDataChunk() []byte { + result := make([]byte, 100000) + for i := range result { + result[i] = byte(i * i & 0xFF) + } + return result +} + +func TestDeflate(t *testing.T) { + for _, h := range deflateTests { + var buf bytes.Buffer + w := NewWriter(&buf, h.level) + w.Write(h.in) + w.Close() + if !bytes.Equal(buf.Bytes(), h.out) { + t.Errorf("Deflate(%d, %x) = %x, want %x", h.level, h.in, buf.Bytes(), h.out) + } + } +} + +type syncBuffer struct { + buf bytes.Buffer + mu sync.RWMutex + closed bool + ready chan bool +} + +func newSyncBuffer() *syncBuffer { + return &syncBuffer{ready: make(chan bool, 1)} +} + +func (b *syncBuffer) Read(p []byte) (n int, err os.Error) { + for { + b.mu.RLock() + n, err = b.buf.Read(p) + b.mu.RUnlock() + if n > 0 || b.closed { + return + } + <-b.ready + } + panic("unreachable") +} + +func (b *syncBuffer) signal() { + select { + case b.ready <- true: + default: + } +} + +func (b *syncBuffer) Write(p []byte) (n int, err os.Error) { + n, err = b.buf.Write(p) + b.signal() + return +} + +func (b *syncBuffer) WriteMode() { + b.mu.Lock() +} + +func (b *syncBuffer) ReadMode() { + b.mu.Unlock() + b.signal() +} + +func (b *syncBuffer) Close() os.Error { + b.closed = true + b.signal() + return nil +} + +func testSync(t *testing.T, level int, input []byte, name string) { + if len(input) == 0 { + return + } + + t.Logf("--testSync %d, %d, %s", level, len(input), name) + buf := newSyncBuffer() + buf1 := new(bytes.Buffer) + buf.WriteMode() + w := NewWriter(io.MultiWriter(buf, buf1), level) + r := NewReader(buf) + + // Write half the input and read back. + for i := 0; i < 2; i++ { + var lo, hi int + if i == 0 { + lo, hi = 0, (len(input)+1)/2 + } else { + lo, hi = (len(input)+1)/2, len(input) + } + t.Logf("#%d: write %d-%d", i, lo, hi) + if _, err := w.Write(input[lo:hi]); err != nil { + t.Errorf("testSync: write: %v", err) + return + } + if i == 0 { + if err := w.Flush(); err != nil { + t.Errorf("testSync: flush: %v", err) + return + } + } else { + if err := w.Close(); err != nil { + t.Errorf("testSync: close: %v", err) + } + } + buf.ReadMode() + out := make([]byte, hi-lo+1) + m, err := io.ReadAtLeast(r, out, hi-lo) + t.Logf("#%d: read %d", i, m) + if m != hi-lo || err != nil { + t.Errorf("testSync/%d (%d, %d, %s): read %d: %d, %v (%d left)", i, level, len(input), name, hi-lo, m, err, buf.buf.Len()) + return + } + if !bytes.Equal(input[lo:hi], out[:hi-lo]) { + t.Errorf("testSync/%d: read wrong bytes: %x vs %x", i, input[lo:hi], out[:hi-lo]) + return + } + // This test originally checked that after reading + // the first half of the input, there was nothing left + // in the read buffer (buf.buf.Len() != 0) but that is + // not necessarily the case: the write Flush may emit + // some extra framing bits that are not necessary + // to process to obtain the first half of the uncompressed + // data. The test ran correctly most of the time, because + // the background goroutine had usually read even + // those extra bits by now, but it's not a useful thing to + // check. + buf.WriteMode() + } + buf.ReadMode() + out := make([]byte, 10) + if n, err := r.Read(out); n > 0 || err != os.EOF { + t.Errorf("testSync (%d, %d, %s): final Read: %d, %v (hex: %x)", level, len(input), name, n, err, out[0:n]) + } + if buf.buf.Len() != 0 { + t.Errorf("testSync (%d, %d, %s): extra data at end", level, len(input), name) + } + r.Close() + + // stream should work for ordinary reader too + r = NewReader(buf1) + out, err := ioutil.ReadAll(r) + if err != nil { + t.Errorf("testSync: read: %s", err) + return + } + r.Close() + if !bytes.Equal(input, out) { + t.Errorf("testSync: decompress(compress(data)) != data: level=%d input=%s", level, name) + } +} + +func testToFromWithLevel(t *testing.T, level int, input []byte, name string) os.Error { + buffer := bytes.NewBuffer(nil) + w := NewWriter(buffer, level) + w.Write(input) + w.Close() + r := NewReader(buffer) + out, err := ioutil.ReadAll(r) + if err != nil { + t.Errorf("read: %s", err) + return err + } + r.Close() + if !bytes.Equal(input, out) { + t.Errorf("decompress(compress(data)) != data: level=%d input=%s", level, name) + } + + testSync(t, level, input, name) + return nil +} + +func testToFrom(t *testing.T, input []byte, name string) { + for i := 0; i < 10; i++ { + testToFromWithLevel(t, i, input, name) + } +} + +func TestDeflateInflate(t *testing.T) { + for i, h := range deflateInflateTests { + testToFrom(t, h.in, fmt.Sprintf("#%d", i)) + } +} + +func TestReverseBits(t *testing.T) { + for _, h := range reverseBitsTests { + if v := reverseBits(h.in, h.bitCount); v != h.out { + t.Errorf("reverseBits(%v,%v) = %v, want %v", + h.in, h.bitCount, v, h.out) + } + } +} + +func TestDeflateInflateString(t *testing.T) { + gold, err := ioutil.ReadFile("../testdata/e.txt") + if err != nil { + t.Error(err) + } + testToFromWithLevel(t, 1, gold, "2.718281828...") +} + +func TestReaderDict(t *testing.T) { + const ( + dict = "hello world" + text = "hello again world" + ) + var b bytes.Buffer + w := NewWriter(&b, 5) + w.Write([]byte(dict)) + w.Flush() + b.Reset() + w.Write([]byte(text)) + w.Close() + + r := NewReaderDict(&b, []byte(dict)) + data, err := ioutil.ReadAll(r) + if err != nil { + t.Fatal(err) + } + if string(data) != "hello again world" { + t.Fatalf("read returned %q want %q", string(data), text) + } +} + +func TestWriterDict(t *testing.T) { + const ( + dict = "hello world" + text = "hello again world" + ) + var b bytes.Buffer + w := NewWriter(&b, 5) + w.Write([]byte(dict)) + w.Flush() + b.Reset() + w.Write([]byte(text)) + w.Close() + + var b1 bytes.Buffer + w = NewWriterDict(&b1, 5, []byte(dict)) + w.Write([]byte(text)) + w.Close() + + if !bytes.Equal(b1.Bytes(), b.Bytes()) { + t.Fatalf("writer wrote %q want %q", b1.Bytes(), b.Bytes()) + } +} diff --git a/src/pkg/compress/flate/flate_test.go b/src/pkg/compress/flate/flate_test.go new file mode 100644 index 000000000..bfd3b8381 --- /dev/null +++ b/src/pkg/compress/flate/flate_test.go @@ -0,0 +1,139 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This test tests some internals of the flate package. +// The tests in package compress/gzip serve as the +// end-to-end test of the decompressor. + +package flate + +import ( + "bytes" + "reflect" + "testing" +) + +// The Huffman code lengths used by the fixed-format Huffman blocks. +var fixedHuffmanBits = [...]int{ + // 0-143 length 8 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + + // 144-255 length 9 + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + + // 256-279 length 7 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + + // 280-287 length 8 + 8, 8, 8, 8, 8, 8, 8, 8, +} + +type InitDecoderTest struct { + in []int + out huffmanDecoder + ok bool +} + +var initDecoderTests = []*InitDecoderTest{ + // Example from Connell 1973, + &InitDecoderTest{ + []int{3, 5, 2, 4, 3, 5, 5, 4, 4, 3, 4, 5}, + huffmanDecoder{ + 2, 5, + [maxCodeLen + 1]int{2: 0, 4, 13, 31}, + [maxCodeLen + 1]int{2: 0, 1, 6, 20}, + // Paper used different code assignment: + // 2, 9, 4, 0, 10, 8, 3, 7, 1, 5, 11, 6 + // Reordered here so that codes of same length + // are assigned to increasing numbers. + []int{2, 0, 4, 9, 3, 7, 8, 10, 1, 5, 6, 11}, + }, + true, + }, + + // Example from RFC 1951 section 3.2.2 + &InitDecoderTest{ + []int{2, 1, 3, 3}, + huffmanDecoder{ + 1, 3, + [maxCodeLen + 1]int{1: 0, 2, 7}, + [maxCodeLen + 1]int{1: 0, 1, 4}, + []int{1, 0, 2, 3}, + }, + true, + }, + + // Second example from RFC 1951 section 3.2.2 + &InitDecoderTest{ + []int{3, 3, 3, 3, 3, 2, 4, 4}, + huffmanDecoder{ + 2, 4, + [maxCodeLen + 1]int{2: 0, 6, 15}, + [maxCodeLen + 1]int{2: 0, 1, 8}, + []int{5, 0, 1, 2, 3, 4, 6, 7}, + }, + true, + }, + + // Static Huffman codes (RFC 1951 section 3.2.6) + &InitDecoderTest{ + fixedHuffmanBits[0:], + fixedHuffmanDecoder, + true, + }, + + // Illegal input. + &InitDecoderTest{ + []int{}, + huffmanDecoder{}, + false, + }, + + // Illegal input. + &InitDecoderTest{ + []int{0, 0, 0, 0, 0, 0, 0}, + huffmanDecoder{}, + false, + }, +} + +func TestInitDecoder(t *testing.T) { + for i, tt := range initDecoderTests { + var h huffmanDecoder + if h.init(tt.in) != tt.ok { + t.Errorf("test %d: init = %v", i, !tt.ok) + continue + } + if !reflect.DeepEqual(&h, &tt.out) { + t.Errorf("test %d:\nhave %v\nwant %v", i, h, tt.out) + } + } +} + +func TestUncompressedSource(t *testing.T) { + decoder := NewReader(bytes.NewBuffer([]byte{0x01, 0x01, 0x00, 0xfe, 0xff, 0x11})) + output := make([]byte, 1) + n, error := decoder.Read(output) + if n != 1 || error != nil { + t.Fatalf("decoder.Read() = %d, %v, want 1, nil", n, error) + } + if output[0] != 0x11 { + t.Errorf("output[0] = %x, want 0x11", output[0]) + } +} diff --git a/src/pkg/compress/flate/huffman_bit_writer.go b/src/pkg/compress/flate/huffman_bit_writer.go new file mode 100644 index 000000000..3981df5cb --- /dev/null +++ b/src/pkg/compress/flate/huffman_bit_writer.go @@ -0,0 +1,494 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "io" + "math" + "os" + "strconv" +) + +const ( + // The largest offset code. + offsetCodeCount = 30 + + // The special code used to mark the end of a block. + endBlockMarker = 256 + + // The first length code. + lengthCodesStart = 257 + + // The number of codegen codes. + codegenCodeCount = 19 + badCode = 255 +) + +// The number of extra bits needed by length code X - LENGTH_CODES_START. +var lengthExtraBits = []int8{ + /* 257 */ 0, 0, 0, + /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, + /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, + /* 280 */ 4, 5, 5, 5, 5, 0, +} + +// The length indicated by length code X - LENGTH_CODES_START. +var lengthBase = []uint32{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, + 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, + 64, 80, 96, 112, 128, 160, 192, 224, 255, +} + +// offset code word extra bits. +var offsetExtraBits = []int8{ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + /* extended window */ + 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, +} + +var offsetBase = []uint32{ + /* normal deflate */ + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + + /* extended window */ + 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, + 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, + 0x100000, 0x180000, 0x200000, 0x300000, +} + +// The odd order in which the codegen code sizes are written. +var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +type huffmanBitWriter struct { + w io.Writer + // Data waiting to be written is bytes[0:nbytes] + // and then the low nbits of bits. + bits uint32 + nbits uint32 + bytes [64]byte + nbytes int + literalFreq []int32 + offsetFreq []int32 + codegen []uint8 + codegenFreq []int32 + literalEncoding *huffmanEncoder + offsetEncoding *huffmanEncoder + codegenEncoding *huffmanEncoder + err os.Error +} + +type WrongValueError struct { + name string + from int32 + to int32 + value int32 +} + +func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { + return &huffmanBitWriter{ + w: w, + literalFreq: make([]int32, maxLit), + offsetFreq: make([]int32, offsetCodeCount), + codegen: make([]uint8, maxLit+offsetCodeCount+1), + codegenFreq: make([]int32, codegenCodeCount), + literalEncoding: newHuffmanEncoder(maxLit), + offsetEncoding: newHuffmanEncoder(offsetCodeCount), + codegenEncoding: newHuffmanEncoder(codegenCodeCount), + } +} + +func (err WrongValueError) String() string { + return "huffmanBitWriter: " + err.name + " should belong to [" + strconv.Itoa64(int64(err.from)) + ";" + + strconv.Itoa64(int64(err.to)) + "] but actual value is " + strconv.Itoa64(int64(err.value)) +} + +func (w *huffmanBitWriter) flushBits() { + if w.err != nil { + w.nbits = 0 + return + } + bits := w.bits + w.bits >>= 16 + w.nbits -= 16 + n := w.nbytes + w.bytes[n] = byte(bits) + w.bytes[n+1] = byte(bits >> 8) + if n += 2; n >= len(w.bytes) { + _, w.err = w.w.Write(w.bytes[0:]) + n = 0 + } + w.nbytes = n +} + +func (w *huffmanBitWriter) flush() { + if w.err != nil { + w.nbits = 0 + return + } + n := w.nbytes + if w.nbits > 8 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + w.nbits -= 8 + n++ + } + if w.nbits > 0 { + w.bytes[n] = byte(w.bits) + w.nbits = 0 + n++ + } + w.bits = 0 + _, w.err = w.w.Write(w.bytes[0:n]) + w.nbytes = 0 +} + +func (w *huffmanBitWriter) writeBits(b, nb int32) { + w.bits |= uint32(b) << w.nbits + if w.nbits += uint32(nb); w.nbits >= 16 { + w.flushBits() + } +} + +func (w *huffmanBitWriter) writeBytes(bytes []byte) { + if w.err != nil { + return + } + n := w.nbytes + if w.nbits == 8 { + w.bytes[n] = byte(w.bits) + w.nbits = 0 + n++ + } + if w.nbits != 0 { + w.err = InternalError("writeBytes with unfinished bits") + return + } + if n != 0 { + _, w.err = w.w.Write(w.bytes[0:n]) + if w.err != nil { + return + } + } + w.nbytes = 0 + _, w.err = w.w.Write(bytes) +} + +// RFC 1951 3.2.7 specifies a special run-length encoding for specifying +// the literal and offset lengths arrays (which are concatenated into a single +// array). This method generates that run-length encoding. +// +// The result is written into the codegen array, and the frequencies +// of each code is written into the codegenFreq array. +// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional +// information. Code badCode is an end marker +// +// numLiterals The number of literals in literalEncoding +// numOffsets The number of offsets in offsetEncoding +func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int) { + fillInt32s(w.codegenFreq, 0) + // Note that we are using codegen both as a temporary variable for holding + // a copy of the frequencies, and as the place where we put the result. + // This is fine because the output is always shorter than the input used + // so far. + codegen := w.codegen // cache + // Copy the concatenated code sizes to codegen. Put a marker at the end. + copyUint8s(codegen[0:numLiterals], w.literalEncoding.codeBits) + copyUint8s(codegen[numLiterals:numLiterals+numOffsets], w.offsetEncoding.codeBits) + codegen[numLiterals+numOffsets] = badCode + + size := codegen[0] + count := 1 + outIndex := 0 + for inIndex := 1; size != badCode; inIndex++ { + // INVARIANT: We have seen "count" copies of size that have not yet + // had output generated for them. + nextSize := codegen[inIndex] + if nextSize == size { + count++ + continue + } + // We need to generate codegen indicating "count" of size. + if size != 0 { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + count-- + for count >= 3 { + n := min(count, 6) + codegen[outIndex] = 16 + outIndex++ + codegen[outIndex] = uint8(n - 3) + outIndex++ + w.codegenFreq[16]++ + count -= n + } + } else { + for count >= 11 { + n := min(count, 138) + codegen[outIndex] = 18 + outIndex++ + codegen[outIndex] = uint8(n - 11) + outIndex++ + w.codegenFreq[18]++ + count -= n + } + if count >= 3 { + // count >= 3 && count <= 10 + codegen[outIndex] = 17 + outIndex++ + codegen[outIndex] = uint8(count - 3) + outIndex++ + w.codegenFreq[17]++ + count = 0 + } + } + count-- + for ; count >= 0; count-- { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + } + // Set up invariant for next time through the loop. + size = nextSize + count = 1 + } + // Marker indicating the end of the codegen. + codegen[outIndex] = badCode +} + +func (w *huffmanBitWriter) writeCode(code *huffmanEncoder, literal uint32) { + if w.err != nil { + return + } + w.writeBits(int32(code.code[literal]), int32(code.codeBits[literal])) +} + +// Write the header of a dynamic Huffman block to the output stream. +// +// numLiterals The number of literals specified in codegen +// numOffsets The number of offsets specified in codegen +// numCodegens The number of codegens used in codegen +func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { + if w.err != nil { + return + } + var firstBits int32 = 4 + if isEof { + firstBits = 5 + } + w.writeBits(firstBits, 3) + w.writeBits(int32(numLiterals-257), 5) + w.writeBits(int32(numOffsets-1), 5) + w.writeBits(int32(numCodegens-4), 4) + + for i := 0; i < numCodegens; i++ { + value := w.codegenEncoding.codeBits[codegenOrder[i]] + w.writeBits(int32(value), 3) + } + + i := 0 + for { + var codeWord int = int(w.codegen[i]) + i++ + if codeWord == badCode { + break + } + // The low byte contains the actual code to generate. + w.writeCode(w.codegenEncoding, uint32(codeWord)) + + switch codeWord { + case 16: + w.writeBits(int32(w.codegen[i]), 2) + i++ + break + case 17: + w.writeBits(int32(w.codegen[i]), 3) + i++ + break + case 18: + w.writeBits(int32(w.codegen[i]), 7) + i++ + break + } + } +} + +func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { + if w.err != nil { + return + } + var flag int32 + if isEof { + flag = 1 + } + w.writeBits(flag, 3) + w.flush() + w.writeBits(int32(length), 16) + w.writeBits(int32(^uint16(length)), 16) +} + +func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { + if w.err != nil { + return + } + // Indicate that we are a fixed Huffman block + var value int32 = 2 + if isEof { + value = 3 + } + w.writeBits(value, 3) +} + +func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { + if w.err != nil { + return + } + fillInt32s(w.literalFreq, 0) + fillInt32s(w.offsetFreq, 0) + + n := len(tokens) + tokens = tokens[0 : n+1] + tokens[n] = endBlockMarker + + for _, t := range tokens { + switch t.typ() { + case literalType: + w.literalFreq[t.literal()]++ + case matchType: + length := t.length() + offset := t.offset() + w.literalFreq[lengthCodesStart+lengthCode(length)]++ + w.offsetFreq[offsetCode(offset)]++ + } + } + + // get the number of literals + numLiterals := len(w.literalFreq) + for w.literalFreq[numLiterals-1] == 0 { + numLiterals-- + } + // get the number of offsets + numOffsets := len(w.offsetFreq) + for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { + numOffsets-- + } + if numOffsets == 0 { + // We haven't found a single match. If we want to go with the dynamic encoding, + // we should count at least one offset to be sure that the offset huffman tree could be encoded. + w.offsetFreq[0] = 1 + numOffsets = 1 + } + + w.literalEncoding.generate(w.literalFreq, 15) + w.offsetEncoding.generate(w.offsetFreq, 15) + + storedBytes := 0 + if input != nil { + storedBytes = len(input) + } + var extraBits int64 + var storedSize int64 = math.MaxInt64 + if storedBytes <= maxStoreBlockSize && input != nil { + storedSize = int64((storedBytes + 5) * 8) + // We only bother calculating the costs of the extra bits required by + // the length of offset fields (which will be the same for both fixed + // and dynamic encoding), if we need to compare those two encodings + // against stored encoding. + for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ { + // First eight length codes have extra size = 0. + extraBits += int64(w.literalFreq[lengthCode]) * int64(lengthExtraBits[lengthCode-lengthCodesStart]) + } + for offsetCode := 4; offsetCode < numOffsets; offsetCode++ { + // First four offset codes have extra size = 0. + extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode]) + } + } + + // Figure out smallest code. + // Fixed Huffman baseline. + var size = int64(3) + + fixedLiteralEncoding.bitLength(w.literalFreq) + + fixedOffsetEncoding.bitLength(w.offsetFreq) + + extraBits + var literalEncoding = fixedLiteralEncoding + var offsetEncoding = fixedOffsetEncoding + + // Dynamic Huffman? + var numCodegens int + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets) + w.codegenEncoding.generate(w.codegenFreq, 7) + numCodegens = len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + dynamicHeader := int64(3+5+5+4+(3*numCodegens)) + + w.codegenEncoding.bitLength(w.codegenFreq) + + int64(extraBits) + + int64(w.codegenFreq[16]*2) + + int64(w.codegenFreq[17]*3) + + int64(w.codegenFreq[18]*7) + dynamicSize := dynamicHeader + + w.literalEncoding.bitLength(w.literalFreq) + + w.offsetEncoding.bitLength(w.offsetFreq) + + if dynamicSize < size { + size = dynamicSize + literalEncoding = w.literalEncoding + offsetEncoding = w.offsetEncoding + } + + // Stored bytes? + if storedSize < size { + w.writeStoredHeader(storedBytes, eof) + w.writeBytes(input[0:storedBytes]) + return + } + + // Huffman. + if literalEncoding == fixedLiteralEncoding { + w.writeFixedHeader(eof) + } else { + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + } + for _, t := range tokens { + switch t.typ() { + case literalType: + w.writeCode(literalEncoding, t.literal()) + break + case matchType: + // Write the length + length := t.length() + lengthCode := lengthCode(length) + w.writeCode(literalEncoding, lengthCode+lengthCodesStart) + extraLengthBits := int32(lengthExtraBits[lengthCode]) + if extraLengthBits > 0 { + extraLength := int32(length - lengthBase[lengthCode]) + w.writeBits(extraLength, extraLengthBits) + } + // Write the offset + offset := t.offset() + offsetCode := offsetCode(offset) + w.writeCode(offsetEncoding, offsetCode) + extraOffsetBits := int32(offsetExtraBits[offsetCode]) + if extraOffsetBits > 0 { + extraOffset := int32(offset - offsetBase[offsetCode]) + w.writeBits(extraOffset, extraOffsetBits) + } + break + default: + panic("unknown token type: " + string(t)) + } + } +} diff --git a/src/pkg/compress/flate/huffman_code.go b/src/pkg/compress/flate/huffman_code.go new file mode 100644 index 000000000..7ed603a4f --- /dev/null +++ b/src/pkg/compress/flate/huffman_code.go @@ -0,0 +1,378 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "math" + "sort" +) + +type huffmanEncoder struct { + codeBits []uint8 + code []uint16 +} + +type literalNode struct { + literal uint16 + freq int32 +} + +type chain struct { + // The sum of the leaves in this tree + freq int32 + + // The number of literals to the left of this item at this level + leafCount int32 + + // The right child of this chain in the previous level. + up *chain +} + +type levelInfo struct { + // Our level. for better printing + level int32 + + // The most recent chain generated for this level + lastChain *chain + + // The frequency of the next character to add to this level + nextCharFreq int32 + + // The frequency of the next pair (from level below) to add to this level. + // Only valid if the "needed" value of the next lower level is 0. + nextPairFreq int32 + + // The number of chains remaining to generate for this level before moving + // up to the next level + needed int32 + + // The levelInfo for level+1 + up *levelInfo + + // The levelInfo for level-1 + down *levelInfo +} + +func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} } + +func newHuffmanEncoder(size int) *huffmanEncoder { + return &huffmanEncoder{make([]uint8, size), make([]uint16, size)} +} + +// Generates a HuffmanCode corresponding to the fixed literal table +func generateFixedLiteralEncoding() *huffmanEncoder { + h := newHuffmanEncoder(maxLit) + codeBits := h.codeBits + code := h.code + var ch uint16 + for ch = 0; ch < maxLit; ch++ { + var bits uint16 + var size uint8 + switch { + case ch < 144: + // size 8, 000110000 .. 10111111 + bits = ch + 48 + size = 8 + break + case ch < 256: + // size 9, 110010000 .. 111111111 + bits = ch + 400 - 144 + size = 9 + break + case ch < 280: + // size 7, 0000000 .. 0010111 + bits = ch - 256 + size = 7 + break + default: + // size 8, 11000000 .. 11000111 + bits = ch + 192 - 280 + size = 8 + } + codeBits[ch] = size + code[ch] = reverseBits(bits, size) + } + return h +} + +func generateFixedOffsetEncoding() *huffmanEncoder { + h := newHuffmanEncoder(30) + codeBits := h.codeBits + code := h.code + for ch := uint16(0); ch < 30; ch++ { + codeBits[ch] = 5 + code[ch] = reverseBits(ch, 5) + } + return h +} + +var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding() +var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding() + +func (h *huffmanEncoder) bitLength(freq []int32) int64 { + var total int64 + for i, f := range freq { + if f != 0 { + total += int64(f) * int64(h.codeBits[i]) + } + } + return total +} + +// Generate elements in the chain using an iterative algorithm. +func (h *huffmanEncoder) generateChains(top *levelInfo, list []literalNode) { + n := len(list) + list = list[0 : n+1] + list[n] = maxNode() + + l := top + for { + if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To m sure we never come back to this level or any lower level, + // set nextPairFreq impossibly large. + l.lastChain = nil + l.needed = 0 + l = l.up + l.nextPairFreq = math.MaxInt32 + continue + } + + prevFreq := l.lastChain.freq + if l.nextCharFreq < l.nextPairFreq { + // The next item on this row is a leaf node. + n := l.lastChain.leafCount + 1 + l.lastChain = &chain{l.nextCharFreq, n, l.lastChain.up} + l.nextCharFreq = list[n].freq + } else { + // The next item on this row is a pair from the previous row. + // nextPairFreq isn't valid until we generate two + // more values in the level below + l.lastChain = &chain{l.nextPairFreq, l.lastChain.leafCount, l.down.lastChain} + l.down.needed = 2 + } + + if l.needed--; l.needed == 0 { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in nextPairFreq + // of that level with the sum of the two nodes we've just calculated on + // this level. + up := l.up + if up == nil { + // All done! + return + } + up.nextPairFreq = prevFreq + l.lastChain.freq + l = up + } else { + // If we stole from below, move down temporarily to replenish it. + for l.down.needed > 0 { + l = l.down + } + } + } +} + +// Return the number of literals assigned to each bit size in the Huffman encoding +// +// This method is only called when list.length >= 3 +// The cases of 0, 1, and 2 literals are handled by special case code. +// +// list An array of the literals with non-zero frequencies +// and their associated frequencies. The array is in order of increasing +// frequency, and has as its last element a special element with frequency +// MaxInt32 +// maxBits The maximum number of bits that should be used to encode any literal. +// return An integer array in which array[i] indicates the number of literals +// that should be encoded in i bits. +func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { + n := int32(len(list)) + list = list[0 : n+1] + list[n] = maxNode() + + // The tree can't have greater depth than n - 1, no matter what. This + // saves a little bit of work in some small cases + maxBits = minInt32(maxBits, n-1) + + // Create information about each of the levels. + // A bogus "Level 0" whose sole purpose is so that + // level1.prev.needed==0. This makes level1.nextPairFreq + // be a legitimate value that never gets chosen. + top := &levelInfo{needed: 0} + chain2 := &chain{list[1].freq, 2, new(chain)} + for level := int32(1); level <= maxBits; level++ { + // For every level, the first two items are the first two characters. + // We initialize the levels as if we had already figured this out. + top = &levelInfo{ + level: level, + lastChain: chain2, + nextCharFreq: list[2].freq, + nextPairFreq: list[0].freq + list[1].freq, + down: top, + } + top.down.up = top + if level == 1 { + top.nextPairFreq = math.MaxInt32 + } + } + + // We need a total of 2*n - 2 items at top level and have already generated 2. + top.needed = 2*n - 4 + + l := top + for { + if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To m sure we never come back to this level or any lower level, + // set nextPairFreq impossibly large. + l.lastChain = nil + l.needed = 0 + l = l.up + l.nextPairFreq = math.MaxInt32 + continue + } + + prevFreq := l.lastChain.freq + if l.nextCharFreq < l.nextPairFreq { + // The next item on this row is a leaf node. + n := l.lastChain.leafCount + 1 + l.lastChain = &chain{l.nextCharFreq, n, l.lastChain.up} + l.nextCharFreq = list[n].freq + } else { + // The next item on this row is a pair from the previous row. + // nextPairFreq isn't valid until we generate two + // more values in the level below + l.lastChain = &chain{l.nextPairFreq, l.lastChain.leafCount, l.down.lastChain} + l.down.needed = 2 + } + + if l.needed--; l.needed == 0 { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in nextPairFreq + // of that level with the sum of the two nodes we've just calculated on + // this level. + up := l.up + if up == nil { + // All done! + break + } + up.nextPairFreq = prevFreq + l.lastChain.freq + l = up + } else { + // If we stole from below, move down temporarily to replenish it. + for l.down.needed > 0 { + l = l.down + } + } + } + + // Somethings is wrong if at the end, the top level is null or hasn't used + // all of the leaves. + if top.lastChain.leafCount != n { + panic("top.lastChain.leafCount != n") + } + + bitCount := make([]int32, maxBits+1) + bits := 1 + for chain := top.lastChain; chain.up != nil; chain = chain.up { + // chain.leafCount gives the number of literals requiring at least "bits" + // bits to encode. + bitCount[bits] = chain.leafCount - chain.up.leafCount + bits++ + } + return bitCount +} + +// Look at the leaves and assign them a bit count and an encoding as specified +// in RFC 1951 3.2.2 +func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { + code := uint16(0) + for n, bits := range bitCount { + code <<= 1 + if n == 0 || bits == 0 { + continue + } + // The literals list[len(list)-bits] .. list[len(list)-bits] + // are encoded using "bits" bits, and get the values + // code, code + 1, .... The code values are + // assigned in literal order (not frequency order). + chunk := list[len(list)-int(bits):] + sortByLiteral(chunk) + for _, node := range chunk { + h.codeBits[node.literal] = uint8(n) + h.code[node.literal] = reverseBits(code, uint8(n)) + code++ + } + list = list[0 : len(list)-int(bits)] + } +} + +// Update this Huffman Code object to be the minimum code for the specified frequency count. +// +// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. +// maxBits The maximum number of bits to use for any literal. +func (h *huffmanEncoder) generate(freq []int32, maxBits int32) { + list := make([]literalNode, len(freq)+1) + // Number of non-zero literals + count := 0 + // Set list to be the set of all non-zero literals and their frequencies + for i, f := range freq { + if f != 0 { + list[count] = literalNode{uint16(i), f} + count++ + } else { + h.codeBits[i] = 0 + } + } + // If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros + h.codeBits = h.codeBits[0:len(freq)] + list = list[0:count] + if count <= 2 { + // Handle the small cases here, because they are awkward for the general case code. With + // two or fewer literals, everything has bit length 1. + for i, node := range list { + // "list" is in order of increasing literal value. + h.codeBits[node.literal] = 1 + h.code[node.literal] = uint16(i) + } + return + } + sortByFreq(list) + + // Get the number of literals for each bit count + bitCount := h.bitCounts(list, maxBits) + // And do the assignment + h.assignEncodingAndSize(bitCount, list) +} + +type literalNodeSorter struct { + a []literalNode + less func(i, j int) bool +} + +func (s literalNodeSorter) Len() int { return len(s.a) } + +func (s literalNodeSorter) Less(i, j int) bool { + return s.less(i, j) +} + +func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] } + +func sortByFreq(a []literalNode) { + s := &literalNodeSorter{a, func(i, j int) bool { + if a[i].freq == a[j].freq { + return a[i].literal < a[j].literal + } + return a[i].freq < a[j].freq + }} + sort.Sort(s) +} + +func sortByLiteral(a []literalNode) { + s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }} + sort.Sort(s) +} diff --git a/src/pkg/compress/flate/inflate.go b/src/pkg/compress/flate/inflate.go new file mode 100644 index 000000000..3845f1204 --- /dev/null +++ b/src/pkg/compress/flate/inflate.go @@ -0,0 +1,708 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package flate implements the DEFLATE compressed data format, described in +// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file +// formats. +package flate + +import ( + "bufio" + "io" + "os" + "strconv" +) + +const ( + maxCodeLen = 16 // max length of Huffman code + maxHist = 32768 // max history required + maxLit = 286 + maxDist = 32 + numCodes = 19 // number of codes in Huffman meta-code +) + +// A CorruptInputError reports the presence of corrupt input at a given offset. +type CorruptInputError int64 + +func (e CorruptInputError) String() string { + return "flate: corrupt input before offset " + strconv.Itoa64(int64(e)) +} + +// An InternalError reports an error in the flate code itself. +type InternalError string + +func (e InternalError) String() string { return "flate: internal error: " + string(e) } + +// A ReadError reports an error encountered while reading input. +type ReadError struct { + Offset int64 // byte offset where error occurred + Error os.Error // error returned by underlying Read +} + +func (e *ReadError) String() string { + return "flate: read error at offset " + strconv.Itoa64(e.Offset) + ": " + e.Error.String() +} + +// A WriteError reports an error encountered while writing output. +type WriteError struct { + Offset int64 // byte offset where error occurred + Error os.Error // error returned by underlying Write +} + +func (e *WriteError) String() string { + return "flate: write error at offset " + strconv.Itoa64(e.Offset) + ": " + e.Error.String() +} + +// Huffman decoder is based on +// J. Brian Connell, ``A Huffman-Shannon-Fano Code,'' +// Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047. +type huffmanDecoder struct { + // min, max code length + min, max int + + // limit[i] = largest code word of length i + // Given code v of length n, + // need more bits if v > limit[n]. + limit [maxCodeLen + 1]int + + // base[i] = smallest code word of length i - seq number + base [maxCodeLen + 1]int + + // codes[seq number] = output code. + // Given code v of length n, value is + // codes[v - base[n]]. + codes []int +} + +// Initialize Huffman decoding tables from array of code lengths. +func (h *huffmanDecoder) init(bits []int) bool { + // Count number of codes of each length, + // compute min and max length. + var count [maxCodeLen + 1]int + var min, max int + for _, n := range bits { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n]++ + } + if max == 0 { + return false + } + + h.min = min + h.max = max + + // For each code range, compute + // nextcode (first code of that length), + // limit (last code of that length), and + // base (offset from first code to sequence number). + code := 0 + seq := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + n := count[i] + nextcode[i] = code + h.base[i] = code - seq + code += n + seq += n + h.limit[i] = code - 1 + code <<= 1 + } + + // Make array mapping sequence numbers to codes. + if len(h.codes) < len(bits) { + h.codes = make([]int, len(bits)) + } + for i, n := range bits { + if n == 0 { + continue + } + code := nextcode[n] + nextcode[n]++ + seq := code - h.base[n] + h.codes[seq] = i + } + return true +} + +// Hard-coded Huffman tables for DEFLATE algorithm. +// See RFC 1951, section 3.2.6. +var fixedHuffmanDecoder = huffmanDecoder{ + 7, 9, + [maxCodeLen + 1]int{7: 23, 199, 511}, + [maxCodeLen + 1]int{7: 0, 24, 224}, + []int{ + // length 7: 256-279 + 256, 257, 258, 259, 260, 261, 262, + 263, 264, 265, 266, 267, 268, 269, + 270, 271, 272, 273, 274, 275, 276, + 277, 278, 279, + + // length 8: 0-143 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, + 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, + 92, 93, 94, 95, 96, 97, 98, 99, 100, + 101, 102, 103, 104, 105, 106, 107, 108, + 109, 110, 111, 112, 113, 114, 115, 116, + 117, 118, 119, 120, 121, 122, 123, 124, + 125, 126, 127, 128, 129, 130, 131, 132, + 133, 134, 135, 136, 137, 138, 139, 140, + 141, 142, 143, + + // length 8: 280-287 + 280, 281, 282, 283, 284, 285, 286, 287, + + // length 9: 144-255 + 144, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, + 168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, + 200, 201, 202, 203, 204, 205, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, + }, +} + +// The actual read interface needed by NewReader. +// If the passed in io.Reader does not also have ReadByte, +// the NewReader will introduce its own buffering. +type Reader interface { + io.Reader + ReadByte() (c byte, err os.Error) +} + +// Decompress state. +type decompressor struct { + // Input source. + r Reader + roffset int64 + woffset int64 + + // Input bits, in top of b. + b uint32 + nb uint + + // Huffman decoders for literal/length, distance. + h1, h2 huffmanDecoder + + // Length arrays used to define Huffman codes. + bits [maxLit + maxDist]int + codebits [numCodes]int + + // Output history, buffer. + hist [maxHist]byte + hp int // current output position in buffer + hw int // have written hist[0:hw] already + hfull bool // buffer has filled at least once + + // Temporary buffer (avoids repeated allocation). + buf [4]byte + + // Next step in the decompression, + // and decompression state. + step func(*decompressor) + final bool + err os.Error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int +} + +func (f *decompressor) nextBlock() { + if f.final { + if f.hw != f.hp { + f.flush((*decompressor).nextBlock) + return + } + f.err = os.EOF + return + } + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlock() + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlock() + default: + // 3 is reserved. + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, os.Error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + return n, nil + } + if f.err != nil { + return 0, f.err + } + f.step(f) + } + panic("unreachable") +} + +func (f *decompressor) Close() os.Error { + if f.err == os.EOF { + return nil + } + return f.err +} + +// RFC 1951 section 3.2.7. +// Compression with dynamic Huffman codes + +var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +func (f *decompressor) readHuffman() os.Error { + // HLIT[5], HDIST[5], HCLEN[4]. + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + f.b >>= 4 + f.nb -= 5 + 5 + 4 + + // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. + for i := 0; i < nclen; i++ { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + f.codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + for i := nclen; i < len(codeOrder); i++ { + f.codebits[codeOrder[i]] = 0 + } + if !f.h1.init(f.codebits[0:]) { + return CorruptInputError(f.roffset) + } + + // HLIT + 257 code lengths, HDIST + 1 code lengths, + // using the code length Huffman code. + for i, n := 0, nlit+ndist; i < n; { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + if x < 16 { + // Actual length. + f.bits[i] = x + i++ + continue + } + // Repeat previous length or zero. + var rep int + var nb uint + var b int + switch x { + default: + return InternalError("unexpected length code") + case 16: + rep = 3 + nb = 2 + if i == 0 { + return CorruptInputError(f.roffset) + } + b = f.bits[i-1] + case 17: + rep = 3 + nb = 3 + b = 0 + case 18: + rep = 11 + nb = 7 + b = 0 + } + for f.nb < nb { + if err := f.moreBits(); err != nil { + return err + } + } + rep += int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + if i+rep > n { + return CorruptInputError(f.roffset) + } + for j := 0; j < rep; j++ { + f.bits[i] = b + i++ + } + } + + if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { + return CorruptInputError(f.roffset) + } + + return nil +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBlock() { + for { + v, err := f.huffSym(f.hl) + if err != nil { + f.err = err + return + } + var n uint // number of bits extra + var length int + switch { + case v < 256: + f.hist[f.hp] = byte(v) + f.hp++ + if f.hp == len(f.hist) { + // After the flush, continue this loop. + f.flush((*decompressor).huffmanBlock) + return + } + continue + case v == 256: + // Done with huffman block; read next block. + f.step = (*decompressor).nextBlock + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + default: + length = 258 + n = 0 + } + if n > 0 { + for f.nb < n { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + length += int(f.b & uint32(1<<n-1)) + f.b >>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + dist = int(reverseByte[(f.b&0x1F)<<3]) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist >= 30: + f.err = CorruptInputError(f.roffset) + return + default: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + extra |= int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + } + + // Copy history[-dist:-dist+length] into output. + if dist > len(f.hist) { + f.err = InternalError("bad history distance") + return + } + + // No check on length; encoding can be prescient. + if !f.hfull && dist > f.hp { + f.err = CorruptInputError(f.roffset) + return + } + + p := f.hp - dist + if p < 0 { + p += len(f.hist) + } + for i := 0; i < length; i++ { + f.hist[f.hp] = f.hist[p] + f.hp++ + p++ + if f.hp == len(f.hist) { + // After flush continue copying out of history. + f.copyLen = length - (i + 1) + f.copyDist = dist + f.flush((*decompressor).copyHuff) + return + } + if p == len(f.hist) { + p = 0 + } + } + } + panic("unreached") +} + +func (f *decompressor) copyHuff() { + length := f.copyLen + dist := f.copyDist + p := f.hp - dist + if p < 0 { + p += len(f.hist) + } + for i := 0; i < length; i++ { + f.hist[f.hp] = f.hist[p] + f.hp++ + p++ + if f.hp == len(f.hist) { + f.copyLen = length - (i + 1) + f.flush((*decompressor).copyHuff) + return + } + if p == len(f.hist) { + p = 0 + } + } + + // Continue processing Huffman block. + f.huffmanBlock() +} + +// Copy a single uncompressed data block from input to output. +func (f *decompressor) dataBlock() { + // Uncompressed. + // Discard current half-byte. + f.nb = 0 + f.b = 0 + + // Length then ones-complement of length. + nr, err := io.ReadFull(f.r, f.buf[0:4]) + f.roffset += int64(nr) + if err != nil { + f.err = &ReadError{f.roffset, err} + return + } + n := int(f.buf[0]) | int(f.buf[1])<<8 + nn := int(f.buf[2]) | int(f.buf[3])<<8 + if uint16(nn) != uint16(^n) { + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + // 0-length block means sync + f.flush((*decompressor).nextBlock) + return + } + + f.copyLen = n + f.copyData() +} + +func (f *decompressor) copyData() { + // Read f.dataLen bytes into history, + // pausing for reads as history fills. + n := f.copyLen + for n > 0 { + m := len(f.hist) - f.hp + if m > n { + m = n + } + m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) + f.roffset += int64(m) + if err != nil { + f.err = &ReadError{f.roffset, err} + return + } + n -= m + f.hp += m + if f.hp == len(f.hist) { + f.copyLen = n + f.flush((*decompressor).copyData) + return + } + } + f.step = (*decompressor).nextBlock +} + +func (f *decompressor) setDict(dict []byte) { + if len(dict) > len(f.hist) { + // Will only remember the tail. + dict = dict[len(dict)-len(f.hist):] + } + + f.hp = copy(f.hist[:], dict) + if f.hp == len(f.hist) { + f.hp = 0 + f.hfull = true + } + f.hw = f.hp +} + +func (f *decompressor) moreBits() os.Error { + c, err := f.r.ReadByte() + if err != nil { + if err == os.EOF { + err = io.ErrUnexpectedEOF + } + return err + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil +} + +// Read the next Huffman-encoded symbol from f according to h. +func (f *decompressor) huffSym(h *huffmanDecoder) (int, os.Error) { + for n := uint(h.min); n <= uint(h.max); n++ { + lim := h.limit[n] + if lim == -1 { + continue + } + for f.nb < n { + if err := f.moreBits(); err != nil { + return 0, err + } + } + v := int(f.b & uint32(1<<n-1)) + v <<= 16 - n + v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits + if v <= lim { + f.b >>= n + f.nb -= n + return h.codes[v-h.base[n]], nil + } + } + return 0, CorruptInputError(f.roffset) +} + +// Flush any buffered output to the underlying writer. +func (f *decompressor) flush(step func(*decompressor)) { + f.toRead = f.hist[f.hw:f.hp] + f.woffset += int64(f.hp - f.hw) + f.hw = f.hp + if f.hp == len(f.hist) { + f.hp = 0 + f.hw = 0 + f.hfull = true + } + f.step = step +} + +func makeReader(r io.Reader) Reader { + if rr, ok := r.(Reader); ok { + return rr + } + return bufio.NewReader(r) +} + +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. It is the caller's +// responsibility to call Close on the ReadCloser when +// finished reading. +func NewReader(r io.Reader) io.ReadCloser { + var f decompressor + f.r = makeReader(r) + f.step = (*decompressor).nextBlock + return &f +} + +// NewReaderDict is like NewReader but initializes the reader +// with a preset dictionary. The returned Reader behaves as if +// the uncompressed data stream started with the given dictionary, +// which has already been read. NewReaderDict is typically used +// to read data compressed by NewWriterDict. +func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { + var f decompressor + f.setDict(dict) + f.r = makeReader(r) + f.step = (*decompressor).nextBlock + return &f +} diff --git a/src/pkg/compress/flate/reverse_bits.go b/src/pkg/compress/flate/reverse_bits.go new file mode 100644 index 000000000..c1a02720d --- /dev/null +++ b/src/pkg/compress/flate/reverse_bits.go @@ -0,0 +1,48 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +var reverseByte = [256]byte{ + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, + 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, + 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, + 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, + 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, + 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, + 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, + 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, + 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, + 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, + 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, + 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, + 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, + 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, + 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, + 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, + 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, +} + +func reverseUint16(v uint16) uint16 { + return uint16(reverseByte[v>>8]) | uint16(reverseByte[v&0xFF])<<8 +} + +func reverseBits(number uint16, bitLength byte) uint16 { + return reverseUint16(number << uint8(16-bitLength)) +} diff --git a/src/pkg/compress/flate/token.go b/src/pkg/compress/flate/token.go new file mode 100644 index 000000000..38aea5fa6 --- /dev/null +++ b/src/pkg/compress/flate/token.go @@ -0,0 +1,103 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +const ( + // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused + // 8 bits: xlength = length - MIN_MATCH_LENGTH + // 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal + lengthShift = 22 + offsetMask = 1<<lengthShift - 1 + typeMask = 3 << 30 + literalType = 0 << 30 + matchType = 1 << 30 +) + +// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH) +// is lengthCodes[length - MIN_MATCH_LENGTH] +var lengthCodes = [...]uint32{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, + 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, + 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, + 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 28, +} + +var offsetCodes = [...]uint32{ + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +} + +type token uint32 + +// Convert a literal into a literal token. +func literalToken(literal uint32) token { return token(literalType + literal) } + +// Convert a < xlength, xoffset > pair into a match token. +func matchToken(xlength uint32, xoffset uint32) token { + return token(matchType + xlength<<lengthShift + xoffset) +} + +// Returns the type of a token +func (t token) typ() uint32 { return uint32(t) & typeMask } + +// Returns the literal of a literal token +func (t token) literal() uint32 { return uint32(t - literalType) } + +// Returns the extra offset of a match token +func (t token) offset() uint32 { return uint32(t) & offsetMask } + +func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) } + +func lengthCode(len uint32) uint32 { return lengthCodes[len] } + +// Returns the offset code corresponding to a specific offset +func offsetCode(off uint32) uint32 { + const n = uint32(len(offsetCodes)) + switch { + case off < n: + return offsetCodes[off] + case off>>7 < n: + return offsetCodes[off>>7] + 14 + default: + return offsetCodes[off>>14] + 28 + } + panic("unreachable") +} diff --git a/src/pkg/compress/flate/util.go b/src/pkg/compress/flate/util.go new file mode 100644 index 000000000..aca5c78b2 --- /dev/null +++ b/src/pkg/compress/flate/util.go @@ -0,0 +1,72 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +func min(left int, right int) int { + if left < right { + return left + } + return right +} + +func minInt32(left int32, right int32) int32 { + if left < right { + return left + } + return right +} + +func max(left int, right int) int { + if left > right { + return left + } + return right +} + +func fillInts(a []int, value int) { + for i := range a { + a[i] = value + } +} + +func fillInt32s(a []int32, value int32) { + for i := range a { + a[i] = value + } +} + +func fillBytes(a []byte, value byte) { + for i := range a { + a[i] = value + } +} + +func fillInt8s(a []int8, value int8) { + for i := range a { + a[i] = value + } +} + +func fillUint8s(a []uint8, value uint8) { + for i := range a { + a[i] = value + } +} + +func copyInt8s(dst []int8, src []int8) int { + cnt := min(len(dst), len(src)) + for i := 0; i < cnt; i++ { + dst[i] = src[i] + } + return cnt +} + +func copyUint8s(dst []uint8, src []uint8) int { + cnt := min(len(dst), len(src)) + for i := 0; i < cnt; i++ { + dst[i] = src[i] + } + return cnt +} diff --git a/src/pkg/compress/gzip/Makefile b/src/pkg/compress/gzip/Makefile new file mode 100644 index 000000000..b671fc72c --- /dev/null +++ b/src/pkg/compress/gzip/Makefile @@ -0,0 +1,12 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=compress/gzip +GOFILES=\ + gunzip.go\ + gzip.go\ + +include ../../../Make.pkg diff --git a/src/pkg/compress/gzip/gunzip.go b/src/pkg/compress/gzip/gunzip.go new file mode 100644 index 000000000..6ac9293d7 --- /dev/null +++ b/src/pkg/compress/gzip/gunzip.go @@ -0,0 +1,230 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gzip implements reading and writing of gzip format compressed files, +// as specified in RFC 1952. +package gzip + +import ( + "bufio" + "compress/flate" + "hash" + "hash/crc32" + "io" + "os" +) + +// BUG(nigeltao): Comments and Names don't properly map UTF-8 character codes outside of +// the 0x00-0x7f range to ISO 8859-1 (Latin-1). + +const ( + gzipID1 = 0x1f + gzipID2 = 0x8b + gzipDeflate = 8 + flagText = 1 << 0 + flagHdrCrc = 1 << 1 + flagExtra = 1 << 2 + flagName = 1 << 3 + flagComment = 1 << 4 +) + +func makeReader(r io.Reader) flate.Reader { + if rr, ok := r.(flate.Reader); ok { + return rr + } + return bufio.NewReader(r) +} + +var HeaderError = os.NewError("invalid gzip header") +var ChecksumError = os.NewError("gzip checksum error") + +// The gzip file stores a header giving metadata about the compressed file. +// That header is exposed as the fields of the Compressor and Decompressor structs. +type Header struct { + Comment string // comment + Extra []byte // "extra data" + Mtime uint32 // modification time (seconds since January 1, 1970) + Name string // file name + OS byte // operating system type +} + +// An Decompressor is an io.Reader that can be read to retrieve +// uncompressed data from a gzip-format compressed file. +// +// In general, a gzip file can be a concatenation of gzip files, +// each with its own header. Reads from the Decompressor +// return the concatenation of the uncompressed data of each. +// Only the first header is recorded in the Decompressor fields. +// +// Gzip files store a length and checksum of the uncompressed data. +// The Decompressor will return a ChecksumError when Read +// reaches the end of the uncompressed data if it does not +// have the expected length or checksum. Clients should treat data +// returned by Read as tentative until they receive the successful +// (zero length, nil error) Read marking the end of the data. +type Decompressor struct { + Header + r flate.Reader + decompressor io.ReadCloser + digest hash.Hash32 + size uint32 + flg byte + buf [512]byte + err os.Error +} + +// NewReader creates a new Decompressor reading the given reader. +// The implementation buffers input and may read more data than necessary from r. +// It is the caller's responsibility to call Close on the Decompressor when done. +func NewReader(r io.Reader) (*Decompressor, os.Error) { + z := new(Decompressor) + z.r = makeReader(r) + z.digest = crc32.NewIEEE() + if err := z.readHeader(true); err != nil { + z.err = err + return nil, err + } + return z, nil +} + +// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950). +func get4(p []byte) uint32 { + return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 +} + +func (z *Decompressor) readString() (string, os.Error) { + var err os.Error + for i := 0; ; i++ { + if i >= len(z.buf) { + return "", HeaderError + } + z.buf[i], err = z.r.ReadByte() + if err != nil { + return "", err + } + if z.buf[i] == 0 { + // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). + // TODO(nigeltao): Convert from ISO 8859-1 (Latin-1) to UTF-8. + return string(z.buf[0:i]), nil + } + } + panic("not reached") +} + +func (z *Decompressor) read2() (uint32, os.Error) { + _, err := io.ReadFull(z.r, z.buf[0:2]) + if err != nil { + return 0, err + } + return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil +} + +func (z *Decompressor) readHeader(save bool) os.Error { + _, err := io.ReadFull(z.r, z.buf[0:10]) + if err != nil { + return err + } + if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate { + return HeaderError + } + z.flg = z.buf[3] + if save { + z.Mtime = get4(z.buf[4:8]) + // z.buf[8] is xfl, ignored + z.OS = z.buf[9] + } + z.digest.Reset() + z.digest.Write(z.buf[0:10]) + + if z.flg&flagExtra != 0 { + n, err := z.read2() + if err != nil { + return err + } + data := make([]byte, n) + if _, err = io.ReadFull(z.r, data); err != nil { + return err + } + if save { + z.Extra = data + } + } + + var s string + if z.flg&flagName != 0 { + if s, err = z.readString(); err != nil { + return err + } + if save { + z.Name = s + } + } + + if z.flg&flagComment != 0 { + if s, err = z.readString(); err != nil { + return err + } + if save { + z.Comment = s + } + } + + if z.flg&flagHdrCrc != 0 { + n, err := z.read2() + if err != nil { + return err + } + sum := z.digest.Sum32() & 0xFFFF + if n != sum { + return HeaderError + } + } + + z.digest.Reset() + z.decompressor = flate.NewReader(z.r) + return nil +} + +func (z *Decompressor) Read(p []byte) (n int, err os.Error) { + if z.err != nil { + return 0, z.err + } + if len(p) == 0 { + return 0, nil + } + + n, err = z.decompressor.Read(p) + z.digest.Write(p[0:n]) + z.size += uint32(n) + if n != 0 || err != os.EOF { + z.err = err + return + } + + // Finished file; check checksum + size. + if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil { + z.err = err + return 0, err + } + crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8]) + sum := z.digest.Sum32() + if sum != crc32 || isize != z.size { + z.err = ChecksumError + return 0, z.err + } + + // File is ok; is there another? + if err = z.readHeader(false); err != nil { + z.err = err + return + } + + // Yes. Reset and read from it. + z.digest.Reset() + z.size = 0 + return z.Read(p) +} + +// Calling Close does not close the wrapped io.Reader originally passed to NewReader. +func (z *Decompressor) Close() os.Error { return z.decompressor.Close() } diff --git a/src/pkg/compress/gzip/gunzip_test.go b/src/pkg/compress/gzip/gunzip_test.go new file mode 100644 index 000000000..1c08c7374 --- /dev/null +++ b/src/pkg/compress/gzip/gunzip_test.go @@ -0,0 +1,305 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "bytes" + "io" + "os" + "testing" +) + +type gunzipTest struct { + name string + desc string + raw string + gzip []byte + err os.Error +} + +var gunzipTests = []gunzipTest{ + { // has 1 empty fixed-huffman block + "empty.txt", + "empty.txt", + "", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xf7, 0x5e, 0x14, 0x4a, + 0x00, 0x03, 0x65, 0x6d, 0x70, 0x74, 0x79, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + nil, + }, + { // has 1 non-empty fixed huffman block + "hello.txt", + "hello.txt", + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, + 0x00, 0x00, + }, + nil, + }, + { // concatenation + "hello.txt", + "hello.txt x2", + "hello world\n" + + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, + 0x00, 0x00, + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, + 0x00, 0x00, + }, + nil, + }, + { // has a fixed huffman block with some length-distance pairs + "shesells.txt", + "shesells.txt", + "she sells seashells by the seashore\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0x72, 0x66, 0x8b, 0x4a, + 0x00, 0x03, 0x73, 0x68, 0x65, 0x73, 0x65, 0x6c, + 0x6c, 0x73, 0x2e, 0x74, 0x78, 0x74, 0x00, 0x2b, + 0xce, 0x48, 0x55, 0x28, 0x4e, 0xcd, 0xc9, 0x29, + 0x06, 0x92, 0x89, 0xc5, 0x19, 0x60, 0x56, 0x52, + 0xa5, 0x42, 0x09, 0x58, 0x18, 0x28, 0x90, 0x5f, + 0x94, 0xca, 0x05, 0x00, 0x76, 0xb0, 0x3b, 0xeb, + 0x24, 0x00, 0x00, 0x00, + }, + nil, + }, + { // has dynamic huffman blocks + "gettysburg", + "gettysburg", + " Four score and seven years ago our fathers brought forth on\n" + + "this continent, a new nation, conceived in Liberty, and dedicated\n" + + "to the proposition that all men are created equal.\n" + + " Now we are engaged in a great Civil War, testing whether that\n" + + "nation, or any nation so conceived and so dedicated, can long\n" + + "endure.\n" + + " We are met on a great battle-field of that war.\n" + + " We have come to dedicate a portion of that field, as a final\n" + + "resting place for those who here gave their lives that that\n" + + "nation might live. It is altogether fitting and proper that\n" + + "we should do this.\n" + + " But, in a larger sense, we can not dedicate — we can not\n" + + "consecrate — we can not hallow — this ground.\n" + + " The brave men, living and dead, who struggled here, have\n" + + "consecrated it, far above our poor power to add or detract.\n" + + "The world will little note, nor long remember what we say here,\n" + + "but it can never forget what they did here.\n" + + " It is for us the living, rather, to be dedicated here to the\n" + + "unfinished work which they who fought here have thus far so\n" + + "nobly advanced. It is rather for us to be here dedicated to\n" + + "the great task remaining before us — that from these honored\n" + + "dead we take increased devotion to that cause for which they\n" + + "gave the last full measure of devotion —\n" + + " that we here highly resolve that these dead shall not have\n" + + "died in vain — that this nation, under God, shall have a new\n" + + "birth of freedom — and that government of the people, by the\n" + + "people, for the people, shall not perish from this earth.\n" + + "\n" + + "Abraham Lincoln, November 19, 1863, Gettysburg, Pennsylvania\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xd1, 0x12, 0x2b, 0x4a, + 0x00, 0x03, 0x67, 0x65, 0x74, 0x74, 0x79, 0x73, + 0x62, 0x75, 0x72, 0x67, 0x00, 0x65, 0x54, 0xcd, + 0x6e, 0xd4, 0x30, 0x10, 0xbe, 0xfb, 0x29, 0xe6, + 0x01, 0x42, 0xa5, 0x0a, 0x09, 0xc1, 0x11, 0x90, + 0x40, 0x48, 0xa8, 0xe2, 0x80, 0xd4, 0xf3, 0x24, + 0x9e, 0x24, 0x56, 0xbd, 0x9e, 0xc5, 0x76, 0x76, + 0x95, 0x1b, 0x0f, 0xc1, 0x13, 0xf2, 0x24, 0x7c, + 0x63, 0x77, 0x9b, 0x4a, 0x5c, 0xaa, 0x6e, 0x6c, + 0xcf, 0x7c, 0x7f, 0x33, 0x44, 0x5f, 0x74, 0xcb, + 0x54, 0x26, 0xcd, 0x42, 0x9c, 0x3c, 0x15, 0xb9, + 0x48, 0xa2, 0x5d, 0x38, 0x17, 0xe2, 0x45, 0xc9, + 0x4e, 0x67, 0xae, 0xab, 0xe0, 0xf7, 0x98, 0x75, + 0x5b, 0xd6, 0x4a, 0xb3, 0xe6, 0xba, 0x92, 0x26, + 0x57, 0xd7, 0x50, 0x68, 0xd2, 0x54, 0x43, 0x92, + 0x54, 0x07, 0x62, 0x4a, 0x72, 0xa5, 0xc4, 0x35, + 0x68, 0x1a, 0xec, 0x60, 0x92, 0x70, 0x11, 0x4f, + 0x21, 0xd1, 0xf7, 0x30, 0x4a, 0xae, 0xfb, 0xd0, + 0x9a, 0x78, 0xf1, 0x61, 0xe2, 0x2a, 0xde, 0x55, + 0x25, 0xd4, 0xa6, 0x73, 0xd6, 0xb3, 0x96, 0x60, + 0xef, 0xf0, 0x9b, 0x2b, 0x71, 0x8c, 0x74, 0x02, + 0x10, 0x06, 0xac, 0x29, 0x8b, 0xdd, 0x25, 0xf9, + 0xb5, 0x71, 0xbc, 0x73, 0x44, 0x0f, 0x7a, 0xa5, + 0xab, 0xb4, 0x33, 0x49, 0x0b, 0x2f, 0xbd, 0x03, + 0xd3, 0x62, 0x17, 0xe9, 0x73, 0xb8, 0x84, 0x48, + 0x8f, 0x9c, 0x07, 0xaa, 0x52, 0x00, 0x6d, 0xa1, + 0xeb, 0x2a, 0xc6, 0xa0, 0x95, 0x76, 0x37, 0x78, + 0x9a, 0x81, 0x65, 0x7f, 0x46, 0x4b, 0x45, 0x5f, + 0xe1, 0x6d, 0x42, 0xe8, 0x01, 0x13, 0x5c, 0x38, + 0x51, 0xd4, 0xb4, 0x38, 0x49, 0x7e, 0xcb, 0x62, + 0x28, 0x1e, 0x3b, 0x82, 0x93, 0x54, 0x48, 0xf1, + 0xd2, 0x7d, 0xe4, 0x5a, 0xa3, 0xbc, 0x99, 0x83, + 0x44, 0x4f, 0x3a, 0x77, 0x36, 0x57, 0xce, 0xcf, + 0x2f, 0x56, 0xbe, 0x80, 0x90, 0x9e, 0x84, 0xea, + 0x51, 0x1f, 0x8f, 0xcf, 0x90, 0xd4, 0x60, 0xdc, + 0x5e, 0xb4, 0xf7, 0x10, 0x0b, 0x26, 0xe0, 0xff, + 0xc4, 0xd1, 0xe5, 0x67, 0x2e, 0xe7, 0xc8, 0x93, + 0x98, 0x05, 0xb8, 0xa8, 0x45, 0xc0, 0x4d, 0x09, + 0xdc, 0x84, 0x16, 0x2b, 0x0d, 0x9a, 0x21, 0x53, + 0x04, 0x8b, 0xd2, 0x0b, 0xbd, 0xa2, 0x4c, 0xa7, + 0x60, 0xee, 0xd9, 0xe1, 0x1d, 0xd1, 0xb7, 0x4a, + 0x30, 0x8f, 0x63, 0xd5, 0xa5, 0x8b, 0x33, 0x87, + 0xda, 0x1a, 0x18, 0x79, 0xf3, 0xe3, 0xa6, 0x17, + 0x94, 0x2e, 0xab, 0x6e, 0xa0, 0xe3, 0xcd, 0xac, + 0x50, 0x8c, 0xca, 0xa7, 0x0d, 0x76, 0x37, 0xd1, + 0x23, 0xe7, 0x05, 0x57, 0x8b, 0xa4, 0x22, 0x83, + 0xd9, 0x62, 0x52, 0x25, 0xad, 0x07, 0xbb, 0xbf, + 0xbf, 0xff, 0xbc, 0xfa, 0xee, 0x20, 0x73, 0x91, + 0x29, 0xff, 0x7f, 0x02, 0x71, 0x62, 0x84, 0xb5, + 0xf6, 0xb5, 0x25, 0x6b, 0x41, 0xde, 0x92, 0xb7, + 0x76, 0x3f, 0x91, 0x91, 0x31, 0x1b, 0x41, 0x84, + 0x62, 0x30, 0x0a, 0x37, 0xa4, 0x5e, 0x18, 0x3a, + 0x99, 0x08, 0xa5, 0xe6, 0x6d, 0x59, 0x22, 0xec, + 0x33, 0x39, 0x86, 0x26, 0xf5, 0xab, 0x66, 0xc8, + 0x08, 0x20, 0xcf, 0x0c, 0xd7, 0x47, 0x45, 0x21, + 0x0b, 0xf6, 0x59, 0xd5, 0xfe, 0x5c, 0x8d, 0xaa, + 0x12, 0x7b, 0x6f, 0xa1, 0xf0, 0x52, 0x33, 0x4f, + 0xf5, 0xce, 0x59, 0xd3, 0xab, 0x66, 0x10, 0xbf, + 0x06, 0xc4, 0x31, 0x06, 0x73, 0xd6, 0x80, 0xa2, + 0x78, 0xc2, 0x45, 0xcb, 0x03, 0x65, 0x39, 0xc9, + 0x09, 0xd1, 0x06, 0x04, 0x33, 0x1a, 0x5a, 0xf1, + 0xde, 0x01, 0xb8, 0x71, 0x83, 0xc4, 0xb5, 0xb3, + 0xc3, 0x54, 0x65, 0x33, 0x0d, 0x5a, 0xf7, 0x9b, + 0x90, 0x7c, 0x27, 0x1f, 0x3a, 0x58, 0xa3, 0xd8, + 0xfd, 0x30, 0x5f, 0xb7, 0xd2, 0x66, 0xa2, 0x93, + 0x1c, 0x28, 0xb7, 0xe9, 0x1b, 0x0c, 0xe1, 0x28, + 0x47, 0x26, 0xbb, 0xe9, 0x7d, 0x7e, 0xdc, 0x96, + 0x10, 0x92, 0x50, 0x56, 0x7c, 0x06, 0xe2, 0x27, + 0xb4, 0x08, 0xd3, 0xda, 0x7b, 0x98, 0x34, 0x73, + 0x9f, 0xdb, 0xf6, 0x62, 0xed, 0x31, 0x41, 0x13, + 0xd3, 0xa2, 0xa8, 0x4b, 0x3a, 0xc6, 0x1d, 0xe4, + 0x2f, 0x8c, 0xf8, 0xfb, 0x97, 0x64, 0xf4, 0xb6, + 0x2f, 0x80, 0x5a, 0xf3, 0x56, 0xe0, 0x40, 0x50, + 0xd5, 0x19, 0xd0, 0x1e, 0xfc, 0xca, 0xe5, 0xc9, + 0xd4, 0x60, 0x00, 0x81, 0x2e, 0xa3, 0xcc, 0xb6, + 0x52, 0xf0, 0xb4, 0xdb, 0x69, 0x99, 0xce, 0x7a, + 0x32, 0x4c, 0x08, 0xed, 0xaa, 0x10, 0x10, 0xe3, + 0x6f, 0xee, 0x99, 0x68, 0x95, 0x9f, 0x04, 0x71, + 0xb2, 0x49, 0x2f, 0x62, 0xa6, 0x5e, 0xb4, 0xef, + 0x02, 0xed, 0x4f, 0x27, 0xde, 0x4a, 0x0f, 0xfd, + 0xc1, 0xcc, 0xdd, 0x02, 0x8f, 0x08, 0x16, 0x54, + 0xdf, 0xda, 0xca, 0xe0, 0x82, 0xf1, 0xb4, 0x31, + 0x7a, 0xa9, 0x81, 0xfe, 0x90, 0xb7, 0x3e, 0xdb, + 0xd3, 0x35, 0xc0, 0x20, 0x80, 0x33, 0x46, 0x4a, + 0x63, 0xab, 0xd1, 0x0d, 0x29, 0xd2, 0xe2, 0x84, + 0xb8, 0xdb, 0xfa, 0xe9, 0x89, 0x44, 0x86, 0x7c, + 0xe8, 0x0b, 0xe6, 0x02, 0x6a, 0x07, 0x9b, 0x96, + 0xd0, 0xdb, 0x2e, 0x41, 0x4c, 0xa1, 0xd5, 0x57, + 0x45, 0x14, 0xfb, 0xe3, 0xa6, 0x72, 0x5b, 0x87, + 0x6e, 0x0c, 0x6d, 0x5b, 0xce, 0xe0, 0x2f, 0xe2, + 0x21, 0x81, 0x95, 0xb0, 0xe8, 0xb6, 0x32, 0x0b, + 0xb2, 0x98, 0x13, 0x52, 0x5d, 0xfb, 0xec, 0x63, + 0x17, 0x8a, 0x9e, 0x23, 0x22, 0x36, 0xee, 0xcd, + 0xda, 0xdb, 0xcf, 0x3e, 0xf1, 0xc7, 0xf1, 0x01, + 0x12, 0x93, 0x0a, 0xeb, 0x6f, 0xf2, 0x02, 0x15, + 0x96, 0x77, 0x5d, 0xef, 0x9c, 0xfb, 0x88, 0x91, + 0x59, 0xf9, 0x84, 0xdd, 0x9b, 0x26, 0x8d, 0x80, + 0xf9, 0x80, 0x66, 0x2d, 0xac, 0xf7, 0x1f, 0x06, + 0xba, 0x7f, 0xff, 0xee, 0xed, 0x40, 0x5f, 0xa5, + 0xd6, 0xbd, 0x8c, 0x5b, 0x46, 0xd2, 0x7e, 0x48, + 0x4a, 0x65, 0x8f, 0x08, 0x42, 0x60, 0xf7, 0x0f, + 0xb9, 0x16, 0x0b, 0x0c, 0x1a, 0x06, 0x00, 0x00, + }, + nil, + }, + { // has 1 non-empty fixed huffman block then garbage + "hello.txt", + "hello.txt + garbage", + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, + 0x00, 0x00, 'g', 'a', 'r', 'b', 'a', 'g', 'e', '!', '!', '!', + }, + HeaderError, + }, + { // has 1 non-empty fixed huffman block not enough header + "hello.txt", + "hello.txt + garbage", + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, + 0x00, 0x00, gzipID1, + }, + io.ErrUnexpectedEOF, + }, + { // has 1 non-empty fixed huffman block but corrupt checksum + "hello.txt", + "hello.txt + corrupt checksum", + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0c, 0x00, + 0x00, 0x00, + }, + ChecksumError, + }, + { // has 1 non-empty fixed huffman block but corrupt size + "hello.txt", + "hello.txt + corrupt size", + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0xff, 0x00, + 0x00, 0x00, + }, + ChecksumError, + }, +} + +func TestDecompressor(t *testing.T) { + b := new(bytes.Buffer) + for _, tt := range gunzipTests { + in := bytes.NewBuffer(tt.gzip) + gzip, err := NewReader(in) + if err != nil { + t.Errorf("%s: NewReader: %s", tt.name, err) + continue + } + defer gzip.Close() + if tt.name != gzip.Name { + t.Errorf("%s: got name %s", tt.name, gzip.Name) + } + b.Reset() + n, err := io.Copy(b, gzip) + if err != tt.err { + t.Errorf("%s: io.Copy: %v want %v", tt.name, err, tt.err) + } + s := b.String() + if s != tt.raw { + t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.name, n, s, len(tt.raw), tt.raw) + } + } +} diff --git a/src/pkg/compress/gzip/gzip.go b/src/pkg/compress/gzip/gzip.go new file mode 100644 index 000000000..8860d10af --- /dev/null +++ b/src/pkg/compress/gzip/gzip.go @@ -0,0 +1,187 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "compress/flate" + "hash" + "hash/crc32" + "io" + "os" +) + +// These constants are copied from the flate package, so that code that imports +// "compress/gzip" does not also have to import "compress/flate". +const ( + NoCompression = flate.NoCompression + BestSpeed = flate.BestSpeed + BestCompression = flate.BestCompression + DefaultCompression = flate.DefaultCompression +) + +// A Compressor is an io.WriteCloser that satisfies writes by compressing data written +// to its wrapped io.Writer. +type Compressor struct { + Header + w io.Writer + level int + compressor io.WriteCloser + digest hash.Hash32 + size uint32 + closed bool + buf [10]byte + err os.Error +} + +// NewWriter calls NewWriterLevel with the default compression level. +func NewWriter(w io.Writer) (*Compressor, os.Error) { + return NewWriterLevel(w, DefaultCompression) +} + +// NewWriterLevel creates a new Compressor writing to the given writer. +// Writes may be buffered and not flushed until Close. +// Callers that wish to set the fields in Compressor.Header must +// do so before the first call to Write or Close. +// It is the caller's responsibility to call Close on the WriteCloser when done. +// level is the compression level, which can be DefaultCompression, NoCompression, +// or any integer value between BestSpeed and BestCompression (inclusive). +func NewWriterLevel(w io.Writer, level int) (*Compressor, os.Error) { + z := new(Compressor) + z.OS = 255 // unknown + z.w = w + z.level = level + z.digest = crc32.NewIEEE() + return z, nil +} + +// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950). +func put2(p []byte, v uint16) { + p[0] = uint8(v >> 0) + p[1] = uint8(v >> 8) +} + +func put4(p []byte, v uint32) { + p[0] = uint8(v >> 0) + p[1] = uint8(v >> 8) + p[2] = uint8(v >> 16) + p[3] = uint8(v >> 24) +} + +// writeBytes writes a length-prefixed byte slice to z.w. +func (z *Compressor) writeBytes(b []byte) os.Error { + if len(b) > 0xffff { + return os.NewError("gzip.Write: Extra data is too large") + } + put2(z.buf[0:2], uint16(len(b))) + _, err := z.w.Write(z.buf[0:2]) + if err != nil { + return err + } + _, err = z.w.Write(b) + return err +} + +// writeString writes a string (in ISO 8859-1 (Latin-1) format) to z.w. +func (z *Compressor) writeString(s string) os.Error { + // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). + // TODO(nigeltao): Convert from UTF-8 to ISO 8859-1 (Latin-1). + for _, v := range s { + if v == 0 || v > 0x7f { + return os.NewError("gzip.Write: non-ASCII header string") + } + } + _, err := io.WriteString(z.w, s) + if err != nil { + return err + } + // GZIP strings are NUL-terminated. + z.buf[0] = 0 + _, err = z.w.Write(z.buf[0:1]) + return err +} + +func (z *Compressor) Write(p []byte) (int, os.Error) { + if z.err != nil { + return 0, z.err + } + var n int + // Write the GZIP header lazily. + if z.compressor == nil { + z.buf[0] = gzipID1 + z.buf[1] = gzipID2 + z.buf[2] = gzipDeflate + z.buf[3] = 0 + if z.Extra != nil { + z.buf[3] |= 0x04 + } + if z.Name != "" { + z.buf[3] |= 0x08 + } + if z.Comment != "" { + z.buf[3] |= 0x10 + } + put4(z.buf[4:8], z.Mtime) + if z.level == BestCompression { + z.buf[8] = 2 + } else if z.level == BestSpeed { + z.buf[8] = 4 + } else { + z.buf[8] = 0 + } + z.buf[9] = z.OS + n, z.err = z.w.Write(z.buf[0:10]) + if z.err != nil { + return n, z.err + } + if z.Extra != nil { + z.err = z.writeBytes(z.Extra) + if z.err != nil { + return n, z.err + } + } + if z.Name != "" { + z.err = z.writeString(z.Name) + if z.err != nil { + return n, z.err + } + } + if z.Comment != "" { + z.err = z.writeString(z.Comment) + if z.err != nil { + return n, z.err + } + } + z.compressor = flate.NewWriter(z.w, z.level) + } + z.size += uint32(len(p)) + z.digest.Write(p) + n, z.err = z.compressor.Write(p) + return n, z.err +} + +// Calling Close does not close the wrapped io.Writer originally passed to NewWriter. +func (z *Compressor) Close() os.Error { + if z.err != nil { + return z.err + } + if z.closed { + return nil + } + z.closed = true + if z.compressor == nil { + z.Write(nil) + if z.err != nil { + return z.err + } + } + z.err = z.compressor.Close() + if z.err != nil { + return z.err + } + put4(z.buf[0:4], z.digest.Sum32()) + put4(z.buf[4:8], z.size) + _, z.err = z.w.Write(z.buf[0:8]) + return z.err +} diff --git a/src/pkg/compress/gzip/gzip_test.go b/src/pkg/compress/gzip/gzip_test.go new file mode 100644 index 000000000..121e627e6 --- /dev/null +++ b/src/pkg/compress/gzip/gzip_test.go @@ -0,0 +1,84 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "io" + "io/ioutil" + "testing" +) + +// pipe creates two ends of a pipe that gzip and gunzip, and runs dfunc at the +// writer end and cfunc at the reader end. +func pipe(t *testing.T, dfunc func(*Compressor), cfunc func(*Decompressor)) { + piper, pipew := io.Pipe() + defer piper.Close() + go func() { + defer pipew.Close() + compressor, err := NewWriter(pipew) + if err != nil { + t.Fatalf("%v", err) + } + defer compressor.Close() + dfunc(compressor) + }() + decompressor, err := NewReader(piper) + if err != nil { + t.Fatalf("%v", err) + } + defer decompressor.Close() + cfunc(decompressor) +} + +// Tests that an empty payload still forms a valid GZIP stream. +func TestEmpty(t *testing.T) { + pipe(t, + func(compressor *Compressor) {}, + func(decompressor *Decompressor) { + b, err := ioutil.ReadAll(decompressor) + if err != nil { + t.Fatalf("%v", err) + } + if len(b) != 0 { + t.Fatalf("did not read an empty slice") + } + }) +} + +// Tests that gzipping and then gunzipping is the identity function. +func TestWriter(t *testing.T) { + pipe(t, + func(compressor *Compressor) { + compressor.Comment = "comment" + compressor.Extra = []byte("extra") + compressor.Mtime = 1e8 + compressor.Name = "name" + _, err := compressor.Write([]byte("payload")) + if err != nil { + t.Fatalf("%v", err) + } + }, + func(decompressor *Decompressor) { + b, err := ioutil.ReadAll(decompressor) + if err != nil { + t.Fatalf("%v", err) + } + if string(b) != "payload" { + t.Fatalf("payload is %q, want %q", string(b), "payload") + } + if decompressor.Comment != "comment" { + t.Fatalf("comment is %q, want %q", decompressor.Comment, "comment") + } + if string(decompressor.Extra) != "extra" { + t.Fatalf("extra is %q, want %q", decompressor.Extra, "extra") + } + if decompressor.Mtime != 1e8 { + t.Fatalf("mtime is %d, want %d", decompressor.Mtime, uint32(1e8)) + } + if decompressor.Name != "name" { + t.Fatalf("name is %q, want %q", decompressor.Name, "name") + } + }) +} diff --git a/src/pkg/compress/lzw/Makefile b/src/pkg/compress/lzw/Makefile new file mode 100644 index 000000000..28f5e6abc --- /dev/null +++ b/src/pkg/compress/lzw/Makefile @@ -0,0 +1,12 @@ +# Copyright 2011 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=compress/lzw +GOFILES=\ + reader.go\ + writer.go\ + +include ../../../Make.pkg diff --git a/src/pkg/compress/lzw/reader.go b/src/pkg/compress/lzw/reader.go new file mode 100644 index 000000000..21231c8e5 --- /dev/null +++ b/src/pkg/compress/lzw/reader.go @@ -0,0 +1,253 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package lzw implements the Lempel-Ziv-Welch compressed data format, +// described in T. A. Welch, ``A Technique for High-Performance Data +// Compression'', Computer, 17(6) (June 1984), pp 8-19. +// +// In particular, it implements LZW as used by the GIF, TIFF and PDF file +// formats, which means variable-width codes up to 12 bits and the first +// two non-literal codes are a clear code and an EOF code. +package lzw + +// TODO(nigeltao): check that TIFF and PDF use LZW in the same way as GIF, +// modulo LSB/MSB packing order. + +import ( + "bufio" + "fmt" + "io" + "os" +) + +// Order specifies the bit ordering in an LZW data stream. +type Order int + +const ( + // LSB means Least Significant Bits first, as used in the GIF file format. + LSB Order = iota + // MSB means Most Significant Bits first, as used in the TIFF and PDF + // file formats. + MSB +) + +const ( + maxWidth = 12 + decoderInvalidCode = 0xffff + flushBuffer = 1 << maxWidth +) + +// decoder is the state from which the readXxx method converts a byte +// stream into a code stream. +type decoder struct { + r io.ByteReader + bits uint32 + nBits uint + width uint + read func(*decoder) (uint16, os.Error) // readLSB or readMSB + litWidth int // width in bits of literal codes + err os.Error + + // The first 1<<litWidth codes are literal codes. + // The next two codes mean clear and EOF. + // Other valid codes are in the range [lo, hi] where lo := clear + 2, + // with the upper bound incrementing on each code seen. + // overflow is the code at which hi overflows the code width. + // last is the most recently seen code, or decoderInvalidCode. + clear, eof, hi, overflow, last uint16 + + // Each code c in [lo, hi] expands to two or more bytes. For c != hi: + // suffix[c] is the last of these bytes. + // prefix[c] is the code for all but the last byte. + // This code can either be a literal code or another code in [lo, c). + // The c == hi case is a special case. + suffix [1 << maxWidth]uint8 + prefix [1 << maxWidth]uint16 + + // output is the temporary output buffer. + // Literal codes are accumulated from the start of the buffer. + // Non-literal codes decode to a sequence of suffixes that are first + // written right-to-left from the end of the buffer before being copied + // to the start of the buffer. + // It is flushed when it contains >= 1<<maxWidth bytes, + // so that there is always room to decode an entire code. + output [2 * 1 << maxWidth]byte + o int // write index into output + toRead []byte // bytes to return from Read +} + +// readLSB returns the next code for "Least Significant Bits first" data. +func (d *decoder) readLSB() (uint16, os.Error) { + for d.nBits < d.width { + x, err := d.r.ReadByte() + if err != nil { + return 0, err + } + d.bits |= uint32(x) << d.nBits + d.nBits += 8 + } + code := uint16(d.bits & (1<<d.width - 1)) + d.bits >>= d.width + d.nBits -= d.width + return code, nil +} + +// readMSB returns the next code for "Most Significant Bits first" data. +func (d *decoder) readMSB() (uint16, os.Error) { + for d.nBits < d.width { + x, err := d.r.ReadByte() + if err != nil { + return 0, err + } + d.bits |= uint32(x) << (24 - d.nBits) + d.nBits += 8 + } + code := uint16(d.bits >> (32 - d.width)) + d.bits <<= d.width + d.nBits -= d.width + return code, nil +} + +func (d *decoder) Read(b []byte) (int, os.Error) { + for { + if len(d.toRead) > 0 { + n := copy(b, d.toRead) + d.toRead = d.toRead[n:] + return n, nil + } + if d.err != nil { + return 0, d.err + } + d.decode() + } + panic("unreachable") +} + +// decode decompresses bytes from r and leaves them in d.toRead. +// read specifies how to decode bytes into codes. +// litWidth is the width in bits of literal codes. +func (d *decoder) decode() { + // Loop over the code stream, converting codes into decompressed bytes. + for { + code, err := d.read(d) + if err != nil { + if err == os.EOF { + err = io.ErrUnexpectedEOF + } + d.err = err + return + } + switch { + case code < d.clear: + // We have a literal code. + d.output[d.o] = uint8(code) + d.o++ + if d.last != decoderInvalidCode { + // Save what the hi code expands to. + d.suffix[d.hi] = uint8(code) + d.prefix[d.hi] = d.last + } + case code == d.clear: + d.width = 1 + uint(d.litWidth) + d.hi = d.eof + d.overflow = 1 << d.width + d.last = decoderInvalidCode + continue + case code == d.eof: + d.flush() + d.err = os.EOF + return + case code <= d.hi: + c, i := code, len(d.output)-1 + if code == d.hi { + // code == hi is a special case which expands to the last expansion + // followed by the head of the last expansion. To find the head, we walk + // the prefix chain until we find a literal code. + c = d.last + for c >= d.clear { + c = d.prefix[c] + } + d.output[i] = uint8(c) + i-- + c = d.last + } + // Copy the suffix chain into output and then write that to w. + for c >= d.clear { + d.output[i] = d.suffix[c] + i-- + c = d.prefix[c] + } + d.output[i] = uint8(c) + d.o += copy(d.output[d.o:], d.output[i:]) + if d.last != decoderInvalidCode { + // Save what the hi code expands to. + d.suffix[d.hi] = uint8(c) + d.prefix[d.hi] = d.last + } + default: + d.err = os.NewError("lzw: invalid code") + return + } + d.last, d.hi = code, d.hi+1 + if d.hi >= d.overflow { + if d.width == maxWidth { + d.last = decoderInvalidCode + } else { + d.width++ + d.overflow <<= 1 + } + } + if d.o >= flushBuffer { + d.flush() + return + } + } + panic("unreachable") +} + +func (d *decoder) flush() { + d.toRead = d.output[:d.o] + d.o = 0 +} + +func (d *decoder) Close() os.Error { + d.err = os.EINVAL // in case any Reads come along + return nil +} + +// NewReader creates a new io.ReadCloser that satisfies reads by decompressing +// the data read from r. +// It is the caller's responsibility to call Close on the ReadCloser when +// finished reading. +// The number of bits to use for literal codes, litWidth, must be in the +// range [2,8] and is typically 8. +func NewReader(r io.Reader, order Order, litWidth int) io.ReadCloser { + d := new(decoder) + switch order { + case LSB: + d.read = (*decoder).readLSB + case MSB: + d.read = (*decoder).readMSB + default: + d.err = os.NewError("lzw: unknown order") + return d + } + if litWidth < 2 || 8 < litWidth { + d.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth) + return d + } + if br, ok := r.(io.ByteReader); ok { + d.r = br + } else { + d.r = bufio.NewReader(r) + } + d.litWidth = litWidth + d.width = 1 + uint(litWidth) + d.clear = uint16(1) << uint(litWidth) + d.eof, d.hi = d.clear+1, d.clear+1 + d.overflow = uint16(1) << d.width + d.last = decoderInvalidCode + + return d +} diff --git a/src/pkg/compress/lzw/reader_test.go b/src/pkg/compress/lzw/reader_test.go new file mode 100644 index 000000000..f8042b0d1 --- /dev/null +++ b/src/pkg/compress/lzw/reader_test.go @@ -0,0 +1,145 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzw + +import ( + "bytes" + "io" + "io/ioutil" + "os" + "runtime" + "strconv" + "strings" + "testing" +) + +type lzwTest struct { + desc string + raw string + compressed string + err os.Error +} + +var lzwTests = []lzwTest{ + { + "empty;LSB;8", + "", + "\x01\x01", + nil, + }, + { + "empty;MSB;8", + "", + "\x80\x80", + nil, + }, + { + "tobe;LSB;7", + "TOBEORNOTTOBEORTOBEORNOT", + "\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81", + nil, + }, + { + "tobe;LSB;8", + "TOBEORNOTTOBEORTOBEORNOT", + "\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04\x12\x34\xb8\xb0\xe0\xc1\x84\x01\x01", + nil, + }, + { + "tobe;MSB;7", + "TOBEORNOTTOBEORTOBEORNOT", + "\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81", + nil, + }, + { + "tobe;MSB;8", + "TOBEORNOTTOBEORTOBEORNOT", + "\x2a\x13\xc8\x44\x52\x79\x48\x9c\x4f\x2a\x40\xa0\x90\x68\x5c\x16\x0f\x09\x80\x80", + nil, + }, + { + "tobe-truncated;LSB;8", + "TOBEORNOTTOBEORTOBEORNOT", + "\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04", + io.ErrUnexpectedEOF, + }, + // This example comes from http://en.wikipedia.org/wiki/Graphics_Interchange_Format. + { + "gif;LSB;8", + "\x28\xff\xff\xff\x28\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", + "\x00\x51\xfc\x1b\x28\x70\xa0\xc1\x83\x01\x01", + nil, + }, + // This example comes from http://compgroups.net/comp.lang.ruby/Decompressing-LZW-compression-from-PDF-file + { + "pdf;MSB;8", + "-----A---B", + "\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01", + nil, + }, +} + +func TestReader(t *testing.T) { + b := bytes.NewBuffer(nil) + for _, tt := range lzwTests { + d := strings.Split(tt.desc, ";") + var order Order + switch d[1] { + case "LSB": + order = LSB + case "MSB": + order = MSB + default: + t.Errorf("%s: bad order %q", tt.desc, d[1]) + } + litWidth, _ := strconv.Atoi(d[2]) + rc := NewReader(strings.NewReader(tt.compressed), order, litWidth) + defer rc.Close() + b.Reset() + n, err := io.Copy(b, rc) + if err != nil { + if err != tt.err { + t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err) + } + continue + } + s := b.String() + if s != tt.raw { + t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw) + } + } +} + +func benchmarkDecoder(b *testing.B, n int) { + b.StopTimer() + b.SetBytes(int64(n)) + buf0, _ := ioutil.ReadFile("../testdata/e.txt") + buf0 = buf0[:10000] + compressed := bytes.NewBuffer(nil) + w := NewWriter(compressed, LSB, 8) + for i := 0; i < n; i += len(buf0) { + io.Copy(w, bytes.NewBuffer(buf0)) + } + w.Close() + buf1 := compressed.Bytes() + buf0, compressed, w = nil, nil, nil + runtime.GC() + b.StartTimer() + for i := 0; i < b.N; i++ { + io.Copy(ioutil.Discard, NewReader(bytes.NewBuffer(buf1), LSB, 8)) + } +} + +func BenchmarkDecoder1e4(b *testing.B) { + benchmarkDecoder(b, 1e4) +} + +func BenchmarkDecoder1e5(b *testing.B) { + benchmarkDecoder(b, 1e5) +} + +func BenchmarkDecoder1e6(b *testing.B) { + benchmarkDecoder(b, 1e6) +} diff --git a/src/pkg/compress/lzw/writer.go b/src/pkg/compress/lzw/writer.go new file mode 100644 index 000000000..87143b7aa --- /dev/null +++ b/src/pkg/compress/lzw/writer.go @@ -0,0 +1,259 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzw + +import ( + "bufio" + "fmt" + "io" + "os" +) + +// A writer is a buffered, flushable writer. +type writer interface { + WriteByte(byte) os.Error + Flush() os.Error +} + +// An errWriteCloser is an io.WriteCloser that always returns a given error. +type errWriteCloser struct { + err os.Error +} + +func (e *errWriteCloser) Write([]byte) (int, os.Error) { + return 0, e.err +} + +func (e *errWriteCloser) Close() os.Error { + return e.err +} + +const ( + // A code is a 12 bit value, stored as a uint32 when encoding to avoid + // type conversions when shifting bits. + maxCode = 1<<12 - 1 + invalidCode = 1<<32 - 1 + // There are 1<<12 possible codes, which is an upper bound on the number of + // valid hash table entries at any given point in time. tableSize is 4x that. + tableSize = 4 * 1 << 12 + tableMask = tableSize - 1 + // A hash table entry is a uint32. Zero is an invalid entry since the + // lower 12 bits of a valid entry must be a non-literal code. + invalidEntry = 0 +) + +// encoder is LZW compressor. +type encoder struct { + // w is the writer that compressed bytes are written to. + w writer + // write, bits, nBits and width are the state for converting a code stream + // into a byte stream. + write func(*encoder, uint32) os.Error + bits uint32 + nBits uint + width uint + // litWidth is the width in bits of literal codes. + litWidth uint + // hi is the code implied by the next code emission. + // overflow is the code at which hi overflows the code width. + hi, overflow uint32 + // savedCode is the accumulated code at the end of the most recent Write + // call. It is equal to invalidCode if there was no such call. + savedCode uint32 + // err is the first error encountered during writing. Closing the encoder + // will make any future Write calls return os.EINVAL. + err os.Error + // table is the hash table from 20-bit keys to 12-bit values. Each table + // entry contains key<<12|val and collisions resolve by linear probing. + // The keys consist of a 12-bit code prefix and an 8-bit byte suffix. + // The values are a 12-bit code. + table [tableSize]uint32 +} + +// writeLSB writes the code c for "Least Significant Bits first" data. +func (e *encoder) writeLSB(c uint32) os.Error { + e.bits |= c << e.nBits + e.nBits += e.width + for e.nBits >= 8 { + if err := e.w.WriteByte(uint8(e.bits)); err != nil { + return err + } + e.bits >>= 8 + e.nBits -= 8 + } + return nil +} + +// writeMSB writes the code c for "Most Significant Bits first" data. +func (e *encoder) writeMSB(c uint32) os.Error { + e.bits |= c << (32 - e.width - e.nBits) + e.nBits += e.width + for e.nBits >= 8 { + if err := e.w.WriteByte(uint8(e.bits >> 24)); err != nil { + return err + } + e.bits <<= 8 + e.nBits -= 8 + } + return nil +} + +// errOutOfCodes is an internal error that means that the encoder has run out +// of unused codes and a clear code needs to be sent next. +var errOutOfCodes = os.NewError("lzw: out of codes") + +// incHi increments e.hi and checks for both overflow and running out of +// unused codes. In the latter case, incHi sends a clear code, resets the +// encoder state and returns errOutOfCodes. +func (e *encoder) incHi() os.Error { + e.hi++ + if e.hi == e.overflow { + e.width++ + e.overflow <<= 1 + } + if e.hi == maxCode { + clear := uint32(1) << e.litWidth + if err := e.write(e, clear); err != nil { + return err + } + e.width = uint(e.litWidth) + 1 + e.hi = clear + 1 + e.overflow = clear << 1 + for i := range e.table { + e.table[i] = invalidEntry + } + return errOutOfCodes + } + return nil +} + +// Write writes a compressed representation of p to e's underlying writer. +func (e *encoder) Write(p []byte) (int, os.Error) { + if e.err != nil { + return 0, e.err + } + if len(p) == 0 { + return 0, nil + } + litMask := uint32(1<<e.litWidth - 1) + code := e.savedCode + if code == invalidCode { + // The first code sent is always a literal code. + code, p = uint32(p[0])&litMask, p[1:] + } +loop: + for _, x := range p { + literal := uint32(x) & litMask + key := code<<8 | literal + // If there is a hash table hit for this key then we continue the loop + // and do not emit a code yet. + hash := (key>>12 ^ key) & tableMask + for h, t := hash, e.table[hash]; t != invalidEntry; { + if key == t>>12 { + code = t & maxCode + continue loop + } + h = (h + 1) & tableMask + t = e.table[h] + } + // Otherwise, write the current code, and literal becomes the start of + // the next emitted code. + if e.err = e.write(e, code); e.err != nil { + return 0, e.err + } + code = literal + // Increment e.hi, the next implied code. If we run out of codes, reset + // the encoder state (including clearing the hash table) and continue. + if err := e.incHi(); err != nil { + if err == errOutOfCodes { + continue + } + e.err = err + return 0, e.err + } + // Otherwise, insert key -> e.hi into the map that e.table represents. + for { + if e.table[hash] == invalidEntry { + e.table[hash] = (key << 12) | e.hi + break + } + hash = (hash + 1) & tableMask + } + } + e.savedCode = code + return len(p), nil +} + +// Close closes the encoder, flushing any pending output. It does not close or +// flush e's underlying writer. +func (e *encoder) Close() os.Error { + if e.err != nil { + if e.err == os.EINVAL { + return nil + } + return e.err + } + // Make any future calls to Write return os.EINVAL. + e.err = os.EINVAL + // Write the savedCode if valid. + if e.savedCode != invalidCode { + if err := e.write(e, e.savedCode); err != nil { + return err + } + if err := e.incHi(); err != nil && err != errOutOfCodes { + return err + } + } + // Write the eof code. + eof := uint32(1)<<e.litWidth + 1 + if err := e.write(e, eof); err != nil { + return err + } + // Write the final bits. + if e.nBits > 0 { + if e.write == (*encoder).writeMSB { + e.bits >>= 24 + } + if err := e.w.WriteByte(uint8(e.bits)); err != nil { + return err + } + } + return e.w.Flush() +} + +// NewWriter creates a new io.WriteCloser that satisfies writes by compressing +// the data and writing it to w. +// It is the caller's responsibility to call Close on the WriteCloser when +// finished writing. +// The number of bits to use for literal codes, litWidth, must be in the +// range [2,8] and is typically 8. +func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser { + var write func(*encoder, uint32) os.Error + switch order { + case LSB: + write = (*encoder).writeLSB + case MSB: + write = (*encoder).writeMSB + default: + return &errWriteCloser{os.NewError("lzw: unknown order")} + } + if litWidth < 2 || 8 < litWidth { + return &errWriteCloser{fmt.Errorf("lzw: litWidth %d out of range", litWidth)} + } + bw, ok := w.(writer) + if !ok { + bw = bufio.NewWriter(w) + } + lw := uint(litWidth) + return &encoder{ + w: bw, + write: write, + width: 1 + lw, + litWidth: lw, + hi: 1<<lw + 1, + overflow: 1 << (lw + 1), + savedCode: invalidCode, + } +} diff --git a/src/pkg/compress/lzw/writer_test.go b/src/pkg/compress/lzw/writer_test.go new file mode 100644 index 000000000..4c5e522f9 --- /dev/null +++ b/src/pkg/compress/lzw/writer_test.go @@ -0,0 +1,132 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzw + +import ( + "io" + "io/ioutil" + "os" + "runtime" + "testing" +) + +var filenames = []string{ + "../testdata/e.txt", + "../testdata/pi.txt", +} + +// testFile tests that compressing and then decompressing the given file with +// the given options yields equivalent bytes to the original file. +func testFile(t *testing.T, fn string, order Order, litWidth int) { + // Read the file, as golden output. + golden, err := os.Open(fn) + if err != nil { + t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err) + return + } + defer golden.Close() + + // Read the file again, and push it through a pipe that compresses at the write end, and decompresses at the read end. + raw, err := os.Open(fn) + if err != nil { + t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err) + return + } + + piper, pipew := io.Pipe() + defer piper.Close() + go func() { + defer raw.Close() + defer pipew.Close() + lzww := NewWriter(pipew, order, litWidth) + defer lzww.Close() + var b [4096]byte + for { + n, err0 := raw.Read(b[:]) + if err0 != nil && err0 != os.EOF { + t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err0) + return + } + _, err1 := lzww.Write(b[:n]) + if err1 == os.EPIPE { + // Fail, but do not report the error, as some other (presumably reportable) error broke the pipe. + return + } + if err1 != nil { + t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err1) + return + } + if err0 == os.EOF { + break + } + } + }() + lzwr := NewReader(piper, order, litWidth) + defer lzwr.Close() + + // Compare the two. + b0, err0 := ioutil.ReadAll(golden) + b1, err1 := ioutil.ReadAll(lzwr) + if err0 != nil { + t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err0) + return + } + if err1 != nil { + t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err1) + return + } + if len(b1) != len(b0) { + t.Errorf("%s (order=%d litWidth=%d): length mismatch %d != %d", fn, order, litWidth, len(b1), len(b0)) + return + } + for i := 0; i < len(b0); i++ { + if b1[i] != b0[i] { + t.Errorf("%s (order=%d litWidth=%d): mismatch at %d, 0x%02x != 0x%02x\n", fn, order, litWidth, i, b1[i], b0[i]) + return + } + } +} + +func TestWriter(t *testing.T) { + for _, filename := range filenames { + for _, order := range [...]Order{LSB, MSB} { + // The test data "2.71828 etcetera" is ASCII text requiring at least 6 bits. + for _, litWidth := range [...]int{6, 7, 8} { + testFile(t, filename, order, litWidth) + } + } + } +} + +func benchmarkEncoder(b *testing.B, n int) { + b.StopTimer() + b.SetBytes(int64(n)) + buf0, _ := ioutil.ReadFile("../testdata/e.txt") + buf0 = buf0[:10000] + buf1 := make([]byte, n) + for i := 0; i < n; i += len(buf0) { + copy(buf1[i:], buf0) + } + buf0 = nil + runtime.GC() + b.StartTimer() + for i := 0; i < b.N; i++ { + w := NewWriter(ioutil.Discard, LSB, 8) + w.Write(buf1) + w.Close() + } +} + +func BenchmarkEncoder1e4(b *testing.B) { + benchmarkEncoder(b, 1e4) +} + +func BenchmarkEncoder1e5(b *testing.B) { + benchmarkEncoder(b, 1e5) +} + +func BenchmarkEncoder1e6(b *testing.B) { + benchmarkEncoder(b, 1e6) +} diff --git a/src/pkg/compress/testdata/e.txt b/src/pkg/compress/testdata/e.txt new file mode 100644 index 000000000..76cf2a7b6 --- /dev/null +++ b/src/pkg/compress/testdata/e.txt @@ -0,0 +1 @@ +2.7182818284590452353602874713526624977572470936999595749669676277240766303535475945713821785251664274274663919320030599218174135966290435729003342952605956307381323286279434907632338298807531952510190115738341879307021540891499348841675092447614606680822648001684774118537423454424371075390777449920695517027618386062613313845830007520449338265602976067371132007093287091274437470472306969772093101416928368190255151086574637721112523897844250569536967707854499699679468644549059879316368892300987931277361782154249992295763514822082698951936680331825288693984964651058209392398294887933203625094431173012381970684161403970198376793206832823764648042953118023287825098194558153017567173613320698112509961818815930416903515988885193458072738667385894228792284998920868058257492796104841984443634632449684875602336248270419786232090021609902353043699418491463140934317381436405462531520961836908887070167683964243781405927145635490613031072085103837505101157477041718986106873969655212671546889570350354021234078498193343210681701210056278802351930332247450158539047304199577770935036604169973297250886876966403555707162268447162560798826517871341951246652010305921236677194325278675398558944896970964097545918569563802363701621120477427228364896134225164450781824423529486363721417402388934412479635743702637552944483379980161254922785092577825620926226483262779333865664816277251640191059004916449982893150566047258027786318641551956532442586982946959308019152987211725563475463964479101459040905862984967912874068705048958586717479854667757573205681288459205413340539220001137863009455606881667400169842055804033637953764520304024322566135278369511778838638744396625322498506549958862342818997077332761717839280349465014345588970719425863987727547109629537415211151368350627526023264847287039207643100595841166120545297030236472549296669381151373227536450988890313602057248176585118063036442812314965507047510254465011727211555194866850800368532281831521960037356252794495158284188294787610852639813955990067376482922443752871846245780361929819713991475644882626039033814418232625150974827987779964373089970388867782271383605772978824125611907176639465070633045279546618550966661856647097113444740160704626215680717481877844371436988218559670959102596862002353718588748569652200050311734392073211390803293634479727355955277349071783793421637012050054513263835440001863239914907054797780566978533580489669062951194324730995876552368128590413832411607226029983305353708761389396391779574540161372236187893652605381558415871869255386061647798340254351284396129460352913325942794904337299085731580290958631382683291477116396337092400316894586360606458459251269946557248391865642097526850823075442545993769170419777800853627309417101634349076964237222943523661255725088147792231519747780605696725380171807763603462459278778465850656050780844211529697521890874019660906651803516501792504619501366585436632712549639908549144200014574760819302212066024330096412704894390397177195180699086998606636583232278709376502260149291011517177635944602023249300280401867723910288097866605651183260043688508817157238669842242201024950551881694803221002515426494639812873677658927688163598312477886520141174110913601164995076629077943646005851941998560162647907615321038727557126992518275687989302761761146162549356495903798045838182323368612016243736569846703785853305275833337939907521660692380533698879565137285593883499894707416181550125397064648171946708348197214488898790676503795903669672494992545279033729636162658976039498576741397359441023744329709355477982629614591442936451428617158587339746791897571211956187385783644758448423555581050025611492391518893099463428413936080383091662818811503715284967059741625628236092168075150177725387402564253470879089137291722828611515915683725241630772254406337875931059826760944203261924285317018781772960235413060672136046000389661093647095141417185777014180606443636815464440053316087783143174440811949422975599314011888683314832802706553833004693290115744147563139997221703804617092894579096271662260740718749975359212756084414737823303270330168237193648002173285734935947564334129943024850235732214597843282641421684878721673367010615094243456984401873312810107945127223737886126058165668053714396127888732527373890392890506865324138062796025930387727697783792868409325365880733988457218746021005311483351323850047827169376218004904795597959290591655470505777514308175112698985188408718564026035305583737832422924185625644255022672155980274012617971928047139600689163828665277009752767069777036439260224372841840883251848770472638440379530166905465937461619323840363893131364327137688841026811219891275223056256756254701725086349765367288605966752740868627407912856576996313789753034660616669804218267724560530660773899624218340859882071864682623215080288286359746839654358856685503773131296587975810501214916207656769950659715344763470320853215603674828608378656803073062657633469774295634643716709397193060876963495328846833613038829431040800296873869117066666146800015121143442256023874474325250769387077775193299942137277211258843608715834835626961661980572526612206797540621062080649882918454395301529982092503005498257043390553570168653120526495614857249257386206917403695213533732531666345466588597286659451136441370331393672118569553952108458407244323835586063106806964924851232632699514603596037297253198368423363904632136710116192821711150282801604488058802382031981493096369596735832742024988245684941273860566491352526706046234450549227581151709314921879592718001940968866986837037302200475314338181092708030017205935530520700706072233999463990571311587099635777359027196285061146514837526209565346713290025994397663114545902685898979115837093419370441155121920117164880566945938131183843765620627846310490346293950029458341164824114969758326011800731699437393506966295712410273239138741754923071862454543222039552735295240245903805744502892246886285336542213815722131163288112052146489805180092024719391710555390113943316681515828843687606961102505171007392762385553386272553538830960671644662370922646809671254061869502143176211668140097595281493907222601112681153108387317617323235263605838173151034595736538223534992935822836851007810884634349983518404451704270189381994243410090575376257767571118090088164183319201962623416288166521374717325477727783488774366518828752156685719506371936565390389449366421764003121527870222366463635755503565576948886549500270853923617105502131147413744106134445544192101336172996285694899193369184729478580729156088510396781959429833186480756083679551496636448965592948187851784038773326247051945050419847742014183947731202815886845707290544057510601285258056594703046836344592652552137008068752009593453607316226118728173928074623094685367823106097921599360019946237993434210687813497346959246469752506246958616909178573976595199392993995567542714654910456860702099012606818704984178079173924071945996323060254707901774527513186809982284730860766536866855516467702911336827563107223346726113705490795365834538637196235856312618387156774118738527722922594743373785695538456246801013905727871016512966636764451872465653730402443684140814488732957847348490003019477888020460324660842875351848364959195082888323206522128104190448047247949291342284951970022601310430062410717971502793433263407995960531446053230488528972917659876016667811937932372453857209607582277178483361613582612896226118129455927462767137794487586753657544861407611931125958512655759734573015333642630767985443385761715333462325270572005303988289499034259566232975782488735029259166825894456894655992658454762694528780516501720674785417887982276806536650641910973434528878338621726156269582654478205672987756426325321594294418039943217000090542650763095588465895171709147607437136893319469090981904501290307099566226620303182649365733698419555776963787624918852865686607600566025605445711337286840205574416030837052312242587223438854123179481388550075689381124935386318635287083799845692619981794523364087429591180747453419551420351726184200845509170845682368200897739455842679214273477560879644279202708312150156406341341617166448069815483764491573900121217041547872591998943825364950514771379399147205219529079396137621107238494290616357604596231253506068537651423115349665683715116604220796394466621163255157729070978473156278277598788136491951257483328793771571459091064841642678309949723674420175862269402159407924480541255360431317992696739157542419296607312393763542139230617876753958711436104089409966089471418340698362993675362621545247298464213752891079884381306095552622720837518629837066787224430195793793786072107254277289071732854874374355781966511716618330881129120245204048682200072344035025448202834254187884653602591506445271657700044521097735585897622655484941621714989532383421600114062950718490427789258552743035221396835679018076406042138307308774460170842688272261177180842664333651780002171903449234264266292261456004337383868335555343453004264818473989215627086095650629340405264943244261445665921291225648893569655009154306426134252668472594914314239398845432486327461842846655985332312210466259890141712103446084271616619001257195870793217569698544013397622096749454185407118446433946990162698351607848924514058940946395267807354579700307051163682519487701189764002827648414160587206184185297189154019688253289309149665345753571427318482016384644832499037886069008072709327673127581966563941148961716832980455139729506687604740915420428429993541025829113502241690769431668574242522509026939034814856451303069925199590436384028429267412573422447765584177886171737265462085498294498946787350929581652632072258992368768457017823038096567883112289305809140572610865884845873101658151167533327674887014829167419701512559782572707406431808601428149024146780472327597684269633935773542930186739439716388611764209004068663398856841681003872389214483176070116684503887212364367043314091155733280182977988736590916659612402021778558854876176161989370794380056663364884365089144805571039765214696027662583599051987042300179465536788 diff --git a/src/pkg/compress/testdata/pi.txt b/src/pkg/compress/testdata/pi.txt new file mode 100644 index 000000000..58d8f3b6d --- /dev/null +++ b/src/pkg/compress/testdata/pi.txt @@ -0,0 +1 @@ +3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798609437027705392171762931767523846748184676694051320005681271452635608277857713427577896091736371787214684409012249534301465495853710507922796892589235420199561121290219608640344181598136297747713099605187072113499999983729780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083814206171776691473035982534904287554687311595628638823537875937519577818577805321712268066130019278766111959092164201989380952572010654858632788659361533818279682303019520353018529689957736225994138912497217752834791315155748572424541506959508295331168617278558890750983817546374649393192550604009277016711390098488240128583616035637076601047101819429555961989467678374494482553797747268471040475346462080466842590694912933136770289891521047521620569660240580381501935112533824300355876402474964732639141992726042699227967823547816360093417216412199245863150302861829745557067498385054945885869269956909272107975093029553211653449872027559602364806654991198818347977535663698074265425278625518184175746728909777727938000816470600161452491921732172147723501414419735685481613611573525521334757418494684385233239073941433345477624168625189835694855620992192221842725502542568876717904946016534668049886272327917860857843838279679766814541009538837863609506800642251252051173929848960841284886269456042419652850222106611863067442786220391949450471237137869609563643719172874677646575739624138908658326459958133904780275900994657640789512694683983525957098258226205224894077267194782684826014769909026401363944374553050682034962524517493996514314298091906592509372216964615157098583874105978859597729754989301617539284681382686838689427741559918559252459539594310499725246808459872736446958486538367362226260991246080512438843904512441365497627807977156914359977001296160894416948685558484063534220722258284886481584560285060168427394522674676788952521385225499546667278239864565961163548862305774564980355936345681743241125150760694794510965960940252288797108931456691368672287489405601015033086179286809208747609178249385890097149096759852613655497818931297848216829989487226588048575640142704775551323796414515237462343645428584447952658678210511413547357395231134271661021359695362314429524849371871101457654035902799344037420073105785390621983874478084784896833214457138687519435064302184531910484810053706146806749192781911979399520614196634287544406437451237181921799983910159195618146751426912397489409071864942319615679452080951465502252316038819301420937621378559566389377870830390697920773467221825625996615014215030680384477345492026054146659252014974428507325186660021324340881907104863317346496514539057962685610055081066587969981635747363840525714591028970641401109712062804390397595156771577004203378699360072305587631763594218731251471205329281918261861258673215791984148488291644706095752706957220917567116722910981690915280173506712748583222871835209353965725121083579151369882091444210067510334671103141267111369908658516398315019701651511685171437657618351556508849099898599823873455283316355076479185358932261854896321329330898570642046752590709154814165498594616371802709819943099244889575712828905923233260972997120844335732654893823911932597463667305836041428138830320382490375898524374417029132765618093773444030707469211201913020330380197621101100449293215160842444859637669838952286847831235526582131449576857262433441893039686426243410773226978028073189154411010446823252716201052652272111660396665573092547110557853763466820653109896526918620564769312570586356620185581007293606598764861179104533488503461136576867532494416680396265797877185560845529654126654085306143444318586769751456614068007002378776591344017127494704205622305389945613140711270004078547332699390814546646458807972708266830634328587856983052358089330657574067954571637752542021149557615814002501262285941302164715509792592309907965473761255176567513575178296664547791745011299614890304639947132962107340437518957359614589019389713111790429782856475032031986915140287080859904801094121472213179476477726224142548545403321571853061422881375850430633217518297986622371721591607716692547487389866549494501146540628433663937900397692656721463853067360965712091807638327166416274888800786925602902284721040317211860820419000422966171196377921337575114959501566049631862947265473642523081770367515906735023507283540567040386743513622224771589150495309844489333096340878076932599397805419341447377441842631298608099888687413260472156951623965864573021631598193195167353812974167729478672422924654366800980676928238280689964004824354037014163149658979409243237896907069779422362508221688957383798623001593776471651228935786015881617557829735233446042815126272037343146531977774160319906655418763979293344195215413418994854447345673831624993419131814809277771038638773431772075456545322077709212019051660962804909263601975988281613323166636528619326686336062735676303544776280350450777235547105859548702790814356240145171806246436267945612753181340783303362542327839449753824372058353114771199260638133467768796959703098339130771098704085913374641442822772634659470474587847787201927715280731767907707157213444730605700733492436931138350493163128404251219256517980694113528013147013047816437885185290928545201165839341965621349143415956258658655705526904965209858033850722426482939728584783163057777560688876446248246857926039535277348030480290058760758251047470916439613626760449256274204208320856611906254543372131535958450687724602901618766795240616342522577195429162991930645537799140373404328752628889639958794757291746426357455254079091451357111369410911939325191076020825202618798531887705842972591677813149699009019211697173727847684726860849003377024242916513005005168323364350389517029893922334517220138128069650117844087451960121228599371623130171144484640903890644954440061986907548516026327505298349187407866808818338510228334508504860825039302133219715518430635455007668282949304137765527939751754613953984683393638304746119966538581538420568533862186725233402830871123282789212507712629463229563989898935821167456270102183564622013496715188190973038119800497340723961036854066431939509790190699639552453005450580685501956730229219139339185680344903982059551002263535361920419947455385938102343955449597783779023742161727111723643435439478221818528624085140066604433258885698670543154706965747458550332323342107301545940516553790686627333799585115625784322988273723198987571415957811196358330059408730681216028764962867446047746491599505497374256269010490377819868359381465741268049256487985561453723478673303904688383436346553794986419270563872931748723320837601123029911367938627089438799362016295154133714248928307220126901475466847653576164773794675200490757155527819653621323926406160136358155907422020203187277605277219005561484255518792530343513984425322341576233610642506390497500865627109535919465897514131034822769306247435363256916078154781811528436679570611086153315044521274739245449454236828860613408414863776700961207151249140430272538607648236341433462351897576645216413767969031495019108575984423919862916421939949072362346468441173940326591840443780513338945257423995082965912285085558215725031071257012668302402929525220118726767562204154205161841634847565169998116141010029960783869092916030288400269104140792886215078424516709087000699282120660418371806535567252532567532861291042487761825829765157959847035622262934860034158722980534989650226291748788202734209222245339856264766914905562842503912757710284027998066365825488926488025456610172967026640765590429099456815065265305371829412703369313785178609040708667114965583434347693385781711386455873678123014587687126603489139095620099393610310291616152881384379099042317473363948045759314931405297634757481193567091101377517210080315590248530906692037671922033229094334676851422144773793937517034436619910403375111735471918550464490263655128162288244625759163330391072253837421821408835086573917715096828874782656995995744906617583441375223970968340800535598491754173818839994469748676265516582765848358845314277568790029095170283529716344562129640435231176006651012412006597558512761785838292041974844236080071930457618932349229279650198751872127267507981255470958904556357921221033346697499235630254947802490114195212382815309114079073860251522742995818072471625916685451333123948049470791191532673430282441860414263639548000448002670496248201792896476697583183271314251702969234889627668440323260927524960357996469256504936818360900323809293459588970695365349406034021665443755890045632882250545255640564482465151875471196218443965825337543885690941130315095261793780029741207665147939425902989695946995565761218656196733786236256125216320862869222103274889218654364802296780705765615144632046927906821207388377814233562823608963208068222468012248261177185896381409183903673672220888321513755600372798394004152970028783076670944474560134556417254370906979396122571429894671543578468788614445812314593571984922528471605049221242470141214780573455105008019086996033027634787081081754501193071412233908663938339529425786905076431006383519834389341596131854347546495569781038293097164651438407007073604112373599843452251610507027056235266012764848308407611830130527932054274628654036036745328651057065874882256981579367897669742205750596834408697350201410206723585020072452256326513410559240190274216248439140359989535394590944070469120914093870012645600162374288021092764579310657922955249887275846101264836999892256959688159205600101655256375678 diff --git a/src/pkg/compress/zlib/Makefile b/src/pkg/compress/zlib/Makefile new file mode 100644 index 000000000..791072d34 --- /dev/null +++ b/src/pkg/compress/zlib/Makefile @@ -0,0 +1,12 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=compress/zlib +GOFILES=\ + reader.go\ + writer.go\ + +include ../../../Make.pkg diff --git a/src/pkg/compress/zlib/reader.go b/src/pkg/compress/zlib/reader.go new file mode 100644 index 000000000..78dabdf4d --- /dev/null +++ b/src/pkg/compress/zlib/reader.go @@ -0,0 +1,126 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package zlib implements reading and writing of zlib format compressed data, +as specified in RFC 1950. + +The implementation provides filters that uncompress during reading +and compress during writing. For example, to write compressed data +to a buffer: + + var b bytes.Buffer + w, err := zlib.NewWriter(&b) + w.Write([]byte("hello, world\n")) + w.Close() + +and to read that data back: + + r, err := zlib.NewReader(&b) + io.Copy(os.Stdout, r) + r.Close() +*/ +package zlib + +import ( + "bufio" + "compress/flate" + "hash" + "hash/adler32" + "io" + "os" +) + +const zlibDeflate = 8 + +var ChecksumError = os.NewError("zlib checksum error") +var HeaderError = os.NewError("invalid zlib header") +var DictionaryError = os.NewError("invalid zlib dictionary") + +type reader struct { + r flate.Reader + decompressor io.ReadCloser + digest hash.Hash32 + err os.Error + scratch [4]byte +} + +// NewReader creates a new io.ReadCloser that satisfies reads by decompressing data read from r. +// The implementation buffers input and may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser when done. +func NewReader(r io.Reader) (io.ReadCloser, os.Error) { + return NewReaderDict(r, nil) +} + +// NewReaderDict is like NewReader but uses a preset dictionary. +// NewReaderDict ignores the dictionary if the compressed data does not refer to it. +func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, os.Error) { + z := new(reader) + if fr, ok := r.(flate.Reader); ok { + z.r = fr + } else { + z.r = bufio.NewReader(r) + } + _, err := io.ReadFull(z.r, z.scratch[0:2]) + if err != nil { + return nil, err + } + h := uint(z.scratch[0])<<8 | uint(z.scratch[1]) + if (z.scratch[0]&0x0f != zlibDeflate) || (h%31 != 0) { + return nil, HeaderError + } + if z.scratch[1]&0x20 != 0 { + _, err = io.ReadFull(z.r, z.scratch[0:4]) + if err != nil { + return nil, err + } + checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3]) + if checksum != adler32.Checksum(dict) { + return nil, DictionaryError + } + z.decompressor = flate.NewReaderDict(z.r, dict) + } else { + z.decompressor = flate.NewReader(z.r) + } + z.digest = adler32.New() + return z, nil +} + +func (z *reader) Read(p []byte) (n int, err os.Error) { + if z.err != nil { + return 0, z.err + } + if len(p) == 0 { + return 0, nil + } + + n, err = z.decompressor.Read(p) + z.digest.Write(p[0:n]) + if n != 0 || err != os.EOF { + z.err = err + return + } + + // Finished file; check checksum. + if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil { + z.err = err + return 0, err + } + // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). + checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3]) + if checksum != z.digest.Sum32() { + z.err = ChecksumError + return 0, z.err + } + return +} + +// Calling Close does not close the wrapped io.Reader originally passed to NewReader. +func (z *reader) Close() os.Error { + if z.err != nil { + return z.err + } + z.err = z.decompressor.Close() + return z.err +} diff --git a/src/pkg/compress/zlib/reader_test.go b/src/pkg/compress/zlib/reader_test.go new file mode 100644 index 000000000..195db446c --- /dev/null +++ b/src/pkg/compress/zlib/reader_test.go @@ -0,0 +1,128 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zlib + +import ( + "bytes" + "io" + "os" + "testing" +) + +type zlibTest struct { + desc string + raw string + compressed []byte + dict []byte + err os.Error +} + +// Compare-to-golden test data was generated by the ZLIB example program at +// http://www.zlib.net/zpipe.c + +var zlibTests = []zlibTest{ + { + "empty", + "", + []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01}, + nil, + nil, + }, + { + "goodbye", + "goodbye, world", + []byte{ + 0x78, 0x9c, 0x4b, 0xcf, 0xcf, 0x4f, 0x49, 0xaa, + 0x4c, 0xd5, 0x51, 0x28, 0xcf, 0x2f, 0xca, 0x49, + 0x01, 0x00, 0x28, 0xa5, 0x05, 0x5e, + }, + nil, + nil, + }, + { + "bad header", + "", + []byte{0x78, 0x9f, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01}, + nil, + HeaderError, + }, + { + "bad checksum", + "", + []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0xff}, + nil, + ChecksumError, + }, + { + "not enough data", + "", + []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00}, + nil, + io.ErrUnexpectedEOF, + }, + { + "excess data is silently ignored", + "", + []byte{ + 0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x78, 0x9c, 0xff, + }, + nil, + nil, + }, + { + "dictionary", + "Hello, World!\n", + []byte{ + 0x78, 0xbb, 0x1c, 0x32, 0x04, 0x27, 0xf3, 0x00, + 0xb1, 0x75, 0x20, 0x1c, 0x45, 0x2e, 0x00, 0x24, + 0x12, 0x04, 0x74, + }, + []byte{ + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x0a, + }, + nil, + }, + { + "wrong dictionary", + "", + []byte{ + 0x78, 0xbb, 0x1c, 0x32, 0x04, 0x27, 0xf3, 0x00, + 0xb1, 0x75, 0x20, 0x1c, 0x45, 0x2e, 0x00, 0x24, + 0x12, 0x04, 0x74, + }, + []byte{ + 0x48, 0x65, 0x6c, 0x6c, + }, + DictionaryError, + }, +} + +func TestDecompressor(t *testing.T) { + b := new(bytes.Buffer) + for _, tt := range zlibTests { + in := bytes.NewBuffer(tt.compressed) + zlib, err := NewReaderDict(in, tt.dict) + if err != nil { + if err != tt.err { + t.Errorf("%s: NewReader: %s", tt.desc, err) + } + continue + } + defer zlib.Close() + b.Reset() + n, err := io.Copy(b, zlib) + if err != nil { + if err != tt.err { + t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err) + } + continue + } + s := b.String() + if s != tt.raw { + t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw) + } + } +} diff --git a/src/pkg/compress/zlib/writer.go b/src/pkg/compress/zlib/writer.go new file mode 100644 index 000000000..8f86e9c4c --- /dev/null +++ b/src/pkg/compress/zlib/writer.go @@ -0,0 +1,139 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zlib + +import ( + "compress/flate" + "hash" + "hash/adler32" + "io" + "os" +) + +// These constants are copied from the flate package, so that code that imports +// "compress/zlib" does not also have to import "compress/flate". +const ( + NoCompression = flate.NoCompression + BestSpeed = flate.BestSpeed + BestCompression = flate.BestCompression + DefaultCompression = flate.DefaultCompression +) + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + w io.Writer + compressor *flate.Writer + digest hash.Hash32 + err os.Error + scratch [4]byte +} + +// NewWriter calls NewWriterLevel with the default compression level. +func NewWriter(w io.Writer) (*Writer, os.Error) { + return NewWriterLevel(w, DefaultCompression) +} + +// NewWriterLevel calls NewWriterDict with no dictionary. +func NewWriterLevel(w io.Writer, level int) (*Writer, os.Error) { + return NewWriterDict(w, level, nil) +} + +// NewWriterDict creates a new io.WriteCloser that satisfies writes by compressing data written to w. +// It is the caller's responsibility to call Close on the WriteCloser when done. +// level is the compression level, which can be DefaultCompression, NoCompression, +// or any integer value between BestSpeed and BestCompression (inclusive). +// dict is the preset dictionary to compress with, or nil to use no dictionary. +func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, os.Error) { + z := new(Writer) + // ZLIB has a two-byte header (as documented in RFC 1950). + // The first four bits is the CINFO (compression info), which is 7 for the default deflate window size. + // The next four bits is the CM (compression method), which is 8 for deflate. + z.scratch[0] = 0x78 + // The next two bits is the FLEVEL (compression level). The four values are: + // 0=fastest, 1=fast, 2=default, 3=best. + // The next bit, FDICT, is set if a dictionary is given. + // The final five FCHECK bits form a mod-31 checksum. + switch level { + case 0, 1: + z.scratch[1] = 0 << 6 + case 2, 3, 4, 5: + z.scratch[1] = 1 << 6 + case 6, -1: + z.scratch[1] = 2 << 6 + case 7, 8, 9: + z.scratch[1] = 3 << 6 + default: + return nil, os.NewError("level out of range") + } + if dict != nil { + z.scratch[1] |= 1 << 5 + } + z.scratch[1] += uint8(31 - (uint16(z.scratch[0])<<8+uint16(z.scratch[1]))%31) + _, err := w.Write(z.scratch[0:2]) + if err != nil { + return nil, err + } + if dict != nil { + // The next four bytes are the Adler-32 checksum of the dictionary. + checksum := adler32.Checksum(dict) + z.scratch[0] = uint8(checksum >> 24) + z.scratch[1] = uint8(checksum >> 16) + z.scratch[2] = uint8(checksum >> 8) + z.scratch[3] = uint8(checksum >> 0) + _, err = w.Write(z.scratch[0:4]) + if err != nil { + return nil, err + } + } + z.w = w + z.compressor = flate.NewWriterDict(w, level, dict) + z.digest = adler32.New() + return z, nil +} + +func (z *Writer) Write(p []byte) (n int, err os.Error) { + if z.err != nil { + return 0, z.err + } + if len(p) == 0 { + return 0, nil + } + n, err = z.compressor.Write(p) + if err != nil { + z.err = err + return + } + z.digest.Write(p) + return +} + +// Flush flushes the underlying compressor. +func (z *Writer) Flush() os.Error { + if z.err != nil { + return z.err + } + z.err = z.compressor.Flush() + return z.err +} + +// Calling Close does not close the wrapped io.Writer originally passed to NewWriter. +func (z *Writer) Close() os.Error { + if z.err != nil { + return z.err + } + z.err = z.compressor.Close() + if z.err != nil { + return z.err + } + checksum := z.digest.Sum32() + // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). + z.scratch[0] = uint8(checksum >> 24) + z.scratch[1] = uint8(checksum >> 16) + z.scratch[2] = uint8(checksum >> 8) + z.scratch[3] = uint8(checksum >> 0) + _, z.err = z.w.Write(z.scratch[0:4]) + return z.err +} diff --git a/src/pkg/compress/zlib/writer_test.go b/src/pkg/compress/zlib/writer_test.go new file mode 100644 index 000000000..32f05ab68 --- /dev/null +++ b/src/pkg/compress/zlib/writer_test.go @@ -0,0 +1,144 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zlib + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "testing" +) + +var filenames = []string{ + "../testdata/e.txt", + "../testdata/pi.txt", +} + +var data = []string{ + "test a reasonable sized string that can be compressed", +} + +// Tests that compressing and then decompressing the given file at the given compression level and dictionary +// yields equivalent bytes to the original file. +func testFileLevelDict(t *testing.T, fn string, level int, d string) { + // Read the file, as golden output. + golden, err := os.Open(fn) + if err != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + return + } + defer golden.Close() + b0, err0 := ioutil.ReadAll(golden) + if err0 != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0) + return + } + testLevelDict(t, fn, b0, level, d) +} + +func testLevelDict(t *testing.T, fn string, b0 []byte, level int, d string) { + // Make dictionary, if given. + var dict []byte + if d != "" { + dict = []byte(d) + } + + // Push data through a pipe that compresses at the write end, and decompresses at the read end. + piper, pipew := io.Pipe() + defer piper.Close() + go func() { + defer pipew.Close() + zlibw, err := NewWriterDict(pipew, level, dict) + if err != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + return + } + defer zlibw.Close() + _, err = zlibw.Write(b0) + if err == os.EPIPE { + // Fail, but do not report the error, as some other (presumably reported) error broke the pipe. + return + } + if err != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + return + } + }() + zlibr, err := NewReaderDict(piper, dict) + if err != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + return + } + defer zlibr.Close() + + // Compare the decompressed data. + b1, err1 := ioutil.ReadAll(zlibr) + if err1 != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err1) + return + } + if len(b0) != len(b1) { + t.Errorf("%s (level=%d, dict=%q): length mismatch %d versus %d", fn, level, d, len(b0), len(b1)) + return + } + for i := 0; i < len(b0); i++ { + if b0[i] != b1[i] { + t.Errorf("%s (level=%d, dict=%q): mismatch at %d, 0x%02x versus 0x%02x\n", fn, level, d, i, b0[i], b1[i]) + return + } + } +} + +func TestWriter(t *testing.T) { + for i, s := range data { + b := []byte(s) + tag := fmt.Sprintf("#%d", i) + testLevelDict(t, tag, b, DefaultCompression, "") + testLevelDict(t, tag, b, NoCompression, "") + for level := BestSpeed; level <= BestCompression; level++ { + testLevelDict(t, tag, b, level, "") + } + } +} + +func TestWriterBig(t *testing.T) { + for _, fn := range filenames { + testFileLevelDict(t, fn, DefaultCompression, "") + testFileLevelDict(t, fn, NoCompression, "") + for level := BestSpeed; level <= BestCompression; level++ { + testFileLevelDict(t, fn, level, "") + } + } +} + +func TestWriterDict(t *testing.T) { + const dictionary = "0123456789." + for _, fn := range filenames { + testFileLevelDict(t, fn, DefaultCompression, dictionary) + testFileLevelDict(t, fn, NoCompression, dictionary) + for level := BestSpeed; level <= BestCompression; level++ { + testFileLevelDict(t, fn, level, dictionary) + } + } +} + +func TestWriterDictIsUsed(t *testing.T) { + var input = []byte("Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.") + buf := bytes.NewBuffer(nil) + compressor, err := NewWriterDict(buf, BestCompression, input) + if err != nil { + t.Errorf("error in NewWriterDict: %s", err) + return + } + compressor.Write(input) + compressor.Close() + const expectedMaxSize = 25 + output := buf.Bytes() + if len(output) > expectedMaxSize { + t.Errorf("result too large (got %d, want <= %d bytes). Is the dictionary being used?", len(output), expectedMaxSize) + } +} |