diff options
Diffstat (limited to 'src/pkg/compress/zlib')
-rw-r--r-- | src/pkg/compress/zlib/Makefile | 12 | ||||
-rw-r--r-- | src/pkg/compress/zlib/reader.go | 126 | ||||
-rw-r--r-- | src/pkg/compress/zlib/reader_test.go | 128 | ||||
-rw-r--r-- | src/pkg/compress/zlib/writer.go | 139 | ||||
-rw-r--r-- | src/pkg/compress/zlib/writer_test.go | 144 |
5 files changed, 549 insertions, 0 deletions
diff --git a/src/pkg/compress/zlib/Makefile b/src/pkg/compress/zlib/Makefile new file mode 100644 index 000000000..791072d34 --- /dev/null +++ b/src/pkg/compress/zlib/Makefile @@ -0,0 +1,12 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=compress/zlib +GOFILES=\ + reader.go\ + writer.go\ + +include ../../../Make.pkg diff --git a/src/pkg/compress/zlib/reader.go b/src/pkg/compress/zlib/reader.go new file mode 100644 index 000000000..78dabdf4d --- /dev/null +++ b/src/pkg/compress/zlib/reader.go @@ -0,0 +1,126 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package zlib implements reading and writing of zlib format compressed data, +as specified in RFC 1950. + +The implementation provides filters that uncompress during reading +and compress during writing. For example, to write compressed data +to a buffer: + + var b bytes.Buffer + w, err := zlib.NewWriter(&b) + w.Write([]byte("hello, world\n")) + w.Close() + +and to read that data back: + + r, err := zlib.NewReader(&b) + io.Copy(os.Stdout, r) + r.Close() +*/ +package zlib + +import ( + "bufio" + "compress/flate" + "hash" + "hash/adler32" + "io" + "os" +) + +const zlibDeflate = 8 + +var ChecksumError = os.NewError("zlib checksum error") +var HeaderError = os.NewError("invalid zlib header") +var DictionaryError = os.NewError("invalid zlib dictionary") + +type reader struct { + r flate.Reader + decompressor io.ReadCloser + digest hash.Hash32 + err os.Error + scratch [4]byte +} + +// NewReader creates a new io.ReadCloser that satisfies reads by decompressing data read from r. +// The implementation buffers input and may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser when done. +func NewReader(r io.Reader) (io.ReadCloser, os.Error) { + return NewReaderDict(r, nil) +} + +// NewReaderDict is like NewReader but uses a preset dictionary. +// NewReaderDict ignores the dictionary if the compressed data does not refer to it. +func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, os.Error) { + z := new(reader) + if fr, ok := r.(flate.Reader); ok { + z.r = fr + } else { + z.r = bufio.NewReader(r) + } + _, err := io.ReadFull(z.r, z.scratch[0:2]) + if err != nil { + return nil, err + } + h := uint(z.scratch[0])<<8 | uint(z.scratch[1]) + if (z.scratch[0]&0x0f != zlibDeflate) || (h%31 != 0) { + return nil, HeaderError + } + if z.scratch[1]&0x20 != 0 { + _, err = io.ReadFull(z.r, z.scratch[0:4]) + if err != nil { + return nil, err + } + checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3]) + if checksum != adler32.Checksum(dict) { + return nil, DictionaryError + } + z.decompressor = flate.NewReaderDict(z.r, dict) + } else { + z.decompressor = flate.NewReader(z.r) + } + z.digest = adler32.New() + return z, nil +} + +func (z *reader) Read(p []byte) (n int, err os.Error) { + if z.err != nil { + return 0, z.err + } + if len(p) == 0 { + return 0, nil + } + + n, err = z.decompressor.Read(p) + z.digest.Write(p[0:n]) + if n != 0 || err != os.EOF { + z.err = err + return + } + + // Finished file; check checksum. + if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil { + z.err = err + return 0, err + } + // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). + checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3]) + if checksum != z.digest.Sum32() { + z.err = ChecksumError + return 0, z.err + } + return +} + +// Calling Close does not close the wrapped io.Reader originally passed to NewReader. +func (z *reader) Close() os.Error { + if z.err != nil { + return z.err + } + z.err = z.decompressor.Close() + return z.err +} diff --git a/src/pkg/compress/zlib/reader_test.go b/src/pkg/compress/zlib/reader_test.go new file mode 100644 index 000000000..195db446c --- /dev/null +++ b/src/pkg/compress/zlib/reader_test.go @@ -0,0 +1,128 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zlib + +import ( + "bytes" + "io" + "os" + "testing" +) + +type zlibTest struct { + desc string + raw string + compressed []byte + dict []byte + err os.Error +} + +// Compare-to-golden test data was generated by the ZLIB example program at +// http://www.zlib.net/zpipe.c + +var zlibTests = []zlibTest{ + { + "empty", + "", + []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01}, + nil, + nil, + }, + { + "goodbye", + "goodbye, world", + []byte{ + 0x78, 0x9c, 0x4b, 0xcf, 0xcf, 0x4f, 0x49, 0xaa, + 0x4c, 0xd5, 0x51, 0x28, 0xcf, 0x2f, 0xca, 0x49, + 0x01, 0x00, 0x28, 0xa5, 0x05, 0x5e, + }, + nil, + nil, + }, + { + "bad header", + "", + []byte{0x78, 0x9f, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01}, + nil, + HeaderError, + }, + { + "bad checksum", + "", + []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0xff}, + nil, + ChecksumError, + }, + { + "not enough data", + "", + []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00}, + nil, + io.ErrUnexpectedEOF, + }, + { + "excess data is silently ignored", + "", + []byte{ + 0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x78, 0x9c, 0xff, + }, + nil, + nil, + }, + { + "dictionary", + "Hello, World!\n", + []byte{ + 0x78, 0xbb, 0x1c, 0x32, 0x04, 0x27, 0xf3, 0x00, + 0xb1, 0x75, 0x20, 0x1c, 0x45, 0x2e, 0x00, 0x24, + 0x12, 0x04, 0x74, + }, + []byte{ + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x0a, + }, + nil, + }, + { + "wrong dictionary", + "", + []byte{ + 0x78, 0xbb, 0x1c, 0x32, 0x04, 0x27, 0xf3, 0x00, + 0xb1, 0x75, 0x20, 0x1c, 0x45, 0x2e, 0x00, 0x24, + 0x12, 0x04, 0x74, + }, + []byte{ + 0x48, 0x65, 0x6c, 0x6c, + }, + DictionaryError, + }, +} + +func TestDecompressor(t *testing.T) { + b := new(bytes.Buffer) + for _, tt := range zlibTests { + in := bytes.NewBuffer(tt.compressed) + zlib, err := NewReaderDict(in, tt.dict) + if err != nil { + if err != tt.err { + t.Errorf("%s: NewReader: %s", tt.desc, err) + } + continue + } + defer zlib.Close() + b.Reset() + n, err := io.Copy(b, zlib) + if err != nil { + if err != tt.err { + t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err) + } + continue + } + s := b.String() + if s != tt.raw { + t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw) + } + } +} diff --git a/src/pkg/compress/zlib/writer.go b/src/pkg/compress/zlib/writer.go new file mode 100644 index 000000000..8f86e9c4c --- /dev/null +++ b/src/pkg/compress/zlib/writer.go @@ -0,0 +1,139 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zlib + +import ( + "compress/flate" + "hash" + "hash/adler32" + "io" + "os" +) + +// These constants are copied from the flate package, so that code that imports +// "compress/zlib" does not also have to import "compress/flate". +const ( + NoCompression = flate.NoCompression + BestSpeed = flate.BestSpeed + BestCompression = flate.BestCompression + DefaultCompression = flate.DefaultCompression +) + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + w io.Writer + compressor *flate.Writer + digest hash.Hash32 + err os.Error + scratch [4]byte +} + +// NewWriter calls NewWriterLevel with the default compression level. +func NewWriter(w io.Writer) (*Writer, os.Error) { + return NewWriterLevel(w, DefaultCompression) +} + +// NewWriterLevel calls NewWriterDict with no dictionary. +func NewWriterLevel(w io.Writer, level int) (*Writer, os.Error) { + return NewWriterDict(w, level, nil) +} + +// NewWriterDict creates a new io.WriteCloser that satisfies writes by compressing data written to w. +// It is the caller's responsibility to call Close on the WriteCloser when done. +// level is the compression level, which can be DefaultCompression, NoCompression, +// or any integer value between BestSpeed and BestCompression (inclusive). +// dict is the preset dictionary to compress with, or nil to use no dictionary. +func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, os.Error) { + z := new(Writer) + // ZLIB has a two-byte header (as documented in RFC 1950). + // The first four bits is the CINFO (compression info), which is 7 for the default deflate window size. + // The next four bits is the CM (compression method), which is 8 for deflate. + z.scratch[0] = 0x78 + // The next two bits is the FLEVEL (compression level). The four values are: + // 0=fastest, 1=fast, 2=default, 3=best. + // The next bit, FDICT, is set if a dictionary is given. + // The final five FCHECK bits form a mod-31 checksum. + switch level { + case 0, 1: + z.scratch[1] = 0 << 6 + case 2, 3, 4, 5: + z.scratch[1] = 1 << 6 + case 6, -1: + z.scratch[1] = 2 << 6 + case 7, 8, 9: + z.scratch[1] = 3 << 6 + default: + return nil, os.NewError("level out of range") + } + if dict != nil { + z.scratch[1] |= 1 << 5 + } + z.scratch[1] += uint8(31 - (uint16(z.scratch[0])<<8+uint16(z.scratch[1]))%31) + _, err := w.Write(z.scratch[0:2]) + if err != nil { + return nil, err + } + if dict != nil { + // The next four bytes are the Adler-32 checksum of the dictionary. + checksum := adler32.Checksum(dict) + z.scratch[0] = uint8(checksum >> 24) + z.scratch[1] = uint8(checksum >> 16) + z.scratch[2] = uint8(checksum >> 8) + z.scratch[3] = uint8(checksum >> 0) + _, err = w.Write(z.scratch[0:4]) + if err != nil { + return nil, err + } + } + z.w = w + z.compressor = flate.NewWriterDict(w, level, dict) + z.digest = adler32.New() + return z, nil +} + +func (z *Writer) Write(p []byte) (n int, err os.Error) { + if z.err != nil { + return 0, z.err + } + if len(p) == 0 { + return 0, nil + } + n, err = z.compressor.Write(p) + if err != nil { + z.err = err + return + } + z.digest.Write(p) + return +} + +// Flush flushes the underlying compressor. +func (z *Writer) Flush() os.Error { + if z.err != nil { + return z.err + } + z.err = z.compressor.Flush() + return z.err +} + +// Calling Close does not close the wrapped io.Writer originally passed to NewWriter. +func (z *Writer) Close() os.Error { + if z.err != nil { + return z.err + } + z.err = z.compressor.Close() + if z.err != nil { + return z.err + } + checksum := z.digest.Sum32() + // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). + z.scratch[0] = uint8(checksum >> 24) + z.scratch[1] = uint8(checksum >> 16) + z.scratch[2] = uint8(checksum >> 8) + z.scratch[3] = uint8(checksum >> 0) + _, z.err = z.w.Write(z.scratch[0:4]) + return z.err +} diff --git a/src/pkg/compress/zlib/writer_test.go b/src/pkg/compress/zlib/writer_test.go new file mode 100644 index 000000000..32f05ab68 --- /dev/null +++ b/src/pkg/compress/zlib/writer_test.go @@ -0,0 +1,144 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zlib + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "testing" +) + +var filenames = []string{ + "../testdata/e.txt", + "../testdata/pi.txt", +} + +var data = []string{ + "test a reasonable sized string that can be compressed", +} + +// Tests that compressing and then decompressing the given file at the given compression level and dictionary +// yields equivalent bytes to the original file. +func testFileLevelDict(t *testing.T, fn string, level int, d string) { + // Read the file, as golden output. + golden, err := os.Open(fn) + if err != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + return + } + defer golden.Close() + b0, err0 := ioutil.ReadAll(golden) + if err0 != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0) + return + } + testLevelDict(t, fn, b0, level, d) +} + +func testLevelDict(t *testing.T, fn string, b0 []byte, level int, d string) { + // Make dictionary, if given. + var dict []byte + if d != "" { + dict = []byte(d) + } + + // Push data through a pipe that compresses at the write end, and decompresses at the read end. + piper, pipew := io.Pipe() + defer piper.Close() + go func() { + defer pipew.Close() + zlibw, err := NewWriterDict(pipew, level, dict) + if err != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + return + } + defer zlibw.Close() + _, err = zlibw.Write(b0) + if err == os.EPIPE { + // Fail, but do not report the error, as some other (presumably reported) error broke the pipe. + return + } + if err != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + return + } + }() + zlibr, err := NewReaderDict(piper, dict) + if err != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + return + } + defer zlibr.Close() + + // Compare the decompressed data. + b1, err1 := ioutil.ReadAll(zlibr) + if err1 != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err1) + return + } + if len(b0) != len(b1) { + t.Errorf("%s (level=%d, dict=%q): length mismatch %d versus %d", fn, level, d, len(b0), len(b1)) + return + } + for i := 0; i < len(b0); i++ { + if b0[i] != b1[i] { + t.Errorf("%s (level=%d, dict=%q): mismatch at %d, 0x%02x versus 0x%02x\n", fn, level, d, i, b0[i], b1[i]) + return + } + } +} + +func TestWriter(t *testing.T) { + for i, s := range data { + b := []byte(s) + tag := fmt.Sprintf("#%d", i) + testLevelDict(t, tag, b, DefaultCompression, "") + testLevelDict(t, tag, b, NoCompression, "") + for level := BestSpeed; level <= BestCompression; level++ { + testLevelDict(t, tag, b, level, "") + } + } +} + +func TestWriterBig(t *testing.T) { + for _, fn := range filenames { + testFileLevelDict(t, fn, DefaultCompression, "") + testFileLevelDict(t, fn, NoCompression, "") + for level := BestSpeed; level <= BestCompression; level++ { + testFileLevelDict(t, fn, level, "") + } + } +} + +func TestWriterDict(t *testing.T) { + const dictionary = "0123456789." + for _, fn := range filenames { + testFileLevelDict(t, fn, DefaultCompression, dictionary) + testFileLevelDict(t, fn, NoCompression, dictionary) + for level := BestSpeed; level <= BestCompression; level++ { + testFileLevelDict(t, fn, level, dictionary) + } + } +} + +func TestWriterDictIsUsed(t *testing.T) { + var input = []byte("Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.") + buf := bytes.NewBuffer(nil) + compressor, err := NewWriterDict(buf, BestCompression, input) + if err != nil { + t.Errorf("error in NewWriterDict: %s", err) + return + } + compressor.Write(input) + compressor.Close() + const expectedMaxSize = 25 + output := buf.Bytes() + if len(output) > expectedMaxSize { + t.Errorf("result too large (got %d, want <= %d bytes). Is the dictionary being used?", len(output), expectedMaxSize) + } +} |