diff options
author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
---|---|---|
committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
commit | f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch) | |
tree | f8255d51e10c6f1e0ed69702200b966c9556a431 /src/compress/gzip | |
parent | 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff) | |
download | golang-upstream/1.4.tar.gz |
Imported Upstream version 1.4upstream/1.4
Diffstat (limited to 'src/compress/gzip')
-rw-r--r-- | src/compress/gzip/gunzip.go | 287 | ||||
-rw-r--r-- | src/compress/gzip/gunzip_test.go | 410 | ||||
-rw-r--r-- | src/compress/gzip/gzip.go | 272 | ||||
-rw-r--r-- | src/compress/gzip/gzip_test.go | 231 | ||||
-rw-r--r-- | src/compress/gzip/testdata/issue6550.gz | bin | 0 -> 65536 bytes |
5 files changed, 1200 insertions, 0 deletions
diff --git a/src/compress/gzip/gunzip.go b/src/compress/gzip/gunzip.go new file mode 100644 index 000000000..72ee55c4f --- /dev/null +++ b/src/compress/gzip/gunzip.go @@ -0,0 +1,287 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gzip implements reading and writing of gzip format compressed files, +// as specified in RFC 1952. +package gzip + +import ( + "bufio" + "compress/flate" + "errors" + "hash" + "hash/crc32" + "io" + "time" +) + +const ( + gzipID1 = 0x1f + gzipID2 = 0x8b + gzipDeflate = 8 + flagText = 1 << 0 + flagHdrCrc = 1 << 1 + flagExtra = 1 << 2 + flagName = 1 << 3 + flagComment = 1 << 4 +) + +func makeReader(r io.Reader) flate.Reader { + if rr, ok := r.(flate.Reader); ok { + return rr + } + return bufio.NewReader(r) +} + +var ( + // ErrChecksum is returned when reading GZIP data that has an invalid checksum. + ErrChecksum = errors.New("gzip: invalid checksum") + // ErrHeader is returned when reading GZIP data that has an invalid header. + ErrHeader = errors.New("gzip: invalid header") +) + +// The gzip file stores a header giving metadata about the compressed file. +// That header is exposed as the fields of the Writer and Reader structs. +type Header struct { + Comment string // comment + Extra []byte // "extra data" + ModTime time.Time // modification time + Name string // file name + OS byte // operating system type +} + +// A Reader is an io.Reader that can be read to retrieve +// uncompressed data from a gzip-format compressed file. +// +// In general, a gzip file can be a concatenation of gzip files, +// each with its own header. Reads from the Reader +// return the concatenation of the uncompressed data of each. +// Only the first header is recorded in the Reader fields. +// +// Gzip files store a length and checksum of the uncompressed data. +// The Reader will return a ErrChecksum when Read +// reaches the end of the uncompressed data if it does not +// have the expected length or checksum. Clients should treat data +// returned by Read as tentative until they receive the io.EOF +// marking the end of the data. +type Reader struct { + Header + r flate.Reader + decompressor io.ReadCloser + digest hash.Hash32 + size uint32 + flg byte + buf [512]byte + err error + multistream bool +} + +// NewReader creates a new Reader reading the given reader. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the Reader when done. +func NewReader(r io.Reader) (*Reader, error) { + z := new(Reader) + z.r = makeReader(r) + z.multistream = true + z.digest = crc32.NewIEEE() + if err := z.readHeader(true); err != nil { + return nil, err + } + return z, nil +} + +// Reset discards the Reader z's state and makes it equivalent to the +// result of its original state from NewReader, but reading from r instead. +// This permits reusing a Reader rather than allocating a new one. +func (z *Reader) Reset(r io.Reader) error { + z.r = makeReader(r) + if z.digest == nil { + z.digest = crc32.NewIEEE() + } else { + z.digest.Reset() + } + z.size = 0 + z.err = nil + z.multistream = true + return z.readHeader(true) +} + +// Multistream controls whether the reader supports multistream files. +// +// If enabled (the default), the Reader expects the input to be a sequence +// of individually gzipped data streams, each with its own header and +// trailer, ending at EOF. The effect is that the concatenation of a sequence +// of gzipped files is treated as equivalent to the gzip of the concatenation +// of the sequence. This is standard behavior for gzip readers. +// +// Calling Multistream(false) disables this behavior; disabling the behavior +// can be useful when reading file formats that distinguish individual gzip +// data streams or mix gzip data streams with other data streams. +// In this mode, when the Reader reaches the end of the data stream, +// Read returns io.EOF. If the underlying reader implements io.ByteReader, +// it will be left positioned just after the gzip stream. +// To start the next stream, call z.Reset(r) followed by z.Multistream(false). +// If there is no next stream, z.Reset(r) will return io.EOF. +func (z *Reader) Multistream(ok bool) { + z.multistream = ok +} + +// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950). +func get4(p []byte) uint32 { + return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 +} + +func (z *Reader) readString() (string, error) { + var err error + needconv := false + for i := 0; ; i++ { + if i >= len(z.buf) { + return "", ErrHeader + } + z.buf[i], err = z.r.ReadByte() + if err != nil { + return "", err + } + if z.buf[i] > 0x7f { + needconv = true + } + if z.buf[i] == 0 { + // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). + if needconv { + s := make([]rune, 0, i) + for _, v := range z.buf[0:i] { + s = append(s, rune(v)) + } + return string(s), nil + } + return string(z.buf[0:i]), nil + } + } +} + +func (z *Reader) read2() (uint32, error) { + _, err := io.ReadFull(z.r, z.buf[0:2]) + if err != nil { + return 0, err + } + return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil +} + +func (z *Reader) readHeader(save bool) error { + _, err := io.ReadFull(z.r, z.buf[0:10]) + if err != nil { + return err + } + if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate { + return ErrHeader + } + z.flg = z.buf[3] + if save { + z.ModTime = time.Unix(int64(get4(z.buf[4:8])), 0) + // z.buf[8] is xfl, ignored + z.OS = z.buf[9] + } + z.digest.Reset() + z.digest.Write(z.buf[0:10]) + + if z.flg&flagExtra != 0 { + n, err := z.read2() + if err != nil { + return err + } + data := make([]byte, n) + if _, err = io.ReadFull(z.r, data); err != nil { + return err + } + if save { + z.Extra = data + } + } + + var s string + if z.flg&flagName != 0 { + if s, err = z.readString(); err != nil { + return err + } + if save { + z.Name = s + } + } + + if z.flg&flagComment != 0 { + if s, err = z.readString(); err != nil { + return err + } + if save { + z.Comment = s + } + } + + if z.flg&flagHdrCrc != 0 { + n, err := z.read2() + if err != nil { + return err + } + sum := z.digest.Sum32() & 0xFFFF + if n != sum { + return ErrHeader + } + } + + z.digest.Reset() + if z.decompressor == nil { + z.decompressor = flate.NewReader(z.r) + } else { + z.decompressor.(flate.Resetter).Reset(z.r, nil) + } + return nil +} + +func (z *Reader) Read(p []byte) (n int, err error) { + if z.err != nil { + return 0, z.err + } + if len(p) == 0 { + return 0, nil + } + + n, err = z.decompressor.Read(p) + z.digest.Write(p[0:n]) + z.size += uint32(n) + if n != 0 || err != io.EOF { + z.err = err + return + } + + // Finished file; check checksum + size. + if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil { + z.err = err + return 0, err + } + crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8]) + sum := z.digest.Sum32() + if sum != crc32 || isize != z.size { + z.err = ErrChecksum + return 0, z.err + } + + // File is ok; is there another? + if !z.multistream { + return 0, io.EOF + } + + if err = z.readHeader(false); err != nil { + z.err = err + return + } + + // Yes. Reset and read from it. + z.digest.Reset() + z.size = 0 + return z.Read(p) +} + +// Close closes the Reader. It does not close the underlying io.Reader. +func (z *Reader) Close() error { return z.decompressor.Close() } diff --git a/src/compress/gzip/gunzip_test.go b/src/compress/gzip/gunzip_test.go new file mode 100644 index 000000000..0636dec9a --- /dev/null +++ b/src/compress/gzip/gunzip_test.go @@ -0,0 +1,410 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "bytes" + "io" + "io/ioutil" + "os" + "strings" + "testing" + "time" +) + +type gunzipTest struct { + name string + desc string + raw string + gzip []byte + err error +} + +var gunzipTests = []gunzipTest{ + { // has 1 empty fixed-huffman block + "empty.txt", + "empty.txt", + "", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xf7, 0x5e, 0x14, 0x4a, + 0x00, 0x03, 0x65, 0x6d, 0x70, 0x74, 0x79, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + nil, + }, + { // has 1 non-empty fixed huffman block + "hello.txt", + "hello.txt", + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, + 0x00, 0x00, + }, + nil, + }, + { // concatenation + "hello.txt", + "hello.txt x2", + "hello world\n" + + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, + 0x00, 0x00, + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, + 0x00, 0x00, + }, + nil, + }, + { // has a fixed huffman block with some length-distance pairs + "shesells.txt", + "shesells.txt", + "she sells seashells by the seashore\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0x72, 0x66, 0x8b, 0x4a, + 0x00, 0x03, 0x73, 0x68, 0x65, 0x73, 0x65, 0x6c, + 0x6c, 0x73, 0x2e, 0x74, 0x78, 0x74, 0x00, 0x2b, + 0xce, 0x48, 0x55, 0x28, 0x4e, 0xcd, 0xc9, 0x29, + 0x06, 0x92, 0x89, 0xc5, 0x19, 0x60, 0x56, 0x52, + 0xa5, 0x42, 0x09, 0x58, 0x18, 0x28, 0x90, 0x5f, + 0x94, 0xca, 0x05, 0x00, 0x76, 0xb0, 0x3b, 0xeb, + 0x24, 0x00, 0x00, 0x00, + }, + nil, + }, + { // has dynamic huffman blocks + "gettysburg", + "gettysburg", + " Four score and seven years ago our fathers brought forth on\n" + + "this continent, a new nation, conceived in Liberty, and dedicated\n" + + "to the proposition that all men are created equal.\n" + + " Now we are engaged in a great Civil War, testing whether that\n" + + "nation, or any nation so conceived and so dedicated, can long\n" + + "endure.\n" + + " We are met on a great battle-field of that war.\n" + + " We have come to dedicate a portion of that field, as a final\n" + + "resting place for those who here gave their lives that that\n" + + "nation might live. It is altogether fitting and proper that\n" + + "we should do this.\n" + + " But, in a larger sense, we can not dedicate — we can not\n" + + "consecrate — we can not hallow — this ground.\n" + + " The brave men, living and dead, who struggled here, have\n" + + "consecrated it, far above our poor power to add or detract.\n" + + "The world will little note, nor long remember what we say here,\n" + + "but it can never forget what they did here.\n" + + " It is for us the living, rather, to be dedicated here to the\n" + + "unfinished work which they who fought here have thus far so\n" + + "nobly advanced. It is rather for us to be here dedicated to\n" + + "the great task remaining before us — that from these honored\n" + + "dead we take increased devotion to that cause for which they\n" + + "gave the last full measure of devotion —\n" + + " that we here highly resolve that these dead shall not have\n" + + "died in vain — that this nation, under God, shall have a new\n" + + "birth of freedom — and that government of the people, by the\n" + + "people, for the people, shall not perish from this earth.\n" + + "\n" + + "Abraham Lincoln, November 19, 1863, Gettysburg, Pennsylvania\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xd1, 0x12, 0x2b, 0x4a, + 0x00, 0x03, 0x67, 0x65, 0x74, 0x74, 0x79, 0x73, + 0x62, 0x75, 0x72, 0x67, 0x00, 0x65, 0x54, 0xcd, + 0x6e, 0xd4, 0x30, 0x10, 0xbe, 0xfb, 0x29, 0xe6, + 0x01, 0x42, 0xa5, 0x0a, 0x09, 0xc1, 0x11, 0x90, + 0x40, 0x48, 0xa8, 0xe2, 0x80, 0xd4, 0xf3, 0x24, + 0x9e, 0x24, 0x56, 0xbd, 0x9e, 0xc5, 0x76, 0x76, + 0x95, 0x1b, 0x0f, 0xc1, 0x13, 0xf2, 0x24, 0x7c, + 0x63, 0x77, 0x9b, 0x4a, 0x5c, 0xaa, 0x6e, 0x6c, + 0xcf, 0x7c, 0x7f, 0x33, 0x44, 0x5f, 0x74, 0xcb, + 0x54, 0x26, 0xcd, 0x42, 0x9c, 0x3c, 0x15, 0xb9, + 0x48, 0xa2, 0x5d, 0x38, 0x17, 0xe2, 0x45, 0xc9, + 0x4e, 0x67, 0xae, 0xab, 0xe0, 0xf7, 0x98, 0x75, + 0x5b, 0xd6, 0x4a, 0xb3, 0xe6, 0xba, 0x92, 0x26, + 0x57, 0xd7, 0x50, 0x68, 0xd2, 0x54, 0x43, 0x92, + 0x54, 0x07, 0x62, 0x4a, 0x72, 0xa5, 0xc4, 0x35, + 0x68, 0x1a, 0xec, 0x60, 0x92, 0x70, 0x11, 0x4f, + 0x21, 0xd1, 0xf7, 0x30, 0x4a, 0xae, 0xfb, 0xd0, + 0x9a, 0x78, 0xf1, 0x61, 0xe2, 0x2a, 0xde, 0x55, + 0x25, 0xd4, 0xa6, 0x73, 0xd6, 0xb3, 0x96, 0x60, + 0xef, 0xf0, 0x9b, 0x2b, 0x71, 0x8c, 0x74, 0x02, + 0x10, 0x06, 0xac, 0x29, 0x8b, 0xdd, 0x25, 0xf9, + 0xb5, 0x71, 0xbc, 0x73, 0x44, 0x0f, 0x7a, 0xa5, + 0xab, 0xb4, 0x33, 0x49, 0x0b, 0x2f, 0xbd, 0x03, + 0xd3, 0x62, 0x17, 0xe9, 0x73, 0xb8, 0x84, 0x48, + 0x8f, 0x9c, 0x07, 0xaa, 0x52, 0x00, 0x6d, 0xa1, + 0xeb, 0x2a, 0xc6, 0xa0, 0x95, 0x76, 0x37, 0x78, + 0x9a, 0x81, 0x65, 0x7f, 0x46, 0x4b, 0x45, 0x5f, + 0xe1, 0x6d, 0x42, 0xe8, 0x01, 0x13, 0x5c, 0x38, + 0x51, 0xd4, 0xb4, 0x38, 0x49, 0x7e, 0xcb, 0x62, + 0x28, 0x1e, 0x3b, 0x82, 0x93, 0x54, 0x48, 0xf1, + 0xd2, 0x7d, 0xe4, 0x5a, 0xa3, 0xbc, 0x99, 0x83, + 0x44, 0x4f, 0x3a, 0x77, 0x36, 0x57, 0xce, 0xcf, + 0x2f, 0x56, 0xbe, 0x80, 0x90, 0x9e, 0x84, 0xea, + 0x51, 0x1f, 0x8f, 0xcf, 0x90, 0xd4, 0x60, 0xdc, + 0x5e, 0xb4, 0xf7, 0x10, 0x0b, 0x26, 0xe0, 0xff, + 0xc4, 0xd1, 0xe5, 0x67, 0x2e, 0xe7, 0xc8, 0x93, + 0x98, 0x05, 0xb8, 0xa8, 0x45, 0xc0, 0x4d, 0x09, + 0xdc, 0x84, 0x16, 0x2b, 0x0d, 0x9a, 0x21, 0x53, + 0x04, 0x8b, 0xd2, 0x0b, 0xbd, 0xa2, 0x4c, 0xa7, + 0x60, 0xee, 0xd9, 0xe1, 0x1d, 0xd1, 0xb7, 0x4a, + 0x30, 0x8f, 0x63, 0xd5, 0xa5, 0x8b, 0x33, 0x87, + 0xda, 0x1a, 0x18, 0x79, 0xf3, 0xe3, 0xa6, 0x17, + 0x94, 0x2e, 0xab, 0x6e, 0xa0, 0xe3, 0xcd, 0xac, + 0x50, 0x8c, 0xca, 0xa7, 0x0d, 0x76, 0x37, 0xd1, + 0x23, 0xe7, 0x05, 0x57, 0x8b, 0xa4, 0x22, 0x83, + 0xd9, 0x62, 0x52, 0x25, 0xad, 0x07, 0xbb, 0xbf, + 0xbf, 0xff, 0xbc, 0xfa, 0xee, 0x20, 0x73, 0x91, + 0x29, 0xff, 0x7f, 0x02, 0x71, 0x62, 0x84, 0xb5, + 0xf6, 0xb5, 0x25, 0x6b, 0x41, 0xde, 0x92, 0xb7, + 0x76, 0x3f, 0x91, 0x91, 0x31, 0x1b, 0x41, 0x84, + 0x62, 0x30, 0x0a, 0x37, 0xa4, 0x5e, 0x18, 0x3a, + 0x99, 0x08, 0xa5, 0xe6, 0x6d, 0x59, 0x22, 0xec, + 0x33, 0x39, 0x86, 0x26, 0xf5, 0xab, 0x66, 0xc8, + 0x08, 0x20, 0xcf, 0x0c, 0xd7, 0x47, 0x45, 0x21, + 0x0b, 0xf6, 0x59, 0xd5, 0xfe, 0x5c, 0x8d, 0xaa, + 0x12, 0x7b, 0x6f, 0xa1, 0xf0, 0x52, 0x33, 0x4f, + 0xf5, 0xce, 0x59, 0xd3, 0xab, 0x66, 0x10, 0xbf, + 0x06, 0xc4, 0x31, 0x06, 0x73, 0xd6, 0x80, 0xa2, + 0x78, 0xc2, 0x45, 0xcb, 0x03, 0x65, 0x39, 0xc9, + 0x09, 0xd1, 0x06, 0x04, 0x33, 0x1a, 0x5a, 0xf1, + 0xde, 0x01, 0xb8, 0x71, 0x83, 0xc4, 0xb5, 0xb3, + 0xc3, 0x54, 0x65, 0x33, 0x0d, 0x5a, 0xf7, 0x9b, + 0x90, 0x7c, 0x27, 0x1f, 0x3a, 0x58, 0xa3, 0xd8, + 0xfd, 0x30, 0x5f, 0xb7, 0xd2, 0x66, 0xa2, 0x93, + 0x1c, 0x28, 0xb7, 0xe9, 0x1b, 0x0c, 0xe1, 0x28, + 0x47, 0x26, 0xbb, 0xe9, 0x7d, 0x7e, 0xdc, 0x96, + 0x10, 0x92, 0x50, 0x56, 0x7c, 0x06, 0xe2, 0x27, + 0xb4, 0x08, 0xd3, 0xda, 0x7b, 0x98, 0x34, 0x73, + 0x9f, 0xdb, 0xf6, 0x62, 0xed, 0x31, 0x41, 0x13, + 0xd3, 0xa2, 0xa8, 0x4b, 0x3a, 0xc6, 0x1d, 0xe4, + 0x2f, 0x8c, 0xf8, 0xfb, 0x97, 0x64, 0xf4, 0xb6, + 0x2f, 0x80, 0x5a, 0xf3, 0x56, 0xe0, 0x40, 0x50, + 0xd5, 0x19, 0xd0, 0x1e, 0xfc, 0xca, 0xe5, 0xc9, + 0xd4, 0x60, 0x00, 0x81, 0x2e, 0xa3, 0xcc, 0xb6, + 0x52, 0xf0, 0xb4, 0xdb, 0x69, 0x99, 0xce, 0x7a, + 0x32, 0x4c, 0x08, 0xed, 0xaa, 0x10, 0x10, 0xe3, + 0x6f, 0xee, 0x99, 0x68, 0x95, 0x9f, 0x04, 0x71, + 0xb2, 0x49, 0x2f, 0x62, 0xa6, 0x5e, 0xb4, 0xef, + 0x02, 0xed, 0x4f, 0x27, 0xde, 0x4a, 0x0f, 0xfd, + 0xc1, 0xcc, 0xdd, 0x02, 0x8f, 0x08, 0x16, 0x54, + 0xdf, 0xda, 0xca, 0xe0, 0x82, 0xf1, 0xb4, 0x31, + 0x7a, 0xa9, 0x81, 0xfe, 0x90, 0xb7, 0x3e, 0xdb, + 0xd3, 0x35, 0xc0, 0x20, 0x80, 0x33, 0x46, 0x4a, + 0x63, 0xab, 0xd1, 0x0d, 0x29, 0xd2, 0xe2, 0x84, + 0xb8, 0xdb, 0xfa, 0xe9, 0x89, 0x44, 0x86, 0x7c, + 0xe8, 0x0b, 0xe6, 0x02, 0x6a, 0x07, 0x9b, 0x96, + 0xd0, 0xdb, 0x2e, 0x41, 0x4c, 0xa1, 0xd5, 0x57, + 0x45, 0x14, 0xfb, 0xe3, 0xa6, 0x72, 0x5b, 0x87, + 0x6e, 0x0c, 0x6d, 0x5b, 0xce, 0xe0, 0x2f, 0xe2, + 0x21, 0x81, 0x95, 0xb0, 0xe8, 0xb6, 0x32, 0x0b, + 0xb2, 0x98, 0x13, 0x52, 0x5d, 0xfb, 0xec, 0x63, + 0x17, 0x8a, 0x9e, 0x23, 0x22, 0x36, 0xee, 0xcd, + 0xda, 0xdb, 0xcf, 0x3e, 0xf1, 0xc7, 0xf1, 0x01, + 0x12, 0x93, 0x0a, 0xeb, 0x6f, 0xf2, 0x02, 0x15, + 0x96, 0x77, 0x5d, 0xef, 0x9c, 0xfb, 0x88, 0x91, + 0x59, 0xf9, 0x84, 0xdd, 0x9b, 0x26, 0x8d, 0x80, + 0xf9, 0x80, 0x66, 0x2d, 0xac, 0xf7, 0x1f, 0x06, + 0xba, 0x7f, 0xff, 0xee, 0xed, 0x40, 0x5f, 0xa5, + 0xd6, 0xbd, 0x8c, 0x5b, 0x46, 0xd2, 0x7e, 0x48, + 0x4a, 0x65, 0x8f, 0x08, 0x42, 0x60, 0xf7, 0x0f, + 0xb9, 0x16, 0x0b, 0x0c, 0x1a, 0x06, 0x00, 0x00, + }, + nil, + }, + { // has 1 non-empty fixed huffman block then garbage + "hello.txt", + "hello.txt + garbage", + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, + 0x00, 0x00, 'g', 'a', 'r', 'b', 'a', 'g', 'e', '!', '!', '!', + }, + ErrHeader, + }, + { // has 1 non-empty fixed huffman block not enough header + "hello.txt", + "hello.txt + garbage", + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, + 0x00, 0x00, gzipID1, + }, + io.ErrUnexpectedEOF, + }, + { // has 1 non-empty fixed huffman block but corrupt checksum + "hello.txt", + "hello.txt + corrupt checksum", + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0c, 0x00, + 0x00, 0x00, + }, + ErrChecksum, + }, + { // has 1 non-empty fixed huffman block but corrupt size + "hello.txt", + "hello.txt + corrupt size", + "hello world\n", + []byte{ + 0x1f, 0x8b, 0x08, 0x08, 0xc8, 0x58, 0x13, 0x4a, + 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xcb, 0x48, 0xcd, 0xc9, + 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0xff, 0x00, + 0x00, 0x00, + }, + ErrChecksum, + }, +} + +func TestDecompressor(t *testing.T) { + b := new(bytes.Buffer) + for _, tt := range gunzipTests { + in := bytes.NewReader(tt.gzip) + gzip, err := NewReader(in) + if err != nil { + t.Errorf("%s: NewReader: %s", tt.name, err) + continue + } + defer gzip.Close() + if tt.name != gzip.Name { + t.Errorf("%s: got name %s", tt.name, gzip.Name) + } + b.Reset() + n, err := io.Copy(b, gzip) + if err != tt.err { + t.Errorf("%s: io.Copy: %v want %v", tt.name, err, tt.err) + } + s := b.String() + if s != tt.raw { + t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.name, n, s, len(tt.raw), tt.raw) + } + + // Test Reader Reset. + in = bytes.NewReader(tt.gzip) + err = gzip.Reset(in) + if err != nil { + t.Errorf("%s: Reset: %s", tt.name, err) + continue + } + if tt.name != gzip.Name { + t.Errorf("%s: got name %s", tt.name, gzip.Name) + } + b.Reset() + n, err = io.Copy(b, gzip) + if err != tt.err { + t.Errorf("%s: io.Copy: %v want %v", tt.name, err, tt.err) + } + s = b.String() + if s != tt.raw { + t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.name, n, s, len(tt.raw), tt.raw) + } + } +} + +func TestIssue6550(t *testing.T) { + f, err := os.Open("testdata/issue6550.gz") + if err != nil { + t.Fatal(err) + } + gzip, err := NewReader(f) + if err != nil { + t.Fatalf("NewReader(testdata/issue6550.gz): %v", err) + } + defer gzip.Close() + done := make(chan bool, 1) + go func() { + _, err := io.Copy(ioutil.Discard, gzip) + if err == nil { + t.Errorf("Copy succeeded") + } else { + t.Logf("Copy failed (correctly): %v", err) + } + done <- true + }() + select { + case <-time.After(1 * time.Second): + t.Errorf("Copy hung") + case <-done: + // ok + } +} + +func TestInitialReset(t *testing.T) { + var r Reader + if err := r.Reset(bytes.NewReader(gunzipTests[1].gzip)); err != nil { + t.Error(err) + } + var buf bytes.Buffer + if _, err := io.Copy(&buf, &r); err != nil { + t.Error(err) + } + if s := buf.String(); s != gunzipTests[1].raw { + t.Errorf("got %q want %q", s, gunzipTests[1].raw) + } +} + +func TestMultistreamFalse(t *testing.T) { + // Find concatenation test. + var tt gunzipTest + for _, tt = range gunzipTests { + if strings.HasSuffix(tt.desc, " x2") { + goto Found + } + } + t.Fatal("cannot find hello.txt x2 in gunzip tests") + +Found: + br := bytes.NewReader(tt.gzip) + var r Reader + if err := r.Reset(br); err != nil { + t.Fatalf("first reset: %v", err) + } + + // Expect two streams with "hello world\n", then real EOF. + const hello = "hello world\n" + + r.Multistream(false) + data, err := ioutil.ReadAll(&r) + if string(data) != hello || err != nil { + t.Fatalf("first stream = %q, %v, want %q, %v", string(data), err, hello, nil) + } + + if err := r.Reset(br); err != nil { + t.Fatalf("second reset: %v", err) + } + r.Multistream(false) + data, err = ioutil.ReadAll(&r) + if string(data) != hello || err != nil { + t.Fatalf("second stream = %q, %v, want %q, %v", string(data), err, hello, nil) + } + + if err := r.Reset(br); err != io.EOF { + t.Fatalf("third reset: err=%v, want io.EOF", err) + } +} diff --git a/src/compress/gzip/gzip.go b/src/compress/gzip/gzip.go new file mode 100644 index 000000000..5131d128e --- /dev/null +++ b/src/compress/gzip/gzip.go @@ -0,0 +1,272 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "compress/flate" + "errors" + "fmt" + "hash" + "hash/crc32" + "io" +) + +// These constants are copied from the flate package, so that code that imports +// "compress/gzip" does not also have to import "compress/flate". +const ( + NoCompression = flate.NoCompression + BestSpeed = flate.BestSpeed + BestCompression = flate.BestCompression + DefaultCompression = flate.DefaultCompression +) + +// A Writer is an io.WriteCloser. +// Writes to a Writer are compressed and written to w. +type Writer struct { + Header + w io.Writer + level int + wroteHeader bool + compressor *flate.Writer + digest hash.Hash32 + size uint32 + closed bool + buf [10]byte + err error +} + +// NewWriter returns a new Writer. +// Writes to the returned writer are compressed and written to w. +// +// It is the caller's responsibility to call Close on the WriteCloser when done. +// Writes may be buffered and not flushed until Close. +// +// Callers that wish to set the fields in Writer.Header must do so before +// the first call to Write or Close. The Comment and Name header fields are +// UTF-8 strings in Go, but the underlying format requires NUL-terminated ISO +// 8859-1 (Latin-1). NUL or non-Latin-1 runes in those strings will lead to an +// error on Write. +func NewWriter(w io.Writer) *Writer { + z, _ := NewWriterLevel(w, DefaultCompression) + return z +} + +// NewWriterLevel is like NewWriter but specifies the compression level instead +// of assuming DefaultCompression. +// +// The compression level can be DefaultCompression, NoCompression, or any +// integer value between BestSpeed and BestCompression inclusive. The error +// returned will be nil if the level is valid. +func NewWriterLevel(w io.Writer, level int) (*Writer, error) { + if level < DefaultCompression || level > BestCompression { + return nil, fmt.Errorf("gzip: invalid compression level: %d", level) + } + z := new(Writer) + z.init(w, level) + return z, nil +} + +func (z *Writer) init(w io.Writer, level int) { + digest := z.digest + if digest != nil { + digest.Reset() + } else { + digest = crc32.NewIEEE() + } + compressor := z.compressor + if compressor != nil { + compressor.Reset(w) + } + *z = Writer{ + Header: Header{ + OS: 255, // unknown + }, + w: w, + level: level, + digest: digest, + compressor: compressor, + } +} + +// Reset discards the Writer z's state and makes it equivalent to the +// result of its original state from NewWriter or NewWriterLevel, but +// writing to w instead. This permits reusing a Writer rather than +// allocating a new one. +func (z *Writer) Reset(w io.Writer) { + z.init(w, z.level) +} + +// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950). +func put2(p []byte, v uint16) { + p[0] = uint8(v >> 0) + p[1] = uint8(v >> 8) +} + +func put4(p []byte, v uint32) { + p[0] = uint8(v >> 0) + p[1] = uint8(v >> 8) + p[2] = uint8(v >> 16) + p[3] = uint8(v >> 24) +} + +// writeBytes writes a length-prefixed byte slice to z.w. +func (z *Writer) writeBytes(b []byte) error { + if len(b) > 0xffff { + return errors.New("gzip.Write: Extra data is too large") + } + put2(z.buf[0:2], uint16(len(b))) + _, err := z.w.Write(z.buf[0:2]) + if err != nil { + return err + } + _, err = z.w.Write(b) + return err +} + +// writeString writes a UTF-8 string s in GZIP's format to z.w. +// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). +func (z *Writer) writeString(s string) (err error) { + // GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII. + needconv := false + for _, v := range s { + if v == 0 || v > 0xff { + return errors.New("gzip.Write: non-Latin-1 header string") + } + if v > 0x7f { + needconv = true + } + } + if needconv { + b := make([]byte, 0, len(s)) + for _, v := range s { + b = append(b, byte(v)) + } + _, err = z.w.Write(b) + } else { + _, err = io.WriteString(z.w, s) + } + if err != nil { + return err + } + // GZIP strings are NUL-terminated. + z.buf[0] = 0 + _, err = z.w.Write(z.buf[0:1]) + return err +} + +// Write writes a compressed form of p to the underlying io.Writer. The +// compressed bytes are not necessarily flushed until the Writer is closed. +func (z *Writer) Write(p []byte) (int, error) { + if z.err != nil { + return 0, z.err + } + var n int + // Write the GZIP header lazily. + if !z.wroteHeader { + z.wroteHeader = true + z.buf[0] = gzipID1 + z.buf[1] = gzipID2 + z.buf[2] = gzipDeflate + z.buf[3] = 0 + if z.Extra != nil { + z.buf[3] |= 0x04 + } + if z.Name != "" { + z.buf[3] |= 0x08 + } + if z.Comment != "" { + z.buf[3] |= 0x10 + } + put4(z.buf[4:8], uint32(z.ModTime.Unix())) + if z.level == BestCompression { + z.buf[8] = 2 + } else if z.level == BestSpeed { + z.buf[8] = 4 + } else { + z.buf[8] = 0 + } + z.buf[9] = z.OS + n, z.err = z.w.Write(z.buf[0:10]) + if z.err != nil { + return n, z.err + } + if z.Extra != nil { + z.err = z.writeBytes(z.Extra) + if z.err != nil { + return n, z.err + } + } + if z.Name != "" { + z.err = z.writeString(z.Name) + if z.err != nil { + return n, z.err + } + } + if z.Comment != "" { + z.err = z.writeString(z.Comment) + if z.err != nil { + return n, z.err + } + } + if z.compressor == nil { + z.compressor, _ = flate.NewWriter(z.w, z.level) + } + } + z.size += uint32(len(p)) + z.digest.Write(p) + n, z.err = z.compressor.Write(p) + return n, z.err +} + +// Flush flushes any pending compressed data to the underlying writer. +// +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. Flush does +// not return until the data has been written. If the underlying +// writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (z *Writer) Flush() error { + if z.err != nil { + return z.err + } + if z.closed { + return nil + } + if !z.wroteHeader { + z.Write(nil) + if z.err != nil { + return z.err + } + } + z.err = z.compressor.Flush() + return z.err +} + +// Close closes the Writer, flushing any unwritten data to the underlying +// io.Writer, but does not close the underlying io.Writer. +func (z *Writer) Close() error { + if z.err != nil { + return z.err + } + if z.closed { + return nil + } + z.closed = true + if !z.wroteHeader { + z.Write(nil) + if z.err != nil { + return z.err + } + } + z.err = z.compressor.Close() + if z.err != nil { + return z.err + } + put4(z.buf[0:4], z.digest.Sum32()) + put4(z.buf[4:8], z.size) + _, z.err = z.w.Write(z.buf[0:8]) + return z.err +} diff --git a/src/compress/gzip/gzip_test.go b/src/compress/gzip/gzip_test.go new file mode 100644 index 000000000..09271b24e --- /dev/null +++ b/src/compress/gzip/gzip_test.go @@ -0,0 +1,231 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "bufio" + "bytes" + "io/ioutil" + "testing" + "time" +) + +// TestEmpty tests that an empty payload still forms a valid GZIP stream. +func TestEmpty(t *testing.T) { + buf := new(bytes.Buffer) + + if err := NewWriter(buf).Close(); err != nil { + t.Fatalf("Writer.Close: %v", err) + } + + r, err := NewReader(buf) + if err != nil { + t.Fatalf("NewReader: %v", err) + } + b, err := ioutil.ReadAll(r) + if err != nil { + t.Fatalf("ReadAll: %v", err) + } + if len(b) != 0 { + t.Fatalf("got %d bytes, want 0", len(b)) + } + if err := r.Close(); err != nil { + t.Fatalf("Reader.Close: %v", err) + } +} + +// TestRoundTrip tests that gzipping and then gunzipping is the identity +// function. +func TestRoundTrip(t *testing.T) { + buf := new(bytes.Buffer) + + w := NewWriter(buf) + w.Comment = "comment" + w.Extra = []byte("extra") + w.ModTime = time.Unix(1e8, 0) + w.Name = "name" + if _, err := w.Write([]byte("payload")); err != nil { + t.Fatalf("Write: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("Writer.Close: %v", err) + } + + r, err := NewReader(buf) + if err != nil { + t.Fatalf("NewReader: %v", err) + } + b, err := ioutil.ReadAll(r) + if err != nil { + t.Fatalf("ReadAll: %v", err) + } + if string(b) != "payload" { + t.Fatalf("payload is %q, want %q", string(b), "payload") + } + if r.Comment != "comment" { + t.Fatalf("comment is %q, want %q", r.Comment, "comment") + } + if string(r.Extra) != "extra" { + t.Fatalf("extra is %q, want %q", r.Extra, "extra") + } + if r.ModTime.Unix() != 1e8 { + t.Fatalf("mtime is %d, want %d", r.ModTime.Unix(), uint32(1e8)) + } + if r.Name != "name" { + t.Fatalf("name is %q, want %q", r.Name, "name") + } + if err := r.Close(); err != nil { + t.Fatalf("Reader.Close: %v", err) + } +} + +// TestLatin1 tests the internal functions for converting to and from Latin-1. +func TestLatin1(t *testing.T) { + latin1 := []byte{0xc4, 'u', 0xdf, 'e', 'r', 'u', 'n', 'g', 0} + utf8 := "Äußerung" + z := Reader{r: bufio.NewReader(bytes.NewReader(latin1))} + s, err := z.readString() + if err != nil { + t.Fatalf("readString: %v", err) + } + if s != utf8 { + t.Fatalf("read latin-1: got %q, want %q", s, utf8) + } + + buf := bytes.NewBuffer(make([]byte, 0, len(latin1))) + c := Writer{w: buf} + if err = c.writeString(utf8); err != nil { + t.Fatalf("writeString: %v", err) + } + s = buf.String() + if s != string(latin1) { + t.Fatalf("write utf-8: got %q, want %q", s, string(latin1)) + } +} + +// TestLatin1RoundTrip tests that metadata that is representable in Latin-1 +// survives a round trip. +func TestLatin1RoundTrip(t *testing.T) { + testCases := []struct { + name string + ok bool + }{ + {"", true}, + {"ASCII is OK", true}, + {"unless it contains a NUL\x00", false}, + {"no matter where \x00 occurs", false}, + {"\x00\x00\x00", false}, + {"Látin-1 also passes (U+00E1)", true}, + {"but LĀtin Extended-A (U+0100) does not", false}, + {"neither does 日本語", false}, + {"invalid UTF-8 also \xffails", false}, + {"\x00 as does Látin-1 with NUL", false}, + } + for _, tc := range testCases { + buf := new(bytes.Buffer) + + w := NewWriter(buf) + w.Name = tc.name + err := w.Close() + if (err == nil) != tc.ok { + t.Errorf("Writer.Close: name = %q, err = %v", tc.name, err) + continue + } + if !tc.ok { + continue + } + + r, err := NewReader(buf) + if err != nil { + t.Errorf("NewReader: %v", err) + continue + } + _, err = ioutil.ReadAll(r) + if err != nil { + t.Errorf("ReadAll: %v", err) + continue + } + if r.Name != tc.name { + t.Errorf("name is %q, want %q", r.Name, tc.name) + continue + } + if err := r.Close(); err != nil { + t.Errorf("Reader.Close: %v", err) + continue + } + } +} + +func TestWriterFlush(t *testing.T) { + buf := new(bytes.Buffer) + + w := NewWriter(buf) + w.Comment = "comment" + w.Extra = []byte("extra") + w.ModTime = time.Unix(1e8, 0) + w.Name = "name" + + n0 := buf.Len() + if n0 != 0 { + t.Fatalf("buffer size = %d before writes; want 0", n0) + } + + if err := w.Flush(); err != nil { + t.Fatal(err) + } + + n1 := buf.Len() + if n1 == 0 { + t.Fatal("no data after first flush") + } + + w.Write([]byte("x")) + + n2 := buf.Len() + if n1 != n2 { + t.Fatalf("after writing a single byte, size changed from %d to %d; want no change", n1, n2) + } + + if err := w.Flush(); err != nil { + t.Fatal(err) + } + + n3 := buf.Len() + if n2 == n3 { + t.Fatal("Flush didn't flush any data") + } +} + +// Multiple gzip files concatenated form a valid gzip file. +func TestConcat(t *testing.T) { + var buf bytes.Buffer + w := NewWriter(&buf) + w.Write([]byte("hello ")) + w.Close() + w = NewWriter(&buf) + w.Write([]byte("world\n")) + w.Close() + + r, err := NewReader(&buf) + data, err := ioutil.ReadAll(r) + if string(data) != "hello world\n" || err != nil { + t.Fatalf("ReadAll = %q, %v, want %q, nil", data, err, "hello world") + } +} + +func TestWriterReset(t *testing.T) { + buf := new(bytes.Buffer) + buf2 := new(bytes.Buffer) + z := NewWriter(buf) + msg := []byte("hello world") + z.Write(msg) + z.Close() + z.Reset(buf2) + z.Write(msg) + z.Close() + if buf.String() != buf2.String() { + t.Errorf("buf2 %q != original buf of %q", buf2.String(), buf.String()) + } +} diff --git a/src/compress/gzip/testdata/issue6550.gz b/src/compress/gzip/testdata/issue6550.gz Binary files differnew file mode 100644 index 000000000..57972b636 --- /dev/null +++ b/src/compress/gzip/testdata/issue6550.gz |