diff options
Diffstat (limited to 'src/pkg/compress/gzip')
-rw-r--r-- | src/pkg/compress/gzip/Makefile | 12 | ||||
-rw-r--r-- | src/pkg/compress/gzip/gunzip.go | 89 | ||||
-rw-r--r-- | src/pkg/compress/gzip/gunzip_test.go | 9 | ||||
-rw-r--r-- | src/pkg/compress/gzip/gzip.go | 100 | ||||
-rw-r--r-- | src/pkg/compress/gzip/gzip_test.go | 207 |
5 files changed, 260 insertions, 157 deletions
diff --git a/src/pkg/compress/gzip/Makefile b/src/pkg/compress/gzip/Makefile deleted file mode 100644 index b671fc72c..000000000 --- a/src/pkg/compress/gzip/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright 2009 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -include ../../../Make.inc - -TARG=compress/gzip -GOFILES=\ - gunzip.go\ - gzip.go\ - -include ../../../Make.pkg diff --git a/src/pkg/compress/gzip/gunzip.go b/src/pkg/compress/gzip/gunzip.go index 6ac9293d7..33736f635 100644 --- a/src/pkg/compress/gzip/gunzip.go +++ b/src/pkg/compress/gzip/gunzip.go @@ -9,15 +9,13 @@ package gzip import ( "bufio" "compress/flate" + "errors" "hash" "hash/crc32" "io" - "os" + "time" ) -// BUG(nigeltao): Comments and Names don't properly map UTF-8 character codes outside of -// the 0x00-0x7f range to ISO 8859-1 (Latin-1). - const ( gzipID1 = 0x1f gzipID2 = 0x8b @@ -36,34 +34,38 @@ func makeReader(r io.Reader) flate.Reader { return bufio.NewReader(r) } -var HeaderError = os.NewError("invalid gzip header") -var ChecksumError = os.NewError("gzip checksum error") +var ( + // ErrChecksum is returned when reading GZIP data that has an invalid checksum. + ErrChecksum = errors.New("gzip: invalid checksum") + // ErrHeader is returned when reading GZIP data that has an invalid header. + ErrHeader = errors.New("gzip: invalid header") +) // The gzip file stores a header giving metadata about the compressed file. -// That header is exposed as the fields of the Compressor and Decompressor structs. +// That header is exposed as the fields of the Writer and Reader structs. type Header struct { - Comment string // comment - Extra []byte // "extra data" - Mtime uint32 // modification time (seconds since January 1, 1970) - Name string // file name - OS byte // operating system type + Comment string // comment + Extra []byte // "extra data" + ModTime time.Time // modification time + Name string // file name + OS byte // operating system type } -// An Decompressor is an io.Reader that can be read to retrieve +// A Reader is an io.Reader that can be read to retrieve // uncompressed data from a gzip-format compressed file. // // In general, a gzip file can be a concatenation of gzip files, -// each with its own header. Reads from the Decompressor +// each with its own header. Reads from the Reader // return the concatenation of the uncompressed data of each. -// Only the first header is recorded in the Decompressor fields. +// Only the first header is recorded in the Reader fields. // // Gzip files store a length and checksum of the uncompressed data. -// The Decompressor will return a ChecksumError when Read +// The Reader will return a ErrChecksum when Read // reaches the end of the uncompressed data if it does not // have the expected length or checksum. Clients should treat data -// returned by Read as tentative until they receive the successful -// (zero length, nil error) Read marking the end of the data. -type Decompressor struct { +// returned by Read as tentative until they receive the io.EOF +// marking the end of the data. +type Reader struct { Header r flate.Reader decompressor io.ReadCloser @@ -71,18 +73,17 @@ type Decompressor struct { size uint32 flg byte buf [512]byte - err os.Error + err error } -// NewReader creates a new Decompressor reading the given reader. +// NewReader creates a new Reader reading the given reader. // The implementation buffers input and may read more data than necessary from r. -// It is the caller's responsibility to call Close on the Decompressor when done. -func NewReader(r io.Reader) (*Decompressor, os.Error) { - z := new(Decompressor) +// It is the caller's responsibility to call Close on the Reader when done. +func NewReader(r io.Reader) (*Reader, error) { + z := new(Reader) z.r = makeReader(r) z.digest = crc32.NewIEEE() if err := z.readHeader(true); err != nil { - z.err = err return nil, err } return z, nil @@ -93,26 +94,36 @@ func get4(p []byte) uint32 { return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 } -func (z *Decompressor) readString() (string, os.Error) { - var err os.Error +func (z *Reader) readString() (string, error) { + var err error + needconv := false for i := 0; ; i++ { if i >= len(z.buf) { - return "", HeaderError + return "", ErrHeader } z.buf[i], err = z.r.ReadByte() if err != nil { return "", err } + if z.buf[i] > 0x7f { + needconv = true + } if z.buf[i] == 0 { // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). - // TODO(nigeltao): Convert from ISO 8859-1 (Latin-1) to UTF-8. + if needconv { + s := make([]rune, 0, i) + for _, v := range z.buf[0:i] { + s = append(s, rune(v)) + } + return string(s), nil + } return string(z.buf[0:i]), nil } } panic("not reached") } -func (z *Decompressor) read2() (uint32, os.Error) { +func (z *Reader) read2() (uint32, error) { _, err := io.ReadFull(z.r, z.buf[0:2]) if err != nil { return 0, err @@ -120,17 +131,17 @@ func (z *Decompressor) read2() (uint32, os.Error) { return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil } -func (z *Decompressor) readHeader(save bool) os.Error { +func (z *Reader) readHeader(save bool) error { _, err := io.ReadFull(z.r, z.buf[0:10]) if err != nil { return err } if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate { - return HeaderError + return ErrHeader } z.flg = z.buf[3] if save { - z.Mtime = get4(z.buf[4:8]) + z.ModTime = time.Unix(int64(get4(z.buf[4:8])), 0) // z.buf[8] is xfl, ignored z.OS = z.buf[9] } @@ -177,7 +188,7 @@ func (z *Decompressor) readHeader(save bool) os.Error { } sum := z.digest.Sum32() & 0xFFFF if n != sum { - return HeaderError + return ErrHeader } } @@ -186,7 +197,7 @@ func (z *Decompressor) readHeader(save bool) os.Error { return nil } -func (z *Decompressor) Read(p []byte) (n int, err os.Error) { +func (z *Reader) Read(p []byte) (n int, err error) { if z.err != nil { return 0, z.err } @@ -197,7 +208,7 @@ func (z *Decompressor) Read(p []byte) (n int, err os.Error) { n, err = z.decompressor.Read(p) z.digest.Write(p[0:n]) z.size += uint32(n) - if n != 0 || err != os.EOF { + if n != 0 || err != io.EOF { z.err = err return } @@ -210,7 +221,7 @@ func (z *Decompressor) Read(p []byte) (n int, err os.Error) { crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8]) sum := z.digest.Sum32() if sum != crc32 || isize != z.size { - z.err = ChecksumError + z.err = ErrChecksum return 0, z.err } @@ -226,5 +237,5 @@ func (z *Decompressor) Read(p []byte) (n int, err os.Error) { return z.Read(p) } -// Calling Close does not close the wrapped io.Reader originally passed to NewReader. -func (z *Decompressor) Close() os.Error { return z.decompressor.Close() } +// Close closes the Reader. It does not close the underlying io.Reader. +func (z *Reader) Close() error { return z.decompressor.Close() } diff --git a/src/pkg/compress/gzip/gunzip_test.go b/src/pkg/compress/gzip/gunzip_test.go index 1c08c7374..a1333580d 100644 --- a/src/pkg/compress/gzip/gunzip_test.go +++ b/src/pkg/compress/gzip/gunzip_test.go @@ -7,7 +7,6 @@ package gzip import ( "bytes" "io" - "os" "testing" ) @@ -16,7 +15,7 @@ type gunzipTest struct { desc string raw string gzip []byte - err os.Error + err error } var gunzipTests = []gunzipTest{ @@ -233,7 +232,7 @@ var gunzipTests = []gunzipTest{ 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00, 0x00, 0x00, 'g', 'a', 'r', 'b', 'a', 'g', 'e', '!', '!', '!', }, - HeaderError, + ErrHeader, }, { // has 1 non-empty fixed huffman block not enough header "hello.txt", @@ -261,7 +260,7 @@ var gunzipTests = []gunzipTest{ 0x02, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, }, - ChecksumError, + ErrChecksum, }, { // has 1 non-empty fixed huffman block but corrupt size "hello.txt", @@ -275,7 +274,7 @@ var gunzipTests = []gunzipTest{ 0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0xff, 0x00, 0x00, 0x00, }, - ChecksumError, + ErrChecksum, }, } diff --git a/src/pkg/compress/gzip/gzip.go b/src/pkg/compress/gzip/gzip.go index 8860d10af..3035dfffc 100644 --- a/src/pkg/compress/gzip/gzip.go +++ b/src/pkg/compress/gzip/gzip.go @@ -6,10 +6,11 @@ package gzip import ( "compress/flate" + "errors" + "fmt" "hash" "hash/crc32" "io" - "os" ) // These constants are copied from the flate package, so that code that imports @@ -21,9 +22,9 @@ const ( DefaultCompression = flate.DefaultCompression ) -// A Compressor is an io.WriteCloser that satisfies writes by compressing data written +// A Writer is an io.WriteCloser that satisfies writes by compressing data written // to its wrapped io.Writer. -type Compressor struct { +type Writer struct { Header w io.Writer level int @@ -32,28 +33,43 @@ type Compressor struct { size uint32 closed bool buf [10]byte - err os.Error + err error } -// NewWriter calls NewWriterLevel with the default compression level. -func NewWriter(w io.Writer) (*Compressor, os.Error) { - return NewWriterLevel(w, DefaultCompression) +// NewWriter creates a new Writer that satisfies writes by compressing data +// written to w. +// +// It is the caller's responsibility to call Close on the WriteCloser when done. +// Writes may be buffered and not flushed until Close. +// +// Callers that wish to set the fields in Writer.Header must do so before +// the first call to Write or Close. The Comment and Name header fields are +// UTF-8 strings in Go, but the underlying format requires NUL-terminated ISO +// 8859-1 (Latin-1). NUL or non-Latin-1 runes in those strings will lead to an +// error on Write. +func NewWriter(w io.Writer) *Writer { + z, _ := NewWriterLevel(w, DefaultCompression) + return z } -// NewWriterLevel creates a new Compressor writing to the given writer. -// Writes may be buffered and not flushed until Close. -// Callers that wish to set the fields in Compressor.Header must -// do so before the first call to Write or Close. -// It is the caller's responsibility to call Close on the WriteCloser when done. -// level is the compression level, which can be DefaultCompression, NoCompression, -// or any integer value between BestSpeed and BestCompression (inclusive). -func NewWriterLevel(w io.Writer, level int) (*Compressor, os.Error) { - z := new(Compressor) - z.OS = 255 // unknown - z.w = w - z.level = level - z.digest = crc32.NewIEEE() - return z, nil +// NewWriterLevel is like NewWriter but specifies the compression level instead +// of assuming DefaultCompression. +// +// The compression level can be DefaultCompression, NoCompression, or any +// integer value between BestSpeed and BestCompression inclusive. The error +// returned will be nil if the level is valid. +func NewWriterLevel(w io.Writer, level int) (*Writer, error) { + if level < DefaultCompression || level > BestCompression { + return nil, fmt.Errorf("gzip: invalid compression level: %d", level) + } + return &Writer{ + Header: Header{ + OS: 255, // unknown + }, + w: w, + level: level, + digest: crc32.NewIEEE(), + }, nil } // GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950). @@ -70,9 +86,9 @@ func put4(p []byte, v uint32) { } // writeBytes writes a length-prefixed byte slice to z.w. -func (z *Compressor) writeBytes(b []byte) os.Error { +func (z *Writer) writeBytes(b []byte) error { if len(b) > 0xffff { - return os.NewError("gzip.Write: Extra data is too large") + return errors.New("gzip.Write: Extra data is too large") } put2(z.buf[0:2], uint16(len(b))) _, err := z.w.Write(z.buf[0:2]) @@ -83,16 +99,28 @@ func (z *Compressor) writeBytes(b []byte) os.Error { return err } -// writeString writes a string (in ISO 8859-1 (Latin-1) format) to z.w. -func (z *Compressor) writeString(s string) os.Error { - // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). - // TODO(nigeltao): Convert from UTF-8 to ISO 8859-1 (Latin-1). +// writeString writes a UTF-8 string s in GZIP's format to z.w. +// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). +func (z *Writer) writeString(s string) (err error) { + // GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII. + needconv := false for _, v := range s { - if v == 0 || v > 0x7f { - return os.NewError("gzip.Write: non-ASCII header string") + if v == 0 || v > 0xff { + return errors.New("gzip.Write: non-Latin-1 header string") + } + if v > 0x7f { + needconv = true + } + } + if needconv { + b := make([]byte, 0, len(s)) + for _, v := range s { + b = append(b, byte(v)) } + _, err = z.w.Write(b) + } else { + _, err = io.WriteString(z.w, s) } - _, err := io.WriteString(z.w, s) if err != nil { return err } @@ -102,7 +130,9 @@ func (z *Compressor) writeString(s string) os.Error { return err } -func (z *Compressor) Write(p []byte) (int, os.Error) { +// Write writes a compressed form of p to the underlying io.Writer. The +// compressed bytes are not necessarily flushed until the Writer is closed. +func (z *Writer) Write(p []byte) (int, error) { if z.err != nil { return 0, z.err } @@ -122,7 +152,7 @@ func (z *Compressor) Write(p []byte) (int, os.Error) { if z.Comment != "" { z.buf[3] |= 0x10 } - put4(z.buf[4:8], z.Mtime) + put4(z.buf[4:8], uint32(z.ModTime.Unix())) if z.level == BestCompression { z.buf[8] = 2 } else if z.level == BestSpeed { @@ -153,7 +183,7 @@ func (z *Compressor) Write(p []byte) (int, os.Error) { return n, z.err } } - z.compressor = flate.NewWriter(z.w, z.level) + z.compressor, _ = flate.NewWriter(z.w, z.level) } z.size += uint32(len(p)) z.digest.Write(p) @@ -161,8 +191,8 @@ func (z *Compressor) Write(p []byte) (int, os.Error) { return n, z.err } -// Calling Close does not close the wrapped io.Writer originally passed to NewWriter. -func (z *Compressor) Close() os.Error { +// Close closes the Writer. It does not close the underlying io.Writer. +func (z *Writer) Close() error { if z.err != nil { return z.err } diff --git a/src/pkg/compress/gzip/gzip_test.go b/src/pkg/compress/gzip/gzip_test.go index 121e627e6..6f7b59364 100644 --- a/src/pkg/compress/gzip/gzip_test.go +++ b/src/pkg/compress/gzip/gzip_test.go @@ -5,80 +5,155 @@ package gzip import ( - "io" + "bufio" + "bytes" "io/ioutil" "testing" + "time" ) -// pipe creates two ends of a pipe that gzip and gunzip, and runs dfunc at the -// writer end and cfunc at the reader end. -func pipe(t *testing.T, dfunc func(*Compressor), cfunc func(*Decompressor)) { - piper, pipew := io.Pipe() - defer piper.Close() - go func() { - defer pipew.Close() - compressor, err := NewWriter(pipew) - if err != nil { - t.Fatalf("%v", err) - } - defer compressor.Close() - dfunc(compressor) - }() - decompressor, err := NewReader(piper) +// TestEmpty tests that an empty payload still forms a valid GZIP stream. +func TestEmpty(t *testing.T) { + buf := new(bytes.Buffer) + + if err := NewWriter(buf).Close(); err != nil { + t.Fatalf("Writer.Close: %v", err) + } + + r, err := NewReader(buf) if err != nil { - t.Fatalf("%v", err) + t.Fatalf("NewReader: %v", err) + } + b, err := ioutil.ReadAll(r) + if err != nil { + t.Fatalf("ReadAll: %v", err) + } + if len(b) != 0 { + t.Fatalf("got %d bytes, want 0", len(b)) + } + if err := r.Close(); err != nil { + t.Fatalf("Reader.Close: %v", err) } - defer decompressor.Close() - cfunc(decompressor) } -// Tests that an empty payload still forms a valid GZIP stream. -func TestEmpty(t *testing.T) { - pipe(t, - func(compressor *Compressor) {}, - func(decompressor *Decompressor) { - b, err := ioutil.ReadAll(decompressor) - if err != nil { - t.Fatalf("%v", err) - } - if len(b) != 0 { - t.Fatalf("did not read an empty slice") - } - }) +// TestRoundTrip tests that gzipping and then gunzipping is the identity +// function. +func TestRoundTrip(t *testing.T) { + buf := new(bytes.Buffer) + + w := NewWriter(buf) + w.Comment = "comment" + w.Extra = []byte("extra") + w.ModTime = time.Unix(1e8, 0) + w.Name = "name" + if _, err := w.Write([]byte("payload")); err != nil { + t.Fatalf("Write: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("Writer.Close: %v", err) + } + + r, err := NewReader(buf) + if err != nil { + t.Fatalf("NewReader: %v", err) + } + b, err := ioutil.ReadAll(r) + if err != nil { + t.Fatalf("ReadAll: %v", err) + } + if string(b) != "payload" { + t.Fatalf("payload is %q, want %q", string(b), "payload") + } + if r.Comment != "comment" { + t.Fatalf("comment is %q, want %q", r.Comment, "comment") + } + if string(r.Extra) != "extra" { + t.Fatalf("extra is %q, want %q", r.Extra, "extra") + } + if r.ModTime.Unix() != 1e8 { + t.Fatalf("mtime is %d, want %d", r.ModTime.Unix(), uint32(1e8)) + } + if r.Name != "name" { + t.Fatalf("name is %q, want %q", r.Name, "name") + } + if err := r.Close(); err != nil { + t.Fatalf("Reader.Close: %v", err) + } } -// Tests that gzipping and then gunzipping is the identity function. -func TestWriter(t *testing.T) { - pipe(t, - func(compressor *Compressor) { - compressor.Comment = "comment" - compressor.Extra = []byte("extra") - compressor.Mtime = 1e8 - compressor.Name = "name" - _, err := compressor.Write([]byte("payload")) - if err != nil { - t.Fatalf("%v", err) - } - }, - func(decompressor *Decompressor) { - b, err := ioutil.ReadAll(decompressor) - if err != nil { - t.Fatalf("%v", err) - } - if string(b) != "payload" { - t.Fatalf("payload is %q, want %q", string(b), "payload") - } - if decompressor.Comment != "comment" { - t.Fatalf("comment is %q, want %q", decompressor.Comment, "comment") - } - if string(decompressor.Extra) != "extra" { - t.Fatalf("extra is %q, want %q", decompressor.Extra, "extra") - } - if decompressor.Mtime != 1e8 { - t.Fatalf("mtime is %d, want %d", decompressor.Mtime, uint32(1e8)) - } - if decompressor.Name != "name" { - t.Fatalf("name is %q, want %q", decompressor.Name, "name") - } - }) +// TestLatin1 tests the internal functions for converting to and from Latin-1. +func TestLatin1(t *testing.T) { + latin1 := []byte{0xc4, 'u', 0xdf, 'e', 'r', 'u', 'n', 'g', 0} + utf8 := "Äußerung" + z := Reader{r: bufio.NewReader(bytes.NewBuffer(latin1))} + s, err := z.readString() + if err != nil { + t.Fatalf("readString: %v", err) + } + if s != utf8 { + t.Fatalf("read latin-1: got %q, want %q", s, utf8) + } + + buf := bytes.NewBuffer(make([]byte, 0, len(latin1))) + c := Writer{w: buf} + if err = c.writeString(utf8); err != nil { + t.Fatalf("writeString: %v", err) + } + s = buf.String() + if s != string(latin1) { + t.Fatalf("write utf-8: got %q, want %q", s, string(latin1)) + } +} + +// TestLatin1RoundTrip tests that metadata that is representable in Latin-1 +// survives a round trip. +func TestLatin1RoundTrip(t *testing.T) { + testCases := []struct { + name string + ok bool + }{ + {"", true}, + {"ASCII is OK", true}, + {"unless it contains a NUL\x00", false}, + {"no matter where \x00 occurs", false}, + {"\x00\x00\x00", false}, + {"Látin-1 also passes (U+00E1)", true}, + {"but LĀtin Extended-A (U+0100) does not", false}, + {"neither does 日本語", false}, + {"invalid UTF-8 also \xffails", false}, + {"\x00 as does Látin-1 with NUL", false}, + } + for _, tc := range testCases { + buf := new(bytes.Buffer) + + w := NewWriter(buf) + w.Name = tc.name + err := w.Close() + if (err == nil) != tc.ok { + t.Errorf("Writer.Close: name = %q, err = %v", tc.name, err) + continue + } + if !tc.ok { + continue + } + + r, err := NewReader(buf) + if err != nil { + t.Errorf("NewReader: %v", err) + continue + } + _, err = ioutil.ReadAll(r) + if err != nil { + t.Errorf("ReadAll: %v", err) + continue + } + if r.Name != tc.name { + t.Errorf("name is %q, want %q", r.Name, tc.name) + continue + } + if err := r.Close(); err != nil { + t.Errorf("Reader.Close: %v", err) + continue + } + } } |