summaryrefslogtreecommitdiff
path: root/src/pkg/compress/gzip
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/compress/gzip')
-rw-r--r--src/pkg/compress/gzip/Makefile12
-rw-r--r--src/pkg/compress/gzip/gunzip.go89
-rw-r--r--src/pkg/compress/gzip/gunzip_test.go9
-rw-r--r--src/pkg/compress/gzip/gzip.go100
-rw-r--r--src/pkg/compress/gzip/gzip_test.go207
5 files changed, 260 insertions, 157 deletions
diff --git a/src/pkg/compress/gzip/Makefile b/src/pkg/compress/gzip/Makefile
deleted file mode 100644
index b671fc72c..000000000
--- a/src/pkg/compress/gzip/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# Copyright 2009 The Go Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-include ../../../Make.inc
-
-TARG=compress/gzip
-GOFILES=\
- gunzip.go\
- gzip.go\
-
-include ../../../Make.pkg
diff --git a/src/pkg/compress/gzip/gunzip.go b/src/pkg/compress/gzip/gunzip.go
index 6ac9293d7..33736f635 100644
--- a/src/pkg/compress/gzip/gunzip.go
+++ b/src/pkg/compress/gzip/gunzip.go
@@ -9,15 +9,13 @@ package gzip
import (
"bufio"
"compress/flate"
+ "errors"
"hash"
"hash/crc32"
"io"
- "os"
+ "time"
)
-// BUG(nigeltao): Comments and Names don't properly map UTF-8 character codes outside of
-// the 0x00-0x7f range to ISO 8859-1 (Latin-1).
-
const (
gzipID1 = 0x1f
gzipID2 = 0x8b
@@ -36,34 +34,38 @@ func makeReader(r io.Reader) flate.Reader {
return bufio.NewReader(r)
}
-var HeaderError = os.NewError("invalid gzip header")
-var ChecksumError = os.NewError("gzip checksum error")
+var (
+ // ErrChecksum is returned when reading GZIP data that has an invalid checksum.
+ ErrChecksum = errors.New("gzip: invalid checksum")
+ // ErrHeader is returned when reading GZIP data that has an invalid header.
+ ErrHeader = errors.New("gzip: invalid header")
+)
// The gzip file stores a header giving metadata about the compressed file.
-// That header is exposed as the fields of the Compressor and Decompressor structs.
+// That header is exposed as the fields of the Writer and Reader structs.
type Header struct {
- Comment string // comment
- Extra []byte // "extra data"
- Mtime uint32 // modification time (seconds since January 1, 1970)
- Name string // file name
- OS byte // operating system type
+ Comment string // comment
+ Extra []byte // "extra data"
+ ModTime time.Time // modification time
+ Name string // file name
+ OS byte // operating system type
}
-// An Decompressor is an io.Reader that can be read to retrieve
+// A Reader is an io.Reader that can be read to retrieve
// uncompressed data from a gzip-format compressed file.
//
// In general, a gzip file can be a concatenation of gzip files,
-// each with its own header. Reads from the Decompressor
+// each with its own header. Reads from the Reader
// return the concatenation of the uncompressed data of each.
-// Only the first header is recorded in the Decompressor fields.
+// Only the first header is recorded in the Reader fields.
//
// Gzip files store a length and checksum of the uncompressed data.
-// The Decompressor will return a ChecksumError when Read
+// The Reader will return a ErrChecksum when Read
// reaches the end of the uncompressed data if it does not
// have the expected length or checksum. Clients should treat data
-// returned by Read as tentative until they receive the successful
-// (zero length, nil error) Read marking the end of the data.
-type Decompressor struct {
+// returned by Read as tentative until they receive the io.EOF
+// marking the end of the data.
+type Reader struct {
Header
r flate.Reader
decompressor io.ReadCloser
@@ -71,18 +73,17 @@ type Decompressor struct {
size uint32
flg byte
buf [512]byte
- err os.Error
+ err error
}
-// NewReader creates a new Decompressor reading the given reader.
+// NewReader creates a new Reader reading the given reader.
// The implementation buffers input and may read more data than necessary from r.
-// It is the caller's responsibility to call Close on the Decompressor when done.
-func NewReader(r io.Reader) (*Decompressor, os.Error) {
- z := new(Decompressor)
+// It is the caller's responsibility to call Close on the Reader when done.
+func NewReader(r io.Reader) (*Reader, error) {
+ z := new(Reader)
z.r = makeReader(r)
z.digest = crc32.NewIEEE()
if err := z.readHeader(true); err != nil {
- z.err = err
return nil, err
}
return z, nil
@@ -93,26 +94,36 @@ func get4(p []byte) uint32 {
return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
}
-func (z *Decompressor) readString() (string, os.Error) {
- var err os.Error
+func (z *Reader) readString() (string, error) {
+ var err error
+ needconv := false
for i := 0; ; i++ {
if i >= len(z.buf) {
- return "", HeaderError
+ return "", ErrHeader
}
z.buf[i], err = z.r.ReadByte()
if err != nil {
return "", err
}
+ if z.buf[i] > 0x7f {
+ needconv = true
+ }
if z.buf[i] == 0 {
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
- // TODO(nigeltao): Convert from ISO 8859-1 (Latin-1) to UTF-8.
+ if needconv {
+ s := make([]rune, 0, i)
+ for _, v := range z.buf[0:i] {
+ s = append(s, rune(v))
+ }
+ return string(s), nil
+ }
return string(z.buf[0:i]), nil
}
}
panic("not reached")
}
-func (z *Decompressor) read2() (uint32, os.Error) {
+func (z *Reader) read2() (uint32, error) {
_, err := io.ReadFull(z.r, z.buf[0:2])
if err != nil {
return 0, err
@@ -120,17 +131,17 @@ func (z *Decompressor) read2() (uint32, os.Error) {
return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil
}
-func (z *Decompressor) readHeader(save bool) os.Error {
+func (z *Reader) readHeader(save bool) error {
_, err := io.ReadFull(z.r, z.buf[0:10])
if err != nil {
return err
}
if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
- return HeaderError
+ return ErrHeader
}
z.flg = z.buf[3]
if save {
- z.Mtime = get4(z.buf[4:8])
+ z.ModTime = time.Unix(int64(get4(z.buf[4:8])), 0)
// z.buf[8] is xfl, ignored
z.OS = z.buf[9]
}
@@ -177,7 +188,7 @@ func (z *Decompressor) readHeader(save bool) os.Error {
}
sum := z.digest.Sum32() & 0xFFFF
if n != sum {
- return HeaderError
+ return ErrHeader
}
}
@@ -186,7 +197,7 @@ func (z *Decompressor) readHeader(save bool) os.Error {
return nil
}
-func (z *Decompressor) Read(p []byte) (n int, err os.Error) {
+func (z *Reader) Read(p []byte) (n int, err error) {
if z.err != nil {
return 0, z.err
}
@@ -197,7 +208,7 @@ func (z *Decompressor) Read(p []byte) (n int, err os.Error) {
n, err = z.decompressor.Read(p)
z.digest.Write(p[0:n])
z.size += uint32(n)
- if n != 0 || err != os.EOF {
+ if n != 0 || err != io.EOF {
z.err = err
return
}
@@ -210,7 +221,7 @@ func (z *Decompressor) Read(p []byte) (n int, err os.Error) {
crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8])
sum := z.digest.Sum32()
if sum != crc32 || isize != z.size {
- z.err = ChecksumError
+ z.err = ErrChecksum
return 0, z.err
}
@@ -226,5 +237,5 @@ func (z *Decompressor) Read(p []byte) (n int, err os.Error) {
return z.Read(p)
}
-// Calling Close does not close the wrapped io.Reader originally passed to NewReader.
-func (z *Decompressor) Close() os.Error { return z.decompressor.Close() }
+// Close closes the Reader. It does not close the underlying io.Reader.
+func (z *Reader) Close() error { return z.decompressor.Close() }
diff --git a/src/pkg/compress/gzip/gunzip_test.go b/src/pkg/compress/gzip/gunzip_test.go
index 1c08c7374..a1333580d 100644
--- a/src/pkg/compress/gzip/gunzip_test.go
+++ b/src/pkg/compress/gzip/gunzip_test.go
@@ -7,7 +7,6 @@ package gzip
import (
"bytes"
"io"
- "os"
"testing"
)
@@ -16,7 +15,7 @@ type gunzipTest struct {
desc string
raw string
gzip []byte
- err os.Error
+ err error
}
var gunzipTests = []gunzipTest{
@@ -233,7 +232,7 @@ var gunzipTests = []gunzipTest{
0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0x0c, 0x00,
0x00, 0x00, 'g', 'a', 'r', 'b', 'a', 'g', 'e', '!', '!', '!',
},
- HeaderError,
+ ErrHeader,
},
{ // has 1 non-empty fixed huffman block not enough header
"hello.txt",
@@ -261,7 +260,7 @@ var gunzipTests = []gunzipTest{
0x02, 0x00, 0xff, 0xff, 0xff, 0xff, 0x0c, 0x00,
0x00, 0x00,
},
- ChecksumError,
+ ErrChecksum,
},
{ // has 1 non-empty fixed huffman block but corrupt size
"hello.txt",
@@ -275,7 +274,7 @@ var gunzipTests = []gunzipTest{
0x02, 0x00, 0x2d, 0x3b, 0x08, 0xaf, 0xff, 0x00,
0x00, 0x00,
},
- ChecksumError,
+ ErrChecksum,
},
}
diff --git a/src/pkg/compress/gzip/gzip.go b/src/pkg/compress/gzip/gzip.go
index 8860d10af..3035dfffc 100644
--- a/src/pkg/compress/gzip/gzip.go
+++ b/src/pkg/compress/gzip/gzip.go
@@ -6,10 +6,11 @@ package gzip
import (
"compress/flate"
+ "errors"
+ "fmt"
"hash"
"hash/crc32"
"io"
- "os"
)
// These constants are copied from the flate package, so that code that imports
@@ -21,9 +22,9 @@ const (
DefaultCompression = flate.DefaultCompression
)
-// A Compressor is an io.WriteCloser that satisfies writes by compressing data written
+// A Writer is an io.WriteCloser that satisfies writes by compressing data written
// to its wrapped io.Writer.
-type Compressor struct {
+type Writer struct {
Header
w io.Writer
level int
@@ -32,28 +33,43 @@ type Compressor struct {
size uint32
closed bool
buf [10]byte
- err os.Error
+ err error
}
-// NewWriter calls NewWriterLevel with the default compression level.
-func NewWriter(w io.Writer) (*Compressor, os.Error) {
- return NewWriterLevel(w, DefaultCompression)
+// NewWriter creates a new Writer that satisfies writes by compressing data
+// written to w.
+//
+// It is the caller's responsibility to call Close on the WriteCloser when done.
+// Writes may be buffered and not flushed until Close.
+//
+// Callers that wish to set the fields in Writer.Header must do so before
+// the first call to Write or Close. The Comment and Name header fields are
+// UTF-8 strings in Go, but the underlying format requires NUL-terminated ISO
+// 8859-1 (Latin-1). NUL or non-Latin-1 runes in those strings will lead to an
+// error on Write.
+func NewWriter(w io.Writer) *Writer {
+ z, _ := NewWriterLevel(w, DefaultCompression)
+ return z
}
-// NewWriterLevel creates a new Compressor writing to the given writer.
-// Writes may be buffered and not flushed until Close.
-// Callers that wish to set the fields in Compressor.Header must
-// do so before the first call to Write or Close.
-// It is the caller's responsibility to call Close on the WriteCloser when done.
-// level is the compression level, which can be DefaultCompression, NoCompression,
-// or any integer value between BestSpeed and BestCompression (inclusive).
-func NewWriterLevel(w io.Writer, level int) (*Compressor, os.Error) {
- z := new(Compressor)
- z.OS = 255 // unknown
- z.w = w
- z.level = level
- z.digest = crc32.NewIEEE()
- return z, nil
+// NewWriterLevel is like NewWriter but specifies the compression level instead
+// of assuming DefaultCompression.
+//
+// The compression level can be DefaultCompression, NoCompression, or any
+// integer value between BestSpeed and BestCompression inclusive. The error
+// returned will be nil if the level is valid.
+func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
+ if level < DefaultCompression || level > BestCompression {
+ return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
+ }
+ return &Writer{
+ Header: Header{
+ OS: 255, // unknown
+ },
+ w: w,
+ level: level,
+ digest: crc32.NewIEEE(),
+ }, nil
}
// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
@@ -70,9 +86,9 @@ func put4(p []byte, v uint32) {
}
// writeBytes writes a length-prefixed byte slice to z.w.
-func (z *Compressor) writeBytes(b []byte) os.Error {
+func (z *Writer) writeBytes(b []byte) error {
if len(b) > 0xffff {
- return os.NewError("gzip.Write: Extra data is too large")
+ return errors.New("gzip.Write: Extra data is too large")
}
put2(z.buf[0:2], uint16(len(b)))
_, err := z.w.Write(z.buf[0:2])
@@ -83,16 +99,28 @@ func (z *Compressor) writeBytes(b []byte) os.Error {
return err
}
-// writeString writes a string (in ISO 8859-1 (Latin-1) format) to z.w.
-func (z *Compressor) writeString(s string) os.Error {
- // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
- // TODO(nigeltao): Convert from UTF-8 to ISO 8859-1 (Latin-1).
+// writeString writes a UTF-8 string s in GZIP's format to z.w.
+// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
+func (z *Writer) writeString(s string) (err error) {
+ // GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
+ needconv := false
for _, v := range s {
- if v == 0 || v > 0x7f {
- return os.NewError("gzip.Write: non-ASCII header string")
+ if v == 0 || v > 0xff {
+ return errors.New("gzip.Write: non-Latin-1 header string")
+ }
+ if v > 0x7f {
+ needconv = true
+ }
+ }
+ if needconv {
+ b := make([]byte, 0, len(s))
+ for _, v := range s {
+ b = append(b, byte(v))
}
+ _, err = z.w.Write(b)
+ } else {
+ _, err = io.WriteString(z.w, s)
}
- _, err := io.WriteString(z.w, s)
if err != nil {
return err
}
@@ -102,7 +130,9 @@ func (z *Compressor) writeString(s string) os.Error {
return err
}
-func (z *Compressor) Write(p []byte) (int, os.Error) {
+// Write writes a compressed form of p to the underlying io.Writer. The
+// compressed bytes are not necessarily flushed until the Writer is closed.
+func (z *Writer) Write(p []byte) (int, error) {
if z.err != nil {
return 0, z.err
}
@@ -122,7 +152,7 @@ func (z *Compressor) Write(p []byte) (int, os.Error) {
if z.Comment != "" {
z.buf[3] |= 0x10
}
- put4(z.buf[4:8], z.Mtime)
+ put4(z.buf[4:8], uint32(z.ModTime.Unix()))
if z.level == BestCompression {
z.buf[8] = 2
} else if z.level == BestSpeed {
@@ -153,7 +183,7 @@ func (z *Compressor) Write(p []byte) (int, os.Error) {
return n, z.err
}
}
- z.compressor = flate.NewWriter(z.w, z.level)
+ z.compressor, _ = flate.NewWriter(z.w, z.level)
}
z.size += uint32(len(p))
z.digest.Write(p)
@@ -161,8 +191,8 @@ func (z *Compressor) Write(p []byte) (int, os.Error) {
return n, z.err
}
-// Calling Close does not close the wrapped io.Writer originally passed to NewWriter.
-func (z *Compressor) Close() os.Error {
+// Close closes the Writer. It does not close the underlying io.Writer.
+func (z *Writer) Close() error {
if z.err != nil {
return z.err
}
diff --git a/src/pkg/compress/gzip/gzip_test.go b/src/pkg/compress/gzip/gzip_test.go
index 121e627e6..6f7b59364 100644
--- a/src/pkg/compress/gzip/gzip_test.go
+++ b/src/pkg/compress/gzip/gzip_test.go
@@ -5,80 +5,155 @@
package gzip
import (
- "io"
+ "bufio"
+ "bytes"
"io/ioutil"
"testing"
+ "time"
)
-// pipe creates two ends of a pipe that gzip and gunzip, and runs dfunc at the
-// writer end and cfunc at the reader end.
-func pipe(t *testing.T, dfunc func(*Compressor), cfunc func(*Decompressor)) {
- piper, pipew := io.Pipe()
- defer piper.Close()
- go func() {
- defer pipew.Close()
- compressor, err := NewWriter(pipew)
- if err != nil {
- t.Fatalf("%v", err)
- }
- defer compressor.Close()
- dfunc(compressor)
- }()
- decompressor, err := NewReader(piper)
+// TestEmpty tests that an empty payload still forms a valid GZIP stream.
+func TestEmpty(t *testing.T) {
+ buf := new(bytes.Buffer)
+
+ if err := NewWriter(buf).Close(); err != nil {
+ t.Fatalf("Writer.Close: %v", err)
+ }
+
+ r, err := NewReader(buf)
if err != nil {
- t.Fatalf("%v", err)
+ t.Fatalf("NewReader: %v", err)
+ }
+ b, err := ioutil.ReadAll(r)
+ if err != nil {
+ t.Fatalf("ReadAll: %v", err)
+ }
+ if len(b) != 0 {
+ t.Fatalf("got %d bytes, want 0", len(b))
+ }
+ if err := r.Close(); err != nil {
+ t.Fatalf("Reader.Close: %v", err)
}
- defer decompressor.Close()
- cfunc(decompressor)
}
-// Tests that an empty payload still forms a valid GZIP stream.
-func TestEmpty(t *testing.T) {
- pipe(t,
- func(compressor *Compressor) {},
- func(decompressor *Decompressor) {
- b, err := ioutil.ReadAll(decompressor)
- if err != nil {
- t.Fatalf("%v", err)
- }
- if len(b) != 0 {
- t.Fatalf("did not read an empty slice")
- }
- })
+// TestRoundTrip tests that gzipping and then gunzipping is the identity
+// function.
+func TestRoundTrip(t *testing.T) {
+ buf := new(bytes.Buffer)
+
+ w := NewWriter(buf)
+ w.Comment = "comment"
+ w.Extra = []byte("extra")
+ w.ModTime = time.Unix(1e8, 0)
+ w.Name = "name"
+ if _, err := w.Write([]byte("payload")); err != nil {
+ t.Fatalf("Write: %v", err)
+ }
+ if err := w.Close(); err != nil {
+ t.Fatalf("Writer.Close: %v", err)
+ }
+
+ r, err := NewReader(buf)
+ if err != nil {
+ t.Fatalf("NewReader: %v", err)
+ }
+ b, err := ioutil.ReadAll(r)
+ if err != nil {
+ t.Fatalf("ReadAll: %v", err)
+ }
+ if string(b) != "payload" {
+ t.Fatalf("payload is %q, want %q", string(b), "payload")
+ }
+ if r.Comment != "comment" {
+ t.Fatalf("comment is %q, want %q", r.Comment, "comment")
+ }
+ if string(r.Extra) != "extra" {
+ t.Fatalf("extra is %q, want %q", r.Extra, "extra")
+ }
+ if r.ModTime.Unix() != 1e8 {
+ t.Fatalf("mtime is %d, want %d", r.ModTime.Unix(), uint32(1e8))
+ }
+ if r.Name != "name" {
+ t.Fatalf("name is %q, want %q", r.Name, "name")
+ }
+ if err := r.Close(); err != nil {
+ t.Fatalf("Reader.Close: %v", err)
+ }
}
-// Tests that gzipping and then gunzipping is the identity function.
-func TestWriter(t *testing.T) {
- pipe(t,
- func(compressor *Compressor) {
- compressor.Comment = "comment"
- compressor.Extra = []byte("extra")
- compressor.Mtime = 1e8
- compressor.Name = "name"
- _, err := compressor.Write([]byte("payload"))
- if err != nil {
- t.Fatalf("%v", err)
- }
- },
- func(decompressor *Decompressor) {
- b, err := ioutil.ReadAll(decompressor)
- if err != nil {
- t.Fatalf("%v", err)
- }
- if string(b) != "payload" {
- t.Fatalf("payload is %q, want %q", string(b), "payload")
- }
- if decompressor.Comment != "comment" {
- t.Fatalf("comment is %q, want %q", decompressor.Comment, "comment")
- }
- if string(decompressor.Extra) != "extra" {
- t.Fatalf("extra is %q, want %q", decompressor.Extra, "extra")
- }
- if decompressor.Mtime != 1e8 {
- t.Fatalf("mtime is %d, want %d", decompressor.Mtime, uint32(1e8))
- }
- if decompressor.Name != "name" {
- t.Fatalf("name is %q, want %q", decompressor.Name, "name")
- }
- })
+// TestLatin1 tests the internal functions for converting to and from Latin-1.
+func TestLatin1(t *testing.T) {
+ latin1 := []byte{0xc4, 'u', 0xdf, 'e', 'r', 'u', 'n', 'g', 0}
+ utf8 := "Äußerung"
+ z := Reader{r: bufio.NewReader(bytes.NewBuffer(latin1))}
+ s, err := z.readString()
+ if err != nil {
+ t.Fatalf("readString: %v", err)
+ }
+ if s != utf8 {
+ t.Fatalf("read latin-1: got %q, want %q", s, utf8)
+ }
+
+ buf := bytes.NewBuffer(make([]byte, 0, len(latin1)))
+ c := Writer{w: buf}
+ if err = c.writeString(utf8); err != nil {
+ t.Fatalf("writeString: %v", err)
+ }
+ s = buf.String()
+ if s != string(latin1) {
+ t.Fatalf("write utf-8: got %q, want %q", s, string(latin1))
+ }
+}
+
+// TestLatin1RoundTrip tests that metadata that is representable in Latin-1
+// survives a round trip.
+func TestLatin1RoundTrip(t *testing.T) {
+ testCases := []struct {
+ name string
+ ok bool
+ }{
+ {"", true},
+ {"ASCII is OK", true},
+ {"unless it contains a NUL\x00", false},
+ {"no matter where \x00 occurs", false},
+ {"\x00\x00\x00", false},
+ {"Látin-1 also passes (U+00E1)", true},
+ {"but LĀtin Extended-A (U+0100) does not", false},
+ {"neither does 日本語", false},
+ {"invalid UTF-8 also \xffails", false},
+ {"\x00 as does Látin-1 with NUL", false},
+ }
+ for _, tc := range testCases {
+ buf := new(bytes.Buffer)
+
+ w := NewWriter(buf)
+ w.Name = tc.name
+ err := w.Close()
+ if (err == nil) != tc.ok {
+ t.Errorf("Writer.Close: name = %q, err = %v", tc.name, err)
+ continue
+ }
+ if !tc.ok {
+ continue
+ }
+
+ r, err := NewReader(buf)
+ if err != nil {
+ t.Errorf("NewReader: %v", err)
+ continue
+ }
+ _, err = ioutil.ReadAll(r)
+ if err != nil {
+ t.Errorf("ReadAll: %v", err)
+ continue
+ }
+ if r.Name != tc.name {
+ t.Errorf("name is %q, want %q", r.Name, tc.name)
+ continue
+ }
+ if err := r.Close(); err != nil {
+ t.Errorf("Reader.Close: %v", err)
+ continue
+ }
+ }
}