diff options
Diffstat (limited to 'src/pkg/archive')
26 files changed, 1968 insertions, 0 deletions
diff --git a/src/pkg/archive/tar/Makefile b/src/pkg/archive/tar/Makefile new file mode 100644 index 000000000..8897e883e --- /dev/null +++ b/src/pkg/archive/tar/Makefile @@ -0,0 +1,13 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=archive/tar +GOFILES=\ + common.go\ + reader.go\ + writer.go\ + +include ../../../Make.pkg diff --git a/src/pkg/archive/tar/common.go b/src/pkg/archive/tar/common.go new file mode 100644 index 000000000..528858765 --- /dev/null +++ b/src/pkg/archive/tar/common.go @@ -0,0 +1,75 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package tar implements access to tar archives. +// It aims to cover most of the variations, including those produced +// by GNU and BSD tars. +// +// References: +// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 +// http://www.gnu.org/software/tar/manual/html_node/Standard.html +package tar + +const ( + blockSize = 512 + + // Types + TypeReg = '0' + TypeRegA = '\x00' + TypeLink = '1' + TypeSymlink = '2' + TypeChar = '3' + TypeBlock = '4' + TypeDir = '5' + TypeFifo = '6' + TypeCont = '7' + TypeXHeader = 'x' + TypeXGlobalHeader = 'g' +) + +// A Header represents a single header in a tar archive. +// Some fields may not be populated. +type Header struct { + Name string + Mode int64 + Uid int + Gid int + Size int64 + Mtime int64 + Typeflag byte + Linkname string + Uname string + Gname string + Devmajor int64 + Devminor int64 + Atime int64 + Ctime int64 +} + +var zeroBlock = make([]byte, blockSize) + +// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values. +// We compute and return both. +func checksum(header []byte) (unsigned int64, signed int64) { + for i := 0; i < len(header); i++ { + if i == 148 { + // The chksum field (header[148:156]) is special: it should be treated as space bytes. + unsigned += ' ' * 8 + signed += ' ' * 8 + i += 7 + continue + } + unsigned += int64(header[i]) + signed += int64(int8(header[i])) + } + return +} + +type slicer []byte + +func (sp *slicer) next(n int) (b []byte) { + s := *sp + b, *sp = s[0:n], s[n:] + return +} diff --git a/src/pkg/archive/tar/reader.go b/src/pkg/archive/tar/reader.go new file mode 100644 index 000000000..45d95c3df --- /dev/null +++ b/src/pkg/archive/tar/reader.go @@ -0,0 +1,221 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +// TODO(dsymonds): +// - pax extensions + +import ( + "bytes" + "io" + "io/ioutil" + "os" + "strconv" +) + +var ( + HeaderError = os.NewError("invalid tar header") +) + +// A Reader provides sequential access to the contents of a tar archive. +// A tar archive consists of a sequence of files. +// The Next method advances to the next file in the archive (including the first), +// and then it can be treated as an io.Reader to access the file's data. +// +// Example: +// tr := tar.NewReader(r) +// for { +// hdr, err := tr.Next() +// if err == os.EOF { +// // end of tar archive +// break +// } +// if err != nil { +// // handle error +// } +// io.Copy(data, tr) +// } +type Reader struct { + r io.Reader + err os.Error + nb int64 // number of unread bytes for current file entry + pad int64 // amount of padding (ignored) after current file entry +} + +// NewReader creates a new Reader reading from r. +func NewReader(r io.Reader) *Reader { return &Reader{r: r} } + +// Next advances to the next entry in the tar archive. +func (tr *Reader) Next() (*Header, os.Error) { + var hdr *Header + if tr.err == nil { + tr.skipUnread() + } + if tr.err == nil { + hdr = tr.readHeader() + } + return hdr, tr.err +} + +// Parse bytes as a NUL-terminated C-style string. +// If a NUL byte is not found then the whole slice is returned as a string. +func cString(b []byte) string { + n := 0 + for n < len(b) && b[n] != 0 { + n++ + } + return string(b[0:n]) +} + +func (tr *Reader) octal(b []byte) int64 { + // Removing leading spaces. + for len(b) > 0 && b[0] == ' ' { + b = b[1:] + } + // Removing trailing NULs and spaces. + for len(b) > 0 && (b[len(b)-1] == ' ' || b[len(b)-1] == '\x00') { + b = b[0 : len(b)-1] + } + x, err := strconv.Btoui64(cString(b), 8) + if err != nil { + tr.err = err + } + return int64(x) +} + +// Skip any unread bytes in the existing file entry, as well as any alignment padding. +func (tr *Reader) skipUnread() { + nr := tr.nb + tr.pad // number of bytes to skip + tr.nb, tr.pad = 0, 0 + if sr, ok := tr.r.(io.Seeker); ok { + if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { + return + } + } + _, tr.err = io.Copyn(ioutil.Discard, tr.r, nr) +} + +func (tr *Reader) verifyChecksum(header []byte) bool { + if tr.err != nil { + return false + } + + given := tr.octal(header[148:156]) + unsigned, signed := checksum(header) + return given == unsigned || given == signed +} + +func (tr *Reader) readHeader() *Header { + header := make([]byte, blockSize) + if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + return nil + } + + // Two blocks of zero bytes marks the end of the archive. + if bytes.Equal(header, zeroBlock[0:blockSize]) { + if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + return nil + } + if bytes.Equal(header, zeroBlock[0:blockSize]) { + tr.err = os.EOF + } else { + tr.err = HeaderError // zero block and then non-zero block + } + return nil + } + + if !tr.verifyChecksum(header) { + tr.err = HeaderError + return nil + } + + // Unpack + hdr := new(Header) + s := slicer(header) + + hdr.Name = cString(s.next(100)) + hdr.Mode = tr.octal(s.next(8)) + hdr.Uid = int(tr.octal(s.next(8))) + hdr.Gid = int(tr.octal(s.next(8))) + hdr.Size = tr.octal(s.next(12)) + hdr.Mtime = tr.octal(s.next(12)) + s.next(8) // chksum + hdr.Typeflag = s.next(1)[0] + hdr.Linkname = cString(s.next(100)) + + // The remainder of the header depends on the value of magic. + // The original (v7) version of tar had no explicit magic field, + // so its magic bytes, like the rest of the block, are NULs. + magic := string(s.next(8)) // contains version field as well. + var format string + switch magic { + case "ustar\x0000": // POSIX tar (1003.1-1988) + if string(header[508:512]) == "tar\x00" { + format = "star" + } else { + format = "posix" + } + case "ustar \x00": // old GNU tar + format = "gnu" + } + + switch format { + case "posix", "gnu", "star": + hdr.Uname = cString(s.next(32)) + hdr.Gname = cString(s.next(32)) + devmajor := s.next(8) + devminor := s.next(8) + if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { + hdr.Devmajor = tr.octal(devmajor) + hdr.Devminor = tr.octal(devminor) + } + var prefix string + switch format { + case "posix", "gnu": + prefix = cString(s.next(155)) + case "star": + prefix = cString(s.next(131)) + hdr.Atime = tr.octal(s.next(12)) + hdr.Ctime = tr.octal(s.next(12)) + } + if len(prefix) > 0 { + hdr.Name = prefix + "/" + hdr.Name + } + } + + if tr.err != nil { + tr.err = HeaderError + return nil + } + + // Maximum value of hdr.Size is 64 GB (12 octal digits), + // so there's no risk of int64 overflowing. + tr.nb = int64(hdr.Size) + tr.pad = -tr.nb & (blockSize - 1) // blockSize is a power of two + + return hdr +} + +// Read reads from the current entry in the tar archive. +// It returns 0, os.EOF when it reaches the end of that entry, +// until Next is called to advance to the next entry. +func (tr *Reader) Read(b []byte) (n int, err os.Error) { + if tr.nb == 0 { + // file consumed + return 0, os.EOF + } + + if int64(len(b)) > tr.nb { + b = b[0:tr.nb] + } + n, err = tr.r.Read(b) + tr.nb -= int64(n) + + if err == os.EOF && tr.nb > 0 { + err = io.ErrUnexpectedEOF + } + tr.err = err + return +} diff --git a/src/pkg/archive/tar/reader_test.go b/src/pkg/archive/tar/reader_test.go new file mode 100644 index 000000000..f473c900f --- /dev/null +++ b/src/pkg/archive/tar/reader_test.go @@ -0,0 +1,273 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "crypto/md5" + "fmt" + "io" + "os" + "reflect" + "testing" +) + +type untarTest struct { + file string + headers []*Header + cksums []string +} + +var gnuTarTest = &untarTest{ + file: "testdata/gnu.tar", + headers: []*Header{ + &Header{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1244428340, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + &Header{ + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1244436044, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + }, + cksums: []string{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, +} + +var untarTests = []*untarTest{ + gnuTarTest, + &untarTest{ + file: "testdata/star.tar", + headers: []*Header{ + &Header{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1244592783, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + Atime: 1244592783, + Ctime: 1244592783, + }, + &Header{ + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1244592783, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + Atime: 1244592783, + Ctime: 1244592783, + }, + }, + }, + &untarTest{ + file: "testdata/v7.tar", + headers: []*Header{ + &Header{ + Name: "small.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1244593104, + Typeflag: '\x00', + }, + &Header{ + Name: "small2.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1244593104, + Typeflag: '\x00', + }, + }, + }, +} + +func TestReader(t *testing.T) { +testLoop: + for i, test := range untarTests { + f, err := os.Open(test.file) + if err != nil { + t.Errorf("test %d: Unexpected error: %v", i, err) + continue + } + tr := NewReader(f) + for j, header := range test.headers { + hdr, err := tr.Next() + if err != nil || hdr == nil { + t.Errorf("test %d, entry %d: Didn't get entry: %v", i, j, err) + f.Close() + continue testLoop + } + if !reflect.DeepEqual(hdr, header) { + t.Errorf("test %d, entry %d: Incorrect header:\nhave %+v\nwant %+v", + i, j, *hdr, *header) + } + } + hdr, err := tr.Next() + if err == os.EOF { + break + } + if hdr != nil || err != nil { + t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err) + } + f.Close() + } +} + +func TestPartialRead(t *testing.T) { + f, err := os.Open("testdata/gnu.tar") + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + + // Read the first four bytes; Next() should skip the last byte. + hdr, err := tr.Next() + if err != nil || hdr == nil { + t.Fatalf("Didn't get first file: %v", err) + } + buf := make([]byte, 4) + if _, err := io.ReadFull(tr, buf); err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if expected := []byte("Kilt"); !bytes.Equal(buf, expected) { + t.Errorf("Contents = %v, want %v", buf, expected) + } + + // Second file + hdr, err = tr.Next() + if err != nil || hdr == nil { + t.Fatalf("Didn't get second file: %v", err) + } + buf = make([]byte, 6) + if _, err := io.ReadFull(tr, buf); err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if expected := []byte("Google"); !bytes.Equal(buf, expected) { + t.Errorf("Contents = %v, want %v", buf, expected) + } +} + +func TestIncrementalRead(t *testing.T) { + test := gnuTarTest + f, err := os.Open(test.file) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + + headers := test.headers + cksums := test.cksums + nread := 0 + + // loop over all files + for ; ; nread++ { + hdr, err := tr.Next() + if hdr == nil || err == os.EOF { + break + } + + // check the header + if !reflect.DeepEqual(hdr, headers[nread]) { + t.Errorf("Incorrect header:\nhave %+v\nwant %+v", + *hdr, headers[nread]) + } + + // read file contents in little chunks EOF, + // checksumming all the way + h := md5.New() + rdbuf := make([]uint8, 8) + for { + nr, err := tr.Read(rdbuf) + if err == os.EOF { + break + } + if err != nil { + t.Errorf("Read: unexpected error %v\n", err) + break + } + h.Write(rdbuf[0:nr]) + } + // verify checksum + have := fmt.Sprintf("%x", h.Sum()) + want := cksums[nread] + if want != have { + t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) + } + } + if nread != len(headers) { + t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) + } +} + +func TestNonSeekable(t *testing.T) { + test := gnuTarTest + f, err := os.Open(test.file) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + // pipe the data in + r, w, err := os.Pipe() + if err != nil { + t.Fatalf("Unexpected error %s", err) + } + go func() { + rdbuf := make([]uint8, 1<<16) + for { + nr, err := f.Read(rdbuf) + w.Write(rdbuf[0:nr]) + if err == os.EOF { + break + } + } + w.Close() + }() + + tr := NewReader(r) + nread := 0 + + for ; ; nread++ { + hdr, err := tr.Next() + if hdr == nil || err == os.EOF { + break + } + } + + if nread != len(test.headers) { + t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread) + } +} diff --git a/src/pkg/archive/tar/testdata/gnu.tar b/src/pkg/archive/tar/testdata/gnu.tar Binary files differnew file mode 100644 index 000000000..fc899dc8d --- /dev/null +++ b/src/pkg/archive/tar/testdata/gnu.tar diff --git a/src/pkg/archive/tar/testdata/small.txt b/src/pkg/archive/tar/testdata/small.txt new file mode 100644 index 000000000..b249bfc51 --- /dev/null +++ b/src/pkg/archive/tar/testdata/small.txt @@ -0,0 +1 @@ +Kilts
\ No newline at end of file diff --git a/src/pkg/archive/tar/testdata/small2.txt b/src/pkg/archive/tar/testdata/small2.txt new file mode 100644 index 000000000..394ee3ecd --- /dev/null +++ b/src/pkg/archive/tar/testdata/small2.txt @@ -0,0 +1 @@ +Google.com diff --git a/src/pkg/archive/tar/testdata/star.tar b/src/pkg/archive/tar/testdata/star.tar Binary files differnew file mode 100644 index 000000000..59e2d4e60 --- /dev/null +++ b/src/pkg/archive/tar/testdata/star.tar diff --git a/src/pkg/archive/tar/testdata/v7.tar b/src/pkg/archive/tar/testdata/v7.tar Binary files differnew file mode 100644 index 000000000..eb65fc941 --- /dev/null +++ b/src/pkg/archive/tar/testdata/v7.tar diff --git a/src/pkg/archive/tar/testdata/writer-big.tar b/src/pkg/archive/tar/testdata/writer-big.tar Binary files differnew file mode 100644 index 000000000..753e883ce --- /dev/null +++ b/src/pkg/archive/tar/testdata/writer-big.tar diff --git a/src/pkg/archive/tar/testdata/writer.tar b/src/pkg/archive/tar/testdata/writer.tar Binary files differnew file mode 100644 index 000000000..0358f91b9 --- /dev/null +++ b/src/pkg/archive/tar/testdata/writer.tar diff --git a/src/pkg/archive/tar/writer.go b/src/pkg/archive/tar/writer.go new file mode 100644 index 000000000..8673bad31 --- /dev/null +++ b/src/pkg/archive/tar/writer.go @@ -0,0 +1,205 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +// TODO(dsymonds): +// - catch more errors (no first header, write after close, etc.) + +import ( + "io" + "os" + "strconv" +) + +var ( + ErrWriteTooLong = os.NewError("write too long") + ErrFieldTooLong = os.NewError("header field too long") + ErrWriteAfterClose = os.NewError("write after close") +) + +// A Writer provides sequential writing of a tar archive in POSIX.1 format. +// A tar archive consists of a sequence of files. +// Call WriteHeader to begin a new file, and then call Write to supply that file's data, +// writing at most hdr.Size bytes in total. +// +// Example: +// tw := tar.NewWriter(w) +// hdr := new(Header) +// hdr.Size = length of data in bytes +// // populate other hdr fields as desired +// if err := tw.WriteHeader(hdr); err != nil { +// // handle error +// } +// io.Copy(tw, data) +// tw.Close() +type Writer struct { + w io.Writer + err os.Error + nb int64 // number of unwritten bytes for current file entry + pad int64 // amount of padding to write after current file entry + closed bool + usedBinary bool // whether the binary numeric field extension was used +} + +// NewWriter creates a new Writer writing to w. +func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } + +// Flush finishes writing the current file (optional). +func (tw *Writer) Flush() os.Error { + n := tw.nb + tw.pad + for n > 0 && tw.err == nil { + nr := n + if nr > blockSize { + nr = blockSize + } + var nw int + nw, tw.err = tw.w.Write(zeroBlock[0:nr]) + n -= int64(nw) + } + tw.nb = 0 + tw.pad = 0 + return tw.err +} + +// Write s into b, terminating it with a NUL if there is room. +func (tw *Writer) cString(b []byte, s string) { + if len(s) > len(b) { + if tw.err == nil { + tw.err = ErrFieldTooLong + } + return + } + copy(b, s) + if len(s) < len(b) { + b[len(s)] = 0 + } +} + +// Encode x as an octal ASCII string and write it into b with leading zeros. +func (tw *Writer) octal(b []byte, x int64) { + s := strconv.Itob64(x, 8) + // leading zeros, but leave room for a NUL. + for len(s)+1 < len(b) { + s = "0" + s + } + tw.cString(b, s) +} + +// Write x into b, either as octal or as binary (GNUtar/star extension). +func (tw *Writer) numeric(b []byte, x int64) { + // Try octal first. + s := strconv.Itob64(x, 8) + if len(s) < len(b) { + tw.octal(b, x) + return + } + // Too big: use binary (big-endian). + tw.usedBinary = true + for i := len(b) - 1; x > 0 && i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // highest bit indicates binary format +} + +// WriteHeader writes hdr and prepares to accept the file's contents. +// WriteHeader calls Flush if it is not the first header. +// Calling after a Close will return ErrWriteAfterClose. +func (tw *Writer) WriteHeader(hdr *Header) os.Error { + if tw.closed { + return ErrWriteAfterClose + } + if tw.err == nil { + tw.Flush() + } + if tw.err != nil { + return tw.err + } + + tw.nb = int64(hdr.Size) + tw.pad = -tw.nb & (blockSize - 1) // blockSize is a power of two + + header := make([]byte, blockSize) + s := slicer(header) + + // TODO(dsymonds): handle names longer than 100 chars + copy(s.next(100), []byte(hdr.Name)) + + tw.octal(s.next(8), hdr.Mode) // 100:108 + tw.numeric(s.next(8), int64(hdr.Uid)) // 108:116 + tw.numeric(s.next(8), int64(hdr.Gid)) // 116:124 + tw.numeric(s.next(12), hdr.Size) // 124:136 + tw.numeric(s.next(12), hdr.Mtime) // 136:148 + s.next(8) // chksum (148:156) + s.next(1)[0] = hdr.Typeflag // 156:157 + s.next(100) // linkname (157:257) + copy(s.next(8), []byte("ustar\x0000")) // 257:265 + tw.cString(s.next(32), hdr.Uname) // 265:297 + tw.cString(s.next(32), hdr.Gname) // 297:329 + tw.numeric(s.next(8), hdr.Devmajor) // 329:337 + tw.numeric(s.next(8), hdr.Devminor) // 337:345 + + // Use the GNU magic instead of POSIX magic if we used any GNU extensions. + if tw.usedBinary { + copy(header[257:265], []byte("ustar \x00")) + } + + // The chksum field is terminated by a NUL and a space. + // This is different from the other octal fields. + chksum, _ := checksum(header) + tw.octal(header[148:155], chksum) + header[155] = ' ' + + if tw.err != nil { + // problem with header; probably integer too big for a field. + return tw.err + } + + _, tw.err = tw.w.Write(header) + + return tw.err +} + +// Write writes to the current entry in the tar archive. +// Write returns the error ErrWriteTooLong if more than +// hdr.Size bytes are written after WriteHeader. +func (tw *Writer) Write(b []byte) (n int, err os.Error) { + if tw.closed { + err = ErrWriteTooLong + return + } + overwrite := false + if int64(len(b)) > tw.nb { + b = b[0:tw.nb] + overwrite = true + } + n, err = tw.w.Write(b) + tw.nb -= int64(n) + if err == nil && overwrite { + err = ErrWriteTooLong + return + } + tw.err = err + return +} + +// Close closes the tar archive, flushing any unwritten +// data to the underlying writer. +func (tw *Writer) Close() os.Error { + if tw.err != nil || tw.closed { + return tw.err + } + tw.Flush() + tw.closed = true + + // trailer: two zero blocks + for i := 0; i < 2; i++ { + _, tw.err = tw.w.Write(zeroBlock) + if tw.err != nil { + break + } + } + return tw.err +} diff --git a/src/pkg/archive/tar/writer_test.go b/src/pkg/archive/tar/writer_test.go new file mode 100644 index 000000000..838cb7e1f --- /dev/null +++ b/src/pkg/archive/tar/writer_test.go @@ -0,0 +1,157 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "testing" + "testing/iotest" +) + +type writerTestEntry struct { + header *Header + contents string +} + +type writerTest struct { + file string // filename of expected output + entries []*writerTestEntry +} + +var writerTests = []*writerTest{ + &writerTest{ + file: "testdata/writer.tar", + entries: []*writerTestEntry{ + &writerTestEntry{ + header: &Header{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + Mtime: 1246508266, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + contents: "Kilts", + }, + &writerTestEntry{ + header: &Header{ + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + Mtime: 1245217492, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + contents: "Google.com\n", + }, + }, + }, + // The truncated test file was produced using these commands: + // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt + // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar + &writerTest{ + file: "testdata/writer-big.tar", + entries: []*writerTestEntry{ + &writerTestEntry{ + header: &Header{ + Name: "tmp/16gig.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 16 << 30, + Mtime: 1254699560, + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + // no contents + }, + }, + }, +} + +// Render byte array in a two-character hexadecimal string, spaced for easy visual inspection. +func bytestr(offset int, b []byte) string { + const rowLen = 32 + s := fmt.Sprintf("%04x ", offset) + for _, ch := range b { + switch { + case '0' <= ch && ch <= '9', 'A' <= ch && ch <= 'Z', 'a' <= ch && ch <= 'z': + s += fmt.Sprintf(" %c", ch) + default: + s += fmt.Sprintf(" %02x", ch) + } + } + return s +} + +// Render a pseudo-diff between two blocks of bytes. +func bytediff(a []byte, b []byte) string { + const rowLen = 32 + s := fmt.Sprintf("(%d bytes vs. %d bytes)\n", len(a), len(b)) + for offset := 0; len(a)+len(b) > 0; offset += rowLen { + na, nb := rowLen, rowLen + if na > len(a) { + na = len(a) + } + if nb > len(b) { + nb = len(b) + } + sa := bytestr(offset, a[0:na]) + sb := bytestr(offset, b[0:nb]) + if sa != sb { + s += fmt.Sprintf("-%v\n+%v\n", sa, sb) + } + a = a[na:] + b = b[nb:] + } + return s +} + +func TestWriter(t *testing.T) { +testLoop: + for i, test := range writerTests { + expected, err := ioutil.ReadFile(test.file) + if err != nil { + t.Errorf("test %d: Unexpected error: %v", i, err) + continue + } + + buf := new(bytes.Buffer) + tw := NewWriter(iotest.TruncateWriter(buf, 4<<10)) // only catch the first 4 KB + for j, entry := range test.entries { + if err := tw.WriteHeader(entry.header); err != nil { + t.Errorf("test %d, entry %d: Failed writing header: %v", i, j, err) + continue testLoop + } + if _, err := io.WriteString(tw, entry.contents); err != nil { + t.Errorf("test %d, entry %d: Failed writing contents: %v", i, j, err) + continue testLoop + } + } + if err := tw.Close(); err != nil { + t.Errorf("test %d: Failed closing archive: %v", i, err) + continue testLoop + } + + actual := buf.Bytes() + if !bytes.Equal(expected, actual) { + t.Errorf("test %d: Incorrect result: (-=expected, +=actual)\n%v", + i, bytediff(expected, actual)) + } + if testing.Short() { // The second test is expensive. + break + } + } +} diff --git a/src/pkg/archive/zip/Makefile b/src/pkg/archive/zip/Makefile new file mode 100644 index 000000000..9071690f0 --- /dev/null +++ b/src/pkg/archive/zip/Makefile @@ -0,0 +1,13 @@ +# Copyright 2010 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../../Make.inc + +TARG=archive/zip +GOFILES=\ + reader.go\ + struct.go\ + writer.go\ + +include ../../../Make.pkg diff --git a/src/pkg/archive/zip/reader.go b/src/pkg/archive/zip/reader.go new file mode 100644 index 000000000..f92f9297a --- /dev/null +++ b/src/pkg/archive/zip/reader.go @@ -0,0 +1,311 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bufio" + "compress/flate" + "hash" + "hash/crc32" + "encoding/binary" + "io" + "io/ioutil" + "os" +) + +var ( + FormatError = os.NewError("zip: not a valid zip file") + UnsupportedMethod = os.NewError("zip: unsupported compression algorithm") + ChecksumError = os.NewError("zip: checksum error") +) + +type Reader struct { + r io.ReaderAt + File []*File + Comment string +} + +type ReadCloser struct { + f *os.File + Reader +} + +type File struct { + FileHeader + zipr io.ReaderAt + zipsize int64 + headerOffset int64 +} + +func (f *File) hasDataDescriptor() bool { + return f.Flags&0x8 != 0 +} + +// OpenReader will open the Zip file specified by name and return a ReadCloser. +func OpenReader(name string) (*ReadCloser, os.Error) { + f, err := os.Open(name) + if err != nil { + return nil, err + } + fi, err := f.Stat() + if err != nil { + f.Close() + return nil, err + } + r := new(ReadCloser) + if err := r.init(f, fi.Size); err != nil { + f.Close() + return nil, err + } + return r, nil +} + +// NewReader returns a new Reader reading from r, which is assumed to +// have the given size in bytes. +func NewReader(r io.ReaderAt, size int64) (*Reader, os.Error) { + zr := new(Reader) + if err := zr.init(r, size); err != nil { + return nil, err + } + return zr, nil +} + +func (z *Reader) init(r io.ReaderAt, size int64) os.Error { + end, err := readDirectoryEnd(r, size) + if err != nil { + return err + } + z.r = r + z.File = make([]*File, 0, end.directoryRecords) + z.Comment = end.comment + rs := io.NewSectionReader(r, 0, size) + if _, err = rs.Seek(int64(end.directoryOffset), os.SEEK_SET); err != nil { + return err + } + buf := bufio.NewReader(rs) + + // The count of files inside a zip is truncated to fit in a uint16. + // Gloss over this by reading headers until we encounter + // a bad one, and then only report a FormatError or UnexpectedEOF if + // the file count modulo 65536 is incorrect. + for { + f := &File{zipr: r, zipsize: size} + err = readDirectoryHeader(f, buf) + if err == FormatError || err == io.ErrUnexpectedEOF { + break + } + if err != nil { + return err + } + z.File = append(z.File, f) + } + if uint16(len(z.File)) != end.directoryRecords { + // Return the readDirectoryHeader error if we read + // the wrong number of directory entries. + return err + } + return nil +} + +// Close closes the Zip file, rendering it unusable for I/O. +func (rc *ReadCloser) Close() os.Error { + return rc.f.Close() +} + +// Open returns a ReadCloser that provides access to the File's contents. +// It is safe to Open and Read from files concurrently. +func (f *File) Open() (rc io.ReadCloser, err os.Error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return + } + size := int64(f.CompressedSize) + if size == 0 && f.hasDataDescriptor() { + // permit SectionReader to see the rest of the file + size = f.zipsize - (f.headerOffset + bodyOffset) + } + r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) + switch f.Method { + case Store: // (no compression) + rc = ioutil.NopCloser(r) + case Deflate: + rc = flate.NewReader(r) + default: + err = UnsupportedMethod + } + if rc != nil { + rc = &checksumReader{rc, crc32.NewIEEE(), f, r} + } + return +} + +type checksumReader struct { + rc io.ReadCloser + hash hash.Hash32 + f *File + zipr io.Reader // for reading the data descriptor +} + +func (r *checksumReader) Read(b []byte) (n int, err os.Error) { + n, err = r.rc.Read(b) + r.hash.Write(b[:n]) + if err != os.EOF { + return + } + if r.f.hasDataDescriptor() { + if err = readDataDescriptor(r.zipr, r.f); err != nil { + return + } + } + if r.hash.Sum32() != r.f.CRC32 { + err = ChecksumError + } + return +} + +func (r *checksumReader) Close() os.Error { return r.rc.Close() } + +func readFileHeader(f *File, r io.Reader) os.Error { + var b [fileHeaderLen]byte + if _, err := io.ReadFull(r, b[:]); err != nil { + return err + } + c := binary.LittleEndian + if sig := c.Uint32(b[:4]); sig != fileHeaderSignature { + return FormatError + } + f.ReaderVersion = c.Uint16(b[4:6]) + f.Flags = c.Uint16(b[6:8]) + f.Method = c.Uint16(b[8:10]) + f.ModifiedTime = c.Uint16(b[10:12]) + f.ModifiedDate = c.Uint16(b[12:14]) + f.CRC32 = c.Uint32(b[14:18]) + f.CompressedSize = c.Uint32(b[18:22]) + f.UncompressedSize = c.Uint32(b[22:26]) + filenameLen := int(c.Uint16(b[26:28])) + extraLen := int(c.Uint16(b[28:30])) + d := make([]byte, filenameLen+extraLen) + if _, err := io.ReadFull(r, d); err != nil { + return err + } + f.Name = string(d[:filenameLen]) + f.Extra = d[filenameLen:] + return nil +} + +// findBodyOffset does the minimum work to verify the file has a header +// and returns the file body offset. +func (f *File) findBodyOffset() (int64, os.Error) { + r := io.NewSectionReader(f.zipr, f.headerOffset, f.zipsize-f.headerOffset) + var b [fileHeaderLen]byte + if _, err := io.ReadFull(r, b[:]); err != nil { + return 0, err + } + c := binary.LittleEndian + if sig := c.Uint32(b[:4]); sig != fileHeaderSignature { + return 0, FormatError + } + filenameLen := int(c.Uint16(b[26:28])) + extraLen := int(c.Uint16(b[28:30])) + return int64(fileHeaderLen + filenameLen + extraLen), nil +} + +// readDirectoryHeader attempts to read a directory header from r. +// It returns io.ErrUnexpectedEOF if it cannot read a complete header, +// and FormatError if it doesn't find a valid header signature. +func readDirectoryHeader(f *File, r io.Reader) os.Error { + var b [directoryHeaderLen]byte + if _, err := io.ReadFull(r, b[:]); err != nil { + return err + } + c := binary.LittleEndian + if sig := c.Uint32(b[:4]); sig != directoryHeaderSignature { + return FormatError + } + f.CreatorVersion = c.Uint16(b[4:6]) + f.ReaderVersion = c.Uint16(b[6:8]) + f.Flags = c.Uint16(b[8:10]) + f.Method = c.Uint16(b[10:12]) + f.ModifiedTime = c.Uint16(b[12:14]) + f.ModifiedDate = c.Uint16(b[14:16]) + f.CRC32 = c.Uint32(b[16:20]) + f.CompressedSize = c.Uint32(b[20:24]) + f.UncompressedSize = c.Uint32(b[24:28]) + filenameLen := int(c.Uint16(b[28:30])) + extraLen := int(c.Uint16(b[30:32])) + commentLen := int(c.Uint16(b[32:34])) + // startDiskNumber := c.Uint16(b[34:36]) // Unused + // internalAttributes := c.Uint16(b[36:38]) // Unused + // externalAttributes := c.Uint32(b[38:42]) // Unused + f.headerOffset = int64(c.Uint32(b[42:46])) + d := make([]byte, filenameLen+extraLen+commentLen) + if _, err := io.ReadFull(r, d); err != nil { + return err + } + f.Name = string(d[:filenameLen]) + f.Extra = d[filenameLen : filenameLen+extraLen] + f.Comment = string(d[filenameLen+extraLen:]) + return nil +} + +func readDataDescriptor(r io.Reader, f *File) os.Error { + var b [dataDescriptorLen]byte + if _, err := io.ReadFull(r, b[:]); err != nil { + return err + } + c := binary.LittleEndian + f.CRC32 = c.Uint32(b[:4]) + f.CompressedSize = c.Uint32(b[4:8]) + f.UncompressedSize = c.Uint32(b[8:12]) + return nil +} + +func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Error) { + // look for directoryEndSignature in the last 1k, then in the last 65k + var b []byte + for i, bLen := range []int64{1024, 65 * 1024} { + if bLen > size { + bLen = size + } + b = make([]byte, int(bLen)) + if _, err := r.ReadAt(b, size-bLen); err != nil && err != os.EOF { + return nil, err + } + if p := findSignatureInBlock(b); p >= 0 { + b = b[p:] + break + } + if i == 1 || bLen == size { + return nil, FormatError + } + } + + // read header into struct + c := binary.LittleEndian + d := new(directoryEnd) + d.diskNbr = c.Uint16(b[4:6]) + d.dirDiskNbr = c.Uint16(b[6:8]) + d.dirRecordsThisDisk = c.Uint16(b[8:10]) + d.directoryRecords = c.Uint16(b[10:12]) + d.directorySize = c.Uint32(b[12:16]) + d.directoryOffset = c.Uint32(b[16:20]) + d.commentLen = c.Uint16(b[20:22]) + d.comment = string(b[22 : 22+int(d.commentLen)]) + return d, nil +} + +func findSignatureInBlock(b []byte) int { + for i := len(b) - directoryEndLen; i >= 0; i-- { + // defined from directoryEndSignature in struct.go + if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { + // n is length of comment + n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 + if n+directoryEndLen+i == len(b) { + return i + } + } + } + return -1 +} diff --git a/src/pkg/archive/zip/reader_test.go b/src/pkg/archive/zip/reader_test.go new file mode 100644 index 000000000..fd5fed2af --- /dev/null +++ b/src/pkg/archive/zip/reader_test.go @@ -0,0 +1,233 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bytes" + "encoding/binary" + "io" + "io/ioutil" + "os" + "testing" + "time" +) + +type ZipTest struct { + Name string + Comment string + File []ZipTestFile + Error os.Error // the error that Opening this file should return +} + +type ZipTestFile struct { + Name string + Content []byte // if blank, will attempt to compare against File + File string // name of file to compare to (relative to testdata/) + Mtime string // modified time in format "mm-dd-yy hh:mm:ss" +} + +// Caution: The Mtime values found for the test files should correspond to +// the values listed with unzip -l <zipfile>. However, the values +// listed by unzip appear to be off by some hours. When creating +// fresh test files and testing them, this issue is not present. +// The test files were created in Sydney, so there might be a time +// zone issue. The time zone information does have to be encoded +// somewhere, because otherwise unzip -l could not provide a different +// time from what the archive/zip package provides, but there appears +// to be no documentation about this. + +var tests = []ZipTest{ + { + Name: "test.zip", + Comment: "This is a zipfile comment.", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte("This is a test text file.\n"), + Mtime: "09-05-10 12:12:02", + }, + { + Name: "gophercolor16x16.png", + File: "gophercolor16x16.png", + Mtime: "09-05-10 15:52:58", + }, + }, + }, + { + Name: "r.zip", + File: []ZipTestFile{ + { + Name: "r/r.zip", + File: "r.zip", + Mtime: "03-04-10 00:24:16", + }, + }, + }, + {Name: "readme.zip"}, + {Name: "readme.notzip", Error: FormatError}, + { + Name: "dd.zip", + File: []ZipTestFile{ + { + Name: "filename", + Content: []byte("This is a test textfile.\n"), + Mtime: "02-02-11 13:06:20", + }, + }, + }, +} + +func TestReader(t *testing.T) { + for _, zt := range tests { + readTestZip(t, zt) + } +} + +func readTestZip(t *testing.T, zt ZipTest) { + z, err := OpenReader("testdata/" + zt.Name) + if err != zt.Error { + t.Errorf("error=%v, want %v", err, zt.Error) + return + } + + // bail if file is not zip + if err == FormatError { + return + } + defer z.Close() + + // bail here if no Files expected to be tested + // (there may actually be files in the zip, but we don't care) + if zt.File == nil { + return + } + + if z.Comment != zt.Comment { + t.Errorf("%s: comment=%q, want %q", zt.Name, z.Comment, zt.Comment) + } + if len(z.File) != len(zt.File) { + t.Errorf("%s: file count=%d, want %d", zt.Name, len(z.File), len(zt.File)) + } + + // test read of each file + for i, ft := range zt.File { + readTestFile(t, ft, z.File[i]) + } + + // test simultaneous reads + n := 0 + done := make(chan bool) + for i := 0; i < 5; i++ { + for j, ft := range zt.File { + go func() { + readTestFile(t, ft, z.File[j]) + done <- true + }() + n++ + } + } + for ; n > 0; n-- { + <-done + } + + // test invalid checksum + if !z.File[0].hasDataDescriptor() { // skip test when crc32 in dd + z.File[0].CRC32++ // invalidate + r, err := z.File[0].Open() + if err != nil { + t.Error(err) + return + } + var b bytes.Buffer + _, err = io.Copy(&b, r) + if err != ChecksumError { + t.Errorf("%s: copy error=%v, want %v", z.File[0].Name, err, ChecksumError) + } + } +} + +func readTestFile(t *testing.T, ft ZipTestFile, f *File) { + if f.Name != ft.Name { + t.Errorf("name=%q, want %q", f.Name, ft.Name) + } + + mtime, err := time.Parse("01-02-06 15:04:05", ft.Mtime) + if err != nil { + t.Error(err) + return + } + if got, want := f.Mtime_ns()/1e9, mtime.Seconds(); got != want { + t.Errorf("%s: mtime=%s (%d); want %s (%d)", f.Name, time.SecondsToUTC(got), got, mtime, want) + } + + size0 := f.UncompressedSize + + var b bytes.Buffer + r, err := f.Open() + if err != nil { + t.Error(err) + return + } + + if size1 := f.UncompressedSize; size0 != size1 { + t.Errorf("file %q changed f.UncompressedSize from %d to %d", f.Name, size0, size1) + } + + _, err = io.Copy(&b, r) + if err != nil { + t.Error(err) + return + } + r.Close() + + var c []byte + if len(ft.Content) != 0 { + c = ft.Content + } else if c, err = ioutil.ReadFile("testdata/" + ft.File); err != nil { + t.Error(err) + return + } + + if b.Len() != len(c) { + t.Errorf("%s: len=%d, want %d", f.Name, b.Len(), len(c)) + return + } + + for i, b := range b.Bytes() { + if b != c[i] { + t.Errorf("%s: content[%d]=%q want %q", f.Name, i, b, c[i]) + return + } + } +} + +func TestInvalidFiles(t *testing.T) { + const size = 1024 * 70 // 70kb + b := make([]byte, size) + + // zeroes + _, err := NewReader(sliceReaderAt(b), size) + if err != FormatError { + t.Errorf("zeroes: error=%v, want %v", err, FormatError) + } + + // repeated directoryEndSignatures + sig := make([]byte, 4) + binary.LittleEndian.PutUint32(sig, directoryEndSignature) + for i := 0; i < size-4; i += 4 { + copy(b[i:i+4], sig) + } + _, err = NewReader(sliceReaderAt(b), size) + if err != FormatError { + t.Errorf("sigs: error=%v, want %v", err, FormatError) + } +} + +type sliceReaderAt []byte + +func (r sliceReaderAt) ReadAt(b []byte, off int64) (int, os.Error) { + copy(b, r[int(off):int(off)+len(b)]) + return len(b), nil +} diff --git a/src/pkg/archive/zip/struct.go b/src/pkg/archive/zip/struct.go new file mode 100644 index 000000000..1d6e70f10 --- /dev/null +++ b/src/pkg/archive/zip/struct.go @@ -0,0 +1,91 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package zip provides support for reading and writing ZIP archives. + +See: http://www.pkware.com/documents/casestudies/APPNOTE.TXT + +This package does not support ZIP64 or disk spanning. +*/ +package zip + +import "os" +import "time" + +// Compression methods. +const ( + Store uint16 = 0 + Deflate uint16 = 8 +) + +const ( + fileHeaderSignature = 0x04034b50 + directoryHeaderSignature = 0x02014b50 + directoryEndSignature = 0x06054b50 + fileHeaderLen = 30 // + filename + extra + directoryHeaderLen = 46 // + filename + extra + comment + directoryEndLen = 22 // + comment + dataDescriptorLen = 12 +) + +type FileHeader struct { + Name string + CreatorVersion uint16 + ReaderVersion uint16 + Flags uint16 + Method uint16 + ModifiedTime uint16 // MS-DOS time + ModifiedDate uint16 // MS-DOS date + CRC32 uint32 + CompressedSize uint32 + UncompressedSize uint32 + Extra []byte + Comment string +} + +type directoryEnd struct { + diskNbr uint16 // unused + dirDiskNbr uint16 // unused + dirRecordsThisDisk uint16 // unused + directoryRecords uint16 + directorySize uint32 + directoryOffset uint32 // relative to file + commentLen uint16 + comment string +} + +func recoverError(err *os.Error) { + if e := recover(); e != nil { + if osErr, ok := e.(os.Error); ok { + *err = osErr + return + } + panic(e) + } +} + +// msDosTimeToTime converts an MS-DOS date and time into a time.Time. +// The resolution is 2s. +// See: http://msdn.microsoft.com/en-us/library/ms724247(v=VS.85).aspx +func msDosTimeToTime(dosDate, dosTime uint16) time.Time { + return time.Time{ + // date bits 0-4: day of month; 5-8: month; 9-15: years since 1980 + Year: int64(dosDate>>9 + 1980), + Month: int(dosDate >> 5 & 0xf), + Day: int(dosDate & 0x1f), + + // time bits 0-4: second/2; 5-10: minute; 11-15: hour + Hour: int(dosTime >> 11), + Minute: int(dosTime >> 5 & 0x3f), + Second: int(dosTime & 0x1f * 2), + } +} + +// Mtime_ns returns the modified time in ns since epoch. +// The resolution is 2s. +func (h *FileHeader) Mtime_ns() int64 { + t := msDosTimeToTime(h.ModifiedDate, h.ModifiedTime) + return t.Seconds() * 1e9 +} diff --git a/src/pkg/archive/zip/testdata/dd.zip b/src/pkg/archive/zip/testdata/dd.zip Binary files differnew file mode 100644 index 000000000..e53378b0b --- /dev/null +++ b/src/pkg/archive/zip/testdata/dd.zip diff --git a/src/pkg/archive/zip/testdata/gophercolor16x16.png b/src/pkg/archive/zip/testdata/gophercolor16x16.png Binary files differnew file mode 100644 index 000000000..48854ff3b --- /dev/null +++ b/src/pkg/archive/zip/testdata/gophercolor16x16.png diff --git a/src/pkg/archive/zip/testdata/r.zip b/src/pkg/archive/zip/testdata/r.zip Binary files differnew file mode 100644 index 000000000..ea0fa2ffc --- /dev/null +++ b/src/pkg/archive/zip/testdata/r.zip diff --git a/src/pkg/archive/zip/testdata/readme.notzip b/src/pkg/archive/zip/testdata/readme.notzip Binary files differnew file mode 100644 index 000000000..06668c4c1 --- /dev/null +++ b/src/pkg/archive/zip/testdata/readme.notzip diff --git a/src/pkg/archive/zip/testdata/readme.zip b/src/pkg/archive/zip/testdata/readme.zip Binary files differnew file mode 100644 index 000000000..db3bb900e --- /dev/null +++ b/src/pkg/archive/zip/testdata/readme.zip diff --git a/src/pkg/archive/zip/testdata/test.zip b/src/pkg/archive/zip/testdata/test.zip Binary files differnew file mode 100644 index 000000000..03890c05d --- /dev/null +++ b/src/pkg/archive/zip/testdata/test.zip diff --git a/src/pkg/archive/zip/writer.go b/src/pkg/archive/zip/writer.go new file mode 100644 index 000000000..2065b06da --- /dev/null +++ b/src/pkg/archive/zip/writer.go @@ -0,0 +1,244 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bufio" + "compress/flate" + "encoding/binary" + "hash" + "hash/crc32" + "io" + "os" +) + +// TODO(adg): support zip file comments +// TODO(adg): support specifying deflate level + +// Writer implements a zip file writer. +type Writer struct { + *countWriter + dir []*header + last *fileWriter + closed bool +} + +type header struct { + *FileHeader + offset uint32 +} + +// NewWriter returns a new Writer writing a zip file to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{countWriter: &countWriter{w: bufio.NewWriter(w)}} +} + +// Close finishes writing the zip file by writing the central directory. +// It does not (and can not) close the underlying writer. +func (w *Writer) Close() (err os.Error) { + if w.last != nil && !w.last.closed { + if err = w.last.close(); err != nil { + return + } + w.last = nil + } + if w.closed { + return os.NewError("zip: writer closed twice") + } + w.closed = true + + defer recoverError(&err) + + // write central directory + start := w.count + for _, h := range w.dir { + write(w, uint32(directoryHeaderSignature)) + write(w, h.CreatorVersion) + write(w, h.ReaderVersion) + write(w, h.Flags) + write(w, h.Method) + write(w, h.ModifiedTime) + write(w, h.ModifiedDate) + write(w, h.CRC32) + write(w, h.CompressedSize) + write(w, h.UncompressedSize) + write(w, uint16(len(h.Name))) + write(w, uint16(len(h.Extra))) + write(w, uint16(len(h.Comment))) + write(w, uint16(0)) // disk number start + write(w, uint16(0)) // internal file attributes + write(w, uint32(0)) // external file attributes + write(w, h.offset) + writeBytes(w, []byte(h.Name)) + writeBytes(w, h.Extra) + writeBytes(w, []byte(h.Comment)) + } + end := w.count + + // write end record + write(w, uint32(directoryEndSignature)) + write(w, uint16(0)) // disk number + write(w, uint16(0)) // disk number where directory starts + write(w, uint16(len(w.dir))) // number of entries this disk + write(w, uint16(len(w.dir))) // number of entries total + write(w, uint32(end-start)) // size of directory + write(w, uint32(start)) // start of directory + write(w, uint16(0)) // size of comment + + return w.w.(*bufio.Writer).Flush() +} + +// Create adds a file to the zip file using the provided name. +// It returns a Writer to which the file contents should be written. +// The file's contents must be written to the io.Writer before the next +// call to Create, CreateHeader, or Close. +func (w *Writer) Create(name string) (io.Writer, os.Error) { + header := &FileHeader{ + Name: name, + Method: Deflate, + } + return w.CreateHeader(header) +} + +// CreateHeader adds a file to the zip file using the provided FileHeader +// for the file metadata. +// It returns a Writer to which the file contents should be written. +// The file's contents must be written to the io.Writer before the next +// call to Create, CreateHeader, or Close. +func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, os.Error) { + if w.last != nil && !w.last.closed { + if err := w.last.close(); err != nil { + return nil, err + } + } + + fh.Flags |= 0x8 // we will write a data descriptor + fh.CreatorVersion = 0x14 + fh.ReaderVersion = 0x14 + + fw := &fileWriter{ + zipw: w, + compCount: &countWriter{w: w}, + crc32: crc32.NewIEEE(), + } + switch fh.Method { + case Store: + fw.comp = nopCloser{fw.compCount} + case Deflate: + fw.comp = flate.NewWriter(fw.compCount, 5) + default: + return nil, UnsupportedMethod + } + fw.rawCount = &countWriter{w: fw.comp} + + h := &header{ + FileHeader: fh, + offset: uint32(w.count), + } + w.dir = append(w.dir, h) + fw.header = h + + if err := writeHeader(w, fh); err != nil { + return nil, err + } + + w.last = fw + return fw, nil +} + +func writeHeader(w io.Writer, h *FileHeader) (err os.Error) { + defer recoverError(&err) + write(w, uint32(fileHeaderSignature)) + write(w, h.ReaderVersion) + write(w, h.Flags) + write(w, h.Method) + write(w, h.ModifiedTime) + write(w, h.ModifiedDate) + write(w, h.CRC32) + write(w, h.CompressedSize) + write(w, h.UncompressedSize) + write(w, uint16(len(h.Name))) + write(w, uint16(len(h.Extra))) + writeBytes(w, []byte(h.Name)) + writeBytes(w, h.Extra) + return nil +} + +type fileWriter struct { + *header + zipw io.Writer + rawCount *countWriter + comp io.WriteCloser + compCount *countWriter + crc32 hash.Hash32 + closed bool +} + +func (w *fileWriter) Write(p []byte) (int, os.Error) { + if w.closed { + return 0, os.NewError("zip: write to closed file") + } + w.crc32.Write(p) + return w.rawCount.Write(p) +} + +func (w *fileWriter) close() (err os.Error) { + if w.closed { + return os.NewError("zip: file closed twice") + } + w.closed = true + if err = w.comp.Close(); err != nil { + return + } + + // update FileHeader + fh := w.header.FileHeader + fh.CRC32 = w.crc32.Sum32() + fh.CompressedSize = uint32(w.compCount.count) + fh.UncompressedSize = uint32(w.rawCount.count) + + // write data descriptor + defer recoverError(&err) + write(w.zipw, fh.CRC32) + write(w.zipw, fh.CompressedSize) + write(w.zipw, fh.UncompressedSize) + + return nil +} + +type countWriter struct { + w io.Writer + count int64 +} + +func (w *countWriter) Write(p []byte) (int, os.Error) { + n, err := w.w.Write(p) + w.count += int64(n) + return n, err +} + +type nopCloser struct { + io.Writer +} + +func (w nopCloser) Close() os.Error { + return nil +} + +func write(w io.Writer, data interface{}) { + if err := binary.Write(w, binary.LittleEndian, data); err != nil { + panic(err) + } +} + +func writeBytes(w io.Writer, b []byte) { + n, err := w.Write(b) + if err != nil { + panic(err) + } + if n != len(b) { + panic(io.ErrShortWrite) + } +} diff --git a/src/pkg/archive/zip/writer_test.go b/src/pkg/archive/zip/writer_test.go new file mode 100644 index 000000000..eb2a80c3f --- /dev/null +++ b/src/pkg/archive/zip/writer_test.go @@ -0,0 +1,73 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bytes" + "io/ioutil" + "rand" + "testing" +) + +// TODO(adg): a more sophisticated test suite + +const testString = "Rabbits, guinea pigs, gophers, marsupial rats, and quolls." + +func TestWriter(t *testing.T) { + largeData := make([]byte, 1<<17) + for i := range largeData { + largeData[i] = byte(rand.Int()) + } + + // write a zip file + buf := new(bytes.Buffer) + w := NewWriter(buf) + testCreate(t, w, "foo", []byte(testString), Store) + testCreate(t, w, "bar", largeData, Deflate) + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read it back + r, err := NewReader(sliceReaderAt(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + testReadFile(t, r.File[0], []byte(testString)) + testReadFile(t, r.File[1], largeData) +} + +func testCreate(t *testing.T, w *Writer, name string, data []byte, method uint16) { + header := &FileHeader{ + Name: name, + Method: method, + } + f, err := w.CreateHeader(header) + if err != nil { + t.Fatal(err) + } + _, err = f.Write(data) + if err != nil { + t.Fatal(err) + } +} + +func testReadFile(t *testing.T, f *File, data []byte) { + rc, err := f.Open() + if err != nil { + t.Fatal("opening:", err) + } + b, err := ioutil.ReadAll(rc) + if err != nil { + t.Fatal("reading:", err) + } + err = rc.Close() + if err != nil { + t.Fatal("closing:", err) + } + if !bytes.Equal(b, data) { + t.Errorf("File contents %q, want %q", b, data) + } +} diff --git a/src/pkg/archive/zip/zip_test.go b/src/pkg/archive/zip/zip_test.go new file mode 100644 index 000000000..0f71fdfac --- /dev/null +++ b/src/pkg/archive/zip/zip_test.go @@ -0,0 +1,57 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tests that involve both reading and writing. + +package zip + +import ( + "bytes" + "fmt" + "os" + "testing" +) + +type stringReaderAt string + +func (s stringReaderAt) ReadAt(p []byte, off int64) (n int, err os.Error) { + if off >= int64(len(s)) { + return 0, os.EOF + } + n = copy(p, s[off:]) + return +} + +func TestOver65kFiles(t *testing.T) { + if testing.Short() { + t.Logf("slow test; skipping") + return + } + buf := new(bytes.Buffer) + w := NewWriter(buf) + const nFiles = (1 << 16) + 42 + for i := 0; i < nFiles; i++ { + _, err := w.Create(fmt.Sprintf("%d.dat", i)) + if err != nil { + t.Fatalf("creating file %d: %v", i, err) + } + } + if err := w.Close(); err != nil { + t.Fatalf("Writer.Close: %v", err) + } + rat := stringReaderAt(buf.String()) + zr, err := NewReader(rat, int64(len(rat))) + if err != nil { + t.Fatalf("NewReader: %v", err) + } + if got := len(zr.File); got != nFiles { + t.Fatalf("File contains %d files, want %d", got, nFiles) + } + for i := 0; i < nFiles; i++ { + want := fmt.Sprintf("%d.dat", i) + if zr.File[i].Name != want { + t.Fatalf("File(%d) = %q, want %q", i, zr.File[i].Name, want) + } + } +} |