diff options
author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
---|---|---|
committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
commit | f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch) | |
tree | f8255d51e10c6f1e0ed69702200b966c9556a431 /src/archive/zip/reader.go | |
parent | 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff) | |
download | golang-upstream/1.4.tar.gz |
Imported Upstream version 1.4upstream/1.4
Diffstat (limited to 'src/archive/zip/reader.go')
-rw-r--r-- | src/archive/zip/reader.go | 453 |
1 files changed, 453 insertions, 0 deletions
diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go new file mode 100644 index 000000000..8136b840d --- /dev/null +++ b/src/archive/zip/reader.go @@ -0,0 +1,453 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bufio" + "encoding/binary" + "errors" + "hash" + "hash/crc32" + "io" + "os" +) + +var ( + ErrFormat = errors.New("zip: not a valid zip file") + ErrAlgorithm = errors.New("zip: unsupported compression algorithm") + ErrChecksum = errors.New("zip: checksum error") +) + +type Reader struct { + r io.ReaderAt + File []*File + Comment string +} + +type ReadCloser struct { + f *os.File + Reader +} + +type File struct { + FileHeader + zipr io.ReaderAt + zipsize int64 + headerOffset int64 +} + +func (f *File) hasDataDescriptor() bool { + return f.Flags&0x8 != 0 +} + +// OpenReader will open the Zip file specified by name and return a ReadCloser. +func OpenReader(name string) (*ReadCloser, error) { + f, err := os.Open(name) + if err != nil { + return nil, err + } + fi, err := f.Stat() + if err != nil { + f.Close() + return nil, err + } + r := new(ReadCloser) + if err := r.init(f, fi.Size()); err != nil { + f.Close() + return nil, err + } + r.f = f + return r, nil +} + +// NewReader returns a new Reader reading from r, which is assumed to +// have the given size in bytes. +func NewReader(r io.ReaderAt, size int64) (*Reader, error) { + zr := new(Reader) + if err := zr.init(r, size); err != nil { + return nil, err + } + return zr, nil +} + +func (z *Reader) init(r io.ReaderAt, size int64) error { + end, err := readDirectoryEnd(r, size) + if err != nil { + return err + } + z.r = r + z.File = make([]*File, 0, end.directoryRecords) + z.Comment = end.comment + rs := io.NewSectionReader(r, 0, size) + if _, err = rs.Seek(int64(end.directoryOffset), os.SEEK_SET); err != nil { + return err + } + buf := bufio.NewReader(rs) + + // The count of files inside a zip is truncated to fit in a uint16. + // Gloss over this by reading headers until we encounter + // a bad one, and then only report a ErrFormat or UnexpectedEOF if + // the file count modulo 65536 is incorrect. + for { + f := &File{zipr: r, zipsize: size} + err = readDirectoryHeader(f, buf) + if err == ErrFormat || err == io.ErrUnexpectedEOF { + break + } + if err != nil { + return err + } + z.File = append(z.File, f) + } + if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here + // Return the readDirectoryHeader error if we read + // the wrong number of directory entries. + return err + } + return nil +} + +// Close closes the Zip file, rendering it unusable for I/O. +func (rc *ReadCloser) Close() error { + return rc.f.Close() +} + +// DataOffset returns the offset of the file's possibly-compressed +// data, relative to the beginning of the zip file. +// +// Most callers should instead use Open, which transparently +// decompresses data and verifies checksums. +func (f *File) DataOffset() (offset int64, err error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return + } + return f.headerOffset + bodyOffset, nil +} + +// Open returns a ReadCloser that provides access to the File's contents. +// Multiple files may be read concurrently. +func (f *File) Open() (rc io.ReadCloser, err error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return + } + size := int64(f.CompressedSize64) + r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) + dcomp := decompressor(f.Method) + if dcomp == nil { + err = ErrAlgorithm + return + } + rc = dcomp(r) + var desr io.Reader + if f.hasDataDescriptor() { + desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) + } + rc = &checksumReader{rc, crc32.NewIEEE(), f, desr, nil} + return +} + +type checksumReader struct { + rc io.ReadCloser + hash hash.Hash32 + f *File + desr io.Reader // if non-nil, where to read the data descriptor + err error // sticky error +} + +func (r *checksumReader) Read(b []byte) (n int, err error) { + if r.err != nil { + return 0, r.err + } + n, err = r.rc.Read(b) + r.hash.Write(b[:n]) + if err == nil { + return + } + if err == io.EOF { + if r.desr != nil { + if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { + err = err1 + } else if r.hash.Sum32() != r.f.CRC32 { + err = ErrChecksum + } + } else { + // If there's not a data descriptor, we still compare + // the CRC32 of what we've read against the file header + // or TOC's CRC32, if it seems like it was set. + if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { + err = ErrChecksum + } + } + } + r.err = err + return +} + +func (r *checksumReader) Close() error { return r.rc.Close() } + +// findBodyOffset does the minimum work to verify the file has a header +// and returns the file body offset. +func (f *File) findBodyOffset() (int64, error) { + var buf [fileHeaderLen]byte + if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { + return 0, err + } + b := readBuf(buf[:]) + if sig := b.uint32(); sig != fileHeaderSignature { + return 0, ErrFormat + } + b = b[22:] // skip over most of the header + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + return int64(fileHeaderLen + filenameLen + extraLen), nil +} + +// readDirectoryHeader attempts to read a directory header from r. +// It returns io.ErrUnexpectedEOF if it cannot read a complete header, +// and ErrFormat if it doesn't find a valid header signature. +func readDirectoryHeader(f *File, r io.Reader) error { + var buf [directoryHeaderLen]byte + if _, err := io.ReadFull(r, buf[:]); err != nil { + return err + } + b := readBuf(buf[:]) + if sig := b.uint32(); sig != directoryHeaderSignature { + return ErrFormat + } + f.CreatorVersion = b.uint16() + f.ReaderVersion = b.uint16() + f.Flags = b.uint16() + f.Method = b.uint16() + f.ModifiedTime = b.uint16() + f.ModifiedDate = b.uint16() + f.CRC32 = b.uint32() + f.CompressedSize = b.uint32() + f.UncompressedSize = b.uint32() + f.CompressedSize64 = uint64(f.CompressedSize) + f.UncompressedSize64 = uint64(f.UncompressedSize) + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + commentLen := int(b.uint16()) + b = b[4:] // skipped start disk number and internal attributes (2x uint16) + f.ExternalAttrs = b.uint32() + f.headerOffset = int64(b.uint32()) + d := make([]byte, filenameLen+extraLen+commentLen) + if _, err := io.ReadFull(r, d); err != nil { + return err + } + f.Name = string(d[:filenameLen]) + f.Extra = d[filenameLen : filenameLen+extraLen] + f.Comment = string(d[filenameLen+extraLen:]) + + if len(f.Extra) > 0 { + b := readBuf(f.Extra) + for len(b) >= 4 { // need at least tag and size + tag := b.uint16() + size := b.uint16() + if int(size) > len(b) { + return ErrFormat + } + if tag == zip64ExtraId { + // update directory values from the zip64 extra block + eb := readBuf(b[:size]) + if len(eb) >= 8 { + f.UncompressedSize64 = eb.uint64() + } + if len(eb) >= 8 { + f.CompressedSize64 = eb.uint64() + } + if len(eb) >= 8 { + f.headerOffset = int64(eb.uint64()) + } + } + b = b[size:] + } + // Should have consumed the whole header. + // But popular zip & JAR creation tools are broken and + // may pad extra zeros at the end, so accept those + // too. See golang.org/issue/8186. + for _, v := range b { + if v != 0 { + return ErrFormat + } + } + } + return nil +} + +func readDataDescriptor(r io.Reader, f *File) error { + var buf [dataDescriptorLen]byte + + // The spec says: "Although not originally assigned a + // signature, the value 0x08074b50 has commonly been adopted + // as a signature value for the data descriptor record. + // Implementers should be aware that ZIP files may be + // encountered with or without this signature marking data + // descriptors and should account for either case when reading + // ZIP files to ensure compatibility." + // + // dataDescriptorLen includes the size of the signature but + // first read just those 4 bytes to see if it exists. + if _, err := io.ReadFull(r, buf[:4]); err != nil { + return err + } + off := 0 + maybeSig := readBuf(buf[:4]) + if maybeSig.uint32() != dataDescriptorSignature { + // No data descriptor signature. Keep these four + // bytes. + off += 4 + } + if _, err := io.ReadFull(r, buf[off:12]); err != nil { + return err + } + b := readBuf(buf[:12]) + if b.uint32() != f.CRC32 { + return ErrChecksum + } + + // The two sizes that follow here can be either 32 bits or 64 bits + // but the spec is not very clear on this and different + // interpretations has been made causing incompatibilities. We + // already have the sizes from the central directory so we can + // just ignore these. + + return nil +} + +func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { + // look for directoryEndSignature in the last 1k, then in the last 65k + var buf []byte + var directoryEndOffset int64 + for i, bLen := range []int64{1024, 65 * 1024} { + if bLen > size { + bLen = size + } + buf = make([]byte, int(bLen)) + if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { + return nil, err + } + if p := findSignatureInBlock(buf); p >= 0 { + buf = buf[p:] + directoryEndOffset = size - bLen + int64(p) + break + } + if i == 1 || bLen == size { + return nil, ErrFormat + } + } + + // read header into struct + b := readBuf(buf[4:]) // skip signature + d := &directoryEnd{ + diskNbr: uint32(b.uint16()), + dirDiskNbr: uint32(b.uint16()), + dirRecordsThisDisk: uint64(b.uint16()), + directoryRecords: uint64(b.uint16()), + directorySize: uint64(b.uint32()), + directoryOffset: uint64(b.uint32()), + commentLen: b.uint16(), + } + l := int(d.commentLen) + if l > len(b) { + return nil, errors.New("zip: invalid comment length") + } + d.comment = string(b[:l]) + + p, err := findDirectory64End(r, directoryEndOffset) + if err == nil && p >= 0 { + err = readDirectory64End(r, p, d) + } + if err != nil { + return nil, err + } + + // Make sure directoryOffset points to somewhere in our file. + if o := int64(d.directoryOffset); o < 0 || o >= size { + return nil, ErrFormat + } + return d, nil +} + +// findDirectory64End tries to read the zip64 locator just before the +// directory end and returns the offset of the zip64 directory end if +// found. +func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { + locOffset := directoryEndOffset - directory64LocLen + if locOffset < 0 { + return -1, nil // no need to look for a header outside the file + } + buf := make([]byte, directory64LocLen) + if _, err := r.ReadAt(buf, locOffset); err != nil { + return -1, err + } + b := readBuf(buf) + if sig := b.uint32(); sig != directory64LocSignature { + return -1, nil + } + b = b[4:] // skip number of the disk with the start of the zip64 end of central directory + p := b.uint64() // relative offset of the zip64 end of central directory record + return int64(p), nil +} + +// readDirectory64End reads the zip64 directory end and updates the +// directory end with the zip64 directory end values. +func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { + buf := make([]byte, directory64EndLen) + if _, err := r.ReadAt(buf, offset); err != nil { + return err + } + + b := readBuf(buf) + if sig := b.uint32(); sig != directory64EndSignature { + return ErrFormat + } + + b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) + d.diskNbr = b.uint32() // number of this disk + d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory + d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk + d.directoryRecords = b.uint64() // total number of entries in the central directory + d.directorySize = b.uint64() // size of the central directory + d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number + + return nil +} + +func findSignatureInBlock(b []byte) int { + for i := len(b) - directoryEndLen; i >= 0; i-- { + // defined from directoryEndSignature in struct.go + if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { + // n is length of comment + n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 + if n+directoryEndLen+i <= len(b) { + return i + } + } + } + return -1 +} + +type readBuf []byte + +func (b *readBuf) uint16() uint16 { + v := binary.LittleEndian.Uint16(*b) + *b = (*b)[2:] + return v +} + +func (b *readBuf) uint32() uint32 { + v := binary.LittleEndian.Uint32(*b) + *b = (*b)[4:] + return v +} + +func (b *readBuf) uint64() uint64 { + v := binary.LittleEndian.Uint64(*b) + *b = (*b)[8:] + return v +} |