| | | |
|---|---|---|
| author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
| committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
| commit | f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch) | |
| tree | f8255d51e10c6f1e0ed69702200b966c9556a431 /src/archive/zip/reader.go | |
| parent | 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff) | |
| download | golang-upstream/1.4.tar.gz | |
Imported Upstream version 1.4 (upstream/1.4)
Diffstat (limited to 'src/archive/zip/reader.go')
| Mode | File | Lines |
|---|---|---|
| -rw-r--r-- | src/archive/zip/reader.go | 453 |

1 file changed, 453 insertions, 0 deletions
```diff
diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go
new file mode 100644
index 000000000..8136b840d
--- /dev/null
+++ b/src/archive/zip/reader.go
@@ -0,0 +1,453 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package zip
+
+import (
+	"bufio"
+	"encoding/binary"
+	"errors"
+	"hash"
+	"hash/crc32"
+	"io"
+	"os"
+)
+
+var (
+	ErrFormat    = errors.New("zip: not a valid zip file")
+	ErrAlgorithm = errors.New("zip: unsupported compression algorithm")
+	ErrChecksum  = errors.New("zip: checksum error")
+)
+
+type Reader struct {
+	r       io.ReaderAt
+	File    []*File
+	Comment string
+}
+
+type ReadCloser struct {
+	f *os.File
+	Reader
+}
+
+type File struct {
+	FileHeader
+	zipr         io.ReaderAt
+	zipsize      int64
+	headerOffset int64
+}
+
+func (f *File) hasDataDescriptor() bool {
+	return f.Flags&0x8 != 0
+}
+
+// OpenReader will open the Zip file specified by name and return a ReadCloser.
+func OpenReader(name string) (*ReadCloser, error) {
+	f, err := os.Open(name)
+	if err != nil {
+		return nil, err
+	}
+	fi, err := f.Stat()
+	if err != nil {
+		f.Close()
+		return nil, err
+	}
+	r := new(ReadCloser)
+	if err := r.init(f, fi.Size()); err != nil {
+		f.Close()
+		return nil, err
+	}
+	r.f = f
+	return r, nil
+}
+
+// NewReader returns a new Reader reading from r, which is assumed to
+// have the given size in bytes.
+func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
+	zr := new(Reader)
+	if err := zr.init(r, size); err != nil {
+		return nil, err
+	}
+	return zr, nil
+}
+
+func (z *Reader) init(r io.ReaderAt, size int64) error {
+	end, err := readDirectoryEnd(r, size)
+	if err != nil {
+		return err
+	}
+	z.r = r
+	z.File = make([]*File, 0, end.directoryRecords)
+	z.Comment = end.comment
+	rs := io.NewSectionReader(r, 0, size)
+	if _, err = rs.Seek(int64(end.directoryOffset), os.SEEK_SET); err != nil {
+		return err
+	}
+	buf := bufio.NewReader(rs)
+
+	// The count of files inside a zip is truncated to fit in a uint16.
+	// Gloss over this by reading headers until we encounter
+	// a bad one, and then only report a ErrFormat or UnexpectedEOF if
+	// the file count modulo 65536 is incorrect.
+	for {
+		f := &File{zipr: r, zipsize: size}
+		err = readDirectoryHeader(f, buf)
+		if err == ErrFormat || err == io.ErrUnexpectedEOF {
+			break
+		}
+		if err != nil {
+			return err
+		}
+		z.File = append(z.File, f)
+	}
+	if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here
+		// Return the readDirectoryHeader error if we read
+		// the wrong number of directory entries.
+		return err
+	}
+	return nil
+}
+
+// Close closes the Zip file, rendering it unusable for I/O.
+func (rc *ReadCloser) Close() error {
+	return rc.f.Close()
+}
+
+// DataOffset returns the offset of the file's possibly-compressed
+// data, relative to the beginning of the zip file.
+//
+// Most callers should instead use Open, which transparently
+// decompresses data and verifies checksums.
+func (f *File) DataOffset() (offset int64, err error) {
+	bodyOffset, err := f.findBodyOffset()
+	if err != nil {
+		return
+	}
+	return f.headerOffset + bodyOffset, nil
+}
+
+// Open returns a ReadCloser that provides access to the File's contents.
+// Multiple files may be read concurrently.
+func (f *File) Open() (rc io.ReadCloser, err error) {
+	bodyOffset, err := f.findBodyOffset()
+	if err != nil {
+		return
+	}
+	size := int64(f.CompressedSize64)
+	r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
+	dcomp := decompressor(f.Method)
+	if dcomp == nil {
+		err = ErrAlgorithm
+		return
+	}
+	rc = dcomp(r)
+	var desr io.Reader
+	if f.hasDataDescriptor() {
+		desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen)
+	}
+	rc = &checksumReader{rc, crc32.NewIEEE(), f, desr, nil}
+	return
+}
+
+type checksumReader struct {
+	rc   io.ReadCloser
+	hash hash.Hash32
+	f    *File
+	desr io.Reader // if non-nil, where to read the data descriptor
+	err  error     // sticky error
+}
+
+func (r *checksumReader) Read(b []byte) (n int, err error) {
+	if r.err != nil {
+		return 0, r.err
+	}
+	n, err = r.rc.Read(b)
+	r.hash.Write(b[:n])
+	if err == nil {
+		return
+	}
+	if err == io.EOF {
+		if r.desr != nil {
+			if err1 := readDataDescriptor(r.desr, r.f); err1 != nil {
+				err = err1
+			} else if r.hash.Sum32() != r.f.CRC32 {
+				err = ErrChecksum
+			}
+		} else {
+			// If there's not a data descriptor, we still compare
+			// the CRC32 of what we've read against the file header
+			// or TOC's CRC32, if it seems like it was set.
+			if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
+				err = ErrChecksum
+			}
+		}
+	}
+	r.err = err
+	return
+}
+
+func (r *checksumReader) Close() error { return r.rc.Close() }
+
+// findBodyOffset does the minimum work to verify the file has a header
+// and returns the file body offset.
+func (f *File) findBodyOffset() (int64, error) {
+	var buf [fileHeaderLen]byte
+	if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil {
+		return 0, err
+	}
+	b := readBuf(buf[:])
+	if sig := b.uint32(); sig != fileHeaderSignature {
+		return 0, ErrFormat
+	}
+	b = b[22:] // skip over most of the header
+	filenameLen := int(b.uint16())
+	extraLen := int(b.uint16())
+	return int64(fileHeaderLen + filenameLen + extraLen), nil
+}
+
+// readDirectoryHeader attempts to read a directory header from r.
+// It returns io.ErrUnexpectedEOF if it cannot read a complete header,
+// and ErrFormat if it doesn't find a valid header signature.
+func readDirectoryHeader(f *File, r io.Reader) error {
+	var buf [directoryHeaderLen]byte
+	if _, err := io.ReadFull(r, buf[:]); err != nil {
+		return err
+	}
+	b := readBuf(buf[:])
+	if sig := b.uint32(); sig != directoryHeaderSignature {
+		return ErrFormat
+	}
+	f.CreatorVersion = b.uint16()
+	f.ReaderVersion = b.uint16()
+	f.Flags = b.uint16()
+	f.Method = b.uint16()
+	f.ModifiedTime = b.uint16()
+	f.ModifiedDate = b.uint16()
+	f.CRC32 = b.uint32()
+	f.CompressedSize = b.uint32()
+	f.UncompressedSize = b.uint32()
+	f.CompressedSize64 = uint64(f.CompressedSize)
+	f.UncompressedSize64 = uint64(f.UncompressedSize)
+	filenameLen := int(b.uint16())
+	extraLen := int(b.uint16())
+	commentLen := int(b.uint16())
+	b = b[4:] // skipped start disk number and internal attributes (2x uint16)
+	f.ExternalAttrs = b.uint32()
+	f.headerOffset = int64(b.uint32())
+	d := make([]byte, filenameLen+extraLen+commentLen)
+	if _, err := io.ReadFull(r, d); err != nil {
+		return err
+	}
+	f.Name = string(d[:filenameLen])
+	f.Extra = d[filenameLen : filenameLen+extraLen]
+	f.Comment = string(d[filenameLen+extraLen:])
+
+	if len(f.Extra) > 0 {
+		b := readBuf(f.Extra)
+		for len(b) >= 4 { // need at least tag and size
+			tag := b.uint16()
+			size := b.uint16()
+			if int(size) > len(b) {
+				return ErrFormat
+			}
+			if tag == zip64ExtraId {
+				// update directory values from the zip64 extra block
+				eb := readBuf(b[:size])
+				if len(eb) >= 8 {
+					f.UncompressedSize64 = eb.uint64()
+				}
+				if len(eb) >= 8 {
+					f.CompressedSize64 = eb.uint64()
+				}
+				if len(eb) >= 8 {
+					f.headerOffset = int64(eb.uint64())
+				}
+			}
+			b = b[size:]
+		}
+		// Should have consumed the whole header.
+		// But popular zip & JAR creation tools are broken and
+		// may pad extra zeros at the end, so accept those
+		// too. See golang.org/issue/8186.
+		for _, v := range b {
+			if v != 0 {
+				return ErrFormat
+			}
+		}
+	}
+	return nil
+}
+
+func readDataDescriptor(r io.Reader, f *File) error {
+	var buf [dataDescriptorLen]byte
+
+	// The spec says: "Although not originally assigned a
+	// signature, the value 0x08074b50 has commonly been adopted
+	// as a signature value for the data descriptor record.
+	// Implementers should be aware that ZIP files may be
+	// encountered with or without this signature marking data
+	// descriptors and should account for either case when reading
+	// ZIP files to ensure compatibility."
+	//
+	// dataDescriptorLen includes the size of the signature but
+	// first read just those 4 bytes to see if it exists.
+	if _, err := io.ReadFull(r, buf[:4]); err != nil {
+		return err
+	}
+	off := 0
+	maybeSig := readBuf(buf[:4])
+	if maybeSig.uint32() != dataDescriptorSignature {
+		// No data descriptor signature. Keep these four
+		// bytes.
+		off += 4
+	}
+	if _, err := io.ReadFull(r, buf[off:12]); err != nil {
+		return err
+	}
+	b := readBuf(buf[:12])
+	if b.uint32() != f.CRC32 {
+		return ErrChecksum
+	}
+
+	// The two sizes that follow here can be either 32 bits or 64 bits
+	// but the spec is not very clear on this and different
+	// interpretations has been made causing incompatibilities. We
+	// already have the sizes from the central directory so we can
+	// just ignore these.
+
+	return nil
+}
+
+func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) {
+	// look for directoryEndSignature in the last 1k, then in the last 65k
+	var buf []byte
+	var directoryEndOffset int64
+	for i, bLen := range []int64{1024, 65 * 1024} {
+		if bLen > size {
+			bLen = size
+		}
+		buf = make([]byte, int(bLen))
+		if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF {
+			return nil, err
+		}
+		if p := findSignatureInBlock(buf); p >= 0 {
+			buf = buf[p:]
+			directoryEndOffset = size - bLen + int64(p)
+			break
+		}
+		if i == 1 || bLen == size {
+			return nil, ErrFormat
+		}
+	}
+
+	// read header into struct
+	b := readBuf(buf[4:]) // skip signature
+	d := &directoryEnd{
+		diskNbr:            uint32(b.uint16()),
+		dirDiskNbr:         uint32(b.uint16()),
+		dirRecordsThisDisk: uint64(b.uint16()),
+		directoryRecords:   uint64(b.uint16()),
+		directorySize:      uint64(b.uint32()),
+		directoryOffset:    uint64(b.uint32()),
+		commentLen:         b.uint16(),
+	}
+	l := int(d.commentLen)
+	if l > len(b) {
+		return nil, errors.New("zip: invalid comment length")
+	}
+	d.comment = string(b[:l])
+
+	p, err := findDirectory64End(r, directoryEndOffset)
+	if err == nil && p >= 0 {
+		err = readDirectory64End(r, p, d)
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	// Make sure directoryOffset points to somewhere in our file.
+	if o := int64(d.directoryOffset); o < 0 || o >= size {
+		return nil, ErrFormat
+	}
+	return d, nil
+}
+
+// findDirectory64End tries to read the zip64 locator just before the
+// directory end and returns the offset of the zip64 directory end if
+// found.
+func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
+	locOffset := directoryEndOffset - directory64LocLen
+	if locOffset < 0 {
+		return -1, nil // no need to look for a header outside the file
+	}
+	buf := make([]byte, directory64LocLen)
+	if _, err := r.ReadAt(buf, locOffset); err != nil {
+		return -1, err
+	}
+	b := readBuf(buf)
+	if sig := b.uint32(); sig != directory64LocSignature {
+		return -1, nil
+	}
+	b = b[4:]       // skip number of the disk with the start of the zip64 end of central directory
+	p := b.uint64() // relative offset of the zip64 end of central directory record
+	return int64(p), nil
+}
+
+// readDirectory64End reads the zip64 directory end and updates the
+// directory end with the zip64 directory end values.
+func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
+	buf := make([]byte, directory64EndLen)
+	if _, err := r.ReadAt(buf, offset); err != nil {
+		return err
+	}
+
+	b := readBuf(buf)
+	if sig := b.uint32(); sig != directory64EndSignature {
+		return ErrFormat
+	}
+
+	b = b[12:]                        // skip dir size, version and version needed (uint64 + 2x uint16)
+	d.diskNbr = b.uint32()            // number of this disk
+	d.dirDiskNbr = b.uint32()         // number of the disk with the start of the central directory
+	d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
+	d.directoryRecords = b.uint64()   // total number of entries in the central directory
+	d.directorySize = b.uint64()      // size of the central directory
+	d.directoryOffset = b.uint64()    // offset of start of central directory with respect to the starting disk number
+
+	return nil
+}
+
+func findSignatureInBlock(b []byte) int {
+	for i := len(b) - directoryEndLen; i >= 0; i-- {
+		// defined from directoryEndSignature in struct.go
+		if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
+			// n is length of comment
+			n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
+			if n+directoryEndLen+i <= len(b) {
+				return i
+			}
+		}
+	}
+	return -1
+}
+
+type readBuf []byte
+
+func (b *readBuf) uint16() uint16 {
+	v := binary.LittleEndian.Uint16(*b)
+	*b = (*b)[2:]
+	return v
+}
+
+func (b *readBuf) uint32() uint32 {
+	v := binary.LittleEndian.Uint32(*b)
+	*b = (*b)[4:]
+	return v
+}
+
+func (b *readBuf) uint64() uint64 {
+	v := binary.LittleEndian.Uint64(*b)
+	*b = (*b)[8:]
+	return v
+}
```
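
For context, here is a minimal sketch of how the reader API added in this file is typically used from client code. It is not part of the commit; it relies only on the exported identifiers visible in the diff (OpenReader, Reader.File, Reader.Comment, File.Open, File.Name, File.UncompressedSize64), and the archive path "example.zip" is a placeholder.

```go
package main

import (
	"archive/zip"
	"fmt"
	"io"
	"log"
	"os"
)

func main() {
	// OpenReader stats the file and parses the central directory;
	// it returns ErrFormat if the end-of-central-directory record is not found.
	rc, err := zip.OpenReader("example.zip") // placeholder path
	if err != nil {
		log.Fatal(err)
	}
	defer rc.Close()

	fmt.Println("archive comment:", rc.Comment)

	// Each *zip.File in rc.File corresponds to one central directory entry.
	for _, f := range rc.File {
		r, err := f.Open() // decompresses and verifies the CRC32 while reading
		if err != nil {
			log.Fatal(err) // e.g. ErrAlgorithm for an unsupported method
		}
		fmt.Printf("%s (%d bytes):\n", f.Name, f.UncompressedSize64)
		if _, err := io.Copy(os.Stdout, r); err != nil {
			log.Fatal(err) // ErrChecksum surfaces here, at read time
		}
		r.Close()
	}
}
```

For in-memory data, NewReader can be used instead of OpenReader with any io.ReaderAt (for example a bytes.Reader) plus the archive size.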
