diff options
Diffstat (limited to 'src/lib/archive/tar/untar.go')
| -rw-r--r-- | src/lib/archive/tar/untar.go | 242 |
1 files changed, 242 insertions, 0 deletions
diff --git a/src/lib/archive/tar/untar.go b/src/lib/archive/tar/untar.go new file mode 100644 index 000000000..300c0f932 --- /dev/null +++ b/src/lib/archive/tar/untar.go @@ -0,0 +1,242 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The tar package implements access to tar archives. +// It aims to cover most of the variations, including those produced +// by GNU and BSD tars (not yet started). +// +// References: +// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 +// http://www.gnu.org/software/tar/manual/html_node/Standard.html +package tar + +// TODO(dsymonds): +// - Make it seekable. +// - Extensions. + +import ( + "bufio"; + "bytes"; + "io"; + "os"; + "strconv"; +) + +var ( + HeaderError os.Error = os.ErrorString("invalid tar header"); +) + +// A tar archive consists of a sequence of files. +// A Reader provides sequential access to the contents of a tar archive. +// The Next method advances to the next file in the archive (including the first), +// and then it can be treated as an io.Reader to access the file's data. +// +// Example: +// tr := NewTarReader(r); +// for { +// hdr, err := tr.Next(); +// if err != nil { +// // handle error +// } +// if hdr == nil { +// // end of tar archive +// break +// } +// io.Copy(tr, somewhere); +// } +type Reader struct { + r io.Reader; + err os.Error; + nb int64; // number of unread bytes for current file entry + pad int64; // amount of padding (ignored) after current file entry +} + +// A Header represents a single header in a tar archive. +// Only some fields may be populated. +type Header struct { + Name string; + Mode int64; + Uid int64; + Gid int64; + Size int64; + Mtime int64; + Typeflag byte; + Linkname string; + Uname string; + Gname string; + Devmajor int64; + Devminor int64; +} + +func (tr *Reader) skipUnread() +func (tr *Reader) readHeader() *Header + +// NewReader creates a new Reader reading the given io.Reader. +func NewReader(r io.Reader) *Reader { + return &Reader{ r: r } +} + +// Next advances to the next entry in the tar archive. +func (tr *Reader) Next() (*Header, os.Error) { + var hdr *Header; + if tr.err == nil { + tr.skipUnread(); + } + if tr.err == nil { + hdr = tr.readHeader(); + } + return hdr, tr.err +} + +const ( + blockSize = 512; + + // Types + TypeReg = '0'; + TypeRegA = '\x00'; + TypeLink = '1'; + TypeSymlink = '2'; + TypeChar = '3'; + TypeBlock = '4'; + TypeDir = '5'; + TypeFifo = '6'; + TypeCont = '7'; + TypeXHeader = 'x'; + TypeXGlobalHeader = 'g'; +) + +var zeroBlock = make([]byte, blockSize); + +// Parse bytes as a NUL-terminated C-style string. +// If a NUL byte is not found then the whole slice is returned as a string. +func cString(b []byte) string { + n := 0; + for n < len(b) && b[n] != 0 { + n++; + } + return string(b[0:n]) +} + +func (tr *Reader) octalNumber(b []byte) int64 { + x, err := strconv.Btoui64(cString(b), 8); + if err != nil { + tr.err = err; + } + return int64(x) +} + +type ignoreWriter struct {} +func (ignoreWriter) Write(b []byte) (n int, err os.Error) { + return len(b), nil +} + +type seeker interface { + Seek(offset int64, whence int) (ret int64, err os.Error); +} + +// Skip any unread bytes in the existing file entry, as well as any alignment padding. +func (tr *Reader) skipUnread() { + nr := tr.nb + tr.pad; // number of bytes to skip + + var n int64; + if sr, ok := tr.r.(seeker); ok { + n, tr.err = sr.Seek(nr, 1); + } else { + n, tr.err = io.Copyn(tr.r, ignoreWriter{}, nr); + } + tr.nb, tr.pad = 0, 0; +} + +func (tr *Reader) verifyChecksum(header []byte) bool { + given := tr.octalNumber(header[148:156]); + if tr.err != nil { + return false + } + + var computed int64; + for i := 0; i < len(header); i++ { + if i == 148 { + // The chksum field is special: it should be treated as space bytes. + computed += ' ' * 8; + i += 7; + continue + } + computed += int64(header[i]); + } + + return given == computed +} + +type slicer []byte +func (s *slicer) next(n int) (b []byte) { + b, *s = s[0:n], s[n:len(s)]; + return +} + +func (tr *Reader) readHeader() *Header { + header := make([]byte, blockSize); + var n int; + if n, tr.err = io.FullRead(tr.r, header); tr.err != nil { + return nil + } + + // Two blocks of zero bytes marks the end of the archive. + if bytes.Equal(header, zeroBlock[0:blockSize]) { + if n, tr.err = io.FullRead(tr.r, header); tr.err != nil { + return nil + } + if !bytes.Equal(header, zeroBlock[0:blockSize]) { + tr.err = HeaderError; + } + return nil + } + + if !tr.verifyChecksum(header) { + tr.err = HeaderError; + return nil + } + + // Unpack + hdr := new(Header); + s := slicer(header); + + // TODO(dsymonds): The format of the header depends on the value of magic (hdr[257:262]), + // so use that value to do the correct parsing below. + + hdr.Name = cString(s.next(100)); + hdr.Mode = tr.octalNumber(s.next(8)); + hdr.Uid = tr.octalNumber(s.next(8)); + hdr.Gid = tr.octalNumber(s.next(8)); + hdr.Size = tr.octalNumber(s.next(12)); + hdr.Mtime = tr.octalNumber(s.next(12)); + s.next(8); // chksum + hdr.Typeflag = s.next(1)[0]; + hdr.Linkname = cString(s.next(100)); + s.next(8); // magic, version + + if tr.err != nil { + tr.err = HeaderError; + return nil + } + + // Maximum value of hdr.Size is 64 GB (12 octal digits), + // so there's no risk of int64 overflowing. + tr.nb = int64(hdr.Size); + tr.pad = -tr.nb & (blockSize - 1); // blockSize is a power of two + + return hdr +} + +// Read reads from the current entry in the tar archive. +// It returns 0, nil when it reaches the end of that entry, +// until Next is called to advance to the next entry. +func (tr *Reader) Read(b []uint8) (n int, err os.Error) { + if int64(len(b)) > tr.nb { + b = b[0:tr.nb]; + } + n, err = tr.r.Read(b); + tr.nb -= int64(n); + tr.err = err; + return +} |
