1 files changed, 188 insertions, 17 deletions
diff --git a/src/pkg/archive/tar/reader.go b/src/pkg/archive/tar/reader.go
index 1b40af812..05f82a40d 100644
--- a/src/pkg/archive/tar/reader.go
+++ b/src/pkg/archive/tar/reader.go
@@ -14,6 +14,7 @@ import (
 	"io/ioutil"
 	"os"
 	"strconv"
+	"strings"
 	"time"
 )
 
@@ -21,24 +22,12 @@ var (
 	ErrHeader = errors.New("archive/tar: invalid tar header")
 )
 
+const maxNanoSecondIntSize = 9
+
 // A Reader provides sequential access to the contents of a tar archive.
 // A tar archive consists of a sequence of files.
 // The Next method advances to the next file in the archive (including the first),
 // and then it can be treated as an io.Reader to access the file's data.
-//
-// Example:
-//	tr := tar.NewReader(r)
-//	for {
-//		hdr, err := tr.Next()
-//		if err == io.EOF {
-//			// end of tar archive
-//			break
-//		}
-//		if err != nil {
-//			// handle error
-//		}
-//		io.Copy(data, tr)
-//	}
 type Reader struct {
 	r   io.Reader
 	err error
@@ -55,13 +44,183 @@ func (tr *Reader) Next() (*Header, error) {
 	if tr.err == nil {
 		tr.skipUnread()
 	}
-	if tr.err == nil {
+	if tr.err != nil {
+		return hdr, tr.err
+	}
+	hdr = tr.readHeader()
+	if hdr == nil {
+		return hdr, tr.err
+	}
+	// Check for PAX/GNU header.
+	switch hdr.Typeflag {
+	case TypeXHeader:
+		//  PAX extended header
+		headers, err := parsePAX(tr)
+		if err != nil {
+			return nil, err
+		}
+		// We actually read the whole file,
+		// but this skips alignment padding
+		tr.skipUnread()
 		hdr = tr.readHeader()
+		mergePAX(hdr, headers)
+		return hdr, nil
+	case TypeGNULongName:
+		// We have a GNU long name header. Its contents are the real file name.
+		realname, err := ioutil.ReadAll(tr)
+		if err != nil {
+			return nil, err
+		}
+		hdr, err := tr.Next()
+		hdr.Name = cString(realname)
+		return hdr, err
+	case TypeGNULongLink:
+		// We have a GNU long link header.
+		realname, err := ioutil.ReadAll(tr)
+		if err != nil {
+			return nil, err
+		}
+		hdr, err := tr.Next()
+		hdr.Linkname = cString(realname)
+		return hdr, err
 	}
 	return hdr, tr.err
 }
 
-// Parse bytes as a NUL-terminated C-style string.
+// mergePAX merges well known headers according to PAX standard.
+// In general headers with the same name as those found
+// in the header struct overwrite those found in the header
+// struct with higher precision or longer values. Esp. useful
+// for name and linkname fields.
+func mergePAX(hdr *Header, headers map[string]string) error {
+	for k, v := range headers {
+		switch k {
+		case "path":
+			hdr.Name = v
+		case "linkpath":
+			hdr.Linkname = v
+		case "gname":
+			hdr.Gname = v
+		case "uname":
+			hdr.Uname = v
+		case "uid":
+			uid, err := strconv.ParseInt(v, 10, 0)
+			if err != nil {
+				return err
+			}
+			hdr.Uid = int(uid)
+		case "gid":
+			gid, err := strconv.ParseInt(v, 10, 0)
+			if err != nil {
+				return err
+			}
+			hdr.Gid = int(gid)
+		case "atime":
+			t, err := parsePAXTime(v)
+			if err != nil {
+				return err
+			}
+			hdr.AccessTime = t
+		case "mtime":
+			t, err := parsePAXTime(v)
+			if err != nil {
+				return err
+			}
+			hdr.ModTime = t
+		case "ctime":
+			t, err := parsePAXTime(v)
+			if err != nil {
+				return err
+			}
+			hdr.ChangeTime = t
+		case "size":
+			size, err := strconv.ParseInt(v, 10, 0)
+			if err != nil {
+				return err
+			}
+			hdr.Size = int64(size)
+		}
+
+	}
+	return nil
+}
+
+// parsePAXTime takes a string of the form %d.%d as described in
+// the PAX specification.
+func parsePAXTime(t string) (time.Time, error) {
+	buf := []byte(t)
+	pos := bytes.IndexByte(buf, '.')
+	var seconds, nanoseconds int64
+	var err error
+	if pos == -1 {
+		seconds, err = strconv.ParseInt(t, 10, 0)
+		if err != nil {
+			return time.Time{}, err
+		}
+	} else {
+		seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
+		if err != nil {
+			return time.Time{}, err
+		}
+		nano_buf := string(buf[pos+1:])
+		// Pad as needed before converting to a decimal.
+		// For example .030 -> .030000000 -> 30000000 nanoseconds
+		if len(nano_buf) < maxNanoSecondIntSize {
+			// Right pad
+			nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
+		} else if len(nano_buf) > maxNanoSecondIntSize {
+			// Right truncate
+			nano_buf = nano_buf[:maxNanoSecondIntSize]
+		}
+		nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
+		if err != nil {
+			return time.Time{}, err
+		}
+	}
+	ts := time.Unix(seconds, nanoseconds)
+	return ts, nil
+}
+
+// parsePAX parses PAX headers.
+// If an extended header (type 'x') is invalid, ErrHeader is returned
+func parsePAX(r io.Reader) (map[string]string, error) {
+	buf, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, err
+	}
+	headers := make(map[string]string)
+	// Each record is constructed as
+	//     "%d %s=%s\n", length, keyword, value
+	for len(buf) > 0 {
+		// or the header was empty to start with.
+		var sp int
+		// The size field ends at the first space.
+		sp = bytes.IndexByte(buf, ' ')
+		if sp == -1 {
+			return nil, ErrHeader
+		}
+		// Parse the first token as a decimal integer.
+		n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
+		if err != nil {
+			return nil, ErrHeader
+		}
+		// Extract everything between the decimal and the n -1 on the
+		// beginning to to eat the ' ', -1 on the end to skip the newline.
+		var record []byte
+		record, buf = buf[sp+1:n-1], buf[n:]
+		// The first equals is guaranteed to mark the end of the key.
+		// Everything else is value.
+		eq := bytes.IndexByte(record, '=')
+		if eq == -1 {
+			return nil, ErrHeader
+		}
+		key, value := record[:eq], record[eq+1:]
+		headers[string(key)] = string(value)
+	}
+	return headers, nil
+}
+
+// cString parses bytes as a NUL-terminated C-style string.
 // If a NUL byte is not found then the whole slice is returned as a string.
 func cString(b []byte) string {
 	n := 0
@@ -72,6 +231,18 @@ func cString(b []byte) string {
 }
 
 func (tr *Reader) octal(b []byte) int64 {
+	// Check for binary format first.
+	if len(b) > 0 && b[0]&0x80 != 0 {
+		var x int64
+		for i, c := range b {
+			if i == 0 {
+				c &= 0x7f // ignore signal bit in first byte
+			}
+			x = x<<8 | int64(c)
+		}
+		return x
+	}
+
 	// Removing leading spaces.
 	for len(b) > 0 && b[0] == ' ' {
 		b = b[1:]
@@ -87,7 +258,7 @@ func (tr *Reader) octal(b []byte) int64 {
 	return int64(x)
 }
 
-// Skip any unread bytes in the existing file entry, as well as any alignment padding.
+// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
 func (tr *Reader) skipUnread() {
 	nr := tr.nb + tr.pad // number of bytes to skip
 	tr.nb, tr.pad = 0, 0