diff options
Diffstat (limited to 'src/pkg/archive/tar')
-rw-r--r-- | src/pkg/archive/tar/common.go | 184 | ||||
-rw-r--r-- | src/pkg/archive/tar/example_test.go | 79 | ||||
-rw-r--r-- | src/pkg/archive/tar/reader.go | 205 | ||||
-rw-r--r-- | src/pkg/archive/tar/reader_test.go | 106 | ||||
-rw-r--r-- | src/pkg/archive/tar/stat_atim.go | 20 | ||||
-rw-r--r-- | src/pkg/archive/tar/stat_atimespec.go | 20 | ||||
-rw-r--r-- | src/pkg/archive/tar/stat_unix.go | 32 | ||||
-rw-r--r-- | src/pkg/archive/tar/tar_test.go | 271 | ||||
-rw-r--r-- | src/pkg/archive/tar/testdata/pax.tar | bin | 0 -> 10240 bytes | |||
-rw-r--r-- | src/pkg/archive/tar/testdata/ustar.tar | bin | 0 -> 2048 bytes | |||
-rw-r--r-- | src/pkg/archive/tar/writer.go | 155 | ||||
-rw-r--r-- | src/pkg/archive/tar/writer_test.go | 80 |
12 files changed, 1105 insertions, 47 deletions
diff --git a/src/pkg/archive/tar/common.go b/src/pkg/archive/tar/common.go index fc7a40923..60d207c48 100644 --- a/src/pkg/archive/tar/common.go +++ b/src/pkg/archive/tar/common.go @@ -9,9 +9,16 @@ // References: // http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 // http://www.gnu.org/software/tar/manual/html_node/Standard.html +// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html package tar -import "time" +import ( + "errors" + "fmt" + "os" + "path" + "time" +) const ( blockSize = 512 @@ -28,6 +35,8 @@ const ( TypeCont = '7' // reserved TypeXHeader = 'x' // extended header TypeXGlobalHeader = 'g' // global extended header + TypeGNULongName = 'L' // Next file has a long name + TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name ) // A Header represents a single header in a tar archive. @@ -49,6 +58,179 @@ type Header struct { ChangeTime time.Time // status change time } +// File name constants from the tar spec. +const ( + fileNameSize = 100 // Maximum number of bytes in a standard tar name. + fileNamePrefixSize = 155 // Maximum number of ustar extension bytes. +) + +// FileInfo returns an os.FileInfo for the Header. +func (h *Header) FileInfo() os.FileInfo { + return headerFileInfo{h} +} + +// headerFileInfo implements os.FileInfo. +type headerFileInfo struct { + h *Header +} + +func (fi headerFileInfo) Size() int64 { return fi.h.Size } +func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } +func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime } +func (fi headerFileInfo) Sys() interface{} { return fi.h } + +// Name returns the base name of the file. +func (fi headerFileInfo) Name() string { + if fi.IsDir() { + return path.Clean(fi.h.Name) + } + return fi.h.Name +} + +// Mode returns the permission and mode bits for the headerFileInfo. +func (fi headerFileInfo) Mode() (mode os.FileMode) { + // Set file permission bits. + mode = os.FileMode(fi.h.Mode).Perm() + + // Set setuid, setgid and sticky bits. + if fi.h.Mode&c_ISUID != 0 { + // setuid + mode |= os.ModeSetuid + } + if fi.h.Mode&c_ISGID != 0 { + // setgid + mode |= os.ModeSetgid + } + if fi.h.Mode&c_ISVTX != 0 { + // sticky + mode |= os.ModeSticky + } + + // Set file mode bits. + // clear perm, setuid, setgid and sticky bits. + m := os.FileMode(fi.h.Mode) &^ 07777 + if m == c_ISDIR { + // directory + mode |= os.ModeDir + } + if m == c_ISFIFO { + // named pipe (FIFO) + mode |= os.ModeNamedPipe + } + if m == c_ISLNK { + // symbolic link + mode |= os.ModeSymlink + } + if m == c_ISBLK { + // device file + mode |= os.ModeDevice + } + if m == c_ISCHR { + // Unix character device + mode |= os.ModeDevice + mode |= os.ModeCharDevice + } + if m == c_ISSOCK { + // Unix domain socket + mode |= os.ModeSocket + } + + switch fi.h.Typeflag { + case TypeLink, TypeSymlink: + // hard link, symbolic link + mode |= os.ModeSymlink + case TypeChar: + // character device node + mode |= os.ModeDevice + mode |= os.ModeCharDevice + case TypeBlock: + // block device node + mode |= os.ModeDevice + case TypeDir: + // directory + mode |= os.ModeDir + case TypeFifo: + // fifo node + mode |= os.ModeNamedPipe + } + + return mode +} + +// sysStat, if non-nil, populates h from system-dependent fields of fi. +var sysStat func(fi os.FileInfo, h *Header) error + +// Mode constants from the tar spec. +const ( + c_ISUID = 04000 // Set uid + c_ISGID = 02000 // Set gid + c_ISVTX = 01000 // Save text (sticky bit) + c_ISDIR = 040000 // Directory + c_ISFIFO = 010000 // FIFO + c_ISREG = 0100000 // Regular file + c_ISLNK = 0120000 // Symbolic link + c_ISBLK = 060000 // Block special file + c_ISCHR = 020000 // Character special file + c_ISSOCK = 0140000 // Socket +) + +// FileInfoHeader creates a partially-populated Header from fi. +// If fi describes a symlink, FileInfoHeader records link as the link target. +// If fi describes a directory, a slash is appended to the name. +func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { + if fi == nil { + return nil, errors.New("tar: FileInfo is nil") + } + fm := fi.Mode() + h := &Header{ + Name: fi.Name(), + ModTime: fi.ModTime(), + Mode: int64(fm.Perm()), // or'd with c_IS* constants later + } + switch { + case fm.IsRegular(): + h.Mode |= c_ISREG + h.Typeflag = TypeReg + h.Size = fi.Size() + case fi.IsDir(): + h.Typeflag = TypeDir + h.Mode |= c_ISDIR + h.Name += "/" + case fm&os.ModeSymlink != 0: + h.Typeflag = TypeSymlink + h.Mode |= c_ISLNK + h.Linkname = link + case fm&os.ModeDevice != 0: + if fm&os.ModeCharDevice != 0 { + h.Mode |= c_ISCHR + h.Typeflag = TypeChar + } else { + h.Mode |= c_ISBLK + h.Typeflag = TypeBlock + } + case fm&os.ModeNamedPipe != 0: + h.Typeflag = TypeFifo + h.Mode |= c_ISFIFO + case fm&os.ModeSocket != 0: + h.Mode |= c_ISSOCK + default: + return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm) + } + if fm&os.ModeSetuid != 0 { + h.Mode |= c_ISUID + } + if fm&os.ModeSetgid != 0 { + h.Mode |= c_ISGID + } + if fm&os.ModeSticky != 0 { + h.Mode |= c_ISVTX + } + if sysStat != nil { + return h, sysStat(fi, h) + } + return h, nil +} + var zeroBlock = make([]byte, blockSize) // POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values. diff --git a/src/pkg/archive/tar/example_test.go b/src/pkg/archive/tar/example_test.go new file mode 100644 index 000000000..351eaa0e6 --- /dev/null +++ b/src/pkg/archive/tar/example_test.go @@ -0,0 +1,79 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar_test + +import ( + "archive/tar" + "bytes" + "fmt" + "io" + "log" + "os" +) + +func Example() { + // Create a buffer to write our archive to. + buf := new(bytes.Buffer) + + // Create a new tar archive. + tw := tar.NewWriter(buf) + + // Add some files to the archive. + var files = []struct { + Name, Body string + }{ + {"readme.txt", "This archive contains some text files."}, + {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, + {"todo.txt", "Get animal handling licence."}, + } + for _, file := range files { + hdr := &tar.Header{ + Name: file.Name, + Size: int64(len(file.Body)), + } + if err := tw.WriteHeader(hdr); err != nil { + log.Fatalln(err) + } + if _, err := tw.Write([]byte(file.Body)); err != nil { + log.Fatalln(err) + } + } + // Make sure to check the error on Close. + if err := tw.Close(); err != nil { + log.Fatalln(err) + } + + // Open the tar archive for reading. + r := bytes.NewReader(buf.Bytes()) + tr := tar.NewReader(r) + + // Iterate through the files in the archive. + for { + hdr, err := tr.Next() + if err == io.EOF { + // end of tar archive + break + } + if err != nil { + log.Fatalln(err) + } + fmt.Printf("Contents of %s:\n", hdr.Name) + if _, err := io.Copy(os.Stdout, tr); err != nil { + log.Fatalln(err) + } + fmt.Println() + } + + // Output: + // Contents of readme.txt: + // This archive contains some text files. + // Contents of gopher.txt: + // Gopher names: + // George + // Geoffrey + // Gonzo + // Contents of todo.txt: + // Get animal handling licence. +} diff --git a/src/pkg/archive/tar/reader.go b/src/pkg/archive/tar/reader.go index 1b40af812..05f82a40d 100644 --- a/src/pkg/archive/tar/reader.go +++ b/src/pkg/archive/tar/reader.go @@ -14,6 +14,7 @@ import ( "io/ioutil" "os" "strconv" + "strings" "time" ) @@ -21,24 +22,12 @@ var ( ErrHeader = errors.New("archive/tar: invalid tar header") ) +const maxNanoSecondIntSize = 9 + // A Reader provides sequential access to the contents of a tar archive. // A tar archive consists of a sequence of files. // The Next method advances to the next file in the archive (including the first), // and then it can be treated as an io.Reader to access the file's data. -// -// Example: -// tr := tar.NewReader(r) -// for { -// hdr, err := tr.Next() -// if err == io.EOF { -// // end of tar archive -// break -// } -// if err != nil { -// // handle error -// } -// io.Copy(data, tr) -// } type Reader struct { r io.Reader err error @@ -55,13 +44,183 @@ func (tr *Reader) Next() (*Header, error) { if tr.err == nil { tr.skipUnread() } - if tr.err == nil { + if tr.err != nil { + return hdr, tr.err + } + hdr = tr.readHeader() + if hdr == nil { + return hdr, tr.err + } + // Check for PAX/GNU header. + switch hdr.Typeflag { + case TypeXHeader: + // PAX extended header + headers, err := parsePAX(tr) + if err != nil { + return nil, err + } + // We actually read the whole file, + // but this skips alignment padding + tr.skipUnread() hdr = tr.readHeader() + mergePAX(hdr, headers) + return hdr, nil + case TypeGNULongName: + // We have a GNU long name header. Its contents are the real file name. + realname, err := ioutil.ReadAll(tr) + if err != nil { + return nil, err + } + hdr, err := tr.Next() + hdr.Name = cString(realname) + return hdr, err + case TypeGNULongLink: + // We have a GNU long link header. + realname, err := ioutil.ReadAll(tr) + if err != nil { + return nil, err + } + hdr, err := tr.Next() + hdr.Linkname = cString(realname) + return hdr, err } return hdr, tr.err } -// Parse bytes as a NUL-terminated C-style string. +// mergePAX merges well known headers according to PAX standard. +// In general headers with the same name as those found +// in the header struct overwrite those found in the header +// struct with higher precision or longer values. Esp. useful +// for name and linkname fields. +func mergePAX(hdr *Header, headers map[string]string) error { + for k, v := range headers { + switch k { + case "path": + hdr.Name = v + case "linkpath": + hdr.Linkname = v + case "gname": + hdr.Gname = v + case "uname": + hdr.Uname = v + case "uid": + uid, err := strconv.ParseInt(v, 10, 0) + if err != nil { + return err + } + hdr.Uid = int(uid) + case "gid": + gid, err := strconv.ParseInt(v, 10, 0) + if err != nil { + return err + } + hdr.Gid = int(gid) + case "atime": + t, err := parsePAXTime(v) + if err != nil { + return err + } + hdr.AccessTime = t + case "mtime": + t, err := parsePAXTime(v) + if err != nil { + return err + } + hdr.ModTime = t + case "ctime": + t, err := parsePAXTime(v) + if err != nil { + return err + } + hdr.ChangeTime = t + case "size": + size, err := strconv.ParseInt(v, 10, 0) + if err != nil { + return err + } + hdr.Size = int64(size) + } + + } + return nil +} + +// parsePAXTime takes a string of the form %d.%d as described in +// the PAX specification. +func parsePAXTime(t string) (time.Time, error) { + buf := []byte(t) + pos := bytes.IndexByte(buf, '.') + var seconds, nanoseconds int64 + var err error + if pos == -1 { + seconds, err = strconv.ParseInt(t, 10, 0) + if err != nil { + return time.Time{}, err + } + } else { + seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0) + if err != nil { + return time.Time{}, err + } + nano_buf := string(buf[pos+1:]) + // Pad as needed before converting to a decimal. + // For example .030 -> .030000000 -> 30000000 nanoseconds + if len(nano_buf) < maxNanoSecondIntSize { + // Right pad + nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf)) + } else if len(nano_buf) > maxNanoSecondIntSize { + // Right truncate + nano_buf = nano_buf[:maxNanoSecondIntSize] + } + nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0) + if err != nil { + return time.Time{}, err + } + } + ts := time.Unix(seconds, nanoseconds) + return ts, nil +} + +// parsePAX parses PAX headers. +// If an extended header (type 'x') is invalid, ErrHeader is returned +func parsePAX(r io.Reader) (map[string]string, error) { + buf, err := ioutil.ReadAll(r) + if err != nil { + return nil, err + } + headers := make(map[string]string) + // Each record is constructed as + // "%d %s=%s\n", length, keyword, value + for len(buf) > 0 { + // or the header was empty to start with. + var sp int + // The size field ends at the first space. + sp = bytes.IndexByte(buf, ' ') + if sp == -1 { + return nil, ErrHeader + } + // Parse the first token as a decimal integer. + n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) + if err != nil { + return nil, ErrHeader + } + // Extract everything between the decimal and the n -1 on the + // beginning to to eat the ' ', -1 on the end to skip the newline. + var record []byte + record, buf = buf[sp+1:n-1], buf[n:] + // The first equals is guaranteed to mark the end of the key. + // Everything else is value. + eq := bytes.IndexByte(record, '=') + if eq == -1 { + return nil, ErrHeader + } + key, value := record[:eq], record[eq+1:] + headers[string(key)] = string(value) + } + return headers, nil +} + +// cString parses bytes as a NUL-terminated C-style string. // If a NUL byte is not found then the whole slice is returned as a string. func cString(b []byte) string { n := 0 @@ -72,6 +231,18 @@ func cString(b []byte) string { } func (tr *Reader) octal(b []byte) int64 { + // Check for binary format first. + if len(b) > 0 && b[0]&0x80 != 0 { + var x int64 + for i, c := range b { + if i == 0 { + c &= 0x7f // ignore signal bit in first byte + } + x = x<<8 | int64(c) + } + return x + } + // Removing leading spaces. for len(b) > 0 && b[0] == ' ' { b = b[1:] @@ -87,7 +258,7 @@ func (tr *Reader) octal(b []byte) int64 { return int64(x) } -// Skip any unread bytes in the existing file entry, as well as any alignment padding. +// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. func (tr *Reader) skipUnread() { nr := tr.nb + tr.pad // number of bytes to skip tr.nb, tr.pad = 0, 0 diff --git a/src/pkg/archive/tar/reader_test.go b/src/pkg/archive/tar/reader_test.go index 0a8646c39..9a1968237 100644 --- a/src/pkg/archive/tar/reader_test.go +++ b/src/pkg/archive/tar/reader_test.go @@ -10,6 +10,8 @@ import ( "fmt" "io" "os" + "reflect" + "strings" "testing" "time" ) @@ -108,6 +110,38 @@ var untarTests = []*untarTest{ }, }, }, + { + file: "testdata/pax.tar", + headers: []*Header{ + { + Name: "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + Mode: 0664, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 7, + ModTime: time.Unix(1350244992, 23960108), + ChangeTime: time.Unix(1350244992, 23960108), + AccessTime: time.Unix(1350244992, 23960108), + Typeflag: TypeReg, + }, + { + Name: "a/b", + Mode: 0777, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 0, + ModTime: time.Unix(1350266320, 910238425), + ChangeTime: time.Unix(1350266320, 910238425), + AccessTime: time.Unix(1350266320, 910238425), + Typeflag: TypeSymlink, + Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + }, + }, + }, } func TestReader(t *testing.T) { @@ -133,7 +167,7 @@ testLoop: } hdr, err := tr.Next() if err == io.EOF { - break + continue testLoop } if hdr != nil || err != nil { t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err) @@ -260,3 +294,73 @@ func TestNonSeekable(t *testing.T) { t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread) } } + +func TestParsePAXHeader(t *testing.T) { + paxTests := [][3]string{ + {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths + {"a", "a=name", "9 a=name\n"}, // Test case involving multiple acceptable length + {"mtime", "mtime=1350244992.023960108", "30 mtime=1350244992.023960108\n"}} + for _, test := range paxTests { + key, expected, raw := test[0], test[1], test[2] + reader := bytes.NewBuffer([]byte(raw)) + headers, err := parsePAX(reader) + if err != nil { + t.Errorf("Couldn't parse correctly formatted headers: %v", err) + continue + } + if strings.EqualFold(headers[key], expected) { + t.Errorf("mtime header incorrectly parsed: got %s, wanted %s", headers[key], expected) + continue + } + trailer := make([]byte, 100) + n, err := reader.Read(trailer) + if err != io.EOF || n != 0 { + t.Error("Buffer wasn't consumed") + } + } + badHeader := bytes.NewBuffer([]byte("3 somelongkey=")) + if _, err := parsePAX(badHeader); err != ErrHeader { + t.Fatal("Unexpected success when parsing bad header") + } +} + +func TestParsePAXTime(t *testing.T) { + // Some valid PAX time values + timestamps := map[string]time.Time{ + "1350244992.023960108": time.Unix(1350244992, 23960108), // The commoon case + "1350244992.02396010": time.Unix(1350244992, 23960100), // Lower precision value + "1350244992.0239601089": time.Unix(1350244992, 23960108), // Higher precision value + "1350244992": time.Unix(1350244992, 0), // Low precision value + } + for input, expected := range timestamps { + ts, err := parsePAXTime(input) + if err != nil { + t.Fatal(err) + } + if !ts.Equal(expected) { + t.Fatalf("Time parsing failure %s %s", ts, expected) + } + } +} + +func TestMergePAX(t *testing.T) { + hdr := new(Header) + // Test a string, integer, and time based value. + headers := map[string]string{ + "path": "a/b/c", + "uid": "1000", + "mtime": "1350244992.023960108", + } + err := mergePAX(hdr, headers) + if err != nil { + t.Fatal(err) + } + want := &Header{ + Name: "a/b/c", + Uid: 1000, + ModTime: time.Unix(1350244992, 23960108), + } + if !reflect.DeepEqual(hdr, want) { + t.Errorf("incorrect merge: got %+v, want %+v", hdr, want) + } +} diff --git a/src/pkg/archive/tar/stat_atim.go b/src/pkg/archive/tar/stat_atim.go new file mode 100644 index 000000000..6029b0871 --- /dev/null +++ b/src/pkg/archive/tar/stat_atim.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux openbsd + +package tar + +import ( + "syscall" + "time" +) + +func statAtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Atim.Unix()) +} + +func statCtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Ctim.Unix()) +} diff --git a/src/pkg/archive/tar/stat_atimespec.go b/src/pkg/archive/tar/stat_atimespec.go new file mode 100644 index 000000000..6f17dbe30 --- /dev/null +++ b/src/pkg/archive/tar/stat_atimespec.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin freebsd netbsd + +package tar + +import ( + "syscall" + "time" +) + +func statAtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Atimespec.Unix()) +} + +func statCtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Ctimespec.Unix()) +} diff --git a/src/pkg/archive/tar/stat_unix.go b/src/pkg/archive/tar/stat_unix.go new file mode 100644 index 000000000..92bc92424 --- /dev/null +++ b/src/pkg/archive/tar/stat_unix.go @@ -0,0 +1,32 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux darwin freebsd openbsd netbsd + +package tar + +import ( + "os" + "syscall" +) + +func init() { + sysStat = statUnix +} + +func statUnix(fi os.FileInfo, h *Header) error { + sys, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + return nil + } + h.Uid = int(sys.Uid) + h.Gid = int(sys.Gid) + // TODO(bradfitz): populate username & group. os/user + // doesn't cache LookupId lookups, and lacks group + // lookup functions. + h.AccessTime = statAtime(sys) + h.ChangeTime = statCtime(sys) + // TODO(bradfitz): major/minor device numbers? + return nil +} diff --git a/src/pkg/archive/tar/tar_test.go b/src/pkg/archive/tar/tar_test.go new file mode 100644 index 000000000..dd6310313 --- /dev/null +++ b/src/pkg/archive/tar/tar_test.go @@ -0,0 +1,271 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "io/ioutil" + "os" + "reflect" + "testing" + "time" +) + +func TestFileInfoHeader(t *testing.T) { + fi, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + h, err := FileInfoHeader(fi, "") + if err != nil { + t.Fatalf("FileInfoHeader: %v", err) + } + if g, e := h.Name, "small.txt"; g != e { + t.Errorf("Name = %q; want %q", g, e) + } + if g, e := h.Mode, int64(fi.Mode().Perm())|c_ISREG; g != e { + t.Errorf("Mode = %#o; want %#o", g, e) + } + if g, e := h.Size, int64(5); g != e { + t.Errorf("Size = %v; want %v", g, e) + } + if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { + t.Errorf("ModTime = %v; want %v", g, e) + } +} + +func TestFileInfoHeaderDir(t *testing.T) { + fi, err := os.Stat("testdata") + if err != nil { + t.Fatal(err) + } + h, err := FileInfoHeader(fi, "") + if err != nil { + t.Fatalf("FileInfoHeader: %v", err) + } + if g, e := h.Name, "testdata/"; g != e { + t.Errorf("Name = %q; want %q", g, e) + } + // Ignoring c_ISGID for golang.org/issue/4867 + if g, e := h.Mode&^c_ISGID, int64(fi.Mode().Perm())|c_ISDIR; g != e { + t.Errorf("Mode = %#o; want %#o", g, e) + } + if g, e := h.Size, int64(0); g != e { + t.Errorf("Size = %v; want %v", g, e) + } + if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { + t.Errorf("ModTime = %v; want %v", g, e) + } +} + +func TestFileInfoHeaderSymlink(t *testing.T) { + h, err := FileInfoHeader(symlink{}, "some-target") + if err != nil { + t.Fatal(err) + } + if g, e := h.Name, "some-symlink"; g != e { + t.Errorf("Name = %q; want %q", g, e) + } + if g, e := h.Linkname, "some-target"; g != e { + t.Errorf("Linkname = %q; want %q", g, e) + } +} + +type symlink struct{} + +func (symlink) Name() string { return "some-symlink" } +func (symlink) Size() int64 { return 0 } +func (symlink) Mode() os.FileMode { return os.ModeSymlink } +func (symlink) ModTime() time.Time { return time.Time{} } +func (symlink) IsDir() bool { return false } +func (symlink) Sys() interface{} { return nil } + +func TestRoundTrip(t *testing.T) { + data := []byte("some file contents") + + var b bytes.Buffer + tw := NewWriter(&b) + hdr := &Header{ + Name: "file.txt", + Uid: 1 << 21, // too big for 8 octal digits + Size: int64(len(data)), + ModTime: time.Now(), + } + // tar only supports second precision. + hdr.ModTime = hdr.ModTime.Add(-time.Duration(hdr.ModTime.Nanosecond()) * time.Nanosecond) + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("tw.WriteHeader: %v", err) + } + if _, err := tw.Write(data); err != nil { + t.Fatalf("tw.Write: %v", err) + } + if err := tw.Close(); err != nil { + t.Fatalf("tw.Close: %v", err) + } + + // Read it back. + tr := NewReader(&b) + rHdr, err := tr.Next() + if err != nil { + t.Fatalf("tr.Next: %v", err) + } + if !reflect.DeepEqual(rHdr, hdr) { + t.Errorf("Header mismatch.\n got %+v\nwant %+v", rHdr, hdr) + } + rData, err := ioutil.ReadAll(tr) + if err != nil { + t.Fatalf("Read: %v", err) + } + if !bytes.Equal(rData, data) { + t.Errorf("Data mismatch.\n got %q\nwant %q", rData, data) + } +} + +type headerRoundTripTest struct { + h *Header + fm os.FileMode +} + +func TestHeaderRoundTrip(t *testing.T) { + golden := []headerRoundTripTest{ + // regular file. + { + h: &Header{ + Name: "test.txt", + Mode: 0644 | c_ISREG, + Size: 12, + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeReg, + }, + fm: 0644, + }, + // hard link. + { + h: &Header{ + Name: "hard.txt", + Mode: 0644 | c_ISLNK, + Size: 0, + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeLink, + }, + fm: 0644 | os.ModeSymlink, + }, + // symbolic link. + { + h: &Header{ + Name: "link.txt", + Mode: 0777 | c_ISLNK, + Size: 0, + ModTime: time.Unix(1360600852, 0), + Typeflag: TypeSymlink, + }, + fm: 0777 | os.ModeSymlink, + }, + // character device node. + { + h: &Header{ + Name: "dev/null", + Mode: 0666 | c_ISCHR, + Size: 0, + ModTime: time.Unix(1360578951, 0), + Typeflag: TypeChar, + }, + fm: 0666 | os.ModeDevice | os.ModeCharDevice, + }, + // block device node. + { + h: &Header{ + Name: "dev/sda", + Mode: 0660 | c_ISBLK, + Size: 0, + ModTime: time.Unix(1360578954, 0), + Typeflag: TypeBlock, + }, + fm: 0660 | os.ModeDevice, + }, + // directory. + { + h: &Header{ + Name: "dir/", + Mode: 0755 | c_ISDIR, + Size: 0, + ModTime: time.Unix(1360601116, 0), + Typeflag: TypeDir, + }, + fm: 0755 | os.ModeDir, + }, + // fifo node. + { + h: &Header{ + Name: "dev/initctl", + Mode: 0600 | c_ISFIFO, + Size: 0, + ModTime: time.Unix(1360578949, 0), + Typeflag: TypeFifo, + }, + fm: 0600 | os.ModeNamedPipe, + }, + // setuid. + { + h: &Header{ + Name: "bin/su", + Mode: 0755 | c_ISREG | c_ISUID, + Size: 23232, + ModTime: time.Unix(1355405093, 0), + Typeflag: TypeReg, + }, + fm: 0755 | os.ModeSetuid, + }, + // setguid. + { + h: &Header{ + Name: "group.txt", + Mode: 0750 | c_ISREG | c_ISGID, + Size: 0, + ModTime: time.Unix(1360602346, 0), + Typeflag: TypeReg, + }, + fm: 0750 | os.ModeSetgid, + }, + // sticky. + { + h: &Header{ + Name: "sticky.txt", + Mode: 0600 | c_ISREG | c_ISVTX, + Size: 7, + ModTime: time.Unix(1360602540, 0), + Typeflag: TypeReg, + }, + fm: 0600 | os.ModeSticky, + }, + } + + for i, g := range golden { + fi := g.h.FileInfo() + h2, err := FileInfoHeader(fi, "") + if err != nil { + t.Error(err) + continue + } + if got, want := h2.Name, g.h.Name; got != want { + t.Errorf("i=%d: Name: got %v, want %v", i, got, want) + } + if got, want := h2.Size, g.h.Size; got != want { + t.Errorf("i=%d: Size: got %v, want %v", i, got, want) + } + if got, want := h2.Mode, g.h.Mode; got != want { + t.Errorf("i=%d: Mode: got %o, want %o", i, got, want) + } + if got, want := fi.Mode(), g.fm; got != want { + t.Errorf("i=%d: fi.Mode: got %o, want %o", i, got, want) + } + if got, want := h2.ModTime, g.h.ModTime; got != want { + t.Errorf("i=%d: ModTime: got %v, want %v", i, got, want) + } + if sysh, ok := fi.Sys().(*Header); !ok || sysh != g.h { + t.Errorf("i=%d: Sys didn't return original *Header", i) + } + } +} diff --git a/src/pkg/archive/tar/testdata/pax.tar b/src/pkg/archive/tar/testdata/pax.tar Binary files differnew file mode 100644 index 000000000..9bc24b658 --- /dev/null +++ b/src/pkg/archive/tar/testdata/pax.tar diff --git a/src/pkg/archive/tar/testdata/ustar.tar b/src/pkg/archive/tar/testdata/ustar.tar Binary files differnew file mode 100644 index 000000000..29679d9a3 --- /dev/null +++ b/src/pkg/archive/tar/testdata/ustar.tar diff --git a/src/pkg/archive/tar/writer.go b/src/pkg/archive/tar/writer.go index b2b7a58a1..d92dd06ea 100644 --- a/src/pkg/archive/tar/writer.go +++ b/src/pkg/archive/tar/writer.go @@ -8,33 +8,28 @@ package tar // - catch more errors (no first header, etc.) import ( + "bytes" "errors" "fmt" "io" + "os" + "path" "strconv" + "strings" + "time" ) var ( ErrWriteTooLong = errors.New("archive/tar: write too long") ErrFieldTooLong = errors.New("archive/tar: header field too long") ErrWriteAfterClose = errors.New("archive/tar: write after close") + errNameTooLong = errors.New("archive/tar: name too long") ) // A Writer provides sequential writing of a tar archive in POSIX.1 format. // A tar archive consists of a sequence of files. // Call WriteHeader to begin a new file, and then call Write to supply that file's data, // writing at most hdr.Size bytes in total. -// -// Example: -// tw := tar.NewWriter(w) -// hdr := new(Header) -// hdr.Size = length of data in bytes -// // populate other hdr fields as desired -// if err := tw.WriteHeader(hdr); err != nil { -// // handle error -// } -// io.Copy(tw, data) -// tw.Close() type Writer struct { w io.Writer err error @@ -110,6 +105,12 @@ func (tw *Writer) numeric(b []byte, x int64) { b[0] |= 0x80 // highest bit indicates binary format } +var ( + minTime = time.Unix(0, 0) + // There is room for 11 octal digits (33 bits) of mtime. + maxTime = minTime.Add((1<<33 - 1) * time.Second) +) + // WriteHeader writes hdr and prepares to accept the file's contents. // WriteHeader calls Flush if it is not the first header. // Calling after a Close will return ErrWriteAfterClose. @@ -123,34 +124,60 @@ func (tw *Writer) WriteHeader(hdr *Header) error { if tw.err != nil { return tw.err } - + // Decide whether or not to use PAX extensions + // TODO(shanemhansen): we might want to use PAX headers for + // subsecond time resolution, but for now let's just capture + // the long name/long symlink use case. + suffix := hdr.Name + prefix := "" + if len(hdr.Name) > fileNameSize || len(hdr.Linkname) > fileNameSize { + var err error + prefix, suffix, err = tw.splitUSTARLongName(hdr.Name) + // Either we were unable to pack the long name into ustar format + // or the link name is too long; use PAX headers. + if err == errNameTooLong || len(hdr.Linkname) > fileNameSize { + if err := tw.writePAXHeader(hdr); err != nil { + return err + } + } else if err != nil { + return err + } + } tw.nb = int64(hdr.Size) tw.pad = -tw.nb & (blockSize - 1) // blockSize is a power of two header := make([]byte, blockSize) s := slicer(header) + tw.cString(s.next(fileNameSize), suffix) - // TODO(dsymonds): handle names longer than 100 chars - copy(s.next(100), []byte(hdr.Name)) - - tw.octal(s.next(8), hdr.Mode) // 100:108 - tw.numeric(s.next(8), int64(hdr.Uid)) // 108:116 - tw.numeric(s.next(8), int64(hdr.Gid)) // 116:124 - tw.numeric(s.next(12), hdr.Size) // 124:136 - tw.numeric(s.next(12), hdr.ModTime.Unix()) // 136:148 - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 - tw.cString(s.next(100), hdr.Linkname) // linkname (157:257) - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - tw.cString(s.next(32), hdr.Uname) // 265:297 - tw.cString(s.next(32), hdr.Gname) // 297:329 - tw.numeric(s.next(8), hdr.Devmajor) // 329:337 - tw.numeric(s.next(8), hdr.Devminor) // 337:345 + // Handle out of range ModTime carefully. + var modTime int64 + if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) { + modTime = hdr.ModTime.Unix() + } + tw.octal(s.next(8), hdr.Mode) // 100:108 + tw.numeric(s.next(8), int64(hdr.Uid)) // 108:116 + tw.numeric(s.next(8), int64(hdr.Gid)) // 116:124 + tw.numeric(s.next(12), hdr.Size) // 124:136 + tw.numeric(s.next(12), modTime) // 136:148 + s.next(8) // chksum (148:156) + s.next(1)[0] = hdr.Typeflag // 156:157 + tw.cString(s.next(100), hdr.Linkname) // linkname (157:257) + copy(s.next(8), []byte("ustar\x0000")) // 257:265 + tw.cString(s.next(32), hdr.Uname) // 265:297 + tw.cString(s.next(32), hdr.Gname) // 297:329 + tw.numeric(s.next(8), hdr.Devmajor) // 329:337 + tw.numeric(s.next(8), hdr.Devminor) // 337:345 + tw.cString(s.next(155), prefix) // 345:500 // Use the GNU magic instead of POSIX magic if we used any GNU extensions. if tw.usedBinary { copy(header[257:265], []byte("ustar \x00")) } + // Use the ustar magic if we used ustar long names. + if len(prefix) > 0 { + copy(header[257:265], []byte("ustar\000")) + } // The chksum field is terminated by a NUL and a space. // This is different from the other octal fields. @@ -168,6 +195,78 @@ func (tw *Writer) WriteHeader(hdr *Header) error { return tw.err } +// writeUSTARLongName splits a USTAR long name hdr.Name. +// name must be < 256 characters. errNameTooLong is returned +// if hdr.Name can't be split. The splitting heuristic +// is compatible with gnu tar. +func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) { + length := len(name) + if length > fileNamePrefixSize+1 { + length = fileNamePrefixSize + 1 + } else if name[length-1] == '/' { + length-- + } + i := strings.LastIndex(name[:length], "/") + nlen := length - i - 1 + if i <= 0 || nlen > fileNameSize || nlen == 0 { + err = errNameTooLong + return + } + prefix, suffix = name[:i], name[i+1:] + return +} + +// writePaxHeader writes an extended pax header to the +// archive. +func (tw *Writer) writePAXHeader(hdr *Header) error { + // Prepare extended header + ext := new(Header) + ext.Typeflag = TypeXHeader + // Setting ModTime is required for reader parsing to + // succeed, and seems harmless enough. + ext.ModTime = hdr.ModTime + // The spec asks that we namespace our pseudo files + // with the current pid. + pid := os.Getpid() + dir, file := path.Split(hdr.Name) + ext.Name = path.Join(dir, + fmt.Sprintf("PaxHeaders.%d", pid), file)[0:100] + // Construct the body + var buf bytes.Buffer + if len(hdr.Name) > fileNameSize { + fmt.Fprint(&buf, paxHeader("path="+hdr.Name)) + } + if len(hdr.Linkname) > fileNameSize { + fmt.Fprint(&buf, paxHeader("linkpath="+hdr.Linkname)) + } + ext.Size = int64(len(buf.Bytes())) + if err := tw.WriteHeader(ext); err != nil { + return err + } + if _, err := tw.Write(buf.Bytes()); err != nil { + return err + } + if err := tw.Flush(); err != nil { + return err + } + return nil +} + +// paxHeader formats a single pax record, prefixing it with the appropriate length +func paxHeader(msg string) string { + const padding = 2 // Extra padding for space and newline + size := len(msg) + padding + size += len(strconv.Itoa(size)) + record := fmt.Sprintf("%d %s\n", size, msg) + if len(record) != size { + // Final adjustment if adding size increased + // the number of digits in size + size = len(record) + record = fmt.Sprintf("%d %s\n", size, msg) + } + return record +} + // Write writes to the current entry in the tar archive. // Write returns the error ErrWriteTooLong if more than // hdr.Size bytes are written after WriteHeader. diff --git a/src/pkg/archive/tar/writer_test.go b/src/pkg/archive/tar/writer_test.go index a214e57b9..4cf7c72af 100644 --- a/src/pkg/archive/tar/writer_test.go +++ b/src/pkg/archive/tar/writer_test.go @@ -9,6 +9,7 @@ import ( "fmt" "io" "io/ioutil" + "os" "strings" "testing" "testing/iotest" @@ -101,6 +102,27 @@ var writerTests = []*writerTest{ }, }, }, + // This file was produced using gnu tar 1.17 + // gnutar -b 4 --format=ustar (longname/)*15 + file.txt + { + file: "testdata/ustar.tar", + entries: []*writerTestEntry{ + { + header: &Header{ + Name: strings.Repeat("longname/", 15) + "file.txt", + Mode: 0644, + Uid: 0765, + Gid: 024, + Size: 06, + ModTime: time.Unix(1360135598, 0), + Typeflag: '0', + Uname: "shane", + Gname: "staff", + }, + contents: "hello\n", + }, + }, + }, } // Render byte array in a two-character hexadecimal string, spaced for easy visual inspection. @@ -180,3 +202,61 @@ testLoop: } } } + +func TestPax(t *testing.T) { + // Create an archive with a large name + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + // Force a PAX long name to be written + longName := strings.Repeat("ab", 100) + contents := strings.Repeat(" ", int(hdr.Size)) + hdr.Name = longName + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + t.Fatal("Expected at least one PAX header to be written.") + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Name != longName { + t.Fatal("Couldn't recover long file name") + } +} + +func TestPAXHeader(t *testing.T) { + medName := strings.Repeat("CD", 50) + longName := strings.Repeat("AB", 100) + paxTests := [][2]string{ + {"name=/etc/hosts", "19 name=/etc/hosts\n"}, + {"a=b", "6 a=b\n"}, // Single digit length + {"a=names", "11 a=names\n"}, // Test case involving carries + {"name=" + longName, fmt.Sprintf("210 name=%s\n", longName)}, + {"name=" + medName, fmt.Sprintf("110 name=%s\n", medName)}} + + for _, test := range paxTests { + key, expected := test[0], test[1] + if result := paxHeader(key); result != expected { + t.Fatalf("paxHeader: got %s, expected %s", result, expected) + } + } +} |