1 files changed, 666 insertions, 0 deletions
diff --git a/src/cmd/internal/goobj/read.go b/src/cmd/internal/goobj/read.go
new file mode 100644
index 000000000..79a83e59a
--- /dev/null
+++ b/src/cmd/internal/goobj/read.go
@@ -0,0 +1,666 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package goobj implements reading of Go object files and archives.
+//
+// TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
+// TODO(rsc): Decide the appropriate integer types for various fields.
+// TODO(rsc): Write tests. (File format still up in the air a little.)
+package goobj
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"strconv"
+	"strings"
+)
+
+// A SymKind describes the kind of memory represented by a symbol.
+type SymKind int
+
+// This list is taken from include/link.h.
+
+// Defined SymKind values.
+// TODO(rsc): Give idiomatic Go names.
+// TODO(rsc): Reduce the number of symbol types in the object files.
+const (
+	_ SymKind = iota
+
+	// readonly, executable
+	STEXT
+	SELFRXSECT
+
+	// readonly, non-executable
+	STYPE
+	SSTRING
+	SGOSTRING
+	SGOFUNC
+	SRODATA
+	SFUNCTAB
+	STYPELINK
+	SSYMTAB // TODO: move to unmapped section
+	SPCLNTAB
+	SELFROSECT
+
+	// writable, non-executable
+	SMACHOPLT
+	SELFSECT
+	SMACHO // Mach-O __nl_symbol_ptr
+	SMACHOGOT
+	SNOPTRDATA
+	SINITARR
+	SDATA
+	SWINDOWS
+	SBSS
+	SNOPTRBSS
+	STLSBSS
+
+	// not mapped
+	SXREF
+	SMACHOSYMSTR
+	SMACHOSYMTAB
+	SMACHOINDIRECTPLT
+	SMACHOINDIRECTGOT
+	SFILE
+	SFILEPATH
+	SCONST
+	SDYNIMPORT
+	SHOSTOBJ
+)
+
+var symKindStrings = []string{
+	SBSS:              "SBSS",
+	SCONST:            "SCONST",
+	SDATA:             "SDATA",
+	SDYNIMPORT:        "SDYNIMPORT",
+	SELFROSECT:        "SELFROSECT",
+	SELFRXSECT:        "SELFRXSECT",
+	SELFSECT:          "SELFSECT",
+	SFILE:             "SFILE",
+	SFILEPATH:         "SFILEPATH",
+	SFUNCTAB:          "SFUNCTAB",
+	SGOFUNC:           "SGOFUNC",
+	SGOSTRING:         "SGOSTRING",
+	SHOSTOBJ:          "SHOSTOBJ",
+	SINITARR:          "SINITARR",
+	SMACHO:            "SMACHO",
+	SMACHOGOT:         "SMACHOGOT",
+	SMACHOINDIRECTGOT: "SMACHOINDIRECTGOT",
+	SMACHOINDIRECTPLT: "SMACHOINDIRECTPLT",
+	SMACHOPLT:         "SMACHOPLT",
+	SMACHOSYMSTR:      "SMACHOSYMSTR",
+	SMACHOSYMTAB:      "SMACHOSYMTAB",
+	SNOPTRBSS:         "SNOPTRBSS",
+	SNOPTRDATA:        "SNOPTRDATA",
+	SPCLNTAB:          "SPCLNTAB",
+	SRODATA:           "SRODATA",
+	SSTRING:           "SSTRING",
+	SSYMTAB:           "SSYMTAB",
+	STEXT:             "STEXT",
+	STLSBSS:           "STLSBSS",
+	STYPE:             "STYPE",
+	STYPELINK:         "STYPELINK",
+	SWINDOWS:          "SWINDOWS",
+	SXREF:             "SXREF",
+}
+
+func (k SymKind) String() string {
+	if k < 0 || int(k) >= len(symKindStrings) {
+		return fmt.Sprintf("SymKind(%d)", k)
+	}
+	return symKindStrings[k]
+}
+
+// A Sym is a named symbol in an object file.
+type Sym struct {
+	SymID         // symbol identifier (name and version)
+	Kind  SymKind // kind of symbol
+	DupOK bool    // are duplicate definitions okay?
+	Size  int     // size of corresponding data
+	Type  SymID   // symbol for Go type information
+	Data  Data    // memory image of symbol
+	Reloc []Reloc // relocations to apply to Data
+	Func  *Func   // additional data for functions
+}
+
+// A SymID - the combination of Name and Version - uniquely identifies
+// a symbol within a package.
+type SymID struct {
+	// Name is the name of a symbol.
+	Name string
+
+	// Version is zero for symbols with global visibility.
+	// Symbols with only file visibility (such as file-level static
+	// declarations in C) have a non-zero version distinguishing
+	// a symbol in one file from a symbol of the same name
+	// in another file
+	Version int
+}
+
+func (s SymID) String() string {
+	if s.Version == 0 {
+		return s.Name
+	}
+	return fmt.Sprintf("%s<%d>", s.Name, s.Version)
+}
+
+// A Data is a reference to data stored in an object file.
+// It records the offset and size of the data, so that a client can
+// read the data only if necessary.
+type Data struct {
+	Offset int64
+	Size   int64
+}
+
+// A Reloc describes a relocation applied to a memory image to refer
+// to an address within a particular symbol.
+type Reloc struct {
+	// The bytes at [Offset, Offset+Size) within the memory image
+	// should be updated to refer to the address Add bytes after the start
+	// of the symbol Sym.
+	Offset int
+	Size   int
+	Sym    SymID
+	Add    int
+
+	// The Type records the form of address expected in the bytes
+	// described by the previous fields: absolute, PC-relative, and so on.
+	// TODO(rsc): The interpretation of Type is not exposed by this package.
+	Type int
+}
+
+// A Var describes a variable in a function stack frame: a declared
+// local variable, an input argument, or an output result.
+type Var struct {
+	// The combination of Name, Kind, and Offset uniquely
+	// identifies a variable in a function stack frame.
+	// Using fewer of these - in particular, using only Name - does not.
+	Name   string // Name of variable.
+	Kind   int    // TODO(rsc): Define meaning.
+	Offset int    // Frame offset. TODO(rsc): Define meaning.
+
+	Type SymID // Go type for variable.
+}
+
+// Func contains additional per-symbol information specific to functions.
+type Func struct {
+	Args     int        // size in bytes of argument frame: inputs and outputs
+	Frame    int        // size in bytes of local variable frame
+	Leaf     bool       // function omits save of link register (ARM)
+	NoSplit  bool       // function omits stack split prologue
+	Var      []Var      // detail about local variables
+	PCSP     Data       // PC → SP offset map
+	PCFile   Data       // PC → file number map (index into File)
+	PCLine   Data       // PC → line number map
+	PCData   []Data     // PC → runtime support data map
+	FuncData []FuncData // non-PC-specific runtime support data
+	File     []string   // paths indexed by PCFile
+}
+
+// TODO: Add PCData []byte and PCDataIter (similar to liblink).
+
+// A FuncData is a single function-specific data value.
+type FuncData struct {
+	Sym    SymID // symbol holding data
+	Offset int64 // offset into symbol for funcdata pointer
+}
+
+// A Package is a parsed Go object file or archive defining a Go package.
+type Package struct {
+	ImportPath string   // import path denoting this package
+	Imports    []string // packages imported by this package
+	Syms       []*Sym   // symbols defined by this package
+	MaxVersion int      // maximum Version in any SymID in Syms
+}
+
+var (
+	archiveHeader = []byte("!<arch>\n")
+	archiveMagic  = []byte("`\n")
+	goobjHeader   = []byte("go objec") // truncated to size of archiveHeader
+
+	errCorruptArchive   = errors.New("corrupt archive")
+	errTruncatedArchive = errors.New("truncated archive")
+	errNotArchive       = errors.New("unrecognized archive format")
+
+	errCorruptObject   = errors.New("corrupt object file")
+	errTruncatedObject = errors.New("truncated object file")
+	errNotObject       = errors.New("unrecognized object file format")
+)
+
+// An objReader is an object file reader.
+type objReader struct {
+	p         *Package
+	b         *bufio.Reader
+	f         io.ReadSeeker
+	err       error
+	offset    int64
+	limit     int64
+	tmp       [256]byte
+	pkg       string
+	pkgprefix string
+}
+
+// importPathToPrefix returns the prefix that will be used in the
+// final symbol table for the given import path.
+// We escape '%', '"', all control characters and non-ASCII bytes,
+// and any '.' after the final slash.
+//
+// See ../../../cmd/ld/lib.c:/^pathtoprefix and
+// ../../../cmd/gc/subr.c:/^pathtoprefix.
+func importPathToPrefix(s string) string {
+	// find index of last slash, if any, or else -1.
+	// used for determining whether an index is after the last slash.
+	slash := strings.LastIndex(s, "/")
+
+	// check for chars that need escaping
+	n := 0
+	for r := 0; r < len(s); r++ {
+		if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F {
+			n++
+		}
+	}
+
+	// quick exit
+	if n == 0 {
+		return s
+	}
+
+	// escape
+	const hex = "0123456789abcdef"
+	p := make([]byte, 0, len(s)+2*n)
+	for r := 0; r < len(s); r++ {
+		if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F {
+			p = append(p, '%', hex[c>>4], hex[c&0xF])
+		} else {
+			p = append(p, c)
+		}
+	}
+
+	return string(p)
+}
+
+// init initializes r to read package p from f.
+func (r *objReader) init(f io.ReadSeeker, p *Package) {
+	r.f = f
+	r.p = p
+	r.offset, _ = f.Seek(0, 1)
+	r.limit, _ = f.Seek(0, 2)
+	f.Seek(r.offset, 0)
+	r.b = bufio.NewReader(f)
+	r.pkgprefix = importPathToPrefix(p.ImportPath) + "."
+}
+
+// error records that an error occurred.
+// It returns only the first error, so that an error
+// caused by an earlier error does not discard information
+// about the earlier error.
+func (r *objReader) error(err error) error {
+	if r.err == nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		r.err = err
+	}
+	// panic("corrupt") // useful for debugging
+	return r.err
+}
+
+// readByte reads and returns a byte from the input file.
+// On I/O error or EOF, it records the error but returns byte 0.
+// A sequence of 0 bytes will eventually terminate any
+// parsing state in the object file. In particular, it ends the
+// reading of a varint.
+func (r *objReader) readByte() byte {
+	if r.err != nil {
+		return 0
+	}
+	if r.offset >= r.limit {
+		r.error(io.ErrUnexpectedEOF)
+		return 0
+	}
+	b, err := r.b.ReadByte()
+	if err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		r.error(err)
+		b = 0
+	} else {
+		r.offset++
+	}
+	return b
+}
+
+// read reads exactly len(b) bytes from the input file.
+// If an error occurs, read returns the error but also
+// records it, so it is safe for callers to ignore the result
+// as long as delaying the report is not a problem.
+func (r *objReader) readFull(b []byte) error {
+	if r.err != nil {
+		return r.err
+	}
+	if r.offset+int64(len(b)) > r.limit {
+		return r.error(io.ErrUnexpectedEOF)
+	}
+	n, err := io.ReadFull(r.b, b)
+	r.offset += int64(n)
+	if err != nil {
+		return r.error(err)
+	}
+	return nil
+}
+
+// readInt reads a zigzag varint from the input file.
+func (r *objReader) readInt() int {
+	var u uint64
+
+	for shift := uint(0); ; shift += 7 {
+		if shift >= 64 {
+			r.error(errCorruptObject)
+			return 0
+		}
+		c := r.readByte()
+		u |= uint64(c&0x7F) << shift
+		if c&0x80 == 0 {
+			break
+		}
+	}
+
+	v := int64(u>>1) ^ (int64(u) << 63 >> 63)
+	if int64(int(v)) != v {
+		r.error(errCorruptObject) // TODO
+		return 0
+	}
+	return int(v)
+}
+
+// readString reads a length-delimited string from the input file.
+func (r *objReader) readString() string {
+	n := r.readInt()
+	buf := make([]byte, n)
+	r.readFull(buf)
+	return string(buf)
+}
+
+// readSymID reads a SymID from the input file.
+func (r *objReader) readSymID() SymID {
+	name, vers := r.readString(), r.readInt()
+
+	// In a symbol name in an object file, "". denotes the
+	// prefix for the package in which the object file has been found.
+	// Expand it.
+	name = strings.Replace(name, `"".`, r.pkgprefix, -1)
+
+	// An individual object file only records version 0 (extern) or 1 (static).
+	// To make static symbols unique across all files being read, we
+	// replace version 1 with the version corresponding to the current
+	// file number. The number is incremented on each call to parseObject.
+	if vers != 0 {
+		vers = r.p.MaxVersion
+	}
+
+	return SymID{name, vers}
+}
+
+// readData reads a data reference from the input file.
+func (r *objReader) readData() Data {
+	n := r.readInt()
+	d := Data{Offset: r.offset, Size: int64(n)}
+	r.skip(int64(n))
+	return d
+}
+
+// skip skips n bytes in the input.
+func (r *objReader) skip(n int64) {
+	if n < 0 {
+		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
+	}
+	if n < int64(len(r.tmp)) {
+		// Since the data is so small, a just reading from the buffered
+		// reader is better than flushing the buffer and seeking.
+		r.readFull(r.tmp[:n])
+	} else if n <= int64(r.b.Buffered()) {
+		// Even though the data is not small, it has already been read.
+		// Advance the buffer instead of seeking.
+		for n > int64(len(r.tmp)) {
+			r.readFull(r.tmp[:])
+			n -= int64(len(r.tmp))
+		}
+		r.readFull(r.tmp[:n])
+	} else {
+		// Seek, giving up buffered data.
+		_, err := r.f.Seek(r.offset+n, 0)
+		if err != nil {
+			r.error(err)
+		}
+		r.offset += n
+		r.b.Reset(r.f)
+	}
+}
+
+// Parse parses an object file or archive from r,
+// assuming that its import path is pkgpath.
+func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) {
+	if pkgpath == "" {
+		pkgpath = `""`
+	}
+	p := new(Package)
+	p.ImportPath = pkgpath
+
+	var rd objReader
+	rd.init(r, p)
+	err := rd.readFull(rd.tmp[:8])
+	if err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		return nil, err
+	}
+
+	switch {
+	default:
+		return nil, errNotObject
+
+	case bytes.Equal(rd.tmp[:8], archiveHeader):
+		if err := rd.parseArchive(); err != nil {
+			return nil, err
+		}
+	case bytes.Equal(rd.tmp[:8], goobjHeader):
+		if err := rd.parseObject(goobjHeader); err != nil {
+			return nil, err
+		}
+	}
+
+	return p, nil
+}
+
+// trimSpace removes trailing spaces from b and returns the corresponding string.
+// This effectively parses the form used in archive headers.
+func trimSpace(b []byte) string {
+	return string(bytes.TrimRight(b, " "))
+}
+
+// parseArchive parses a Unix archive of Go object files.
+// TODO(rsc): Need to skip non-Go object files.
+// TODO(rsc): Maybe record table of contents in r.p so that
+// linker can avoid having code to parse archives too.
+func (r *objReader) parseArchive() error {
+	for r.offset < r.limit {
+		if err := r.readFull(r.tmp[:60]); err != nil {
+			return err
+		}
+		data := r.tmp[:60]
+
+		// Each file is preceded by this text header (slice indices in first column):
+		//	 0:16	name
+		//	16:28 date
+		//	28:34 uid
+		//	34:40 gid
+		//	40:48 mode
+		//	48:58 size
+		//	58:60 magic - `\n
+		// We only care about name, size, and magic.
+		// The fields are space-padded on the right.
+		// The size is in decimal.
+		// The file data - size bytes - follows the header.
+		// Headers are 2-byte aligned, so if size is odd, an extra padding
+		// byte sits between the file data and the next header.
+		// The file data that follows is padded to an even number of bytes:
+		// if size is odd, an extra padding byte is inserted betw the next header.
+		if len(data) < 60 {
+			return errTruncatedArchive
+		}
+		if !bytes.Equal(data[58:60], archiveMagic) {
+			return errCorruptArchive
+		}
+		name := trimSpace(data[0:16])
+		size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64)
+		if err != nil {
+			return errCorruptArchive
+		}
+		data = data[60:]
+		fsize := size + size&1
+		if fsize < 0 || fsize < size {
+			return errCorruptArchive
+		}
+		switch name {
+		case "__.SYMDEF", "__.GOSYMDEF", "__.PKGDEF":
+			r.skip(size)
+		default:
+			oldLimit := r.limit
+			r.limit = r.offset + size
+			if err := r.parseObject(nil); err != nil {
+				return fmt.Errorf("parsing archive member %q: %v", name, err)
+			}
+			r.skip(r.limit - r.offset)
+			r.limit = oldLimit
+		}
+		if size&1 != 0 {
+			r.skip(1)
+		}
+	}
+	return nil
+}
+
+// parseObject parses a single Go object file.
+// The prefix is the bytes already read from the file,
+// typically in order to detect that this is an object file.
+// The object file consists of a textual header ending in "\n!\n"
+// and then the part we want to parse begins.
+// The format of that part is defined in a comment at the top
+// of src/liblink/objfile.c.
+func (r *objReader) parseObject(prefix []byte) error {
+	// TODO(rsc): Maybe use prefix and the initial input to
+	// record the header line from the file, which would
+	// give the architecture and other version information.
+
+	r.p.MaxVersion++
+	var c1, c2, c3 byte
+	for {
+		c1, c2, c3 = c2, c3, r.readByte()
+		if c3 == 0 { // NUL or EOF, either is bad
+			return errCorruptObject
+		}
+		if c1 == '\n' && c2 == '!' && c3 == '\n' {
+			break
+		}
+	}
+
+	r.readFull(r.tmp[:8])
+	if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go13ld")) {
+		return r.error(errCorruptObject)
+	}
+
+	b := r.readByte()
+	if b != 1 {
+		return r.error(errCorruptObject)
+	}
+
+	// Direct package dependencies.
+	for {
+		s := r.readString()
+		if s == "" {
+			break
+		}
+		r.p.Imports = append(r.p.Imports, s)
+	}
+
+	// Symbols.
+	for {
+		if b := r.readByte(); b != 0xfe {
+			if b != 0xff {
+				return r.error(errCorruptObject)
+			}
+			break
+		}
+
+		typ := r.readInt()
+		s := &Sym{SymID: r.readSymID()}
+		r.p.Syms = append(r.p.Syms, s)
+		s.Kind = SymKind(typ)
+		flags := r.readInt()
+		s.DupOK = flags&1 != 0
+		s.Size = r.readInt()
+		s.Type = r.readSymID()
+		s.Data = r.readData()
+		s.Reloc = make([]Reloc, r.readInt())
+		for i := range s.Reloc {
+			rel := &s.Reloc[i]
+			rel.Offset = r.readInt()
+			rel.Size = r.readInt()
+			rel.Type = r.readInt()
+			rel.Add = r.readInt()
+			r.readInt() // Xadd - ignored
+			rel.Sym = r.readSymID()
+			r.readSymID() // Xsym - ignored
+		}
+
+		if s.Kind == STEXT {
+			f := new(Func)
+			s.Func = f
+			f.Args = r.readInt()
+			f.Frame = r.readInt()
+			flags := r.readInt()
+			f.Leaf = flags&1 != 0
+			f.NoSplit = r.readInt() != 0
+			f.Var = make([]Var, r.readInt())
+			for i := range f.Var {
+				v := &f.Var[i]
+				v.Name = r.readSymID().Name
+				v.Offset = r.readInt()
+				v.Kind = r.readInt()
+				v.Type = r.readSymID()
+			}
+
+			f.PCSP = r.readData()
+			f.PCFile = r.readData()
+			f.PCLine = r.readData()
+			f.PCData = make([]Data, r.readInt())
+			for i := range f.PCData {
+				f.PCData[i] = r.readData()
+			}
+			f.FuncData = make([]FuncData, r.readInt())
+			for i := range f.FuncData {
+				f.FuncData[i].Sym = r.readSymID()
+			}
+			for i := range f.FuncData {
+				f.FuncData[i].Offset = int64(r.readInt()) // TODO
+			}
+			f.File = make([]string, r.readInt())
+			for i := range f.File {
+				f.File[i] = r.readSymID().Name
+			}
+		}
+	}
+
+	r.readFull(r.tmp[:7])
+	if !bytes.Equal(r.tmp[:7], []byte("\xffgo13ld")) {
+		return r.error(errCorruptObject)
+	}
+
+	return nil
+}