From f154da9e12608589e8d5f0508f908a0c3e88a1bb Mon Sep 17 00:00:00 2001 From: Tianon Gravi Date: Thu, 15 Jan 2015 11:54:00 -0700 Subject: Imported Upstream version 1.4 --- src/cmd/internal/goobj/read.go | 666 ++ src/cmd/internal/goobj/read_test.go | 28 + src/cmd/internal/objfile/disasm.go | 248 + src/cmd/internal/objfile/elf.go | 104 + src/cmd/internal/objfile/goobj.go | 93 + src/cmd/internal/objfile/macho.go | 116 + src/cmd/internal/objfile/objfile.go | 94 + src/cmd/internal/objfile/pe.go | 201 + src/cmd/internal/objfile/plan9obj.go | 146 + src/cmd/internal/rsc.io/arm/armasm/Makefile | 2 + src/cmd/internal/rsc.io/arm/armasm/decode.go | 567 ++ src/cmd/internal/rsc.io/arm/armasm/decode_test.go | 69 + src/cmd/internal/rsc.io/arm/armasm/ext_test.go | 614 ++ src/cmd/internal/rsc.io/arm/armasm/gnu.go | 164 + src/cmd/internal/rsc.io/arm/armasm/inst.go | 438 + src/cmd/internal/rsc.io/arm/armasm/objdump_test.go | 258 + .../internal/rsc.io/arm/armasm/objdumpext_test.go | 260 + src/cmd/internal/rsc.io/arm/armasm/plan9x.go | 211 + src/cmd/internal/rsc.io/arm/armasm/tables.go | 9448 +++++++++++++++++++ .../internal/rsc.io/arm/armasm/testdata/Makefile | 5 + .../internal/rsc.io/arm/armasm/testdata/decode.txt | 306 + src/cmd/internal/rsc.io/x86/x86asm/Makefile | 3 + src/cmd/internal/rsc.io/x86/x86asm/decode.go | 1616 ++++ src/cmd/internal/rsc.io/x86/x86asm/decode_test.go | 71 + src/cmd/internal/rsc.io/x86/x86asm/ext_test.go | 811 ++ src/cmd/internal/rsc.io/x86/x86asm/gnu.go | 926 ++ src/cmd/internal/rsc.io/x86/x86asm/inst.go | 641 ++ src/cmd/internal/rsc.io/x86/x86asm/inst_test.go | 20 + src/cmd/internal/rsc.io/x86/x86asm/intel.go | 518 ++ src/cmd/internal/rsc.io/x86/x86asm/objdump_test.go | 383 + .../internal/rsc.io/x86/x86asm/objdumpext_test.go | 314 + .../internal/rsc.io/x86/x86asm/plan9ext_test.go | 120 + src/cmd/internal/rsc.io/x86/x86asm/plan9x.go | 346 + src/cmd/internal/rsc.io/x86/x86asm/plan9x_test.go | 54 + src/cmd/internal/rsc.io/x86/x86asm/tables.go | 9760 ++++++++++++++++++++ .../internal/rsc.io/x86/x86asm/testdata/Makefile | 12 + .../internal/rsc.io/x86/x86asm/testdata/decode.txt | 6731 ++++++++++++++ src/cmd/internal/rsc.io/x86/x86asm/xed_test.go | 211 + src/cmd/internal/rsc.io/x86/x86asm/xedext_test.go | 206 + 39 files changed, 36781 insertions(+) create mode 100644 src/cmd/internal/goobj/read.go create mode 100644 src/cmd/internal/goobj/read_test.go create mode 100644 src/cmd/internal/objfile/disasm.go create mode 100644 src/cmd/internal/objfile/elf.go create mode 100644 src/cmd/internal/objfile/goobj.go create mode 100644 src/cmd/internal/objfile/macho.go create mode 100644 src/cmd/internal/objfile/objfile.go create mode 100644 src/cmd/internal/objfile/pe.go create mode 100644 src/cmd/internal/objfile/plan9obj.go create mode 100644 src/cmd/internal/rsc.io/arm/armasm/Makefile create mode 100644 src/cmd/internal/rsc.io/arm/armasm/decode.go create mode 100644 src/cmd/internal/rsc.io/arm/armasm/decode_test.go create mode 100644 src/cmd/internal/rsc.io/arm/armasm/ext_test.go create mode 100644 src/cmd/internal/rsc.io/arm/armasm/gnu.go create mode 100644 src/cmd/internal/rsc.io/arm/armasm/inst.go create mode 100644 src/cmd/internal/rsc.io/arm/armasm/objdump_test.go create mode 100644 src/cmd/internal/rsc.io/arm/armasm/objdumpext_test.go create mode 100644 src/cmd/internal/rsc.io/arm/armasm/plan9x.go create mode 100644 src/cmd/internal/rsc.io/arm/armasm/tables.go create mode 100644 src/cmd/internal/rsc.io/arm/armasm/testdata/Makefile create mode 100644 src/cmd/internal/rsc.io/arm/armasm/testdata/decode.txt create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/Makefile create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/decode.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/decode_test.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/ext_test.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/gnu.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/inst.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/inst_test.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/intel.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/objdump_test.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/objdumpext_test.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/plan9ext_test.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/plan9x.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/plan9x_test.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/tables.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/testdata/Makefile create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/testdata/decode.txt create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/xed_test.go create mode 100644 src/cmd/internal/rsc.io/x86/x86asm/xedext_test.go (limited to 'src/cmd/internal') diff --git a/src/cmd/internal/goobj/read.go b/src/cmd/internal/goobj/read.go new file mode 100644 index 000000000..79a83e59a --- /dev/null +++ b/src/cmd/internal/goobj/read.go @@ -0,0 +1,666 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package goobj implements reading of Go object files and archives. +// +// TODO(rsc): Decide where this package should live. (golang.org/issue/6932) +// TODO(rsc): Decide the appropriate integer types for various fields. +// TODO(rsc): Write tests. (File format still up in the air a little.) +package goobj + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "strconv" + "strings" +) + +// A SymKind describes the kind of memory represented by a symbol. +type SymKind int + +// This list is taken from include/link.h. + +// Defined SymKind values. +// TODO(rsc): Give idiomatic Go names. +// TODO(rsc): Reduce the number of symbol types in the object files. +const ( + _ SymKind = iota + + // readonly, executable + STEXT + SELFRXSECT + + // readonly, non-executable + STYPE + SSTRING + SGOSTRING + SGOFUNC + SRODATA + SFUNCTAB + STYPELINK + SSYMTAB // TODO: move to unmapped section + SPCLNTAB + SELFROSECT + + // writable, non-executable + SMACHOPLT + SELFSECT + SMACHO // Mach-O __nl_symbol_ptr + SMACHOGOT + SNOPTRDATA + SINITARR + SDATA + SWINDOWS + SBSS + SNOPTRBSS + STLSBSS + + // not mapped + SXREF + SMACHOSYMSTR + SMACHOSYMTAB + SMACHOINDIRECTPLT + SMACHOINDIRECTGOT + SFILE + SFILEPATH + SCONST + SDYNIMPORT + SHOSTOBJ +) + +var symKindStrings = []string{ + SBSS: "SBSS", + SCONST: "SCONST", + SDATA: "SDATA", + SDYNIMPORT: "SDYNIMPORT", + SELFROSECT: "SELFROSECT", + SELFRXSECT: "SELFRXSECT", + SELFSECT: "SELFSECT", + SFILE: "SFILE", + SFILEPATH: "SFILEPATH", + SFUNCTAB: "SFUNCTAB", + SGOFUNC: "SGOFUNC", + SGOSTRING: "SGOSTRING", + SHOSTOBJ: "SHOSTOBJ", + SINITARR: "SINITARR", + SMACHO: "SMACHO", + SMACHOGOT: "SMACHOGOT", + SMACHOINDIRECTGOT: "SMACHOINDIRECTGOT", + SMACHOINDIRECTPLT: "SMACHOINDIRECTPLT", + SMACHOPLT: "SMACHOPLT", + SMACHOSYMSTR: "SMACHOSYMSTR", + SMACHOSYMTAB: "SMACHOSYMTAB", + SNOPTRBSS: "SNOPTRBSS", + SNOPTRDATA: "SNOPTRDATA", + SPCLNTAB: "SPCLNTAB", + SRODATA: "SRODATA", + SSTRING: "SSTRING", + SSYMTAB: "SSYMTAB", + STEXT: "STEXT", + STLSBSS: "STLSBSS", + STYPE: "STYPE", + STYPELINK: "STYPELINK", + SWINDOWS: "SWINDOWS", + SXREF: "SXREF", +} + +func (k SymKind) String() string { + if k < 0 || int(k) >= len(symKindStrings) { + return fmt.Sprintf("SymKind(%d)", k) + } + return symKindStrings[k] +} + +// A Sym is a named symbol in an object file. +type Sym struct { + SymID // symbol identifier (name and version) + Kind SymKind // kind of symbol + DupOK bool // are duplicate definitions okay? + Size int // size of corresponding data + Type SymID // symbol for Go type information + Data Data // memory image of symbol + Reloc []Reloc // relocations to apply to Data + Func *Func // additional data for functions +} + +// A SymID - the combination of Name and Version - uniquely identifies +// a symbol within a package. +type SymID struct { + // Name is the name of a symbol. + Name string + + // Version is zero for symbols with global visibility. + // Symbols with only file visibility (such as file-level static + // declarations in C) have a non-zero version distinguishing + // a symbol in one file from a symbol of the same name + // in another file + Version int +} + +func (s SymID) String() string { + if s.Version == 0 { + return s.Name + } + return fmt.Sprintf("%s<%d>", s.Name, s.Version) +} + +// A Data is a reference to data stored in an object file. +// It records the offset and size of the data, so that a client can +// read the data only if necessary. +type Data struct { + Offset int64 + Size int64 +} + +// A Reloc describes a relocation applied to a memory image to refer +// to an address within a particular symbol. +type Reloc struct { + // The bytes at [Offset, Offset+Size) within the memory image + // should be updated to refer to the address Add bytes after the start + // of the symbol Sym. + Offset int + Size int + Sym SymID + Add int + + // The Type records the form of address expected in the bytes + // described by the previous fields: absolute, PC-relative, and so on. + // TODO(rsc): The interpretation of Type is not exposed by this package. + Type int +} + +// A Var describes a variable in a function stack frame: a declared +// local variable, an input argument, or an output result. +type Var struct { + // The combination of Name, Kind, and Offset uniquely + // identifies a variable in a function stack frame. + // Using fewer of these - in particular, using only Name - does not. + Name string // Name of variable. + Kind int // TODO(rsc): Define meaning. + Offset int // Frame offset. TODO(rsc): Define meaning. + + Type SymID // Go type for variable. +} + +// Func contains additional per-symbol information specific to functions. +type Func struct { + Args int // size in bytes of argument frame: inputs and outputs + Frame int // size in bytes of local variable frame + Leaf bool // function omits save of link register (ARM) + NoSplit bool // function omits stack split prologue + Var []Var // detail about local variables + PCSP Data // PC → SP offset map + PCFile Data // PC → file number map (index into File) + PCLine Data // PC → line number map + PCData []Data // PC → runtime support data map + FuncData []FuncData // non-PC-specific runtime support data + File []string // paths indexed by PCFile +} + +// TODO: Add PCData []byte and PCDataIter (similar to liblink). + +// A FuncData is a single function-specific data value. +type FuncData struct { + Sym SymID // symbol holding data + Offset int64 // offset into symbol for funcdata pointer +} + +// A Package is a parsed Go object file or archive defining a Go package. +type Package struct { + ImportPath string // import path denoting this package + Imports []string // packages imported by this package + Syms []*Sym // symbols defined by this package + MaxVersion int // maximum Version in any SymID in Syms +} + +var ( + archiveHeader = []byte("!\n") + archiveMagic = []byte("`\n") + goobjHeader = []byte("go objec") // truncated to size of archiveHeader + + errCorruptArchive = errors.New("corrupt archive") + errTruncatedArchive = errors.New("truncated archive") + errNotArchive = errors.New("unrecognized archive format") + + errCorruptObject = errors.New("corrupt object file") + errTruncatedObject = errors.New("truncated object file") + errNotObject = errors.New("unrecognized object file format") +) + +// An objReader is an object file reader. +type objReader struct { + p *Package + b *bufio.Reader + f io.ReadSeeker + err error + offset int64 + limit int64 + tmp [256]byte + pkg string + pkgprefix string +} + +// importPathToPrefix returns the prefix that will be used in the +// final symbol table for the given import path. +// We escape '%', '"', all control characters and non-ASCII bytes, +// and any '.' after the final slash. +// +// See ../../../cmd/ld/lib.c:/^pathtoprefix and +// ../../../cmd/gc/subr.c:/^pathtoprefix. +func importPathToPrefix(s string) string { + // find index of last slash, if any, or else -1. + // used for determining whether an index is after the last slash. + slash := strings.LastIndex(s, "/") + + // check for chars that need escaping + n := 0 + for r := 0; r < len(s); r++ { + if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F { + n++ + } + } + + // quick exit + if n == 0 { + return s + } + + // escape + const hex = "0123456789abcdef" + p := make([]byte, 0, len(s)+2*n) + for r := 0; r < len(s); r++ { + if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F { + p = append(p, '%', hex[c>>4], hex[c&0xF]) + } else { + p = append(p, c) + } + } + + return string(p) +} + +// init initializes r to read package p from f. +func (r *objReader) init(f io.ReadSeeker, p *Package) { + r.f = f + r.p = p + r.offset, _ = f.Seek(0, 1) + r.limit, _ = f.Seek(0, 2) + f.Seek(r.offset, 0) + r.b = bufio.NewReader(f) + r.pkgprefix = importPathToPrefix(p.ImportPath) + "." +} + +// error records that an error occurred. +// It returns only the first error, so that an error +// caused by an earlier error does not discard information +// about the earlier error. +func (r *objReader) error(err error) error { + if r.err == nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + } + // panic("corrupt") // useful for debugging + return r.err +} + +// readByte reads and returns a byte from the input file. +// On I/O error or EOF, it records the error but returns byte 0. +// A sequence of 0 bytes will eventually terminate any +// parsing state in the object file. In particular, it ends the +// reading of a varint. +func (r *objReader) readByte() byte { + if r.err != nil { + return 0 + } + if r.offset >= r.limit { + r.error(io.ErrUnexpectedEOF) + return 0 + } + b, err := r.b.ReadByte() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.error(err) + b = 0 + } else { + r.offset++ + } + return b +} + +// read reads exactly len(b) bytes from the input file. +// If an error occurs, read returns the error but also +// records it, so it is safe for callers to ignore the result +// as long as delaying the report is not a problem. +func (r *objReader) readFull(b []byte) error { + if r.err != nil { + return r.err + } + if r.offset+int64(len(b)) > r.limit { + return r.error(io.ErrUnexpectedEOF) + } + n, err := io.ReadFull(r.b, b) + r.offset += int64(n) + if err != nil { + return r.error(err) + } + return nil +} + +// readInt reads a zigzag varint from the input file. +func (r *objReader) readInt() int { + var u uint64 + + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + r.error(errCorruptObject) + return 0 + } + c := r.readByte() + u |= uint64(c&0x7F) << shift + if c&0x80 == 0 { + break + } + } + + v := int64(u>>1) ^ (int64(u) << 63 >> 63) + if int64(int(v)) != v { + r.error(errCorruptObject) // TODO + return 0 + } + return int(v) +} + +// readString reads a length-delimited string from the input file. +func (r *objReader) readString() string { + n := r.readInt() + buf := make([]byte, n) + r.readFull(buf) + return string(buf) +} + +// readSymID reads a SymID from the input file. +func (r *objReader) readSymID() SymID { + name, vers := r.readString(), r.readInt() + + // In a symbol name in an object file, "". denotes the + // prefix for the package in which the object file has been found. + // Expand it. + name = strings.Replace(name, `"".`, r.pkgprefix, -1) + + // An individual object file only records version 0 (extern) or 1 (static). + // To make static symbols unique across all files being read, we + // replace version 1 with the version corresponding to the current + // file number. The number is incremented on each call to parseObject. + if vers != 0 { + vers = r.p.MaxVersion + } + + return SymID{name, vers} +} + +// readData reads a data reference from the input file. +func (r *objReader) readData() Data { + n := r.readInt() + d := Data{Offset: r.offset, Size: int64(n)} + r.skip(int64(n)) + return d +} + +// skip skips n bytes in the input. +func (r *objReader) skip(n int64) { + if n < 0 { + r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) + } + if n < int64(len(r.tmp)) { + // Since the data is so small, a just reading from the buffered + // reader is better than flushing the buffer and seeking. + r.readFull(r.tmp[:n]) + } else if n <= int64(r.b.Buffered()) { + // Even though the data is not small, it has already been read. + // Advance the buffer instead of seeking. + for n > int64(len(r.tmp)) { + r.readFull(r.tmp[:]) + n -= int64(len(r.tmp)) + } + r.readFull(r.tmp[:n]) + } else { + // Seek, giving up buffered data. + _, err := r.f.Seek(r.offset+n, 0) + if err != nil { + r.error(err) + } + r.offset += n + r.b.Reset(r.f) + } +} + +// Parse parses an object file or archive from r, +// assuming that its import path is pkgpath. +func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) { + if pkgpath == "" { + pkgpath = `""` + } + p := new(Package) + p.ImportPath = pkgpath + + var rd objReader + rd.init(r, p) + err := rd.readFull(rd.tmp[:8]) + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return nil, err + } + + switch { + default: + return nil, errNotObject + + case bytes.Equal(rd.tmp[:8], archiveHeader): + if err := rd.parseArchive(); err != nil { + return nil, err + } + case bytes.Equal(rd.tmp[:8], goobjHeader): + if err := rd.parseObject(goobjHeader); err != nil { + return nil, err + } + } + + return p, nil +} + +// trimSpace removes trailing spaces from b and returns the corresponding string. +// This effectively parses the form used in archive headers. +func trimSpace(b []byte) string { + return string(bytes.TrimRight(b, " ")) +} + +// parseArchive parses a Unix archive of Go object files. +// TODO(rsc): Need to skip non-Go object files. +// TODO(rsc): Maybe record table of contents in r.p so that +// linker can avoid having code to parse archives too. +func (r *objReader) parseArchive() error { + for r.offset < r.limit { + if err := r.readFull(r.tmp[:60]); err != nil { + return err + } + data := r.tmp[:60] + + // Each file is preceded by this text header (slice indices in first column): + // 0:16 name + // 16:28 date + // 28:34 uid + // 34:40 gid + // 40:48 mode + // 48:58 size + // 58:60 magic - `\n + // We only care about name, size, and magic. + // The fields are space-padded on the right. + // The size is in decimal. + // The file data - size bytes - follows the header. + // Headers are 2-byte aligned, so if size is odd, an extra padding + // byte sits between the file data and the next header. + // The file data that follows is padded to an even number of bytes: + // if size is odd, an extra padding byte is inserted betw the next header. + if len(data) < 60 { + return errTruncatedArchive + } + if !bytes.Equal(data[58:60], archiveMagic) { + return errCorruptArchive + } + name := trimSpace(data[0:16]) + size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64) + if err != nil { + return errCorruptArchive + } + data = data[60:] + fsize := size + size&1 + if fsize < 0 || fsize < size { + return errCorruptArchive + } + switch name { + case "__.SYMDEF", "__.GOSYMDEF", "__.PKGDEF": + r.skip(size) + default: + oldLimit := r.limit + r.limit = r.offset + size + if err := r.parseObject(nil); err != nil { + return fmt.Errorf("parsing archive member %q: %v", name, err) + } + r.skip(r.limit - r.offset) + r.limit = oldLimit + } + if size&1 != 0 { + r.skip(1) + } + } + return nil +} + +// parseObject parses a single Go object file. +// The prefix is the bytes already read from the file, +// typically in order to detect that this is an object file. +// The object file consists of a textual header ending in "\n!\n" +// and then the part we want to parse begins. +// The format of that part is defined in a comment at the top +// of src/liblink/objfile.c. +func (r *objReader) parseObject(prefix []byte) error { + // TODO(rsc): Maybe use prefix and the initial input to + // record the header line from the file, which would + // give the architecture and other version information. + + r.p.MaxVersion++ + var c1, c2, c3 byte + for { + c1, c2, c3 = c2, c3, r.readByte() + if c3 == 0 { // NUL or EOF, either is bad + return errCorruptObject + } + if c1 == '\n' && c2 == '!' && c3 == '\n' { + break + } + } + + r.readFull(r.tmp[:8]) + if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go13ld")) { + return r.error(errCorruptObject) + } + + b := r.readByte() + if b != 1 { + return r.error(errCorruptObject) + } + + // Direct package dependencies. + for { + s := r.readString() + if s == "" { + break + } + r.p.Imports = append(r.p.Imports, s) + } + + // Symbols. + for { + if b := r.readByte(); b != 0xfe { + if b != 0xff { + return r.error(errCorruptObject) + } + break + } + + typ := r.readInt() + s := &Sym{SymID: r.readSymID()} + r.p.Syms = append(r.p.Syms, s) + s.Kind = SymKind(typ) + flags := r.readInt() + s.DupOK = flags&1 != 0 + s.Size = r.readInt() + s.Type = r.readSymID() + s.Data = r.readData() + s.Reloc = make([]Reloc, r.readInt()) + for i := range s.Reloc { + rel := &s.Reloc[i] + rel.Offset = r.readInt() + rel.Size = r.readInt() + rel.Type = r.readInt() + rel.Add = r.readInt() + r.readInt() // Xadd - ignored + rel.Sym = r.readSymID() + r.readSymID() // Xsym - ignored + } + + if s.Kind == STEXT { + f := new(Func) + s.Func = f + f.Args = r.readInt() + f.Frame = r.readInt() + flags := r.readInt() + f.Leaf = flags&1 != 0 + f.NoSplit = r.readInt() != 0 + f.Var = make([]Var, r.readInt()) + for i := range f.Var { + v := &f.Var[i] + v.Name = r.readSymID().Name + v.Offset = r.readInt() + v.Kind = r.readInt() + v.Type = r.readSymID() + } + + f.PCSP = r.readData() + f.PCFile = r.readData() + f.PCLine = r.readData() + f.PCData = make([]Data, r.readInt()) + for i := range f.PCData { + f.PCData[i] = r.readData() + } + f.FuncData = make([]FuncData, r.readInt()) + for i := range f.FuncData { + f.FuncData[i].Sym = r.readSymID() + } + for i := range f.FuncData { + f.FuncData[i].Offset = int64(r.readInt()) // TODO + } + f.File = make([]string, r.readInt()) + for i := range f.File { + f.File[i] = r.readSymID().Name + } + } + } + + r.readFull(r.tmp[:7]) + if !bytes.Equal(r.tmp[:7], []byte("\xffgo13ld")) { + return r.error(errCorruptObject) + } + + return nil +} diff --git a/src/cmd/internal/goobj/read_test.go b/src/cmd/internal/goobj/read_test.go new file mode 100644 index 000000000..cc991e5d9 --- /dev/null +++ b/src/cmd/internal/goobj/read_test.go @@ -0,0 +1,28 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package goobj + +import "testing" + +var importPathToPrefixTests = []struct { + in string + out string +}{ + {"runtime", "runtime"}, + {"sync/atomic", "sync/atomic"}, + {"golang.org/x/tools/godoc", "golang.org/x/tools/godoc"}, + {"foo.bar/baz.quux", "foo.bar/baz%2equux"}, + {"", ""}, + {"%foo%bar", "%25foo%25bar"}, + {"\x01\x00\x7F☺", "%01%00%7f%e2%98%ba"}, +} + +func TestImportPathToPrefix(t *testing.T) { + for _, tt := range importPathToPrefixTests { + if out := importPathToPrefix(tt.in); out != tt.out { + t.Errorf("importPathToPrefix(%q) = %q, want %q", tt.in, out, tt.out) + } + } +} diff --git a/src/cmd/internal/objfile/disasm.go b/src/cmd/internal/objfile/disasm.go new file mode 100644 index 000000000..1a339c321 --- /dev/null +++ b/src/cmd/internal/objfile/disasm.go @@ -0,0 +1,248 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package objfile + +import ( + "bufio" + "debug/gosym" + "encoding/binary" + "fmt" + "io" + "regexp" + "sort" + "strings" + "text/tabwriter" + + "cmd/internal/rsc.io/arm/armasm" + "cmd/internal/rsc.io/x86/x86asm" +) + +// Disasm is a disassembler for a given File. +type Disasm struct { + syms []Sym //symbols in file, sorted by address + pcln *gosym.Table // pcln table + text []byte // bytes of text segment (actual instructions) + textStart uint64 // start PC of text + textEnd uint64 // end PC of text + goarch string // GOARCH string + disasm disasmFunc // disassembler function for goarch + byteOrder binary.ByteOrder // byte order for goarch +} + +// Disasm returns a disassembler for the file f. +func (f *File) Disasm() (*Disasm, error) { + syms, err := f.Symbols() + if err != nil { + return nil, err + } + + pcln, err := f.PCLineTable() + if err != nil { + return nil, err + } + + textStart, textBytes, err := f.Text() + if err != nil { + return nil, err + } + + goarch := f.GOARCH() + disasm := disasms[goarch] + byteOrder := byteOrders[goarch] + if disasm == nil || byteOrder == nil { + return nil, fmt.Errorf("unsupported architecture") + } + + // Filter out section symbols, overwriting syms in place. + keep := syms[:0] + for _, sym := range syms { + switch sym.Name { + case "runtime.text", "text", "_text", "runtime.etext", "etext", "_etext": + // drop + default: + keep = append(keep, sym) + } + } + syms = keep + d := &Disasm{ + syms: syms, + pcln: pcln, + text: textBytes, + textStart: textStart, + textEnd: textStart + uint64(len(textBytes)), + goarch: goarch, + disasm: disasm, + byteOrder: byteOrder, + } + + return d, nil +} + +// lookup finds the symbol name containing addr. +func (d *Disasm) lookup(addr uint64) (name string, base uint64) { + i := sort.Search(len(d.syms), func(i int) bool { return addr < d.syms[i].Addr }) + if i > 0 { + s := d.syms[i-1] + if s.Addr != 0 && s.Addr <= addr && addr < s.Addr+uint64(s.Size) { + return s.Name, s.Addr + } + } + return "", 0 +} + +// base returns the final element in the path. +// It works on both Windows and Unix paths, +// regardless of host operating system. +func base(path string) string { + path = path[strings.LastIndex(path, "/")+1:] + path = path[strings.LastIndex(path, `\`)+1:] + return path +} + +// Print prints a disassembly of the file to w. +// If filter is non-nil, the disassembly only includes functions with names matching filter. +// The disassembly only includes functions that overlap the range [start, end). +func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64) { + if start < d.textStart { + start = d.textStart + } + if end > d.textEnd { + end = d.textEnd + } + printed := false + bw := bufio.NewWriter(w) + for _, sym := range d.syms { + symStart := sym.Addr + symEnd := sym.Addr + uint64(sym.Size) + if sym.Code != 'T' && sym.Code != 't' || + symStart < d.textStart || + symEnd <= start || end <= symStart || + filter != nil && !filter.MatchString(sym.Name) { + continue + } + if printed { + fmt.Fprintf(bw, "\n") + } + printed = true + + file, _, _ := d.pcln.PCToLine(sym.Addr) + fmt.Fprintf(bw, "TEXT %s(SB) %s\n", sym.Name, file) + + tw := tabwriter.NewWriter(bw, 1, 8, 1, '\t', 0) + if symEnd > end { + symEnd = end + } + code := d.text[:end-d.textStart] + d.Decode(symStart, symEnd, func(pc, size uint64, file string, line int, text string) { + i := pc - d.textStart + fmt.Fprintf(tw, "\t%s:%d\t%#x\t", base(file), line, pc) + if size%4 != 0 || d.goarch == "386" || d.goarch == "amd64" { + // Print instruction as bytes. + fmt.Fprintf(tw, "%x", code[i:i+size]) + } else { + // Print instruction as 32-bit words. + for j := uint64(0); j < size; j += 4 { + if j > 0 { + fmt.Fprintf(tw, " ") + } + fmt.Fprintf(tw, "%08x", d.byteOrder.Uint32(code[i+j:])) + } + } + fmt.Fprintf(tw, "\t%s\n", text) + }) + tw.Flush() + } + bw.Flush() +} + +// Decode disassembles the text segment range [start, end), calling f for each instruction. +func (d *Disasm) Decode(start, end uint64, f func(pc, size uint64, file string, line int, text string)) { + if start < d.textStart { + start = d.textStart + } + if end > d.textEnd { + end = d.textEnd + } + code := d.text[:end-d.textStart] + lookup := d.lookup + for pc := start; pc < end; { + i := pc - d.textStart + text, size := d.disasm(code[i:], pc, lookup) + file, line, _ := d.pcln.PCToLine(pc) + f(pc, uint64(size), file, line, text) + pc += uint64(size) + } +} + +type lookupFunc func(addr uint64) (sym string, base uint64) +type disasmFunc func(code []byte, pc uint64, lookup lookupFunc) (text string, size int) + +func disasm_386(code []byte, pc uint64, lookup lookupFunc) (string, int) { + return disasm_x86(code, pc, lookup, 32) +} + +func disasm_amd64(code []byte, pc uint64, lookup lookupFunc) (string, int) { + return disasm_x86(code, pc, lookup, 64) +} + +func disasm_x86(code []byte, pc uint64, lookup lookupFunc, arch int) (string, int) { + inst, err := x86asm.Decode(code, 64) + var text string + size := inst.Len + if err != nil || size == 0 || inst.Op == 0 { + size = 1 + text = "?" + } else { + text = x86asm.Plan9Syntax(inst, pc, lookup) + } + return text, size +} + +type textReader struct { + code []byte + pc uint64 +} + +func (r textReader) ReadAt(data []byte, off int64) (n int, err error) { + if off < 0 || uint64(off) < r.pc { + return 0, io.EOF + } + d := uint64(off) - r.pc + if d >= uint64(len(r.code)) { + return 0, io.EOF + } + n = copy(data, r.code[d:]) + if n < len(data) { + err = io.ErrUnexpectedEOF + } + return +} + +func disasm_arm(code []byte, pc uint64, lookup lookupFunc) (string, int) { + inst, err := armasm.Decode(code, armasm.ModeARM) + var text string + size := inst.Len + if err != nil || size == 0 || inst.Op == 0 { + size = 4 + text = "?" + } else { + text = armasm.Plan9Syntax(inst, pc, lookup, textReader{code, pc}) + } + return text, size +} + +var disasms = map[string]disasmFunc{ + "386": disasm_386, + "amd64": disasm_amd64, + "arm": disasm_arm, +} + +var byteOrders = map[string]binary.ByteOrder{ + "386": binary.LittleEndian, + "amd64": binary.LittleEndian, + "arm": binary.LittleEndian, + "power64": binary.BigEndian, + "power64le": binary.LittleEndian, +} diff --git a/src/cmd/internal/objfile/elf.go b/src/cmd/internal/objfile/elf.go new file mode 100644 index 000000000..17755b84d --- /dev/null +++ b/src/cmd/internal/objfile/elf.go @@ -0,0 +1,104 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Parsing of ELF executables (Linux, FreeBSD, and so on). + +package objfile + +import ( + "debug/elf" + "fmt" + "os" +) + +type elfFile struct { + elf *elf.File +} + +func openElf(r *os.File) (rawFile, error) { + f, err := elf.NewFile(r) + if err != nil { + return nil, err + } + return &elfFile{f}, nil +} + +func (f *elfFile) symbols() ([]Sym, error) { + elfSyms, err := f.elf.Symbols() + if err != nil { + return nil, err + } + + var syms []Sym + for _, s := range elfSyms { + sym := Sym{Addr: s.Value, Name: s.Name, Size: int64(s.Size), Code: '?'} + switch s.Section { + case elf.SHN_UNDEF: + sym.Code = 'U' + case elf.SHN_COMMON: + sym.Code = 'B' + default: + i := int(s.Section) + if i < 0 || i >= len(f.elf.Sections) { + break + } + sect := f.elf.Sections[i] + switch sect.Flags & (elf.SHF_WRITE | elf.SHF_ALLOC | elf.SHF_EXECINSTR) { + case elf.SHF_ALLOC | elf.SHF_EXECINSTR: + sym.Code = 'T' + case elf.SHF_ALLOC: + sym.Code = 'R' + case elf.SHF_ALLOC | elf.SHF_WRITE: + sym.Code = 'D' + } + } + if elf.ST_BIND(s.Info) == elf.STB_LOCAL { + sym.Code += 'a' - 'A' + } + syms = append(syms, sym) + } + + return syms, nil +} + +func (f *elfFile) pcln() (textStart uint64, symtab, pclntab []byte, err error) { + if sect := f.elf.Section(".text"); sect != nil { + textStart = sect.Addr + } + if sect := f.elf.Section(".gosymtab"); sect != nil { + if symtab, err = sect.Data(); err != nil { + return 0, nil, nil, err + } + } + if sect := f.elf.Section(".gopclntab"); sect != nil { + if pclntab, err = sect.Data(); err != nil { + return 0, nil, nil, err + } + } + return textStart, symtab, pclntab, nil +} + +func (f *elfFile) text() (textStart uint64, text []byte, err error) { + sect := f.elf.Section(".text") + if sect == nil { + return 0, nil, fmt.Errorf("text section not found") + } + textStart = sect.Addr + text, err = sect.Data() + return +} + +func (f *elfFile) goarch() string { + switch f.elf.Machine { + case elf.EM_386: + return "386" + case elf.EM_X86_64: + return "amd64" + case elf.EM_ARM: + return "arm" + case elf.EM_PPC64: + return "power64" + } + return "" +} diff --git a/src/cmd/internal/objfile/goobj.go b/src/cmd/internal/objfile/goobj.go new file mode 100644 index 000000000..6b1607a17 --- /dev/null +++ b/src/cmd/internal/objfile/goobj.go @@ -0,0 +1,93 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Parsing of Go intermediate object files and archives. + +package objfile + +import ( + "cmd/internal/goobj" + "fmt" + "os" +) + +type goobjFile struct { + goobj *goobj.Package +} + +func openGoobj(r *os.File) (rawFile, error) { + f, err := goobj.Parse(r, `""`) + if err != nil { + return nil, err + } + return &goobjFile{f}, nil +} + +func goobjName(id goobj.SymID) string { + if id.Version == 0 { + return id.Name + } + return fmt.Sprintf("%s<%d>", id.Name, id.Version) +} + +func (f *goobjFile) symbols() ([]Sym, error) { + seen := make(map[goobj.SymID]bool) + + var syms []Sym + for _, s := range f.goobj.Syms { + seen[s.SymID] = true + sym := Sym{Addr: uint64(s.Data.Offset), Name: goobjName(s.SymID), Size: int64(s.Size), Type: s.Type.Name, Code: '?'} + switch s.Kind { + case goobj.STEXT, goobj.SELFRXSECT: + sym.Code = 'T' + case goobj.STYPE, goobj.SSTRING, goobj.SGOSTRING, goobj.SGOFUNC, goobj.SRODATA, goobj.SFUNCTAB, goobj.STYPELINK, goobj.SSYMTAB, goobj.SPCLNTAB, goobj.SELFROSECT: + sym.Code = 'R' + case goobj.SMACHOPLT, goobj.SELFSECT, goobj.SMACHO, goobj.SMACHOGOT, goobj.SNOPTRDATA, goobj.SINITARR, goobj.SDATA, goobj.SWINDOWS: + sym.Code = 'D' + case goobj.SBSS, goobj.SNOPTRBSS, goobj.STLSBSS: + sym.Code = 'B' + case goobj.SXREF, goobj.SMACHOSYMSTR, goobj.SMACHOSYMTAB, goobj.SMACHOINDIRECTPLT, goobj.SMACHOINDIRECTGOT, goobj.SFILE, goobj.SFILEPATH, goobj.SCONST, goobj.SDYNIMPORT, goobj.SHOSTOBJ: + sym.Code = 'X' // should not see + } + if s.Version != 0 { + sym.Code += 'a' - 'A' + } + syms = append(syms, sym) + } + + for _, s := range f.goobj.Syms { + for _, r := range s.Reloc { + if !seen[r.Sym] { + seen[r.Sym] = true + sym := Sym{Name: goobjName(r.Sym), Code: 'U'} + if s.Version != 0 { + // should not happen but handle anyway + sym.Code = 'u' + } + syms = append(syms, sym) + } + } + } + + return syms, nil +} + +// pcln does not make sense for Go object files, because each +// symbol has its own individual pcln table, so there is no global +// space of addresses to map. +func (f *goobjFile) pcln() (textStart uint64, symtab, pclntab []byte, err error) { + return 0, nil, nil, fmt.Errorf("pcln not available in go object file") +} + +// text does not make sense for Go object files, because +// each function has a separate section. +func (f *goobjFile) text() (textStart uint64, text []byte, err error) { + return 0, nil, fmt.Errorf("text not available in go object file") +} + +// goarch makes sense but is not exposed in debug/goobj's API, +// and we don't need it yet for any users of internal/objfile. +func (f *goobjFile) goarch() string { + return "GOARCH unimplemented for debug/goobj files" +} diff --git a/src/cmd/internal/objfile/macho.go b/src/cmd/internal/objfile/macho.go new file mode 100644 index 000000000..7dd84a339 --- /dev/null +++ b/src/cmd/internal/objfile/macho.go @@ -0,0 +1,116 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Parsing of Mach-O executables (OS X). + +package objfile + +import ( + "debug/macho" + "fmt" + "os" + "sort" +) + +type machoFile struct { + macho *macho.File +} + +func openMacho(r *os.File) (rawFile, error) { + f, err := macho.NewFile(r) + if err != nil { + return nil, err + } + return &machoFile{f}, nil +} + +func (f *machoFile) symbols() ([]Sym, error) { + if f.macho.Symtab == nil { + return nil, fmt.Errorf("missing symbol table") + } + + // Build sorted list of addresses of all symbols. + // We infer the size of a symbol by looking at where the next symbol begins. + var addrs []uint64 + for _, s := range f.macho.Symtab.Syms { + addrs = append(addrs, s.Value) + } + sort.Sort(uint64s(addrs)) + + var syms []Sym + for _, s := range f.macho.Symtab.Syms { + sym := Sym{Name: s.Name, Addr: s.Value, Code: '?'} + i := sort.Search(len(addrs), func(x int) bool { return addrs[x] > s.Value }) + if i < len(addrs) { + sym.Size = int64(addrs[i] - s.Value) + } + if s.Sect == 0 { + sym.Code = 'U' + } else if int(s.Sect) <= len(f.macho.Sections) { + sect := f.macho.Sections[s.Sect-1] + switch sect.Seg { + case "__TEXT": + sym.Code = 'R' + case "__DATA": + sym.Code = 'D' + } + switch sect.Seg + " " + sect.Name { + case "__TEXT __text": + sym.Code = 'T' + case "__DATA __bss", "__DATA __noptrbss": + sym.Code = 'B' + } + } + syms = append(syms, sym) + } + + return syms, nil +} + +func (f *machoFile) pcln() (textStart uint64, symtab, pclntab []byte, err error) { + if sect := f.macho.Section("__text"); sect != nil { + textStart = sect.Addr + } + if sect := f.macho.Section("__gosymtab"); sect != nil { + if symtab, err = sect.Data(); err != nil { + return 0, nil, nil, err + } + } + if sect := f.macho.Section("__gopclntab"); sect != nil { + if pclntab, err = sect.Data(); err != nil { + return 0, nil, nil, err + } + } + return textStart, symtab, pclntab, nil +} + +func (f *machoFile) text() (textStart uint64, text []byte, err error) { + sect := f.macho.Section("__text") + if sect == nil { + return 0, nil, fmt.Errorf("text section not found") + } + textStart = sect.Addr + text, err = sect.Data() + return +} + +func (f *machoFile) goarch() string { + switch f.macho.Cpu { + case macho.Cpu386: + return "386" + case macho.CpuAmd64: + return "amd64" + case macho.CpuArm: + return "arm" + case macho.CpuPpc64: + return "power64" + } + return "" +} + +type uint64s []uint64 + +func (x uint64s) Len() int { return len(x) } +func (x uint64s) Swap(i, j int) { x[i], x[j] = x[j], x[i] } +func (x uint64s) Less(i, j int) bool { return x[i] < x[j] } diff --git a/src/cmd/internal/objfile/objfile.go b/src/cmd/internal/objfile/objfile.go new file mode 100644 index 000000000..9227ef387 --- /dev/null +++ b/src/cmd/internal/objfile/objfile.go @@ -0,0 +1,94 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package objfile implements portable access to OS-specific executable files. +package objfile + +import ( + "debug/gosym" + "fmt" + "os" + "sort" +) + +type rawFile interface { + symbols() (syms []Sym, err error) + pcln() (textStart uint64, symtab, pclntab []byte, err error) + text() (textStart uint64, text []byte, err error) + goarch() string +} + +// A File is an opened executable file. +type File struct { + r *os.File + raw rawFile +} + +// A Sym is a symbol defined in an executable file. +type Sym struct { + Name string // symbol name + Addr uint64 // virtual address of symbol + Size int64 // size in bytes + Code rune // nm code (T for text, D for data, and so on) + Type string // XXX? +} + +var openers = []func(*os.File) (rawFile, error){ + openElf, + openGoobj, + openMacho, + openPE, + openPlan9, +} + +// Open opens the named file. +// The caller must call f.Close when the file is no longer needed. +func Open(name string) (*File, error) { + r, err := os.Open(name) + if err != nil { + return nil, err + } + for _, try := range openers { + if raw, err := try(r); err == nil { + return &File{r, raw}, nil + } + } + r.Close() + return nil, fmt.Errorf("open %s: unrecognized object file", name) +} + +func (f *File) Close() error { + return f.r.Close() +} + +func (f *File) Symbols() ([]Sym, error) { + syms, err := f.raw.symbols() + if err != nil { + return nil, err + } + sort.Sort(byAddr(syms)) + return syms, nil +} + +type byAddr []Sym + +func (x byAddr) Less(i, j int) bool { return x[i].Addr < x[j].Addr } +func (x byAddr) Len() int { return len(x) } +func (x byAddr) Swap(i, j int) { x[i], x[j] = x[j], x[i] } + +func (f *File) PCLineTable() (*gosym.Table, error) { + textStart, symtab, pclntab, err := f.raw.pcln() + if err != nil { + return nil, err + } + return gosym.NewTable(symtab, gosym.NewLineTable(pclntab, textStart)) +} + +func (f *File) Text() (uint64, []byte, error) { + return f.raw.text() +} + +func (f *File) GOARCH() string { + return f.raw.goarch() +} diff --git a/src/cmd/internal/objfile/pe.go b/src/cmd/internal/objfile/pe.go new file mode 100644 index 000000000..67e59c226 --- /dev/null +++ b/src/cmd/internal/objfile/pe.go @@ -0,0 +1,201 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Parsing of PE executables (Microsoft Windows). + +package objfile + +import ( + "debug/pe" + "fmt" + "os" + "sort" +) + +type peFile struct { + pe *pe.File +} + +func openPE(r *os.File) (rawFile, error) { + f, err := pe.NewFile(r) + if err != nil { + return nil, err + } + switch f.OptionalHeader.(type) { + case *pe.OptionalHeader32, *pe.OptionalHeader64: + // ok + default: + return nil, fmt.Errorf("unrecognized PE format") + } + return &peFile{f}, nil +} + +func (f *peFile) symbols() ([]Sym, error) { + // Build sorted list of addresses of all symbols. + // We infer the size of a symbol by looking at where the next symbol begins. + var addrs []uint64 + + var imageBase uint64 + switch oh := f.pe.OptionalHeader.(type) { + case *pe.OptionalHeader32: + imageBase = uint64(oh.ImageBase) + case *pe.OptionalHeader64: + imageBase = oh.ImageBase + } + + var syms []Sym + for _, s := range f.pe.Symbols { + const ( + N_UNDEF = 0 // An undefined (extern) symbol + N_ABS = -1 // An absolute symbol (e_value is a constant, not an address) + N_DEBUG = -2 // A debugging symbol + ) + sym := Sym{Name: s.Name, Addr: uint64(s.Value), Code: '?'} + switch s.SectionNumber { + case N_UNDEF: + sym.Code = 'U' + case N_ABS: + sym.Code = 'C' + case N_DEBUG: + sym.Code = '?' + default: + if s.SectionNumber < 0 || len(f.pe.Sections) < int(s.SectionNumber) { + return nil, fmt.Errorf("invalid section number in symbol table") + } + sect := f.pe.Sections[s.SectionNumber-1] + const ( + text = 0x20 + data = 0x40 + bss = 0x80 + permX = 0x20000000 + permR = 0x40000000 + permW = 0x80000000 + ) + ch := sect.Characteristics + switch { + case ch&text != 0: + sym.Code = 'T' + case ch&data != 0: + if ch&permW == 0 { + sym.Code = 'R' + } else { + sym.Code = 'D' + } + case ch&bss != 0: + sym.Code = 'B' + } + sym.Addr += imageBase + uint64(sect.VirtualAddress) + } + syms = append(syms, sym) + addrs = append(addrs, sym.Addr) + } + + sort.Sort(uint64s(addrs)) + for i := range syms { + j := sort.Search(len(addrs), func(x int) bool { return addrs[x] > syms[i].Addr }) + if j < len(addrs) { + syms[i].Size = int64(addrs[j] - syms[i].Addr) + } + } + + return syms, nil +} + +func (f *peFile) pcln() (textStart uint64, symtab, pclntab []byte, err error) { + var imageBase uint64 + switch oh := f.pe.OptionalHeader.(type) { + case *pe.OptionalHeader32: + imageBase = uint64(oh.ImageBase) + case *pe.OptionalHeader64: + imageBase = oh.ImageBase + default: + return 0, nil, nil, fmt.Errorf("pe file format not recognized") + } + if sect := f.pe.Section(".text"); sect != nil { + textStart = imageBase + uint64(sect.VirtualAddress) + } + if pclntab, err = loadPETable(f.pe, "runtime.pclntab", "runtime.epclntab"); err != nil { + // We didn't find the symbols, so look for the names used in 1.3 and earlier. + // TODO: Remove code looking for the old symbols when we no longer care about 1.3. + var err2 error + if pclntab, err2 = loadPETable(f.pe, "pclntab", "epclntab"); err2 != nil { + return 0, nil, nil, err + } + } + if symtab, err = loadPETable(f.pe, "runtime.symtab", "runtime.esymtab"); err != nil { + // Same as above. + var err2 error + if symtab, err2 = loadPETable(f.pe, "symtab", "esymtab"); err2 != nil { + return 0, nil, nil, err + } + } + return textStart, symtab, pclntab, nil +} + +func (f *peFile) text() (textStart uint64, text []byte, err error) { + var imageBase uint64 + switch oh := f.pe.OptionalHeader.(type) { + case *pe.OptionalHeader32: + imageBase = uint64(oh.ImageBase) + case *pe.OptionalHeader64: + imageBase = oh.ImageBase + default: + return 0, nil, fmt.Errorf("pe file format not recognized") + } + sect := f.pe.Section(".text") + if sect == nil { + return 0, nil, fmt.Errorf("text section not found") + } + textStart = imageBase + uint64(sect.VirtualAddress) + text, err = sect.Data() + return +} + +func findPESymbol(f *pe.File, name string) (*pe.Symbol, error) { + for _, s := range f.Symbols { + if s.Name != name { + continue + } + if s.SectionNumber <= 0 { + return nil, fmt.Errorf("symbol %s: invalid section number %d", name, s.SectionNumber) + } + if len(f.Sections) < int(s.SectionNumber) { + return nil, fmt.Errorf("symbol %s: section number %d is larger than max %d", name, s.SectionNumber, len(f.Sections)) + } + return s, nil + } + return nil, fmt.Errorf("no %s symbol found", name) +} + +func loadPETable(f *pe.File, sname, ename string) ([]byte, error) { + ssym, err := findPESymbol(f, sname) + if err != nil { + return nil, err + } + esym, err := findPESymbol(f, ename) + if err != nil { + return nil, err + } + if ssym.SectionNumber != esym.SectionNumber { + return nil, fmt.Errorf("%s and %s symbols must be in the same section", sname, ename) + } + sect := f.Sections[ssym.SectionNumber-1] + data, err := sect.Data() + if err != nil { + return nil, err + } + return data[ssym.Value:esym.Value], nil +} + +func (f *peFile) goarch() string { + // Not sure how to get the info we want from PE header. + // Look in symbol table for telltale rt0 symbol. + if _, err := findPESymbol(f.pe, "_rt0_386_windows"); err == nil { + return "386" + } + if _, err := findPESymbol(f.pe, "_rt0_amd64_windows"); err == nil { + return "amd64" + } + return "" +} diff --git a/src/cmd/internal/objfile/plan9obj.go b/src/cmd/internal/objfile/plan9obj.go new file mode 100644 index 000000000..eb6cba5eb --- /dev/null +++ b/src/cmd/internal/objfile/plan9obj.go @@ -0,0 +1,146 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Parsing of Plan 9 a.out executables. + +package objfile + +import ( + "debug/plan9obj" + "fmt" + "os" + "sort" +) + +var validSymType = map[rune]bool{ + 'T': true, + 't': true, + 'D': true, + 'd': true, + 'B': true, + 'b': true, +} + +type plan9File struct { + plan9 *plan9obj.File +} + +func openPlan9(r *os.File) (rawFile, error) { + f, err := plan9obj.NewFile(r) + if err != nil { + return nil, err + } + return &plan9File{f}, nil +} + +func (f *plan9File) symbols() ([]Sym, error) { + plan9Syms, err := f.plan9.Symbols() + if err != nil { + return nil, err + } + + // Build sorted list of addresses of all symbols. + // We infer the size of a symbol by looking at where the next symbol begins. + var addrs []uint64 + for _, s := range plan9Syms { + if !validSymType[s.Type] { + continue + } + addrs = append(addrs, s.Value) + } + sort.Sort(uint64s(addrs)) + + var syms []Sym + + for _, s := range plan9Syms { + if !validSymType[s.Type] { + continue + } + sym := Sym{Addr: s.Value, Name: s.Name, Code: rune(s.Type)} + i := sort.Search(len(addrs), func(x int) bool { return addrs[x] > s.Value }) + if i < len(addrs) { + sym.Size = int64(addrs[i] - s.Value) + } + syms = append(syms, sym) + } + + return syms, nil +} + +func (f *plan9File) pcln() (textStart uint64, symtab, pclntab []byte, err error) { + textStart = f.plan9.LoadAddress + f.plan9.HdrSize + if pclntab, err = loadPlan9Table(f.plan9, "runtime.pclntab", "runtime.epclntab"); err != nil { + // We didn't find the symbols, so look for the names used in 1.3 and earlier. + // TODO: Remove code looking for the old symbols when we no longer care about 1.3. + var err2 error + if pclntab, err2 = loadPlan9Table(f.plan9, "pclntab", "epclntab"); err2 != nil { + return 0, nil, nil, err + } + } + if symtab, err = loadPlan9Table(f.plan9, "runtime.symtab", "runtime.esymtab"); err != nil { + // Same as above. + var err2 error + if symtab, err2 = loadPlan9Table(f.plan9, "symtab", "esymtab"); err2 != nil { + return 0, nil, nil, err + } + } + return textStart, symtab, pclntab, nil +} + +func (f *plan9File) text() (textStart uint64, text []byte, err error) { + sect := f.plan9.Section("text") + if sect == nil { + return 0, nil, fmt.Errorf("text section not found") + } + textStart = f.plan9.LoadAddress + f.plan9.HdrSize + text, err = sect.Data() + return +} + +func findPlan9Symbol(f *plan9obj.File, name string) (*plan9obj.Sym, error) { + syms, err := f.Symbols() + if err != nil { + return nil, err + } + for _, s := range syms { + if s.Name != name { + continue + } + return &s, nil + } + return nil, fmt.Errorf("no %s symbol found", name) +} + +func loadPlan9Table(f *plan9obj.File, sname, ename string) ([]byte, error) { + ssym, err := findPlan9Symbol(f, sname) + if err != nil { + return nil, err + } + esym, err := findPlan9Symbol(f, ename) + if err != nil { + return nil, err + } + sect := f.Section("text") + if sect == nil { + return nil, err + } + data, err := sect.Data() + if err != nil { + return nil, err + } + textStart := f.LoadAddress + f.HdrSize + return data[ssym.Value-textStart : esym.Value-textStart], nil +} + +func (f *plan9File) goarch() string { + switch f.plan9.Magic { + case plan9obj.Magic386: + return "386" + case plan9obj.MagicAMD64: + return "amd64" + case plan9obj.MagicARM: + return "arm" + } + return "" +} diff --git a/src/cmd/internal/rsc.io/arm/armasm/Makefile b/src/cmd/internal/rsc.io/arm/armasm/Makefile new file mode 100644 index 000000000..a3f57001f --- /dev/null +++ b/src/cmd/internal/rsc.io/arm/armasm/Makefile @@ -0,0 +1,2 @@ +tables.go: ../armmap/map.go ../arm.csv + go run ../armmap/map.go -fmt=decoder ../arm.csv >_tables.go && gofmt _tables.go >tables.go && rm _tables.go diff --git a/src/cmd/internal/rsc.io/arm/armasm/decode.go b/src/cmd/internal/rsc.io/arm/armasm/decode.go new file mode 100644 index 000000000..6b4d73841 --- /dev/null +++ b/src/cmd/internal/rsc.io/arm/armasm/decode.go @@ -0,0 +1,567 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package armasm + +import ( + "encoding/binary" + "fmt" +) + +// An instFormat describes the format of an instruction encoding. +// An instruction with 32-bit value x matches the format if x&mask == value +// and the condition matches. +// The condition matches if x>>28 == 0xF && value>>28==0xF +// or if x>>28 != 0xF and value>>28 == 0. +// If x matches the format, then the rest of the fields describe how to interpret x. +// The opBits describe bits that should be extracted from x and added to the opcode. +// For example opBits = 0x1234 means that the value +// (2 bits at offset 1) followed by (4 bits at offset 3) +// should be added to op. +// Finally the args describe how to decode the instruction arguments. +// args is stored as a fixed-size array; if there are fewer than len(args) arguments, +// args[i] == 0 marks the end of the argument list. +type instFormat struct { + mask uint32 + value uint32 + priority int8 + op Op + opBits uint64 + args instArgs +} + +type instArgs [4]instArg + +var ( + errMode = fmt.Errorf("unsupported execution mode") + errShort = fmt.Errorf("truncated instruction") + errUnknown = fmt.Errorf("unknown instruction") +) + +var decoderCover []bool + +// Decode decodes the leading bytes in src as a single instruction. +func Decode(src []byte, mode Mode) (inst Inst, err error) { + if mode != ModeARM { + return Inst{}, errMode + } + if len(src) < 4 { + return Inst{}, errShort + } + + if decoderCover == nil { + decoderCover = make([]bool, len(instFormats)) + } + + x := binary.LittleEndian.Uint32(src) + + // The instFormat table contains both conditional and unconditional instructions. + // Considering only the top 4 bits, the conditional instructions use mask=0, value=0, + // while the unconditional instructions use mask=f, value=f. + // Prepare a version of x with the condition cleared to 0 in conditional instructions + // and then assume mask=f during matching. + const condMask = 0xf0000000 + xNoCond := x + if x&condMask != condMask { + xNoCond &^= condMask + } + var priority int8 +Search: + for i := range instFormats { + f := &instFormats[i] + if xNoCond&(f.mask|condMask) != f.value || f.priority <= priority { + continue + } + delta := uint32(0) + deltaShift := uint(0) + for opBits := f.opBits; opBits != 0; opBits >>= 16 { + n := uint(opBits & 0xFF) + off := uint((opBits >> 8) & 0xFF) + delta |= (x >> off) & (1<> 8) & (1<<4 - 1)) + case arg_R_12: + return Reg((x >> 12) & (1<<4 - 1)) + case arg_R_16: + return Reg((x >> 16) & (1<<4 - 1)) + + case arg_R_12_nzcv: + r := Reg((x >> 12) & (1<<4 - 1)) + if r == R15 { + return APSR_nzcv + } + return r + + case arg_R_16_WB: + mode := AddrLDM + if (x>>21)&1 != 0 { + mode = AddrLDM_WB + } + return Mem{Base: Reg((x >> 16) & (1<<4 - 1)), Mode: mode} + + case arg_R_rotate: + Rm := Reg(x & (1<<4 - 1)) + typ, count := decodeShift(x) + // ROR #0 here means ROR #0, but decodeShift rewrites to RRX #1. + if typ == RotateRightExt { + return Reg(Rm) + } + return RegShift{Rm, typ, uint8(count)} + + case arg_R_shift_R: + Rm := Reg(x & (1<<4 - 1)) + Rs := Reg((x >> 8) & (1<<4 - 1)) + typ := Shift((x >> 5) & (1<<2 - 1)) + return RegShiftReg{Rm, typ, Rs} + + case arg_R_shift_imm: + Rm := Reg(x & (1<<4 - 1)) + typ, count := decodeShift(x) + if typ == ShiftLeft && count == 0 { + return Reg(Rm) + } + return RegShift{Rm, typ, uint8(count)} + + case arg_R1_0: + return Reg((x & (1<<4 - 1))) + case arg_R1_12: + return Reg(((x >> 12) & (1<<4 - 1))) + case arg_R2_0: + return Reg((x & (1<<4 - 1)) | 1) + case arg_R2_12: + return Reg(((x >> 12) & (1<<4 - 1)) | 1) + + case arg_SP: + return SP + + case arg_Sd_Dd: + v := (x >> 12) & (1<<4 - 1) + vx := (x >> 22) & 1 + sz := (x >> 8) & 1 + if sz != 0 { + return D0 + Reg(vx<<4+v) + } else { + return S0 + Reg(v<<1+vx) + } + + case arg_Dd_Sd: + return decodeArg(arg_Sd_Dd, x^(1<<8)) + + case arg_Sd: + v := (x >> 12) & (1<<4 - 1) + vx := (x >> 22) & 1 + return S0 + Reg(v<<1+vx) + + case arg_Sm_Dm: + v := (x >> 0) & (1<<4 - 1) + vx := (x >> 5) & 1 + sz := (x >> 8) & 1 + if sz != 0 { + return D0 + Reg(vx<<4+v) + } else { + return S0 + Reg(v<<1+vx) + } + + case arg_Sm: + v := (x >> 0) & (1<<4 - 1) + vx := (x >> 5) & 1 + return S0 + Reg(v<<1+vx) + + case arg_Dn_half: + v := (x >> 16) & (1<<4 - 1) + vx := (x >> 7) & 1 + return RegX{D0 + Reg(vx<<4+v), int((x >> 21) & 1)} + + case arg_Sn_Dn: + v := (x >> 16) & (1<<4 - 1) + vx := (x >> 7) & 1 + sz := (x >> 8) & 1 + if sz != 0 { + return D0 + Reg(vx<<4+v) + } else { + return S0 + Reg(v<<1+vx) + } + + case arg_Sn: + v := (x >> 16) & (1<<4 - 1) + vx := (x >> 7) & 1 + return S0 + Reg(v<<1+vx) + + case arg_const: + v := x & (1<<8 - 1) + rot := (x >> 8) & (1<<4 - 1) * 2 + if rot > 0 && v&3 == 0 { + // could rotate less + return ImmAlt{uint8(v), uint8(rot)} + } + if rot >= 24 && ((v<<(32-rot))&0xFF)>>(32-rot) == v { + // could wrap around to rot==0. + return ImmAlt{uint8(v), uint8(rot)} + } + return Imm(v>>rot | v<<(32-rot)) + + case arg_endian: + return Endian((x >> 9) & 1) + + case arg_fbits: + return Imm((16 << ((x >> 7) & 1)) - ((x&(1<<4-1))<<1 | (x>>5)&1)) + + case arg_fp_0: + return Imm(0) + + case arg_imm24: + return Imm(x & (1<<24 - 1)) + + case arg_imm5: + return Imm((x >> 7) & (1<<5 - 1)) + + case arg_imm5_32: + x = (x >> 7) & (1<<5 - 1) + if x == 0 { + x = 32 + } + return Imm(x) + + case arg_imm5_nz: + x = (x >> 7) & (1<<5 - 1) + if x == 0 { + return nil + } + return Imm(x) + + case arg_imm_4at16_12at0: + return Imm((x>>16)&(1<<4-1)<<12 | x&(1<<12-1)) + + case arg_imm_12at8_4at0: + return Imm((x>>8)&(1<<12-1)<<4 | x&(1<<4-1)) + + case arg_imm_vfp: + x = (x>>16)&(1<<4-1)<<4 | x&(1<<4-1) + return Imm(x) + + case arg_label24: + imm := (x & (1<<24 - 1)) << 2 + return PCRel(int32(imm<<6) >> 6) + + case arg_label24H: + h := (x >> 24) & 1 + imm := (x&(1<<24-1))<<2 | h<<1 + return PCRel(int32(imm<<6) >> 6) + + case arg_label_m_12: + d := int32(x & (1<<12 - 1)) + return Mem{Base: PC, Mode: AddrOffset, Offset: int16(-d)} + + case arg_label_p_12: + d := int32(x & (1<<12 - 1)) + return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)} + + case arg_label_pm_12: + d := int32(x & (1<<12 - 1)) + u := (x >> 23) & 1 + if u == 0 { + d = -d + } + return Mem{Base: PC, Mode: AddrOffset, Offset: int16(d)} + + case arg_label_pm_4_4: + d := int32((x>>8)&(1<<4-1)<<4 | x&(1<<4-1)) + u := (x >> 23) & 1 + if u == 0 { + d = -d + } + return PCRel(d) + + case arg_lsb_width: + lsb := (x >> 7) & (1<<5 - 1) + msb := (x >> 16) & (1<<5 - 1) + if msb < lsb || msb >= 32 { + return nil + } + return Imm(msb + 1 - lsb) + + case arg_mem_R: + Rn := Reg((x >> 16) & (1<<4 - 1)) + return Mem{Base: Rn, Mode: AddrOffset} + + case arg_mem_R_pm_R_postindex: + // Treat [],+/- like [,+/-{,}]{!} + // by forcing shift bits to <<0 and P=0, W=0 (postindex=true). + return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5|1<<24|1<<21)) + + case arg_mem_R_pm_R_W: + // Treat [,+/-]{!} like [,+/-{,}]{!} + // by forcing shift bits to <<0. + return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^((1<<7-1)<<5)) + + case arg_mem_R_pm_R_shift_imm_offset: + // Treat [],+/-{,} like [,+/-{,}]{!} + // by forcing P=1, W=0 (index=false, wback=false). + return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<21)|1<<24) + + case arg_mem_R_pm_R_shift_imm_postindex: + // Treat [],+/-{,} like [,+/-{,}]{!} + // by forcing P=0, W=0 (postindex=true). + return decodeArg(arg_mem_R_pm_R_shift_imm_W, x&^(1<<24|1<<21)) + + case arg_mem_R_pm_R_shift_imm_W: + Rn := Reg((x >> 16) & (1<<4 - 1)) + Rm := Reg(x & (1<<4 - 1)) + typ, count := decodeShift(x) + u := (x >> 23) & 1 + w := (x >> 21) & 1 + p := (x >> 24) & 1 + if p == 0 && w == 1 { + return nil + } + sign := int8(+1) + if u == 0 { + sign = -1 + } + mode := AddrMode(uint8(p<<1) | uint8(w^1)) + return Mem{Base: Rn, Mode: mode, Sign: sign, Index: Rm, Shift: typ, Count: count} + + case arg_mem_R_pm_imm12_offset: + // Treat [,#+/-] like [{,#+/-}]{!} + // by forcing P=1, W=0 (index=false, wback=false). + return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<21)|1<<24) + + case arg_mem_R_pm_imm12_postindex: + // Treat [],#+/- like [{,#+/-}]{!} + // by forcing P=0, W=0 (postindex=true). + return decodeArg(arg_mem_R_pm_imm12_W, x&^(1<<24|1<<21)) + + case arg_mem_R_pm_imm12_W: + Rn := Reg((x >> 16) & (1<<4 - 1)) + u := (x >> 23) & 1 + w := (x >> 21) & 1 + p := (x >> 24) & 1 + if p == 0 && w == 1 { + return nil + } + sign := int8(+1) + if u == 0 { + sign = -1 + } + imm := int16(x & (1<<12 - 1)) + mode := AddrMode(uint8(p<<1) | uint8(w^1)) + return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm} + + case arg_mem_R_pm_imm8_postindex: + // Treat [],#+/- like [{,#+/-}]{!} + // by forcing P=0, W=0 (postindex=true). + return decodeArg(arg_mem_R_pm_imm8_W, x&^(1<<24|1<<21)) + + case arg_mem_R_pm_imm8_W: + Rn := Reg((x >> 16) & (1<<4 - 1)) + u := (x >> 23) & 1 + w := (x >> 21) & 1 + p := (x >> 24) & 1 + if p == 0 && w == 1 { + return nil + } + sign := int8(+1) + if u == 0 { + sign = -1 + } + imm := int16((x>>8)&(1<<4-1)<<4 | x&(1<<4-1)) + mode := AddrMode(uint8(p<<1) | uint8(w^1)) + return Mem{Base: Rn, Mode: mode, Offset: int16(sign) * imm} + + case arg_mem_R_pm_imm8at0_offset: + Rn := Reg((x >> 16) & (1<<4 - 1)) + u := (x >> 23) & 1 + sign := int8(+1) + if u == 0 { + sign = -1 + } + imm := int16(x&(1<<8-1)) << 2 + return Mem{Base: Rn, Mode: AddrOffset, Offset: int16(sign) * imm} + + case arg_option: + return Imm(x & (1<<4 - 1)) + + case arg_registers: + return RegList(x & (1<<16 - 1)) + + case arg_registers2: + x &= 1<<16 - 1 + n := 0 + for i := 0; i < 16; i++ { + if x>>uint(i)&1 != 0 { + n++ + } + } + if n < 2 { + return nil + } + return RegList(x) + + case arg_registers1: + Rt := (x >> 12) & (1<<4 - 1) + return RegList(1 << Rt) + + case arg_satimm4: + return Imm((x >> 16) & (1<<4 - 1)) + + case arg_satimm5: + return Imm((x >> 16) & (1<<5 - 1)) + + case arg_satimm4m1: + return Imm((x>>16)&(1<<4-1) + 1) + + case arg_satimm5m1: + return Imm((x>>16)&(1<<5-1) + 1) + + case arg_widthm1: + return Imm((x>>16)&(1<<5-1) + 1) + + } +} + +// decodeShift decodes the shift-by-immediate encoded in x. +func decodeShift(x uint32) (Shift, uint8) { + count := (x >> 7) & (1<<5 - 1) + typ := Shift((x >> 5) & (1<<2 - 1)) + switch typ { + case ShiftRight, ShiftRightSigned: + if count == 0 { + count = 32 + } + case RotateRight: + if count == 0 { + typ = RotateRightExt + count = 1 + } + } + return typ, uint8(count) +} diff --git a/src/cmd/internal/rsc.io/arm/armasm/decode_test.go b/src/cmd/internal/rsc.io/arm/armasm/decode_test.go new file mode 100644 index 000000000..25a345a88 --- /dev/null +++ b/src/cmd/internal/rsc.io/arm/armasm/decode_test.go @@ -0,0 +1,69 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package armasm + +import ( + "encoding/hex" + "io/ioutil" + "strconv" + "strings" + "testing" +) + +func TestDecode(t *testing.T) { + data, err := ioutil.ReadFile("testdata/decode.txt") + if err != nil { + t.Fatal(err) + } + all := string(data) + for strings.Contains(all, "\t\t") { + all = strings.Replace(all, "\t\t", "\t", -1) + } + for _, line := range strings.Split(all, "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.SplitN(line, "\t", 4) + i := strings.Index(f[0], "|") + if i < 0 { + t.Errorf("parsing %q: missing | separator", f[0]) + continue + } + if i%2 != 0 { + t.Errorf("parsing %q: misaligned | separator", f[0]) + } + size := i / 2 + code, err := hex.DecodeString(f[0][:i] + f[0][i+1:]) + if err != nil { + t.Errorf("parsing %q: %v", f[0], err) + continue + } + mode, err := strconv.Atoi(f[1]) + if err != nil { + t.Errorf("invalid mode %q in: %s", f[1], line) + continue + } + syntax, asm := f[2], f[3] + inst, err := Decode(code, Mode(mode)) + var out string + if err != nil { + out = "error: " + err.Error() + } else { + switch syntax { + case "gnu": + out = GNUSyntax(inst) + case "plan9": + out = Plan9Syntax(inst, 0, nil, nil) + default: + t.Errorf("unknown syntax %q", syntax) + continue + } + } + if out != asm || inst.Len != size { + t.Errorf("Decode(%s) [%s] = %s, %d, want %s, %d", f[0], syntax, out, inst.Len, asm, size) + } + } +} diff --git a/src/cmd/internal/rsc.io/arm/armasm/ext_test.go b/src/cmd/internal/rsc.io/arm/armasm/ext_test.go new file mode 100644 index 000000000..b0bd85597 --- /dev/null +++ b/src/cmd/internal/rsc.io/arm/armasm/ext_test.go @@ -0,0 +1,614 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Support for testing against external disassembler program. +// Copied and simplified from rsc.io/x86/x86asm/ext_test.go. + +package armasm + +import ( + "bufio" + "bytes" + "encoding/hex" + "flag" + "fmt" + "io/ioutil" + "log" + "math/rand" + "os" + "os/exec" + "regexp" + "runtime" + "strings" + "testing" + "time" +) + +var ( + printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths") + dumpTest = flag.Bool("dump", false, "dump all encodings") + mismatch = flag.Bool("mismatch", false, "log allowed mismatches") + longTest = flag.Bool("long", false, "long test") + keep = flag.Bool("keep", false, "keep object files around") + debug = false +) + +// A ExtInst represents a single decoded instruction parsed +// from an external disassembler's output. +type ExtInst struct { + addr uint32 + enc [4]byte + nenc int + text string +} + +func (r ExtInst) String() string { + return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text) +} + +// An ExtDis is a connection between an external disassembler and a test. +type ExtDis struct { + Arch Mode + Dec chan ExtInst + File *os.File + Size int + KeepFile bool + Cmd *exec.Cmd +} + +// Run runs the given command - the external disassembler - and returns +// a buffered reader of its standard output. +func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) { + if *keep { + log.Printf("%s\n", strings.Join(cmd, " ")) + } + ext.Cmd = exec.Command(cmd[0], cmd[1:]...) + out, err := ext.Cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("stdoutpipe: %v", err) + } + if err := ext.Cmd.Start(); err != nil { + return nil, fmt.Errorf("exec: %v", err) + } + + b := bufio.NewReaderSize(out, 1<<20) + return b, nil +} + +// Wait waits for the command started with Run to exit. +func (ext *ExtDis) Wait() error { + return ext.Cmd.Wait() +} + +// testExtDis tests a set of byte sequences against an external disassembler. +// The disassembler is expected to produce the given syntax and be run +// in the given architecture mode (16, 32, or 64-bit). +// The extdis function must start the external disassembler +// and then parse its output, sending the parsed instructions on ext.Dec. +// The generate function calls its argument f once for each byte sequence +// to be tested. The generate function itself will be called twice, and it must +// make the same sequence of calls to f each time. +// When a disassembly does not match the internal decoding, +// allowedMismatch determines whether this mismatch should be +// allowed, or else considered an error. +func testExtDis( + t *testing.T, + syntax string, + arch Mode, + extdis func(ext *ExtDis) error, + generate func(f func([]byte)), + allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool, +) { + start := time.Now() + ext := &ExtDis{ + Dec: make(chan ExtInst), + Arch: arch, + } + errc := make(chan error) + + // First pass: write instructions to input file for external disassembler. + file, f, size, err := writeInst(generate) + if err != nil { + t.Fatal(err) + } + ext.Size = size + ext.File = f + defer func() { + f.Close() + if !*keep { + os.Remove(file) + } + }() + + // Second pass: compare disassembly against our decodings. + var ( + totalTests = 0 + totalSkips = 0 + totalErrors = 0 + + errors = make([]string, 0, 100) // sampled errors, at most cap + ) + go func() { + errc <- extdis(ext) + }() + generate(func(enc []byte) { + dec, ok := <-ext.Dec + if !ok { + t.Errorf("decoding stream ended early") + return + } + inst, text := disasm(syntax, arch, pad(enc)) + totalTests++ + if *dumpTest { + fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc) + } + if text != dec.text || inst.Len != dec.nenc { + suffix := "" + if allowedMismatch(text, size, &inst, dec) { + totalSkips++ + if !*mismatch { + return + } + suffix += " (allowed mismatch)" + } + totalErrors++ + if len(errors) >= cap(errors) { + j := rand.Intn(totalErrors) + if j >= cap(errors) { + return + } + errors = append(errors[:j], errors[j+1:]...) + } + errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix)) + } + }) + + if *mismatch { + totalErrors -= totalSkips + } + + for _, b := range errors { + t.Log(b) + } + + if totalErrors > 0 { + t.Fail() + } + t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds()) + + if err := <-errc; err != nil { + t.Fatal("external disassembler: %v", err) + } + +} + +const start = 0x8000 // start address of text + +// writeInst writes the generated byte sequences to a new file +// starting at offset start. That file is intended to be the input to +// the external disassembler. +func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) { + f, err = ioutil.TempFile("", "armasm") + if err != nil { + return + } + + file = f.Name() + + f.Seek(start, 0) + w := bufio.NewWriter(f) + defer w.Flush() + size = 0 + generate(func(x []byte) { + if len(x) > 4 { + x = x[:4] + } + if debug { + fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) + } + w.Write(x) + w.Write(zeros[len(x):]) + size += len(zeros) + }) + return file, f, size, nil +} + +var zeros = []byte{0, 0, 0, 0} + +// pad pads the code sequenc with pops. +func pad(enc []byte) []byte { + if len(enc) < 4 { + enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...) + } + return enc +} + +// disasm returns the decoded instruction and text +// for the given source bytes, using the given syntax and mode. +func disasm(syntax string, mode Mode, src []byte) (inst Inst, text string) { + // If printTests is set, we record the coverage value + // before and after, and we write out the inputs for which + // coverage went up, in the format expected in testdata/decode.text. + // This produces a fairly small set of test cases that exercise nearly + // all the code. + var cover float64 + if *printTests { + cover -= coverage() + } + + inst, err := Decode(src, mode) + if err != nil { + text = "error: " + err.Error() + } else { + text = inst.String() + switch syntax { + //case "arm": + // text = ARMSyntax(inst) + case "gnu": + text = GNUSyntax(inst) + //case "plan9": + // text = Plan9Syntax(inst, 0, nil) + default: + text = "error: unknown syntax " + syntax + } + } + + if *printTests { + cover += coverage() + if cover > 0 { + max := len(src) + if max > 4 && inst.Len <= 4 { + max = 4 + } + fmt.Printf("%x|%x\t%d\t%s\t%s\n", src[:inst.Len], src[inst.Len:max], mode, syntax, text) + } + } + + return +} + +// coverage returns a floating point number denoting the +// test coverage until now. The number increases when new code paths are exercised, +// both in the Go program and in the decoder byte code. +func coverage() float64 { + /* + testing.Coverage is not in the main distribution. + The implementation, which must go in package testing, is: + + // Coverage reports the current code coverage as a fraction in the range [0, 1]. + func Coverage() float64 { + var n, d int64 + for _, counters := range cover.Counters { + for _, c := range counters { + if c > 0 { + n++ + } + d++ + } + } + if d == 0 { + return 0 + } + return float64(n) / float64(d) + } + */ + + var f float64 + f += testing.Coverage() + f += decodeCoverage() + return f +} + +func decodeCoverage() float64 { + n := 0 + for _, t := range decoderCover { + if t { + n++ + } + } + return float64(1+n) / float64(1+len(decoderCover)) +} + +// Helpers for writing disassembler output parsers. + +// hasPrefix reports whether any of the space-separated words in the text s +// begins with any of the given prefixes. +func hasPrefix(s string, prefixes ...string) bool { + for _, prefix := range prefixes { + for s := s; s != ""; { + if strings.HasPrefix(s, prefix) { + return true + } + i := strings.Index(s, " ") + if i < 0 { + break + } + s = s[i+1:] + } + } + return false +} + +// contains reports whether the text s contains any of the given substrings. +func contains(s string, substrings ...string) bool { + for _, sub := range substrings { + if strings.Contains(s, sub) { + return true + } + } + return false +} + +// isHex reports whether b is a hexadecimal character (0-9A-Fa-f). +func isHex(b byte) bool { return b == '0' || unhex[b] > 0 } + +// parseHex parses the hexadecimal byte dump in hex, +// appending the parsed bytes to raw and returning the updated slice. +// The returned bool signals whether any invalid hex was found. +// Spaces and tabs between bytes are okay but any other non-hex is not. +func parseHex(hex []byte, raw []byte) ([]byte, bool) { + hex = trimSpace(hex) + for j := 0; j < len(hex); { + for hex[j] == ' ' || hex[j] == '\t' { + j++ + } + if j >= len(hex) { + break + } + if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) { + return nil, false + } + raw = append(raw, unhex[hex[j]]<<4|unhex[hex[j+1]]) + j += 2 + } + return raw, true +} + +var unhex = [256]byte{ + '0': 0, + '1': 1, + '2': 2, + '3': 3, + '4': 4, + '5': 5, + '6': 6, + '7': 7, + '8': 8, + '9': 9, + 'A': 10, + 'B': 11, + 'C': 12, + 'D': 13, + 'E': 14, + 'F': 15, + 'a': 10, + 'b': 11, + 'c': 12, + 'd': 13, + 'e': 14, + 'f': 15, +} + +// index is like bytes.Index(s, []byte(t)) but avoids the allocation. +func index(s []byte, t string) int { + i := 0 + for { + j := bytes.IndexByte(s[i:], t[0]) + if j < 0 { + return -1 + } + i = i + j + if i+len(t) > len(s) { + return -1 + } + for k := 1; k < len(t); k++ { + if s[i+k] != t[k] { + goto nomatch + } + } + return i + nomatch: + i++ + } +} + +// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s. +// If s must be rewritten, it is rewritten in place. +func fixSpace(s []byte) []byte { + s = trimSpace(s) + for i := 0; i < len(s); i++ { + if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' { + goto Fix + } + } + return s + +Fix: + b := s + w := 0 + for i := 0; i < len(s); i++ { + c := s[i] + if c == '\t' || c == '\n' { + c = ' ' + } + if c == ' ' && w > 0 && b[w-1] == ' ' { + continue + } + b[w] = c + w++ + } + if w > 0 && b[w-1] == ' ' { + w-- + } + return b[:w] +} + +// trimSpace trims leading and trailing space from s, returning a subslice of s. +func trimSpace(s []byte) []byte { + j := len(s) + for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t' || s[j-1] == '\n') { + j-- + } + i := 0 + for i < j && (s[i] == ' ' || s[i] == '\t') { + i++ + } + return s[i:j] +} + +// pcrel matches instructions using relative addressing mode. +var ( + pcrel = regexp.MustCompile(`^((?:.* )?(?:b|bl)x?(?:eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le)?) 0x([0-9a-f]+)$`) +) + +// Generators. +// +// The test cases are described as functions that invoke a callback repeatedly, +// with a new input sequence each time. These helpers make writing those +// a little easier. + +// condCases generates conditional instructions. +func condCases(t *testing.T) func(func([]byte)) { + return func(try func([]byte)) { + // All the strides are relatively prime to 2 and therefore to 2²⁸, + // so we will not repeat any instructions until we have tried all 2²⁸. + // Using a stride other than 1 is meant to visit the instructions in a + // pseudorandom order, which gives better variety in the set of + // test cases chosen by -printtests. + stride := uint32(10007) + n := 1 << 28 / 7 + if testing.Short() { + stride = 100003 + n = 1 << 28 / 1001 + } else if *longTest { + stride = 200000033 + n = 1 << 28 + } + x := uint32(0) + for i := 0; i < n; i++ { + enc := (x%15)<<28 | x&(1<<28-1) + try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)}) + x += stride + } + } +} + +// uncondCases generates unconditional instructions. +func uncondCases(t *testing.T) func(func([]byte)) { + return func(try func([]byte)) { + condCases(t)(func(enc []byte) { + enc[3] |= 0xF0 + try(enc) + }) + } +} + +func countBits(x uint32) int { + n := 0 + for ; x != 0; x >>= 1 { + n += int(x & 1) + } + return n +} + +func expandBits(x, m uint32) uint32 { + var out uint32 + for i := uint(0); i < 32; i++ { + out >>= 1 + if m&1 != 0 { + out |= (x & 1) << 31 + x >>= 1 + } + m >>= 1 + } + return out +} + +func tryCondMask(mask, val uint32, try func([]byte)) { + n := countBits(^mask) + bits := uint32(0) + for i := 0; i < 1<> 8), byte(x >> 16), byte(x >> 24)}) + } +} + +// vfpCases generates VFP instructions. +func vfpCases(t *testing.T) func(func([]byte)) { + const ( + vfpmask uint32 = 0xFF00FE10 + vfp uint32 = 0x0E009A00 + ) + return func(try func([]byte)) { + tryCondMask(0xff00fe10, 0x0e009a00, try) // standard VFP instruction space + tryCondMask(0xffc00f7f, 0x0e000b10, try) // VFP MOV core reg to/from float64 half + tryCondMask(0xffe00f7f, 0x0e000a10, try) // VFP MOV core reg to/from float32 + tryCondMask(0xffef0fff, 0x0ee10a10, try) // VFP MOV core reg to/from cond codes + } +} + +// hexCases generates the cases written in hexadecimal in the encoded string. +// Spaces in 'encoded' separate entire test cases, not individual bytes. +func hexCases(t *testing.T, encoded string) func(func([]byte)) { + return func(try func([]byte)) { + for _, x := range strings.Fields(encoded) { + src, err := hex.DecodeString(x) + if err != nil { + t.Errorf("parsing %q: %v", x, err) + } + try(src) + } + } +} + +// testdataCases generates the test cases recorded in testdata/decode.txt. +// It only uses the inputs; it ignores the answers recorded in that file. +func testdataCases(t *testing.T) func(func([]byte)) { + var codes [][]byte + data, err := ioutil.ReadFile("testdata/decode.txt") + if err != nil { + t.Fatal(err) + } + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.Fields(line)[0] + i := strings.Index(f, "|") + if i < 0 { + t.Errorf("parsing %q: missing | separator", f) + continue + } + if i%2 != 0 { + t.Errorf("parsing %q: misaligned | separator", f) + } + code, err := hex.DecodeString(f[:i] + f[i+1:]) + if err != nil { + t.Errorf("parsing %q: %v", f, err) + continue + } + codes = append(codes, code) + } + + return func(try func([]byte)) { + for _, code := range codes { + try(code) + } + } +} + +func caller(skip int) string { + pc, _, _, _ := runtime.Caller(skip) + f := runtime.FuncForPC(pc) + name := "?" + if f != nil { + name = f.Name() + if i := strings.LastIndex(name, "."); i >= 0 { + name = name[i+1:] + } + } + return name +} diff --git a/src/cmd/internal/rsc.io/arm/armasm/gnu.go b/src/cmd/internal/rsc.io/arm/armasm/gnu.go new file mode 100644 index 000000000..1a97a5a84 --- /dev/null +++ b/src/cmd/internal/rsc.io/arm/armasm/gnu.go @@ -0,0 +1,164 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package armasm + +import ( + "bytes" + "fmt" + "strings" +) + +var saveDot = strings.NewReplacer( + ".F16", "_dot_F16", + ".F32", "_dot_F32", + ".F64", "_dot_F64", + ".S32", "_dot_S32", + ".U32", "_dot_U32", + ".FXS", "_dot_S", + ".FXU", "_dot_U", + ".32", "_dot_32", +) + +// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. +// This form typically matches the syntax defined in the ARM Reference Manual. +func GNUSyntax(inst Inst) string { + var buf bytes.Buffer + op := inst.Op.String() + op = saveDot.Replace(op) + op = strings.Replace(op, ".", "", -1) + op = strings.Replace(op, "_dot_", ".", -1) + op = strings.ToLower(op) + buf.WriteString(op) + sep := " " + for i, arg := range inst.Args { + if arg == nil { + break + } + text := gnuArg(&inst, i, arg) + if text == "" { + continue + } + buf.WriteString(sep) + sep = ", " + buf.WriteString(text) + } + return buf.String() +} + +func gnuArg(inst *Inst, argIndex int, arg Arg) string { + switch inst.Op &^ 15 { + case LDRD_EQ, LDREXD_EQ, STRD_EQ: + if argIndex == 1 { + // second argument in consecutive pair not printed + return "" + } + case STREXD_EQ: + if argIndex == 2 { + // second argument in consecutive pair not printed + return "" + } + } + + switch arg := arg.(type) { + case Imm: + switch inst.Op &^ 15 { + case BKPT_EQ: + return fmt.Sprintf("%#04x", uint32(arg)) + case SVC_EQ: + return fmt.Sprintf("%#08x", uint32(arg)) + } + return fmt.Sprintf("#%d", int32(arg)) + + case ImmAlt: + return fmt.Sprintf("#%d, %d", arg.Val, arg.Rot) + + case Mem: + R := gnuArg(inst, -1, arg.Base) + X := "" + if arg.Sign != 0 { + X = "" + if arg.Sign < 0 { + X = "-" + } + X += gnuArg(inst, -1, arg.Index) + if arg.Shift == ShiftLeft && arg.Count == 0 { + // nothing + } else if arg.Shift == RotateRightExt { + X += ", rrx" + } else { + X += fmt.Sprintf(", %s #%d", strings.ToLower(arg.Shift.String()), arg.Count) + } + } else { + X = fmt.Sprintf("#%d", arg.Offset) + } + + switch arg.Mode { + case AddrOffset: + if X == "#0" { + return fmt.Sprintf("[%s]", R) + } + return fmt.Sprintf("[%s, %s]", R, X) + case AddrPreIndex: + return fmt.Sprintf("[%s, %s]!", R, X) + case AddrPostIndex: + return fmt.Sprintf("[%s], %s", R, X) + case AddrLDM: + if X == "#0" { + return R + } + case AddrLDM_WB: + if X == "#0" { + return R + "!" + } + } + return fmt.Sprintf("[%s Mode(%d) %s]", R, int(arg.Mode), X) + + case PCRel: + return fmt.Sprintf(".%+#x", int32(arg)+4) + + case Reg: + switch inst.Op &^ 15 { + case LDREX_EQ: + if argIndex == 0 { + return fmt.Sprintf("r%d", int32(arg)) + } + } + switch arg { + case R10: + return "sl" + case R11: + return "fp" + case R12: + return "ip" + } + + case RegList: + var buf bytes.Buffer + fmt.Fprintf(&buf, "{") + sep := "" + for i := 0; i < 16; i++ { + if arg&(1<= Op(len(opstr)) || opstr[op] == "" { + return fmt.Sprintf("Op(%d)", int(op)) + } + return opstr[op] +} + +// An Inst is a single instruction. +type Inst struct { + Op Op // Opcode mnemonic + Enc uint32 // Raw encoding bits. + Len int // Length of encoding in bytes. + Args Args // Instruction arguments, in ARM manual order. +} + +func (i Inst) String() string { + var buf bytes.Buffer + buf.WriteString(i.Op.String()) + for j, arg := range i.Args { + if arg == nil { + break + } + if j == 0 { + buf.WriteString(" ") + } else { + buf.WriteString(", ") + } + buf.WriteString(arg.String()) + } + return buf.String() +} + +// An Args holds the instruction arguments. +// If an instruction has fewer than 4 arguments, +// the final elements in the array are nil. +type Args [4]Arg + +// An Arg is a single instruction argument, one of these types: +// Endian, Imm, Mem, PCRel, Reg, RegList, RegShift, RegShiftReg. +type Arg interface { + IsArg() + String() string +} + +type Float32Imm float32 + +func (Float32Imm) IsArg() {} + +func (f Float32Imm) String() string { + return fmt.Sprintf("#%v", float32(f)) +} + +type Float64Imm float32 + +func (Float64Imm) IsArg() {} + +func (f Float64Imm) String() string { + return fmt.Sprintf("#%v", float64(f)) +} + +// An Imm is an integer constant. +type Imm uint32 + +func (Imm) IsArg() {} + +func (i Imm) String() string { + return fmt.Sprintf("#%#x", uint32(i)) +} + +// A ImmAlt is an alternate encoding of an integer constant. +type ImmAlt struct { + Val uint8 + Rot uint8 +} + +func (ImmAlt) IsArg() {} + +func (i ImmAlt) Imm() Imm { + v := uint32(i.Val) + r := uint(i.Rot) + return Imm(v>>r | v<<(32-r)) +} + +func (i ImmAlt) String() string { + return fmt.Sprintf("#%#x, %d", i.Val, i.Rot) +} + +// A Label is a text (code) address. +type Label uint32 + +func (Label) IsArg() {} + +func (i Label) String() string { + return fmt.Sprintf("%#x", uint32(i)) +} + +// A Reg is a single register. +// The zero value denotes R0, not the absence of a register. +type Reg uint8 + +const ( + R0 Reg = iota + R1 + R2 + R3 + R4 + R5 + R6 + R7 + R8 + R9 + R10 + R11 + R12 + R13 + R14 + R15 + + S0 + S1 + S2 + S3 + S4 + S5 + S6 + S7 + S8 + S9 + S10 + S11 + S12 + S13 + S14 + S15 + S16 + S17 + S18 + S19 + S20 + S21 + S22 + S23 + S24 + S25 + S26 + S27 + S28 + S29 + S30 + S31 + + D0 + D1 + D2 + D3 + D4 + D5 + D6 + D7 + D8 + D9 + D10 + D11 + D12 + D13 + D14 + D15 + D16 + D17 + D18 + D19 + D20 + D21 + D22 + D23 + D24 + D25 + D26 + D27 + D28 + D29 + D30 + D31 + + APSR + APSR_nzcv + FPSCR + + SP = R13 + LR = R14 + PC = R15 +) + +func (Reg) IsArg() {} + +func (r Reg) String() string { + switch r { + case APSR: + return "APSR" + case APSR_nzcv: + return "APSR_nzcv" + case FPSCR: + return "FPSCR" + case SP: + return "SP" + case PC: + return "PC" + case LR: + return "LR" + } + if R0 <= r && r <= R15 { + return fmt.Sprintf("R%d", int(r-R0)) + } + if S0 <= r && r <= S31 { + return fmt.Sprintf("S%d", int(r-S0)) + } + if D0 <= r && r <= D31 { + return fmt.Sprintf("D%d", int(r-D0)) + } + return fmt.Sprintf("Reg(%d)", int(r)) +} + +// A RegX represents a fraction of a multi-value register. +// The Index field specifies the index number, +// but the size of the fraction is not specified. +// It must be inferred from the instruction and the register type. +// For example, in a VMOV instruction, RegX{D5, 1} represents +// the top 32 bits of the 64-bit D5 register. +type RegX struct { + Reg Reg + Index int +} + +func (RegX) IsArg() {} + +func (r RegX) String() string { + return fmt.Sprintf("%s[%d]", r.Reg, r.Index) +} + +// A RegList is a register list. +// Bits at indexes x = 0 through 15 indicate whether the corresponding Rx register is in the list. +type RegList uint16 + +func (RegList) IsArg() {} + +func (r RegList) String() string { + var buf bytes.Buffer + fmt.Fprintf(&buf, "{") + sep := "" + for i := 0; i < 16; i++ { + if r&(1<= 4 { + raw := binary.LittleEndian.Uint32(dec.enc[:4]) + + // word 21FFF0B5. + // the manual is clear that this is pre-indexed mode (with !) but libopcodes generates post-index (without !). + if raw&0x01200000 == 0x01200000 && strings.Replace(text, "!", "", -1) == dec.text { + return true + } + + // word C100543E: libopcodes says tst, but no evidence for that. + if strings.HasPrefix(dec.text, "tst") && raw&0x0ff00000 != 0x03100000 && raw&0x0ff00000 != 0x01100000 { + return true + } + + // word C3203CE8: libopcodes says teq, but no evidence for that. + if strings.HasPrefix(dec.text, "teq") && raw&0x0ff00000 != 0x03300000 && raw&0x0ff00000 != 0x01300000 { + return true + } + + // word D14C552E: libopcodes says cmp but no evidence for that. + if strings.HasPrefix(dec.text, "cmp") && raw&0x0ff00000 != 0x03500000 && raw&0x0ff00000 != 0x01500000 { + return true + } + + // word 2166AA4A: libopcodes says cmn but no evidence for that. + if strings.HasPrefix(dec.text, "cmn") && raw&0x0ff00000 != 0x03700000 && raw&0x0ff00000 != 0x01700000 { + return true + } + + // word E70AEEEF: libopcodes says str but no evidence for that. + if strings.HasPrefix(dec.text, "str") && len(dec.text) >= 5 && (dec.text[3] == ' ' || dec.text[5] == ' ') && raw&0x0e500018 != 0x06000000 && raw&0x0e500000 != 0x0400000 { + return true + } + + // word B0AF48F4: libopcodes says strd but P=0,W=1 which is unpredictable. + if hasPrefix(dec.text, "ldr", "str") && raw&0x01200000 == 0x00200000 { + return true + } + + // word B6CC1C76: libopcodes inexplicably says 'uxtab16lt r1, ip, r6, ROR #24' instead of 'uxtab16lt r1, ip, r6, ror #24' + if strings.ToLower(dec.text) == text { + return true + } + + // word F410FDA1: libopcodes says PLDW but the manual is clear that PLDW is F5/F7, not F4. + // word F7D0FB17: libopcodes says PLDW but the manual is clear that PLDW has 0x10 clear + if hasPrefix(dec.text, "pld") && raw&0xfd000010 != 0xf5000000 { + return true + } + + // word F650FE14: libopcodes says PLI but the manual is clear that PLI has 0x10 clear + if hasPrefix(dec.text, "pli") && raw&0xff000010 != 0xf6000000 { + return true + } + } + + return false +} + +// Instructions known to libopcodes (or xed) but not to us. +// Most of these are floating point coprocessor instructions. +var unsupported = strings.Fields(` + abs + acs + adf + aes + asn + atn + cdp + cf + cmf + cnf + cos + cps + crc32 + dvf + eret + exp + fadd + fcmp + fcpy + fcvt + fdiv + fdv + fix + fld + flt + fmac + fmd + fml + fmr + fms + fmul + fmx + fneg + fnm + frd + fsit + fsq + fst + fsu + fto + fui + hlt + hvc + lda + ldc + ldf + lfm + lgn + log + mar + mcr + mcrr + mia + mnf + mra + mrc + mrrc + mrs + msr + msr + muf + mvf + nrm + pol + pow + rdf + rfc + rfe + rfs + rmf + rnd + rpw + rsf + sdiv + sev + sfm + sha1 + sha256 + sin + smc + sqt + srs + stc + stf + stl + suf + tan + udf + udiv + urd + vfma + vfms + vfnma + vfnms + vrint + wfc + wfs +`) diff --git a/src/cmd/internal/rsc.io/arm/armasm/objdumpext_test.go b/src/cmd/internal/rsc.io/arm/armasm/objdumpext_test.go new file mode 100644 index 000000000..d88c67fc0 --- /dev/null +++ b/src/cmd/internal/rsc.io/arm/armasm/objdumpext_test.go @@ -0,0 +1,260 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Copied and simplified from rsc.io/x86/x86asm/objdumpext_test.go. + +package armasm + +import ( + "bytes" + "debug/elf" + "encoding/binary" + "fmt" + "io" + "log" + "os" + "strconv" + "strings" + "testing" +) + +const objdumpPath = "/usr/local/bin/arm-linux-elf-objdump" + +func testObjdumpARM(t *testing.T, generate func(func([]byte))) { + testObjdumpArch(t, generate, ModeARM) +} + +func testObjdumpArch(t *testing.T, generate func(func([]byte)), arch Mode) { + if testing.Short() { + t.Skip("skipping objdump test in short mode") + } + + if _, err := os.Stat(objdumpPath); err != nil { + t.Fatal(err) + } + + testExtDis(t, "gnu", arch, objdump, generate, allowedMismatchObjdump) +} + +func objdump(ext *ExtDis) error { + // File already written with instructions; add ELF header. + if ext.Arch == ModeARM { + if err := writeELF32(ext.File, ext.Size); err != nil { + return err + } + } else { + panic("unknown arch") + } + + b, err := ext.Run(objdumpPath, "-d", "-z", ext.File.Name()) + if err != nil { + return err + } + + var ( + nmatch int + reading bool + next uint32 = start + addr uint32 + encbuf [4]byte + enc []byte + text string + ) + flush := func() { + if addr == next { + if m := pcrel.FindStringSubmatch(text); m != nil { + targ, _ := strconv.ParseUint(m[2], 16, 64) + text = fmt.Sprintf("%s .%+#x", m[1], int32(uint32(targ)-addr-uint32(len(enc)))) + } + if strings.HasPrefix(text, "stmia") { + text = "stm" + text[5:] + } + if strings.HasPrefix(text, "stmfd") { + text = "stmdb" + text[5:] + } + if strings.HasPrefix(text, "ldmfd") { + text = "ldm" + text[5:] + } + text = strings.Replace(text, "#0.0", "#0", -1) + if text == "undefined" && len(enc) == 4 { + text = "error: unknown instruction" + enc = nil + } + if len(enc) == 4 { + // prints as word but we want to record bytes + enc[0], enc[3] = enc[3], enc[0] + enc[1], enc[2] = enc[2], enc[1] + } + ext.Dec <- ExtInst{addr, encbuf, len(enc), text} + encbuf = [4]byte{} + enc = nil + next += 4 + } + } + var textangle = []byte("<.text>:") + for { + line, err := b.ReadSlice('\n') + if err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("reading objdump output: %v", err) + } + if bytes.Contains(line, textangle) { + reading = true + continue + } + if !reading { + continue + } + if debug { + os.Stdout.Write(line) + } + if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil { + enc = enc1 + continue + } + flush() + nmatch++ + addr, enc, text = parseLine(line, encbuf[:0]) + if addr > next { + return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line) + } + } + flush() + if next != start+uint32(ext.Size) { + return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size) + } + if err := ext.Wait(); err != nil { + return fmt.Errorf("exec: %v", err) + } + + return nil +} + +var ( + undefined = []byte("") + unpredictable = []byte("") + illegalShifter = []byte("") +) + +func parseLine(line []byte, encstart []byte) (addr uint32, enc []byte, text string) { + oline := line + i := index(line, ":\t") + if i < 0 { + log.Fatalf("cannot parse disassembly: %q", oline) + } + x, err := strconv.ParseUint(string(trimSpace(line[:i])), 16, 32) + if err != nil { + log.Fatalf("cannot parse disassembly: %q", oline) + } + addr = uint32(x) + line = line[i+2:] + i = bytes.IndexByte(line, '\t') + if i < 0 { + log.Fatalf("cannot parse disassembly: %q", oline) + } + enc, ok := parseHex(line[:i], encstart) + if !ok { + log.Fatalf("cannot parse disassembly: %q", oline) + } + line = trimSpace(line[i:]) + if bytes.Contains(line, undefined) { + text = "undefined" + return + } + if bytes.Contains(line, illegalShifter) { + text = "undefined" + return + } + if false && bytes.Contains(line, unpredictable) { + text = "unpredictable" + return + } + if i := bytes.IndexByte(line, ';'); i >= 0 { + line = trimSpace(line[:i]) + } + text = string(fixSpace(line)) + return +} + +func parseContinuation(line []byte, enc []byte) []byte { + i := index(line, ":\t") + if i < 0 { + return nil + } + line = line[i+1:] + enc, _ = parseHex(line, enc) + return enc +} + +// writeELF32 writes an ELF32 header to the file, +// describing a text segment that starts at start +// and extends for size bytes. +func writeELF32(f *os.File, size int) error { + f.Seek(0, 0) + var hdr elf.Header32 + var prog elf.Prog32 + var sect elf.Section32 + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, &hdr) + off1 := buf.Len() + binary.Write(&buf, binary.LittleEndian, &prog) + off2 := buf.Len() + binary.Write(&buf, binary.LittleEndian, §) + off3 := buf.Len() + buf.Reset() + data := byte(elf.ELFDATA2LSB) + hdr = elf.Header32{ + Ident: [16]byte{0x7F, 'E', 'L', 'F', 1, data, 1}, + Type: 2, + Machine: uint16(elf.EM_ARM), + Version: 1, + Entry: start, + Phoff: uint32(off1), + Shoff: uint32(off2), + Flags: 0x05000002, + Ehsize: uint16(off1), + Phentsize: uint16(off2 - off1), + Phnum: 1, + Shentsize: uint16(off3 - off2), + Shnum: 3, + Shstrndx: 2, + } + binary.Write(&buf, binary.LittleEndian, &hdr) + prog = elf.Prog32{ + Type: 1, + Off: start, + Vaddr: start, + Paddr: start, + Filesz: uint32(size), + Memsz: uint32(size), + Flags: 5, + Align: start, + } + binary.Write(&buf, binary.LittleEndian, &prog) + binary.Write(&buf, binary.LittleEndian, §) // NULL section + sect = elf.Section32{ + Name: 1, + Type: uint32(elf.SHT_PROGBITS), + Addr: start, + Off: start, + Size: uint32(size), + Flags: uint32(elf.SHF_ALLOC | elf.SHF_EXECINSTR), + Addralign: 4, + } + binary.Write(&buf, binary.LittleEndian, §) // .text + sect = elf.Section32{ + Name: uint32(len("\x00.text\x00")), + Type: uint32(elf.SHT_STRTAB), + Addr: 0, + Off: uint32(off2 + (off3-off2)*3), + Size: uint32(len("\x00.text\x00.shstrtab\x00")), + Addralign: 1, + } + binary.Write(&buf, binary.LittleEndian, §) + buf.WriteString("\x00.text\x00.shstrtab\x00") + f.Write(buf.Bytes()) + return nil +} diff --git a/src/cmd/internal/rsc.io/arm/armasm/plan9x.go b/src/cmd/internal/rsc.io/arm/armasm/plan9x.go new file mode 100644 index 000000000..952c5190b --- /dev/null +++ b/src/cmd/internal/rsc.io/arm/armasm/plan9x.go @@ -0,0 +1,211 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package armasm + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "strings" +) + +// Plan9Syntax returns the Go assembler syntax for the instruction. +// The syntax was originally defined by Plan 9. +// The pc is the program counter of the instruction, used for expanding +// PC-relative addresses into absolute ones. +// The symname function queries the symbol table for the program +// being disassembled. Given a target address it returns the name and base +// address of the symbol containing the target, if any; otherwise it returns "", 0. +// The reader r should read from the text segment using text addresses +// as offsets; it is used to display pc-relative loads as constant loads. +func Plan9Syntax(inst Inst, pc uint64, symname func(uint64) (string, uint64), text io.ReaderAt) string { + if symname == nil { + symname = func(uint64) (string, uint64) { return "", 0 } + } + + var args []string + for _, a := range inst.Args { + if a == nil { + break + } + args = append(args, plan9Arg(&inst, pc, symname, a)) + } + + op := inst.Op.String() + + switch inst.Op &^ 15 { + case LDR_EQ, LDRB_EQ, LDRH_EQ: + // Check for RET + reg, _ := inst.Args[0].(Reg) + mem, _ := inst.Args[1].(Mem) + if inst.Op&^15 == LDR_EQ && reg == R15 && mem.Base == SP && mem.Sign == 0 && mem.Mode == AddrPostIndex { + return fmt.Sprintf("RET%s #%d", op[3:], mem.Offset) + } + + // Check for PC-relative load. + if mem.Base == PC && mem.Sign == 0 && mem.Mode == AddrOffset && text != nil { + addr := uint32(pc) + 8 + uint32(mem.Offset) + buf := make([]byte, 4) + switch inst.Op &^ 15 { + case LDRB_EQ: + if _, err := text.ReadAt(buf[:1], int64(addr)); err != nil { + break + } + args[1] = fmt.Sprintf("$%#x", buf[0]) + + case LDRH_EQ: + if _, err := text.ReadAt(buf[:2], int64(addr)); err != nil { + break + } + args[1] = fmt.Sprintf("$%#x", binary.LittleEndian.Uint16(buf)) + + case LDR_EQ: + if _, err := text.ReadAt(buf, int64(addr)); err != nil { + break + } + x := binary.LittleEndian.Uint32(buf) + if s, base := symname(uint64(x)); s != "" && uint64(x) == base { + args[1] = fmt.Sprintf("$%s(SB)", s) + } else { + args[1] = fmt.Sprintf("$%#x", x) + } + } + } + } + + // Move addressing mode into opcode suffix. + suffix := "" + switch inst.Op &^ 15 { + case LDR_EQ, LDRB_EQ, LDRH_EQ, STR_EQ, STRB_EQ, STRH_EQ: + mem, _ := inst.Args[1].(Mem) + switch mem.Mode { + case AddrOffset, AddrLDM: + // no suffix + case AddrPreIndex, AddrLDM_WB: + suffix = ".W" + case AddrPostIndex: + suffix = ".P" + } + off := "" + if mem.Offset != 0 { + off = fmt.Sprintf("%#x", mem.Offset) + } + base := fmt.Sprintf("(R%d)", int(mem.Base)) + index := "" + if mem.Sign != 0 { + sign := "" + if mem.Sign < 0 { + sign = "" + } + shift := "" + if mem.Count != 0 { + shift = fmt.Sprintf("%s%d", plan9Shift[mem.Shift], mem.Count) + } + index = fmt.Sprintf("(%sR%d%s)", sign, int(mem.Index), shift) + } + args[1] = off + base + index + } + + // Reverse args, placing dest last. + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + switch inst.Op &^ 15 { + case MOV_EQ: + op = "MOVW" + op[3:] + + case LDR_EQ: + op = "MOVW" + op[3:] + suffix + case LDRB_EQ: + op = "MOVB" + op[4:] + suffix + case LDRH_EQ: + op = "MOVH" + op[4:] + suffix + + case STR_EQ: + op = "MOVW" + op[3:] + suffix + args[0], args[1] = args[1], args[0] + case STRB_EQ: + op = "MOVB" + op[4:] + suffix + args[0], args[1] = args[1], args[0] + case STRH_EQ: + op = "MOVH" + op[4:] + suffix + args[0], args[1] = args[1], args[0] + } + + if args != nil { + op += " " + strings.Join(args, ", ") + } + + return op +} + +// assembler syntax for the various shifts. +// @x> is a lie; the assembler uses @> 0 +// instead of @x> 1, but i wanted to be clear that it +// was a different operation (rotate right extended, not rotate right). +var plan9Shift = []string{"<<", ">>", "->", "@>", "@x>"} + +func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string { + switch a := arg.(type) { + case Endian: + + case Imm: + return fmt.Sprintf("$%d", int(a)) + + case Mem: + + case PCRel: + addr := uint32(pc) + 8 + uint32(a) + if s, base := symname(uint64(addr)); s != "" && uint64(addr) == base { + return fmt.Sprintf("%s(SB)", s) + } + return fmt.Sprintf("%#x", addr) + + case Reg: + if a < 16 { + return fmt.Sprintf("R%d", int(a)) + } + + case RegList: + var buf bytes.Buffer + start := -2 + end := -2 + fmt.Fprintf(&buf, "[") + flush := func() { + if start >= 0 { + if buf.Len() > 1 { + fmt.Fprintf(&buf, ",") + } + if start == end { + fmt.Fprintf(&buf, "R%d", start) + } else { + fmt.Fprintf(&buf, "R%d-R%d", start, end) + } + } + } + for i := 0; i < 16; i++ { + if a&(1< ,,# cond:4|0|0|1|0|1|0|1|S|Rn:4|Rd:4|imm12:12 + {0x0fe00090, 0x00a00010, 4, ADC_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_16, arg_R_shift_R}}, // ADC{S} ,,, cond:4|0|0|0|0|1|0|1|S|Rn:4|Rd:4|Rs:4|0|type:2|1|Rm:4 + {0x0fe00010, 0x00a00000, 2, ADC_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_16, arg_R_shift_imm}}, // ADC{S} ,,{,} cond:4|0|0|0|0|1|0|1|S|Rn:4|Rd:4|imm5:5|type:2|0|Rm:4 + {0x0fe00000, 0x02800000, 2, ADD_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_16, arg_const}}, // ADD{S} ,,# cond:4|0|0|1|0|1|0|0|S|Rn:4|Rd:4|imm12:12 + {0x0fe00090, 0x00800010, 4, ADD_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_16, arg_R_shift_R}}, // ADD{S} ,,, cond:4|0|0|0|0|1|0|0|S|Rn:4|Rd:4|Rs:4|0|type:2|1|Rm:4 + {0x0fe00010, 0x00800000, 2, ADD_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_16, arg_R_shift_imm}}, // ADD{S} ,,{,} cond:4|0|0|0|0|1|0|0|S|Rn:4|Rd:4|imm5:5|type:2|0|Rm:4 + {0x0fef0000, 0x028d0000, 2, ADD_EQ, 0x14011c04, instArgs{arg_R_12, arg_SP, arg_const}}, // ADD{S} ,SP,# cond:4|0|0|1|0|1|0|0|S|1|1|0|1|Rd:4|imm12:12 + {0x0fef0010, 0x008d0000, 2, ADD_EQ, 0x14011c04, instArgs{arg_R_12, arg_SP, arg_R_shift_imm}}, // ADD{S} ,SP,{,} cond:4|0|0|0|0|1|0|0|S|1|1|0|1|Rd:4|imm5:5|type:2|0|Rm:4 + {0x0fe00000, 0x02000000, 2, AND_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_16, arg_const}}, // AND{S} ,,# cond:4|0|0|1|0|0|0|0|S|Rn:4|Rd:4|imm12:12 + {0x0fe00090, 0x00000010, 4, AND_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_16, arg_R_shift_R}}, // AND{S} ,,, cond:4|0|0|0|0|0|0|0|S|Rn:4|Rd:4|Rs:4|0|type:2|1|Rm:4 + {0x0fe00010, 0x00000000, 2, AND_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_16, arg_R_shift_imm}}, // AND{S} ,,{,} cond:4|0|0|0|0|0|0|0|S|Rn:4|Rd:4|imm5:5|type:2|0|Rm:4 + {0x0fef0070, 0x01a00040, 4, ASR_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_0, arg_imm5_32}}, // ASR{S} ,,# cond:4|0|0|0|1|1|0|1|S|0|0|0|0|Rd:4|imm5:5|1|0|0|Rm:4 + {0x0fef00f0, 0x01a00050, 4, ASR_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_0, arg_R_8}}, // ASR{S} ,, cond:4|0|0|0|1|1|0|1|S|0|0|0|0|Rd:4|Rm:4|0|1|0|1|Rn:4 + {0x0f000000, 0x0a000000, 4, B_EQ, 0x1c04, instArgs{arg_label24}}, // B cond:4|1|0|1|0|imm24:24 + {0x0fe0007f, 0x07c0001f, 4, BFC_EQ, 0x1c04, instArgs{arg_R_12, arg_imm5, arg_lsb_width}}, // BFC ,#,# cond:4|0|1|1|1|1|1|0|msb:5|Rd:4|lsb:5|0|0|1|1|1|1|1 + {0x0fe00070, 0x07c00010, 2, BFI_EQ, 0x1c04, instArgs{arg_R_12, arg_R_0, arg_imm5, arg_lsb_width}}, // BFI ,,#,# cond:4|0|1|1|1|1|1|0|msb:5|Rd:4|lsb:5|0|0|1|Rn:4 + {0x0fe00000, 0x03c00000, 2, BIC_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_16, arg_const}}, // BIC{S} ,,# cond:4|0|0|1|1|1|1|0|S|Rn:4|Rd:4|imm12:12 + {0x0fe00090, 0x01c00010, 4, BIC_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_16, arg_R_shift_R}}, // BIC{S} ,,, cond:4|0|0|0|1|1|1|0|S|Rn:4|Rd:4|Rs:4|0|type:2|1|Rm:4 + {0x0fe00010, 0x01c00000, 2, BIC_EQ, 0x14011c04, instArgs{arg_R_12, arg_R_16, arg_R_shift_imm}}, // BIC{S} ,,{,} cond:4|0|0|0|1|1|1|0|S|Rn:4|Rd:4|imm5:5|type:2|0|Rm:4 + {0x0ff000f0, 0x01200070, 4, BKPT_EQ, 0x1c04, instArgs{arg_imm_12at8_4at0}}, // BKPT # cond:4|0|0|0|1|0|0|1|0|imm12:12|0|1|1|1|imm4:4 + {0x0f000000, 0x0b000000, 4, BL_EQ, 0x1c04, instArgs{arg_label24}}, // BL cond:4|1|0|1|1|imm24:24 + {0xfe000000, 0xfa000000, 4, BLX, 0x0, instArgs{arg_label24H}}, // BLX 1|1|1|1|1|0|1|H|imm24:24 + {0x0ffffff0, 0x012fff30, 4, BLX_EQ, 0x1c04, instArgs{arg_R_0}}, // BLX cond:4|0|0|0|1|0|0|1|0|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|0|0|1|1|Rm:4 + {0x0ff000f0, 0x012fff30, 3, BLX_EQ, 0x1c04, instArgs{arg_R_0}}, // BLX cond:4|0|0|0|1|0|0|1|0|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|0|0|1|1|Rm:4 + {0x0ffffff0, 0x012fff10, 4, BX_EQ, 0x1c04, instArgs{arg_R_0}}, // BX cond:4|0|0|0|1|0|0|1|0|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|0|0|0|1|Rm:4 + {0x0ff000f0, 0x012fff10, 3, BX_EQ, 0x1c04, instArgs{arg_R_0}}, // BX cond:4|0|0|0|1|0|0|1|0|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|0|0|0|1|Rm:4 + {0x0ffffff0, 0x012fff20, 4, BXJ_EQ, 0x1c04, instArgs{arg_R_0}}, // BXJ cond:4|0|0|0|1|0|0|1|0|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|0|0|1|0|Rm:4 + {0x0ff000f0, 0x012fff20, 3, BXJ_EQ, 0x1c04, instArgs{arg_R_0}}, // BXJ cond:4|0|0|0|1|0|0|1|0|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|0|0|1|0|Rm:4 + {0xffffffff, 0xf57ff01f, 4, CLREX, 0x0, instArgs{}}, // CLREX 1|1|1|1|0|1|0|1|0|1|1|1|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(0)|(0)|(0)|(0)|0|0|0|1|(1)|(1)|(1)|(1) + {0xfff000f0, 0xf57ff01f, 3, CLREX, 0x0, instArgs{}}, // CLREX 1|1|1|1|0|1|0|1|0|1|1|1|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(1)|(0)|(0)|(0)|(0)|0|0|0|1|(1)|(1)|(1)|(1) + {0x0fff0ff0, 0x016f0f10, 4, CLZ_EQ, 0x1c04, instArgs{arg_R_12, arg_R_0}}, // CLZ , cond:4|0|0|0|1|0|1|1|0|(1)|(1)|(1)|(1)|Rd:4|(1)|(1)|(1)|(1)|0|0|0|1|Rm:4 + {0x0ff000f0, 0x016f0f10, 3, CLZ_EQ, 0x1c04, instArgs{arg_R_12, arg_R_0}}, // CLZ , cond:4|0|0|0|1|0|1|1|0|(1)|(1)|(1)|(1)|Rd:4|(1)|(1)|(1)|(1)|0|0|0|1|Rm:4 + {0x0ff0f000, 0x03700000, 4, CMN_EQ, 0x1c04, instArgs{arg_R_16, arg_const}}, // CMN ,# cond:4|0|0|1|1|0|1|1|1|Rn:4|(0)|(0)|(0)|(0)|imm12:12 + {0x0ff00000, 0x03700000, 3, CMN_EQ, 0x1c04, instArgs{arg_R_16, arg_const}}, // CMN ,# cond:4|0|0|1|1|0|1|1|1|Rn:4|(0)|(0)|(0)|(0)|imm12:12 + {0x0ff0f090, 0x01700010, 4, CMN_EQ, 0x1c04, instArgs{arg_R_16, arg_R_shift_R}}, // CMN ,, cond:4|0|0|0|1|0|1|1|1|Rn:4|(0)|(0)|(0)|(0)|Rs:4|0|type:2|1|Rm:4 + {0x0ff00090, 0x01700010, 3, CMN_EQ, 0x1c04, instArgs{arg_R_16, arg_R_shift_R}}, // CMN ,, cond:4|0|0|0|1|0|1|1|1|Rn:4|(0)|(0)|(0)|(0)|Rs:4|0|type:2|1|Rm:4 + {0x0ff0f010, 0x01700000, 4, CMN_EQ, 0x1c04, instArgs{arg_R_16, arg_R_shift_imm}}, // CMN ,{,} cond:4|0|0|0|1|0|1|1|1|Rn:4|(0)|(0)|(0)|(0)|imm5:5|type:2|0|Rm:4 + {0x0ff00010, 0x01700000, 3, CMN_EQ, 0x1c04, instArgs{arg_R_16, arg_R_shift_imm}}, // CMN ,{,} cond:4|0|0|0|1|0|1|1|1|Rn:4|(0)|(0)|(0)|(0)|imm5:5|type:2|0|Rm:4 + {0x0ff0f000, 0x03500000, 4, CMP_EQ, 0x1c04, instArgs{arg_R_16, arg_const}}, // CMP ,# cond:4|0|0|1|1|0|1|0|1|Rn:4|(0)|(0)|(0)|(0)|imm12:12 + {0x0ff00000, 0x03500000, 3, CMP_EQ, 0x1c04, instArgs{arg_R_16, arg_const}}, // CMP ,# cond:4|0|0|1|1|0|1|0|1|Rn:4|(0)|(0)|(0)|(0)|imm12:12 + {0x0ff0f090, 0x01500010, 4, CMP_EQ, 0x1c04, instArgs{arg_R_16, arg_R_shift_R}}, // CMP ,, cond:4|0|0|0|1|0|1|0|1|Rn:4|(0)|(0)|(0)|(0)|Rs:4|0|type:2|1|Rm:4 + {0x0ff00090, 0x01500010, 3, CMP_EQ, 0x1c04, instArgs{arg_R_16, arg_R_shift_R}}, // CMP ,, cond:4|0|0|0|1|0|1|0|1|Rn:4|(0)|(0)|(0)|(0)|Rs:4|0|type:2|1|Rm:4 + {0x0ff0f010, 0x01500000, 4, CMP_EQ, 0x1c04, instArgs{arg_R_16, arg_R_shift_imm}}, // CMP ,{,} cond:4|0|0|0|1|0|1|0|1|Rn:4|(0)|(0)|(0)|(0)|imm5:5|type:2|0|Rm:4 + {0x0ff00010, 0x01500000, 3, CMP_EQ, 0x1c04, instArgs{arg_R_16, arg_R_shift_imm}}, // CMP ,{,} cond:4|0|0|0|1|0|1|0|1|Rn:4|(0)|(0)|(0)|(0)|imm5:5|type:2|0|Rm:4 + {0x0ffffff0, 0x0320f0f0, 4, DBG_EQ, 0x1c04, instArgs{arg_option}}, // DBG #