diff options
author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
---|---|---|
committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
commit | f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch) | |
tree | f8255d51e10c6f1e0ed69702200b966c9556a431 /src/cmd/internal/rsc.io/x86/x86asm/ext_test.go | |
parent | 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff) | |
download | golang-upstream/1.4.tar.gz |
Imported Upstream version 1.4upstream/1.4
Diffstat (limited to 'src/cmd/internal/rsc.io/x86/x86asm/ext_test.go')
-rw-r--r-- | src/cmd/internal/rsc.io/x86/x86asm/ext_test.go | 811 |
1 files changed, 811 insertions, 0 deletions
diff --git a/src/cmd/internal/rsc.io/x86/x86asm/ext_test.go b/src/cmd/internal/rsc.io/x86/x86asm/ext_test.go new file mode 100644 index 000000000..f65d6b2d5 --- /dev/null +++ b/src/cmd/internal/rsc.io/x86/x86asm/ext_test.go @@ -0,0 +1,811 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Support for testing against external disassembler program. + +package x86asm + +import ( + "bufio" + "bytes" + "encoding/hex" + "flag" + "fmt" + "io/ioutil" + "log" + "math/rand" + "os" + "os/exec" + "regexp" + "runtime" + "strings" + "testing" + "time" +) + +var ( + printTests = flag.Bool("printtests", false, "print test cases that exercise new code paths") + dumpTest = flag.Bool("dump", false, "dump all encodings") + mismatch = flag.Bool("mismatch", false, "log allowed mismatches") + longTest = flag.Bool("long", false, "long test") + keep = flag.Bool("keep", false, "keep object files around") + debug = false +) + +// A ExtInst represents a single decoded instruction parsed +// from an external disassembler's output. +type ExtInst struct { + addr uint32 + enc [32]byte + nenc int + text string +} + +func (r ExtInst) String() string { + return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text) +} + +// An ExtDis is a connection between an external disassembler and a test. +type ExtDis struct { + Arch int + Dec chan ExtInst + File *os.File + Size int + KeepFile bool + Cmd *exec.Cmd +} + +// Run runs the given command - the external disassembler - and returns +// a buffered reader of its standard output. +func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) { + if *keep { + log.Printf("%s\n", strings.Join(cmd, " ")) + } + ext.Cmd = exec.Command(cmd[0], cmd[1:]...) + out, err := ext.Cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("stdoutpipe: %v", err) + } + if err := ext.Cmd.Start(); err != nil { + return nil, fmt.Errorf("exec: %v", err) + } + + b := bufio.NewReaderSize(out, 1<<20) + return b, nil +} + +// Wait waits for the command started with Run to exit. +func (ext *ExtDis) Wait() error { + return ext.Cmd.Wait() +} + +// testExtDis tests a set of byte sequences against an external disassembler. +// The disassembler is expected to produce the given syntax and be run +// in the given architecture mode (16, 32, or 64-bit). +// The extdis function must start the external disassembler +// and then parse its output, sending the parsed instructions on ext.Dec. +// The generate function calls its argument f once for each byte sequence +// to be tested. The generate function itself will be called twice, and it must +// make the same sequence of calls to f each time. +// When a disassembly does not match the internal decoding, +// allowedMismatch determines whether this mismatch should be +// allowed, or else considered an error. +func testExtDis( + t *testing.T, + syntax string, + arch int, + extdis func(ext *ExtDis) error, + generate func(f func([]byte)), + allowedMismatch func(text string, size int, inst *Inst, dec ExtInst) bool, +) { + start := time.Now() + ext := &ExtDis{ + Dec: make(chan ExtInst), + Arch: arch, + } + errc := make(chan error) + + // First pass: write instructions to input file for external disassembler. + file, f, size, err := writeInst(generate) + if err != nil { + t.Fatal(err) + } + ext.Size = size + ext.File = f + defer func() { + f.Close() + if !*keep { + os.Remove(file) + } + }() + + // Second pass: compare disassembly against our decodings. + var ( + totalTests = 0 + totalSkips = 0 + totalErrors = 0 + + errors = make([]string, 0, 100) // sampled errors, at most cap + ) + go func() { + errc <- extdis(ext) + }() + generate(func(enc []byte) { + dec, ok := <-ext.Dec + if !ok { + t.Errorf("decoding stream ended early") + return + } + inst, text := disasm(syntax, arch, pad(enc)) + totalTests++ + if *dumpTest { + fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc) + } + if text != dec.text || inst.Len != dec.nenc { + suffix := "" + if allowedMismatch(text, size, &inst, dec) { + totalSkips++ + if !*mismatch { + return + } + suffix += " (allowed mismatch)" + } + totalErrors++ + if len(errors) >= cap(errors) { + j := rand.Intn(totalErrors) + if j >= cap(errors) { + return + } + errors = append(errors[:j], errors[j+1:]...) + } + errors = append(errors, fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s", enc, text, inst.Len, dec.text, dec.nenc, suffix)) + } + }) + + if *mismatch { + totalErrors -= totalSkips + } + + for _, b := range errors { + t.Log(b) + } + + if totalErrors > 0 { + t.Fail() + } + t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds()) + + if err := <-errc; err != nil { + t.Fatal("external disassembler: %v", err) + } + +} + +const start = 0x8000 // start address of text + +// writeInst writes the generated byte sequences to a new file +// starting at offset start. That file is intended to be the input to +// the external disassembler. +func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) { + f, err = ioutil.TempFile("", "x86map") + if err != nil { + return + } + + file = f.Name() + + f.Seek(start, 0) + w := bufio.NewWriter(f) + defer w.Flush() + size = 0 + generate(func(x []byte) { + if len(x) > 16 { + x = x[:16] + } + if debug { + fmt.Printf("%#x: %x%x\n", start+size, x, pops[len(x):]) + } + w.Write(x) + w.Write(pops[len(x):]) + size += len(pops) + }) + return file, f, size, nil +} + +// 0x5F is a single-byte pop instruction. +// We pad the bytes we want decoded with enough 0x5Fs +// that no matter what state the instruction stream is in +// after reading our bytes, the pops will get us back to +// a forced instruction boundary. +var pops = []byte{ + 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, + 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, + 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, + 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, 0x5f, +} + +// pad pads the code sequenc with pops. +func pad(enc []byte) []byte { + return append(enc[:len(enc):len(enc)], pops...) +} + +// disasm returns the decoded instruction and text +// for the given source bytes, using the given syntax and mode. +func disasm(syntax string, mode int, src []byte) (inst Inst, text string) { + // If printTests is set, we record the coverage value + // before and after, and we write out the inputs for which + // coverage went up, in the format expected in testdata/decode.text. + // This produces a fairly small set of test cases that exercise nearly + // all the code. + var cover float64 + if *printTests { + cover -= coverage() + } + + inst, err := decode1(src, mode, syntax == "gnu") + if err != nil { + text = "error: " + err.Error() + } else { + switch syntax { + case "gnu": + text = GNUSyntax(inst) + case "intel": + text = IntelSyntax(inst) + case "plan9": + text = Plan9Syntax(inst, 0, nil) + default: + text = "error: unknown syntax " + syntax + } + } + + if *printTests { + cover += coverage() + if cover > 0 { + max := len(src) + if max > 16 && inst.Len <= 16 { + max = 16 + } + fmt.Printf("%x|%x\t%d\t%s\t%s\n", src[:inst.Len], src[inst.Len:max], mode, syntax, text) + } + } + + return +} + +// coverage returns a floating point number denoting the +// test coverage until now. The number increases when new code paths are exercised, +// both in the Go program and in the decoder byte code. +func coverage() float64 { + /* + testing.Coverage is not in the main distribution. + The implementation, which must go in package testing, is: + + // Coverage reports the current code coverage as a fraction in the range [0, 1]. + func Coverage() float64 { + var n, d int64 + for _, counters := range cover.Counters { + for _, c := range counters { + if c > 0 { + n++ + } + d++ + } + } + if d == 0 { + return 0 + } + return float64(n) / float64(d) + } + */ + + var f float64 + // f += testing.Coverage() + f += decodeCoverage() + return f +} + +func decodeCoverage() float64 { + n := 0 + for _, t := range decoderCover { + if t { + n++ + } + } + return float64(1+n) / float64(1+len(decoderCover)) +} + +// Helpers for writing disassembler output parsers. + +// isPrefix reports whether text is the name of an instruction prefix. +func isPrefix(text string) bool { + return prefixByte[text] > 0 +} + +// prefixByte maps instruction prefix text to actual prefix byte values. +var prefixByte = map[string]byte{ + "es": 0x26, + "cs": 0x2e, + "ss": 0x36, + "ds": 0x3e, + "fs": 0x64, + "gs": 0x65, + "data16": 0x66, + "addr16": 0x67, + "lock": 0xf0, + "repn": 0xf2, + "repne": 0xf2, + "rep": 0xf3, + "repe": 0xf3, + "xacquire": 0xf2, + "xrelease": 0xf3, + "bnd": 0xf2, + "addr32": 0x66, + "data32": 0x67, +} + +// hasPrefix reports whether any of the space-separated words in the text s +// begins with any of the given prefixes. +func hasPrefix(s string, prefixes ...string) bool { + for _, prefix := range prefixes { + for s := s; s != ""; { + if strings.HasPrefix(s, prefix) { + return true + } + i := strings.Index(s, " ") + if i < 0 { + break + } + s = s[i+1:] + } + } + return false +} + +// contains reports whether the text s contains any of the given substrings. +func contains(s string, substrings ...string) bool { + for _, sub := range substrings { + if strings.Contains(s, sub) { + return true + } + } + return false +} + +// isHex reports whether b is a hexadecimal character (0-9A-Fa-f). +func isHex(b byte) bool { return b == '0' || unhex[b] > 0 } + +// parseHex parses the hexadecimal byte dump in hex, +// appending the parsed bytes to raw and returning the updated slice. +// The returned bool signals whether any invalid hex was found. +// Spaces and tabs between bytes are okay but any other non-hex is not. +func parseHex(hex []byte, raw []byte) ([]byte, bool) { + hex = trimSpace(hex) + for j := 0; j < len(hex); { + for hex[j] == ' ' || hex[j] == '\t' { + j++ + } + if j >= len(hex) { + break + } + if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) { + return nil, false + } + raw = append(raw, unhex[hex[j]]<<4|unhex[hex[j+1]]) + j += 2 + } + return raw, true +} + +var unhex = [256]byte{ + '0': 0, + '1': 1, + '2': 2, + '3': 3, + '4': 4, + '5': 5, + '6': 6, + '7': 7, + '8': 8, + '9': 9, + 'A': 10, + 'B': 11, + 'C': 12, + 'D': 13, + 'E': 14, + 'F': 15, + 'a': 10, + 'b': 11, + 'c': 12, + 'd': 13, + 'e': 14, + 'f': 15, +} + +// index is like bytes.Index(s, []byte(t)) but avoids the allocation. +func index(s []byte, t string) int { + i := 0 + for { + j := bytes.IndexByte(s[i:], t[0]) + if j < 0 { + return -1 + } + i = i + j + if i+len(t) > len(s) { + return -1 + } + for k := 1; k < len(t); k++ { + if s[i+k] != t[k] { + goto nomatch + } + } + return i + nomatch: + i++ + } +} + +// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s. +// If s must be rewritten, it is rewritten in place. +func fixSpace(s []byte) []byte { + s = trimSpace(s) + for i := 0; i < len(s); i++ { + if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' { + goto Fix + } + } + return s + +Fix: + b := s + w := 0 + for i := 0; i < len(s); i++ { + c := s[i] + if c == '\t' || c == '\n' { + c = ' ' + } + if c == ' ' && w > 0 && b[w-1] == ' ' { + continue + } + b[w] = c + w++ + } + if w > 0 && b[w-1] == ' ' { + w-- + } + return b[:w] +} + +// trimSpace trims leading and trailing space from s, returning a subslice of s. +func trimSpace(s []byte) []byte { + j := len(s) + for j > 0 && (s[j-1] == ' ' || s[j-1] == '\t' || s[j-1] == '\n') { + j-- + } + i := 0 + for i < j && (s[i] == ' ' || s[i] == '\t') { + i++ + } + return s[i:j] +} + +// pcrel and pcrelw match instructions using relative addressing mode. +var ( + pcrel = regexp.MustCompile(`^((?:.* )?(?:j[a-z]+|call|ljmp|loopn?e?w?|xbegin)q?(?:,p[nt])?) 0x([0-9a-f]+)$`) + pcrelw = regexp.MustCompile(`^((?:.* )?(?:callw|jmpw|xbeginw|ljmpw)(?:,p[nt])?) 0x([0-9a-f]+)$`) +) + +// Generators. +// +// The test cases are described as functions that invoke a callback repeatedly, +// with a new input sequence each time. These helpers make writing those +// a little easier. + +// hexCases generates the cases written in hexadecimal in the encoded string. +// Spaces in 'encoded' separate entire test cases, not individual bytes. +func hexCases(t *testing.T, encoded string) func(func([]byte)) { + return func(try func([]byte)) { + for _, x := range strings.Fields(encoded) { + src, err := hex.DecodeString(x) + if err != nil { + t.Errorf("parsing %q: %v", x, err) + } + try(src) + } + } +} + +// testdataCases generates the test cases recorded in testdata/decode.txt. +// It only uses the inputs; it ignores the answers recorded in that file. +func testdataCases(t *testing.T) func(func([]byte)) { + var codes [][]byte + data, err := ioutil.ReadFile("testdata/decode.txt") + if err != nil { + t.Fatal(err) + } + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.Fields(line)[0] + i := strings.Index(f, "|") + if i < 0 { + t.Errorf("parsing %q: missing | separator", f) + continue + } + if i%2 != 0 { + t.Errorf("parsing %q: misaligned | separator", f) + } + code, err := hex.DecodeString(f[:i] + f[i+1:]) + if err != nil { + t.Errorf("parsing %q: %v", f, err) + continue + } + codes = append(codes, code) + } + + return func(try func([]byte)) { + for _, code := range codes { + try(code) + } + } +} + +// manyPrefixes generates all possible 2⁹ combinations of nine chosen prefixes. +// The relative ordering of the prefixes within the combinations varies deterministically. +func manyPrefixes(try func([]byte)) { + var prefixBytes = []byte{0x66, 0x67, 0xF0, 0xF2, 0xF3, 0x3E, 0x36, 0x66, 0x67} + var enc []byte + for i := 0; i < 1<<uint(len(prefixBytes)); i++ { + enc = enc[:0] + for j, p := range prefixBytes { + if i&(1<<uint(j)) != 0 { + enc = append(enc, p) + } + } + if len(enc) > 0 { + k := i % len(enc) + enc[0], enc[k] = enc[k], enc[0] + } + try(enc) + } +} + +// basicPrefixes geneartes 8 different possible prefix cases: no prefix +// and then one each of seven different prefix bytes. +func basicPrefixes(try func([]byte)) { + try(nil) + for _, b := range []byte{0x66, 0x67, 0xF0, 0xF2, 0xF3, 0x3E, 0x36} { + try([]byte{b}) + } +} + +func rexPrefixes(try func([]byte)) { + try(nil) + for _, b := range []byte{0x40, 0x48, 0x43, 0x4C} { + try([]byte{b}) + } +} + +// concat takes two generators and returns a generator for the +// cross product of the two, concatenating the results from each. +func concat(gen1, gen2 func(func([]byte))) func(func([]byte)) { + return func(try func([]byte)) { + gen1(func(enc1 []byte) { + gen2(func(enc2 []byte) { + try(append(enc1[:len(enc1):len(enc1)], enc2...)) + }) + }) + } +} + +// concat3 takes three generators and returns a generator for the +// cross product of the three, concatenating the results from each. +func concat3(gen1, gen2, gen3 func(func([]byte))) func(func([]byte)) { + return func(try func([]byte)) { + gen1(func(enc1 []byte) { + gen2(func(enc2 []byte) { + gen3(func(enc3 []byte) { + try(append(append(enc1[:len(enc1):len(enc1)], enc2...), enc3...)) + }) + }) + }) + } +} + +// concat4 takes four generators and returns a generator for the +// cross product of the four, concatenating the results from each. +func concat4(gen1, gen2, gen3, gen4 func(func([]byte))) func(func([]byte)) { + return func(try func([]byte)) { + gen1(func(enc1 []byte) { + gen2(func(enc2 []byte) { + gen3(func(enc3 []byte) { + gen4(func(enc4 []byte) { + try(append(append(append(enc1[:len(enc1):len(enc1)], enc2...), enc3...), enc4...)) + }) + }) + }) + }) + } +} + +// filter generates the sequences from gen that satisfy ok. +func filter(gen func(func([]byte)), ok func([]byte) bool) func(func([]byte)) { + return func(try func([]byte)) { + gen(func(enc []byte) { + if ok(enc) { + try(enc) + } + }) + } +} + +// enum8bit generates all possible 1-byte sequences, followed by distinctive padding. +func enum8bit(try func([]byte)) { + for i := 0; i < 1<<8; i++ { + try([]byte{byte(i), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) + } +} + +// enum8bit generates all possible 2-byte sequences, followed by distinctive padding. +func enum16bit(try func([]byte)) { + for i := 0; i < 1<<16; i++ { + try([]byte{byte(i), byte(i >> 8), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) + } +} + +// enum24bit generates all possible 3-byte sequences, followed by distinctive padding. +func enum24bit(try func([]byte)) { + for i := 0; i < 1<<24; i++ { + try([]byte{byte(i), byte(i >> 8), byte(i >> 16), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) + } +} + +// enumModRM generates all possible modrm bytes and, for modrm values that indicate +// a following sib byte, all possible modrm, sib combinations. +func enumModRM(try func([]byte)) { + for i := 0; i < 256; i++ { + if (i>>3)&07 == 04 && i>>6 != 3 { // has sib + for j := 0; j < 256; j++ { + try([]byte{0, byte(i), byte(j), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // byte encodings + try([]byte{1, byte(i), byte(j), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // word encodings + } + } else { + try([]byte{0, byte(i), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // byte encodings + try([]byte{1, byte(i), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) // word encodings + } + } +} + +// fixed generates the single case b. +// It's mainly useful to prepare an argument for concat or concat3. +func fixed(b ...byte) func(func([]byte)) { + return func(try func([]byte)) { + try(b) + } +} + +// testBasic runs the given test function with cases all using opcode as the initial opcode bytes. +// It runs three phases: +// +// First, zero-or-one prefixes followed by opcode followed by all possible 1-byte values. +// If in -short mode, that's all. +// +// Second, zero-or-one prefixes followed by opcode followed by all possible 2-byte values. +// If not in -long mode, that's all. This phase and the next run in parallel with other tests +// (using t.Parallel). +// +// Finally, opcode followed by all possible 3-byte values. The test can take a very long time +// and prints progress messages to package log. +func testBasic(t *testing.T, testfn func(*testing.T, func(func([]byte))), opcode ...byte) { + testfn(t, concat3(basicPrefixes, fixed(opcode...), enum8bit)) + if testing.Short() { + return + } + + t.Parallel() + testfn(t, concat3(basicPrefixes, fixed(opcode...), enum16bit)) + if !*longTest { + return + } + + name := caller(2) + op1 := make([]byte, len(opcode)+1) + copy(op1, opcode) + for i := 0; i < 256; i++ { + log.Printf("%s 24-bit: %d/256\n", name, i) + op1[len(opcode)] = byte(i) + testfn(t, concat(fixed(op1...), enum16bit)) + } +} + +func testBasicREX(t *testing.T, testfn func(*testing.T, func(func([]byte))), opcode ...byte) { + testfn(t, filter(concat4(basicPrefixes, rexPrefixes, fixed(opcode...), enum8bit), isValidREX)) + if testing.Short() { + return + } + + t.Parallel() + testfn(t, filter(concat4(basicPrefixes, rexPrefixes, fixed(opcode...), enum16bit), isValidREX)) + if !*longTest { + return + } + + name := caller(2) + op1 := make([]byte, len(opcode)+1) + copy(op1, opcode) + for i := 0; i < 256; i++ { + log.Printf("%s 24-bit: %d/256\n", name, i) + op1[len(opcode)] = byte(i) + testfn(t, filter(concat3(rexPrefixes, fixed(op1...), enum16bit), isValidREX)) + } +} + +// testPrefix runs the given test function for all many prefix possibilities +// followed by all possible 1-byte sequences. +// +// If in -long mode, it then runs a test of all the prefix possibilities followed +// by all possible 2-byte sequences. +func testPrefix(t *testing.T, testfn func(*testing.T, func(func([]byte)))) { + t.Parallel() + testfn(t, concat(manyPrefixes, enum8bit)) + if testing.Short() || !*longTest { + return + } + + name := caller(2) + for i := 0; i < 256; i++ { + log.Printf("%s 16-bit: %d/256\n", name, i) + testfn(t, concat3(manyPrefixes, fixed(byte(i)), enum8bit)) + } +} + +func testPrefixREX(t *testing.T, testfn func(*testing.T, func(func([]byte)))) { + t.Parallel() + testfn(t, filter(concat3(manyPrefixes, rexPrefixes, enum8bit), isValidREX)) + if testing.Short() || !*longTest { + return + } + + name := caller(2) + for i := 0; i < 256; i++ { + log.Printf("%s 16-bit: %d/256\n", name, i) + testfn(t, filter(concat4(manyPrefixes, rexPrefixes, fixed(byte(i)), enum8bit), isValidREX)) + } +} + +func caller(skip int) string { + pc, _, _, _ := runtime.Caller(skip) + f := runtime.FuncForPC(pc) + name := "?" + if f != nil { + name = f.Name() + if i := strings.LastIndex(name, "."); i >= 0 { + name = name[i+1:] + } + } + return name +} + +func isValidREX(x []byte) bool { + i := 0 + for i < len(x) && isPrefixByte(x[i]) { + i++ + } + if i < len(x) && Prefix(x[i]).IsREX() { + i++ + if i < len(x) { + return !isPrefixByte(x[i]) && !Prefix(x[i]).IsREX() + } + } + return true +} + +func isPrefixByte(b byte) bool { + switch b { + case 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65, 0x66, 0x67, 0xF0, 0xF2, 0xF3: + return true + } + return false +} |