| // Copyright 2017 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Support for testing against external disassembler program. |
| // Copied and simplified from ../../arm/armasm/ext_test.go. |
| |
| package arm64asm |
| |
| import ( |
| "bufio" |
| "bytes" |
| "encoding/hex" |
| "encoding/json" |
| "flag" |
| "fmt" |
| "io" |
| "io/ioutil" |
| "log" |
| "math/rand" |
| "os" |
| "os/exec" |
| "path/filepath" |
| "regexp" |
| "strconv" |
| "strings" |
| "testing" |
| "time" |
| ) |
| |
// Command-line flags that tune the external-disassembler tests.
var (
	// dumpTest (-dump) makes testExtDis print every encoding together
	// with the external disassembler's text for it.
	dumpTest = flag.Bool("dump", false, "dump all encodings")

	// mismatch (-mismatch) makes testExtDis log mismatches that were
	// allowed instead of silently skipping them.
	mismatch = flag.Bool("mismatch", false, "log allowed mismatches")

	// longTest (-long) is not consulted in this part of the file.
	longTest = flag.Bool("long", false, "long test")

	// keep (-keep) leaves the generated input file on disk and logs the
	// external command line before it is started.
	keep = flag.Bool("keep", false, "keep object files around")
)

// debug, when true, makes writeInst print each encoding as it is written.
var debug bool
| |
// ExtInst is one instruction as reported by the external disassembler.
type ExtInst struct {
	addr uint64  // address printed in String; filled in by the output parser (not shown here)
	enc  [4]byte // raw encoding bytes
	nenc int     // presumably the number of valid bytes in enc; only printed by the visible code
	text string  // disassembly text, compared against our own decoding
}

// String formats the instruction as "address: encoding bytes: text".
func (r ExtInst) String() string {
	const layout = "%#x: % x: %s"
	return fmt.Sprintf(layout, r.addr, r.enc, r.text)
}
| |
| // An ExtDis is a connection between an external disassembler and a test. |
| type ExtDis struct { |
| Arch Mode |
| Dec chan ExtInst |
| File *os.File |
| Size int |
| KeepFile bool |
| Cmd *exec.Cmd |
| } |
| |
// InstJson is one instruction description as decoded from inst.json.
// The data is taken from the ARMv8-A Reference Manual.
type InstJson struct {
	// Bits is the "|"-separated bit layout that doFuzzy turns into an
	// encoding. The other string fields are not interpreted by the
	// visible code; Name only appears in a diagnostic.
	Name, Bits, Arch, Syntax, Code, Alias string

	// Enc is the 32-bit encoding filled in by doFuzzy.
	Enc uint32
}
| |
// Mode identifies an instruction execution mode.
type Mode int

// The zero Mode is deliberately left unnamed, so ModeARM64 is 1.
const (
	ModeARM64 Mode = iota + 1
)
| |
| // Run runs the given command - the external disassembler - and returns |
| // a buffered reader of its standard output. |
| func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) { |
| if *keep { |
| log.Printf("%s\n", strings.Join(cmd, " ")) |
| } |
| ext.Cmd = exec.Command(cmd[0], cmd[1:]...) |
| out, err := ext.Cmd.StdoutPipe() |
| if err != nil { |
| return nil, fmt.Errorf("stdoutpipe: %v", err) |
| } |
| if err := ext.Cmd.Start(); err != nil { |
| return nil, fmt.Errorf("exec: %v", err) |
| } |
| |
| b := bufio.NewReaderSize(out, 1<<20) |
| return b, nil |
| } |
| |
| // Wait waits for the command started with Run to exit. |
| func (ext *ExtDis) Wait() error { |
| return ext.Cmd.Wait() |
| } |
| |
| // testExtDis tests a set of byte sequences against an external disassembler. |
| // The disassembler is expected to produce the given syntax and run |
| // in the given architecture mode (16, 32, or 64-bit). |
| // The extdis function must start the external disassembler |
| // and then parse its output, sending the parsed instructions on ext.Dec. |
| // The generate function calls its argument f once for each byte sequence |
| // to be tested. The generate function itself will be called twice, and it must |
| // make the same sequence of calls to f each time. |
| // When a disassembly does not match the internal decoding, |
| // allowedMismatch determines whether this mismatch should be |
| // allowed, or else considered an error. |
| func testExtDis( |
| t *testing.T, |
| syntax string, |
| arch Mode, |
| extdis func(ext *ExtDis) error, |
| generate func(f func([]byte)), |
| allowedMismatch func(text string, inst *Inst, dec ExtInst) bool, |
| ) { |
| start := time.Now() |
| ext := &ExtDis{ |
| Dec: make(chan ExtInst), |
| Arch: arch, |
| } |
| errc := make(chan error) |
| |
| // First pass: write instructions to input file for external disassembler. |
| file, f, size, err := writeInst(generate) |
| if err != nil { |
| t.Fatal(err) |
| } |
| ext.Size = size |
| ext.File = f |
| defer func() { |
| f.Close() |
| if !*keep { |
| os.Remove(file) |
| } |
| }() |
| |
| // Second pass: compare disassembly against our decodings. |
| var ( |
| totalTests = 0 |
| totalSkips = 0 |
| totalErrors = 0 |
| |
| errors = make([]string, 0, 100) // Sampled errors, at most cap |
| ) |
| go func() { |
| errc <- extdis(ext) |
| }() |
| |
| generate(func(enc []byte) { |
| dec, ok := <-ext.Dec |
| if !ok { |
| t.Errorf("decoding stream ended early") |
| return |
| } |
| inst, text := disasm(syntax, pad(enc)) |
| |
| totalTests++ |
| if *dumpTest { |
| fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc) |
| } |
| if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" { |
| suffix := "" |
| if allowedMismatch(text, &inst, dec) { |
| totalSkips++ |
| if !*mismatch { |
| return |
| } |
| suffix += " (allowed mismatch)" |
| } |
| totalErrors++ |
| cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n", enc, text, len(enc), dec.text, dec.nenc, suffix) |
| |
| if len(errors) >= cap(errors) { |
| j := rand.Intn(totalErrors) |
| if j >= cap(errors) { |
| return |
| } |
| errors = append(errors[:j], errors[j+1:]...) |
| } |
| errors = append(errors, cmp) |
| } |
| }) |
| |
| if *mismatch { |
| totalErrors -= totalSkips |
| } |
| |
| for _, b := range errors { |
| t.Log(b) |
| } |
| |
| if totalErrors > 0 { |
| t.Fail() |
| } |
| t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds()) |
| t.Logf("decoder coverage: %.1f%%;\n", decodeCoverage()) |
| if err := <-errc; err != nil { |
| t.Fatalf("external disassembler: %v", err) |
| } |
| |
| } |
| |
| // Start address of text. |
| const start = 0x8000 |
| |
| // writeInst writes the generated byte sequences to a new file |
| // starting at offset start. That file is intended to be the input to |
| // the external disassembler. |
| func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) { |
| f, err = ioutil.TempFile("", "arm64asm") |
| if err != nil { |
| return |
| } |
| |
| file = f.Name() |
| |
| f.Seek(start, io.SeekStart) |
| w := bufio.NewWriter(f) |
| defer w.Flush() |
| size = 0 |
| generate(func(x []byte) { |
| if debug { |
| fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) |
| } |
| w.Write(x) |
| w.Write(zeros[len(x):]) |
| size += len(zeros) |
| }) |
| return file, f, size, nil |
| } |
| |
// zeros supplies the zero bytes used to fill short encodings out to 4 bytes.
var zeros = make([]byte, 4)

// pad returns enc extended with zero bytes to a length of 4.
// A sequence that already has 4 or more bytes is returned as is.
// The caller's backing array is never written: the capacity is clipped
// before appending, so a short input is always copied.
func pad(enc []byte) []byte {
	n := len(enc)
	if n >= 4 {
		return enc
	}
	return append(enc[:n:n], zeros[:4-n]...)
}
| |
| // disasm returns the decoded instruction and text |
| // for the given source bytes, using the given syntax and mode. |
| func disasm(syntax string, src []byte) (inst Inst, text string) { |
| var err error |
| inst, err = Decode(src) |
| if err != nil { |
| text = "error: " + err.Error() |
| return |
| } |
| text = inst.String() |
| switch syntax { |
| case "gnu": |
| text = GNUSyntax(inst) |
| case "plan9": // [sic] |
| text = GoSyntax(inst, 0, nil, nil) |
| default: |
| text = "error: unknown syntax " + syntax |
| } |
| return |
| } |
| |
| // decodecoverage returns a floating point number denoting the |
| // decoder coverage. |
| func decodeCoverage() float64 { |
| n := 0 |
| for _, t := range decoderCover { |
| if t { |
| n++ |
| } |
| } |
| return 100 * float64(1+n) / float64(1+len(decoderCover)) |
| } |
| |
| // Helpers for writing disassembler output parsers. |
| |
// hasPrefix reports whether s, or the remainder of s after any single
// space character, starts with one of prefixes. In other words, it reports
// whether some space-separated word of s begins with one of the prefixes.
// An empty s never matches.
func hasPrefix(s string, prefixes ...string) bool {
	for _, want := range prefixes {
		rest := s
		for rest != "" {
			if strings.HasPrefix(rest, want) {
				return true
			}
			sp := strings.IndexByte(rest, ' ')
			if sp < 0 {
				break
			}
			rest = rest[sp+1:]
		}
	}
	return false
}
| |
// isHex reports whether b is an ASCII hexadecimal digit (0-9, a-f, A-F).
func isHex(b byte) bool {
	switch {
	case '0' <= b && b <= '9',
		'a' <= b && b <= 'f',
		'A' <= b && b <= 'F':
		return true
	}
	return false
}
| |
| // parseHex parses the hexadecimal byte dump in hex, |
| // appending the parsed bytes to raw and returning the updated slice. |
| // The returned bool reports whether any invalid hex was found. |
| // Spaces and tabs between bytes are okay but any other non-hex is not. |
| func parseHex(hex []byte, raw []byte) ([]byte, bool) { |
| hex = bytes.TrimSpace(hex) |
| for j := 0; j < len(hex); { |
| for hex[j] == ' ' || hex[j] == '\t' { |
| j++ |
| } |
| if j >= len(hex) { |
| break |
| } |
| if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) { |
| return nil, false |
| } |
| raw = append(raw, unhex(hex[j])<<4|unhex(hex[j+1])) |
| j += 2 |
| } |
| return raw, true |
| } |
| |
// unhex returns the numeric value of the hexadecimal digit b,
// or 0 when b is not a hexadecimal digit.
func unhex(b byte) byte {
	switch {
	case '0' <= b && b <= '9':
		return b - '0'
	case 'A' <= b && b <= 'F':
		return b - 'A' + 10
	case 'a' <= b && b <= 'f':
		return b - 'a' + 10
	default:
		return 0
	}
}
| |
// index is like bytes.Index(s, []byte(t)) but avoids the allocation.
// It returns the offset of the first occurrence of t in s, or -1 if there
// is none. As with bytes.Index, an empty t is found at offset 0.
func index(s []byte, t string) int {
	if len(t) == 0 {
		// t[0] below would panic on an empty needle.
		return 0
	}
	for i := 0; i+len(t) <= len(s); i++ {
		// Jump to the next candidate: the next occurrence of t's first byte.
		j := bytes.IndexByte(s[i:], t[0])
		if j < 0 {
			return -1
		}
		i += j
		if i+len(t) > len(s) {
			return -1
		}
		// The compiler does not allocate for a string conversion that is
		// used only in a comparison.
		if string(s[i:i+len(t)]) == t {
			return i
		}
	}
	return -1
}
| |
// fixSpace trims s and collapses every run of spaces, tabs, and newlines
// inside it into a single space. When s needs no change beyond trimming,
// the trimmed slice is returned without being written to; otherwise the
// bytes are rewritten in place and the shortened slice is returned.
func fixSpace(s []byte) []byte {
	s = bytes.TrimSpace(s)

	// Scan first so that already-clean input is never written to.
	dirty := false
	for i, c := range s {
		if c == '\t' || c == '\n' || (c == ' ' && i > 0 && s[i-1] == ' ') {
			dirty = true
			break
		}
	}
	if !dirty {
		return s
	}

	// Compact in place. The write position never overtakes the read
	// position, so appending to s[:0] stays inside s's own storage.
	out := s[:0]
	for _, c := range s {
		if c == '\t' || c == '\n' {
			c = ' '
		}
		if c == ' ' && len(out) > 0 && out[len(out)-1] == ' ' {
			continue
		}
		out = append(out, c)
	}
	if n := len(out); n > 0 && out[n-1] == ' ' {
		out = out[:n-1]
	}
	return out
}
| |
// The following regular expressions match disassembly text of instructions
// that use PC-relative addressing. Each captures the text before the target
// address in group 1 and the hexadecimal target address, without its 0x
// prefix, in group 2.
var (
	// pcrel matches B and BL instructions, optionally conditional.
	pcrel = regexp.MustCompile(`^((?:.* )?(?:b|bl)x?(?:\.)?(?:eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|nv)?) 0x([0-9a-f]+)$`)

	// pcrelr matches instructions whose arguments are a numbered register
	// and a label.
	pcrelr = regexp.MustCompile(`^((?:.*)?(?:ldr|adrp|adr|cbnz|cbz|ldrsw) (?:x|w|s|d|q)(?:[0-9]+,)) 0x([0-9a-f]+)$`)

	// pcrelrzr is pcrelr for the case where the register is the zero register.
	pcrelrzr = regexp.MustCompile(`^((?:.*)?(?:ldr|adrp|adr|cbnz|cbz|ldrsw) (?:x|w)zr,) 0x([0-9a-f]+)$`)

	// pcrelim matches instructions whose arguments are a numbered
	// register, an immediate, and a label.
	pcrelim = regexp.MustCompile(`^((?:.*)?(?:tbnz|tbz) (?:x|w)(?:[0-9]+,) (?:#[0-9a-f]+,)) 0x([0-9a-f]+)$`)

	// pcrelimzr is pcrelim for the case where the register is the zero register.
	pcrelimzr = regexp.MustCompile(`^((?:.*)?(?:tbnz|tbz) (?:x|w)zr, (?:#[0-9a-f]+,)) 0x([0-9a-f]+)$`)

	// pcrelprfm matches PRFM instructions whose arguments are a named
	// operand and a label.
	pcrelprfm = regexp.MustCompile(`^((?:.*)?(?:prfm) (?:[0-9a-z]+,)) 0x([0-9a-f]+)$`)

	// pcrelprfmim matches PRFM instructions whose arguments are an
	// immediate and a label.
	pcrelprfmim = regexp.MustCompile(`^((?:.*)?(?:prfm) (?:#0x[0-9a-f]+,)) 0x([0-9a-f]+)$`)
)
| |
// Round is the 1-based number of the current pass over the instruction
// list read from the JSON file; JSONCases resets and advances it. doFuzzy
// adds it to the instruction index to seed its random bits, so a given
// pass produces the same encodings for the external disassembler and for
// the decoder. In cond mode its low bits also supply the condition field.
var Round int

// condmark selects cond mode: when true, only instructions whose bit
// layout has a "cond" field are generated, and that field is filled from
// Round instead of random bits.
var condmark bool
| |
| // Generate instruction binary according to Json file |
| // Encode variable field of instruction with random value |
| func doFuzzy(inst *InstJson, Ninst int) { |
| var testdata uint32 |
| var NonDigRE = regexp.MustCompile(`[\D]`) |
| rand.Seed(int64(Round + Ninst)) |
| off := 0 |
| DigBit := "" |
| if condmark == true && !strings.Contains(inst.Bits, "cond") { |
| inst.Enc = 0xffffffff |
| } else { |
| for _, f := range strings.Split(inst.Bits, "|") { |
| if i := strings.Index(f, ":"); i >= 0 { |
| // consider f contains "01:2" and "Rm:5" |
| DigBit = f[:i] |
| m := NonDigRE.FindStringSubmatch(DigBit) |
| if m == nil { |
| DigBit = strings.TrimSpace(DigBit) |
| s := strings.Split(DigBit, "") |
| for i := 0; i < len(s); i++ { |
| switch s[i] { |
| case "1", "(1)": |
| testdata |= 1 << uint(31-off) |
| } |
| off++ |
| } |
| } else { |
| // DigBit is "Rn" or "imm3" |
| n, _ := strconv.Atoi(f[i+1:]) |
| if DigBit == "cond" && condmark == true { |
| r := uint8(Round) |
| for i := n - 1; i >= 0; i-- { |
| switch (r >> uint(i)) & 1 { |
| case 1: |
| testdata |= 1 << uint(31-off) |
| } |
| off++ |
| } |
| } else { |
| for i := 0; i < n; i++ { |
| r := rand.Intn(2) |
| switch r { |
| case 1: |
| testdata |= 1 << uint(31-off) |
| } |
| off++ |
| } |
| } |
| } |
| continue |
| } |
| for _, bit := range strings.Fields(f) { |
| switch bit { |
| case "0", "(0)": |
| off++ |
| continue |
| case "1", "(1)": |
| testdata |= 1 << uint(31-off) |
| default: |
| r := rand.Intn(2) |
| switch r { |
| case 1: |
| testdata |= 1 << uint(31-off) |
| } |
| } |
| off++ |
| } |
| } |
| if off != 32 { |
| log.Printf("incorrect bit count for %s %s: have %d", inst.Name, inst.Bits, off) |
| } |
| inst.Enc = testdata |
| } |
| } |
| |
| // Generators. |
| // |
| // The test cases are described as functions that invoke a callback repeatedly, |
| // with a new input sequence each time. These helpers make writing those |
| // a little easier. |
| |
| // JSONCases generates ARM64 instructions according to inst.json. |
| func JSONCases(t *testing.T) func(func([]byte)) { |
| return func(try func([]byte)) { |
| data, err := ioutil.ReadFile("inst.json") |
| if err != nil { |
| t.Fatal(err) |
| } |
| var insts []InstJson |
| var instsN []InstJson |
| // Change N value to get more cases only when condmark=false. |
| N := 100 |
| if condmark == true { |
| N = 16 |
| } |
| if err := json.Unmarshal(data, &insts); err != nil { |
| t.Fatal(err) |
| } |
| // Append instructions to get more test cases. |
| for i := 0; i < N; { |
| for _, inst := range insts { |
| instsN = append(instsN, inst) |
| } |
| i++ |
| } |
| Round = 0 |
| for i := range instsN { |
| if i%len(insts) == 0 { |
| Round++ |
| } |
| doFuzzy(&instsN[i], i) |
| } |
| for _, inst := range instsN { |
| if condmark == true && inst.Enc == 0xffffffff { |
| continue |
| } |
| enc := inst.Enc |
| try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)}) |
| } |
| } |
| } |
| |
| // condCases generates conditional instructions. |
| func condCases(t *testing.T) func(func([]byte)) { |
| return func(try func([]byte)) { |
| condmark = true |
| JSONCases(t)(func(enc []byte) { |
| try(enc) |
| }) |
| } |
| } |
| |
// hexCases generates the cases written in hexadecimal in the encoded string.
// White space in encoded separates entire test cases, not individual bytes.
// A case that is not valid hexadecimal is reported through t.Errorf and
// skipped rather than being passed on partially decoded.
func hexCases(t *testing.T, encoded string) func(func([]byte)) {
	return func(try func([]byte)) {
		for _, x := range strings.Fields(encoded) {
			src, err := hex.DecodeString(x)
			if err != nil {
				t.Errorf("parsing %q: %v", x, err)
				continue
			}
			try(src)
		}
	}
}
| |
// testdataCases returns a generator for the test cases recorded in
// testdata/<syntax>cases.txt. The file is read and parsed once, when
// testdataCases is called.
//
// Blank lines and lines starting with "#" are ignored. On every other line
// only the first white-space-separated field is used: hexadecimal bytes
// containing one "|" separator, which is dropped before decoding. The
// answers recorded on the rest of the line are ignored. Malformed lines are
// reported through t.Errorf.
func testdataCases(t *testing.T, syntax string) func(func([]byte)) {
	path := filepath.Join("testdata", syntax+"cases.txt")
	data, err := ioutil.ReadFile(path)
	if err != nil {
		t.Fatal(err)
	}

	var codes [][]byte
	for _, line := range strings.Split(string(data), "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		field := strings.Fields(line)[0]
		bar := strings.Index(field, "|")
		switch {
		case bar < 0:
			t.Errorf("parsing %q: missing | separator", field)
			continue
		case bar%2 != 0:
			// Reported, but decoding is still attempted below.
			t.Errorf("parsing %q: misaligned | separator", field)
		}
		code, err := hex.DecodeString(field[:bar] + field[bar+1:])
		if err != nil {
			t.Errorf("parsing %q: %v", field, err)
			continue
		}
		codes = append(codes, code)
	}

	return func(try func([]byte)) {
		for i := range codes {
			try(codes[i])
		}
	}
}