blob: f432203e1501a9eb7a1842881e28c63fe308caf9 [file] [log] [blame]
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Support for testing against external disassembler program.
// Copied and simplified from ../../arm/armasm/ext_test.go.
package arm64asm
import (
"bufio"
"bytes"
"encoding/hex"
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"math/rand"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"testing"
"time"
)
var (
dumpTest = flag.Bool("dump", false, "dump all encodings")
mismatch = flag.Bool("mismatch", false, "log allowed mismatches")
longTest = flag.Bool("long", false, "long test")
keep = flag.Bool("keep", false, "keep object files around")
debug = false
)
// An ExtInst represents a single decoded instruction parsed
// from an external disassembler's output.
type ExtInst struct {
addr uint64
enc [4]byte
nenc int
text string
}
func (r ExtInst) String() string {
return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text)
}
// An ExtDis is a connection between an external disassembler and a test.
type ExtDis struct {
Arch Mode
Dec chan ExtInst
File *os.File
Size int
KeepFile bool
Cmd *exec.Cmd
}
// InstJson describes instruction fields value got from ARMv8-A Reference Manual
type InstJson struct {
Name string
Bits string
Arch string
Syntax string
Code string
Alias string
Enc uint32
}
// A Mode is an instruction execution mode.
type Mode int
const (
_ Mode = iota
ModeARM64
)
// Run runs the given command - the external disassembler - and returns
// a buffered reader of its standard output.
func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) {
if *keep {
log.Printf("%s\n", strings.Join(cmd, " "))
}
ext.Cmd = exec.Command(cmd[0], cmd[1:]...)
out, err := ext.Cmd.StdoutPipe()
if err != nil {
return nil, fmt.Errorf("stdoutpipe: %v", err)
}
if err := ext.Cmd.Start(); err != nil {
return nil, fmt.Errorf("exec: %v", err)
}
b := bufio.NewReaderSize(out, 1<<20)
return b, nil
}
// Wait waits for the command started with Run to exit.
func (ext *ExtDis) Wait() error {
return ext.Cmd.Wait()
}
// testExtDis tests a set of byte sequences against an external disassembler.
// The disassembler is expected to produce the given syntax and run
// in the given architecture mode (16, 32, or 64-bit).
// The extdis function must start the external disassembler
// and then parse its output, sending the parsed instructions on ext.Dec.
// The generate function calls its argument f once for each byte sequence
// to be tested. The generate function itself will be called twice, and it must
// make the same sequence of calls to f each time.
// When a disassembly does not match the internal decoding,
// allowedMismatch determines whether this mismatch should be
// allowed, or else considered an error.
func testExtDis(
t *testing.T,
syntax string,
arch Mode,
extdis func(ext *ExtDis) error,
generate func(f func([]byte)),
allowedMismatch func(text string, inst *Inst, dec ExtInst) bool,
) {
start := time.Now()
ext := &ExtDis{
Dec: make(chan ExtInst),
Arch: arch,
}
errc := make(chan error)
// First pass: write instructions to input file for external disassembler.
file, f, size, err := writeInst(generate)
if err != nil {
t.Fatal(err)
}
ext.Size = size
ext.File = f
defer func() {
f.Close()
if !*keep {
os.Remove(file)
}
}()
// Second pass: compare disassembly against our decodings.
var (
totalTests = 0
totalSkips = 0
totalErrors = 0
errors = make([]string, 0, 100) // Sampled errors, at most cap
)
go func() {
errc <- extdis(ext)
}()
generate(func(enc []byte) {
dec, ok := <-ext.Dec
if !ok {
t.Errorf("decoding stream ended early")
return
}
inst, text := disasm(syntax, pad(enc))
totalTests++
if *dumpTest {
fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc)
}
if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" {
suffix := ""
if allowedMismatch(text, &inst, dec) {
totalSkips++
if !*mismatch {
return
}
suffix += " (allowed mismatch)"
}
totalErrors++
cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n", enc, text, len(enc), dec.text, dec.nenc, suffix)
if len(errors) >= cap(errors) {
j := rand.Intn(totalErrors)
if j >= cap(errors) {
return
}
errors = append(errors[:j], errors[j+1:]...)
}
errors = append(errors, cmp)
}
})
if *mismatch {
totalErrors -= totalSkips
}
for _, b := range errors {
t.Log(b)
}
if totalErrors > 0 {
t.Fail()
}
t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds())
t.Logf("decoder coverage: %.1f%%;\n", decodeCoverage())
if err := <-errc; err != nil {
t.Fatalf("external disassembler: %v", err)
}
}
// Start address of text.
const start = 0x8000
// writeInst writes the generated byte sequences to a new file
// starting at offset start. That file is intended to be the input to
// the external disassembler.
func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) {
f, err = ioutil.TempFile("", "arm64asm")
if err != nil {
return
}
file = f.Name()
f.Seek(start, io.SeekStart)
w := bufio.NewWriter(f)
defer w.Flush()
size = 0
generate(func(x []byte) {
if debug {
fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):])
}
w.Write(x)
w.Write(zeros[len(x):])
size += len(zeros)
})
return file, f, size, nil
}
var zeros = []byte{0, 0, 0, 0}
// pad pads the code sequence with pops.
func pad(enc []byte) []byte {
if len(enc) < 4 {
enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...)
}
return enc
}
// disasm returns the decoded instruction and text
// for the given source bytes, using the given syntax and mode.
func disasm(syntax string, src []byte) (inst Inst, text string) {
var err error
inst, err = Decode(src)
if err != nil {
text = "error: " + err.Error()
return
}
text = inst.String()
switch syntax {
case "gnu":
text = GNUSyntax(inst)
case "plan9": // [sic]
text = GoSyntax(inst, 0, nil, nil)
default:
text = "error: unknown syntax " + syntax
}
return
}
// decodecoverage returns a floating point number denoting the
// decoder coverage.
func decodeCoverage() float64 {
n := 0
for _, t := range decoderCover {
if t {
n++
}
}
return 100 * float64(1+n) / float64(1+len(decoderCover))
}
// Helpers for writing disassembler output parsers.
// hasPrefix reports whether any of the space-separated words in the text s
// begins with any of the given prefixes.
func hasPrefix(s string, prefixes ...string) bool {
for _, prefix := range prefixes {
for cur_s := s; cur_s != ""; {
if strings.HasPrefix(cur_s, prefix) {
return true
}
i := strings.Index(cur_s, " ")
if i < 0 {
break
}
cur_s = cur_s[i+1:]
}
}
return false
}
// isHex reports whether b is a hexadecimal character (0-9a-fA-F).
func isHex(b byte) bool {
return ('0' <= b && b <= '9') || ('a' <= b && b <= 'f') || ('A' <= b && b <= 'F')
}
// parseHex parses the hexadecimal byte dump in hex,
// appending the parsed bytes to raw and returning the updated slice.
// The returned bool reports whether any invalid hex was found.
// Spaces and tabs between bytes are okay but any other non-hex is not.
func parseHex(hex []byte, raw []byte) ([]byte, bool) {
hex = bytes.TrimSpace(hex)
for j := 0; j < len(hex); {
for hex[j] == ' ' || hex[j] == '\t' {
j++
}
if j >= len(hex) {
break
}
if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) {
return nil, false
}
raw = append(raw, unhex(hex[j])<<4|unhex(hex[j+1]))
j += 2
}
return raw, true
}
func unhex(b byte) byte {
if '0' <= b && b <= '9' {
return b - '0'
} else if 'A' <= b && b <= 'F' {
return b - 'A' + 10
} else if 'a' <= b && b <= 'f' {
return b - 'a' + 10
}
return 0
}
// index is like bytes.Index(s, []byte(t)) but avoids the allocation.
func index(s []byte, t string) int {
i := 0
for {
j := bytes.IndexByte(s[i:], t[0])
if j < 0 {
return -1
}
i = i + j
if i+len(t) > len(s) {
return -1
}
for k := 1; k < len(t); k++ {
if s[i+k] != t[k] {
goto nomatch
}
}
return i
nomatch:
i++
}
}
// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s.
// If s must be rewritten, it is rewritten in place.
func fixSpace(s []byte) []byte {
s = bytes.TrimSpace(s)
for i := 0; i < len(s); i++ {
if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' {
goto Fix
}
}
return s
Fix:
b := s
w := 0
for i := 0; i < len(s); i++ {
c := s[i]
if c == '\t' || c == '\n' {
c = ' '
}
if c == ' ' && w > 0 && b[w-1] == ' ' {
continue
}
b[w] = c
w++
}
if w > 0 && b[w-1] == ' ' {
w--
}
return b[:w]
}
// Fllowing regular expressions matches instructions using relative addressing mode.
// pcrel matches B instructions and BL instructions.
// pcrelr matches instrucions which consisted of register arguments and label arguments.
// pcrelim matches instructions which consisted of register arguments, immediate
// arguments and lable arguments.
// pcrelrzr and prcelimzr matches instructions when register arguments is zero register.
// pcrelprfm matches PRFM instructions when arguments consisted of register and lable.
// pcrelprfmim matches PRFM instructions when arguments consisted of immediate and lable.
var (
pcrel = regexp.MustCompile(`^((?:.* )?(?:b|bl)x?(?:\.)?(?:eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|nv)?) 0x([0-9a-f]+)$`)
pcrelr = regexp.MustCompile(`^((?:.*)?(?:ldr|adrp|adr|cbnz|cbz|ldrsw) (?:x|w|s|d|q)(?:[0-9]+,)) 0x([0-9a-f]+)$`)
pcrelrzr = regexp.MustCompile(`^((?:.*)?(?:ldr|adrp|adr|cbnz|cbz|ldrsw) (?:x|w)zr,) 0x([0-9a-f]+)$`)
pcrelim = regexp.MustCompile(`^((?:.*)?(?:tbnz|tbz) (?:x|w)(?:[0-9]+,) (?:#[0-9a-f]+,)) 0x([0-9a-f]+)$`)
pcrelimzr = regexp.MustCompile(`^((?:.*)?(?:tbnz|tbz) (?:x|w)zr, (?:#[0-9a-f]+,)) 0x([0-9a-f]+)$`)
pcrelprfm = regexp.MustCompile(`^((?:.*)?(?:prfm) (?:[0-9a-z]+,)) 0x([0-9a-f]+)$`)
pcrelprfmim = regexp.MustCompile(`^((?:.*)?(?:prfm) (?:#0x[0-9a-f]+,)) 0x([0-9a-f]+)$`)
)
// Round is the multiple of the number of instructions that read from Json file.
// Round used as seed value for pseudo-random number generator provides the same sequence
// in the same round run for the external disassembler and decoder.
var Round int
// condmark is used to mark conditional instructions when need to generate and test
// conditional instructions.
var condmark bool = false
// Generate instruction binary according to Json file
// Encode variable field of instruction with random value
func doFuzzy(inst *InstJson, Ninst int) {
var testdata uint32
var NonDigRE = regexp.MustCompile(`[\D]`)
rand.Seed(int64(Round + Ninst))
off := 0
DigBit := ""
if condmark == true && !strings.Contains(inst.Bits, "cond") {
inst.Enc = 0xffffffff
} else {
for _, f := range strings.Split(inst.Bits, "|") {
if i := strings.Index(f, ":"); i >= 0 {
// consider f contains "01:2" and "Rm:5"
DigBit = f[:i]
m := NonDigRE.FindStringSubmatch(DigBit)
if m == nil {
DigBit = strings.TrimSpace(DigBit)
s := strings.Split(DigBit, "")
for i := 0; i < len(s); i++ {
switch s[i] {
case "1", "(1)":
testdata |= 1 << uint(31-off)
}
off++
}
} else {
// DigBit is "Rn" or "imm3"
n, _ := strconv.Atoi(f[i+1:])
if DigBit == "cond" && condmark == true {
r := uint8(Round)
for i := n - 1; i >= 0; i-- {
switch (r >> uint(i)) & 1 {
case 1:
testdata |= 1 << uint(31-off)
}
off++
}
} else {
for i := 0; i < n; i++ {
r := rand.Intn(2)
switch r {
case 1:
testdata |= 1 << uint(31-off)
}
off++
}
}
}
continue
}
for _, bit := range strings.Fields(f) {
switch bit {
case "0", "(0)":
off++
continue
case "1", "(1)":
testdata |= 1 << uint(31-off)
default:
r := rand.Intn(2)
switch r {
case 1:
testdata |= 1 << uint(31-off)
}
}
off++
}
}
if off != 32 {
log.Printf("incorrect bit count for %s %s: have %d", inst.Name, inst.Bits, off)
}
inst.Enc = testdata
}
}
// Generators.
//
// The test cases are described as functions that invoke a callback repeatedly,
// with a new input sequence each time. These helpers make writing those
// a little easier.
// JSONCases generates ARM64 instructions according to inst.json.
func JSONCases(t *testing.T) func(func([]byte)) {
return func(try func([]byte)) {
data, err := ioutil.ReadFile("inst.json")
if err != nil {
t.Fatal(err)
}
var insts []InstJson
var instsN []InstJson
// Change N value to get more cases only when condmark=false.
N := 100
if condmark == true {
N = 16
}
if err := json.Unmarshal(data, &insts); err != nil {
t.Fatal(err)
}
// Append instructions to get more test cases.
for i := 0; i < N; {
for _, inst := range insts {
instsN = append(instsN, inst)
}
i++
}
Round = 0
for i := range instsN {
if i%len(insts) == 0 {
Round++
}
doFuzzy(&instsN[i], i)
}
for _, inst := range instsN {
if condmark == true && inst.Enc == 0xffffffff {
continue
}
enc := inst.Enc
try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)})
}
}
}
// condCases generates conditional instructions.
func condCases(t *testing.T) func(func([]byte)) {
return func(try func([]byte)) {
condmark = true
JSONCases(t)(func(enc []byte) {
try(enc)
})
}
}
// hexCases generates the cases written in hexadecimal in the encoded string.
// Spaces in 'encoded' separate entire test cases, not individual bytes.
func hexCases(t *testing.T, encoded string) func(func([]byte)) {
return func(try func([]byte)) {
for _, x := range strings.Fields(encoded) {
src, err := hex.DecodeString(x)
if err != nil {
t.Errorf("parsing %q: %v", x, err)
}
try(src)
}
}
}
// testdataCases generates the test cases recorded in testdata/cases.txt.
// It only uses the inputs; it ignores the answers recorded in that file.
func testdataCases(t *testing.T, syntax string) func(func([]byte)) {
var codes [][]byte
input := filepath.Join("testdata", syntax+"cases.txt")
data, err := ioutil.ReadFile(input)
if err != nil {
t.Fatal(err)
}
for _, line := range strings.Split(string(data), "\n") {
line = strings.TrimSpace(line)
if line == "" || strings.HasPrefix(line, "#") {
continue
}
f := strings.Fields(line)[0]
i := strings.Index(f, "|")
if i < 0 {
t.Errorf("parsing %q: missing | separator", f)
continue
}
if i%2 != 0 {
t.Errorf("parsing %q: misaligned | separator", f)
}
code, err := hex.DecodeString(f[:i] + f[i+1:])
if err != nil {
t.Errorf("parsing %q: %v", f, err)
continue
}
codes = append(codes, code)
}
return func(try func([]byte)) {
for _, code := range codes {
try(code)
}
}
}