blob: 905469bb8a6fd8703610a3e2a990759bf1011de3 [file] [edit]
// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package xmlspec implements the parser of the A64 instruction set XML specification.
// It parses the XML files and returns a list of Instruction objects.
// The expected data is fetched from:
//
// https://developer.arm.com/-/cdn-downloads/permalink/Exploration-Tools-A64-ISA/ISA_A64/ISA_A64_xml_A_profile-2025-12.tar.gz
//
// Pass directory ISA_A64_xml_A_profile-2025-12 to ParseXMLFiles to get the instructions.
//
// Currently the parser only processes SVE and SVE2 instructions.
// Other instructions are still unmarshalled, but no further processing is applied to them.
package xmlspec
import (
"encoding/xml"
"flag"
"fmt"
"io"
"log"
"os"
"path"
"regexp"
"sort"
"strconv"
"strings"
"sync"
)
// debug holds the value of the -debug flag; ProcessXMLFiles passes it to debugInfo.
var debug = flag.Int("debug", 0, "enable debug output")
var (
// reZREG matches a <Z...> symbol that is at the start of the text or is
// preceded by a character other than '/'. hasZREG relies on this so that
// the <ZM> in "<Pg>/<ZM>" is not taken for a Z register.
reZREG = regexp.MustCompile(`(^|[^/])<Z[A-Za-z1-9]+>`)
// rePREG matches a <P...> symbol anywhere in the text.
rePREG = regexp.MustCompile(`<P[A-Za-z1-9]+>`)
)
// fixedElemType identifies which kind of fixed (literal) symbol a
// fixedElemRule recognizes in the plain-text part of an assembly template.
type fixedElemType int
const (
// FixedArrangement marks a literal arrangement suffix such as ".B".
FixedArrangement fixedElemType = iota
// FixedLSL marks a literal "LSL #n" shift.
FixedLSL
// FixedSXTW marks a literal SXTW extension.
FixedSXTW
// FixedUXTW marks a literal UXTW extension.
FixedUXTW
// FixedModAmt marks a literal "#n" amount. parseOperands only records it
// when the preceding element's symbol is <mod>.
FixedModAmt
)
// fixedElemRule describes one fixed symbol that may appear in the text of an
// assembly template.
type fixedElemRule struct {
// re is matched against the text segment.
re *regexp.Regexp
// t is the kind of fixed symbol the rule stands for.
t fixedElemType
// val is the value recorded on the preceding element when re matches.
val string
}
// fixedElemRules lists every fixed symbol that parseOperands looks for in the
// text segments of an assembly template. parseOperands tries every rule
// against each text segment and does not stop at the first match, so a
// segment such as "LSL #1" matches both the FixedLSL and the FixedModAmt rule.
var fixedElemRules = []fixedElemRule{
{regexp.MustCompile(`\.B`), FixedArrangement, "B"},
{regexp.MustCompile(`\.H`), FixedArrangement, "H"},
{regexp.MustCompile(`\.S`), FixedArrangement, "S"},
{regexp.MustCompile(`\.D`), FixedArrangement, "D"},
{regexp.MustCompile(`\.Q`), FixedArrangement, "Q"},
{regexp.MustCompile(`\.N`), FixedArrangement, "N"},
{regexp.MustCompile(`LSL #1`), FixedLSL, "1"},
{regexp.MustCompile(`LSL #2`), FixedLSL, "2"},
{regexp.MustCompile(`LSL #3`), FixedLSL, "3"},
{regexp.MustCompile(`LSL #4`), FixedLSL, "4"},
{regexp.MustCompile(`SXTW`), FixedSXTW, "SXTW"},
{regexp.MustCompile(`UXTW`), FixedUXTW, "UXTW"},
// FixedModAmt rules are special: they only take effect when the
// preceding element's symbol is <mod>.
{regexp.MustCompile(`#1`), FixedModAmt, "1"},
{regexp.MustCompile(`#2`), FixedModAmt, "2"},
{regexp.MustCompile(`#3`), FixedModAmt, "3"},
{regexp.MustCompile(`#4`), FixedModAmt, "4"},
{regexp.MustCompile(`#8`), FixedModAmt, "8"},
}
// operandRule maps the textual form of an operand to an operand class name.
type operandRule struct {
// re is matched against the operand's Name.
re *regexp.Regexp
// class is the operand class (an "AC_..." string) assigned on a match.
class string
}
// operandRules is the ordered table used by operandType to classify an
// operand by its name. Rules are tried from top to bottom and the first
// matching rule wins, so the order of the entries is significant.
var operandRules = []operandRule{
// AC_ARNG: Registers with arrangement (e.g. .B, .D, .S) or type variable (<T>).
{regexp.MustCompile(`^<[PVZ][a-zA-Z]+>\.([1-9]*[BDHQS]|<T[a-z]*>)$`), "AC_ARNG"},
// AC_ZREG: Scalable vector registers (Z).
{regexp.MustCompile(`^<Z[a-z]+>$`), "AC_ZREG"},
// AC_PREG: Predicate registers (P).
{regexp.MustCompile(`^<P[a-z]{1}>$`), "AC_PREG"},
// AC_PREG: Predicate-as-counter registers (PN).
{regexp.MustCompile(`^<PN[a-z]{1}>$`), "AC_PREG"},
// AC_PREGZM: Predicate registers with merging predication (/M).
{regexp.MustCompile(`^<P[N]?[a-z]{1}>\/M$`), "AC_PREGZM"},
// AC_PREGZM: Predicate registers with zeroing predication (/Z).
{regexp.MustCompile(`^<P[N]?[a-z]{1}>\/(Z|<ZM>)$`), "AC_PREGZM"},
// AC_SPZGREG: Standard scalar registers (W, X, R).
{regexp.MustCompile(`^(<[WX][a-z]+>!?|<R><[a-z]+>|X[0-9]+|{<[WX][a-z]+>})$`), "AC_SPZGREG"},
// AC_SPZGREG: Scalar registers or stack pointer (SP).
{regexp.MustCompile(`^<([WX][a-z]{1}|R><n)\|[W]?SP>$`), "AC_SPZGREG"},
// AC_VREG: V registers (SIMD).
{regexp.MustCompile(`(^<Dd>|^<V>.*)$`), "AC_VREG"},
// AC_ARNGIDX: Register arrangement with index.
{regexp.MustCompile(`^<[VZ][a-zA-Z]*>\.([1-9]*[BDHQS]|<T[a-z]*>)\[(<(index|imm)[1-9]*>|[0-9]+)\]$`), "AC_ARNGIDX"},
{regexp.MustCompile(`^{[\s]+<[PVZ][a-z]+[1-4]*>\.[BDHQS],*[\s]*}\[<index>\]$`), "AC_ARNGIDX"},
// AC_PREGIDX: P Registers with immediate index.
{regexp.MustCompile(`^(<P[N]?[a-z]{1}>)\[<[a-z]+>\]$`), "AC_PREGIDX"},
// AC_ZREGIDX: Z Registers with (optional) immediate index.
{regexp.MustCompile(`^(<Z[a-z]{1}>)\{?\[<[a-z]+>\]\}?$`), "AC_ZREGIDX"},
// AC_REGLIST1: List of 1 register with arrangement.
{regexp.MustCompile(`^{[\s]+<[PVZ][a-z]+>\.([1-9]*[BDHQS]|<T[a-z]*>)[\s]+}$`), "AC_REGLIST1"},
// AC_REGLIST2: List of 2 registers with arrangement.
{regexp.MustCompile(`^{[\s]+(<[PVZ][a-z]+([1-2]|\+[1-2])*>\.([1-9]*[BDHQS]|<T[a-z]*>),*[\s]*){2}}$`), "AC_REGLIST2"},
// AC_REGLIST3: List of 3 registers with arrangement.
{regexp.MustCompile(`^{[\s]+(<[PVZ][a-z]+([1-3]|\+[1-3])*>\.([1-9]*[BDHQS]|<T[a-z]*>)(,|-)*[\s]*){3}}$`), "AC_REGLIST3"},
// AC_REGLIST4: List of 4 registers with arrangement.
{regexp.MustCompile(`^{[\s]+(<[PVZ][a-z]+([1-4]|\+[1-4])*>\.([1-9]*[BDHQS]|<T[a-z]*>),*[\s]*){4}}$`), "AC_REGLIST4"},
// AC_REGLIST_RANGE: List of registers in a range.
{regexp.MustCompile(`^{[\s]+(<[PVZ][a-z]+[1-2]*>\.([1-9]*[BDHQS]|<T[a-z]*>)-*[\s]*){2}}$`), "AC_REGLIST_RANGE"},
{regexp.MustCompile(`^{[\s]+(<[PVZ][a-z]+[14]>\.([BDHQS]|<T[a-z]*>)-*[\s]*){2}}$`), "AC_REGLIST_RANGE"}, // It's 4 registers, but in the mnemonic it's 2.
// AC_MEMOFF: Memory operand with immediate offset.
{regexp.MustCompile(`^\[<Xn\|SP>([\s]*\{,[\s]*#([0-9]+|<[a-z]+>)\})*\]$`), "AC_MEMOFF"},
{regexp.MustCompile(`^\[<Z[a-z]+>\.[BDHQS](\{,[\s]*#<[a-z]+>\})*\]$`), "AC_MEMOFF"},
// AC_MEMOFFMULVL: Memory operand with immediate offset that is multiplied by the vector's in-memory size.
{regexp.MustCompile(`^\[<Xn\|SP>[\s]*\{,[\s]*#<[a-z]+>,[\s]*MUL[\s]+VL[\s]*\}\]$`), "AC_MEMOFFMULVL"},
// AC_MEMEXT: Memory operand with register offset and optional extension (signed or unsigned) or shift (logical shift left).
{regexp.MustCompile(`^\[<Xn\|SP>,[\s]*(<X[a-z]+>|<Z[a-z]+>\.[BDHQS])\]$`), "AC_MEMEXT"},
{regexp.MustCompile(`^\[<Xn\|SP>\{?,[\s]*<X[a-z]+>\{?,[\s]*LSL[\s]+(<amount>|#[0-9]+)\}?\]$`), "AC_MEMEXT"},
{regexp.MustCompile(`^\[(<Xn\|SP>|<Z[a-z]+>\.[BDHQS])\{,[\s]*<X[a-z]+>\}\]$`), "AC_MEMEXT"},
{regexp.MustCompile(`^\[<Xn\|SP>,[\s]*<Z[a-z]+>\.[BDHQS],[\s]*(<mod>([\s]+#[0-9]+)*|LSL[\s]+#[0-9]+)\]$`), "AC_MEMEXT"},
{regexp.MustCompile(`^\[<Z[a-z]+>\.(<T>|[BDHQS]),[\s]*<Z[a-z]+>\.(<T>|[BDHQS])\{?,[\s]*(<mod>|SXTW\{?|UXTW\{?)[\s]*<amount>\}?\]$`), "AC_MEMEXT"},
// AC_SPECIAL: Prefetch operation.
{regexp.MustCompile(`^<prfop>$`), "AC_SPECIAL"},
// AC_SPECIAL: Vector length.
{regexp.MustCompile(`^<vl>$`), "AC_SPECIAL"},
// AC_REG_PATTERN: Register with rotate/replication pattern.
{regexp.MustCompile(`^<[WX][dn]+>(\{\s*,\s*<pattern>(\{\s*,\s*MUL\s+#<imm>\s*\})?\s*\})?$`), "AC_REG_PATTERN"},
// AC_ZREG_PATTERN: Z register with rotate/replication pattern.
{regexp.MustCompile(`^<Z[dn]+>\.(<T>|[BDHQS])(\{\s*,\s*<pattern>(\{\s*,\s*MUL\s+#<imm>\s*\})?\s*\})?$`), "AC_ZREG_PATTERN"},
// AC_PREGSEL: Predicate register with selector register and immediate index.
{regexp.MustCompile(`^<P[nm]>\.<T>\[\s*<Wv>\s*,\s*<imm>\s*\]$`), "AC_PREGSEL"},
// AC_PREG_PATTERN: Predicate register with pattern.
{regexp.MustCompile(`^<P[dn]>\.<T>(\{\s*,\s*<pattern>\s*\})?$`), "AC_PREG_PATTERN"},
// AC_IMM: Immediate value.
{regexp.MustCompile(`(^#.*)|(<const>)$`), "AC_IMM"},
}
// warmUpCache unmarshals a small, hand-written instruction document into an
// InstructionParsed value so that encoding/xml has already built its
// reflection-based field cache for that type before ParseXMLFiles starts
// decoding files from several goroutines at once.
//
// NOTE(review): the original rationale was that first-time concurrent
// unmarshalling into the same type is not thread-safe. The encoding/xml
// documentation does not state such a restriction; confirm whether this
// warm-up is still required.
func warmUpCache() {
var inst InstructionParsed
// Unmarshal a more complete XML to warm up the cache for nested types.
// This ensures that reflection data for all referenced types is initialized
// sequentially before parallel workers start.
dummyXML := `
<instructionsection>
<docvars>
<docvar key="a" value="b"/>
</docvars>
<classes>
<iclass>
<encoding name="e">
<box hibit="31" width="1" name="n">
<c>1</c>
</box>
<asmtemplate>
<text>ADD</text>
<a link="s" hover="h">X0</a>
</asmtemplate>
</encoding>
</iclass>
</classes>
<explanations>
<explanation>
<symbol link="s">X0</symbol>
<account encodedin="e">
<intro>
<para>text</para>
</intro>
</account>
<definition encodedin="e">
<intro>text</intro>
<table>
<tgroup cols="1">
<thead>
<row>
<entry>Val</entry>
</row>
</thead>
<tbody>
<row>
<entry>1</entry>
</row>
</tbody>
</tgroup>
</table>
</definition>
</explanation>
</explanations>
</instructionsection>
`
// The decoded value and any error are discarded on purpose: only the side
// effect of having decoded the types once is of interest here.
_ = xml.Unmarshal([]byte(dummyXML), &inst)
}
// init runs warmUpCache once at package load time, before any caller can
// reach ParseXMLFiles.
func init() {
warmUpCache()
}
// ParseXMLFiles parses every file in dir whose extension is ".xml", one
// goroutine per file, and returns the results.
//
// The returned slice has one slot per directory entry, in directory order.
// A slot stays nil when the entry is not an ".xml" file or when parse
// returned nil for it. Failure to read the directory terminates the program
// through log.Fatal.
func ParseXMLFiles(dir string) []*InstructionParsed {
	log.Println("Start parsing the xml files")
	entries, err := os.ReadDir(dir)
	if err != nil {
		log.Fatal(err)
	}
	results := make([]*InstructionParsed, len(entries))
	var pending sync.WaitGroup
	for slot := range entries {
		base := entries[slot].Name()
		if path.Ext(base) != ".xml" {
			continue
		}
		full := path.Join(dir, base)
		pending.Add(1)
		// Each worker writes only to its own slot, so no locking is needed.
		go func(full string, slot int) {
			defer pending.Done()
			parsed := parse(full)
			if parsed == nil {
				return
			}
			results[slot] = parsed
		}(full, slot)
	}
	pending.Wait()
	log.Println("Finish parsing the xml files")
	return results
}
// parse reads the XML file f and decodes it into an InstructionParsed whose
// file field is set to f.
//
// It returns nil when the document root is not <instructionsection> or when
// the decoded Type is neither "instruction" nor "alias". Any other I/O or
// decoding failure terminates the program through log.Fatalf.
func parse(f string) *InstructionParsed {
	handle, err := os.Open(f)
	if err != nil {
		log.Fatalf("Open file %s failed: %v\n", f, err)
	}
	defer handle.Close()

	raw, err := io.ReadAll(handle)
	if err != nil {
		log.Fatalf("io.ReadAll %s failed: %v\n", f, err)
	}

	inst := new(InstructionParsed)
	inst.file = f
	if err = xml.Unmarshal(raw, inst); err != nil {
		// Files with a different root element are not instruction pages;
		// skip them instead of failing.
		if !strings.HasPrefix(err.Error(), "expected element type <instructionsection>") {
			log.Fatalf("Unmarshal %s failed: %v\n", f, err)
		}
		return nil
	}

	switch inst.Type {
	case "instruction", "alias":
		return inst
	}
	return nil
}
// setBinary returns code with the bit(s) selected by bitVal updated according
// to value: "0" or "(0)" clears them, "1" or "(1)" sets them, and "x" leaves
// code unchanged. Any other value terminates the program through log.Fatalf.
func (inst *InstructionParsed) setBinary(code, bitVal uint32, value string) uint32 {
	if value == "x" {
		// Unspecified bit: nothing to record.
		return code
	}
	if value == "0" || value == "(0)" {
		return code &^ bitVal
	}
	if value == "1" || value == "(1)" {
		return code | bitVal
	}
	log.Fatalf("unexpected binary value %s in %s\n", value, inst.file)
	return code
}
// setMask returns code with the bit(s) selected by bitVal set whenever value
// denotes a specified bit ("0", "1", "(0)" or "(1)"). "x" leaves code
// unchanged; any other value terminates the program through log.Fatalf.
func (inst *InstructionParsed) setMask(code, bitVal uint32, value string) uint32 {
	if value == "x" {
		// Unspecified bit: it does not take part in the mask.
		return code
	}
	// See the comment of [Regdiagram.mask]
	switch value {
	case "0", "1", "(0)", "(1)":
		return code | bitVal
	}
	log.Fatalf("unexpected mask value %s in %s\n", value, inst.file)
	return code
}
// boxEncoding folds the cells of box b into a 32-bit value. Starting at bit
// b.HiBit and moving one bit down per cell, it calls callBack with the value
// accumulated so far, the single-bit mask for the current position and the
// cell's text, and keeps whatever callBack returns.
//
// A non-numeric HiBit, or a cell that carries a colspan, terminates the
// program through log.Fatalf.
func (inst *InstructionParsed) boxEncoding(b Box, callBack func(uint32, uint32, string) uint32) uint32 {
	top, err := strconv.Atoi(b.HiBit)
	if err != nil {
		log.Fatalf("convert HiBit to int failed, HiBit = %s in %s\n", b.HiBit, inst.file)
	}
	var acc uint32
	for k, cell := range b.Cs {
		if cell.ColSpan != "" {
			log.Fatalf("unexpected colspan in %s\n", inst.file)
		}
		// The k-th cell describes bit top-k.
		acc = callBack(acc, uint32(1<<(top-k)), cell.Value)
	}
	return acc
}
// supported reports whether inst has an "instr-class" docvar whose value is
// "sve" or "sve2".
func (inst *InstructionParsed) supported() bool {
	for _, dv := range inst.DocVars {
		if dv.Key != "instr-class" {
			continue
		}
		switch dv.Value {
		case "sve", "sve2":
			return true
		}
	}
	return false
}
// extractBinary walks the regdiagram of every instruction class of inst and
// records on it:
//   - fixedBin: the bits whose value is fixed by the diagram,
//   - mask: the bits that are specified at all (see setMask),
//   - varBin: the half-open bit range [lo, hi) of every named variable box.
//
// The regdiagram is then marked as Parsed. Instructions for which supported
// reports false are left untouched. Malformed diagrams (non-numeric hibit or
// colspan, unnamed or duplicated variable boxes, boxes without cells)
// terminate the program through log.Fatalf.
func (inst *InstructionParsed) extractBinary() {
	if !inst.supported() {
		return
	}
	for i := range inst.Classes.Iclass {
		iclass := &inst.Classes.Iclass[i]
		regDiagram := &iclass.RegDiagram
		bin, mask := uint32(0), uint32(0)
		for _, box := range regDiagram.Boxes {
			switch {
			case len(box.Cs) > 1 || (len(box.Cs) == 1 && box.Cs[0].ColSpan == ""):
				// Fixed bits
				bin |= inst.boxEncoding(box, inst.setBinary)
				mask |= inst.boxEncoding(box, inst.setMask)
			case len(box.Cs) == 1:
				// Named variable bits (a single cell carrying a colspan).
				hiBit, err := strconv.Atoi(box.HiBit)
				if err != nil {
					log.Fatalf("convert HiBit to int failed, HiBit = %s in %s\n", box.HiBit, inst.file)
				}
				span, err := strconv.Atoi(box.Cs[0].ColSpan)
				if err != nil {
					log.Fatalf("convert ColSpan to int failed, ColSpan = %s in %s\n", box.Cs[0].ColSpan, inst.file)
				}
				if box.Name == "" {
					log.Fatalf("empty name in named box in %s\n", inst.file)
				}
				if regDiagram.varBin == nil {
					regDiagram.varBin = make(map[string]bitRange)
				}
				if _, dup := regDiagram.varBin[box.Name]; dup {
					log.Fatalf("duplicate name in named box in %s\n", inst.file)
				}
				// Arm provides the high bit as inclusive; store it as exclusive.
				hi := hiBit + 1
				regDiagram.varBin[box.Name] = bitRange{
					hi: hi,
					lo: hi - span,
				}
			default:
				log.Fatalf("unrecognized box in %s\n", inst.file)
			}
		}
		regDiagram.fixedBin = bin
		regDiagram.mask = mask
		regDiagram.Parsed = true
		if inst.Title == "URSQRTE -- A64" || inst.Title == "URECPE -- A64" {
			// Special case, its "size" box is actually not specified in the assembler symbol section.
			// By reading the decoding ASL we know that this "size" box should be 0b10...
			regDiagram.fixedBin |= uint32(1 << 23)
		}
		// Only inspect the decode pseudocode when there is exactly one
		// section and it actually contains a pseudocode entry; indexing
		// Ps[0] on an empty list would panic.
		if len(iclass.PsSection) != 1 || len(iclass.PsSection[0].Ps) == 0 {
			continue
		}
		squashedPs := iclass.PsSection[0].Ps[0].PSText
		if !strings.Contains(strings.Join(squashedPs, "\n"), "if size IN {'0x'} then EndOfDecode") {
			continue
		}
		// Very ugly encoding specification in the decoding ASL. We have to set
		// the high bit of the size box to 1.
		// Example instruction is "Unsigned divide (predicated)":
		//   UDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
		sizeRange, ok := regDiagram.varBin["size"]
		if !ok {
			log.Fatalf("size box not found in %s", inst.file)
		}
		if sizeRange.hi != 24 || sizeRange.lo != 22 {
			log.Fatalf("unexpecetd size box in %s", inst.file)
		}
		regDiagram.fixedBin |= uint32(1 << 23)
	}
}
// processEncodings fills in the derived fields of every encoding of inst
// whose instruction class is supported: the alias flag, the refined fixed
// bits and mask, the operands, the arm64 and Go opcodes, the cleaned-up
// assembly template and the operand classes. Operands are then reordered by
// sortOperands and the encoding is marked as Parsed.
//
// Instructions for which supported reports false, and encodings for which
// instClass reports false, are skipped.
func (inst *InstructionParsed) processEncodings() {
	if !inst.supported() {
		return
	}
	isAlias := inst.Type == "alias"
	// These pages do not specify the "size" box in the assembler symbol
	// section for the [B] and [W] variants: for [B] it is 0b00 (nothing to
	// do) and for [W] it is 0b11.
	patchSizeForW := false
	switch inst.Title {
	case "REVB, REVH, REVW -- A64",
		"SXTB, SXTH, SXTW -- A64",
		"UXTB, UXTH, UXTW -- A64":
		patchSizeForW = true
	}
	for ci := range inst.Classes.Iclass {
		cls := &inst.Classes.Iclass[ci]
		for ei := range cls.Encodings {
			enc := &cls.Encodings[ei]
			// Set the instruction class; skip unsupported ones.
			if !enc.instClass() {
				continue
			}
			enc.Alias = isAlias
			// Refine the known bits and mask with the encoding's own boxes.
			fixed, care := cls.RegDiagram.fixedBin, cls.RegDiagram.mask
			for _, box := range enc.Boxes {
				fixed |= inst.boxEncoding(box, inst.setBinary)
				care |= inst.boxEncoding(box, inst.setMask)
			}
			enc.Binary = fixed
			enc.mask = care
			inst.parseOperands(enc)
			inst.arm64Opcode(enc)
			inst.goOpcode(enc)
			inst.template(enc)
			inst.operandsType(enc)
			enc.sortOperands()
			if patchSizeForW {
				mnemonic := enc.Operands[0].Name
				if mnemonic[len(mnemonic)-1] == 'W' {
					enc.Binary |= uint32(0b11 << 22)
				}
			}
			enc.Parsed = true
		}
	}
}
// findExplanation returns a pointer to the first explanation of inst whose
// symbol link equals link, or nil when there is none.
//
// The pointer refers to the element stored in inst, not to a copy, so the
// lookup does not duplicate an Explanation per iteration and the result
// stays in sync with the instruction.
func (inst *InstructionParsed) findExplanation(link string) *Explanation {
	for i := range inst.Explanations.Explanations {
		if inst.Explanations.Explanations[i].Symbol.Link == link {
			return &inst.Explanations.Explanations[i]
		}
	}
	return nil
}
// trimXMLEscape returns s with every "&lt;" replaced by "<" and every "&gt;"
// replaced by ">". Other entities are left as they are.
func trimXMLEscape(s string) string {
	unescape := strings.NewReplacer("&lt;", "<", "&gt;", ">")
	return unescape.Replace(s)
}
// allFixedSymbolsLock guards allFixedSymbols, which parseOperands updates
// from several goroutines.
var allFixedSymbolsLock sync.Mutex
// allFixedSymbols is used for debugging only. The outer key is the fixed
// symbol; the inner key is the text it was attached to (the part of the
// operand parsed so far), and the inner value is the first instruction
// recorded for that pair.
var allFixedSymbols = map[string]map[string]*InstructionParsed{}
// parseOperands splits the assembly template of enc into operands and appends
// them to enc.Operands; the mnemonic becomes operand 0. Each operand receives
// the Elements collected from the <a> entries that belong to it, and fixed
// symbols found in plain-text entries (see fixedElemRules) are recorded on the
// element that precedes them. enc.Asm is set to the concatenation of all
// template values.
//
// A linked entry without a matching explanation, an unknown table class, an
// empty explanation or encodedin, a fixed symbol with no preceding element, or
// leftover text/elements at the end terminate the program through log.Fatalf.
func (inst *InstructionParsed) parseOperands(enc *EncodingParsed) {
// This is the most vulnerable part.
//
// The mnemonic and operands of an instruction are sequentially recorded
// in TextA, and we need to parse them out. According to the following rules:
// 1. The mnemonic and the operand are separated by " ".
// 2. The operands are separated by ", ". Symbols without intervals belong to
// the same operand.
// 3. An operand may contain [] and {}, and the brackets contained in the
// operand must be in pairs. For example <R><m>{, <extend> {#<amount>}}
// is one operand, not two.
//
// After this step we'll get all operands of this instruction encoding, the
// operand interval symbol ", " will be discarded.
asm, oprAsm := "", ""
leftCurly, leftSquare := 0, 0
elems := []Element{}
// recordFixedSymbol remembers, under allFixedSymbolsLock, the first
// instruction seen for each (symbol, val) pair in allFixedSymbols.
recordFixedSymbol := func(symbol, val string) {
allFixedSymbolsLock.Lock()
if _, ok := allFixedSymbols[symbol]; !ok {
allFixedSymbols[symbol] = make(map[string]*InstructionParsed)
}
if _, ok := allFixedSymbols[symbol][val]; ok {
allFixedSymbolsLock.Unlock()
return
}
allFixedSymbols[symbol][val] = inst
allFixedSymbolsLock.Unlock()
}
for m, ta := range enc.AsmTemplate.TextA {
val := ta.Value
if link := ta.Link; link != "" { // An <a> element
// Check if it's a special operand with fixed candidates.
exp := inst.findExplanation(link)
if exp == nil {
log.Fatalf("explanation not found for link %s in %s\n", link, inst.file)
}
var explanation, encodedin string
if tblClass := exp.Definition.Table.Class; tblClass != "" {
switch tblClass {
case "valuetable":
// Flatten the value table into tab-separated lines: the header row
// first, then one line per body row.
heads := []string{}
for _, entry := range exp.Definition.Table.TGroup.THead.Row.Entries {
heads = append(heads, trimXMLEscape(entry.Value))
}
bodies := [][]string{}
for i, row := range exp.Definition.Table.TGroup.TBody.Row {
bodies = append(bodies, []string{})
for _, entry := range row.Entries {
bodies[i] = append(bodies[i], entry.Value)
}
}
explanation = strings.Join(heads, "\t")
for _, body := range bodies {
explanation += "\n" + strings.Join(body, "\t")
}
if explanation == "" {
log.Fatalf("explanation is empty in %s\n", inst.file)
}
explanation = trimXMLEscape(exp.Definition.Intro) + "\n" + explanation
encodedin = exp.Definition.Encodedin
if encodedin == "" {
log.Fatalf("definition.encodedin is empty in %s\n", inst.file)
}
default:
log.Fatalf("unknown table class %s in %s\n", tblClass, inst.file)
}
} else if exp.Account.Encodedin != "" {
explanation = trimXMLEscape(exp.Account.Intro)
if explanation == "" {
log.Fatalf("account.intro.para is empty in %s\n", inst.file)
}
encodedin = exp.Account.Encodedin
if encodedin == "" {
log.Fatalf("account.encodedin is empty in %s\n", inst.file)
}
}
val = trimXMLEscape(val)
elem := Element{encodedIn: encodedin, textExp: explanation, symbol: val}
// Some hardcoded logic to populate register type and the presence
// of <mod> for deduplication purposes
if strings.HasPrefix(val, "<X") {
elem.fixedScalarWidth = 64
recordFixedSymbol("X", val)
} else if strings.HasPrefix(val, "<W") {
elem.fixedScalarWidth = 32
recordFixedSymbol("W", val)
} else if val == "<mod>" {
recordFixedSymbol("mod", val)
elem.hasMod = true
} else if strings.HasPrefix(val, "<P") {
recordFixedSymbol("P", val)
elem.isP = true
} else if strings.HasPrefix(val, "<Z") {
recordFixedSymbol("Z", val)
elem.isZ = true
}
elems = append(elems, elem)
} else {
// It's a text section, we want to extract fixed symbols if any.
for _, rule := range fixedElemRules {
// NOTE(review): matchCnt is declared inside the rule loop, so it is 0
// again for every rule and the "multiple match" check below cannot
// fire. Several rules may therefore apply to the same text (for
// example "LSL #1" matches both the FixedLSL and the FixedModAmt
// rule).
matchCnt := 0
if rule.re.MatchString(val) {
if len(elems) == 0 {
// These instructions are just named UXTW and SXTW
if rule.t == FixedUXTW && inst.Title == "UXTB, UXTH, UXTW -- A64" {
continue
}
if rule.t == FixedSXTW && inst.Title == "SXTB, SXTH, SXTW -- A64" {
continue
}
log.Fatalf("fixed arrangement symbol %s without preceding element in %s\n", val, inst.file)
}
if matchCnt != 0 {
log.Fatalf("fixed arrangement symbol %s with multiple match in %s\n", val, inst.file)
}
matchCnt++
// The fixed symbol is attached to the most recently collected element.
lastElem := &elems[len(elems)-1]
switch rule.t {
case FixedArrangement:
lastElem.fixedArng = rule.val
case FixedLSL:
lastElem.fixedLSL = rule.val
case FixedSXTW:
lastElem.fixedSXTW = true
case FixedUXTW:
lastElem.fixedUXTW = true
case FixedModAmt:
if lastElem.symbol == "<mod>" {
lastElem.fixedModAmt = rule.val
}
}
// Also book keep the fixed symbol in the global map.
recordFixedSymbol(rule.val, fmt.Sprintf("%s in %s", lastElem.symbol, oprAsm+val))
}
}
}
asm += val
// appendOperand closes the operand accumulated in oprAsm/elems, appends
// it to enc.Operands with its own copy of the elements, and resets both
// accumulators.
appendOperand := func() {
elemsCopy := make([]Element, len(elems))
copy(elemsCopy, elems)
opr := Operand{Name: oprAsm, Elems: elemsCopy}
enc.Operands = append(enc.Operands, opr)
oprAsm = ""
elems = elems[:0]
}
// Parse operands
for n := 0; n < len(val); n++ {
ch := val[n]
switch ch {
case ',':
if leftCurly == 0 && leftSquare == 0 {
// This "," is an interval.
continue
}
case ' ':
if leftCurly == 0 && leftSquare == 0 {
if oprAsm == "" {
// Consecutive space separators.
continue
}
// This first one is mnemonic, followed by operands.
appendOperand()
continue
}
case '{':
leftCurly++
case '[':
leftSquare++
case '}':
leftCurly--
case ']':
leftSquare--
}
oprAsm += string(ch)
}
// The last operand.
if m == len(enc.AsmTemplate.TextA)-1 && leftCurly == 0 && leftSquare == 0 && oprAsm != "" {
appendOperand()
}
}
// Anything left over means the template ended inside an operand.
if oprAsm != "" || len(elems) != 0 {
log.Fatalf("malformed Asmtemplate, oprAsm: %v, elems: %v in %s\n", oprAsm, elems, inst.file)
}
enc.Asm = asm
}
// template rebuilds enc.Asm from the parsed operands so that it has a
// canonical shape: the mnemonic (operand 0), then, if there are further
// operands, a single space followed by those operands joined with ", ".
// It panics when enc has no operands.
func (inst *InstructionParsed) template(enc *EncodingParsed) {
	names := make([]string, 0, len(enc.Operands))
	for k := range enc.Operands {
		names = append(names, enc.Operands[k].Name)
	}
	rebuilt := names[0]
	if rest := names[1:]; len(rest) > 0 {
		rebuilt += " " + strings.Join(rest, ", ")
	}
	enc.Asm = rebuilt
}
// arm64Opcode stores the arm64 opcode of enc in enc.arm64Op: the mnemonic
// (operand 0) prefixed with "A64". An encoding without operands terminates
// the program through log.Fatalf.
func (inst *InstructionParsed) arm64Opcode(enc *EncodingParsed) {
	if len(enc.Operands) < 1 {
		log.Fatalf("Miss mnemonic: %v in %s\n", enc, inst.file)
	}
	mnemonic := enc.Operands[0].Name
	// The "A64" prefix keeps these names apart from Go opcodes, which
	// start with "A".
	enc.arm64Op = "A64" + mnemonic
}
// classString returns the value of the first "instr-class" docvar of enc, or
// the empty string when enc has none.
func (enc *EncodingParsed) classString() string {
	for k := range enc.DocVars {
		if enc.DocVars[k].Key == "instr-class" {
			return enc.DocVars[k].Value
		}
	}
	return ""
}
// instClass sets enc.class from the encoding's "instr-class" docvar and
// reports whether that class is supported ("sve" or "sve2"). For any other
// value enc.class is left untouched and false is returned.
func (enc *EncodingParsed) instClass() bool {
	switch name := enc.classString(); name {
	case "sve":
		enc.class = C_SVE
		return true
	case "sve2":
		enc.class = C_SVE2
		return true
	}
	return false
}
// hasZREG reports whether the assembly text of enc mentions a Z register.
func (enc *EncodingParsed) hasZREG() bool {
	// Special case: <Pg>/<ZM>, <ZM> is not Z register.
	mentionsZ := reZREG.MatchString(enc.Asm)
	return mentionsZ
}
// hasPREG reports whether the assembly text of enc mentions a P register.
func (enc *EncodingParsed) hasPREG() bool {
	mentionsP := rePREG.MatchString(enc.Asm)
	return mentionsP
}
// goOpcodePrefix returns the register-kind part of the Go opcode prefix for
// enc. When enc.prefix is already set it is returned as is. Otherwise, for
// SVE and SVE2 encodings, the result is "Z" if the assembly text mentions a
// Z register, "P" if it mentions only a P register, and "" if neither. Any
// other instruction class terminates the program through log.Fatalf.
//
// NOTE(review): goOpcode stores "A"+prefix in enc.prefix, so a later call
// returns the value with the leading "A" included.
func (enc *EncodingParsed) goOpcodePrefix(inst *InstructionParsed) string {
	if cached := enc.prefix; cached != "" {
		return cached
	}
	if enc.class != C_SVE && enc.class != C_SVE2 {
		log.Fatalf("unknown instruction class %v in %s\n", enc.class, inst.file)
		return ""
	}
	// Z takes precedence over P when both kinds of register appear.
	if enc.hasZREG() {
		return "Z"
	}
	if enc.hasPREG() {
		return "P"
	}
	return ""
}
// goOpcode computes the Go opcode of enc and stores it in enc.GoOp: "A", then
// the result of goOpcodePrefix, then the mnemonic (operand 0). The "A..."
// prefix part is also stored in enc.prefix. An encoding whose GoOp is already
// set is left untouched; an encoding without operands terminates the program
// through log.Fatalf.
func (inst *InstructionParsed) goOpcode(enc *EncodingParsed) {
	if len(enc.Operands) == 0 {
		log.Fatalf("Missing mnemonic: %v in %s\n", enc, inst.file)
	}
	if enc.GoOp != "" {
		// Already decided; keep it.
		return
	}
	fullPrefix := "A" + enc.goOpcodePrefix(inst)
	mnemonic := enc.Operands[0].Name
	enc.GoOp = fullPrefix + mnemonic
	enc.prefix = fullPrefix
}
// sortOperands reorders the operands of enc for Go assembly syntax: operand 0
// (the mnemonic) stays in place and the remaining operands are reversed, which
// puts the destination last.
func (enc *EncodingParsed) sortOperands() {
	if len(enc.Operands) < 3 {
		// Zero or one real operand: nothing to reverse.
		return
	}
	args := enc.Operands[1:]
	for k := 0; k < len(args)/2; k++ {
		mirror := len(args) - 1 - k
		args[k], args[mirror] = args[mirror], args[k]
	}
}
// operandType returns the operand class of opr. A class already stored in
// opr.Typ is returned unchanged; otherwise the class of the first rule in
// operandRules that matches opr.Name is returned. When no rule matches, the
// problem is recorded in inst.ParseError and "AC_NONE" is returned.
func (inst *InstructionParsed) operandType(opr Operand) string {
	if known := opr.Typ; known != "" {
		return known
	}
	for _, rule := range operandRules {
		if !rule.re.MatchString(opr.Name) {
			continue
		}
		return rule.class
	}
	inst.ParseError = fmt.Sprintf("unrecognized operand type: %s in %s\n", opr.Name, inst.file)
	return "AC_NONE"
}
// operandsType stores the operand class of every operand of enc except
// operand 0 (the mnemonic) in that operand's Typ field.
func (inst *InstructionParsed) operandsType(enc *EncodingParsed) {
	if len(enc.Operands) < 2 {
		return
	}
	args := enc.Operands[1:]
	for k := range args {
		args[k].Typ = inst.operandType(args[k])
	}
}
// ProcessXMLFiles post-processes the instructions returned by ParseXMLFiles.
//
// It sorts insts in place by Title with nil entries moved to the end, ignores
// the nil tail, runs extractBinary and processEncodings on every remaining
// instruction (one goroutine each), and finally calls validate and debugInfo.
// The caller's slice keeps its length; only its element order changes.
func ProcessXMLFiles(insts []*InstructionParsed) {
	sort.Slice(insts, func(i, j int) bool {
		a, b := insts[i], insts[j]
		switch {
		case a == nil:
			return false
		case b == nil:
			return true
		}
		return a.Title < b.Title
	})

	// After sorting, everything from the first nil entry onwards is nil.
	live := len(insts)
	for k, in := range insts {
		if in == nil {
			live = k
			break
		}
	}
	insts = insts[:live]

	var workers sync.WaitGroup
	workers.Add(len(insts))
	for _, in := range insts {
		go func(in *InstructionParsed) {
			defer workers.Done()
			in.extractBinary()
			in.processEncodings()
		}(in)
	}
	workers.Wait()

	validate(insts)
	debugInfo(*debug)
}
// allOpConstraints collects every operand constraint string built by
// validate; the value is an example instruction that uses it.
var allOpConstraints = map[string]*InstructionParsed{}
// AllEncodingDescs collects the encoding function descriptions with their
// references to named bit ranges expanded. The value is an example
// instruction; resolveConstraints stores nil for the descriptions it
// synthesizes.
var AllEncodingDescs = map[string]*InstructionParsed{}
// EncodingDescsToEncodedIn maps an encoding function description to its
// encoded-in string.
var EncodingDescsToEncodedIn = map[string]string{}
// concatedRangeRe matches an encoded-in of the form "(a :: b)" or
// "(a :: b :: c)" and captures the two or three names.
var concatedRangeRe = regexp.MustCompile(`\((.*?) :: (.*?)(?: :: (.*?))?\)`)
// rangeIndexRe matches an encoded-in of the form "name[n]" and captures the
// name and the decimal index.
var rangeIndexRe = regexp.MustCompile(`(.*?)\[(\d+)\]`)
// expandNamedBitRanges returns elm.textExp followed by a "bit range mappings:"
// section that lists, in sorted key order, the bit range "[lo:hi)" of every
// named box referenced by elm.encodedIn, looked up in varBin.
//
// elm.encodedIn may be a box name, a concatenation "(a :: b[ :: c])" or an
// indexed box "name[n]" (which maps to the single bit lo+n). When none of
// these resolves, a message is logged and the mappings section is left empty,
// except for two SDOT/UDOT pages that are hardcoded. An index that does not
// parse as an integer terminates the program through log.Fatalf.
func (inst *InstructionParsed) expandNamedBitRanges(elm *Element, varBin map[string]bitRange) string {
ranges := map[string]string{}
textExp := elm.textExp
br, ok := varBin[elm.encodedIn]
if !ok {
if matches := concatedRangeRe.FindStringSubmatch(elm.encodedIn); len(matches) > 1 {
for _, key := range matches[1:] {
br, ok2 := varBin[key]
if ok2 {
ranges[key] = fmt.Sprintf("[%d:%d)", br.lo, br.hi)
}
// NOTE(review): ok is set even when key is not in varBin, so a
// concatenation counts as resolved regardless of how many of its
// parts were found — confirm this is intended.
ok = true
}
} else if matches := rangeIndexRe.FindStringSubmatch(elm.encodedIn); len(matches) > 2 {
key := matches[1]
idx := matches[2]
idxI, err := strconv.Atoi(idx)
if err != nil {
log.Fatalf("invalid index: %s in %s, available: %v in %s\n", idx, elm.encodedIn, varBin, inst.file)
}
br, ok2 := varBin[key]
if ok2 {
ok = true
// A single bit of the named box, counted from its low end.
ranges[key] = fmt.Sprintf("[%d:%d)", br.lo+idxI, br.lo+idxI+1)
}
}
} else {
ranges[elm.encodedIn] = fmt.Sprintf("[%d:%d)", br.lo, br.hi)
}
if !ok {
if inst.Title == "SDOT (4-way, vectors) -- A64" || inst.Title == "UDOT (4-way, vectors) -- A64" {
// Known inconsistencies, the box contains a fixed bit, and the parsing logic missed it.
ranges["size"] = "[22:23)"
ok = true
} else {
log.Printf("unknown bit range: %s in %s, available: %v in %s\n", elm.encodedIn, elm.textExp, varBin, inst.file)
}
}
textExp += "\nbit range mappings:\n"
// Sort keys to ensure deterministic order
keys := make([]string, 0, len(ranges))
for k := range ranges {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
textExp += fmt.Sprintf("%s: %s\n", k, ranges[k])
}
return textExp
}
// validate does the following:
// 1. checks if all instruction encodings are unique with regard to this tuple:
//
// (assembly mnemonic, [operand info])
//
// Encodings that share a tuple are logged, apart from a few known cases that
// are skipped below.
//
// Note: variable arrangements are not checked, as before we reason about the encoding
// semantics we cannot fully deduplicate them, e.g.:
//
// SDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
// SDOT <Zda>.H, <Zn>.B, <Zm>.B
// SDOT <Zda>.S, <Zn>.H, <Zm>.H
//
// <T> and <Tb> are specified in the encoding text, where there is a constraint "T = 4*Tb".
// We cannot know this fact by looking at the <asmtemplate> alone; without this information
// the domain of the first encoding includes the other two.
// We defer this deduplication to the assembler.
//
// 2. populates the constraints field of each operand.
// 3. records the encoding function descriptions and operand constraints in
// AllEncodingDescs, EncodingDescsToEncodedIn and allOpConstraints.
//
// Only encodings marked as Parsed are considered. The same description
// appearing with two different encoded-in strings terminates the program
// through log.Fatalf.
func validate(insts []*InstructionParsed) {
// allEncodings maps the uniqueness key to the Asm text of every encoding
// that produced it.
allEncodings := map[string][]string{}
for _, inst := range insts {
for i, iclass := range inst.Classes.Iclass {
for j, encoding := range iclass.Encodings {
if encoding.Parsed == false {
continue
}
key := encoding.arm64Op
for k, operand := range encoding.Operands {
key += " " + operand.Typ
for l, elem := range operand.Elems {
// The first entry names the operand class and element index; the
// fixed properties of the element are appended after it.
constraints := []string{fmt.Sprintf("COP_%s__%d_", operand.Typ, l)}
if elem.fixedArng != "" {
key += "_(Arng:" + elem.fixedArng + ")"
constraints = append(constraints, "ARNG"+elem.fixedArng)
}
if elem.fixedModAmt != "" {
key += "_(ModAmt:" + elem.fixedModAmt + ")"
constraints = append(constraints, "MODAMT"+elem.fixedModAmt)
}
if elem.fixedScalarWidth != 0 {
key += fmt.Sprintf("_(ScalarWidth:%d)", elem.fixedScalarWidth)
constraints = append(constraints, fmt.Sprintf("R%d", elem.fixedScalarWidth))
}
if elem.fixedLSL != "" {
key += "_(LSL:" + elem.fixedLSL + ")"
constraints = append(constraints, "LSL"+elem.fixedLSL)
}
if elem.fixedSXTW {
key += "_(SXTW)"
constraints = append(constraints, "SXTW")
}
if elem.fixedUXTW {
key += "_(UXTW)"
constraints = append(constraints, "UXTW")
}
// The remaining flags only take part in the uniqueness key.
if elem.hasMod {
key += "_(mod)"
}
if elem.isP {
key += "_(P)"
}
if elem.isZ {
key += "_(Z)"
}
// An element without any fixed property gets the placeholder COP_NONE.
var cStr = "COP_NONE"
if len(constraints) > 1 {
cStr = strings.Join(constraints, "_")
allOpConstraints[cStr] = inst
}
// The loop variables are copies, so results are written back through
// the full index path.
inst.Classes.Iclass[i].Encodings[j].Operands[k].constraints = append(
inst.Classes.Iclass[i].Encodings[j].Operands[k].constraints, cStr)
textExpWithRanges := inst.expandNamedBitRanges(&elem, iclass.RegDiagram.varBin)
AllEncodingDescs[textExpWithRanges] = inst
if existing, ok := EncodingDescsToEncodedIn[textExpWithRanges]; ok && existing != elem.encodedIn {
log.Fatalf("duplicate encoding description for two different encoded-ins: %s for %s and %s in %s\n",
textExpWithRanges, existing, elem.encodedIn, inst.file)
}
EncodingDescsToEncodedIn[textExpWithRanges] = elem.encodedIn
inst.Classes.Iclass[i].Encodings[j].Operands[k].Elems[l].TextExpWithRanges = textExpWithRanges
}
inst.Classes.Iclass[i].Encodings[j].Operands[k].resolveConstraints()
}
allEncodings[key] = append(allEncodings[key], encoding.Asm)
}
}
}
// Report duplicated keys in sorted order so the log output is deterministic.
keys := make([]string, 0, len(allEncodings))
for k := range allEncodings {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
v := allEncodings[k]
if len(v) > 1 {
if strings.HasPrefix(k, "A64MOV") {
// These currently are:
// MOV <Zd>.<T>, #<const> (mov_dupm_z_i.xml)
// MOV <Zd>.<T>, #<imm>{, <shift>} (mov_dup_z_i.xml)
// These 2 instructions actually overlaps their domain!
// Although <shift> is optional here, we can force the user
// to always specify <shift> to manually deduplicate.
// Otherwise the assembler will just panic.
continue
}
if strings.HasPrefix(k, "A64FMOV") || strings.HasPrefix(k, "A64COMPACT") {
// Their domains does not overlap, it's ok to ignore.
continue
}
if len(v) == 2 {
var z2Cnt, z4Cnt int
for _, s := range v {
if strings.Contains(s, "-<Zt2>") || strings.Contains(s, "-<Zdn2>") {
z2Cnt++
}
if strings.Contains(s, "-<Zt4>") || strings.Contains(s, "-<Zdn4>") {
z4Cnt++
}
}
// These are reglists of 2 or 4 registers, they will be deduplicated by the assembler
// at encoding stage.
if z2Cnt == 1 && z4Cnt == 1 {
continue
}
}
// If the diff is only by Pg/M or Pg/Z, it's ok to ignore, they are handled by the assembler.
if len(v) == 2 {
var hasPgM, hasPgZ bool
for _, s := range v {
if strings.Contains(s, "/M") {
hasPgM = true
}
if strings.Contains(s, "/Z") {
hasPgZ = true
}
}
if hasPgM && hasPgZ {
continue
}
}
sort.Strings(v)
log.Printf("%s:\n\t%v\n", k, strings.Join(v, "\n\t"))
}
}
}
// expectedElemCount is the expected number of elements for each
// operand class (AClass in the assembler), keyed by the class name that
// operandRules assigns.
// The comments on the map entries are the GNU mnemonic forms;
// the angle-bracket enclosed parts are elements.
// resolveConstraints uses the count to size its insertion bookkeeping.
var expectedElemCount = map[string]int{
// <reg>.<T>
"AC_ARNG": 2,
"AC_PREG": 2,
"AC_PREGZM": 2,
"AC_ZREG": 2,
// <reg>.<T>[<index>]
"AC_ARNGIDX": 3,
"AC_ZREGIDX": 3,
"AC_PREGIDX": 3,
// #<imm>, <shift>
"AC_IMM": 1,
// [<reg1>.<T1>, <reg2>.<T2>, <mod> <amount>]
"AC_MEMEXT": 6,
// [<reg>.<T>, #<imm>]
"AC_MEMOFF": 3,
// [<xn|sp>{, #<imm>, MUL VL}]
// xn implies the constraint that it's an X reg, so one additional encoding func to check.
"AC_MEMOFFMULVL": 3,
// <preg>.<T>[<selreg>, <imm>]
// selreg must be a W reg, so one additional encoding func to check.
"AC_PREGSEL": 5,
// <width><reg>
"AC_SPZGREG": 2,
"AC_VREG": 2,
// {<reg>.<T>}
"AC_REGLIST1": 2,
// {<reg1>.<T1>, <reg2>.<T2>}
"AC_REGLIST2": 4,
// {<reg1>.<T1>, <reg2>.<T2>, <reg3>.<T3>}
"AC_REGLIST3": 6,
// {<reg1>.<T1>, <reg2>.<T2>, <reg3>.<T3>, <reg4>.<T4>}
"AC_REGLIST4": 8,
// {<reg1>.<T1>-<reg2>.<T2>}
"AC_REGLIST_RANGE": 4,
// <vl> or <prfop>
"AC_SPECIAL": 1,
// {<Reg>.<T>, <pattern>, MUL #<imm>}
"AC_PREG_PATTERN": 4,
"AC_REG_PATTERN": 4,
"AC_ZREG_PATTERN": 4,
}
// unresolvedConstraints is the set of constraints that could not be resolved.
var unresolvedConstraints = map[string]struct{}{}
// noOpCheck is the description text for an encoding check that always
// succeeds.
// NOTE(review): its use is outside the visible part of the file — confirm.
var noOpCheck = "No-op check, returns true"
// resolveConstraints resolves the constraints attached to the operand.
// A constraint is resolved by inserting one or more new elements next to the
// constrained element, in place; each new element carries the description of
// the check in its TextExpWithRanges, and that description is also registered
// as a key of AllEncodingDescs.
//
// After all constraints are processed, the operand is expected to have the
// number of elements listed in expectedElemCount for its class. A fixed set of
// operand forms known to come up short (or, for "#<imm>{, <shift>}", long) is
// adjusted here; any remaining mismatch is recorded in unresolvedConstraints.
//
// Malformed constraints are logged and skipped.
func (op *Operand) resolveConstraints() {
	// insertionHistory[i] is the number of elements already inserted at index
	// i of the ORIGINAL op.Elems slice. It is needed because constraints refer
	// to original indices, which shift as elements get inserted.
	insertionHistory := make([]int, expectedElemCount[op.Typ])
	// insertElmAt inserts a new element with the given symbol and description
	// at idx. When needOffset is true, idx is an index into the original
	// op.Elems slice and is translated to the current slice by adding all
	// insertions made at or before it; otherwise idx is used as is.
	insertElmAt := func(idx int, symbol, textExpWithRanges string, needOffset bool) {
		if needOffset {
			offset := 0
			for i := range idx + 1 {
				offset += insertionHistory[i]
			}
			insertionHistory[idx]++
			idx += offset
		}
		// The inner append builds a fresh slice first, so the tail of
		// op.Elems is copied before the outer append overwrites it.
		op.Elems = append(op.Elems[:idx], append([]Element{
			{
				encodedIn:         "nil",
				TextExpWithRanges: textExpWithRanges,
				symbol:            symbol,
			},
		}, op.Elems[idx:]...)...)
		AllEncodingDescs[textExpWithRanges] = nil
	}
	// Constraint format: COP_<AClass>__<index>_(_<constraintTypes>)*
	// <AClass> is the operand class, e.g. AC_SPZGREG, AC_IMM, etc.
	// <index> is the index of the constrained element in the operand, e.g. 0, 1, 2, etc.
	// <constraintTypes> is the type of the constraint, e.g. ARNG, MODAMT, etc.
	for _, constraint := range op.constraints {
		constraint = strings.TrimPrefix(constraint, "COP_")
		parts := strings.Split(constraint, "__")
		if len(parts) != 3 {
			if constraint != "NONE" {
				log.Printf("Invalid constraint format: %s", constraint)
			}
			continue
		}
		acl := parts[0]
		index, err := strconv.Atoi(parts[1])
		if err != nil {
			log.Printf("Invalid constraint format: %s", constraint)
			// Without a valid index there is no element to constrain;
			// falling through would apply the constraint to element 0.
			continue
		}
		constraintTypes := strings.Split(parts[2], "_")
		for _, constraintType := range constraintTypes {
			switch constraintType {
			case "ARNGB", "ARNGD", "ARNGH", "ARNGQ", "ARNGS":
				// The arrangement letter follows the constrained element.
				arng := strings.TrimPrefix(constraintType, "ARNG")
				insertElmAt(index+1, arng, "Check this is a "+arng+" arrangement", true)
			case "R64":
				if acl == "AC_SPZGREG" {
					// Width constraints precede the element.
					insertElmAt(index, "X", "Check this is a 64-bit scalar register", true)
				} else {
					insertElmAt(index+1, "nil", noOpCheck, true)
				}
			case "R32":
				if acl == "AC_SPZGREG" {
					// Width constraints precede the element.
					insertElmAt(index, "W", "Check this is a 32-bit scalar register", true)
				} else {
					insertElmAt(index+1, "nil", noOpCheck, true)
				}
			case "LSL1", "LSL2", "LSL3", "LSL4", "SXTW", "UXTW", "MODAMT1", "MODAMT2", "MODAMT3":
				if acl == "AC_MEMEXT" {
					// First the modifier itself (none for MODAMT*)...
					switch constraintType {
					case "LSL1", "LSL2", "LSL3", "LSL4":
						insertElmAt(index+1, "LSL", "Check this is mod and is LSL", true)
					case "UXTW":
						insertElmAt(index+1, "UXTW", "Check this is mod and is UXTW", true)
					case "SXTW":
						insertElmAt(index+1, "SXTW", "Check this is mod and is SXTW", true)
					}
					// ...then the modifier amount (none for SXTW/UXTW).
					switch constraintType {
					case "LSL1", "MODAMT1":
						insertElmAt(index+1, "#1", "Check this is mod amount and is 1", true)
					case "LSL2", "MODAMT2":
						insertElmAt(index+1, "#2", "Check this is mod amount and is 2", true)
					case "LSL3", "MODAMT3":
						insertElmAt(index+1, "#3", "Check this is mod amount and is 3", true)
					case "LSL4":
						insertElmAt(index+1, "#4", "Check this is mod amount and is 4", true)
					}
				} else {
					log.Printf("Unknown constraint: %s", constraint)
				}
			default:
				log.Printf("Unknown constraint: %s", constraint)
			}
		}
	}
	// Check the number of elements, and patch up the operand forms that are
	// known not to match. Indices passed to insertElmAt below are indices
	// into the current op.Elems (needOffset is false).
	if el := expectedElemCount[op.Typ]; len(op.Elems) != el {
		resolved := false
		switch op.Name {
		case "#0.0":
			if el == 1 && len(op.Elems) == 0 {
				op.Elems = make([]Element, 0, 1)
				insertElmAt(0, "#0.0", "Check this is immediate 0.0", false)
				resolved = true
			}
		case "#<imm>{, <shift>}":
			if el == 1 && len(op.Elems) == 2 {
				// The explanations of the 2 elements need to be merged:
				// put the merged element first, then drop the originals.
				insertElmAt(0, "#<imm>{, <shift>}", op.Elems[0].TextExpWithRanges+"\n"+op.Elems[1].TextExpWithRanges, false)
				op.Elems = op.Elems[:1]
				resolved = true
			}
		case "<Pd>", "<Pg>", "<Pn>", "<PNg>", "<Pt>", "<Pv>", "<Zd>", "<Zm>", "<Zn>", "<Zt>":
			if el == 2 && len(op.Elems) == 1 {
				insertElmAt(1, "nil", noOpCheck, false)
				resolved = true
			}
		case "<Dd>":
			if el == 2 && len(op.Elems) == 1 {
				insertElmAt(0, "nil", "Check this SIMD vector register is of width 64-bit.", false)
				resolved = true
			}
		case "<PNg>/Z", "<Pg>/Z":
			if el == 2 && len(op.Elems) == 1 {
				insertElmAt(1, "Z", "Check this is a zeroing predication", false)
				resolved = true
			}
		case "<Pg>/M", "<Pv>/M":
			if el == 2 && len(op.Elems) == 1 {
				insertElmAt(1, "M", "Check this is a merging predication", false)
				resolved = true
			}
		case "<PNn>[<imm>]":
			if el == 3 && len(op.Elems) == 2 {
				insertElmAt(1, "nil", noOpCheck, false)
				resolved = true
			}
		case "<Pd>.<T>{, <pattern>}":
			if el == 4 && len(op.Elems) == 3 {
				insertElmAt(3, "nil", noOpCheck, false)
				resolved = true
			}
		case "<Zd>{[<imm>]}", "<Zm>[<index>]", "<Zn>{[<imm>]}":
			if el == 3 && len(op.Elems) == 2 {
				insertElmAt(1, "nil", noOpCheck, false)
				resolved = true
			}
		case "[<Xn|SP>, <Xm>]", "[<Xn|SP>, <Zm>.D]", "[<Xn|SP>{, <Xm>}]", "[<Zn>.D{, <Xm>}]", "[<Zn>.S{, <Xm>}]":
			if el == 6 && len(op.Elems) == 4 {
				insertElmAt(4, "nil", "Check that there is no modifier (UXTW, SXTW, LSL)", false)
				insertElmAt(5, "nil", "Check that there is no modifier amount", false)
				resolved = true
			}
		case "[<Xn|SP>, <Zm>.S, <mod>]", "[<Xn|SP>, <Zm>.D, <mod>]":
			if el == 6 && len(op.Elems) == 5 {
				insertElmAt(5, "nil", "Check that there is no modifier amount", false)
				resolved = true
			}
		}
		if !resolved {
			unresolvedConstraints[fmt.Sprintf("Operand %s has %d elements, expected %d", op.Name, len(op.Elems), el)] = struct{}{}
		}
	}
}
// noOpWidthDescInterpreter is the width descriptor interpreter for encodings
// that are not split: it replaces the description of the first element of
// every operand listed in nonSVEIndices with noOpCheck and returns the
// encoding as the sole result.
var noOpWidthDescInterpreter = func(enc EncodingParsed, nonSVEIndices []int) []EncodingParsed {
	for i := 0; i < len(nonSVEIndices); i++ {
		operandIdx := nonSVEIndices[i]
		enc.Operands[operandIdx].Elems[0].TextExpWithRanges = noOpCheck
	}
	return []EncodingParsed{enc}
}
// widthVariant describes one of the concrete encodings an instruction is
// split into for a given register width.
type widthVariant struct {
	bits   uint32 // OR-ed into the fixed binary of the encoding
	suffix string // appended to the Go opcode name
}

// widthSplitter returns a width descriptor interpreter that produces one copy
// of the encoding per variant, in the given order. For each copy, the
// variant's bits are hard coded into the fixed binary, its suffix is appended
// to GoOp, and the width specifier (first element) of every operand listed in
// nonSVEIndices is nullified by setting its description to noOpCheck.
func widthSplitter(variants ...widthVariant) func(enc EncodingParsed, nonSVEIndices []int) []EncodingParsed {
	return func(enc EncodingParsed, nonSVEIndices []int) []EncodingParsed {
		encs := []EncodingParsed{}
		for _, variant := range variants {
			newEnc := enc
			newEnc.Binary |= variant.bits
			newEnc.GoOp = enc.GoOp + variant.suffix
			for _, idx := range nonSVEIndices {
				newEnc.Operands[idx].Elems[0].TextExpWithRanges = noOpCheck
			}
			encs = append(encs, newEnc)
		}
		return encs
	}
}

// widthDescInterpreters maps a width specifier description to the function
// that splits an encoding according to it. Each function receives the
// encoding and the indices of its non-SVE operands, and returns the resulting
// encodings.
var widthDescInterpreters = map[string]func(enc EncodingParsed, nonSVEIndices []int) []EncodingParsed{
	"No-op check, returns true": noOpWidthDescInterpreter,
	`
Is a width specifier,
size <V>
00 B
01 H
10 S
11 D
bit range mappings:
size: [22:24)`: widthSplitter(
		widthVariant{0 << 22, "B"},
		widthVariant{1 << 22, "H"},
		widthVariant{2 << 22, "S"},
		widthVariant{3 << 22, "D"},
	),
	`
Is a width specifier,
size <R>
00 W
01 W
10 W
11 X
bit range mappings:
size: [22:24)`: widthSplitter(
		// It looks like GNU assembler prefers 00 for W.
		widthVariant{0 << 22, "W"},
		// X doesn't need a suffix.
		widthVariant{3 << 22, ""},
	),
	`
Is a width specifier,
sz <R>
0 W
1 X
bit range mappings:
sz: [22:23)`: widthSplitter(
		widthVariant{0 << 22, "W"},
		widthVariant{1 << 22, ""},
	),
	`
Is a width specifier,
size <V>
00 RESERVED
01 H
10 S
11 D
bit range mappings:
size: [22:24)`: widthSplitter(
		// 00 is reserved, so the variants start at 01.
		widthVariant{1 << 22, "H"},
		widthVariant{2 << 22, "S"},
		widthVariant{3 << 22, "D"},
	),
	// A fixed width: only the opcode name changes, no bits are set.
	"Check this SIMD vector register is of width 64-bit.": widthSplitter(widthVariant{0, "D"}),
	`
Is a width specifier,
sf <R>
0 W
1 X
bit range mappings:
sf: [12:13)`: widthSplitter(
		widthVariant{0 << 12, "W"},
		widthVariant{1 << 12, ""},
	),
	"Check this is a 64-bit scalar register": noOpWidthDescInterpreter,
	// A fixed width: only the opcode name changes, no bits are set.
	"Check this is a 32-bit scalar register": widthSplitter(widthVariant{0, "W"}),
}
// SplitInstByRegWidth splits an instruction encoding into multiple instruction
// encodings based on the register width. It takes an instruction encoding
// that is already validated and returns the list of encodings it splits into;
// an encoding that needs no splitting, or that cannot be handled, is returned
// unchanged as the only item.
//
// The operands considered are those of type AC_SPZGREG or AC_VREG, whose first
// element is a width specifier. Normally there is only one such operand; the
// encodings with more than one are handled case by case below. The width
// specifier of the first such operand selects the interpreter from
// widthDescInterpreters, comparing descriptions with whitespace normalized.
func SplitInstByRegWidth(enc EncodingParsed) []EncodingParsed {
	var widthDesc []string
	var nonSVEIndices []int
	for i, op := range enc.Operands {
		if op.Typ == "AC_SPZGREG" || op.Typ == "AC_VREG" {
			widthDesc = append(widthDesc, op.Elems[0].TextExpWithRanges)
			nonSVEIndices = append(nonSVEIndices, i)
		}
	}
	if len(widthDesc) == 0 {
		return []EncodingParsed{enc}
	}
	if len(widthDesc) > 1 {
		// Manually handle them...
		switch enc.Name {
		case "addpl_r_ri_", "addvl_r_ri_":
			// They are fixed X registers, so OK to do nothing.
			return []EncodingParsed{enc}
		case "clasta_v_p_z_", "clasta_r_p_z_", "clastb_v_p_z_", "clastb_r_p_z_",
			"ctermeq_rr_", "ctermne_rr_", "fadda_v_p_z_", "index_z_rr_":
			// The 2 non-sve registers are the same width, so OK to slip it through to the interpreter.
		case "sqdecp_r_p_r_sx", "sqincp_r_p_r_sx":
			// These 2 instructions have 2 variants:
			// e.g. for SQINCP:
			// 1. SQINCP <Xdn>, <Pm>.<T>, <Wdn> (these are the ones we see in this case)
			// 2. SQINCP <Xdn>, <Pm>.<T> (these have encoding names without _sx, and they slip through)
			// The logic following will rename the first one as SQINCPW, the second one unchanged.
			// So it's also safe to slip it through to the interpreter.
		default:
			// The series of while instructions all have the same width for
			// their 2 non-sve registers, so they are safe to slip through.
			// Anything else is unknown and is left untouched.
			if !strings.HasPrefix(enc.Name, "while") {
				log.Printf("Warning: instruction %s has %d width descriptors", enc.Name, len(widthDesc))
				return []EncodingParsed{enc}
			}
		}
	}
	// Descriptions are compared with runs of whitespace collapsed, so that
	// the layout of the known descriptions does not matter.
	normalize := func(s string) string {
		return strings.Join(strings.Fields(s), " ")
	}
	want := normalize(widthDesc[0])
	for knownWD, interpret := range widthDescInterpreters {
		if normalize(knownWD) == want {
			return interpret(enc, nonSVEIndices)
		}
	}
	log.Printf("Warning: unknown width descriptor: %s", widthDesc[0])
	return []EncodingParsed{enc}
}
// sortedStringKeys returns the keys of m in ascending order, so that callers
// can walk a map deterministically.
func sortedStringKeys[K ~string, V any](m map[K]V) []K {
	keys := make([]K, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
	return keys
}

// debugInfo prints all fixed symbols, operand constraints and encoding function descriptions
// in deterministic order, followed by the unresolved constraints.
//
// The sizes of the collections and the unresolved constraints are always
// logged. The contents of the collections are printed to standard output when
// debug > 0, and each item is followed by the file of an example instruction
// when debug > 1.
func debugInfo(debug int) {
	log.Printf("len(allFixedSymbols) = %v\n", len(allFixedSymbols))
	if debug > 0 {
		for _, k := range sortedStringKeys(allFixedSymbols) {
			fmt.Printf("%s:\n", k)
			symbols := allFixedSymbols[k]
			// Sort the inner keys too; ranging over the map directly would
			// print them in a different order on every run.
			for _, v2 := range sortedStringKeys(symbols) {
				fmt.Printf("\t%s\n", v2)
				if debug > 1 {
					fmt.Printf("Example Inst at %s\n", symbols[v2].file)
				}
			}
		}
	}
	log.Printf("len(allOpConstraints) = %v\n", len(allOpConstraints))
	if debug > 0 {
		for _, k := range sortedStringKeys(allOpConstraints) {
			fmt.Printf("%s\n", k)
			if debug > 1 {
				fmt.Printf("Example Inst at %s\n", allOpConstraints[k].file)
			}
		}
	}
	log.Printf("len(AllEncodingDescs) = %v\n", len(AllEncodingDescs))
	if debug > 0 {
		for _, k := range sortedStringKeys(AllEncodingDescs) {
			fmt.Printf("%s\n", k)
			// Descriptions registered by resolveConstraints have a nil
			// value, so there is no example to print for them.
			if desc := AllEncodingDescs[k]; debug > 1 && desc != nil {
				fmt.Printf("Example Inst at %s\n", desc.file)
			}
		}
	}
	for _, k := range sortedStringKeys(unresolvedConstraints) {
		log.Printf("%s\n", k)
	}
}