blob: 4c32961cf40cb6ac8a78d9aa401cd62bde986a75 [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// loong64spec reads "LoongArch-Vol1-EN.pdf" [1], collects the instruction
// encoding details, and writes them to tables.go.
//
// usage: go run spec.go LoongArch-Vol1-EN.pdf
//
// [1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf
package main
import (
"bytes"
"fmt"
"log"
"math"
"os"
"regexp"
"sort"
"strconv"
"strings"
"rsc.io/pdf"
)
// mergeMap copies every key/value pair of m2 into m1. An entry already
// present in m1 under the same key is overwritten with the value from m2.
func mergeMap(m1 map[string]string, m2 map[string]string) {
	for key, val := range m2 {
		m1[key] = val
	}
}
// main opens the PDF named by the single command-line argument, parses every
// page from the first page accepted by isFirstPage through the last page of
// the document, and writes the collected Op constants, the opstr table and
// the instFormats table to tables.go in the current directory.
//
// It exits with status 2 on a usage error and terminates through log.Fatal
// when the PDF cannot be opened or tables.go cannot be created, written or
// closed.
func main() {
	log.SetFlags(0)
	log.SetPrefix("loong64spec: ")

	if len(os.Args) != 2 {
		fmt.Fprintf(os.Stderr, "usage: loong64spec LoongArch-Vol1-EN.pdf\n")
		os.Exit(2)
	}

	f, err := pdf.Open(os.Args[1])
	if err != nil {
		log.Fatal(err)
	}

	// Each section of the output file is accumulated in its own buffer so
	// that the three tables can be filled in lock-step below.
	var prologue, opBuf, opstrBuf, instFormatsBuf bytes.Buffer
	prologue.WriteString("// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n")
	opBuf.WriteString("const (\n\t_ Op = iota\n")
	opstrBuf.WriteString("var opstr = [...]string{\n")
	instFormatsBuf.WriteString("var instFormats = [...]instFormat{\n")

	// Scan document looking for instructions.
	n := f.NumPage()
	var ops []string
	opstrs := map[string]string{}
	instFormatComments := map[string]string{}
	instFormats := map[string]string{}
	var fp int // number of the first table page; 0 until it has been found
	for pageNum := 1; pageNum <= n; pageNum++ {
		p := f.Page(pageNum)
		if fp == 0 {
			if !isFirstPage(p) {
				continue
			}
			fp = pageNum
		}
		cPageOps, cPageOpstrs, cPageInstFormatComments, cPageInstFormats := parsePage(pageNum, p, fp == pageNum)
		ops = append(ops, cPageOps...)
		mergeMap(opstrs, cPageOpstrs)
		mergeMap(instFormatComments, cPageInstFormatComments)
		mergeMap(instFormats, cPageInstFormats)
	}

	// Emit the instructions in sorted order so the generated file is stable.
	sort.Strings(ops)
	for _, op := range ops {
		fmt.Fprintf(&opBuf, "\t%s\n", op)
		fmt.Fprintf(&opstrBuf, "\t%s\n", opstrs[op])
		fmt.Fprintf(&instFormatsBuf, "\t%s\n\t%s\n", instFormatComments[op], instFormats[op])
	}
	opBuf.WriteString(")\n\n")
	opstrBuf.WriteString("}\n\n")
	instFormatsBuf.WriteString("}\n")

	fileTables, err := os.Create("tables.go")
	if err != nil {
		log.Fatal(err)
	}
	for _, section := range []*bytes.Buffer{&prologue, &opBuf, &opstrBuf, &instFormatsBuf} {
		if _, err := fileTables.Write(section.Bytes()); err != nil {
			// log.Fatal exits without running deferred calls, so close
			// the file explicitly before reporting the failure.
			fileTables.Close()
			log.Fatal(err)
		}
	}
	// A failed Close can mean the data never reached the disk.
	if err := fileTables.Close(); err != nil {
		log.Fatal(err)
	}
}
// isFirstPage reports whether the text of page starts with "AppendixB".
//
// The text elements of the page are concatenated one at a time. The page is
// accepted as soon as the accumulated string equals "AppendixB" and rejected
// as soon as the accumulated string stops being a prefix of it. A page whose
// text ends before the marker is complete is rejected as well.
func isFirstPage(page pdf.Page) bool {
	const marker = "AppendixB"
	seen := ""
	for _, t := range page.Content().Text {
		seen += t.S
		switch {
		case seen == marker:
			return true
		case !strings.HasPrefix(marker, seen):
			return false
		}
	}
	return false
}
// getArg maps an operand field name taken from the encoding table to the
// width of that field in bits and the name of the matching arg_* constant.
//
//   - "csr" and "rj!=0,1" are matched exactly.
//   - Fixed-width operands are selected when name is a substring of the
//     constant's name, so "rd" selects "arg_rd".
//   - The sa, ui and si immediates carry their width as a decimal suffix
//     ("ui12"); the width picks the constant, and a suffix that is not a
//     number yields width 0.
//   - "offs[hi:lo]" is hi-lo+1 bits wide and is reported under the
//     placeholder name "offs".
//
// A name that matches nothing yields (0, "").
func getArg(name string) (length int, argName string) {
	switch name {
	case "csr":
		return 14, "arg_csr_23_10"
	case "rj!=0,1":
		return 5, "arg_rj"
	}

	// Operands whose width does not depend on the spelling of name. The
	// first entry whose constant name contains name wins.
	fixed := []struct {
		arg   string
		width int
	}{
		{"arg_fd", 5},
		{"arg_fj", 5},
		{"arg_fk", 5},
		{"arg_fa", 5},
		{"arg_rd", 5},
		{"arg_rj", 5},
		{"arg_rk", 5},
		{"arg_cd", 5},
		{"arg_cj", 5},
		{"arg_ca", 5},
		{"arg_seq_17_10", 8},
		{"arg_op_4_0", 5},
		{"arg_lsbw", 5},
		{"arg_msbw", 5},
		{"arg_lsbd", 6},
		{"arg_msbd", 6},
	}
	for _, f := range fixed {
		if strings.Contains(f.arg, name) {
			return f.width, f.arg
		}
	}

	// Immediates and offsets: the width is spelled out in name itself. The
	// Atoi errors are ignored on purpose; a malformed width becomes 0.
	switch {
	case strings.Contains(name, "sa"):
		width, _ := strconv.Atoi(strings.Split(name, "sa")[1])
		if width == 2 {
			return width, "arg_sa2_16_15"
		}
		return width, "arg_sa3_17_15"

	case strings.Contains(name, "ui"):
		width, _ := strconv.Atoi(strings.Split(name, "ui")[1])
		switch width {
		case 5:
			return width, "arg_ui5_14_10"
		case 6:
			return width, "arg_ui6_15_10"
		}
		return width, "arg_ui12_21_10"

	case strings.Contains(name, "si"):
		width, _ := strconv.Atoi(strings.Split(name, "si")[1])
		switch width {
		case 12:
			return width, "arg_si12_21_10"
		case 14:
			return width, "arg_si14_23_10"
		case 16:
			return width, "arg_si16_25_10"
		}
		return width, "arg_si20_24_5"

	case strings.Contains(name, "offs"):
		halves := strings.Split(name, ":")
		hi, _ := strconv.Atoi(strings.Split(halves[0], "[")[1])
		lo, _ := strconv.Atoi(strings.Split(halves[1], "]")[0])
		return hi - lo + 1, "offs"
	}

	return 0, ""
}
// binstrToHex converts the first 32 characters of str, each expected to be
// '0' or '1' with the most significant bit first, into the string "0x"
// followed by one lowercase hexadecimal digit per group of four bits.
// Characters beyond the 32nd are ignored. It panics if str is shorter than
// 32 characters.
func binstrToHex(str string) string {
	const hexLetters = "abcdef"
	out := []byte("0x")
	for pos := 0; pos < 32; pos += 4 {
		// Fold four characters into one value, most significant first.
		nibble := 0
		for _, bit := range []byte(str[pos : pos+4]) {
			nibble = nibble*2 + int(bit) - 48
		}
		if nibble >= 10 && nibble <= 15 {
			out = append(out, hexLetters[nibble-10])
		} else {
			out = strconv.AppendInt(out, int64(nibble), 10)
		}
	}
	return string(out)
}
// dealWithFcmp expands the FCMP.cond.S/D table row into one entry per
// condition. ds is the precision suffix of the row ("S" or "D").
//
// FCMP.cond.S/D has the following format:
//
//	| 31 - 20 | 19 - 15 | 14 - 10 | 9 - 5 | 4 | 3 | 2 - 0 |
//	|---------|---------|---------|-------|---|---|-------|
//	|   op    |  cond   |   fk    |  fj   | 0 | 0 |  cd   |
//
// The cond field (bits 19 to 15) takes these hexadecimal values, as listed
// in the instruction set manual:
//
//	CAF  00   CUN  08   CEQ  04   CUEQ 0c
//	CLT  02   CULT 0a   CLE  06   CULE 0e
//	CNE  10   COR  14   CUNE 18
//	SAF  01   SUN  09   SEQ  05   SUEQ 0d
//	SLT  03   SULT 0b   SLE  07   SULE 0f
//	SNE  11   SOR  15   SUNE 19
//
// The table inside the function does not hold those values directly. It
// holds the two hexadecimal digits that the cond field contributes to the
// whole instruction word: the upper four bits of cond fill one digit and the
// lowest bit of cond becomes the top bit of the next digit, so every entry
// differs from the manual's value.
//
// The result is keyed by the op name ("FCMP_<cond>_<ds>"). Each value is a
// map with the keys "op", "opstr", "instFormatComment" and "instFormat".
func dealWithFcmp(ds string) (fcmpConditions map[string]map[string]string) {
	encodings := []struct {
		cond string
		bits string
	}{
		{"CAF", "00"},
		{"CUN", "40"},
		{"CEQ", "20"},
		{"CUEQ", "60"},
		{"CLT", "10"},
		{"CULT", "50"},
		{"CLE", "30"},
		{"CULE", "70"},
		{"CNE", "80"},
		{"COR", "a0"},
		{"CUNE", "c0"},
		{"SAF", "08"},
		{"SUN", "48"},
		{"SEQ", "28"},
		{"SUEQ", "68"},
		{"SLT", "18"},
		{"SULT", "58"},
		{"SLE", "38"},
		{"SULE", "78"},
		{"SNE", "88"},
		{"SOR", "a8"},
		{"SUNE", "c8"},
	}

	// Only the "D" suffix selects the 0x0c2 opcode; anything else uses 0x0c1.
	format := "{mask: 0xffff8018, value: 0x0c1%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},"
	if ds == "D" {
		format = "{mask: 0xffff8018, value: 0x0c2%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},"
	}

	fcmpConditions = make(map[string]map[string]string)
	for _, e := range encodings {
		op := fmt.Sprintf("FCMP_%s_%s", e.cond, ds)
		fcmpConditions[op] = map[string]string{
			"op":                op,
			"opstr":             fmt.Sprintf("FCMP_%s_%s:\t\"FCMP.%s.%s\",", e.cond, ds, e.cond, ds),
			"instFormatComment": fmt.Sprintf("// FCMP.%s.%s cd, fj, fk", e.cond, ds),
			"instFormat":        fmt.Sprintf(format, e.bits, e.cond, ds),
		}
	}
	return fcmpConditions
}
// findWords groups the text elements in chars into words (really, phrases)
// and returns one pdf.Text per phrase.
//
// Elements are first collected into a row: the run of consecutive elements
// that share the Y of the row's first element, extended by any following
// elements whose X lies within the X range spanned by that run. Within a row,
// an element joins the current phrase when its font size is within 0.1 of the
// phrase's first element and it starts no further than FontSize/6 past the
// end of the phrase (appended directly) or FontSize*2/3 past it (appended
// after a single space). Each phrase carries the font, font size and position
// of its first element, and a width reaching to the end of its last element.
func findWords(chars []pdf.Text) (words []pdf.Text) {
	for rowStart := 0; rowStart < len(chars); {
		rowY := chars[rowStart].Y
		minX, maxX := chars[rowStart].X, chars[rowStart].X

		// Take every following element on the same line.
		rowEnd := rowStart + 1
		for ; rowEnd < len(chars) && chars[rowEnd].Y == rowY; rowEnd++ {
			maxX = chars[rowEnd].X
		}
		// The content of a cell may wrap onto another line. Text of the next
		// row is expected to start to the right of this one, so an element
		// whose X falls inside the range covered so far is treated as a
		// continuation of the current row.
		for rowEnd < len(chars) && minX <= chars[rowEnd].X && chars[rowEnd].X <= maxX {
			rowEnd++
		}

		// Split the row into phrases.
		for head := rowStart; head < rowEnd; {
			lead := chars[head]
			phrase := lead.S
			right := lead.X + lead.W
			charGap := lead.FontSize / 6
			wordGap := lead.FontSize * 2 / 3

			next := head + 1
		grow:
			for ; next < rowEnd; next++ {
				cur := chars[next]
				sameSize := math.Abs(cur.FontSize-lead.FontSize) < 0.1
				switch {
				case sameSize && cur.X <= right+charGap:
					// Close enough to be part of the same word.
					phrase += cur.S
				case sameSize && cur.X <= right+wordGap:
					// A new word of the same phrase.
					phrase += " " + cur.S
				default:
					break grow
				}
				right = cur.X + cur.W
			}

			words = append(words, pdf.Text{
				Font:     lead.Font,
				FontSize: lead.FontSize,
				X:        lead.X,
				Y:        lead.Y,
				W:        right - lead.X,
				S:        phrase,
			})
			head = next
		}
		rowStart = rowEnd
	}
	return words
}
// numericWord matches a word made up solely of decimal digits. parsePage uses
// it to tell an operand list apart from an encoding bit that directly follows
// the mnemonic.
var numericWord = regexp.MustCompile(`^\d+$`)

// offsBounds returns the two numbers of an "offs[hi:lo]" field name. It is
// only called with names that getArg has already accepted as an offs field,
// which performs the same indexing, so the conversion errors are ignored in
// the same way as there.
func offsBounds(field string) (hi, lo int) {
	halves := strings.Split(field, ":")
	hi, _ = strconv.Atoi(strings.Split(halves[0], "[")[1])
	lo, _ = strconv.Atoi(strings.Split(halves[1], "]")[0])
	return hi, lo
}

// parsePage extracts the instructions listed on one page of the encoding
// table.
//
// num is the page number and is used only in diagnostics. isFP tells whether
// p is the first page of the table, which carries the table name in addition
// to the table header.
//
// It returns the op names found on the page and three maps keyed by op name:
// the opstr table entry, the comment line describing the assembly syntax, and
// the instFormat table entry. A page with fewer text elements than the
// expected header and footer is reported on standard error and yields no
// instructions.
func parsePage(num int, p pdf.Page, isFP bool) (ops []string, opstrs map[string]string, instFormatComments map[string]string, instFormats map[string]string) {
	opstrs = make(map[string]string)
	instFormatComments = make(map[string]string)
	instFormats = make(map[string]string)

	// Text elements that are not table rows: table name(70) on the first
	// page only, table header(64) on every page, page num(3) at the end.
	const (
		tableNameLen   = 70
		tableHeaderLen = 64
		pageNumLen     = 3
	)
	skip := tableHeaderLen
	if isFP {
		skip += tableNameLen
	}
	raw := p.Content().Text
	if len(raw) < skip+pageNumLen {
		fmt.Fprintf(os.Stderr, "page %d: found %d text elements, want at least %d; skipping page\n", num, len(raw), skip+pageNumLen)
		return
	}
	text := findWords(raw[skip : len(raw)-pageNumLen])

	for i := 0; i < len(text); {
		mnemonic := text[i].S
		rowY := text[i].Y

		// FCMP.cond.S/D stands for a whole family of instructions that is
		// generated from a fixed table instead of being read from the row.
		if strings.HasPrefix(mnemonic, "FCMP") {
			if parts := strings.Split(mnemonic, "."); len(parts) >= 3 {
				for fc, inst := range dealWithFcmp(parts[2]) {
					ops = append(ops, inst["op"])
					opstrs[fc] = inst["opstr"]
					instFormatComments[fc] = inst["instFormatComment"]
					instFormats[fc] = inst["instFormat"]
				}
			} else {
				fmt.Fprintf(os.Stderr, "page %d: no precision suffix in %q; skipping row\n", num, mnemonic)
			}
			// Skip the rest of the row. The row may be the last one on
			// the page, so the scan has to stop at the end of text.
			i++
			for i < len(text) && text[i].Y == rowY {
				i++
			}
			continue
		}

		op := strings.Replace(mnemonic, ".", "_", -1)
		opstr := fmt.Sprintf("%s:\t\"%s\",", op, mnemonic)
		instFormatComment := ""
		binValue := ""
		binMask := ""
		instArgs := ""
		var offArgs []string

		j := i + 1
		for ; j < len(text) && text[j].Y == rowY; j++ {
			word := text[j].S

			// Some instructions have no arguments, so the word after the
			// mnemonic is not the argument string but a 0/1 bit, which
			// must not be skipped.
			if j == i+1 && !numericWord.MatchString(word) {
				instFormatComment = fmt.Sprintf("// %s %s", mnemonic, strings.Replace(word, ",", ", ", -1))
				continue
			}

			if word == "0" || word == "1" {
				binValue += word
				binMask += "1"
				continue
			}

			argLen, argName := getArg(word)
			// Getting the argument's length failed: derive it from the
			// bits already consumed on the left and the widths of the
			// fields remaining on the right.
			if argLen == 0 {
				left := 31 - len(binValue)
				right := 0
				for l := j + 1; l < len(text) && text[l].Y == rowY; l++ {
					if text[l].S == "0" || text[l].S == "1" {
						right++
						continue
					}
					tArgLen, _ := getArg(text[l].S)
					if tArgLen == 0 {
						fmt.Fprintf(os.Stderr, "there are more than two args whose length is unknown.\n")
					}
					right += tArgLen
				}
				argLen = left - right + 1
				argName = "arg_" + word + "_" + strconv.Itoa(left) + "_" + strconv.Itoa(right)
			}

			// An operand field is all zeros in both the value and the mask.
			// strings.Repeat panics on a negative count, hence the guard.
			if argLen > 0 {
				zeros := strings.Repeat("0", argLen)
				binValue += zeros
				binMask += zeros
			}

			// Fields are read from the high bits down, so prepending
			// leaves the list ordered from the low bits up. Offset pieces
			// are collected separately and merged below.
			switch {
			case argName == "offs":
				offArgs = append(offArgs, word)
			case instArgs == "":
				instArgs = argName
			default:
				instArgs = argName + ", " + instArgs
			}
		}

		// The real offset may be a combination of two offset fields in the
		// binary code of the instruction, for example: BEQZ.
		if len(offArgs) > 0 {
			var left, right int
			switch len(offArgs) {
			case 1:
				left, right = offsBounds(offArgs[0])
			case 2:
				left, _ = offsBounds(offArgs[1])
				_, right = offsBounds(offArgs[0])
			}
			offset := fmt.Sprintf("arg_offset_%d_%d", left, right)
			if instArgs == "" {
				instArgs = offset
			} else {
				instArgs += ", " + offset
			}
		}

		ops = append(ops, op)
		opstrs[op] = opstr

		if instFormatComment == "" {
			instFormatComment = "// " + mnemonic
		} else if strings.HasPrefix(op, "AM") {
			instFormatComment = fmt.Sprintf("// %s rd, rk, rj", mnemonic)
		}
		instFormatComments[op] = instFormatComment

		// The parameter order of some instructions is inconsistent between
		// encoding and syntax, such as BSTRINS.*, so the arguments are
		// reordered to follow the syntax given in the comment.
		if instArgs != "" {
			syntaxArgs := strings.Split(instFormatComment, " ")[2:]
			encodedArgs := strings.Split(instArgs, ", ")
			var ordered []string
			for _, a := range syntaxArgs {
				a = strings.Split(a, ",")[0]
				for _, aa := range encodedArgs {
					if strings.Contains(aa, a) {
						ordered = append(ordered, aa)
						break
					}
					if a == "rd" && aa == "arg_fd" {
						ordered = append(ordered, "arg_rd")
						break
					}
				}
			}
			instArgs = strings.Join(ordered, ", ")
		}
		if strings.HasPrefix(op, "AM") {
			instArgs = "arg_rd, arg_rk, arg_rj"
		}

		instFormats[op] = fmt.Sprintf("{mask: %s, value: %s, op: %s, args: instArgs{%s}},", binstrToHex(binMask), binstrToHex(binValue), op, instArgs)

		i = j // next instruction
	}
	return
}