| // Copyright 2024 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // loong64spec reads the "LoongArch-Vol1-EN.pdf" [1] to collect instruction |
| // encoding details and output to tables.go. |
| // |
| // usage: go run spec.go LoongArch-Vol1-EN.pdf |
| // |
| // [1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf |
| |
| package main |
| |
| import ( |
| "bytes" |
| "fmt" |
| "log" |
| "math" |
| "os" |
| "regexp" |
| "sort" |
| "strconv" |
| "strings" |
| |
| "rsc.io/pdf" |
| ) |
| |
| func mergeMap(m1 map[string]string, m2 map[string]string) { |
| for k := range m2 { |
| m1[k] = m2[k] |
| } |
| } |
| |
| func main() { |
| log.SetFlags(0) |
| log.SetPrefix("loong64spec: ") |
| |
| if len(os.Args) != 2 { |
| fmt.Fprintf(os.Stderr, "usage: loong64spec LoongArch-Vol1-EN.pdf\n") |
| os.Exit(2) |
| } |
| f, err := pdf.Open(os.Args[1]) |
| if err != nil { |
| log.Fatal(err) |
| } |
| var prologue bytes.Buffer |
| prologue.Write([]byte("// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n")) |
| |
| var op_f bytes.Buffer |
| op_f.Write([]byte("const (\n\t_ Op = iota\n")) |
| |
| var opstr_f bytes.Buffer |
| opstr_f.Write([]byte("var opstr = [...]string{\n")) |
| |
| var instFormats_f bytes.Buffer |
| instFormats_f.Write([]byte("var instFormats = [...]instFormat{\n")) |
| |
| // Scan document looking for instructions. |
| n := f.NumPage() |
| var ops []string |
| opstrs := map[string]string{} |
| instFormatComments := map[string]string{} |
| instFormats := map[string]string{} |
| var fp int |
| for pageNum := 1; pageNum <= n; pageNum++ { |
| p := f.Page(pageNum) |
| if fp == 0 { |
| if !isFirstPage(p) { |
| continue |
| } |
| fp = pageNum |
| } |
| cPageOps, cPageOpstrs, cPageInstFormatComments, cPageInstFormats := parsePage(pageNum, p, fp == pageNum) |
| ops = append(ops, cPageOps...) |
| mergeMap(opstrs, cPageOpstrs) |
| mergeMap(instFormatComments, cPageInstFormatComments) |
| mergeMap(instFormats, cPageInstFormats) |
| } |
| |
| sort.Strings(ops) |
| |
| for _, op := range ops { |
| // 1. write op |
| op_f.Write([]byte(fmt.Sprintf("\t%s\n", op))) |
| // 2. write opstr |
| opstr_f.Write([]byte(fmt.Sprintf("\t%s\n", opstrs[op]))) |
| // 3. write instFormat |
| instFormats_f.Write([]byte(fmt.Sprintf("\t%s\n\t%s\n", instFormatComments[op], instFormats[op]))) |
| } |
| |
| op_f.Write([]byte(")\n\n")) |
| opstr_f.Write([]byte("}\n\n")) |
| instFormats_f.Write([]byte("}\n")) |
| |
| fileTables, err := os.Create("tables.go") |
| defer fileTables.Close() |
| |
| fileTables.Write(prologue.Bytes()) |
| fileTables.Write(op_f.Bytes()) |
| fileTables.Write(opstr_f.Bytes()) |
| fileTables.Write(instFormats_f.Bytes()) |
| |
| fileTables.Close() |
| } |
| |
| func isFirstPage(page pdf.Page) bool { |
| content := page.Content() |
| appendixb := "AppendixB" |
| ct := "" |
| for _, t := range content.Text { |
| ct += t.S |
| if ct == "AppendixB" { |
| return true |
| } |
| if strings.HasPrefix(appendixb, ct) { |
| continue |
| } else { |
| return false |
| } |
| } |
| return false |
| } |
| |
| func getArg(name string) (length int, argName string) { |
| switch { |
| case strings.Contains("arg_fd", name): |
| return 5, "arg_fd" |
| case strings.Contains("arg_fj", name): |
| return 5, "arg_fj" |
| case strings.Contains("arg_fk", name): |
| return 5, "arg_fk" |
| case strings.Contains("arg_fa", name): |
| return 5, "arg_fa" |
| case strings.Contains("arg_rd", name): |
| return 5, "arg_rd" |
| case strings.Contains("arg_rj", name) || name == "rj!=0,1": |
| return 5, "arg_rj" |
| case strings.Contains("arg_rk", name): |
| return 5, "arg_rk" |
| case name == "csr": |
| return 14, "arg_csr_23_10" |
| case strings.Contains("arg_cd", name): |
| return 5, "arg_cd" |
| case strings.Contains("arg_cj", name): |
| return 5, "arg_cj" |
| case strings.Contains("arg_ca", name): |
| return 5, "arg_ca" |
| case strings.Contains(name, "sa"): |
| length, _ := strconv.Atoi(strings.Split(name, "sa")[1]) |
| if length == 2 { |
| argName = "arg_sa2_16_15" |
| } else { |
| argName = "arg_sa3_17_15" |
| } |
| return length, argName |
| case strings.Contains("arg_seq_17_10", name): |
| return 8, "arg_seq_17_10" |
| case strings.Contains("arg_op_4_0", name): |
| return 5, "arg_op_4_0" |
| case strings.Contains(name, "ui"): |
| length, _ := strconv.Atoi(strings.Split(name, "ui")[1]) |
| if length == 5 { |
| argName = "arg_ui5_14_10" |
| } else if length == 6 { |
| argName = "arg_ui6_15_10" |
| } else { |
| argName = "arg_ui12_21_10" |
| } |
| return length, argName |
| case strings.Contains("arg_lsbw", name): |
| return 5, "arg_lsbw" |
| case strings.Contains("arg_msbw", name): |
| return 5, "arg_msbw" |
| case strings.Contains("arg_lsbd", name): |
| return 6, "arg_lsbd" |
| case strings.Contains("arg_msbd", name): |
| return 6, "arg_msbd" |
| case strings.Contains(name, "si"): |
| length, _ := strconv.Atoi(strings.Split(name, "si")[1]) |
| if length == 12 { |
| argName = "arg_si12_21_10" |
| } else if length == 14 { |
| argName = "arg_si14_23_10" |
| } else if length == 16 { |
| argName = "arg_si16_25_10" |
| } else { |
| argName = "arg_si20_24_5" |
| } |
| return length, argName |
| case strings.Contains(name, "offs"): |
| splitName := strings.Split(name, ":") |
| left, _ := strconv.Atoi(strings.Split(splitName[0], "[")[1]) |
| right, _ := strconv.Atoi(strings.Split(splitName[1], "]")[0]) |
| return left - right + 1, "offs" |
| default: |
| return 0, "" |
| } |
| } |
| |
| func binstrToHex(str string) string { |
| rst := 0 |
| hex := "0x" |
| charArray := []byte(str) |
| for i := 0; i < 32; { |
| rst = 1*(int(charArray[i+3])-48) + 2*(int(charArray[i+2])-48) + 4*(int(charArray[i+1])-48) + 8*(int(charArray[i])-48) |
| switch rst { |
| case 10: |
| hex = hex + "a" |
| case 11: |
| hex = hex + "b" |
| case 12: |
| hex = hex + "c" |
| case 13: |
| hex = hex + "d" |
| case 14: |
| hex = hex + "e" |
| case 15: |
| hex = hex + "f" |
| default: |
| hex += strconv.Itoa(rst) |
| } |
| |
| i = i + 4 |
| } |
| return hex |
| } |
| |
| /* |
| Here we deal with the instruction FCMP.cond.S/D, which has the following format: |
| |
| | 31 - 20 | 19 - 15 | 14 - 10 | 9 - 5 | 4 | 3 | 2 - 0 | |
| |---------|---------|---------|-------|---|---|-------| |
| | op | cond | fk | fj | 0 | 0 | cd | |
| |
| The `cond` field has these possible values: |
| |
| "CAF": "00", |
| "CUN": "08", |
| "CEQ": "04", |
| "CUEQ": "0c", |
| "CLT": "02", |
| "CULT": "0a", |
| "CLE": "06", |
| "CULE": "0e", |
| "CNE": "10", |
| "COR": "14", |
| "CUNE": "18", |
| "SAF": "01", |
| "SUN": "09", |
| "SEQ": "05", |
| "SUEQ": "0d", |
| "SLT": "03", |
| "SULT": "0b", |
| "SLE": "07", |
| "SULE": "0f", |
| "SNE": "11", |
| "SOR": "15", |
| "SUNE": "19", |
| |
| These values are the hexadecimal numbers of bits 19 to 15, the same as |
| described in the instruction set manual. |
| |
| The following code defines a map, the values in it represent the hexadecimal |
| encoding of the cond field in the entire instruction. In this case, the upper |
| 4 bits and the lowest 1 bit are encoded separately, so the encoding is |
| different from the encoding described above. |
| */ |
| func dealWithFcmp(ds string) (fcmpConditions map[string]map[string]string) { |
| conds := map[string]string{ |
| "CAF": "00", |
| "CUN": "40", |
| "CEQ": "20", |
| "CUEQ": "60", |
| "CLT": "10", |
| "CULT": "50", |
| "CLE": "30", |
| "CULE": "70", |
| "CNE": "80", |
| "COR": "a0", |
| "CUNE": "c0", |
| "SAF": "08", |
| "SUN": "48", |
| "SEQ": "28", |
| "SUEQ": "68", |
| "SLT": "18", |
| "SULT": "58", |
| "SLE": "38", |
| "SULE": "78", |
| "SNE": "88", |
| "SOR": "a8", |
| "SUNE": "c8", |
| } |
| fcmpConditions = make(map[string]map[string]string) |
| for k, v := range conds { |
| op := fmt.Sprintf("FCMP_%s_%s", k, ds) |
| opstr := fmt.Sprintf("FCMP_%s_%s:\t\"FCMP.%s.%s\",", k, ds, k, ds) |
| instFormatComment := fmt.Sprintf("// FCMP.%s.%s cd, fj, fk", k, ds) |
| var instFormat string |
| if ds == "D" { |
| instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c2%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds) |
| } else { |
| instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c1%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds) |
| } |
| |
| fcmpConditions[op] = make(map[string]string) |
| fcmpConditions[op]["op"] = op |
| fcmpConditions[op]["opstr"] = opstr |
| fcmpConditions[op]["instFormatComment"] = instFormatComment |
| fcmpConditions[op]["instFormat"] = instFormat |
| } |
| return |
| } |
| |
| func findWords(chars []pdf.Text) (words []pdf.Text) { |
| for i := 0; i < len(chars); { |
| xRange := []float64{chars[i].X, chars[i].X} |
| j := i + 1 |
| |
| // Find all chars on one line. |
| for j < len(chars) && chars[j].Y == chars[i].Y { |
| xRange[1] = chars[j].X |
| j++ |
| } |
| |
| // we need to note that the word may change line(Y) but belong to one cell. So, after loop over all continued |
| // chars whose Y are same, check if the next char's X belong to the range of xRange, if true, means it should |
| // be contact to current word, because the next word's X should bigger than current one. |
| for j < len(chars) && chars[j].X >= xRange[0] && chars[j].X <= xRange[1] { |
| j++ |
| } |
| |
| var end float64 |
| // Split line into words (really, phrases). |
| for k := i; k < j; { |
| ck := &chars[k] |
| s := ck.S |
| end = ck.X + ck.W |
| charSpace := ck.FontSize / 6 |
| wordSpace := ck.FontSize * 2 / 3 |
| l := k + 1 |
| for l < j { |
| // Grow word. |
| cl := &chars[l] |
| |
| if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace { |
| s += cl.S |
| end = cl.X + cl.W |
| l++ |
| continue |
| } |
| // Add space to phrase before next word. |
| if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace { |
| s += " " + cl.S |
| end = cl.X + cl.W |
| l++ |
| continue |
| } |
| break |
| } |
| f := ck.Font |
| words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) |
| k = l |
| } |
| i = j |
| } |
| |
| return words |
| } |
| |
| func parsePage(num int, p pdf.Page, isFP bool) (ops []string, opstrs map[string]string, instFormatComments map[string]string, instFormats map[string]string) { |
| opstrs = make(map[string]string) |
| instFormatComments = make(map[string]string) |
| instFormats = make(map[string]string) |
| |
| content := p.Content() |
| |
| var text []pdf.Text |
| for _, t := range content.Text { |
| text = append(text, t) |
| } |
| |
| // table name(70), table header(64), page num(3) |
| if isFP { |
| text = text[134 : len(text)-3] |
| } else { |
| text = text[64 : len(text)-3] |
| } |
| |
| text = findWords(text) |
| |
| for i := 0; i < len(text); { |
| var fcmpConditions map[string]map[string]string |
| if strings.HasPrefix(text[i].S, "FCMP") { |
| fcmpConditions = dealWithFcmp(strings.Split(text[i].S, ".")[2]) |
| |
| for fc, inst := range fcmpConditions { |
| ops = append(ops, inst["op"]) |
| opstrs[fc] = inst["opstr"] |
| instFormatComments[fc] = inst["instFormatComment"] |
| instFormats[fc] = inst["instFormat"] |
| } |
| t := i + 1 |
| for ; text[t].Y == text[i].Y; t++ { |
| continue |
| } |
| i = t |
| continue |
| } |
| |
| op := strings.Replace(text[i].S, ".", "_", -1) |
| opstr := fmt.Sprintf("%s:\t\"%s\",", op, text[i].S) |
| instFormatComment := "" |
| binValue := "" |
| binMask := "" |
| instArgs := "" |
| offs := false |
| var offArgs []string |
| |
| j := i + 1 |
| for ; j < len(text) && text[j].Y == text[i].Y; j++ { |
| |
| // Some instruction has no arguments, so the next word(text[j].S) is not the arguments string but 0/1 bit, it shouldn't be skipped. |
| if res, _ := regexp.MatchString("^\\d+$", text[j].S); j == i+1 && res == false { |
| instFormatComment = fmt.Sprintf("// %s %s", text[i].S, strings.Replace(text[j].S, ",", ", ", -1)) |
| continue |
| } |
| if text[j].S == "0" || text[j].S == "1" { |
| binValue += text[j].S |
| binMask += "1" |
| } else { |
| argLen, argName := getArg(text[j].S) |
| |
| // Get argument's length failed, compute it by other arguments. |
| if argLen == 0 { |
| left := 31 - len(binValue) |
| right := 0 |
| l := j + 1 |
| if l < len(text) && text[l].Y == text[j].Y { |
| for ; text[l].Y == text[j].Y; l++ { |
| if text[l].S == "0" || text[l].S == "1" { |
| right += 1 |
| } else { |
| tArgLen, _ := getArg(text[l].S) |
| if tArgLen == 0 { |
| fmt.Fprintf(os.Stderr, "there are more than two args whose length is unknown.\n") |
| } |
| right += tArgLen |
| } |
| } |
| } |
| argLen = left - right + 1 |
| argName = "arg_" + text[j].S + "_" + strconv.FormatInt(int64(left), 10) + "_" + strconv.FormatInt(int64(right), 10) |
| } |
| |
| for k := 0; k < argLen; k++ { |
| binValue += "0" |
| binMask += "0" |
| } |
| |
| if argName != "offs" { |
| if instArgs != "" { |
| instArgs = ", " + instArgs |
| } |
| instArgs = argName + instArgs |
| } else { |
| offs = true |
| offArgs = append(offArgs, text[j].S) |
| } |
| } |
| } |
| |
| // The real offset is a combination of two offsets in the binary code of the instruction, for example: BEQZ |
| if offs && offArgs != nil { |
| var left int |
| var right int |
| if len(offArgs) == 1 { |
| left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[0], "[")[1]) |
| right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0]) |
| } else if len(offArgs) == 2 { |
| left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[1], ":")[0], "[")[1]) |
| right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0]) |
| } |
| |
| if instArgs == "" { |
| instArgs = fmt.Sprintf("arg_offset_%d_%d", left, right) |
| } else { |
| instArgs += fmt.Sprintf(", arg_offset_%d_%d", left, right) |
| } |
| } |
| |
| ops = append(ops, op) |
| opstrs[op] = opstr |
| if instFormatComment == "" { |
| instFormatComment = "// " + text[i].S |
| } else if strings.HasPrefix(op, "AM") { |
| instFormatComment = fmt.Sprintf("// %s rd, rk, rj", text[i].S) |
| } |
| instFormatComments[op] = instFormatComment |
| // The parameter order of some instructions is inconsistent in encoding and syntax, such as BSTRINS.* |
| if instArgs != "" { |
| args := strings.Split(instFormatComment, " ")[2:] |
| tInstArgs := strings.Split(instArgs, ", ") |
| newOrderedInstArgs := []string{} |
| for _, a := range args { |
| a = strings.Split(a, ",")[0] |
| for _, aa := range tInstArgs { |
| if strings.Contains(aa, a) { |
| newOrderedInstArgs = append(newOrderedInstArgs, aa) |
| break |
| } else if a == "rd" && aa == "arg_fd" { |
| newOrderedInstArgs = append(newOrderedInstArgs, "arg_rd") |
| break |
| } |
| } |
| } |
| instArgs = strings.Join(newOrderedInstArgs, ", ") |
| } |
| if strings.HasPrefix(op, "AM") { |
| instArgs = "arg_rd, arg_rk, arg_rj" |
| } |
| instFormat := fmt.Sprintf("{mask: %s, value: %s, op: %s, args: instArgs{%s}},", binstrToHex(binMask), binstrToHex(binValue), op, instArgs) |
| instFormats[op] = instFormat |
| |
| i = j // next instruction |
| } |
| |
| return |
| } |