loong64/loong64spec/spec.go - arch - Git at Google

 // Copyright 2024 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // loong64spec reads the "LoongArch-Vol1-EN.pdf" [1] to collect instruction
 // encoding details and output to tables.go.
 //
 // usage: go run spec.go LoongArch-Vol1-EN.pdf
 //
 // [1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf

 package main

 import (
 	"bytes"
 	"fmt"
 	"log"
 	"math"
 	"os"
 	"regexp"
 	"sort"
 	"strconv"
 	"strings"

 	"rsc.io/pdf"
 )

 // mergeMap copies every key/value pair of m2 into m1. An entry of m1 whose
 // key also occurs in m2 is overwritten. m1 must be non-nil whenever m2 has
 // entries, because the pairs are stored into m1 directly.
 func mergeMap(m1 map[string]string, m2 map[string]string) {
 	for key, value := range m2 {
 		m1[key] = value
 	}
 }

 // main reads the PDF named by the single command-line argument, collects the
 // instruction encodings from every page starting at the first page accepted
 // by isFirstPage, and writes the generated tables to "tables.go" in the
 // current directory.
 //
 // It exits with status 2 on a usage error, and through log.Fatal when the PDF
 // cannot be opened or when tables.go cannot be created, written or closed.
 func main() {
 	log.SetFlags(0)
 	log.SetPrefix("loong64spec: ")

 	if len(os.Args) != 2 {
 		fmt.Fprintf(os.Stderr, "usage: loong64spec LoongArch-Vol1-EN.pdf\n")
 		os.Exit(2)
 	}
 	f, err := pdf.Open(os.Args[1])
 	if err != nil {
 		log.Fatal(err)
 	}

 	// Scan document looking for instructions.
 	var ops []string
 	opstrs := map[string]string{}
 	instFormatComments := map[string]string{}
 	instFormats := map[string]string{}
 	fp := 0 // number of the first table page; 0 until it has been found
 	for pageNum, n := 1, f.NumPage(); pageNum <= n; pageNum++ {
 		p := f.Page(pageNum)
 		if fp == 0 {
 			if !isFirstPage(p) {
 				continue
 			}
 			fp = pageNum
 		}
 		cPageOps, cPageOpstrs, cPageInstFormatComments, cPageInstFormats := parsePage(pageNum, p, fp == pageNum)
 		ops = append(ops, cPageOps...)
 		mergeMap(opstrs, cPageOpstrs)
 		mergeMap(instFormatComments, cPageInstFormatComments)
 		mergeMap(instFormats, cPageInstFormats)
 	}

 	sort.Strings(ops)

 	// The three tables are filled side by side so that they list the
 	// instructions in the same (sorted) order.
 	var prologue, opBuf, opstrBuf, instFormatsBuf bytes.Buffer
 	prologue.WriteString("// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n")
 	opBuf.WriteString("const (\n\t_ Op = iota\n")
 	opstrBuf.WriteString("var opstr = [...]string{\n")
 	instFormatsBuf.WriteString("var instFormats = [...]instFormat{\n")

 	for _, op := range ops {
 		// 1. write op
 		fmt.Fprintf(&opBuf, "\t%s\n", op)
 		// 2. write opstr
 		fmt.Fprintf(&opstrBuf, "\t%s\n", opstrs[op])
 		// 3. write instFormat
 		fmt.Fprintf(&instFormatsBuf, "\t%s\n\t%s\n", instFormatComments[op], instFormats[op])
 	}

 	opBuf.WriteString(")\n\n")
 	opstrBuf.WriteString("}\n\n")
 	instFormatsBuf.WriteString("}\n")

 	fileTables, err := os.Create("tables.go")
 	if err != nil {
 		log.Fatal(err)
 	}
 	for _, section := range []*bytes.Buffer{&prologue, &opBuf, &opstrBuf, &instFormatsBuf} {
 		if _, err := fileTables.Write(section.Bytes()); err != nil {
 			fileTables.Close()
 			log.Fatal(err)
 		}
 	}
 	// Close exactly once and report its error: a failed close can mean the
 	// generated file is incomplete.
 	if err := fileTables.Close(); err != nil {
 		log.Fatal(err)
 	}
 }

 // isFirstPage reports whether the text of page starts with "AppendixB".
 //
 // The text pieces of the page are concatenated in order. The scan stops with
 // true as soon as the concatenation equals "AppendixB", and with false as
 // soon as it is no longer a prefix of "AppendixB". A page whose text ends
 // before the marker is complete yields false.
 func isFirstPage(page pdf.Page) bool {
 	const marker = "AppendixB"
 	seen := ""
 	for _, piece := range page.Content().Text {
 		seen += piece.S
 		if seen == marker {
 			return true
 		}
 		if !strings.HasPrefix(marker, seen) {
 			return false
 		}
 	}
 	return false
 }

 // getArg maps the name of an operand field, as spelled in a cell of the
 // encoding table, to the width of the field in bits and to the identifier
 // used for the field in the generated instArgs list.
 //
 // Register-like fields are recognised by testing whether name occurs inside
 // the generated identifier, so both "fd" and "arg_fd" select arg_fd. The
 // cases are tried in order and the first match wins. The immediate fields
 // "saN", "uiN" and "siN" take their width from the number that follows the
 // prefix. "offs[hi:lo]" yields hi-lo+1 together with the placeholder name
 // "offs".
 //
 // A length of 0 means the width could not be determined. An empty name, or an
 // offs field without a "[hi:lo]" range, yields (0, "").
 func getArg(name string) (length int, argName string) {
 	if name == "" {
 		// Every string contains the empty string, so without this guard an
 		// empty cell would be reported as arg_fd.
 		return 0, ""
 	}

 	// within reports whether name occurs inside the generated identifier id.
 	within := func(id string) bool { return strings.Contains(id, name) }
 	// widthAfter parses the number that follows prefix in name. It is only
 	// called when name contains prefix. The conversion error is ignored on
 	// purpose: a missing width yields 0, which callers treat as "unknown".
 	widthAfter := func(prefix string) int {
 		n, _ := strconv.Atoi(strings.Split(name, prefix)[1])
 		return n
 	}

 	switch {
 	case within("arg_fd"):
 		return 5, "arg_fd"
 	case within("arg_fj"):
 		return 5, "arg_fj"
 	case within("arg_fk"):
 		return 5, "arg_fk"
 	case within("arg_fa"):
 		return 5, "arg_fa"
 	case within("arg_rd"):
 		return 5, "arg_rd"
 	case within("arg_rj") || name == "rj!=0,1":
 		return 5, "arg_rj"
 	case within("arg_rk"):
 		return 5, "arg_rk"
 	case name == "csr":
 		return 14, "arg_csr_23_10"
 	case within("arg_cd"):
 		return 5, "arg_cd"
 	case within("arg_cj"):
 		return 5, "arg_cj"
 	case within("arg_ca"):
 		return 5, "arg_ca"
 	case strings.Contains(name, "sa"):
 		length = widthAfter("sa")
 		if length == 2 {
 			return length, "arg_sa2_16_15"
 		}
 		return length, "arg_sa3_17_15"
 	case within("arg_seq_17_10"):
 		return 8, "arg_seq_17_10"
 	case within("arg_op_4_0"):
 		return 5, "arg_op_4_0"
 	case strings.Contains(name, "ui"):
 		length = widthAfter("ui")
 		switch length {
 		case 5:
 			return length, "arg_ui5_14_10"
 		case 6:
 			return length, "arg_ui6_15_10"
 		}
 		return length, "arg_ui12_21_10"
 	case within("arg_lsbw"):
 		return 5, "arg_lsbw"
 	case within("arg_msbw"):
 		return 5, "arg_msbw"
 	case within("arg_lsbd"):
 		return 6, "arg_lsbd"
 	case within("arg_msbd"):
 		return 6, "arg_msbd"
 	case strings.Contains(name, "si"):
 		length = widthAfter("si")
 		switch length {
 		case 12:
 			return length, "arg_si12_21_10"
 		case 14:
 			return length, "arg_si14_23_10"
 		case 16:
 			return length, "arg_si16_25_10"
 		}
 		return length, "arg_si20_24_5"
 	case strings.Contains(name, "offs"):
 		// Expect "offs[hi:lo]". Without both '[' and ':' there is no range
 		// to read, so report the field as unknown instead of indexing past
 		// the end of the split result.
 		halves := strings.Split(name, ":")
 		if len(halves) < 2 {
 			return 0, ""
 		}
 		hiParts := strings.Split(halves[0], "[")
 		if len(hiParts) < 2 {
 			return 0, ""
 		}
 		// Conversion errors are ignored as before: an unparsable bound
 		// counts as 0.
 		left, _ := strconv.Atoi(hiParts[1])
 		right, _ := strconv.Atoi(strings.Split(halves[1], "]")[0])
 		return left - right + 1, "offs"
 	default:
 		return 0, ""
 	}
 }

 // binstrToHex converts the first 32 characters of str, each expected to be
 // '0' or '1' with the most significant bit first, into a "0x"-prefixed
 // hexadecimal string of eight lower-case digits.
 //
 // str must hold at least 32 characters, otherwise indexing it panics.
 // Characters after the 32nd are ignored.
 func binstrToHex(str string) string {
 	out := "0x"
 	for pos := 0; pos < 32; pos += 4 {
 		// Fold four bit characters into one nibble; 48 is the code of '0'.
 		nibble := 0
 		for k := 0; k < 4; k++ {
 			nibble = nibble*2 + (int(str[pos+k]) - 48)
 		}
 		if nibble >= 10 && nibble <= 15 {
 			out += string(rune('a' + nibble - 10))
 		} else {
 			out += strconv.Itoa(nibble)
 		}
 	}
 	return out
 }

 // dealWithFcmp builds the table entries for every FCMP.cond.<ds> instruction.
 // ds is the size suffix of the mnemonic: "D" selects the opcode digits 0x0c2,
 // any other value gets 0x0c1 (used for "S").
 //
 // FCMP.cond.S/D has the following format:
 //
 //	| 31 - 20 | 19 - 15 | 14 - 10 | 9 - 5 | 4 | 3 | 2 - 0 |
 //	|---------|---------|---------|-------|---|---|-------|
 //	|   op    |  cond   |    fk   |   fj  | 0 | 0 |  cd   |
 //
 // The cond field holds one of these values, given as the hexadecimal number
 // of bits 19 to 15, the same as described in the instruction set manual:
 //
 //	CAF 00   CUN 08   CEQ 04   CUEQ 0c   CLT 02   CULT 0a
 //	CLE 06   CULE 0e  CNE 10   COR 14    CUNE 18
 //	SAF 01   SUN 09   SEQ 05   SUEQ 0d   SLT 03   SULT 0b
 //	SLE 07   SULE 0f  SNE 11   SOR 15    SUNE 19
 //
 // The table in the code does not store those numbers directly. cond starts at
 // bit 15, which is not a hexadecimal digit boundary, so each entry holds the
 // two hexadecimal digits that cond contributes to bits 19 to 12 of the whole
 // instruction word (the value above shifted left by three bits).
 //
 // The result maps each Op identifier FCMP_<cond>_<ds> to a map with the keys
 // "op", "opstr", "instFormatComment" and "instFormat".
 func dealWithFcmp(ds string) (fcmpConditions map[string]map[string]string) {
 	type condition struct {
 		name string // condition mnemonic
 		bits string // hex digits for bits 19..12 of the instruction word
 	}
 	table := []condition{
 		{"CAF", "00"},
 		{"CUN", "40"},
 		{"CEQ", "20"},
 		{"CUEQ", "60"},
 		{"CLT", "10"},
 		{"CULT", "50"},
 		{"CLE", "30"},
 		{"CULE", "70"},
 		{"CNE", "80"},
 		{"COR", "a0"},
 		{"CUNE", "c0"},
 		{"SAF", "08"},
 		{"SUN", "48"},
 		{"SEQ", "28"},
 		{"SUEQ", "68"},
 		{"SLT", "18"},
 		{"SULT", "58"},
 		{"SLE", "38"},
 		{"SULE", "78"},
 		{"SNE", "88"},
 		{"SOR", "a8"},
 		{"SUNE", "c8"},
 	}

 	fcmpConditions = make(map[string]map[string]string)
 	for _, c := range table {
 		op := "FCMP_" + c.name + "_" + ds

 		var instFormat string
 		switch ds {
 		case "D":
 			instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c2%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", c.bits, c.name, ds)
 		default:
 			instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c1%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", c.bits, c.name, ds)
 		}

 		fcmpConditions[op] = map[string]string{
 			"op":                op,
 			"opstr":             fmt.Sprintf("FCMP_%s_%s:\t\"FCMP.%s.%s\",", c.name, ds, c.name, ds),
 			"instFormatComment": fmt.Sprintf("// FCMP.%s.%s cd, fj, fk", c.name, ds),
 			"instFormat":        instFormat,
 		}
 	}
 	return fcmpConditions
 }

 // findWords groups consecutive text pieces of a page into words (really,
 // phrases) and returns one pdf.Text per phrase.
 //
 // The pieces are processed in runs. A run starts at the first unprocessed
 // piece, covers every following piece with the same Y, and is then extended
 // over following pieces whose X lies within the X range of that line; this
 // covers a cell whose text wraps onto another line. Within a run, a piece is
 // joined to the current phrase when its font size differs by less than 0.1
 // from that of the phrase's first piece and it starts no further than
 // FontSize/6 past the phrase's end (joined directly) or FontSize*2/3 past it
 // (joined with a space). Any other piece starts a new phrase.
 //
 // Each result carries the font, font size and position of the phrase's first
 // piece, and a width reaching to the end of its last piece.
 func findWords(chars []pdf.Text) (words []pdf.Text) {
 	for start := 0; start < len(chars); {
 		// Find all chars on one line, remembering the X of the first and of
 		// the last of them.
 		minX, maxX := chars[start].X, chars[start].X
 		stop := start + 1
 		for ; stop < len(chars) && chars[stop].Y == chars[start].Y; stop++ {
 			maxX = chars[stop].X
 		}

 		// A word may continue on another line (different Y) and still belong
 		// to the same cell. The next cell's X would be larger than this
 		// line's, so pieces whose X falls inside [minX, maxX] are taken as a
 		// continuation of the current run.
 		for stop < len(chars) && chars[stop].X >= minX && chars[stop].X <= maxX {
 			stop++
 		}

 		// Split the run into words (really, phrases).
 		for head := start; head < stop; {
 			first := &chars[head]
 			phrase := first.S
 			right := first.X + first.W
 			glyphGap := first.FontSize / 6
 			spaceGap := first.FontSize * 2 / 3

 			next := head + 1
 		grow:
 			for ; next < stop; next++ {
 				cur := &chars[next]
 				sameSize := math.Abs(cur.FontSize-first.FontSize) < 0.1
 				if !sameSize {
 					break
 				}
 				switch {
 				case cur.X <= right+glyphGap:
 					// Grow word.
 					phrase += cur.S
 				case cur.X <= right+spaceGap:
 					// Add space to phrase before next word.
 					phrase += " " + cur.S
 				default:
 					break grow
 				}
 				right = cur.X + cur.W
 			}

 			words = append(words, pdf.Text{
 				Font:     first.Font,
 				FontSize: first.FontSize,
 				X:        first.X,
 				Y:        first.Y,
 				W:        right - first.X,
 				S:        phrase,
 			})
 			head = next
 		}
 		start = stop
 	}

 	return words
 }

 // parsePage extracts the instruction encodings listed on one page of the
 // instruction encoding table.
 //
 // num is the page number and is used only in diagnostics. p is the page.
 // isFP tells whether p is the first page of the table, which carries the
 // table name in addition to the table header.
 //
 // It returns the Op identifiers found on the page and three maps keyed by
 // those identifiers: the opstr table entry, the comment line written above
 // the instFormat entry, and the instFormat entry itself.
 //
 // A row consists of the mnemonic, optionally the operand list, and then the
 // encoding cells from bit 31 down to bit 0; a cell is either a literal "0" or
 // "1" or the name of an operand field. Rows whose mnemonic starts with "FCMP"
 // are not decoded from the page but expanded through dealWithFcmp.
 //
 // A page with too little text to hold the table is skipped with a log
 // message. A row that yields fewer than 32 encoding bits stops the program
 // through log.Fatalf.
 func parsePage(num int, p pdf.Page, isFP bool) (ops []string, opstrs map[string]string, instFormatComments map[string]string, instFormats map[string]string) {
 	opstrs = make(map[string]string)
 	instFormatComments = make(map[string]string)
 	instFormats = make(map[string]string)

 	// Compiled once per page rather than once per table cell.
 	allDigits := regexp.MustCompile("^\\d+$")

 	content := p.Content()

 	// table name(70), table header(64), page num(3)
 	skip := 64
 	if isFP {
 		skip = 134
 	}
 	if len(content.Text) < skip+3 {
 		// Slicing below would panic; such a page cannot hold table rows.
 		log.Printf("page %d: only %d text items, too few to hold an encoding table; skipped", num, len(content.Text))
 		return ops, opstrs, instFormatComments, instFormats
 	}

 	text := findWords(content.Text[skip : len(content.Text)-3])

 	for i := 0; i < len(text); {
 		if strings.HasPrefix(text[i].S, "FCMP") {
 			for fc, inst := range dealWithFcmp(strings.Split(text[i].S, ".")[2]) {
 				ops = append(ops, inst["op"])
 				opstrs[fc] = inst["opstr"]
 				instFormatComments[fc] = inst["instFormatComment"]
 				instFormats[fc] = inst["instFormat"]
 			}
 			// The entries above are synthesized, so skip the rest of the
 			// row. The row may be the last one on the page, hence the
 			// bounds check.
 			t := i + 1
 			for t < len(text) && text[t].Y == text[i].Y {
 				t++
 			}
 			i = t
 			continue
 		}

 		op := strings.Replace(text[i].S, ".", "_", -1)
 		opstr := fmt.Sprintf("%s:\t\"%s\",", op, text[i].S)
 		instFormatComment := ""
 		binValue := ""
 		binMask := ""
 		instArgs := ""
 		offs := false
 		var offArgs []string

 		j := i + 1
 		for ; j < len(text) && text[j].Y == text[i].Y; j++ {

 			// Some instructions have no arguments, so the word after the
 			// mnemonic is already a 0/1 bit and must not be skipped.
 			if j == i+1 && !allDigits.MatchString(text[j].S) {
 				instFormatComment = fmt.Sprintf("// %s %s", text[i].S, strings.Replace(text[j].S, ",", ", ", -1))
 				continue
 			}
 			if text[j].S == "0" || text[j].S == "1" {
 				binValue += text[j].S
 				binMask += "1"
 				continue
 			}

 			argLen, argName := getArg(text[j].S)

 			// Get argument's length failed, compute it by other arguments:
 			// the field spans from the next unassigned bit down to whatever
 			// the remaining cells of the row leave over.
 			if argLen == 0 {
 				left := 31 - len(binValue)
 				right := 0
 				for l := j + 1; l < len(text) && text[l].Y == text[j].Y; l++ {
 					if text[l].S == "0" || text[l].S == "1" {
 						right += 1
 					} else {
 						tArgLen, _ := getArg(text[l].S)
 						if tArgLen == 0 {
 							fmt.Fprintf(os.Stderr, "there are more than two args whose length is unknown.\n")
 						}
 						right += tArgLen
 					}
 				}
 				argLen = left - right + 1
 				argName = "arg_" + text[j].S + "_" + strconv.FormatInt(int64(left), 10) + "_" + strconv.FormatInt(int64(right), 10)
 			}

 			for k := 0; k < argLen; k++ {
 				binValue += "0"
 				binMask += "0"
 			}

 			if argName != "offs" {
 				// Cells run from high bits to low bits; prepend so that the
 				// list ends up ordered from low bits to high bits.
 				if instArgs != "" {
 					instArgs = ", " + instArgs
 				}
 				instArgs = argName + instArgs
 			} else {
 				offs = true
 				offArgs = append(offArgs, text[j].S)
 			}
 		}

 		// The real offset is a combination of two offsets in the binary code of the instruction, for example: BEQZ
 		if offs && offArgs != nil {
 			var left int
 			var right int
 			if len(offArgs) == 1 {
 				left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[0], "[")[1])
 				right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0])
 			} else if len(offArgs) == 2 {
 				left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[1], ":")[0], "[")[1])
 				right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0])
 			}

 			if instArgs == "" {
 				instArgs = fmt.Sprintf("arg_offset_%d_%d", left, right)
 			} else {
 				instArgs += fmt.Sprintf(", arg_offset_%d_%d", left, right)
 			}
 		}

 		ops = append(ops, op)
 		opstrs[op] = opstr
 		if instFormatComment == "" {
 			instFormatComment = "// " + text[i].S
 		} else if strings.HasPrefix(op, "AM") {
 			instFormatComment = fmt.Sprintf("// %s rd, rk, rj", text[i].S)
 		}
 		instFormatComments[op] = instFormatComment
 		// The parameter order of some instructions is inconsistent in encoding and syntax, such as BSTRINS.*
 		// Reorder the collected arguments to follow the operand list in the
 		// comment.
 		if instArgs != "" {
 			args := strings.Split(instFormatComment, " ")[2:]
 			tInstArgs := strings.Split(instArgs, ", ")
 			var newOrderedInstArgs []string
 			for _, a := range args {
 				a = strings.Split(a, ",")[0]
 				for _, aa := range tInstArgs {
 					if strings.Contains(aa, a) {
 						newOrderedInstArgs = append(newOrderedInstArgs, aa)
 						break
 					} else if a == "rd" && aa == "arg_fd" {
 						newOrderedInstArgs = append(newOrderedInstArgs, "arg_rd")
 						break
 					}
 				}
 			}
 			instArgs = strings.Join(newOrderedInstArgs, ", ")
 		}
 		if strings.HasPrefix(op, "AM") {
 			instArgs = "arg_rd, arg_rk, arg_rj"
 		}

 		// binMask and binValue always have the same length. binstrToHex
 		// indexes 32 characters, so report a short row clearly instead of
 		// failing there with an index error.
 		if len(binValue) < 32 {
 			log.Fatalf("page %d: %s: encoding has only %d bits, want 32", num, text[i].S, len(binValue))
 		}
 		instFormat := fmt.Sprintf("{mask: %s, value: %s, op: %s, args: instArgs{%s}},", binstrToHex(binMask), binstrToHex(binValue), op, instArgs)
 		instFormats[op] = instFormat

 		i = j // next instruction
 	}

 	return ops, opstrs, instFormatComments, instFormats
 }
	// Copyright 2024 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// loong64spec reads the "LoongArch-Vol1-EN.pdf" [1] to collect instruction
	// encoding details and output to tables.go.
	//
	// usage: go run spec.go LoongArch-Vol1-EN.pdf
	//
	// [1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf

	package main

	import (
	"bytes"
	"fmt"
	"log"
	"math"
	"os"
	"regexp"
	"sort"
	"strconv"
	"strings"

	"rsc.io/pdf"
	)

	// mergeMap copies every key/value pair of m2 into m1. An entry of m1 whose
	// key also occurs in m2 is overwritten. m1 must be non-nil whenever m2 has
	// entries, because the pairs are stored into m1 directly.
	func mergeMap(m1 map[string]string, m2 map[string]string) {
		for key, value := range m2 {
			m1[key] = value
		}
	}

	// main reads the PDF named by the single command-line argument, collects the
	// instruction encodings from every page starting at the first page accepted
	// by isFirstPage, and writes the generated tables to "tables.go" in the
	// current directory.
	//
	// It exits with status 2 on a usage error, and through log.Fatal when the PDF
	// cannot be opened or when tables.go cannot be created, written or closed.
	func main() {
		log.SetFlags(0)
		log.SetPrefix("loong64spec: ")

		if len(os.Args) != 2 {
			fmt.Fprintf(os.Stderr, "usage: loong64spec LoongArch-Vol1-EN.pdf\n")
			os.Exit(2)
		}
		f, err := pdf.Open(os.Args[1])
		if err != nil {
			log.Fatal(err)
		}

		// Scan document looking for instructions.
		var ops []string
		opstrs := map[string]string{}
		instFormatComments := map[string]string{}
		instFormats := map[string]string{}
		fp := 0 // number of the first table page; 0 until it has been found
		for pageNum, n := 1, f.NumPage(); pageNum <= n; pageNum++ {
			p := f.Page(pageNum)
			if fp == 0 {
				if !isFirstPage(p) {
					continue
				}
				fp = pageNum
			}
			cPageOps, cPageOpstrs, cPageInstFormatComments, cPageInstFormats := parsePage(pageNum, p, fp == pageNum)
			ops = append(ops, cPageOps...)
			mergeMap(opstrs, cPageOpstrs)
			mergeMap(instFormatComments, cPageInstFormatComments)
			mergeMap(instFormats, cPageInstFormats)
		}

		sort.Strings(ops)

		// The three tables are filled side by side so that they list the
		// instructions in the same (sorted) order.
		var prologue, opBuf, opstrBuf, instFormatsBuf bytes.Buffer
		prologue.WriteString("// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n")
		opBuf.WriteString("const (\n\t_ Op = iota\n")
		opstrBuf.WriteString("var opstr = [...]string{\n")
		instFormatsBuf.WriteString("var instFormats = [...]instFormat{\n")

		for _, op := range ops {
			// 1. write op
			fmt.Fprintf(&opBuf, "\t%s\n", op)
			// 2. write opstr
			fmt.Fprintf(&opstrBuf, "\t%s\n", opstrs[op])
			// 3. write instFormat
			fmt.Fprintf(&instFormatsBuf, "\t%s\n\t%s\n", instFormatComments[op], instFormats[op])
		}

		opBuf.WriteString(")\n\n")
		opstrBuf.WriteString("}\n\n")
		instFormatsBuf.WriteString("}\n")

		fileTables, err := os.Create("tables.go")
		if err != nil {
			log.Fatal(err)
		}
		for _, section := range []*bytes.Buffer{&prologue, &opBuf, &opstrBuf, &instFormatsBuf} {
			if _, err := fileTables.Write(section.Bytes()); err != nil {
				fileTables.Close()
				log.Fatal(err)
			}
		}
		// Close exactly once and report its error: a failed close can mean the
		// generated file is incomplete.
		if err := fileTables.Close(); err != nil {
			log.Fatal(err)
		}
	}

	// isFirstPage reports whether the text of page starts with "AppendixB".
	//
	// The text pieces of the page are concatenated in order. The scan stops with
	// true as soon as the concatenation equals "AppendixB", and with false as
	// soon as it is no longer a prefix of "AppendixB". A page whose text ends
	// before the marker is complete yields false.
	func isFirstPage(page pdf.Page) bool {
		const marker = "AppendixB"
		seen := ""
		for _, piece := range page.Content().Text {
			seen += piece.S
			if seen == marker {
				return true
			}
			if !strings.HasPrefix(marker, seen) {
				return false
			}
		}
		return false
	}

	// getArg maps the name of an operand field, as spelled in a cell of the
	// encoding table, to the width of the field in bits and to the identifier
	// used for the field in the generated instArgs list.
	//
	// Register-like fields are recognised by testing whether name occurs inside
	// the generated identifier, so both "fd" and "arg_fd" select arg_fd. The
	// cases are tried in order and the first match wins. The immediate fields
	// "saN", "uiN" and "siN" take their width from the number that follows the
	// prefix. "offs[hi:lo]" yields hi-lo+1 together with the placeholder name
	// "offs".
	//
	// A length of 0 means the width could not be determined. An empty name, or an
	// offs field without a "[hi:lo]" range, yields (0, "").
	func getArg(name string) (length int, argName string) {
		if name == "" {
			// Every string contains the empty string, so without this guard an
			// empty cell would be reported as arg_fd.
			return 0, ""
		}

		// within reports whether name occurs inside the generated identifier id.
		within := func(id string) bool { return strings.Contains(id, name) }
		// widthAfter parses the number that follows prefix in name. It is only
		// called when name contains prefix. The conversion error is ignored on
		// purpose: a missing width yields 0, which callers treat as "unknown".
		widthAfter := func(prefix string) int {
			n, _ := strconv.Atoi(strings.Split(name, prefix)[1])
			return n
		}

		switch {
		case within("arg_fd"):
			return 5, "arg_fd"
		case within("arg_fj"):
			return 5, "arg_fj"
		case within("arg_fk"):
			return 5, "arg_fk"
		case within("arg_fa"):
			return 5, "arg_fa"
		case within("arg_rd"):
			return 5, "arg_rd"
		case within("arg_rj") || name == "rj!=0,1":
			return 5, "arg_rj"
		case within("arg_rk"):
			return 5, "arg_rk"
		case name == "csr":
			return 14, "arg_csr_23_10"
		case within("arg_cd"):
			return 5, "arg_cd"
		case within("arg_cj"):
			return 5, "arg_cj"
		case within("arg_ca"):
			return 5, "arg_ca"
		case strings.Contains(name, "sa"):
			length = widthAfter("sa")
			if length == 2 {
				return length, "arg_sa2_16_15"
			}
			return length, "arg_sa3_17_15"
		case within("arg_seq_17_10"):
			return 8, "arg_seq_17_10"
		case within("arg_op_4_0"):
			return 5, "arg_op_4_0"
		case strings.Contains(name, "ui"):
			length = widthAfter("ui")
			switch length {
			case 5:
				return length, "arg_ui5_14_10"
			case 6:
				return length, "arg_ui6_15_10"
			}
			return length, "arg_ui12_21_10"
		case within("arg_lsbw"):
			return 5, "arg_lsbw"
		case within("arg_msbw"):
			return 5, "arg_msbw"
		case within("arg_lsbd"):
			return 6, "arg_lsbd"
		case within("arg_msbd"):
			return 6, "arg_msbd"
		case strings.Contains(name, "si"):
			length = widthAfter("si")
			switch length {
			case 12:
				return length, "arg_si12_21_10"
			case 14:
				return length, "arg_si14_23_10"
			case 16:
				return length, "arg_si16_25_10"
			}
			return length, "arg_si20_24_5"
		case strings.Contains(name, "offs"):
			// Expect "offs[hi:lo]". Without both '[' and ':' there is no range
			// to read, so report the field as unknown instead of indexing past
			// the end of the split result.
			halves := strings.Split(name, ":")
			if len(halves) < 2 {
				return 0, ""
			}
			hiParts := strings.Split(halves[0], "[")
			if len(hiParts) < 2 {
				return 0, ""
			}
			// Conversion errors are ignored as before: an unparsable bound
			// counts as 0.
			left, _ := strconv.Atoi(hiParts[1])
			right, _ := strconv.Atoi(strings.Split(halves[1], "]")[0])
			return left - right + 1, "offs"
		default:
			return 0, ""
		}
	}

	// binstrToHex converts the first 32 characters of str, each expected to be
	// '0' or '1' with the most significant bit first, into a "0x"-prefixed
	// hexadecimal string of eight lower-case digits.
	//
	// str must hold at least 32 characters, otherwise indexing it panics.
	// Characters after the 32nd are ignored.
	func binstrToHex(str string) string {
		out := "0x"
		for pos := 0; pos < 32; pos += 4 {
			// Weight the four bit characters 8, 4, 2, 1 by doubling the running
			// value before adding each bit; 48 is the code of '0'.
			nibble := 0
			for k := 0; k < 4; k++ {
				nibble = nibble*2 + (int(str[pos+k]) - 48)
			}
			if nibble >= 10 && nibble <= 15 {
				out += string(rune('a' + nibble - 10))
			} else {
				out += strconv.Itoa(nibble)
			}
		}
		return out
	}

	// dealWithFcmp builds the table entries for every FCMP.cond.<ds> instruction.
	// ds is the size suffix of the mnemonic: "D" selects the opcode digits 0x0c2,
	// any other value gets 0x0c1 (used for "S").
	//
	// FCMP.cond.S/D has the following format:
	//
	//	| 31 - 20 | 19 - 15 | 14 - 10 | 9 - 5 | 4 | 3 | 2 - 0 |
	//	|---------|---------|---------|-------|---|---|-------|
	//	|   op    |  cond   |    fk   |   fj  | 0 | 0 |  cd   |
	//
	// The cond field holds one of these values, given as the hexadecimal number
	// of bits 19 to 15, the same as described in the instruction set manual:
	//
	//	CAF 00   CUN 08   CEQ 04   CUEQ 0c   CLT 02   CULT 0a
	//	CLE 06   CULE 0e  CNE 10   COR 14    CUNE 18
	//	SAF 01   SUN 09   SEQ 05   SUEQ 0d   SLT 03   SULT 0b
	//	SLE 07   SULE 0f  SNE 11   SOR 15    SUNE 19
	//
	// The table in the code does not store those numbers directly. cond starts at
	// bit 15, which is not a hexadecimal digit boundary, so each entry holds the
	// two hexadecimal digits that cond contributes to bits 19 to 12 of the whole
	// instruction word (the value above shifted left by three bits).
	//
	// The result maps each Op identifier FCMP_<cond>_<ds> to a map with the keys
	// "op", "opstr", "instFormatComment" and "instFormat".
	func dealWithFcmp(ds string) (fcmpConditions map[string]map[string]string) {
		type condition struct {
			name string // condition mnemonic
			bits string // hex digits for bits 19..12 of the instruction word
		}
		table := []condition{
			{"CAF", "00"},
			{"CUN", "40"},
			{"CEQ", "20"},
			{"CUEQ", "60"},
			{"CLT", "10"},
			{"CULT", "50"},
			{"CLE", "30"},
			{"CULE", "70"},
			{"CNE", "80"},
			{"COR", "a0"},
			{"CUNE", "c0"},
			{"SAF", "08"},
			{"SUN", "48"},
			{"SEQ", "28"},
			{"SUEQ", "68"},
			{"SLT", "18"},
			{"SULT", "58"},
			{"SLE", "38"},
			{"SULE", "78"},
			{"SNE", "88"},
			{"SOR", "a8"},
			{"SUNE", "c8"},
		}

		fcmpConditions = make(map[string]map[string]string)
		for _, c := range table {
			op := "FCMP_" + c.name + "_" + ds

			var instFormat string
			switch ds {
			case "D":
				instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c2%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", c.bits, c.name, ds)
			default:
				instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c1%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", c.bits, c.name, ds)
			}

			fcmpConditions[op] = map[string]string{
				"op":                op,
				"opstr":             fmt.Sprintf("FCMP_%s_%s:\t\"FCMP.%s.%s\",", c.name, ds, c.name, ds),
				"instFormatComment": fmt.Sprintf("// FCMP.%s.%s cd, fj, fk", c.name, ds),
				"instFormat":        instFormat,
			}
		}
		return fcmpConditions
	}

	// findWords groups consecutive text pieces of a page into words (really,
	// phrases) and returns one pdf.Text per phrase.
	//
	// The pieces are processed in runs. A run starts at the first unprocessed
	// piece, covers every following piece with the same Y, and is then extended
	// over following pieces whose X lies within the X range of that line; this
	// covers a cell whose text wraps onto another line. Within a run, a piece is
	// joined to the current phrase when its font size differs by less than 0.1
	// from that of the phrase's first piece and it starts no further than
	// FontSize/6 past the phrase's end (joined directly) or FontSize*2/3 past it
	// (joined with a space). Any other piece starts a new phrase.
	//
	// Each result carries the font, font size and position of the phrase's first
	// piece, and a width reaching to the end of its last piece.
	func findWords(chars []pdf.Text) (words []pdf.Text) {
		for start := 0; start < len(chars); {
			// Find all chars on one line, remembering the X of the first and of
			// the last of them.
			minX, maxX := chars[start].X, chars[start].X
			stop := start + 1
			for ; stop < len(chars) && chars[stop].Y == chars[start].Y; stop++ {
				maxX = chars[stop].X
			}

			// A word may continue on another line (different Y) and still belong
			// to the same cell. The next cell's X would be larger than this
			// line's, so pieces whose X falls inside [minX, maxX] are taken as a
			// continuation of the current run.
			for stop < len(chars) && chars[stop].X >= minX && chars[stop].X <= maxX {
				stop++
			}

			// Split the run into words (really, phrases).
			for head := start; head < stop; {
				first := &chars[head]
				phrase := first.S
				right := first.X + first.W
				glyphGap := first.FontSize / 6
				spaceGap := first.FontSize * 2 / 3

				next := head + 1
			grow:
				for ; next < stop; next++ {
					cur := &chars[next]
					sameSize := math.Abs(cur.FontSize-first.FontSize) < 0.1
					if !sameSize {
						break
					}
					switch {
					case cur.X <= right+glyphGap:
						// Grow word.
						phrase += cur.S
					case cur.X <= right+spaceGap:
						// Add space to phrase before next word.
						phrase += " " + cur.S
					default:
						break grow
					}
					right = cur.X + cur.W
				}

				words = append(words, pdf.Text{
					Font:     first.Font,
					FontSize: first.FontSize,
					X:        first.X,
					Y:        first.Y,
					W:        right - first.X,
					S:        phrase,
				})
				head = next
			}
			start = stop
		}

		return words
	}

	func parsePage(num int, p pdf.Page, isFP bool) (ops []string, opstrs map[string]string, instFormatComments map[string]string, instFormats map[string]string) {
	opstrs = make(map[string]string)
	instFormatComments = make(map[string]string)
	instFormats = make(map[string]string)

	content := p.Content()

	var text []pdf.Text
	for _, t := range content.Text {
	text = append(text, t)
	}

	// table name(70), table header(64), page num(3)
	if isFP {
	text = text[134 : len(text)-3]
	} else {
	text = text[64 : len(text)-3]
	}

	text = findWords(text)

	for i := 0; i < len(text); {
	var fcmpConditions map[string]map[string]string
	if strings.HasPrefix(text[i].S, "FCMP") {
	fcmpConditions = dealWithFcmp(strings.Split(text[i].S, ".")[2])

	for fc, inst := range fcmpConditions {
	ops = append(ops, inst["op"])
	opstrs[fc] = inst["opstr"]
	instFormatComments[fc] = inst["instFormatComment"]
	instFormats[fc] = inst["instFormat"]
	}
	t := i + 1
	for ; text[t].Y == text[i].Y; t++ {
	continue
	}
	i = t
	continue
	}

	op := strings.Replace(text[i].S, ".", "_", -1)
	opstr := fmt.Sprintf("%s:\t\"%s\",", op, text[i].S)
	instFormatComment := ""
	binValue := ""
	binMask := ""
	instArgs := ""
	offs := false
	var offArgs []string

	j := i + 1
	for ; j < len(text) && text[j].Y == text[i].Y; j++ {

	// Some instruction has no arguments, so the next word(text[j].S) is not the arguments string but 0/1 bit, it shouldn't be skipped.
	if res, _ := regexp.MatchString("^\\d+$", text[j].S); j == i+1 && res == false {
	instFormatComment = fmt.Sprintf("// %s %s", text[i].S, strings.Replace(text[j].S, ",", ", ", -1))
	continue
	}
	if text[j].S == "0" \|\| text[j].S == "1" {
	binValue += text[j].S
	binMask += "1"
	} else {
	argLen, argName := getArg(text[j].S)

	// Get argument's length failed, compute it by other arguments.
	if argLen == 0 {
	left := 31 - len(binValue)
	right := 0
	l := j + 1
	if l < len(text) && text[l].Y == text[j].Y {
	for ; text[l].Y == text[j].Y; l++ {
	if text[l].S == "0" \|\| text[l].S == "1" {
	right += 1
	} else {
	tArgLen, _ := getArg(text[l].S)
	if tArgLen == 0 {
	fmt.Fprintf(os.Stderr, "there are more than two args whose length is unknown.\n")
	}
	right += tArgLen
	}
	}
	}
	argLen = left - right + 1
	argName = "arg_" + text[j].S + "_" + strconv.FormatInt(int64(left), 10) + "_" + strconv.FormatInt(int64(right), 10)
	}

	for k := 0; k < argLen; k++ {
	binValue += "0"
	binMask += "0"
	}

	if argName != "offs" {
	if instArgs != "" {
	instArgs = ", " + instArgs
	}
	instArgs = argName + instArgs
	} else {
	offs = true
	offArgs = append(offArgs, text[j].S)
	}
	}
	}

	// The real offset is a combination of two offsets in the binary code of the instruction, for example: BEQZ
	if offs && offArgs != nil {
	var left int
	var right int
	if len(offArgs) == 1 {
	left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[0], "[")[1])
	right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0])
	} else if len(offArgs) == 2 {
	left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[1], ":")[0], "[")[1])
	right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0])
	}

	if instArgs == "" {
	instArgs = fmt.Sprintf("arg_offset_%d_%d", left, right)
	} else {
	instArgs += fmt.Sprintf(", arg_offset_%d_%d", left, right)
	}
	}

	ops = append(ops, op)
	opstrs[op] = opstr
	if instFormatComment == "" {
	instFormatComment = "// " + text[i].S
	} else if strings.HasPrefix(op, "AM") {
	instFormatComment = fmt.Sprintf("// %s rd, rk, rj", text[i].S)
	}
	instFormatComments[op] = instFormatComment
	// The parameter order of some instructions is inconsistent in encoding and syntax, such as BSTRINS.*
	if instArgs != "" {
	args := strings.Split(instFormatComment, " ")[2:]
	tInstArgs := strings.Split(instArgs, ", ")
	newOrderedInstArgs := []string{}
	for _, a := range args {
	a = strings.Split(a, ",")[0]
	for _, aa := range tInstArgs {
	if strings.Contains(aa, a) {
	newOrderedInstArgs = append(newOrderedInstArgs, aa)
	break
	} else if a == "rd" && aa == "arg_fd" {
	newOrderedInstArgs = append(newOrderedInstArgs, "arg_rd")
	break
	}
	}
	}
	instArgs = strings.Join(newOrderedInstArgs, ", ")
	}
	if strings.HasPrefix(op, "AM") {
	instArgs = "arg_rd, arg_rk, arg_rj"
	}
	instFormat := fmt.Sprintf("{mask: %s, value: %s, op: %s, args: instArgs{%s}},", binstrToHex(binMask), binstrToHex(binValue), op, instArgs)
	instFormats[op] = instFormat

	i = j // next instruction
	}

	return
	}