riscv64: implement riscv64spec for instruction table generation

Support generating tables for all RISC-V extensions listed in
$GOROOT/src/cmd/internal/obj/riscv/inst.go, as well as the
"C" Standard Extension for Compressed Instructions. The tables
are used to support instruction decoding on riscv64 targets.

riscv64spec relies on the riscv-opcodes project:
https://github.com/riscv/riscv-opcodes

Change-Id: Ib0589a87d1ba31fe431162d1f2d44a42bdb2ae06
Reviewed-on: https://go-review.googlesource.com/c/arch/+/602875
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Joel Sing <joel@sing.id.au>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
diff --git a/riscv64/riscv64spec/spec.go b/riscv64/riscv64spec/spec.go
new file mode 100644
index 0000000..53c0f1d
--- /dev/null
+++ b/riscv64/riscv64spec/spec.go
@@ -0,0 +1,476 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// riscv64spec reads the files contained in riscv-opcodes repo
+// to collect instruction encoding details.
+// repo url: https://github.com/riscv/riscv-opcodes
+// usage: go run spec.go <opcodes-repo-path>
+
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+// extensions is the RV64GC_zba_zbb_zbs extension listing; main opens one file per entry.
+// Reference: $GOROOT/src/cmd/internal/obj/riscv/inst.go
+var extensions = []string{
+	"rv_a",
+	"rv_c",
+	"rv_c_d",
+	"rv_d",
+	"rv_f",
+	"rv_i",
+	"rv_m",
+	"rv_q",
+	"rv_zba",
+	"rv_zbb",
+	"rv_zbs",
+	"rv_zfh",
+	"rv_zicsr",
+	"rv_zifencei",
+	"rv64_a",
+	"rv64_c",
+	"rv64_d",
+	"rv64_f",
+	"rv64_i",
+	"rv64_m",
+	"rv64_q",
+	"rv64_zba",
+	"rv64_zbb",
+	"rv64_zbs",
+	"rv64_zfh",
+}
+
+const ( // Text fragments that main writes verbatim into the generated tables.go.
+	prologueSec    = "// Generated by riscv64spec riscv-opcodes\n// DO NOT EDIT\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage riscv64asm\n\n"
+	opSec          = "const (\n\t_ Op = iota\n"
+	opstrSec       = "var opstr = [...]string{\n"
+	instFormatsSec = "var instFormats = [...]instFormat{\n"
+)
+
+var ( // Filled by genInst and emitted by main; the maps are keyed by the op names stored in ops.
+	ops                []string
+	opstrs             = make(map[string]string)
+	instFormatComments = make(map[string]string)
+	instFormats        = make(map[string]string)
+)
+
+// main reads the riscv-opcodes files named in extensions from the directory
+// given as the first command-line argument and writes the Op constants, the
+// opstr table and the instFormats table to tables.go in the current directory.
+// Any failure is reported through log.Fatal.
+func main() {
+	log.SetFlags(0)
+	log.SetPrefix("riscv64spec: ")
+
+	// os.Args[0] is the program name, so the repo path requires two elements.
+	if len(os.Args) < 2 {
+		log.Fatal("usage: go run spec.go <opcodes-repo-path>")
+	}
+	repoPath := os.Args[1]
+
+	// Parse all inputs before creating tables.go so that a bad input does not
+	// leave an empty output file behind.
+	for _, ext := range extensions {
+		if err := readExtension(repoPath, ext); err != nil {
+			log.Fatal(err)
+		}
+	}
+
+	// c.unimp wasn't in riscv-opcodes, so add it here
+	genInst(strings.Fields("c.unimp 15..0=0"))
+
+	sort.Strings(ops)
+
+	fileTables, err := os.Create("tables.go")
+	if err != nil {
+		log.Fatal(err)
+	}
+	out := bufio.NewWriter(fileTables)
+
+	// A bufio.Writer remembers its first write error and returns it from every
+	// later write and from Flush, so checking Flush covers all writes below.
+	out.WriteString(prologueSec)
+
+	// 1. write op
+	out.WriteString(opSec)
+	for _, op := range ops {
+		fmt.Fprintf(out, "\t%s\n", op)
+	}
+	out.WriteString(")\n\n")
+
+	// 2. write opstr
+	out.WriteString(opstrSec)
+	for _, op := range ops {
+		fmt.Fprintf(out, "\t%s\n", opstrs[op])
+	}
+	out.WriteString("}\n\n")
+
+	// 3. write instFormatComment and instFormat
+	out.WriteString(instFormatsSec)
+	for _, op := range ops {
+		fmt.Fprintf(out, "\t%s\n\t%s\n", instFormatComments[op], instFormats[op])
+	}
+	out.WriteString("}\n")
+
+	if err := out.Flush(); err != nil {
+		log.Fatal(err)
+	}
+	if err := fileTables.Close(); err != nil {
+		log.Fatal(err)
+	}
+}
+
+// readExtension scans the file named ext inside repoPath line by line and
+// hands every instruction line to genInst. Blank lines and lines starting
+// with '#' are skipped, as are "$" pseudo-op lines outside rv_zbb/rv64_zbb.
+// It returns any error from opening or reading the file, or from a pseudo-op
+// line too short to contain an instruction name.
+func readExtension(repoPath, ext string) error {
+	f, err := os.Open(filepath.Join(repoPath, ext))
+	if err != nil {
+		return err
+	}
+	// Runs when this function returns, so only one input file is open at a time.
+	defer f.Close()
+
+	sc := bufio.NewScanner(f)
+	for sc.Scan() {
+		words := strings.Fields(sc.Text())
+		if len(words) == 0 || words[0][0] == '#' {
+			continue
+		}
+
+		// skip $pseudo_op except rv_zbb/rv64_zbb
+		if words[0][0] == '$' {
+			if ext != "rv_zbb" && ext != "rv64_zbb" {
+				continue
+			}
+			// The first two words are dropped; the third must be the
+			// instruction name that genInst reads from words[0].
+			if len(words) < 3 {
+				return fmt.Errorf("%s: malformed pseudo-op line %q", ext, sc.Text())
+			}
+			words = words[2:]
+		}
+
+		genInst(words)
+	}
+	// Scan returning false can mean a read error, not just end of file.
+	return sc.Err()
+}
+
+// genInst records the instruction described by words in the package-level
+// tables. words[0] is the mnemonic; every later word is either a fixed bit
+// field ("hi..lo=value" or "bit=value"), which is folded into the match
+// value and mask, or an operand name, which is passed on to inferFormats.
+// Atomic ops (see isAtomic) are recorded four times, once per suffix.
+// words must not be empty.
+func genInst(words []string) {
+	mnemonic := strings.ToUpper(words[0])
+	op := strings.Replace(mnemonic, ".", "_", -1)
+
+	var value, mask uint32
+	var instArgs []string
+	for _, word := range words[1:] {
+		switch {
+		case strings.Contains(word, "="):
+			// Fixed bits: merge them into the match value and mask.
+			val := strings.Split(word, "=")
+			subval, submsk := genValueAndMask(val, strings.Split(val[0], ".."))
+			value |= subval
+			mask |= submsk
+		case len(word) > 0:
+			// Operand name: kept for inferFormats.
+			instArgs = append(instArgs, word)
+		}
+	}
+
+	instArgsStr := inferFormats(instArgs, op)
+	argsComment := " " + strings.Replace(instArgsStr, "arg_", "", -1)
+
+	// record stores the four table entries for one op name.
+	record := func(name, display string, msk, val uint32) {
+		ops = append(ops, name)
+		opstrs[name] = fmt.Sprintf("%s:\t\"%s\",", name, display)
+		instFormatComments[name] = "// " + strings.Replace(name, "_", ".", -1) + argsComment
+		instFormats[name] = fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: instArgs{%s}},", msk, val, name, instArgsStr)
+	}
+
+	if !isAtomic(op) {
+		record(op, mnemonic, mask, value)
+		return
+	}
+
+	// Handle the suffix of atomic instruction: re-generate the opcode string,
+	// value and mask for each one. The suffix index is placed at bit 25 and
+	// bits 25 and 26 are added to the mask.
+	for i, suf := range []string{"", ".RL", ".AQ", ".AQRL"} {
+		aop := op + strings.Replace(suf, ".", "_", -1)
+		record(aop, mnemonic+suf, mask|0x06000000, value|uint32(i)<<25)
+	}
+}
+
+// inferFormats returns the comma-separated arg_* list for op: a fixed list for
+// atomics, loads, stores, CSR accesses, JALR and fences, otherwise one entry
+// per instArgs element that decodeArgs recognizes. Formats distinguished:
+// R-Type (inst rd, rs1, rs2),
+// I-Type (inst rd, rs1, imm / inst rd, offset(rs1)),
+// UJ-Type (inst rd, imm),
+// U-Type (inst rd, imm),
+// SB-Type (inst rs1, rs2, offset)
+// S-Type (inst rs2, offset(rs1))
+func inferFormats(instArgs []string, op string) string {
+	// Atomic ops are matched by substring, ahead of the exact-name switch.
+	if strings.Contains(op, "AMO") || strings.Contains(op, "SC_") {
+		return "arg_rd, arg_rs2, arg_rs1_amo"
+	}
+	if strings.Contains(op, "LR_") {
+		return "arg_rd, arg_rs1_amo"
+	}
+
+	switch op {
+	case "LB", "LBU", "LD", "LH", "LHU", "LW", "LWU", "JALR":
+		return "arg_rd, arg_rs1_mem"
+
+	case "FLD", "FLW", "FLH", "FLQ":
+		return "arg_fd, arg_rs1_mem"
+
+	case "FSD", "FSW", "FSH", "FSQ":
+		return "arg_fs2, arg_rs1_store"
+
+	case "SD", "SB", "SW", "SH":
+		return "arg_rs2, arg_rs1_store"
+
+	case "CSRRW", "CSRRS", "CSRRC":
+		return "arg_rd, arg_csr, arg_rs1"
+
+	case "CSRRWI", "CSRRSI", "CSRRCI":
+		return "arg_rd, arg_csr, arg_zimm"
+
+	case "FENCE_I":
+		return ""
+
+	case "FENCE":
+		return "arg_pred, arg_succ"
+	}
+
+	// Everything else: translate operand by operand, dropping unknown ones.
+	var decoded []string
+	for _, arg := range instArgs {
+		if s := decodeArgs(arg, op); s != "" {
+			decoded = append(decoded, s)
+		}
+	}
+	return strings.Join(decoded, ", ")
+}
+
+// decodeArgs maps the riscv-opcodes operand name arg to the name of the
+// matching argument format defined in arg.go, or returns "" when arg is not
+// a recognized operand. op is the op name built by genInst (upper case, '.'
+// replaced by '_'); for register operands it decides between the integer
+// form and the floating-point form (arg_f...).
+// Operand names are compared exactly, so a fragment such as "r" or "arg"
+// is never mistaken for a register operand.
+func decodeArgs(arg string, op string) string {
+	switch arg {
+	case "rd":
+		if isFloatReg(op, "rd") || strings.Contains(op, "C_FLDSP") {
+			return "arg_fd"
+		}
+		return "arg_rd"
+
+	case "rs1":
+		if isFloatReg(op, "rs") {
+			return "arg_fs1"
+		}
+		return "arg_rs1"
+
+	case "rs2":
+		if isFloatReg(op, "rs") {
+			return "arg_fs2"
+		}
+		return "arg_rs2"
+
+	case "rs3":
+		if isFloatReg(op, "rs") {
+			return "arg_fs3"
+		}
+		return "arg_rs3"
+
+	case "imm12", "imm20", "jimm20":
+		return "arg_" + arg
+
+	case "bimm12lo":
+		return "arg_bimm12"
+
+	case "imm12lo":
+		return "arg_simm12"
+
+	case "shamtw":
+		return "arg_shamt5"
+
+	case "shamtd":
+		return "arg_shamt6"
+
+	case "rd_p":
+		if strings.Contains(op, "C_FLD") {
+			return "arg_fd_p"
+		}
+		return "arg_rd_p"
+
+	case "rs1_p":
+		return "arg_rs1_p"
+
+	case "rd_rs1_p":
+		return "arg_rd_rs1_p"
+
+	case "rs2_p":
+		if strings.Contains(op, "C_FSD") {
+			return "arg_fs2_p"
+		}
+		return "arg_rs2_p"
+
+	case "rd_n0":
+		return "arg_rd_n0"
+
+	case "rs1_n0":
+		return "arg_rs1_n0"
+
+	case "rd_rs1_n0":
+		return "arg_rd_rs1_n0"
+
+	case "c_rs1_n0":
+		return "arg_c_rs1_n0"
+
+	case "c_rs2_n0":
+		return "arg_c_rs2_n0"
+
+	case "c_rs2":
+		if strings.Contains(op, "C_FSDSP") {
+			return "arg_c_fs2"
+		}
+		return "arg_c_rs2"
+
+	case "rd_n2":
+		return "arg_rd_n2"
+
+	case "c_imm6lo":
+		return "arg_c_imm6"
+
+	case "c_nzimm6lo":
+		return "arg_c_nzimm6"
+
+	case "c_nzuimm6lo":
+		return "arg_c_nzuimm6"
+
+	case "c_uimm7lo":
+		return "arg_c_uimm7"
+
+	case "c_uimm8lo":
+		return "arg_c_uimm8"
+
+	case "c_uimm8sp_s":
+		return "arg_c_uimm8sp_s"
+
+	case "c_uimm8splo":
+		return "arg_c_uimm8sp"
+
+	case "c_uimm9sp_s":
+		return "arg_c_uimm9sp_s"
+
+	case "c_uimm9splo":
+		return "arg_c_uimm9sp"
+
+	case "c_bimm9lo":
+		return "arg_c_bimm9"
+
+	case "c_nzimm10lo":
+		return "arg_c_nzimm10"
+
+	case "c_nzuimm10":
+		return "arg_c_nzuimm10"
+
+	case "c_imm12":
+		return "arg_c_imm12"
+
+	case "c_nzimm18lo":
+		return "arg_c_nzimm18"
+	}
+	return ""
+}
+
+// genValueAndMask turns one fixed field of an instruction line into its part
+// of the 32-bit match value and mask. valStr is the word split at "=" and
+// secStr its left side split at ".." (high bit, low bit; one element means a
+// single bit). The value is parsed as unsigned. A malformed number, a range
+// outside 31..0 or a value wider than its field is fatal.
+func genValueAndMask(valStr []string, secStr []string) (uint32, uint32) {
+	val, err := strconv.ParseUint(valStr[1], 0, 32)
+	if err != nil {
+		log.Fatal(err)
+	}
+	l, err := strconv.Atoi(secStr[0])
+	if err != nil {
+		log.Fatal(err)
+	}
+	r := l
+	if len(secStr) > 1 {
+		if r, err = strconv.Atoi(secStr[1]); err != nil {
+			log.Fatal(err)
+		}
+	}
+	// Checked before shifting: a negative shift count would panic.
+	if r < 0 || l < r || l > 31 || val>>(l-r+1) != 0 {
+		log.Fatalf("invalid bit field %s=%s", valStr[0], valStr[1])
+	}
+	submsk := ^uint32(0) << (31 - l) >> (31 - l + r) << r
+	return uint32(val) << r, submsk
+}
+
+// isAtomic reports whether op begins with one of the three-character prefixes "AMO", "LR_" or "SC_".
+func isAtomic(op string) bool {
+	return len(op) >= 3 && (op[:3] == "AMO" || op[:3] == "LR_" || op[:3] == "SC_")
+}
+
+// isFloatReg reports whether the reg operand ("rd" or "rs") of op lives in a
+// floating-point register. The op name is tested against three groups of
+// substrings in order, and the first group that matches decides the result.
+func isFloatReg(op string, reg string) bool {
+	matchesAny := func(parts ...string) bool {
+		for _, part := range parts {
+			if strings.Contains(op, part) {
+				return true
+			}
+		}
+		return false
+	}
+
+	// First group: the answer is true whatever reg is.
+	if matchesAny("FADD", "FSUB", "FDIV", "FMUL", "FMIN", "FMAX", "FMADD", "FMSUB",
+		"FCVT_D_S", "FCVT_S_D", "FCVT_D_Q", "FCVT_Q_D", "FCVT_S_Q", "FCVT_Q_S",
+		"FCVT_H_S", "FCVT_S_H", "FNM", "FNEG", "FSQRT", "FSGNJ") {
+		return true
+	}
+
+	// Second group: every operand except "rd" counts as floating-point.
+	if matchesAny("FCLASS", "FCVT_L", "FCVT_W", "FEQ", "FLE", "FLT",
+		"FMV_X_H", "FMV_X_D", "FMV_X_W") {
+		return reg != "rd"
+	}
+
+	// Third group: every operand except "rs" counts as floating-point.
+	if matchesAny("FCVT_D", "FCVT_S", "FCVT_H", "FCVT_Q", "FMV_H_X", "FMV_D_X", "FMV_W_X") {
+		return reg != "rs"
+	}
+	return false
+}