unix: add riscv_hwprobe for riscv64

The riscv_hwprobe system call was introduced in Linux 6.4 and allows
the caller to determine a number of interesting pieces of information
about the underlying RISC-V CPUs, e.g., which extensions they support
and whether they allow fast unaligned memory accesses. For more information,
please see:

https://docs.kernel.org/riscv/hwprobe.html

We also update linux/mksysnum.go to ensure that the generated syscall constants
written to the zsysnum_linux_*.go files are always sorted by their syscall numbers
in ascending order.

Updates golang/go#61416

Change-Id: Iedb0a86adb65faac9061b9a5969ffa09eb5b303a
Reviewed-on: https://go-review.googlesource.com/c/sys/+/510795
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
diff --git a/unix/linux/mksysnum.go b/unix/linux/mksysnum.go
index ed41ce9..e1712cb 100644
--- a/unix/linux/mksysnum.go
+++ b/unix/linux/mksysnum.go
@@ -13,6 +13,7 @@
 	"os"
 	"os/exec"
 	"regexp"
+	"sort"
 	"strconv"
 	"strings"
 )
@@ -36,15 +37,15 @@
 	return fmt.Sprintf("%s,%s", goarch, goos)
 }
 
-func format(name string, num int, offset int) string {
+func format(name string, num int, offset int) (int, string) {
 	if num > 999 {
 		// ignore deprecated syscalls that are no longer implemented
 		// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/include/uapi/asm-generic/unistd.h?id=refs/heads/master#n716
-		return ""
+		return 0, ""
 	}
 	name = strings.ToUpper(name)
 	num = num + offset
-	return fmt.Sprintf("	SYS_%s = %d;\n", name, num)
+	return num, fmt.Sprintf("	SYS_%s = %d;\n", name, num)
 }
 
 func checkErr(err error) {
@@ -69,6 +70,36 @@
 	return false
 }
 
+// syscallNum holds the syscall number and the string
+// we will write to the generated file.
+type syscallNum struct {
+	num         int
+	declaration string
+}
+
+// syscallNums is a slice of syscallNum sorted by the syscall number in ascending order.
+type syscallNums []syscallNum
+
+// addSyscallNum adds the syscall declaration to syscallNums.
+func (nums *syscallNums) addSyscallNum(num int, declaration string) {
+	if declaration == "" {
+		return
+	}
+	if len(*nums) == 0 || (*nums)[len(*nums)-1].num <= num {
+		// This is the most common case as the syscall declarations output by the preprocessor
+		// are almost always sorted.
+		*nums = append(*nums, syscallNum{num, declaration})
+		return
+	}
+	i := sort.Search(len(*nums), func(i int) bool { return (*nums)[i].num >= num })
+
+	// Maintain the ordering in the preprocessor output when we have multiple definitions with
+	// the same value. i cannot be > len(nums) - 1 as nums[len(nums)-1].num > num.
+	for ; (*nums)[i].num == num; i++ {
+	}
+	*nums = append((*nums)[:i], append([]syscallNum{{num, declaration}}, (*nums)[i:]...)...)
+}
+
 func main() {
 	// Get the OS and architecture (using GOARCH_TARGET if it exists)
 	goos = os.Getenv("GOOS")
@@ -100,11 +131,23 @@
 		fmt.Fprintf(os.Stderr, "can't run %s", cc)
 		os.Exit(1)
 	}
-	text := ""
 	s := bufio.NewScanner(strings.NewReader(string(cmd)))
-	var offset, prev int
+	var offset, prev, asOffset int
+	var nums syscallNums
 	for s.Scan() {
 		t := re{str: s.Text()}
+
+		// The generated zsysnum_linux_*.go files for some platforms (arm64, loong64, riscv64)
+		// treat SYS_ARCH_SPECIFIC_SYSCALL as if it were a syscall, which it isn't; it's an offset.
+		// However, as this constant is already part of the public API, we leave it in place.
+		// Lines that define __NR_arch_specific_syscall (e.g. as 244) are thus processed twice:
+		// once here to extract the offset, and once below to add the constant.
+
+		if t.Match(`^#define __NR_arch_specific_syscall\s+([0-9]+)`) {
+			// riscv: extract arch specific offset
+			asOffset, _ = strconv.Atoi(t.sub[1]) // Make asOffset=0 if empty or non-numeric
+		}
+
 		if t.Match(`^#define __NR_Linux\s+([0-9]+)`) {
 			// mips/mips64: extract offset
 			offset, _ = strconv.Atoi(t.sub[1]) // Make offset=0 if empty or non-numeric
@@ -118,24 +161,32 @@
 		} else if t.Match(`^#define __NR_(\w+)\s+([0-9]+)`) {
 			prev, err = strconv.Atoi(t.sub[2])
 			checkErr(err)
-			text += format(t.sub[1], prev, offset)
+			nums.addSyscallNum(format(t.sub[1], prev, offset))
 		} else if t.Match(`^#define __NR3264_(\w+)\s+([0-9]+)`) {
 			prev, err = strconv.Atoi(t.sub[2])
 			checkErr(err)
-			text += format(t.sub[1], prev, offset)
+			nums.addSyscallNum(format(t.sub[1], prev, offset))
 		} else if t.Match(`^#define __NR_(\w+)\s+\(\w+\+\s*([0-9]+)\)`) {
 			r2, err := strconv.Atoi(t.sub[2])
 			checkErr(err)
-			text += format(t.sub[1], prev+r2, offset)
+			nums.addSyscallNum(format(t.sub[1], prev+r2, offset))
 		} else if t.Match(`^#define __NR_(\w+)\s+\(__NR_(?:SYSCALL_BASE|Linux) \+ ([0-9]+)`) {
 			r2, err := strconv.Atoi(t.sub[2])
 			checkErr(err)
-			text += format(t.sub[1], r2, offset)
+			nums.addSyscallNum(format(t.sub[1], r2, offset))
+		} else if asOffset != 0 && t.Match(`^#define __NR_(\w+)\s+\(__NR_arch_specific_syscall \+ ([0-9]+)`) {
+			r2, err := strconv.Atoi(t.sub[2])
+			checkErr(err)
+			nums.addSyscallNum(format(t.sub[1], r2, asOffset))
 		}
 	}
 	err = s.Err()
 	checkErr(err)
-	fmt.Printf(template, cmdLine(), goBuildTags(), plusBuildTags(), text)
+	var text strings.Builder
+	for _, num := range nums {
+		text.WriteString(num.declaration)
+	}
+	fmt.Printf(template, cmdLine(), goBuildTags(), plusBuildTags(), text.String())
 }
 
 const template = `// %s
diff --git a/unix/linux/types.go b/unix/linux/types.go
index f2cbd2d..bf2df60 100644
--- a/unix/linux/types.go
+++ b/unix/linux/types.go
@@ -426,6 +426,32 @@
 	__u32 brp_max;
 	__u32 brp_inc;
 };
+
+#if defined(__riscv)
+#include <asm/hwprobe.h>
+#else
+
+// copied from /usr/include/asm/hwprobe.h
+// values are not used but they need to be defined.
+
+#define RISCV_HWPROBE_KEY_MVENDORID	0
+#define RISCV_HWPROBE_KEY_MARCHID	1
+#define RISCV_HWPROBE_KEY_MIMPID	2
+#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR	3
+#define		RISCV_HWPROBE_BASE_BEHAVIOR_IMA	(1 << 0)
+#define RISCV_HWPROBE_KEY_IMA_EXT_0	4
+#define		RISCV_HWPROBE_IMA_FD		(1 << 0)
+#define		RISCV_HWPROBE_IMA_C		(1 << 1)
+#define RISCV_HWPROBE_KEY_CPUPERF_0	5
+#define		RISCV_HWPROBE_MISALIGNED_UNKNOWN	(0 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_EMULATED	(1 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_SLOW		(2 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_FAST		(3 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_UNSUPPORTED	(4 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_MASK		(7 << 0)
+
+struct riscv_hwprobe {};
+#endif
 */
 import "C"
 
@@ -5747,3 +5773,26 @@
 	VIRTIO_NET_HDR_GSO_UDP_L4 = C.VIRTIO_NET_HDR_GSO_UDP_L4
 	VIRTIO_NET_HDR_GSO_ECN    = C.VIRTIO_NET_HDR_GSO_ECN
 )
+
+type RISCVHWProbePairs C.struct_riscv_hwprobe
+
+// Filtered out for non-RISC-V architectures in mkpost.go.
+// Generated by:
+// perl -nlE '/^#define\s+(RISCV_HWPROBE_\w+)/ && say "$1 = C.$1"' /tmp/riscv64/include/asm/hwprobe.h
+const (
+	RISCV_HWPROBE_KEY_MVENDORID          = C.RISCV_HWPROBE_KEY_MVENDORID
+	RISCV_HWPROBE_KEY_MARCHID            = C.RISCV_HWPROBE_KEY_MARCHID
+	RISCV_HWPROBE_KEY_MIMPID             = C.RISCV_HWPROBE_KEY_MIMPID
+	RISCV_HWPROBE_KEY_BASE_BEHAVIOR      = C.RISCV_HWPROBE_KEY_BASE_BEHAVIOR
+	RISCV_HWPROBE_BASE_BEHAVIOR_IMA      = C.RISCV_HWPROBE_BASE_BEHAVIOR_IMA
+	RISCV_HWPROBE_KEY_IMA_EXT_0          = C.RISCV_HWPROBE_KEY_IMA_EXT_0
+	RISCV_HWPROBE_IMA_FD                 = C.RISCV_HWPROBE_IMA_FD
+	RISCV_HWPROBE_IMA_C                  = C.RISCV_HWPROBE_IMA_C
+	RISCV_HWPROBE_KEY_CPUPERF_0          = C.RISCV_HWPROBE_KEY_CPUPERF_0
+	RISCV_HWPROBE_MISALIGNED_UNKNOWN     = C.RISCV_HWPROBE_MISALIGNED_UNKNOWN
+	RISCV_HWPROBE_MISALIGNED_EMULATED    = C.RISCV_HWPROBE_MISALIGNED_EMULATED
+	RISCV_HWPROBE_MISALIGNED_SLOW        = C.RISCV_HWPROBE_MISALIGNED_SLOW
+	RISCV_HWPROBE_MISALIGNED_FAST        = C.RISCV_HWPROBE_MISALIGNED_FAST
+	RISCV_HWPROBE_MISALIGNED_UNSUPPORTED = C.RISCV_HWPROBE_MISALIGNED_UNSUPPORTED
+	RISCV_HWPROBE_MISALIGNED_MASK        = C.RISCV_HWPROBE_MISALIGNED_MASK
+)
diff --git a/unix/mkpost.go b/unix/mkpost.go
index 3d19d3f..7de7179 100644
--- a/unix/mkpost.go
+++ b/unix/mkpost.go
@@ -112,6 +112,12 @@
 		}
 	}
 
+	if goos == "linux" && goarch != "riscv64" {
+		// The RISCV_HWPROBE_ constants are only defined on Linux for riscv64
+		hwprobeConstRexexp := regexp.MustCompile(`const\s+\(\s+RISCV_HWPROBE_[^\)]+\)`)
+		b = hwprobeConstRexexp.ReplaceAll(b, nil)
+	}
+
 	// Intentionally export __val fields in Fsid and Sigset_t
 	valRegex := regexp.MustCompile(`type (Fsid|Sigset_t) struct {(\s+)X__(bits|val)(\s+\S+\s+)}`)
 	b = valRegex.ReplaceAll(b, []byte("type $1 struct {${2}Val$4}"))
@@ -138,6 +144,11 @@
 	ptraceRexexp := regexp.MustCompile(`type Ptrace((Psw|Fpregs|Per) struct {\s*})`)
 	b = ptraceRexexp.ReplaceAll(b, nil)
 
+	// If we have an empty RISCVHWProbePairs struct, we should delete it. Only riscv64 emits
+	// nonempty RISCVHWProbePairs structs.
+	hwprobeRexexp := regexp.MustCompile(`type RISCVHWProbePairs struct {\s*}`)
+	b = hwprobeRexexp.ReplaceAll(b, nil)
+
 	// Replace the control_regs union with a blank identifier for now.
 	controlRegsRegex := regexp.MustCompile(`(Control_regs)\s+\[0\]uint64`)
 	b = controlRegsRegex.ReplaceAll(b, []byte("_ [0]uint64"))
diff --git a/unix/syscall_linux_riscv64.go b/unix/syscall_linux_riscv64.go
index b1de100..5e6ceee 100644
--- a/unix/syscall_linux_riscv64.go
+++ b/unix/syscall_linux_riscv64.go
@@ -177,3 +177,14 @@
 	}
 	return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags)
 }
+
+//sys	riscvHWProbe(pairs []RISCVHWProbePairs, cpuCount uintptr, cpus *CPUSet, flags uint) (err error)
+
+func RISCVHWProbe(pairs []RISCVHWProbePairs, set *CPUSet, flags uint) (err error) {
+	var setSize uintptr
+
+	if set != nil {
+		setSize = uintptr(unsafe.Sizeof(*set))
+	}
+	return riscvHWProbe(pairs, setSize, set, flags)
+}
diff --git a/unix/zsyscall_linux_riscv64.go b/unix/zsyscall_linux_riscv64.go
index 0b29239..0ab4f2e 100644
--- a/unix/zsyscall_linux_riscv64.go
+++ b/unix/zsyscall_linux_riscv64.go
@@ -531,3 +531,19 @@
 	}
 	return
 }
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
+func riscvHWProbe(pairs []RISCVHWProbePairs, cpuCount uintptr, cpus *CPUSet, flags uint) (err error) {
+	var _p0 unsafe.Pointer
+	if len(pairs) > 0 {
+		_p0 = unsafe.Pointer(&pairs[0])
+	} else {
+		_p0 = unsafe.Pointer(&_zero)
+	}
+	_, _, e1 := Syscall6(SYS_RISCV_HWPROBE, uintptr(_p0), uintptr(len(pairs)), uintptr(cpuCount), uintptr(unsafe.Pointer(cpus)), uintptr(flags), 0)
+	if e1 != 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
diff --git a/unix/zsysnum_linux_riscv64.go b/unix/zsysnum_linux_riscv64.go
index 3e594a8..ef285c5 100644
--- a/unix/zsysnum_linux_riscv64.go
+++ b/unix/zsysnum_linux_riscv64.go
@@ -251,6 +251,8 @@
 	SYS_ACCEPT4                 = 242
 	SYS_RECVMMSG                = 243
 	SYS_ARCH_SPECIFIC_SYSCALL   = 244
+	SYS_RISCV_HWPROBE           = 258
+	SYS_RISCV_FLUSH_ICACHE      = 259
 	SYS_WAIT4                   = 260
 	SYS_PRLIMIT64               = 261
 	SYS_FANOTIFY_INIT           = 262
diff --git a/unix/ztypes_linux_riscv64.go b/unix/ztypes_linux_riscv64.go
index 9ea54b7..83c69c1 100644
--- a/unix/ztypes_linux_riscv64.go
+++ b/unix/ztypes_linux_riscv64.go
@@ -718,3 +718,26 @@
 	_      uint64
 	_      uint64
 }
+
+type RISCVHWProbePairs struct {
+	Key   int64
+	Value uint64
+}
+
+const (
+	RISCV_HWPROBE_KEY_MVENDORID          = 0x0
+	RISCV_HWPROBE_KEY_MARCHID            = 0x1
+	RISCV_HWPROBE_KEY_MIMPID             = 0x2
+	RISCV_HWPROBE_KEY_BASE_BEHAVIOR      = 0x3
+	RISCV_HWPROBE_BASE_BEHAVIOR_IMA      = 0x1
+	RISCV_HWPROBE_KEY_IMA_EXT_0          = 0x4
+	RISCV_HWPROBE_IMA_FD                 = 0x1
+	RISCV_HWPROBE_IMA_C                  = 0x2
+	RISCV_HWPROBE_KEY_CPUPERF_0          = 0x5
+	RISCV_HWPROBE_MISALIGNED_UNKNOWN     = 0x0
+	RISCV_HWPROBE_MISALIGNED_EMULATED    = 0x1
+	RISCV_HWPROBE_MISALIGNED_SLOW        = 0x2
+	RISCV_HWPROBE_MISALIGNED_FAST        = 0x3
+	RISCV_HWPROBE_MISALIGNED_UNSUPPORTED = 0x4
+	RISCV_HWPROBE_MISALIGNED_MASK        = 0x7
+)