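cpu: add support for SVE2 detection

Add ARM64.HasSVE2 and the "sve2" option name. On Linux the flag is set
from the HWCAP2 SVE2 bit; when the CPU ID registers are parsed directly
it is derived from bits [3:0] of ID_AA64ZFR0_EL1, which is read by the
new getzfr0 assembly helper.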

Fixes golang/go#66952

Change-Id: Idaf2ce3b09baf33cf29079677a83a51ea9c4b255
GitHub-Last-Rev: eac00886d1370e7a0fa7a6cb18299c5e7b6e21e4
GitHub-Pull-Request: golang/sys#193
Reviewed-on: https://go-review.googlesource.com/c/sys/+/580655
Reviewed-by: Ian Lance Taylor <iant@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Tobias Klauser <tobias.klauser@gmail.com>
diff --git a/cpu/cpu.go b/cpu/cpu.go
index 4756ad5..8fa707a 100644
--- a/cpu/cpu.go
+++ b/cpu/cpu.go
@@ -103,6 +103,7 @@
 	HasASIMDDP  bool // Advanced SIMD double precision instruction set
 	HasSHA512   bool // SHA512 hardware implementation
 	HasSVE      bool // Scalable Vector Extensions
+	HasSVE2     bool // Scalable Vector Extensions 2
 	HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32
 	_           CacheLinePad
 }
diff --git a/cpu/cpu_arm64.go b/cpu/cpu_arm64.go
index f3eb993..0e27a21 100644
--- a/cpu/cpu_arm64.go
+++ b/cpu/cpu_arm64.go
@@ -28,6 +28,7 @@
 		{Name: "sm3", Feature: &ARM64.HasSM3},
 		{Name: "sm4", Feature: &ARM64.HasSM4},
 		{Name: "sve", Feature: &ARM64.HasSVE},
+		{Name: "sve2", Feature: &ARM64.HasSVE2},
 		{Name: "crc32", Feature: &ARM64.HasCRC32},
 		{Name: "atomics", Feature: &ARM64.HasATOMICS},
 		{Name: "asimdhp", Feature: &ARM64.HasASIMDHP},
@@ -164,6 +165,21 @@
 	switch extractBits(pfr0, 32, 35) {
 	case 1:
 		ARM64.HasSVE = true
+
+		parseARM64SVERegister(getzfr0())
+	}
+}
+
+// parseARM64SVERegister sets ARM64.HasSVE2 from zfr0, the value of the
+// ID_AA64ZFR0_EL1 register.
+//
+// Bits [3:0] hold the SVEver field, which is cumulative: 0 is SVE only,
+// 1 adds SVE2, and higher values (for example 2 for SVE2.1) add further
+// extensions on top of SVE2. Any value of at least 1 therefore implies
+// SVE2, so the check must not match the value 1 exactly.
+func parseARM64SVERegister(zfr0 uint64) {
+	if extractBits(zfr0, 0, 3) >= 1 {
+		ARM64.HasSVE2 = true
 	}
 }
 
diff --git a/cpu/cpu_arm64.s b/cpu/cpu_arm64.s
index fcb9a38..22cc998 100644
--- a/cpu/cpu_arm64.s
+++ b/cpu/cpu_arm64.s
@@ -29,3 +29,11 @@
 	WORD	$0xd5380400
 	MOVD	R0, ret+0(FP)
 	RET
+
+// func getzfr0() uint64
+TEXT ·getzfr0(SB),NOSPLIT,$0-8
+	// Read ID_AA64ZFR0_EL1 (SVE Feature ID Register 0) into R0 (x0).
+	// The instruction is emitted as a raw word: mrs x0, ID_AA64ZFR0_EL1 = d5380480
+	WORD $0xd5380480
+	MOVD	R0, ret+0(FP)
+	RET
diff --git a/cpu/cpu_gc_arm64.go b/cpu/cpu_gc_arm64.go
index a8acd3e..6ac6e1e 100644
--- a/cpu/cpu_gc_arm64.go
+++ b/cpu/cpu_gc_arm64.go
@@ -9,3 +9,4 @@
 func getisar0() uint64
 func getisar1() uint64
 func getpfr0() uint64
+func getzfr0() uint64
diff --git a/cpu/cpu_linux_arm64.go b/cpu/cpu_linux_arm64.go
index a968b80..3d386d0 100644
--- a/cpu/cpu_linux_arm64.go
+++ b/cpu/cpu_linux_arm64.go
@@ -35,6 +35,8 @@
 	hwcap_SHA512   = 1 << 21
 	hwcap_SVE      = 1 << 22
 	hwcap_ASIMDFHM = 1 << 23
+
+	hwcap2_SVE2 = 1 << 1
 )
 
 // linuxKernelCanEmulateCPUID reports whether we're running
@@ -104,6 +106,9 @@
 	ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512)
 	ARM64.HasSVE = isSet(hwCap, hwcap_SVE)
 	ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM)
+
+	// HWCAP2 feature bits
+	ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2)
 }
 
 func isSet(hwc uint, value uint) bool {