cpu: add support for AVX-VNNI and IFMA detection

Added detection for x86 AVX-VNNI (VEX-coded Vector Neural Network
Instructions) and AVX-IFMA (VEX-coded Integer Fused Multiply Add),
including both the base VNNI set and the Int8 extension.

Fixes golang/go#71142

Change-Id: I9e8d18b2e8bf81d5d4313a4a47fdf731fb3d44dd
GitHub-Last-Rev: 32ea443fc247f1b6e957d2b27a44909c620d2fb2
GitHub-Pull-Request: golang/sys#242
Reviewed-on: https://go-review.googlesource.com/c/sys/+/641155
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
diff --git a/cpu/cpu.go b/cpu/cpu.go
index 02609d5..9c105f2 100644
--- a/cpu/cpu.go
+++ b/cpu/cpu.go
@@ -72,6 +72,9 @@
 	HasSSSE3            bool // Supplemental streaming SIMD extension 3
 	HasSSE41            bool // Streaming SIMD extension 4 and 4.1
 	HasSSE42            bool // Streaming SIMD extension 4 and 4.2
+	HasAVXIFMA          bool // Advanced vector extension Integer Fused Multiply Add
+	HasAVXVNNI          bool // Advanced vector extension Vector Neural Network Instructions
+	HasAVXVNNIInt8      bool // Advanced vector extension Vector Neural Network Int8 instructions
 	_                   CacheLinePad
 }
 
diff --git a/cpu/cpu_test.go b/cpu/cpu_test.go
index 7a9bac7..dd493ec 100644
--- a/cpu/cpu_test.go
+++ b/cpu/cpu_test.go
@@ -41,6 +41,40 @@
 	}
 }
 
+func TestAVX512BF16HasAVX512(t *testing.T) {
+	if runtime.GOARCH == "amd64" {
+		if cpu.X86.HasAVX512BF16 && !cpu.X86.HasAVX512 {
+			t.Fatal("HasAVX512 expected true, got false")
+		}
+	}
+}
+
+func TestAVXVNNIHasAVX(t *testing.T) {
+	if cpu.X86.HasAVXVNNI && !cpu.X86.HasAVX {
+		t.Fatal("HasAVX expected true, got false")
+	}
+}
+
+func TestAVXIFMAHasAVXVNNIAndAVX(t *testing.T) {
+	if cpu.X86.HasAVXIFMA && !cpu.X86.HasAVX {
+		t.Fatal("HasAVX expected true, got false")
+	}
+
+	if cpu.X86.HasAVXIFMA && !cpu.X86.HasAVXVNNI {
+		t.Fatal("HasAVXVNNI expected true, got false")
+	}
+}
+
+func TestAVXVNNIInt8HasAVXVNNIAndAVX(t *testing.T) {
+	if cpu.X86.HasAVXVNNIInt8 && !cpu.X86.HasAVXVNNI {
+		t.Fatal("HasAVXVNNI expected true, got false")
+	}
+
+	if cpu.X86.HasAVXVNNIInt8 && !cpu.X86.HasAVX {
+		t.Fatal("HasAVX expected true, got false")
+	}
+}
+
 func TestARM64minimalFeatures(t *testing.T) {
 	if runtime.GOARCH != "arm64" || runtime.GOOS == "ios" {
 		return
diff --git a/cpu/cpu_x86.go b/cpu/cpu_x86.go
index 600a680..1e642f3 100644
--- a/cpu/cpu_x86.go
+++ b/cpu/cpu_x86.go
@@ -53,6 +53,9 @@
 		{Name: "sse41", Feature: &X86.HasSSE41},
 		{Name: "sse42", Feature: &X86.HasSSE42},
 		{Name: "ssse3", Feature: &X86.HasSSSE3},
+		{Name: "avxifma", Feature: &X86.HasAVXIFMA},
+		{Name: "avxvnni", Feature: &X86.HasAVXVNNI},
+		{Name: "avxvnniint8", Feature: &X86.HasAVXVNNIInt8},
 
 		// These capabilities should always be enabled on amd64:
 		{Name: "sse2", Feature: &X86.HasSSE2, Required: runtime.GOARCH == "amd64"},
@@ -106,7 +109,7 @@
 		return
 	}
 
-	_, ebx7, ecx7, edx7 := cpuid(7, 0)
+	eax7, ebx7, ecx7, edx7 := cpuid(7, 0)
 	X86.HasBMI1 = isSet(3, ebx7)
 	X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
 	X86.HasBMI2 = isSet(8, ebx7)
@@ -134,14 +137,24 @@
 		X86.HasAVX512VAES = isSet(9, ecx7)
 		X86.HasAVX512VBMI2 = isSet(6, ecx7)
 		X86.HasAVX512BITALG = isSet(12, ecx7)
-
-		eax71, _, _, _ := cpuid(7, 1)
-		X86.HasAVX512BF16 = isSet(5, eax71)
 	}
 
 	X86.HasAMXTile = isSet(24, edx7)
 	X86.HasAMXInt8 = isSet(25, edx7)
 	X86.HasAMXBF16 = isSet(22, edx7)
+
+	// These features depend on the second level of extended features.
+	if eax7 >= 1 {
+		eax71, _, _, edx71 := cpuid(7, 1)
+		if X86.HasAVX512 {
+			X86.HasAVX512BF16 = isSet(5, eax71)
+		}
+		if X86.HasAVX {
+			X86.HasAVXIFMA = isSet(23, eax71)
+			X86.HasAVXVNNI = isSet(4, eax71)
+			X86.HasAVXVNNIInt8 = isSet(4, edx71)
+		}
+	}
 }
 
 func isSet(bitpos uint, value uint32) bool {