cpu: fix AVX512 support detection on Darwin

Summary: On darwin/amd64, it is not adequate to use OSXSAVE
bits to determine AVX512 availability. The reason is involved.
See the GitHub issue (golang/go#43089) for details.

The fix consists of implementing Apple's recommended approach
using the process commpage cpu_capabilities bits to determine
availability of AVX512.

Fixes golang/go#43089

Change-Id: I1ba89965498863d268fbf2e427dbfd6429c7409f
Reviewed-on: https://go-review.googlesource.com/c/sys/+/285572
Trust: Tobias Klauser <tobias.klauser@gmail.com>
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>
diff --git a/cpu/cpu_gc_x86.go b/cpu/cpu_gc_x86.go
index fa7cdb9..3298a87 100644
--- a/cpu/cpu_gc_x86.go
+++ b/cpu/cpu_gc_x86.go
@@ -15,3 +15,7 @@
 // xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler
 // and in cpu_gccgo.c for gccgo.
 func xgetbv() (eax, edx uint32)
+
+// darwinSupportsAVX512 reports whether the Darwin commpage advertises AVX512F.
+// It is implemented in cpu_x86.s for gc compiler and in cpu_gccgo_x86.go for gccgo.
+func darwinSupportsAVX512() bool
diff --git a/cpu/cpu_gccgo_x86.go b/cpu/cpu_gccgo_x86.go
index 8478a6d..863d415 100644
--- a/cpu/cpu_gccgo_x86.go
+++ b/cpu/cpu_gccgo_x86.go
@@ -25,3 +25,9 @@
 	gccgoXgetbv(&a, &d)
 	return a, d
 }
+
+// darwinSupportsAVX512 always reports false: gccgo doesn't build on Darwin, per
+// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76
+func darwinSupportsAVX512() bool {
+	return false
+}
diff --git a/cpu/cpu_x86.go b/cpu/cpu_x86.go
index fd380c0..54ca466 100644
--- a/cpu/cpu_x86.go
+++ b/cpu/cpu_x86.go
@@ -87,8 +87,14 @@
 		// Check if XMM and YMM registers have OS support.
 		osSupportsAVX = isSet(1, eax) && isSet(2, eax)
 
-		// Check if OPMASK and ZMM registers have OS support.
-		osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax)
+		if runtime.GOOS == "darwin" {
+			// Check darwin commpage for AVX512 support. Necessary because:
+			// https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L175-L201
+			osSupportsAVX512 = osSupportsAVX && darwinSupportsAVX512()
+		} else {
+			// Check if OPMASK and ZMM registers have OS support.
+			osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax)
+		}
 	}
 
 	X86.HasAVX = isSet(28, ecx1) && osSupportsAVX
diff --git a/cpu/cpu_x86.s b/cpu/cpu_x86.s
index 39acab2..b748ba5 100644
--- a/cpu/cpu_x86.s
+++ b/cpu/cpu_x86.s
@@ -26,3 +26,27 @@
 	MOVL AX, eax+0(FP)
 	MOVL DX, edx+4(FP)
 	RET
+
+// func darwinSupportsAVX512() bool
+TEXT ·darwinSupportsAVX512(SB), NOSPLIT, $0-1
+    MOVB    $0, ret+0(FP) // default to false; only overwritten if both checks below pass
+#ifdef GOOS_darwin   // body assembled only on darwin; otherwise the result stays false
+#ifdef GOARCH_amd64  // body assembled only on amd64; otherwise the result stays false
+// Commpage base address, field offsets, and the AVX512F capability bit are from:
+// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
+#define commpage64_base_address         0x00007fffffe00000
+#define commpage64_cpu_capabilities64   (commpage64_base_address+0x010)
+#define commpage64_version              (commpage64_base_address+0x01E)
+#define hasAVX512F                      0x0000004000000000
+    MOVQ    $commpage64_version, BX
+    CMPW    (BX), $13  // cpu_capabilities64 undefined in versions < 13
+    JL      no_avx512 // commpage version < 13: keep the false default
+    MOVQ    $commpage64_cpu_capabilities64, BX
+    MOVQ    $hasAVX512F, CX
+    TESTQ   (BX), CX // test the hasAVX512F bit in the 64-bit capabilities word
+    JZ      no_avx512 // bit clear: keep the false default
+    MOVB    $1, ret+0(FP) // bit set: report true
+no_avx512:
+#endif
+#endif
+    RET