runtime: mlock top of signal stack on Linux 5.2–5.4.1

Linux 5.2 introduced a bug that can corrupt vector registers on return
from a signal if the signal stack isn't faulted in:
https://bugzilla.kernel.org/show_bug.cgi?id=205663

This CL works around this by mlocking the top page of all Go signal
stacks on the affected kernels.

Fixes #35326, #35777

Change-Id: I77c80a2baa4780827633f92f464486caa222295d
Reviewed-on: https://go-review.googlesource.com/c/go/+/209899
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Reviewed-by: David Chase <drchase@google.com>
diff --git a/src/runtime/defs_linux_amd64.go b/src/runtime/defs_linux_amd64.go
index 9eb5646..8144354 100644
--- a/src/runtime/defs_linux_amd64.go
+++ b/src/runtime/defs_linux_amd64.go
@@ -263,3 +263,14 @@
 	family uint16
 	path   [108]byte
 }
+
+const __NEW_UTS_LEN = 64
+
+type new_utsname struct {
+	sysname    [__NEW_UTS_LEN + 1]byte
+	nodename   [__NEW_UTS_LEN + 1]byte
+	release    [__NEW_UTS_LEN + 1]byte
+	version    [__NEW_UTS_LEN + 1]byte
+	machine    [__NEW_UTS_LEN + 1]byte
+	domainname [__NEW_UTS_LEN + 1]byte
+}
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index 27c66f7..1eb86e9 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -289,6 +289,7 @@
 func osinit() {
 	ncpu = getproccount()
 	physHugePageSize = getHugePageSize()
+	osArchInit()
 }
 
 var urandom_dev = []byte("/dev/urandom\x00")
@@ -318,11 +319,20 @@
 	initsig(true)
 }
 
+// gsignalInitQuirk, if non-nil, is called for every allocated gsignal G.
+//
+// TODO(austin): Remove this after Go 1.15 when we remove the
+// mlockGsignal workaround.
+var gsignalInitQuirk func(gsignal *g)
+
 // Called to initialize a new m (including the bootstrap m).
 // Called on the parent thread (main thread in case of bootstrap), can allocate memory.
 func mpreinit(mp *m) {
 	mp.gsignal = malg(32 * 1024) // Linux wants >= 2K
 	mp.gsignal.m = mp
+	if gsignalInitQuirk != nil {
+		gsignalInitQuirk(mp.gsignal)
+	}
 }
 
 func gettid() uint32
diff --git a/src/runtime/os_linux_386.go b/src/runtime/os_linux_386.go
new file mode 100644
index 0000000..9be88a5
--- /dev/null
+++ b/src/runtime/os_linux_386.go
@@ -0,0 +1,7 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+func osArchInit() {}
diff --git a/src/runtime/os_linux_amd64.go b/src/runtime/os_linux_amd64.go
new file mode 100644
index 0000000..21e4790
--- /dev/null
+++ b/src/runtime/os_linux_amd64.go
@@ -0,0 +1,63 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+//go:noescape
+func uname(utsname *new_utsname) int
+
+func mlock(addr, len uintptr) int
+
+func osArchInit() {
+	// Linux 5.2 introduced a bug that can corrupt vector
+	// registers on return from a signal if the signal stack isn't
+	// faulted in:
+	// https://bugzilla.kernel.org/show_bug.cgi?id=205663
+	//
+	// It was fixed in 5.3.15, 5.4.2, and all 5.5 and later
+	// kernels.
+	//
+	// If we're on an affected kernel, work around this issue by
+	// mlocking the top page of every signal stack. This doesn't
+	// help for signal stacks created in C, but there's not much
+	// we can do about that.
+	//
+	// TODO(austin): Remove this in Go 1.15, at which point it
+	// will be unlikely to encounter any of the affected kernels
+	// in the wild.
+
+	var uts new_utsname
+	if uname(&uts) < 0 {
+		throw("uname failed")
+	}
+	// Check for null terminator to ensure gostringnocopy doesn't
+	// walk off the end of the release string.
+	found := false
+	for _, b := range uts.release {
+		if b == 0 {
+			found = true
+			break
+		}
+	}
+	if !found {
+		return
+	}
+	rel := gostringnocopy(&uts.release[0])
+
+	major, minor, patch, ok := parseRelease(rel)
+	if !ok {
+		return
+	}
+
+	if major == 5 && (minor == 2 || minor == 3 && patch < 15 || minor == 4 && patch < 2) {
+		gsignalInitQuirk = mlockGsignal
+		if m0.gsignal != nil {
+			throw("gsignal quirk too late")
+		}
+	}
+}
+
+func mlockGsignal(gsignal *g) {
+	mlock(gsignal.stack.hi-physPageSize, physPageSize)
+}
diff --git a/src/runtime/os_linux_arm.go b/src/runtime/os_linux_arm.go
index 5f89c30..b590da7 100644
--- a/src/runtime/os_linux_arm.go
+++ b/src/runtime/os_linux_arm.go
@@ -39,6 +39,8 @@
 	}
 }
 
+func osArchInit() {}
+
 //go:nosplit
 func cputicks() int64 {
 	// Currently cputicks() is used in blocking profiler and to seed fastrand().
diff --git a/src/runtime/os_linux_arm64.go b/src/runtime/os_linux_arm64.go
index b51bc88..19968dc 100644
--- a/src/runtime/os_linux_arm64.go
+++ b/src/runtime/os_linux_arm64.go
@@ -27,6 +27,8 @@
 	}
 }
 
+func osArchInit() {}
+
 //go:nosplit
 func cputicks() int64 {
 	// Currently cputicks() is used in blocking profiler and to seed fastrand().
diff --git a/src/runtime/os_linux_mips64x.go b/src/runtime/os_linux_mips64x.go
index 59d2a8f..464a26a 100644
--- a/src/runtime/os_linux_mips64x.go
+++ b/src/runtime/os_linux_mips64x.go
@@ -10,6 +10,8 @@
 func archauxv(tag, val uintptr) {
 }
 
+func osArchInit() {}
+
 //go:nosplit
 func cputicks() int64 {
 	// Currently cputicks() is used in blocking profiler and to seed fastrand().
diff --git a/src/runtime/os_linux_mipsx.go b/src/runtime/os_linux_mipsx.go
index ccdc3a7..87962ed 100644
--- a/src/runtime/os_linux_mipsx.go
+++ b/src/runtime/os_linux_mipsx.go
@@ -10,6 +10,8 @@
 func archauxv(tag, val uintptr) {
 }
 
+func osArchInit() {}
+
 //go:nosplit
 func cputicks() int64 {
 	// Currently cputicks() is used in blocking profiler and to seed fastrand().
diff --git a/src/runtime/os_linux_ppc64x.go b/src/runtime/os_linux_ppc64x.go
index cc79cc4..3aedc23 100644
--- a/src/runtime/os_linux_ppc64x.go
+++ b/src/runtime/os_linux_ppc64x.go
@@ -20,3 +20,5 @@
 		cpu.HWCap2 = uint(val)
 	}
 }
+
+func osArchInit() {}
diff --git a/src/runtime/os_linux_s390x.go b/src/runtime/os_linux_s390x.go
index 55d35c7..ee18fd1 100644
--- a/src/runtime/os_linux_s390x.go
+++ b/src/runtime/os_linux_s390x.go
@@ -17,3 +17,5 @@
 		cpu.S390X.HasVX = val&_HWCAP_S390_VX != 0
 	}
 }
+
+func osArchInit() {}
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index d16060f..174120f 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -33,8 +33,10 @@
 #define SYS_clone		56
 #define SYS_exit		60
 #define SYS_kill		62
+#define SYS_uname		63
 #define SYS_fcntl		72
 #define SYS_sigaltstack 	131
+#define SYS_mlock		149
 #define SYS_arch_prctl		158
 #define SYS_gettid		186
 #define SYS_futex		202
@@ -764,3 +766,20 @@
 	SYSCALL
 	MOVQ	AX, ret+0(FP)
 	RET
+
+// func uname(utsname *new_utsname) int
+TEXT ·uname(SB),NOSPLIT,$0-16
+	MOVQ    utsname+0(FP), DI
+	MOVL    $SYS_uname, AX
+	SYSCALL
+	MOVQ	AX, ret+8(FP)
+	RET
+
+// func mlock(addr, len uintptr) int
+TEXT ·mlock(SB),NOSPLIT,$0-24
+	MOVQ    addr+0(FP), DI
+	MOVQ    len+8(FP), SI
+	MOVL    $SYS_mlock, AX
+	SYSCALL
+	MOVQ	AX, ret+16(FP)
+	RET