runtime: test malformed address fault and fix on OS X

The garbage collector poison pointers
(0x6969696969696969 and 0x6868686868686868)
are malformed addresses on amd64.
That is, they are not 48-bit addresses sign extended
to 64 bits. This causes a different kind of hardware fault
than the usual 'unmapped page' when accessing such
an address, and OS X 10.9.2 sends the resulting SIGSEGV
incorrectly, making it look like it was user-generated
rather than kernel-generated and does not include the
faulting address. This means that in GODEBUG=gcdead=1
mode, if there is a bug and something tries to dereference
a poisoned pointer, the runtime delivers the SIGSEGV to
os/signal and returns to the faulting code, which faults
again, causing the process to hang instead of crashing.

Fix by rewriting "user-generated" SIGSEGV on OS X to
look like a kernel-generated SIGSEGV with fault address
0xb01dfacedebac1e.

I chose that address because (1) when printed in hex
during a crash, it is obviously spelling out English text,
(2) there are no current Google hits for that pointer,
which will make its origin easy to find once this CL
is indexed, and (3) it is not an altogether inaccurate
description of the situation.

Add a test. Maybe other systems will break too.

LGTM=khr
R=golang-codereviews, khr
CC=golang-codereviews, iant, ken
https://golang.org/cl/83270049
diff --git a/src/pkg/runtime/runtime_test.go b/src/pkg/runtime/runtime_test.go
index 9aca68e..67d3921 100644
--- a/src/pkg/runtime/runtime_test.go
+++ b/src/pkg/runtime/runtime_test.go
@@ -134,6 +134,43 @@
 	SetCPUProfileRate(0)
 }
 
+// Addresses to test for faulting behavior.
+// This is less a test of SetPanicOnFault and more a check that
+// the operating system and the runtime can process these faults
+// correctly. That is, we're indirectly testing that without SetPanicOnFault
+// these would manage to turn into ordinary crashes.
+// Note that these are truncated on 32-bit systems, so the bottom 32 bits
+// of the larger addresses must themselves be invalid addresses.
+// We might get unlucky and the OS might have mapped one of these
+// addresses, but probably not: they're all in the first page, very high
+// adderesses that normally an OS would reserve for itself, or malformed
+// addresses. Even so, we might have to remove one or two on different
+// systems. We will see.
+
+var faultAddrs = []uint64{
+	// low addresses
+	0,
+	1,
+	0xfff,
+	// high (kernel) addresses
+	// or else malformed.
+	0xffffffffffffffff,
+	0xfffffffffffff001,
+	// no 0xffffffffffff0001; 0xffff0001 is mapped for 32-bit user space on OS X
+	0xfffffffffff00001,
+	0xffffffffff000001,
+	0xfffffffff0000001,
+	0xffffffff00000001,
+	0xfffffff000000001,
+	0xffffff0000000001,
+	0xfffff00000000001,
+	0xffff000000000001,
+	0xfff0000000000001,
+	0xff00000000000001,
+	0xf000000000000001,
+	0x8000000000000001,
+}
+
 func TestSetPanicOnFault(t *testing.T) {
 	// This currently results in a fault in the signal trampoline on
 	// dragonfly/386 - see issue 7421.
@@ -144,6 +181,12 @@
 	old := debug.SetPanicOnFault(true)
 	defer debug.SetPanicOnFault(old)
 
+	for _, addr := range faultAddrs {
+		testSetPanicOnFault(t, uintptr(addr))
+	}
+}
+
+func testSetPanicOnFault(t *testing.T, addr uintptr) {
 	defer func() {
 		if err := recover(); err == nil {
 			t.Fatalf("did not find error in recover")
@@ -151,7 +194,7 @@
 	}()
 
 	var p *int
-	p = (*int)(unsafe.Pointer(^uintptr(0)))
+	p = (*int)(unsafe.Pointer(addr))
 	println(*p)
-	t.Fatalf("still here - should have faulted")
+	t.Fatalf("still here - should have faulted on address %#x", addr)
 }
diff --git a/src/pkg/runtime/signal_amd64x.c b/src/pkg/runtime/signal_amd64x.c
index 309bad3..04026f3 100644
--- a/src/pkg/runtime/signal_amd64x.c
+++ b/src/pkg/runtime/signal_amd64x.c
@@ -52,6 +52,27 @@
 		return;
 	}
 
+#ifdef GOOS_darwin
+	// x86-64 has 48-bit virtual addresses. The top 16 bits must echo bit 47.
+	// The hardware delivers a different kind of fault for a malformed address
+	// than it does for an attempt to access a valid but unmapped address.
+	// OS X 10.9.2 mishandles the malformed address case, making it look like
+	// a user-generated signal (like someone ran kill -SEGV ourpid).
+	// We pass user-generated signals to os/signal, or else ignore them.
+	// Doing that here - and returning to the faulting code - results in an
+	// infinite loop. It appears the best we can do is rewrite what the kernel
+	// delivers into something more like the truth. The address used below
+	// has very little chance of being the one that caused the fault, but it is
+	// malformed, it is clearly not a real pointer, and if it does get printed
+	// in real life, people will probably search for it and find this code.
+	// There are no Google hits for b01dfacedebac1e or 0xb01dfacedebac1e
+	// as I type this comment.
+	if(sig == SIGSEGV && SIG_CODE0(info, ctxt) == SI_USER) {
+		SIG_CODE0(info, ctxt) = SI_USER+1;
+		info->si_addr = (void*)(uintptr)0xb01dfacedebac1eULL;
+	}
+#endif
+
 	t = &runtime·sigtab[sig];
 	if(SIG_CODE0(info, ctxt) != SI_USER && (t->flags & SigPanic)) {
 		// Make it look like a call to the signal func.