runtime: change top-most return PC from goexit to goexit+PCQuantum

If you get a stack of PCs from Callers, it would be expected
that every PC is immediately after a call instruction, so to find
the line of the call, you look up the line for PC-1.
CL 163550043 now explicitly documents that.

The most common exception to this is the top-most return PC
on the stack, which is the entry address of the runtime.goexit
function. Subtracting 1 from that PC will end up in a different
function entirely.

To remove this special case, make the top-most return PC
goexit+PCQuantum and then implement goexit in assembly
so that the first instruction can be skipped.

Fixes #7690.

LGTM=r
R=r
CC=golang-codereviews
https://golang.org/cl/170720043
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
index 0d46a9e..b4b81d7 100644
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@@ -2284,3 +2284,9 @@
 	MOVL	m_curg(AX), AX
 	MOVL	(g_stack+stack_hi)(AX), AX
 	RET
+
+// The top-most function running on a goroutine
+// returns to goexit+PCQuantum.
+TEXT runtime·goexit(SB),NOSPLIT,$0-0
+	BYTE	$0x90	// NOP
+	CALL	runtime·goexit1(SB)	// does not return
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index a9b082b..39d7c78 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -2229,3 +2229,9 @@
 	MOVQ	m_curg(AX), AX
 	MOVQ	(g_stack+stack_hi)(AX), AX
 	RET
+
+// The top-most function running on a goroutine
+// returns to goexit+PCQuantum.
+TEXT runtime·goexit(SB),NOSPLIT,$0-0
+	BYTE	$0x90	// NOP
+	CALL	runtime·goexit1(SB)	// does not return
diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s
index 28875bc..a1116b5 100644
--- a/src/runtime/asm_amd64p32.s
+++ b/src/runtime/asm_amd64p32.s
@@ -1079,3 +1079,9 @@
 TEXT runtime·return0(SB), NOSPLIT, $0
 	MOVL	$0, AX
 	RET
+
+// The top-most function running on a goroutine
+// returns to goexit+PCQuantum.
+TEXT runtime·goexit(SB),NOSPLIT,$0-0
+	BYTE	$0x90	// NOP
+	CALL	runtime·goexit1(SB)	// does not return
diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s
index e94b4c1f..0f3b5ee 100644
--- a/src/runtime/asm_arm.s
+++ b/src/runtime/asm_arm.s
@@ -1320,3 +1320,9 @@
 	MOVW	saveG-8(SP), g
 	MOVW	saveR11-4(SP), R11
 	RET
+
+// The top-most function running on a goroutine
+// returns to goexit+PCQuantum.
+TEXT runtime·goexit(SB),NOSPLIT,$-4-0
+	MOVW	R0, R0	// NOP
+	BL	runtime·goexit1(SB)	// does not return
diff --git a/src/runtime/proc.c b/src/runtime/proc.c
index b46f670..4be51e1 100644
--- a/src/runtime/proc.c
+++ b/src/runtime/proc.c
@@ -1643,12 +1643,10 @@
 }
 
 // Finishes execution of the current goroutine.
-// Need to mark it as nosplit, because it runs with sp > stackbase.
-// Since it does not return it does not matter.  But if it is preempted
-// at the split stack check, GC will complain about inconsistent sp.
+// Must be NOSPLIT because it is called from Go.
 #pragma textflag NOSPLIT
 void
-runtime·goexit(void)
+runtime·goexit1(void)
 {
 	void (*fn)(G*);
 
@@ -2192,7 +2190,7 @@
 
 	runtime·memclr((byte*)&newg->sched, sizeof newg->sched);
 	newg->sched.sp = (uintptr)sp;
-	newg->sched.pc = (uintptr)runtime·goexit;
+	newg->sched.pc = (uintptr)runtime·goexit + PCQuantum; // +PCQuantum so that previous instruction is in same function
 	newg->sched.g = newg;
 	runtime·gostartcallfn(&newg->sched, fn);
 	newg->gopc = (uintptr)callerpc;