[release-branch.go1.21] runtime: increase g0 stack size in non-cgo case

Currently, for non-cgo programs, the g0 stack size is 8 KiB on
most platforms. With PGO which could cause aggressive inlining in
the runtime, the runtime stack frames are larger and could
overflow the 8 KiB g0 stack. Increase it to 16 KiB. This is only
one per OS thread, so it shouldn't increase memory use much.

Updates #62120.
Updates #62489.
Fixes #62537.

Change-Id: I565b154517021f1fd849424dafc3f0f26a755cac
Reviewed-on: https://go-review.googlesource.com/c/go/+/526995
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
(cherry picked from commit c6d550a6683cebb2a11d7fa91823edf7db1d58a5)
Reviewed-on: https://go-review.googlesource.com/c/go/+/527055
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 9fd200e..afb33c1 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -1543,7 +1543,7 @@
 		// but is somewhat arbitrary.
 		size := gp.stack.hi
 		if size == 0 {
-			size = 8192 * sys.StackGuardMultiplier
+			size = 16384 * sys.StackGuardMultiplier
 		}
 		gp.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
 		gp.stack.lo = gp.stack.hi - size + 1024
@@ -1939,7 +1939,7 @@
 	if iscgo || mStackIsSystemAllocated() {
 		mp.g0 = malg(-1)
 	} else {
-		mp.g0 = malg(8192 * sys.StackGuardMultiplier)
+		mp.g0 = malg(16384 * sys.StackGuardMultiplier)
 	}
 	mp.g0.m = mp