runtime: make stack 16-byte aligned for external code in _rt0_amd64_linux_lib

Fixes #16618.

Change-Id: Iffada12e8672bbdbcf2e787782c497e2c45701b1
Reviewed-on: https://go-review.googlesource.com/25550
Run-TryBot: Minux Ma <minux@golang.org>
Reviewed-by: Arjan Van De Ven <arjan.van.de.ven@intel.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/runtime/rt0_linux_amd64.s b/src/runtime/rt0_linux_amd64.s
index 564b51c..ced471f 100644
--- a/src/runtime/rt0_linux_amd64.s
+++ b/src/runtime/rt0_linux_amd64.s
@@ -12,13 +12,18 @@
 
 // When building with -buildmode=c-shared, this symbol is called when the shared
 // library is loaded.
-TEXT _rt0_amd64_linux_lib(SB),NOSPLIT,$0x48
+// Note: This function calls external C code, which might required 16-byte stack
+// alignment after cmd/internal/obj applies its transformations.
+TEXT _rt0_amd64_linux_lib(SB),NOSPLIT,$0x50
+	MOVQ	SP, AX
+	ANDQ	$-16, SP
 	MOVQ	BX, 0x10(SP)
 	MOVQ	BP, 0x18(SP)
 	MOVQ	R12, 0x20(SP)
 	MOVQ	R13, 0x28(SP)
 	MOVQ	R14, 0x30(SP)
 	MOVQ	R15, 0x38(SP)
+	MOVQ	AX, 0x40(SP)
 
 	MOVQ	DI, _rt0_amd64_linux_lib_argc<>(SB)
 	MOVQ	SI, _rt0_amd64_linux_lib_argv<>(SB)
@@ -50,6 +55,7 @@
 	MOVQ	0x28(SP), R13
 	MOVQ	0x30(SP), R14
 	MOVQ	0x38(SP), R15
+	MOVQ	0x40(SP), SP
 	RET
 
 TEXT _rt0_amd64_linux_lib_go(SB),NOSPLIT,$0