blake2b: use proper Go frame sizes
Similar to the previous commit, blake2b's assembly routines declare a
zero-byte frame and then manually subtract their own frame from the SP.
Because the stack growth prologue is generated from the declared frame
size, the stack can fail to grow when necessary, leading to memory
corruption.
Fix this by declaring the correct stack frame sizes (including the extra
bytes needed for alignment), so that the generated stack growth prologue
is correct, and by aligning the SP up into the declared frame instead of
down below it.
Change-Id: Ic426338c45c94a2c01d549860c2295a0ee9200be
Reviewed-on: https://go-review.googlesource.com/31585
Reviewed-by: Adam Langley <agl@golang.org>
Reviewed-by: Andreas Auernhammer <aead@mail.de>
diff --git a/blake2b/blake2bAVX2_amd64.s b/blake2b/blake2bAVX2_amd64.s
index 2996621..1703fe4 100644
--- a/blake2b/blake2bAVX2_amd64.s
+++ b/blake2b/blake2bAVX2_amd64.s
@@ -95,7 +95,7 @@
VINSERTI128 $1, X11, Y15, Y15
// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksAVX2(SB), 4, $0-48
+TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment
MOVQ h+0(FP), AX
MOVQ c+8(FP), BX
MOVQ flag+16(FP), CX
@@ -103,8 +103,10 @@
MOVQ blocks_len+32(FP), DI
MOVQ SP, DX
- ANDQ $0xFFFFFFFFFFFFFFE0, SP
- SUBQ $(32+32+256), SP
+ MOVQ SP, R9
+ ADDQ $31, R9
+ ANDQ $~31, R9
+ MOVQ R9, SP
MOVQ CX, 16(SP)
XORQ CX, CX
diff --git a/blake2b/blake2b_amd64.s b/blake2b/blake2b_amd64.s
index 51d3701..3017cf0 100644
--- a/blake2b/blake2b_amd64.s
+++ b/blake2b/blake2b_amd64.s
@@ -111,7 +111,7 @@
PINSRQ $1, i7*8(src), m3
// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksSSE4(SB), 4, $0-48
+TEXT ·hashBlocksSSE4(SB), 4, $32-48 // frame size = 16 + 16 byte alignment
MOVQ h+0(FP), AX
MOVQ c+8(FP), BX
MOVQ flag+16(FP), CX
@@ -119,8 +119,10 @@
MOVQ blocks_len+32(FP), DI
MOVQ SP, BP
- ANDQ $0xFFFFFFFFFFFFFFF0, SP
- SUBQ $(16+16), SP
+ MOVQ SP, R9
+ ADDQ $15, R9
+ ANDQ $~15, R9
+ MOVQ R9, SP
MOVOU ·iv3<>(SB), X0
MOVO X0, 0(SP)