// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !purego
#include "textflag.h"
// LOAD assembles the big-endian 32-bit message word at byte offset
// index*4 of the current input block (X29) and stores it into slot
// index of the message schedule buffer (X19). The word is left in X5
// for the following MIX. Clobbers X5-X8.
#define LOAD(index) \
MOVBU ((index*4)+0)(X29), X5; \
MOVBU ((index*4)+1)(X29), X6; \
MOVBU ((index*4)+2)(X29), X7; \
MOVBU ((index*4)+3)(X29), X8; \
SLL $24, X5; \
SLL $16, X6; \
OR X5, X6, X5; \
SLL $8, X7; \
OR X5, X7, X5; \
OR X5, X8, X5; \
MOVW X5, (index*4)(X19)
// SHUFFLE computes the next message schedule word for round index:
//
//   w[index&15] = rotl1(w[(index-16)&15] ^ w[(index-14)&15] ^
//                       w[(index-8)&15]  ^ w[(index-3)&15])
//
// using the 16-word circular buffer at X19 (slot index&15 holds
// w[index-16], since only the most recent 16 words are kept).
// RORW $31 is a 32-bit rotate right by 31, i.e. rotate left by 1.
// The new word is left in X5 for the following MIX. Clobbers X5-X8.
#define SHUFFLE(index) \
MOVWU (((index)&0xf)*4)(X19), X5; \
MOVWU (((index-3)&0xf)*4)(X19), X6; \
MOVWU (((index-8)&0xf)*4)(X19), X7; \
MOVWU (((index-14)&0xf)*4)(X19), X8; \
XOR X6, X5; \
XOR X7, X5; \
XOR X8, X5; \
RORW $31, X5; \
MOVW X5, (((index)&0xf)*4)(X19)
// f = d ^ (b & (c ^ d))
//
// FUNC1 is the "choose" round function (equivalent to
// (b & c) | (^b & d)), used for rounds 0-19.
// Result in X7; b, c, d are not modified.
#define FUNC1(a, b, c, d, e) \
XOR c, d, X7; \
AND b, X7; \
XOR d, X7
// f = b ^ c ^ d
//
// FUNC2 is the "parity" round function, used for rounds 20-39
// (and, via FUNC4, rounds 60-79). Result in X7.
#define FUNC2(a, b, c, d, e) \
XOR b, c, X7; \
XOR d, X7
// f = (b & c) | ((b | c) & d)
//
// FUNC3 is the "majority" round function, used for rounds 40-59.
// Result in X7; clobbers X6 and X8.
#define FUNC3(a, b, c, d, e) \
OR b, c, X8; \
AND b, c, X6; \
AND d, X8; \
OR X6, X8, X7
// FUNC4 (rounds 60-79) is the same parity function as FUNC2.
#define FUNC4 FUNC2
// MIX finishes one round:
//
//   b = rotl32(b, 30)                        (RORW $2)
//   e += f + w + key + rotl32(a, 5)          (RORW $27 = rotl 5)
//
// where X7 holds the round function output f and X5 the message
// word w (left there by LOAD/SHUFFLE). The 64-bit ADDs may carry
// garbage into the upper bits of e, but every later use (RORW and
// the MOVW stores) reads only the low 32 bits. Clobbers X8.
#define MIX(a, b, c, d, e, key) \
RORW $2, b; \
ADD X7, e; \
RORW $27, a, X8; \
ADD X5, e; \
ADD key, e; \
ADD X8, e
// Each ROUND macro performs one full SHA-1 round on the working
// variables a..e: obtain the message word (LOAD for rounds 0-15,
// SHUFFLE for rounds 16-79), evaluate the round function into X7,
// then MIX in the stage's round constant (X15-X18 hold K0-K3).
#define ROUND1(a, b, c, d, e, index) \
LOAD(index); \
FUNC1(a, b, c, d, e); \
MIX(a, b, c, d, e, X15)
// ROUND1x: rounds 16-19 — scheduled message words, still FUNC1/K0.
#define ROUND1x(a, b, c, d, e, index) \
SHUFFLE(index); \
FUNC1(a, b, c, d, e); \
MIX(a, b, c, d, e, X15)
// ROUND2: rounds 20-39 — parity function with K1.
#define ROUND2(a, b, c, d, e, index) \
SHUFFLE(index); \
FUNC2(a, b, c, d, e); \
MIX(a, b, c, d, e, X16)
// ROUND3: rounds 40-59 — majority function with K2.
#define ROUND3(a, b, c, d, e, index) \
SHUFFLE(index); \
FUNC3(a, b, c, d, e); \
MIX(a, b, c, d, e, X17)
// ROUND4: rounds 60-79 — parity function with K3.
#define ROUND4(a, b, c, d, e, index) \
SHUFFLE(index); \
FUNC4(a, b, c, d, e); \
MIX(a, b, c, d, e, X18)
// func block(dig *Digest, p []byte)
//
// Hashes every complete 64-byte block of p into the five-word state
// at dig. Trailing bytes (len(p) % 64) are ignored; the caller is
// responsible for buffering them.
//
// Register roles:
//   X5-X8    scratch used by the macros above
//   X10-X14  working variables a, b, c, d, e
//   X15-X18  round constants K0-K3
//   X19      16-word message schedule buffer on the stack
//   X20      dig; X29 current input pointer; X28 end of full blocks
//   X22-X26  copy of a..e from the start of the current block
//
// Fix from the previous revision: the early-exit branch for inputs
// with no complete block used to land *after* the final state stores,
// which ran with X20 and X10-X14 still uninitialized (dig is only
// loaded below the branch). The stores are now placed before the
// end label, so the empty-input path goes straight to RET and the
// state is stored exactly once after the last processed block.
TEXT ·block(SB),NOSPLIT,$64-32
MOV p_base+8(FP), X29 // X29 = &p[0]
MOV p_len+16(FP), X30
SRL $6, X30 // X30 = len(p) &^ 63:
SLL $6, X30 // whole 64-byte blocks only
ADD X29, X30, X28 // X28 = one past the last complete block
BEQ X28, X29, end // no complete block: nothing to do
ADD $8, X2, X19 // message schedule buffer on stack
MOV dig+0(FP), X20
MOVWU (0*4)(X20), X10 // a = H0
MOVWU (1*4)(X20), X11 // b = H1
MOVWU (2*4)(X20), X12 // c = H2
MOVWU (3*4)(X20), X13 // d = H3
MOVWU (4*4)(X20), X14 // e = H4
MOV $·_K(SB), X21
MOVW (0*4)(X21), X15 // K0, rounds 0-19
MOVW (1*4)(X21), X16 // K1, rounds 20-39
MOVW (2*4)(X21), X17 // K2, rounds 40-59
MOVW (3*4)(X21), X18 // K3, rounds 60-79
loop:
// Save a..e so this block's result can be added back into them.
MOVW X10, X22
MOVW X11, X23
MOVW X12, X24
MOVW X13, X25
MOVW X14, X26
// Rounds 0-15: message words read directly from the input block.
ROUND1(X10, X11, X12, X13, X14, 0)
ROUND1(X14, X10, X11, X12, X13, 1)
ROUND1(X13, X14, X10, X11, X12, 2)
ROUND1(X12, X13, X14, X10, X11, 3)
ROUND1(X11, X12, X13, X14, X10, 4)
ROUND1(X10, X11, X12, X13, X14, 5)
ROUND1(X14, X10, X11, X12, X13, 6)
ROUND1(X13, X14, X10, X11, X12, 7)
ROUND1(X12, X13, X14, X10, X11, 8)
ROUND1(X11, X12, X13, X14, X10, 9)
ROUND1(X10, X11, X12, X13, X14, 10)
ROUND1(X14, X10, X11, X12, X13, 11)
ROUND1(X13, X14, X10, X11, X12, 12)
ROUND1(X12, X13, X14, X10, X11, 13)
ROUND1(X11, X12, X13, X14, X10, 14)
ROUND1(X10, X11, X12, X13, X14, 15)
// Rounds 16-19: first scheduled words, still FUNC1/K0.
ROUND1x(X14, X10, X11, X12, X13, 16)
ROUND1x(X13, X14, X10, X11, X12, 17)
ROUND1x(X12, X13, X14, X10, X11, 18)
ROUND1x(X11, X12, X13, X14, X10, 19)
// Rounds 20-39: parity function with K1.
ROUND2(X10, X11, X12, X13, X14, 20)
ROUND2(X14, X10, X11, X12, X13, 21)
ROUND2(X13, X14, X10, X11, X12, 22)
ROUND2(X12, X13, X14, X10, X11, 23)
ROUND2(X11, X12, X13, X14, X10, 24)
ROUND2(X10, X11, X12, X13, X14, 25)
ROUND2(X14, X10, X11, X12, X13, 26)
ROUND2(X13, X14, X10, X11, X12, 27)
ROUND2(X12, X13, X14, X10, X11, 28)
ROUND2(X11, X12, X13, X14, X10, 29)
ROUND2(X10, X11, X12, X13, X14, 30)
ROUND2(X14, X10, X11, X12, X13, 31)
ROUND2(X13, X14, X10, X11, X12, 32)
ROUND2(X12, X13, X14, X10, X11, 33)
ROUND2(X11, X12, X13, X14, X10, 34)
ROUND2(X10, X11, X12, X13, X14, 35)
ROUND2(X14, X10, X11, X12, X13, 36)
ROUND2(X13, X14, X10, X11, X12, 37)
ROUND2(X12, X13, X14, X10, X11, 38)
ROUND2(X11, X12, X13, X14, X10, 39)
// Rounds 40-59: majority function with K2.
ROUND3(X10, X11, X12, X13, X14, 40)
ROUND3(X14, X10, X11, X12, X13, 41)
ROUND3(X13, X14, X10, X11, X12, 42)
ROUND3(X12, X13, X14, X10, X11, 43)
ROUND3(X11, X12, X13, X14, X10, 44)
ROUND3(X10, X11, X12, X13, X14, 45)
ROUND3(X14, X10, X11, X12, X13, 46)
ROUND3(X13, X14, X10, X11, X12, 47)
ROUND3(X12, X13, X14, X10, X11, 48)
ROUND3(X11, X12, X13, X14, X10, 49)
ROUND3(X10, X11, X12, X13, X14, 50)
ROUND3(X14, X10, X11, X12, X13, 51)
ROUND3(X13, X14, X10, X11, X12, 52)
ROUND3(X12, X13, X14, X10, X11, 53)
ROUND3(X11, X12, X13, X14, X10, 54)
ROUND3(X10, X11, X12, X13, X14, 55)
ROUND3(X14, X10, X11, X12, X13, 56)
ROUND3(X13, X14, X10, X11, X12, 57)
ROUND3(X12, X13, X14, X10, X11, 58)
ROUND3(X11, X12, X13, X14, X10, 59)
// Rounds 60-79: parity function with K3.
ROUND4(X10, X11, X12, X13, X14, 60)
ROUND4(X14, X10, X11, X12, X13, 61)
ROUND4(X13, X14, X10, X11, X12, 62)
ROUND4(X12, X13, X14, X10, X11, 63)
ROUND4(X11, X12, X13, X14, X10, 64)
ROUND4(X10, X11, X12, X13, X14, 65)
ROUND4(X14, X10, X11, X12, X13, 66)
ROUND4(X13, X14, X10, X11, X12, 67)
ROUND4(X12, X13, X14, X10, X11, 68)
ROUND4(X11, X12, X13, X14, X10, 69)
ROUND4(X10, X11, X12, X13, X14, 70)
ROUND4(X14, X10, X11, X12, X13, 71)
ROUND4(X13, X14, X10, X11, X12, 72)
ROUND4(X12, X13, X14, X10, X11, 73)
ROUND4(X11, X12, X13, X14, X10, 74)
ROUND4(X10, X11, X12, X13, X14, 75)
ROUND4(X14, X10, X11, X12, X13, 76)
ROUND4(X13, X14, X10, X11, X12, 77)
ROUND4(X12, X13, X14, X10, X11, 78)
ROUND4(X11, X12, X13, X14, X10, 79)
// Add this block's result into the running state (mod 2^32; only
// the low 32 bits are stored below).
ADD X22, X10
ADD X23, X11
ADD X24, X12
ADD X25, X13
ADD X26, X14
ADD $64, X29 // advance to the next block
BNE X28, X29, loop
// Write the updated state back to dig. Reached only via the loop
// fall-through, so X20 and X10-X14 are always initialized here.
MOVW X10, (0*4)(X20)
MOVW X11, (1*4)(X20)
MOVW X12, (2*4)(X20)
MOVW X13, (3*4)(X20)
MOVW X14, (4*4)(X20)
end:
RET
// _K holds the four SHA-1 round constants, one per 20-round stage
// (loaded into X15-X18 by ·block above).
GLOBL ·_K(SB),RODATA,$16
DATA ·_K+0(SB)/4, $0x5A827999
DATA ·_K+4(SB)/4, $0x6ED9EBA1
DATA ·_K+8(SB)/4, $0x8F1BBCDC
DATA ·_K+12(SB)/4, $0xCA62C1D6