| // Copyright 2013 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| #include "textflag.h" |
| |
| // SHA1 block routine. See sha1block.go for Go equivalent. |
| // |
| // There are 80 rounds of 4 types: |
| // - rounds 0-15 are type 1 and load data (ROUND1 macro). |
| // - rounds 16-19 are type 1 and do not load data (ROUND1x macro). |
| // - rounds 20-39 are type 2 and do not load data (ROUND2 macro). |
| // - rounds 40-59 are type 3 and do not load data (ROUND3 macro). |
| // - rounds 60-79 are type 4 and do not load data (ROUND4 macro). |
| // |
| // Each round loads or shuffles the data, then computes a per-round |
| // function of b, c, d, and then mixes the result into and rotates the |
| // five registers a, b, c, d, e holding the intermediate results. |
| // |
| // The register rotation is implemented by rotating the arguments to |
| // the round macros instead of by explicit move instructions. |
| // |
| // amd64p32 version. |
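| // To ensure safety for Native Client, avoids use of BP and R15 |
| // as well as two-register addressing modes in memory operands |
| // (only LEAL/LEAQ, which compute an address without accessing |
| // memory, use an index register). |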
| |
| // LOAD(index) reads 32-bit word number index of the current input chunk |
| // (addressed by SI), byte-swaps it, and stores it in slot index of the |
| // word buffer at the bottom of the stack frame. The swapped word is also |
| // left in R10. Clobbers R10. |
| // index is parenthesized so that an expression argument is scaled as a whole. |
| #define LOAD(index) \ |
| MOVL ((index)*4)(SI), R10; \ |
| BSWAPL R10; \ |
| MOVL R10, ((index)*4)(SP) |
| |
| // SHUFFLE(index) computes the schedule word for a round that does not load |
| // data, using the 16-slot circular word buffer at 0..63(SP): |
| //   w[index&15] = rol(w[index&15] ^ w[(index-3)&15] ^ w[(index-8)&15] ^ w[(index-14)&15], 1) |
| // The new word is written back to its slot and also left in R10. Clobbers R10. |
| // Every use of index is parenthesized so that an expression argument is |
| // offset and masked as a whole. |
| #define SHUFFLE(index) \ |
| MOVL (((index)&0xf)*4)(SP), R10; \ |
| XORL ((((index)-3)&0xf)*4)(SP), R10; \ |
| XORL ((((index)-8)&0xf)*4)(SP), R10; \ |
| XORL ((((index)-14)&0xf)*4)(SP), R10; \ |
| ROLL $1, R10; \ |
| MOVL R10, (((index)&0xf)*4)(SP) |
| |
| // FUNC1(a, b, c, d, e) is the round function for type 1 rounds. |
| // It leaves R9 = ((c ^ d) & b) ^ d: each result bit is taken from c where |
| // the matching bit of b is 1 and from d where it is 0. |
| // Reads b, c, d; a and e are accepted but unused. Clobbers R9. |
| #define FUNC1(a, b, c, d, e) \ |
| MOVL c, R9; \ |
| XORL d, R9; \ |
| ANDL b, R9; \ |
| XORL d, R9 |
| |
| // FUNC2(a, b, c, d, e) is the round function for type 2 rounds |
| // (and, through FUNC4, type 4 rounds). It leaves R9 = b ^ c ^ d. |
| // Reads b, c, d; a and e are accepted but unused. Clobbers R9. |
| #define FUNC2(a, b, c, d, e) \ |
| MOVL d, R9; \ |
| XORL c, R9; \ |
| XORL b, R9 |
| |
| // FUNC3(a, b, c, d, e) is the round function for type 3 rounds. |
| // It leaves R9 = (b & c) | ((b | c) & d), the bitwise majority of b, c, d. |
| // Reads b, c, d; a and e are accepted but unused. Clobbers R8 and R9. |
| #define FUNC3(a, b, c, d, e) \ |
| MOVL b, R9; \ |
| ANDL c, R9; \ |
| MOVL b, R8; \ |
| ORL c, R8; \ |
| ANDL d, R8; \ |
| ORL R8, R9 |
| |
| // FUNC4: type 4 rounds use the same round function as type 2 rounds. |
| #define FUNC4(a, b, c, d, e) FUNC2(a, b, c, d, e) |
| |
| // MIX(a, b, c, d, e, const) finishes one round. On entry R9 holds the |
| // round-function value and R10 the word for this round. |
| // Result: b = rol(b, 30); e = e + R9 + R10 + const + rol(a, 5). |
| // c and d are accepted but unused. Clobbers R8. |
| // The hand-off of registers to the next round is done by the caller |
| // permuting the arguments, not by this macro. |
| #define MIX(a, b, c, d, e, const) \ |
| ROLL $30, b; /* b = rol(b, 30) */ \ |
| ADDL R9, e; /* e += round function value */ \ |
| MOVL a, R8; \ |
| ROLL $5, R8; /* R8 = rol(a, 5), a itself is left unchanged */ \ |
| LEAL const(e)(R10*1), e; /* e += R10 + const; address computation only, no memory access */ \ |
| ADDL R8, e /* e += rol(a, 5) */ |
| |
| // ROUND1(a, b, c, d, e, index): a type 1 round that loads its word from |
| // the input (rounds 0-15). Runs LOAD, then FUNC1, then MIX with the |
| // type 1 constant 0x5A827999. |
| #define ROUND1(a, b, c, d, e, index) \ |
| LOAD(index); FUNC1(a, b, c, d, e); \ |
| MIX(a, b, c, d, e, 0x5A827999) |
| |
| // ROUND1x(a, b, c, d, e, index): a type 1 round that does not load data |
| // (rounds 16-19). Runs SHUFFLE, then FUNC1, then MIX with the type 1 |
| // constant 0x5A827999. |
| #define ROUND1x(a, b, c, d, e, index) \ |
| SHUFFLE(index); FUNC1(a, b, c, d, e); \ |
| MIX(a, b, c, d, e, 0x5A827999) |
| |
| // ROUND2(a, b, c, d, e, index): a type 2 round (rounds 20-39). |
| // Runs SHUFFLE, then FUNC2, then MIX with the type 2 constant 0x6ED9EBA1. |
| #define ROUND2(a, b, c, d, e, index) \ |
| SHUFFLE(index); FUNC2(a, b, c, d, e); \ |
| MIX(a, b, c, d, e, 0x6ED9EBA1) |
| |
| // ROUND3(a, b, c, d, e, index): a type 3 round (rounds 40-59). |
| // Runs SHUFFLE, then FUNC3, then MIX with the type 3 constant 0x8F1BBCDC. |
| #define ROUND3(a, b, c, d, e, index) \ |
| SHUFFLE(index); FUNC3(a, b, c, d, e); \ |
| MIX(a, b, c, d, e, 0x8F1BBCDC) |
| |
| // ROUND4(a, b, c, d, e, index): a type 4 round (rounds 60-79). |
| // Runs SHUFFLE, then FUNC4, then MIX with the type 4 constant 0xCA62C1D6. |
| #define ROUND4(a, b, c, d, e, index) \ |
| SHUFFLE(index); FUNC4(a, b, c, d, e); \ |
| MIX(a, b, c, d, e, 0xCA62C1D6) |
| |
| // block folds every complete 64-byte chunk of the input into the five |
| // 32-bit state words addressed by dig. The input length is rounded down to |
| // a multiple of 64; when no complete chunk is present the routine returns |
| // without writing to dig. |
| // |
| // Register use: |
| //   R14              pointer to the five state words (dig) |
| //   SI, DI           current chunk, end of the last complete chunk |
| //   AX BX CX DX R13  working values a, b, c, d, e (R13 is spelled BP in the loop) |
| //   R8 R9 R10        scratch used by the round macros |
| //   0..63(SP)        16-word buffer used by LOAD and SHUFFLE |
| TEXT ·block(SB),NOSPLIT,$64-32 |
| MOVL dig+0(FP), R14 /* R14 = state pointer */ |
| MOVL p_base+4(FP), SI /* SI = start of input */ |
| MOVL p_len+8(FP), DX /* DX = input length in bytes */ |
| SHRQ $6, DX |
| SHLQ $6, DX /* length rounded down to a multiple of 64 */ |
| |
| LEAQ (SI)(DX*1), DI /* DI = end of the last complete chunk */ |
| MOVL (0*4)(R14), AX /* a */ |
| MOVL (1*4)(R14), BX /* b */ |
| MOVL (2*4)(R14), CX /* c */ |
| MOVL (3*4)(R14), DX /* d; the length in DX is no longer needed */ |
| MOVL (4*4)(R14), R13 /* e */ |
| |
| CMPQ SI, DI |
| JEQ end /* no complete chunk: nothing to do */ |
| |
| loop: |
| #define BP R13 /* keep diff from sha1block_amd64.s small */ |
| ROUND1(AX, BX, CX, DX, BP, 0) |
| ROUND1(BP, AX, BX, CX, DX, 1) |
| ROUND1(DX, BP, AX, BX, CX, 2) |
| ROUND1(CX, DX, BP, AX, BX, 3) |
| ROUND1(BX, CX, DX, BP, AX, 4) |
| ROUND1(AX, BX, CX, DX, BP, 5) |
| ROUND1(BP, AX, BX, CX, DX, 6) |
| ROUND1(DX, BP, AX, BX, CX, 7) |
| ROUND1(CX, DX, BP, AX, BX, 8) |
| ROUND1(BX, CX, DX, BP, AX, 9) |
| ROUND1(AX, BX, CX, DX, BP, 10) |
| ROUND1(BP, AX, BX, CX, DX, 11) |
| ROUND1(DX, BP, AX, BX, CX, 12) |
| ROUND1(CX, DX, BP, AX, BX, 13) |
| ROUND1(BX, CX, DX, BP, AX, 14) |
| ROUND1(AX, BX, CX, DX, BP, 15) |
| |
| ROUND1x(BP, AX, BX, CX, DX, 16) |
| ROUND1x(DX, BP, AX, BX, CX, 17) |
| ROUND1x(CX, DX, BP, AX, BX, 18) |
| ROUND1x(BX, CX, DX, BP, AX, 19) |
| |
| ROUND2(AX, BX, CX, DX, BP, 20) |
| ROUND2(BP, AX, BX, CX, DX, 21) |
| ROUND2(DX, BP, AX, BX, CX, 22) |
| ROUND2(CX, DX, BP, AX, BX, 23) |
| ROUND2(BX, CX, DX, BP, AX, 24) |
| ROUND2(AX, BX, CX, DX, BP, 25) |
| ROUND2(BP, AX, BX, CX, DX, 26) |
| ROUND2(DX, BP, AX, BX, CX, 27) |
| ROUND2(CX, DX, BP, AX, BX, 28) |
| ROUND2(BX, CX, DX, BP, AX, 29) |
| ROUND2(AX, BX, CX, DX, BP, 30) |
| ROUND2(BP, AX, BX, CX, DX, 31) |
| ROUND2(DX, BP, AX, BX, CX, 32) |
| ROUND2(CX, DX, BP, AX, BX, 33) |
| ROUND2(BX, CX, DX, BP, AX, 34) |
| ROUND2(AX, BX, CX, DX, BP, 35) |
| ROUND2(BP, AX, BX, CX, DX, 36) |
| ROUND2(DX, BP, AX, BX, CX, 37) |
| ROUND2(CX, DX, BP, AX, BX, 38) |
| ROUND2(BX, CX, DX, BP, AX, 39) |
| |
| ROUND3(AX, BX, CX, DX, BP, 40) |
| ROUND3(BP, AX, BX, CX, DX, 41) |
| ROUND3(DX, BP, AX, BX, CX, 42) |
| ROUND3(CX, DX, BP, AX, BX, 43) |
| ROUND3(BX, CX, DX, BP, AX, 44) |
| ROUND3(AX, BX, CX, DX, BP, 45) |
| ROUND3(BP, AX, BX, CX, DX, 46) |
| ROUND3(DX, BP, AX, BX, CX, 47) |
| ROUND3(CX, DX, BP, AX, BX, 48) |
| ROUND3(BX, CX, DX, BP, AX, 49) |
| ROUND3(AX, BX, CX, DX, BP, 50) |
| ROUND3(BP, AX, BX, CX, DX, 51) |
| ROUND3(DX, BP, AX, BX, CX, 52) |
| ROUND3(CX, DX, BP, AX, BX, 53) |
| ROUND3(BX, CX, DX, BP, AX, 54) |
| ROUND3(AX, BX, CX, DX, BP, 55) |
| ROUND3(BP, AX, BX, CX, DX, 56) |
| ROUND3(DX, BP, AX, BX, CX, 57) |
| ROUND3(CX, DX, BP, AX, BX, 58) |
| ROUND3(BX, CX, DX, BP, AX, 59) |
| |
| ROUND4(AX, BX, CX, DX, BP, 60) |
| ROUND4(BP, AX, BX, CX, DX, 61) |
| ROUND4(DX, BP, AX, BX, CX, 62) |
| ROUND4(CX, DX, BP, AX, BX, 63) |
| ROUND4(BX, CX, DX, BP, AX, 64) |
| ROUND4(AX, BX, CX, DX, BP, 65) |
| ROUND4(BP, AX, BX, CX, DX, 66) |
| ROUND4(DX, BP, AX, BX, CX, 67) |
| ROUND4(CX, DX, BP, AX, BX, 68) |
| ROUND4(BX, CX, DX, BP, AX, 69) |
| ROUND4(AX, BX, CX, DX, BP, 70) |
| ROUND4(BP, AX, BX, CX, DX, 71) |
| ROUND4(DX, BP, AX, BX, CX, 72) |
| ROUND4(CX, DX, BP, AX, BX, 73) |
| ROUND4(BX, CX, DX, BP, AX, 74) |
| ROUND4(AX, BX, CX, DX, BP, 75) |
| ROUND4(BP, AX, BX, CX, DX, 76) |
| ROUND4(DX, BP, AX, BX, CX, 77) |
| ROUND4(CX, DX, BP, AX, BX, 78) |
| ROUND4(BX, CX, DX, BP, AX, 79) |
| #undef BP |
| |
| ADDL (0*4)(R14), AX /* add the state saved before this chunk to the round results */ |
| ADDL (1*4)(R14), BX |
| ADDL (2*4)(R14), CX |
| ADDL (3*4)(R14), DX |
| ADDL (4*4)(R14), R13 |
| |
| MOVL AX, (0*4)(R14) /* store the new state; the registers keep it for the next chunk */ |
| MOVL BX, (1*4)(R14) |
| MOVL CX, (2*4)(R14) |
| MOVL DX, (3*4)(R14) |
| MOVL R13, (4*4)(R14) |
| |
| ADDQ $64, SI /* advance to the next chunk */ |
| CMPQ SI, DI |
| JB loop /* continue while SI is below the end pointer */ |
| |
| end: |
| RET |