crypto/sha1: faster amd64, 386 implementations
-- amd64 --
On a MacBookPro10,2 (Core i5):
benchmark old ns/op new ns/op delta
BenchmarkHash8Bytes 785 592 -24.59%
BenchmarkHash1K 8727 3014 -65.46%
BenchmarkHash8K 64926 20723 -68.08%
benchmark old MB/s new MB/s speedup
BenchmarkHash8Bytes 10.19 13.50 1.32x
BenchmarkHash1K 117.34 339.71 2.90x
BenchmarkHash8K 126.17 395.31 3.13x
For comparison, on the same machine, openssl 0.9.8r reports
its sha1 speed as 341 MB/s for 1K and 404 MB/s for 8K.
On an Intel Xeon E5520:
benchmark old ns/op new ns/op delta
BenchmarkHash8Bytes 984 707 -28.15%
BenchmarkHash1K 11141 3466 -68.89%
BenchmarkHash8K 82435 23411 -71.60%
benchmark old MB/s new MB/s speedup
BenchmarkHash8Bytes 8.13 11.31 1.39x
BenchmarkHash1K 91.91 295.36 3.21x
BenchmarkHash8K 99.37 349.91 3.52x
For comparison, on the same machine, openssl 1.0.1 reports
its sha1 speed as 286 MB/s for 1K and 394 MB/s for 8K.
-- 386 --
On a MacBookPro10,2 (Core i5):
benchmark old ns/op new ns/op delta
BenchmarkHash8Bytes 1041 713 -31.51%
BenchmarkHash1K 15612 3382 -78.34%
BenchmarkHash8K 110152 22733 -79.36%
benchmark old MB/s new MB/s speedup
BenchmarkHash8Bytes 7.68 11.21 1.46x
BenchmarkHash1K 65.59 302.76 4.62x
BenchmarkHash8K 74.37 360.36 4.85x
On an Intel Xeon E5520:
benchmark old ns/op new ns/op delta
BenchmarkHash8Bytes 1221 842 -31.04%
BenchmarkHash1K 14643 4137 -71.75%
BenchmarkHash8K 108722 27394 -74.80%
benchmark old MB/s new MB/s speedup
BenchmarkHash8Bytes 6.55 9.49 1.45x
BenchmarkHash1K 69.93 247.51 3.54x
BenchmarkHash8K 75.35 299.04 3.97x
R=agl, dave
CC=golang-dev
https://golang.org/cl/7763049
diff --git a/src/pkg/crypto/sha1/sha1block.go b/src/pkg/crypto/sha1/sha1block.go
index 1c9507c..92224fc 100644
--- a/src/pkg/crypto/sha1/sha1block.go
+++ b/src/pkg/crypto/sha1/sha1block.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+// +build !amd64,!386
+
// SHA1 block step.
// In its own file so that a faster assembly or C version
// can be substituted easily.
diff --git a/src/pkg/crypto/sha1/sha1block_386.s b/src/pkg/crypto/sha1/sha1block_386.s
new file mode 100644
index 0000000..fbf237b
--- /dev/null
+++ b/src/pkg/crypto/sha1/sha1block_386.s
@@ -0,0 +1,233 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// SHA1 block routine. See sha1block.go for Go equivalent.
+//
+// There are 80 rounds of 4 types:
+// - rounds 0-15 are type 1 and load data (ROUND1 macro).
+// - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
+// - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
+// - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
+// - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
+//
+// Each round loads or shuffles the data, then computes a per-round
+// function of b, c, d, and then mixes the result into and rotates the
+// five registers a, b, c, d, e holding the intermediate results.
+//
+// The register rotation is implemented by rotating the arguments to
+// the round macros instead of by explicit move instructions.
+
+// Like sha1block_amd64.s, but we keep the data and limit pointers on the stack.
+// To free up the word pointer (R10 on amd64, DI here), we add it to e during
+// LOAD/SHUFFLE instead of during MIX.
+//
+// The stack holds the intermediate word array - 16 uint32s - at 0(SP) up to 64(SP).
+// The saved a, b, c, d, e (R11 through R15 on amd64) are at 64(SP) up to 84(SP).
+// The saved limit pointer (DI on amd64) is at 84(SP).
+// The saved data pointer (SI on amd64) is at 88(SP).
+
+#define LOAD(index, e) \
+ MOVL 88(SP), SI; \
+ MOVL (index*4)(SI), DI; \
+ BSWAPL DI; \
+ MOVL DI, (index*4)(SP); \
+ ADDL DI, e
+
+#define SHUFFLE(index, e) \
+ MOVL (((index)&0xf)*4)(SP), DI; \
+ XORL (((index-3)&0xf)*4)(SP), DI; \
+ XORL (((index-8)&0xf)*4)(SP), DI; \
+ XORL (((index-14)&0xf)*4)(SP), DI; \
+ ROLL $1, DI; \
+ MOVL DI, (((index)&0xf)*4)(SP); \
+ ADDL DI, e
+
+#define FUNC1(a, b, c, d, e) \
+ MOVL b, SI; \
+ ANDL c, SI; \
+ MOVL b, DI; \
+ NOTL DI; \
+ ANDL d, DI; \
+ ORL SI, DI
+
+#define FUNC2(a, b, c, d, e) \
+ MOVL b, DI; \
+ XORL c, DI; \
+ XORL d, DI
+
+#define FUNC3(a, b, c, d, e) \
+ MOVL b, SI; \
+ ORL c, SI; \
+ ANDL d, SI; \
+ MOVL b, DI; \
+ ANDL c, DI; \
+ ORL SI, DI
+
+#define FUNC4 FUNC2
+
+#define MIX(a, b, c, d, e, const) \
+ ROLL $30, b; \
+ ADDL DI, e; \
+ MOVL a, SI; \
+ ROLL $5, SI; \
+ LEAL const(e)(SI*1), e
+
+#define ROUND1(a, b, c, d, e, index) \
+ LOAD(index, e); \
+ FUNC1(a, b, c, d, e); \
+ MIX(a, b, c, d, e, 0x5A827999)
+
+#define ROUND1x(a, b, c, d, e, index) \
+ SHUFFLE(index, e); \
+ FUNC1(a, b, c, d, e); \
+ MIX(a, b, c, d, e, 0x5A827999)
+
+#define ROUND2(a, b, c, d, e, index) \
+ SHUFFLE(index, e); \
+ FUNC2(a, b, c, d, e); \
+ MIX(a, b, c, d, e, 0x6ED9EBA1)
+
+#define ROUND3(a, b, c, d, e, index) \
+ SHUFFLE(index, e); \
+ FUNC3(a, b, c, d, e); \
+ MIX(a, b, c, d, e, 0x8F1BBCDC)
+
+#define ROUND4(a, b, c, d, e, index) \
+ SHUFFLE(index, e); \
+ FUNC4(a, b, c, d, e); \
+ MIX(a, b, c, d, e, 0xCA62C1D6)
+
+// func block(dig *digest, p []byte)
+TEXT ·block(SB),7,$92-16
+ MOVL dig+0(FP), BP
+ MOVL p+4(FP), SI
+ MOVL n+8(FP), DX
+ SHRL $6, DX
+ SHLL $6, DX
+
+ LEAL (SI)(DX*1), DI
+ MOVL (0*4)(BP), AX
+ MOVL (1*4)(BP), BX
+ MOVL (2*4)(BP), CX
+ MOVL (3*4)(BP), DX
+ MOVL (4*4)(BP), BP
+
+ CMPL SI, DI
+ JEQ end
+
+ MOVL DI, 84(SP)
+
+loop:
+ MOVL SI, 88(SP)
+
+ MOVL AX, 64(SP)
+ MOVL BX, 68(SP)
+ MOVL CX, 72(SP)
+ MOVL DX, 76(SP)
+ MOVL BP, 80(SP)
+
+ ROUND1(AX, BX, CX, DX, BP, 0)
+ ROUND1(BP, AX, BX, CX, DX, 1)
+ ROUND1(DX, BP, AX, BX, CX, 2)
+ ROUND1(CX, DX, BP, AX, BX, 3)
+ ROUND1(BX, CX, DX, BP, AX, 4)
+ ROUND1(AX, BX, CX, DX, BP, 5)
+ ROUND1(BP, AX, BX, CX, DX, 6)
+ ROUND1(DX, BP, AX, BX, CX, 7)
+ ROUND1(CX, DX, BP, AX, BX, 8)
+ ROUND1(BX, CX, DX, BP, AX, 9)
+ ROUND1(AX, BX, CX, DX, BP, 10)
+ ROUND1(BP, AX, BX, CX, DX, 11)
+ ROUND1(DX, BP, AX, BX, CX, 12)
+ ROUND1(CX, DX, BP, AX, BX, 13)
+ ROUND1(BX, CX, DX, BP, AX, 14)
+ ROUND1(AX, BX, CX, DX, BP, 15)
+
+ ROUND1x(BP, AX, BX, CX, DX, 16)
+ ROUND1x(DX, BP, AX, BX, CX, 17)
+ ROUND1x(CX, DX, BP, AX, BX, 18)
+ ROUND1x(BX, CX, DX, BP, AX, 19)
+
+ ROUND2(AX, BX, CX, DX, BP, 20)
+ ROUND2(BP, AX, BX, CX, DX, 21)
+ ROUND2(DX, BP, AX, BX, CX, 22)
+ ROUND2(CX, DX, BP, AX, BX, 23)
+ ROUND2(BX, CX, DX, BP, AX, 24)
+ ROUND2(AX, BX, CX, DX, BP, 25)
+ ROUND2(BP, AX, BX, CX, DX, 26)
+ ROUND2(DX, BP, AX, BX, CX, 27)
+ ROUND2(CX, DX, BP, AX, BX, 28)
+ ROUND2(BX, CX, DX, BP, AX, 29)
+ ROUND2(AX, BX, CX, DX, BP, 30)
+ ROUND2(BP, AX, BX, CX, DX, 31)
+ ROUND2(DX, BP, AX, BX, CX, 32)
+ ROUND2(CX, DX, BP, AX, BX, 33)
+ ROUND2(BX, CX, DX, BP, AX, 34)
+ ROUND2(AX, BX, CX, DX, BP, 35)
+ ROUND2(BP, AX, BX, CX, DX, 36)
+ ROUND2(DX, BP, AX, BX, CX, 37)
+ ROUND2(CX, DX, BP, AX, BX, 38)
+ ROUND2(BX, CX, DX, BP, AX, 39)
+
+ ROUND3(AX, BX, CX, DX, BP, 40)
+ ROUND3(BP, AX, BX, CX, DX, 41)
+ ROUND3(DX, BP, AX, BX, CX, 42)
+ ROUND3(CX, DX, BP, AX, BX, 43)
+ ROUND3(BX, CX, DX, BP, AX, 44)
+ ROUND3(AX, BX, CX, DX, BP, 45)
+ ROUND3(BP, AX, BX, CX, DX, 46)
+ ROUND3(DX, BP, AX, BX, CX, 47)
+ ROUND3(CX, DX, BP, AX, BX, 48)
+ ROUND3(BX, CX, DX, BP, AX, 49)
+ ROUND3(AX, BX, CX, DX, BP, 50)
+ ROUND3(BP, AX, BX, CX, DX, 51)
+ ROUND3(DX, BP, AX, BX, CX, 52)
+ ROUND3(CX, DX, BP, AX, BX, 53)
+ ROUND3(BX, CX, DX, BP, AX, 54)
+ ROUND3(AX, BX, CX, DX, BP, 55)
+ ROUND3(BP, AX, BX, CX, DX, 56)
+ ROUND3(DX, BP, AX, BX, CX, 57)
+ ROUND3(CX, DX, BP, AX, BX, 58)
+ ROUND3(BX, CX, DX, BP, AX, 59)
+
+ ROUND4(AX, BX, CX, DX, BP, 60)
+ ROUND4(BP, AX, BX, CX, DX, 61)
+ ROUND4(DX, BP, AX, BX, CX, 62)
+ ROUND4(CX, DX, BP, AX, BX, 63)
+ ROUND4(BX, CX, DX, BP, AX, 64)
+ ROUND4(AX, BX, CX, DX, BP, 65)
+ ROUND4(BP, AX, BX, CX, DX, 66)
+ ROUND4(DX, BP, AX, BX, CX, 67)
+ ROUND4(CX, DX, BP, AX, BX, 68)
+ ROUND4(BX, CX, DX, BP, AX, 69)
+ ROUND4(AX, BX, CX, DX, BP, 70)
+ ROUND4(BP, AX, BX, CX, DX, 71)
+ ROUND4(DX, BP, AX, BX, CX, 72)
+ ROUND4(CX, DX, BP, AX, BX, 73)
+ ROUND4(BX, CX, DX, BP, AX, 74)
+ ROUND4(AX, BX, CX, DX, BP, 75)
+ ROUND4(BP, AX, BX, CX, DX, 76)
+ ROUND4(DX, BP, AX, BX, CX, 77)
+ ROUND4(CX, DX, BP, AX, BX, 78)
+ ROUND4(BX, CX, DX, BP, AX, 79)
+
+ ADDL 64(SP), AX
+ ADDL 68(SP), BX
+ ADDL 72(SP), CX
+ ADDL 76(SP), DX
+ ADDL 80(SP), BP
+
+ MOVL 88(SP), SI
+ ADDL $64, SI
+ CMPL SI, 84(SP)
+ JB loop
+
+end:
+ MOVL dig+0(FP), DI
+ MOVL AX, (0*4)(DI)
+ MOVL BX, (1*4)(DI)
+ MOVL CX, (2*4)(DI)
+ MOVL DX, (3*4)(DI)
+ MOVL BP, (4*4)(DI)
+ RET
diff --git a/src/pkg/crypto/sha1/sha1block_amd64.s b/src/pkg/crypto/sha1/sha1block_amd64.s
new file mode 100644
index 0000000..e2b286a
--- /dev/null
+++ b/src/pkg/crypto/sha1/sha1block_amd64.s
@@ -0,0 +1,216 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// SHA1 block routine. See sha1block.go for Go equivalent.
+//
+// There are 80 rounds of 4 types:
+// - rounds 0-15 are type 1 and load data (ROUND1 macro).
+// - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
+// - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
+// - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
+// - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
+//
+// Each round loads or shuffles the data, then computes a per-round
+// function of b, c, d, and then mixes the result into and rotates the
+// five registers a, b, c, d, e holding the intermediate results.
+//
+// The register rotation is implemented by rotating the arguments to
+// the round macros instead of by explicit move instructions.
+
+#define LOAD(index) \
+ MOVL (index*4)(SI), R10; \
+ BSWAPL R10; \
+ MOVL R10, (index*4)(SP)
+
+#define SHUFFLE(index) \
+ MOVL (((index)&0xf)*4)(SP), R10; \
+ XORL (((index-3)&0xf)*4)(SP), R10; \
+ XORL (((index-8)&0xf)*4)(SP), R10; \
+ XORL (((index-14)&0xf)*4)(SP), R10; \
+ ROLL $1, R10; \
+ MOVL R10, (((index)&0xf)*4)(SP)
+
+#define FUNC1(a, b, c, d, e) \
+ MOVL b, R8; \
+ ANDL c, R8; \
+ MOVL b, R9; \
+ NOTL R9; \
+ ANDL d, R9; \
+ ORL R8, R9
+
+#define FUNC2(a, b, c, d, e) \
+ MOVL b, R9; \
+ XORL c, R9; \
+ XORL d, R9
+
+#define FUNC3(a, b, c, d, e) \
+ MOVL b, R8; \
+ ORL c, R8; \
+ ANDL d, R8; \
+ MOVL b, R9; \
+ ANDL c, R9; \
+ ORL R8, R9
+
+#define FUNC4 FUNC2
+
+#define MIX(a, b, c, d, e, const) \
+ ROLL $30, b; \
+ ADDL R9, e; \
+ MOVL a, R8; \
+ ROLL $5, R8; \
+ LEAL const(e)(R10*1), e; \
+ ADDL R8, e
+
+#define ROUND1(a, b, c, d, e, index) \
+ LOAD(index); \
+ FUNC1(a, b, c, d, e); \
+ MIX(a, b, c, d, e, 0x5A827999)
+
+#define ROUND1x(a, b, c, d, e, index) \
+ SHUFFLE(index); \
+ FUNC1(a, b, c, d, e); \
+ MIX(a, b, c, d, e, 0x5A827999)
+
+#define ROUND2(a, b, c, d, e, index) \
+ SHUFFLE(index); \
+ FUNC2(a, b, c, d, e); \
+ MIX(a, b, c, d, e, 0x6ED9EBA1)
+
+#define ROUND3(a, b, c, d, e, index) \
+ SHUFFLE(index); \
+ FUNC3(a, b, c, d, e); \
+ MIX(a, b, c, d, e, 0x8F1BBCDC)
+
+#define ROUND4(a, b, c, d, e, index) \
+ SHUFFLE(index); \
+ FUNC4(a, b, c, d, e); \
+ MIX(a, b, c, d, e, 0xCA62C1D6)
+
+TEXT ·block(SB),7,$64-32
+ MOVQ dig+0(FP), BP
+ MOVQ p+8(FP), SI
+ MOVQ n+16(FP), DX
+ SHRQ $6, DX
+ SHLQ $6, DX
+
+ LEAQ (SI)(DX*1), DI
+ MOVL (0*4)(BP), AX
+ MOVL (1*4)(BP), BX
+ MOVL (2*4)(BP), CX
+ MOVL (3*4)(BP), DX
+ MOVL (4*4)(BP), BP
+
+ CMPQ SI, DI
+ JEQ end
+
+loop:
+ MOVL AX, R11
+ MOVL BX, R12
+ MOVL CX, R13
+ MOVL DX, R14
+ MOVL BP, R15
+
+ ROUND1(AX, BX, CX, DX, BP, 0)
+ ROUND1(BP, AX, BX, CX, DX, 1)
+ ROUND1(DX, BP, AX, BX, CX, 2)
+ ROUND1(CX, DX, BP, AX, BX, 3)
+ ROUND1(BX, CX, DX, BP, AX, 4)
+ ROUND1(AX, BX, CX, DX, BP, 5)
+ ROUND1(BP, AX, BX, CX, DX, 6)
+ ROUND1(DX, BP, AX, BX, CX, 7)
+ ROUND1(CX, DX, BP, AX, BX, 8)
+ ROUND1(BX, CX, DX, BP, AX, 9)
+ ROUND1(AX, BX, CX, DX, BP, 10)
+ ROUND1(BP, AX, BX, CX, DX, 11)
+ ROUND1(DX, BP, AX, BX, CX, 12)
+ ROUND1(CX, DX, BP, AX, BX, 13)
+ ROUND1(BX, CX, DX, BP, AX, 14)
+ ROUND1(AX, BX, CX, DX, BP, 15)
+
+ ROUND1x(BP, AX, BX, CX, DX, 16)
+ ROUND1x(DX, BP, AX, BX, CX, 17)
+ ROUND1x(CX, DX, BP, AX, BX, 18)
+ ROUND1x(BX, CX, DX, BP, AX, 19)
+
+ ROUND2(AX, BX, CX, DX, BP, 20)
+ ROUND2(BP, AX, BX, CX, DX, 21)
+ ROUND2(DX, BP, AX, BX, CX, 22)
+ ROUND2(CX, DX, BP, AX, BX, 23)
+ ROUND2(BX, CX, DX, BP, AX, 24)
+ ROUND2(AX, BX, CX, DX, BP, 25)
+ ROUND2(BP, AX, BX, CX, DX, 26)
+ ROUND2(DX, BP, AX, BX, CX, 27)
+ ROUND2(CX, DX, BP, AX, BX, 28)
+ ROUND2(BX, CX, DX, BP, AX, 29)
+ ROUND2(AX, BX, CX, DX, BP, 30)
+ ROUND2(BP, AX, BX, CX, DX, 31)
+ ROUND2(DX, BP, AX, BX, CX, 32)
+ ROUND2(CX, DX, BP, AX, BX, 33)
+ ROUND2(BX, CX, DX, BP, AX, 34)
+ ROUND2(AX, BX, CX, DX, BP, 35)
+ ROUND2(BP, AX, BX, CX, DX, 36)
+ ROUND2(DX, BP, AX, BX, CX, 37)
+ ROUND2(CX, DX, BP, AX, BX, 38)
+ ROUND2(BX, CX, DX, BP, AX, 39)
+
+ ROUND3(AX, BX, CX, DX, BP, 40)
+ ROUND3(BP, AX, BX, CX, DX, 41)
+ ROUND3(DX, BP, AX, BX, CX, 42)
+ ROUND3(CX, DX, BP, AX, BX, 43)
+ ROUND3(BX, CX, DX, BP, AX, 44)
+ ROUND3(AX, BX, CX, DX, BP, 45)
+ ROUND3(BP, AX, BX, CX, DX, 46)
+ ROUND3(DX, BP, AX, BX, CX, 47)
+ ROUND3(CX, DX, BP, AX, BX, 48)
+ ROUND3(BX, CX, DX, BP, AX, 49)
+ ROUND3(AX, BX, CX, DX, BP, 50)
+ ROUND3(BP, AX, BX, CX, DX, 51)
+ ROUND3(DX, BP, AX, BX, CX, 52)
+ ROUND3(CX, DX, BP, AX, BX, 53)
+ ROUND3(BX, CX, DX, BP, AX, 54)
+ ROUND3(AX, BX, CX, DX, BP, 55)
+ ROUND3(BP, AX, BX, CX, DX, 56)
+ ROUND3(DX, BP, AX, BX, CX, 57)
+ ROUND3(CX, DX, BP, AX, BX, 58)
+ ROUND3(BX, CX, DX, BP, AX, 59)
+
+ ROUND4(AX, BX, CX, DX, BP, 60)
+ ROUND4(BP, AX, BX, CX, DX, 61)
+ ROUND4(DX, BP, AX, BX, CX, 62)
+ ROUND4(CX, DX, BP, AX, BX, 63)
+ ROUND4(BX, CX, DX, BP, AX, 64)
+ ROUND4(AX, BX, CX, DX, BP, 65)
+ ROUND4(BP, AX, BX, CX, DX, 66)
+ ROUND4(DX, BP, AX, BX, CX, 67)
+ ROUND4(CX, DX, BP, AX, BX, 68)
+ ROUND4(BX, CX, DX, BP, AX, 69)
+ ROUND4(AX, BX, CX, DX, BP, 70)
+ ROUND4(BP, AX, BX, CX, DX, 71)
+ ROUND4(DX, BP, AX, BX, CX, 72)
+ ROUND4(CX, DX, BP, AX, BX, 73)
+ ROUND4(BX, CX, DX, BP, AX, 74)
+ ROUND4(AX, BX, CX, DX, BP, 75)
+ ROUND4(BP, AX, BX, CX, DX, 76)
+ ROUND4(DX, BP, AX, BX, CX, 77)
+ ROUND4(CX, DX, BP, AX, BX, 78)
+ ROUND4(BX, CX, DX, BP, AX, 79)
+
+ ADDL R11, AX
+ ADDL R12, BX
+ ADDL R13, CX
+ ADDL R14, DX
+ ADDL R15, BP
+
+ ADDQ $64, SI
+ CMPQ SI, DI
+ JB loop
+
+end:
+ MOVQ dig+0(FP), DI
+ MOVL AX, (0*4)(DI)
+ MOVL BX, (1*4)(DI)
+ MOVL CX, (2*4)(DI)
+ MOVL DX, (3*4)(DI)
+ MOVL BP, (4*4)(DI)
+ RET
diff --git a/src/pkg/crypto/sha1/sha1block_decl.go b/src/pkg/crypto/sha1/sha1block_decl.go
new file mode 100644
index 0000000..348a6aa
--- /dev/null
+++ b/src/pkg/crypto/sha1/sha1block_decl.go
@@ -0,0 +1,9 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 386
+
+package sha1
+
+func block(*digest, []byte)