sha3: add optimized implementation for s390x
Message-security-assist extension 6 adds support for the SHA-3 and
SHAKE algorithms. This CL allows the sha3 package to use these new
features.
name old speed new speed delta
PermutationFunction 328MB/s ± 0% 385MB/s ± 0% +17.28% (p=0.000 n=9+10)
Sha3_512_MTU 108MB/s ± 0% 2011MB/s ± 0% +1768.56% (p=0.000 n=10+10)
Sha3_384_MTU 149MB/s ± 0% 2437MB/s ± 0% +1534.22% (p=0.000 n=10+10)
Sha3_256_MTU 185MB/s ± 0% 2739MB/s ± 0% +1379.93% (p=0.000 n=10+10)
Sha3_224_MTU 195MB/s ± 0% 2782MB/s ± 0% +1326.05% (p=0.000 n=10+10)
Shake128_MTU 225MB/s ± 0% 4436MB/s ± 0% +1873.18% (p=0.000 n=9+9)
Shake256_MTU 209MB/s ± 0% 4521MB/s ± 0% +2059.86% (p=0.000 n=8+10)
Shake256_16x 188MB/s ± 0% 1366MB/s ± 0% +624.70% (p=0.000 n=9+10)
Shake256_1MiB 212MB/s ± 0% 5861MB/s ± 0% +2666.67% (p=0.000 n=10+10)
Sha3_512_1MiB 116MB/s ± 0% 4328MB/s ± 0% +3628.33% (p=0.000 n=10+10)
Change-Id: I8ebc503ca2b9eda2ebb361dffdbfe79dd97e1975
Reviewed-on: https://go-review.googlesource.com/59391
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/sha3/hashes.go b/sha3/hashes.go
index 2b51cf4..f35a42f 100644
--- a/sha3/hashes.go
+++ b/sha3/hashes.go
@@ -15,22 +15,42 @@
// New224 creates a new SHA3-224 hash.
// Its generic security strength is 224 bits against preimage attacks,
// and 112 bits against collision attacks.
-func New224() hash.Hash { return &state{rate: 144, outputLen: 28, dsbyte: 0x06} }
+func New224() hash.Hash {
+ if h := new224Asm(); h != nil {
+ return h
+ }
+ return &state{rate: 144, outputLen: 28, dsbyte: 0x06}
+}
// New256 creates a new SHA3-256 hash.
// Its generic security strength is 256 bits against preimage attacks,
// and 128 bits against collision attacks.
-func New256() hash.Hash { return &state{rate: 136, outputLen: 32, dsbyte: 0x06} }
+func New256() hash.Hash {
+ if h := new256Asm(); h != nil {
+ return h
+ }
+ return &state{rate: 136, outputLen: 32, dsbyte: 0x06}
+}
// New384 creates a new SHA3-384 hash.
// Its generic security strength is 384 bits against preimage attacks,
// and 192 bits against collision attacks.
-func New384() hash.Hash { return &state{rate: 104, outputLen: 48, dsbyte: 0x06} }
+func New384() hash.Hash {
+ if h := new384Asm(); h != nil {
+ return h
+ }
+ return &state{rate: 104, outputLen: 48, dsbyte: 0x06}
+}
// New512 creates a new SHA3-512 hash.
// Its generic security strength is 512 bits against preimage attacks,
// and 256 bits against collision attacks.
-func New512() hash.Hash { return &state{rate: 72, outputLen: 64, dsbyte: 0x06} }
+func New512() hash.Hash {
+ if h := new512Asm(); h != nil {
+ return h
+ }
+ return &state{rate: 72, outputLen: 64, dsbyte: 0x06}
+}
// Sum224 returns the SHA3-224 digest of the data.
func Sum224(data []byte) (digest [28]byte) {
diff --git a/sha3/hashes_generic.go b/sha3/hashes_generic.go
new file mode 100644
index 0000000..c4ff3f6
--- /dev/null
+++ b/sha3/hashes_generic.go
@@ -0,0 +1,27 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build gccgo appengine !s390x
+
+package sha3
+
+import (
+ "hash"
+)
+
+// new224Asm returns an assembly implementation of SHA3-224 if available,
+// otherwise it returns nil.
+func new224Asm() hash.Hash { return nil }
+
+// new256Asm returns an assembly implementation of SHA3-256 if available,
+// otherwise it returns nil.
+func new256Asm() hash.Hash { return nil }
+
+// new384Asm returns an assembly implementation of SHA3-384 if available,
+// otherwise it returns nil.
+func new384Asm() hash.Hash { return nil }
+
+// new512Asm returns an assembly implementation of SHA3-512 if available,
+// otherwise it returns nil.
+func new512Asm() hash.Hash { return nil }
diff --git a/sha3/sha3_s390x.go b/sha3/sha3_s390x.go
new file mode 100644
index 0000000..f1fb79c
--- /dev/null
+++ b/sha3/sha3_s390x.go
@@ -0,0 +1,289 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !gccgo,!appengine
+
+package sha3
+
+// This file contains code for using the 'compute intermediate
+// message digest' (KIMD) and 'compute last message digest' (KLMD)
+// instructions to compute SHA-3 and SHAKE hashes on IBM Z.
+
+import (
+ "hash"
+)
+
+// codes represent 7-bit KIMD/KLMD function codes as defined in
+// the Principles of Operation.
+type code uint64
+
+const (
+ // function codes for KIMD/KLMD
+ sha3_224 code = 32
+ sha3_256 = 33
+ sha3_384 = 34
+ sha3_512 = 35
+ shake_128 = 36
+ shake_256 = 37
+ nopad = 0x100
+)
+
+// hasMSA6 reports whether the machine supports the SHA-3 and SHAKE function
+// codes, as defined in message-security-assist extension 6.
+func hasMSA6() bool
+
+// hasAsm caches the result of hasMSA6 (which might be expensive to call).
+var hasAsm = hasMSA6()
+
+// kimd is a wrapper for the 'compute intermediate message digest' instruction.
+// src must be a multiple of the rate for the given function code.
+//go:noescape
+func kimd(function code, chain *[200]byte, src []byte)
+
+// klmd is a wrapper for the 'compute last message digest' instruction.
+// src padding is handled by the instruction.
+//go:noescape
+func klmd(function code, chain *[200]byte, dst, src []byte)
+
+type asmState struct {
+ a [200]byte // 1600 bit state
+ buf []byte // care must be taken to ensure cap(buf) is a multiple of rate
+ rate int // equivalent to block size
+ storage [3072]byte // underlying storage for buf
+ outputLen int // output length if fixed, 0 if not
+ function code // KIMD/KLMD function code
+ state spongeDirection // whether the sponge is absorbing or squeezing
+}
+
+func newAsmState(function code) *asmState {
+ var s asmState
+ s.function = function
+ switch function {
+ case sha3_224:
+ s.rate = 144
+ s.outputLen = 28
+ case sha3_256:
+ s.rate = 136
+ s.outputLen = 32
+ case sha3_384:
+ s.rate = 104
+ s.outputLen = 48
+ case sha3_512:
+ s.rate = 72
+ s.outputLen = 64
+ case shake_128:
+ s.rate = 168
+ case shake_256:
+ s.rate = 136
+ default:
+ panic("sha3: unrecognized function code")
+ }
+
+ // limit s.buf size to a multiple of s.rate
+ s.resetBuf()
+ return &s
+}
+
+func (s *asmState) clone() *asmState {
+ c := *s
+ c.buf = c.storage[:len(s.buf):cap(s.buf)]
+ return &c
+}
+
+// copyIntoBuf copies b into buf. It will panic if there is not enough space to
+// store all of b.
+func (s *asmState) copyIntoBuf(b []byte) {
+ bufLen := len(s.buf)
+ s.buf = s.buf[:len(s.buf)+len(b)]
+ copy(s.buf[bufLen:], b)
+}
+
+// resetBuf points buf at storage, sets the length to 0 and sets cap to be a
+// multiple of the rate.
+func (s *asmState) resetBuf() {
+ max := (cap(s.storage) / s.rate) * s.rate
+ s.buf = s.storage[:0:max]
+}
+
+// Write (via the embedded io.Writer interface) adds more data to the running hash.
+// It never returns an error.
+func (s *asmState) Write(b []byte) (int, error) {
+ if s.state != spongeAbsorbing {
+ panic("sha3: write to sponge after read")
+ }
+ length := len(b)
+ for len(b) > 0 {
+ if len(s.buf) == 0 && len(b) >= cap(s.buf) {
+ // Hash the data directly and push any remaining bytes
+ // into the buffer.
+ remainder := len(s.buf) % s.rate
+ kimd(s.function, &s.a, b[:len(b)-remainder])
+ if remainder != 0 {
+ s.copyIntoBuf(b[len(b)-remainder:])
+ }
+ return length, nil
+ }
+
+ if len(s.buf) == cap(s.buf) {
+ // flush the buffer
+ kimd(s.function, &s.a, s.buf)
+ s.buf = s.buf[:0]
+ }
+
+ // copy as much as we can into the buffer
+ n := len(b)
+ if len(b) > cap(s.buf)-len(s.buf) {
+ n = cap(s.buf) - len(s.buf)
+ }
+ s.copyIntoBuf(b[:n])
+ b = b[n:]
+ }
+ return length, nil
+}
+
+// Read squeezes an arbitrary number of bytes from the sponge.
+func (s *asmState) Read(out []byte) (n int, err error) {
+ n = len(out)
+
+ // need to pad if we were absorbing
+ if s.state == spongeAbsorbing {
+ s.state = spongeSqueezing
+
+ // write hash directly into out if possible
+ if len(out)%s.rate == 0 {
+ klmd(s.function, &s.a, out, s.buf) // len(out) may be 0
+ s.buf = s.buf[:0]
+ return
+ }
+
+ // write hash into buffer
+ max := cap(s.buf)
+ if max > len(out) {
+ max = (len(out)/s.rate)*s.rate + s.rate
+ }
+ klmd(s.function, &s.a, s.buf[:max], s.buf)
+ s.buf = s.buf[:max]
+ }
+
+ for len(out) > 0 {
+ // flush the buffer
+ if len(s.buf) != 0 {
+ c := copy(out, s.buf)
+ out = out[c:]
+ s.buf = s.buf[c:]
+ continue
+ }
+
+ // write hash directly into out if possible
+ if len(out)%s.rate == 0 {
+ klmd(s.function|nopad, &s.a, out, nil)
+ return
+ }
+
+ // write hash into buffer
+ s.resetBuf()
+ if cap(s.buf) > len(out) {
+ s.buf = s.buf[:(len(out)/s.rate)*s.rate+s.rate]
+ }
+ klmd(s.function|nopad, &s.a, s.buf, nil)
+ }
+ return
+}
+
+// Sum appends the current hash to b and returns the resulting slice.
+// It does not change the underlying hash state.
+func (s *asmState) Sum(b []byte) []byte {
+ if s.outputLen == 0 {
+ panic("sha3: cannot call Sum on SHAKE functions")
+ }
+
+ // Copy the state to preserve the original.
+ a := s.a
+
+ // Hash the buffer. Note that we don't clear it because we
+ // aren't updating the state.
+ klmd(s.function, &a, nil, s.buf)
+ return append(b, a[:s.outputLen]...)
+}
+
+// Reset resets the Hash to its initial state.
+func (s *asmState) Reset() {
+ for i := range s.a {
+ s.a[i] = 0
+ }
+ s.resetBuf()
+ s.state = spongeAbsorbing
+}
+
+// Size returns the number of bytes Sum will return.
+func (s *asmState) Size() int {
+ return s.outputLen
+}
+
+// BlockSize returns the hash's underlying block size.
+// The Write method must be able to accept any amount
+// of data, but it may operate more efficiently if all writes
+// are a multiple of the block size.
+func (s *asmState) BlockSize() int {
+ return s.rate
+}
+
+// Clone returns a copy of the ShakeHash in its current state.
+func (s *asmState) Clone() ShakeHash {
+ return s.clone()
+}
+
+// new224Asm returns an assembly implementation of SHA3-224 if available,
+// otherwise it returns nil.
+func new224Asm() hash.Hash {
+ if hasAsm {
+ return newAsmState(sha3_224)
+ }
+ return nil
+}
+
+// new256Asm returns an assembly implementation of SHA3-256 if available,
+// otherwise it returns nil.
+func new256Asm() hash.Hash {
+ if hasAsm {
+ return newAsmState(sha3_256)
+ }
+ return nil
+}
+
+// new384Asm returns an assembly implementation of SHA3-384 if available,
+// otherwise it returns nil.
+func new384Asm() hash.Hash {
+ if hasAsm {
+ return newAsmState(sha3_384)
+ }
+ return nil
+}
+
+// new512Asm returns an assembly implementation of SHA3-512 if available,
+// otherwise it returns nil.
+func new512Asm() hash.Hash {
+ if hasAsm {
+ return newAsmState(sha3_512)
+ }
+ return nil
+}
+
+// newShake128Asm returns an assembly implementation of SHAKE-128 if available,
+// otherwise it returns nil.
+func newShake128Asm() ShakeHash {
+ if hasAsm {
+ return newAsmState(shake_128)
+ }
+ return nil
+}
+
+// newShake256Asm returns an assembly implementation of SHAKE-256 if available,
+// otherwise it returns nil.
+func newShake256Asm() ShakeHash {
+ if hasAsm {
+ return newAsmState(shake_256)
+ }
+ return nil
+}
diff --git a/sha3/sha3_s390x.s b/sha3/sha3_s390x.s
new file mode 100644
index 0000000..20978fc
--- /dev/null
+++ b/sha3/sha3_s390x.s
@@ -0,0 +1,49 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !gccgo,!appengine
+
+#include "textflag.h"
+
+TEXT ·hasMSA6(SB), NOSPLIT, $16-1
+ MOVD $0, R0 // KIMD-Query function code
+ MOVD $tmp-16(SP), R1 // parameter block
+ XC $16, (R1), (R1) // clear the parameter block
+ WORD $0xB93E0002 // KIMD --, --
+ WORD $0x91FC1004 // TM 4(R1), 0xFC (test bits [32-37])
+ BVS yes
+
+no:
+ MOVB $0, ret+0(FP)
+ RET
+
+yes:
+ MOVB $1, ret+0(FP)
+ RET
+
+// func kimd(function code, params *[200]byte, src []byte)
+TEXT ·kimd(SB), NOFRAME|NOSPLIT, $0-40
+ MOVD function+0(FP), R0
+ MOVD params+8(FP), R1
+ LMG src+16(FP), R2, R3 // R2=base, R3=len
+
+continue:
+ WORD $0xB93E0002 // KIMD --, R2
+ BVS continue // continue if interrupted
+ MOVD $0, R0 // reset R0 for pre-go1.8 compilers
+ RET
+
+// func klmd(function code, params *[200]byte, dst, src []byte)
+TEXT ·klmd(SB), NOFRAME|NOSPLIT, $0-64
+ // TODO: SHAKE support
+ MOVD function+0(FP), R0
+ MOVD params+8(FP), R1
+ LMG dst+16(FP), R2, R3 // R2=base, R3=len
+ LMG src+40(FP), R4, R5 // R4=base, R5=len
+
+continue:
+ WORD $0xB93F0024 // KLMD R2, R4
+ BVS continue // continue if interrupted
+ MOVD $0, R0 // reset R0 for pre-go1.8 compilers
+ RET
diff --git a/sha3/shake.go b/sha3/shake.go
index 5a027d2..97c9b06 100644
--- a/sha3/shake.go
+++ b/sha3/shake.go
@@ -38,12 +38,22 @@
// NewShake128 creates a new SHAKE128 variable-output-length ShakeHash.
// Its generic security strength is 128 bits against all attacks if at
// least 32 bytes of its output are used.
-func NewShake128() ShakeHash { return &state{rate: 168, dsbyte: 0x1f} }
+func NewShake128() ShakeHash {
+ if h := newShake128Asm(); h != nil {
+ return h
+ }
+ return &state{rate: 168, dsbyte: 0x1f}
+}
// NewShake256 creates a new SHAKE256 variable-output-length ShakeHash.
// Its generic security strength is 256 bits against all attacks if
// at least 64 bytes of its output are used.
-func NewShake256() ShakeHash { return &state{rate: 136, dsbyte: 0x1f} }
+func NewShake256() ShakeHash {
+ if h := newShake256Asm(); h != nil {
+ return h
+ }
+ return &state{rate: 136, dsbyte: 0x1f}
+}
// ShakeSum128 writes an arbitrary-length digest of data into hash.
func ShakeSum128(hash, data []byte) {
diff --git a/sha3/shake_generic.go b/sha3/shake_generic.go
new file mode 100644
index 0000000..73d0c90
--- /dev/null
+++ b/sha3/shake_generic.go
@@ -0,0 +1,19 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build gccgo appengine !s390x
+
+package sha3
+
+// newShake128Asm returns an assembly implementation of SHAKE-128 if available,
+// otherwise it returns nil.
+func newShake128Asm() ShakeHash {
+ return nil
+}
+
+// newShake256Asm returns an assembly implementation of SHAKE-256 if available,
+// otherwise it returns nil.
+func newShake256Asm() ShakeHash {
+ return nil
+}