poly1305: add (*MAC).Verify API and use it in chacha20poly1305
Also, make sure New inlines so it does not cause an allocation. With
this, we have a zero allocation poly1305 flow and a zero allocation
generic chacha20poly1305 composition! \o/
While at it, remove some redundant code, and prepare to drop some
complexity once the last assembly implementation of sum is dropped.
Benchstat with "-tags purego" on amd64
name old time/op new time/op delta
Open-64-8 461ns ± 2% 415ns ± 1% -9.93% (p=0.000 n=10+8)
Seal-64-8 450ns ± 3% 412ns ± 3% -8.41% (p=0.000 n=10+10)
Open-64-X-8 603ns ± 2% 544ns ± 2% -9.84% (p=0.000 n=10+10)
Seal-64-X-8 580ns ± 3% 553ns ± 1% -4.56% (p=0.000 n=9+8)
Open-1350-8 3.98µs ± 2% 3.65µs ± 2% -8.28% (p=0.000 n=9+10)
Seal-1350-8 3.95µs ± 2% 3.64µs ± 1% -7.93% (p=0.000 n=9+10)
Open-1350-X-8 4.06µs ± 1% 3.68µs ± 3% -9.31% (p=0.000 n=9+10)
Seal-1350-X-8 4.08µs ± 4% 3.64µs ± 1% -10.71% (p=0.000 n=9+10)
Open-8192-8 21.7µs ± 3% 18.9µs ± 2% -13.10% (p=0.000 n=9+10)
Seal-8192-8 21.5µs ± 3% 18.8µs ± 1% -12.51% (p=0.000 n=9+9)
Open-8192-X-8 21.4µs ± 1% 19.1µs ± 2% -10.88% (p=0.000 n=10+10)
Seal-8192-X-8 21.3µs ± 2% 19.0µs ± 3% -10.92% (p=0.000 n=10+10)
name old speed new speed delta
Open-64-8 139MB/s ± 2% 154MB/s ± 2% +11.05% (p=0.000 n=10+8)
Seal-64-8 142MB/s ± 3% 155MB/s ± 3% +9.11% (p=0.000 n=10+10)
Open-64-X-8 106MB/s ± 2% 118MB/s ± 2% +10.93% (p=0.000 n=10+10)
Seal-64-X-8 110MB/s ± 3% 116MB/s ± 1% +4.75% (p=0.000 n=9+8)
Open-1350-8 339MB/s ± 2% 370MB/s ± 2% +9.04% (p=0.000 n=9+10)
Seal-1350-8 342MB/s ± 2% 371MB/s ± 1% +8.60% (p=0.000 n=9+10)
Open-1350-X-8 333MB/s ± 1% 367MB/s ± 3% +10.30% (p=0.000 n=9+10)
Seal-1350-X-8 331MB/s ± 4% 371MB/s ± 2% +11.96% (p=0.000 n=9+10)
Open-8192-8 377MB/s ± 3% 434MB/s ± 2% +15.07% (p=0.000 n=9+10)
Seal-8192-8 381MB/s ± 3% 436MB/s ± 1% +14.29% (p=0.000 n=9+9)
Open-8192-X-8 383MB/s ± 1% 429MB/s ± 2% +12.21% (p=0.000 n=10+10)
Seal-8192-X-8 385MB/s ± 2% 432MB/s ± 3% +12.26% (p=0.000 n=10+10)
name old alloc/op new alloc/op delta
Open-64-8 96.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10)
Seal-64-8 96.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10)
Open-64-X-8 96.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10)
Seal-64-X-8 96.0B ± 0% 0.0B -100.00% (p=0.000 n=10+10)
Open-1350-8 1.41kB ± 0% 0.00kB -100.00% (p=0.000 n=10+10)
Seal-1350-8 1.41kB ± 0% 0.00kB -100.00% (p=0.000 n=10+10)
Open-1350-X-8 1.41kB ± 0% 0.00kB -100.00% (p=0.000 n=10+10)
Seal-1350-X-8 1.41kB ± 0% 0.00kB -100.00% (p=0.000 n=10+10)
Open-8192-8 9.47kB ± 0% 0.00kB -100.00% (p=0.000 n=10+10)
Seal-8192-8 9.47kB ± 0% 0.00kB -100.00% (p=0.000 n=10+10)
Open-8192-X-8 9.47kB ± 0% 0.00kB -100.00% (p=0.000 n=10+10)
Seal-8192-X-8 9.47kB ± 0% 0.00kB -100.00% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
Open-64-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Seal-64-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Open-64-X-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Seal-64-X-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Open-1350-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Seal-1350-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Open-1350-X-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Seal-1350-X-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Open-8192-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Seal-8192-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Open-8192-X-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Seal-8192-X-8 1.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10)
Change-Id: I2c30ddc960a889b49c8ee8ff8073ffc4e75f43af
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/206977
Run-TryBot: Filippo Valsorda <filippo@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Katie Hockman <katie@golang.org>
diff --git a/chacha20poly1305/chacha20poly1305_generic.go b/chacha20poly1305/chacha20poly1305_generic.go
index 91b3856..fe191d3 100644
--- a/chacha20poly1305/chacha20poly1305_generic.go
+++ b/chacha20poly1305/chacha20poly1305_generic.go
@@ -12,56 +12,64 @@
"golang.org/x/crypto/poly1305"
)
-func roundTo16(n int) int {
- return 16 * ((n + 15) / 16)
+func writeWithPadding(p *poly1305.MAC, b []byte) {
+ p.Write(b)
+ if rem := len(b) % 16; rem != 0 {
+ var buf [16]byte
+ padLen := 16 - rem
+ p.Write(buf[:padLen])
+ }
+}
+
+func writeUint64(p *poly1305.MAC, n int) {
+ var buf [8]byte
+ binary.LittleEndian.PutUint64(buf[:], uint64(n))
+ p.Write(buf[:])
}
func (c *chacha20poly1305) sealGeneric(dst, nonce, plaintext, additionalData []byte) []byte {
ret, out := sliceForAppend(dst, len(plaintext)+poly1305.TagSize)
+ ciphertext, tag := out[:len(plaintext)], out[len(plaintext):]
if subtle.InexactOverlap(out, plaintext) {
panic("chacha20poly1305: invalid buffer overlap")
}
- var polyKey, discardBuf [32]byte
+ var polyKey [32]byte
s, _ := chacha20.NewUnauthenticatedCipher(c.key[:], nonce)
s.XORKeyStream(polyKey[:], polyKey[:])
- s.XORKeyStream(discardBuf[:], discardBuf[:]) // skip the next 32 bytes
- s.XORKeyStream(out, plaintext)
+ s.SetCounter(1) // set the counter to 1, skipping 32 bytes
+ s.XORKeyStream(ciphertext, plaintext)
- polyInput := make([]byte, roundTo16(len(additionalData))+roundTo16(len(plaintext))+8+8)
- copy(polyInput, additionalData)
- copy(polyInput[roundTo16(len(additionalData)):], out[:len(plaintext)])
- binary.LittleEndian.PutUint64(polyInput[len(polyInput)-16:], uint64(len(additionalData)))
- binary.LittleEndian.PutUint64(polyInput[len(polyInput)-8:], uint64(len(plaintext)))
-
- var tag [poly1305.TagSize]byte
- poly1305.Sum(&tag, polyInput, &polyKey)
- copy(out[len(plaintext):], tag[:])
+ p := poly1305.New(&polyKey)
+ writeWithPadding(p, additionalData)
+ writeWithPadding(p, ciphertext)
+ writeUint64(p, len(additionalData))
+ writeUint64(p, len(plaintext))
+ p.Sum(tag[:0])
return ret
}
func (c *chacha20poly1305) openGeneric(dst, nonce, ciphertext, additionalData []byte) ([]byte, error) {
- var tag [poly1305.TagSize]byte
- copy(tag[:], ciphertext[len(ciphertext)-16:])
+ tag := ciphertext[len(ciphertext)-16:]
ciphertext = ciphertext[:len(ciphertext)-16]
- var polyKey, discardBuf [32]byte
+ var polyKey [32]byte
s, _ := chacha20.NewUnauthenticatedCipher(c.key[:], nonce)
s.XORKeyStream(polyKey[:], polyKey[:])
- s.XORKeyStream(discardBuf[:], discardBuf[:]) // skip the next 32 bytes
+ s.SetCounter(1) // set the counter to 1, skipping 32 bytes
- polyInput := make([]byte, roundTo16(len(additionalData))+roundTo16(len(ciphertext))+8+8)
- copy(polyInput, additionalData)
- copy(polyInput[roundTo16(len(additionalData)):], ciphertext)
- binary.LittleEndian.PutUint64(polyInput[len(polyInput)-16:], uint64(len(additionalData)))
- binary.LittleEndian.PutUint64(polyInput[len(polyInput)-8:], uint64(len(ciphertext)))
+ p := poly1305.New(&polyKey)
+ writeWithPadding(p, additionalData)
+ writeWithPadding(p, ciphertext)
+ writeUint64(p, len(additionalData))
+ writeUint64(p, len(ciphertext))
ret, out := sliceForAppend(dst, len(ciphertext))
if subtle.InexactOverlap(out, ciphertext) {
panic("chacha20poly1305: invalid buffer overlap")
}
- if !poly1305.Verify(&tag, polyInput, &polyKey) {
+ if !p.Verify(tag) {
for i := range out {
out[i] = 0
}
diff --git a/poly1305/mac_noasm.go b/poly1305/mac_noasm.go
index b0c2cd0..347c8b1 100644
--- a/poly1305/mac_noasm.go
+++ b/poly1305/mac_noasm.go
@@ -7,5 +7,3 @@
package poly1305
type mac struct{ macGeneric }
-
-func newMAC(key *[32]byte) mac { return mac{newMACGeneric(key)} }
diff --git a/poly1305/poly1305.go b/poly1305/poly1305.go
index 066159b..3c75c2a 100644
--- a/poly1305/poly1305.go
+++ b/poly1305/poly1305.go
@@ -46,10 +46,9 @@
// two different messages with the same key allows an attacker
// to forge messages at will.
func New(key *[32]byte) *MAC {
- return &MAC{
- mac: newMAC(key),
- finalized: false,
- }
+ m := &MAC{}
+ initialize(key, &m.macState)
+ return m
}
// MAC is an io.Writer computing an authentication tag
@@ -58,7 +57,7 @@
// MAC cannot be used like common hash.Hash implementations,
// because using a poly1305 key twice breaks its security.
// Therefore writing data to a running MAC after calling
-// Sum causes it to panic.
+// Sum or Verify causes it to panic.
type MAC struct {
mac // platform-dependent implementation
@@ -71,10 +70,10 @@
// Write adds more data to the running message authentication code.
// It never returns an error.
//
-// It must not be called after the first call of Sum.
+// It must not be called after the first call of Sum or Verify.
func (h *MAC) Write(p []byte) (n int, err error) {
if h.finalized {
- panic("poly1305: write to MAC after Sum")
+ panic("poly1305: write to MAC after Sum or Verify")
}
return h.mac.Write(p)
}
@@ -87,3 +86,12 @@
h.finalized = true
return append(b, mac[:]...)
}
+
+// Verify reports whether the authenticator of all data written to the MAC
+// matches expected (compared in constant time). Write panics after Verify.
+func (h *MAC) Verify(expected []byte) bool {
+ var mac [TagSize]byte
+ h.mac.Sum(&mac)
+ h.finalized = true
+ return subtle.ConstantTimeCompare(expected, mac[:]) == 1
+}
diff --git a/poly1305/poly1305_test.go b/poly1305/poly1305_test.go
index b258eed..721a262 100644
--- a/poly1305/poly1305_test.go
+++ b/poly1305/poly1305_test.go
@@ -60,6 +60,30 @@
if tag != v.Tag() {
t.Errorf("%d: expected %x, got %x", i, v.Tag(), tag[:])
}
+ if !Verify(&tag, in, &key) {
+ t.Errorf("%d: tag didn't verify", i)
+ }
+ // If the key is zero, the tag will always be zero, independent of the input.
+ if len(in) > 0 && key != [32]byte{} {
+ in[0] ^= 0xff
+ if Verify(&tag, in, &key) {
+ t.Errorf("%d: tag verified after altering the input", i)
+ }
+ in[0] ^= 0xff
+ }
+ // If the input is empty, the tag only depends on the second half of the key.
+ if len(in) > 0 {
+ key[0] ^= 0xff
+ if Verify(&tag, in, &key) {
+ t.Errorf("%d: tag verified after altering the key", i)
+ }
+ key[0] ^= 0xff
+ }
+ tag[0] ^= 0xff
+ if Verify(&tag, in, &key) {
+ t.Errorf("%d: tag verified after altering the tag", i)
+ }
+ tag[0] ^= 0xff
}
}
@@ -150,9 +174,17 @@
t.Errorf("#%d: unexpected Write results: n = %d, err = %v", i, n, err)
}
h.Sum(out[:0])
- if tag := v.Tag(); out != tag {
+ tag := v.Tag()
+ if out != tag {
t.Errorf("%d: expected %x, got %x", i, tag[:], out[:])
}
+ if !h.Verify(tag[:]) {
+ t.Errorf("%d: Verify failed", i)
+ }
+ tag[0] ^= 0xff
+ if h.Verify(tag[:]) {
+ t.Errorf("%d: Verify succeeded after modifying the tag", i)
+ }
}
}
diff --git a/poly1305/sum_amd64.go b/poly1305/sum_amd64.go
index 35b9e38..99e5a1d 100644
--- a/poly1305/sum_amd64.go
+++ b/poly1305/sum_amd64.go
@@ -9,17 +9,6 @@
//go:noescape
func update(state *macState, msg []byte)
-func sum(out *[16]byte, m []byte, key *[32]byte) {
- h := newMAC(key)
- h.Write(m)
- h.Sum(out)
-}
-
-func newMAC(key *[32]byte) (h mac) {
- initialize(key, &h.r, &h.s)
- return
-}
-
// mac is a wrapper for macGeneric that redirects calls that would have gone to
// updateGeneric to update.
//
diff --git a/poly1305/sum_generic.go b/poly1305/sum_generic.go
index 1187eab..c77ff17 100644
--- a/poly1305/sum_generic.go
+++ b/poly1305/sum_generic.go
@@ -31,9 +31,10 @@
h.Sum(out)
}
-func newMACGeneric(key *[32]byte) (h macGeneric) {
- initialize(key, &h.r, &h.s)
- return
+func newMACGeneric(key *[32]byte) macGeneric {
+ m := macGeneric{}
+ initialize(key, &m.macState)
+ return m
}
// macState holds numbers in saturated 64-bit little-endian limbs. That is,
@@ -97,11 +98,12 @@
rMask1 = 0x0FFFFFFC0FFFFFFC
)
-func initialize(key *[32]byte, r, s *[2]uint64) {
- r[0] = binary.LittleEndian.Uint64(key[0:8]) & rMask0
- r[1] = binary.LittleEndian.Uint64(key[8:16]) & rMask1
- s[0] = binary.LittleEndian.Uint64(key[16:24])
- s[1] = binary.LittleEndian.Uint64(key[24:32])
+// initialize loads the 256-bit key into the two 128-bit secret values r and s.
+func initialize(key *[32]byte, m *macState) {
+ m.r[0] = binary.LittleEndian.Uint64(key[0:8]) & rMask0
+ m.r[1] = binary.LittleEndian.Uint64(key[8:16]) & rMask1
+ m.s[0] = binary.LittleEndian.Uint64(key[16:24])
+ m.s[1] = binary.LittleEndian.Uint64(key[24:32])
}
// uint128 holds a 128-bit number as two 64-bit limbs, for use with the
diff --git a/poly1305/sum_noasm.go b/poly1305/sum_noasm.go
index 2e3ae34..2b55a29 100644
--- a/poly1305/sum_noasm.go
+++ b/poly1305/sum_noasm.go
@@ -2,12 +2,17 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build s390x,!go1.11 !amd64,!s390x,!ppc64le gccgo purego
+// At this point only s390x has an assembly implementation of sum. All other
+// platforms implement mac (in assembly or generically) and just define sum as
+// using that through New. Once s390x is ported, this file can be deleted and
+// the body of sum moved into Sum.
+
+// +build !go1.11 !s390x gccgo purego
package poly1305
func sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
- h := newMAC(key)
+ h := New(key)
h.Write(msg)
- h.Sum(out)
+ h.Sum(out[:0])
}
diff --git a/poly1305/sum_ppc64le.go b/poly1305/sum_ppc64le.go
index 92597bb..2e7a120 100644
--- a/poly1305/sum_ppc64le.go
+++ b/poly1305/sum_ppc64le.go
@@ -9,17 +9,6 @@
//go:noescape
func update(state *macState, msg []byte)
-func sum(out *[16]byte, m []byte, key *[32]byte) {
- h := newMAC(key)
- h.Write(m)
- h.Sum(out)
-}
-
-func newMAC(key *[32]byte) (h mac) {
- initialize(key, &h.r, &h.s)
- return
-}
-
// mac is a wrapper for macGeneric that redirects calls that would have gone to
// updateGeneric to update.
//