xts: reduce tweak allocations

The call to k2.Encrypt causes tweak to escape to the heap, resulting
in a 16-byte allocation for each call to Encrypt/Decrypt. Moving
tweak into the Cipher struct would allow it to be reused, but this
is ruled out by the Cipher docstring, which states that it is safe
for concurrent use. Instead, manage tweak arrays with a sync.Pool.
Benchmarks indicate that this amortizes allocation cost (0 allocs/op
in steady state) at the price of a small (~5%) increase in ns/op.

benchmark          old ns/op     new ns/op     delta
BenchmarkXTS-4     234           245           +4.70%

benchmark          old allocs    new allocs    delta
BenchmarkXTS-4     2             0             -100.00%

benchmark          old bytes     new bytes     delta
BenchmarkXTS-4     32            0             -100.00%

Change-Id: I5e0dd8c2e1a1078a151bbeb1d0760936b6b56216
GitHub-Last-Rev: 14d81f589f3ada2b19511d592000657af3410a51
GitHub-Pull-Request: golang/crypto#51
Reviewed-on: https://go-review.googlesource.com/c/118535
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Run-TryBot: Filippo Valsorda <filippo@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/xts/xts.go b/xts/xts.go
index f08582b..b51308e 100644
--- a/xts/xts.go
+++ b/xts/xts.go
@@ -27,12 +27,13 @@
 	"crypto/cipher"
 	"encoding/binary"
 	"errors"
+	"sync"
 
 	"golang.org/x/crypto/internal/subtle"
 )
 
-// Cipher contains an expanded key structure. It doesn't contain mutable state
-// and therefore can be used concurrently.
+// Cipher contains an expanded key structure. It is safe for concurrent use if
+// the underlying block cipher is safe for concurrent use.
 type Cipher struct {
 	k1, k2 cipher.Block
 }
@@ -41,6 +42,12 @@
 // only defined for 16-byte ciphers.
 const blockSize = 16
 
+var tweakPool = sync.Pool{ // reusable tweak buffers drawn by Encrypt and Decrypt instead of a per-call array
+	New: func() interface{} {
+		return new([blockSize]byte) // *[blockSize]byte, matching the type assertion at Get; callers zero it before use
+	},
+}
+
 // NewCipher creates a Cipher given a function for creating the underlying
 // block cipher (which must have a block size of 16 bytes). The key must be
 // twice the length of the underlying cipher's key.
@@ -72,7 +79,10 @@
 		panic("xts: invalid buffer overlap")
 	}
 
-	var tweak [blockSize]byte
+	tweak := tweakPool.Get().(*[blockSize]byte)
+	for i := range tweak {
+		tweak[i] = 0
+	}
 	binary.LittleEndian.PutUint64(tweak[:8], sectorNum)
 
 	c.k2.Encrypt(tweak[:], tweak[:])
@@ -88,8 +98,10 @@
 		plaintext = plaintext[blockSize:]
 		ciphertext = ciphertext[blockSize:]
 
-		mul2(&tweak)
+		mul2(tweak)
 	}
+
+	tweakPool.Put(tweak)
 }
 
 // Decrypt decrypts a sector of ciphertext and puts the result into plaintext.
@@ -106,7 +118,10 @@
 		panic("xts: invalid buffer overlap")
 	}
 
-	var tweak [blockSize]byte
+	tweak := tweakPool.Get().(*[blockSize]byte)
+	for i := range tweak {
+		tweak[i] = 0
+	}
 	binary.LittleEndian.PutUint64(tweak[:8], sectorNum)
 
 	c.k2.Encrypt(tweak[:], tweak[:])
@@ -122,8 +137,10 @@
 		plaintext = plaintext[blockSize:]
 		ciphertext = ciphertext[blockSize:]
 
-		mul2(&tweak)
+		mul2(tweak)
 	}
+
+	tweakPool.Put(tweak)
 }
 
 // mul2 multiplies tweak by 2 in GF(2¹²⁸) with an irreducible polynomial of
diff --git a/xts/xts_test.go b/xts/xts_test.go
index 96d3b6c..75db1c5 100644
--- a/xts/xts_test.go
+++ b/xts/xts_test.go
@@ -103,3 +103,19 @@
 		t.Errorf("En/Decryption is not inverse")
 	}
 }
+
+func BenchmarkXTS(b *testing.B) { // benchmarks one Encrypt followed by one Decrypt of 32 bytes per iteration
+	b.ReportAllocs()
+	c, err := NewCipher(aes.NewCipher, make([]byte, 32)) // all-zero 32-byte key; NewCipher wants twice the underlying cipher's key length
+	if err != nil {
+		b.Fatalf("NewCipher failed: %s", err)
+	}
+	plaintext := make([]byte, 32) // two 16-byte blocks
+	encrypted := make([]byte, 48) // oversized: only the first len(plaintext) bytes are handed to Decrypt
+	decrypted := make([]byte, 48) // same size as encrypted
+
+	for i := 0; i < b.N; i++ {
+		c.Encrypt(encrypted, plaintext, 0)
+		c.Decrypt(decrypted, encrypted[:len(plaintext)], 0) // ciphertext trimmed to the plaintext length
+	}
+}