chacha20poly1305: new package.

This change adds a package, chacha20poly1305, which implements the
ChaCha20-Poly1305 AEAD from RFC 7539. This AEAD has several attractive
features:
   1. It's naturally constant time. AES-GCM needs either dedicated
      hardware or extreme effort to be fast and constant-time, while
      this design is easy to make constant-time.
   2. It's fast on modern processors: it runs at 1GB/s on my Ivy Bridge
      system.
   3. It's seeing significant use in TLS. (A change for crypto/tls is
      forthcoming.)

This change merges two CLs:
  https://go-review.googlesource.com/#/c/24717
  https://go-review.googlesource.com/#/c/26691

I took the amd64-optimised AEAD implementation from the former because
it was significantly faster. But the structure of the change is taken
from the latter.

This version will be checked into x/crypto. This package will then be
vendored into the stdlib so that it can be used from crypto/tls.

Change-Id: I5a60587958b7afeec81ca1091e603a7e8517000b
Reviewed-on: https://go-review.googlesource.com/30728
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/chacha20poly1305/chacha20poly1305_generic.go b/chacha20poly1305/chacha20poly1305_generic.go
new file mode 100644
index 0000000..f7e4bfb
--- /dev/null
+++ b/chacha20poly1305/chacha20poly1305_generic.go
@@ -0,0 +1,70 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package chacha20poly1305
+
+import (
+	"encoding/binary"
+
+	"golang.org/x/crypto/chacha20poly1305/internal/chacha20"
+	"golang.org/x/crypto/poly1305"
+)
+
+func roundTo16(n int) int { // roundTo16 rounds a non-negative n up to the nearest multiple of 16 (n itself if already a multiple).
+	return 16 * ((n + 15) / 16)
+}
+
+func (c *chacha20poly1305) sealGeneric(dst, nonce, plaintext, additionalData []byte) []byte { // sealGeneric encrypts plaintext under c.key and nonce, writes ciphertext followed by a Poly1305 tag over additionalData and the ciphertext, and returns the slice produced by sliceForAppend(dst, ...).
+	var counter [16]byte // bytes 0-3 stay zero here; the nonce is copied in from byte 4 onwards
+	copy(counter[4:], nonce)
+
+	var polyKey [32]byte
+	chacha20.XORKeyStream(polyKey[:], polyKey[:], &counter, &c.key) // polyKey is all zero on input, so it ends up holding raw keystream; presumably the one-time Poly1305 key of RFC 7539 - confirm against internal/chacha20
+
+	ret, out := sliceForAppend(dst, len(plaintext)+poly1305.TagSize) // NOTE(review): sliceForAppend is defined elsewhere; presumably ret is dst extended and out is the newly added tail - confirm
+	counter[0] = 1 // first counter byte becomes 1 so the payload does not reuse the keystream consumed for polyKey
+	chacha20.XORKeyStream(out, plaintext, &counter, &c.key)
+
+	polyInput := make([]byte, roundTo16(len(additionalData))+roundTo16(len(plaintext))+8+8) // MAC input: AD and ciphertext, each zero-padded to a 16-byte boundary, then two 8-byte lengths
+	copy(polyInput, additionalData)
+	copy(polyInput[roundTo16(len(additionalData)):], out[:len(plaintext)]) // out[:len(plaintext)] is the ciphertext just produced
+	binary.LittleEndian.PutUint64(polyInput[len(polyInput)-16:], uint64(len(additionalData))) // unpadded AD length, little-endian
+	binary.LittleEndian.PutUint64(polyInput[len(polyInput)-8:], uint64(len(plaintext))) // unpadded ciphertext length, little-endian
+
+	var tag [poly1305.TagSize]byte
+	poly1305.Sum(&tag, polyInput, &polyKey)
+	copy(out[len(plaintext):], tag[:]) // the tag fills the last poly1305.TagSize bytes of out
+
+	return ret
+}
+
+func (c *chacha20poly1305) openGeneric(dst, nonce, ciphertext, additionalData []byte) ([]byte, error) { // openGeneric checks the Poly1305 tag at the end of ciphertext over additionalData and the ciphertext body; on success it decrypts into the slice from sliceForAppend(dst, ...) and returns it, otherwise it returns (nil, errOpen).
+	var tag [poly1305.TagSize]byte
+	copy(tag[:], ciphertext[len(ciphertext)-poly1305.TagSize:]) // NOTE(review): panics if ciphertext is shorter than the tag; presumably the caller checks the length first - confirm
+	ciphertext = ciphertext[:len(ciphertext)-poly1305.TagSize] // from here on ciphertext is the encrypted payload without the tag
+
+	var counter [16]byte // bytes 0-3 stay zero here; the nonce is copied in from byte 4 onwards
+	copy(counter[4:], nonce)
+
+	var polyKey [32]byte
+	chacha20.XORKeyStream(polyKey[:], polyKey[:], &counter, &c.key) // polyKey is all zero on input, so it ends up holding raw keystream; presumably the one-time Poly1305 key of RFC 7539 - confirm against internal/chacha20
+
+	polyInput := make([]byte, roundTo16(len(additionalData))+roundTo16(len(ciphertext))+8+8) // MAC input: AD and ciphertext, each zero-padded to a 16-byte boundary, then two 8-byte lengths
+	copy(polyInput, additionalData)
+	copy(polyInput[roundTo16(len(additionalData)):], ciphertext)
+	binary.LittleEndian.PutUint64(polyInput[len(polyInput)-16:], uint64(len(additionalData))) // unpadded AD length, little-endian
+	binary.LittleEndian.PutUint64(polyInput[len(polyInput)-8:], uint64(len(ciphertext))) // unpadded ciphertext length, little-endian
+
+	ret, out := sliceForAppend(dst, len(ciphertext)) // NOTE(review): sliceForAppend is defined elsewhere; presumably ret is dst extended and out is the newly added tail - confirm
+	if !poly1305.Verify(&tag, polyInput, &polyKey) { // the tag is checked before any decryption is performed
+		for i := range out { // zero the output region on failure
+			out[i] = 0
+		}
+		return nil, errOpen
+	}
+
+	counter[0] = 1 // first counter byte becomes 1 so the payload does not reuse the keystream consumed for polyKey
+	chacha20.XORKeyStream(out, ciphertext, &counter, &c.key)
+	return ret, nil
+}