argon2: fix incorrect key derivation if parallelism > 1

This change fixes an incorrect key derivation if the
degree of parallelism is greater than 1.

This change also adds test vectors generated with the CLI of the
reference implementation, https://github.com/P-H-C/phc-winner-argon2.

Fixes golang/go#23200

Change-Id: I8add8382b9e9ebbf9a70493050867c9af4ed6aa7
Reviewed-on: https://go-review.googlesource.com/85055
Reviewed-by: Adam Langley <agl@golang.org>
Run-TryBot: Adam Langley <agl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/argon2/argon2.go b/argon2/argon2.go
index 61216e8..71cf8c5 100644
--- a/argon2/argon2.go
+++ b/argon2/argon2.go
@@ -47,24 +47,28 @@
 		panic("argon2: number of rounds too small")
 	}
 	if threads < 1 {
-		panic("argon2: paralisim degree too low")
+		panic("argon2: parallelism degree too low")
 	}
-	mem := memory / (4 * uint32(threads)) * (4 * uint32(threads))
-	if mem < 8*uint32(threads) {
-		mem = 8 * uint32(threads)
+	h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
+
+	memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
+	if memory < 2*syncPoints*uint32(threads) {
+		memory = 2 * syncPoints * uint32(threads)
 	}
-	B := initBlocks(password, salt, secret, data, time, mem, uint32(threads), keyLen, mode)
-	processBlocks(B, time, mem, uint32(threads), mode)
-	return extractKey(B, mem, uint32(threads), keyLen)
+	B := initBlocks(&h0, memory, uint32(threads))
+	processBlocks(B, time, memory, uint32(threads), mode)
+	return extractKey(B, memory, uint32(threads), keyLen)
 }
 
-const blockLength = 128
+const (
+	blockLength = 128
+	syncPoints  = 4
+)
 
 type block [blockLength]uint64
 
-func initBlocks(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) []block {
+func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte {
 	var (
-		block0 [1024]byte
 		h0     [blake2b.Size + 8]byte
 		params [24]byte
 		tmp    [4]byte
@@ -91,7 +95,11 @@
 	b2.Write(tmp[:])
 	b2.Write(data)
 	b2.Sum(h0[:0])
+	return h0
+}
 
+func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block {
+	var block0 [1024]byte
 	B := make([]block, memory)
 	for lane := uint32(0); lane < threads; lane++ {
 		j := lane * (memory / threads)
@@ -99,13 +107,13 @@
 
 		binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0)
 		blake2bHash(block0[:], h0[:])
-		for i := range B[0] {
+		for i := range B[j+0] {
 			B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:])
 		}
 
 		binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1)
 		blake2bHash(block0[:], h0[:])
-		for i := range B[0] {
+		for i := range B[j+1] {
 			B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:])
 		}
 	}
@@ -113,7 +121,6 @@
 }
 
 func processBlocks(B []block, time, memory, threads uint32, mode int) {
-	const syncPoints = 4
 	lanes := memory / threads
 	segments := lanes / syncPoints
 
@@ -131,7 +138,7 @@
 		index := uint32(0)
 		if n == 0 && slice == 0 {
 			index = 2 // we have already generated the first two blocks
-			if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
+			if mode == argon2i || mode == argon2id {
 				in[6]++
 				processBlock(&addresses, &in, &zero)
 				processBlock(&addresses, &addresses, &zero)
@@ -143,7 +150,7 @@
 		for index < segments {
 			prev := offset - 1
 			if index == 0 && slice == 0 {
-				prev = lane*lanes + lanes - 1 // last block in lane
+				prev += lanes // last block in lane
 			}
 			if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) {
 				if index%blockLength == 0 {
@@ -194,8 +201,10 @@
 
 func indexAlpha(rand uint64, lanes, segments, threads, n, slice, lane, index uint32) uint32 {
 	refLane := uint32(rand>>32) % threads
-
-	m, s := 3*segments, (slice+1)%4*segments
+	if n == 0 && slice == 0 {
+		refLane = lane
+	}
+	m, s := 3*segments, ((slice+1)%syncPoints)*segments
 	if lane == refLane {
 		m += index
 	}
diff --git a/argon2/argon2_test.go b/argon2/argon2_test.go
index 3f72c75..775b97a 100644
--- a/argon2/argon2_test.go
+++ b/argon2/argon2_test.go
@@ -1,6 +1,7 @@
 // Copyright 2017 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+
 package argon2
 
 import (
@@ -76,6 +77,20 @@
 	}
 }
 
+func TestVectors(t *testing.T) {
+	password, salt := []byte("password"), []byte("somesalt")
+	for i, v := range testVectors {
+		want, err := hex.DecodeString(v.hash)
+		if err != nil {
+			t.Fatalf("Test %d: failed to decode hash: %v", i, err)
+		}
+		hash := deriveKey(v.mode, password, salt, nil, nil, v.time, v.memory, v.threads, uint32(len(want)))
+		if !bytes.Equal(hash, want) {
+			t.Errorf("Test %d - got: %s want: %s", i, hex.EncodeToString(hash), hex.EncodeToString(want))
+		}
+	}
+}
+
 func benchmarkArgon2(mode int, time, memory uint32, threads uint8, keyLen uint32, b *testing.B) {
 	password := []byte("password")
 	salt := []byte("choosing random salts is hard")
@@ -111,3 +126,108 @@
 	b.Run(" Time: 4, Memory: 64 MB, Threads: 4", func(b *testing.B) { benchmarkArgon2(argon2id, 4, 64*1024, 4, 32, b) })
 	b.Run(" Time: 5, Memory: 64 MB, Threads: 4", func(b *testing.B) { benchmarkArgon2(argon2id, 5, 64*1024, 4, 32, b) })
 }
+
+// Generated with the CLI of https://github.com/P-H-C/phc-winner-argon2 using the password and salt from TestVectors.
+var testVectors = []struct {
+	mode         int
+	time, memory uint32
+	threads      uint8
+	hash         string
+}{
+	{
+		mode: argon2i, time: 1, memory: 64, threads: 1,
+		hash: "b9c401d1844a67d50eae3967dc28870b22e508092e861a37",
+	},
+	{
+		mode: argon2d, time: 1, memory: 64, threads: 1,
+		hash: "8727405fd07c32c78d64f547f24150d3f2e703a89f981a19",
+	},
+	{
+		mode: argon2id, time: 1, memory: 64, threads: 1,
+		hash: "655ad15eac652dc59f7170a7332bf49b8469be1fdb9c28bb",
+	},
+	{
+		mode: argon2i, time: 2, memory: 64, threads: 1,
+		hash: "8cf3d8f76a6617afe35fac48eb0b7433a9a670ca4a07ed64",
+	},
+	{
+		mode: argon2d, time: 2, memory: 64, threads: 1,
+		hash: "3be9ec79a69b75d3752acb59a1fbb8b295a46529c48fbb75",
+	},
+	{
+		mode: argon2id, time: 2, memory: 64, threads: 1,
+		hash: "068d62b26455936aa6ebe60060b0a65870dbfa3ddf8d41f7",
+	},
+	{
+		mode: argon2i, time: 2, memory: 64, threads: 2,
+		hash: "2089f3e78a799720f80af806553128f29b132cafe40d059f",
+	},
+	{
+		mode: argon2d, time: 2, memory: 64, threads: 2,
+		hash: "68e2462c98b8bc6bb60ec68db418ae2c9ed24fc6748a40e9",
+	},
+	{
+		mode: argon2id, time: 2, memory: 64, threads: 2,
+		hash: "350ac37222f436ccb5c0972f1ebd3bf6b958bf2071841362",
+	},
+	{
+		mode: argon2i, time: 3, memory: 256, threads: 2,
+		hash: "f5bbf5d4c3836af13193053155b73ec7476a6a2eb93fd5e6",
+	},
+	{
+		mode: argon2d, time: 3, memory: 256, threads: 2,
+		hash: "f4f0669218eaf3641f39cc97efb915721102f4b128211ef2",
+	},
+	{
+		mode: argon2id, time: 3, memory: 256, threads: 2,
+		hash: "4668d30ac4187e6878eedeacf0fd83c5a0a30db2cc16ef0b",
+	},
+	{
+		mode: argon2i, time: 4, memory: 4096, threads: 4,
+		hash: "a11f7b7f3f93f02ad4bddb59ab62d121e278369288a0d0e7",
+	},
+	{
+		mode: argon2d, time: 4, memory: 4096, threads: 4,
+		hash: "935598181aa8dc2b720914aa6435ac8d3e3a4210c5b0fb2d",
+	},
+	{
+		mode: argon2id, time: 4, memory: 4096, threads: 4,
+		hash: "145db9733a9f4ee43edf33c509be96b934d505a4efb33c5a",
+	},
+	{
+		mode: argon2i, time: 4, memory: 1024, threads: 8,
+		hash: "0cdd3956aa35e6b475a7b0c63488822f774f15b43f6e6e17",
+	},
+	{
+		mode: argon2d, time: 4, memory: 1024, threads: 8,
+		hash: "83604fc2ad0589b9d055578f4d3cc55bc616df3578a896e9",
+	},
+	{
+		mode: argon2id, time: 4, memory: 1024, threads: 8,
+		hash: "8dafa8e004f8ea96bf7c0f93eecf67a6047476143d15577f",
+	},
+	{
+		mode: argon2i, time: 2, memory: 64, threads: 3,
+		hash: "5cab452fe6b8479c8661def8cd703b611a3905a6d5477fe6",
+	},
+	{
+		mode: argon2d, time: 2, memory: 64, threads: 3,
+		hash: "22474a423bda2ccd36ec9afd5119e5c8949798cadf659f51",
+	},
+	{
+		mode: argon2id, time: 2, memory: 64, threads: 3,
+		hash: "4a15b31aec7c2590b87d1f520be7d96f56658172deaa3079",
+	},
+	{
+		mode: argon2i, time: 3, memory: 1024, threads: 6,
+		hash: "d236b29c2b2a09babee842b0dec6aa1e83ccbdea8023dced",
+	},
+	{
+		mode: argon2d, time: 3, memory: 1024, threads: 6,
+		hash: "a3351b0319a53229152023d9206902f4ef59661cdca89481",
+	},
+	{
+		mode: argon2id, time: 3, memory: 1024, threads: 6,
+		hash: "1640b932f4b60e272f5d2207b9a9c626ffa1bd88d2349016",
+	},
+}