notary/internal/tlog: add log tile helpers

This is part of a design sketch for a Go module notary.
Eventually the code will live outside golang.org/x/exp.

Everything here is subject to change! Don't depend on it!

This CL implements helpers for navigating the space of log tiles.

Change-Id: Ia37427c4dfb03f5dda5f5ef299ad7b2aec5b2485
Reviewed-on: https://go-review.googlesource.com/c/exp/+/161903
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
diff --git a/notary/internal/tlog/tile.go b/notary/internal/tlog/tile.go
new file mode 100644
index 0000000..8e39de9
--- /dev/null
+++ b/notary/internal/tlog/tile.go
@@ -0,0 +1,410 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tlog
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// A Tile is a description of a transparency log tile.
+// A tile of height H at level L offset N lists W consecutive hashes
+// at level H*L of the tree starting at offset N*(2**H).
+// A complete tile lists 2**H hashes; a partial tile lists fewer.
+// Note that a tile represents the entire subtree of height H
+// with those hashes as the leaves. The levels above H*L
+// can be reconstructed by hashing the leaves.
+//
+// Each Tile can be encoded as a “tile coordinate path”
+// of the form tile/H/L/NNN[.p/W].
+// The .p/W suffix is present only for partial tiles, meaning W < 2**H.
+// The NNN element is an encoding of N into 3-digit path elements.
+// All but the last path element begins with an "x".
+// For example,
+// Tile{H: 3, L: 4, N: 1234067, W: 1}'s path
+// is tile/3/4/x001/x234/067.p/1, and
+// Tile{H: 3, L: 4, N: 1234067, W: 8}'s path
+// is tile/3/4/x001/x234/067.
+// See Tile's Path method and the ParseTilePath function.
+type Tile struct {
+	H int   // height of tile (1 ≤ H ≤ 30)
+	L int   // level in tiling (1 ≤ H ≤ 63)
+	N int64 // number within level (unbounded)
+	W int   // width of tile (1 ≤ W ≤ 2**H; 2**H is complete tile)
+}
+
+// TileForIndex returns the tile of height h ≥ 1
+// and least width storing the given hash storage index.
+func TileForIndex(h int, index int64) Tile {
+	if h < 1 {
+		panic("TileForIndex: invalid height")
+	}
+	t, _, _ := tileForIndex(h, index)
+	return t
+}
+
+// tileForIndex returns the tile of height h ≥ 1
+// storing the given hash index, which can be
+// reconstructed using tileHash(data[start:end]).
+func tileForIndex(h int, index int64) (t Tile, start, end int) {
+	level, n := SplitStoredHashIndex(index)
+	t.H = h
+	t.L = level / h
+	level -= t.L * h // now level within tile
+	t.N = n << uint(level) >> uint(t.H)
+	n -= t.N << uint(t.H) >> uint(level) // now n within tile at level
+	t.W = int((n + 1) << uint(level))
+	return t, int(n<<uint(level)) * HashSize, int((n+1)<<uint(level)) * HashSize
+}
+
+// HashFromTile returns the hash at the given storage index,
+// provided that t == TileForIndex(t.H, index) or a wider version,
+// and data is t's tile data (of length at least t.W*HashSize).
+func HashFromTile(t Tile, data []byte, index int64) (Hash, error) {
+	if t.H < 1 || t.H > 30 || t.L < 0 || t.L >= 64 || t.W < 1 || t.W > 1<<uint(t.H) {
+		return Hash{}, fmt.Errorf("invalid tile %v", t.Path())
+	}
+	if len(data) < t.W*HashSize {
+		return Hash{}, fmt.Errorf("data len %d too short for tile %v", len(data), t.Path())
+	}
+	t1, start, end := tileForIndex(t.H, index)
+	if t.L != t1.L || t.N != t1.N || t.W < t1.W {
+		return Hash{}, fmt.Errorf("index %v is in %v not %v", index, t1.Path(), t.Path())
+	}
+	return tileHash(data[start:end]), nil
+}
+
+// tileHash computes the subtree hash corresponding to the (2^K)-1 hashes in data.
+func tileHash(data []byte) Hash {
+	if len(data) == 0 {
+		panic("bad math in tileHash")
+	}
+	if len(data) == HashSize {
+		var h Hash
+		copy(h[:], data)
+		return h
+	}
+	n := len(data) / 2
+	return hashNode(tileHash(data[:n]), tileHash(data[n:]))
+}
+
+// NewTiles returns the coordinates of the tiles of height h ≥ 1
+// that must be published when publishing from a tree of
+// size newTreeSize to replace a tree of size oldTreeSize.
+// (No tiles need to be published for a tree of size zero.)
+func NewTiles(h int, oldTreeSize, newTreeSize int64) []Tile {
+	if h < 1 {
+		panic(fmt.Sprintf("NewTiles: invalid height %d", h))
+	}
+	H := uint(h)
+	var tiles []Tile
+	for level := uint(0); newTreeSize>>(H*level) > 0; level++ {
+		oldN := oldTreeSize >> (H * level)
+		newN := newTreeSize >> (H * level)
+		for n := oldN >> H; n < newN>>H; n++ {
+			tiles = append(tiles, Tile{H: h, L: int(level), N: n, W: 1 << H})
+		}
+		n := newN >> H
+		maxW := int(newN - n<<H)
+		minW := 1
+		if oldN > n<<H {
+			minW = int(oldN - n<<H)
+		}
+		for w := minW; w <= maxW; w++ {
+			tiles = append(tiles, Tile{H: h, L: int(level), N: n, W: w})
+		}
+	}
+	return tiles
+}
+
+// ReadTileData reads the hashes for tile t from r
+// and returns the corresponding tile data.
+func ReadTileData(t Tile, r HashReader) ([]byte, error) {
+	size := t.W
+	if size == 0 {
+		size = 1 << uint(t.H)
+	}
+	start := t.N << uint(t.H)
+	indexes := make([]int64, size)
+	for i := 0; i < size; i++ {
+		indexes[i] = StoredHashIndex(t.H*t.L, start+int64(i))
+	}
+
+	hashes, err := r.ReadHashes(indexes)
+	if err != nil {
+		return nil, err
+	}
+	if len(hashes) != len(indexes) {
+		return nil, fmt.Errorf("notary: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
+	}
+
+	tile := make([]byte, size*HashSize)
+	for i := 0; i < size; i++ {
+		copy(tile[i*HashSize:], hashes[i][:])
+	}
+	return tile, nil
+}
+
+// To limit the size of any particular directory listing,
+// we encode the (possibly very large) number N
+// by encoding three digits at a time.
+// For example, 123456789 encodes as x123/x456/789.
+// Each directory has at most 1000 each xNNN, NNN, and NNN.p children,
+// so there are at most 3000 entries in any one directory.
+const pathBase = 1000
+
+// Path returns a tile coordinate path describing t.
+func (t Tile) Path() string {
+	n := t.N
+	nStr := fmt.Sprintf("%03d", n%pathBase)
+	for n >= pathBase {
+		n /= pathBase
+		nStr = fmt.Sprintf("x%03d/%s", n%pathBase, nStr)
+	}
+	pStr := ""
+	if t.W != 1<<uint(t.H) {
+		pStr = fmt.Sprintf(".p/%d", t.W)
+	}
+	return fmt.Sprintf("tile/%d/%d/%s%s", t.H, t.L, nStr, pStr)
+}
+
+// ParseTilePath parses a tile coordinate path.
+func ParseTilePath(path string) (Tile, error) {
+	f := strings.Split(path, "/")
+	if len(f) < 4 || f[0] != "tile" {
+		return Tile{}, &badPathError{path}
+	}
+	h, err1 := strconv.Atoi(f[1])
+	l, err2 := strconv.Atoi(f[2])
+	if err1 != nil || err2 != nil || h < 1 || l < 0 || h > 30 {
+		return Tile{}, &badPathError{path}
+	}
+	w := 1 << uint(h)
+	if dotP := f[len(f)-2]; strings.HasSuffix(dotP, ".p") {
+		ww, err := strconv.Atoi(f[len(f)-1])
+		if err != nil || ww <= 0 || ww >= w {
+			return Tile{}, &badPathError{path}
+		}
+		w = ww
+		f[len(f)-2] = dotP[:len(dotP)-len(".p")]
+		f = f[:len(f)-1]
+	}
+	f = f[3:]
+	n := int64(0)
+	for _, s := range f {
+		nn, err := strconv.Atoi(strings.TrimPrefix(s, "x"))
+		if err != nil || nn < 0 || nn >= pathBase {
+			return Tile{}, &badPathError{path}
+		}
+		n = n*pathBase + int64(nn)
+	}
+	t := Tile{H: h, L: l, N: n, W: w}
+	if path != t.Path() {
+		return Tile{}, &badPathError{path}
+	}
+	return t, nil
+}
+
+type badPathError struct {
+	path string
+}
+
+func (e *badPathError) Error() string {
+	return fmt.Sprintf("malformed tile path %q", e.path)
+}
+
+// A TileReader reads tiles from a notary's log.
+type TileReader interface {
+	// Height returns the height of the available tiles.
+	Height() int
+
+	// ReadTiles returns the data for each requested tile.
+	// If ReadTiles returns err == nil, it must also return
+	// a data record for each tile (len(data) == len(tiles))
+	// and each data record must be the correct length
+	// (len(data[i]) == tiles[i].W*HashSize).
+	ReadTiles(tiles []Tile) (data [][]byte, err error)
+
+	// Reject marks the tile as invalid.
+	// A caller can call Reject if a returned Tile cannot
+	// be authenticated against a given Tree.
+	// TODO(rsc): The point of this method is to let the cache speculatively
+	// write the downloaded set of tiles to disk during ReadTiles
+	// and then have the caller call back with Reject when it finds
+	// out a tile was inconsistent with a known signed key.
+	// It would be better to let the TileReader implementation
+	// find out whether the tile is good before caching any.
+	Reject(tile Tile)
+}
+
+// TileHashReader returns a HashReader that satisfies requests
+// by loading tiles of the given tree.
+//
+// The returned HashReader checks that loaded tiles are
+// valid for the given tree; if not, it calls tr.Reject to report them
+// and returns an error. A consequence is that any hashes returned
+// by the HashReader are already proven to be in the tree.
+func TileHashReader(tree Tree, tr TileReader) HashReader {
+	return &tileHashReader{tree: tree, tr: tr}
+}
+
+type tileHashReader struct {
+	tree Tree
+	tr   TileReader
+}
+
+// tileParent returns t's k'th tile parent in the tiles for a tree of size n.
+// If there is no such parent, tileParent returns Tile{}.
+func tileParent(t Tile, k int, n int64) Tile {
+	t.L += k
+	t.N >>= uint(k * t.H)
+	t.W = 1 << uint(t.H)
+	if max := n >> uint(t.L*t.H); t.N<<uint(t.H)+int64(t.W) >= max {
+		if t.N<<uint(t.H) >= max {
+			return Tile{}
+		}
+		t.W = int(max - t.N<<uint(t.H))
+	}
+	return t
+}
+
+func (r *tileHashReader) ReadHashes(indexes []int64) ([]Hash, error) {
+	h := r.tr.Height()
+
+	tileOrder := make(map[Tile]int) // tileOrder[tileKey(tiles[i])] = i
+	var tiles []Tile
+
+	// Plan to fetch tiles necessary to recompute tree hash.
+	// If it matches, those tiles are authenticated.
+	stx := subTreeIndex(0, r.tree.N, nil)
+	stxTileOrder := make([]int, len(stx))
+	for i, x := range stx {
+		tile, _, _ := tileForIndex(h, x)
+		tile = tileParent(tile, 0, r.tree.N)
+		if j, ok := tileOrder[tile]; ok {
+			stxTileOrder[i] = j
+			continue
+		}
+		stxTileOrder[i] = len(tiles)
+		tileOrder[tile] = len(tiles)
+		tiles = append(tiles, tile)
+	}
+
+	// Plan to fetch tiles containing the indexes,
+	// along with any parent tiles needed
+	// for authentication. For most calls,
+	// the parents are being fetched anyway.
+	indexTileOrder := make([]int, len(indexes))
+	for i, x := range indexes {
+		level, n := SplitStoredHashIndex(x)
+		if n<<uint(level) >= r.tree.N {
+			return nil, fmt.Errorf("indexes not in tree")
+		}
+
+		tile, _, _ := tileForIndex(h, x)
+
+		// Walk up parent tiles until we find one we've requested.
+		// That one will be authenticated.
+		k := 0
+		for ; ; k++ {
+			p := tileParent(tile, k, r.tree.N)
+			if j, ok := tileOrder[p]; ok {
+				if k == 0 {
+					indexTileOrder[i] = j
+				}
+				break
+			}
+		}
+
+		// Walk back down recording child tiles after parents.
+		// This loop ends by revisiting the tile for this index
+		// (tileParent(tile, 0, r.tree.N)) unless k == 0, in which
+		// case the previous loop did it.
+		for k--; k >= 0; k-- {
+			p := tileParent(tile, k, r.tree.N)
+			if p.W != 1<<uint(p.H) {
+				// Only full tiles have parents.
+				// This tile has a parent, so it must be full.
+				return nil, fmt.Errorf("bad math in tileHashReader: %d %d %v", r.tree.N, x, p)
+			}
+			tileOrder[p] = len(tiles)
+			if k == 0 {
+				indexTileOrder[i] = len(tiles)
+			}
+			tiles = append(tiles, p)
+		}
+	}
+
+	// Fetch all the tile data.
+	data, err := r.tr.ReadTiles(tiles)
+	if err != nil {
+		return nil, err
+	}
+	if len(data) != len(tiles) {
+		return nil, fmt.Errorf("TileReader returned bad result slice (len=%d, want %d)", len(data), len(tiles))
+	}
+	for i, tile := range tiles {
+		if len(data[i]) != tile.W*HashSize {
+			return nil, fmt.Errorf("TileReader returned bad result slice (%v len=%d, want %d)", tile.Path(), len(data[i]), tile.W*HashSize)
+		}
+	}
+
+	// Authenticate the initial tiles against the tree hash.
+	// They are arranged so that parents are authenticated before children.
+	// First the tiles needed for the tree hash.
+	th, err := HashFromTile(tiles[stxTileOrder[len(stx)-1]], data[stxTileOrder[len(stx)-1]], stx[len(stx)-1])
+	if err != nil {
+		return nil, err
+	}
+	for i := len(stx) - 2; i >= 0; i-- {
+		h, err := HashFromTile(tiles[stxTileOrder[i]], data[stxTileOrder[i]], stx[i])
+		if err != nil {
+			return nil, err
+		}
+		th = hashNode(h, th)
+	}
+	if th != r.tree.Hash {
+		// The tiles do not support the tree hash.
+		// We know at least one is wrong, but not which one; reject them all.
+		for _, tile := range tiles[:len(stx)] {
+			r.tr.Reject(tile)
+		}
+		return nil, fmt.Errorf("downloaded inconsistent tile")
+	}
+
+	// Authenticate full tiles against their parents.
+	for i := len(stx); i < len(tiles); i++ {
+		tile := tiles[i]
+		p := tileParent(tile, 1, r.tree.N)
+		j, ok := tileOrder[p]
+		if !ok {
+			return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost parent of %v", r.tree.N, indexes, tile)
+		}
+		h, err := HashFromTile(p, data[j], StoredHashIndex(p.L*p.H, tile.N))
+		if err != nil {
+			return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost hash of %v: %v", r.tree.N, indexes, tile, err)
+		}
+		if h != tileHash(data[i]) {
+			r.tr.Reject(tile)
+			return nil, fmt.Errorf("downloaded inconsistent tile")
+		}
+	}
+
+	// Now we have all the tiles needed for the requested hashes,
+	// and we've authenticated the full tile set against the trusted tree hash.
+	// Pull out the requested hashes.
+	hashes := make([]Hash, len(indexes))
+	for i, x := range indexes {
+		j := indexTileOrder[i]
+		h, err := HashFromTile(tiles[j], data[j], x)
+		if err != nil {
+			return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost hash %v: %v", r.tree.N, indexes, x, err)
+		}
+		hashes[i] = h
+	}
+
+	return hashes, nil
+}
diff --git a/notary/internal/tlog/tlog_test.go b/notary/internal/tlog/tlog_test.go
index c33e3e3..a225a0f 100644
--- a/notary/internal/tlog/tlog_test.go
+++ b/notary/internal/tlog/tlog_test.go
@@ -5,6 +5,7 @@
 package tlog
 
 import (
+	"bytes"
 	"fmt"
 	"testing"
 )
@@ -32,17 +33,38 @@
 	return out, nil
 }
 
+type testTilesStorage map[Tile][]byte
+
+func (t testTilesStorage) Height() int {
+	return 2
+}
+
+func (t testTilesStorage) Reject(tile Tile) {
+	println("REJECT")
+}
+
+func (t testTilesStorage) ReadTiles(tiles []Tile) ([][]byte, error) {
+	out := make([][]byte, len(tiles))
+	for i, tile := range tiles {
+		out[i] = t[tile]
+	}
+	return out, nil
+}
+
 func TestTree(t *testing.T) {
 	var trees []Hash
 	var leafhashes []Hash
 	var storage testHashStorage
-	for i := int64(0); i < 10; i++ {
+	tiles := make(testTilesStorage)
+	const testH = 2
+	for i := int64(0); i < 100; i++ {
 		data := []byte(fmt.Sprintf("leaf %d", i))
 		hashes, err := StoredHashes(i, data, storage)
 		if err != nil {
 			t.Fatal(err)
 		}
 		leafhashes = append(leafhashes, RecordHash(data))
+		oldStorage := len(storage)
 		storage = append(storage, hashes...)
 		if count := StoredHashCount(i + 1); count != int64(len(storage)) {
 			t.Errorf("StoredHashCount(%d) = %d, have %d StoredHashes", i+1, count, len(storage))
@@ -51,6 +73,56 @@
 		if err != nil {
 			t.Fatal(err)
 		}
+
+		for _, tile := range NewTiles(testH, i, i+1) {
+			data, err := ReadTileData(tile, storage)
+			if err != nil {
+				t.Fatal(err)
+			}
+			old := Tile{H: tile.H, L: tile.L, N: tile.N, W: tile.W - 1}
+			oldData := tiles[old]
+			if len(oldData) != len(data)-HashSize || !bytes.Equal(oldData, data[:len(oldData)]) {
+				t.Fatalf("tile %v not extending earlier tile %v", tile.Path(), old.Path())
+			}
+			tiles[tile] = data
+		}
+		for _, tile := range NewTiles(testH, 0, i+1) {
+			data, err := ReadTileData(tile, storage)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if !bytes.Equal(tiles[tile], data) {
+				t.Fatalf("mismatch at %+v", tile)
+			}
+		}
+		for _, tile := range NewTiles(testH, i/2, i+1) {
+			data, err := ReadTileData(tile, storage)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if !bytes.Equal(tiles[tile], data) {
+				t.Fatalf("mismatch at %+v", tile)
+			}
+		}
+
+		// Check that all the new hashes are readable from their tiles.
+		for j := oldStorage; j < len(storage); j++ {
+			tile := TileForIndex(testH, int64(j))
+			data, ok := tiles[tile]
+			if !ok {
+				t.Log(NewTiles(testH, 0, i+1))
+				t.Fatalf("TileForIndex(%d, %d) = %v, not yet stored (i=%d, stored %d)", testH, j, tile.Path(), i, len(storage))
+				continue
+			}
+			h, err := HashFromTile(tile, data, int64(j))
+			if err != nil {
+				t.Fatal(err)
+			}
+			if h != storage[j] {
+				t.Errorf("HashFromTile(%v, %d) = %v, want %v", tile.Path(), int64(j), h, storage[j])
+			}
+		}
+
 		trees = append(trees, th)
 
 		// Check that leaf proofs work, for all trees and leaves so far.
@@ -71,8 +143,42 @@
 			}
 		}
 
-		// Check that tree proofs work, for all trees so far.
+		// Check that leaf proofs work using TileReader.
+		// To prove a leaf that way, all you have to do is read and verify its hash.
+		thr := TileHashReader(Tree{i + 1, th}, tiles)
 		for j := int64(0); j <= i; j++ {
+			h, err := thr.ReadHashes([]int64{StoredHashIndex(0, j)})
+			if err != nil {
+				t.Fatalf("TileHashReader(%d).ReadHashes(%d): %v", i+1, j, err)
+			}
+			if h[0] != leafhashes[j] {
+				t.Fatalf("TileHashReader(%d).ReadHashes(%d) returned wrong hash", i+1, j)
+			}
+
+			// Even though reading the hash suffices,
+			// check we can generate the proof too.
+			p, err := ProveRecord(i+1, j, thr)
+			if err != nil {
+				t.Fatalf("ProveRecord(%d, %d, TileHashReader(%d)): %v", i+1, j, i+1, err)
+			}
+			if err := CheckRecord(p, i+1, th, j, leafhashes[j]); err != nil {
+				t.Fatalf("CheckRecord(%d, %d, TileHashReader(%d)): %v", i+1, j, i+1, err)
+			}
+		}
+
+		// Check that tree proofs work, for all trees so far, using TileReader.
+		// To prove a tree that way, all you have to do is compute and verify its hash.
+		for j := int64(0); j <= i; j++ {
+			h, err := TreeHash(j+1, thr)
+			if err != nil {
+				t.Fatalf("TreeHash(%d, TileHashReader(%d)): %v", j, i+1, err)
+			}
+			if h != trees[j] {
+				t.Fatalf("TreeHash(%d, TileHashReader(%d)) = %x, want %x (%v)", j, i+1, h[:], trees[j][:], trees[j])
+			}
+
+			// Even though computing the subtree hash suffices,
+			// check that we can generate the proof too.
 			p, err := ProveTree(i+1, j+1, storage)
 			if err != nil {
 				t.Fatalf("ProveTree(%d, %d): %v", i+1, j+1, err)
@@ -102,3 +208,30 @@
 		}
 	}
 }
+
+// TODO(rsc): Test invalid paths too, like "tile/3/5/123/456/078".
+var tilePaths = []struct {
+	path string
+	tile Tile
+}{
+	{"tile/4/0/001", Tile{4, 0, 1, 16}},
+	{"tile/4/0/001.p/5", Tile{4, 0, 1, 5}},
+	{"tile/3/5/x123/x456/078", Tile{3, 5, 123456078, 8}},
+	{"tile/3/5/x123/x456/078.p/2", Tile{3, 5, 123456078, 2}},
+	{"tile/1/0/x003/x057/500", Tile{1, 0, 3057500, 2}},
+}
+
+func TestTilePath(t *testing.T) {
+	for _, tt := range tilePaths {
+		p := tt.tile.Path()
+		if p != tt.path {
+			t.Errorf("%+v.Path() = %q, want %q", tt.tile, p, tt.path)
+		}
+		tile, err := ParseTilePath(tt.path)
+		if err != nil {
+			t.Errorf("ParseTilePath(%q): %v", tt.path, err)
+		} else if tile != tt.tile {
+			t.Errorf("ParseTilePath(%q) = %+v, want %+v", tt.path, tile, tt.tile)
+		}
+	}
+}