sumdb/tlog: transparent log data structures and algorithms
Copied from golang.org/x/exp/sumdb/internal/tlog.
For golang/go#31761.
Change-Id: If44ad226195908ebc8757c1ca54f20f2a6adb3ef
Reviewed-on: https://go-review.googlesource.com/c/mod/+/176462
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
diff --git a/sumdb/tlog/ct_test.go b/sumdb/tlog/ct_test.go
new file mode 100644
index 0000000..c2d9aeb
--- /dev/null
+++ b/sumdb/tlog/ct_test.go
@@ -0,0 +1,96 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tlog
+
+import (
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "net/http"
+ "net/url"
+ "os"
+ "testing"
+)
+
+func TestCertificateTransparency(t *testing.T) {
+ // Test that we can verify actual Certificate Transparency proofs.
+ // (The other tests check that we can verify our own proofs;
+ // this is a test that the two are compatible.)
+
+ if testing.Short() {
+ t.Skip("skipping in -short mode")
+ }
+
+ var root ctTree
+ httpGET(t, "http://ct.googleapis.com/logs/argon2020/ct/v1/get-sth", &root)
+
+ var leaf ctEntries
+ httpGET(t, "http://ct.googleapis.com/logs/argon2020/ct/v1/get-entries?start=10000&end=10000", &leaf)
+ hash := RecordHash(leaf.Entries[0].Data)
+
+ var rp ctRecordProof
+ httpGET(t, "http://ct.googleapis.com/logs/argon2020/ct/v1/get-proof-by-hash?tree_size="+fmt.Sprint(root.Size)+"&hash="+url.QueryEscape(hash.String()), &rp)
+
+ err := CheckRecord(rp.Proof, root.Size, root.Hash, 10000, hash)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ var tp ctTreeProof
+ httpGET(t, "http://ct.googleapis.com/logs/argon2020/ct/v1/get-sth-consistency?first=3654490&second="+fmt.Sprint(root.Size), &tp)
+
+ oh, _ := ParseHash("AuIZ5V6sDUj1vn3Y1K85oOaQ7y+FJJKtyRTl1edIKBQ=")
+ err = CheckTree(tp.Proof, root.Size, root.Hash, 3654490, oh)
+ if err != nil {
+ t.Fatal(err)
+ }
+}
+
+type ctTree struct {
+ Size int64 `json:"tree_size"`
+ Hash Hash `json:"sha256_root_hash"`
+}
+
+type ctEntries struct {
+ Entries []*ctEntry
+}
+
+type ctEntry struct {
+ Data []byte `json:"leaf_input"`
+}
+
+type ctRecordProof struct {
+ Index int64 `json:"leaf_index"`
+ Proof RecordProof `json:"audit_path"`
+}
+
+type ctTreeProof struct {
+ Proof TreeProof `json:"consistency"`
+}
+
+func httpGET(t *testing.T, url string, targ interface{}) {
+ if testing.Verbose() {
+ println()
+ println(url)
+ }
+ resp, err := http.Get(url)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer resp.Body.Close()
+ data, err := ioutil.ReadAll(resp.Body)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if testing.Verbose() {
+ os.Stdout.Write(data)
+ }
+ err = json.Unmarshal(data, targ)
+ if err != nil {
+ println(url)
+ os.Stdout.Write(data)
+ t.Fatal(err)
+ }
+}
diff --git a/sumdb/tlog/note.go b/sumdb/tlog/note.go
new file mode 100644
index 0000000..ce5353e
--- /dev/null
+++ b/sumdb/tlog/note.go
@@ -0,0 +1,135 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tlog
+
+import (
+ "bytes"
+ "encoding/base64"
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+ "unicode/utf8"
+)
+
+// A Tree is a tree description, to be signed by a go.sum database server.
+type Tree struct {
+ N int64
+ Hash Hash
+}
+
+// FormatTree formats a tree description for inclusion in a note.
+//
+// The encoded form is three lines, each ending in a newline (U+000A):
+//
+// go.sum database tree
+// N
+// Hash
+//
+// where N is in decimal and Hash is in base64.
+//
+// A future backwards-compatible encoding may add additional lines,
+// which the parser can ignore.
+// A future backwards-incompatible encoding would use a different
+// first line (for example, "go.sum database tree v2").
+func FormatTree(tree Tree) []byte {
+ return []byte(fmt.Sprintf("go.sum database tree\n%d\n%s\n", tree.N, tree.Hash))
+}
+
+var errMalformedTree = errors.New("malformed tree note")
+var treePrefix = []byte("go.sum database tree\n")
+
+// ParseTree parses a formatted tree root description.
+func ParseTree(text []byte) (tree Tree, err error) {
+ // The message looks like:
+ //
+ // go.sum database tree
+ // 2
+ // nND/nri/U0xuHUrYSy0HtMeal2vzD9V4k/BO79C+QeI=
+ //
+ // For forwards compatibility, extra text lines after the encoding are ignored.
+ if !bytes.HasPrefix(text, treePrefix) || bytes.Count(text, []byte("\n")) < 3 || len(text) > 1e6 {
+ return Tree{}, errMalformedTree
+ }
+
+ lines := strings.SplitN(string(text), "\n", 4)
+ n, err := strconv.ParseInt(lines[1], 10, 64)
+ if err != nil || n < 0 || lines[1] != strconv.FormatInt(n, 10) {
+ return Tree{}, errMalformedTree
+ }
+
+ h, err := base64.StdEncoding.DecodeString(lines[2])
+ if err != nil || len(h) != HashSize {
+ return Tree{}, errMalformedTree
+ }
+
+ var hash Hash
+ copy(hash[:], h)
+ return Tree{n, hash}, nil
+}
+
+var errMalformedRecord = errors.New("malformed record data")
+
+// FormatRecord formats a record for serving to a client
+// in a lookup response or data tile.
+//
+// The encoded form is the record ID as a single number,
+// then the text of the record, and then a terminating blank line.
+// Record text must be valid UTF-8 and must not contain any ASCII control
+// characters (those below U+0020) other than newline (U+000A).
+// It must end in a terminating newline and not contain any blank lines.
+func FormatRecord(id int64, text []byte) (msg []byte, err error) {
+ if !isValidRecordText(text) {
+ return nil, errMalformedRecord
+ }
+ msg = []byte(fmt.Sprintf("%d\n", id))
+ msg = append(msg, text...)
+ msg = append(msg, '\n')
+ return msg, nil
+}
+
+// isValidRecordText reports whether text is syntactically valid record text.
+func isValidRecordText(text []byte) bool {
+ var last rune
+ for i := 0; i < len(text); {
+ r, size := utf8.DecodeRune(text[i:])
+ if r < 0x20 && r != '\n' || r == utf8.RuneError && size == 1 || last == '\n' && r == '\n' {
+ return false
+ }
+ i += size
+ last = r
+ }
+ if last != '\n' {
+ return false
+ }
+ return true
+}
+
+// ParseRecord parses a record description at the start of text,
+// stopping immediately after the terminating blank line.
+// It returns the record id, the record text, and the remainder of text.
+func ParseRecord(msg []byte) (id int64, text, rest []byte, err error) {
+ // Leading record id.
+ i := bytes.IndexByte(msg, '\n')
+ if i < 0 {
+ return 0, nil, nil, errMalformedRecord
+ }
+ id, err = strconv.ParseInt(string(msg[:i]), 10, 64)
+ if err != nil {
+ return 0, nil, nil, errMalformedRecord
+ }
+ msg = msg[i+1:]
+
+ // Record text.
+ i = bytes.Index(msg, []byte("\n\n"))
+ if i < 0 {
+ return 0, nil, nil, errMalformedRecord
+ }
+ text, rest = msg[:i+1], msg[i+2:]
+ if !isValidRecordText(text) {
+ return 0, nil, nil, errMalformedRecord
+ }
+ return id, text, rest, nil
+}
diff --git a/sumdb/tlog/note_test.go b/sumdb/tlog/note_test.go
new file mode 100644
index 0000000..a32d6d2
--- /dev/null
+++ b/sumdb/tlog/note_test.go
@@ -0,0 +1,117 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tlog
+
+import (
+ "strings"
+ "testing"
+)
+
+func TestFormatTree(t *testing.T) {
+ n := int64(123456789012)
+ h := RecordHash([]byte("hello world"))
+ golden := "go.sum database tree\n123456789012\nTszzRgjTG6xce+z2AG31kAXYKBgQVtCSCE40HmuwBb0=\n"
+ b := FormatTree(Tree{n, h})
+ if string(b) != golden {
+ t.Errorf("FormatTree(...) = %q, want %q", b, golden)
+ }
+}
+
+func TestParseTree(t *testing.T) {
+ in := "go.sum database tree\n123456789012\nTszzRgjTG6xce+z2AG31kAXYKBgQVtCSCE40HmuwBb0=\n"
+ goldH := RecordHash([]byte("hello world"))
+ goldN := int64(123456789012)
+ tree, err := ParseTree([]byte(in))
+ if tree.N != goldN || tree.Hash != goldH || err != nil {
+ t.Fatalf("ParseTree(...) = Tree{%d, %v}, %v, want Tree{%d, %v}, nil", tree.N, tree.Hash, err, goldN, goldH)
+ }
+
+ // Check invalid trees.
+ var badTrees = []string{
+ "not-" + in,
+ "go.sum database tree\n0xabcdef\nTszzRgjTG6xce+z2AG31kAXYKBgQVtCSCE40HmuwBb0=\n",
+ "go.sum database tree\n123456789012\nTszzRgjTG6xce+z2AG31kAXYKBgQVtCSCE40HmuwBTOOBIG=\n",
+ }
+ for _, bad := range badTrees {
+ _, err := ParseTree([]byte(bad))
+ if err == nil {
+ t.Fatalf("ParseTree(%q) succeeded, want failure", in)
+ }
+ }
+
+ // Check junk on end is ignored.
+ var goodTrees = []string{
+ in + "JOE",
+ in + "JOE\n",
+ in + strings.Repeat("JOE\n", 1000),
+ }
+ for _, good := range goodTrees {
+ _, err := ParseTree([]byte(good))
+ if tree.N != goldN || tree.Hash != goldH || err != nil {
+ t.Fatalf("ParseTree(...+%q) = Tree{%d, %v}, %v, want Tree{%d, %v}, nil", good[len(in):], tree.N, tree.Hash, err, goldN, goldH)
+ }
+ }
+}
+
+func TestFormatRecord(t *testing.T) {
+ id := int64(123456789012)
+ text := "hello, world\n"
+ golden := "123456789012\nhello, world\n\n"
+ msg, err := FormatRecord(id, []byte(text))
+ if err != nil {
+ t.Fatalf("FormatRecord: %v", err)
+ }
+ if string(msg) != golden {
+ t.Fatalf("FormatRecord(...) = %q, want %q", msg, golden)
+ }
+
+ var badTexts = []string{
+ "",
+ "hello\nworld",
+ "hello\n\nworld\n",
+ "hello\x01world\n",
+ }
+ for _, bad := range badTexts {
+ msg, err := FormatRecord(id, []byte(bad))
+ if err == nil {
+ t.Errorf("FormatRecord(id, %q) = %q, want error", bad, msg)
+ }
+ }
+}
+
+func TestParseRecord(t *testing.T) {
+ in := "123456789012\nhello, world\n\njunk on end\x01\xff"
+ goldID := int64(123456789012)
+ goldText := "hello, world\n"
+ goldRest := "junk on end\x01\xff"
+ id, text, rest, err := ParseRecord([]byte(in))
+ if id != goldID || string(text) != goldText || string(rest) != goldRest || err != nil {
+ t.Fatalf("ParseRecord(%q) = %d, %q, %q, %v, want %d, %q, %q, nil", in, id, text, rest, err, goldID, goldText, goldRest)
+ }
+
+ in = "123456789012\nhello, world\n\n"
+ id, text, rest, err = ParseRecord([]byte(in))
+ if id != goldID || string(text) != goldText || len(rest) != 0 || err != nil {
+ t.Fatalf("ParseRecord(%q) = %d, %q, %q, %v, want %d, %q, %q, nil", in, id, text, rest, err, goldID, goldText, "")
+ }
+ if rest == nil {
+ t.Fatalf("ParseRecord(%q): rest = []byte(nil), want []byte{}", in)
+ }
+
+ // Check invalid records.
+ var badRecords = []string{
+ "not-" + in,
+ "123\nhello\x01world\n\n",
+ "123\nhello\xffworld\n\n",
+ "123\nhello world\n",
+ "0x123\nhello world\n\n",
+ }
+ for _, bad := range badRecords {
+ id, text, rest, err := ParseRecord([]byte(bad))
+ if err == nil {
+ t.Fatalf("ParseRecord(%q) = %d, %q, %q, nil, want error", in, id, text, rest)
+ }
+ }
+}
diff --git a/sumdb/tlog/tile.go b/sumdb/tlog/tile.go
new file mode 100644
index 0000000..e4aeb14
--- /dev/null
+++ b/sumdb/tlog/tile.go
@@ -0,0 +1,435 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tlog
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+// A Tile is a description of a transparency log tile.
+// A tile of height H at level L offset N lists W consecutive hashes
+// at level H*L of the tree starting at offset N*(2**H).
+// A complete tile lists 2**H hashes; a partial tile lists fewer.
+// Note that a tile represents the entire subtree of height H
+// with those hashes as the leaves. The levels above H*L
+// can be reconstructed by hashing the leaves.
+//
+// Each Tile can be encoded as a “tile coordinate path”
+// of the form tile/H/L/NNN[.p/W].
+// The .p/W suffix is present only for partial tiles, meaning W < 2**H.
+// The NNN element is an encoding of N into 3-digit path elements.
+// All but the last path element begins with an "x".
+// For example,
+// Tile{H: 3, L: 4, N: 1234067, W: 1}'s path
+// is tile/3/4/x001/x234/067.p/1, and
+// Tile{H: 3, L: 4, N: 1234067, W: 8}'s path
+// is tile/3/4/x001/x234/067.
+// See Tile's Path method and the ParseTilePath function.
+//
+// The special level L=-1 holds raw record data instead of hashes.
+// In this case, the level encodes into a tile path as the path element
+// "data" instead of "-1".
+//
+// See also https://golang.org/design/25530-sumdb#checksum-database
+// and https://research.swtch.com/tlog#tiling_a_log.
+type Tile struct {
+ H int // height of tile (1 ≤ H ≤ 30)
+ L int // level in tiling (-1 ≤ L ≤ 63)
+ N int64 // number within level (0 ≤ N, unbounded)
+ W int // width of tile (1 ≤ W ≤ 2**H; 2**H is complete tile)
+}
+
+// TileForIndex returns the tile of fixed height h ≥ 1
+// and least width storing the given hash storage index.
+//
+// If h ≤ 0, TileForIndex panics.
+func TileForIndex(h int, index int64) Tile {
+ if h <= 0 {
+ panic(fmt.Sprintf("TileForIndex: invalid height %d", h))
+ }
+ t, _, _ := tileForIndex(h, index)
+ return t
+}
+
+// tileForIndex returns the tile of height h ≥ 1
+// storing the given hash index, which can be
+// reconstructed using tileHash(data[start:end]).
+func tileForIndex(h int, index int64) (t Tile, start, end int) {
+ level, n := SplitStoredHashIndex(index)
+ t.H = h
+ t.L = level / h
+ level -= t.L * h // now level within tile
+ t.N = n << uint(level) >> uint(t.H)
+ n -= t.N << uint(t.H) >> uint(level) // now n within tile at level
+ t.W = int((n + 1) << uint(level))
+ return t, int(n<<uint(level)) * HashSize, int((n+1)<<uint(level)) * HashSize
+}
+
+// HashFromTile returns the hash at the given storage index,
+// provided that t == TileForIndex(t.H, index) or a wider version,
+// and data is t's tile data (of length at least t.W*HashSize).
+func HashFromTile(t Tile, data []byte, index int64) (Hash, error) {
+ if t.H < 1 || t.H > 30 || t.L < 0 || t.L >= 64 || t.W < 1 || t.W > 1<<uint(t.H) {
+ return Hash{}, fmt.Errorf("invalid tile %v", t.Path())
+ }
+ if len(data) < t.W*HashSize {
+ return Hash{}, fmt.Errorf("data len %d too short for tile %v", len(data), t.Path())
+ }
+ t1, start, end := tileForIndex(t.H, index)
+ if t.L != t1.L || t.N != t1.N || t.W < t1.W {
+ return Hash{}, fmt.Errorf("index %v is in %v not %v", index, t1.Path(), t.Path())
+ }
+ return tileHash(data[start:end]), nil
+}
+
+// tileHash computes the subtree hash corresponding to the (2^K)-1 hashes in data.
+func tileHash(data []byte) Hash {
+ if len(data) == 0 {
+ panic("bad math in tileHash")
+ }
+ if len(data) == HashSize {
+ var h Hash
+ copy(h[:], data)
+ return h
+ }
+ n := len(data) / 2
+ return NodeHash(tileHash(data[:n]), tileHash(data[n:]))
+}
+
+// NewTiles returns the coordinates of the tiles of height h ≥ 1
+// that must be published when publishing from a tree of
+// size newTreeSize to replace a tree of size oldTreeSize.
+// (No tiles need to be published for a tree of size zero.)
+//
+// If h ≤ 0, TileForIndex panics.
+func NewTiles(h int, oldTreeSize, newTreeSize int64) []Tile {
+ if h <= 0 {
+ panic(fmt.Sprintf("NewTiles: invalid height %d", h))
+ }
+ H := uint(h)
+ var tiles []Tile
+ for level := uint(0); newTreeSize>>(H*level) > 0; level++ {
+ oldN := oldTreeSize >> (H * level)
+ newN := newTreeSize >> (H * level)
+ for n := oldN >> H; n < newN>>H; n++ {
+ tiles = append(tiles, Tile{H: h, L: int(level), N: n, W: 1 << H})
+ }
+ n := newN >> H
+ maxW := int(newN - n<<H)
+ minW := 1
+ if oldN > n<<H {
+ minW = int(oldN - n<<H)
+ }
+ for w := minW; w <= maxW; w++ {
+ tiles = append(tiles, Tile{H: h, L: int(level), N: n, W: w})
+ }
+ }
+ return tiles
+}
+
+// ReadTileData reads the hashes for tile t from r
+// and returns the corresponding tile data.
+func ReadTileData(t Tile, r HashReader) ([]byte, error) {
+ size := t.W
+ if size == 0 {
+ size = 1 << uint(t.H)
+ }
+ start := t.N << uint(t.H)
+ indexes := make([]int64, size)
+ for i := 0; i < size; i++ {
+ indexes[i] = StoredHashIndex(t.H*t.L, start+int64(i))
+ }
+
+ hashes, err := r.ReadHashes(indexes)
+ if err != nil {
+ return nil, err
+ }
+ if len(hashes) != len(indexes) {
+ return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
+ }
+
+ tile := make([]byte, size*HashSize)
+ for i := 0; i < size; i++ {
+ copy(tile[i*HashSize:], hashes[i][:])
+ }
+ return tile, nil
+}
+
+// To limit the size of any particular directory listing,
+// we encode the (possibly very large) number N
+// by encoding three digits at a time.
+// For example, 123456789 encodes as x123/x456/789.
+// Each directory has at most 1000 each xNNN, NNN, and NNN.p children,
+// so there are at most 3000 entries in any one directory.
+const pathBase = 1000
+
+// Path returns a tile coordinate path describing t.
+func (t Tile) Path() string {
+ n := t.N
+ nStr := fmt.Sprintf("%03d", n%pathBase)
+ for n >= pathBase {
+ n /= pathBase
+ nStr = fmt.Sprintf("x%03d/%s", n%pathBase, nStr)
+ }
+ pStr := ""
+ if t.W != 1<<uint(t.H) {
+ pStr = fmt.Sprintf(".p/%d", t.W)
+ }
+ var L string
+ if t.L == -1 {
+ L = "data"
+ } else {
+ L = fmt.Sprintf("%d", t.L)
+ }
+ return fmt.Sprintf("tile/%d/%s/%s%s", t.H, L, nStr, pStr)
+}
+
+// ParseTilePath parses a tile coordinate path.
+func ParseTilePath(path string) (Tile, error) {
+ f := strings.Split(path, "/")
+ if len(f) < 4 || f[0] != "tile" {
+ return Tile{}, &badPathError{path}
+ }
+ h, err1 := strconv.Atoi(f[1])
+ isData := false
+ if f[2] == "data" {
+ isData = true
+ f[2] = "0"
+ }
+ l, err2 := strconv.Atoi(f[2])
+ if err1 != nil || err2 != nil || h < 1 || l < 0 || h > 30 {
+ return Tile{}, &badPathError{path}
+ }
+ w := 1 << uint(h)
+ if dotP := f[len(f)-2]; strings.HasSuffix(dotP, ".p") {
+ ww, err := strconv.Atoi(f[len(f)-1])
+ if err != nil || ww <= 0 || ww >= w {
+ return Tile{}, &badPathError{path}
+ }
+ w = ww
+ f[len(f)-2] = dotP[:len(dotP)-len(".p")]
+ f = f[:len(f)-1]
+ }
+ f = f[3:]
+ n := int64(0)
+ for _, s := range f {
+ nn, err := strconv.Atoi(strings.TrimPrefix(s, "x"))
+ if err != nil || nn < 0 || nn >= pathBase {
+ return Tile{}, &badPathError{path}
+ }
+ n = n*pathBase + int64(nn)
+ }
+ if isData {
+ l = -1
+ }
+ t := Tile{H: h, L: l, N: n, W: w}
+ if path != t.Path() {
+ return Tile{}, &badPathError{path}
+ }
+ return t, nil
+}
+
+type badPathError struct {
+ path string
+}
+
+func (e *badPathError) Error() string {
+ return fmt.Sprintf("malformed tile path %q", e.path)
+}
+
+// A TileReader reads tiles from a go.sum database log.
+type TileReader interface {
+ // Height returns the height of the available tiles.
+ Height() int
+
+ // ReadTiles returns the data for each requested tile.
+ // If ReadTiles returns err == nil, it must also return
+ // a data record for each tile (len(data) == len(tiles))
+ // and each data record must be the correct length
+ // (len(data[i]) == tiles[i].W*HashSize).
+ //
+ // An implementation of ReadTiles typically reads
+ // them from an on-disk cache or else from a remote
+ // tile server. Tile data downloaded from a server should
+ // be considered suspect and not saved into a persistent
+ // on-disk cache before returning from ReadTiles.
+ // When the client confirms the validity of the tile data,
+ // it will call SaveTiles to signal that they can be safely
+ // written to persistent storage.
+ // See also https://research.swtch.com/tlog#authenticating_tiles.
+ ReadTiles(tiles []Tile) (data [][]byte, err error)
+
+ // SaveTiles informs the TileReader that the tile data
+ // returned by ReadTiles has been confirmed as valid
+ // and can be saved in persistent storage (on disk).
+ SaveTiles(tiles []Tile, data [][]byte)
+}
+
+// TileHashReader returns a HashReader that satisfies requests
+// by loading tiles of the given tree.
+//
+// The returned HashReader checks that loaded tiles are
+// valid for the given tree. Therefore, any hashes returned
+// by the HashReader are already proven to be in the tree.
+func TileHashReader(tree Tree, tr TileReader) HashReader {
+ return &tileHashReader{tree: tree, tr: tr}
+}
+
+type tileHashReader struct {
+ tree Tree
+ tr TileReader
+}
+
+// tileParent returns t's k'th tile parent in the tiles for a tree of size n.
+// If there is no such parent, tileParent returns Tile{}.
+func tileParent(t Tile, k int, n int64) Tile {
+ t.L += k
+ t.N >>= uint(k * t.H)
+ t.W = 1 << uint(t.H)
+ if max := n >> uint(t.L*t.H); t.N<<uint(t.H)+int64(t.W) >= max {
+ if t.N<<uint(t.H) >= max {
+ return Tile{}
+ }
+ t.W = int(max - t.N<<uint(t.H))
+ }
+ return t
+}
+
+func (r *tileHashReader) ReadHashes(indexes []int64) ([]Hash, error) {
+ h := r.tr.Height()
+
+ tileOrder := make(map[Tile]int) // tileOrder[tileKey(tiles[i])] = i
+ var tiles []Tile
+
+ // Plan to fetch tiles necessary to recompute tree hash.
+ // If it matches, those tiles are authenticated.
+ stx := subTreeIndex(0, r.tree.N, nil)
+ stxTileOrder := make([]int, len(stx))
+ for i, x := range stx {
+ tile, _, _ := tileForIndex(h, x)
+ tile = tileParent(tile, 0, r.tree.N)
+ if j, ok := tileOrder[tile]; ok {
+ stxTileOrder[i] = j
+ continue
+ }
+ stxTileOrder[i] = len(tiles)
+ tileOrder[tile] = len(tiles)
+ tiles = append(tiles, tile)
+ }
+
+ // Plan to fetch tiles containing the indexes,
+ // along with any parent tiles needed
+ // for authentication. For most calls,
+ // the parents are being fetched anyway.
+ indexTileOrder := make([]int, len(indexes))
+ for i, x := range indexes {
+ if x >= StoredHashIndex(0, r.tree.N) {
+ return nil, fmt.Errorf("indexes not in tree")
+ }
+
+ tile, _, _ := tileForIndex(h, x)
+
+ // Walk up parent tiles until we find one we've requested.
+ // That one will be authenticated.
+ k := 0
+ for ; ; k++ {
+ p := tileParent(tile, k, r.tree.N)
+ if j, ok := tileOrder[p]; ok {
+ if k == 0 {
+ indexTileOrder[i] = j
+ }
+ break
+ }
+ }
+
+ // Walk back down recording child tiles after parents.
+ // This loop ends by revisiting the tile for this index
+ // (tileParent(tile, 0, r.tree.N)) unless k == 0, in which
+ // case the previous loop did it.
+ for k--; k >= 0; k-- {
+ p := tileParent(tile, k, r.tree.N)
+ if p.W != 1<<uint(p.H) {
+ // Only full tiles have parents.
+ // This tile has a parent, so it must be full.
+ return nil, fmt.Errorf("bad math in tileHashReader: %d %d %v", r.tree.N, x, p)
+ }
+ tileOrder[p] = len(tiles)
+ if k == 0 {
+ indexTileOrder[i] = len(tiles)
+ }
+ tiles = append(tiles, p)
+ }
+ }
+
+ // Fetch all the tile data.
+ data, err := r.tr.ReadTiles(tiles)
+ if err != nil {
+ return nil, err
+ }
+ if len(data) != len(tiles) {
+ return nil, fmt.Errorf("TileReader returned bad result slice (len=%d, want %d)", len(data), len(tiles))
+ }
+ for i, tile := range tiles {
+ if len(data[i]) != tile.W*HashSize {
+ return nil, fmt.Errorf("TileReader returned bad result slice (%v len=%d, want %d)", tile.Path(), len(data[i]), tile.W*HashSize)
+ }
+ }
+
+ // Authenticate the initial tiles against the tree hash.
+ // They are arranged so that parents are authenticated before children.
+ // First the tiles needed for the tree hash.
+ th, err := HashFromTile(tiles[stxTileOrder[len(stx)-1]], data[stxTileOrder[len(stx)-1]], stx[len(stx)-1])
+ if err != nil {
+ return nil, err
+ }
+ for i := len(stx) - 2; i >= 0; i-- {
+ h, err := HashFromTile(tiles[stxTileOrder[i]], data[stxTileOrder[i]], stx[i])
+ if err != nil {
+ return nil, err
+ }
+ th = NodeHash(h, th)
+ }
+ if th != r.tree.Hash {
+ // The tiles do not support the tree hash.
+ // We know at least one is wrong, but not which one.
+ return nil, fmt.Errorf("downloaded inconsistent tile")
+ }
+
+ // Authenticate full tiles against their parents.
+ for i := len(stx); i < len(tiles); i++ {
+ tile := tiles[i]
+ p := tileParent(tile, 1, r.tree.N)
+ j, ok := tileOrder[p]
+ if !ok {
+ return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost parent of %v", r.tree.N, indexes, tile)
+ }
+ h, err := HashFromTile(p, data[j], StoredHashIndex(p.L*p.H, tile.N))
+ if err != nil {
+ return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost hash of %v: %v", r.tree.N, indexes, tile, err)
+ }
+ if h != tileHash(data[i]) {
+ return nil, fmt.Errorf("downloaded inconsistent tile")
+ }
+ }
+
+ // Now we have all the tiles needed for the requested hashes,
+ // and we've authenticated the full tile set against the trusted tree hash.
+ r.tr.SaveTiles(tiles, data)
+
+ // Pull out the requested hashes.
+ hashes := make([]Hash, len(indexes))
+ for i, x := range indexes {
+ j := indexTileOrder[i]
+ h, err := HashFromTile(tiles[j], data[j], x)
+ if err != nil {
+ return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost hash %v: %v", r.tree.N, indexes, x, err)
+ }
+ hashes[i] = h
+ }
+
+ return hashes, nil
+}
diff --git a/sumdb/tlog/tlog.go b/sumdb/tlog/tlog.go
new file mode 100644
index 0000000..a5b3d03
--- /dev/null
+++ b/sumdb/tlog/tlog.go
@@ -0,0 +1,598 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package tlog implements a tamper-evident log
+// used in the Go module go.sum database server.
+//
+// This package follows the design of Certificate Transparency (RFC 6962)
+// and its proofs are compatible with that system.
+// See TestCertificateTransparency.
+//
+package tlog
+
+import (
+ "crypto/sha256"
+ "encoding/base64"
+ "errors"
+ "fmt"
+ "math/bits"
+)
+
+// A Hash is a hash identifying a log record or tree root.
+type Hash [HashSize]byte
+
+// HashSize is the size of a Hash in bytes.
+const HashSize = 32
+
+// String returns a base64 representation of the hash for printing.
+func (h Hash) String() string {
+ return base64.StdEncoding.EncodeToString(h[:])
+}
+
+// MarshalJSON marshals the hash as a JSON string containing the base64-encoded hash.
+func (h Hash) MarshalJSON() ([]byte, error) {
+ return []byte(`"` + h.String() + `"`), nil
+}
+
+// UnmarshalJSON unmarshals a hash from JSON string containing the a base64-encoded hash.
+func (h *Hash) UnmarshalJSON(data []byte) error {
+ if len(data) != 1+44+1 || data[0] != '"' || data[len(data)-2] != '=' || data[len(data)-1] != '"' {
+ return errors.New("cannot decode hash")
+ }
+
+ // As of Go 1.12, base64.StdEncoding.Decode insists on
+ // slicing into target[33:] even when it only writes 32 bytes.
+ // Since we already checked that the hash ends in = above,
+ // we can use base64.RawStdEncoding with the = removed;
+ // RawStdEncoding does not exhibit the same bug.
+ // We decode into a temporary to avoid writing anything to *h
+ // unless the entire input is well-formed.
+ var tmp Hash
+ n, err := base64.RawStdEncoding.Decode(tmp[:], data[1:len(data)-2])
+ if err != nil || n != HashSize {
+ return errors.New("cannot decode hash")
+ }
+ *h = tmp
+ return nil
+}
+
+// ParseHash parses the base64-encoded string form of a hash.
+func ParseHash(s string) (Hash, error) {
+ data, err := base64.StdEncoding.DecodeString(s)
+ if err != nil || len(data) != HashSize {
+ return Hash{}, fmt.Errorf("malformed hash")
+ }
+ var h Hash
+ copy(h[:], data)
+ return h, nil
+}
+
+// maxpow2 returns k, the maximum power of 2 smaller than n,
+// as well as l = log₂ k (so k = 1<<l).
+func maxpow2(n int64) (k int64, l int) {
+ l = 0
+ for 1<<uint(l+1) < n {
+ l++
+ }
+ return 1 << uint(l), l
+}
+
+var zeroPrefix = []byte{0x00}
+
+// RecordHash returns the content hash for the given record data.
+func RecordHash(data []byte) Hash {
+ // SHA256(0x00 || data)
+ // https://tools.ietf.org/html/rfc6962#section-2.1
+ h := sha256.New()
+ h.Write(zeroPrefix)
+ h.Write(data)
+ var h1 Hash
+ h.Sum(h1[:0])
+ return h1
+}
+
+// NodeHash returns the hash for an interior tree node with the given left and right hashes.
+func NodeHash(left, right Hash) Hash {
+ // SHA256(0x01 || left || right)
+ // https://tools.ietf.org/html/rfc6962#section-2.1
+ // We use a stack buffer to assemble the hash input
+ // to avoid allocating a hash struct with sha256.New.
+ var buf [1 + HashSize + HashSize]byte
+ buf[0] = 0x01
+ copy(buf[1:], left[:])
+ copy(buf[1+HashSize:], right[:])
+ return sha256.Sum256(buf[:])
+}
+
+// For information about the stored hash index ordering,
+// see section 3.3 of Crosby and Wallach's paper
+// "Efficient Data Structures for Tamper-Evident Logging".
+// https://www.usenix.org/legacy/event/sec09/tech/full_papers/crosby.pdf
+
+// StoredHashIndex maps the tree coordinates (level, n)
+// to a dense linear ordering that can be used for hash storage.
+// Hash storage implementations that store hashes in sequential
+// storage can use this function to compute where to read or write
+// a given hash.
+func StoredHashIndex(level int, n int64) int64 {
+ // Level L's n'th hash is written right after level L+1's 2n+1'th hash.
+ // Work our way down to the level 0 ordering.
+ // We'll add back the orignal level count at the end.
+ for l := level; l > 0; l-- {
+ n = 2*n + 1
+ }
+
+ // Level 0's n'th hash is written at n+n/2+n/4+... (eventually n/2ⁱ hits zero).
+ i := int64(0)
+ for ; n > 0; n >>= 1 {
+ i += n
+ }
+
+ return i + int64(level)
+}
+
+// SplitStoredHashIndex is the inverse of StoredHashIndex.
+// That is, SplitStoredHashIndex(StoredHashIndex(level, n)) == level, n.
+func SplitStoredHashIndex(index int64) (level int, n int64) {
+ // Determine level 0 record before index.
+ // StoredHashIndex(0, n) < 2*n,
+ // so the n we want is in [index/2, index/2+log₂(index)].
+ n = index / 2
+ indexN := StoredHashIndex(0, n)
+ if indexN > index {
+ panic("bad math")
+ }
+ for {
+ // Each new record n adds 1 + trailingZeros(n) hashes.
+ x := indexN + 1 + int64(bits.TrailingZeros64(uint64(n+1)))
+ if x > index {
+ break
+ }
+ n++
+ indexN = x
+ }
+ // The hash we want was commited with record n,
+ // meaning it is one of (0, n), (1, n/2), (2, n/4), ...
+ level = int(index - indexN)
+ return level, n >> uint(level)
+}
+
+// StoredHashCount returns the number of stored hashes
+// that are expected for a tree with n records.
+func StoredHashCount(n int64) int64 {
+ if n == 0 {
+ return 0
+ }
+ // The tree will have the hashes up to the last leaf hash.
+ numHash := StoredHashIndex(0, n-1) + 1
+ // And it will have any hashes for subtrees completed by that leaf.
+ for i := uint64(n - 1); i&1 != 0; i >>= 1 {
+ numHash++
+ }
+ return numHash
+}
+
+// StoredHashes returns the hashes that must be stored when writing
+// record n with the given data. The hashes should be stored starting
+// at StoredHashIndex(0, n). The result will have at most 1 + log₂ n hashes,
+// but it will average just under two per call for a sequence of calls for n=1..k.
+//
+// StoredHashes may read up to log n earlier hashes from r
+// in order to compute hashes for completed subtrees.
+func StoredHashes(n int64, data []byte, r HashReader) ([]Hash, error) {
+ return StoredHashesForRecordHash(n, RecordHash(data), r)
+}
+
+// StoredHashesForRecordHash is like StoredHashes but takes
+// as its second argument RecordHash(data) instead of data itself.
+func StoredHashesForRecordHash(n int64, h Hash, r HashReader) ([]Hash, error) {
+ // Start with the record hash.
+ hashes := []Hash{h}
+
+ // Build list of indexes needed for hashes for completed subtrees.
+ // Each trailing 1 bit in the binary representation of n completes a subtree
+ // and consumes a hash from an adjacent subtree.
+ m := int(bits.TrailingZeros64(uint64(n + 1)))
+ indexes := make([]int64, m)
+ for i := 0; i < m; i++ {
+ // We arrange indexes in sorted order.
+ // Note that n>>i is always odd.
+ indexes[m-1-i] = StoredHashIndex(i, n>>uint(i)-1)
+ }
+
+ // Fetch hashes.
+ old, err := r.ReadHashes(indexes)
+ if err != nil {
+ return nil, err
+ }
+ if len(old) != len(indexes) {
+ return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(old))
+ }
+
+ // Build new hashes.
+ for i := 0; i < m; i++ {
+ h = NodeHash(old[m-1-i], h)
+ hashes = append(hashes, h)
+ }
+ return hashes, nil
+}
+
+// A HashReader can read hashes for nodes in the log's tree structure.
+type HashReader interface {
+ // ReadHashes returns the hashes with the given stored hash indexes
+ // (see StoredHashIndex and SplitStoredHashIndex).
+ // ReadHashes must return a slice of hashes the same length as indexes,
+ // or else it must return a non-nil error.
+ // ReadHashes may run faster if indexes is sorted in increasing order.
+ ReadHashes(indexes []int64) ([]Hash, error)
+}
+
+// A HashReaderFunc is a function implementing HashReader.
+type HashReaderFunc func([]int64) ([]Hash, error)
+
+func (f HashReaderFunc) ReadHashes(indexes []int64) ([]Hash, error) {
+ return f(indexes)
+}
+
+// TreeHash computes the hash for the root of the tree with n records,
+// using the HashReader to obtain previously stored hashes
+// (those returned by StoredHashes during the writes of those n records).
+// TreeHash makes a single call to ReadHash requesting at most 1 + log₂ n hashes.
+// The tree of size zero is defined to have an all-zero Hash.
+func TreeHash(n int64, r HashReader) (Hash, error) {
+ if n == 0 {
+ return Hash{}, nil
+ }
+ indexes := subTreeIndex(0, n, nil)
+ hashes, err := r.ReadHashes(indexes)
+ if err != nil {
+ return Hash{}, err
+ }
+ if len(hashes) != len(indexes) {
+ return Hash{}, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
+ }
+ hash, hashes := subTreeHash(0, n, hashes)
+ if len(hashes) != 0 {
+ panic("tlog: bad index math in TreeHash")
+ }
+ return hash, nil
+}
+
+// subTreeIndex returns the storage indexes needed to compute
+// the hash for the subtree containing records [lo, hi),
+// appending them to need and returning the result.
+// See https://tools.ietf.org/html/rfc6962#section-2.1
+func subTreeIndex(lo, hi int64, need []int64) []int64 {
+ // See subTreeHash below for commentary.
+ for lo < hi {
+ k, level := maxpow2(hi - lo + 1)
+ if lo&(k-1) != 0 {
+ panic("tlog: bad math in subTreeIndex")
+ }
+ need = append(need, StoredHashIndex(level, lo>>uint(level)))
+ lo += k
+ }
+ return need
+}
+
+// subTreeHash computes the hash for the subtree containing records [lo, hi),
+// assuming that hashes are the hashes corresponding to the indexes
+// returned by subTreeIndex(lo, hi).
+// It returns any leftover hashes.
+func subTreeHash(lo, hi int64, hashes []Hash) (Hash, []Hash) {
+ // Repeatedly partition the tree into a left side with 2^level nodes,
+ // for as large a level as possible, and a right side with the fringe.
+ // The left hash is stored directly and can be read from storage.
+ // The right side needs further computation.
+ numTree := 0
+ for lo < hi {
+ k, _ := maxpow2(hi - lo + 1)
+ if lo&(k-1) != 0 || lo >= hi {
+ panic("tlog: bad math in subTreeHash")
+ }
+ numTree++
+ lo += k
+ }
+
+ if len(hashes) < numTree {
+ panic("tlog: bad index math in subTreeHash")
+ }
+
+ // Reconstruct hash.
+ h := hashes[numTree-1]
+ for i := numTree - 2; i >= 0; i-- {
+ h = NodeHash(hashes[i], h)
+ }
+ return h, hashes[numTree:]
+}
+
+// A RecordProof is a verifiable proof that a particular log root contains a particular record.
+// RFC 6962 calls this a “Merkle audit path.”
+type RecordProof []Hash
+
+// ProveRecord returns the proof that the tree of size t contains the record with index n.
+func ProveRecord(t, n int64, r HashReader) (RecordProof, error) {
+ if t < 0 || n < 0 || n >= t {
+ return nil, fmt.Errorf("tlog: invalid inputs in ProveRecord")
+ }
+ indexes := leafProofIndex(0, t, n, nil)
+ if len(indexes) == 0 {
+ return RecordProof{}, nil
+ }
+ hashes, err := r.ReadHashes(indexes)
+ if err != nil {
+ return nil, err
+ }
+ if len(hashes) != len(indexes) {
+ return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
+ }
+
+ p, hashes := leafProof(0, t, n, hashes)
+ if len(hashes) != 0 {
+ panic("tlog: bad index math in ProveRecord")
+ }
+ return p, nil
+}
+
+// leafProofIndex builds the list of indexes needed to construct the proof
+// that leaf n is contained in the subtree with leaves [lo, hi).
+// It appends those indexes to need and returns the result.
+// See https://tools.ietf.org/html/rfc6962#section-2.1.1
+func leafProofIndex(lo, hi, n int64, need []int64) []int64 {
+ // See leafProof below for commentary.
+ if !(lo <= n && n < hi) {
+ panic("tlog: bad math in leafProofIndex")
+ }
+ if lo+1 == hi {
+ return need
+ }
+ if k, _ := maxpow2(hi - lo); n < lo+k {
+ need = leafProofIndex(lo, lo+k, n, need)
+ need = subTreeIndex(lo+k, hi, need)
+ } else {
+ need = subTreeIndex(lo, lo+k, need)
+ need = leafProofIndex(lo+k, hi, n, need)
+ }
+ return need
+}
+
+// leafProof constructs the proof that leaf n is contained in the subtree with leaves [lo, hi).
+// It returns any leftover hashes as well.
+// See https://tools.ietf.org/html/rfc6962#section-2.1.1
+func leafProof(lo, hi, n int64, hashes []Hash) (RecordProof, []Hash) {
+ // We must have lo <= n < hi or else the code here has a bug.
+ if !(lo <= n && n < hi) {
+ panic("tlog: bad math in leafProof")
+ }
+
+ if lo+1 == hi { // n == lo
+ // Reached the leaf node.
+ // The verifier knows what the leaf hash is, so we don't need to send it.
+ return RecordProof{}, hashes
+ }
+
+ // Walk down the tree toward n.
+ // Record the hash of the path not taken (needed for verifying the proof).
+ var p RecordProof
+ var th Hash
+ if k, _ := maxpow2(hi - lo); n < lo+k {
+ // n is on left side
+ p, hashes = leafProof(lo, lo+k, n, hashes)
+ th, hashes = subTreeHash(lo+k, hi, hashes)
+ } else {
+ // n is on right side
+ th, hashes = subTreeHash(lo, lo+k, hashes)
+ p, hashes = leafProof(lo+k, hi, n, hashes)
+ }
+ return append(p, th), hashes
+}
+
+var errProofFailed = errors.New("invalid transparency proof")
+
+// CheckRecord verifies that p is a valid proof that the tree of size t
+// with hash th has an n'th record with hash h.
+func CheckRecord(p RecordProof, t int64, th Hash, n int64, h Hash) error {
+ if t < 0 || n < 0 || n >= t {
+ return fmt.Errorf("tlog: invalid inputs in CheckRecord")
+ }
+ th2, err := runRecordProof(p, 0, t, n, h)
+ if err != nil {
+ return err
+ }
+ if th2 == th {
+ return nil
+ }
+ return errProofFailed
+}
+
+// runRecordProof runs the proof p that leaf n is contained in the subtree with leaves [lo, hi).
+// Running the proof means constructing and returning the implied hash of that
+// subtree.
+func runRecordProof(p RecordProof, lo, hi, n int64, leafHash Hash) (Hash, error) {
+ // We must have lo <= n < hi or else the code here has a bug.
+ if !(lo <= n && n < hi) {
+ panic("tlog: bad math in runRecordProof")
+ }
+
+ if lo+1 == hi { // m == lo
+ // Reached the leaf node.
+ // The proof must not have any unnecessary hashes.
+ if len(p) != 0 {
+ return Hash{}, errProofFailed
+ }
+ return leafHash, nil
+ }
+
+ if len(p) == 0 {
+ return Hash{}, errProofFailed
+ }
+
+ k, _ := maxpow2(hi - lo)
+ if n < lo+k {
+ th, err := runRecordProof(p[:len(p)-1], lo, lo+k, n, leafHash)
+ if err != nil {
+ return Hash{}, err
+ }
+ return NodeHash(th, p[len(p)-1]), nil
+ } else {
+ th, err := runRecordProof(p[:len(p)-1], lo+k, hi, n, leafHash)
+ if err != nil {
+ return Hash{}, err
+ }
+ return NodeHash(p[len(p)-1], th), nil
+ }
+}
+
+// A TreeProof is a verifiable proof that a particular log tree contains
+// as a prefix all records present in an earlier tree.
+// RFC 6962 calls this a “Merkle consistency proof.”
+type TreeProof []Hash
+
+// ProveTree returns the proof that the tree of size t contains
+// as a prefix all the records from the tree of smaller size n.
+func ProveTree(t, n int64, h HashReader) (TreeProof, error) {
+ if t < 1 || n < 1 || n > t {
+ return nil, fmt.Errorf("tlog: invalid inputs in ProveTree")
+ }
+ indexes := treeProofIndex(0, t, n, nil)
+ if len(indexes) == 0 {
+ return TreeProof{}, nil
+ }
+ hashes, err := h.ReadHashes(indexes)
+ if err != nil {
+ return nil, err
+ }
+ if len(hashes) != len(indexes) {
+ return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
+ }
+
+ p, hashes := treeProof(0, t, n, hashes)
+ if len(hashes) != 0 {
+ panic("tlog: bad index math in ProveTree")
+ }
+ return p, nil
+}
+
+// treeProofIndex builds the list of indexes needed to construct
+// the sub-proof related to the subtree containing records [lo, hi).
+// See https://tools.ietf.org/html/rfc6962#section-2.1.2.
+func treeProofIndex(lo, hi, n int64, need []int64) []int64 {
+ // See treeProof below for commentary.
+ if !(lo < n && n <= hi) {
+ panic("tlog: bad math in treeProofIndex")
+ }
+
+ if n == hi {
+ if lo == 0 {
+ return need
+ }
+ return subTreeIndex(lo, hi, need)
+ }
+
+ if k, _ := maxpow2(hi - lo); n <= lo+k {
+ need = treeProofIndex(lo, lo+k, n, need)
+ need = subTreeIndex(lo+k, hi, need)
+ } else {
+ need = subTreeIndex(lo, lo+k, need)
+ need = treeProofIndex(lo+k, hi, n, need)
+ }
+ return need
+}
+
+// treeProof constructs the sub-proof related to the subtree containing records [lo, hi).
+// It returns any leftover hashes as well.
+// See https://tools.ietf.org/html/rfc6962#section-2.1.2.
+func treeProof(lo, hi, n int64, hashes []Hash) (TreeProof, []Hash) {
+ // We must have lo < n <= hi or else the code here has a bug.
+ if !(lo < n && n <= hi) {
+ panic("tlog: bad math in treeProof")
+ }
+
+ // Reached common ground.
+ if n == hi {
+ if lo == 0 {
+ // This subtree corresponds exactly to the old tree.
+ // The verifier knows that hash, so we don't need to send it.
+ return TreeProof{}, hashes
+ }
+ th, hashes := subTreeHash(lo, hi, hashes)
+ return TreeProof{th}, hashes
+ }
+
+ // Interior node for the proof.
+ // Decide whether to walk down the left or right side.
+ var p TreeProof
+ var th Hash
+ if k, _ := maxpow2(hi - lo); n <= lo+k {
+ // m is on left side
+ p, hashes = treeProof(lo, lo+k, n, hashes)
+ th, hashes = subTreeHash(lo+k, hi, hashes)
+ } else {
+ // m is on right side
+ th, hashes = subTreeHash(lo, lo+k, hashes)
+ p, hashes = treeProof(lo+k, hi, n, hashes)
+ }
+ return append(p, th), hashes
+}
+
+// CheckTree verifies that p is a valid proof that the tree of size t with hash th
+// contains as a prefix the tree of size n with hash h.
+func CheckTree(p TreeProof, t int64, th Hash, n int64, h Hash) error {
+ if t < 1 || n < 1 || n > t {
+ return fmt.Errorf("tlog: invalid inputs in CheckTree")
+ }
+ h2, th2, err := runTreeProof(p, 0, t, n, h)
+ if err != nil {
+ return err
+ }
+ if th2 == th && h2 == h {
+ return nil
+ }
+ return errProofFailed
+}
+
+// runTreeProof runs the sub-proof p related to the subtree containing records [lo, hi),
+// where old is the hash of the old tree with n records.
+// Running the proof means constructing and returning the implied hashes of that
+// subtree in both the old and new tree.
+func runTreeProof(p TreeProof, lo, hi, n int64, old Hash) (Hash, Hash, error) {
+ // We must have lo < n <= hi or else the code here has a bug.
+ if !(lo < n && n <= hi) {
+ panic("tlog: bad math in runTreeProof")
+ }
+
+ // Reached common ground.
+ if n == hi {
+ if lo == 0 {
+ if len(p) != 0 {
+ return Hash{}, Hash{}, errProofFailed
+ }
+ return old, old, nil
+ }
+ if len(p) != 1 {
+ return Hash{}, Hash{}, errProofFailed
+ }
+ return p[0], p[0], nil
+ }
+
+ if len(p) == 0 {
+ return Hash{}, Hash{}, errProofFailed
+ }
+
+ // Interior node for the proof.
+ k, _ := maxpow2(hi - lo)
+ if n <= lo+k {
+ oh, th, err := runTreeProof(p[:len(p)-1], lo, lo+k, n, old)
+ if err != nil {
+ return Hash{}, Hash{}, err
+ }
+ return oh, NodeHash(th, p[len(p)-1]), nil
+ } else {
+ oh, th, err := runTreeProof(p[:len(p)-1], lo+k, hi, n, old)
+ if err != nil {
+ return Hash{}, Hash{}, err
+ }
+ return NodeHash(p[len(p)-1], oh), NodeHash(p[len(p)-1], th), nil
+ }
+}
diff --git a/sumdb/tlog/tlog_test.go b/sumdb/tlog/tlog_test.go
new file mode 100644
index 0000000..584e728
--- /dev/null
+++ b/sumdb/tlog/tlog_test.go
@@ -0,0 +1,269 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tlog
+
+import (
+ "bytes"
+ "fmt"
+ "testing"
+)
+
+type testHashStorage []Hash
+
+func (t testHashStorage) ReadHash(level int, n int64) (Hash, error) {
+ return t[StoredHashIndex(level, n)], nil
+}
+
+func (t testHashStorage) ReadHashes(index []int64) ([]Hash, error) {
+ // It's not required by HashReader that indexes be in increasing order,
+ // but check that the functions we are testing only ever ask for
+ // indexes in increasing order.
+ for i := 1; i < len(index); i++ {
+ if index[i-1] >= index[i] {
+ panic("indexes out of order")
+ }
+ }
+
+ out := make([]Hash, len(index))
+ for i, x := range index {
+ out[i] = t[x]
+ }
+ return out, nil
+}
+
+type testTilesStorage struct {
+ unsaved int
+ m map[Tile][]byte
+}
+
+func (t testTilesStorage) Height() int {
+ return 2
+}
+
+func (t *testTilesStorage) SaveTiles(tiles []Tile, data [][]byte) {
+ t.unsaved -= len(tiles)
+}
+
+func (t *testTilesStorage) ReadTiles(tiles []Tile) ([][]byte, error) {
+ out := make([][]byte, len(tiles))
+ for i, tile := range tiles {
+ out[i] = t.m[tile]
+ }
+ t.unsaved += len(tiles)
+ return out, nil
+}
+
+func TestTree(t *testing.T) {
+ var trees []Hash
+ var leafhashes []Hash
+ var storage testHashStorage
+ tiles := make(map[Tile][]byte)
+ const testH = 2
+ for i := int64(0); i < 100; i++ {
+ data := []byte(fmt.Sprintf("leaf %d", i))
+ hashes, err := StoredHashes(i, data, storage)
+ if err != nil {
+ t.Fatal(err)
+ }
+ leafhashes = append(leafhashes, RecordHash(data))
+ oldStorage := len(storage)
+ storage = append(storage, hashes...)
+ if count := StoredHashCount(i + 1); count != int64(len(storage)) {
+ t.Errorf("StoredHashCount(%d) = %d, have %d StoredHashes", i+1, count, len(storage))
+ }
+ th, err := TreeHash(i+1, storage)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ for _, tile := range NewTiles(testH, i, i+1) {
+ data, err := ReadTileData(tile, storage)
+ if err != nil {
+ t.Fatal(err)
+ }
+ old := Tile{H: tile.H, L: tile.L, N: tile.N, W: tile.W - 1}
+ oldData := tiles[old]
+ if len(oldData) != len(data)-HashSize || !bytes.Equal(oldData, data[:len(oldData)]) {
+ t.Fatalf("tile %v not extending earlier tile %v", tile.Path(), old.Path())
+ }
+ tiles[tile] = data
+ }
+ for _, tile := range NewTiles(testH, 0, i+1) {
+ data, err := ReadTileData(tile, storage)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !bytes.Equal(tiles[tile], data) {
+ t.Fatalf("mismatch at %+v", tile)
+ }
+ }
+ for _, tile := range NewTiles(testH, i/2, i+1) {
+ data, err := ReadTileData(tile, storage)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !bytes.Equal(tiles[tile], data) {
+ t.Fatalf("mismatch at %+v", tile)
+ }
+ }
+
+ // Check that all the new hashes are readable from their tiles.
+ for j := oldStorage; j < len(storage); j++ {
+ tile := TileForIndex(testH, int64(j))
+ data, ok := tiles[tile]
+ if !ok {
+ t.Log(NewTiles(testH, 0, i+1))
+ t.Fatalf("TileForIndex(%d, %d) = %v, not yet stored (i=%d, stored %d)", testH, j, tile.Path(), i, len(storage))
+ continue
+ }
+ h, err := HashFromTile(tile, data, int64(j))
+ if err != nil {
+ t.Fatal(err)
+ }
+ if h != storage[j] {
+ t.Errorf("HashFromTile(%v, %d) = %v, want %v", tile.Path(), int64(j), h, storage[j])
+ }
+ }
+
+ trees = append(trees, th)
+
+ // Check that leaf proofs work, for all trees and leaves so far.
+ for j := int64(0); j <= i; j++ {
+ p, err := ProveRecord(i+1, j, storage)
+ if err != nil {
+ t.Fatalf("ProveRecord(%d, %d): %v", i+1, j, err)
+ }
+ if err := CheckRecord(p, i+1, th, j, leafhashes[j]); err != nil {
+ t.Fatalf("CheckRecord(%d, %d): %v", i+1, j, err)
+ }
+ for k := range p {
+ p[k][0] ^= 1
+ if err := CheckRecord(p, i+1, th, j, leafhashes[j]); err == nil {
+ t.Fatalf("CheckRecord(%d, %d) succeeded with corrupt proof hash #%d!", i+1, j, k)
+ }
+ p[k][0] ^= 1
+ }
+ }
+
+ // Check that leaf proofs work using TileReader.
+ // To prove a leaf that way, all you have to do is read and verify its hash.
+ storage := &testTilesStorage{m: tiles}
+ thr := TileHashReader(Tree{i + 1, th}, storage)
+ for j := int64(0); j <= i; j++ {
+ h, err := thr.ReadHashes([]int64{StoredHashIndex(0, j)})
+ if err != nil {
+ t.Fatalf("TileHashReader(%d).ReadHashes(%d): %v", i+1, j, err)
+ }
+ if h[0] != leafhashes[j] {
+ t.Fatalf("TileHashReader(%d).ReadHashes(%d) returned wrong hash", i+1, j)
+ }
+
+ // Even though reading the hash suffices,
+ // check we can generate the proof too.
+ p, err := ProveRecord(i+1, j, thr)
+ if err != nil {
+ t.Fatalf("ProveRecord(%d, %d, TileHashReader(%d)): %v", i+1, j, i+1, err)
+ }
+ if err := CheckRecord(p, i+1, th, j, leafhashes[j]); err != nil {
+ t.Fatalf("CheckRecord(%d, %d, TileHashReader(%d)): %v", i+1, j, i+1, err)
+ }
+ }
+ if storage.unsaved != 0 {
+ t.Fatalf("TileHashReader(%d) did not save %d tiles", i+1, storage.unsaved)
+ }
+
+ // Check that ReadHashes will give an error if the index is not in the tree.
+ if _, err := thr.ReadHashes([]int64{(i + 1) * 2}); err == nil {
+ t.Fatalf("TileHashReader(%d).ReadHashes(%d) for index not in tree <nil>, want err", i, i+1)
+ }
+ if storage.unsaved != 0 {
+ t.Fatalf("TileHashReader(%d) did not save %d tiles", i+1, storage.unsaved)
+ }
+
+ // Check that tree proofs work, for all trees so far, using TileReader.
+ // To prove a tree that way, all you have to do is compute and verify its hash.
+ for j := int64(0); j <= i; j++ {
+ h, err := TreeHash(j+1, thr)
+ if err != nil {
+ t.Fatalf("TreeHash(%d, TileHashReader(%d)): %v", j, i+1, err)
+ }
+ if h != trees[j] {
+ t.Fatalf("TreeHash(%d, TileHashReader(%d)) = %x, want %x (%v)", j, i+1, h[:], trees[j][:], trees[j])
+ }
+
+ // Even though computing the subtree hash suffices,
+ // check that we can generate the proof too.
+ p, err := ProveTree(i+1, j+1, thr)
+ if err != nil {
+ t.Fatalf("ProveTree(%d, %d): %v", i+1, j+1, err)
+ }
+ if err := CheckTree(p, i+1, th, j+1, trees[j]); err != nil {
+ t.Fatalf("CheckTree(%d, %d): %v [%v]", i+1, j+1, err, p)
+ }
+ for k := range p {
+ p[k][0] ^= 1
+ if err := CheckTree(p, i+1, th, j+1, trees[j]); err == nil {
+ t.Fatalf("CheckTree(%d, %d) succeeded with corrupt proof hash #%d!", i+1, j+1, k)
+ }
+ p[k][0] ^= 1
+ }
+ }
+ if storage.unsaved != 0 {
+ t.Fatalf("TileHashReader(%d) did not save %d tiles", i+1, storage.unsaved)
+ }
+ }
+}
+
+func TestSplitStoredHashIndex(t *testing.T) {
+ for l := 0; l < 10; l++ {
+ for n := int64(0); n < 100; n++ {
+ x := StoredHashIndex(l, n)
+ l1, n1 := SplitStoredHashIndex(x)
+ if l1 != l || n1 != n {
+ t.Fatalf("StoredHashIndex(%d, %d) = %d, but SplitStoredHashIndex(%d) = %d, %d", l, n, x, x, l1, n1)
+ }
+ }
+ }
+}
+
+// TODO(rsc): Test invalid paths too, like "tile/3/5/123/456/078".
+var tilePaths = []struct {
+ path string
+ tile Tile
+}{
+ {"tile/4/0/001", Tile{4, 0, 1, 16}},
+ {"tile/4/0/001.p/5", Tile{4, 0, 1, 5}},
+ {"tile/3/5/x123/x456/078", Tile{3, 5, 123456078, 8}},
+ {"tile/3/5/x123/x456/078.p/2", Tile{3, 5, 123456078, 2}},
+ {"tile/1/0/x003/x057/500", Tile{1, 0, 3057500, 2}},
+ {"tile/3/5/123/456/078", Tile{}},
+ {"tile/3/-1/123/456/078", Tile{}},
+ {"tile/1/data/x003/x057/500", Tile{1, -1, 3057500, 2}},
+}
+
+func TestTilePath(t *testing.T) {
+ for _, tt := range tilePaths {
+ if tt.tile.H > 0 {
+ p := tt.tile.Path()
+ if p != tt.path {
+ t.Errorf("%+v.Path() = %q, want %q", tt.tile, p, tt.path)
+ }
+ }
+ tile, err := ParseTilePath(tt.path)
+ if err != nil {
+ if tt.tile.H == 0 {
+ // Expected error.
+ continue
+ }
+ t.Errorf("ParseTilePath(%q): %v", tt.path, err)
+ } else if tile != tt.tile {
+ if tt.tile.H == 0 {
+ t.Errorf("ParseTilePath(%q): expected error, got %+v", tt.path, tt.tile)
+ continue
+ }
+ t.Errorf("ParseTilePath(%q) = %+v, want %+v", tt.path, tile, tt.tile)
+ }
+ }
+}