sumdb/dirhash: directory tree hash algorithm
Copied from cmd/go/internal/dirhash, with additional doc comments.
For golang/go#31761.
Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
Reviewed-on: https://go-review.googlesource.com/c/mod/+/176464
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
diff --git a/sumdb/dirhash/hash.go b/sumdb/dirhash/hash.go
new file mode 100644
index 0000000..ef5df6f
--- /dev/null
+++ b/sumdb/dirhash/hash.go
@@ -0,0 +1,132 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package dirhash defines hashes over directory trees.
+// These hashes are recorded in go.sum files and in the Go checksum database,
+// to allow verifying that a newly-downloaded module has the expected content.
+package dirhash
+
+import (
+ "archive/zip"
+ "crypto/sha256"
+ "encoding/base64"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "sort"
+ "strings"
+)
+
+// DefaultHash is the default hash function used in new go.sum entries; it is currently Hash1.
+var DefaultHash Hash = Hash1
+
+// A Hash is a directory hash function.
+// It accepts a list of file names along with a function that opens the content of each named file.
+// It opens, reads, hashes, and closes each file and returns the overall directory hash as a string.
+type Hash func(files []string, open func(string) (io.ReadCloser, error)) (string, error)
+
+// Hash1 is the "h1:" directory hash function, using SHA-256.
+//
+// Hash1 is "h1:" followed by the base64-encoded SHA-256 hash of a summary
+// prepared as if by the Unix command:
+//
+//	sha256sum $(find . -type f | sort) | sha256sum
+//
+// More precisely, the hashed summary contains a single line for each file in the list,
+// ordered by sort.Strings applied to the file names, where each line consists of
+// the hexadecimal SHA-256 hash of the file content,
+// two spaces (U+0020), the file name, and a newline (U+000A).
+//
+// File names with newlines (U+000A) are disallowed.
+func Hash1(files []string, open func(string) (io.ReadCloser, error)) (string, error) {
+	h := sha256.New()
+	files = append([]string(nil), files...) // sort a private copy; the caller's slice keeps its order
+	sort.Strings(files)
+	for _, file := range files {
+		if strings.Contains(file, "\n") {
+			return "", errors.New("dirhash: filenames with newlines are not supported")
+		}
+		r, err := open(file)
+		if err != nil {
+			return "", err
+		}
+		hf := sha256.New()
+		_, err = io.Copy(hf, r)
+		r.Close() // close before checking the copy error so the file is released on every path
+		if err != nil {
+			return "", err
+		}
+		fmt.Fprintf(h, "%x  %s\n", hf.Sum(nil), file) // two spaces, as documented above
+	}
+	return "h1:" + base64.StdEncoding.EncodeToString(h.Sum(nil)), nil
+}
+
+// HashDir computes the hash of the directory tree rooted at dir on the
+// local file system. In the file names handed to the hash function,
+// the leading directory name dir is replaced by prefix.
+func HashDir(dir, prefix string, hash Hash) (string, error) {
+	names, err := DirFiles(dir, prefix)
+	if err != nil {
+		return "", err
+	}
+	return hash(names, func(name string) (io.ReadCloser, error) {
+		onDisk := filepath.Join(dir, strings.TrimPrefix(name, prefix)) // map the hashed name back to its real path
+		return os.Open(onDisk)
+	})
+}
+
+// DirFiles returns the list of files in the tree rooted at dir,
+// replacing the directory name dir with prefix in each name; it fails if dir is not a directory.
+// The resulting names always use forward slashes.
+func DirFiles(dir, prefix string) ([]string, error) {
+	var files []string
+	dir = filepath.Clean(dir)
+	err := filepath.Walk(dir, func(file string, info os.FileInfo, err error) error {
+		switch {
+		case err != nil:
+			return err
+		case info.IsDir():
+			return nil // directories contribute no entry of their own
+		case file == dir:
+			return fmt.Errorf("%s is not a directory", dir) // the root itself is a non-directory
+		}
+		rel, err := filepath.Rel(dir, file) // also correct when dir is "." or a file system root
+		if err == nil {
+			files = append(files, filepath.ToSlash(filepath.Join(prefix, rel)))
+		}
+		return err
+	})
+	if err != nil {
+		return nil, err
+	}
+	return files, nil
+}
+
+// HashZip hashes the files stored in the zip archive named by zipfile.
+// The hash covers only each entry's name and content:
+// the exact zip file format encoding, compression method,
+// per-file modification times, and other metadata do not affect it.
+func HashZip(zipfile string, hash Hash) (string, error) {
+	rc, err := zip.OpenReader(zipfile)
+	if err != nil {
+		return "", err
+	}
+	defer rc.Close()
+	var names []string
+	byName := make(map[string]*zip.File)
+	for _, entry := range rc.File {
+		names = append(names, entry.Name)
+		byName[entry.Name] = entry
+	}
+	return hash(names, func(name string) (io.ReadCloser, error) {
+		// Every name passed to hash was taken from the archive, so a miss should never happen.
+		entry := byName[name]
+		if entry == nil {
+			return nil, fmt.Errorf("file %q not found in zip", name)
+		}
+		return entry.Open()
+	})
+}
diff --git a/sumdb/dirhash/hash_test.go b/sumdb/dirhash/hash_test.go
new file mode 100644
index 0000000..ed463c1
--- /dev/null
+++ b/sumdb/dirhash/hash_test.go
@@ -0,0 +1,135 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package dirhash
+
+import (
+ "archive/zip"
+ "crypto/sha256"
+ "encoding/base64"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+// h returns the lowercase hexadecimal SHA-256 digest of s,
+// the same form Hash1 writes at the start of each summary line.
+func h(s string) string { return fmt.Sprintf("%x", sha256.Sum256([]byte(s))) }
+
+func htop(k, s string) string {
+	digest := sha256.Sum256([]byte(s)) // raw 32-byte SHA-256 of s
+	return k + ":" + base64.StdEncoding.EncodeToString(digest[:])
+}
+
+// TestHash1 checks Hash1 against a hand-built summary and checks that newlines in names are rejected.
+func TestHash1(t *testing.T) {
+	files := []string{"xyz", "abc"} // deliberately unsorted
+	open := func(name string) (io.ReadCloser, error) {
+		return ioutil.NopCloser(strings.NewReader("data for " + name)), nil
+	}
+	// Each summary line is "<hex hash>  <name>\n" with two spaces, in sorted name order, as Hash1 documents.
+	want := htop("h1", fmt.Sprintf("%s  %s\n%s  %s\n", h("data for abc"), "abc", h("data for xyz"), "xyz"))
+	out, err := Hash1(files, open)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if out != want {
+		t.Errorf("Hash1(...) = %s, want %s", out, want)
+	}
+	if _, err := Hash1([]string{"xyz", "a\nbc"}, open); err == nil {
+		t.Error("Hash1: expected error on newline in filenames")
+	}
+}
+
+// TestHashDir checks HashDir on a temporary tree; want uses Hash1's documented two-space summary lines.
+func TestHashDir(t *testing.T) {
+	dir, err := ioutil.TempDir("", "dirhash-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(dir)
+	for _, name := range []string{"xyz", "abc"} {
+		if err := ioutil.WriteFile(filepath.Join(dir, name), []byte("data for "+name), 0666); err != nil {
+			t.Fatal(err)
+		}
+	}
+	want := htop("h1", fmt.Sprintf("%s  %s\n%s  %s\n", h("data for abc"), "prefix/abc", h("data for xyz"), "prefix/xyz"))
+	out, err := HashDir(dir, "prefix", Hash1)
+	if err != nil {
+		t.Fatalf("HashDir: %v", err)
+	}
+	if out != want {
+		t.Errorf("HashDir(...) = %s, want %s", out, want)
+	}
+}
+
+// TestHashZip checks that HashZip hashes the entries of a zip file by name and content.
+func TestHashZip(t *testing.T) {
+	f, err := ioutil.TempFile("", "dirhash-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(f.Name())
+	defer f.Close() // releases the file if a Fatal fires before the explicit Close below
+
+	z := zip.NewWriter(f)
+	for _, name := range []string{"xyz", "abc"} {
+		w, err := z.Create("prefix/" + name)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if _, err := w.Write([]byte("data for " + name)); err != nil {
+			t.Fatal(err)
+		}
+	}
+	if err := z.Close(); err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+	// Expected summary lines use two spaces between hash and name, per Hash1's documented format.
+	want := htop("h1", fmt.Sprintf("%s  %s\n%s  %s\n", h("data for abc"), "prefix/abc", h("data for xyz"), "prefix/xyz"))
+	out, err := HashZip(f.Name(), Hash1)
+	if err != nil {
+		t.Fatalf("HashZip: %v", err)
+	}
+	if out != want {
+		t.Errorf("HashZip(...) = %s, want %s", out, want)
+	}
+}
+
+// TestDirFiles checks that DirFiles lists all three regular files (one nested) under prefix.
+func TestDirFiles(t *testing.T) {
+	dir, err := ioutil.TempDir("", "dirfiles-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(dir)
+	if err := os.Mkdir(filepath.Join(dir, "subdir"), 0777); err != nil {
+		t.Fatal(err)
+	}
+	for _, name := range []string{"xyz", "abc", filepath.Join("subdir", "xyz")} {
+		if err := ioutil.WriteFile(filepath.Join(dir, name), []byte("data for "+name), 0666); err != nil {
+			t.Fatal(err)
+		}
+	}
+	prefix := "foo/bar@v2.3.4"
+	out, err := DirFiles(dir, prefix)
+	if err != nil {
+		t.Fatalf("DirFiles: %v", err)
+	}
+	if len(out) != 3 { // an empty result would otherwise pass the loop below vacuously
+		t.Errorf("DirFiles returned %d files %q, want 3", len(out), out)
+	}
+	for _, file := range out {
+		if !strings.HasPrefix(file, prefix) {
+			t.Errorf("Dir file = %s, want prefix %s", file, prefix)
+		}
+	}
+}