internal/fetch: use FS for large forks
Replace ZipSignature with FSSignature.
For golang/go#47834
Change-Id: Ie6cc6053a21260b3e63a1b47359a95731d1be63b
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/343951
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/fetch/fetch.go b/internal/fetch/fetch.go
index 91806f7..b691e31 100644
--- a/internal/fetch/fetch.go
+++ b/internal/fetch/fetch.go
@@ -10,6 +10,7 @@
"context"
"errors"
"fmt"
+ "io/fs"
"net/http"
"path"
"sort"
@@ -254,7 +255,11 @@
// see if this is a fork. The intent is to avoid processing certain known
// large modules, not to find every fork.
if !fr.HasGoMod {
- forkedModule, err := forkedFrom(zipReader, fr.ModulePath, fr.ResolvedVersion)
+ contentsDir, err := fs.Sub(zipReader, fr.ModulePath+"@"+fr.ResolvedVersion)
+ if err != nil {
+ return fi, err
+ }
+ forkedModule, err := forkedFrom(contentsDir, fr.ModulePath, fr.ResolvedVersion)
if err != nil {
return fi, err
}
diff --git a/internal/fetch/gen_zip_signatures.go b/internal/fetch/gen_zip_signatures.go
index 904abb6..6ee1553 100644
--- a/internal/fetch/gen_zip_signatures.go
+++ b/internal/fetch/gen_zip_signatures.go
@@ -18,6 +18,7 @@
"flag"
"fmt"
"go/format"
+ "io/fs"
"io/ioutil"
"log"
"sort"
@@ -141,7 +142,11 @@
if err != nil {
return "", err
}
- sig, err := fetch.ZipSignature(zr, mv.String())
+ contentsDir, err := fs.Sub(zr, mv.String())
+ if err != nil {
+ return "", err
+ }
+ sig, err := fetch.FSSignature(contentsDir)
if err != nil {
return "", err
}
diff --git a/internal/fetch/largefork.go b/internal/fetch/largefork.go
index ca766c0..d632a4b 100644
--- a/internal/fetch/largefork.go
+++ b/internal/fetch/largefork.go
@@ -9,29 +9,43 @@
//go:generate go run gen_zip_signatures.go -v
import (
- "archive/zip"
"crypto/sha256"
+ "errors"
"fmt"
"io"
+ "io/fs"
"sort"
- "strings"
)
-// ZipSignature calculates a signature that uniquely identifies a zip file.
-// It hashes every filename and its contents. Filenames must begin with prefix,
-// which is not included in the hash.
-func ZipSignature(r *zip.Reader, prefix string) (string, error) {
- files := make([]*zip.File, len(r.File))
- copy(files, r.File)
- sort.Slice(files, func(i, j int) bool { return files[i].Name < files[j].Name })
- h := sha256.New()
- for _, f := range files {
- if !strings.HasPrefix(f.Name, prefix) {
- return "", fmt.Errorf("zip file %q does not have prefix %q", f.Name, prefix)
+// FSSignature calculates a signature that uniquely identifies a filesystem.
+// It hashes every filename and its contents.
+func FSSignature(fsys fs.FS) (string, error) {
+ // To match the behavior of the old ZipSignature function that this is
+ // based on, sort the paths from fs.WalkDir. Although fs.WalkDir traverses
+ // the files in lexical order within each directory, that is not the same
+ // order as sorting all the paths. For example, fs.WalkDir will return
+ // ["a/b", "a#b"], but because '#' comes before '/', sorting the paths
+ // swaps them.
+ var paths []string
+ err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
+ if err != nil {
+ return err
}
- io.WriteString(h, f.Name[len(prefix):])
+ if !d.IsDir() {
+ paths = append(paths, path)
+ }
+ return nil
+ })
+ if err != nil && !errors.Is(err, fs.ErrNotExist) { // we can get NotExist on an empty FS
+ return "", err
+ }
+ sort.Strings(paths)
+
+ h := sha256.New()
+ for _, path := range paths {
+ io.WriteString(h, "/"+path) // slash needed to match ZipSignature
h.Write([]byte{0})
- rc, err := f.Open()
+ rc, err := fsys.Open(path)
if err != nil {
return "", err
}
@@ -42,11 +56,11 @@
}
// forkedFrom returns a module that the current one has been forked from. It
-// consults a built-in list of modules and their zip signatures, and returns a
-// module path from that list if its zip file and version are identical to the
+// consults a built-in list of modules and their signatures, and returns a
+// module path from that list if its contents and version are identical to the
// given ones. If there is no matching module, it returns the empty string.
-func forkedFrom(z *zip.Reader, module, version string) (string, error) {
- sig, err := ZipSignature(z, module+"@"+version)
+func forkedFrom(moduleContents fs.FS, module, version string) (string, error) {
+ sig, err := FSSignature(moduleContents)
if err != nil {
return "", err
}
diff --git a/internal/fetch/largefork_test.go b/internal/fetch/largefork_test.go
index 436ee66..ac5fbf5 100644
--- a/internal/fetch/largefork_test.go
+++ b/internal/fetch/largefork_test.go
@@ -11,36 +11,36 @@
"testing"
)
-func TestZipSignature(t *testing.T) {
+func TestFSSignature(t *testing.T) {
zip1 := newzip(t, [][2]string{
- {"p/file1", "abc"},
- {"p/file2", "def"},
+ {"file1", "abc"},
+ {"file2", "def"},
})
zip2 := newzip(t, [][2]string{ // same files, different order, different prefix
- {"q/file2", "def"},
- {"q/file1", "abc"},
+ {"file2", "def"},
+ {"file1", "abc"},
})
zip3 := newzip(t, [][2]string{ // different files
- {"r/file1", "abc"},
- {"r/file2d", "ef"},
+ {"file1", "abc"},
+ {"file2d", "ef"},
})
- sig := func(z []byte, prefix string) string {
+ sig := func(z []byte) string {
r, err := zip.NewReader(bytes.NewReader(z), int64(len(z)))
if err != nil {
t.Fatal(err)
}
- s, err := ZipSignature(r, prefix)
+ s, err := FSSignature(r)
if err != nil {
t.Fatal(err)
}
return s
}
- if sig(zip1, "p") != sig(zip2, "q") {
+ if sig(zip1) != sig(zip2) {
t.Error("same files, different order: got different signatures, want same")
}
- if sig(zip1, "p") == sig(zip3, "r") {
+ if sig(zip1) == sig(zip3) {
t.Error("different files: got same signatures, wanted different")
}
}