internal/{worker,stdlib}: /fetch-std-master: avoid cloning go repo

On each call of the /fetch-std-master endpoint, we cloned the Go repo
for each supported branch to determine its current commit. Although
not a big deal, that was a bit wasteful, and it would also occasionally
fail obscurely.

Instead, we use git's remote list feature to get all the repo's refs in
one call. We then compare the hashes with the stored pseudo-versions
to see whether each branch has moved.

Change-Id: I1d70e63668fcf6d35b90b9c231255faec614a04e
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/349753
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/stdlib/stdlib.go b/internal/stdlib/stdlib.go
index 3b05de4..1f82179 100644
--- a/internal/stdlib/stdlib.go
+++ b/internal/stdlib/stdlib.go
@@ -325,12 +325,9 @@
 	if UseTestData {
 		refNames = testRefs
 	} else {
-		re := git.NewRemote(memory.NewStorage(), &config.RemoteConfig{
-			URLs: []string{GoRepoURL},
-		})
-		refs, err := re.List(&git.ListOptions{})
+		refs, err := remoteRefs()
 		if err != nil {
-			return nil, fmt.Errorf("re.List: %v", err)
+			return nil, err
 		}
 		for _, r := range refs {
 			refNames = append(refNames, r.Name())
@@ -347,6 +344,34 @@
 	return versions, nil
 }
 
+// ResolveSupportedBranches lists the remote refs and returns, for every ref
+// whose short name is in SupportedBranches, a map entry from name to hash.
+func ResolveSupportedBranches() (_ map[string]string, err error) {
+	defer derrors.Wrap(&err, "ResolveSupportedBranches")
+
+	refs, err := remoteRefs()
+	if err != nil {
+		return nil, err
+	}
+	// Branches that the remote does not report are simply absent from the map.
+	hashes := map[string]string{}
+	for _, ref := range refs {
+		if branch := ref.Name().Short(); SupportedBranches[branch] {
+			hashes[branch] = ref.Hash().String()
+		}
+	}
+	return hashes, nil
+}
+
+// remoteRefs lists the references of the remote at GoRepoURL, using a remote
+// backed by in-memory storage.
+func remoteRefs() (_ []*plumbing.Reference, err error) {
+	defer derrors.Wrap(&err, "remoteRefs")
+
+	cfg := &config.RemoteConfig{URLs: []string{GoRepoURL}}
+	return git.NewRemote(memory.NewStorage(), cfg).List(&git.ListOptions{})
+}
+
 // Directory returns the directory of the standard library relative to the repo root.
 func Directory(v string) string {
 	if semver.Compare(v, "v1.4.0-beta.1") >= 0 ||
@@ -456,8 +481,19 @@
 	return cdir, resolvedVersion, commitTime, nil
 }
 
+const pseudoHashLen = 12
+
 func newPseudoVersion(version string, commitTime time.Time, hash plumbing.Hash) string {
-	return fmt.Sprintf("%s-%s-%s", version, commitTime.Format("20060102150405"), hash.String()[:12])
+	return fmt.Sprintf("%s-%s-%s", version, commitTime.Format("20060102150405"), hash.String()[:pseudoHashLen])
+}
+
+// VersionMatchesHash reports whether v is a pseudo-version whose hash part
+// equals the first pseudoHashLen characters of hash. A shorter hash never matches.
+func VersionMatchesHash(v, hash string) bool {
+	if !version.IsPseudo(v) || len(hash) < pseudoHashLen {
+		return false
+	}
+	return v[len(v)-pseudoHashLen:] == hash[:pseudoHashLen]
 }
 
 // semanticVersion returns the semantic version corresponding to the
diff --git a/internal/stdlib/stdlib_test.go b/internal/stdlib/stdlib_test.go
index 811c8c1..969be4f 100644
--- a/internal/stdlib/stdlib_test.go
+++ b/internal/stdlib/stdlib_test.go
@@ -335,3 +335,31 @@
 		}
 	}
 }
+
+func TestVersionMatchesHash(t *testing.T) {
+	const pseudo = "v0.0.0-20210910212848-c8dfa306babb"
+	// The version's hash part is a prefix of this full commit hash.
+	if !VersionMatchesHash(pseudo, "c8dfa306babb91e88f8ba25329b3ef8aa11944e1") {
+		t.Error("got false, want true")
+	}
+	// Here the twelfth character differs, so the prefixes no longer agree.
+	if VersionMatchesHash(pseudo, "c8dfa306babXb91e88f8ba25329b3ef8aa11944e1") {
+		t.Error("got true, want false")
+	}
+}
+
+func TestResolveSupportedBranches(t *testing.T) {
+	hashes, err := ResolveSupportedBranches()
+	if err != nil {
+		t.Fatal(err)
+	}
+	// The hashes move as branches advance, so only the set of keys is checked.
+	for branch := range hashes {
+		if !SupportedBranches[branch] {
+			t.Errorf("got key %q not in SupportedBranches", branch)
+		}
+	}
+	if g, w := len(hashes), len(SupportedBranches); g != w {
+		t.Errorf("got %d hashes, want %d", g, w)
+	}
+}
diff --git a/internal/worker/server.go b/internal/worker/server.go
index 6bcee22..8051007 100644
--- a/internal/worker/server.go
+++ b/internal/worker/server.go
@@ -504,22 +504,25 @@
 
 func (s *Server) handleFetchStdSupportedBranches(w http.ResponseWriter, r *http.Request) (err error) {
 	defer derrors.Wrap(&err, "handleFetchStdSupportedBranches")
+	resolvedHashes, err := stdlib.ResolveSupportedBranches()
+	if err != nil {
+		return err
+	}
 	for requestedVersion := range stdlib.SupportedBranches {
-		_, resolvedVersion, _, err := stdlib.ContentDir(requestedVersion)
-		if err != nil {
-			return err
-		}
-		var resolvedVersionDB string
+		var schedule bool
+		resolvedHash := resolvedHashes[requestedVersion]
 		vm, err := s.db.GetVersionMap(r.Context(), stdlib.ModulePath, requestedVersion)
 		switch {
 		case err == nil:
-			resolvedVersionDB = vm.ResolvedVersion
+			schedule = !stdlib.VersionMatchesHash(vm.ResolvedVersion, resolvedHash)
+			log.Debugf(r.Context(), "stdlib branch %s: have %s, remote is %q; scheduling = %t",
+				requestedVersion, vm.ResolvedVersion, resolvedHash, schedule)
 		case errors.Is(err, derrors.NotFound):
-			resolvedVersionDB = ""
+			schedule = true
 		default:
 			return err
 		}
-		if resolvedVersionDB != resolvedVersion {
+		if schedule {
 			if _, err := s.queue.ScheduleFetch(r.Context(), stdlib.ModulePath, requestedVersion, nil); err != nil {
 				return fmt.Errorf("error scheduling fetch for %s: %w", requestedVersion, err)
 			}