cmd/gitmirror: don't keep entire maintner corpus in memory

And don't download it all on start-up.

Fixes golang/go#35977

Change-Id: I00f079d585aad8bd536a37e027fba132cc137bbd
Reviewed-on: https://go-review.googlesource.com/c/build/+/210277
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Alexander Rakoczy <alex@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/cmd/gitmirror/gitmirror.go b/cmd/gitmirror/gitmirror.go
index 7a5ea4c..bbfef15 100644
--- a/cmd/gitmirror/gitmirror.go
+++ b/cmd/gitmirror/gitmirror.go
@@ -11,10 +11,8 @@
 import (
 	"bytes"
 	"context"
-	"crypto/sha1"
 	"flag"
 	"fmt"
-	"io"
 	"io/ioutil"
 	"log"
 	"net"
@@ -708,42 +706,30 @@
 }
 
 func subscribeToMaintnerAndTickle() error {
-	log.Printf("Loading maintner data.")
-	t0 := time.Now()
 	ctx := context.Background()
-	corpus, err := godata.Get(ctx)
-	if err != nil {
-		return err
-	}
-	log.Printf("Loaded maintner data in %v", time.Since(t0))
-	last := map[string]string{} // go.googlesource.com repo base => digest of all refs
+	retryTicker := time.NewTicker(10 * time.Second)
+	defer retryTicker.Stop() // we never return, though
 	for {
-		corpus.Gerrit().ForeachProjectUnsorted(func(gp *maintner.GerritProject) error {
-			proj := path.Base(gp.ServerSlashProject())
-			s1 := sha1.New()
-			gp.ForeachNonChangeRef(func(ref string, hash maintner.GitHash) error {
-				io.WriteString(s1, string(hash))
-				return nil
-			})
-			sum := fmt.Sprintf("%x", s1.Sum(nil))
-			lastSum := last[proj]
-			if lastSum == sum {
-				return nil
+		err := maintner.TailNetworkMutationSource(ctx, godata.Server, func(e maintner.MutationStreamEvent) error {
+			if e.Mutation != nil && e.Mutation.Gerrit != nil {
+				gm := e.Mutation.Gerrit
+				if strings.HasPrefix(gm.Project, "go.googlesource.com/") {
+					proj := strings.TrimPrefix(gm.Project, "go.googlesource.com/")
+					log.Printf("maintner refs for %s changed", gm.Project)
+					select {
+					case repoTickler(proj) <- true:
+					default:
+					}
+				}
 			}
-			last[proj] = sum
-			if lastSum == "" {
-				return nil
-			}
-			log.Printf("maintner refs for %s changed", gp.ServerSlashProject())
-			select {
-			case repoTickler(proj) <- true:
-			default:
-			}
-			return nil
+			return e.Err
 		})
-		if err := corpus.Update(ctx); err != nil {
-			return err
-		}
+		log.Printf("maintner tail error: %v; sleeping+restarting", err)
+
+		// prevent retry looping faster than once every 10
+		// seconds; but usually retry immediately in the case
+		// where we've been running for a while already.
+		<-retryTicker.C
 	}
 }
 
diff --git a/maintner/godata/godata.go b/maintner/godata/godata.go
index 3293bab..b2b30d4 100644
--- a/maintner/godata/godata.go
+++ b/maintner/godata/godata.go
@@ -19,6 +19,9 @@
 	"golang.org/x/build/maintner"
 )
 
+// Server is the Go project's production maintner log.
+const Server = "https://maintner.golang.org/logs"
+
 // Get returns the Go project's corpus, containing all Git commits,
 // Github activity, and Gerrit activity and metadata since the
 // beginning of the project.
@@ -46,7 +49,7 @@
 	if err := os.MkdirAll(targetDir, 0700); err != nil {
 		return nil, err
 	}
-	mutSrc := maintner.NewNetworkMutationSource("https://maintner.golang.org/logs", targetDir)
+	mutSrc := maintner.NewNetworkMutationSource(Server, targetDir)
 	corpus := new(maintner.Corpus)
 	if err := corpus.Initialize(ctx, mutSrc); err != nil {
 		return nil, err