cmd/gitmirror: don't keep entire maintner corpus in memory
And don't download it all on start-up.
Fixes golang/go#35977
Change-Id: I00f079d585aad8bd536a37e027fba132cc137bbd
Reviewed-on: https://go-review.googlesource.com/c/build/+/210277
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Alexander Rakoczy <alex@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/cmd/gitmirror/gitmirror.go b/cmd/gitmirror/gitmirror.go
index 7a5ea4c..bbfef15 100644
--- a/cmd/gitmirror/gitmirror.go
+++ b/cmd/gitmirror/gitmirror.go
@@ -11,10 +11,8 @@
import (
"bytes"
"context"
- "crypto/sha1"
"flag"
"fmt"
- "io"
"io/ioutil"
"log"
"net"
@@ -708,42 +706,30 @@
}
func subscribeToMaintnerAndTickle() error {
- log.Printf("Loading maintner data.")
- t0 := time.Now()
ctx := context.Background()
- corpus, err := godata.Get(ctx)
- if err != nil {
- return err
- }
- log.Printf("Loaded maintner data in %v", time.Since(t0))
- last := map[string]string{} // go.googlesource.com repo base => digest of all refs
+ retryTicker := time.NewTicker(10 * time.Second)
+ defer retryTicker.Stop() // we never return, though
for {
- corpus.Gerrit().ForeachProjectUnsorted(func(gp *maintner.GerritProject) error {
- proj := path.Base(gp.ServerSlashProject())
- s1 := sha1.New()
- gp.ForeachNonChangeRef(func(ref string, hash maintner.GitHash) error {
- io.WriteString(s1, string(hash))
- return nil
- })
- sum := fmt.Sprintf("%x", s1.Sum(nil))
- lastSum := last[proj]
- if lastSum == sum {
- return nil
+ err := maintner.TailNetworkMutationSource(ctx, godata.Server, func(e maintner.MutationStreamEvent) error {
+ if e.Mutation != nil && e.Mutation.Gerrit != nil {
+ gm := e.Mutation.Gerrit
+ if strings.HasPrefix(gm.Project, "go.googlesource.com/") {
+ proj := strings.TrimPrefix(gm.Project, "go.googlesource.com/")
+ log.Printf("maintner refs for %s changed", gm.Project)
+ select {
+ case repoTickler(proj) <- true:
+ default:
+ }
+ }
}
- last[proj] = sum
- if lastSum == "" {
- return nil
- }
- log.Printf("maintner refs for %s changed", gp.ServerSlashProject())
- select {
- case repoTickler(proj) <- true:
- default:
- }
- return nil
+ return e.Err
})
- if err := corpus.Update(ctx); err != nil {
- return err
- }
+ log.Printf("maintner tail error: %v; sleeping+restarting", err)
+
+ // prevent retry looping faster than once every 10
+ // seconds; but usually retry immediately in the case
+ // where we've been running for a while already.
+ <-retryTicker.C
}
}
diff --git a/maintner/godata/godata.go b/maintner/godata/godata.go
index 3293bab..b2b30d4 100644
--- a/maintner/godata/godata.go
+++ b/maintner/godata/godata.go
@@ -19,6 +19,9 @@
"golang.org/x/build/maintner"
)
+// Server is the Go project's production maintner log.
+const Server = "https://maintner.golang.org/logs"
+
// Get returns the Go project's corpus, containing all Git commits,
// Github activity, and Gerrit activity and metadata since the
// beginning of the project.
@@ -46,7 +49,7 @@
if err := os.MkdirAll(targetDir, 0700); err != nil {
return nil, err
}
- mutSrc := maintner.NewNetworkMutationSource("https://maintner.golang.org/logs", targetDir)
+ mutSrc := maintner.NewNetworkMutationSource(Server, targetDir)
corpus := new(maintner.Corpus)
if err := corpus.Initialize(ctx, mutSrc); err != nil {
return nil, err