maintner: make github poll more often after webhook wakeup fails to see new data

Sometimes GitHub sends a webhook update (which we only use as a
wake-up signal, ignoring the payload), but when we go to run our
sync-from-last-position code, we get cache hits from GitHub indicating
that nothing's new. It seems that GitHub sends webhooks before
invalidating its own caches.

This sometimes delays updates by up to 15 minutes, until our regular
backup poller catches them.

So, keep track of whether we've been woken up by the webhook and are
currently awaiting new data. If the subsequent poll finds nothing,
assume the "nothing new" result is bogus and reset our poll interval
to 1 second, doubling it after each empty poll until either we're
back at 15 minutes or we have new data.

Change-Id: Iff38517a8a8773eb13323870a80e0855084a58ef
Reviewed-on: https://go-review.googlesource.com/41392
Reviewed-by: Kevin Burke <kev@inburke.com>
diff --git a/maintner/github.go b/maintner/github.go
index 905ad50..5dd4f55 100644
--- a/maintner/github.go
+++ b/maintner/github.go
@@ -1207,19 +1207,46 @@
 		ghc:   github.NewClient(hc),
 	}
 	activityCh := gr.github.c.activityChan("github:" + gr.id.String())
+	var unfetchedActivity bool // got webhook update, but haven't seen new data yet
+	var sleepDelay time.Duration
 	for {
+		prevLastUpdate := p.lastUpdate
 		err := p.sync(ctx)
 		if err == context.Canceled || !loop {
 			return err
 		}
+		sawChanges := !p.lastUpdate.Equal(prevLastUpdate)
+		if sawChanges {
+			unfetchedActivity = false
+		}
+		// If we got woken up by a webhook, sometimes
+		// immediately polling Github for the data results in
+		// a cache hit saying nothing's changed. Don't believe
+		// it. Poll quickly with exponential backoff until
+		// we see what we're expecting.
+		if unfetchedActivity {
+			if sleepDelay == 0 {
+				sleepDelay = 1 * time.Second
+			} else {
+				sleepDelay *= 2
+				if sleepDelay > 15*time.Minute {
+					sleepDelay = 15 * time.Minute
+				}
+			}
+			p.logf("unfetched activity; re-polling in %v", sleepDelay)
+		} else {
+			sleepDelay = 15 * time.Minute
+		}
 		p.logf("sync = %v; sleeping", err)
-		timer := time.NewTimer(15 * time.Minute)
+		timer := time.NewTimer(sleepDelay)
 		select {
 		case <-ctx.Done():
 			timer.Stop()
 			return ctx.Err()
 		case <-activityCh:
 			timer.Stop()
+			unfetchedActivity = true
+			sleepDelay = 0
 		case <-timer.C:
 		}
 	}
@@ -1228,10 +1255,11 @@
 // A githubRepoPoller updates the Corpus (gr.c) to have the latest
 // version of the Github repo rp, using the Github client ghc.
 type githubRepoPoller struct {
-	c     *Corpus // shortcut for gr.github.c
-	gr    *GitHubRepo
-	ghc   *github.Client
-	token string
+	c          *Corpus // shortcut for gr.github.c
+	gr         *GitHubRepo
+	ghc        *github.Client
+	token      string
+	lastUpdate time.Time // modified by sync
 }
 
 func (p *githubRepoPoller) Owner() string { return p.gr.id.Owner }
@@ -1285,6 +1313,7 @@
 		return nil
 	}
 	p.c.addMutation(&maintpb.Mutation{Github: mut})
+	p.lastUpdate = time.Now()
 	return nil
 }
 
@@ -1318,6 +1347,7 @@
 		return nil
 	}
 	p.c.addMutation(&maintpb.Mutation{Github: mut})
+	p.lastUpdate = time.Now()
 	return nil
 }
 
@@ -1466,6 +1496,7 @@
 			changes++
 			p.logf("changed issue %d: %s", is.GetNumber(), is.GetTitle())
 			c.addMutation(mp)
+			p.lastUpdate = time.Now()
 		}
 
 		if changes == 0 {
@@ -1516,6 +1547,7 @@
 			}
 			p.logf("modified issue %d: %s", issue.GetNumber(), issue.GetTitle())
 			c.addMutation(mp)
+			p.lastUpdate = time.Now()
 		}
 	}