cmd/coordinator: add missing code from CL 179419

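I flubbed my git in CL 179419 and left part of the change out. This
adds the pieces that were supposed to be included: the "basepin" health
checker in status.go, and the retry loop in createBasepinDisks that
records its result in basePinErr for that checker to report.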

Also updates some logging after watching it spam during testing.

Updates golang/go#21305

Change-Id: Icbf32a064d8725c46ffddd422938bd8ff208c829
Reviewed-on: https://go-review.googlesource.com/c/build/+/179617
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
diff --git a/cmd/coordinator/gce.go b/cmd/coordinator/gce.go
index c16d779..b00f64a 100644
--- a/cmd/coordinator/gce.go
+++ b/cmd/coordinator/gce.go
@@ -191,7 +191,7 @@
 	}
 
 	go gcePool.pollQuotaLoop()
-	go createBasepinDisks()
+	go createBasepinDisks(context.Background())
 	return nil
 }
 
@@ -701,29 +701,33 @@
 	}
 }
 
-// createBasepinDisks runs in the background on start-up and does a
-// best effort creation of zone-local copies of VM disk images, to
+// createBasepinDisks creates zone-local copies of VM disk images, to
 // speed up VM creations in the future.
 //
 // Other than a list call, this a no-op unless new VM images were
 // added or updated recently.
-func createBasepinDisks() {
+func createBasepinDisks(ctx context.Context) {
 	if !metadata.OnGCE() || (buildEnv != buildenv.Production && buildEnv != buildenv.Staging) {
 		return
 	}
-	t0 := time.Now()
-	ctx := context.Background()
-	bgc, err := buildgo.NewClient(ctx, buildEnv)
-	if err != nil {
-		log.Printf("basepin: NewClient: %v", err)
+	for {
+		t0 := time.Now()
+		bgc, err := buildgo.NewClient(ctx, buildEnv)
+		if err != nil {
+			log.Printf("basepin: NewClient: %v", err)
+			return
+		}
+		log.Printf("basepin: creating basepin disks...")
+		err = bgc.MakeBasepinDisks(ctx)
+		d := time.Since(t0).Round(time.Second / 10)
+		if err != nil {
+			basePinErr.Store(err.Error())
+			log.Printf("basepin: error creating basepin disks, after %v: %v", d, err)
+			time.Sleep(5 * time.Minute)
+			continue
+		}
+		basePinErr.Store("")
+		log.Printf("basepin: created basepin disks after %v", d)
 		return
 	}
-	log.Printf("basepin: creating basepin disks...")
-	err = bgc.MakeBasepinDisks(ctx)
-	d := time.Since(t0).Round(time.Second) / 10
-	if err != nil {
-		log.Printf("basepin: error creating basepin disks, after %v: %v", d, err)
-		return
-	}
-	log.Printf("basepin: created basepin disks after %v", d)
 }
diff --git a/cmd/coordinator/status.go b/cmd/coordinator/status.go
index ca5b9dc..e77f681 100644
--- a/cmd/coordinator/status.go
+++ b/cmd/coordinator/status.go
@@ -116,6 +116,11 @@
 	http.Handle("/status/"+hc.ID, healthCheckerHandler(hc))
 }
 
+// basePinErr is the status of the start-up time basepin disk creation
+// in gce.go. It's of type string; nil means no result yet, empty
+// string means success, and non-empty means an error.
+var basePinErr atomic.Value
+
 func init() {
 	addHealthChecker(newMacHealthChecker())
 	addHealthChecker(newScalewayHealthChecker())
@@ -123,6 +128,26 @@
 	addHealthChecker(newOSUPPC64Checker())
 	addHealthChecker(newOSUPPC64leChecker())
 	addHealthChecker(newJoyentChecker())
+	addHealthChecker(newBasepinChecker())
+}
+
+func newBasepinChecker() *healthChecker {
+	return &healthChecker{
+		ID:     "basepin",
+		Title:  "VM snapshots",
+		EnvURL: "https://golang.org/issue/21305",
+		Check: func(w *checkWriter) {
+			v := basePinErr.Load()
+			if v == nil {
+				w.warnf("still running")
+				return
+			}
+			if v == "" {
+				return
+			}
+			w.error(v.(string))
+		},
+	}
 }
 
 func newMacHealthChecker() *healthChecker {
diff --git a/internal/buildgo/basepin.go b/internal/buildgo/basepin.go
index 08722d1..9fb89e0 100644
--- a/internal/buildgo/basepin.go
+++ b/internal/buildgo/basepin.go
@@ -56,7 +56,9 @@
 			continue
 		}
 		if si, ok := need[d.SourceImage]; ok && d.SourceImageId == fmt.Sprint(si.Id) {
-			log.Printf("Have %s: %s (%v)\n", d.Name, d.SourceImage, d.SourceImageId)
+			if c.Verbose {
+				log.Printf("basepin: have %s: %s (%v)\n", d.Name, d.SourceImage, d.SourceImageId)
+			}
 			delete(need, d.SourceImage)
 		}
 	}
@@ -67,11 +69,11 @@
 	}
 	sort.Strings(needed)
 	for _, n := range needed {
-		log.Printf("Need %v", n)
+		log.Printf("basepin: need %v", n)
 	}
 	for i, imName := range needed {
 		im := need[imName]
-		log.Printf("(%d/%d) Creating %s ...", i+1, len(needed), im.Name)
+		log.Printf("basepin: (%d/%d) creating %s ...", i+1, len(needed), im.Name)
 		op, err := svc.Disks.Insert(c.Env.ProjectName, c.Env.Zone, &compute.Disk{
 			Description:   "zone-cached basepin image of " + im.Name,
 			Name:          "basepin-" + im.Name + "-" + fmt.Sprint(im.Id),
@@ -84,9 +86,10 @@
 			return err
 		}
 		if err := c.AwaitOp(ctx, op); err != nil {
-			log.Fatalf("failed to create: %v", err)
+			log.Fatalf("basepin: failed to create: %v", err)
 		}
 	}
+	log.Printf("basepin: created %d images", len(needed))
 	return nil
 }
 
@@ -117,7 +120,6 @@
 				}
 				return last
 			}
-			log.Printf("Success. %+v", op)
 			return nil
 		default:
 			return fmt.Errorf("Unknown status %q: %+v", op.Status, op)
diff --git a/internal/buildgo/client.go b/internal/buildgo/client.go
index b7a60db..23f0e59 100644
--- a/internal/buildgo/client.go
+++ b/internal/buildgo/client.go
@@ -21,6 +21,8 @@
 	Creds  *google.Credentials
 	Client *http.Client // OAuth2 client
 
+	Verbose bool // enable extra debug logging
+
 	mu             sync.Mutex
 	computeService *compute.Service // lazily initialized
 }