cmd/coordinator: add missing code from CL 179419
A git mistake left part of the change out of CL 179419; this CL adds the code that was supposed to be included there.
It also quiets and prefixes the basepin logging, which turned out to be spammy during testing.
Updates golang/go#21305
Change-Id: Icbf32a064d8725c46ffddd422938bd8ff208c829
Reviewed-on: https://go-review.googlesource.com/c/build/+/179617
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
diff --git a/cmd/coordinator/gce.go b/cmd/coordinator/gce.go
index c16d779..b00f64a 100644
--- a/cmd/coordinator/gce.go
+++ b/cmd/coordinator/gce.go
@@ -191,7 +191,7 @@
}
go gcePool.pollQuotaLoop()
- go createBasepinDisks()
+ go createBasepinDisks(context.Background())
return nil
}
@@ -701,29 +701,33 @@
}
}
-// createBasepinDisks runs in the background on start-up and does a
-// best effort creation of zone-local copies of VM disk images, to
+// createBasepinDisks creates zone-local copies of VM disk images, to
// speed up VM creations in the future.
//
// Other than a list call, this a no-op unless new VM images were
// added or updated recently.
-func createBasepinDisks() {
+func createBasepinDisks(ctx context.Context) {
if !metadata.OnGCE() || (buildEnv != buildenv.Production && buildEnv != buildenv.Staging) {
return
}
- t0 := time.Now()
- ctx := context.Background()
- bgc, err := buildgo.NewClient(ctx, buildEnv)
- if err != nil {
- log.Printf("basepin: NewClient: %v", err)
+ for {
+ t0 := time.Now()
+ bgc, err := buildgo.NewClient(ctx, buildEnv)
+ if err != nil {
+ log.Printf("basepin: NewClient: %v", err)
+ return
+ }
+ log.Printf("basepin: creating basepin disks...")
+ err = bgc.MakeBasepinDisks(ctx)
+ d := time.Since(t0).Round(time.Second / 10)
+ if err != nil {
+ basePinErr.Store(err.Error())
+ log.Printf("basepin: error creating basepin disks, after %v: %v", d, err)
+ time.Sleep(5 * time.Minute)
+ continue
+ }
+ basePinErr.Store("")
+ log.Printf("basepin: created basepin disks after %v", d)
return
}
- log.Printf("basepin: creating basepin disks...")
- err = bgc.MakeBasepinDisks(ctx)
- d := time.Since(t0).Round(time.Second) / 10
- if err != nil {
- log.Printf("basepin: error creating basepin disks, after %v: %v", d, err)
- return
- }
- log.Printf("basepin: created basepin disks after %v", d)
}
diff --git a/cmd/coordinator/status.go b/cmd/coordinator/status.go
index ca5b9dc..e77f681 100644
--- a/cmd/coordinator/status.go
+++ b/cmd/coordinator/status.go
@@ -116,6 +116,11 @@
http.Handle("/status/"+hc.ID, healthCheckerHandler(hc))
}
+// basePinErr is the status of the start-up time basepin disk creation
+// in gce.go. It's of type string; nil means no result yet, empty
+// string means success, and non-empty means an error.
+var basePinErr atomic.Value
+
func init() {
addHealthChecker(newMacHealthChecker())
addHealthChecker(newScalewayHealthChecker())
@@ -123,6 +128,26 @@
addHealthChecker(newOSUPPC64Checker())
addHealthChecker(newOSUPPC64leChecker())
addHealthChecker(newJoyentChecker())
+ addHealthChecker(newBasepinChecker())
+}
+
+func newBasepinChecker() *healthChecker {
+ return &healthChecker{
+ ID: "basepin",
+ Title: "VM snapshots",
+ EnvURL: "https://golang.org/issue/21305",
+ Check: func(w *checkWriter) {
+ v := basePinErr.Load()
+ if v == nil {
+ w.warnf("still running")
+ return
+ }
+ if v == "" {
+ return
+ }
+ w.error(v.(string))
+ },
+ }
}
func newMacHealthChecker() *healthChecker {
diff --git a/internal/buildgo/basepin.go b/internal/buildgo/basepin.go
index 08722d1..9fb89e0 100644
--- a/internal/buildgo/basepin.go
+++ b/internal/buildgo/basepin.go
@@ -56,7 +56,9 @@
continue
}
if si, ok := need[d.SourceImage]; ok && d.SourceImageId == fmt.Sprint(si.Id) {
- log.Printf("Have %s: %s (%v)\n", d.Name, d.SourceImage, d.SourceImageId)
+ if c.Verbose {
+ log.Printf("basepin: have %s: %s (%v)\n", d.Name, d.SourceImage, d.SourceImageId)
+ }
delete(need, d.SourceImage)
}
}
@@ -67,11 +69,11 @@
}
sort.Strings(needed)
for _, n := range needed {
- log.Printf("Need %v", n)
+ log.Printf("basepin: need %v", n)
}
for i, imName := range needed {
im := need[imName]
- log.Printf("(%d/%d) Creating %s ...", i+1, len(needed), im.Name)
+ log.Printf("basepin: (%d/%d) creating %s ...", i+1, len(needed), im.Name)
op, err := svc.Disks.Insert(c.Env.ProjectName, c.Env.Zone, &compute.Disk{
Description: "zone-cached basepin image of " + im.Name,
Name: "basepin-" + im.Name + "-" + fmt.Sprint(im.Id),
@@ -84,9 +86,10 @@
return err
}
if err := c.AwaitOp(ctx, op); err != nil {
- log.Fatalf("failed to create: %v", err)
+ log.Fatalf("basepin: failed to create: %v", err)
}
}
+ log.Printf("basepin: created %d images", len(needed))
return nil
}
@@ -117,7 +120,6 @@
}
return last
}
- log.Printf("Success. %+v", op)
return nil
default:
return fmt.Errorf("Unknown status %q: %+v", op.Status, op)
diff --git a/internal/buildgo/client.go b/internal/buildgo/client.go
index b7a60db..23f0e59 100644
--- a/internal/buildgo/client.go
+++ b/internal/buildgo/client.go
@@ -21,6 +21,8 @@
Creds *google.Credentials
Client *http.Client // OAuth2 client
+ Verbose bool // enable extra debug logging
+
mu sync.Mutex
computeService *compute.Service // lazily initialized
}