cmd/makemac, cmd/coordinator: export warnings/error from makemac to coordinator
This adds information on warnings & errors to makemac's JSON status
handler that is then parsed by the coordinator's health checking code,
which already polls this JSON endpoint.
Updates golang/go#32449
Updates golang/go#15760
Change-Id: I69bea7b07c184d1f62a358bc317376aa97018230
Reviewed-on: https://go-review.googlesource.com/c/build/+/181217
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/cmd/coordinator/status.go b/cmd/coordinator/status.go
index bafef2a..9f8df65 100644
--- a/cmd/coordinator/status.go
+++ b/cmd/coordinator/status.go
@@ -10,6 +10,7 @@
"bufio"
"bytes"
"context"
+ "encoding/json"
"fmt"
"html"
"html/template"
@@ -236,31 +237,21 @@
// And check that the makemac daemon is listening.
var makeMac struct {
sync.Mutex
- lastErr error
- lastCheck time.Time // currently unused
+ lastCheck time.Time // currently unused
+ lastErrors []string
+ lastWarns []string
}
- setMakeMacErr := func(err error) {
+ setMakeMacStatus := func(errs, warns []string) {
makeMac.Lock()
defer makeMac.Unlock()
- makeMac.lastErr = err
makeMac.lastCheck = time.Now()
+ makeMac.lastErrors = errs
+ makeMac.lastWarns = warns
}
go func() {
- c := &http.Client{Timeout: 15 * time.Second}
for {
- res, err := c.Get("http://macstadiumd.golang.org:8713")
- if err != nil {
- setMakeMacErr(err)
- } else {
- res.Body.Close()
- if res.StatusCode != 200 {
- setMakeMacErr(fmt.Errorf("HTTP status %v", res.Status))
- } else if res.Header.Get("Content-Type") != "application/json" {
- setMakeMacErr(fmt.Errorf("unexpected content-type %q", res.Header.Get("Content-Type")))
- } else {
- setMakeMacErr(nil)
- }
- }
+ errs, warns := fetchMakeMacStatus()
+ setMakeMacStatus(errs, warns)
time.Sleep(15 * time.Second)
}
}()
@@ -274,13 +265,39 @@
// Check makemac daemon.
makeMac.Lock()
defer makeMac.Unlock()
- if makeMac.lastErr != nil {
- w.errorf("makemac daemon: %v", makeMac.lastErr)
+ for _, v := range makeMac.lastWarns {
+ w.warnf("makemac daemon: %v", v)
+ }
+ for _, v := range makeMac.lastErrors {
+ w.errorf("makemac daemon: %v", v)
}
},
}
}
+func fetchMakeMacStatus() (errs, warns []string) {
+ c := &http.Client{Timeout: 15 * time.Second}
+ res, err := c.Get("http://macstadiumd.golang.org:8713")
+ if err != nil {
+ return []string{fmt.Sprintf("failed to fetch status: %v", err)}, nil
+ }
+ defer res.Body.Close()
+ if res.StatusCode != 200 {
+ return []string{fmt.Sprintf("HTTP status %v", res.Status)}, nil
+ }
+ if res.Header.Get("Content-Type") != "application/json" {
+ return []string{fmt.Sprintf("unexpected content-type %q; want JSON", res.Header.Get("Content-Type"))}, nil
+ }
+ var resj struct {
+ Errors []string
+ Warnings []string
+ }
+ if err := json.NewDecoder(res.Body).Decode(&resj); err != nil {
+ return []string{fmt.Sprintf("reading status response body: %v", err)}, nil
+ }
+ return resj.Errors, resj.Warnings
+}
+
func newJoyentSolarisChecker() *healthChecker {
return &healthChecker{
ID: "joyent-solaris",
diff --git a/cmd/makemac/README.md b/cmd/makemac/README.md
index a38671d..e436678 100644
--- a/cmd/makemac/README.md
+++ b/cmd/makemac/README.md
@@ -2,7 +2,9 @@
# golang.org/x/build/cmd/makemac
-The makemac command starts OS X VMs for the builders.
+The makemac command manages creating & destroying macOS VMs for the
+builders. See the README in x/build/env/darwin/macstadium for some
+more background.
## Deploying `makemac`
@@ -30,3 +32,12 @@
$ sudo systemctl restart makemac
$ sudo journalctl -f -u makemac # watch it
```
+
+## Checking that it's running:
+
+```
+$ curl -v http://macstadiumd.golang.org:8713
+```
+
+(Note that URL won't work in a browser due to HSTS requirements on
+ *.golang.org)
diff --git a/cmd/makemac/makemac.go b/cmd/makemac/makemac.go
index 4d52b13..0fe9cd3 100644
--- a/cmd/makemac/makemac.go
+++ b/cmd/makemac/makemac.go
@@ -16,6 +16,7 @@
package main
import (
+ "bufio"
"context"
"encoding/json"
"errors"
@@ -313,6 +314,9 @@
fmt.Fprintf(os.Stderr, "$ govc %v\n", strings.Join(args, " "))
out, err := exec.CommandContext(ctx, "govc", args...).CombinedOutput()
if err != nil {
+ if isFileSystemReadOnly() {
+ out = append(out, "; filesystem is read-only"...)
+ }
return fmt.Errorf("govc %s ...: %v, %s", args[0], err, out)
}
return nil
@@ -372,7 +376,7 @@
var hosts elementList
if err := govcJSONDecode(ctx, &hosts, "ls", "-json", "/MacStadium-ATL/host/MacMini_Cluster"); err != nil {
- return nil, fmt.Errorf("Reading /MacStadium-ATL/host/MacMini_Cluster: %v", err)
+ return nil, fmt.Errorf("getState: reading /MacStadium-ATL/host/MacMini_Cluster: %v", err)
}
for _, h := range hosts.Elements {
if h.Object.Self.Type == "HostSystem" {
@@ -384,7 +388,7 @@
var vms elementList
if err := govcJSONDecode(ctx, &vms, "ls", "-json", "/MacStadium-ATL/vm"); err != nil {
- return nil, fmt.Errorf("Reading /MacStadium-ATL/vm: %v", err)
+ return nil, fmt.Errorf("getState: reading /MacStadium-ATL/vm: %v", err)
}
for _, h := range vms.Elements {
if h.Object.Self.Type != "VirtualMachine" {
@@ -528,6 +532,8 @@
lastCheck time.Time
lastLog string
lastState *State
+ warnings []string
+ errors []string
}
func init() {
@@ -581,14 +587,20 @@
st, err := getState(ctx)
if err != nil {
- log.Printf("getting VMWare state: %v", err)
+ status.Lock()
+ status.errors = []string{err.Error()}
+ status.Unlock()
+ log.Print(err)
return
}
+ var warnings, errors []string
defer func() {
// Set status.lastState once we're now longer using it.
if st != nil {
status.Lock()
status.lastState = st
+ status.warnings = warnings
+ status.errors = errors
status.Unlock()
}
}()
@@ -597,12 +609,14 @@
req = req.WithContext(ctx)
res, err := http.DefaultClient.Do(req)
if err != nil {
+ errors = append(errors, fmt.Sprintf("getting /status/reverse.json from coordinator: %v", err))
log.Printf("getting reverse status: %v", err)
return
}
defer res.Body.Close()
var rstat types.ReverseBuilderStatus
if err := json.NewDecoder(res.Body).Decode(&rstat); err != nil {
+ errors = append(errors, fmt.Sprintf("decoding /status/reverse.json from coordinator: %v", err))
log.Printf("decoding reverse.json: %v", err)
return
}
@@ -618,6 +632,7 @@
}
// Destroy running VMs that appear to be dead and not connected to the coordinator.
+ // TODO: do these all concurrently.
dirty := false
for name, vi := range st.VMInfo {
if vi.BootTime.After(time.Now().Add(-3 * time.Minute)) {
@@ -632,18 +647,22 @@
// Look it up by its slot name instead.
rh = revHost[vi.SlotName]
}
- if rh == nil { // || (!rh.Busy && rh.ConnectedSec > 50 && rh.HostType == "host-darwin-10_12") {
+ if rh == nil {
log.Printf("Destroying VM %q unknown to coordinator...", name)
err := govc(ctx, "vm.destroy", name)
log.Printf("vm.destroy(%q) = %v", name, err)
dirty = true
+ if err != nil {
+ warnings = append(warnings, fmt.Sprintf("vm.destroy(%q) = %v", name, err))
+ }
}
}
for {
if dirty {
st, err = getState(ctx)
if err != nil {
- log.Printf("getState: %v", err)
+ errors = append(errors, err.Error())
+ log.Print(err)
return
}
}
@@ -661,7 +680,9 @@
dedupLogf("Have capacity for %d more Mac VMs; creating requested 10.%d ...", canCreate, ver)
slotName, err := st.CreateMac(ctx, ver)
if err != nil {
- log.Printf("Error creating 10.%d: %v", ver, err)
+ errStr := fmt.Sprintf("Error creating 10.%d: %v", ver, err)
+ errors = append(errors, errStr)
+ log.Print(errStr)
return
}
log.Printf("Created 10.%d VM on %q", ver, slotName)
@@ -715,10 +736,14 @@
LastCheck string
LastLog string
LastState *State
+ Warnings []string
+ Errors []string
}{
LastCheck: status.lastCheck.UTC().Format(time.RFC3339),
LastLog: status.lastLog,
LastState: status.lastState,
+ Warnings: status.warnings,
+ Errors: status.errors,
}
j, _ := json.MarshalIndent(res, "", "\t")
w.Write(j)
@@ -823,3 +848,22 @@
}
h.h.ServeHTTP(w, r)
}
+
+func isFileSystemReadOnly() bool {
+ f, err := os.Open("/proc/mounts")
+ if err != nil {
+ return false
+ }
+ defer f.Close()
+ // Look for line:
+ // /dev/sda1 / ext4 rw,relatime,errors=remount-ro,data=ordered 0 0
+ bs := bufio.NewScanner(f)
+ for bs.Scan() {
+ f := strings.Fields(bs.Text())
+ mountPoint, state := f[1], f[3]
+ if mountPoint == "/" {
+ return strings.HasPrefix(state, "ro,")
+ }
+ }
+ return false
+}