cmd/makemac, cmd/coordinator: export warnings/error from makemac to coordinator

This adds information on warnings & errors to makemac's JSON status
handler that is then parsed by the coordinator's health checking code,
which already polls this JSON endpoint.

Updates golang/go#32449
Updates golang/go#15760

Change-Id: I69bea7b07c184d1f62a358bc317376aa97018230
Reviewed-on: https://go-review.googlesource.com/c/build/+/181217
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/cmd/coordinator/status.go b/cmd/coordinator/status.go
index bafef2a..9f8df65 100644
--- a/cmd/coordinator/status.go
+++ b/cmd/coordinator/status.go
@@ -10,6 +10,7 @@
 	"bufio"
 	"bytes"
 	"context"
+	"encoding/json"
 	"fmt"
 	"html"
 	"html/template"
@@ -236,31 +237,21 @@
 	// And check that the makemac daemon is listening.
 	var makeMac struct {
 		sync.Mutex
-		lastErr   error
-		lastCheck time.Time // currently unused
+		lastCheck  time.Time // currently unused
+		lastErrors []string
+		lastWarns  []string
 	}
-	setMakeMacErr := func(err error) {
+	setMakeMacStatus := func(errs, warns []string) {
 		makeMac.Lock()
 		defer makeMac.Unlock()
-		makeMac.lastErr = err
 		makeMac.lastCheck = time.Now()
+		makeMac.lastErrors = errs
+		makeMac.lastWarns = warns
 	}
 	go func() {
-		c := &http.Client{Timeout: 15 * time.Second}
 		for {
-			res, err := c.Get("http://macstadiumd.golang.org:8713")
-			if err != nil {
-				setMakeMacErr(err)
-			} else {
-				res.Body.Close()
-				if res.StatusCode != 200 {
-					setMakeMacErr(fmt.Errorf("HTTP status %v", res.Status))
-				} else if res.Header.Get("Content-Type") != "application/json" {
-					setMakeMacErr(fmt.Errorf("unexpected content-type %q", res.Header.Get("Content-Type")))
-				} else {
-					setMakeMacErr(nil)
-				}
-			}
+			errs, warns := fetchMakeMacStatus()
+			setMakeMacStatus(errs, warns)
 			time.Sleep(15 * time.Second)
 		}
 	}()
@@ -274,13 +265,39 @@
 			// Check makemac daemon.
 			makeMac.Lock()
 			defer makeMac.Unlock()
-			if makeMac.lastErr != nil {
-				w.errorf("makemac daemon: %v", makeMac.lastErr)
+			for _, v := range makeMac.lastWarns {
+				w.warnf("makemac daemon: %v", v)
+			}
+			for _, v := range makeMac.lastErrors {
+				w.errorf("makemac daemon: %v", v)
 			}
 		},
 	}
 }
 
+func fetchMakeMacStatus() (errs, warns []string) {
+	c := &http.Client{Timeout: 15 * time.Second}
+	res, err := c.Get("http://macstadiumd.golang.org:8713")
+	if err != nil {
+		return []string{fmt.Sprintf("failed to fetch status: %v", err)}, nil
+	}
+	defer res.Body.Close()
+	if res.StatusCode != 200 {
+		return []string{fmt.Sprintf("HTTP status %v", res.Status)}, nil
+	}
+	if res.Header.Get("Content-Type") != "application/json" {
+		return []string{fmt.Sprintf("unexpected content-type %q; want JSON", res.Header.Get("Content-Type"))}, nil
+	}
+	var resj struct {
+		Errors   []string
+		Warnings []string
+	}
+	if err := json.NewDecoder(res.Body).Decode(&resj); err != nil {
+		return []string{fmt.Sprintf("reading status response body: %v", err)}, nil
+	}
+	return resj.Errors, resj.Warnings
+}
+
 func newJoyentSolarisChecker() *healthChecker {
 	return &healthChecker{
 		ID:     "joyent-solaris",
diff --git a/cmd/makemac/README.md b/cmd/makemac/README.md
index a38671d..e436678 100644
--- a/cmd/makemac/README.md
+++ b/cmd/makemac/README.md
@@ -2,7 +2,9 @@
 
 # golang.org/x/build/cmd/makemac
 
-The makemac command starts OS X VMs for the builders.
+The makemac command manages creating & destroying macOS VMs for the
+builders. See the README in x/build/env/darwin/macstadium for some
+more background.
 
 ## Deploying `makemac`
 
@@ -30,3 +32,12 @@
   $ sudo systemctl restart makemac
   $ sudo journalctl -f -u makemac     # watch it
 ```
+
+## Checking that it's running:
+
+```
+$ curl -v http://macstadiumd.golang.org:8713
+```
+
+(Note that URL won't work in a browser due to HSTS requirements on
+ *.golang.org)
diff --git a/cmd/makemac/makemac.go b/cmd/makemac/makemac.go
index 4d52b13..0fe9cd3 100644
--- a/cmd/makemac/makemac.go
+++ b/cmd/makemac/makemac.go
@@ -16,6 +16,7 @@
 package main
 
 import (
+	"bufio"
 	"context"
 	"encoding/json"
 	"errors"
@@ -313,6 +314,9 @@
 	fmt.Fprintf(os.Stderr, "$ govc %v\n", strings.Join(args, " "))
 	out, err := exec.CommandContext(ctx, "govc", args...).CombinedOutput()
 	if err != nil {
+		if isFileSystemReadOnly() {
+			out = append(out, "; filesystem is read-only"...)
+		}
 		return fmt.Errorf("govc %s ...: %v, %s", args[0], err, out)
 	}
 	return nil
@@ -372,7 +376,7 @@
 
 	var hosts elementList
 	if err := govcJSONDecode(ctx, &hosts, "ls", "-json", "/MacStadium-ATL/host/MacMini_Cluster"); err != nil {
-		return nil, fmt.Errorf("Reading /MacStadium-ATL/host/MacMini_Cluster: %v", err)
+		return nil, fmt.Errorf("getState: reading /MacStadium-ATL/host/MacMini_Cluster: %v", err)
 	}
 	for _, h := range hosts.Elements {
 		if h.Object.Self.Type == "HostSystem" {
@@ -384,7 +388,7 @@
 
 	var vms elementList
 	if err := govcJSONDecode(ctx, &vms, "ls", "-json", "/MacStadium-ATL/vm"); err != nil {
-		return nil, fmt.Errorf("Reading /MacStadium-ATL/vm: %v", err)
+		return nil, fmt.Errorf("getState: reading /MacStadium-ATL/vm: %v", err)
 	}
 	for _, h := range vms.Elements {
 		if h.Object.Self.Type != "VirtualMachine" {
@@ -528,6 +532,8 @@
 	lastCheck time.Time
 	lastLog   string
 	lastState *State
+	warnings  []string
+	errors    []string
 }
 
 func init() {
@@ -581,14 +587,20 @@
 
 	st, err := getState(ctx)
 	if err != nil {
-		log.Printf("getting VMWare state: %v", err)
+		status.Lock()
+		status.errors = []string{err.Error()}
+		status.Unlock()
+		log.Print(err)
 		return
 	}
+	var warnings, errors []string
 	defer func() {
 		// Set status.lastState once we're now longer using it.
 		if st != nil {
 			status.Lock()
 			status.lastState = st
+			status.warnings = warnings
+			status.errors = errors
 			status.Unlock()
 		}
 	}()
@@ -597,12 +609,14 @@
 	req = req.WithContext(ctx)
 	res, err := http.DefaultClient.Do(req)
 	if err != nil {
+		errors = append(errors, fmt.Sprintf("getting /status/reverse.json from coordinator: %v", err))
 		log.Printf("getting reverse status: %v", err)
 		return
 	}
 	defer res.Body.Close()
 	var rstat types.ReverseBuilderStatus
 	if err := json.NewDecoder(res.Body).Decode(&rstat); err != nil {
+		errors = append(errors, fmt.Sprintf("decoding /status/reverse.json from coordinator: %v", err))
 		log.Printf("decoding reverse.json: %v", err)
 		return
 	}
@@ -618,6 +632,7 @@
 	}
 
 	// Destroy running VMs that appear to be dead and not connected to the coordinator.
+	// TODO: do these all concurrently.
 	dirty := false
 	for name, vi := range st.VMInfo {
 		if vi.BootTime.After(time.Now().Add(-3 * time.Minute)) {
@@ -632,18 +647,22 @@
 			// Look it up by its slot name instead.
 			rh = revHost[vi.SlotName]
 		}
-		if rh == nil { //  || (!rh.Busy && rh.ConnectedSec > 50 && rh.HostType == "host-darwin-10_12") {
+		if rh == nil {
 			log.Printf("Destroying VM %q unknown to coordinator...", name)
 			err := govc(ctx, "vm.destroy", name)
 			log.Printf("vm.destroy(%q) = %v", name, err)
 			dirty = true
+			if err != nil {
+				warnings = append(warnings, fmt.Sprintf("vm.destroy(%q) = %v", name, err))
+			}
 		}
 	}
 	for {
 		if dirty {
 			st, err = getState(ctx)
 			if err != nil {
-				log.Printf("getState: %v", err)
+				errors = append(errors, err.Error())
+				log.Print(err)
 				return
 			}
 		}
@@ -661,7 +680,9 @@
 		dedupLogf("Have capacity for %d more Mac VMs; creating requested 10.%d ...", canCreate, ver)
 		slotName, err := st.CreateMac(ctx, ver)
 		if err != nil {
-			log.Printf("Error creating 10.%d: %v", ver, err)
+			errStr := fmt.Sprintf("Error creating 10.%d: %v", ver, err)
+			errors = append(errors, errStr)
+			log.Print(errStr)
 			return
 		}
 		log.Printf("Created 10.%d VM on %q", ver, slotName)
@@ -715,10 +736,14 @@
 		LastCheck string
 		LastLog   string
 		LastState *State
+		Warnings  []string
+		Errors    []string
 	}{
 		LastCheck: status.lastCheck.UTC().Format(time.RFC3339),
 		LastLog:   status.lastLog,
 		LastState: status.lastState,
+		Warnings:  status.warnings,
+		Errors:    status.errors,
 	}
 	j, _ := json.MarshalIndent(res, "", "\t")
 	w.Write(j)
@@ -823,3 +848,22 @@
 	}
 	h.h.ServeHTTP(w, r)
 }
+
+func isFileSystemReadOnly() bool {
+	f, err := os.Open("/proc/mounts")
+	if err != nil {
+		return false
+	}
+	defer f.Close()
+	// Look for line:
+	//    /dev/sda1 / ext4 rw,relatime,errors=remount-ro,data=ordered 0 0
+	bs := bufio.NewScanner(f)
+	for bs.Scan() {
+		f := strings.Fields(bs.Text())
+		mountPoint, state := f[1], f[3]
+		if mountPoint == "/" {
+			return strings.HasPrefix(state, "ro,")
+		}
+	}
+	return false
+}