cmd/coordinator/status.go - build - Git at Google

 // Copyright 2015 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 //go:build go1.16 && (linux || darwin)
 // +build go1.16
 // +build linux darwin

 package main

 import (
 	"bufio"
 	"bytes"
 	"context"
 	_ "embed"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"html"
 	"html/template"
 	"io"
 	"log"
 	"net/http"
 	"os"
 	"os/exec"
 	"regexp"
 	"runtime"
 	"sort"
 	"strings"
 	"sync"
 	"sync/atomic"
 	"time"

 	"github.com/google/go-github/github"
 	"go.opencensus.io/stats"
 	"golang.org/x/build/dashboard"
 	"golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/internal/coordinator/remote"
 	"golang.org/x/build/internal/coordinator/schedule"
 	"golang.org/x/build/internal/secret"
 	"golang.org/x/build/kubernetes"
 	"golang.org/x/build/kubernetes/api"
 	"golang.org/x/oauth2"
 	"google.golang.org/grpc"
 )

 // status
 type statusLevel int

 const (
 	// levelInfo is an informational text that's not an error,
 	// such as "coordinator just started recently, waiting to
 	// start health check"
 	levelInfo statusLevel = iota
 	// levelWarn is a non-critical error, such as "missing 1 of 50
 	// of ARM machines"
 	levelWarn
 	// levelError is something that should be fixed sooner, such
 	// as "all Macs are gone".
 	levelError
 )

 func (l statusLevel) String() string {
 	switch l {
 	case levelInfo:
 		return "Info"
 	case levelWarn:
 		return "Warn"
 	case levelError:
 		return "Error"
 	}
 	return ""
 }

 type levelText struct {
 	Level statusLevel
 	Text  string
 }

 func (lt levelText) AsHTML() template.HTML {
 	switch lt.Level {
 	case levelInfo:
 		return template.HTML(html.EscapeString(lt.Text))
 	case levelWarn:
 		return template.HTML(fmt.Sprintf("<span style='color: orange'>%s</span>", html.EscapeString(lt.Text)))
 	case levelError:
 		return template.HTML(fmt.Sprintf("<span style='color: red'><b>%s</b></span>", html.EscapeString(lt.Text)))
 	}
 	return ""
 }

 type checkWriter struct {
 	Out []levelText
 }

 func (w *checkWriter) error(s string)                       { w.Out = append(w.Out, levelText{levelError, s}) }
 func (w *checkWriter) errorf(a string, args ...interface{}) { w.error(fmt.Sprintf(a, args...)) }
 func (w *checkWriter) info(s string)                        { w.Out = append(w.Out, levelText{levelInfo, s}) }
 func (w *checkWriter) infof(a string, args ...interface{})  { w.info(fmt.Sprintf(a, args...)) }
 func (w *checkWriter) warn(s string)                        { w.Out = append(w.Out, levelText{levelWarn, s}) }
 func (w *checkWriter) warnf(a string, args ...interface{})  { w.warn(fmt.Sprintf(a, args...)) }
 func (w *checkWriter) hasErrors() bool {
 	for _, v := range w.Out {
 		if v.Level == levelError {
 			return true
 		}
 	}
 	return false
 }

 type healthChecker struct {
 	ID     string
 	Title  string
 	DocURL string

 	// Check writes the health check status to a checkWriter.
 	//
 	// It's called when rendering the HTML page, so expensive
 	// operations (network calls, etc.) should be done in a
 	// separate goroutine and Check should report their results.
 	Check func(*checkWriter)
 }

 func (hc *healthChecker) DoCheck() *checkWriter {
 	cw := new(checkWriter)
 	hc.Check(cw)
 	return cw
 }

 var (
 	healthCheckers    []*healthChecker
 	healthCheckerByID = map[string]*healthChecker{}
 )

 func addHealthChecker(mux *http.ServeMux, hc *healthChecker) {
 	if _, dup := healthCheckerByID[hc.ID]; dup {
 		panic("duplicate health checker ID " + hc.ID)
 	}
 	healthCheckers = append(healthCheckers, hc)
 	healthCheckerByID[hc.ID] = hc
 	mux.Handle("/status/"+hc.ID, healthCheckerHandler(hc))
 }

 // basePinErr is the status of the start-up time basepin disk creation
 // in gce.go. It's of type string; no value means no result yet,
 // empty string means success, and non-empty means an error.
 var basePinErr atomic.Value

 func addHealthCheckers(ctx context.Context, mux *http.ServeMux, sc *secret.Client) {
 	addHealthChecker(mux, newMacHealthChecker())
 	addHealthChecker(mux, newMacOSARM64Checker())
 	addHealthChecker(mux, newOSUPPC64Checker())
 	addHealthChecker(mux, newOSUPPC64leChecker())
 	addHealthChecker(mux, newOSUPPC64lePower9Checker())
 	addHealthChecker(mux, newBasepinChecker())
 	addHealthChecker(mux, newGitMirrorChecker())
 	addHealthChecker(mux, newGitHubAPIChecker(ctx, sc))
 }

 func newBasepinChecker() *healthChecker {
 	return &healthChecker{
 		ID:     "basepin",
 		Title:  "VM snapshots",
 		DocURL: "https://golang.org/issue/21305",
 		Check: func(w *checkWriter) {
 			v := basePinErr.Load()
 			if v == nil {
 				w.warnf("still running")
 				return
 			}
 			if v == "" {
 				return
 			}
 			w.error(v.(string))
 		},
 	}
 }

 // gitMirrorStatus is the latest known status of the gitmirror service.
 var gitMirrorStatus = struct {
 	sync.Mutex
 	Errors   []string
 	Warnings []string
 }{Warnings: []string{"still checking"}}

 func monitorGitMirror(kcl *kubernetes.Client) {
 	for {
 		errs, warns := gitMirrorErrors(kcl)
 		gitMirrorStatus.Lock()
 		gitMirrorStatus.Errors, gitMirrorStatus.Warnings = errs, warns
 		gitMirrorStatus.Unlock()
 		time.Sleep(30 * time.Second)
 	}
 }

 // gitMirrorErrors queries the status pages of all
 // running gitmirror instances and reports errors.
 func gitMirrorErrors(kcl *kubernetes.Client) (errs, warns []string) {
 	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
 	defer cancel()
 	pods, err := kcl.GetPods(ctx)
 	if err != nil {
 		log.Println("gitMirrorErrors: goKubeClient.GetPods:", err)
 		return []string{"failed to get pods; can't query gitmirror status"}, nil
 	}
 	var runningGitMirror []api.Pod
 	for _, p := range pods {
 		if !strings.HasPrefix(p.Labels["app"], "gitmirror") || p.Status.Phase != "Running" {
 			continue
 		}
 		runningGitMirror = append(runningGitMirror, p)
 	}
 	if len(runningGitMirror) == 0 {
 		return []string{"no running gitmirror instances"}, nil
 	}
 	for _, pod := range runningGitMirror {
 		// The gitmirror -http=:8585 status page URL is hardcoded here.
 		// If the ReplicationController configuration changes (rare), this
 		// health check will begin to fail until it's updated accordingly.
 		instErrs, instWarns := gitMirrorInstanceErrors(ctx, fmt.Sprintf("http://%s:8585/", pod.Status.PodIP))
 		for _, err := range instErrs {
 			errs = append(errs, fmt.Sprintf("instance %s: %s", pod.Name, err))
 		}
 		for _, warn := range instWarns {
 			warns = append(warns, fmt.Sprintf("instance %s: %s", pod.Name, warn))
 		}
 	}
 	return errs, warns
 }

 func gitMirrorInstanceErrors(ctx context.Context, url string) (errs, warns []string) {
 	req, _ := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
 	res, err := http.DefaultClient.Do(req)
 	if err != nil {
 		return []string{err.Error()}, nil
 	}
 	defer res.Body.Close()
 	if res.StatusCode != 200 {
 		return []string{res.Status}, nil
 	}
 	// TODO: add a JSON mode to gitmirror so we don't need to parse HTML.
 	// This works for now. We control its output.
 	bs := bufio.NewScanner(res.Body)
 	for bs.Scan() {
 		// Lines look like:
 		//    <html><body><pre><a href='/debug/watcher/arch'>arch</a> - ok
 		// or:
 		//    <a href='/debug/watcher/arch'>arch</a> - ok
 		// (See https://farmer.golang.org/debug/watcher/)
 		line := bs.Text()
 		if strings.HasSuffix(line, " - ok") {
 			continue
 		}
 		m := gitMirrorLineRx.FindStringSubmatch(line)
 		if len(m) != 3 {
 			if strings.Contains(line, "</html>") {
 				break
 			}
 			return []string{fmt.Sprintf("error parsing line %q", line)}, nil
 		}
 		if strings.HasPrefix(m[2], "ok; ") {
 			// If the status begins with "ok", it can't be that bad.
 			warns = append(warns, fmt.Sprintf("repo %s: %s", m[1], m[2]))
 			continue
 		}
 		errs = append(errs, fmt.Sprintf("repo %s: %s", m[1], m[2]))
 	}
 	if err := bs.Err(); err != nil {
 		errs = append(errs, err.Error())
 	}
 	return errs, warns
 }

 // $1 is repo; $2 is error message
 var gitMirrorLineRx = regexp.MustCompile(`/debug/watcher/([\w-]+).?>.+</a> - (.*)`)

 func newGitMirrorChecker() *healthChecker {
 	return &healthChecker{
 		ID:     "gitmirror",
 		Title:  "Git mirroring",
 		DocURL: "https://github.com/golang/build/tree/master/cmd/gitmirror",
 		Check: func(w *checkWriter) {
 			gitMirrorStatus.Lock()
 			errs, warns := gitMirrorStatus.Errors, gitMirrorStatus.Warnings
 			gitMirrorStatus.Unlock()
 			for _, v := range errs {
 				w.error(v)
 			}
 			for _, v := range warns {
 				w.warn(v)
 			}
 		},
 	}
 }

 func newMacHealthChecker() *healthChecker {
 	var hosts []string
 	const numMacHosts = 8 // physical Mac Pros, not reverse buildlet connections. M1 Macs will be included in separate checks.
 	for i := 1; i <= numMacHosts; i++ {
 		for _, suf := range []string{"a", "b"} {
 			name := fmt.Sprintf("macstadium_host%02d%s", i, suf)
 			hosts = append(hosts, name)
 		}
 	}
 	checkHosts := reverseHostChecker(hosts)

 	// And check that the makemac daemon is listening.
 	var makeMac struct {
 		sync.Mutex
 		lastCheck  time.Time // currently unused
 		lastErrors []string
 		lastWarns  []string
 	}
 	setMakeMacStatus := func(errs, warns []string) {
 		makeMac.Lock()
 		defer makeMac.Unlock()
 		makeMac.lastCheck = time.Now()
 		makeMac.lastErrors = errs
 		makeMac.lastWarns = warns
 	}
 	go func() {
 		for {
 			errs, warns := fetchMakeMacStatus()
 			setMakeMacStatus(errs, warns)
 			time.Sleep(15 * time.Second)
 		}
 	}()
 	return &healthChecker{
 		ID:     "macs",
 		Title:  "MacStadium Mac VMs",
 		DocURL: "https://github.com/golang/build/tree/master/env/darwin/macstadium",
 		Check: func(w *checkWriter) {
 			// Check hosts.
 			checkHosts(w)
 			// Check makemac daemon.
 			makeMac.Lock()
 			defer makeMac.Unlock()
 			for _, v := range makeMac.lastWarns {
 				w.warnf("makemac daemon: %v", v)
 			}
 			for _, v := range makeMac.lastErrors {
 				w.errorf("makemac daemon: %v", v)
 			}
 		},
 	}
 }

 func fetchMakeMacStatus() (errs, warns []string) {
 	c := &http.Client{Timeout: 15 * time.Second}
 	res, err := c.Get("http://macstadiumd.golang.org:8713")
 	if err != nil {
 		return []string{fmt.Sprintf("failed to fetch status: %v", err)}, nil
 	}
 	defer res.Body.Close()
 	if res.StatusCode != 200 {
 		return []string{fmt.Sprintf("HTTP status %v", res.Status)}, nil
 	}
 	if res.Header.Get("Content-Type") != "application/json" {
 		return []string{fmt.Sprintf("unexpected content-type %q; want JSON", res.Header.Get("Content-Type"))}, nil
 	}
 	var resj struct {
 		Errors   []string
 		Warnings []string
 	}
 	if err := json.NewDecoder(res.Body).Decode(&resj); err != nil {
 		return []string{fmt.Sprintf("reading status response body: %v", err)}, nil
 	}
 	return resj.Errors, resj.Warnings
 }

 func newMacOSARM64Checker() *healthChecker {
 	var expect int // Number of expected darwin/arm64 reverse builders based on x/build/dashboard.
 	for hostType, hc := range dashboard.Hosts {
 		if !strings.HasPrefix(hostType, "host-darwin-arm64-") || !hc.IsReverse {
 			continue
 		}
 		expect += hc.ExpectNum
 	}
 	var hosts []string
 	for i := 1; i <= expect; i++ {
 		hosts = append(hosts, fmt.Sprintf("fishbowl-%02d.local", i))
 	}
 	return &healthChecker{
 		ID:     "macos-arm64",
 		Title:  "macOS ARM64 (M1 Mac minis)",
 		DocURL: "https://golang.org/issue/39782",
 		Check:  reverseHostChecker(hosts),
 	}
 }

 func expectedHosts(hostType string) int {
 	hc, ok := dashboard.Hosts[hostType]
 	if !ok {
 		panic(fmt.Sprintf("unknown host type %q", hostType))
 	}
 	return hc.ExpectNum
 }

 func newOSUPPC64Checker() *healthChecker {
 	var hosts []string
 	for i := 1; i <= expectedHosts("host-linux-ppc64-osu"); i++ {
 		name := fmt.Sprintf("host-linux-ppc64-osu:ppc64_%02d", i)
 		hosts = append(hosts, name)
 	}
 	return &healthChecker{
 		ID:     "osuppc64",
 		Title:  "OSU linux/ppc64 machines",
 		DocURL: "https://github.com/golang/build/tree/master/env/linux-ppc64/osuosl",
 		Check:  reverseHostChecker(hosts),
 	}
 }

 func newOSUPPC64leChecker() *healthChecker {
 	var hosts []string
 	for i := 1; i <= expectedHosts("host-linux-ppc64le-osu"); i++ {
 		name := fmt.Sprintf("host-linux-ppc64le-osu:power_%02d", i)
 		hosts = append(hosts, name)
 	}
 	return &healthChecker{
 		ID:     "osuppc64le",
 		Title:  "OSU linux/ppc64le POWER8 machines",
 		DocURL: "https://github.com/golang/build/tree/master/env/linux-ppc64le/osuosl",
 		Check:  reverseHostChecker(hosts),
 	}
 }

 func newOSUPPC64lePower9Checker() *healthChecker {
 	var hosts []string
 	for i := 1; i <= expectedHosts("host-linux-ppc64le-power9-osu"); i++ {
 		name := fmt.Sprintf("host-linux-ppc64le-power9-osu:power_%02d", i)
 		hosts = append(hosts, name)
 	}
 	return &healthChecker{
 		ID:     "osuppc64lepower9",
 		Title:  "OSU linux/ppc64le POWER9 machines",
 		DocURL: "https://github.com/golang/build/tree/master/env/linux-ppc64le/osuosl",
 		Check:  reverseHostChecker(hosts),
 	}
 }

 func reverseHostChecker(hosts []string) func(cw *checkWriter) {
 	const recentThreshold = 2 * time.Minute // let VMs be away 2 minutes; assume ~1 minute bootup + slop
 	checkStart := time.Now().Add(recentThreshold)

 	hostSet := map[string]bool{}
 	for _, v := range hosts {
 		hostSet[v] = true
 	}

 	// TODO(amedee): rethink how this is implemented. It has been
 	// modified due to golang.org/issues/36841
 	// instead of a single lock being held while all of the
 	// operations are performed, there is now a lock held
 	// during each BuildletLastSeen call and again when
 	// the buildlet host names are retrieved.
 	return func(cw *checkWriter) {
 		p := pool.ReversePool()

 		now := time.Now()
 		wantGoodSince := now.Add(-recentThreshold)
 		numMissing := 0
 		numGood := 0
 		// Check last good times
 		for _, host := range hosts {
 			lastGood, ok := p.BuildletLastSeen(host)
 			if ok && lastGood.After(wantGoodSince) {
 				numGood++
 				continue
 			}
 			if now.Before(checkStart) {
 				cw.infof("%s not yet connected", host)
 				continue
 			}
 			if ok {
 				cw.warnf("%s missing, not seen for %v", host, time.Now().Sub(lastGood).Round(time.Second))
 			} else {
 				cw.warnf("%s missing, never seen (at least %v)", host, uptime())
 			}
 			numMissing++
 		}
 		if numMissing > 0 {
 			sum := numMissing + numGood
 			percentMissing := float64(numMissing) / float64(sum)
 			msg := fmt.Sprintf("%d machines missing, %.0f%% of capacity", numMissing, percentMissing*100)
 			if percentMissing >= 0.15 {
 				cw.error(msg)
 			} else {
 				cw.warn(msg)
 			}
 		}

 		// And check that we don't have more than 1
 		// connected of any type.
 		count := map[string]int{}
 		for _, hostname := range p.BuildletHostnames() {
 			if hostSet[hostname] {
 				count[hostname]++
 			}
 		}
 		for name, n := range count {
 			if n > 1 {
 				cw.errorf("%q is connected from %v machines", name, n)
 			}
 		}
 	}
 }

 // newGitHubAPIChecker creates a GitHub API health checker
 // that queries the remaining rate limit at regular invervals
 // and reports when the hourly quota has been exceeded.
 //
 // It also records metrics to track remaining rate limit over time.
 func newGitHubAPIChecker(ctx context.Context, sc *secret.Client) *healthChecker {
 	// githubRate is the status of the GitHub API v3 client.
 	// It's of type *github.Rate; no value means no result yet,
 	// nil value means no recent result.
 	var githubRate atomic.Value

 	hc := &healthChecker{
 		ID:     "githubapi",
 		Title:  "GitHub API Rate Limit",
 		DocURL: "https://golang.org/issue/44406",
 		Check: func(w *checkWriter) {
 			rate, ok := githubRate.Load().(*github.Rate)
 			if !ok {
 				w.warn("still checking")
 			} else if rate == nil {
 				w.warn("no recent result")
 			} else if rate.Remaining == 0 {
 				resetIn := "a minute or so"
 				if t := time.Until(rate.Reset.Time); t > time.Minute {
 					resetIn = t.Round(time.Second).String()
 				}
 				w.warnf("hourly GitHub API rate limit exceeded; reset in %s", resetIn)
 			}
 		},
 	}

 	// Start measuring and reporting the remaining GitHub API v3 rate limit.
 	if sc == nil {
 		hc.Check = func(w *checkWriter) {
 			w.info("check disabled; credentials were not provided")
 		}
 		return hc
 	}
 	token, err := sc.Retrieve(ctx, secret.NameMaintnerGitHubToken)
 	if err != nil {
 		log.Printf("newGitHubAPIChecker: sc.Retrieve(_, %q) failed, err = %v\n", secret.NameMaintnerGitHubToken, err)
 		hc.Check = func(w *checkWriter) {
 			// The check is displayed publicly, so don't include details from err.
 			w.error("failed to retrieve API token")
 		}
 		return hc
 	}
 	gh := github.NewClient(oauth2.NewClient(ctx, oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})))
 	go func() {
 		t := time.NewTicker(time.Minute)
 		defer t.Stop()
 		for {
 			// Fetch the current rate limit from the GitHub API.
 			// This endpoint is special in that it doesn't consume rate limit quota itself.
 			var rate *github.Rate
 			rateLimitsCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
 			rl, _, err := gh.RateLimits(rateLimitsCtx)
 			cancel()
 			if rle := (*github.RateLimitError)(nil); errors.As(err, &rle) {
 				rate = &rle.Rate
 			} else if err != nil {
 				log.Println("GitHubAPIChecker: github.RateLimits:", err)
 			} else {
 				rate = rl.GetCore()
 			}

 			// Store the result of fetching, and record the current rate limit, if any.
 			githubRate.Store(rate)
 			if rate != nil {
 				stats.Record(ctx, mGitHubAPIRemaining.M(int64(rate.Remaining)))
 			}

 			select {
 			case <-t.C:
 			case <-ctx.Done():
 				return
 			}
 		}
 	}()

 	return hc
 }

 func healthCheckerHandler(hc *healthChecker) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		cw := new(checkWriter)
 		hc.Check(cw)
 		w.Header().Set("Content-Type", "text/plain; charset=utf-8")
 		if cw.hasErrors() {
 			w.WriteHeader(500)
 		} else {
 			w.WriteHeader(200)
 		}
 		if len(cw.Out) == 0 {
 			io.WriteString(w, "ok\n")
 			return
 		}
 		fmt.Fprintf(w, "# %q status: %s\n", hc.ID, hc.Title)
 		if hc.DocURL != "" {
 			fmt.Fprintf(w, "# Notes: %v\n", hc.DocURL)
 		}
 		for _, v := range cw.Out {
 			fmt.Fprintf(w, "%s: %s\n", v.Level, v.Text)
 		}
 	})
 }

 func uptime() time.Duration { return time.Since(processStartTime).Round(time.Second) }

 // grpcHandlerFunc creates handler which intercepts requests intended for a GRPC server and directs the calls to the server.
 // All other requests are directed toward the passed in handler.
 func grpcHandlerFunc(gs *grpc.Server, h http.HandlerFunc) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		if r.ProtoMajor == 2 && strings.HasPrefix(r.Header.Get("Content-Type"), "application/grpc") {
 			gs.ServeHTTP(w, r)
 			return
 		}
 		h(w, r)
 	}
 }

 func handleStatus(w http.ResponseWriter, r *http.Request) {
 	if r.URL.Path != "/" {
 		http.NotFound(w, r)
 		return
 	}
 	df := diskFree()

 	statusMu.Lock()
 	data := statusData{
 		Total:          len(status),
 		Uptime:         uptime(),
 		Recent:         append([]*buildStatus{}, statusDone...),
 		DiskFree:       df,
 		Version:        Version,
 		NumFD:          fdCount(),
 		NumGoroutine:   runtime.NumGoroutine(),
 		HealthCheckers: healthCheckers,
 	}
 	for _, st := range status {
 		if st.HasBuildlet() {
 			data.ActiveBuilds++
 			data.Active = append(data.Active, st)
 			if st.conf.IsReverse() {
 				data.ActiveReverse++
 			}
 		} else {
 			data.Pending = append(data.Pending, st)
 		}
 	}
 	// TODO: make this prettier.
 	var buf bytes.Buffer
 	for _, key := range tryList {
 		if ts := tries[key]; ts != nil {
 			state := ts.state()
 			fmt.Fprintf(&buf, "Change-ID: %v Commit: %v (<a href='/try?commit=%v'>status</a>)\n",
 				key.ChangeTriple(), key.Commit, key.Commit[:8])
 			fmt.Fprintf(&buf, "   Remain: %d, fails: %v\n", state.remain, state.failed)
 			for _, bs := range ts.builds {
 				fmt.Fprintf(&buf, "  %s: running=%v\n", bs.Name, bs.isRunning())
 			}
 		}
 	}
 	statusMu.Unlock()

 	gce := pool.NewGCEConfiguration()
 	data.GomoteInstances = remoteSessionStatus()

 	sort.Sort(byAge(data.Active))
 	sort.Sort(byAge(data.Pending))
 	sort.Sort(sort.Reverse(byAge(data.Recent)))
 	if gce.TryDepsErr() != nil {
 		data.TrybotsErr = gce.TryDepsErr().Error()
 	} else {
 		if buf.Len() == 0 {
 			data.Trybots = template.HTML("<i>(none)</i>")
 		} else {
 			data.Trybots = template.HTML("<pre>" + buf.String() + "</pre>")
 		}
 	}

 	buf.Reset()
 	gce.BuildletPool().WriteHTMLStatus(&buf)
 	data.GCEPoolStatus = template.HTML(buf.String())
 	buf.Reset()

 	buf.Reset()
 	pool.EC2BuildetPool().WriteHTMLStatus(&buf)
 	data.EC2PoolStatus = template.HTML(buf.String())
 	buf.Reset()

 	pool.ReversePool().WriteHTMLStatus(&buf)
 	data.ReversePoolStatus = template.HTML(buf.String())

 	data.SchedState = sched.State()

 	buf.Reset()
 	if err := statusTmpl.Execute(&buf, data); err != nil {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
 	}
 	buf.WriteTo(w)
 }

 func fdCount() int {
 	f, err := os.Open("/proc/self/fd")
 	if err != nil {
 		return -1
 	}
 	defer f.Close()
 	n := 0
 	for {
 		names, err := f.Readdirnames(1000)
 		n += len(names)
 		if err == io.EOF {
 			return n
 		}
 		if err != nil {
 			return -1
 		}
 	}
 }

 func friendlyDuration(d time.Duration) string {
 	if d > 10*time.Second {
 		d2 := ((d + 50*time.Millisecond) / (100 * time.Millisecond)) * (100 * time.Millisecond)
 		return d2.String()
 	}
 	if d > time.Second {
 		d2 := ((d + 5*time.Millisecond) / (10 * time.Millisecond)) * (10 * time.Millisecond)
 		return d2.String()
 	}
 	d2 := ((d + 50*time.Microsecond) / (100 * time.Microsecond)) * (100 * time.Microsecond)
 	return d2.String()
 }

 func diskFree() string {
 	out, _ := exec.Command("df", "-h").Output()
 	return string(out)
 }

 // statusData is the data that fills out statusTmpl.
 type statusData struct {
 	Total             int // number of total builds (including those waiting for a buildlet)
 	ActiveBuilds      int // number of running builds (subset of Total with a buildlet)
 	ActiveReverse     int // subset of ActiveBuilds that are reverse buildlets
 	NumFD             int
 	NumGoroutine      int
 	Uptime            time.Duration
 	Active            []*buildStatus // have a buildlet
 	Pending           []*buildStatus // waiting on a buildlet
 	Recent            []*buildStatus
 	TrybotsErr        string
 	Trybots           template.HTML
 	GCEPoolStatus     template.HTML // TODO: embed template
 	EC2PoolStatus     template.HTML // TODO: embed template
 	ReversePoolStatus template.HTML // TODO: embed template
 	RemoteBuildlets   template.HTML
 	GomoteInstances   template.HTML
 	SchedState        schedule.SchedulerState
 	DiskFree          string
 	Version           string
 	HealthCheckers    []*healthChecker
 }

 //go:embed templates/base.html
 var baseTmplStr string

 // baseTmpl defines common templates for reuse in other coordinator templates.
 var baseTmpl = template.Must(template.New("").Parse(baseTmplStr))

 //go:embed templates/status.html
 var statusTmplStr string

 var statusTmpl = template.Must(baseTmpl.New("status").Parse(statusTmplStr))

 //go:embed style.css
 var styleCSS []byte

 func handleStyleCSS(w http.ResponseWriter, r *http.Request) {
 	w.Header().Set("Cache-Control", "no-cache, private, max-age=0")
 	http.ServeContent(w, r, "style.css", processStartTime, bytes.NewReader(styleCSS))
 }

 // statusSessionPool to be used exclusively in the status file.
 var statusSessionPool *remote.SessionPool

 // setSessionPool sets the session pool for use in the status file.
 func setSessionPool(sp *remote.SessionPool) {
 	statusSessionPool = sp
 }

 // remoteSessionStatus creates the status HTML for the sessions in the session pool.
 func remoteSessionStatus() template.HTML {
 	sessions := statusSessionPool.List()
 	if len(sessions) == 0 {
 		return "<i>(none)</i>"
 	}
 	var buf bytes.Buffer
 	buf.WriteString("<ul>")
 	for _, s := range sessions {
 		fmt.Fprintf(&buf, "<li><b>%s</b>, created %v ago, expires in %v</li>\n",
 			html.EscapeString(s.ID),
 			time.Since(s.Created), time.Until(s.Expires))
 	}
 	buf.WriteString("</ul>")
 	return template.HTML(buf.String())
 }