blob: de8c38da9180b34e4dafab088fac7b4f8210c57c [file] [log] [blame]
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -07001// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -08005// +build build_coordinator
6
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -07007// The coordinator runs on GCE and coordinates builds in Docker containers.
David Symonds0936d042014-12-09 15:00:58 +11008package main // import "golang.org/x/tools/dashboard/coordinator"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -07009
10import (
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080011 "archive/tar"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070012 "bytes"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080013 "compress/gzip"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070014 "crypto/hmac"
15 "crypto/md5"
16 "encoding/json"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080017 "errors"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070018 "flag"
19 "fmt"
Brad Fitzpatrickd9bbf3c2014-12-15 11:51:42 +110020 "html"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070021 "io"
22 "io/ioutil"
23 "log"
24 "net/http"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080025 "net/url"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070026 "os"
27 "os/exec"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080028 "path"
29 "regexp"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070030 "sort"
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -080031 "strconv"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070032 "strings"
33 "sync"
34 "time"
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -080035
36 "golang.org/x/oauth2"
37 "golang.org/x/oauth2/google"
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -080038 "golang.org/x/tools/dashboard/types"
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -080039 "google.golang.org/api/compute/v1"
40 "google.golang.org/cloud/compute/metadata"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070041)
42
// Command-line flags controlling normal operation.
var (
	// masterKeyFile is the -masterkey flag: an optional path to the
	// builder master key.
	masterKeyFile = flag.String("masterkey", "", "Path to builder master key. Else fetched using GCE project attribute 'builder-master-key'.")

	// maxLocalBuilds is the -maxbuilds flag: the cap on concurrent
	// Docker builds. VM builds are not counted against it.
	maxLocalBuilds = flag.Int("maxbuilds", 6, "Max concurrent Docker builds (VM builds don't count)")

	// cleanZones is the -zones flag: the comma-separated GCE zones to
	// periodically clean of stale build VMs.
	cleanZones = flag.String("zones", "us-central1-a,us-central1-b,us-central1-f", "Comma-separated list of zones to periodically clean of stale build VMs (ones that failed to shut themselves down)")
)

// Debug flags.
var (
	addTemp = flag.Bool("temp", false, "Append -temp to all builders.")
	just    = flag.String("just", "", "If non-empty, run single build in the foreground. Requires rev.")
	rev     = flag.String("rev", "", "Revision to build.")
)
54
55var (
56 startTime = time.Now()
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080057 builders = map[string]buildConfig{} // populated at startup, keys like "openbsd-amd64-56"
58 watchers = map[string]watchConfig{} // populated at startup, keyed by repo, e.g. "https://go.googlesource.com/go"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070059 donec = make(chan builderRev) // reports of finished builders
60
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -080061 statusMu sync.Mutex // guards both status (ongoing ones) and statusDone (just finished)
62 status = map[builderRev]*buildStatus{}
63 statusDone []*buildStatus // finished recently, capped to maxStatusDone
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070064)
65
// maxStatusDone is the number of finished builds kept in statusDone
// before the oldest entry is dropped.
const maxStatusDone = 30
67
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080068// Initialized by initGCE:
69var (
70 projectID string
71 projectZone string
72 computeService *compute.Service
73)
74
75func initGCE() error {
76 if !metadata.OnGCE() {
77 return errors.New("not running on GCE; VM support disabled")
78 }
79 var err error
80 projectID, err = metadata.ProjectID()
81 if err != nil {
82 return fmt.Errorf("failed to get current GCE ProjectID: %v", err)
83 }
84 projectZone, err = metadata.Get("instance/zone")
85 if err != nil || projectZone == "" {
86 return fmt.Errorf("failed to get current GCE zone: %v", err)
87 }
88 // Convert the zone from "projects/1234/zones/us-central1-a" to "us-central1-a".
89 projectZone = path.Base(projectZone)
90 if !hasComputeScope() {
91 return errors.New("The coordinator is not running with access to read and write Compute resources. VM support disabled.")
92
93 }
94 ts := google.ComputeTokenSource("default")
95 computeService, _ = compute.New(oauth2.NewClient(oauth2.NoContext, ts))
96 return nil
97}
98
// imageInfo records where a Docker image tarball lives and which version
// of it was loaded last.
type imageInfo struct {
	url string // location of the image's tar file

	// mu is held by condUpdateImage for the whole check-and-load cycle.
	mu sync.Mutex
	// lastMod is the Last-Modified header value seen when the image was
	// last loaded successfully; empty until then.
	lastMod string
}
105
106var images = map[string]*imageInfo{
Chris Manghane126fb6e2014-12-01 09:59:02 -0800107 "go-commit-watcher": {url: "https://storage.googleapis.com/go-builder-data/docker-commit-watcher.tar.gz"},
Chris Manghane2cb776a2014-09-09 10:54:11 -0700108 "gobuilders/linux-x86-base": {url: "https://storage.googleapis.com/go-builder-data/docker-linux.base.tar.gz"},
Luit van Drongelen802a9942014-10-22 09:33:04 +1100109 "gobuilders/linux-x86-clang": {url: "https://storage.googleapis.com/go-builder-data/docker-linux.clang.tar.gz"},
Chris Manghane2cb776a2014-09-09 10:54:11 -0700110 "gobuilders/linux-x86-gccgo": {url: "https://storage.googleapis.com/go-builder-data/docker-linux.gccgo.tar.gz"},
111 "gobuilders/linux-x86-nacl": {url: "https://storage.googleapis.com/go-builder-data/docker-linux.nacl.tar.gz"},
Brad Fitzpatrick398721d2014-09-26 15:02:51 -0700112 "gobuilders/linux-x86-sid": {url: "https://storage.googleapis.com/go-builder-data/docker-linux.sid.tar.gz"},
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700113}
114
// A buildConfig describes how to run one builder, either inside a Docker
// container or on a GCE VM.
type buildConfig struct {
	name string // builder name, e.g. "linux-amd64-race"

	// VM settings; these apply when vmImage is non-empty.
	vmImage     string // e.g. "openbsd-amd64-56"
	machineType string // GCE instance type; optional

	// Docker settings; these apply when vmImage is empty.
	image   string   // Docker image used for the build
	cmd     string   // optional -cmd flag (relative to go/src/)
	env     []string // extra environment entries in "key=value" form
	dashURL string   // URL of the build dashboard
	tool    string   // the tool this configuration is for
}
130
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800131func (c *buildConfig) usesDocker() bool { return c.vmImage == "" }
132func (c *buildConfig) usesVM() bool { return c.vmImage != "" }
133
134func (c *buildConfig) MachineType() string {
135 if v := c.machineType; v != "" {
136 return v
137 }
138 return "n1-highcpu-4"
139}
140
141// recordResult sends build results to the dashboard
142func (b *buildConfig) recordResult(ok bool, hash, buildLog string, runTime time.Duration) error {
143 req := map[string]interface{}{
144 "Builder": b.name,
145 "PackagePath": "",
146 "Hash": hash,
147 "GoHash": "",
148 "OK": ok,
149 "Log": buildLog,
150 "RunTime": runTime,
151 }
152 args := url.Values{"key": {builderKey(b.name)}, "builder": {b.name}}
153 return dash("POST", "result", args, req, nil)
154}
155
// A watchConfig describes one commit watcher: the repository it polls,
// the dashboard it reports to, and how often it polls.
type watchConfig struct {
	repo     string        // e.g. "https://go.googlesource.com/go"
	dash     string        // e.g. "https://build.golang.org/"; must end in "/"
	interval time.Duration // time between polls
}
161
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700162func main() {
163 flag.Parse()
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800164
165 if err := initGCE(); err != nil {
166 log.Printf("VM support disabled due to error initializing GCE: %v", err)
167 }
168
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700169 addBuilder(buildConfig{name: "linux-386"})
170 addBuilder(buildConfig{name: "linux-386-387", env: []string{"GO386=387"}})
171 addBuilder(buildConfig{name: "linux-amd64"})
Brad Fitzpatrick65528122014-09-04 17:48:56 -0700172 addBuilder(buildConfig{name: "linux-amd64-nocgo", env: []string{"CGO_ENABLED=0", "USER=root"}})
Brad Fitzpatrick0655b612014-09-24 12:54:59 -0700173 addBuilder(buildConfig{name: "linux-amd64-noopt", env: []string{"GO_GCFLAGS=-N -l"}})
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700174 addBuilder(buildConfig{name: "linux-amd64-race"})
175 addBuilder(buildConfig{name: "nacl-386"})
176 addBuilder(buildConfig{name: "nacl-amd64p32"})
Chris Manghane2cb776a2014-09-09 10:54:11 -0700177 addBuilder(buildConfig{
Chris Manghane28e69482014-10-09 14:04:49 -0700178 name: "linux-amd64-gccgo",
179 image: "gobuilders/linux-x86-gccgo",
180 cmd: "make RUNTESTFLAGS=\"--target_board=unix/-m64\" check-go -j16",
181 dashURL: "https://build.golang.org/gccgo",
182 tool: "gccgo",
183 })
184 addBuilder(buildConfig{
185 name: "linux-386-gccgo",
186 image: "gobuilders/linux-x86-gccgo",
187 cmd: "make RUNTESTFLAGS=\"--target_board=unix/-m32\" check-go -j16",
188 dashURL: "https://build.golang.org/gccgo",
189 tool: "gccgo",
Chris Manghane2cb776a2014-09-09 10:54:11 -0700190 })
Brad Fitzpatrick398721d2014-09-26 15:02:51 -0700191 addBuilder(buildConfig{name: "linux-386-sid", image: "gobuilders/linux-x86-sid"})
192 addBuilder(buildConfig{name: "linux-amd64-sid", image: "gobuilders/linux-x86-sid"})
Chris Manghane937fd962014-10-01 11:57:02 -0700193 addBuilder(buildConfig{name: "linux-386-clang", image: "gobuilders/linux-x86-clang"})
194 addBuilder(buildConfig{name: "linux-amd64-clang", image: "gobuilders/linux-x86-clang"})
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700195
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800196 // VMs:
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800197 addBuilder(buildConfig{name: "openbsd-amd64-gce56", vmImage: "openbsd-amd64-56"})
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800198 // addBuilder(buildConfig{name: "plan9-386-gce", vmImage: "plan9-386"})
199
Brad Fitzpatrick1c064a12014-12-11 18:19:15 -0800200 addWatcher(watchConfig{repo: "https://go.googlesource.com/go", dash: "https://build.golang.org/"})
201 // TODO(adg,cmang): fix gccgo watcher
202 // addWatcher(watchConfig{repo: "https://code.google.com/p/gofrontend", dash: "https://build.golang.org/gccgo/"})
Chris Manghane126fb6e2014-12-01 09:59:02 -0800203
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700204 if (*just != "") != (*rev != "") {
205 log.Fatalf("--just and --rev must be used together")
206 }
207 if *just != "" {
208 conf, ok := builders[*just]
209 if !ok {
210 log.Fatalf("unknown builder %q", *just)
211 }
212 cmd := exec.Command("docker", append([]string{"run"}, conf.dockerRunArgs(*rev)...)...)
213 cmd.Stdout = os.Stdout
214 cmd.Stderr = os.Stderr
215 if err := cmd.Run(); err != nil {
216 log.Fatalf("Build failed: %v", err)
217 }
218 return
219 }
220
221 http.HandleFunc("/", handleStatus)
222 http.HandleFunc("/logs", handleLogs)
223 go http.ListenAndServe(":80", nil)
224
Brad Fitzpatrickd9bbf3c2014-12-15 11:51:42 +1100225 go cleanUpOldContainers()
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -0800226 go cleanUpOldVMs()
Brad Fitzpatrickd9bbf3c2014-12-15 11:51:42 +1100227
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800228 stopWatchers() // clean up before we start new ones
Chris Manghane126fb6e2014-12-01 09:59:02 -0800229 for _, watcher := range watchers {
230 if err := startWatching(watchers[watcher.repo]); err != nil {
231 log.Printf("Error starting watcher for %s: %v", watcher.repo, err)
232 }
233 }
234
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700235 workc := make(chan builderRev)
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800236 go findWorkLoop(workc)
237 // TODO(cmang): gccgo will need its own findWorkLoop
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700238
239 ticker := time.NewTicker(1 * time.Minute)
240 for {
241 select {
242 case work := <-workc:
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800243 log.Printf("workc received %+v; len(status) = %v, maxLocalBuilds = %v; cur = %p", work, len(status), *maxLocalBuilds, status[work])
244 if mayBuildRev(work) {
245 conf := builders[work.name]
246 if st, err := startBuilding(conf, work.rev); err == nil {
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700247 setStatus(work, st)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700248 } else {
249 log.Printf("Error starting to build %v: %v", work, err)
250 }
251 }
252 case done := <-donec:
253 log.Printf("%v done", done)
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800254 markDone(done)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700255 case <-ticker.C:
256 if numCurrentBuilds() == 0 && time.Now().After(startTime.Add(10*time.Minute)) {
257 // TODO: halt the whole machine to kill the VM or something
258 }
259 }
260 }
261}
262
263func numCurrentBuilds() int {
264 statusMu.Lock()
265 defer statusMu.Unlock()
266 return len(status)
267}
268
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800269func isBuilding(work builderRev) bool {
270 statusMu.Lock()
271 defer statusMu.Unlock()
272 _, building := status[work]
273 return building
274}
275
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800276// mayBuildRev reports whether the build type & revision should be started.
277// It returns true if it's not already building, and there is capacity.
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700278func mayBuildRev(work builderRev) bool {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800279 conf := builders[work.name]
280
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700281 statusMu.Lock()
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800282 _, building := status[work]
283 statusMu.Unlock()
284
285 if building {
286 return false
287 }
288 if conf.usesVM() {
289 // These don't count towards *maxLocalBuilds.
290 return true
291 }
292 numDocker, err := numDockerBuilds()
293 if err != nil {
294 log.Printf("not starting %v due to docker ps failure: %v", work, err)
295 return false
296 }
297 return numDocker < *maxLocalBuilds
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700298}
299
300func setStatus(work builderRev, st *buildStatus) {
301 statusMu.Lock()
302 defer statusMu.Unlock()
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800303 status[work] = st
304}
305
306func markDone(work builderRev) {
307 statusMu.Lock()
308 defer statusMu.Unlock()
309 st, ok := status[work]
310 if !ok {
311 return
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700312 }
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800313 delete(status, work)
314 if len(statusDone) == maxStatusDone {
315 copy(statusDone, statusDone[1:])
316 statusDone = statusDone[:len(statusDone)-1]
317 }
318 statusDone = append(statusDone, st)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700319}
320
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800321// statusPtrStr disambiguates which status to return if there are
322// multiple in the history (e.g. recent failures where the build
323// didn't finish for reasons outside of all.bash failing)
324func getStatus(work builderRev, statusPtrStr string) *buildStatus {
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700325 statusMu.Lock()
326 defer statusMu.Unlock()
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800327 match := func(st *buildStatus) bool {
328 return statusPtrStr == "" || fmt.Sprintf("%p", st) == statusPtrStr
329 }
330 if st, ok := status[work]; ok && match(st) {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800331 return st
332 }
333 for _, st := range statusDone {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800334 if st.builderRev == work && match(st) {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800335 return st
336 }
337 }
338 return nil
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700339}
340
341type byAge []*buildStatus
342
343func (s byAge) Len() int { return len(s) }
344func (s byAge) Less(i, j int) bool { return s[i].start.Before(s[j].start) }
345func (s byAge) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
346
347func handleStatus(w http.ResponseWriter, r *http.Request) {
348 var active []*buildStatus
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800349 var recent []*buildStatus
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700350 statusMu.Lock()
351 for _, st := range status {
352 active = append(active, st)
353 }
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800354 recent = append(recent, statusDone...)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800355 numTotal := len(status)
356 numDocker, err := numDockerBuilds()
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700357 statusMu.Unlock()
358
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700359 sort.Sort(byAge(active))
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800360 sort.Sort(sort.Reverse(byAge(recent)))
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800361
362 io.WriteString(w, "<html><body><h1>Go build coordinator</h1>")
363
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800364 if err != nil {
365 fmt.Fprintf(w, "<h2>Error</h2>Error fetching Docker build count: <i>%s</i>\n", html.EscapeString(err.Error()))
366 }
367
368 fmt.Fprintf(w, "<h2>running</h2><p>%d total builds active (Docker: %d/%d; VMs: %d/∞):",
369 numTotal, numDocker, *maxLocalBuilds, numTotal-numDocker)
370
371 io.WriteString(w, "<pre>")
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700372 for _, st := range active {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800373 io.WriteString(w, st.htmlStatusLine())
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700374 }
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800375 io.WriteString(w, "</pre>")
376
377 io.WriteString(w, "<h2>recently completed</h2><pre>")
378 for _, st := range recent {
379 io.WriteString(w, st.htmlStatusLine())
380 }
381 io.WriteString(w, "</pre>")
382
383 fmt.Fprintf(w, "<h2>disk space</h2><pre>%s</pre></body></html>", html.EscapeString(diskFree()))
Brad Fitzpatrickd9bbf3c2014-12-15 11:51:42 +1100384}
385
// diskFree returns the output of "df -h" for display on the status page.
// If df cannot be run or exits with an error, the returned text is
// whatever output was captured followed by a line describing the error,
// so the page never shows an unexplained empty section.
func diskFree() string {
	out, err := exec.Command("df", "-h").Output()
	if err != nil {
		return fmt.Sprintf("%serror running df -h: %v\n", out, err)
	}
	return string(out)
}
390
391func handleLogs(w http.ResponseWriter, r *http.Request) {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800392 st := getStatus(builderRev{r.FormValue("name"), r.FormValue("rev")}, r.FormValue("st"))
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700393 if st == nil {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800394 http.NotFound(w, r)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700395 return
396 }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700397 w.Header().Set("Content-Type", "text/plain; charset=utf-8")
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800398 io.WriteString(w, st.logs())
399 // TODO: if st is still building, stream them to the user with
400 // http.Flusher.Flush and CloseNotifier and registering interest
401 // of new writes with the buildStatus. Will require moving the
402 // BUILDERKEY scrubbing into the Write method.
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700403}
404
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800405// findWorkLoop polls http://build.golang.org/?mode=json looking for new work
406// for the main dashboard. It does not support gccgo.
407// TODO(bradfitz): it also currently does not support subrepos.
408func findWorkLoop(work chan<- builderRev) {
409 ticker := time.NewTicker(15 * time.Second)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700410 for {
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800411 if err := findWork(work); err != nil {
412 log.Printf("failed to find new work: %v", err)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700413 }
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800414 <-ticker.C
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700415 }
416}
417
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800418func findWork(work chan<- builderRev) error {
419 var bs types.BuildStatus
420 res, err := http.Get("https://build.golang.org/?mode=json")
421 if err != nil {
422 return err
423 }
424 defer res.Body.Close()
425 if err := json.NewDecoder(res.Body).Decode(&bs); err != nil {
426 return err
427 }
428 if res.StatusCode != 200 {
429 return fmt.Errorf("unexpected http status %v", res.Status)
430 }
431
432 knownToDashboard := map[string]bool{} // keys are builder
433 for _, b := range bs.Builders {
434 knownToDashboard[b] = true
435 }
436
437 var goRevisions []string
438 for _, br := range bs.Revisions {
439 if br.Repo == "go" {
440 goRevisions = append(goRevisions, br.Revision)
441 } else {
442 // TODO(bradfitz): support these: golang.org/issue/9506
443 continue
444 }
445 if len(br.Results) != len(bs.Builders) {
446 return errors.New("bogus JSON response from dashboard: results is too long.")
447 }
448 for i, res := range br.Results {
449 if res != "" {
450 // It's either "ok" or a failure URL.
451 continue
452 }
453 builder := bs.Builders[i]
454 if _, ok := builders[builder]; !ok {
455 // Not managed by the coordinator.
456 continue
457 }
458 br := builderRev{bs.Builders[i], br.Revision}
459 if !isBuilding(br) {
460 work <- br
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700461 }
462 }
463 }
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800464
465 // And to bootstrap new builders, see if we have any builders
466 // that the dashboard doesn't know about.
467 for b := range builders {
468 if knownToDashboard[b] {
469 continue
470 }
471 for _, rev := range goRevisions {
472 br := builderRev{b, rev}
473 if !isBuilding(br) {
474 work <- br
475 }
476 }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700477 }
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800478 return nil
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700479}
480
// builderRev pairs a build configuration name with a revision. It is
// comparable, so it can be used as a map key.
type builderRev struct {
	name string // builder name, e.g. "linux-amd64-race"
	rev  string // lowercase hex git hash
}
486
487// returns the part after "docker run"
488func (conf buildConfig) dockerRunArgs(rev string) (args []string) {
489 if key := builderKey(conf.name); key != "" {
490 tmpKey := "/tmp/" + conf.name + ".buildkey"
491 if _, err := os.Stat(tmpKey); err != nil {
492 if err := ioutil.WriteFile(tmpKey, []byte(key), 0600); err != nil {
493 log.Fatal(err)
494 }
495 }
Andrew Gerrand275f5002014-11-10 13:22:35 +1100496 // Images may look for .gobuildkey in / or /root, so provide both.
497 // TODO(adg): fix images that look in the wrong place.
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700498 args = append(args, "-v", tmpKey+":/.gobuildkey")
Andrew Gerrand275f5002014-11-10 13:22:35 +1100499 args = append(args, "-v", tmpKey+":/root/.gobuildkey")
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700500 }
501 for _, pair := range conf.env {
502 args = append(args, "-e", pair)
503 }
504 args = append(args,
505 conf.image,
506 "/usr/local/bin/builder",
507 "-rev="+rev,
Chris Manghane2cb776a2014-09-09 10:54:11 -0700508 "-dashboard="+conf.dashURL,
509 "-tool="+conf.tool,
Brad Fitzpatrick48eaaf62014-09-26 12:21:08 -0700510 "-buildroot=/",
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700511 "-v",
512 )
513 if conf.cmd != "" {
514 args = append(args, "-cmd", conf.cmd)
515 }
516 args = append(args, conf.name)
517 return
518}
519
520func addBuilder(c buildConfig) {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800521 if c.tool == "gccgo" {
522 // TODO(cmang,bradfitz,adg): fix gccgo
523 return
524 }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700525 if c.name == "" {
526 panic("empty name")
527 }
528 if *addTemp {
529 c.name += "-temp"
530 }
531 if _, dup := builders[c.name]; dup {
532 panic("dup name")
533 }
Chris Manghane2cb776a2014-09-09 10:54:11 -0700534 if c.dashURL == "" {
535 c.dashURL = "https://build.golang.org"
536 }
537 if c.tool == "" {
538 c.tool = "go"
539 }
540
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700541 if strings.HasPrefix(c.name, "nacl-") {
542 if c.image == "" {
543 c.image = "gobuilders/linux-x86-nacl"
544 }
545 if c.cmd == "" {
546 c.cmd = "/usr/local/bin/build-command.pl"
547 }
548 }
549 if strings.HasPrefix(c.name, "linux-") && c.image == "" {
550 c.image = "gobuilders/linux-x86-base"
551 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800552 if c.image == "" && c.vmImage == "" {
553 panic("empty image and vmImage")
554 }
555 if c.image != "" && c.vmImage != "" {
556 panic("can't specify both image and vmImage")
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700557 }
558 builders[c.name] = c
559}
560
Chris Manghane126fb6e2014-12-01 09:59:02 -0800561// returns the part after "docker run"
562func (conf watchConfig) dockerRunArgs() (args []string) {
Brad Fitzpatrick1c064a12014-12-11 18:19:15 -0800563 log.Printf("Running watcher with master key %q", masterKey())
564 if key := masterKey(); len(key) > 0 {
Chris Manghane126fb6e2014-12-01 09:59:02 -0800565 tmpKey := "/tmp/watcher.buildkey"
566 if _, err := os.Stat(tmpKey); err != nil {
Brad Fitzpatrick1c064a12014-12-11 18:19:15 -0800567 if err := ioutil.WriteFile(tmpKey, key, 0600); err != nil {
Chris Manghane126fb6e2014-12-01 09:59:02 -0800568 log.Fatal(err)
569 }
570 }
Brad Fitzpatrick1c064a12014-12-11 18:19:15 -0800571 // Images may look for .gobuildkey in / or /root, so provide both.
572 // TODO(adg): fix images that look in the wrong place.
Chris Manghane126fb6e2014-12-01 09:59:02 -0800573 args = append(args, "-v", tmpKey+":/.gobuildkey")
Brad Fitzpatrick1c064a12014-12-11 18:19:15 -0800574 args = append(args, "-v", tmpKey+":/root/.gobuildkey")
Chris Manghane126fb6e2014-12-01 09:59:02 -0800575 }
576 args = append(args,
577 "go-commit-watcher",
578 "/usr/local/bin/watcher",
579 "-repo="+conf.repo,
580 "-dash="+conf.dash,
581 "-poll="+conf.interval.String(),
582 )
583 return
584}
585
586func addWatcher(c watchConfig) {
587 if c.repo == "" {
Brad Fitzpatrick1c064a12014-12-11 18:19:15 -0800588 c.repo = "https://go.googlesource.com/go"
Chris Manghane126fb6e2014-12-01 09:59:02 -0800589 }
590 if c.dash == "" {
591 c.dash = "https://build.golang.org/"
592 }
593 if c.interval == 0 {
594 c.interval = 10 * time.Second
595 }
596 watchers[c.repo] = c
597}
598
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700599func condUpdateImage(img string) error {
600 ii := images[img]
601 if ii == nil {
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800602 return fmt.Errorf("image %q doesn't exist", img)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700603 }
604 ii.mu.Lock()
605 defer ii.mu.Unlock()
606 res, err := http.Head(ii.url)
607 if err != nil {
608 return fmt.Errorf("Error checking %s: %v", ii.url, err)
609 }
610 if res.StatusCode != 200 {
611 return fmt.Errorf("Error checking %s: %v", ii.url, res.Status)
612 }
613 if res.Header.Get("Last-Modified") == ii.lastMod {
614 return nil
615 }
616
617 res, err = http.Get(ii.url)
618 if err != nil || res.StatusCode != 200 {
619 return fmt.Errorf("Get after Head failed for %s: %v, %v", ii.url, err, res)
620 }
621 defer res.Body.Close()
622
623 log.Printf("Running: docker load of %s\n", ii.url)
624 cmd := exec.Command("docker", "load")
625 cmd.Stdin = res.Body
626
627 var out bytes.Buffer
628 cmd.Stdout = &out
629 cmd.Stderr = &out
630
631 if cmd.Run(); err != nil {
632 log.Printf("Failed to pull latest %s from %s and pipe into docker load: %v, %s", img, ii.url, err, out.Bytes())
633 return err
634 }
635 ii.lastMod = res.Header.Get("Last-Modified")
636 return nil
637}
638
// numDockerBuilds returns the number of lines in the output of
// "docker ps" that mention a "gobuilders/" image, which is taken as the
// number of Docker builds currently running. It returns an error if
// "docker ps" fails.
func numDockerBuilds() (n int, err error) {
	out, err := exec.Command("docker", "ps").Output()
	if err != nil {
		return 0, err
	}
	lines := strings.Split(string(out), "\n")
	for i := range lines {
		if !strings.Contains(lines[i], "gobuilders/") {
			continue
		}
		n++
	}
	return n, nil
}
652
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700653func startBuilding(conf buildConfig, rev string) (*buildStatus, error) {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800654 if conf.usesVM() {
655 return startBuildingInVM(conf, rev)
656 } else {
657 return startBuildingInDocker(conf, rev)
658 }
659}
660
661func startBuildingInDocker(conf buildConfig, rev string) (*buildStatus, error) {
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700662 if err := condUpdateImage(conf.image); err != nil {
663 log.Printf("Failed to setup container for %v %v: %v", conf.name, rev, err)
664 return nil, err
665 }
666
667 cmd := exec.Command("docker", append([]string{"run", "-d"}, conf.dockerRunArgs(rev)...)...)
668 all, err := cmd.CombinedOutput()
669 log.Printf("Docker run for %v %v = err:%v, output:%s", conf.name, rev, err, all)
670 if err != nil {
671 return nil, err
672 }
673 container := strings.TrimSpace(string(all))
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800674 brev := builderRev{
675 name: conf.name,
676 rev: rev,
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800677 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800678 st := &buildStatus{
679 builderRev: brev,
680 container: container,
681 start: time.Now(),
682 }
683 log.Printf("%v now building in Docker container %v", brev, st.container)
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800684 go func() {
685 all, err := exec.Command("docker", "wait", container).CombinedOutput()
686 output := strings.TrimSpace(string(all))
687 var ok bool
688 if err == nil {
689 exit, err := strconv.Atoi(output)
690 ok = (err == nil && exit == 0)
691 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800692 st.setDone(ok)
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800693 log.Printf("docker wait %s/%s: %v, %s", container, rev, err, output)
694 donec <- builderRev{conf.name, rev}
695 exec.Command("docker", "rm", container).Run()
696 }()
697 go func() {
698 cmd := exec.Command("docker", "logs", "-f", container)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800699 cmd.Stdout = st
700 cmd.Stderr = st
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800701 if err := cmd.Run(); err != nil {
702 // The docker logs subcommand always returns
703 // success, even if the underlying process
704 // fails.
705 log.Printf("failed to follow docker logs of %s: %v", container, err)
706 }
707 }()
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800708 return st, nil
709}
710
// osArchRx captures the leading pair of hyphen-separated words of a
// builder name (e.g. "openbsd-amd64" from "openbsd-amd64-gce56").
var osArchRx = regexp.MustCompile(`^(\w+-\w+)`)
712
713// startBuildingInVM starts a VM on GCE running the buildlet binary to build rev.
714func startBuildingInVM(conf buildConfig, rev string) (*buildStatus, error) {
715 brev := builderRev{
716 name: conf.name,
717 rev: rev,
718 }
719 st := &buildStatus{
720 builderRev: brev,
721 start: time.Now(),
722 }
723
724 // name is the project-wide unique name of the GCE instance. It can't be longer
725 // than 61 bytes, so we only use the first 8 bytes of the rev.
726 name := "buildlet-" + conf.name + "-" + rev[:8]
727
728 // buildletURL is the URL of the buildlet binary which the VMs
729 // are configured to download at boot and run. This lets us
730 // update the buildlet more easily than rebuilding the whole
731 // VM image. We put this URL in a well-known GCE metadata attribute.
732 // The value will be of the form:
733 // http://storage.googleapis.com/go-builder-data/buildlet.GOOS-GOARCH
734 m := osArchRx.FindStringSubmatch(conf.name)
735 if m == nil {
736 return nil, fmt.Errorf("invalid builder name %q", conf.name)
737 }
738 buildletURL := "http://storage.googleapis.com/go-builder-data/buildlet." + m[1]
739
740 prefix := "https://www.googleapis.com/compute/v1/projects/" + projectID
741 machType := prefix + "/zones/" + projectZone + "/machineTypes/" + conf.MachineType()
742
743 instance := &compute.Instance{
744 Name: name,
745 Description: fmt.Sprintf("Go Builder building %s %s", conf.name, rev),
746 MachineType: machType,
747 Disks: []*compute.AttachedDisk{
748 {
749 AutoDelete: true,
750 Boot: true,
751 Type: "PERSISTENT",
752 InitializeParams: &compute.AttachedDiskInitializeParams{
753 DiskName: name,
754 SourceImage: "https://www.googleapis.com/compute/v1/projects/" + projectID + "/global/images/" + conf.vmImage,
755 DiskType: "https://www.googleapis.com/compute/v1/projects/" + projectID + "/zones/" + projectZone + "/diskTypes/pd-ssd",
756 },
757 },
758 },
759 Tags: &compute.Tags{
760 // Warning: do NOT list "http-server" or "allow-ssh" (our
761 // project's custom tag to allow ssh access) here; the
762 // buildlet provides full remote code execution.
763 Items: []string{},
764 },
765 Metadata: &compute.Metadata{
766 Items: []*compute.MetadataItems{
767 {
768 Key: "buildlet-binary-url",
769 Value: buildletURL,
770 },
771 // In case the VM gets away from us (generally: if the
772 // coordinator dies while a build is running), then we
773 // set this attribute of when it should be killed so
774 // we can kill it later when the coordinator is
775 // restarted. The cleanUpOldVMs goroutine loop handles
776 // that killing.
777 {
778 Key: "delete-at",
779 Value: fmt.Sprint(time.Now().Add(30 * time.Minute).Unix()),
780 },
781 },
782 },
783 NetworkInterfaces: []*compute.NetworkInterface{
784 &compute.NetworkInterface{
785 AccessConfigs: []*compute.AccessConfig{
786 &compute.AccessConfig{
787 Type: "ONE_TO_ONE_NAT",
788 Name: "External NAT",
789 },
790 },
791 Network: prefix + "/global/networks/default",
792 },
793 },
794 }
795 op, err := computeService.Instances.Insert(projectID, projectZone, instance).Do()
796 if err != nil {
797 return nil, fmt.Errorf("Failed to create instance: %v", err)
798 }
799 st.createOp = op.Name
800 st.instName = name
801 log.Printf("%v now building in VM %v", brev, st.instName)
802 // Start the goroutine to monitor the VM now that it's booting. This might
803 // take minutes for it to come up, and then even more time to do the build.
804 go func() {
805 err := watchVM(st)
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800806 if st.hasEvent("instance_created") {
807 deleteVM(projectZone, st.instName)
808 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800809 st.setDone(err == nil)
810 if err != nil {
811 fmt.Fprintf(st, "\n\nError: %v\n", err)
812 }
813 donec <- builderRev{conf.name, rev}
814 }()
815 return st, nil
816}
817
818// watchVM monitors a VM doing a build.
819func watchVM(st *buildStatus) (err error) {
820 goodRes := func(res *http.Response, err error, what string) bool {
821 if err != nil {
822 err = fmt.Errorf("%s: %v", what, err)
823 return false
824 }
825 if res.StatusCode/100 != 2 {
826 err = fmt.Errorf("%s: %v", what, res.Status)
827 return false
828
829 }
830 return true
831 }
832 st.logEventTime("instance_create_requested")
833 // Wait for instance create operation to succeed.
834OpLoop:
835 for {
836 time.Sleep(2 * time.Second)
837 op, err := computeService.ZoneOperations.Get(projectID, projectZone, st.createOp).Do()
838 if err != nil {
839 return fmt.Errorf("Failed to get op %s: %v", st.createOp, err)
840 }
841 switch op.Status {
842 case "PENDING", "RUNNING":
843 continue
844 case "DONE":
845 if op.Error != nil {
846 for _, operr := range op.Error.Errors {
847 return fmt.Errorf("Error creating instance: %+v", operr)
848 }
849 return errors.New("Failed to start.")
850 }
851 break OpLoop
852 default:
853 log.Fatalf("Unknown status %q: %+v", op.Status, op)
854 }
855 }
856 st.logEventTime("instance_created")
857
858 inst, err := computeService.Instances.Get(projectID, projectZone, st.instName).Do()
859 if err != nil {
860 return fmt.Errorf("Error getting instance %s details after creation: %v", st.instName, err)
861 }
862 st.logEventTime("got_instance_info")
863
864 // Find its internal IP.
865 var ip string
866 for _, iface := range inst.NetworkInterfaces {
867 if strings.HasPrefix(iface.NetworkIP, "10.") {
868 ip = iface.NetworkIP
869 }
870 }
871 if ip == "" {
872 return errors.New("didn't find its internal IP address")
873 }
874
875 // Wait for it to boot and its buildlet to come up on port 80.
876 st.logEventTime("waiting_for_buildlet")
877 buildletURL := "http://" + ip
878 const numTries = 60
879 var alive bool
880 for i := 1; i <= numTries; i++ {
881 res, err := http.Get(buildletURL)
882 if err != nil {
883 time.Sleep(1 * time.Second)
884 continue
885 }
886 res.Body.Close()
887 if res.StatusCode != 200 {
888 return fmt.Errorf("buildlet returned HTTP status code %d on try number %d", res.StatusCode, i)
889 }
890 st.logEventTime("buildlet_up")
891 alive = true
892 break
893 }
894 if !alive {
895 return fmt.Errorf("buildlet didn't come up in %d seconds", numTries)
896 }
897
898 // Write the VERSION file.
899 st.logEventTime("start_write_version_tar")
900 verReq, err := http.NewRequest("PUT", buildletURL+"/writetgz", versionTgz(st.rev))
901 if err != nil {
902 return err
903 }
904 verRes, err := http.DefaultClient.Do(verReq)
905 if !goodRes(verRes, err, "writing VERSION tgz") {
906 return
907 }
908
909 // Feed the buildlet a tar file for it to extract.
910 // TODO: cache these.
911 st.logEventTime("start_fetch_gerrit_tgz")
912 tarRes, err := http.Get("https://go.googlesource.com/go/+archive/" + st.rev + ".tar.gz")
913 if !goodRes(tarRes, err, "fetching tarball from Gerrit") {
914 return
915 }
916
917 st.logEventTime("start_write_tar")
918 putReq, err := http.NewRequest("PUT", buildletURL+"/writetgz", tarRes.Body)
919 if err != nil {
920 tarRes.Body.Close()
921 return err
922 }
923 putRes, err := http.DefaultClient.Do(putReq)
924 st.logEventTime("end_write_tar")
925 tarRes.Body.Close()
926 if !goodRes(putRes, err, "writing tarball to buildlet") {
927 return
928 }
929
930 // Run the builder
931 cmd := "all.bash"
932 if strings.HasPrefix(st.name, "windows-") {
933 cmd = "all.bat"
934 } else if strings.HasPrefix(st.name, "plan9-") {
935 cmd = "all.rc"
936 }
937 execStartTime := time.Now()
938 st.logEventTime("start_exec")
939 res, err := http.PostForm(buildletURL+"/exec", url.Values{"cmd": {"src/" + cmd}})
940 if !goodRes(res, err, "running "+cmd) {
941 return
942 }
943 defer res.Body.Close()
944 st.logEventTime("running_exec")
945 // Stream the output:
946 if _, err := io.Copy(st, res.Body); err != nil {
947 return fmt.Errorf("error copying response: %v", err)
948 }
949 st.logEventTime("done")
950 state := res.Trailer.Get("Process-State")
951
952 // Don't record to the dashboard unless we heard the trailer from
953 // the buildlet, otherwise it was probably some unrelated error
954 // (like the VM being killed, or the buildlet crashing due to
955 // e.g. https://golang.org/issue/9309, since we require a tip
956 // build of the buildlet to get Trailers support)
957 if state != "" {
958 conf := builders[st.name]
959 var log string
960 if state != "ok" {
961 log = st.logs()
962 }
963 if err := conf.recordResult(state == "ok", st.rev, log, time.Since(execStartTime)); err != nil {
964 return fmt.Errorf("Status was %q but failed to report it to the dashboard: %v", state, err)
965 }
966 }
967 if state != "ok" {
968
969 return fmt.Errorf("got Trailer process state %q", state)
970 }
971 return nil
972}
973
// eventAndTime pairs a named build event with the time at which it was
// logged.
type eventAndTime struct {
	evt string    // event name, e.g. "instance_created"
	t   time.Time // when the event was logged
}
978
// buildStatus is the status of a build.
//
// Its Write method appends to output, so a *buildStatus can be used directly
// as the destination for a build's stdout and stderr.
type buildStatus struct {
	// Immutable:
	builderRev
	start     time.Time // when the build was started
	container string    // container ID for docker, else it's a VM

	// Immutable, used by VM only:
	createOp string // Instances.Insert operation name
	instName string // name of the GCE instance running the build

	mu        sync.Mutex     // guards following
	done      time.Time      // finished running
	succeeded bool           // set when done
	output    bytes.Buffer   // stdout and stderr
	events    []eventAndTime // progress events, in the order they were logged
}
996
997func (st *buildStatus) setDone(succeeded bool) {
998 st.mu.Lock()
999 defer st.mu.Unlock()
1000 st.succeeded = succeeded
1001 st.done = time.Now()
1002}
1003
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001004func (st *buildStatus) logEventTime(event string) {
1005 st.mu.Lock()
1006 defer st.mu.Unlock()
1007 st.events = append(st.events, eventAndTime{event, time.Now()})
1008}
1009
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -08001010func (st *buildStatus) hasEvent(event string) bool {
1011 st.mu.Lock()
1012 defer st.mu.Unlock()
1013 for _, e := range st.events {
1014 if e.evt == event {
1015 return true
1016 }
1017 }
1018 return false
1019}
1020
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08001021// htmlStatusLine returns the HTML to show within the <pre> block on
1022// the main page's list of active builds.
1023func (st *buildStatus) htmlStatusLine() string {
1024 st.mu.Lock()
1025 defer st.mu.Unlock()
1026
1027 urlPrefix := "https://go-review.googlesource.com/#/q/"
1028 if strings.Contains(st.name, "gccgo") {
1029 urlPrefix = "https://code.google.com/p/gofrontend/source/detail?r="
1030 }
1031
1032 var buf bytes.Buffer
1033 fmt.Fprintf(&buf, "<a href='https://github.com/golang/go/wiki/DashboardBuilders'>%s</a> rev <a href='%s%s'>%s</a>",
1034 st.name, urlPrefix, st.rev, st.rev)
1035
1036 if st.done.IsZero() {
1037 buf.WriteString(", running")
1038 } else if st.succeeded {
1039 buf.WriteString(", succeeded")
1040 } else {
1041 buf.WriteString(", failed")
1042 }
1043
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001044 logsURL := fmt.Sprintf("/logs?name=%s&rev=%s&st=%p", st.name, st.rev, st)
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08001045 if st.container != "" {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001046 fmt.Fprintf(&buf, " in container <a href='%s'>%s</a>", logsURL, st.container)
1047 } else {
1048 fmt.Fprintf(&buf, " in VM <a href='%s'>%s</a>", logsURL, st.instName)
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08001049 }
1050
1051 t := st.done
1052 if t.IsZero() {
1053 t = st.start
1054 }
1055 fmt.Fprintf(&buf, ", %v ago\n", time.Since(t))
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001056 for i, evt := range st.events {
1057 var elapsed string
1058 if i != 0 {
1059 elapsed = fmt.Sprintf("+%0.1fs", evt.t.Sub(st.events[i-1].t).Seconds())
1060 }
1061 msg := evt.evt
1062 if msg == "running_exec" {
1063 msg = fmt.Sprintf("<a href='%s'>%s</a>", logsURL, msg)
1064 }
1065 fmt.Fprintf(&buf, " %7s %v %s\n", elapsed, evt.t.Format(time.RFC3339), msg)
1066 }
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08001067 return buf.String()
1068}
1069
1070func (st *buildStatus) logs() string {
1071 st.mu.Lock()
1072 logs := st.output.String()
1073 st.mu.Unlock()
1074 key := builderKey(st.name)
1075 return strings.Replace(string(logs), key, "BUILDERKEY", -1)
1076}
1077
1078func (st *buildStatus) Write(p []byte) (n int, err error) {
1079 st.mu.Lock()
1080 defer st.mu.Unlock()
1081 const maxBufferSize = 2 << 20 // 2MB of output is way more than we expect.
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001082 plen := len(p)
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08001083 if st.output.Len()+len(p) > maxBufferSize {
1084 p = p[:maxBufferSize-st.output.Len()]
1085 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001086 st.output.Write(p) // bytes.Buffer can't fail
1087 return plen, nil
1088}
1089
// stopWatchers force-removes any previous go-commit-watcher Docker
// containers, so they don't pile up upon restarts of the coordinator.
// Failures to list or remove containers are ignored.
func stopWatchers() {
	listing, err := exec.Command("docker", "ps").Output()
	if err != nil {
		return
	}
	rows := strings.Split(string(listing), "\n")
	for _, row := range rows {
		if strings.Contains(row, "go-commit-watcher:") {
			// The row's first field is passed to docker as the container
			// to remove.
			cols := strings.Fields(row)
			exec.Command("docker", "rm", "-f", "-v", cols[0]).Run()
		}
	}
}
1105
Brad Fitzpatrick1c064a12014-12-11 18:19:15 -08001106func startWatching(conf watchConfig) (err error) {
1107 defer func() {
1108 if err != nil {
1109 restartWatcherSoon(conf)
1110 }
1111 }()
1112 log.Printf("Starting watcher for %v", conf.repo)
Chris Manghane126fb6e2014-12-01 09:59:02 -08001113 if err := condUpdateImage("go-commit-watcher"); err != nil {
1114 log.Printf("Failed to setup container for commit watcher: %v", err)
1115 return err
1116 }
1117
1118 cmd := exec.Command("docker", append([]string{"run", "-d"}, conf.dockerRunArgs()...)...)
1119 all, err := cmd.CombinedOutput()
Brad Fitzpatrick1c064a12014-12-11 18:19:15 -08001120 if err != nil {
1121 log.Printf("Docker run for commit watcher = err:%v, output: %s", err, all)
1122 return err
1123 }
1124 container := strings.TrimSpace(string(all))
1125 // Start a goroutine to wait for the watcher to die.
1126 go func() {
1127 exec.Command("docker", "wait", container).Run()
1128 exec.Command("docker", "rm", "-v", container).Run()
1129 log.Printf("Watcher crashed. Restarting soon.")
1130 restartWatcherSoon(conf)
1131 }()
1132 return nil
1133}
1134
1135func restartWatcherSoon(conf watchConfig) {
1136 time.AfterFunc(30*time.Second, func() {
1137 startWatching(conf)
1138 })
Chris Manghane126fb6e2014-12-01 09:59:02 -08001139}
1140
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -07001141func builderKey(builder string) string {
1142 master := masterKey()
1143 if len(master) == 0 {
1144 return ""
1145 }
1146 h := hmac.New(md5.New, master)
1147 io.WriteString(h, builder)
1148 return fmt.Sprintf("%x", h.Sum(nil))
1149}
1150
// masterKey returns the builder master key. The first call loads it via
// loadKey; later calls return the cached value.
func masterKey() []byte {
	keyOnce.Do(loadKey)
	return masterKeyCache
}

var (
	keyOnce        sync.Once // makes masterKey run loadKey at most once
	masterKeyCache []byte    // set by loadKey; read through masterKey
)
1160
1161func loadKey() {
1162 if *masterKeyFile != "" {
1163 b, err := ioutil.ReadFile(*masterKeyFile)
1164 if err != nil {
1165 log.Fatal(err)
1166 }
1167 masterKeyCache = bytes.TrimSpace(b)
1168 return
1169 }
1170 req, _ := http.NewRequest("GET", "http://metadata.google.internal/computeMetadata/v1/project/attributes/builder-master-key", nil)
1171 req.Header.Set("Metadata-Flavor", "Google")
1172 res, err := http.DefaultClient.Do(req)
1173 if err != nil {
1174 log.Fatal("No builder master key available")
1175 }
1176 defer res.Body.Close()
1177 if res.StatusCode != 200 {
1178 log.Fatalf("No builder-master-key project attribute available.")
1179 }
1180 slurp, err := ioutil.ReadAll(res.Body)
1181 if err != nil {
1182 log.Fatal(err)
1183 }
1184 masterKeyCache = bytes.TrimSpace(slurp)
1185}
Brad Fitzpatrickd9bbf3c2014-12-15 11:51:42 +11001186
1187func cleanUpOldContainers() {
1188 for {
1189 for _, cid := range oldContainers() {
1190 log.Printf("Cleaning old container %v", cid)
1191 exec.Command("docker", "rm", "-v", cid).Run()
1192 }
1193 time.Sleep(30 * time.Second)
1194 }
1195}
1196
// oldContainers returns the untruncated IDs printed by docker for containers
// whose status is "exited". Errors running docker are ignored, in which case
// whatever output was produced (possibly none) is used.
func oldContainers() []string {
	list := exec.Command("docker", "ps", "-a", "--filter=status=exited", "--no-trunc", "-q")
	ids, _ := list.Output()
	return strings.Fields(string(ids))
}
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -08001201
1202// cleanUpOldVMs loops forever and periodically enumerates virtual
1203// machines and deletes those which have expired.
1204//
1205// A VM is considered expired if it has a "delete-at" metadata
1206// attribute having a unix timestamp before the current time.
1207//
1208// This is the safety mechanism to delete VMs which stray from the
1209// normal deleting process. VMs are created to run a single build and
1210// should be shut down by a controlling process. Due to various types
1211// of failures, they might get stranded. To prevent them from getting
1212// stranded and wasting resources forever, we instead set the
1213// "delete-at" metadata attribute on them when created to some time
1214// that's well beyond their expected lifetime.
1215func cleanUpOldVMs() {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001216 if computeService == nil {
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -08001217 return
1218 }
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -08001219 for {
1220 for _, zone := range strings.Split(*cleanZones, ",") {
1221 zone = strings.TrimSpace(zone)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001222 if err := cleanZoneVMs(zone); err != nil {
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -08001223 log.Printf("Error cleaning VMs in zone %q: %v", zone, err)
1224 }
1225 }
1226 time.Sleep(time.Minute)
1227 }
1228}
1229
1230// cleanZoneVMs is part of cleanUpOldVMs, operating on a single zone.
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001231func cleanZoneVMs(zone string) error {
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -08001232 // Fetch the first 500 (default) running instances and clean
1233 // thoes. We expect that we'll be running many fewer than
1234 // that. Even if we have more, eventually the first 500 will
1235 // either end or be cleaned, and then the next call will get a
1236 // partially-different 500.
1237 // TODO(bradfitz): revist this code if we ever start running
1238 // thousands of VMs.
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001239 list, err := computeService.Instances.List(projectID, zone).Do()
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -08001240 if err != nil {
1241 return fmt.Errorf("listing instances: %v", err)
1242 }
1243 for _, inst := range list.Items {
1244 if inst.Metadata == nil {
1245 // Defensive. Not seen in practice.
1246 continue
1247 }
1248 for _, it := range inst.Metadata.Items {
1249 if it.Key == "delete-at" {
1250 unixDeadline, err := strconv.ParseInt(it.Value, 10, 64)
1251 if err != nil {
1252 log.Printf("invalid delete-at value %q seen; ignoring", it.Value)
1253 }
1254 if err == nil && time.Now().Unix() > unixDeadline {
1255 log.Printf("Deleting expired VM %q in zone %q ...", inst.Name, zone)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001256 deleteVM(zone, inst.Name)
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -08001257 }
1258 }
1259 }
1260 }
1261 return nil
1262}
1263
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001264func deleteVM(zone, instName string) {
1265 op, err := computeService.Instances.Delete(projectID, zone, instName).Do()
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -08001266 if err != nil {
1267 log.Printf("Failed to delete instance %q in zone %q: %v", instName, zone, err)
1268 return
1269 }
1270 log.Printf("Sent request to delete instance %q in zone %q. Operation ID == %v", instName, zone, op.Id)
1271}
1272
1273func hasComputeScope() bool {
1274 if !metadata.OnGCE() {
1275 return false
1276 }
1277 scopes, err := metadata.Scopes("default")
1278 if err != nil {
1279 log.Printf("failed to query metadata default scopes: %v", err)
1280 return false
1281 }
1282 for _, v := range scopes {
1283 if v == compute.DevstorageFull_controlScope {
1284 return true
1285 }
1286 }
1287 return false
1288}
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001289
// dash is copied from the builder binary. It runs the given method and command on the dashboard.
//
// TODO(bradfitz,adg): unify this somewhere?
//
// meth must be "GET" or "POST"; any other value panics, as does passing a
// non-nil req with "GET" or an args key named "version" (which is reserved
// for the protocol version that dash adds itself).
//
// If args is non-nil it is encoded as the URL query string.
// If req is non-nil it is JSON-encoded and passed as the body of the HTTP POST.
// If resp is non-nil the server's response is decoded into the value pointed
// to by resp (resp must be a pointer).
//
// An error is returned if the request fails, the HTTP status is not 200, the
// reply is not valid JSON, or the reply carries a non-empty Error field.
func dash(meth, cmd string, args url.Values, req, resp interface{}) error {
	const builderVersion = 1 // keep in sync with dashboard/app/build/handler.go

	query := url.Values{"version": {fmt.Sprint(builderVersion)}}
	for key, vals := range args {
		if key == "version" {
			panic(`dash: reserved args key: "version"`)
		}
		query[key] = vals
	}
	cmd = "https://build.golang.org/" + cmd + "?" + query.Encode()

	var (
		r   *http.Response
		err error
	)
	switch meth {
	case "GET":
		if req != nil {
			log.Panicf("%s to %s with req", meth, cmd)
		}
		r, err = http.Get(cmd)
	case "POST":
		var payload io.Reader
		if req != nil {
			encoded, merr := json.Marshal(req)
			if merr != nil {
				return merr
			}
			payload = bytes.NewBuffer(encoded)
		}
		r, err = http.Post(cmd, "text/json", payload)
	default:
		log.Panicf("%s: invalid method %q", cmd, meth)
	}
	if err != nil {
		return err
	}
	defer r.Body.Close()
	if r.StatusCode != http.StatusOK {
		return fmt.Errorf("bad http response: %v", r.Status)
	}

	var raw bytes.Buffer
	if _, err := raw.ReadFrom(r.Body); err != nil {
		return err
	}

	// Decode the JSON envelope. Response is pre-set to the caller's resp,
	// which can be a pointer to the value the payload should be
	// unmarshaled into.
	result := struct {
		Response interface{}
		Error    string
	}{
		Response: resp,
	}
	if err = json.Unmarshal(raw.Bytes(), &result); err != nil {
		log.Printf("json unmarshal %#q: %s\n", raw.Bytes(), err)
		return err
	}
	if result.Error != "" {
		return errors.New(result.Error)
	}
	return nil
}
1362
1363func versionTgz(rev string) io.Reader {
1364 var buf bytes.Buffer
1365 zw := gzip.NewWriter(&buf)
1366 tw := tar.NewWriter(zw)
1367
1368 contents := fmt.Sprintf("devel " + rev)
1369 check(tw.WriteHeader(&tar.Header{
1370 Name: "VERSION",
1371 Mode: 0644,
1372 Size: int64(len(contents)),
1373 }))
1374 _, err := io.WriteString(tw, contents)
1375 check(err)
1376 check(tw.Close())
1377 check(zw.Close())
1378 return bytes.NewReader(buf.Bytes())
1379}
1380
// check panics if err is non-nil. It is reserved for operations whose
// failure should be impossible (not merely rare); the panic message is
// the error text with a fixed prefix.
func check(err error) {
	if err == nil {
		return
	}
	panic("previously assumed to never fail: " + err.Error())
}