blob: 0aac8868c17346414ad726e7b12f66554372e5b9 [file] [log] [blame]
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -07001// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// The coordinator runs on GCE and coordinates builds in Docker containers.
Andrew Gerrandfa8373a2015-01-21 17:25:37 +11006package main // import "golang.org/x/build/cmd/coordinator"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -07007
8import (
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08009 "archive/tar"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070010 "bytes"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080011 "compress/gzip"
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080012 "crypto/sha1"
David Crawshaw02e988a2015-03-30 21:54:04 -040013 "crypto/tls"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070014 "encoding/json"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080015 "errors"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070016 "flag"
17 "fmt"
Brad Fitzpatrickd9bbf3c2014-12-15 11:51:42 +110018 "html"
David Crawshawdd0cf9f2015-04-29 17:58:09 -040019 "html/template"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070020 "io"
21 "io/ioutil"
22 "log"
David Crawshaw02e988a2015-03-30 21:54:04 -040023 "net"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070024 "net/http"
David Crawshaw581ddd12015-04-06 08:09:20 -040025 "os"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080026 "path"
Brad Fitzpatrick578a4d22015-02-24 21:47:40 -080027 "runtime"
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -070028 "sort"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070029 "strings"
30 "sync"
31 "time"
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -080032
Brad Fitzpatrick20d84832015-01-21 10:03:07 -080033 "camlistore.org/pkg/syncutil"
David Crawshaw66c36dd2015-04-23 10:23:22 -040034
Andrew Gerrandfa8373a2015-01-21 17:25:37 +110035 "golang.org/x/build/buildlet"
36 "golang.org/x/build/dashboard"
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080037 "golang.org/x/build/gerrit"
Andrew Gerrandfa8373a2015-01-21 17:25:37 +110038 "golang.org/x/build/types"
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080039 "google.golang.org/cloud/storage"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070040)
41
var (
	// processStartTime is captured once, when package-level variables
	// are initialized.
	processStartTime = time.Now()

	// Version is the coordinator version string; set by linker -X.
	Version string
)

// devPause is a debug option to pause for 5 minutes after the build
// finishes before destroying buildlets.
const devPause = false
49
Brad Fitzpatrickdfe82862015-03-01 09:23:57 -080050func init() {
51 // Disabled until we have test sharding. This takes 85+ minutes.
52 // Test sharding is https://github.com/golang/go/issues/10029
53 delete(dashboard.Builders, "linux-arm-qemu")
54}
55
// Command-line flags.

// masterKeyFile is the value of the -masterkey flag.
var masterKeyFile = flag.String("masterkey", "", "Path to builder master key. Else fetched using GCE project attribute 'builder-master-key'.")

// cleanZones is the value of the -zones flag.
//
// TODO(bradfitz): remove this list and just query it from the compute API:
// http://godoc.org/google.golang.org/api/compute/v1#RegionsService.Get
// and Region.Zones: http://godoc.org/google.golang.org/api/compute/v1#Region
var cleanZones = flag.String("zones", "us-central1-a,us-central1-b,us-central1-f", "Comma-separated list of zones to periodically clean of stale build VMs (ones that failed to shut themselves down)")

// mode is the value of the -mode flag; main fills it in when left empty.
var mode = flag.String("mode", "", "valid modes are 'dev', 'prod', or '' for auto-detect")
66
Andrew Gerranda9469a82015-05-27 14:15:18 -070067func buildLogBucket() string {
68 return devPrefix() + "go-build-log"
69}
70
Andrew Gerrandc53a5772015-05-27 14:29:36 -070071func snapBucket() string {
72 return devPrefix() + "go-build-snap"
73}
74
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080075// LOCK ORDER:
76// statusMu, buildStatus.mu, trySet.mu
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080077
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070078var (
79 startTime = time.Now()
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070080
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080081 statusMu sync.Mutex // guards the following four structures; see LOCK ORDER comment above
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -080082 status = map[builderRev]*buildStatus{}
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080083 statusDone []*buildStatus // finished recently, capped to maxStatusDone
84 tries = map[tryKey]*trySet{} // trybot builds
85 tryList []tryKey
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070086)
87
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080088// tryBuilders must be VMs. The Docker container builds are going away.
89var tryBuilders []dashboard.BuildConfig
90
91func init() {
92 tryList := []string{
Brad Fitzpatrick28de2aa2015-04-29 15:03:16 -070093 "all-compile",
David Crawshaw6c597e32015-05-01 12:45:09 -040094 "darwin-amd64-10_10",
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080095 "linux-386",
96 "linux-amd64",
97 "linux-amd64-race",
98 "freebsd-386-gce101",
99 "freebsd-amd64-gce101",
100 "windows-386-gce",
101 "windows-amd64-gce",
102 "openbsd-386-gce56",
103 "openbsd-amd64-gce56",
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700104 "plan9-386",
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800105 "nacl-386",
106 "nacl-amd64p32",
Brad Fitzpatrickac39ba82015-05-14 13:39:58 -0700107 "linux-arm-shard_test",
108 "linux-arm-shard_std_am",
109 "linux-arm-shard_std_nz",
110 "linux-arm-shard_runtimecpu",
111 "linux-arm-shard_cgotest",
112 "linux-arm-shard_misc",
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800113 }
114 for _, bname := range tryList {
115 conf, ok := dashboard.Builders[bname]
Brad Fitzpatrick83455d12015-02-19 16:14:20 -0800116 if ok {
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800117 tryBuilders = append(tryBuilders, conf)
118 } else {
Brad Fitzpatrick83455d12015-02-19 16:14:20 -0800119 log.Printf("ignoring invalid try builder config %q", bname)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800120 }
121 }
122}
123
// maxStatusDone caps the length of the statusDone history.
const maxStatusDone = 30

// vmDeleteTimeout is how long before we delete a VM.
// In practice this need only be as long as the slowest
// builder (plan9 currently), because on startup this program
// already deletes all buildlets it doesn't know about
// (i.e. ones from a previous instance of the coordinator).
const vmDeleteTimeout = 45 * time.Minute
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800134
David Crawshaw02e988a2015-03-30 21:54:04 -0400135func readGCSFile(name string) ([]byte, error) {
David Crawshaw581ddd12015-04-06 08:09:20 -0400136 if *mode == "dev" {
137 b, ok := testFiles[name]
138 if !ok {
139 return nil, &os.PathError{
140 Op: "open",
141 Path: name,
142 Err: os.ErrNotExist,
143 }
144 }
145 return []byte(b), nil
146 }
147
Andrew Gerranda9469a82015-05-27 14:15:18 -0700148 r, err := storage.NewReader(serviceCtx, devPrefix()+"go-builder-data", name)
David Crawshaw02e988a2015-03-30 21:54:04 -0400149 if err != nil {
150 return nil, err
151 }
152 defer r.Close()
153 return ioutil.ReadAll(r)
154}
155
// devFarmerCertPEM and devFarmerKeyPEM are a fake certificate and key,
// signed by a fake CA. They back testFiles and are served by readGCSFile
// in "dev" mode.
const (
	devFarmerCertPEM = `-----BEGIN CERTIFICATE-----
MIICljCCAX4CCQCoS+/smvkG2TANBgkqhkiG9w0BAQUFADANMQswCQYDVQQDEwJn
bzAeFw0xNTA0MDYwMzE3NDJaFw0xNzA0MDUwMzE3NDJaMA0xCzAJBgNVBAMTAmdv
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA1NMaVxX8RfCMtQB18azV
hL6/U7C8W2G+8WXYeFuOpgP2SHnMbsUeTiUYWS1xqAxUh3Vl/TT1HIASRDL7kBis
yj+drspafnCr4Yp9oJx1xlIhVXGD/SyHk5oewkjkNEmrFtUT07mT2lmZqD3XJ+6V
aQslRxhPEkLGsXIA/hCucPIplI9jgLY8TmOBhQ7RzXAnk/ayAzDkCgkWB4k/zaFy
LiHjEkE7O7PIjjY51btCLep9QSts98zojY5oYNj2RdQOZa56MHAlh9hbdpm+P1vp
2QBpsDbVpHYv2VPCPvkdOGU1/nzumsxHy17DcirKP8Tuf6zMf9obeuSlMvUUPptl
hwIDAQABMA0GCSqGSIb3DQEBBQUAA4IBAQBxvUMKsX+DEhZSmc164IuSVJ9ucZ97
+KWn4nCwnVkI/RrsJpiTj3pZNRkAxq2vmZTpUdU0CgGHdZNXp/6s/GX4cSzFphSf
WZQN0CG/O50SQ39m7fz/dZ2Xse6EH2grr6KN0QsDhK/RVxecQv57rY9nLFHnC60t
vJBDC739lWlnsGDxylJNxEk2l5c2rJdn82yGw2G9pQ/LDVAtO1G2rxGkpi4FcpGk
rNAa6MiwcyFHcAr3OsigLm4Q9bCS6YXfQDvCZGAR91ADXVWDFC1sgBgM3U3+1bGp
tgXUVKymUvoVq0BiY4BCCYDluoErgZDytLmnUOxrykYi532VpRbbK2ja
-----END CERTIFICATE-----`

	devFarmerKeyPEM = `-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEA1NMaVxX8RfCMtQB18azVhL6/U7C8W2G+8WXYeFuOpgP2SHnM
bsUeTiUYWS1xqAxUh3Vl/TT1HIASRDL7kBisyj+drspafnCr4Yp9oJx1xlIhVXGD
/SyHk5oewkjkNEmrFtUT07mT2lmZqD3XJ+6VaQslRxhPEkLGsXIA/hCucPIplI9j
gLY8TmOBhQ7RzXAnk/ayAzDkCgkWB4k/zaFyLiHjEkE7O7PIjjY51btCLep9QSts
98zojY5oYNj2RdQOZa56MHAlh9hbdpm+P1vp2QBpsDbVpHYv2VPCPvkdOGU1/nzu
msxHy17DcirKP8Tuf6zMf9obeuSlMvUUPptlhwIDAQABAoIBAAJOPyzOWitPzdZw
KNbzbmS/xEbd1UyQJIds+QlkxIjb5iEm4KYakJd8I2Vj7qVJbOkCxpYVqsoiQRBo
FP2cptKSGd045/4SrmoFHBNPXp9FaIMKdcmaX+Wjd83XCFHgsm/O4yYaDpYA/n8q
HFicZxX6Pu8kPkcOXiSx/XzDJYCnuec0GIfiJfbrQEwNLA+Ck2HnFfLy6LyrgCqi
eqaxyBoLolzjW7guWV6e/ECsnLXx2n/Pj4l1aqIFKlYxOjBIKRqeUsqzMFpOCbrx
z/scaBuH88hO96jbGZWUAm3R6ZslocQ6TaENYWNVKN1SeGISiE3hRoMAUIu1eHVu
mEzOjvECgYEA9Ypu04NzVjAHdZRwrP7IiX3+CmbyNatdZXIoagp8boPBYWw7QeL8
TPwvc3PCSIjxcT+Jv2hHTZ9Ofz9vAm/XJx6Ios9o/uAbytA+RAolQJWtLGuFLKv1
wxq78iDFcIWq3iPwpl8FJaXeCb/bsNP9jruPhwWWbJVvD1eTif09ZzsCgYEA3ePo
aQ5S0YrPtaf5r70eSBloe5vveG/kW3EW0QMrN6YlOhGSX+mjdAJk7XI/JW6vVPYS
aK+g+ZnzV7HL421McuVH8mmwPHi48l5o2FewF54qYfOoTAJS1cjV08j8WtQsrEax
HHom4m4joQEm0o4QEnTxJDS8/u7T/hhMALxeziUCgYANwevjvgHAWoCQffiyOLRT
v9N0EcCQcUGSZYsOJfhC2O8E3mOTlXw9dAPUnC/OkJ22krDNILKeDsb/Kja2FD4h
2vwc4zIm1be47WIPveHIdJp3Wq7jid8DR4QwVNW7MEIaoDjjmX9YVKrUMQPGLJqQ
XMH19sIu41CNs4J4wM+n8QKBgBiIcFPdP47neBuvnM2vbT+vf3vbO9jnFip+EHW/
kfGvLwKCmtp77JSRBzOxpAWxfTU5l8N3V6cBPIR/pflZRlCVxSSqRtAI0PoLMjBp
UZDq7eiylfMBdsMoV2v5Ft28A8xwbHinkNEMOGg+xloVVvWTdG36XsMZCNtZOF4E
db75AoGBAIk6IW5O2lk9Vc537TCyLpl2HYCP0jI3v6xIkFFolnfHPEgsXLJo9YU8
crVtB0zy4jzjN/SClc/iaeOzk5Ot+iwSRFBZu2jdt0TRxbG+cd+6vKLs0Baw6kB1
gpRUwP6i5yhi838rMgurGVFr3O/0Sv7wMx5UNEJ/RopbQ2K/bnwn
-----END RSA PRIVATE KEY-----`
)

// testFiles maps file names to their contents for "dev" mode, where
// readGCSFile consults this map instead of cloud storage.
var testFiles = map[string]string{
	"farmer-cert.pem": devFarmerCertPEM,
	"farmer-key.pem":  devFarmerKeyPEM,
}
202
David Crawshaw02e988a2015-03-30 21:54:04 -0400203func listenAndServeTLS() {
David Crawshawa3dce2c2015-04-07 19:46:19 -0400204 addr := ":443"
205 if *mode == "dev" {
206 addr = ":8119"
207 }
208 ln, err := net.Listen("tcp", addr)
209 if err != nil {
210 log.Fatalf("net.Listen(%s): %v", addr, err)
211 }
212 serveTLS(ln)
213}
214
215func serveTLS(ln net.Listener) {
David Crawshaw02e988a2015-03-30 21:54:04 -0400216 certPEM, err := readGCSFile("farmer-cert.pem")
217 if err != nil {
218 log.Printf("cannot load TLS cert, skipping https: %v", err)
219 return
220 }
221 keyPEM, err := readGCSFile("farmer-key.pem")
222 if err != nil {
223 log.Printf("cannot load TLS key, skipping https: %v", err)
224 return
225 }
226 cert, err := tls.X509KeyPair(certPEM, keyPEM)
227 if err != nil {
228 log.Printf("bad TLS cert: %v", err)
229 return
230 }
231
David Crawshawa3dce2c2015-04-07 19:46:19 -0400232 server := &http.Server{Addr: ln.Addr().String()}
David Crawshaw02e988a2015-03-30 21:54:04 -0400233 config := &tls.Config{
234 NextProtos: []string{"http/1.1"},
235 Certificates: []tls.Certificate{cert},
236 }
David Crawshaw02e988a2015-03-30 21:54:04 -0400237 tlsLn := tls.NewListener(tcpKeepAliveListener{ln.(*net.TCPListener)}, config)
David Crawshawa3dce2c2015-04-07 19:46:19 -0400238 log.Printf("Coordinator serving on: %v", tlsLn.Addr())
David Crawshaw02e988a2015-03-30 21:54:04 -0400239 if err := server.Serve(tlsLn); err != nil {
240 log.Fatalf("serve https: %v", err)
241 }
242}
243
// tcpKeepAliveListener wraps a *net.TCPListener so that every accepted
// connection has TCP keep-alives switched on.
type tcpKeepAliveListener struct {
	*net.TCPListener
}

// Accept accepts the next TCP connection and enables keep-alive on it
// with a 3 minute period before returning it.
func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) {
	conn, acceptErr := ln.AcceptTCP()
	if acceptErr != nil {
		return nil, acceptErr
	}
	const keepAlivePeriod = 3 * time.Minute
	conn.SetKeepAlive(true)
	conn.SetKeepAlivePeriod(keepAlivePeriod)
	return conn, nil
}
257
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700258func main() {
259 flag.Parse()
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700260 log.Printf("coordinator version %q starting", Version)
David Crawshaw581ddd12015-04-06 08:09:20 -0400261 err := initGCE()
262 if err != nil {
263 if *mode == "" {
264 *mode = "dev"
265 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800266 log.Printf("VM support disabled due to error initializing GCE: %v", err)
David Crawshaw581ddd12015-04-06 08:09:20 -0400267 } else {
268 if *mode == "" {
269 *mode = "prod"
270 }
271 }
272 switch *mode {
273 case "dev", "prod":
274 log.Printf("Running in %s mode", *mode)
275 default:
276 log.Fatalf("Unknown mode: %q", *mode)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800277 }
278
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700279 http.HandleFunc("/", handleStatus)
Brad Fitzpatrick578a4d22015-02-24 21:47:40 -0800280 http.HandleFunc("/debug/goroutines", handleDebugGoroutines)
David Crawshaweef380f2015-04-30 20:03:01 -0400281 http.HandleFunc("/builders", handleBuilders)
282 http.HandleFunc("/logs", handleLogs)
David Crawshaw581ddd12015-04-06 08:09:20 -0400283 http.HandleFunc("/reverse", handleReverse)
David Crawshawdd0cf9f2015-04-29 17:58:09 -0400284 http.HandleFunc("/style.css", handleStyleCSS)
David Crawshaweef380f2015-04-30 20:03:01 -0400285 http.HandleFunc("/try", handleTryStatus)
David Crawshaw02e988a2015-03-30 21:54:04 -0400286 go func() {
David Crawshaw581ddd12015-04-06 08:09:20 -0400287 if *mode == "dev" {
288 return
289 }
David Crawshaw02e988a2015-03-30 21:54:04 -0400290 err := http.ListenAndServe(":80", nil)
291 if err != nil {
292 log.Fatalf("http.ListenAndServe:80: %v", err)
293 }
294 }()
David Crawshaw66c36dd2015-04-23 10:23:22 -0400295
296 workc := make(chan builderRev)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700297
David Crawshaw581ddd12015-04-06 08:09:20 -0400298 if *mode == "dev" {
299 // TODO(crawshaw): do more in test mode
David Crawshaw66c36dd2015-04-23 10:23:22 -0400300 gcePool.SetEnabled(false)
301 http.HandleFunc("/dosomework/", handleDoSomeWork(workc))
302 } else {
303 go gcePool.cleanUpOldVMs()
304
Andrew Gerrand71716002015-05-18 13:23:24 +1000305 if devCluster {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700306 dashboard.BuildletBucket = "dev-go-builder-data"
Andrew Gerrand71716002015-05-18 13:23:24 +1000307 // Only run the linux-amd64 builder in the dev cluster (for now).
Andrew Gerrand1826ad42015-05-26 04:34:37 +1000308 dashboard.Builders = devClusterBuilders()
Andrew Gerrand71716002015-05-18 13:23:24 +1000309 }
310
David Crawshaw66c36dd2015-04-23 10:23:22 -0400311 // Start the Docker processes on this host polling Gerrit and
312 // pinging build.golang.org when new commits are available.
313 startWatchers() // in watcher.go
314
315 go findWorkLoop(workc)
316 go findTryWorkLoop()
317 // TODO(cmang): gccgo will need its own findWorkLoop
David Crawshaw581ddd12015-04-06 08:09:20 -0400318 }
319
David Crawshaw66c36dd2015-04-23 10:23:22 -0400320 go listenAndServeTLS()
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700321
322 ticker := time.NewTicker(1 * time.Minute)
323 for {
324 select {
325 case work := <-workc:
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100326 if !mayBuildRev(work) {
327 continue
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700328 }
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100329 st, err := newBuild(work)
330 if err != nil {
331 log.Printf("Bad build work params %v: %v", work, err)
332 } else {
333 st.start()
334 }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700335 case <-ticker.C:
336 if numCurrentBuilds() == 0 && time.Now().After(startTime.Add(10*time.Minute)) {
337 // TODO: halt the whole machine to kill the VM or something
338 }
339 }
340 }
341}
342
Andrew Gerrand1826ad42015-05-26 04:34:37 +1000343func devClusterBuilders() map[string]dashboard.BuildConfig {
344 m := map[string]dashboard.BuildConfig{}
345 for _, name := range []string{
346 "linux-amd64",
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700347 "linux-amd64-race",
Andrew Gerrand1826ad42015-05-26 04:34:37 +1000348 "windows-amd64-gce",
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700349 "plan9-386",
Andrew Gerrand1826ad42015-05-26 04:34:37 +1000350 } {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700351 m[name] = dashboard.Builders[name]
Andrew Gerrand1826ad42015-05-26 04:34:37 +1000352 }
353 return m
354}
355
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700356func numCurrentBuilds() int {
357 statusMu.Lock()
358 defer statusMu.Unlock()
359 return len(status)
360}
361
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800362func isBuilding(work builderRev) bool {
363 statusMu.Lock()
364 defer statusMu.Unlock()
365 _, building := status[work]
366 return building
367}
368
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800369// mayBuildRev reports whether the build type & revision should be started.
David Crawshaw66c36dd2015-04-23 10:23:22 -0400370// It returns true if it's not already building, and if a reverse buildlet is
371// required, if an appropriate machine is registered.
372func mayBuildRev(rev builderRev) bool {
373 if isBuilding(rev) {
374 return false
375 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700376 if devCluster && numCurrentBuilds() != 0 {
377 return false
378 }
David Crawshawdd57a132015-04-28 16:42:42 -0400379 if dashboard.Builders[rev.name].IsReverse {
380 return reversePool.CanBuild(rev.name)
David Crawshaw66c36dd2015-04-23 10:23:22 -0400381 }
David Crawshawdd57a132015-04-28 16:42:42 -0400382 return true
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700383}
384
385func setStatus(work builderRev, st *buildStatus) {
386 statusMu.Lock()
387 defer statusMu.Unlock()
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100388 // TODO: panic if status[work] already exists. audit all callers.
389 // For instance, what if a trybot is running, and then the CL is merged
390 // and the findWork goroutine picks it up and it has the same commit,
391 // because it didn't need to be rebased in Gerrit's cherrypick?
392 // Could we then have two running with the same key?
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800393 status[work] = st
394}
395
396func markDone(work builderRev) {
397 statusMu.Lock()
398 defer statusMu.Unlock()
399 st, ok := status[work]
400 if !ok {
401 return
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700402 }
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800403 delete(status, work)
404 if len(statusDone) == maxStatusDone {
405 copy(statusDone, statusDone[1:])
406 statusDone = statusDone[:len(statusDone)-1]
407 }
408 statusDone = append(statusDone, st)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700409}
410
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800411// statusPtrStr disambiguates which status to return if there are
412// multiple in the history (e.g. recent failures where the build
413// didn't finish for reasons outside of all.bash failing)
414func getStatus(work builderRev, statusPtrStr string) *buildStatus {
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700415 statusMu.Lock()
416 defer statusMu.Unlock()
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800417 match := func(st *buildStatus) bool {
418 return statusPtrStr == "" || fmt.Sprintf("%p", st) == statusPtrStr
419 }
420 if st, ok := status[work]; ok && match(st) {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800421 return st
422 }
423 for _, st := range statusDone {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800424 if st.builderRev == work && match(st) {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800425 return st
426 }
427 }
428 return nil
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700429}
430
431type byAge []*buildStatus
432
433func (s byAge) Len() int { return len(s) }
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100434func (s byAge) Less(i, j int) bool { return s[i].startTime.Before(s[j].startTime) }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700435func (s byAge) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
436
Brad Fitzpatrickc964c3f2015-02-25 16:43:53 -0800437func handleTryStatus(w http.ResponseWriter, r *http.Request) {
438 ts := trySetOfCommitPrefix(r.FormValue("commit"))
439 if ts == nil {
440 http.Error(w, "TryBot result not found (already done, invalid, or not yet discovered from Gerrit). Check Gerrit for results.", http.StatusNotFound)
441 return
442 }
443 ts.mu.Lock()
444 tss := ts.trySetState.clone()
445 ts.mu.Unlock()
446
447 w.Header().Set("Content-Type", "text/html; charset=utf-8")
448 fmt.Fprintf(w, "<html><head><title>trybot status</title></head><body>[<a href='/'>overall status</a>] &gt; %s\n", ts.ChangeID)
449
450 fmt.Fprintf(w, "<h1>trybot status</h1>")
451 fmt.Fprintf(w, "Change-ID: <a href='https://go-review.googlesource.com/#/q/%s'>%s</a><br>\n", ts.ChangeID, ts.ChangeID)
452 fmt.Fprintf(w, "Commit: <a href='https://go-review.googlesource.com/#/q/%s'>%s</a><br>\n", ts.Commit, ts.Commit)
453 fmt.Fprintf(w, "<p>Builds remain: %d</p>\n", tss.remain)
454 fmt.Fprintf(w, "<p>Builds failed: %v</p>\n", tss.failed)
455 fmt.Fprintf(w, "<p>Builds</p><table cellpadding=5 border=1>\n")
456 for _, bs := range tss.builds {
457 status := "<i>(running)</i>"
458 bs.mu.Lock()
459 if !bs.done.IsZero() {
460 if bs.succeeded {
461 status = "pass"
462 } else {
463 status = "<b>FAIL</b>"
464 }
465 }
466 bs.mu.Unlock()
467 fmt.Fprintf(w, "<tr valign=top><td align=left>%s</td><td align=center>%s</td><td><pre>%s</pre></td></tr>\n",
468 bs.name,
469 status,
David Crawshawdd0cf9f2015-04-29 17:58:09 -0400470 bs.HTMLStatusLine())
Brad Fitzpatrickc964c3f2015-02-25 16:43:53 -0800471 }
472 fmt.Fprintf(w, "</table></body></html>")
473}
474
475func trySetOfCommitPrefix(commitPrefix string) *trySet {
476 if commitPrefix == "" {
477 return nil
478 }
479 statusMu.Lock()
480 defer statusMu.Unlock()
481 for k, ts := range tries {
482 if strings.HasPrefix(k.Commit, commitPrefix) {
483 return ts
484 }
485 }
486 return nil
487}
488
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700489func handleLogs(w http.ResponseWriter, r *http.Request) {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800490 st := getStatus(builderRev{r.FormValue("name"), r.FormValue("rev")}, r.FormValue("st"))
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700491 if st == nil {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800492 http.NotFound(w, r)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700493 return
494 }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700495 w.Header().Set("Content-Type", "text/plain; charset=utf-8")
Andrew Gerrandcc49d3b2015-03-12 10:27:49 +1100496 w.Header().Set("X-Content-Type-Options", "nosniff")
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -0800497 writeStatusHeader(w, st)
498
Andrew Gerrandff978d72015-05-28 15:01:51 -0700499 if r.FormValue("nostream") != "" {
500 fmt.Fprintf(w, "\n\n(no live streaming. reload manually to see status)\n")
501 st.mu.Lock()
502 defer st.mu.Unlock()
Andrew Gerrandaa078a32015-05-28 16:13:40 -0700503 w.Write(st.output.Bytes())
Andrew Gerrandff978d72015-05-28 15:01:51 -0700504 return
505 }
506
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100507 if !st.hasEvent("pre_exec") {
508 fmt.Fprintf(w, "\n\n(buildlet still starting; no live streaming. reload manually to see status)\n")
509 return
510 }
511
Brad Fitzpatrickec2973a2015-03-04 07:30:35 -0800512 w.(http.Flusher).Flush()
513
Andrew Gerrand5f73aab2015-03-03 10:30:18 +1100514 logs := st.watchLogs()
515 defer st.unregisterWatcher(logs)
516 closed := w.(http.CloseNotifier).CloseNotify()
517 for {
518 select {
519 case b, ok := <-logs:
520 if !ok {
521 return
522 }
523 w.Write(b)
524 w.(http.Flusher).Flush()
525 case <-closed:
526 return
527 }
528 }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700529}
530
// handleDebugGoroutines writes the stack traces of all goroutines as
// plain text. The dump is limited to a 1 MiB buffer.
func handleDebugGoroutines(w http.ResponseWriter, r *http.Request) {
	const maxDump = 1 << 20

	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	dump := make([]byte, maxDump)
	n := runtime.Stack(dump, true)
	w.Write(dump[:n])
}
537
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -0800538func writeStatusHeader(w http.ResponseWriter, st *buildStatus) {
539 st.mu.Lock()
540 defer st.mu.Unlock()
541 fmt.Fprintf(w, " builder: %s\n", st.name)
542 fmt.Fprintf(w, " rev: %s\n", st.rev)
Brad Fitzpatrick46d9b002015-05-13 15:55:41 -0700543 workaroundFlush(w)
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100544 fmt.Fprintf(w, " buildlet: %s\n", st.bc)
545 fmt.Fprintf(w, " started: %v\n", st.startTime)
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -0800546 done := !st.done.IsZero()
547 if done {
548 fmt.Fprintf(w, " started: %v\n", st.done)
549 fmt.Fprintf(w, " success: %v\n", st.succeeded)
550 } else {
551 fmt.Fprintf(w, " status: still running\n")
552 }
553 if len(st.events) > 0 {
554 io.WriteString(w, "\nEvents:\n")
555 st.writeEventsLocked(w, false)
556 }
557 io.WriteString(w, "\nBuild log:\n")
Brad Fitzpatrick46d9b002015-05-13 15:55:41 -0700558 workaroundFlush(w)
559}
560
// workaroundFlush is an unnecessary flush to work around a bug in Chrome.
// See https://code.google.com/p/chromium/issues/detail?id=2016 for the details.
// In summary: a couple unnecessary chunk flushes bypass the content type
// sniffing which happen (even if unused?), even if you set nosniff as we do
// in func handleLogs.
//
// It panics if w does not implement http.Flusher.
func workaroundFlush(w http.ResponseWriter) {
	flusher := w.(http.Flusher)
	flusher.Flush()
}
569
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800570// findWorkLoop polls http://build.golang.org/?mode=json looking for new work
571// for the main dashboard. It does not support gccgo.
572// TODO(bradfitz): it also currently does not support subrepos.
573func findWorkLoop(work chan<- builderRev) {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700574 // Useful for debugging a single run:
575 if devCluster && false {
576 work <- builderRev{name: "linux-amd64-race", rev: "54789eff385780c54254f822e09505b6222918e2"}
577 return
578 }
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800579 ticker := time.NewTicker(15 * time.Second)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700580 for {
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800581 if err := findWork(work); err != nil {
582 log.Printf("failed to find new work: %v", err)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700583 }
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800584 <-ticker.C
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700585 }
586}
587
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800588func findWork(work chan<- builderRev) error {
589 var bs types.BuildStatus
Andrew Gerrand71716002015-05-18 13:23:24 +1000590 res, err := http.Get(dashBase() + "?mode=json")
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800591 if err != nil {
592 return err
593 }
594 defer res.Body.Close()
595 if err := json.NewDecoder(res.Body).Decode(&bs); err != nil {
596 return err
597 }
598 if res.StatusCode != 200 {
599 return fmt.Errorf("unexpected http status %v", res.Status)
600 }
601
602 knownToDashboard := map[string]bool{} // keys are builder
603 for _, b := range bs.Builders {
604 knownToDashboard[b] = true
605 }
606
607 var goRevisions []string
608 for _, br := range bs.Revisions {
609 if br.Repo == "go" {
610 goRevisions = append(goRevisions, br.Revision)
611 } else {
612 // TODO(bradfitz): support these: golang.org/issue/9506
613 continue
614 }
615 if len(br.Results) != len(bs.Builders) {
616 return errors.New("bogus JSON response from dashboard: results is too long.")
617 }
618 for i, res := range br.Results {
619 if res != "" {
620 // It's either "ok" or a failure URL.
621 continue
622 }
623 builder := bs.Builders[i]
Brad Fitzpatrickeb52e712015-05-13 18:38:20 -0700624 if builderInfo, ok := dashboard.Builders[builder]; !ok || builderInfo.TryOnly {
625 // Not managed by the coordinator, or a trybot-only one.
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800626 continue
627 }
628 br := builderRev{bs.Builders[i], br.Revision}
629 if !isBuilding(br) {
630 work <- br
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700631 }
632 }
633 }
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800634
635 // And to bootstrap new builders, see if we have any builders
636 // that the dashboard doesn't know about.
Brad Fitzpatrickeb52e712015-05-13 18:38:20 -0700637 for b, builderInfo := range dashboard.Builders {
638 if builderInfo.TryOnly || knownToDashboard[b] {
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800639 continue
640 }
641 for _, rev := range goRevisions {
642 br := builderRev{b, rev}
643 if !isBuilding(br) {
644 work <- br
645 }
646 }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700647 }
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800648 return nil
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700649}
650
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800651// findTryWorkLoop is a goroutine which loops periodically and queries
652// Gerrit for TryBot work.
653func findTryWorkLoop() {
654 if errTryDeps != nil {
655 return
656 }
657 ticker := time.NewTicker(60 * time.Second)
658 for {
659 if err := findTryWork(); err != nil {
660 log.Printf("failed to find trybot work: %v", err)
661 }
662 <-ticker.C
663 }
664}
665
666func findTryWork() error {
667 cis, err := gerritClient.QueryChanges("label:Run-TryBot=1 label:TryBot-Result=0 project:go status:open", gerrit.QueryChangesOpt{
668 Fields: []string{"CURRENT_REVISION"},
669 })
670 if err != nil {
671 return err
672 }
673 if len(cis) == 0 {
674 return nil
675 }
676
677 statusMu.Lock()
678 defer statusMu.Unlock()
679
680 tryList = make([]tryKey, 0, len(cis))
681 wanted := map[tryKey]bool{}
682 for _, ci := range cis {
683 if ci.ChangeID == "" || ci.CurrentRevision == "" {
684 log.Printf("Warning: skipping incomplete %#v", ci)
685 continue
686 }
687 key := tryKey{
688 ChangeID: ci.ChangeID,
689 Commit: ci.CurrentRevision,
690 }
691 tryList = append(tryList, key)
692 wanted[key] = true
693 if _, ok := tries[key]; ok {
694 // already in progress
695 continue
696 }
697 tries[key] = newTrySet(key)
698 }
699 for k, ts := range tries {
700 if !wanted[k] {
701 delete(tries, k)
702 go ts.cancelBuilds()
703 }
704 }
705 return nil
706}
707
// tryKey identifies one trybot run: a Gerrit change together with the
// exact commit being tested. It is comparable and is used as a map key.
type tryKey struct {
	// ChangeID is the Gerrit Change-ID,
	// e.g. I1a27695838409259d1586a0adfa9f92bccf7ceba.
	ChangeID string

	// Commit is the commit hash,
	// e.g. ecf3dffc81dc21408fb02159af352651882a8383.
	Commit string
}
712
713// trySet is a the state of a set of builds of different
714// configurations, all for the same (Change-ID, Commit) pair. The
715// sets which are still wanted (not already submitted or canceled) are
716// stored in the global 'tries' map.
717type trySet struct {
718 // immutable
719 tryKey
720
Brad Fitzpatrickacb32b42015-05-11 09:14:37 -0700721 // mu guards state and errMsg
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800722 // See LOCK ORDER comment above.
723 mu sync.Mutex
724 trySetState
Brad Fitzpatrickacb32b42015-05-11 09:14:37 -0700725 errMsg bytes.Buffer
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800726}
727
728type trySetState struct {
729 remain int
730 failed []string // build names
731 builds []*buildStatus
732}
733
734func (ts trySetState) clone() trySetState {
735 return trySetState{
736 remain: ts.remain,
737 failed: append([]string(nil), ts.failed...),
738 builds: append([]*buildStatus(nil), ts.builds...),
739 }
740}
741
742// newTrySet creates a new trySet group of builders for a given key,
743// the (Change-ID, Commit) pair. It also starts goroutines for each
744// build.
745//
746// Must hold statusMu.
747func newTrySet(key tryKey) *trySet {
748 log.Printf("Starting new trybot set for %v", key)
749 ts := &trySet{
750 tryKey: key,
751 trySetState: trySetState{
752 remain: len(tryBuilders),
753 builds: make([]*buildStatus, len(tryBuilders)),
754 },
755 }
756 go ts.notifyStarting()
757 for i, bconf := range tryBuilders {
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800758 brev := builderRev{name: bconf.Name, rev: key.Commit}
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100759
760 bs, _ := newBuild(brev)
761 bs.trySet = ts
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800762 status[brev] = bs
763 ts.builds[i] = bs
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100764 go bs.start() // acquires statusMu itself, so in a goroutine
765 go ts.awaitTryBuild(i, bconf, bs)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800766 }
767 return ts
768}
769
770// state returns a copy of the trySet's state.
771func (ts *trySet) state() trySetState {
772 ts.mu.Lock()
773 defer ts.mu.Unlock()
774 return ts.trySetState.clone()
775}
776
777// notifyStarting runs in its own goroutine and posts to Gerrit that
778// the trybots have started on the user's CL with a link of where to watch.
779func (ts *trySet) notifyStarting() {
Brad Fitzpatrick4623e1a2015-05-27 13:15:38 -0700780 msg := "TryBots beginning. Status page: http://farmer.golang.org/try?commit=" + ts.Commit[:8]
781
782 if ci, err := gerritClient.GetChangeDetail(ts.ChangeID); err == nil {
783 for _, cmi := range ci.Messages {
784 if cmi.Message == msg {
785 // Dup. Don't spam.
786 return
787 }
788 }
789 }
790
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800791 // Ignore error. This isn't critical.
Brad Fitzpatrick4623e1a2015-05-27 13:15:38 -0700792 gerritClient.SetReview(ts.ChangeID, ts.Commit, gerrit.ReviewInput{Message: msg})
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800793}
794
795// awaitTryBuild runs in its own goroutine and waits for a build in a
796// trySet to complete.
797//
798// If the build fails without getting to the end, it sleeps and
799// reschedules it, as long as it's still wanted.
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100800func (ts *trySet) awaitTryBuild(idx int, bconf dashboard.BuildConfig, bs *buildStatus) {
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800801 for {
802 WaitCh:
803 for {
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100804 timeout := time.NewTimer(10 * time.Minute)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800805 select {
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100806 case <-bs.donec:
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800807 timeout.Stop()
808 break WaitCh
809 case <-timeout.C:
810 if !ts.wanted() {
811 // Build was canceled.
812 return
813 }
814 }
815 }
816
817 if bs.hasEvent("done") {
818 ts.noteBuildComplete(bconf, bs)
819 return
820 }
821
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100822 // TODO(bradfitz): rethink this logic. we should only
823 // start a new build if the old one appears dead or
824 // hung.
825
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800826 // Sleep a bit and retry.
827 time.Sleep(30 * time.Second)
828 if !ts.wanted() {
829 return
830 }
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800831 brev := builderRev{name: bconf.Name, rev: ts.Commit}
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100832 bs, _ = newBuild(brev)
Brad Fitzpatrick9d86d3d2015-04-01 01:26:32 -0700833 bs.trySet = ts
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100834 go bs.start()
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800835 ts.mu.Lock()
836 ts.builds[idx] = bs
837 ts.mu.Unlock()
838 }
839}
840
841// wanted reports whether this trySet is still active.
842//
843// If the commmit has been submitted, or change abandoned, or the
844// checkbox unchecked, wanted returns false.
845func (ts *trySet) wanted() bool {
846 statusMu.Lock()
847 defer statusMu.Unlock()
848 _, ok := tries[ts.tryKey]
849 return ok
850}
851
// cancelBuilds runs in its own goroutine and cancels this trySet's
// currently-active builds because they're no longer wanted.
//
// It is currently a no-op placeholder.
func (ts *trySet) cancelBuilds() {
	// TODO(bradfitz): implement
}
857
858func (ts *trySet) noteBuildComplete(bconf dashboard.BuildConfig, bs *buildStatus) {
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800859 bs.mu.Lock()
860 succeeded := bs.succeeded
861 var buildLog string
862 if !succeeded {
863 buildLog = bs.output.String()
864 }
865 bs.mu.Unlock()
866
867 ts.mu.Lock()
868 ts.remain--
869 remain := ts.remain
870 if !succeeded {
871 ts.failed = append(ts.failed, bconf.Name)
872 }
Brad Fitzpatrickacb32b42015-05-11 09:14:37 -0700873 numFail := len(ts.failed)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800874 ts.mu.Unlock()
875
876 if !succeeded {
877 s1 := sha1.New()
878 io.WriteString(s1, buildLog)
879 objName := fmt.Sprintf("%s/%s_%x.log", bs.rev[:8], bs.name, s1.Sum(nil)[:4])
Andrew Gerranda9469a82015-05-27 14:15:18 -0700880 wr := storage.NewWriter(serviceCtx, buildLogBucket(), objName)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800881 wr.ContentType = "text/plain; charset=utf-8"
882 wr.ACL = append(wr.ACL, storage.ACLRule{Entity: storage.AllUsers, Role: storage.RoleReader})
883 if _, err := io.WriteString(wr, buildLog); err != nil {
884 log.Printf("Failed to write to GCS: %v", err)
885 return
886 }
887 if err := wr.Close(); err != nil {
888 log.Printf("Failed to write to GCS: %v", err)
889 return
890 }
Andrew Gerranda9469a82015-05-27 14:15:18 -0700891 failLogURL := fmt.Sprintf("https://storage.googleapis.com/%s/%s", buildLogBucket(), objName)
Brad Fitzpatrickacb32b42015-05-11 09:14:37 -0700892 ts.mu.Lock()
893 fmt.Fprintf(&ts.errMsg, "Failed on %s: %s\n", bs.name, failLogURL)
894 ts.mu.Unlock()
895
896 if numFail == 1 && remain > 0 {
897 if err := gerritClient.SetReview(ts.ChangeID, ts.Commit, gerrit.ReviewInput{
898 Message: fmt.Sprintf(
899 "This change failed on %s:\n"+
900 "See %s\n\n"+
901 "Consult https://build.golang.org/ to see whether it's a new failure. Other builds still in progress; subsequent failure notices suppressed until final report.",
902 bs.name, failLogURL),
903 }); err != nil {
904 log.Printf("Failed to call Gerrit: %v", err)
905 return
906 }
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800907 }
908 }
909
910 if remain == 0 {
911 score, msg := 1, "TryBots are happy."
Brad Fitzpatrickacb32b42015-05-11 09:14:37 -0700912 if numFail > 0 {
913 ts.mu.Lock()
914 errMsg := ts.errMsg.String()
915 ts.mu.Unlock()
916 score, msg = -1, fmt.Sprintf("%d of %d TryBots failed:\n%s\nConsult https://build.golang.org/ to see whether they are new failures.",
917 numFail, len(ts.builds), errMsg)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800918 }
919 if err := gerritClient.SetReview(ts.ChangeID, ts.Commit, gerrit.ReviewInput{
920 Message: msg,
921 Labels: map[string]int{
922 "TryBot-Result": score,
923 },
924 }); err != nil {
925 log.Printf("Failed to call Gerrit: %v", err)
926 return
927 }
928 }
929}
930
// builderRev is a build configuration type and a revision.
// It is used as the key of the global status map.
type builderRev struct {
	name string // e.g. "linux-amd64-race"
	rev  string // lowercase hex git hash
	// TODO: optional subrepo name/hash
}
937
// eventTimeLogger is the interface through which buildlet pools and
// helpers report named events (e.g. "helper_ready"). optText carries
// optional free-form detail for the event.
// NOTE(review): presumably the implementation timestamps each event,
// as the method name suggests; confirm against buildStatus.
type eventTimeLogger interface {
	logEventTime(event string, optText ...string)
}
941
// ErrCanceled is the error a function should return when it stops
// early because its Cancel channel became readable.
var ErrCanceled = errors.New("canceled")

// Cancel is a channel that's closed by the caller when the request is no longer
// desired. The function receiving a cancel should return ErrCanceled whenever
// Cancel becomes readable.
type Cancel <-chan struct{}

// IsCanceled reports, without blocking, whether c is currently
// readable. A nil Cancel is never canceled.
func (c Cancel) IsCanceled() bool {
	canceled := false
	select {
	case <-c:
		canceled = true
	default:
	}
	return canceled
}
957
// BuildletPool is a source of buildlet clients. The pools returned
// by poolForConf (gcePool and reversePool) are used through this
// interface.
type BuildletPool interface {
	// GetBuildlet returns a new buildlet client.
	//
	// The machineType is the machine type (e.g. "linux-amd64-race").
	//
	// The rev is git hash. Implementations should not use it for
	// anything except for log messages or VM naming.
	//
	// Clients must Close when done with the client.
	GetBuildlet(cancel Cancel, machineType, rev string, el eventTimeLogger) (*buildlet.Client, error)

	String() string // TODO(bradfitz): more status stuff
}
971
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700972// GetBuildlets creates up to n buildlets and sends them on the returned channel
973// before closing the channel.
974func GetBuildlets(cancel Cancel, pool BuildletPool, n int, machineType, rev string, el eventTimeLogger) <-chan *buildlet.Client {
975 ch := make(chan *buildlet.Client) // NOT buffered
976 var wg sync.WaitGroup
977 wg.Add(n)
978 for i := 0; i < n; i++ {
979 go func() {
980 defer wg.Done()
981 bc, err := pool.GetBuildlet(cancel, machineType, rev, el)
982 if err != nil {
983 if err != ErrCanceled {
984 log.Printf("failed to get a %s buildlet for rev %s: %v", machineType, rev, err)
985 }
986 return
987 }
988 el.logEventTime("helper_ready")
989 select {
990 case ch <- bc:
991 case <-cancel:
992 el.logEventTime("helper_killed_before_use")
993 bc.Close()
994 return
995 }
996 }()
997 }
998 go func() {
999 wg.Wait()
1000 close(ch)
1001 }()
1002 return ch
1003}
1004
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001005func poolForConf(conf dashboard.BuildConfig) (BuildletPool, error) {
1006 if conf.VMImage != "" {
1007 return gcePool, nil
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001008 }
David Crawshaw66c36dd2015-04-23 10:23:22 -04001009 return reversePool, nil
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001010}
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001011
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001012func newBuild(rev builderRev) (*buildStatus, error) {
1013 // Note: can't acquire statusMu in newBuild, as this is called
1014 // from findTryWork -> newTrySet, which holds statusMu.
1015
1016 conf, ok := dashboard.Builders[rev.name]
1017 if !ok {
1018 return nil, fmt.Errorf("unknown builder type %q", rev.name)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001019 }
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001020 return &buildStatus{
1021 builderRev: rev,
1022 conf: conf,
1023 donec: make(chan struct{}),
1024 startTime: time.Now(),
1025 }, nil
1026}
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001027
// start registers st as the current status for its builderRev and
// runs the build in a new goroutine; it returns without waiting for
// the build. It also starts a goroutine that pings the dashboard.
//
// When the build returns, any error is appended to the build output,
// setDone records whether the build succeeded, and markDone is called
// for the builderRev.
// The build status's donec channel is closed when the build is complete
// in either direction.
//
// start calls setStatus, which acquires statusMu (see newTrySet), so
// it must not be called with statusMu held.
func (st *buildStatus) start() {
	setStatus(st.builderRev, st)
	go st.pingDashboard()
	go func() {
		err := st.build()
		if err != nil {
			fmt.Fprintf(st, "\n\nError: %v\n", err)
		}
		st.setDone(err == nil)
		markDone(st.builderRev)
	}()
}
1045
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001046func (st *buildStatus) buildletType() string {
1047 if v := st.conf.BuildletType; v != "" {
1048 return v
Brad Fitzpatrickac39ba82015-05-14 13:39:58 -07001049 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001050 return st.conf.Name
1051}
1052
1053func (st *buildStatus) buildletPool() (BuildletPool, error) {
1054 buildletType := st.buildletType()
Brad Fitzpatrickac39ba82015-05-14 13:39:58 -07001055 bconf, ok := dashboard.Builders[buildletType]
1056 if !ok {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001057 return nil, fmt.Errorf("invalid BuildletType %q for %q", buildletType, st.conf.Name)
Brad Fitzpatrickac39ba82015-05-14 13:39:58 -07001058 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001059 return poolForConf(bconf)
1060}
1061
1062func (st *buildStatus) expectedMakeBashDuration() time.Duration {
1063 // TODO: base this on historical measurements, instead of statically configured.
1064 // TODO: move this to dashboard/builders.go? But once we based on on historical
1065 // measurements, it'll need GCE services (bigtable/bigquery?), so it's probably
1066 // better in this file.
1067 goos, goarch := st.conf.GOOS(), st.conf.GOARCH()
1068
1069 if goos == "plan9" {
1070 return 2500 * time.Millisecond
1071 }
1072 if goos == "linux" {
1073 if goarch == "arm" {
1074 return 4 * time.Minute
1075 }
1076 return 1000 * time.Millisecond
1077 }
1078 if goos == "windows" {
1079 return 1000 * time.Millisecond
1080 }
1081
1082 return 1500 * time.Millisecond
1083}
1084
1085func (st *buildStatus) expectedBuildletStartDuration() time.Duration {
1086 // TODO: move this to dashboard/builders.go? But once we based on on historical
1087 // measurements, it'll need GCE services (bigtable/bigquery?), so it's probably
1088 // better in this file.
1089 pool, _ := st.buildletPool()
1090 switch pool.(type) {
1091 case *gceBuildletPool:
1092 return time.Minute
1093 case *reverseBuildletPool:
1094 goos, arch := st.conf.GOOS(), st.conf.GOARCH()
1095 if goos == "darwin" {
1096 if arch == "arm" && arch == "arm64" {
1097 // iOS; idle or it's not.
1098 return 0
1099 }
1100 if arch == "amd64" || arch == "386" {
1101 return 0 // TODO: remove this once we're using VMware
1102 return 1 * time.Minute // VMware boot of hermetic OS X
1103 }
1104 }
1105 if goos == "linux" && arch == "arm" {
1106 // Scaleway. Ready or not.
1107 return 0
1108 }
1109 }
1110 return 0
1111}
1112
1113// getHelpersReadySoon waits a bit (as a function of the build
1114// configuration) and starts getting the buildlets for test sharding
1115// ready, such that they're ready when make.bash is done. But we don't
1116// want to start too early, lest we waste idle resources during make.bash.
1117func (st *buildStatus) getHelpersReadySoon() {
1118 if st.conf.NumTestHelpers == 0 {
1119 return
1120 }
1121 time.AfterFunc(st.expectedMakeBashDuration()-st.expectedBuildletStartDuration(),
1122 func() {
1123 st.logEventTime("starting_helpers")
1124 st.getHelpers() // and ignore the result.
1125 })
1126}
1127
// getHelpers returns a channel of buildlet test helpers, with an item
// sent as they become available. The channel is closed at the end.
//
// The helpers are requested only once, on the first call; later calls
// return the same channel.
func (st *buildStatus) getHelpers() <-chan *buildlet.Client {
	st.onceInitHelpers.Do(st.onceInitHelpersFunc)
	return st.helpers
}
1134
1135func (st *buildStatus) onceInitHelpersFunc() {
1136 pool, _ := st.buildletPool() // won't return an error since we called it already
1137 st.helpers = GetBuildlets(st.donec, pool, st.conf.NumTestHelpers, st.buildletType(), st.rev, st)
1138}
1139
1140func (st *buildStatus) build() (retErr error) {
1141 pool, err := st.buildletPool()
Brad Fitzpatrickf3c01932015-01-15 16:29:16 -08001142 if err != nil {
1143 return err
1144 }
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001145 st.logEventTime("get_buildlet")
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001146 bc, err := pool.GetBuildlet(nil, st.buildletType(), st.rev, st)
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001147 if err != nil {
1148 return fmt.Errorf("failed to get a buildlet: %v", err)
1149 }
1150 defer bc.Close()
1151 st.mu.Lock()
1152 st.bc = bc
1153 st.mu.Unlock()
1154
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001155 st.logEventTime("got_buildlet", bc.IPPort())
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001156 goodRes := func(res *http.Response, err error, what string) bool {
1157 if err != nil {
Brad Fitzpatrickc1d98dc2015-01-07 15:44:25 -08001158 retErr = fmt.Errorf("%s: %v", what, err)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001159 return false
1160 }
1161 if res.StatusCode/100 != 2 {
Brad Fitzpatrickc1d98dc2015-01-07 15:44:25 -08001162 slurp, _ := ioutil.ReadAll(io.LimitReader(res.Body, 4<<10))
1163 retErr = fmt.Errorf("%s: %v; body: %s", what, res.Status, slurp)
1164 res.Body.Close()
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001165 return false
1166
1167 }
1168 return true
1169 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001170
1171 // Write the VERSION file.
1172 st.logEventTime("start_write_version_tar")
Brad Fitzpatrick4f7abaa2015-01-21 08:14:43 -08001173 if err := bc.PutTar(versionTgz(st.rev), "go"); err != nil {
Brad Fitzpatrickf3c01932015-01-15 16:29:16 -08001174 return fmt.Errorf("writing VERSION tgz: %v", err)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001175 }
1176
1177 // Feed the buildlet a tar file for it to extract.
1178 // TODO: cache these.
1179 st.logEventTime("start_fetch_gerrit_tgz")
1180 tarRes, err := http.Get("https://go.googlesource.com/go/+archive/" + st.rev + ".tar.gz")
1181 if !goodRes(tarRes, err, "fetching tarball from Gerrit") {
1182 return
1183 }
1184
Brad Fitzpatrick20d84832015-01-21 10:03:07 -08001185 var grp syncutil.Group
1186 grp.Go(func() error {
1187 st.logEventTime("start_write_go_tar")
1188 if err := bc.PutTar(tarRes.Body, "go"); err != nil {
1189 tarRes.Body.Close()
1190 return fmt.Errorf("writing tarball from Gerrit: %v", err)
1191 }
1192 st.logEventTime("end_write_go_tar")
1193 return nil
1194 })
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001195 if st.conf.Go14URL != "" {
Brad Fitzpatrick20d84832015-01-21 10:03:07 -08001196 grp.Go(func() error {
1197 st.logEventTime("start_write_go14_tar")
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001198 if err := bc.PutTarFromURL(st.conf.Go14URL, "go1.4"); err != nil {
Brad Fitzpatrick20d84832015-01-21 10:03:07 -08001199 return err
1200 }
1201 st.logEventTime("end_write_go14_tar")
1202 return nil
1203 })
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001204 }
Brad Fitzpatrick20d84832015-01-21 10:03:07 -08001205 if err := grp.Err(); err != nil {
1206 return err
1207 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001208
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001209 execStartTime := time.Now()
Brad Fitzpatrickf8c24842015-01-16 09:54:03 -08001210 st.logEventTime("pre_exec")
Brad Fitzpatrick46d9b002015-05-13 15:55:41 -07001211 fmt.Fprintf(st, "%s at %v\n\n", st.name, st.rev)
Brad Fitzpatrickf8c24842015-01-16 09:54:03 -08001212
Brad Fitzpatrick7d9b0362015-05-27 11:51:27 -07001213 var lastScript string
1214 var remoteErr error
1215 if st.conf.SplitMakeRun() {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001216 st.getHelpersReadySoon()
Brad Fitzpatrick7d9b0362015-05-27 11:51:27 -07001217 makeScript := st.conf.MakeScript()
1218 lastScript = makeScript
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001219 maket0 := time.Now()
Brad Fitzpatrick7d9b0362015-05-27 11:51:27 -07001220 remoteErr, err = bc.Exec(path.Join("go", makeScript), buildlet.ExecOpts{
1221 Output: st,
1222 OnStartExec: func() {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001223 st.logEventTime("running_exec", makeScript)
Brad Fitzpatrick7d9b0362015-05-27 11:51:27 -07001224 },
1225 ExtraEnv: st.conf.Env(),
1226 Debug: true,
1227 Args: st.conf.MakeScriptArgs(),
Andrew Gerrandfb774882015-05-21 14:02:38 +10001228 })
1229 if err != nil {
1230 return err
1231 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001232 st.logEventTime("exec_done", fmt.Sprintf("%s in %v", makeScript, time.Since(maket0)))
Brad Fitzpatrick7d9b0362015-05-27 11:51:27 -07001233
1234 if remoteErr == nil {
Andrew Gerrand7d4dc222015-05-27 17:44:52 -07001235 if err := st.cleanForSnapshot(); err != nil {
1236 return fmt.Errorf("cleanForSnapshot: %v", err)
1237 }
1238
1239 if err := st.writeSnapshot(); err != nil {
1240 return fmt.Errorf("writeSnapshot: %v", err)
1241 }
1242
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001243 lastScript = "runTests"
1244 remoteErr, err = st.runTests(st.getHelpers())
Brad Fitzpatrick44de54d2015-05-27 20:06:32 -07001245 if err != nil {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001246 return fmt.Errorf("runTests: %v", err)
Brad Fitzpatrick44de54d2015-05-27 20:06:32 -07001247 }
Brad Fitzpatrick7d9b0362015-05-27 11:51:27 -07001248 }
1249 } else {
1250 // Old way.
1251 // TOOD(bradfitz,adg): delete this block when all builders
1252 // can split make & run (and then delete the SplitMakeRun method)
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001253 st.logEventTime("legacy_all_path")
Brad Fitzpatrick7d9b0362015-05-27 11:51:27 -07001254 allScript := st.conf.AllScript()
1255 lastScript = allScript
1256 remoteErr, err = bc.Exec(path.Join("go", allScript), buildlet.ExecOpts{
1257 Output: st,
1258 OnStartExec: func() {
1259 st.logEventTime("running_exec")
Brad Fitzpatrick7d9b0362015-05-27 11:51:27 -07001260 },
1261 ExtraEnv: st.conf.Env(),
1262 Debug: true,
1263 Args: st.conf.AllScriptArgs(),
1264 })
1265 if err != nil {
1266 return err
1267 }
Andrew Gerrandfb774882015-05-21 14:02:38 +10001268 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001269 doneMsg := "all tests passed"
1270 if remoteErr != nil {
1271 doneMsg = "with test failures"
1272 }
1273 st.logEventTime("done", doneMsg) // "done" is a magic value
1274
1275 if devPause {
1276 st.logEventTime("DEV_MAIN_SLEEP")
1277 time.Sleep(5 * time.Minute)
1278 }
Andrew Gerrandfb774882015-05-21 14:02:38 +10001279
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08001280 if st.trySet == nil {
1281 var buildLog string
Brad Fitzpatrickf8c24842015-01-16 09:54:03 -08001282 if remoteErr != nil {
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08001283 buildLog = st.logs()
Brad Fitzpatrickf8c24842015-01-16 09:54:03 -08001284 }
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08001285 if err := recordResult(st.name, remoteErr == nil, st.rev, buildLog, time.Since(execStartTime)); err != nil {
1286 if remoteErr != nil {
1287 return fmt.Errorf("Remote error was %q but failed to report it to the dashboard: %v", remoteErr, err)
1288 }
1289 return fmt.Errorf("Build succeeded but failed to report it to the dashboard: %v", err)
1290 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001291 }
Brad Fitzpatrickf8c24842015-01-16 09:54:03 -08001292 if remoteErr != nil {
Andrew Gerrandfb774882015-05-21 14:02:38 +10001293 return fmt.Errorf("%v failed: %v", lastScript, remoteErr)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001294 }
1295 return nil
1296}
1297
// cleanForSnapshotFiles lists the paths, relative to the buildlet's
// work directory, that cleanForSnapshot removes before a snapshot of
// the built tree is taken.
var cleanForSnapshotFiles = []string{
	"go/doc/gopher",
	"go/pkg/bootstrap",
}
1302
// cleanForSnapshot removes the paths in cleanForSnapshotFiles from
// the build's buildlet, logging an event before and after. It returns
// the error from the removal, if any.
func (st *buildStatus) cleanForSnapshot() error {
	st.logEventTime("clean_for_snapshot")
	defer st.logEventTime("clean_for_snapshot_done")

	return st.bc.RemoveAll(cleanForSnapshotFiles...)
}
1309
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001310func (st *buildStatus) snapshotObjectName() string {
1311 return fmt.Sprintf("%v/%v/%v.tar.gz", "go", st.name, st.rev)
1312}
1313
1314func (st *buildStatus) snapshotURL() string {
1315 return fmt.Sprintf("https://storage.googleapis.com/%s/%s", snapBucket(), st.snapshotObjectName())
1316}
1317
Andrew Gerrandc53a5772015-05-27 14:29:36 -07001318func (st *buildStatus) writeSnapshot() error {
1319 st.logEventTime("write_snapshot")
1320 defer st.logEventTime("write_snapshot_done")
1321
1322 tgz, err := st.bc.GetTar("go")
1323 if err != nil {
1324 return err
1325 }
1326 defer tgz.Close()
1327
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001328 wr := storage.NewWriter(serviceCtx, snapBucket(), st.snapshotObjectName())
Andrew Gerrandc53a5772015-05-27 14:29:36 -07001329 wr.ContentType = "application/octet-stream"
1330 wr.ACL = append(wr.ACL, storage.ACLRule{Entity: storage.AllUsers, Role: storage.RoleReader})
1331 if _, err := io.Copy(wr, tgz); err != nil {
1332 wr.Close()
1333 return err
1334 }
1335
1336 return wr.Close()
1337}
1338
Brad Fitzpatrick44de54d2015-05-27 20:06:32 -07001339func (st *buildStatus) distTestList() (names []string, err error) {
1340 var buf bytes.Buffer
1341 remoteErr, err := st.bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{
1342 Output: &buf,
1343 ExtraEnv: st.conf.Env(),
1344 OnStartExec: func() { st.logEventTime("discovering_tests") },
1345 Path: []string{"$WORKDIR/go/bin", "$PATH"},
1346 Args: []string{"tool", "dist", "test", "--no-rebuild", "--list"},
1347 })
1348 if err != nil {
1349 return nil, fmt.Errorf("Exec error: %v, %s", remoteErr, buf.Bytes())
1350 }
1351 if remoteErr != nil {
1352 return nil, fmt.Errorf("Remote error: %v, %s", remoteErr, buf.Bytes())
1353 }
1354 return strings.Fields(buf.String()), nil
1355}
1356
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001357func (st *buildStatus) newTestSet(names []string) *testSet {
1358 set := &testSet{
1359 st: st,
1360 retryc: make(chan *testItem, len(names)),
1361 }
1362 for _, name := range names {
1363 set.items = append(set.items, &testItem{
1364 set: set,
1365 name: name,
1366 duration: testDuration(name),
1367 take: make(chan token, 1),
1368 done: make(chan token),
1369 })
1370 }
1371 return set
1372}
1373
1374func partitionGoTests(tests []string) (sets [][]string) {
1375 var srcTests []string
1376 var cmdTests []string
1377 for _, name := range tests {
1378 if strings.HasPrefix(name, "go_test:cmd/") {
1379 cmdTests = append(cmdTests, name)
1380 } else if strings.HasPrefix(name, "go_test:") {
1381 srcTests = append(srcTests, name)
1382 }
1383 }
1384 sort.Strings(srcTests)
1385 sort.Strings(cmdTests)
1386 goTests := append(srcTests, cmdTests...)
1387
1388 const sizeThres = 10 * time.Second
1389
1390 var curSet []string
1391 var curDur time.Duration
1392
1393 flush := func() {
1394 if len(curSet) > 0 {
1395 sets = append(sets, curSet)
1396 curSet = nil
1397 curDur = 0
1398 }
1399 }
1400 for _, name := range goTests {
1401 d := testDuration(name) - minGoTestSpeed // subtract 'go' tool overhead
1402 if curDur+d > sizeThres {
1403 flush() // no-op if empty
1404 }
1405 curSet = append(curSet, name)
1406 curDur += d
1407 }
1408
1409 flush()
1410 return
1411}
1412
// minGoTestSpeed is the smallest duration recorded in
// fixedTestDuration for any "go_test:" entry. It serves as an
// estimate of the fixed per-invocation overhead of the go tool.
var minGoTestSpeed = (func() time.Duration {
	var fastest Seconds
	for name, secs := range fixedTestDuration {
		if !strings.HasPrefix(name, "go_test:") {
			continue
		}
		if fastest == 0 || secs < fastest {
			fastest = secs
		}
	}
	return fastest.Duration()
})()

// Seconds is a duration expressed as a floating-point number of
// seconds.
type Seconds float64

// Duration converts s to a time.Duration, truncating any fraction of
// a nanosecond.
func (s Seconds) Duration() time.Duration {
	return time.Duration(float64(s) * float64(time.Second))
}

// fixedTestDuration maps dist test names to how long each took,
// in seconds on Linux/amd64 (once on 2015-05-28), each
// by themselves. There seems to be a 0.6s+ overhead
// from the go tool which goes away if they're combined.
var fixedTestDuration = map[string]Seconds{
	"go_test:archive/tar":                    1.30,
	"go_test:archive/zip":                    1.68,
	"go_test:bufio":                          1.61,
	"go_test:bytes":                          1.50,
	"go_test:compress/bzip2":                 0.82,
	"go_test:compress/flate":                 1.73,
	"go_test:compress/gzip":                  0.82,
	"go_test:compress/lzw":                   0.86,
	"go_test:compress/zlib":                  1.78,
	"go_test:container/heap":                 0.69,
	"go_test:container/list":                 0.72,
	"go_test:container/ring":                 0.64,
	"go_test:crypto/aes":                     0.79,
	"go_test:crypto/cipher":                  0.96,
	"go_test:crypto/des":                     0.96,
	"go_test:crypto/dsa":                     0.75,
	"go_test:crypto/ecdsa":                   0.86,
	"go_test:crypto/elliptic":                1.06,
	"go_test:crypto/hmac":                    0.67,
	"go_test:crypto/md5":                     0.77,
	"go_test:crypto/rand":                    0.89,
	"go_test:crypto/rc4":                     0.71,
	"go_test:crypto/rsa":                     1.17,
	"go_test:crypto/sha1":                    0.75,
	"go_test:crypto/sha256":                  0.68,
	"go_test:crypto/sha512":                  0.67,
	"go_test:crypto/subtle":                  0.56,
	"go_test:crypto/tls":                     3.29,
	"go_test:crypto/x509":                    2.81,
	"go_test:database/sql":                   1.75,
	"go_test:database/sql/driver":            0.64,
	"go_test:debug/dwarf":                    0.77,
	"go_test:debug/elf":                      1.41,
	"go_test:debug/gosym":                    1.45,
	"go_test:debug/macho":                    0.97,
	"go_test:debug/pe":                       0.79,
	"go_test:debug/plan9obj":                 0.73,
	"go_test:encoding/ascii85":               0.64,
	"go_test:encoding/asn1":                  1.16,
	"go_test:encoding/base32":                0.79,
	"go_test:encoding/base64":                0.82,
	"go_test:encoding/binary":                0.96,
	"go_test:encoding/csv":                   0.67,
	"go_test:encoding/gob":                   2.70,
	"go_test:encoding/hex":                   0.66,
	"go_test:encoding/json":                  2.20,
	"go_test:errors":                         0.54,
	"go_test:expvar":                         1.36,
	"go_test:flag":                           0.92,
	"go_test:fmt":                            2.02,
	"go_test:go/ast":                         1.44,
	"go_test:go/build":                       1.42,
	"go_test:go/constant":                    0.92,
	"go_test:go/doc":                         1.51,
	"go_test:go/format":                      0.73,
	"go_test:go/internal/gcimporter":         1.30,
	"go_test:go/parser":                      1.30,
	"go_test:go/printer":                     1.61,
	"go_test:go/scanner":                     0.89,
	"go_test:go/token":                       0.92,
	"go_test:go/types":                       5.24,
	"go_test:hash/adler32":                   0.62,
	"go_test:hash/crc32":                     0.68,
	"go_test:hash/crc64":                     0.55,
	"go_test:hash/fnv":                       0.66,
	"go_test:html":                           0.74,
	"go_test:html/template":                  1.93,
	"go_test:image":                          1.42,
	"go_test:image/color":                    0.77,
	"go_test:image/draw":                     1.32,
	"go_test:image/gif":                      1.15,
	"go_test:image/jpeg":                     1.32,
	"go_test:image/png":                      1.23,
	"go_test:index/suffixarray":              0.79,
	"go_test:internal/singleflight":          0.66,
	"go_test:io":                             0.97,
	"go_test:io/ioutil":                      0.73,
	"go_test:log":                            0.72,
	"go_test:log/syslog":                     2.93,
	"go_test:math":                           1.59,
	"go_test:math/big":                       3.75,
	"go_test:math/cmplx":                     0.81,
	"go_test:math/rand":                      1.15,
	"go_test:mime":                           1.01,
	"go_test:mime/multipart":                 1.51,
	"go_test:mime/quotedprintable":           0.95,
	"go_test:net":                            6.71,
	"go_test:net/http":                       9.41,
	"go_test:net/http/cgi":                   2.00,
	"go_test:net/http/cookiejar":             1.51,
	"go_test:net/http/fcgi":                  1.43,
	"go_test:net/http/httptest":              1.36,
	"go_test:net/http/httputil":              1.54,
	"go_test:net/http/internal":              0.68,
	"go_test:net/internal/socktest":          0.58,
	"go_test:net/mail":                       0.92,
	"go_test:net/rpc":                        1.95,
	"go_test:net/rpc/jsonrpc":                1.50,
	"go_test:net/smtp":                       1.43,
	"go_test:net/textproto":                  1.01,
	"go_test:net/url":                        1.45,
	"go_test:os":                             1.88,
	"go_test:os/exec":                        2.13,
	"go_test:os/signal":                      4.22,
	"go_test:os/user":                        0.93,
	"go_test:path":                           0.68,
	"go_test:path/filepath":                  1.14,
	"go_test:reflect":                        3.42,
	"go_test:regexp":                         1.65,
	"go_test:regexp/syntax":                  1.40,
	"go_test:runtime":                        21.02,
	"go_test:runtime/debug":                  0.79,
	"go_test:runtime/pprof":                  8.01,
	"go_test:sort":                           0.96,
	"go_test:strconv":                        1.60,
	"go_test:strings":                        1.51,
	"go_test:sync":                           1.05,
	"go_test:sync/atomic":                    1.13,
	"go_test:syscall":                        1.69,
	"go_test:testing":                        3.70,
	"go_test:testing/quick":                  0.74,
	"go_test:text/scanner":                   0.79,
	"go_test:text/tabwriter":                 0.71,
	"go_test:text/template":                  1.65,
	"go_test:text/template/parse":            1.25,
	"go_test:time":                           4.20,
	"go_test:unicode":                        0.68,
	"go_test:unicode/utf16":                  0.77,
	"go_test:unicode/utf8":                   0.71,
	"go_test:cmd/addr2line":                  1.73,
	"go_test:cmd/api":                        1.33,
	"go_test:cmd/asm/internal/asm":           1.24,
	"go_test:cmd/asm/internal/lex":           0.91,
	"go_test:cmd/compile/internal/big":       5.26,
	"go_test:cmd/cover":                      3.32,
	"go_test:cmd/fix":                        1.26,
	"go_test:cmd/go":                         3.63,
	"go_test:cmd/gofmt":                      1.06,
	"go_test:cmd/internal/goobj":             0.65,
	"go_test:cmd/internal/obj":               1.16,
	"go_test:cmd/internal/obj/x86":           1.04,
	"go_test:cmd/internal/rsc.io/arm/armasm": 1.92,
	"go_test:cmd/internal/rsc.io/x86/x86asm": 2.22,
	"go_test:cmd/newlink":                    1.48,
	"go_test:cmd/nm":                         1.84,
	"go_test:cmd/objdump":                    3.60,
	"go_test:cmd/pack":                       2.64,
	"go_test:cmd/pprof/internal/profile":     1.29,
	"runtime:cpu124":                         44.78,
	"sync_cpu":                               1.01,
	"cgo_stdio":                              1.53,
	"cgo_life":                               1.56,
	"cgo_test":                               45.60,
	"race":                                   42.55,
	"testgodefs":                             2.37,
	"testso":                                 2.72,
	"testcarchive":                           11.11,
	"testcshared":                            15.80,
	"testshared":                             7.13,
	"testasan":                               2.56,
	"cgo_errors":                             7.03,
	"testsigfwd":                             2.74,
	"doc_progs":                              5.38,
	"wiki":                                   3.56,
	"shootout":                               11.34,
	"bench_go1":                              3.72,
	"test":                                   45, // old, but valid for a couple weeks from 2015-06-04
	"test:0_5":                               10,
	"test:1_5":                               10,
	"test:2_5":                               10,
	"test:3_5":                               10,
	"test:4_5":                               10,
	"codewalk":                               2.42,
	"api":                                    7.38,
}
1611
1612// testDuration predicts how long the dist test 'name' will take.
1613// It's only a scheduling guess.
1614func testDuration(name string) time.Duration {
1615 if secs, ok := fixedTestDuration[name]; ok {
1616 return secs.Duration()
1617 }
1618 return minGoTestSpeed * 2
1619}
1620
// runTests runs the dist tests for this build, spreading them across
// the build's original buildlet (st.bc) and any helper buildlets that
// arrive on the helpers channel, and writes their output to st in the
// order of the test list.
//
// runTests is only called for builders which support a split make/run
// (should be everything, at least soon). Currently (2015-05-27) iOS
// and Android and Nacl may not. Untested.
//
// A non-nil remoteErr means a dist test failed; a non-nil err means
// the list of dist tests could not be obtained.
func (st *buildStatus) runTests(helpers <-chan *buildlet.Client) (remoteErr, err error) {
	testNames, err := st.distTestList()
	if err != nil {
		return nil, fmt.Errorf("distTestList: %v", err)
	}
	set := st.newTestSet(testNames)
	st.logEventTime("starting_tests", fmt.Sprintf("%d tests", len(set.items)))
	startTime := time.Now()

	// We use our original buildlet to run the tests in order, to
	// make the streaming somewhat smooth and not incredibly
	// lumpy. The rest of the buildlets run the largest tests
	// first (critical path scheduling).
	go func() {
		goroot := "" // no need to override; main buildlet's GOROOT is baked into binaries
		for tis := range set.itemsInOrder() {
			st.runTestsOnBuildlet(st.bc, tis, goroot)
		}
	}()
	// All helpers share this one channel of work.
	helperWork := set.itemsBiggestFirst()
	go func() {
		for helper := range helpers {
			// Each helper gets its own goroutine for setup and test execution.
			go func(bc *buildlet.Client) {
				// Deferred calls run last-in-first-out: the optional dev
				// pause happens first, then the helper is closed, and
				// only then is "closed_helper" logged.
				defer st.logEventTime("closed_helper", bc.IPPort())
				defer bc.Close()
				if devPause {
					defer time.Sleep(5 * time.Minute)
					defer st.logEventTime("DEV_HELPER_SLEEP", bc.IPPort())
				}
				st.logEventTime("got_helper", bc.IPPort())
				// The helper needs its own copy of the Go tree, extracted
				// into "go" from the build's snapshot.
				if err := bc.PutTarFromURL(st.snapshotURL(), "go"); err != nil {
					log.Printf("failed to extract snapshot for helper %s: %v", bc.IPPort(), err)
					return
				}
				workDir, err := bc.WorkDir()
				if err != nil {
					log.Printf("error discovering workdir for helper %s: %v", bc.IPPort(), err)
					return
				}
				// Unlike the main buildlet, the helper's GOROOT must be
				// set explicitly to the extracted tree.
				goroot := st.conf.FilePathJoin(workDir, "go")
				st.logEventTime("setup_helper", bc.IPPort())
				for tis := range helperWork {
					st.runTestsOnBuildlet(bc, tis, goroot)
				}
			}(helper)
		}
	}()

	var lastBanner string
	var serialDuration time.Duration
	// Report results in the original test order, regardless of which
	// buildlet ran each test or when it finished.
	for _, ti := range set.items {
		<-ti.done // wait for this test to finish (whether it passed or failed)

		serialDuration += ti.execDuration
		if len(ti.output) > 0 {
			banner, out := parseOutputAndBanner(ti.output)
			// Only print a banner when it differs from the previous one.
			if banner != lastBanner {
				lastBanner = banner
				fmt.Fprintf(st, "\n##### %s\n", banner)
			}
			if devCluster {
				// On the dev cluster, append shard debugging details to
				// the end of the test's output.
				out = bytes.TrimSuffix(out, nl)
				st.Write(out)
				fmt.Fprintf(st, " (shard %s; par=%d)\n", ti.shardIPPort, ti.groupSize)
			} else {
				st.Write(out)
			}
		}

		if ti.remoteErr != nil {
			// Stop at the first failure: take every remaining test so
			// that no buildlet starts it.
			set.cancelAll()
			return fmt.Errorf("dist test failed: %s: %v", ti.name, ti.remoteErr), nil
		}
	}
	shardedDuration := time.Since(startTime)
	st.logEventTime("tests_complete", fmt.Sprintf("took %v; aggregate %v; saved %v", shardedDuration, serialDuration, serialDuration-shardedDuration))
	fmt.Fprintf(st, "\nAll tests passed.\n")
	return nil, nil
}
1703
const (
	banner       = "XXXBANNERXXX:" // flag passed to dist
	bannerPrefix = "\n" + banner   // with the newline added by dist
)

// bannerPrefixBytes is bannerPrefix as a byte slice, for matching
// against raw test output.
var bannerPrefixBytes = []byte(bannerPrefix)

// parseOutputAndBanner splits dist test output b into its banner line
// and the remaining output.
//
// If b does not begin with bannerPrefix, the banner is empty and out is
// b unchanged. Otherwise the prefix is removed; the text up to the next
// newline becomes the banner and everything after that newline is out.
// If no newline follows the prefix, the banner is empty and out is the
// text after the prefix.
func parseOutputAndBanner(b []byte) (banner string, out []byte) {
	if !bytes.HasPrefix(b, bannerPrefixBytes) {
		return "", b
	}
	rest := b[len(bannerPrefixBytes):]
	end := bytes.IndexByte(rest, '\n')
	if end < 0 {
		return "", rest
	}
	return string(rest[:end]), rest[end+1:]
}
1722
// maxTestExecErrors is the number of test execution failures at which
// we give up and stop trying and instead permanently fail the test.
// Note that this is not related to whether the test failed remotely,
// but whether we were unable to start or complete watching it run.
// (A communication error)
const maxTestExecErrors = 3
1729
1730// runTestsOnBuildlet runs tis on bc, using the optional goroot environment variable.
1731func (st *buildStatus) runTestsOnBuildlet(bc *buildlet.Client, tis []*testItem, goroot string) {
1732 names := make([]string, len(tis))
1733 for i, ti := range tis {
1734 names[i] = ti.name
1735 if i > 0 && !strings.HasPrefix(ti.name, "go_test:") {
1736 panic("only go_test:* tests may be merged")
1737 }
1738 }
1739 which := fmt.Sprintf("%s: %v", bc.IPPort(), names)
1740 st.logEventTime("start_tests", which)
1741
1742 // TODO(bradfitz,adg): a few weeks after
1743 // https://go-review.googlesource.com/10688 is submitted,
1744 // around Jun 18th 2015, remove this innerRx stuff and just
1745 // pass a list of test names to dist instead. We don't want to
1746 // do it right away, so people don't have to rebase their CLs
1747 // to avoid trybot failures.
1748 var innerRx string
1749 if len(tis) > 1 {
1750 innerRx = "(" + strings.Join(names, "|") + ")"
1751 } else {
1752 innerRx = names[0]
1753 }
1754
1755 var buf bytes.Buffer
1756 t0 := time.Now()
1757 remoteErr, err := bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{
1758 // We set Dir to "." instead of the default ("go/bin") so when the dist tests
1759 // try to run os/exec.Command("go", "test", ...), the LookPath of "go" doesn't
1760 // return "./go.exe" (which exists in the current directory: "go/bin") and then
1761 // fail when dist tries to run the binary in dir "$GOROOT/src", since
1762 // "$GOROOT/src" + "./go.exe" doesn't exist. Perhaps LookPath should return
1763 // an absolute path.
1764 Dir: ".",
1765 Output: &buf, // see "maybe stream lines" TODO below
1766 ExtraEnv: append(st.conf.Env(), "GOROOT="+goroot),
1767 Path: []string{"$WORKDIR/go/bin", "$PATH"},
1768 Args: []string{"tool", "dist", "test", "--no-rebuild", "--banner=" + banner, "--run=^" + innerRx + "$"},
1769 })
1770 summary := "ok"
1771 if err != nil {
1772 summary = "commErr=" + err.Error()
1773 } else if remoteErr != nil {
1774 summary = "test failed remotely"
1775 }
1776 execDuration := time.Since(t0)
1777 st.logEventTime("end_tests", fmt.Sprintf("%s; %s after %v", which, summary, execDuration))
1778 if err != nil {
1779 for _, ti := range tis {
1780 ti.numFail++
1781 st.logf("Execution error running %s on %s: %v (numFails = %d)", ti.name, bc, err, ti.numFail)
1782 if ti.numFail >= maxTestExecErrors {
1783 msg := fmt.Sprintf("Failed to schedule %q test after %d tries.\n", ti.name, maxTestExecErrors)
1784 ti.output = []byte(msg)
1785 ti.remoteErr = errors.New(msg)
1786 close(ti.done)
1787 } else {
1788 ti.retry()
1789 }
1790 }
1791 return
1792 }
1793
1794 out := buf.Bytes()
1795 out = bytes.Replace(out, []byte("\nALL TESTS PASSED (some were excluded)\n"), nil, 1)
1796 out = bytes.Replace(out, []byte("\nALL TESTS PASSED\n"), nil, 1)
1797
1798 for _, ti := range tis {
1799 ti.output = out
1800 ti.remoteErr = remoteErr
1801 ti.execDuration = execDuration
1802 ti.groupSize = len(tis)
1803 ti.shardIPPort = bc.IPPort()
1804 close(ti.done)
1805
1806 // After the first one, make the rest succeed with no output.
1807 // TODO: maybe stream lines (set Output to a line-reading
1808 // Writer instead of &buf). for now we just wait for them in
1809 // ~10 second batches. Doesn't look as smooth on the output,
1810 // though.
1811 out = nil
1812 remoteErr = nil
1813 execDuration = 0
1814 }
1815}
1816
// testSet is the collection of dist tests to run for a build, together
// with the channel used to reschedule tests whose execution failed.
type testSet struct {
	st    *buildStatus // the build these tests belong to
	items []*testItem  // all the tests in the set

	// retryc communicates failures to watch a test. The channel is
	// never closed. Sends should also select on reading st.donec
	// to see if things have stopped early due to another test
	// failing and aborting the build.
	retryc chan *testItem
}
1827
1828// cancelAll cancels all pending tests.
1829func (s *testSet) cancelAll() {
1830 for _, ti := range s.items {
1831 ti.tryTake() // ignore return value
1832 }
1833}
1834
1835// itemsInOrder returns a channel of items mostly in their original order.
1836// The exception is that an item which fails to execute may happen later
1837// in a different order.
1838// Each item sent in the channel has been took. (ti.tryTake returned true)
1839// The returned channel is closed when no items remain.
1840func (s *testSet) itemsInOrder() <-chan []*testItem {
1841 return s.itemChan(s.items)
1842}
1843
1844func (s *testSet) itemsBiggestFirst() <-chan []*testItem {
1845 items := append([]*testItem(nil), s.items...)
1846 sort.Sort(sort.Reverse(byTestDuration(items)))
1847 return s.itemChan(items)
1848}
1849
1850// itemChan returns a channel which yields the provided items, usually
1851// in the same order given items, but grouped with others tests they
1852// should be run with. (only stdlib tests are are grouped)
1853func (s *testSet) itemChan(items []*testItem) <-chan []*testItem {
1854 names := make([]string, len(items))
1855 namedItem := map[string]*testItem{}
1856 for i, ti := range items {
1857 names[i] = ti.name
1858 namedItem[ti.name] = ti
1859 }
1860 stdSets := partitionGoTests(names)
1861 setForTest := map[string][]*testItem{}
1862 for _, set := range stdSets {
1863 tis := make([]*testItem, len(set))
1864 for i, name := range set {
1865 tis[i] = namedItem[name]
1866 setForTest[name] = tis
1867 }
1868 }
1869
1870 ch := make(chan []*testItem)
1871 go func() {
1872 defer close(ch)
1873 for _, ti := range items {
1874 if !ti.tryTake() {
1875 continue
1876 }
1877 send := []*testItem{ti}
1878 for _, other := range setForTest[ti.name] {
1879 if other != ti && other.tryTake() {
1880 send = append(send, other)
1881 }
1882 }
1883 select {
1884 case ch <- send:
1885 case <-s.st.donec:
1886 return
1887 }
1888 }
1889 for {
1890 select {
1891 case ti := <-s.retryc:
1892 if ti.tryTake() {
1893 select {
1894 case ch <- []*testItem{ti}:
1895 case <-s.st.donec:
1896 return
1897 }
1898 }
1899 case <-s.st.donec:
1900 return
1901 }
1902 }
1903 }()
1904 return ch
1905}
1906
// testItem is a single dist test in a testSet, along with its
// scheduling state and, once it has run, its results.
type testItem struct {
	set      *testSet
	name     string        // "go_test:sort"
	duration time.Duration // optional approximate size

	take chan token // buffered size 1: sending takes ownership of rest of fields:

	done    chan token // closed when done; the result fields below are read only after it is closed
	numFail int        // how many times it's failed to execute

	// groupSize is the number of tests which were run together
	// along with this one with "go dist test".
	// This is 1 for non-std/cmd tests, and usually >1 for std/cmd tests.
	groupSize   int
	shardIPPort string // buildlet's IPPort, for debugging

	// the following are only set for the first item in a group:
	output       []byte
	remoteErr    error         // real test failure (not a communications failure)
	execDuration time.Duration // actual time
}
1928
1929func (ti *testItem) tryTake() bool {
1930 select {
1931 case ti.take <- token{}:
1932 return true
1933 default:
1934 return false
1935 }
1936}
1937
1938func (ti *testItem) isDone() bool {
1939 select {
1940 case <-ti.done:
1941 return true
1942 default:
1943 return false
1944 }
1945}
1946
1947// retry reschedules the test to run again, if a machine died before
1948// or during execution, so its results aren't yet known.
1949// The caller must own the 'take' semaphore.
1950func (ti *testItem) retry() {
1951 // release it to make it available for somebody else to try later:
1952 <-ti.take
1953
1954 // Enqueue this test to retry, unless the build is
1955 // only proceeding to the first failure and it's
1956 // already failed.
1957 select {
1958 case ti.set.retryc <- ti:
1959 case <-ti.set.st.donec:
1960 }
1961}
1962
1963type byTestDuration []*testItem
1964
1965func (s byTestDuration) Len() int { return len(s) }
1966func (s byTestDuration) Less(i, j int) bool { return s[i].duration < s[j].duration }
1967func (s byTestDuration) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
1968
// eventAndTime is one entry in a build's event log.
type eventAndTime struct {
	t    time.Time // when the event was logged
	evt  string    // event name, such as "start_tests"
	text string    // optional free-form detail; may be empty
}
1974
// buildStatus is the status of a build.
type buildStatus struct {
	// Immutable:
	builderRev // NOTE(review): presumably supplies the name and rev fields used by the methods; confirm
	conf      dashboard.BuildConfig
	startTime time.Time     // actually time of newBuild (~same thing)
	trySet    *trySet       // or nil
	donec     chan struct{} // closed when done (by setDone)

	onceInitHelpers sync.Once // guards call of onceInitHelpersFunc, to init:
	helpers         <-chan *buildlet.Client

	mu        sync.Mutex       // guards following
	bc        *buildlet.Client // nil initially, until pool returns one
	done      time.Time        // finished running; zero while the build is still running
	succeeded bool             // set when done
	output    bytes.Buffer     // stdout and stderr
	events    []eventAndTime   // appended to by logEventTime
	watcher   []*logWatcher    // clients registered by watchLogs
}
1995
1996func (st *buildStatus) setDone(succeeded bool) {
1997 st.mu.Lock()
1998 defer st.mu.Unlock()
1999 st.succeeded = succeeded
2000 st.done = time.Now()
Andrew Gerrand5f73aab2015-03-03 10:30:18 +11002001 st.notifyWatchersLocked(true)
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01002002 close(st.donec)
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002003}
2004
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08002005func (st *buildStatus) isRunning() bool {
2006 st.mu.Lock()
2007 defer st.mu.Unlock()
Brad Fitzpatrickec2973a2015-03-04 07:30:35 -08002008 return st.isRunningLocked()
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08002009}
2010
Brad Fitzpatrickec2973a2015-03-04 07:30:35 -08002011func (st *buildStatus) isRunningLocked() bool { return st.done.IsZero() }
2012
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002013func (st *buildStatus) logf(format string, args ...interface{}) {
2014 log.Printf("[build %s %s]: %s", st.name, st.rev, fmt.Sprintf(format, args...))
2015}
2016
2017func (st *buildStatus) logEventTime(event string, optText ...string) {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002018 st.mu.Lock()
2019 defer st.mu.Unlock()
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002020 var text string
2021 if len(optText) > 0 {
2022 if len(optText) > 1 {
2023 panic("usage")
2024 }
2025 text = optText[0]
2026 }
2027 st.events = append(st.events, eventAndTime{
2028 t: time.Now(),
2029 evt: event,
2030 text: text,
2031 })
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002032}
2033
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -08002034func (st *buildStatus) hasEvent(event string) bool {
2035 st.mu.Lock()
2036 defer st.mu.Unlock()
2037 for _, e := range st.events {
2038 if e.evt == event {
2039 return true
2040 }
2041 }
2042 return false
2043}
2044
David Crawshawdd0cf9f2015-04-29 17:58:09 -04002045// HTMLStatusLine returns the HTML to show within the <pre> block on
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002046// the main page's list of active builds.
David Crawshawdd0cf9f2015-04-29 17:58:09 -04002047func (st *buildStatus) HTMLStatusLine() template.HTML {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002048 st.mu.Lock()
2049 defer st.mu.Unlock()
2050
2051 urlPrefix := "https://go-review.googlesource.com/#/q/"
2052 if strings.Contains(st.name, "gccgo") {
2053 urlPrefix = "https://code.google.com/p/gofrontend/source/detail?r="
2054 }
2055
2056 var buf bytes.Buffer
2057 fmt.Fprintf(&buf, "<a href='https://github.com/golang/go/wiki/DashboardBuilders'>%s</a> rev <a href='%s%s'>%s</a>",
2058 st.name, urlPrefix, st.rev, st.rev)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08002059 if ts := st.trySet; ts != nil {
2060 fmt.Fprintf(&buf, " (trying <a href='https://go-review.googlesource.com/#/q/%s'>%s</a>)",
2061 ts.ChangeID, ts.ChangeID[:8])
2062 }
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002063
2064 if st.done.IsZero() {
2065 buf.WriteString(", running")
2066 } else if st.succeeded {
2067 buf.WriteString(", succeeded")
2068 } else {
2069 buf.WriteString(", failed")
2070 }
2071
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01002072 fmt.Fprintf(&buf, "; <a href='%s'>build log</a>; %s", st.logsURL(), html.EscapeString(st.bc.String()))
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002073
2074 t := st.done
2075 if t.IsZero() {
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01002076 t = st.startTime
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002077 }
2078 fmt.Fprintf(&buf, ", %v ago\n", time.Since(t))
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -08002079 st.writeEventsLocked(&buf, true)
David Crawshawdd0cf9f2015-04-29 17:58:09 -04002080 return template.HTML(buf.String())
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -08002081}
2082
2083func (st *buildStatus) logsURL() string {
2084 return fmt.Sprintf("/logs?name=%s&rev=%s&st=%p", st.name, st.rev, st)
2085}
2086
2087// st.mu must be held.
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002088func (st *buildStatus) writeEventsLocked(w io.Writer, htmlMode bool) {
2089 var lastT time.Time
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002090 for i, evt := range st.events {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002091 lastT = evt.t
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002092 var elapsed string
2093 if i != 0 {
2094 elapsed = fmt.Sprintf("+%0.1fs", evt.t.Sub(st.events[i-1].t).Seconds())
2095 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002096 e := evt.evt
2097 text := evt.text
2098 if htmlMode {
2099 if e == "running_exec" {
2100 e = fmt.Sprintf("<a href='%s'>%s</a>", st.logsURL(), e)
2101 }
2102 e = "<b>" + e + "</b>"
2103 text = "<i>" + html.EscapeString(text) + "</i>"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002104 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002105 fmt.Fprintf(w, " %7s %v %s %s\n", elapsed, evt.t.Format(time.RFC3339), e, text)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002106 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002107 fmt.Fprintf(w, " %7s (now)\n", fmt.Sprintf("+%0.1fs", time.Since(lastT).Seconds()))
2108
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002109}
2110
2111func (st *buildStatus) logs() string {
2112 st.mu.Lock()
Andrew Gerrand5f73aab2015-03-03 10:30:18 +11002113 defer st.mu.Unlock()
2114 return st.output.String()
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002115}
2116
2117func (st *buildStatus) Write(p []byte) (n int, err error) {
2118 st.mu.Lock()
2119 defer st.mu.Unlock()
2120 const maxBufferSize = 2 << 20 // 2MB of output is way more than we expect.
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002121 plen := len(p)
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002122 if st.output.Len()+len(p) > maxBufferSize {
2123 p = p[:maxBufferSize-st.output.Len()]
2124 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002125 st.output.Write(p) // bytes.Buffer can't fail
Andrew Gerrand5f73aab2015-03-03 10:30:18 +11002126 st.notifyWatchersLocked(false)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002127 return plen, nil
2128}
2129
// logWatcher holds the state of a client watching the logs of a running build.
type logWatcher struct {
	ch     chan []byte // receives chunks of new log output; closed when the build is done
	offset int         // Offset of seen logs (offset == len(buf) means "up to date")
}
2135
2136// watchLogs returns a channel on which the build's logs is sent.
2137// When the build is complete the channel is closed.
2138func (st *buildStatus) watchLogs() <-chan []byte {
2139 st.mu.Lock()
2140 defer st.mu.Unlock()
2141
2142 ch := make(chan []byte, 10) // room for a few log writes
2143 ch <- st.output.Bytes()
Brad Fitzpatrickec2973a2015-03-04 07:30:35 -08002144 if !st.isRunningLocked() {
Andrew Gerrand5f73aab2015-03-03 10:30:18 +11002145 close(ch)
2146 return ch
2147 }
2148
2149 st.watcher = append(st.watcher, &logWatcher{
2150 ch: ch,
2151 offset: st.output.Len(),
2152 })
2153 return ch
2154}
2155
2156// unregisterWatcher removes the provided channel from the list of watchers,
2157// so that it receives no further log data.
2158func (st *buildStatus) unregisterWatcher(ch <-chan []byte) {
2159 st.mu.Lock()
2160 defer st.mu.Unlock()
2161
2162 for i, w := range st.watcher {
2163 if w.ch == ch {
2164 st.watcher = append(st.watcher[:i], st.watcher[i+1:]...)
2165 break
2166 }
2167 }
2168}
2169
2170// notifyWatchersLocked pushes any new log data to watching clients.
2171// If done is true it closes any watcher channels.
2172//
2173// NOTE: st.mu must be held.
2174func (st *buildStatus) notifyWatchersLocked(done bool) {
2175 l := st.output.Len()
2176 for _, w := range st.watcher {
2177 if w.offset < l {
2178 select {
2179 case w.ch <- st.output.Bytes()[w.offset:]:
2180 w.offset = l
2181 default:
2182 // If the receiver isn't ready, drop the write.
2183 }
2184 }
2185 if done {
2186 close(w.ch)
2187 }
2188 }
2189}
2190
// versionTgz returns a reader over an in-memory gzip-compressed tar
// archive containing a single file named VERSION, with mode 0644, whose
// contents are "devel " followed by rev.
func versionTgz(rev string) io.Reader {
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	tw := tar.NewWriter(zw)

	// Writing to a bytes.Buffer should never fail, so check
	// errors with an explosion:
	check := func(err error) {
		if err != nil {
			panic("previously assumed to never fail: " + err.Error())
		}
	}

	// Use plain concatenation: rev must be taken literally and never
	// interpreted as a format string, or a '%' in it would corrupt the
	// contents.
	contents := "devel " + rev
	check(tw.WriteHeader(&tar.Header{
		Name: "VERSION",
		Mode: 0644,
		Size: int64(len(contents)),
	}))
	_, err := io.WriteString(tw, contents)
	check(err)
	// Close the tar writer before the gzip writer so that everything is
	// flushed into buf in the right order.
	check(tw.Close())
	check(zw.Close())
	return bytes.NewReader(buf.Bytes())
}
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002216
// nl is a single newline, kept as a byte slice for use with the bytes
// package (for example to trim a trailing newline from test output).
var nl = []byte("\n")