blob: 24cf21736a6f9fcfb64353cde29469008023e4ab [file] [log] [blame]
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -07001// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// The coordinator runs on GCE and coordinates builds in Docker containers.
Andrew Gerrandfa8373a2015-01-21 17:25:37 +11006package main // import "golang.org/x/build/cmd/coordinator"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -07007
8import (
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08009 "archive/tar"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070010 "bytes"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080011 "compress/gzip"
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080012 "crypto/sha1"
David Crawshaw02e988a2015-03-30 21:54:04 -040013 "crypto/tls"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070014 "encoding/json"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080015 "errors"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070016 "flag"
17 "fmt"
Brad Fitzpatrickd9bbf3c2014-12-15 11:51:42 +110018 "html"
David Crawshawdd0cf9f2015-04-29 17:58:09 -040019 "html/template"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070020 "io"
21 "io/ioutil"
22 "log"
David Crawshaw02e988a2015-03-30 21:54:04 -040023 "net"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070024 "net/http"
David Crawshaw581ddd12015-04-06 08:09:20 -040025 "os"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -080026 "path"
Brad Fitzpatrick578a4d22015-02-24 21:47:40 -080027 "runtime"
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -070028 "sort"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070029 "strings"
30 "sync"
31 "time"
Brad Fitzpatrickb52c2242014-12-30 16:19:06 -080032
Brad Fitzpatrick20d84832015-01-21 10:03:07 -080033 "camlistore.org/pkg/syncutil"
David Crawshaw66c36dd2015-04-23 10:23:22 -040034
Andrew Gerrandfa8373a2015-01-21 17:25:37 +110035 "golang.org/x/build/buildlet"
36 "golang.org/x/build/dashboard"
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080037 "golang.org/x/build/gerrit"
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -070038 "golang.org/x/build/internal/lru"
39 "golang.org/x/build/internal/singleflight"
Andrew Gerrandfa8373a2015-01-21 17:25:37 +110040 "golang.org/x/build/types"
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080041 "google.golang.org/cloud/storage"
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070042)
43
const (
	// subrepoPrefix is the path prefix "golang.org/x/".
	// NOTE(review): presumably the import-path prefix of Go sub-repos; confirm against callers.
	subrepoPrefix = "golang.org/x/"

	// devPause is a debug option to pause for 5 minutes after the build
	// finishes before destroying buildlets.
	devPause = false
)

var (
	// processStartTime is captured once, when package variables are initialized.
	processStartTime = time.Now()

	// Version is the coordinator version; it is set by the linker using -X.
	Version string
)
53
Brad Fitzpatrickdfe82862015-03-01 09:23:57 -080054func init() {
55 // Disabled until we have test sharding. This takes 85+ minutes.
56 // Test sharding is https://github.com/golang/go/issues/10029
57 delete(dashboard.Builders, "linux-arm-qemu")
58}
59
// masterKeyFile is the -masterkey flag: the path to the builder master key.
var masterKeyFile = flag.String("masterkey", "", "Path to builder master key. Else fetched using GCE project attribute 'builder-master-key'.")

// cleanZones is the -zones flag: the zones to periodically clean of stale build VMs.
//
// TODO(bradfitz): remove this list and just query it from the compute API:
// http://godoc.org/google.golang.org/api/compute/v1#RegionsService.Get
// and Region.Zones: http://godoc.org/google.golang.org/api/compute/v1#Region
var cleanZones = flag.String("zones", "us-central1-a,us-central1-b,us-central1-f", "Comma-separated list of zones to periodically clean of stale build VMs (ones that failed to shut themselves down)")

// mode is the -mode flag; when left empty, main picks "dev" or "prod"
// depending on whether GCE initialization succeeds.
var mode = flag.String("mode", "", "valid modes are 'dev', 'prod', or '' for auto-detect")
70
Andrew Gerranda9469a82015-05-27 14:15:18 -070071func buildLogBucket() string {
72 return devPrefix() + "go-build-log"
73}
74
Andrew Gerrandc53a5772015-05-27 14:29:36 -070075func snapBucket() string {
76 return devPrefix() + "go-build-snap"
77}
78
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080079// LOCK ORDER:
80// statusMu, buildStatus.mu, trySet.mu
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080081
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070082var (
83 startTime = time.Now()
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070084
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080085 statusMu sync.Mutex // guards the following four structures; see LOCK ORDER comment above
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -080086 status = map[builderRev]*buildStatus{}
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080087 statusDone []*buildStatus // finished recently, capped to maxStatusDone
88 tries = map[tryKey]*trySet{} // trybot builds
89 tryList []tryKey
Andrew Gerrand234725b2015-06-04 16:45:17 -070090
91 // subrepoHead contains the hashes of the latest master HEAD
92 // for each sub-repo. It is populated by findWork.
93 subrepoHead = struct {
94 sync.Mutex
95 m map[string]string // [repo]hash
96 }{m: map[string]string{}}
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -070097)
98
Brad Fitzpatrick421541f2015-02-11 22:57:39 -080099// tryBuilders must be VMs. The Docker container builds are going away.
100var tryBuilders []dashboard.BuildConfig
101
102func init() {
103 tryList := []string{
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -0700104 "misc-compile",
David Crawshaw6c597e32015-05-01 12:45:09 -0400105 "darwin-amd64-10_10",
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800106 "linux-386",
107 "linux-amd64",
108 "linux-amd64-race",
109 "freebsd-386-gce101",
110 "freebsd-amd64-gce101",
111 "windows-386-gce",
112 "windows-amd64-gce",
113 "openbsd-386-gce56",
114 "openbsd-amd64-gce56",
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700115 "plan9-386",
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800116 "nacl-386",
117 "nacl-amd64p32",
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -0700118 /* "linux-arm-shard_test",
119 "linux-arm-shard_std_am",
120 "linux-arm-shard_std_nz",
121 "linux-arm-shard_runtimecpu",
122 "linux-arm-shard_cgotest",
123 "linux-arm-shard_misc",
124 */
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800125 }
126 for _, bname := range tryList {
127 conf, ok := dashboard.Builders[bname]
Brad Fitzpatrick83455d12015-02-19 16:14:20 -0800128 if ok {
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800129 tryBuilders = append(tryBuilders, conf)
130 } else {
Brad Fitzpatrick83455d12015-02-19 16:14:20 -0800131 log.Printf("ignoring invalid try builder config %q", bname)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800132 }
133 }
134}
135
// maxStatusDone caps the number of recently finished builds kept in statusDone.
const maxStatusDone = 30

// vmDeleteTimeout is how long before we delete a VM.
// In practice this need only be as long as the slowest
// builder (plan9 currently), because on startup this program
// already deletes all buildlets it doesn't know about
// (i.e. ones from a previous instance of the coordinator).
const vmDeleteTimeout = 45 * time.Minute
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800146
David Crawshaw02e988a2015-03-30 21:54:04 -0400147func readGCSFile(name string) ([]byte, error) {
David Crawshaw581ddd12015-04-06 08:09:20 -0400148 if *mode == "dev" {
149 b, ok := testFiles[name]
150 if !ok {
151 return nil, &os.PathError{
152 Op: "open",
153 Path: name,
154 Err: os.ErrNotExist,
155 }
156 }
157 return []byte(b), nil
158 }
159
Andrew Gerranda9469a82015-05-27 14:15:18 -0700160 r, err := storage.NewReader(serviceCtx, devPrefix()+"go-builder-data", name)
David Crawshaw02e988a2015-03-30 21:54:04 -0400161 if err != nil {
162 return nil, err
163 }
164 defer r.Close()
165 return ioutil.ReadAll(r)
166}
167
David Crawshaw581ddd12015-04-06 08:09:20 -0400168// Fake keys signed by a fake CA.
// testFiles maps a file name to its in-memory contents. readGCSFile serves
// lookups from this map when the coordinator runs in "dev" mode, and
// serveTLS requests "farmer-cert.pem" and "farmer-key.pem" by name.
169var testFiles = map[string]string{
170 "farmer-cert.pem": `-----BEGIN CERTIFICATE-----
171MIICljCCAX4CCQCoS+/smvkG2TANBgkqhkiG9w0BAQUFADANMQswCQYDVQQDEwJn
172bzAeFw0xNTA0MDYwMzE3NDJaFw0xNzA0MDUwMzE3NDJaMA0xCzAJBgNVBAMTAmdv
173MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA1NMaVxX8RfCMtQB18azV
174hL6/U7C8W2G+8WXYeFuOpgP2SHnMbsUeTiUYWS1xqAxUh3Vl/TT1HIASRDL7kBis
175yj+drspafnCr4Yp9oJx1xlIhVXGD/SyHk5oewkjkNEmrFtUT07mT2lmZqD3XJ+6V
176aQslRxhPEkLGsXIA/hCucPIplI9jgLY8TmOBhQ7RzXAnk/ayAzDkCgkWB4k/zaFy
177LiHjEkE7O7PIjjY51btCLep9QSts98zojY5oYNj2RdQOZa56MHAlh9hbdpm+P1vp
1782QBpsDbVpHYv2VPCPvkdOGU1/nzumsxHy17DcirKP8Tuf6zMf9obeuSlMvUUPptl
179hwIDAQABMA0GCSqGSIb3DQEBBQUAA4IBAQBxvUMKsX+DEhZSmc164IuSVJ9ucZ97
180+KWn4nCwnVkI/RrsJpiTj3pZNRkAxq2vmZTpUdU0CgGHdZNXp/6s/GX4cSzFphSf
181WZQN0CG/O50SQ39m7fz/dZ2Xse6EH2grr6KN0QsDhK/RVxecQv57rY9nLFHnC60t
182vJBDC739lWlnsGDxylJNxEk2l5c2rJdn82yGw2G9pQ/LDVAtO1G2rxGkpi4FcpGk
183rNAa6MiwcyFHcAr3OsigLm4Q9bCS6YXfQDvCZGAR91ADXVWDFC1sgBgM3U3+1bGp
184tgXUVKymUvoVq0BiY4BCCYDluoErgZDytLmnUOxrykYi532VpRbbK2ja
185-----END CERTIFICATE-----`,
186 "farmer-key.pem": `-----BEGIN RSA PRIVATE KEY-----
187MIIEowIBAAKCAQEA1NMaVxX8RfCMtQB18azVhL6/U7C8W2G+8WXYeFuOpgP2SHnM
188bsUeTiUYWS1xqAxUh3Vl/TT1HIASRDL7kBisyj+drspafnCr4Yp9oJx1xlIhVXGD
189/SyHk5oewkjkNEmrFtUT07mT2lmZqD3XJ+6VaQslRxhPEkLGsXIA/hCucPIplI9j
190gLY8TmOBhQ7RzXAnk/ayAzDkCgkWB4k/zaFyLiHjEkE7O7PIjjY51btCLep9QSts
19198zojY5oYNj2RdQOZa56MHAlh9hbdpm+P1vp2QBpsDbVpHYv2VPCPvkdOGU1/nzu
192msxHy17DcirKP8Tuf6zMf9obeuSlMvUUPptlhwIDAQABAoIBAAJOPyzOWitPzdZw
193KNbzbmS/xEbd1UyQJIds+QlkxIjb5iEm4KYakJd8I2Vj7qVJbOkCxpYVqsoiQRBo
194FP2cptKSGd045/4SrmoFHBNPXp9FaIMKdcmaX+Wjd83XCFHgsm/O4yYaDpYA/n8q
195HFicZxX6Pu8kPkcOXiSx/XzDJYCnuec0GIfiJfbrQEwNLA+Ck2HnFfLy6LyrgCqi
196eqaxyBoLolzjW7guWV6e/ECsnLXx2n/Pj4l1aqIFKlYxOjBIKRqeUsqzMFpOCbrx
197z/scaBuH88hO96jbGZWUAm3R6ZslocQ6TaENYWNVKN1SeGISiE3hRoMAUIu1eHVu
198mEzOjvECgYEA9Ypu04NzVjAHdZRwrP7IiX3+CmbyNatdZXIoagp8boPBYWw7QeL8
199TPwvc3PCSIjxcT+Jv2hHTZ9Ofz9vAm/XJx6Ios9o/uAbytA+RAolQJWtLGuFLKv1
200wxq78iDFcIWq3iPwpl8FJaXeCb/bsNP9jruPhwWWbJVvD1eTif09ZzsCgYEA3ePo
201aQ5S0YrPtaf5r70eSBloe5vveG/kW3EW0QMrN6YlOhGSX+mjdAJk7XI/JW6vVPYS
202aK+g+ZnzV7HL421McuVH8mmwPHi48l5o2FewF54qYfOoTAJS1cjV08j8WtQsrEax
203HHom4m4joQEm0o4QEnTxJDS8/u7T/hhMALxeziUCgYANwevjvgHAWoCQffiyOLRT
204v9N0EcCQcUGSZYsOJfhC2O8E3mOTlXw9dAPUnC/OkJ22krDNILKeDsb/Kja2FD4h
2052vwc4zIm1be47WIPveHIdJp3Wq7jid8DR4QwVNW7MEIaoDjjmX9YVKrUMQPGLJqQ
206XMH19sIu41CNs4J4wM+n8QKBgBiIcFPdP47neBuvnM2vbT+vf3vbO9jnFip+EHW/
207kfGvLwKCmtp77JSRBzOxpAWxfTU5l8N3V6cBPIR/pflZRlCVxSSqRtAI0PoLMjBp
208UZDq7eiylfMBdsMoV2v5Ft28A8xwbHinkNEMOGg+xloVVvWTdG36XsMZCNtZOF4E
209db75AoGBAIk6IW5O2lk9Vc537TCyLpl2HYCP0jI3v6xIkFFolnfHPEgsXLJo9YU8
210crVtB0zy4jzjN/SClc/iaeOzk5Ot+iwSRFBZu2jdt0TRxbG+cd+6vKLs0Baw6kB1
211gpRUwP6i5yhi838rMgurGVFr3O/0Sv7wMx5UNEJ/RopbQ2K/bnwn
212-----END RSA PRIVATE KEY-----`,
213}
214
David Crawshaw02e988a2015-03-30 21:54:04 -0400215func listenAndServeTLS() {
David Crawshawa3dce2c2015-04-07 19:46:19 -0400216 addr := ":443"
217 if *mode == "dev" {
218 addr = ":8119"
219 }
220 ln, err := net.Listen("tcp", addr)
221 if err != nil {
222 log.Fatalf("net.Listen(%s): %v", addr, err)
223 }
224 serveTLS(ln)
225}
226
227func serveTLS(ln net.Listener) {
David Crawshaw02e988a2015-03-30 21:54:04 -0400228 certPEM, err := readGCSFile("farmer-cert.pem")
229 if err != nil {
230 log.Printf("cannot load TLS cert, skipping https: %v", err)
231 return
232 }
233 keyPEM, err := readGCSFile("farmer-key.pem")
234 if err != nil {
235 log.Printf("cannot load TLS key, skipping https: %v", err)
236 return
237 }
238 cert, err := tls.X509KeyPair(certPEM, keyPEM)
239 if err != nil {
240 log.Printf("bad TLS cert: %v", err)
241 return
242 }
243
David Crawshawa3dce2c2015-04-07 19:46:19 -0400244 server := &http.Server{Addr: ln.Addr().String()}
David Crawshaw02e988a2015-03-30 21:54:04 -0400245 config := &tls.Config{
246 NextProtos: []string{"http/1.1"},
247 Certificates: []tls.Certificate{cert},
248 }
David Crawshaw02e988a2015-03-30 21:54:04 -0400249 tlsLn := tls.NewListener(tcpKeepAliveListener{ln.(*net.TCPListener)}, config)
David Crawshawa3dce2c2015-04-07 19:46:19 -0400250 log.Printf("Coordinator serving on: %v", tlsLn.Addr())
David Crawshaw02e988a2015-03-30 21:54:04 -0400251 if err := server.Serve(tlsLn); err != nil {
252 log.Fatalf("serve https: %v", err)
253 }
254}
255
// tcpKeepAliveListener wraps a *net.TCPListener so that every accepted
// connection has TCP keep-alives enabled.
type tcpKeepAliveListener struct {
	*net.TCPListener
}

// Accept accepts the next TCP connection, turns on keep-alive with a
// three minute period, and returns it. Errors from AcceptTCP are returned
// unchanged with a nil connection; errors from the keep-alive setters are
// ignored.
func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) {
	conn, acceptErr := ln.AcceptTCP()
	if acceptErr != nil {
		return nil, acceptErr
	}
	conn.SetKeepAlive(true)
	conn.SetKeepAlivePeriod(3 * time.Minute)
	return conn, nil
}
269
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700270func main() {
271 flag.Parse()
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700272 log.Printf("coordinator version %q starting", Version)
David Crawshaw581ddd12015-04-06 08:09:20 -0400273 err := initGCE()
274 if err != nil {
275 if *mode == "" {
276 *mode = "dev"
277 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800278 log.Printf("VM support disabled due to error initializing GCE: %v", err)
David Crawshaw581ddd12015-04-06 08:09:20 -0400279 } else {
280 if *mode == "" {
281 *mode = "prod"
282 }
283 }
284 switch *mode {
285 case "dev", "prod":
286 log.Printf("Running in %s mode", *mode)
287 default:
288 log.Fatalf("Unknown mode: %q", *mode)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800289 }
290
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700291 http.HandleFunc("/", handleStatus)
Brad Fitzpatrick578a4d22015-02-24 21:47:40 -0800292 http.HandleFunc("/debug/goroutines", handleDebugGoroutines)
David Crawshaweef380f2015-04-30 20:03:01 -0400293 http.HandleFunc("/builders", handleBuilders)
Brad Fitzpatrick777a5bf2015-06-09 12:17:36 -0700294 http.HandleFunc("/temporarylogs", handleLogs)
David Crawshaw581ddd12015-04-06 08:09:20 -0400295 http.HandleFunc("/reverse", handleReverse)
David Crawshawdd0cf9f2015-04-29 17:58:09 -0400296 http.HandleFunc("/style.css", handleStyleCSS)
David Crawshaweef380f2015-04-30 20:03:01 -0400297 http.HandleFunc("/try", handleTryStatus)
David Crawshaw02e988a2015-03-30 21:54:04 -0400298 go func() {
David Crawshaw581ddd12015-04-06 08:09:20 -0400299 if *mode == "dev" {
300 return
301 }
David Crawshaw02e988a2015-03-30 21:54:04 -0400302 err := http.ListenAndServe(":80", nil)
303 if err != nil {
304 log.Fatalf("http.ListenAndServe:80: %v", err)
305 }
306 }()
David Crawshaw66c36dd2015-04-23 10:23:22 -0400307
308 workc := make(chan builderRev)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700309
David Crawshaw581ddd12015-04-06 08:09:20 -0400310 if *mode == "dev" {
311 // TODO(crawshaw): do more in test mode
David Crawshaw66c36dd2015-04-23 10:23:22 -0400312 gcePool.SetEnabled(false)
313 http.HandleFunc("/dosomework/", handleDoSomeWork(workc))
314 } else {
315 go gcePool.cleanUpOldVMs()
316
Andrew Gerrand71716002015-05-18 13:23:24 +1000317 if devCluster {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700318 dashboard.BuildletBucket = "dev-go-builder-data"
Andrew Gerrand1826ad42015-05-26 04:34:37 +1000319 dashboard.Builders = devClusterBuilders()
Andrew Gerrand71716002015-05-18 13:23:24 +1000320 }
321
David Crawshaw66c36dd2015-04-23 10:23:22 -0400322 // Start the Docker processes on this host polling Gerrit and
323 // pinging build.golang.org when new commits are available.
324 startWatchers() // in watcher.go
325
326 go findWorkLoop(workc)
327 go findTryWorkLoop()
328 // TODO(cmang): gccgo will need its own findWorkLoop
David Crawshaw581ddd12015-04-06 08:09:20 -0400329 }
330
David Crawshaw66c36dd2015-04-23 10:23:22 -0400331 go listenAndServeTLS()
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700332
333 ticker := time.NewTicker(1 * time.Minute)
334 for {
335 select {
336 case work := <-workc:
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100337 if !mayBuildRev(work) {
338 continue
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700339 }
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100340 st, err := newBuild(work)
341 if err != nil {
342 log.Printf("Bad build work params %v: %v", work, err)
343 } else {
344 st.start()
345 }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700346 case <-ticker.C:
347 if numCurrentBuilds() == 0 && time.Now().After(startTime.Add(10*time.Minute)) {
348 // TODO: halt the whole machine to kill the VM or something
349 }
350 }
351 }
352}
353
Andrew Gerrand1826ad42015-05-26 04:34:37 +1000354func devClusterBuilders() map[string]dashboard.BuildConfig {
355 m := map[string]dashboard.BuildConfig{}
356 for _, name := range []string{
357 "linux-amd64",
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700358 "linux-amd64-race",
Andrew Gerrand1826ad42015-05-26 04:34:37 +1000359 "windows-amd64-gce",
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700360 "plan9-386",
Andrew Gerrand1826ad42015-05-26 04:34:37 +1000361 } {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700362 m[name] = dashboard.Builders[name]
Andrew Gerrand1826ad42015-05-26 04:34:37 +1000363 }
364 return m
365}
366
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700367func numCurrentBuilds() int {
368 statusMu.Lock()
369 defer statusMu.Unlock()
370 return len(status)
371}
372
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800373func isBuilding(work builderRev) bool {
374 statusMu.Lock()
375 defer statusMu.Unlock()
376 _, building := status[work]
377 return building
378}
379
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800380// mayBuildRev reports whether the build type & revision should be started.
David Crawshaw66c36dd2015-04-23 10:23:22 -0400381// It returns true if it's not already building, and if a reverse buildlet is
382// required, if an appropriate machine is registered.
383func mayBuildRev(rev builderRev) bool {
384 if isBuilding(rev) {
385 return false
386 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700387 if devCluster && numCurrentBuilds() != 0 {
388 return false
389 }
David Crawshawdd57a132015-04-28 16:42:42 -0400390 if dashboard.Builders[rev.name].IsReverse {
391 return reversePool.CanBuild(rev.name)
David Crawshaw66c36dd2015-04-23 10:23:22 -0400392 }
David Crawshawdd57a132015-04-28 16:42:42 -0400393 return true
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700394}
395
396func setStatus(work builderRev, st *buildStatus) {
397 statusMu.Lock()
398 defer statusMu.Unlock()
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100399 // TODO: panic if status[work] already exists. audit all callers.
400 // For instance, what if a trybot is running, and then the CL is merged
401 // and the findWork goroutine picks it up and it has the same commit,
402 // because it didn't need to be rebased in Gerrit's cherrypick?
403 // Could we then have two running with the same key?
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800404 status[work] = st
405}
406
407func markDone(work builderRev) {
408 statusMu.Lock()
409 defer statusMu.Unlock()
410 st, ok := status[work]
411 if !ok {
412 return
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700413 }
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800414 delete(status, work)
415 if len(statusDone) == maxStatusDone {
416 copy(statusDone, statusDone[1:])
417 statusDone = statusDone[:len(statusDone)-1]
418 }
419 statusDone = append(statusDone, st)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700420}
421
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800422// statusPtrStr disambiguates which status to return if there are
423// multiple in the history (e.g. recent failures where the build
424// didn't finish for reasons outside of all.bash failing)
425func getStatus(work builderRev, statusPtrStr string) *buildStatus {
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700426 statusMu.Lock()
427 defer statusMu.Unlock()
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800428 match := func(st *buildStatus) bool {
429 return statusPtrStr == "" || fmt.Sprintf("%p", st) == statusPtrStr
430 }
431 if st, ok := status[work]; ok && match(st) {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800432 return st
433 }
434 for _, st := range statusDone {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -0800435 if st.builderRev == work && match(st) {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800436 return st
437 }
438 }
439 return nil
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700440}
441
442type byAge []*buildStatus
443
444func (s byAge) Len() int { return len(s) }
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100445func (s byAge) Less(i, j int) bool { return s[i].startTime.Before(s[j].startTime) }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700446func (s byAge) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
447
Brad Fitzpatrickc964c3f2015-02-25 16:43:53 -0800448func handleTryStatus(w http.ResponseWriter, r *http.Request) {
449 ts := trySetOfCommitPrefix(r.FormValue("commit"))
450 if ts == nil {
451 http.Error(w, "TryBot result not found (already done, invalid, or not yet discovered from Gerrit). Check Gerrit for results.", http.StatusNotFound)
452 return
453 }
454 ts.mu.Lock()
455 tss := ts.trySetState.clone()
456 ts.mu.Unlock()
457
458 w.Header().Set("Content-Type", "text/html; charset=utf-8")
459 fmt.Fprintf(w, "<html><head><title>trybot status</title></head><body>[<a href='/'>overall status</a>] &gt; %s\n", ts.ChangeID)
460
461 fmt.Fprintf(w, "<h1>trybot status</h1>")
462 fmt.Fprintf(w, "Change-ID: <a href='https://go-review.googlesource.com/#/q/%s'>%s</a><br>\n", ts.ChangeID, ts.ChangeID)
463 fmt.Fprintf(w, "Commit: <a href='https://go-review.googlesource.com/#/q/%s'>%s</a><br>\n", ts.Commit, ts.Commit)
464 fmt.Fprintf(w, "<p>Builds remain: %d</p>\n", tss.remain)
465 fmt.Fprintf(w, "<p>Builds failed: %v</p>\n", tss.failed)
466 fmt.Fprintf(w, "<p>Builds</p><table cellpadding=5 border=1>\n")
467 for _, bs := range tss.builds {
468 status := "<i>(running)</i>"
469 bs.mu.Lock()
470 if !bs.done.IsZero() {
471 if bs.succeeded {
472 status = "pass"
473 } else {
474 status = "<b>FAIL</b>"
475 }
476 }
477 bs.mu.Unlock()
478 fmt.Fprintf(w, "<tr valign=top><td align=left>%s</td><td align=center>%s</td><td><pre>%s</pre></td></tr>\n",
479 bs.name,
480 status,
David Crawshawdd0cf9f2015-04-29 17:58:09 -0400481 bs.HTMLStatusLine())
Brad Fitzpatrickc964c3f2015-02-25 16:43:53 -0800482 }
483 fmt.Fprintf(w, "</table></body></html>")
484}
485
486func trySetOfCommitPrefix(commitPrefix string) *trySet {
487 if commitPrefix == "" {
488 return nil
489 }
490 statusMu.Lock()
491 defer statusMu.Unlock()
492 for k, ts := range tries {
493 if strings.HasPrefix(k.Commit, commitPrefix) {
494 return ts
495 }
496 }
497 return nil
498}
499
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700500func handleLogs(w http.ResponseWriter, r *http.Request) {
Andrew Gerrand234725b2015-06-04 16:45:17 -0700501 br := builderRev{
502 name: r.FormValue("name"),
503 rev: r.FormValue("rev"),
504 subName: r.FormValue("subName"), // may be empty
505 subRev: r.FormValue("subRev"), // may be empty
506 }
507 st := getStatus(br, r.FormValue("st"))
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700508 if st == nil {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -0800509 http.NotFound(w, r)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700510 return
511 }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700512 w.Header().Set("Content-Type", "text/plain; charset=utf-8")
Andrew Gerrandcc49d3b2015-03-12 10:27:49 +1100513 w.Header().Set("X-Content-Type-Options", "nosniff")
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -0800514 writeStatusHeader(w, st)
515
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -0700516 nostream := r.FormValue("nostream") != ""
517 if nostream || !st.isRunning() {
518 if nostream {
519 fmt.Fprintf(w, "\n\n(live streaming disabled; reload manually to see status)\n")
520 }
Andrew Gerrandff978d72015-05-28 15:01:51 -0700521 st.mu.Lock()
522 defer st.mu.Unlock()
Andrew Gerrandaa078a32015-05-28 16:13:40 -0700523 w.Write(st.output.Bytes())
Andrew Gerrandff978d72015-05-28 15:01:51 -0700524 return
525 }
526
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100527 if !st.hasEvent("pre_exec") {
528 fmt.Fprintf(w, "\n\n(buildlet still starting; no live streaming. reload manually to see status)\n")
529 return
530 }
531
Brad Fitzpatrickec2973a2015-03-04 07:30:35 -0800532 w.(http.Flusher).Flush()
533
Andrew Gerrand5f73aab2015-03-03 10:30:18 +1100534 logs := st.watchLogs()
535 defer st.unregisterWatcher(logs)
536 closed := w.(http.CloseNotifier).CloseNotify()
537 for {
538 select {
539 case b, ok := <-logs:
540 if !ok {
541 return
542 }
543 w.Write(b)
544 w.(http.Flusher).Flush()
545 case <-closed:
546 return
547 }
548 }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700549}
550
// handleDebugGoroutines writes the stack traces of all goroutines as plain
// text. The dump is captured into a 1 MiB buffer, so anything beyond that
// size is truncated.
func handleDebugGoroutines(w http.ResponseWriter, r *http.Request) {
	const maxDumpSize = 1 << 20

	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	dump := make([]byte, maxDumpSize)
	n := runtime.Stack(dump, true)
	w.Write(dump[:n])
}
557
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -0800558func writeStatusHeader(w http.ResponseWriter, st *buildStatus) {
559 st.mu.Lock()
560 defer st.mu.Unlock()
561 fmt.Fprintf(w, " builder: %s\n", st.name)
562 fmt.Fprintf(w, " rev: %s\n", st.rev)
Brad Fitzpatrick46d9b002015-05-13 15:55:41 -0700563 workaroundFlush(w)
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100564 fmt.Fprintf(w, " buildlet: %s\n", st.bc)
565 fmt.Fprintf(w, " started: %v\n", st.startTime)
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -0800566 done := !st.done.IsZero()
567 if done {
David du Colombier5e8cfd42015-06-12 17:16:24 +0200568 fmt.Fprintf(w, " ended: %v\n", st.done)
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -0800569 fmt.Fprintf(w, " success: %v\n", st.succeeded)
570 } else {
571 fmt.Fprintf(w, " status: still running\n")
572 }
573 if len(st.events) > 0 {
574 io.WriteString(w, "\nEvents:\n")
575 st.writeEventsLocked(w, false)
576 }
577 io.WriteString(w, "\nBuild log:\n")
Brad Fitzpatrick46d9b002015-05-13 15:55:41 -0700578 workaroundFlush(w)
579}
580
// workaroundFlush is an unnecessary flush to work around a bug in Chrome.
// See https://code.google.com/p/chromium/issues/detail?id=2016 for the details.
// In summary: a couple unnecessary chunk flushes bypass the content type
// sniffing which happen (even if unused?), even if you set nosniff as we do
// in func handleLogs.
//
// The flush is best-effort: if w does not implement http.Flusher (for
// example because it has been wrapped), workaroundFlush does nothing
// rather than panicking on the type assertion.
func workaroundFlush(w http.ResponseWriter) {
	if f, ok := w.(http.Flusher); ok {
		f.Flush()
	}
}
589
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800590// findWorkLoop polls http://build.golang.org/?mode=json looking for new work
591// for the main dashboard. It does not support gccgo.
592// TODO(bradfitz): it also currently does not support subrepos.
593func findWorkLoop(work chan<- builderRev) {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700594 // Useful for debugging a single run:
595 if devCluster && false {
Andrew Gerrand234725b2015-06-04 16:45:17 -0700596 work <- builderRev{name: "linux-amd64", rev: "c9778ec302b2e0e0d6027e1e0fca892e428d9657", subName: "tools", subRev: "ac303766f5f240c1796eeea3dc9bf34f1261aa35"}
597 //work <- builderRev{name: "linux-amd64", rev: "54789eff385780c54254f822e09505b6222918e2"}
598 //work <- builderRev{name: "windows-amd64-gce", rev: "54789eff385780c54254f822e09505b6222918e2"}
599
600 // Still run findWork but ignore what it does.
601 ignore := make(chan builderRev)
602 go func() {
603 for range ignore {
604 }
605 }()
606 work = ignore
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -0700607 }
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800608 ticker := time.NewTicker(15 * time.Second)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700609 for {
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800610 if err := findWork(work); err != nil {
611 log.Printf("failed to find new work: %v", err)
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700612 }
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800613 <-ticker.C
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700614 }
615}
616
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800617func findWork(work chan<- builderRev) error {
618 var bs types.BuildStatus
Andrew Gerrand71716002015-05-18 13:23:24 +1000619 res, err := http.Get(dashBase() + "?mode=json")
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800620 if err != nil {
621 return err
622 }
623 defer res.Body.Close()
624 if err := json.NewDecoder(res.Body).Decode(&bs); err != nil {
625 return err
626 }
627 if res.StatusCode != 200 {
628 return fmt.Errorf("unexpected http status %v", res.Status)
629 }
630
631 knownToDashboard := map[string]bool{} // keys are builder
632 for _, b := range bs.Builders {
633 knownToDashboard[b] = true
634 }
635
636 var goRevisions []string
637 for _, br := range bs.Revisions {
638 if br.Repo == "go" {
639 goRevisions = append(goRevisions, br.Revision)
640 } else {
Andrew Gerrand234725b2015-06-04 16:45:17 -0700641 // The dashboard provides only the head revision for
642 // each sub-repo; store it in subrepoHead for later use.
643 subrepoHead.Lock()
644 subrepoHead.m[br.Repo] = br.Revision
645 subrepoHead.Unlock()
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800646 }
647 if len(br.Results) != len(bs.Builders) {
648 return errors.New("bogus JSON response from dashboard: results is too long.")
649 }
650 for i, res := range br.Results {
651 if res != "" {
652 // It's either "ok" or a failure URL.
653 continue
654 }
655 builder := bs.Builders[i]
Andrew Gerrand234725b2015-06-04 16:45:17 -0700656 builderInfo, ok := dashboard.Builders[builder]
657 if !ok || builderInfo.TryOnly {
Brad Fitzpatrickeb52e712015-05-13 18:38:20 -0700658 // Not managed by the coordinator, or a trybot-only one.
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800659 continue
660 }
Andrew Gerrand234725b2015-06-04 16:45:17 -0700661 if br.Repo != "go" && !builderInfo.SplitMakeRun() {
662 // If we don't split make and run then we can't
663 // have a snapshot from which to build sub-repos.
664 continue
665 }
666
667 var rev builderRev
668 if br.Repo == "go" {
669 rev = builderRev{
670 name: bs.Builders[i],
671 rev: br.Revision,
672 }
673 } else {
674 rev = builderRev{
675 name: bs.Builders[i],
676 rev: br.GoRevision,
677 subName: br.Repo,
678 subRev: br.Revision,
679 }
680 if !builderInfo.BuildSubrepos() || !rev.snapshotExists() {
681 // Don't try to build this sub-repo until we have a snapshot.
682 continue
683 }
684 }
685 if !isBuilding(rev) {
686 work <- rev
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700687 }
688 }
689 }
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800690
691 // And to bootstrap new builders, see if we have any builders
692 // that the dashboard doesn't know about.
Brad Fitzpatrickeb52e712015-05-13 18:38:20 -0700693 for b, builderInfo := range dashboard.Builders {
694 if builderInfo.TryOnly || knownToDashboard[b] {
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800695 continue
696 }
697 for _, rev := range goRevisions {
Andrew Gerrand234725b2015-06-04 16:45:17 -0700698 br := builderRev{name: b, rev: rev}
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800699 if !isBuilding(br) {
700 work <- br
701 }
702 }
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700703 }
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -0800704 return nil
Brad Fitzpatricke428e4b2014-09-03 10:26:52 -0700705}
706
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800707// findTryWorkLoop is a goroutine which loops periodically and queries
708// Gerrit for TryBot work.
709func findTryWorkLoop() {
710 if errTryDeps != nil {
711 return
712 }
713 ticker := time.NewTicker(60 * time.Second)
714 for {
715 if err := findTryWork(); err != nil {
716 log.Printf("failed to find trybot work: %v", err)
717 }
718 <-ticker.C
719 }
720}
721
722func findTryWork() error {
723 cis, err := gerritClient.QueryChanges("label:Run-TryBot=1 label:TryBot-Result=0 project:go status:open", gerrit.QueryChangesOpt{
724 Fields: []string{"CURRENT_REVISION"},
725 })
726 if err != nil {
727 return err
728 }
729 if len(cis) == 0 {
730 return nil
731 }
732
733 statusMu.Lock()
734 defer statusMu.Unlock()
735
736 tryList = make([]tryKey, 0, len(cis))
737 wanted := map[tryKey]bool{}
738 for _, ci := range cis {
739 if ci.ChangeID == "" || ci.CurrentRevision == "" {
740 log.Printf("Warning: skipping incomplete %#v", ci)
741 continue
742 }
743 key := tryKey{
744 ChangeID: ci.ChangeID,
745 Commit: ci.CurrentRevision,
746 }
747 tryList = append(tryList, key)
748 wanted[key] = true
749 if _, ok := tries[key]; ok {
750 // already in progress
751 continue
752 }
753 tries[key] = newTrySet(key)
754 }
755 for k, ts := range tries {
756 if !wanted[k] {
757 delete(tries, k)
758 go ts.cancelBuilds()
759 }
760 }
761 return nil
762}
763
// tryKey identifies one TryBot run: a Gerrit change at a particular
// commit. It is comparable and is used as a map key.
type tryKey struct {
	// ChangeID is the Gerrit change ID,
	// e.g. "I1a27695838409259d1586a0adfa9f92bccf7ceba".
	ChangeID string
	// Commit is the git commit hash,
	// e.g. "ecf3dffc81dc21408fb02159af352651882a8383".
	Commit string
}
768
769// trySet is a the state of a set of builds of different
770// configurations, all for the same (Change-ID, Commit) pair. The
771// sets which are still wanted (not already submitted or canceled) are
772// stored in the global 'tries' map.
773type trySet struct {
774 // immutable
775 tryKey
776
Brad Fitzpatrickacb32b42015-05-11 09:14:37 -0700777 // mu guards state and errMsg
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800778 // See LOCK ORDER comment above.
779 mu sync.Mutex
780 trySetState
Brad Fitzpatrickacb32b42015-05-11 09:14:37 -0700781 errMsg bytes.Buffer
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800782}
783
784type trySetState struct {
785 remain int
786 failed []string // build names
787 builds []*buildStatus
788}
789
790func (ts trySetState) clone() trySetState {
791 return trySetState{
792 remain: ts.remain,
793 failed: append([]string(nil), ts.failed...),
794 builds: append([]*buildStatus(nil), ts.builds...),
795 }
796}
797
798// newTrySet creates a new trySet group of builders for a given key,
799// the (Change-ID, Commit) pair. It also starts goroutines for each
800// build.
801//
802// Must hold statusMu.
803func newTrySet(key tryKey) *trySet {
804 log.Printf("Starting new trybot set for %v", key)
805 ts := &trySet{
806 tryKey: key,
807 trySetState: trySetState{
808 remain: len(tryBuilders),
809 builds: make([]*buildStatus, len(tryBuilders)),
810 },
811 }
812 go ts.notifyStarting()
813 for i, bconf := range tryBuilders {
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800814 brev := builderRev{name: bconf.Name, rev: key.Commit}
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100815
816 bs, _ := newBuild(brev)
817 bs.trySet = ts
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800818 status[brev] = bs
819 ts.builds[i] = bs
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100820 go bs.start() // acquires statusMu itself, so in a goroutine
821 go ts.awaitTryBuild(i, bconf, bs)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800822 }
823 return ts
824}
825
826// state returns a copy of the trySet's state.
827func (ts *trySet) state() trySetState {
828 ts.mu.Lock()
829 defer ts.mu.Unlock()
830 return ts.trySetState.clone()
831}
832
833// notifyStarting runs in its own goroutine and posts to Gerrit that
834// the trybots have started on the user's CL with a link of where to watch.
835func (ts *trySet) notifyStarting() {
Brad Fitzpatrick4623e1a2015-05-27 13:15:38 -0700836 msg := "TryBots beginning. Status page: http://farmer.golang.org/try?commit=" + ts.Commit[:8]
837
838 if ci, err := gerritClient.GetChangeDetail(ts.ChangeID); err == nil {
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -0700839 if len(ci.Messages) == 0 {
840 log.Printf("No Gerrit comments retrieved on %v", ts.ChangeID)
841 }
Brad Fitzpatrick4623e1a2015-05-27 13:15:38 -0700842 for _, cmi := range ci.Messages {
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -0700843 if strings.Contains(cmi.Message, msg) {
Brad Fitzpatrick4623e1a2015-05-27 13:15:38 -0700844 // Dup. Don't spam.
845 return
846 }
847 }
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -0700848 } else {
849 log.Printf("Error getting Gerrit comments on %s: %v", ts.ChangeID, err)
Brad Fitzpatrick4623e1a2015-05-27 13:15:38 -0700850 }
851
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800852 // Ignore error. This isn't critical.
Brad Fitzpatrick4623e1a2015-05-27 13:15:38 -0700853 gerritClient.SetReview(ts.ChangeID, ts.Commit, gerrit.ReviewInput{Message: msg})
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800854}
855
856// awaitTryBuild runs in its own goroutine and waits for a build in a
857// trySet to complete.
858//
859// If the build fails without getting to the end, it sleeps and
860// reschedules it, as long as it's still wanted.
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100861func (ts *trySet) awaitTryBuild(idx int, bconf dashboard.BuildConfig, bs *buildStatus) {
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800862 for {
863 WaitCh:
864 for {
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100865 timeout := time.NewTimer(10 * time.Minute)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800866 select {
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100867 case <-bs.donec:
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800868 timeout.Stop()
869 break WaitCh
870 case <-timeout.C:
871 if !ts.wanted() {
872 // Build was canceled.
873 return
874 }
875 }
876 }
877
878 if bs.hasEvent("done") {
879 ts.noteBuildComplete(bconf, bs)
880 return
881 }
882
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100883 // TODO(bradfitz): rethink this logic. we should only
884 // start a new build if the old one appears dead or
885 // hung.
886
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800887 // Sleep a bit and retry.
888 time.Sleep(30 * time.Second)
889 if !ts.wanted() {
890 return
891 }
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800892 brev := builderRev{name: bconf.Name, rev: ts.Commit}
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100893 bs, _ = newBuild(brev)
Brad Fitzpatrick9d86d3d2015-04-01 01:26:32 -0700894 bs.trySet = ts
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +0100895 go bs.start()
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800896 ts.mu.Lock()
897 ts.builds[idx] = bs
898 ts.mu.Unlock()
899 }
900}
901
902// wanted reports whether this trySet is still active.
903//
904// If the commmit has been submitted, or change abandoned, or the
905// checkbox unchecked, wanted returns false.
906func (ts *trySet) wanted() bool {
907 statusMu.Lock()
908 defer statusMu.Unlock()
909 _, ok := tries[ts.tryKey]
910 return ok
911}
912
913// cancelBuilds run in its own goroutine and cancels this trySet's
914// currently-active builds because they're no longer wanted.
915func (ts *trySet) cancelBuilds() {
916 // TODO(bradfitz): implement
917}
918
919func (ts *trySet) noteBuildComplete(bconf dashboard.BuildConfig, bs *buildStatus) {
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800920 bs.mu.Lock()
921 succeeded := bs.succeeded
922 var buildLog string
923 if !succeeded {
924 buildLog = bs.output.String()
925 }
926 bs.mu.Unlock()
927
928 ts.mu.Lock()
929 ts.remain--
930 remain := ts.remain
931 if !succeeded {
932 ts.failed = append(ts.failed, bconf.Name)
933 }
Brad Fitzpatrickacb32b42015-05-11 09:14:37 -0700934 numFail := len(ts.failed)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800935 ts.mu.Unlock()
936
937 if !succeeded {
938 s1 := sha1.New()
939 io.WriteString(s1, buildLog)
940 objName := fmt.Sprintf("%s/%s_%x.log", bs.rev[:8], bs.name, s1.Sum(nil)[:4])
Andrew Gerranda9469a82015-05-27 14:15:18 -0700941 wr := storage.NewWriter(serviceCtx, buildLogBucket(), objName)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800942 wr.ContentType = "text/plain; charset=utf-8"
943 wr.ACL = append(wr.ACL, storage.ACLRule{Entity: storage.AllUsers, Role: storage.RoleReader})
944 if _, err := io.WriteString(wr, buildLog); err != nil {
945 log.Printf("Failed to write to GCS: %v", err)
946 return
947 }
948 if err := wr.Close(); err != nil {
949 log.Printf("Failed to write to GCS: %v", err)
950 return
951 }
Andrew Gerranda9469a82015-05-27 14:15:18 -0700952 failLogURL := fmt.Sprintf("https://storage.googleapis.com/%s/%s", buildLogBucket(), objName)
Brad Fitzpatrick777a5bf2015-06-09 12:17:36 -0700953
954 bs.mu.Lock()
955 bs.failURL = failLogURL
956 bs.mu.Unlock()
Brad Fitzpatrickacb32b42015-05-11 09:14:37 -0700957 ts.mu.Lock()
958 fmt.Fprintf(&ts.errMsg, "Failed on %s: %s\n", bs.name, failLogURL)
959 ts.mu.Unlock()
960
961 if numFail == 1 && remain > 0 {
962 if err := gerritClient.SetReview(ts.ChangeID, ts.Commit, gerrit.ReviewInput{
963 Message: fmt.Sprintf(
964 "This change failed on %s:\n"+
965 "See %s\n\n"+
966 "Consult https://build.golang.org/ to see whether it's a new failure. Other builds still in progress; subsequent failure notices suppressed until final report.",
967 bs.name, failLogURL),
968 }); err != nil {
969 log.Printf("Failed to call Gerrit: %v", err)
970 return
971 }
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800972 }
973 }
974
975 if remain == 0 {
976 score, msg := 1, "TryBots are happy."
Brad Fitzpatrickacb32b42015-05-11 09:14:37 -0700977 if numFail > 0 {
978 ts.mu.Lock()
979 errMsg := ts.errMsg.String()
980 ts.mu.Unlock()
981 score, msg = -1, fmt.Sprintf("%d of %d TryBots failed:\n%s\nConsult https://build.golang.org/ to see whether they are new failures.",
982 numFail, len(ts.builds), errMsg)
Brad Fitzpatrick421541f2015-02-11 22:57:39 -0800983 }
984 if err := gerritClient.SetReview(ts.ChangeID, ts.Commit, gerrit.ReviewInput{
985 Message: msg,
986 Labels: map[string]int{
987 "TryBot-Result": score,
988 },
989 }); err != nil {
990 log.Printf("Failed to call Gerrit: %v", err)
991 return
992 }
993 }
994}
995
// builderRev is a build configuration type and a revision, plus an
// optional sub-repository to build at that revision.
type builderRev struct {
	name string // e.g. "linux-amd64-race"
	rev  string // lowercase hex core repo git hash

	// Optional sub-repository details (both must be present).
	subName string // e.g. "net"
	subRev  string // lowercase hex sub-repo git hash
}

// isSubrepo reports whether br names a sub-repository build, i.e.
// whether subName is set.
func (br builderRev) isSubrepo() bool {
	return len(br.subName) > 0
}
1009
// eventTimeLogger is implemented by anything that can record that a
// named event happened, with optional descriptive text.
type eventTimeLogger interface {
	logEventTime(event string, optText ...string)
}
1013
// ErrCanceled is the error a function should return when its Cancel
// channel became readable.
var ErrCanceled = errors.New("canceled")

// Cancel is a channel that's closed by the caller when the request is
// no longer desired. The function receiving a cancel should return
// ErrCanceled whenever Cancel becomes readable.
type Cancel <-chan struct{}

// IsCanceled reports, without blocking, whether c is currently
// readable. A nil Cancel is never canceled.
func (c Cancel) IsCanceled() bool {
	select {
	case <-c:
	default:
		return false
	}
	return true
}
1029
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001030type BuildletPool interface {
1031 // GetBuildlet returns a new buildlet client.
1032 //
1033 // The machineType is the machine type (e.g. "linux-amd64-race").
1034 //
1035 // The rev is git hash. Implementations should not use it for
1036 // anything except for log messages or VM naming.
1037 //
1038 // Clients must Close when done with the client.
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001039 GetBuildlet(cancel Cancel, machineType, rev string, el eventTimeLogger) (*buildlet.Client, error)
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001040
1041 String() string // TODO(bradfitz): more status stuff
Brad Fitzpatrickc1d98dc2015-01-07 15:44:25 -08001042}
1043
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001044// GetBuildlets creates up to n buildlets and sends them on the returned channel
1045// before closing the channel.
1046func GetBuildlets(cancel Cancel, pool BuildletPool, n int, machineType, rev string, el eventTimeLogger) <-chan *buildlet.Client {
1047 ch := make(chan *buildlet.Client) // NOT buffered
1048 var wg sync.WaitGroup
1049 wg.Add(n)
1050 for i := 0; i < n; i++ {
1051 go func() {
1052 defer wg.Done()
1053 bc, err := pool.GetBuildlet(cancel, machineType, rev, el)
1054 if err != nil {
1055 if err != ErrCanceled {
1056 log.Printf("failed to get a %s buildlet for rev %s: %v", machineType, rev, err)
1057 }
1058 return
1059 }
1060 el.logEventTime("helper_ready")
1061 select {
1062 case ch <- bc:
1063 case <-cancel:
1064 el.logEventTime("helper_killed_before_use")
1065 bc.Close()
1066 return
1067 }
1068 }()
1069 }
1070 go func() {
1071 wg.Wait()
1072 close(ch)
1073 }()
1074 return ch
1075}
1076
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001077func poolForConf(conf dashboard.BuildConfig) (BuildletPool, error) {
1078 if conf.VMImage != "" {
1079 return gcePool, nil
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001080 }
David Crawshaw66c36dd2015-04-23 10:23:22 -04001081 return reversePool, nil
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001082}
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001083
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001084func newBuild(rev builderRev) (*buildStatus, error) {
1085 // Note: can't acquire statusMu in newBuild, as this is called
1086 // from findTryWork -> newTrySet, which holds statusMu.
1087
1088 conf, ok := dashboard.Builders[rev.name]
1089 if !ok {
1090 return nil, fmt.Errorf("unknown builder type %q", rev.name)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001091 }
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001092 return &buildStatus{
1093 builderRev: rev,
1094 conf: conf,
1095 donec: make(chan struct{}),
1096 startTime: time.Now(),
1097 }, nil
1098}
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001099
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001100// start sets the st.startTime and starts the build in a new goroutine.
1101// If it returns an error, st is not modified and a new goroutine has not
1102// been started.
1103// The build status's donec channel is closed on when the build is complete
1104// in either direction.
1105func (st *buildStatus) start() {
1106 setStatus(st.builderRev, st)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001107 go func() {
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001108 err := st.build()
Brad Fitzpatrickf3c01932015-01-15 16:29:16 -08001109 if err != nil {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001110 fmt.Fprintf(st, "\n\nError: %v\n", err)
1111 }
Brad Fitzpatrickec2973a2015-03-04 07:30:35 -08001112 st.setDone(err == nil)
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001113 markDone(st.builderRev)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001114 }()
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001115}
1116
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001117func (st *buildStatus) buildletType() string {
1118 if v := st.conf.BuildletType; v != "" {
1119 return v
Brad Fitzpatrickac39ba82015-05-14 13:39:58 -07001120 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001121 return st.conf.Name
1122}
1123
1124func (st *buildStatus) buildletPool() (BuildletPool, error) {
1125 buildletType := st.buildletType()
Brad Fitzpatrickac39ba82015-05-14 13:39:58 -07001126 bconf, ok := dashboard.Builders[buildletType]
1127 if !ok {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001128 return nil, fmt.Errorf("invalid BuildletType %q for %q", buildletType, st.conf.Name)
Brad Fitzpatrickac39ba82015-05-14 13:39:58 -07001129 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001130 return poolForConf(bconf)
1131}
1132
1133func (st *buildStatus) expectedMakeBashDuration() time.Duration {
1134 // TODO: base this on historical measurements, instead of statically configured.
1135 // TODO: move this to dashboard/builders.go? But once we based on on historical
1136 // measurements, it'll need GCE services (bigtable/bigquery?), so it's probably
1137 // better in this file.
1138 goos, goarch := st.conf.GOOS(), st.conf.GOARCH()
1139
1140 if goos == "plan9" {
1141 return 2500 * time.Millisecond
1142 }
1143 if goos == "linux" {
1144 if goarch == "arm" {
1145 return 4 * time.Minute
1146 }
1147 return 1000 * time.Millisecond
1148 }
1149 if goos == "windows" {
1150 return 1000 * time.Millisecond
1151 }
1152
1153 return 1500 * time.Millisecond
1154}
1155
1156func (st *buildStatus) expectedBuildletStartDuration() time.Duration {
1157 // TODO: move this to dashboard/builders.go? But once we based on on historical
1158 // measurements, it'll need GCE services (bigtable/bigquery?), so it's probably
1159 // better in this file.
1160 pool, _ := st.buildletPool()
1161 switch pool.(type) {
1162 case *gceBuildletPool:
1163 return time.Minute
1164 case *reverseBuildletPool:
1165 goos, arch := st.conf.GOOS(), st.conf.GOARCH()
1166 if goos == "darwin" {
1167 if arch == "arm" && arch == "arm64" {
1168 // iOS; idle or it's not.
1169 return 0
1170 }
1171 if arch == "amd64" || arch == "386" {
1172 return 0 // TODO: remove this once we're using VMware
1173 return 1 * time.Minute // VMware boot of hermetic OS X
1174 }
1175 }
1176 if goos == "linux" && arch == "arm" {
1177 // Scaleway. Ready or not.
1178 return 0
1179 }
1180 }
1181 return 0
1182}
1183
1184// getHelpersReadySoon waits a bit (as a function of the build
1185// configuration) and starts getting the buildlets for test sharding
1186// ready, such that they're ready when make.bash is done. But we don't
1187// want to start too early, lest we waste idle resources during make.bash.
1188func (st *buildStatus) getHelpersReadySoon() {
Andrew Gerrand234725b2015-06-04 16:45:17 -07001189 if st.isSubrepo() || st.conf.NumTestHelpers == 0 {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001190 return
1191 }
1192 time.AfterFunc(st.expectedMakeBashDuration()-st.expectedBuildletStartDuration(),
1193 func() {
1194 st.logEventTime("starting_helpers")
1195 st.getHelpers() // and ignore the result.
1196 })
1197}
1198
1199// getHelpers returns a channel of buildlet test helpers, with an item
1200// sent as they become available. The channel is closed at the end.
1201func (st *buildStatus) getHelpers() <-chan *buildlet.Client {
1202 st.onceInitHelpers.Do(st.onceInitHelpersFunc)
1203 return st.helpers
1204}
1205
1206func (st *buildStatus) onceInitHelpersFunc() {
1207 pool, _ := st.buildletPool() // won't return an error since we called it already
1208 st.helpers = GetBuildlets(st.donec, pool, st.conf.NumTestHelpers, st.buildletType(), st.rev, st)
1209}
1210
Andrew Gerrand234725b2015-06-04 16:45:17 -07001211// We should try to build from a snapshot if this is a subrepo build, we can
1212// expect there to be a snapshot (splitmakerun), and the snapshot exists.
1213func (st *buildStatus) useSnapshot() bool {
1214 st.mu.Lock()
1215 defer st.mu.Unlock()
1216 if st.useSnapshotMemo != nil {
1217 return *st.useSnapshotMemo
1218 }
1219 b := st.isSubrepo() && st.conf.SplitMakeRun() && st.snapshotExists()
1220 st.useSnapshotMemo = &b
1221 return b
1222}
1223
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07001224func (st *buildStatus) build() error {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001225 pool, err := st.buildletPool()
Brad Fitzpatrickf3c01932015-01-15 16:29:16 -08001226 if err != nil {
1227 return err
1228 }
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001229 st.logEventTime("get_buildlet")
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001230 bc, err := pool.GetBuildlet(nil, st.buildletType(), st.rev, st)
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001231 if err != nil {
1232 return fmt.Errorf("failed to get a buildlet: %v", err)
1233 }
1234 defer bc.Close()
Brad Fitzpatrick378fb292015-06-10 13:59:42 -07001235 defer nukeIfBroken(bc)
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01001236 st.mu.Lock()
1237 st.bc = bc
1238 st.mu.Unlock()
1239
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001240 st.logEventTime("got_buildlet", bc.IPPort())
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001241
Andrew Gerrand234725b2015-06-04 16:45:17 -07001242 if st.useSnapshot() {
1243 st.logEventTime("start_write_snapshot_tar")
1244 if err := bc.PutTarFromURL(st.snapshotURL(), "go"); err != nil {
1245 return fmt.Errorf("failed to put snapshot to buildlet: %v", err)
1246 }
1247 st.logEventTime("end_write_snapshot_tar")
1248 } else {
1249 // Write the Go source and bootstrap tool chain in parallel.
1250 var grp syncutil.Group
1251 grp.Go(st.writeGoSource)
1252 grp.Go(st.writeBootstrapToolchain)
1253 if err := grp.Err(); err != nil {
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07001254 return err
1255 }
Brad Fitzpatrick20d84832015-01-21 10:03:07 -08001256 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001257
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001258 execStartTime := time.Now()
Brad Fitzpatrickf8c24842015-01-16 09:54:03 -08001259 st.logEventTime("pre_exec")
Andrew Gerrand306c6b72015-06-11 13:44:48 -07001260 fmt.Fprintf(st, "%s at %v", st.name, st.rev)
1261 if st.isSubrepo() {
1262 fmt.Fprintf(st, " building %v at %v", st.subName, st.subRev)
1263 }
1264 fmt.Fprint(st, "\n\n")
Brad Fitzpatrickf8c24842015-01-16 09:54:03 -08001265
Brad Fitzpatrick7d9b0362015-05-27 11:51:27 -07001266 var remoteErr error
1267 if st.conf.SplitMakeRun() {
Andrew Gerrand234725b2015-06-04 16:45:17 -07001268 remoteErr, err = st.runAllSharded()
Brad Fitzpatrick7d9b0362015-05-27 11:51:27 -07001269 } else {
Andrew Gerrand234725b2015-06-04 16:45:17 -07001270 remoteErr, err = st.runAllLegacy()
Andrew Gerrandfb774882015-05-21 14:02:38 +10001271 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001272 doneMsg := "all tests passed"
1273 if remoteErr != nil {
1274 doneMsg = "with test failures"
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07001275 } else if err != nil {
1276 doneMsg = "comm error: " + err.Error()
1277 }
1278 if err != nil {
1279 // Return the error *before* we create the magic
1280 // "done" event. (which the try coordinator looks for)
1281 return err
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001282 }
1283 st.logEventTime("done", doneMsg) // "done" is a magic value
1284
1285 if devPause {
1286 st.logEventTime("DEV_MAIN_SLEEP")
1287 time.Sleep(5 * time.Minute)
1288 }
Andrew Gerrandfb774882015-05-21 14:02:38 +10001289
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08001290 if st.trySet == nil {
1291 var buildLog string
Brad Fitzpatrickf8c24842015-01-16 09:54:03 -08001292 if remoteErr != nil {
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08001293 buildLog = st.logs()
Brad Fitzpatrickf8c24842015-01-16 09:54:03 -08001294 }
Andrew Gerrand234725b2015-06-04 16:45:17 -07001295 if err := recordResult(st.builderRev, remoteErr == nil, buildLog, time.Since(execStartTime)); err != nil {
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08001296 if remoteErr != nil {
1297 return fmt.Errorf("Remote error was %q but failed to report it to the dashboard: %v", remoteErr, err)
1298 }
1299 return fmt.Errorf("Build succeeded but failed to report it to the dashboard: %v", err)
1300 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001301 }
Brad Fitzpatrickf8c24842015-01-16 09:54:03 -08001302 if remoteErr != nil {
Andrew Gerrand234725b2015-06-04 16:45:17 -07001303 return remoteErr
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08001304 }
1305 return nil
1306}
1307
Andrew Gerrand234725b2015-06-04 16:45:17 -07001308// runAllSharded runs make.bash and then shards the test execution.
1309// remoteErr and err are as described at the top of this file.
1310func (st *buildStatus) runAllSharded() (remoteErr, err error) {
1311 st.getHelpersReadySoon()
1312
1313 remoteErr, err = st.runMake()
1314 if err != nil {
1315 return nil, err
1316 }
1317 if remoteErr != nil {
1318 return fmt.Errorf("build failed: %v", remoteErr), nil
1319 }
1320
1321 if err := st.doSnapshot(); err != nil {
1322 return nil, err
1323 }
1324
1325 if st.isSubrepo() {
1326 remoteErr, err = st.runSubrepoTests()
1327 } else {
1328 remoteErr, err = st.runTests(st.getHelpers())
1329 }
1330 if err != nil {
1331 return nil, fmt.Errorf("runTests: %v", err)
1332 }
1333 if remoteErr != nil {
1334 return fmt.Errorf("tests failed: %v", remoteErr), nil
1335 }
1336 return nil, nil
1337}
1338
1339// runMake builds the tool chain.
1340// remoteErr and err are as described at the top of this file.
1341func (st *buildStatus) runMake() (remoteErr, err error) {
1342 // Don't do this if we're using a pre-built snapshot.
1343 if st.useSnapshot() {
1344 return nil, nil
1345 }
1346
1347 // Build the source code.
1348 makeScript := st.conf.MakeScript()
1349 t0 := time.Now()
1350 remoteErr, err = st.bc.Exec(path.Join("go", makeScript), buildlet.ExecOpts{
1351 Output: st,
1352 OnStartExec: func() {
1353 st.logEventTime("running_exec", makeScript)
1354 },
1355 ExtraEnv: st.conf.Env(),
1356 Debug: true,
1357 Args: st.conf.MakeScriptArgs(),
1358 })
1359 if err != nil {
1360 return nil, err
1361 }
1362 st.logEventTime("exec_done", fmt.Sprintf("%s in %v", makeScript, time.Since(t0)))
1363 if remoteErr != nil {
1364 return fmt.Errorf("make script failed: %v", remoteErr), nil
1365 }
1366 return nil, nil
1367}
1368
1369// runAllLegacy executes all.bash (or .bat, or whatever) in the traditional way.
1370// remoteErr and err are as described at the top of this file.
1371//
1372// TODO(bradfitz,adg): delete this function when all builders
1373// can split make & run (and then delete the SplitMakeRun method)
1374func (st *buildStatus) runAllLegacy() (remoteErr, err error) {
1375 st.logEventTime("legacy_all_path")
1376 allScript := st.conf.AllScript()
1377 t0 := time.Now()
1378 remoteErr, err = st.bc.Exec(path.Join("go", allScript), buildlet.ExecOpts{
1379 Output: st,
1380 OnStartExec: func() {
1381 st.logEventTime("running_exec", allScript)
1382 },
1383 ExtraEnv: st.conf.Env(),
1384 Debug: true,
1385 Args: st.conf.AllScriptArgs(),
1386 })
1387 if err != nil {
Brad Fitzpatrick522e1052015-06-10 18:09:34 -07001388 return nil, err
Andrew Gerrand234725b2015-06-04 16:45:17 -07001389 }
1390 st.logEventTime("exec_done", fmt.Sprintf("%s in %v", allScript, time.Since(t0)))
1391 if remoteErr != nil {
1392 return fmt.Errorf("all script failed: %v", remoteErr), nil
1393 }
1394 return nil, nil
1395}
1396
1397func (st *buildStatus) doSnapshot() error {
1398 // If we're using a pre-built snapshot, don't make another.
1399 if st.useSnapshot() {
1400 return nil
1401 }
1402
1403 if err := st.cleanForSnapshot(); err != nil {
1404 return fmt.Errorf("cleanForSnapshot: %v", err)
1405 }
1406 if err := st.writeSnapshot(); err != nil {
1407 return fmt.Errorf("writeSnapshot: %v", err)
1408 }
1409 return nil
1410}
1411
1412func (br *builderRev) snapshotExists() bool {
1413 resp, err := http.Head(br.snapshotURL())
1414 return err == nil && resp.StatusCode == http.StatusOK
1415}
1416
1417func (st *buildStatus) writeGoSource() error {
1418 // Write the VERSION file.
1419 st.logEventTime("start_write_version_tar")
1420 if err := st.bc.PutTar(versionTgz(st.rev), "go"); err != nil {
1421 return fmt.Errorf("writing VERSION tgz: %v", err)
1422 }
1423
1424 st.logEventTime("fetch_go_tar")
1425 tarReader, err := getSourceTgz(st, "go", st.rev)
1426 if err != nil {
1427 return err
1428 }
1429 st.logEventTime("start_write_go_tar")
1430 if err := st.bc.PutTar(tarReader, "go"); err != nil {
1431 return fmt.Errorf("writing tarball from Gerrit: %v", err)
1432 }
1433 st.logEventTime("end_write_go_tar")
1434 return nil
1435}
1436
1437func (st *buildStatus) writeBootstrapToolchain() error {
1438 if st.conf.Go14URL == "" {
1439 return nil
1440 }
1441 st.logEventTime("start_write_go14_tar")
1442 if err := st.bc.PutTarFromURL(st.conf.Go14URL, "go1.4"); err != nil {
1443 return err
1444 }
1445 st.logEventTime("end_write_go14_tar")
1446 return nil
1447}
1448
Andrew Gerrandc53a5772015-05-27 14:29:36 -07001449var cleanForSnapshotFiles = []string{
1450 "go/doc/gopher",
1451 "go/pkg/bootstrap",
1452}
1453
1454func (st *buildStatus) cleanForSnapshot() error {
1455 st.logEventTime("clean_for_snapshot")
1456 defer st.logEventTime("clean_for_snapshot_done")
1457
1458 return st.bc.RemoveAll(cleanForSnapshotFiles...)
1459}
1460
Andrew Gerrand234725b2015-06-04 16:45:17 -07001461// snapshotObjectName is the cloud storage object name of the
1462// built Go tree for this builder and Go rev (not the sub-repo).
1463// The entries inside this tarball do not begin with "go/".
1464func (br *builderRev) snapshotObjectName() string {
1465 return fmt.Sprintf("%v/%v/%v.tar.gz", "go", br.name, br.rev)
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001466}
1467
Andrew Gerrand234725b2015-06-04 16:45:17 -07001468// snapshotURL is the absolute URL of the snapshot object (see above).
1469func (br *builderRev) snapshotURL() string {
1470 return fmt.Sprintf("https://storage.googleapis.com/%s/%s", snapBucket(), br.snapshotObjectName())
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001471}
1472
Andrew Gerrandc53a5772015-05-27 14:29:36 -07001473func (st *buildStatus) writeSnapshot() error {
1474 st.logEventTime("write_snapshot")
1475 defer st.logEventTime("write_snapshot_done")
1476
1477 tgz, err := st.bc.GetTar("go")
1478 if err != nil {
1479 return err
1480 }
1481 defer tgz.Close()
1482
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001483 wr := storage.NewWriter(serviceCtx, snapBucket(), st.snapshotObjectName())
Andrew Gerrandc53a5772015-05-27 14:29:36 -07001484 wr.ContentType = "application/octet-stream"
1485 wr.ACL = append(wr.ACL, storage.ACLRule{Entity: storage.AllUsers, Role: storage.RoleReader})
1486 if _, err := io.Copy(wr, tgz); err != nil {
1487 wr.Close()
1488 return err
1489 }
1490
1491 return wr.Close()
1492}
1493
Brad Fitzpatrick44de54d2015-05-27 20:06:32 -07001494func (st *buildStatus) distTestList() (names []string, err error) {
1495 var buf bytes.Buffer
1496 remoteErr, err := st.bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{
1497 Output: &buf,
1498 ExtraEnv: st.conf.Env(),
1499 OnStartExec: func() { st.logEventTime("discovering_tests") },
1500 Path: []string{"$WORKDIR/go/bin", "$PATH"},
1501 Args: []string{"tool", "dist", "test", "--no-rebuild", "--list"},
1502 })
1503 if err != nil {
1504 return nil, fmt.Errorf("Exec error: %v, %s", remoteErr, buf.Bytes())
1505 }
1506 if remoteErr != nil {
1507 return nil, fmt.Errorf("Remote error: %v, %s", remoteErr, buf.Bytes())
1508 }
1509 return strings.Fields(buf.String()), nil
1510}
1511
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001512func (st *buildStatus) newTestSet(names []string) *testSet {
1513 set := &testSet{
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07001514 st: st,
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001515 }
1516 for _, name := range names {
1517 set.items = append(set.items, &testItem{
1518 set: set,
1519 name: name,
1520 duration: testDuration(name),
1521 take: make(chan token, 1),
1522 done: make(chan token),
1523 })
1524 }
1525 return set
1526}
1527
1528func partitionGoTests(tests []string) (sets [][]string) {
1529 var srcTests []string
1530 var cmdTests []string
1531 for _, name := range tests {
1532 if strings.HasPrefix(name, "go_test:cmd/") {
1533 cmdTests = append(cmdTests, name)
1534 } else if strings.HasPrefix(name, "go_test:") {
1535 srcTests = append(srcTests, name)
1536 }
1537 }
1538 sort.Strings(srcTests)
1539 sort.Strings(cmdTests)
1540 goTests := append(srcTests, cmdTests...)
1541
1542 const sizeThres = 10 * time.Second
1543
1544 var curSet []string
1545 var curDur time.Duration
1546
1547 flush := func() {
1548 if len(curSet) > 0 {
1549 sets = append(sets, curSet)
1550 curSet = nil
1551 curDur = 0
1552 }
1553 }
1554 for _, name := range goTests {
1555 d := testDuration(name) - minGoTestSpeed // subtract 'go' tool overhead
1556 if curDur+d > sizeThres {
1557 flush() // no-op if empty
1558 }
1559 curSet = append(curSet, name)
1560 curDur += d
1561 }
1562
1563 flush()
1564 return
1565}
1566
1567var minGoTestSpeed = (func() time.Duration {
1568 var min Seconds
1569 for name, secs := range fixedTestDuration {
1570 if !strings.HasPrefix(name, "go_test:") {
1571 continue
1572 }
1573 if min == 0 || secs < min {
1574 min = secs
1575 }
1576 }
1577 return min.Duration()
1578})()
1579
// Seconds is a length of time expressed as a floating-point number
// of seconds.
type Seconds float64

// Duration converts s to a time.Duration, truncating any fraction of
// a nanosecond.
func (s Seconds) Duration() time.Duration {
	const nanosPerSecond = float64(time.Second)
	return time.Duration(float64(s) * nanosPerSecond)
}
1585
1586// in seconds on Linux/amd64 (once on 2015-05-28), each
1587// by themselves. There seems to be a 0.6s+ overhead
1588// from the go tool which goes away if they're combined.
1589var fixedTestDuration = map[string]Seconds{
1590 "go_test:archive/tar": 1.30,
1591 "go_test:archive/zip": 1.68,
1592 "go_test:bufio": 1.61,
1593 "go_test:bytes": 1.50,
1594 "go_test:compress/bzip2": 0.82,
1595 "go_test:compress/flate": 1.73,
1596 "go_test:compress/gzip": 0.82,
1597 "go_test:compress/lzw": 0.86,
1598 "go_test:compress/zlib": 1.78,
1599 "go_test:container/heap": 0.69,
1600 "go_test:container/list": 0.72,
1601 "go_test:container/ring": 0.64,
1602 "go_test:crypto/aes": 0.79,
1603 "go_test:crypto/cipher": 0.96,
1604 "go_test:crypto/des": 0.96,
1605 "go_test:crypto/dsa": 0.75,
1606 "go_test:crypto/ecdsa": 0.86,
1607 "go_test:crypto/elliptic": 1.06,
1608 "go_test:crypto/hmac": 0.67,
1609 "go_test:crypto/md5": 0.77,
1610 "go_test:crypto/rand": 0.89,
1611 "go_test:crypto/rc4": 0.71,
1612 "go_test:crypto/rsa": 1.17,
1613 "go_test:crypto/sha1": 0.75,
1614 "go_test:crypto/sha256": 0.68,
1615 "go_test:crypto/sha512": 0.67,
1616 "go_test:crypto/subtle": 0.56,
1617 "go_test:crypto/tls": 3.29,
1618 "go_test:crypto/x509": 2.81,
1619 "go_test:database/sql": 1.75,
1620 "go_test:database/sql/driver": 0.64,
1621 "go_test:debug/dwarf": 0.77,
1622 "go_test:debug/elf": 1.41,
1623 "go_test:debug/gosym": 1.45,
1624 "go_test:debug/macho": 0.97,
1625 "go_test:debug/pe": 0.79,
1626 "go_test:debug/plan9obj": 0.73,
1627 "go_test:encoding/ascii85": 0.64,
1628 "go_test:encoding/asn1": 1.16,
1629 "go_test:encoding/base32": 0.79,
1630 "go_test:encoding/base64": 0.82,
1631 "go_test:encoding/binary": 0.96,
1632 "go_test:encoding/csv": 0.67,
1633 "go_test:encoding/gob": 2.70,
1634 "go_test:encoding/hex": 0.66,
1635 "go_test:encoding/json": 2.20,
1636 "test:errors": 0.54,
1637 "go_test:expvar": 1.36,
1638 "go_test:flag": 0.92,
1639 "go_test:fmt": 2.02,
1640 "go_test:go/ast": 1.44,
1641 "go_test:go/build": 1.42,
1642 "go_test:go/constant": 0.92,
1643 "go_test:go/doc": 1.51,
1644 "go_test:go/format": 0.73,
1645 "go_test:go/internal/gcimporter": 1.30,
1646 "go_test:go/parser": 1.30,
1647 "go_test:go/printer": 1.61,
1648 "go_test:go/scanner": 0.89,
1649 "go_test:go/token": 0.92,
1650 "go_test:go/types": 5.24,
1651 "go_test:hash/adler32": 0.62,
1652 "go_test:hash/crc32": 0.68,
1653 "go_test:hash/crc64": 0.55,
1654 "go_test:hash/fnv": 0.66,
1655 "go_test:html": 0.74,
1656 "go_test:html/template": 1.93,
1657 "go_test:image": 1.42,
1658 "go_test:image/color": 0.77,
1659 "go_test:image/draw": 1.32,
1660 "go_test:image/gif": 1.15,
1661 "go_test:image/jpeg": 1.32,
1662 "go_test:image/png": 1.23,
1663 "go_test:index/suffixarray": 0.79,
1664 "go_test:internal/singleflight": 0.66,
1665 "go_test:io": 0.97,
1666 "go_test:io/ioutil": 0.73,
1667 "go_test:log": 0.72,
1668 "go_test:log/syslog": 2.93,
1669 "go_test:math": 1.59,
1670 "go_test:math/big": 3.75,
1671 "go_test:math/cmplx": 0.81,
1672 "go_test:math/rand": 1.15,
1673 "go_test:mime": 1.01,
1674 "go_test:mime/multipart": 1.51,
1675 "go_test:mime/quotedprintable": 0.95,
1676 "go_test:net": 6.71,
1677 "go_test:net/http": 9.41,
1678 "go_test:net/http/cgi": 2.00,
1679 "go_test:net/http/cookiejar": 1.51,
1680 "go_test:net/http/fcgi": 1.43,
1681 "go_test:net/http/httptest": 1.36,
1682 "go_test:net/http/httputil": 1.54,
1683 "go_test:net/http/internal": 0.68,
1684 "go_test:net/internal/socktest": 0.58,
1685 "go_test:net/mail": 0.92,
1686 "go_test:net/rpc": 1.95,
1687 "go_test:net/rpc/jsonrpc": 1.50,
1688 "go_test:net/smtp": 1.43,
1689 "go_test:net/textproto": 1.01,
1690 "go_test:net/url": 1.45,
1691 "go_test:os": 1.88,
1692 "go_test:os/exec": 2.13,
1693 "go_test:os/signal": 4.22,
1694 "go_test:os/user": 0.93,
1695 "go_test:path": 0.68,
1696 "go_test:path/filepath": 1.14,
1697 "go_test:reflect": 3.42,
1698 "go_test:regexp": 1.65,
1699 "go_test:regexp/syntax": 1.40,
1700 "go_test:runtime": 21.02,
1701 "go_test:runtime/debug": 0.79,
1702 "go_test:runtime/pprof": 8.01,
1703 "go_test:sort": 0.96,
1704 "go_test:strconv": 1.60,
1705 "go_test:strings": 1.51,
1706 "go_test:sync": 1.05,
1707 "go_test:sync/atomic": 1.13,
1708 "go_test:syscall": 1.69,
1709 "go_test:testing": 3.70,
1710 "go_test:testing/quick": 0.74,
1711 "go_test:text/scanner": 0.79,
1712 "go_test:text/tabwriter": 0.71,
1713 "go_test:text/template": 1.65,
1714 "go_test:text/template/parse": 1.25,
1715 "go_test:time": 4.20,
1716 "go_test:unicode": 0.68,
1717 "go_test:unicode/utf16": 0.77,
1718 "go_test:unicode/utf8": 0.71,
1719 "go_test:cmd/addr2line": 1.73,
1720 "go_test:cmd/api": 1.33,
1721 "go_test:cmd/asm/internal/asm": 1.24,
1722 "go_test:cmd/asm/internal/lex": 0.91,
1723 "go_test:cmd/compile/internal/big": 5.26,
1724 "go_test:cmd/cover": 3.32,
1725 "go_test:cmd/fix": 1.26,
Brad Fitzpatrick378fb292015-06-10 13:59:42 -07001726 "go_test:cmd/go": 36,
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001727 "go_test:cmd/gofmt": 1.06,
1728 "go_test:cmd/internal/goobj": 0.65,
1729 "go_test:cmd/internal/obj": 1.16,
1730 "go_test:cmd/internal/obj/x86": 1.04,
1731 "go_test:cmd/internal/rsc.io/arm/armasm": 1.92,
1732 "go_test:cmd/internal/rsc.io/x86/x86asm": 2.22,
1733 "go_test:cmd/newlink": 1.48,
1734 "go_test:cmd/nm": 1.84,
1735 "go_test:cmd/objdump": 3.60,
1736 "go_test:cmd/pack": 2.64,
1737 "go_test:cmd/pprof/internal/profile": 1.29,
1738 "runtime:cpu124": 44.78,
1739 "sync_cpu": 1.01,
1740 "cgo_stdio": 1.53,
1741 "cgo_life": 1.56,
1742 "cgo_test": 45.60,
1743 "race": 42.55,
1744 "testgodefs": 2.37,
1745 "testso": 2.72,
1746 "testcarchive": 11.11,
1747 "testcshared": 15.80,
1748 "testshared": 7.13,
1749 "testasan": 2.56,
1750 "cgo_errors": 7.03,
1751 "testsigfwd": 2.74,
1752 "doc_progs": 5.38,
1753 "wiki": 3.56,
1754 "shootout": 11.34,
1755 "bench_go1": 3.72,
1756 "test": 45, // old, but valid for a couple weeks from 2015-06-04
1757 "test:0_5": 10,
1758 "test:1_5": 10,
1759 "test:2_5": 10,
1760 "test:3_5": 10,
1761 "test:4_5": 10,
1762 "codewalk": 2.42,
1763 "api": 7.38,
1764}
1765
1766// testDuration predicts how long the dist test 'name' will take.
1767// It's only a scheduling guess.
1768func testDuration(name string) time.Duration {
1769 if secs, ok := fixedTestDuration[name]; ok {
1770 return secs.Duration()
1771 }
1772 return minGoTestSpeed * 2
1773}
1774
Andrew Gerrand234725b2015-06-04 16:45:17 -07001775func (st *buildStatus) runSubrepoTests() (remoteErr, err error) {
1776 st.logEventTime("fetching_subrepo", st.subName)
1777
1778 workDir, err := st.bc.WorkDir()
1779 if err != nil {
1780 log.Printf("error discovering workdir for helper %s: %v", st.bc.IPPort(), err)
1781 return
1782 }
1783 goroot := st.conf.FilePathJoin(workDir, "go")
1784 gopath := st.conf.FilePathJoin(workDir, "gopath")
1785
1786 fetched := map[string]bool{}
1787 toFetch := []string{st.subName}
1788
1789 // fetch checks out the provided sub-repo to the buildlet's workspace.
1790 fetch := func(repo, rev string) error {
1791 fetched[repo] = true
1792 tgz, err := getSourceTgz(st, repo, rev)
1793 if err != nil {
1794 return err
1795 }
1796 return st.bc.PutTar(tgz, "gopath/src/"+subrepoPrefix+repo)
1797 }
1798
1799 // findDeps uses 'go list' on the checked out repo to find its
1800 // dependencies, and adds any not-yet-fetched deps to toFetched.
1801 findDeps := func(repo string) error {
1802 repoPath := subrepoPrefix + repo
1803 var buf bytes.Buffer
1804 rErr, err := st.bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{
1805 Output: &buf,
1806 ExtraEnv: append(st.conf.Env(), "GOROOT="+goroot, "GOPATH="+gopath),
1807 Path: []string{"$WORKDIR/go/bin", "$PATH"},
1808 Args: []string{"list", "-f", `{{range .Deps}}{{printf "%v\n" .}}{{end}}`, repoPath + "/..."},
1809 })
1810 if err != nil {
1811 return fmt.Errorf("exec go list on buildlet: %v", err)
1812 }
1813 if rErr != nil {
1814 return fmt.Errorf("go list error on buildlet: %v\n%s", rErr, buf.Bytes())
1815 }
1816 for _, p := range strings.Fields(buf.String()) {
1817 if !strings.HasPrefix(p, subrepoPrefix) || strings.HasPrefix(p, repoPath) {
1818 continue
1819 }
1820 repo = strings.TrimPrefix(p, subrepoPrefix)
1821 if i := strings.Index(repo, "/"); i >= 0 {
1822 repo = repo[:i]
1823 }
1824 if !fetched[repo] {
1825 toFetch = append(toFetch, repo)
1826 }
1827 }
1828 return nil
1829 }
1830
1831 // Recursively fetch the repo and its dependencies.
1832 // Dependencies are always fetched at master, which isn't
1833 // great but the dashboard data model doesn't track
1834 // sub-repo dependencies. TODO(adg): fix this somehow??
1835 for i := 0; i < len(toFetch); i++ {
1836 repo := toFetch[i]
1837 if fetched[repo] {
1838 continue
1839 }
1840 // Fetch the HEAD revision by default.
1841 subrepoHead.Lock()
1842 rev := subrepoHead.m[repo]
1843 subrepoHead.Unlock()
1844 if rev == "" {
1845 rev = "master" // should happen rarely; ok if it does.
1846 }
1847 // For the repo under test, choose that specific revision.
1848 if i == 0 {
1849 rev = st.subRev
1850 }
1851 if err := fetch(repo, rev); err != nil {
1852 return nil, err
1853 }
1854 if err := findDeps(repo); err != nil {
1855 return nil, err
1856 }
1857 }
1858
1859 st.logEventTime("starting_tests", st.subName)
1860 defer st.logEventTime("tests_complete")
1861 return st.bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{
1862 Output: st,
1863 ExtraEnv: append(st.conf.Env(), "GOROOT="+goroot, "GOPATH="+gopath),
1864 Path: []string{"$WORKDIR/go/bin", "$PATH"},
1865 Args: []string{"test", "-short", subrepoPrefix + st.subName + "/..."},
1866 })
1867}
1868
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001869// runTests is only called for builders which support a split make/run
1870// (should be everything, at least soon). Currently (2015-05-27) iOS
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07001871// and Android and Nacl do not.
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001872func (st *buildStatus) runTests(helpers <-chan *buildlet.Client) (remoteErr, err error) {
1873 testNames, err := st.distTestList()
1874 if err != nil {
1875 return nil, fmt.Errorf("distTestList: %v", err)
1876 }
1877 set := st.newTestSet(testNames)
1878 st.logEventTime("starting_tests", fmt.Sprintf("%d tests", len(set.items)))
1879 startTime := time.Now()
1880
1881 // We use our original buildlet to run the tests in order, to
1882 // make the streaming somewhat smooth and not incredibly
1883 // lumpy. The rest of the buildlets run the largest tests
1884 // first (critical path scheduling).
1885 go func() {
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07001886 for {
1887 tis, ok := set.testsToRunInOrder()
1888 if !ok {
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07001889 select {
1890 case <-st.donec:
1891 return
1892 case <-time.After(5 * time.Second):
1893 }
1894 continue
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07001895 }
1896 goroot := "" // no need to override; main buildlet's GOROOT is baked into binaries
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001897 st.runTestsOnBuildlet(st.bc, tis, goroot)
1898 }
1899 }()
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001900 go func() {
1901 for helper := range helpers {
1902 go func(bc *buildlet.Client) {
1903 defer st.logEventTime("closed_helper", bc.IPPort())
1904 defer bc.Close()
Brad Fitzpatrick378fb292015-06-10 13:59:42 -07001905 defer nukeIfBroken(bc)
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001906 if devPause {
1907 defer time.Sleep(5 * time.Minute)
1908 defer st.logEventTime("DEV_HELPER_SLEEP", bc.IPPort())
1909 }
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07001910 st.logEventTime("got_helper", bc.String())
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001911 if err := bc.PutTarFromURL(st.snapshotURL(), "go"); err != nil {
1912 log.Printf("failed to extract snapshot for helper %s: %v", bc.IPPort(), err)
1913 return
1914 }
1915 workDir, err := bc.WorkDir()
1916 if err != nil {
1917 log.Printf("error discovering workdir for helper %s: %v", bc.IPPort(), err)
1918 return
1919 }
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07001920 st.logEventTime("setup_helper", bc.String())
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07001921 goroot := st.conf.FilePathJoin(workDir, "go")
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07001922 for !bc.IsBroken() {
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07001923 tis, ok := set.testsToRunBiggestFirst()
1924 if !ok {
1925 st.logEventTime("biggest_tests_complete", bc.IPPort())
1926 return
1927 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001928 st.runTestsOnBuildlet(bc, tis, goroot)
1929 }
1930 }(helper)
1931 }
1932 }()
1933
1934 var lastBanner string
1935 var serialDuration time.Duration
1936 for _, ti := range set.items {
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07001937 AwaitDone:
1938 for {
1939 select {
1940 case <-ti.done: // wait for success
1941 break AwaitDone
1942 case <-time.After(30 * time.Second):
1943 st.logEventTime("still_waiting_on_test", ti.name)
1944 }
1945 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001946
1947 serialDuration += ti.execDuration
1948 if len(ti.output) > 0 {
1949 banner, out := parseOutputAndBanner(ti.output)
1950 if banner != lastBanner {
1951 lastBanner = banner
1952 fmt.Fprintf(st, "\n##### %s\n", banner)
1953 }
1954 if devCluster {
1955 out = bytes.TrimSuffix(out, nl)
1956 st.Write(out)
1957 fmt.Fprintf(st, " (shard %s; par=%d)\n", ti.shardIPPort, ti.groupSize)
1958 } else {
1959 st.Write(out)
1960 }
1961 }
1962
1963 if ti.remoteErr != nil {
1964 set.cancelAll()
1965 return fmt.Errorf("dist test failed: %s: %v", ti.name, ti.remoteErr), nil
1966 }
1967 }
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07001968 elapsed := time.Since(startTime)
1969 var msg string
1970 if st.conf.NumTestHelpers > 0 {
1971 msg = fmt.Sprintf("took %v; aggregate %v; saved %v", elapsed, serialDuration, serialDuration-elapsed)
1972 } else {
1973 msg = fmt.Sprintf("took %v", elapsed)
1974 }
1975 st.logEventTime("tests_complete", msg)
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07001976 fmt.Fprintf(st, "\nAll tests passed.\n")
1977 return nil, nil
1978}
1979
const (
	// banner is handed to dist through its --banner flag (see
	// runTestsOnBuildlet).
	banner = "XXXBANNERXXX:" // flag passed to dist
	// bannerPrefix is what parseOutputAndBanner looks for at the very
	// start of a test's output.
	bannerPrefix = "\n" + banner // with the newline added by dist
)

// bannerPrefixBytes is bannerPrefix as a []byte, for use with the
// bytes package.
var bannerPrefixBytes = []byte(bannerPrefix)
1986
1987func parseOutputAndBanner(b []byte) (banner string, out []byte) {
1988 if bytes.HasPrefix(b, bannerPrefixBytes) {
1989 b = b[len(bannerPrefixBytes):]
1990 nl := bytes.IndexByte(b, '\n')
1991 if nl != -1 {
1992 banner = string(b[:nl])
1993 b = b[nl+1:]
1994 }
1995 }
1996 return banner, b
1997}
1998
// maxTestExecErrors is the number of test execution failures at which
// we give up and stop trying and instead permanently fail the test.
// Note that this is not related to whether the test failed remotely,
// but whether we were unable to start or complete watching it run.
// (A communication error)
const maxTestExecErrors = 3
2005
// execTimeout returns the timeout to use for one Exec call that runs
// testNames. For now every set of tests gets the same ten minutes.
func execTimeout(testNames []string) time.Duration {
	// TODO(bradfitz): something smarter probably.
	const fixed = 10 * time.Minute
	return fixed
}
2010
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002011// runTestsOnBuildlet runs tis on bc, using the optional goroot environment variable.
2012func (st *buildStatus) runTestsOnBuildlet(bc *buildlet.Client, tis []*testItem, goroot string) {
2013 names := make([]string, len(tis))
2014 for i, ti := range tis {
2015 names[i] = ti.name
2016 if i > 0 && !strings.HasPrefix(ti.name, "go_test:") {
2017 panic("only go_test:* tests may be merged")
2018 }
2019 }
2020 which := fmt.Sprintf("%s: %v", bc.IPPort(), names)
2021 st.logEventTime("start_tests", which)
2022
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002023 var buf bytes.Buffer
2024 t0 := time.Now()
2025 remoteErr, err := bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{
2026 // We set Dir to "." instead of the default ("go/bin") so when the dist tests
2027 // try to run os/exec.Command("go", "test", ...), the LookPath of "go" doesn't
2028 // return "./go.exe" (which exists in the current directory: "go/bin") and then
2029 // fail when dist tries to run the binary in dir "$GOROOT/src", since
2030 // "$GOROOT/src" + "./go.exe" doesn't exist. Perhaps LookPath should return
2031 // an absolute path.
2032 Dir: ".",
2033 Output: &buf, // see "maybe stream lines" TODO below
2034 ExtraEnv: append(st.conf.Env(), "GOROOT="+goroot),
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07002035 Timeout: execTimeout(names),
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002036 Path: []string{"$WORKDIR/go/bin", "$PATH"},
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07002037 Args: append([]string{
2038 "tool", "dist", "test", "--no-rebuild", "--banner=" + banner,
2039 }, names...),
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002040 })
2041 summary := "ok"
2042 if err != nil {
2043 summary = "commErr=" + err.Error()
2044 } else if remoteErr != nil {
2045 summary = "test failed remotely"
2046 }
2047 execDuration := time.Since(t0)
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07002048 st.logEventTime("end_tests", fmt.Sprintf("%s; %s (test exec = %v)", which, summary, execDuration))
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002049 if err != nil {
2050 for _, ti := range tis {
2051 ti.numFail++
2052 st.logf("Execution error running %s on %s: %v (numFails = %d)", ti.name, bc, err, ti.numFail)
2053 if ti.numFail >= maxTestExecErrors {
2054 msg := fmt.Sprintf("Failed to schedule %q test after %d tries.\n", ti.name, maxTestExecErrors)
2055 ti.output = []byte(msg)
2056 ti.remoteErr = errors.New(msg)
2057 close(ti.done)
2058 } else {
2059 ti.retry()
2060 }
2061 }
2062 return
2063 }
2064
2065 out := buf.Bytes()
2066 out = bytes.Replace(out, []byte("\nALL TESTS PASSED (some were excluded)\n"), nil, 1)
2067 out = bytes.Replace(out, []byte("\nALL TESTS PASSED\n"), nil, 1)
2068
2069 for _, ti := range tis {
2070 ti.output = out
2071 ti.remoteErr = remoteErr
2072 ti.execDuration = execDuration
2073 ti.groupSize = len(tis)
2074 ti.shardIPPort = bc.IPPort()
2075 close(ti.done)
2076
2077 // After the first one, make the rest succeed with no output.
2078 // TODO: maybe stream lines (set Output to a line-reading
2079 // Writer instead of &buf). for now we just wait for them in
2080 // ~10 second batches. Doesn't look as smooth on the output,
2081 // though.
2082 out = nil
2083 remoteErr = nil
2084 execDuration = 0
2085 }
2086}
2087
// testSet is the set of dist tests belonging to one build, together
// with the two schedules from which workers claim them.
type testSet struct {
	st    *buildStatus // the build these tests belong to
	items []*testItem  // one entry per test name, in the order given to newTestSet

	mu sync.Mutex // held by testsToRunInOrder and testsToRunBiggestFirst
	// inOrder and biggestFirst group the items into chunks that are
	// run together. Each is built on first use (see initInOrder and
	// initBiggestFirst).
	inOrder      [][]*testItem
	biggestFirst [][]*testItem
}
2096
2097// cancelAll cancels all pending tests.
2098func (s *testSet) cancelAll() {
2099 for _, ti := range s.items {
2100 ti.tryTake() // ignore return value
2101 }
2102}
2103
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002104func (s *testSet) testsToRunInOrder() (chunk []*testItem, ok bool) {
2105 s.mu.Lock()
2106 defer s.mu.Unlock()
2107 if s.inOrder == nil {
2108 s.initInOrder()
2109 }
2110 return s.testsFromSlice(s.inOrder)
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002111}
2112
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002113func (s *testSet) testsToRunBiggestFirst() (chunk []*testItem, ok bool) {
2114 s.mu.Lock()
2115 defer s.mu.Unlock()
2116 if s.biggestFirst == nil {
2117 s.initBiggestFirst()
2118 }
2119 return s.testsFromSlice(s.biggestFirst)
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002120}
2121
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002122func (s *testSet) testsFromSlice(chunkList [][]*testItem) (chunk []*testItem, ok bool) {
2123 for _, candChunk := range chunkList {
2124 for _, ti := range candChunk {
2125 if ti.tryTake() {
2126 chunk = append(chunk, ti)
2127 }
2128 }
2129 if len(chunk) > 0 {
2130 return chunk, true
2131 }
2132 }
2133 return nil, false
2134}
2135
2136func (s *testSet) initInOrder() {
2137 names := make([]string, len(s.items))
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002138 namedItem := map[string]*testItem{}
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002139 for i, ti := range s.items {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002140 names[i] = ti.name
2141 namedItem[ti.name] = ti
2142 }
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002143
2144 // First do the go_test:* ones. partitionGoTests
2145 // only returns those, which are the ones we merge together.
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002146 stdSets := partitionGoTests(names)
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002147 for _, set := range stdSets {
2148 tis := make([]*testItem, len(set))
2149 for i, name := range set {
2150 tis[i] = namedItem[name]
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002151 }
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002152 s.inOrder = append(s.inOrder, tis)
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002153 }
2154
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002155 // Then do the misc tests, which are always by themselves.
2156 // (No benefit to merging them)
2157 for _, ti := range s.items {
2158 if !strings.HasPrefix(ti.name, "go_test:") {
2159 s.inOrder = append(s.inOrder, []*testItem{ti})
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002160 }
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002161 }
2162}
2163
2164func (s *testSet) initBiggestFirst() {
2165 items := append([]*testItem(nil), s.items...)
2166 sort.Sort(sort.Reverse(byTestDuration(items)))
2167 for _, item := range items {
2168 s.biggestFirst = append(s.biggestFirst, []*testItem{item})
2169 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002170}
2171
// testItem is a single dist test within a testSet, along with the
// state used to hand it to exactly one worker at a time and to publish
// its result.
type testItem struct {
	set      *testSet
	name     string        // "go_test:sort"
	duration time.Duration // optional approximate size

	take chan token // buffered size 1: sending takes ownership of rest of fields:

	done    chan token // closed when done; the result fields below are written before it is closed
	numFail int        // how many times it's failed to execute

	// groupSize is the number of tests which were run together
	// along with this one with "go dist test".
	// This is 1 for non-std/cmd tests, and usually >1 for std/cmd tests.
	groupSize   int
	shardIPPort string // buildlet's IPPort, for debugging

	// the following are only set for the first item in a group:
	output       []byte
	remoteErr    error         // real test failure (not a communications failure)
	execDuration time.Duration // actual time
}
2193
2194func (ti *testItem) tryTake() bool {
2195 select {
2196 case ti.take <- token{}:
2197 return true
2198 default:
2199 return false
2200 }
2201}
2202
2203func (ti *testItem) isDone() bool {
2204 select {
2205 case <-ti.done:
2206 return true
2207 default:
2208 return false
2209 }
2210}
2211
// retry reschedules the test to run again, if a machine died before
// or during execution, so its results aren't yet known.
// The caller must own the 'take' semaphore; calling retry without
// owning it blocks until some other caller takes the item.
func (ti *testItem) retry() {
	// release it to make it available for somebody else to try later:
	<-ti.take
}
2219
2220type byTestDuration []*testItem
2221
2222func (s byTestDuration) Len() int { return len(s) }
2223func (s byTestDuration) Less(i, j int) bool { return s[i].duration < s[j].duration }
2224func (s byTestDuration) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
2225
// eventAndTime is one entry in a build's event log, as recorded by
// logEventTime.
type eventAndTime struct {
	t    time.Time // when the event was logged
	evt  string    // event name, e.g. "starting_tests"
	text string    // optional free-form detail; may be empty
}
2231
// buildStatus is the status of a build.
//
// It also acts as the build's log: Write appends to output, and
// logEventTime appends to events.
type buildStatus struct {
	// Immutable:
	builderRev
	conf      dashboard.BuildConfig
	startTime time.Time     // actually time of newBuild (~same thing)
	trySet    *trySet       // or nil
	donec     chan struct{} // closed when done (by setDone)

	onceInitHelpers sync.Once // guards call of onceInitHelpersFunc, to init:
	helpers         <-chan *buildlet.Client

	mu              sync.Mutex       // guards following
	failURL         string           // if non-empty, permanent URL of failure
	bc              *buildlet.Client // nil initially, until pool returns one
	done            time.Time        // finished running; zero while the build is still running
	succeeded       bool             // set when done
	output          bytes.Buffer     // stdout and stderr
	startedPinging  bool             // started pinging the go dashboard
	events          []eventAndTime   // appended to by logEventTime
	watcher         []*logWatcher    // clients registered through watchLogs
	useSnapshotMemo *bool
}
2255
2256func (st *buildStatus) setDone(succeeded bool) {
2257 st.mu.Lock()
2258 defer st.mu.Unlock()
2259 st.succeeded = succeeded
2260 st.done = time.Now()
Andrew Gerrand5f73aab2015-03-03 10:30:18 +11002261 st.notifyWatchersLocked(true)
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01002262 close(st.donec)
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002263}
2264
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08002265func (st *buildStatus) isRunning() bool {
2266 st.mu.Lock()
2267 defer st.mu.Unlock()
Brad Fitzpatrickec2973a2015-03-04 07:30:35 -08002268 return st.isRunningLocked()
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08002269}
2270
Brad Fitzpatrickec2973a2015-03-04 07:30:35 -08002271func (st *buildStatus) isRunningLocked() bool { return st.done.IsZero() }
2272
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002273func (st *buildStatus) logf(format string, args ...interface{}) {
2274 log.Printf("[build %s %s]: %s", st.name, st.rev, fmt.Sprintf(format, args...))
2275}
2276
2277func (st *buildStatus) logEventTime(event string, optText ...string) {
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002278 st.mu.Lock()
2279 defer st.mu.Unlock()
Brad Fitzpatricke8a3d432015-06-10 18:10:31 -07002280 switch event {
2281 case "creating_gce_instance", "got_machine", "got_buildlet":
2282 if !st.startedPinging {
2283 st.startedPinging = true
2284 go st.pingDashboard()
2285 }
2286 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002287 var text string
2288 if len(optText) > 0 {
2289 if len(optText) > 1 {
2290 panic("usage")
2291 }
2292 text = optText[0]
2293 }
2294 st.events = append(st.events, eventAndTime{
2295 t: time.Now(),
2296 evt: event,
2297 text: text,
2298 })
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002299}
2300
Brad Fitzpatrickf72e62c2015-01-04 21:46:23 -08002301func (st *buildStatus) hasEvent(event string) bool {
2302 st.mu.Lock()
2303 defer st.mu.Unlock()
2304 for _, e := range st.events {
2305 if e.evt == event {
2306 return true
2307 }
2308 }
2309 return false
2310}
2311
David Crawshawdd0cf9f2015-04-29 17:58:09 -04002312// HTMLStatusLine returns the HTML to show within the <pre> block on
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002313// the main page's list of active builds.
David Crawshawdd0cf9f2015-04-29 17:58:09 -04002314func (st *buildStatus) HTMLStatusLine() template.HTML {
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002315 st.mu.Lock()
2316 defer st.mu.Unlock()
2317
2318 urlPrefix := "https://go-review.googlesource.com/#/q/"
2319 if strings.Contains(st.name, "gccgo") {
2320 urlPrefix = "https://code.google.com/p/gofrontend/source/detail?r="
2321 }
2322
2323 var buf bytes.Buffer
2324 fmt.Fprintf(&buf, "<a href='https://github.com/golang/go/wiki/DashboardBuilders'>%s</a> rev <a href='%s%s'>%s</a>",
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07002325 st.name, urlPrefix, st.rev, st.rev[:8])
Andrew Gerrand234725b2015-06-04 16:45:17 -07002326 if st.isSubrepo() {
2327 fmt.Fprintf(&buf, " (sub-repo %s rev <a href='%s%s'>%s</a>)",
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07002328 st.subName, urlPrefix, st.subRev, st.subRev[:8])
Andrew Gerrand234725b2015-06-04 16:45:17 -07002329 }
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08002330 if ts := st.trySet; ts != nil {
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07002331 fmt.Fprintf(&buf, " (<a href='/try?commit=%v'>trybot set</a> for <a href='https://go-review.googlesource.com/#/q/%s'>%s</a>)",
2332 ts.Commit[:8],
Brad Fitzpatrick421541f2015-02-11 22:57:39 -08002333 ts.ChangeID, ts.ChangeID[:8])
2334 }
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002335
2336 if st.done.IsZero() {
2337 buf.WriteString(", running")
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07002338 fmt.Fprintf(&buf, "; <a href='%s'>build log</a>; %s", st.logsURLLocked(), html.EscapeString(st.bc.String()))
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002339 } else if st.succeeded {
2340 buf.WriteString(", succeeded")
2341 } else {
2342 buf.WriteString(", failed")
Brad Fitzpatrickd4ea0142015-06-12 10:31:58 -07002343 fmt.Fprintf(&buf, "; <a href='%s'>build log</a>; %s", st.logsURLLocked(), html.EscapeString(st.bc.String()))
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002344 }
2345
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002346 t := st.done
2347 if t.IsZero() {
Brad Fitzpatrick7b2f9d72015-03-27 17:45:12 +01002348 t = st.startTime
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002349 }
2350 fmt.Fprintf(&buf, ", %v ago\n", time.Since(t))
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -08002351 st.writeEventsLocked(&buf, true)
David Crawshawdd0cf9f2015-04-29 17:58:09 -04002352 return template.HTML(buf.String())
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -08002353}
2354
Brad Fitzpatrick777a5bf2015-06-09 12:17:36 -07002355func (st *buildStatus) logsURLLocked() string {
Andrew Gerrand234725b2015-06-04 16:45:17 -07002356 host := "farmer.golang.org"
2357 if devCluster {
2358 host = externalIP
Brad Fitzpatrick777a5bf2015-06-09 12:17:36 -07002359 }
Andrew Gerrand234725b2015-06-04 16:45:17 -07002360 u := fmt.Sprintf("http://%v/temporarylogs?name=%s&rev=%s&st=%p", host, st.name, st.rev, st)
2361 if st.isSubrepo() {
2362 u += fmt.Sprintf("&subName=%v&subRev=%v", st.subName, st.subRev)
2363 }
2364 return u
Brad Fitzpatrick36d41e92015-01-14 12:31:04 -08002365}
2366
2367// st.mu must be held.
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002368func (st *buildStatus) writeEventsLocked(w io.Writer, htmlMode bool) {
2369 var lastT time.Time
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002370 for i, evt := range st.events {
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002371 lastT = evt.t
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002372 var elapsed string
2373 if i != 0 {
2374 elapsed = fmt.Sprintf("+%0.1fs", evt.t.Sub(st.events[i-1].t).Seconds())
2375 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002376 e := evt.evt
2377 text := evt.text
2378 if htmlMode {
2379 if e == "running_exec" {
Brad Fitzpatrick777a5bf2015-06-09 12:17:36 -07002380 e = fmt.Sprintf("<a href='%s'>%s</a>", st.logsURLLocked(), e)
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002381 }
2382 e = "<b>" + e + "</b>"
2383 text = "<i>" + html.EscapeString(text) + "</i>"
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002384 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002385 fmt.Fprintf(w, " %7s %v %s %s\n", elapsed, evt.t.Format(time.RFC3339), e, text)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002386 }
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002387 if st.isRunningLocked() {
2388 fmt.Fprintf(w, " %7s (now)\n", fmt.Sprintf("+%0.1fs", time.Since(lastT).Seconds()))
2389 }
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002390
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002391}
2392
2393func (st *buildStatus) logs() string {
2394 st.mu.Lock()
Andrew Gerrand5f73aab2015-03-03 10:30:18 +11002395 defer st.mu.Unlock()
2396 return st.output.String()
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002397}
2398
2399func (st *buildStatus) Write(p []byte) (n int, err error) {
2400 st.mu.Lock()
2401 defer st.mu.Unlock()
2402 const maxBufferSize = 2 << 20 // 2MB of output is way more than we expect.
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002403 plen := len(p)
Brad Fitzpatrick9e9c0a802015-01-02 15:00:54 -08002404 if st.output.Len()+len(p) > maxBufferSize {
2405 p = p[:maxBufferSize-st.output.Len()]
2406 }
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002407 st.output.Write(p) // bytes.Buffer can't fail
Andrew Gerrand5f73aab2015-03-03 10:30:18 +11002408 st.notifyWatchersLocked(false)
Brad Fitzpatrick8831f362015-01-02 17:16:44 -08002409 return plen, nil
2410}
2411
// logWatcher is the per-client state for one consumer following the log
// output of a running build.
type logWatcher struct {
	// ch carries chunks of log output to the client.
	ch chan []byte

	// offset is how many bytes of the build's output have already been
	// sent on ch; offset == len(buf) means the client is up to date.
	offset int
}
2417
2418// watchLogs returns a channel on which the build's logs is sent.
2419// When the build is complete the channel is closed.
2420func (st *buildStatus) watchLogs() <-chan []byte {
2421 st.mu.Lock()
2422 defer st.mu.Unlock()
2423
2424 ch := make(chan []byte, 10) // room for a few log writes
2425 ch <- st.output.Bytes()
Brad Fitzpatrickec2973a2015-03-04 07:30:35 -08002426 if !st.isRunningLocked() {
Andrew Gerrand5f73aab2015-03-03 10:30:18 +11002427 close(ch)
2428 return ch
2429 }
2430
2431 st.watcher = append(st.watcher, &logWatcher{
2432 ch: ch,
2433 offset: st.output.Len(),
2434 })
2435 return ch
2436}
2437
2438// unregisterWatcher removes the provided channel from the list of watchers,
2439// so that it receives no further log data.
2440func (st *buildStatus) unregisterWatcher(ch <-chan []byte) {
2441 st.mu.Lock()
2442 defer st.mu.Unlock()
2443
2444 for i, w := range st.watcher {
2445 if w.ch == ch {
2446 st.watcher = append(st.watcher[:i], st.watcher[i+1:]...)
2447 break
2448 }
2449 }
2450}
2451
2452// notifyWatchersLocked pushes any new log data to watching clients.
2453// If done is true it closes any watcher channels.
2454//
2455// NOTE: st.mu must be held.
2456func (st *buildStatus) notifyWatchersLocked(done bool) {
2457 l := st.output.Len()
2458 for _, w := range st.watcher {
2459 if w.offset < l {
2460 select {
2461 case w.ch <- st.output.Bytes()[w.offset:]:
2462 w.offset = l
2463 default:
2464 // If the receiver isn't ready, drop the write.
2465 }
2466 }
2467 if done {
2468 close(w.ch)
2469 }
2470 }
2471}
2472
// versionTgz returns a gzip-compressed tar archive containing a single
// file named VERSION (mode 0644) whose contents are "devel " followed by
// rev.
//
// rev is used verbatim: it is never interpreted as a format string, so a
// revision containing '%' is written unchanged.
func versionTgz(rev string) io.Reader {
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	tw := tar.NewWriter(zw)

	// Writing to a bytes.Buffer should never fail, so check
	// errors with an explosion:
	check := func(err error) {
		if err != nil {
			panic("previously assumed to never fail: " + err.Error())
		}
	}

	// Plain concatenation, not fmt.Sprintf: rev is data, not a format.
	contents := "devel " + rev
	check(tw.WriteHeader(&tar.Header{
		Name: "VERSION",
		Mode: 0644,
		Size: int64(len(contents)),
	}))
	_, err := io.WriteString(tw, contents)
	check(err)
	// Close the tar writer before the gzip writer so the tar trailer is
	// flushed into the compressed stream.
	check(tw.Close())
	check(zw.Close())
	return bytes.NewReader(buf.Bytes())
}
Brad Fitzpatrick79f3fc02015-05-27 21:51:25 -07002498
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002499var sourceGroup singleflight.Group
2500
Andrew Gerrand234725b2015-06-04 16:45:17 -07002501var sourceCache = lru.New(40) // git rev -> []byte
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002502
2503// repo is go.googlesource.com repo ("go", "net", etc)
2504// rev is git revision.
2505func getSourceTgz(el eventTimeLogger, repo, rev string) (tgz io.Reader, err error) {
2506 fromCache := false
Andrew Gerrand234725b2015-06-04 16:45:17 -07002507 key := fmt.Sprintf("%v-%v", repo, rev)
2508 vi, err, shared := sourceGroup.Do(key, func() (interface{}, error) {
2509 if tgzBytes, ok := sourceCache.Get(key); ok {
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002510 fromCache = true
2511 return tgzBytes, nil
2512 }
2513
2514 for i := 0; i < 10; i++ {
Andrew Gerrand234725b2015-06-04 16:45:17 -07002515 el.logEventTime("fetching_source", fmt.Sprintf("%v from watcher", key))
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002516 tgzBytes, err := getSourceTgzFromWatcher(repo, rev)
2517 if err == nil {
Andrew Gerrand234725b2015-06-04 16:45:17 -07002518 sourceCache.Add(key, tgzBytes)
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002519 return tgzBytes, nil
2520 }
2521 log.Printf("Error fetching source %s/%s from watcher (after %v uptime): %v",
2522 repo, rev, time.Since(processStartTime), err)
2523 // Wait for watcher to start up. Give it a minute until
2524 // we try Gerrit.
2525 time.Sleep(6 * time.Second)
2526 }
2527
Andrew Gerrand234725b2015-06-04 16:45:17 -07002528 el.logEventTime("fetching_source", fmt.Sprintf("%v from gerrit", key))
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002529 tgzBytes, err := getSourceTgzFromGerrit(repo, rev)
2530 if err == nil {
Andrew Gerrand234725b2015-06-04 16:45:17 -07002531 sourceCache.Add(key, tgzBytes)
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002532 }
2533 return tgzBytes, err
2534 })
2535 if err != nil {
2536 return nil, err
2537 }
Andrew Gerrand234725b2015-06-04 16:45:17 -07002538 el.logEventTime("got_source", fmt.Sprintf("%v cache=%v shared=%v", key, fromCache, shared))
Brad Fitzpatrick1b1e0862015-06-04 18:25:50 -07002539 return bytes.NewReader(vi.([]byte)), nil
2540}
2541
2542func getSourceTgzFromGerrit(repo, rev string) (tgz []byte, err error) {
2543 return getSourceTgzFromURL("gerrit", repo, rev, "https://go.googlesource.com/"+repo+"/+archive/"+rev+".tar.gz")
2544}
2545
2546func getSourceTgzFromWatcher(repo, rev string) (tgz []byte, err error) {
2547 return getSourceTgzFromURL("watcher", repo, rev, "http://"+gitArchiveAddr+"/"+repo+".tar.gz?rev="+rev)
2548}
2549
// getSourceTgzFromURL fetches urlStr with an HTTP GET and returns the
// response body.
//
// source, repo and rev are used only to label error messages. A non-2xx
// status is an error that includes up to 4KB of the response body. A
// body larger than maxSize is rejected rather than truncated.
func getSourceTgzFromURL(source, repo, rev, urlStr string) (tgz []byte, err error) {
	const maxSize = 25 << 20 // seems unlikely; go source is 7.8MB on 2015-06-15

	resp, err := http.Get(urlStr)
	if err != nil {
		return nil, fmt.Errorf("fetching %s/%s from %s: %v", repo, rev, source, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		// Best effort: a read error here only shortens the diagnostic.
		snippet, _ := ioutil.ReadAll(io.LimitReader(resp.Body, 4<<10))
		return nil, fmt.Errorf("fetching %s/%s from %s: %v; body: %s", repo, rev, source, resp.Status, snippet)
	}

	// Read one byte past the limit so an oversized body is detectable.
	data, readErr := ioutil.ReadAll(io.LimitReader(resp.Body, maxSize+1))
	if readErr == nil && len(data) > maxSize {
		readErr = fmt.Errorf("body over %d bytes", maxSize)
	}
	if readErr != nil {
		return nil, fmt.Errorf("reading %s/%s from %s: %v", repo, rev, source, readErr)
	}
	return data, nil
}
2570
Brad Fitzpatrick378fb292015-06-10 13:59:42 -07002571func nukeIfBroken(bc *buildlet.Client) {
2572 if bc.IsBroken() {
2573 // It may not have come from the reverse pool, but it's harmless if
2574 // it didn't.
2575 reversePool.nukeBuildlet(bc)
2576 }
2577}
2578
// nl is a single newline byte, kept as a package-level slice.
var nl = []byte{'\n'}