Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 1 | // Copyright 2014 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // The coordinator runs on GCE and coordinates builds in Docker containers. |
Andrew Gerrand | fa8373a | 2015-01-21 17:25:37 +1100 | [diff] [blame] | 6 | package main // import "golang.org/x/build/cmd/coordinator" |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 7 | |
| 8 | import ( |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 9 | "archive/tar" |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 10 | "bytes" |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 11 | "compress/gzip" |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 12 | "crypto/sha1" |
David Crawshaw | 02e988a | 2015-03-30 21:54:04 -0400 | [diff] [blame] | 13 | "crypto/tls" |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 14 | "encoding/json" |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 15 | "errors" |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 16 | "flag" |
| 17 | "fmt" |
Brad Fitzpatrick | d9bbf3c | 2014-12-15 11:51:42 +1100 | [diff] [blame] | 18 | "html" |
David Crawshaw | dd0cf9f | 2015-04-29 17:58:09 -0400 | [diff] [blame] | 19 | "html/template" |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 20 | "io" |
| 21 | "io/ioutil" |
| 22 | "log" |
David Crawshaw | 02e988a | 2015-03-30 21:54:04 -0400 | [diff] [blame] | 23 | "net" |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 24 | "net/http" |
David Crawshaw | 581ddd1 | 2015-04-06 08:09:20 -0400 | [diff] [blame] | 25 | "os" |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 26 | "path" |
Brad Fitzpatrick | 578a4d2 | 2015-02-24 21:47:40 -0800 | [diff] [blame] | 27 | "runtime" |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 28 | "sort" |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 29 | "strings" |
| 30 | "sync" |
| 31 | "time" |
Brad Fitzpatrick | b52c224 | 2014-12-30 16:19:06 -0800 | [diff] [blame] | 32 | |
Brad Fitzpatrick | 20d8483 | 2015-01-21 10:03:07 -0800 | [diff] [blame] | 33 | "camlistore.org/pkg/syncutil" |
David Crawshaw | 66c36dd | 2015-04-23 10:23:22 -0400 | [diff] [blame] | 34 | |
Andrew Gerrand | fa8373a | 2015-01-21 17:25:37 +1100 | [diff] [blame] | 35 | "golang.org/x/build/buildlet" |
| 36 | "golang.org/x/build/dashboard" |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 37 | "golang.org/x/build/gerrit" |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 38 | "golang.org/x/build/internal/lru" |
| 39 | "golang.org/x/build/internal/singleflight" |
Andrew Gerrand | fa8373a | 2015-01-21 17:25:37 +1100 | [diff] [blame] | 40 | "golang.org/x/build/types" |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 41 | "google.golang.org/cloud/storage" |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 42 | ) |
| 43 | |
const (
	// subrepoPrefix is the import path prefix of the golang.org/x
	// sub-repositories.
	subrepoPrefix = "golang.org/x/"

	// devPause is a debug option to pause for 5 minutes after the build
	// finishes before destroying buildlets.
	devPause = false
)

var (
	// processStartTime is the time at which this package was initialized.
	processStartTime = time.Now()

	// Version is the coordinator version; it is set by linker -X.
	Version string
)
| 53 | |
Brad Fitzpatrick | dfe8286 | 2015-03-01 09:23:57 -0800 | [diff] [blame] | 54 | func init() { |
| 55 | // Disabled until we have test sharding. This takes 85+ minutes. |
| 56 | // Test sharding is https://github.com/golang/go/issues/10029 |
| 57 | delete(dashboard.Builders, "linux-arm-qemu") |
| 58 | } |
| 59 | |
// masterKeyFile is the -masterkey flag: the path to the builder master key.
var masterKeyFile = flag.String("masterkey", "", "Path to builder master key. Else fetched using GCE project attribute 'builder-master-key'.")

// cleanZones is the -zones flag: the comma-separated zones that are
// periodically cleaned of stale build VMs.
//
// TODO(bradfitz): remove this list and just query it from the compute API:
// http://godoc.org/google.golang.org/api/compute/v1#RegionsService.Get
// and Region.Zones: http://godoc.org/google.golang.org/api/compute/v1#Region
var cleanZones = flag.String("zones", "us-central1-a,us-central1-b,us-central1-f", "Comma-separated list of zones to periodically clean of stale build VMs (ones that failed to shut themselves down)")

// mode is the -mode flag: "dev", "prod", or "" to auto-detect.
var mode = flag.String("mode", "", "valid modes are 'dev', 'prod', or '' for auto-detect")
| 70 | |
Andrew Gerrand | a9469a8 | 2015-05-27 14:15:18 -0700 | [diff] [blame] | 71 | func buildLogBucket() string { |
| 72 | return devPrefix() + "go-build-log" |
| 73 | } |
| 74 | |
Andrew Gerrand | c53a577 | 2015-05-27 14:29:36 -0700 | [diff] [blame] | 75 | func snapBucket() string { |
| 76 | return devPrefix() + "go-build-snap" |
| 77 | } |
| 78 | |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 79 | // LOCK ORDER: |
| 80 | // statusMu, buildStatus.mu, trySet.mu |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 81 | |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 82 | var ( |
| 83 | startTime = time.Now() |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 84 | |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 85 | statusMu sync.Mutex // guards the following four structures; see LOCK ORDER comment above |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 86 | status = map[builderRev]*buildStatus{} |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 87 | statusDone []*buildStatus // finished recently, capped to maxStatusDone |
| 88 | tries = map[tryKey]*trySet{} // trybot builds |
| 89 | tryList []tryKey |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 90 | |
| 91 | // subrepoHead contains the hashes of the latest master HEAD |
| 92 | // for each sub-repo. It is populated by findWork. |
| 93 | subrepoHead = struct { |
| 94 | sync.Mutex |
| 95 | m map[string]string // [repo]hash |
| 96 | }{m: map[string]string{}} |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 97 | ) |
| 98 | |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 99 | // tryBuilders must be VMs. The Docker container builds are going away. |
| 100 | var tryBuilders []dashboard.BuildConfig |
| 101 | |
| 102 | func init() { |
| 103 | tryList := []string{ |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 104 | "misc-compile", |
David Crawshaw | 6c597e3 | 2015-05-01 12:45:09 -0400 | [diff] [blame] | 105 | "darwin-amd64-10_10", |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 106 | "linux-386", |
| 107 | "linux-amd64", |
| 108 | "linux-amd64-race", |
| 109 | "freebsd-386-gce101", |
| 110 | "freebsd-amd64-gce101", |
| 111 | "windows-386-gce", |
| 112 | "windows-amd64-gce", |
| 113 | "openbsd-386-gce56", |
| 114 | "openbsd-amd64-gce56", |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 115 | "plan9-386", |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 116 | "nacl-386", |
| 117 | "nacl-amd64p32", |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 118 | /* "linux-arm-shard_test", |
| 119 | "linux-arm-shard_std_am", |
| 120 | "linux-arm-shard_std_nz", |
| 121 | "linux-arm-shard_runtimecpu", |
| 122 | "linux-arm-shard_cgotest", |
| 123 | "linux-arm-shard_misc", |
| 124 | */ |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 125 | } |
| 126 | for _, bname := range tryList { |
| 127 | conf, ok := dashboard.Builders[bname] |
Brad Fitzpatrick | 83455d1 | 2015-02-19 16:14:20 -0800 | [diff] [blame] | 128 | if ok { |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 129 | tryBuilders = append(tryBuilders, conf) |
| 130 | } else { |
Brad Fitzpatrick | 83455d1 | 2015-02-19 16:14:20 -0800 | [diff] [blame] | 131 | log.Printf("ignoring invalid try builder config %q", bname) |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 132 | } |
| 133 | } |
| 134 | } |
| 135 | |
// maxStatusDone caps how many finished builds are kept in statusDone.
const maxStatusDone = 30

// vmDeleteTimeout is how long before we delete a VM.
// In practice this need only be as long as the slowest
// builder (plan9 currently), because on startup this program
// already deletes all buildlets it doesn't know about
// (i.e. ones from a previous instance of the coordinator).
const vmDeleteTimeout = 45 * time.Minute
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 146 | |
David Crawshaw | 02e988a | 2015-03-30 21:54:04 -0400 | [diff] [blame] | 147 | func readGCSFile(name string) ([]byte, error) { |
David Crawshaw | 581ddd1 | 2015-04-06 08:09:20 -0400 | [diff] [blame] | 148 | if *mode == "dev" { |
| 149 | b, ok := testFiles[name] |
| 150 | if !ok { |
| 151 | return nil, &os.PathError{ |
| 152 | Op: "open", |
| 153 | Path: name, |
| 154 | Err: os.ErrNotExist, |
| 155 | } |
| 156 | } |
| 157 | return []byte(b), nil |
| 158 | } |
| 159 | |
Andrew Gerrand | a9469a8 | 2015-05-27 14:15:18 -0700 | [diff] [blame] | 160 | r, err := storage.NewReader(serviceCtx, devPrefix()+"go-builder-data", name) |
David Crawshaw | 02e988a | 2015-03-30 21:54:04 -0400 | [diff] [blame] | 161 | if err != nil { |
| 162 | return nil, err |
| 163 | } |
| 164 | defer r.Close() |
| 165 | return ioutil.ReadAll(r) |
| 166 | } |
| 167 | |
// testFiles maps file names to the contents that readGCSFile returns
// in "dev" mode, in place of reading from the storage bucket.
// The entries are fake keys signed by a fake CA.
var testFiles = map[string]string{
	"farmer-cert.pem": `-----BEGIN CERTIFICATE-----
MIICljCCAX4CCQCoS+/smvkG2TANBgkqhkiG9w0BAQUFADANMQswCQYDVQQDEwJn
bzAeFw0xNTA0MDYwMzE3NDJaFw0xNzA0MDUwMzE3NDJaMA0xCzAJBgNVBAMTAmdv
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA1NMaVxX8RfCMtQB18azV
hL6/U7C8W2G+8WXYeFuOpgP2SHnMbsUeTiUYWS1xqAxUh3Vl/TT1HIASRDL7kBis
yj+drspafnCr4Yp9oJx1xlIhVXGD/SyHk5oewkjkNEmrFtUT07mT2lmZqD3XJ+6V
aQslRxhPEkLGsXIA/hCucPIplI9jgLY8TmOBhQ7RzXAnk/ayAzDkCgkWB4k/zaFy
LiHjEkE7O7PIjjY51btCLep9QSts98zojY5oYNj2RdQOZa56MHAlh9hbdpm+P1vp
2QBpsDbVpHYv2VPCPvkdOGU1/nzumsxHy17DcirKP8Tuf6zMf9obeuSlMvUUPptl
hwIDAQABMA0GCSqGSIb3DQEBBQUAA4IBAQBxvUMKsX+DEhZSmc164IuSVJ9ucZ97
+KWn4nCwnVkI/RrsJpiTj3pZNRkAxq2vmZTpUdU0CgGHdZNXp/6s/GX4cSzFphSf
WZQN0CG/O50SQ39m7fz/dZ2Xse6EH2grr6KN0QsDhK/RVxecQv57rY9nLFHnC60t
vJBDC739lWlnsGDxylJNxEk2l5c2rJdn82yGw2G9pQ/LDVAtO1G2rxGkpi4FcpGk
rNAa6MiwcyFHcAr3OsigLm4Q9bCS6YXfQDvCZGAR91ADXVWDFC1sgBgM3U3+1bGp
tgXUVKymUvoVq0BiY4BCCYDluoErgZDytLmnUOxrykYi532VpRbbK2ja
-----END CERTIFICATE-----`,
	"farmer-key.pem": `-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEA1NMaVxX8RfCMtQB18azVhL6/U7C8W2G+8WXYeFuOpgP2SHnM
bsUeTiUYWS1xqAxUh3Vl/TT1HIASRDL7kBisyj+drspafnCr4Yp9oJx1xlIhVXGD
/SyHk5oewkjkNEmrFtUT07mT2lmZqD3XJ+6VaQslRxhPEkLGsXIA/hCucPIplI9j
gLY8TmOBhQ7RzXAnk/ayAzDkCgkWB4k/zaFyLiHjEkE7O7PIjjY51btCLep9QSts
98zojY5oYNj2RdQOZa56MHAlh9hbdpm+P1vp2QBpsDbVpHYv2VPCPvkdOGU1/nzu
msxHy17DcirKP8Tuf6zMf9obeuSlMvUUPptlhwIDAQABAoIBAAJOPyzOWitPzdZw
KNbzbmS/xEbd1UyQJIds+QlkxIjb5iEm4KYakJd8I2Vj7qVJbOkCxpYVqsoiQRBo
FP2cptKSGd045/4SrmoFHBNPXp9FaIMKdcmaX+Wjd83XCFHgsm/O4yYaDpYA/n8q
HFicZxX6Pu8kPkcOXiSx/XzDJYCnuec0GIfiJfbrQEwNLA+Ck2HnFfLy6LyrgCqi
eqaxyBoLolzjW7guWV6e/ECsnLXx2n/Pj4l1aqIFKlYxOjBIKRqeUsqzMFpOCbrx
z/scaBuH88hO96jbGZWUAm3R6ZslocQ6TaENYWNVKN1SeGISiE3hRoMAUIu1eHVu
mEzOjvECgYEA9Ypu04NzVjAHdZRwrP7IiX3+CmbyNatdZXIoagp8boPBYWw7QeL8
TPwvc3PCSIjxcT+Jv2hHTZ9Ofz9vAm/XJx6Ios9o/uAbytA+RAolQJWtLGuFLKv1
wxq78iDFcIWq3iPwpl8FJaXeCb/bsNP9jruPhwWWbJVvD1eTif09ZzsCgYEA3ePo
aQ5S0YrPtaf5r70eSBloe5vveG/kW3EW0QMrN6YlOhGSX+mjdAJk7XI/JW6vVPYS
aK+g+ZnzV7HL421McuVH8mmwPHi48l5o2FewF54qYfOoTAJS1cjV08j8WtQsrEax
HHom4m4joQEm0o4QEnTxJDS8/u7T/hhMALxeziUCgYANwevjvgHAWoCQffiyOLRT
v9N0EcCQcUGSZYsOJfhC2O8E3mOTlXw9dAPUnC/OkJ22krDNILKeDsb/Kja2FD4h
2vwc4zIm1be47WIPveHIdJp3Wq7jid8DR4QwVNW7MEIaoDjjmX9YVKrUMQPGLJqQ
XMH19sIu41CNs4J4wM+n8QKBgBiIcFPdP47neBuvnM2vbT+vf3vbO9jnFip+EHW/
kfGvLwKCmtp77JSRBzOxpAWxfTU5l8N3V6cBPIR/pflZRlCVxSSqRtAI0PoLMjBp
UZDq7eiylfMBdsMoV2v5Ft28A8xwbHinkNEMOGg+xloVVvWTdG36XsMZCNtZOF4E
db75AoGBAIk6IW5O2lk9Vc537TCyLpl2HYCP0jI3v6xIkFFolnfHPEgsXLJo9YU8
crVtB0zy4jzjN/SClc/iaeOzk5Ot+iwSRFBZu2jdt0TRxbG+cd+6vKLs0Baw6kB1
gpRUwP6i5yhi838rMgurGVFr3O/0Sv7wMx5UNEJ/RopbQ2K/bnwn
-----END RSA PRIVATE KEY-----`,
}
| 214 | |
David Crawshaw | 02e988a | 2015-03-30 21:54:04 -0400 | [diff] [blame] | 215 | func listenAndServeTLS() { |
David Crawshaw | a3dce2c | 2015-04-07 19:46:19 -0400 | [diff] [blame] | 216 | addr := ":443" |
| 217 | if *mode == "dev" { |
| 218 | addr = ":8119" |
| 219 | } |
| 220 | ln, err := net.Listen("tcp", addr) |
| 221 | if err != nil { |
| 222 | log.Fatalf("net.Listen(%s): %v", addr, err) |
| 223 | } |
| 224 | serveTLS(ln) |
| 225 | } |
| 226 | |
| 227 | func serveTLS(ln net.Listener) { |
David Crawshaw | 02e988a | 2015-03-30 21:54:04 -0400 | [diff] [blame] | 228 | certPEM, err := readGCSFile("farmer-cert.pem") |
| 229 | if err != nil { |
| 230 | log.Printf("cannot load TLS cert, skipping https: %v", err) |
| 231 | return |
| 232 | } |
| 233 | keyPEM, err := readGCSFile("farmer-key.pem") |
| 234 | if err != nil { |
| 235 | log.Printf("cannot load TLS key, skipping https: %v", err) |
| 236 | return |
| 237 | } |
| 238 | cert, err := tls.X509KeyPair(certPEM, keyPEM) |
| 239 | if err != nil { |
| 240 | log.Printf("bad TLS cert: %v", err) |
| 241 | return |
| 242 | } |
| 243 | |
David Crawshaw | a3dce2c | 2015-04-07 19:46:19 -0400 | [diff] [blame] | 244 | server := &http.Server{Addr: ln.Addr().String()} |
David Crawshaw | 02e988a | 2015-03-30 21:54:04 -0400 | [diff] [blame] | 245 | config := &tls.Config{ |
| 246 | NextProtos: []string{"http/1.1"}, |
| 247 | Certificates: []tls.Certificate{cert}, |
| 248 | } |
David Crawshaw | 02e988a | 2015-03-30 21:54:04 -0400 | [diff] [blame] | 249 | tlsLn := tls.NewListener(tcpKeepAliveListener{ln.(*net.TCPListener)}, config) |
David Crawshaw | a3dce2c | 2015-04-07 19:46:19 -0400 | [diff] [blame] | 250 | log.Printf("Coordinator serving on: %v", tlsLn.Addr()) |
David Crawshaw | 02e988a | 2015-03-30 21:54:04 -0400 | [diff] [blame] | 251 | if err := server.Serve(tlsLn); err != nil { |
| 252 | log.Fatalf("serve https: %v", err) |
| 253 | } |
| 254 | } |
| 255 | |
// tcpKeepAliveListener wraps a *net.TCPListener and turns on TCP
// keep-alives for every connection it accepts.
type tcpKeepAliveListener struct {
	*net.TCPListener
}

// Accept accepts the next TCP connection and enables keep-alives on it
// with a period of three minutes. Errors from setting the keep-alive
// options are ignored; an accept error is returned with a nil
// connection.
func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) {
	conn, acceptErr := ln.AcceptTCP()
	if acceptErr != nil {
		// Return an untyped nil so callers see a nil net.Conn.
		return nil, acceptErr
	}
	conn.SetKeepAlive(true)
	conn.SetKeepAlivePeriod(3 * time.Minute)
	return conn, nil
}
| 269 | |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 270 | func main() { |
| 271 | flag.Parse() |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 272 | log.Printf("coordinator version %q starting", Version) |
David Crawshaw | 581ddd1 | 2015-04-06 08:09:20 -0400 | [diff] [blame] | 273 | err := initGCE() |
| 274 | if err != nil { |
| 275 | if *mode == "" { |
| 276 | *mode = "dev" |
| 277 | } |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 278 | log.Printf("VM support disabled due to error initializing GCE: %v", err) |
David Crawshaw | 581ddd1 | 2015-04-06 08:09:20 -0400 | [diff] [blame] | 279 | } else { |
| 280 | if *mode == "" { |
| 281 | *mode = "prod" |
| 282 | } |
| 283 | } |
| 284 | switch *mode { |
| 285 | case "dev", "prod": |
| 286 | log.Printf("Running in %s mode", *mode) |
| 287 | default: |
| 288 | log.Fatalf("Unknown mode: %q", *mode) |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 289 | } |
| 290 | |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 291 | http.HandleFunc("/", handleStatus) |
Brad Fitzpatrick | 578a4d2 | 2015-02-24 21:47:40 -0800 | [diff] [blame] | 292 | http.HandleFunc("/debug/goroutines", handleDebugGoroutines) |
David Crawshaw | eef380f | 2015-04-30 20:03:01 -0400 | [diff] [blame] | 293 | http.HandleFunc("/builders", handleBuilders) |
Brad Fitzpatrick | 777a5bf | 2015-06-09 12:17:36 -0700 | [diff] [blame] | 294 | http.HandleFunc("/temporarylogs", handleLogs) |
David Crawshaw | 581ddd1 | 2015-04-06 08:09:20 -0400 | [diff] [blame] | 295 | http.HandleFunc("/reverse", handleReverse) |
David Crawshaw | dd0cf9f | 2015-04-29 17:58:09 -0400 | [diff] [blame] | 296 | http.HandleFunc("/style.css", handleStyleCSS) |
David Crawshaw | eef380f | 2015-04-30 20:03:01 -0400 | [diff] [blame] | 297 | http.HandleFunc("/try", handleTryStatus) |
David Crawshaw | 02e988a | 2015-03-30 21:54:04 -0400 | [diff] [blame] | 298 | go func() { |
David Crawshaw | 581ddd1 | 2015-04-06 08:09:20 -0400 | [diff] [blame] | 299 | if *mode == "dev" { |
| 300 | return |
| 301 | } |
David Crawshaw | 02e988a | 2015-03-30 21:54:04 -0400 | [diff] [blame] | 302 | err := http.ListenAndServe(":80", nil) |
| 303 | if err != nil { |
| 304 | log.Fatalf("http.ListenAndServe:80: %v", err) |
| 305 | } |
| 306 | }() |
David Crawshaw | 66c36dd | 2015-04-23 10:23:22 -0400 | [diff] [blame] | 307 | |
| 308 | workc := make(chan builderRev) |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 309 | |
David Crawshaw | 581ddd1 | 2015-04-06 08:09:20 -0400 | [diff] [blame] | 310 | if *mode == "dev" { |
| 311 | // TODO(crawshaw): do more in test mode |
David Crawshaw | 66c36dd | 2015-04-23 10:23:22 -0400 | [diff] [blame] | 312 | gcePool.SetEnabled(false) |
| 313 | http.HandleFunc("/dosomework/", handleDoSomeWork(workc)) |
| 314 | } else { |
| 315 | go gcePool.cleanUpOldVMs() |
| 316 | |
Andrew Gerrand | 7171600 | 2015-05-18 13:23:24 +1000 | [diff] [blame] | 317 | if devCluster { |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 318 | dashboard.BuildletBucket = "dev-go-builder-data" |
Andrew Gerrand | 1826ad4 | 2015-05-26 04:34:37 +1000 | [diff] [blame] | 319 | dashboard.Builders = devClusterBuilders() |
Andrew Gerrand | 7171600 | 2015-05-18 13:23:24 +1000 | [diff] [blame] | 320 | } |
| 321 | |
David Crawshaw | 66c36dd | 2015-04-23 10:23:22 -0400 | [diff] [blame] | 322 | // Start the Docker processes on this host polling Gerrit and |
| 323 | // pinging build.golang.org when new commits are available. |
| 324 | startWatchers() // in watcher.go |
| 325 | |
| 326 | go findWorkLoop(workc) |
| 327 | go findTryWorkLoop() |
| 328 | // TODO(cmang): gccgo will need its own findWorkLoop |
David Crawshaw | 581ddd1 | 2015-04-06 08:09:20 -0400 | [diff] [blame] | 329 | } |
| 330 | |
David Crawshaw | 66c36dd | 2015-04-23 10:23:22 -0400 | [diff] [blame] | 331 | go listenAndServeTLS() |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 332 | |
| 333 | ticker := time.NewTicker(1 * time.Minute) |
| 334 | for { |
| 335 | select { |
| 336 | case work := <-workc: |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 337 | if !mayBuildRev(work) { |
| 338 | continue |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 339 | } |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 340 | st, err := newBuild(work) |
| 341 | if err != nil { |
| 342 | log.Printf("Bad build work params %v: %v", work, err) |
| 343 | } else { |
| 344 | st.start() |
| 345 | } |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 346 | case <-ticker.C: |
| 347 | if numCurrentBuilds() == 0 && time.Now().After(startTime.Add(10*time.Minute)) { |
| 348 | // TODO: halt the whole machine to kill the VM or something |
| 349 | } |
| 350 | } |
| 351 | } |
| 352 | } |
| 353 | |
Andrew Gerrand | 1826ad4 | 2015-05-26 04:34:37 +1000 | [diff] [blame] | 354 | func devClusterBuilders() map[string]dashboard.BuildConfig { |
| 355 | m := map[string]dashboard.BuildConfig{} |
| 356 | for _, name := range []string{ |
| 357 | "linux-amd64", |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 358 | "linux-amd64-race", |
Andrew Gerrand | 1826ad4 | 2015-05-26 04:34:37 +1000 | [diff] [blame] | 359 | "windows-amd64-gce", |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 360 | "plan9-386", |
Andrew Gerrand | 1826ad4 | 2015-05-26 04:34:37 +1000 | [diff] [blame] | 361 | } { |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 362 | m[name] = dashboard.Builders[name] |
Andrew Gerrand | 1826ad4 | 2015-05-26 04:34:37 +1000 | [diff] [blame] | 363 | } |
| 364 | return m |
| 365 | } |
| 366 | |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 367 | func numCurrentBuilds() int { |
| 368 | statusMu.Lock() |
| 369 | defer statusMu.Unlock() |
| 370 | return len(status) |
| 371 | } |
| 372 | |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 373 | func isBuilding(work builderRev) bool { |
| 374 | statusMu.Lock() |
| 375 | defer statusMu.Unlock() |
| 376 | _, building := status[work] |
| 377 | return building |
| 378 | } |
| 379 | |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 380 | // mayBuildRev reports whether the build type & revision should be started. |
David Crawshaw | 66c36dd | 2015-04-23 10:23:22 -0400 | [diff] [blame] | 381 | // It returns true if it's not already building, and if a reverse buildlet is |
| 382 | // required, if an appropriate machine is registered. |
| 383 | func mayBuildRev(rev builderRev) bool { |
| 384 | if isBuilding(rev) { |
| 385 | return false |
| 386 | } |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 387 | if devCluster && numCurrentBuilds() != 0 { |
| 388 | return false |
| 389 | } |
David Crawshaw | dd57a13 | 2015-04-28 16:42:42 -0400 | [diff] [blame] | 390 | if dashboard.Builders[rev.name].IsReverse { |
| 391 | return reversePool.CanBuild(rev.name) |
David Crawshaw | 66c36dd | 2015-04-23 10:23:22 -0400 | [diff] [blame] | 392 | } |
David Crawshaw | dd57a13 | 2015-04-28 16:42:42 -0400 | [diff] [blame] | 393 | return true |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 394 | } |
| 395 | |
| 396 | func setStatus(work builderRev, st *buildStatus) { |
| 397 | statusMu.Lock() |
| 398 | defer statusMu.Unlock() |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 399 | // TODO: panic if status[work] already exists. audit all callers. |
| 400 | // For instance, what if a trybot is running, and then the CL is merged |
| 401 | // and the findWork goroutine picks it up and it has the same commit, |
| 402 | // because it didn't need to be rebased in Gerrit's cherrypick? |
| 403 | // Could we then have two running with the same key? |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 404 | status[work] = st |
| 405 | } |
| 406 | |
| 407 | func markDone(work builderRev) { |
| 408 | statusMu.Lock() |
| 409 | defer statusMu.Unlock() |
| 410 | st, ok := status[work] |
| 411 | if !ok { |
| 412 | return |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 413 | } |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 414 | delete(status, work) |
| 415 | if len(statusDone) == maxStatusDone { |
| 416 | copy(statusDone, statusDone[1:]) |
| 417 | statusDone = statusDone[:len(statusDone)-1] |
| 418 | } |
| 419 | statusDone = append(statusDone, st) |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 420 | } |
| 421 | |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 422 | // statusPtrStr disambiguates which status to return if there are |
| 423 | // multiple in the history (e.g. recent failures where the build |
| 424 | // didn't finish for reasons outside of all.bash failing) |
| 425 | func getStatus(work builderRev, statusPtrStr string) *buildStatus { |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 426 | statusMu.Lock() |
| 427 | defer statusMu.Unlock() |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 428 | match := func(st *buildStatus) bool { |
| 429 | return statusPtrStr == "" || fmt.Sprintf("%p", st) == statusPtrStr |
| 430 | } |
| 431 | if st, ok := status[work]; ok && match(st) { |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 432 | return st |
| 433 | } |
| 434 | for _, st := range statusDone { |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 435 | if st.builderRev == work && match(st) { |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 436 | return st |
| 437 | } |
| 438 | } |
| 439 | return nil |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 440 | } |
| 441 | |
| 442 | type byAge []*buildStatus |
| 443 | |
| 444 | func (s byAge) Len() int { return len(s) } |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 445 | func (s byAge) Less(i, j int) bool { return s[i].startTime.Before(s[j].startTime) } |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 446 | func (s byAge) Swap(i, j int) { s[i], s[j] = s[j], s[i] } |
| 447 | |
Brad Fitzpatrick | c964c3f | 2015-02-25 16:43:53 -0800 | [diff] [blame] | 448 | func handleTryStatus(w http.ResponseWriter, r *http.Request) { |
| 449 | ts := trySetOfCommitPrefix(r.FormValue("commit")) |
| 450 | if ts == nil { |
| 451 | http.Error(w, "TryBot result not found (already done, invalid, or not yet discovered from Gerrit). Check Gerrit for results.", http.StatusNotFound) |
| 452 | return |
| 453 | } |
| 454 | ts.mu.Lock() |
| 455 | tss := ts.trySetState.clone() |
| 456 | ts.mu.Unlock() |
| 457 | |
| 458 | w.Header().Set("Content-Type", "text/html; charset=utf-8") |
| 459 | fmt.Fprintf(w, "<html><head><title>trybot status</title></head><body>[<a href='/'>overall status</a>] > %s\n", ts.ChangeID) |
| 460 | |
| 461 | fmt.Fprintf(w, "<h1>trybot status</h1>") |
| 462 | fmt.Fprintf(w, "Change-ID: <a href='https://go-review.googlesource.com/#/q/%s'>%s</a><br>\n", ts.ChangeID, ts.ChangeID) |
| 463 | fmt.Fprintf(w, "Commit: <a href='https://go-review.googlesource.com/#/q/%s'>%s</a><br>\n", ts.Commit, ts.Commit) |
| 464 | fmt.Fprintf(w, "<p>Builds remain: %d</p>\n", tss.remain) |
| 465 | fmt.Fprintf(w, "<p>Builds failed: %v</p>\n", tss.failed) |
| 466 | fmt.Fprintf(w, "<p>Builds</p><table cellpadding=5 border=1>\n") |
| 467 | for _, bs := range tss.builds { |
| 468 | status := "<i>(running)</i>" |
| 469 | bs.mu.Lock() |
| 470 | if !bs.done.IsZero() { |
| 471 | if bs.succeeded { |
| 472 | status = "pass" |
| 473 | } else { |
| 474 | status = "<b>FAIL</b>" |
| 475 | } |
| 476 | } |
| 477 | bs.mu.Unlock() |
| 478 | fmt.Fprintf(w, "<tr valign=top><td align=left>%s</td><td align=center>%s</td><td><pre>%s</pre></td></tr>\n", |
| 479 | bs.name, |
| 480 | status, |
David Crawshaw | dd0cf9f | 2015-04-29 17:58:09 -0400 | [diff] [blame] | 481 | bs.HTMLStatusLine()) |
Brad Fitzpatrick | c964c3f | 2015-02-25 16:43:53 -0800 | [diff] [blame] | 482 | } |
| 483 | fmt.Fprintf(w, "</table></body></html>") |
| 484 | } |
| 485 | |
| 486 | func trySetOfCommitPrefix(commitPrefix string) *trySet { |
| 487 | if commitPrefix == "" { |
| 488 | return nil |
| 489 | } |
| 490 | statusMu.Lock() |
| 491 | defer statusMu.Unlock() |
| 492 | for k, ts := range tries { |
| 493 | if strings.HasPrefix(k.Commit, commitPrefix) { |
| 494 | return ts |
| 495 | } |
| 496 | } |
| 497 | return nil |
| 498 | } |
| 499 | |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 500 | func handleLogs(w http.ResponseWriter, r *http.Request) { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 501 | br := builderRev{ |
| 502 | name: r.FormValue("name"), |
| 503 | rev: r.FormValue("rev"), |
| 504 | subName: r.FormValue("subName"), // may be empty |
| 505 | subRev: r.FormValue("subRev"), // may be empty |
| 506 | } |
| 507 | st := getStatus(br, r.FormValue("st")) |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 508 | if st == nil { |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 509 | http.NotFound(w, r) |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 510 | return |
| 511 | } |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 512 | w.Header().Set("Content-Type", "text/plain; charset=utf-8") |
Andrew Gerrand | cc49d3b | 2015-03-12 10:27:49 +1100 | [diff] [blame] | 513 | w.Header().Set("X-Content-Type-Options", "nosniff") |
Brad Fitzpatrick | 36d41e9 | 2015-01-14 12:31:04 -0800 | [diff] [blame] | 514 | writeStatusHeader(w, st) |
| 515 | |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 516 | nostream := r.FormValue("nostream") != "" |
| 517 | if nostream || !st.isRunning() { |
| 518 | if nostream { |
| 519 | fmt.Fprintf(w, "\n\n(live streaming disabled; reload manually to see status)\n") |
| 520 | } |
Andrew Gerrand | ff978d7 | 2015-05-28 15:01:51 -0700 | [diff] [blame] | 521 | st.mu.Lock() |
| 522 | defer st.mu.Unlock() |
Andrew Gerrand | aa078a3 | 2015-05-28 16:13:40 -0700 | [diff] [blame] | 523 | w.Write(st.output.Bytes()) |
Andrew Gerrand | ff978d7 | 2015-05-28 15:01:51 -0700 | [diff] [blame] | 524 | return |
| 525 | } |
| 526 | |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 527 | if !st.hasEvent("pre_exec") { |
| 528 | fmt.Fprintf(w, "\n\n(buildlet still starting; no live streaming. reload manually to see status)\n") |
| 529 | return |
| 530 | } |
| 531 | |
Brad Fitzpatrick | ec2973a | 2015-03-04 07:30:35 -0800 | [diff] [blame] | 532 | w.(http.Flusher).Flush() |
| 533 | |
Andrew Gerrand | 5f73aab | 2015-03-03 10:30:18 +1100 | [diff] [blame] | 534 | logs := st.watchLogs() |
| 535 | defer st.unregisterWatcher(logs) |
| 536 | closed := w.(http.CloseNotifier).CloseNotify() |
| 537 | for { |
| 538 | select { |
| 539 | case b, ok := <-logs: |
| 540 | if !ok { |
| 541 | return |
| 542 | } |
| 543 | w.Write(b) |
| 544 | w.(http.Flusher).Flush() |
| 545 | case <-closed: |
| 546 | return |
| 547 | } |
| 548 | } |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 549 | } |
| 550 | |
// handleDebugGoroutines writes the stack traces of all goroutines to w
// as plain text.
//
// runtime.Stack silently truncates its output when the buffer is too
// small, so the buffer starts at 1 MiB and is doubled until the dump
// fits. Growth stops at maxDump so that a process with a huge number
// of goroutines cannot make this handler allocate without bound; in
// that case the dump is truncated at maxDump bytes.
func handleDebugGoroutines(w http.ResponseWriter, r *http.Request) {
	const maxDump = 64 << 20

	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	buf := make([]byte, 1<<20)
	for {
		n := runtime.Stack(buf, true)
		// A completely full buffer means the dump may have been cut off.
		if n < len(buf) || len(buf) >= maxDump {
			buf = buf[:n]
			break
		}
		buf = make([]byte, 2*len(buf))
	}
	w.Write(buf)
}
| 557 | |
Brad Fitzpatrick | 36d41e9 | 2015-01-14 12:31:04 -0800 | [diff] [blame] | 558 | func writeStatusHeader(w http.ResponseWriter, st *buildStatus) { |
| 559 | st.mu.Lock() |
| 560 | defer st.mu.Unlock() |
| 561 | fmt.Fprintf(w, " builder: %s\n", st.name) |
| 562 | fmt.Fprintf(w, " rev: %s\n", st.rev) |
Brad Fitzpatrick | 46d9b00 | 2015-05-13 15:55:41 -0700 | [diff] [blame] | 563 | workaroundFlush(w) |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 564 | fmt.Fprintf(w, " buildlet: %s\n", st.bc) |
| 565 | fmt.Fprintf(w, " started: %v\n", st.startTime) |
Brad Fitzpatrick | 36d41e9 | 2015-01-14 12:31:04 -0800 | [diff] [blame] | 566 | done := !st.done.IsZero() |
| 567 | if done { |
David du Colombier | 5e8cfd4 | 2015-06-12 17:16:24 +0200 | [diff] [blame] | 568 | fmt.Fprintf(w, " ended: %v\n", st.done) |
Brad Fitzpatrick | 36d41e9 | 2015-01-14 12:31:04 -0800 | [diff] [blame] | 569 | fmt.Fprintf(w, " success: %v\n", st.succeeded) |
| 570 | } else { |
| 571 | fmt.Fprintf(w, " status: still running\n") |
| 572 | } |
| 573 | if len(st.events) > 0 { |
| 574 | io.WriteString(w, "\nEvents:\n") |
| 575 | st.writeEventsLocked(w, false) |
| 576 | } |
| 577 | io.WriteString(w, "\nBuild log:\n") |
Brad Fitzpatrick | 46d9b00 | 2015-05-13 15:55:41 -0700 | [diff] [blame] | 578 | workaroundFlush(w) |
| 579 | } |
| 580 | |
// workaroundFlush is an unnecessary flush to work around a bug in Chrome.
// See https://code.google.com/p/chromium/issues/detail?id=2016 for the details.
// In summary: a couple of unnecessary chunk flushes bypass the content type
// sniffing which happens (even if unused?), even if you set nosniff as we do
// in func handleLogs.
//
// The flush is purely cosmetic, so a ResponseWriter that does not implement
// http.Flusher is left untouched instead of panicking on the type assertion.
func workaroundFlush(w http.ResponseWriter) {
	if f, ok := w.(http.Flusher); ok {
		f.Flush()
	}
}
| 589 | |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 590 | // findWorkLoop polls http://build.golang.org/?mode=json looking for new work |
| 591 | // for the main dashboard. It does not support gccgo. |
| 592 | // TODO(bradfitz): it also currently does not support subrepos. |
| 593 | func findWorkLoop(work chan<- builderRev) { |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 594 | // Useful for debugging a single run: |
| 595 | if devCluster && false { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 596 | work <- builderRev{name: "linux-amd64", rev: "c9778ec302b2e0e0d6027e1e0fca892e428d9657", subName: "tools", subRev: "ac303766f5f240c1796eeea3dc9bf34f1261aa35"} |
| 597 | //work <- builderRev{name: "linux-amd64", rev: "54789eff385780c54254f822e09505b6222918e2"} |
| 598 | //work <- builderRev{name: "windows-amd64-gce", rev: "54789eff385780c54254f822e09505b6222918e2"} |
| 599 | |
| 600 | // Still run findWork but ignore what it does. |
| 601 | ignore := make(chan builderRev) |
| 602 | go func() { |
| 603 | for range ignore { |
| 604 | } |
| 605 | }() |
| 606 | work = ignore |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 607 | } |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 608 | ticker := time.NewTicker(15 * time.Second) |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 609 | for { |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 610 | if err := findWork(work); err != nil { |
| 611 | log.Printf("failed to find new work: %v", err) |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 612 | } |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 613 | <-ticker.C |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 614 | } |
| 615 | } |
| 616 | |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 617 | func findWork(work chan<- builderRev) error { |
| 618 | var bs types.BuildStatus |
Andrew Gerrand | 7171600 | 2015-05-18 13:23:24 +1000 | [diff] [blame] | 619 | res, err := http.Get(dashBase() + "?mode=json") |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 620 | if err != nil { |
| 621 | return err |
| 622 | } |
| 623 | defer res.Body.Close() |
| 624 | if err := json.NewDecoder(res.Body).Decode(&bs); err != nil { |
| 625 | return err |
| 626 | } |
| 627 | if res.StatusCode != 200 { |
| 628 | return fmt.Errorf("unexpected http status %v", res.Status) |
| 629 | } |
| 630 | |
| 631 | knownToDashboard := map[string]bool{} // keys are builder |
| 632 | for _, b := range bs.Builders { |
| 633 | knownToDashboard[b] = true |
| 634 | } |
| 635 | |
| 636 | var goRevisions []string |
| 637 | for _, br := range bs.Revisions { |
| 638 | if br.Repo == "go" { |
| 639 | goRevisions = append(goRevisions, br.Revision) |
| 640 | } else { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 641 | // The dashboard provides only the head revision for |
| 642 | // each sub-repo; store it in subrepoHead for later use. |
| 643 | subrepoHead.Lock() |
| 644 | subrepoHead.m[br.Repo] = br.Revision |
| 645 | subrepoHead.Unlock() |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 646 | } |
| 647 | if len(br.Results) != len(bs.Builders) { |
| 648 | return errors.New("bogus JSON response from dashboard: results is too long.") |
| 649 | } |
| 650 | for i, res := range br.Results { |
| 651 | if res != "" { |
| 652 | // It's either "ok" or a failure URL. |
| 653 | continue |
| 654 | } |
| 655 | builder := bs.Builders[i] |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 656 | builderInfo, ok := dashboard.Builders[builder] |
| 657 | if !ok || builderInfo.TryOnly { |
Brad Fitzpatrick | eb52e71 | 2015-05-13 18:38:20 -0700 | [diff] [blame] | 658 | // Not managed by the coordinator, or a trybot-only one. |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 659 | continue |
| 660 | } |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 661 | if br.Repo != "go" && !builderInfo.SplitMakeRun() { |
| 662 | // If we don't split make and run then we can't |
| 663 | // have a snapshot from which to build sub-repos. |
| 664 | continue |
| 665 | } |
| 666 | |
| 667 | var rev builderRev |
| 668 | if br.Repo == "go" { |
| 669 | rev = builderRev{ |
| 670 | name: bs.Builders[i], |
| 671 | rev: br.Revision, |
| 672 | } |
| 673 | } else { |
| 674 | rev = builderRev{ |
| 675 | name: bs.Builders[i], |
| 676 | rev: br.GoRevision, |
| 677 | subName: br.Repo, |
| 678 | subRev: br.Revision, |
| 679 | } |
| 680 | if !builderInfo.BuildSubrepos() || !rev.snapshotExists() { |
| 681 | // Don't try to build this sub-repo until we have a snapshot. |
| 682 | continue |
| 683 | } |
| 684 | } |
| 685 | if !isBuilding(rev) { |
| 686 | work <- rev |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 687 | } |
| 688 | } |
| 689 | } |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 690 | |
| 691 | // And to bootstrap new builders, see if we have any builders |
| 692 | // that the dashboard doesn't know about. |
Brad Fitzpatrick | eb52e71 | 2015-05-13 18:38:20 -0700 | [diff] [blame] | 693 | for b, builderInfo := range dashboard.Builders { |
| 694 | if builderInfo.TryOnly || knownToDashboard[b] { |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 695 | continue |
| 696 | } |
| 697 | for _, rev := range goRevisions { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 698 | br := builderRev{name: b, rev: rev} |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 699 | if !isBuilding(br) { |
| 700 | work <- br |
| 701 | } |
| 702 | } |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 703 | } |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 704 | return nil |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 705 | } |
| 706 | |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 707 | // findTryWorkLoop is a goroutine which loops periodically and queries |
| 708 | // Gerrit for TryBot work. |
| 709 | func findTryWorkLoop() { |
| 710 | if errTryDeps != nil { |
| 711 | return |
| 712 | } |
| 713 | ticker := time.NewTicker(60 * time.Second) |
| 714 | for { |
| 715 | if err := findTryWork(); err != nil { |
| 716 | log.Printf("failed to find trybot work: %v", err) |
| 717 | } |
| 718 | <-ticker.C |
| 719 | } |
| 720 | } |
| 721 | |
| 722 | func findTryWork() error { |
| 723 | cis, err := gerritClient.QueryChanges("label:Run-TryBot=1 label:TryBot-Result=0 project:go status:open", gerrit.QueryChangesOpt{ |
| 724 | Fields: []string{"CURRENT_REVISION"}, |
| 725 | }) |
| 726 | if err != nil { |
| 727 | return err |
| 728 | } |
| 729 | if len(cis) == 0 { |
| 730 | return nil |
| 731 | } |
| 732 | |
| 733 | statusMu.Lock() |
| 734 | defer statusMu.Unlock() |
| 735 | |
| 736 | tryList = make([]tryKey, 0, len(cis)) |
| 737 | wanted := map[tryKey]bool{} |
| 738 | for _, ci := range cis { |
| 739 | if ci.ChangeID == "" || ci.CurrentRevision == "" { |
| 740 | log.Printf("Warning: skipping incomplete %#v", ci) |
| 741 | continue |
| 742 | } |
| 743 | key := tryKey{ |
| 744 | ChangeID: ci.ChangeID, |
| 745 | Commit: ci.CurrentRevision, |
| 746 | } |
| 747 | tryList = append(tryList, key) |
| 748 | wanted[key] = true |
| 749 | if _, ok := tries[key]; ok { |
| 750 | // already in progress |
| 751 | continue |
| 752 | } |
| 753 | tries[key] = newTrySet(key) |
| 754 | } |
| 755 | for k, ts := range tries { |
| 756 | if !wanted[k] { |
| 757 | delete(tries, k) |
| 758 | go ts.cancelBuilds() |
| 759 | } |
| 760 | } |
| 761 | return nil |
| 762 | } |
| 763 | |
// tryKey identifies one trybot run: a Gerrit change together with the
// specific commit of that change being tested. It is used as a map key.
type tryKey struct {
	// ChangeID is the Gerrit Change-ID,
	// e.g. I1a27695838409259d1586a0adfa9f92bccf7ceba.
	ChangeID string

	// Commit is the git hash under test,
	// e.g. ecf3dffc81dc21408fb02159af352651882a8383.
	Commit string
}
| 768 | |
| 769 | // trySet is a the state of a set of builds of different |
| 770 | // configurations, all for the same (Change-ID, Commit) pair. The |
| 771 | // sets which are still wanted (not already submitted or canceled) are |
| 772 | // stored in the global 'tries' map. |
| 773 | type trySet struct { |
| 774 | // immutable |
| 775 | tryKey |
| 776 | |
Brad Fitzpatrick | acb32b4 | 2015-05-11 09:14:37 -0700 | [diff] [blame] | 777 | // mu guards state and errMsg |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 778 | // See LOCK ORDER comment above. |
| 779 | mu sync.Mutex |
| 780 | trySetState |
Brad Fitzpatrick | acb32b4 | 2015-05-11 09:14:37 -0700 | [diff] [blame] | 781 | errMsg bytes.Buffer |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 782 | } |
| 783 | |
| 784 | type trySetState struct { |
| 785 | remain int |
| 786 | failed []string // build names |
| 787 | builds []*buildStatus |
| 788 | } |
| 789 | |
| 790 | func (ts trySetState) clone() trySetState { |
| 791 | return trySetState{ |
| 792 | remain: ts.remain, |
| 793 | failed: append([]string(nil), ts.failed...), |
| 794 | builds: append([]*buildStatus(nil), ts.builds...), |
| 795 | } |
| 796 | } |
| 797 | |
| 798 | // newTrySet creates a new trySet group of builders for a given key, |
| 799 | // the (Change-ID, Commit) pair. It also starts goroutines for each |
| 800 | // build. |
| 801 | // |
| 802 | // Must hold statusMu. |
| 803 | func newTrySet(key tryKey) *trySet { |
| 804 | log.Printf("Starting new trybot set for %v", key) |
| 805 | ts := &trySet{ |
| 806 | tryKey: key, |
| 807 | trySetState: trySetState{ |
| 808 | remain: len(tryBuilders), |
| 809 | builds: make([]*buildStatus, len(tryBuilders)), |
| 810 | }, |
| 811 | } |
| 812 | go ts.notifyStarting() |
| 813 | for i, bconf := range tryBuilders { |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 814 | brev := builderRev{name: bconf.Name, rev: key.Commit} |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 815 | |
| 816 | bs, _ := newBuild(brev) |
| 817 | bs.trySet = ts |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 818 | status[brev] = bs |
| 819 | ts.builds[i] = bs |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 820 | go bs.start() // acquires statusMu itself, so in a goroutine |
| 821 | go ts.awaitTryBuild(i, bconf, bs) |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 822 | } |
| 823 | return ts |
| 824 | } |
| 825 | |
| 826 | // state returns a copy of the trySet's state. |
| 827 | func (ts *trySet) state() trySetState { |
| 828 | ts.mu.Lock() |
| 829 | defer ts.mu.Unlock() |
| 830 | return ts.trySetState.clone() |
| 831 | } |
| 832 | |
| 833 | // notifyStarting runs in its own goroutine and posts to Gerrit that |
| 834 | // the trybots have started on the user's CL with a link of where to watch. |
| 835 | func (ts *trySet) notifyStarting() { |
Brad Fitzpatrick | 4623e1a | 2015-05-27 13:15:38 -0700 | [diff] [blame] | 836 | msg := "TryBots beginning. Status page: http://farmer.golang.org/try?commit=" + ts.Commit[:8] |
| 837 | |
| 838 | if ci, err := gerritClient.GetChangeDetail(ts.ChangeID); err == nil { |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 839 | if len(ci.Messages) == 0 { |
| 840 | log.Printf("No Gerrit comments retrieved on %v", ts.ChangeID) |
| 841 | } |
Brad Fitzpatrick | 4623e1a | 2015-05-27 13:15:38 -0700 | [diff] [blame] | 842 | for _, cmi := range ci.Messages { |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 843 | if strings.Contains(cmi.Message, msg) { |
Brad Fitzpatrick | 4623e1a | 2015-05-27 13:15:38 -0700 | [diff] [blame] | 844 | // Dup. Don't spam. |
| 845 | return |
| 846 | } |
| 847 | } |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 848 | } else { |
| 849 | log.Printf("Error getting Gerrit comments on %s: %v", ts.ChangeID, err) |
Brad Fitzpatrick | 4623e1a | 2015-05-27 13:15:38 -0700 | [diff] [blame] | 850 | } |
| 851 | |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 852 | // Ignore error. This isn't critical. |
Brad Fitzpatrick | 4623e1a | 2015-05-27 13:15:38 -0700 | [diff] [blame] | 853 | gerritClient.SetReview(ts.ChangeID, ts.Commit, gerrit.ReviewInput{Message: msg}) |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 854 | } |
| 855 | |
| 856 | // awaitTryBuild runs in its own goroutine and waits for a build in a |
| 857 | // trySet to complete. |
| 858 | // |
| 859 | // If the build fails without getting to the end, it sleeps and |
| 860 | // reschedules it, as long as it's still wanted. |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 861 | func (ts *trySet) awaitTryBuild(idx int, bconf dashboard.BuildConfig, bs *buildStatus) { |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 862 | for { |
| 863 | WaitCh: |
| 864 | for { |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 865 | timeout := time.NewTimer(10 * time.Minute) |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 866 | select { |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 867 | case <-bs.donec: |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 868 | timeout.Stop() |
| 869 | break WaitCh |
| 870 | case <-timeout.C: |
| 871 | if !ts.wanted() { |
| 872 | // Build was canceled. |
| 873 | return |
| 874 | } |
| 875 | } |
| 876 | } |
| 877 | |
| 878 | if bs.hasEvent("done") { |
| 879 | ts.noteBuildComplete(bconf, bs) |
| 880 | return |
| 881 | } |
| 882 | |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 883 | // TODO(bradfitz): rethink this logic. we should only |
| 884 | // start a new build if the old one appears dead or |
| 885 | // hung. |
| 886 | |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 887 | // Sleep a bit and retry. |
| 888 | time.Sleep(30 * time.Second) |
| 889 | if !ts.wanted() { |
| 890 | return |
| 891 | } |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 892 | brev := builderRev{name: bconf.Name, rev: ts.Commit} |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 893 | bs, _ = newBuild(brev) |
Brad Fitzpatrick | 9d86d3d | 2015-04-01 01:26:32 -0700 | [diff] [blame] | 894 | bs.trySet = ts |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 895 | go bs.start() |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 896 | ts.mu.Lock() |
| 897 | ts.builds[idx] = bs |
| 898 | ts.mu.Unlock() |
| 899 | } |
| 900 | } |
| 901 | |
| 902 | // wanted reports whether this trySet is still active. |
| 903 | // |
| 904 | // If the commmit has been submitted, or change abandoned, or the |
| 905 | // checkbox unchecked, wanted returns false. |
| 906 | func (ts *trySet) wanted() bool { |
| 907 | statusMu.Lock() |
| 908 | defer statusMu.Unlock() |
| 909 | _, ok := tries[ts.tryKey] |
| 910 | return ok |
| 911 | } |
| 912 | |
| 913 | // cancelBuilds run in its own goroutine and cancels this trySet's |
| 914 | // currently-active builds because they're no longer wanted. |
| 915 | func (ts *trySet) cancelBuilds() { |
| 916 | // TODO(bradfitz): implement |
| 917 | } |
| 918 | |
| 919 | func (ts *trySet) noteBuildComplete(bconf dashboard.BuildConfig, bs *buildStatus) { |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 920 | bs.mu.Lock() |
| 921 | succeeded := bs.succeeded |
| 922 | var buildLog string |
| 923 | if !succeeded { |
| 924 | buildLog = bs.output.String() |
| 925 | } |
| 926 | bs.mu.Unlock() |
| 927 | |
| 928 | ts.mu.Lock() |
| 929 | ts.remain-- |
| 930 | remain := ts.remain |
| 931 | if !succeeded { |
| 932 | ts.failed = append(ts.failed, bconf.Name) |
| 933 | } |
Brad Fitzpatrick | acb32b4 | 2015-05-11 09:14:37 -0700 | [diff] [blame] | 934 | numFail := len(ts.failed) |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 935 | ts.mu.Unlock() |
| 936 | |
| 937 | if !succeeded { |
| 938 | s1 := sha1.New() |
| 939 | io.WriteString(s1, buildLog) |
| 940 | objName := fmt.Sprintf("%s/%s_%x.log", bs.rev[:8], bs.name, s1.Sum(nil)[:4]) |
Andrew Gerrand | a9469a8 | 2015-05-27 14:15:18 -0700 | [diff] [blame] | 941 | wr := storage.NewWriter(serviceCtx, buildLogBucket(), objName) |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 942 | wr.ContentType = "text/plain; charset=utf-8" |
| 943 | wr.ACL = append(wr.ACL, storage.ACLRule{Entity: storage.AllUsers, Role: storage.RoleReader}) |
| 944 | if _, err := io.WriteString(wr, buildLog); err != nil { |
| 945 | log.Printf("Failed to write to GCS: %v", err) |
| 946 | return |
| 947 | } |
| 948 | if err := wr.Close(); err != nil { |
| 949 | log.Printf("Failed to write to GCS: %v", err) |
| 950 | return |
| 951 | } |
Andrew Gerrand | a9469a8 | 2015-05-27 14:15:18 -0700 | [diff] [blame] | 952 | failLogURL := fmt.Sprintf("https://storage.googleapis.com/%s/%s", buildLogBucket(), objName) |
Brad Fitzpatrick | 777a5bf | 2015-06-09 12:17:36 -0700 | [diff] [blame] | 953 | |
| 954 | bs.mu.Lock() |
| 955 | bs.failURL = failLogURL |
| 956 | bs.mu.Unlock() |
Brad Fitzpatrick | acb32b4 | 2015-05-11 09:14:37 -0700 | [diff] [blame] | 957 | ts.mu.Lock() |
| 958 | fmt.Fprintf(&ts.errMsg, "Failed on %s: %s\n", bs.name, failLogURL) |
| 959 | ts.mu.Unlock() |
| 960 | |
| 961 | if numFail == 1 && remain > 0 { |
| 962 | if err := gerritClient.SetReview(ts.ChangeID, ts.Commit, gerrit.ReviewInput{ |
| 963 | Message: fmt.Sprintf( |
| 964 | "This change failed on %s:\n"+ |
| 965 | "See %s\n\n"+ |
| 966 | "Consult https://build.golang.org/ to see whether it's a new failure. Other builds still in progress; subsequent failure notices suppressed until final report.", |
| 967 | bs.name, failLogURL), |
| 968 | }); err != nil { |
| 969 | log.Printf("Failed to call Gerrit: %v", err) |
| 970 | return |
| 971 | } |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 972 | } |
| 973 | } |
| 974 | |
| 975 | if remain == 0 { |
| 976 | score, msg := 1, "TryBots are happy." |
Brad Fitzpatrick | acb32b4 | 2015-05-11 09:14:37 -0700 | [diff] [blame] | 977 | if numFail > 0 { |
| 978 | ts.mu.Lock() |
| 979 | errMsg := ts.errMsg.String() |
| 980 | ts.mu.Unlock() |
| 981 | score, msg = -1, fmt.Sprintf("%d of %d TryBots failed:\n%s\nConsult https://build.golang.org/ to see whether they are new failures.", |
| 982 | numFail, len(ts.builds), errMsg) |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 983 | } |
| 984 | if err := gerritClient.SetReview(ts.ChangeID, ts.Commit, gerrit.ReviewInput{ |
| 985 | Message: msg, |
| 986 | Labels: map[string]int{ |
| 987 | "TryBot-Result": score, |
| 988 | }, |
| 989 | }); err != nil { |
| 990 | log.Printf("Failed to call Gerrit: %v", err) |
| 991 | return |
| 992 | } |
| 993 | } |
| 994 | } |
| 995 | |
// builderRev is a build configuration type and a revision, optionally
// extended with the sub-repository that is to be built at that revision.
type builderRev struct {
	// name is the builder name, e.g. "linux-amd64-race".
	name string
	// rev is the lowercase hex core repo git hash.
	rev string

	// Optional sub-repository details (both must be present).

	// subName is the sub-repository name, e.g. "net".
	subName string
	// subRev is the lowercase hex sub-repo git hash.
	subRev string
}
| 1005 | |
| 1006 | func (br builderRev) isSubrepo() bool { |
| 1007 | return br.subName != "" |
Brad Fitzpatrick | e428e4b | 2014-09-03 10:26:52 -0700 | [diff] [blame] | 1008 | } |
| 1009 | |
// eventTimeLogger is implemented by values that can record a named event,
// optionally accompanied by extra text.
type eventTimeLogger interface {
	// logEventTime records event, along with any optText given.
	logEventTime(event string, optText ...string)
}
| 1013 | |
// ErrCanceled is the error a function should return when its Cancel
// channel became readable before the work finished.
var ErrCanceled = errors.New("canceled")

// Cancel is a channel that's closed by the caller when the request is no
// longer desired. The function receiving a cancel should return ErrCanceled
// whenever Cancel becomes readable.
type Cancel <-chan struct{}

// IsCanceled reports, without blocking, whether c is currently readable.
// A nil Cancel is never canceled.
func (c Cancel) IsCanceled() bool {
	canceled := false
	select {
	case <-c:
		canceled = true
	default:
	}
	return canceled
}
| 1029 | |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1030 | type BuildletPool interface { |
| 1031 | // GetBuildlet returns a new buildlet client. |
| 1032 | // |
| 1033 | // The machineType is the machine type (e.g. "linux-amd64-race"). |
| 1034 | // |
| 1035 | // The rev is git hash. Implementations should not use it for |
| 1036 | // anything except for log messages or VM naming. |
| 1037 | // |
| 1038 | // Clients must Close when done with the client. |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1039 | GetBuildlet(cancel Cancel, machineType, rev string, el eventTimeLogger) (*buildlet.Client, error) |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1040 | |
| 1041 | String() string // TODO(bradfitz): more status stuff |
Brad Fitzpatrick | c1d98dc | 2015-01-07 15:44:25 -0800 | [diff] [blame] | 1042 | } |
| 1043 | |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1044 | // GetBuildlets creates up to n buildlets and sends them on the returned channel |
| 1045 | // before closing the channel. |
| 1046 | func GetBuildlets(cancel Cancel, pool BuildletPool, n int, machineType, rev string, el eventTimeLogger) <-chan *buildlet.Client { |
| 1047 | ch := make(chan *buildlet.Client) // NOT buffered |
| 1048 | var wg sync.WaitGroup |
| 1049 | wg.Add(n) |
| 1050 | for i := 0; i < n; i++ { |
| 1051 | go func() { |
| 1052 | defer wg.Done() |
| 1053 | bc, err := pool.GetBuildlet(cancel, machineType, rev, el) |
| 1054 | if err != nil { |
| 1055 | if err != ErrCanceled { |
| 1056 | log.Printf("failed to get a %s buildlet for rev %s: %v", machineType, rev, err) |
| 1057 | } |
| 1058 | return |
| 1059 | } |
| 1060 | el.logEventTime("helper_ready") |
| 1061 | select { |
| 1062 | case ch <- bc: |
| 1063 | case <-cancel: |
| 1064 | el.logEventTime("helper_killed_before_use") |
| 1065 | bc.Close() |
| 1066 | return |
| 1067 | } |
| 1068 | }() |
| 1069 | } |
| 1070 | go func() { |
| 1071 | wg.Wait() |
| 1072 | close(ch) |
| 1073 | }() |
| 1074 | return ch |
| 1075 | } |
| 1076 | |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1077 | func poolForConf(conf dashboard.BuildConfig) (BuildletPool, error) { |
| 1078 | if conf.VMImage != "" { |
| 1079 | return gcePool, nil |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1080 | } |
David Crawshaw | 66c36dd | 2015-04-23 10:23:22 -0400 | [diff] [blame] | 1081 | return reversePool, nil |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1082 | } |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1083 | |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1084 | func newBuild(rev builderRev) (*buildStatus, error) { |
| 1085 | // Note: can't acquire statusMu in newBuild, as this is called |
| 1086 | // from findTryWork -> newTrySet, which holds statusMu. |
| 1087 | |
| 1088 | conf, ok := dashboard.Builders[rev.name] |
| 1089 | if !ok { |
| 1090 | return nil, fmt.Errorf("unknown builder type %q", rev.name) |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1091 | } |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1092 | return &buildStatus{ |
| 1093 | builderRev: rev, |
| 1094 | conf: conf, |
| 1095 | donec: make(chan struct{}), |
| 1096 | startTime: time.Now(), |
| 1097 | }, nil |
| 1098 | } |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1099 | |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1100 | // start sets the st.startTime and starts the build in a new goroutine. |
| 1101 | // If it returns an error, st is not modified and a new goroutine has not |
| 1102 | // been started. |
| 1103 | // The build status's donec channel is closed on when the build is complete |
| 1104 | // in either direction. |
| 1105 | func (st *buildStatus) start() { |
| 1106 | setStatus(st.builderRev, st) |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1107 | go func() { |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1108 | err := st.build() |
Brad Fitzpatrick | f3c0193 | 2015-01-15 16:29:16 -0800 | [diff] [blame] | 1109 | if err != nil { |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1110 | fmt.Fprintf(st, "\n\nError: %v\n", err) |
| 1111 | } |
Brad Fitzpatrick | ec2973a | 2015-03-04 07:30:35 -0800 | [diff] [blame] | 1112 | st.setDone(err == nil) |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1113 | markDone(st.builderRev) |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1114 | }() |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1115 | } |
| 1116 | |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1117 | func (st *buildStatus) buildletType() string { |
| 1118 | if v := st.conf.BuildletType; v != "" { |
| 1119 | return v |
Brad Fitzpatrick | ac39ba8 | 2015-05-14 13:39:58 -0700 | [diff] [blame] | 1120 | } |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1121 | return st.conf.Name |
| 1122 | } |
| 1123 | |
| 1124 | func (st *buildStatus) buildletPool() (BuildletPool, error) { |
| 1125 | buildletType := st.buildletType() |
Brad Fitzpatrick | ac39ba8 | 2015-05-14 13:39:58 -0700 | [diff] [blame] | 1126 | bconf, ok := dashboard.Builders[buildletType] |
| 1127 | if !ok { |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1128 | return nil, fmt.Errorf("invalid BuildletType %q for %q", buildletType, st.conf.Name) |
Brad Fitzpatrick | ac39ba8 | 2015-05-14 13:39:58 -0700 | [diff] [blame] | 1129 | } |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1130 | return poolForConf(bconf) |
| 1131 | } |
| 1132 | |
| 1133 | func (st *buildStatus) expectedMakeBashDuration() time.Duration { |
| 1134 | // TODO: base this on historical measurements, instead of statically configured. |
| 1135 | // TODO: move this to dashboard/builders.go? But once we based on on historical |
| 1136 | // measurements, it'll need GCE services (bigtable/bigquery?), so it's probably |
| 1137 | // better in this file. |
| 1138 | goos, goarch := st.conf.GOOS(), st.conf.GOARCH() |
| 1139 | |
| 1140 | if goos == "plan9" { |
| 1141 | return 2500 * time.Millisecond |
| 1142 | } |
| 1143 | if goos == "linux" { |
| 1144 | if goarch == "arm" { |
| 1145 | return 4 * time.Minute |
| 1146 | } |
| 1147 | return 1000 * time.Millisecond |
| 1148 | } |
| 1149 | if goos == "windows" { |
| 1150 | return 1000 * time.Millisecond |
| 1151 | } |
| 1152 | |
| 1153 | return 1500 * time.Millisecond |
| 1154 | } |
| 1155 | |
| 1156 | func (st *buildStatus) expectedBuildletStartDuration() time.Duration { |
| 1157 | // TODO: move this to dashboard/builders.go? But once we based on on historical |
| 1158 | // measurements, it'll need GCE services (bigtable/bigquery?), so it's probably |
| 1159 | // better in this file. |
| 1160 | pool, _ := st.buildletPool() |
| 1161 | switch pool.(type) { |
| 1162 | case *gceBuildletPool: |
| 1163 | return time.Minute |
| 1164 | case *reverseBuildletPool: |
| 1165 | goos, arch := st.conf.GOOS(), st.conf.GOARCH() |
| 1166 | if goos == "darwin" { |
| 1167 | if arch == "arm" && arch == "arm64" { |
| 1168 | // iOS; idle or it's not. |
| 1169 | return 0 |
| 1170 | } |
| 1171 | if arch == "amd64" || arch == "386" { |
| 1172 | return 0 // TODO: remove this once we're using VMware |
| 1173 | return 1 * time.Minute // VMware boot of hermetic OS X |
| 1174 | } |
| 1175 | } |
| 1176 | if goos == "linux" && arch == "arm" { |
| 1177 | // Scaleway. Ready or not. |
| 1178 | return 0 |
| 1179 | } |
| 1180 | } |
| 1181 | return 0 |
| 1182 | } |
| 1183 | |
| 1184 | // getHelpersReadySoon waits a bit (as a function of the build |
| 1185 | // configuration) and starts getting the buildlets for test sharding |
| 1186 | // ready, such that they're ready when make.bash is done. But we don't |
| 1187 | // want to start too early, lest we waste idle resources during make.bash. |
| 1188 | func (st *buildStatus) getHelpersReadySoon() { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1189 | if st.isSubrepo() || st.conf.NumTestHelpers == 0 { |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1190 | return |
| 1191 | } |
| 1192 | time.AfterFunc(st.expectedMakeBashDuration()-st.expectedBuildletStartDuration(), |
| 1193 | func() { |
| 1194 | st.logEventTime("starting_helpers") |
| 1195 | st.getHelpers() // and ignore the result. |
| 1196 | }) |
| 1197 | } |
| 1198 | |
| 1199 | // getHelpers returns a channel of buildlet test helpers, with an item |
| 1200 | // sent as they become available. The channel is closed at the end. |
| 1201 | func (st *buildStatus) getHelpers() <-chan *buildlet.Client { |
| 1202 | st.onceInitHelpers.Do(st.onceInitHelpersFunc) |
| 1203 | return st.helpers |
| 1204 | } |
| 1205 | |
| 1206 | func (st *buildStatus) onceInitHelpersFunc() { |
| 1207 | pool, _ := st.buildletPool() // won't return an error since we called it already |
| 1208 | st.helpers = GetBuildlets(st.donec, pool, st.conf.NumTestHelpers, st.buildletType(), st.rev, st) |
| 1209 | } |
| 1210 | |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1211 | // We should try to build from a snapshot if this is a subrepo build, we can |
| 1212 | // expect there to be a snapshot (splitmakerun), and the snapshot exists. |
| 1213 | func (st *buildStatus) useSnapshot() bool { |
| 1214 | st.mu.Lock() |
| 1215 | defer st.mu.Unlock() |
| 1216 | if st.useSnapshotMemo != nil { |
| 1217 | return *st.useSnapshotMemo |
| 1218 | } |
| 1219 | b := st.isSubrepo() && st.conf.SplitMakeRun() && st.snapshotExists() |
| 1220 | st.useSnapshotMemo = &b |
| 1221 | return b |
| 1222 | } |
| 1223 | |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 1224 | func (st *buildStatus) build() error { |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1225 | pool, err := st.buildletPool() |
Brad Fitzpatrick | f3c0193 | 2015-01-15 16:29:16 -0800 | [diff] [blame] | 1226 | if err != nil { |
| 1227 | return err |
| 1228 | } |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1229 | st.logEventTime("get_buildlet") |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1230 | bc, err := pool.GetBuildlet(nil, st.buildletType(), st.rev, st) |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1231 | if err != nil { |
| 1232 | return fmt.Errorf("failed to get a buildlet: %v", err) |
| 1233 | } |
| 1234 | defer bc.Close() |
Brad Fitzpatrick | 378fb29 | 2015-06-10 13:59:42 -0700 | [diff] [blame] | 1235 | defer nukeIfBroken(bc) |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 1236 | st.mu.Lock() |
| 1237 | st.bc = bc |
| 1238 | st.mu.Unlock() |
| 1239 | |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1240 | st.logEventTime("got_buildlet", bc.IPPort()) |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1241 | |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1242 | if st.useSnapshot() { |
| 1243 | st.logEventTime("start_write_snapshot_tar") |
| 1244 | if err := bc.PutTarFromURL(st.snapshotURL(), "go"); err != nil { |
| 1245 | return fmt.Errorf("failed to put snapshot to buildlet: %v", err) |
| 1246 | } |
| 1247 | st.logEventTime("end_write_snapshot_tar") |
| 1248 | } else { |
| 1249 | // Write the Go source and bootstrap tool chain in parallel. |
| 1250 | var grp syncutil.Group |
| 1251 | grp.Go(st.writeGoSource) |
| 1252 | grp.Go(st.writeBootstrapToolchain) |
| 1253 | if err := grp.Err(); err != nil { |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 1254 | return err |
| 1255 | } |
Brad Fitzpatrick | 20d8483 | 2015-01-21 10:03:07 -0800 | [diff] [blame] | 1256 | } |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1257 | |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1258 | execStartTime := time.Now() |
Brad Fitzpatrick | f8c2484 | 2015-01-16 09:54:03 -0800 | [diff] [blame] | 1259 | st.logEventTime("pre_exec") |
Andrew Gerrand | 306c6b7 | 2015-06-11 13:44:48 -0700 | [diff] [blame] | 1260 | fmt.Fprintf(st, "%s at %v", st.name, st.rev) |
| 1261 | if st.isSubrepo() { |
| 1262 | fmt.Fprintf(st, " building %v at %v", st.subName, st.subRev) |
| 1263 | } |
| 1264 | fmt.Fprint(st, "\n\n") |
Brad Fitzpatrick | f8c2484 | 2015-01-16 09:54:03 -0800 | [diff] [blame] | 1265 | |
Brad Fitzpatrick | 7d9b036 | 2015-05-27 11:51:27 -0700 | [diff] [blame] | 1266 | var remoteErr error |
| 1267 | if st.conf.SplitMakeRun() { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1268 | remoteErr, err = st.runAllSharded() |
Brad Fitzpatrick | 7d9b036 | 2015-05-27 11:51:27 -0700 | [diff] [blame] | 1269 | } else { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1270 | remoteErr, err = st.runAllLegacy() |
Andrew Gerrand | fb77488 | 2015-05-21 14:02:38 +1000 | [diff] [blame] | 1271 | } |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1272 | doneMsg := "all tests passed" |
| 1273 | if remoteErr != nil { |
| 1274 | doneMsg = "with test failures" |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 1275 | } else if err != nil { |
| 1276 | doneMsg = "comm error: " + err.Error() |
| 1277 | } |
| 1278 | if err != nil { |
| 1279 | // Return the error *before* we create the magic |
| 1280 | // "done" event. (which the try coordinator looks for) |
| 1281 | return err |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1282 | } |
| 1283 | st.logEventTime("done", doneMsg) // "done" is a magic value |
| 1284 | |
| 1285 | if devPause { |
| 1286 | st.logEventTime("DEV_MAIN_SLEEP") |
| 1287 | time.Sleep(5 * time.Minute) |
| 1288 | } |
Andrew Gerrand | fb77488 | 2015-05-21 14:02:38 +1000 | [diff] [blame] | 1289 | |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 1290 | if st.trySet == nil { |
| 1291 | var buildLog string |
Brad Fitzpatrick | f8c2484 | 2015-01-16 09:54:03 -0800 | [diff] [blame] | 1292 | if remoteErr != nil { |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 1293 | buildLog = st.logs() |
Brad Fitzpatrick | f8c2484 | 2015-01-16 09:54:03 -0800 | [diff] [blame] | 1294 | } |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1295 | if err := recordResult(st.builderRev, remoteErr == nil, buildLog, time.Since(execStartTime)); err != nil { |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 1296 | if remoteErr != nil { |
| 1297 | return fmt.Errorf("Remote error was %q but failed to report it to the dashboard: %v", remoteErr, err) |
| 1298 | } |
| 1299 | return fmt.Errorf("Build succeeded but failed to report it to the dashboard: %v", err) |
| 1300 | } |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1301 | } |
Brad Fitzpatrick | f8c2484 | 2015-01-16 09:54:03 -0800 | [diff] [blame] | 1302 | if remoteErr != nil { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1303 | return remoteErr |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 1304 | } |
| 1305 | return nil |
| 1306 | } |
| 1307 | |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1308 | // runAllSharded runs make.bash and then shards the test execution. |
| 1309 | // remoteErr and err are as described at the top of this file. |
| 1310 | func (st *buildStatus) runAllSharded() (remoteErr, err error) { |
| 1311 | st.getHelpersReadySoon() |
| 1312 | |
| 1313 | remoteErr, err = st.runMake() |
| 1314 | if err != nil { |
| 1315 | return nil, err |
| 1316 | } |
| 1317 | if remoteErr != nil { |
| 1318 | return fmt.Errorf("build failed: %v", remoteErr), nil |
| 1319 | } |
| 1320 | |
| 1321 | if err := st.doSnapshot(); err != nil { |
| 1322 | return nil, err |
| 1323 | } |
| 1324 | |
| 1325 | if st.isSubrepo() { |
| 1326 | remoteErr, err = st.runSubrepoTests() |
| 1327 | } else { |
| 1328 | remoteErr, err = st.runTests(st.getHelpers()) |
| 1329 | } |
| 1330 | if err != nil { |
| 1331 | return nil, fmt.Errorf("runTests: %v", err) |
| 1332 | } |
| 1333 | if remoteErr != nil { |
| 1334 | return fmt.Errorf("tests failed: %v", remoteErr), nil |
| 1335 | } |
| 1336 | return nil, nil |
| 1337 | } |
| 1338 | |
| 1339 | // runMake builds the tool chain. |
| 1340 | // remoteErr and err are as described at the top of this file. |
| 1341 | func (st *buildStatus) runMake() (remoteErr, err error) { |
| 1342 | // Don't do this if we're using a pre-built snapshot. |
| 1343 | if st.useSnapshot() { |
| 1344 | return nil, nil |
| 1345 | } |
| 1346 | |
| 1347 | // Build the source code. |
| 1348 | makeScript := st.conf.MakeScript() |
| 1349 | t0 := time.Now() |
| 1350 | remoteErr, err = st.bc.Exec(path.Join("go", makeScript), buildlet.ExecOpts{ |
| 1351 | Output: st, |
| 1352 | OnStartExec: func() { |
| 1353 | st.logEventTime("running_exec", makeScript) |
| 1354 | }, |
| 1355 | ExtraEnv: st.conf.Env(), |
| 1356 | Debug: true, |
| 1357 | Args: st.conf.MakeScriptArgs(), |
| 1358 | }) |
| 1359 | if err != nil { |
| 1360 | return nil, err |
| 1361 | } |
| 1362 | st.logEventTime("exec_done", fmt.Sprintf("%s in %v", makeScript, time.Since(t0))) |
| 1363 | if remoteErr != nil { |
| 1364 | return fmt.Errorf("make script failed: %v", remoteErr), nil |
| 1365 | } |
| 1366 | return nil, nil |
| 1367 | } |
| 1368 | |
| 1369 | // runAllLegacy executes all.bash (or .bat, or whatever) in the traditional way. |
| 1370 | // remoteErr and err are as described at the top of this file. |
| 1371 | // |
| 1372 | // TODO(bradfitz,adg): delete this function when all builders |
| 1373 | // can split make & run (and then delete the SplitMakeRun method) |
| 1374 | func (st *buildStatus) runAllLegacy() (remoteErr, err error) { |
| 1375 | st.logEventTime("legacy_all_path") |
| 1376 | allScript := st.conf.AllScript() |
| 1377 | t0 := time.Now() |
| 1378 | remoteErr, err = st.bc.Exec(path.Join("go", allScript), buildlet.ExecOpts{ |
| 1379 | Output: st, |
| 1380 | OnStartExec: func() { |
| 1381 | st.logEventTime("running_exec", allScript) |
| 1382 | }, |
| 1383 | ExtraEnv: st.conf.Env(), |
| 1384 | Debug: true, |
| 1385 | Args: st.conf.AllScriptArgs(), |
| 1386 | }) |
| 1387 | if err != nil { |
Brad Fitzpatrick | 522e105 | 2015-06-10 18:09:34 -0700 | [diff] [blame] | 1388 | return nil, err |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1389 | } |
| 1390 | st.logEventTime("exec_done", fmt.Sprintf("%s in %v", allScript, time.Since(t0))) |
| 1391 | if remoteErr != nil { |
| 1392 | return fmt.Errorf("all script failed: %v", remoteErr), nil |
| 1393 | } |
| 1394 | return nil, nil |
| 1395 | } |
| 1396 | |
| 1397 | func (st *buildStatus) doSnapshot() error { |
| 1398 | // If we're using a pre-built snapshot, don't make another. |
| 1399 | if st.useSnapshot() { |
| 1400 | return nil |
| 1401 | } |
| 1402 | |
| 1403 | if err := st.cleanForSnapshot(); err != nil { |
| 1404 | return fmt.Errorf("cleanForSnapshot: %v", err) |
| 1405 | } |
| 1406 | if err := st.writeSnapshot(); err != nil { |
| 1407 | return fmt.Errorf("writeSnapshot: %v", err) |
| 1408 | } |
| 1409 | return nil |
| 1410 | } |
| 1411 | |
| 1412 | func (br *builderRev) snapshotExists() bool { |
| 1413 | resp, err := http.Head(br.snapshotURL()) |
| 1414 | return err == nil && resp.StatusCode == http.StatusOK |
| 1415 | } |
| 1416 | |
| 1417 | func (st *buildStatus) writeGoSource() error { |
| 1418 | // Write the VERSION file. |
| 1419 | st.logEventTime("start_write_version_tar") |
| 1420 | if err := st.bc.PutTar(versionTgz(st.rev), "go"); err != nil { |
| 1421 | return fmt.Errorf("writing VERSION tgz: %v", err) |
| 1422 | } |
| 1423 | |
| 1424 | st.logEventTime("fetch_go_tar") |
| 1425 | tarReader, err := getSourceTgz(st, "go", st.rev) |
| 1426 | if err != nil { |
| 1427 | return err |
| 1428 | } |
| 1429 | st.logEventTime("start_write_go_tar") |
| 1430 | if err := st.bc.PutTar(tarReader, "go"); err != nil { |
| 1431 | return fmt.Errorf("writing tarball from Gerrit: %v", err) |
| 1432 | } |
| 1433 | st.logEventTime("end_write_go_tar") |
| 1434 | return nil |
| 1435 | } |
| 1436 | |
| 1437 | func (st *buildStatus) writeBootstrapToolchain() error { |
| 1438 | if st.conf.Go14URL == "" { |
| 1439 | return nil |
| 1440 | } |
| 1441 | st.logEventTime("start_write_go14_tar") |
| 1442 | if err := st.bc.PutTarFromURL(st.conf.Go14URL, "go1.4"); err != nil { |
| 1443 | return err |
| 1444 | } |
| 1445 | st.logEventTime("end_write_go14_tar") |
| 1446 | return nil |
| 1447 | } |
| 1448 | |
Andrew Gerrand | c53a577 | 2015-05-27 14:29:36 -0700 | [diff] [blame] | 1449 | var cleanForSnapshotFiles = []string{ |
| 1450 | "go/doc/gopher", |
| 1451 | "go/pkg/bootstrap", |
| 1452 | } |
| 1453 | |
| 1454 | func (st *buildStatus) cleanForSnapshot() error { |
| 1455 | st.logEventTime("clean_for_snapshot") |
| 1456 | defer st.logEventTime("clean_for_snapshot_done") |
| 1457 | |
| 1458 | return st.bc.RemoveAll(cleanForSnapshotFiles...) |
| 1459 | } |
| 1460 | |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1461 | // snapshotObjectName is the cloud storage object name of the |
| 1462 | // built Go tree for this builder and Go rev (not the sub-repo). |
| 1463 | // The entries inside this tarball do not begin with "go/". |
| 1464 | func (br *builderRev) snapshotObjectName() string { |
| 1465 | return fmt.Sprintf("%v/%v/%v.tar.gz", "go", br.name, br.rev) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1466 | } |
| 1467 | |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1468 | // snapshotURL is the absolute URL of the snapshot object (see above). |
| 1469 | func (br *builderRev) snapshotURL() string { |
| 1470 | return fmt.Sprintf("https://storage.googleapis.com/%s/%s", snapBucket(), br.snapshotObjectName()) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1471 | } |
| 1472 | |
Andrew Gerrand | c53a577 | 2015-05-27 14:29:36 -0700 | [diff] [blame] | 1473 | func (st *buildStatus) writeSnapshot() error { |
| 1474 | st.logEventTime("write_snapshot") |
| 1475 | defer st.logEventTime("write_snapshot_done") |
| 1476 | |
| 1477 | tgz, err := st.bc.GetTar("go") |
| 1478 | if err != nil { |
| 1479 | return err |
| 1480 | } |
| 1481 | defer tgz.Close() |
| 1482 | |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1483 | wr := storage.NewWriter(serviceCtx, snapBucket(), st.snapshotObjectName()) |
Andrew Gerrand | c53a577 | 2015-05-27 14:29:36 -0700 | [diff] [blame] | 1484 | wr.ContentType = "application/octet-stream" |
| 1485 | wr.ACL = append(wr.ACL, storage.ACLRule{Entity: storage.AllUsers, Role: storage.RoleReader}) |
| 1486 | if _, err := io.Copy(wr, tgz); err != nil { |
| 1487 | wr.Close() |
| 1488 | return err |
| 1489 | } |
| 1490 | |
| 1491 | return wr.Close() |
| 1492 | } |
| 1493 | |
Brad Fitzpatrick | 44de54d | 2015-05-27 20:06:32 -0700 | [diff] [blame] | 1494 | func (st *buildStatus) distTestList() (names []string, err error) { |
| 1495 | var buf bytes.Buffer |
| 1496 | remoteErr, err := st.bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{ |
| 1497 | Output: &buf, |
| 1498 | ExtraEnv: st.conf.Env(), |
| 1499 | OnStartExec: func() { st.logEventTime("discovering_tests") }, |
| 1500 | Path: []string{"$WORKDIR/go/bin", "$PATH"}, |
| 1501 | Args: []string{"tool", "dist", "test", "--no-rebuild", "--list"}, |
| 1502 | }) |
| 1503 | if err != nil { |
| 1504 | return nil, fmt.Errorf("Exec error: %v, %s", remoteErr, buf.Bytes()) |
| 1505 | } |
| 1506 | if remoteErr != nil { |
| 1507 | return nil, fmt.Errorf("Remote error: %v, %s", remoteErr, buf.Bytes()) |
| 1508 | } |
| 1509 | return strings.Fields(buf.String()), nil |
| 1510 | } |
| 1511 | |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1512 | func (st *buildStatus) newTestSet(names []string) *testSet { |
| 1513 | set := &testSet{ |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 1514 | st: st, |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1515 | } |
| 1516 | for _, name := range names { |
| 1517 | set.items = append(set.items, &testItem{ |
| 1518 | set: set, |
| 1519 | name: name, |
| 1520 | duration: testDuration(name), |
| 1521 | take: make(chan token, 1), |
| 1522 | done: make(chan token), |
| 1523 | }) |
| 1524 | } |
| 1525 | return set |
| 1526 | } |
| 1527 | |
| 1528 | func partitionGoTests(tests []string) (sets [][]string) { |
| 1529 | var srcTests []string |
| 1530 | var cmdTests []string |
| 1531 | for _, name := range tests { |
| 1532 | if strings.HasPrefix(name, "go_test:cmd/") { |
| 1533 | cmdTests = append(cmdTests, name) |
| 1534 | } else if strings.HasPrefix(name, "go_test:") { |
| 1535 | srcTests = append(srcTests, name) |
| 1536 | } |
| 1537 | } |
| 1538 | sort.Strings(srcTests) |
| 1539 | sort.Strings(cmdTests) |
| 1540 | goTests := append(srcTests, cmdTests...) |
| 1541 | |
| 1542 | const sizeThres = 10 * time.Second |
| 1543 | |
| 1544 | var curSet []string |
| 1545 | var curDur time.Duration |
| 1546 | |
| 1547 | flush := func() { |
| 1548 | if len(curSet) > 0 { |
| 1549 | sets = append(sets, curSet) |
| 1550 | curSet = nil |
| 1551 | curDur = 0 |
| 1552 | } |
| 1553 | } |
| 1554 | for _, name := range goTests { |
| 1555 | d := testDuration(name) - minGoTestSpeed // subtract 'go' tool overhead |
| 1556 | if curDur+d > sizeThres { |
| 1557 | flush() // no-op if empty |
| 1558 | } |
| 1559 | curSet = append(curSet, name) |
| 1560 | curDur += d |
| 1561 | } |
| 1562 | |
| 1563 | flush() |
| 1564 | return |
| 1565 | } |
| 1566 | |
| 1567 | var minGoTestSpeed = (func() time.Duration { |
| 1568 | var min Seconds |
| 1569 | for name, secs := range fixedTestDuration { |
| 1570 | if !strings.HasPrefix(name, "go_test:") { |
| 1571 | continue |
| 1572 | } |
| 1573 | if min == 0 || secs < min { |
| 1574 | min = secs |
| 1575 | } |
| 1576 | } |
| 1577 | return min.Duration() |
| 1578 | })() |
| 1579 | |
// Seconds is a duration expressed as a floating-point number of seconds.
type Seconds float64

// Duration converts s to a time.Duration.
func (s Seconds) Duration() time.Duration {
	return time.Duration(float64(s) * float64(time.Second))
}

// fixedTestDuration records how long each dist test took, in seconds,
// on Linux/amd64 (measured once on 2015-05-28), each run by itself.
// There seems to be a 0.6s+ overhead from the go tool which goes away
// if the tests are combined.
//
// Keys are dist test names; package tests carry the "go_test:" prefix.
var fixedTestDuration = map[string]Seconds{
	"go_test:archive/tar": 1.30,
	"go_test:archive/zip": 1.68,
	"go_test:bufio": 1.61,
	"go_test:bytes": 1.50,
	"go_test:compress/bzip2": 0.82,
	"go_test:compress/flate": 1.73,
	"go_test:compress/gzip": 0.82,
	"go_test:compress/lzw": 0.86,
	"go_test:compress/zlib": 1.78,
	"go_test:container/heap": 0.69,
	"go_test:container/list": 0.72,
	"go_test:container/ring": 0.64,
	"go_test:crypto/aes": 0.79,
	"go_test:crypto/cipher": 0.96,
	"go_test:crypto/des": 0.96,
	"go_test:crypto/dsa": 0.75,
	"go_test:crypto/ecdsa": 0.86,
	"go_test:crypto/elliptic": 1.06,
	"go_test:crypto/hmac": 0.67,
	"go_test:crypto/md5": 0.77,
	"go_test:crypto/rand": 0.89,
	"go_test:crypto/rc4": 0.71,
	"go_test:crypto/rsa": 1.17,
	"go_test:crypto/sha1": 0.75,
	"go_test:crypto/sha256": 0.68,
	"go_test:crypto/sha512": 0.67,
	"go_test:crypto/subtle": 0.56,
	"go_test:crypto/tls": 3.29,
	"go_test:crypto/x509": 2.81,
	"go_test:database/sql": 1.75,
	"go_test:database/sql/driver": 0.64,
	"go_test:debug/dwarf": 0.77,
	"go_test:debug/elf": 1.41,
	"go_test:debug/gosym": 1.45,
	"go_test:debug/macho": 0.97,
	"go_test:debug/pe": 0.79,
	"go_test:debug/plan9obj": 0.73,
	"go_test:encoding/ascii85": 0.64,
	"go_test:encoding/asn1": 1.16,
	"go_test:encoding/base32": 0.79,
	"go_test:encoding/base64": 0.82,
	"go_test:encoding/binary": 0.96,
	"go_test:encoding/csv": 0.67,
	"go_test:encoding/gob": 2.70,
	"go_test:encoding/hex": 0.66,
	"go_test:encoding/json": 2.20,
	"go_test:errors": 0.54, // was keyed "test:errors", which no "go_test:" lookup could match
	"go_test:expvar": 1.36,
	"go_test:flag": 0.92,
	"go_test:fmt": 2.02,
	"go_test:go/ast": 1.44,
	"go_test:go/build": 1.42,
	"go_test:go/constant": 0.92,
	"go_test:go/doc": 1.51,
	"go_test:go/format": 0.73,
	"go_test:go/internal/gcimporter": 1.30,
	"go_test:go/parser": 1.30,
	"go_test:go/printer": 1.61,
	"go_test:go/scanner": 0.89,
	"go_test:go/token": 0.92,
	"go_test:go/types": 5.24,
	"go_test:hash/adler32": 0.62,
	"go_test:hash/crc32": 0.68,
	"go_test:hash/crc64": 0.55,
	"go_test:hash/fnv": 0.66,
	"go_test:html": 0.74,
	"go_test:html/template": 1.93,
	"go_test:image": 1.42,
	"go_test:image/color": 0.77,
	"go_test:image/draw": 1.32,
	"go_test:image/gif": 1.15,
	"go_test:image/jpeg": 1.32,
	"go_test:image/png": 1.23,
	"go_test:index/suffixarray": 0.79,
	"go_test:internal/singleflight": 0.66,
	"go_test:io": 0.97,
	"go_test:io/ioutil": 0.73,
	"go_test:log": 0.72,
	"go_test:log/syslog": 2.93,
	"go_test:math": 1.59,
	"go_test:math/big": 3.75,
	"go_test:math/cmplx": 0.81,
	"go_test:math/rand": 1.15,
	"go_test:mime": 1.01,
	"go_test:mime/multipart": 1.51,
	"go_test:mime/quotedprintable": 0.95,
	"go_test:net": 6.71,
	"go_test:net/http": 9.41,
	"go_test:net/http/cgi": 2.00,
	"go_test:net/http/cookiejar": 1.51,
	"go_test:net/http/fcgi": 1.43,
	"go_test:net/http/httptest": 1.36,
	"go_test:net/http/httputil": 1.54,
	"go_test:net/http/internal": 0.68,
	"go_test:net/internal/socktest": 0.58,
	"go_test:net/mail": 0.92,
	"go_test:net/rpc": 1.95,
	"go_test:net/rpc/jsonrpc": 1.50,
	"go_test:net/smtp": 1.43,
	"go_test:net/textproto": 1.01,
	"go_test:net/url": 1.45,
	"go_test:os": 1.88,
	"go_test:os/exec": 2.13,
	"go_test:os/signal": 4.22,
	"go_test:os/user": 0.93,
	"go_test:path": 0.68,
	"go_test:path/filepath": 1.14,
	"go_test:reflect": 3.42,
	"go_test:regexp": 1.65,
	"go_test:regexp/syntax": 1.40,
	"go_test:runtime": 21.02,
	"go_test:runtime/debug": 0.79,
	"go_test:runtime/pprof": 8.01,
	"go_test:sort": 0.96,
	"go_test:strconv": 1.60,
	"go_test:strings": 1.51,
	"go_test:sync": 1.05,
	"go_test:sync/atomic": 1.13,
	"go_test:syscall": 1.69,
	"go_test:testing": 3.70,
	"go_test:testing/quick": 0.74,
	"go_test:text/scanner": 0.79,
	"go_test:text/tabwriter": 0.71,
	"go_test:text/template": 1.65,
	"go_test:text/template/parse": 1.25,
	"go_test:time": 4.20,
	"go_test:unicode": 0.68,
	"go_test:unicode/utf16": 0.77,
	"go_test:unicode/utf8": 0.71,
	"go_test:cmd/addr2line": 1.73,
	"go_test:cmd/api": 1.33,
	"go_test:cmd/asm/internal/asm": 1.24,
	"go_test:cmd/asm/internal/lex": 0.91,
	"go_test:cmd/compile/internal/big": 5.26,
	"go_test:cmd/cover": 3.32,
	"go_test:cmd/fix": 1.26,
	"go_test:cmd/go": 36,
	"go_test:cmd/gofmt": 1.06,
	"go_test:cmd/internal/goobj": 0.65,
	"go_test:cmd/internal/obj": 1.16,
	"go_test:cmd/internal/obj/x86": 1.04,
	"go_test:cmd/internal/rsc.io/arm/armasm": 1.92,
	"go_test:cmd/internal/rsc.io/x86/x86asm": 2.22,
	"go_test:cmd/newlink": 1.48,
	"go_test:cmd/nm": 1.84,
	"go_test:cmd/objdump": 3.60,
	"go_test:cmd/pack": 2.64,
	"go_test:cmd/pprof/internal/profile": 1.29,
	"runtime:cpu124": 44.78,
	"sync_cpu": 1.01,
	"cgo_stdio": 1.53,
	"cgo_life": 1.56,
	"cgo_test": 45.60,
	"race": 42.55,
	"testgodefs": 2.37,
	"testso": 2.72,
	"testcarchive": 11.11,
	"testcshared": 15.80,
	"testshared": 7.13,
	"testasan": 2.56,
	"cgo_errors": 7.03,
	"testsigfwd": 2.74,
	"doc_progs": 5.38,
	"wiki": 3.56,
	"shootout": 11.34,
	"bench_go1": 3.72,
	"test": 45, // old, but valid for a couple weeks from 2015-06-04
	"test:0_5": 10,
	"test:1_5": 10,
	"test:2_5": 10,
	"test:3_5": 10,
	"test:4_5": 10,
	"codewalk": 2.42,
	"api": 7.38,
}
| 1765 | |
| 1766 | // testDuration predicts how long the dist test 'name' will take. |
| 1767 | // It's only a scheduling guess. |
| 1768 | func testDuration(name string) time.Duration { |
| 1769 | if secs, ok := fixedTestDuration[name]; ok { |
| 1770 | return secs.Duration() |
| 1771 | } |
| 1772 | return minGoTestSpeed * 2 |
| 1773 | } |
| 1774 | |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 1775 | func (st *buildStatus) runSubrepoTests() (remoteErr, err error) { |
| 1776 | st.logEventTime("fetching_subrepo", st.subName) |
| 1777 | |
| 1778 | workDir, err := st.bc.WorkDir() |
| 1779 | if err != nil { |
| 1780 | log.Printf("error discovering workdir for helper %s: %v", st.bc.IPPort(), err) |
| 1781 | return |
| 1782 | } |
| 1783 | goroot := st.conf.FilePathJoin(workDir, "go") |
| 1784 | gopath := st.conf.FilePathJoin(workDir, "gopath") |
| 1785 | |
| 1786 | fetched := map[string]bool{} |
| 1787 | toFetch := []string{st.subName} |
| 1788 | |
| 1789 | // fetch checks out the provided sub-repo to the buildlet's workspace. |
| 1790 | fetch := func(repo, rev string) error { |
| 1791 | fetched[repo] = true |
| 1792 | tgz, err := getSourceTgz(st, repo, rev) |
| 1793 | if err != nil { |
| 1794 | return err |
| 1795 | } |
| 1796 | return st.bc.PutTar(tgz, "gopath/src/"+subrepoPrefix+repo) |
| 1797 | } |
| 1798 | |
| 1799 | // findDeps uses 'go list' on the checked out repo to find its |
| 1800 | // dependencies, and adds any not-yet-fetched deps to toFetched. |
| 1801 | findDeps := func(repo string) error { |
| 1802 | repoPath := subrepoPrefix + repo |
| 1803 | var buf bytes.Buffer |
| 1804 | rErr, err := st.bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{ |
| 1805 | Output: &buf, |
| 1806 | ExtraEnv: append(st.conf.Env(), "GOROOT="+goroot, "GOPATH="+gopath), |
| 1807 | Path: []string{"$WORKDIR/go/bin", "$PATH"}, |
| 1808 | Args: []string{"list", "-f", `{{range .Deps}}{{printf "%v\n" .}}{{end}}`, repoPath + "/..."}, |
| 1809 | }) |
| 1810 | if err != nil { |
| 1811 | return fmt.Errorf("exec go list on buildlet: %v", err) |
| 1812 | } |
| 1813 | if rErr != nil { |
| 1814 | return fmt.Errorf("go list error on buildlet: %v\n%s", rErr, buf.Bytes()) |
| 1815 | } |
| 1816 | for _, p := range strings.Fields(buf.String()) { |
| 1817 | if !strings.HasPrefix(p, subrepoPrefix) || strings.HasPrefix(p, repoPath) { |
| 1818 | continue |
| 1819 | } |
| 1820 | repo = strings.TrimPrefix(p, subrepoPrefix) |
| 1821 | if i := strings.Index(repo, "/"); i >= 0 { |
| 1822 | repo = repo[:i] |
| 1823 | } |
| 1824 | if !fetched[repo] { |
| 1825 | toFetch = append(toFetch, repo) |
| 1826 | } |
| 1827 | } |
| 1828 | return nil |
| 1829 | } |
| 1830 | |
| 1831 | // Recursively fetch the repo and its dependencies. |
| 1832 | // Dependencies are always fetched at master, which isn't |
| 1833 | // great but the dashboard data model doesn't track |
| 1834 | // sub-repo dependencies. TODO(adg): fix this somehow?? |
| 1835 | for i := 0; i < len(toFetch); i++ { |
| 1836 | repo := toFetch[i] |
| 1837 | if fetched[repo] { |
| 1838 | continue |
| 1839 | } |
| 1840 | // Fetch the HEAD revision by default. |
| 1841 | subrepoHead.Lock() |
| 1842 | rev := subrepoHead.m[repo] |
| 1843 | subrepoHead.Unlock() |
| 1844 | if rev == "" { |
| 1845 | rev = "master" // should happen rarely; ok if it does. |
| 1846 | } |
| 1847 | // For the repo under test, choose that specific revision. |
| 1848 | if i == 0 { |
| 1849 | rev = st.subRev |
| 1850 | } |
| 1851 | if err := fetch(repo, rev); err != nil { |
| 1852 | return nil, err |
| 1853 | } |
| 1854 | if err := findDeps(repo); err != nil { |
| 1855 | return nil, err |
| 1856 | } |
| 1857 | } |
| 1858 | |
| 1859 | st.logEventTime("starting_tests", st.subName) |
| 1860 | defer st.logEventTime("tests_complete") |
| 1861 | return st.bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{ |
| 1862 | Output: st, |
| 1863 | ExtraEnv: append(st.conf.Env(), "GOROOT="+goroot, "GOPATH="+gopath), |
| 1864 | Path: []string{"$WORKDIR/go/bin", "$PATH"}, |
| 1865 | Args: []string{"test", "-short", subrepoPrefix + st.subName + "/..."}, |
| 1866 | }) |
| 1867 | } |
| 1868 | |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1869 | // runTests is only called for builders which support a split make/run |
| 1870 | // (should be everything, at least soon). Currently (2015-05-27) iOS |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 1871 | // and Android and Nacl do not. |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1872 | func (st *buildStatus) runTests(helpers <-chan *buildlet.Client) (remoteErr, err error) { |
| 1873 | testNames, err := st.distTestList() |
| 1874 | if err != nil { |
| 1875 | return nil, fmt.Errorf("distTestList: %v", err) |
| 1876 | } |
| 1877 | set := st.newTestSet(testNames) |
| 1878 | st.logEventTime("starting_tests", fmt.Sprintf("%d tests", len(set.items))) |
| 1879 | startTime := time.Now() |
| 1880 | |
| 1881 | // We use our original buildlet to run the tests in order, to |
| 1882 | // make the streaming somewhat smooth and not incredibly |
| 1883 | // lumpy. The rest of the buildlets run the largest tests |
| 1884 | // first (critical path scheduling). |
| 1885 | go func() { |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 1886 | for { |
| 1887 | tis, ok := set.testsToRunInOrder() |
| 1888 | if !ok { |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 1889 | select { |
| 1890 | case <-st.donec: |
| 1891 | return |
| 1892 | case <-time.After(5 * time.Second): |
| 1893 | } |
| 1894 | continue |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 1895 | } |
| 1896 | goroot := "" // no need to override; main buildlet's GOROOT is baked into binaries |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1897 | st.runTestsOnBuildlet(st.bc, tis, goroot) |
| 1898 | } |
| 1899 | }() |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1900 | go func() { |
| 1901 | for helper := range helpers { |
| 1902 | go func(bc *buildlet.Client) { |
| 1903 | defer st.logEventTime("closed_helper", bc.IPPort()) |
| 1904 | defer bc.Close() |
Brad Fitzpatrick | 378fb29 | 2015-06-10 13:59:42 -0700 | [diff] [blame] | 1905 | defer nukeIfBroken(bc) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1906 | if devPause { |
| 1907 | defer time.Sleep(5 * time.Minute) |
| 1908 | defer st.logEventTime("DEV_HELPER_SLEEP", bc.IPPort()) |
| 1909 | } |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 1910 | st.logEventTime("got_helper", bc.String()) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1911 | if err := bc.PutTarFromURL(st.snapshotURL(), "go"); err != nil { |
| 1912 | log.Printf("failed to extract snapshot for helper %s: %v", bc.IPPort(), err) |
| 1913 | return |
| 1914 | } |
| 1915 | workDir, err := bc.WorkDir() |
| 1916 | if err != nil { |
| 1917 | log.Printf("error discovering workdir for helper %s: %v", bc.IPPort(), err) |
| 1918 | return |
| 1919 | } |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 1920 | st.logEventTime("setup_helper", bc.String()) |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 1921 | goroot := st.conf.FilePathJoin(workDir, "go") |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 1922 | for !bc.IsBroken() { |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 1923 | tis, ok := set.testsToRunBiggestFirst() |
| 1924 | if !ok { |
| 1925 | st.logEventTime("biggest_tests_complete", bc.IPPort()) |
| 1926 | return |
| 1927 | } |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1928 | st.runTestsOnBuildlet(bc, tis, goroot) |
| 1929 | } |
| 1930 | }(helper) |
| 1931 | } |
| 1932 | }() |
| 1933 | |
| 1934 | var lastBanner string |
| 1935 | var serialDuration time.Duration |
| 1936 | for _, ti := range set.items { |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 1937 | AwaitDone: |
| 1938 | for { |
| 1939 | select { |
| 1940 | case <-ti.done: // wait for success |
| 1941 | break AwaitDone |
| 1942 | case <-time.After(30 * time.Second): |
| 1943 | st.logEventTime("still_waiting_on_test", ti.name) |
| 1944 | } |
| 1945 | } |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1946 | |
| 1947 | serialDuration += ti.execDuration |
| 1948 | if len(ti.output) > 0 { |
| 1949 | banner, out := parseOutputAndBanner(ti.output) |
| 1950 | if banner != lastBanner { |
| 1951 | lastBanner = banner |
| 1952 | fmt.Fprintf(st, "\n##### %s\n", banner) |
| 1953 | } |
| 1954 | if devCluster { |
| 1955 | out = bytes.TrimSuffix(out, nl) |
| 1956 | st.Write(out) |
| 1957 | fmt.Fprintf(st, " (shard %s; par=%d)\n", ti.shardIPPort, ti.groupSize) |
| 1958 | } else { |
| 1959 | st.Write(out) |
| 1960 | } |
| 1961 | } |
| 1962 | |
| 1963 | if ti.remoteErr != nil { |
| 1964 | set.cancelAll() |
| 1965 | return fmt.Errorf("dist test failed: %s: %v", ti.name, ti.remoteErr), nil |
| 1966 | } |
| 1967 | } |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 1968 | elapsed := time.Since(startTime) |
| 1969 | var msg string |
| 1970 | if st.conf.NumTestHelpers > 0 { |
| 1971 | msg = fmt.Sprintf("took %v; aggregate %v; saved %v", elapsed, serialDuration, serialDuration-elapsed) |
| 1972 | } else { |
| 1973 | msg = fmt.Sprintf("took %v", elapsed) |
| 1974 | } |
| 1975 | st.logEventTime("tests_complete", msg) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 1976 | fmt.Fprintf(st, "\nAll tests passed.\n") |
| 1977 | return nil, nil |
| 1978 | } |
| 1979 | |
const (
	banner       = "XXXBANNERXXX:" // value handed to dist via its --banner flag
	bannerPrefix = "\n" + banner   // what dist emits: a newline, then the banner flag value
)

// bannerPrefixBytes is bannerPrefix as a byte slice, for matching
// against raw test output.
var bannerPrefixBytes = []byte(bannerPrefix)

// parseOutputAndBanner splits dist test output b into its banner and
// the remaining output.
//
// If b begins with bannerPrefix and that first line is newline
// terminated, banner is the text after the prefix and out is
// everything following the newline. If the prefix is present but no
// newline follows, banner is empty and out is b without the prefix.
// If the prefix is absent, banner is empty and out is b unchanged.
func parseOutputAndBanner(b []byte) (banner string, out []byte) {
	if !bytes.HasPrefix(b, bannerPrefixBytes) {
		return "", b
	}
	rest := b[len(bannerPrefixBytes):]
	eol := bytes.IndexByte(rest, '\n')
	if eol < 0 {
		return "", rest
	}
	return string(rest[:eol]), rest[eol+1:]
}
| 1998 | |
// maxTestExecErrors is the number of test execution failures at which
// we give up and stop trying and instead permanently fail the test.
// Note that this is not related to whether the test failed remotely,
// but whether we were unable to start or complete watching it run
// (a communication error).
| 2004 | const maxTestExecErrors = 3 |
| 2005 | |
// execTimeout returns how long a single "go tool dist test" invocation
// covering testNames is allowed to run. The names are currently
// ignored and a fixed limit is returned.
func execTimeout(testNames []string) time.Duration {
	// TODO(bradfitz): something smarter probably.
	const fixedLimit = 10 * time.Minute
	return fixedLimit
}
| 2010 | |
// runTestsOnBuildlet runs tis on bc, using the optional goroot environment variable.
//
// All of tis are run in a single "go tool dist test" invocation. Only
// go_test:* tests may follow the first item; anything else panics.
//
// The outcome is recorded on the test items rather than returned:
//   - If the exec itself failed (err != nil, a communication error), each
//     item's numFail is bumped and the item is either released for another
//     attempt (retry) or, once numFail reaches maxTestExecErrors, marked
//     failed and its done channel closed.
//   - Otherwise the combined output, remote error and exec duration are
//     stored on the first item only, groupSize and shardIPPort are stored
//     on every item, and every item's done channel is closed.
func (st *buildStatus) runTestsOnBuildlet(bc *buildlet.Client, tis []*testItem, goroot string) {
	names := make([]string, len(tis))
	for i, ti := range tis {
		names[i] = ti.name
		// Only the first item is exempt from the go_test:* requirement.
		if i > 0 && !strings.HasPrefix(ti.name, "go_test:") {
			panic("only go_test:* tests may be merged")
		}
	}
	which := fmt.Sprintf("%s: %v", bc.IPPort(), names)
	st.logEventTime("start_tests", which)

	var buf bytes.Buffer
	t0 := time.Now()
	remoteErr, err := bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{
		// We set Dir to "." instead of the default ("go/bin") so when the dist tests
		// try to run os/exec.Command("go", "test", ...), the LookPath of "go" doesn't
		// return "./go.exe" (which exists in the current directory: "go/bin") and then
		// fail when dist tries to run the binary in dir "$GOROOT/src", since
		// "$GOROOT/src" + "./go.exe" doesn't exist. Perhaps LookPath should return
		// an absolute path.
		Dir:      ".",
		Output:   &buf, // see "maybe stream lines" TODO below
		ExtraEnv: append(st.conf.Env(), "GOROOT="+goroot),
		Timeout:  execTimeout(names),
		Path:     []string{"$WORKDIR/go/bin", "$PATH"},
		Args: append([]string{
			"tool", "dist", "test", "--no-rebuild", "--banner=" + banner,
		}, names...),
	})
	// err is a failure to run or watch the command; remoteErr is the
	// command itself failing on the buildlet.
	summary := "ok"
	if err != nil {
		summary = "commErr=" + err.Error()
	} else if remoteErr != nil {
		summary = "test failed remotely"
	}
	execDuration := time.Since(t0)
	st.logEventTime("end_tests", fmt.Sprintf("%s; %s (test exec = %v)", which, summary, execDuration))
	if err != nil {
		for _, ti := range tis {
			ti.numFail++
			st.logf("Execution error running %s on %s: %v (numFails = %d)", ti.name, bc, err, ti.numFail)
			if ti.numFail >= maxTestExecErrors {
				// Give up on this test: record a synthetic failure and
				// mark it done so waiters stop waiting.
				msg := fmt.Sprintf("Failed to schedule %q test after %d tries.\n", ti.name, maxTestExecErrors)
				ti.output = []byte(msg)
				ti.remoteErr = errors.New(msg)
				close(ti.done)
			} else {
				ti.retry()
			}
		}
		return
	}

	// Strip dist's own summary line (first occurrence of each form).
	out := buf.Bytes()
	out = bytes.Replace(out, []byte("\nALL TESTS PASSED (some were excluded)\n"), nil, 1)
	out = bytes.Replace(out, []byte("\nALL TESTS PASSED\n"), nil, 1)

	for _, ti := range tis {
		// Set every result field before closing done.
		ti.output = out
		ti.remoteErr = remoteErr
		ti.execDuration = execDuration
		ti.groupSize = len(tis)
		ti.shardIPPort = bc.IPPort()
		close(ti.done)

		// After the first one, make the rest succeed with no output.
		// TODO: maybe stream lines (set Output to a line-reading
		// Writer instead of &buf). for now we just wait for them in
		// ~10 second batches. Doesn't look as smooth on the output,
		// though.
		out = nil
		remoteErr = nil
		execDuration = 0
	}
}
| 2087 | |
// testSet is a collection of test items together with two lazily
// built orderings of them from which callers claim work.
type testSet struct {
	st    *buildStatus
	items []*testItem

	// mu is held by testsToRunInOrder and testsToRunBiggestFirst while
	// they initialize and scan the orderings below.
	mu           sync.Mutex
	inOrder      [][]*testItem // built on first use by initInOrder
	biggestFirst [][]*testItem // built on first use by initBiggestFirst
}
| 2096 | |
| 2097 | // cancelAll cancels all pending tests. |
| 2098 | func (s *testSet) cancelAll() { |
| 2099 | for _, ti := range s.items { |
| 2100 | ti.tryTake() // ignore return value |
| 2101 | } |
| 2102 | } |
| 2103 | |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2104 | func (s *testSet) testsToRunInOrder() (chunk []*testItem, ok bool) { |
| 2105 | s.mu.Lock() |
| 2106 | defer s.mu.Unlock() |
| 2107 | if s.inOrder == nil { |
| 2108 | s.initInOrder() |
| 2109 | } |
| 2110 | return s.testsFromSlice(s.inOrder) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2111 | } |
| 2112 | |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2113 | func (s *testSet) testsToRunBiggestFirst() (chunk []*testItem, ok bool) { |
| 2114 | s.mu.Lock() |
| 2115 | defer s.mu.Unlock() |
| 2116 | if s.biggestFirst == nil { |
| 2117 | s.initBiggestFirst() |
| 2118 | } |
| 2119 | return s.testsFromSlice(s.biggestFirst) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2120 | } |
| 2121 | |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2122 | func (s *testSet) testsFromSlice(chunkList [][]*testItem) (chunk []*testItem, ok bool) { |
| 2123 | for _, candChunk := range chunkList { |
| 2124 | for _, ti := range candChunk { |
| 2125 | if ti.tryTake() { |
| 2126 | chunk = append(chunk, ti) |
| 2127 | } |
| 2128 | } |
| 2129 | if len(chunk) > 0 { |
| 2130 | return chunk, true |
| 2131 | } |
| 2132 | } |
| 2133 | return nil, false |
| 2134 | } |
| 2135 | |
| 2136 | func (s *testSet) initInOrder() { |
| 2137 | names := make([]string, len(s.items)) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2138 | namedItem := map[string]*testItem{} |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2139 | for i, ti := range s.items { |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2140 | names[i] = ti.name |
| 2141 | namedItem[ti.name] = ti |
| 2142 | } |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2143 | |
| 2144 | // First do the go_test:* ones. partitionGoTests |
| 2145 | // only returns those, which are the ones we merge together. |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2146 | stdSets := partitionGoTests(names) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2147 | for _, set := range stdSets { |
| 2148 | tis := make([]*testItem, len(set)) |
| 2149 | for i, name := range set { |
| 2150 | tis[i] = namedItem[name] |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2151 | } |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2152 | s.inOrder = append(s.inOrder, tis) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2153 | } |
| 2154 | |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2155 | // Then do the misc tests, which are always by themselves. |
| 2156 | // (No benefit to merging them) |
| 2157 | for _, ti := range s.items { |
| 2158 | if !strings.HasPrefix(ti.name, "go_test:") { |
| 2159 | s.inOrder = append(s.inOrder, []*testItem{ti}) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2160 | } |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2161 | } |
| 2162 | } |
| 2163 | |
| 2164 | func (s *testSet) initBiggestFirst() { |
| 2165 | items := append([]*testItem(nil), s.items...) |
| 2166 | sort.Sort(sort.Reverse(byTestDuration(items))) |
| 2167 | for _, item := range items { |
| 2168 | s.biggestFirst = append(s.biggestFirst, []*testItem{item}) |
| 2169 | } |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2170 | } |
| 2171 | |
// testItem is a single dist test belonging to a testSet, together with
// the bookkeeping for claiming it and the results of running it.
type testItem struct {
	set      *testSet
	name     string        // "go_test:sort"
	duration time.Duration // optional approximate size

	// take is claimed with a non-blocking send (tryTake) and released
	// with a receive (retry).
	take chan token // buffered size 1: sending takes ownership of rest of fields:

	// NOTE(review): there is no field named "failed"; the comment below
	// presumably refers to remoteErr — confirm.
	done    chan token // closed when done; guards output & failed
	numFail int        // how many times it's failed to execute

	// groupSize is the number of tests which were run together
	// along with this one with "go dist test".
	// This is 1 for non-std/cmd tests, and usually >1 for std/cmd tests.
	groupSize   int
	shardIPPort string // buildlet's IPPort, for debugging

	// the following are only set for the first item in a group:
	output       []byte
	remoteErr    error         // real test failure (not a communications failure)
	execDuration time.Duration // actual time
}
| 2193 | |
| 2194 | func (ti *testItem) tryTake() bool { |
| 2195 | select { |
| 2196 | case ti.take <- token{}: |
| 2197 | return true |
| 2198 | default: |
| 2199 | return false |
| 2200 | } |
| 2201 | } |
| 2202 | |
| 2203 | func (ti *testItem) isDone() bool { |
| 2204 | select { |
| 2205 | case <-ti.done: |
| 2206 | return true |
| 2207 | default: |
| 2208 | return false |
| 2209 | } |
| 2210 | } |
| 2211 | |
| 2212 | // retry reschedules the test to run again, if a machine died before |
| 2213 | // or during execution, so its results aren't yet known. |
| 2214 | // The caller must own the 'take' semaphore. |
| 2215 | func (ti *testItem) retry() { |
| 2216 | // release it to make it available for somebody else to try later: |
| 2217 | <-ti.take |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2218 | } |
| 2219 | |
| 2220 | type byTestDuration []*testItem |
| 2221 | |
| 2222 | func (s byTestDuration) Len() int { return len(s) } |
| 2223 | func (s byTestDuration) Less(i, j int) bool { return s[i].duration < s[j].duration } |
| 2224 | func (s byTestDuration) Swap(i, j int) { s[i], s[j] = s[j], s[i] } |
| 2225 | |
// eventAndTime is one timestamped entry in a build's event list, as
// appended by buildStatus.logEventTime.
type eventAndTime struct {
	t    time.Time // when the event was logged
	evt  string    // event name
	text string    // optional accompanying text; empty if none was given
}
| 2231 | |
// buildStatus is the status of a build.
type buildStatus struct {
	// Immutable:
	builderRev
	conf      dashboard.BuildConfig
	startTime time.Time     // actually time of newBuild (~same thing)
	trySet    *trySet       // or nil
	donec     chan struct{} // closed when done

	onceInitHelpers sync.Once // guards call of onceInitHelpersFunc, to init:
	helpers         <-chan *buildlet.Client

	mu              sync.Mutex       // guards following
	failURL         string           // if non-empty, permanent URL of failure
	bc              *buildlet.Client // nil initially, until pool returns one
	done            time.Time        // finished running
	succeeded       bool             // set when done
	output          bytes.Buffer     // stdout and stderr
	startedPinging  bool             // started pinging the go dashboard
	events          []eventAndTime   // appended to by logEventTime
	watcher         []*logWatcher    // registered by watchLogs, removed by unregisterWatcher
	useSnapshotMemo *bool
}
| 2255 | |
| 2256 | func (st *buildStatus) setDone(succeeded bool) { |
| 2257 | st.mu.Lock() |
| 2258 | defer st.mu.Unlock() |
| 2259 | st.succeeded = succeeded |
| 2260 | st.done = time.Now() |
Andrew Gerrand | 5f73aab | 2015-03-03 10:30:18 +1100 | [diff] [blame] | 2261 | st.notifyWatchersLocked(true) |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 2262 | close(st.donec) |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 2263 | } |
| 2264 | |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 2265 | func (st *buildStatus) isRunning() bool { |
| 2266 | st.mu.Lock() |
| 2267 | defer st.mu.Unlock() |
Brad Fitzpatrick | ec2973a | 2015-03-04 07:30:35 -0800 | [diff] [blame] | 2268 | return st.isRunningLocked() |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 2269 | } |
| 2270 | |
Brad Fitzpatrick | ec2973a | 2015-03-04 07:30:35 -0800 | [diff] [blame] | 2271 | func (st *buildStatus) isRunningLocked() bool { return st.done.IsZero() } |
| 2272 | |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2273 | func (st *buildStatus) logf(format string, args ...interface{}) { |
| 2274 | log.Printf("[build %s %s]: %s", st.name, st.rev, fmt.Sprintf(format, args...)) |
| 2275 | } |
| 2276 | |
| 2277 | func (st *buildStatus) logEventTime(event string, optText ...string) { |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 2278 | st.mu.Lock() |
| 2279 | defer st.mu.Unlock() |
Brad Fitzpatrick | e8a3d43 | 2015-06-10 18:10:31 -0700 | [diff] [blame] | 2280 | switch event { |
| 2281 | case "creating_gce_instance", "got_machine", "got_buildlet": |
| 2282 | if !st.startedPinging { |
| 2283 | st.startedPinging = true |
| 2284 | go st.pingDashboard() |
| 2285 | } |
| 2286 | } |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2287 | var text string |
| 2288 | if len(optText) > 0 { |
| 2289 | if len(optText) > 1 { |
| 2290 | panic("usage") |
| 2291 | } |
| 2292 | text = optText[0] |
| 2293 | } |
| 2294 | st.events = append(st.events, eventAndTime{ |
| 2295 | t: time.Now(), |
| 2296 | evt: event, |
| 2297 | text: text, |
| 2298 | }) |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 2299 | } |
| 2300 | |
Brad Fitzpatrick | f72e62c | 2015-01-04 21:46:23 -0800 | [diff] [blame] | 2301 | func (st *buildStatus) hasEvent(event string) bool { |
| 2302 | st.mu.Lock() |
| 2303 | defer st.mu.Unlock() |
| 2304 | for _, e := range st.events { |
| 2305 | if e.evt == event { |
| 2306 | return true |
| 2307 | } |
| 2308 | } |
| 2309 | return false |
| 2310 | } |
| 2311 | |
David Crawshaw | dd0cf9f | 2015-04-29 17:58:09 -0400 | [diff] [blame] | 2312 | // HTMLStatusLine returns the HTML to show within the <pre> block on |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 2313 | // the main page's list of active builds. |
David Crawshaw | dd0cf9f | 2015-04-29 17:58:09 -0400 | [diff] [blame] | 2314 | func (st *buildStatus) HTMLStatusLine() template.HTML { |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 2315 | st.mu.Lock() |
| 2316 | defer st.mu.Unlock() |
| 2317 | |
| 2318 | urlPrefix := "https://go-review.googlesource.com/#/q/" |
| 2319 | if strings.Contains(st.name, "gccgo") { |
| 2320 | urlPrefix = "https://code.google.com/p/gofrontend/source/detail?r=" |
| 2321 | } |
| 2322 | |
| 2323 | var buf bytes.Buffer |
| 2324 | fmt.Fprintf(&buf, "<a href='https://github.com/golang/go/wiki/DashboardBuilders'>%s</a> rev <a href='%s%s'>%s</a>", |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 2325 | st.name, urlPrefix, st.rev, st.rev[:8]) |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 2326 | if st.isSubrepo() { |
| 2327 | fmt.Fprintf(&buf, " (sub-repo %s rev <a href='%s%s'>%s</a>)", |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 2328 | st.subName, urlPrefix, st.subRev, st.subRev[:8]) |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 2329 | } |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 2330 | if ts := st.trySet; ts != nil { |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 2331 | fmt.Fprintf(&buf, " (<a href='/try?commit=%v'>trybot set</a> for <a href='https://go-review.googlesource.com/#/q/%s'>%s</a>)", |
| 2332 | ts.Commit[:8], |
Brad Fitzpatrick | 421541f | 2015-02-11 22:57:39 -0800 | [diff] [blame] | 2333 | ts.ChangeID, ts.ChangeID[:8]) |
| 2334 | } |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 2335 | |
| 2336 | if st.done.IsZero() { |
| 2337 | buf.WriteString(", running") |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 2338 | fmt.Fprintf(&buf, "; <a href='%s'>build log</a>; %s", st.logsURLLocked(), html.EscapeString(st.bc.String())) |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 2339 | } else if st.succeeded { |
| 2340 | buf.WriteString(", succeeded") |
| 2341 | } else { |
| 2342 | buf.WriteString(", failed") |
Brad Fitzpatrick | d4ea014 | 2015-06-12 10:31:58 -0700 | [diff] [blame^] | 2343 | fmt.Fprintf(&buf, "; <a href='%s'>build log</a>; %s", st.logsURLLocked(), html.EscapeString(st.bc.String())) |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 2344 | } |
| 2345 | |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 2346 | t := st.done |
| 2347 | if t.IsZero() { |
Brad Fitzpatrick | 7b2f9d7 | 2015-03-27 17:45:12 +0100 | [diff] [blame] | 2348 | t = st.startTime |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 2349 | } |
| 2350 | fmt.Fprintf(&buf, ", %v ago\n", time.Since(t)) |
Brad Fitzpatrick | 36d41e9 | 2015-01-14 12:31:04 -0800 | [diff] [blame] | 2351 | st.writeEventsLocked(&buf, true) |
David Crawshaw | dd0cf9f | 2015-04-29 17:58:09 -0400 | [diff] [blame] | 2352 | return template.HTML(buf.String()) |
Brad Fitzpatrick | 36d41e9 | 2015-01-14 12:31:04 -0800 | [diff] [blame] | 2353 | } |
| 2354 | |
Brad Fitzpatrick | 777a5bf | 2015-06-09 12:17:36 -0700 | [diff] [blame] | 2355 | func (st *buildStatus) logsURLLocked() string { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 2356 | host := "farmer.golang.org" |
| 2357 | if devCluster { |
| 2358 | host = externalIP |
Brad Fitzpatrick | 777a5bf | 2015-06-09 12:17:36 -0700 | [diff] [blame] | 2359 | } |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 2360 | u := fmt.Sprintf("http://%v/temporarylogs?name=%s&rev=%s&st=%p", host, st.name, st.rev, st) |
| 2361 | if st.isSubrepo() { |
| 2362 | u += fmt.Sprintf("&subName=%v&subRev=%v", st.subName, st.subRev) |
| 2363 | } |
| 2364 | return u |
Brad Fitzpatrick | 36d41e9 | 2015-01-14 12:31:04 -0800 | [diff] [blame] | 2365 | } |
| 2366 | |
| 2367 | // st.mu must be held. |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2368 | func (st *buildStatus) writeEventsLocked(w io.Writer, htmlMode bool) { |
| 2369 | var lastT time.Time |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 2370 | for i, evt := range st.events { |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2371 | lastT = evt.t |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 2372 | var elapsed string |
| 2373 | if i != 0 { |
| 2374 | elapsed = fmt.Sprintf("+%0.1fs", evt.t.Sub(st.events[i-1].t).Seconds()) |
| 2375 | } |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2376 | e := evt.evt |
| 2377 | text := evt.text |
| 2378 | if htmlMode { |
| 2379 | if e == "running_exec" { |
Brad Fitzpatrick | 777a5bf | 2015-06-09 12:17:36 -0700 | [diff] [blame] | 2380 | e = fmt.Sprintf("<a href='%s'>%s</a>", st.logsURLLocked(), e) |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2381 | } |
| 2382 | e = "<b>" + e + "</b>" |
| 2383 | text = "<i>" + html.EscapeString(text) + "</i>" |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 2384 | } |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2385 | fmt.Fprintf(w, " %7s %v %s %s\n", elapsed, evt.t.Format(time.RFC3339), e, text) |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 2386 | } |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2387 | if st.isRunningLocked() { |
| 2388 | fmt.Fprintf(w, " %7s (now)\n", fmt.Sprintf("+%0.1fs", time.Since(lastT).Seconds())) |
| 2389 | } |
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2390 | |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 2391 | } |
| 2392 | |
| 2393 | func (st *buildStatus) logs() string { |
| 2394 | st.mu.Lock() |
Andrew Gerrand | 5f73aab | 2015-03-03 10:30:18 +1100 | [diff] [blame] | 2395 | defer st.mu.Unlock() |
| 2396 | return st.output.String() |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 2397 | } |
| 2398 | |
| 2399 | func (st *buildStatus) Write(p []byte) (n int, err error) { |
| 2400 | st.mu.Lock() |
| 2401 | defer st.mu.Unlock() |
| 2402 | const maxBufferSize = 2 << 20 // 2MB of output is way more than we expect. |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 2403 | plen := len(p) |
Brad Fitzpatrick | 9e9c0a80 | 2015-01-02 15:00:54 -0800 | [diff] [blame] | 2404 | if st.output.Len()+len(p) > maxBufferSize { |
| 2405 | p = p[:maxBufferSize-st.output.Len()] |
| 2406 | } |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 2407 | st.output.Write(p) // bytes.Buffer can't fail |
Andrew Gerrand | 5f73aab | 2015-03-03 10:30:18 +1100 | [diff] [blame] | 2408 | st.notifyWatchersLocked(false) |
Brad Fitzpatrick | 8831f36 | 2015-01-02 17:16:44 -0800 | [diff] [blame] | 2409 | return plen, nil |
| 2410 | } |
| 2411 | |
// logWatcher holds the state of a client watching the logs of a running build.
type logWatcher struct {
	ch     chan []byte // receives new log data; closed by notifyWatchersLocked when the build is done
	offset int         // Offset of seen logs (offset == len(buf) means "up to date")
}
| 2417 | |
| 2418 | // watchLogs returns a channel on which the build's logs is sent. |
| 2419 | // When the build is complete the channel is closed. |
| 2420 | func (st *buildStatus) watchLogs() <-chan []byte { |
| 2421 | st.mu.Lock() |
| 2422 | defer st.mu.Unlock() |
| 2423 | |
| 2424 | ch := make(chan []byte, 10) // room for a few log writes |
| 2425 | ch <- st.output.Bytes() |
Brad Fitzpatrick | ec2973a | 2015-03-04 07:30:35 -0800 | [diff] [blame] | 2426 | if !st.isRunningLocked() { |
Andrew Gerrand | 5f73aab | 2015-03-03 10:30:18 +1100 | [diff] [blame] | 2427 | close(ch) |
| 2428 | return ch |
| 2429 | } |
| 2430 | |
| 2431 | st.watcher = append(st.watcher, &logWatcher{ |
| 2432 | ch: ch, |
| 2433 | offset: st.output.Len(), |
| 2434 | }) |
| 2435 | return ch |
| 2436 | } |
| 2437 | |
| 2438 | // unregisterWatcher removes the provided channel from the list of watchers, |
| 2439 | // so that it receives no further log data. |
| 2440 | func (st *buildStatus) unregisterWatcher(ch <-chan []byte) { |
| 2441 | st.mu.Lock() |
| 2442 | defer st.mu.Unlock() |
| 2443 | |
| 2444 | for i, w := range st.watcher { |
| 2445 | if w.ch == ch { |
| 2446 | st.watcher = append(st.watcher[:i], st.watcher[i+1:]...) |
| 2447 | break |
| 2448 | } |
| 2449 | } |
| 2450 | } |
| 2451 | |
| 2452 | // notifyWatchersLocked pushes any new log data to watching clients. |
| 2453 | // If done is true it closes any watcher channels. |
| 2454 | // |
| 2455 | // NOTE: st.mu must be held. |
| 2456 | func (st *buildStatus) notifyWatchersLocked(done bool) { |
| 2457 | l := st.output.Len() |
| 2458 | for _, w := range st.watcher { |
| 2459 | if w.offset < l { |
| 2460 | select { |
| 2461 | case w.ch <- st.output.Bytes()[w.offset:]: |
| 2462 | w.offset = l |
| 2463 | default: |
| 2464 | // If the receiver isn't ready, drop the write. |
| 2465 | } |
| 2466 | } |
| 2467 | if done { |
| 2468 | close(w.ch) |
| 2469 | } |
| 2470 | } |
| 2471 | } |
| 2472 | |
// versionTgz returns a gzip-compressed tar archive containing a single
// file named VERSION (mode 0644) whose contents are "devel " followed
// by rev.
//
// rev is used verbatim: it is never interpreted as a format string, so
// a '%' in it is preserved.
func versionTgz(rev string) io.Reader {
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	tw := tar.NewWriter(zw)

	// Writing to a bytes.Buffer should never fail, so check
	// errors with an explosion:
	check := func(err error) {
		if err != nil {
			panic("previously assumed to never fail: " + err.Error())
		}
	}

	contents := "devel " + rev
	check(tw.WriteHeader(&tar.Header{
		Name: "VERSION",
		Mode: 0644,
		Size: int64(len(contents)),
	}))
	_, err := io.WriteString(tw, contents)
	check(err)
	// Close the tar writer before the gzip writer so that everything
	// is flushed into buf in the right order.
	check(tw.Close())
	check(zw.Close())
	return bytes.NewReader(buf.Bytes())
}
Brad Fitzpatrick | 79f3fc0 | 2015-05-27 21:51:25 -0700 | [diff] [blame] | 2498 | |
// sourceGroup is the singleflight group through which getSourceTgz
// runs its fetches, keyed by the same key as sourceCache.
var sourceGroup singleflight.Group

// sourceCache holds fetched source tarballs.
var sourceCache = lru.New(40) // "<repo>-<rev>" key (as built by getSourceTgz) -> []byte
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2502 | |
| 2503 | // repo is go.googlesource.com repo ("go", "net", etc) |
| 2504 | // rev is git revision. |
| 2505 | func getSourceTgz(el eventTimeLogger, repo, rev string) (tgz io.Reader, err error) { |
| 2506 | fromCache := false |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 2507 | key := fmt.Sprintf("%v-%v", repo, rev) |
| 2508 | vi, err, shared := sourceGroup.Do(key, func() (interface{}, error) { |
| 2509 | if tgzBytes, ok := sourceCache.Get(key); ok { |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2510 | fromCache = true |
| 2511 | return tgzBytes, nil |
| 2512 | } |
| 2513 | |
| 2514 | for i := 0; i < 10; i++ { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 2515 | el.logEventTime("fetching_source", fmt.Sprintf("%v from watcher", key)) |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2516 | tgzBytes, err := getSourceTgzFromWatcher(repo, rev) |
| 2517 | if err == nil { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 2518 | sourceCache.Add(key, tgzBytes) |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2519 | return tgzBytes, nil |
| 2520 | } |
| 2521 | log.Printf("Error fetching source %s/%s from watcher (after %v uptime): %v", |
| 2522 | repo, rev, time.Since(processStartTime), err) |
| 2523 | // Wait for watcher to start up. Give it a minute until |
| 2524 | // we try Gerrit. |
| 2525 | time.Sleep(6 * time.Second) |
| 2526 | } |
| 2527 | |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 2528 | el.logEventTime("fetching_source", fmt.Sprintf("%v from gerrit", key)) |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2529 | tgzBytes, err := getSourceTgzFromGerrit(repo, rev) |
| 2530 | if err == nil { |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 2531 | sourceCache.Add(key, tgzBytes) |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2532 | } |
| 2533 | return tgzBytes, err |
| 2534 | }) |
| 2535 | if err != nil { |
| 2536 | return nil, err |
| 2537 | } |
Andrew Gerrand | 234725b | 2015-06-04 16:45:17 -0700 | [diff] [blame] | 2538 | el.logEventTime("got_source", fmt.Sprintf("%v cache=%v shared=%v", key, fromCache, shared)) |
Brad Fitzpatrick | 1b1e086 | 2015-06-04 18:25:50 -0700 | [diff] [blame] | 2539 | return bytes.NewReader(vi.([]byte)), nil |
| 2540 | } |
| 2541 | |
| 2542 | func getSourceTgzFromGerrit(repo, rev string) (tgz []byte, err error) { |
| 2543 | return getSourceTgzFromURL("gerrit", repo, rev, "https://go.googlesource.com/"+repo+"/+archive/"+rev+".tar.gz") |
| 2544 | } |
| 2545 | |
| 2546 | func getSourceTgzFromWatcher(repo, rev string) (tgz []byte, err error) { |
| 2547 | return getSourceTgzFromURL("watcher", repo, rev, "http://"+gitArchiveAddr+"/"+repo+".tar.gz?rev="+rev) |
| 2548 | } |
| 2549 | |
// getSourceTgzFromURL downloads urlStr with http.Get and returns the
// complete response body.
//
// source, repo and rev are used only to build error messages. An error
// is returned when the request fails, when the response status is not
// 2xx (the error then includes the status line and up to 4KB of the
// body), when reading the body fails, or when the body is larger than
// maxSize bytes. On error the returned slice is always nil.
func getSourceTgzFromURL(source, repo, rev, urlStr string) (tgz []byte, err error) {
	const (
		maxSize       = 25 << 20 // seems unlikely; go source is 7.8MB on 2015-06-15
		maxErrSnippet = 4 << 10  // how much of a failed response's body to quote
	)

	resp, getErr := http.Get(urlStr)
	if getErr != nil {
		return nil, fmt.Errorf("fetching %s/%s from %s: %v", repo, rev, source, getErr)
	}
	defer resp.Body.Close()

	if is2xx := resp.StatusCode >= 200 && resp.StatusCode <= 299; !is2xx {
		// Best effort: the snippet only decorates the error message, so a
		// failure while reading it is deliberately ignored.
		snippet, _ := ioutil.ReadAll(io.LimitReader(resp.Body, maxErrSnippet))
		return nil, fmt.Errorf("fetching %s/%s from %s: %v; body: %s", repo, rev, source, resp.Status, snippet)
	}

	readFailure := func(cause error) error {
		return fmt.Errorf("reading %s/%s from %s: %v", repo, rev, source, cause)
	}

	// Ask for one byte more than the limit so that an oversized body can
	// be told apart from one that is exactly maxSize bytes long.
	body, readErr := ioutil.ReadAll(io.LimitReader(resp.Body, maxSize+1))
	switch {
	case readErr != nil:
		// A genuine read error takes precedence over the size check.
		return nil, readFailure(readErr)
	case len(body) > maxSize:
		return nil, readFailure(fmt.Errorf("body over %d bytes", maxSize))
	}
	return body, nil
}
| 2570 | |
Brad Fitzpatrick | 378fb29 | 2015-06-10 13:59:42 -0700 | [diff] [blame] | 2571 | func nukeIfBroken(bc *buildlet.Client) { |
| 2572 | if bc.IsBroken() { |
| 2573 | // It may not have come from the reverse pool, but it's harmless if |
| 2574 | // it didn't. |
| 2575 | reversePool.nukeBuildlet(bc) |
| 2576 | } |
| 2577 | } |
| 2578 | |
// nl is a byte slice holding a single newline character.
var nl = []byte{'\n'}