// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| |
// Command watcher watches the specified repository for new commits
// and reports them to the build dashboard.
| package main // import "golang.org/x/tools/dashboard/watcher" |
| |
| import ( |
| "bufio" |
| "bytes" |
| "encoding/json" |
| "errors" |
| "flag" |
| "fmt" |
| "io" |
| "io/ioutil" |
| "log" |
| "net/http" |
| "net/url" |
| "os" |
| "os/exec" |
| "path" |
| "path/filepath" |
| "runtime" |
| "sort" |
| "strings" |
| "sync" |
| "time" |
| ) |
| |
// Fixed endpoints and naming conventions shared by the whole watcher.
const (
	// goBase is the URL prefix under which the main repository ("go") and
	// every sub-repository are found; repository names are appended to it.
	goBase         = "https://go.googlesource.com/"
	watcherVersion = 3 // must match dashboard/app/build/handler.go's watcherVersion
	// origin is the prefix carried by remote branch names; it is stripped
	// before a branch name is reported to the dashboard.
	origin = "origin/"
	master = origin + "master" // name of the master branch
	// metaURL is the JSON listing fetched by gerritMetaMap to learn the
	// current master hash of every repository.
	metaURL = goBase + "?b=master&format=JSON"
)
| |
// Command-line flags. Note that the dashboard URL flag is named "dash".
var (
	repoURL      = flag.String("repo", goBase+"go", "Repository URL")
	dashboard    = flag.String("dash", "https://build.golang.org/", "Dashboard URL (must end in /)")
	keyFile      = flag.String("key", defaultKeyFile, "Build dashboard key file")
	pollInterval = flag.Duration("poll", 10*time.Second, "Remote repo poll interval")
	network      = flag.Bool("network", true, "Enable network calls (disable for testing)")
)
| |
var (
	// defaultKeyFile is the default value of the -key flag:
	// ".gobuildkey" inside the directory returned by homeDir.
	defaultKeyFile = filepath.Join(homeDir(), ".gobuildkey")
	// dashboardKey is the key sent with commit posts; run fills it in
	// from the key file before any watcher starts.
	dashboardKey = ""
	networkSeen  = make(map[string]bool) // track known hashes for testing
)
| |
| func main() { |
| flag.Parse() |
| go pollGerritAndTickle() |
| |
| err := run() |
| fmt.Fprintln(os.Stderr, err) |
| os.Exit(1) |
| } |
| |
| // run is a little wrapper so we can use defer and return to signal |
| // errors. It should only return a non-nil error. |
| func run() error { |
| if !strings.HasSuffix(*dashboard, "/") { |
| return errors.New("dashboard URL (-dashboard) must end in /") |
| } |
| |
| if k, err := readKey(); err != nil { |
| return err |
| } else { |
| dashboardKey = k |
| } |
| |
| dir, err := ioutil.TempDir("", "watcher") |
| if err != nil { |
| return err |
| } |
| defer os.RemoveAll(dir) |
| |
| errc := make(chan error) |
| |
| go func() { |
| r, err := NewRepo(dir, *repoURL, "") |
| if err != nil { |
| errc <- err |
| return |
| } |
| errc <- r.Watch() |
| }() |
| |
| subrepos, err := subrepoList() |
| if err != nil { |
| return err |
| } |
| for _, path := range subrepos { |
| go func(path string) { |
| url := goBase + strings.TrimPrefix(path, "golang.org/x/") |
| r, err := NewRepo(dir, url, path) |
| if err != nil { |
| errc <- err |
| return |
| } |
| errc <- r.Watch() |
| }(path) |
| } |
| |
| // Must be non-nil. |
| return <-errc |
| } |
| |
// Repo represents a repository to be watched.
// It holds the commit graph and branch heads discovered so far; both maps
// are created by NewRepo and filled in by update.
type Repo struct {
	root     string             // on-disk location of the git repo
	path     string             // base import path for repo (blank for main repo)
	commits  map[string]*Commit // keyed by full commit hash (40 lowercase hex digits)
	branches map[string]*Branch // keyed by branch name, eg "release-branch.go1.3" (or empty for default)
}
| |
| // NewRepo checks out a new instance of the Mercurial repository |
| // specified by url to a new directory inside dir. |
| // The path argument is the base import path of the repository, |
| // and should be empty for the main Go repo. |
| func NewRepo(dir, url, path string) (*Repo, error) { |
| r := &Repo{ |
| path: path, |
| root: filepath.Join(dir, filepath.Base(path)), |
| commits: make(map[string]*Commit), |
| branches: make(map[string]*Branch), |
| } |
| |
| r.logf("cloning %v", url) |
| cmd := exec.Command("git", "clone", url, r.root) |
| if out, err := cmd.CombinedOutput(); err != nil { |
| return nil, fmt.Errorf("%v\n\n%s", err, out) |
| } |
| |
| r.logf("loading commit log") |
| if err := r.update(false); err != nil { |
| return nil, err |
| } |
| |
| r.logf("found %v branches among %v commits\n", len(r.branches), len(r.commits)) |
| return r, nil |
| } |
| |
| // Watch continuously runs "git fetch" in the repo, checks for |
| // new commits, and posts any new commits to the dashboard. |
| // It only returns a non-nil error. |
| func (r *Repo) Watch() error { |
| tickler := repoTickler(r.name()) |
| for { |
| if err := r.fetch(); err != nil { |
| return err |
| } |
| if err := r.update(true); err != nil { |
| return err |
| } |
| remotes, err := r.remotes() |
| if err != nil { |
| return err |
| } |
| for _, name := range remotes { |
| b, ok := r.branches[name] |
| if !ok { |
| // skip branch; must be already merged |
| continue |
| } |
| if err := r.postNewCommits(b); err != nil { |
| return err |
| } |
| } |
| // We still run a timer but a very slow one, just |
| // in case the mechanism updating the repo tickler |
| // breaks for some reason. |
| timer := time.NewTimer(5 * time.Minute) |
| select { |
| case <-tickler: |
| timer.Stop() |
| case <-timer.C: |
| } |
| } |
| } |
| |
| func (r *Repo) name() string { |
| if r.path == "" { |
| return "go" |
| } |
| return path.Base(r.path) |
| } |
| |
| func (r *Repo) logf(format string, args ...interface{}) { |
| log.Printf(r.name()+": "+format, args...) |
| } |
| |
| // postNewCommits looks for unseen commits on the specified branch and |
| // posts them to the dashboard. |
| func (r *Repo) postNewCommits(b *Branch) error { |
| if b.Head == b.LastSeen { |
| return nil |
| } |
| c := b.LastSeen |
| if c == nil { |
| // Haven't seen anything on this branch yet: |
| if b.Name == master { |
| // For the master branch, bootstrap by creating a dummy |
| // commit with a lone child that is the initial commit. |
| c = &Commit{} |
| for _, c2 := range r.commits { |
| if c2.Parent == "" { |
| c.children = []*Commit{c2} |
| break |
| } |
| } |
| if c.children == nil { |
| return fmt.Errorf("couldn't find initial commit") |
| } |
| } else { |
| // Find the commit that this branch forked from. |
| base, err := r.mergeBase(b.Name, master) |
| if err != nil { |
| return err |
| } |
| var ok bool |
| c, ok = r.commits[base] |
| if !ok { |
| return fmt.Errorf("couldn't find base commit: %v", base) |
| } |
| } |
| } |
| if err := r.postChildren(b, c); err != nil { |
| return err |
| } |
| b.LastSeen = b.Head |
| return nil |
| } |
| |
| // postChildren posts to the dashboard all descendants of the given parent. |
| // It ignores descendants that are not on the given branch. |
| func (r *Repo) postChildren(b *Branch, parent *Commit) error { |
| for _, c := range parent.children { |
| if c.Branch != b.Name { |
| continue |
| } |
| if err := r.postCommit(c); err != nil { |
| return err |
| } |
| } |
| for _, c := range parent.children { |
| if err := r.postChildren(b, c); err != nil { |
| return err |
| } |
| } |
| return nil |
| } |
| |
| // postCommit sends a commit to the build dashboard. |
| func (r *Repo) postCommit(c *Commit) error { |
| r.logf("sending commit to dashboard: %v", c) |
| |
| t, err := time.Parse("Mon, 2 Jan 2006 15:04:05 -0700", c.Date) |
| if err != nil { |
| return fmt.Errorf("postCommit: parsing date %q for commit %v: %v", c.Date, c, err) |
| } |
| dc := struct { |
| PackagePath string // (empty for main repo commits) |
| Hash string |
| ParentHash string |
| |
| User string |
| Desc string |
| Time time.Time |
| Branch string |
| |
| NeedsBenchmarking bool |
| }{ |
| PackagePath: r.path, |
| Hash: c.Hash, |
| ParentHash: c.Parent, |
| |
| User: c.Author, |
| Desc: c.Desc, |
| Time: t, |
| Branch: strings.TrimPrefix(c.Branch, origin), |
| |
| NeedsBenchmarking: c.NeedsBenchmarking(), |
| } |
| b, err := json.Marshal(dc) |
| if err != nil { |
| return fmt.Errorf("postCommit: marshaling request body: %v", err) |
| } |
| |
| if !*network { |
| if c.Parent != "" { |
| if !networkSeen[c.Parent] { |
| r.logf("%v: %v", c.Parent, r.commits[c.Parent]) |
| return fmt.Errorf("postCommit: no parent %v found on dashboard for %v", c.Parent, c) |
| } |
| } |
| if networkSeen[c.Hash] { |
| return fmt.Errorf("postCommit: already seen %v", c) |
| } |
| networkSeen[c.Hash] = true |
| return nil |
| } |
| |
| u := fmt.Sprintf("%vcommit?version=%v&key=%v", *dashboard, watcherVersion, dashboardKey) |
| resp, err := http.Post(u, "text/json", bytes.NewReader(b)) |
| if err != nil { |
| return err |
| } |
| defer resp.Body.Close() |
| if resp.StatusCode != 200 { |
| return fmt.Errorf("postCommit: status: %v", resp.Status) |
| } |
| |
| var s struct { |
| Error string |
| } |
| err = json.NewDecoder(resp.Body).Decode(&s) |
| if err != nil { |
| return fmt.Errorf("postCommit: decoding response: %v", err) |
| } |
| if s.Error != "" { |
| return fmt.Errorf("postCommit: error: %v", s.Error) |
| } |
| return nil |
| } |
| |
// update looks for new commits and branches,
// and updates the commits and branches maps.
//
// For every remote branch it logs the commits not yet in r.commits, adds
// them to the map, links each one to its parent and children, and then
// either advances the known branch head or registers a new Branch.
// When noisy is true each newly found commit is logged.
//
// It returns an error if a git command fails, if a logged commit is
// already known, if a commit's parent is missing from r.commits, or if
// the dashboard lookup for a new branch fails.
func (r *Repo) update(noisy bool) error {
	remotes, err := r.remotes()
	if err != nil {
		return err
	}
	for _, name := range remotes {
		// b is nil when this branch has not been seen before.
		b := r.branches[name]

		// Find all unseen commits on this branch.
		revspec := name
		if b != nil {
			// If we know about this branch,
			// only log commits down to the known head.
			revspec = b.Head.Hash + ".." + name
		} else if revspec != master {
			// If this is an unknown non-master branch,
			// log up to where it forked from master.
			base, err := r.mergeBase(name, master)
			if err != nil {
				return err
			}
			revspec = base + ".." + name
		}
		// Note: "--topo-order" is passed as r.log's first parameter
		// (declared there as dir), not as part of the variadic args.
		// The local variable log shadows the standard log package
		// for the rest of this loop body.
		log, err := r.log("--topo-order", revspec)
		if err != nil {
			return err
		}
		if len(log) == 0 {
			// No commits to handle; carry on.
			continue
		}

		// Add unknown commits to r.commits.
		var added []*Commit
		for _, c := range log {
			// Sanity check: we shouldn't see the same commit twice.
			if _, ok := r.commits[c.Hash]; ok {
				return fmt.Errorf("found commit we already knew about: %v", c.Hash)
			}
			if noisy {
				r.logf("found new commit %v", c)
			}
			c.Branch = name
			r.commits[c.Hash] = c
			added = append(added, c)
		}

		// Link added commits.
		// This runs only after every new commit is in r.commits, so a
		// parent is found regardless of the order of the log output.
		for _, c := range added {
			if c.Parent == "" {
				// This is the initial commit; no parent.
				r.logf("no parents for initial commit %v", c)
				continue
			}
			// Find parent commit.
			p, ok := r.commits[c.Parent]
			if !ok {
				return fmt.Errorf("can't find parent %q for %v", c.Parent, c)
			}
			// Link parent Commit.
			c.parent = p
			// Link child Commits.
			p.children = append(p.children, c)
		}

		// Update branch head, or add newly discovered branch.
		// The first commit in the log output is taken as the head.
		head := log[0]
		if b != nil {
			// Known branch; update head.
			b.Head = head
			r.logf("updated branch head: %v", b)
		} else {
			// It's a new branch; add it.
			// Ask the dashboard how far along this branch it already is.
			seen, err := r.lastSeen(head.Hash)
			if err != nil {
				return err
			}
			b = &Branch{Name: name, Head: head, LastSeen: seen}
			r.branches[name] = b
			r.logf("found branch: %v", b)
		}
	}

	return nil
}
| |
| // lastSeen finds the most recent commit the dashboard has seen, |
| // starting at the specified head. If the dashboard hasn't seen |
| // any of the commits from head to the beginning, it returns nil. |
| func (r *Repo) lastSeen(head string) (*Commit, error) { |
| h, ok := r.commits[head] |
| if !ok { |
| return nil, fmt.Errorf("lastSeen: can't find %q in commits", head) |
| } |
| |
| var s []*Commit |
| for c := h; c != nil; c = c.parent { |
| s = append(s, c) |
| } |
| |
| var err error |
| i := sort.Search(len(s), func(i int) bool { |
| if err != nil { |
| return false |
| } |
| ok, err = r.dashSeen(s[i].Hash) |
| return ok |
| }) |
| switch { |
| case err != nil: |
| return nil, fmt.Errorf("lastSeen: %v", err) |
| case i < len(s): |
| return s[i], nil |
| default: |
| // Dashboard saw no commits. |
| return nil, nil |
| } |
| } |
| |
| // dashSeen reports whether the build dashboard knows the specified commit. |
| func (r *Repo) dashSeen(hash string) (bool, error) { |
| if !*network { |
| return networkSeen[hash], nil |
| } |
| v := url.Values{"hash": {hash}, "packagePath": {r.path}} |
| u := *dashboard + "commit?" + v.Encode() |
| resp, err := http.Get(u) |
| if err != nil { |
| return false, err |
| } |
| defer resp.Body.Close() |
| if resp.StatusCode != 200 { |
| return false, fmt.Errorf("status: %v", resp.Status) |
| } |
| var s struct { |
| Error string |
| } |
| err = json.NewDecoder(resp.Body).Decode(&s) |
| if err != nil { |
| return false, err |
| } |
| switch s.Error { |
| case "": |
| // Found one. |
| return true, nil |
| case "Commit not found": |
| // Commit not found, keep looking for earlier commits. |
| return false, nil |
| default: |
| return false, fmt.Errorf("dashboard: %v", s.Error) |
| } |
| } |
| |
| // mergeBase returns the hash of the merge base for revspecs a and b. |
| func (r *Repo) mergeBase(a, b string) (string, error) { |
| cmd := exec.Command("git", "merge-base", a, b) |
| cmd.Dir = r.root |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| return "", fmt.Errorf("git merge-base: %v", err) |
| } |
| return string(bytes.TrimSpace(out)), nil |
| } |
| |
| // remotes returns a slice of remote branches known to the git repo. |
| // It always puts "origin/master" first. |
| func (r *Repo) remotes() ([]string, error) { |
| cmd := exec.Command("git", "branch", "-r") |
| cmd.Dir = r.root |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| return nil, fmt.Errorf("git branch: %v", err) |
| } |
| bs := []string{master} |
| for _, b := range strings.Split(string(out), "\n") { |
| b = strings.TrimSpace(b) |
| // Ignore aliases, blank lines, and master (it's already in bs). |
| if b == "" || strings.Contains(b, "->") || b == master { |
| continue |
| } |
| // Ignore pre-go1 release branches; they are just noise. |
| if strings.HasPrefix(b, origin+"release-branch.r") { |
| continue |
| } |
| bs = append(bs, b) |
| } |
| return bs, nil |
| } |
| |
// logFormat is the --format argument handed to "git log". Each commit is
// printed as five newline-separated fields followed by logBoundary, which
// is the layout the parser in (*Repo).log expects:
//
//	%H        commit hash
//	%P        parent hashes
//	%an <%ae> author name and email
//	%cD       committer date (RFC 2822 style)
//	%B        raw commit message (may span several lines)
const logFormat = `--format=format:%H
%P
%an <%ae>
%cD
%B
` + logBoundary

// logBoundary separates one commit from the next in the "git log" output.
const logBoundary = `_-_- magic boundary -_-_`
| |
| // log runs "git log" with the supplied arguments |
| // and parses the output into Commit values. |
| func (r *Repo) log(dir string, args ...string) ([]*Commit, error) { |
| args = append([]string{"log", "--date=rfc", logFormat}, args...) |
| cmd := exec.Command("git", args...) |
| cmd.Dir = r.root |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| return nil, fmt.Errorf("git log %v: %v", strings.Join(args, " "), err) |
| } |
| |
| // We have a commit with description that contains 0x1b byte. |
| // Mercurial does not escape it, but xml.Unmarshal does not accept it. |
| // TODO(adg): do we still need to scrub this? Probably. |
| out = bytes.Replace(out, []byte{0x1b}, []byte{'?'}, -1) |
| |
| var cs []*Commit |
| for _, text := range strings.Split(string(out), logBoundary) { |
| text = strings.TrimSpace(text) |
| if text == "" { |
| continue |
| } |
| p := strings.SplitN(text, "\n", 5) |
| if len(p) != 5 { |
| return nil, fmt.Errorf("git log %v: malformed commit: %q", strings.Join(args, " "), text) |
| } |
| cs = append(cs, &Commit{ |
| Hash: p[0], |
| // TODO(adg): This may break with branch merges. |
| Parent: strings.Split(p[1], " ")[0], |
| Author: p[2], |
| Date: p[3], |
| Desc: strings.TrimSpace(p[4]), |
| // TODO(adg): populate Files |
| }) |
| } |
| return cs, nil |
| } |
| |
| // fetch runs "git fetch" in the repository root. |
| // It tries three times, just in case it failed because of a transient error. |
| func (r *Repo) fetch() error { |
| var err error |
| for tries := 0; tries < 3; tries++ { |
| time.Sleep(time.Duration(tries) * 5 * time.Second) // Linear back-off. |
| cmd := exec.Command("git", "fetch", "--all") |
| cmd.Dir = r.root |
| if out, e := cmd.CombinedOutput(); err != nil { |
| e = fmt.Errorf("%v\n\n%s", e, out) |
| log.Printf("git fetch error %v: %v", r.root, e) |
| if err == nil { |
| err = e |
| } |
| continue |
| } |
| return nil |
| } |
| return err |
| } |
| |
// Branch represents a remote Git branch being tracked.
type Branch struct {
	Name     string  // remote branch name as listed by "git branch -r", eg "origin/master"
	Head     *Commit // most recent commit found on the branch by update
	LastSeen *Commit // the last commit posted to the dashboard
}
| |
| func (b *Branch) String() string { |
| return fmt.Sprintf("%q(Head: %v LastSeen: %v)", b.Name, b.Head, b.LastSeen) |
| } |
| |
// Commit represents a single Git commit.
// The exported fields are filled in from "git log" output, except for
// Branch (set by update) and Files (not populated anywhere in this file).
type Commit struct {
	Hash   string
	Author string
	Date   string // Format: "Mon, 2 Jan 2006 15:04:05 -0700"
	Desc   string // Plain text, first line is a short description.
	Parent string // hash of the first parent; empty for the initial commit
	Branch string // remote branch the commit was found on, eg "origin/master"
	Files  string // space-separated file list read by NeedsBenchmarking

	// For walking the graph.
	parent   *Commit
	children []*Commit
}
| |
| func (c *Commit) String() string { |
| s := c.Hash |
| if c.Branch != "" { |
| s += fmt.Sprintf("[%v]", strings.TrimPrefix(c.Branch, origin)) |
| } |
| s += fmt.Sprintf("(%q)", strings.SplitN(c.Desc, "\n", 2)[0]) |
| return s |
| } |
| |
| // NeedsBenchmarking reports whether the Commit needs benchmarking. |
| func (c *Commit) NeedsBenchmarking() bool { |
| // Do not benchmark branch commits, they are usually not interesting |
| // and fall out of the trunk succession. |
| if c.Branch != master { |
| return false |
| } |
| // Do not benchmark commits that do not touch source files (e.g. CONTRIBUTORS). |
| for _, f := range strings.Split(c.Files, " ") { |
| if (strings.HasPrefix(f, "include") || strings.HasPrefix(f, "src")) && |
| !strings.HasSuffix(f, "_test.go") && !strings.Contains(f, "testdata") { |
| return true |
| } |
| } |
| return false |
| } |
| |
// homeDir returns the current user's home directory as given by the
// environment: $home on Plan 9, %HOMEDRIVE%%HOMEPATH% on Windows, and
// $HOME everywhere else. The result is empty if the variables are unset.
func homeDir() string {
	goos := runtime.GOOS
	if goos == "plan9" {
		return os.Getenv("home")
	}
	if goos == "windows" {
		return os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH")
	}
	return os.Getenv("HOME")
}
| |
| func readKey() (string, error) { |
| c, err := ioutil.ReadFile(*keyFile) |
| if err != nil { |
| return "", err |
| } |
| return string(bytes.TrimSpace(bytes.SplitN(c, []byte("\n"), 2)[0])), nil |
| } |
| |
| // subrepoList fetches a list of sub-repositories from the dashboard |
| // and returns them as a slice of base import paths. |
| // Eg, []string{"golang.org/x/tools", "golang.org/x/net"}. |
| func subrepoList() ([]string, error) { |
| if !*network { |
| return nil, nil |
| } |
| |
| r, err := http.Get(*dashboard + "packages?kind=subrepo") |
| if err != nil { |
| return nil, fmt.Errorf("subrepo list: %v", err) |
| } |
| defer r.Body.Close() |
| if r.StatusCode != 200 { |
| return nil, fmt.Errorf("subrepo list: got status %v", r.Status) |
| } |
| var resp struct { |
| Response []struct { |
| Path string |
| } |
| Error string |
| } |
| err = json.NewDecoder(r.Body).Decode(&resp) |
| if err != nil { |
| return nil, fmt.Errorf("subrepo list: %v", err) |
| } |
| if resp.Error != "" { |
| return nil, fmt.Errorf("subrepo list: %v", resp.Error) |
| } |
| var pkgs []string |
| for _, r := range resp.Response { |
| pkgs = append(pkgs, r.Path) |
| } |
| return pkgs, nil |
| } |
| |
var (
	// ticklerMu guards ticklers.
	ticklerMu sync.Mutex
	// ticklers maps a repo name (e.g. "go") to the channel used to wake
	// that repo's watcher; entries are created on demand by repoTickler.
	ticklers = make(map[string]chan bool)
)
| |
| // repo is the gerrit repo: e.g. "go", "net", "crypto", ... |
| func repoTickler(repo string) chan bool { |
| ticklerMu.Lock() |
| defer ticklerMu.Unlock() |
| if c, ok := ticklers[repo]; ok { |
| return c |
| } |
| c := make(chan bool, 1) |
| ticklers[repo] = c |
| return c |
| } |
| |
| // pollGerritAndTickle polls Gerrit's JSON meta URL of all its URLs |
| // and their current branch heads. When this sees that one has |
| // changed, it tickles the channel for that repo and wakes up its |
| // poller, if its poller is in a sleep. |
| func pollGerritAndTickle() { |
| last := map[string]string{} // repo -> last seen hash |
| for { |
| for repo, hash := range gerritMetaMap() { |
| if hash != last[repo] { |
| last[repo] = hash |
| select { |
| case repoTickler(repo) <- true: |
| log.Printf("tickled the %s repo poller", repo) |
| default: |
| } |
| } |
| } |
| time.Sleep(*pollInterval) |
| } |
| } |
| |
| // gerritMetaMap returns the map from repo name (e.g. "go") to its |
| // latest master hash. |
| // The returned map is nil on any transient error. |
| func gerritMetaMap() map[string]string { |
| res, err := http.Get(metaURL) |
| if err != nil { |
| return nil |
| } |
| defer res.Body.Close() |
| defer io.Copy(ioutil.Discard, res.Body) // ensure EOF for keep-alive |
| if res.StatusCode != 200 { |
| return nil |
| } |
| var meta map[string]struct { |
| Branches map[string]string |
| } |
| br := bufio.NewReader(res.Body) |
| // For security reasons or something, this URL starts with ")]}'\n" before |
| // the JSON object. So ignore that. |
| // Shawn Pearce says it's guaranteed to always be just one line, ending in '\n'. |
| for { |
| b, err := br.ReadByte() |
| if err != nil { |
| return nil |
| } |
| if b == '\n' { |
| break |
| } |
| } |
| if err := json.NewDecoder(br).Decode(&meta); err != nil { |
| log.Printf("JSON decoding error from %v: %s", metaURL, err) |
| return nil |
| } |
| m := map[string]string{} |
| for repo, v := range meta { |
| if master, ok := v.Branches["master"]; ok { |
| m[repo] = master |
| } |
| } |
| return m |
| } |