| // Copyright 2019 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // The updatecontrib command updates the CONTRIBUTORS file in the Go repository. |
| // |
| // This binary should be run at the top of GOROOT. |
| // It will try to fetch and update a bunch of subrepos in your GOPATH workspace, |
| // whose location is determined by running go env GOPATH. |
| package main // import "golang.org/x/build/cmd/updatecontrib" |
| |
| import ( |
| "bufio" |
| "bytes" |
| "flag" |
| "fmt" |
| "io" |
| "io/ioutil" |
| "log" |
| "os" |
| "os/exec" |
| "path" |
| "path/filepath" |
| "regexp" |
| "sort" |
| "strings" |
| |
| "golang.org/x/text/collate" |
| "golang.org/x/text/language" |
| ) |
| |
| // TODO: automatically use Gerrit names like we do with GitHub |
| |
| func main() { |
| log.SetFlags(0) |
| |
| flag.Usage = func() { |
| fmt.Fprint(os.Stderr, `Usage: |
| |
| $ cd $(gotip env GOROOT) |
| $ updateac |
| |
| `) |
| flag.PrintDefaults() |
| } |
| flag.Parse() |
| |
| all := gitAuthorEmails() // call first (it will reset CONTRIBUTORS) |
| c := file("CONTRIBUTORS") |
| var actions, warnings, errors bytes.Buffer |
| for _, who := range all { |
| // Skip exact emails that are present in CONTRIBUTORS file. |
| if c.Contains(&acLine{email: who.email}) { |
| continue |
| } |
| if !validName(who.name) { |
| ghUser, err := FetchGitHubInfo(who) |
| if err != nil { |
| fmt.Fprintf(&errors, "Error fetching GitHub name for %s: %v\n", who.Debug(), err) |
| continue |
| } |
| if ghUser == nil { |
| fmt.Fprintf(&warnings, "There is no GitHub user associated with %s, skipping\n", who.Debug()) |
| continue |
| } |
| if validName(ghUser.Name) { |
| // Use the GitHub name since it looks valid. |
| fmt.Fprintf(&actions, "Used GitHub name %q for %s\n", ghUser.Name, who.Debug()) |
| who.name = ghUser.Name |
| } else if (ghUser.Name == ghUser.Login || ghUser.Name == "") && who.name == ghUser.Login { |
| // Special case: if the GitHub name is the same as the GitHub username or empty, |
| // and who.name is the GitHub username, then use "GitHub User @<username> (<ID>)" form. |
| fmt.Fprintf(&actions, "Used GitHub User @%s (%d) form for %s\n", ghUser.Login, ghUser.ID, who.Debug()) |
| who.name = fmt.Sprintf("GitHub User @%s (%d)", ghUser.Login, ghUser.ID) |
| } else { |
| fmt.Fprintf(&warnings, "Found invalid-looking name %q for GitHub user @%s, skipping %v\n", ghUser.Name, ghUser.Login, who.Debug()) |
| continue |
| } |
| } |
| if !c.Contains(who) { |
| c.addLine(who) |
| fmt.Fprintf(&actions, "Added %s <%s>\n", who.name, who.firstEmail()) |
| } else { |
| // The name exists, but with a different email. We don't update lines automatically. (TODO) |
| // We'll need to update "GitHub User" names when they provide a better one. |
| } |
| } |
| if actions.Len() > 0 { |
| fmt.Println("Actions taken (relative to CONTRIBUTORS at origin/master):") |
| lines := strings.SplitAfter(actions.String(), "\n") |
| sort.Strings(lines) |
| os.Stdout.WriteString(strings.Join(lines, "")) |
| } |
| err := sortACFile("CONTRIBUTORS") |
| if err != nil { |
| log.Fatalf("Error sorting CONTRIBUTORS file: %v", err) |
| } |
| if errors.Len() > 0 { |
| log.Printf("\nExiting with errors:") |
| lines := strings.SplitAfter(errors.String(), "\n") |
| sort.Strings(lines) |
| os.Stderr.WriteString(strings.Join(lines, "")) |
| os.Exit(1) |
| } |
| if warnings.Len() > 0 { |
| log.Printf("\nExiting with warnings:") |
| lines := strings.SplitAfter(warnings.String(), "\n") |
| sort.Strings(lines) |
| os.Stderr.WriteString(strings.Join(lines, "")) |
| } |
| } |
| |
| // validName is meant to reject most invalid names with a simple rule, and a whitelist. |
| func validName(name string) bool { |
| if valid, ok := validNames[name]; ok { |
| return valid |
| } |
| return strings.Contains(name, " ") |
| } |
| |
| type acFile struct { |
| name string |
| lines []*acLine |
| byEmail map[string]*acLine // emailNorm(email) to line |
| byName map[string]*acLine // nameNorm(name) to line |
| } |
| |
| func (f *acFile) Contains(who *acLine) bool { |
| for _, email := range who.email { |
| if _, ok := f.byEmail[emailNorm(email)]; ok { |
| return true |
| } |
| } |
| if who.name != "" { |
| if _, ok := f.byName[nameNorm(who.name)]; ok { |
| return true |
| } |
| } |
| return false |
| } |
| |
// emailNorm returns the lookup key for an email address: the address in
// lower case with every "." removed.
func emailNorm(e string) string {
	lowered := strings.ToLower(e)
	return strings.ReplaceAll(lowered, ".", "")
}
| |
// nameNorm returns the lookup key for a contributor name: the name in lower
// case with every "." and "," removed.
func nameNorm(e string) string {
	lowered := strings.ToLower(e)
	withoutDots := strings.ReplaceAll(lowered, ".", "")
	return strings.ReplaceAll(withoutDots, ",", "")
}
| |
| func (f *acFile) addLine(line *acLine) { |
| of, err := os.OpenFile(f.name, os.O_WRONLY|os.O_APPEND, 0) |
| if err != nil { |
| log.Fatal(err) |
| } |
| if _, err := io.WriteString(of, line.String()); err != nil { |
| log.Fatal(err) |
| } |
| if err := of.Close(); err != nil { |
| log.Fatal(err) |
| } |
| |
| f.recordLine(line) |
| } |
| |
| func (f *acFile) recordLine(ln *acLine) { |
| for _, email := range ln.email { |
| if _, ok := f.byEmail[emailNorm(email)]; !ok { |
| f.byEmail[emailNorm(email)] = ln |
| } else { |
| // TODO: print for debugging, shouldn't happen |
| } |
| } |
| if _, ok := f.byName[nameNorm(ln.name)]; !ok { |
| f.byName[nameNorm(ln.name)] = ln |
| } else { |
| // TODO: print for debugging, shouldn't happen |
| } |
| f.lines = append(f.lines, ln) |
| } |
| |
// acLine describes one contributor: either a line parsed from the
// contributors file or a commit author discovered in git history.
type acLine struct {
	// name is the contributor's display name.
	name string
	// email lists the contributor's addresses, in the order they appear.
	email []string
	// repos is the set of entries from the repos list in which the author
	// has commits. It is only populated for authors found in git history.
	repos map[string]bool
	// firstRepo and firstCommit identify the repository and abbreviated
	// commit hash at which the author was first encountered while scanning
	// git history.
	firstRepo, firstCommit string
}
| |
| func (w *acLine) firstEmail() string { |
| if len(w.email) > 0 { |
| return w.email[0] |
| } |
| return "" |
| } |
| |
| func (w *acLine) String() string { |
| line := w.name |
| for _, email := range w.email { |
| line += fmt.Sprintf(" <%s>", email) |
| } |
| line += "\n" |
| return line |
| } |
| |
| func (w *acLine) Debug() string { |
| repos := make([]string, 0, len(w.repos)) |
| for k := range w.repos { |
| k = path.Base(k) |
| repos = append(repos, k) |
| } |
| githubOrg, githubRepo := githubOrgRepo(w.firstRepo) |
| email := w.firstEmail() |
| if len(w.email) > 1 { |
| email = fmt.Sprint(w.email) |
| } |
| sort.Strings(repos) |
| return fmt.Sprintf("%s <%s> https://github.com/%s/%s/commit/%s %v", w.name, email, |
| githubOrg, githubRepo, w.firstCommit, repos) |
| } |
| |
| var emailRx = regexp.MustCompile(`<[^>]+>`) |
| |
| func file(name string) *acFile { |
| f, err := os.Open(name) |
| if err != nil { |
| log.Fatal(err) |
| } |
| defer f.Close() |
| s := bufio.NewScanner(f) |
| acf := &acFile{ |
| name: name, |
| byName: make(map[string]*acLine), |
| byEmail: make(map[string]*acLine), |
| } |
| for s.Scan() { |
| t := strings.TrimSpace(s.Text()) |
| if t == "" || t[0] == '#' { |
| continue |
| } |
| ln := new(acLine) |
| ln.name = strings.TrimSpace(emailRx.ReplaceAllStringFunc(t, func(email string) string { |
| email = strings.Trim(email, "<>") |
| ln.email = append(ln.email, email) |
| return "" |
| })) |
| acf.recordLine(ln) |
| } |
| if err := s.Err(); err != nil { |
| log.Fatal(err) |
| } |
| return acf |
| } |
| |
// repos lists every repository that is fetched (if missing), updated, and
// scanned for contributors to add to the CONTRIBUTORS file. Repositories are
// processed in the order listed here.
//
// The entry "go" stands for the main Go repository; every other entry is the
// import path of a subrepository root.
//
// TODO(golang.org/issue/36047): Rewrite to use x/build/repos, being
// mindful whether each repo is expected to contribute to the main
// Go distribution's CONTRIBUTORS file or not.
var repos = []string{
	// The main repository.
	"go",

	// Subrepositories.
	"golang.org/x/arch",
	"golang.org/x/benchmarks",
	"golang.org/x/blog",
	"golang.org/x/build",
	"golang.org/x/crypto",
	"golang.org/x/debug",
	"golang.org/dl",
	"golang.org/x/exp",
	// gddo has its canonical import path on GitHub.
	"github.com/golang/gddo",
	"golang.org/x/image",
	"golang.org/x/lint",
	"golang.org/x/mobile",
	"golang.org/x/mod",
	"golang.org/x/net",
	"golang.org/x/oauth2",
	"golang.org/x/perf",
	"golang.org/x/pkgsite",
	"golang.org/x/playground",
	// proposal has no /x/ vanity import path.
	"go.googlesource.com/proposal.git",
	"golang.org/x/review",
	"golang.org/x/sync",
	"golang.org/x/sys",
	"golang.org/x/talks",
	"golang.org/x/term",
	"golang.org/x/text",
	"golang.org/x/time",
	"golang.org/x/tools",
	"golang.org/x/tour",
	"golang.org/x/vgo",
	"golang.org/x/website",
	"golang.org/x/xerrors",
}
| |
// githubOrgRepo maps an entry of the repos list to the GitHub organization
// and repository name that mirror it. The organization is always "golang";
// the repository name is the last element of the import path with any
// ".git" suffix removed.
func githubOrgRepo(repo string) (githubOrg, githubRepo string) {
	const org = "golang"
	switch repo {
	case "go":
		return org, "go"
	default:
		base := path.Base(repo)
		return org, strings.TrimSuffix(base, ".git")
	}
}
| |
| func gitAuthorEmails() []*acLine { |
| goPath, err := goPath() |
| if err != nil { |
| log.Fatal(err) |
| } |
| var ret []*acLine |
| seen := map[string]map[string]bool{} // email -> repo -> true |
| for _, repo := range repos { |
| log.Printf("Processing repo: %s", repo) |
| dir := "" |
| if repo != "go" { |
| dir = filepath.Join(goPath, "src", repo) |
| if _, err := os.Stat(dir); os.IsNotExist(err) { |
| log.Printf("go get -d %s ...", repo) |
| cmd := exec.Command("go", "get", "-d", repo) |
| var stderr bytes.Buffer |
| cmd.Stderr = io.MultiWriter(os.Stderr, &stderr) |
| if err := cmd.Run(); err != nil && !bytes.Contains(stderr.Bytes(), []byte(" no Go files ")) { |
| log.Fatal(err) |
| } |
| } |
| } |
| cmd := exec.Command("git", "fetch") |
| cmd.Dir = dir |
| if out, err := cmd.CombinedOutput(); err != nil { |
| log.Fatalf("Error updating repo %q: %v, %s", repo, err, out) |
| } |
| if repo == "go" { |
| // Initialize CONTRIBUTORS file to latest copy from origin/master. |
| exec.Command("git", "checkout", "origin/master", "--", "CONTRIBUTORS").Run() |
| exec.Command("git", "reset").Run() |
| } |
| |
| cmd = exec.Command("git", "log", "--format=%ae/%h/%an", "origin/master") //, "HEAD@{5 years ago}..HEAD") |
| cmd.Dir = dir |
| cmd.Stderr = os.Stderr |
| out, err := cmd.StdoutPipe() |
| if err != nil { |
| log.Fatal(err) |
| } |
| if err := cmd.Start(); err != nil { |
| log.Fatal(err) |
| } |
| s := bufio.NewScanner(out) |
| for s.Scan() { |
| line := s.Text() |
| f := strings.SplitN(line, "/", 3) |
| email, commit, name := f[0], f[1], f[2] |
| if uselessCommit(commit) { |
| continue |
| } |
| for _, phrase := range debugPeople { |
| if strings.Contains(line, phrase) { |
| log.Printf("DEBUG(%q): Repo %q, email %q, commit %s, name %q", phrase, repo, email, commit, name) |
| } |
| } |
| if skipEmail[email] { |
| continue |
| } |
| if v, ok := emailFix[email]; ok { |
| email = v |
| } |
| if v, ok := nameFix[name]; ok { |
| name = v |
| } |
| if userRepos, first := seen[email]; !first { |
| userRepos = map[string]bool{repo: true} |
| seen[email] = userRepos |
| ret = append(ret, &acLine{ |
| name: name, |
| email: []string{email}, |
| repos: userRepos, |
| firstRepo: repo, |
| firstCommit: commit, |
| }) |
| } else { |
| userRepos[repo] = true |
| } |
| } |
| if err := s.Err(); err != nil { |
| log.Fatal(err) |
| } |
| if err := cmd.Wait(); err != nil { |
| log.Fatal(err) |
| } |
| } |
| log.Printf("Done processing all repos.") |
| log.Println() |
| return ret |
| } |
| |
// goPath runs "go env GOPATH" and returns its output with surrounding
// whitespace removed. It returns an error if the command fails or prints
// nothing.
func goPath() (string, error) {
	raw, err := exec.Command("go", "env", "GOPATH").Output()
	if err != nil {
		return "", err
	}
	if dir := string(bytes.TrimSpace(raw)); dir != "" {
		return dir, nil
	}
	return "", fmt.Errorf("no GOPATH")
}
| |
| // sortACFile sorts the named file in place. |
| func sortACFile(path string) error { |
| f, err := os.Open(path) |
| if err != nil { |
| return err |
| } |
| sorted, err := sortAC(f) |
| f.Close() |
| if err != nil { |
| return err |
| } |
| err = ioutil.WriteFile(path, sorted, 0644) |
| return err |
| } |
| |
| func sortAC(r io.Reader) ([]byte, error) { |
| bs := bufio.NewScanner(r) |
| var header []string |
| var lines []string |
| for bs.Scan() { |
| t := bs.Text() |
| lines = append(lines, t) |
| if t == "# Please keep the list sorted." { |
| header = lines |
| lines = nil |
| continue |
| } |
| } |
| if err := bs.Err(); err != nil { |
| return nil, err |
| } |
| |
| var out bytes.Buffer |
| c := collate.New(language.Und, collate.Loose) |
| c.SortStrings(lines) |
| for _, l := range header { |
| fmt.Fprintln(&out, l) |
| } |
| for _, l := range lines { |
| fmt.Fprintln(&out, l) |
| } |
| return out.Bytes(), nil |
| } |
| |
// uselessCommit reports whether the commit with the given (possibly
// abbreviated) hash must be ignored when collecting contributors. Only the
// first seven characters of commit are compared; a hash shorter than that
// never matches.
func uselessCommit(commit string) bool {
	// Git can emit abbreviations shorter than seven characters when
	// core.abbrev is lowered; slicing such a hash would panic.
	if len(commit) < 7 {
		return false
	}
	switch commit[:7] {
	case "0d51c71":
		// I (Brad) forgot to accept a CLA for typo?
		// https://github.com/golang/net/commit/0d51c71
		return true
	case "ad051cf":
		// I (Brad) forgot to accept a CLA for typo? 2014.
		// https://github.com/golang/oauth2/commit/ad051cf
		return true
	case "661ac69", "fd68af8", "b036f29":
		// Motorola sent but never did CLA so it was reverted.
		return true
	case "a51e4cc":
		// khr <khr@khr-glaptop.roam.corp.google.com> https://github.com/golang/go/commit/a51e4cc9ce
		return true
	case "198c542":
		// adg forgot to check CLA, before the Google CLA bot handled it?
		// Load proxy variables from the environment
		// https://github.com/golang/gddo/pull/200
		return true
	case "2cfa4c7":
		// nf (adg) forgot to check CLA, before the Google CLA bot handled it?
		// https://github.com/golang/gddo/pull/156
		return true
	case "834a0af":
		// garyburd never checked CLA, prior to Google taking over the project (no CLA checks then)
		// https://github.com/golang/gddo/pull/105
		return true
	case "da10956":
		// Actually useless contribution, but no CLA on file under either Owner nor Author mail:
		// https://code-review.googlesource.com/#/c/2930/
		// https://github.com/GoogleCloudPlatform/google-cloud-go/commit/da10956
		return true
	case "0b6b69c":
		// googlebot approved it, but we can't find the record. Small change.
		// https://github.com/golang/crypto/pull/35
		// https://groups.google.com/a/google.com/d/msg/signcla-users/qpX9Z10YjQI/zjpEBmt_BgAJ
		return true
	}
	return false
}
| |
var (
	// skipEmail is the set of author emails whose commits are never
	// considered when collecting contributors.
	skipEmail = map[string]bool{
		"noreply-gerritcodereview@google.com": true,
		// Easter egg commits.
		"bwk@research.att.com": true,
		"research!bwk":         true,
		"bwk":                  true,
	}

	// TODO(dmitshur): Use golang.org/x/build/internal/gophers package to perform some of
	// the name and email fixes, eliminating the need for current entries in nameFix and emailFix.

	// nameFix maps an author name as found in git history to the name to
	// use instead. For example, "named Gopher": "Named Gopher".
	nameFix = map[string]string{
		"Emmanuel T Odeke": "Emmanuel Odeke", // to prevent a duplicate "Emmanuel T Odeke <emmanuel@orijtech.com>" entry, since "Emmanuel Odeke <emm.odeke@gmail.com> <odeke@ualberta.ca>" already exists
		"fREW Schmidt":     "Frew Schmidt",   // to use a normalized name capitalization, based on seeing it used on Medium and LinkedIn
	}

	// emailFix maps an author email as found in git history to the email to
	// use instead. For example, "gopher+bad@example.com": "gopher@example.com".
	emailFix = map[string]string{
		"haya14busa@gmail.com":                     "hayabusa1419@gmail.com",          // to prevent a duplicate "GitHub User @haya14busa (3797062) <haya14busa@gmail.com>" entry, since "Toshiki Shima <hayabusa1419@gmail.com>" already exists
		"36011612+steuhs@users.noreply.github.com": "steuhs@users.noreply.github.com", // to prevent a duplicate "Stephen L <36011612+steuhs@users.noreply.github.com>" entry, since "Stephen Lu <steuhs@users.noreply.github.com>" already exists
	}

	// validNames overrides the default rule in validName: a name listed
	// here is valid or invalid exactly as its value says.
	validNames = map[string]bool{}

	// debugPeople lists phrases to trace: gitAuthorEmails logs a DEBUG
	// message for every git log line that contains one of them.
	debugPeople = []string{}
)