blob: 0932aef70bbc46e1c11ed1a94f8659b0f10a12f9 [file] [log] [blame]
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The updateac command updates the CONTRIBUTORS file in the Go repository.
// This binary should be run at the top of GOROOT.
// It will try to fetch and update a bunch of subrepos in your GOPATH workspace,
// whose location is determined by running go env GOPATH.
package main // import ""
import (
// TODO: automatically use Gerrit names like we do with GitHub
// main drives the update, as far as this copy of the source shows: it collects
// author records with gitAuthorEmails (called first because that step resets
// CONTRIBUTORS), loads CONTRIBUTORS with file, and walks the authors, testing
// whether the file already contains each one's email. For names that validName
// rejects it asks fetchGitHubInfo for a GitHub profile and maps the profile
// name through nameFix. The name becomes a "GitHub User @login (id)"
// placeholder when the login is listed in useGitHubName, or when the profile
// name is empty or equal to the login together with one further comparison
// whose left operand is missing here. Problems are accumulated in the errors
// buffer; after CONTRIBUTORS has been re-sorted with sortACFile, a non-empty
// buffer is written to stderr.
//
// NOTE(review): this function is incomplete in the reviewed copy. The end of
// the usage raw string, every closing brace, the statements that terminate
// the branches (presumably continue, the call that adds a line, and an exit
// call) and the operands of several expressions (presumably who.name and
// who.email) are missing. The code is kept byte-for-byte below; restore it
// from the upstream file before building.
func main() {
flag.Usage = func() {
fmt.Fprint(os.Stderr, `Usage:
$ cd $(gotip env GOROOT)
$ updateac
all := gitAuthorEmails() // call first (it will reset CONTRIBUTORS)
c := file("CONTRIBUTORS")
errors := &bytes.Buffer{}
for _, who := range all {
// Skip exact emails that are present in CONTRIBUTORS file.
if c.Contains(&acLine{email:}) {
if !validName( {
ghUser, err := fetchGitHubInfo(who)
if err != nil || ghUser == nil {
fmt.Fprintf(errors, "Error fetching GitHub name for %s: %v\n", who.Debug(), err)
ghName := ghUser.Name
if v, ok := nameFix[ghName]; ok {
ghName = v
if ((ghName == ghUser.Login || ghName == "") && == ghUser.Login) ||
useGitHubName[ghUser.Login] {
ghName = fmt.Sprintf("GitHub User @%s (%d)", ghUser.Login, ghUser.ID)
if validName(ghName) {
log.Printf("Using GitHub name %q for %s", ghName, who.Debug()) = ghName
} else {
fmt.Fprintf(errors, "Invalid-looking name (@%s, %s) %v\n", ghUser.Login, ghUser.Name, who.Debug())
if !c.Contains(who) {
} else {
// The name exists, but with a different email. We don't update lines automatically. (TODO)
// We'll need to update "GitHub User" names when they provide a better one.
log.Printf("Add %s <%s>\n",, who.firstEmail())
err := sortACFile("CONTRIBUTORS")
if err != nil {
log.Fatalf("Error sorting CONTRIBUTORS file: %v", err)
if errors.Len() > 0 {
log.Printf("Exiting with error.")
lines := strings.SplitAfter(errors.String(), "\n")
os.Stderr.WriteString(strings.Join(lines, ""))
// validName is meant to reject most invalid names with a simple rule, and a whitelist.
func validName(name string) bool {
if valid, ok := validNames[name]; ok {
return valid
return strings.Contains(name, " ")
func fetchGitHubUserID(who *acLine) (string, error) {
org, repo := githubOrgRepo(who.firstRepo)
cacheDir, err := githubCacheDir()
if err != nil {
return "", err
cacheFile := filepath.Join(cacheDir, fmt.Sprintf("%s-%s-%s-id", org, repo, who.firstCommit))
if slurp, err := ioutil.ReadFile(cacheFile); err == nil {
return string(slurp), nil
jsonURL := fmt.Sprintf("", org, repo, who.firstCommit)
req, _ := http.NewRequest("GET", jsonURL, nil)
if token, err := ioutil.ReadFile(githubTokenFile()); err == nil {
req.Header.Set("Authorization", "token "+strings.TrimSpace(string(token)))
var jres struct {
Author struct {
ID int
res, err := http.DefaultClient.Do(req)
if err != nil {
return "", err
defer res.Body.Close()
if res.StatusCode != 200 {
return "", fmt.Errorf("%s: %v", jsonURL, res.Status)
if err := json.NewDecoder(res.Body).Decode(&jres); err != nil {
return "", fmt.Errorf("%s: %v", jsonURL, err)
if jres.Author.ID == 0 {
return "", nil // not a registered GitHub user
os.MkdirAll(cacheDir, 0700)
ioutil.WriteFile(cacheFile, []byte(strconv.Itoa(jres.Author.ID)), 0600)
return strconv.Itoa(jres.Author.ID), nil
// GitHubInfo is a subset of the GH API info.
//
// It is filled by decoding JSON (a cached copy or a live API response) in
// fetchGitHubInfo.
type GitHubInfo struct {
	ID    int    // numeric account ID; fetchGitHubInfo treats 0 as a malformed response
	Name  string // profile display name; may be empty
	Login string // account login, printed as "@login" in log and error messages
}
func fetchGitHubInfo(who *acLine) (*GitHubInfo, error) {
id, err := fetchGitHubUserID(who)
if err != nil {
return nil, err
if id == "" {
return nil, fmt.Errorf("failed to fetch GitHub user ID for %v", who)
cacheDir, err := githubCacheDir()
if err != nil {
return nil, err
cacheFile := filepath.Join(cacheDir, fmt.Sprintf("user-id-%s", id))
if slurp, err := ioutil.ReadFile(cacheFile); err == nil {
res := &GitHubInfo{}
if err := json.Unmarshal(slurp, res); err != nil {
return nil, fmt.Errorf("%s: %v", cacheFile, err)
return res, nil
jsonURL := fmt.Sprintf("", id) // undocumented but it works
req, _ := http.NewRequest("GET", jsonURL, nil)
if token, err := ioutil.ReadFile(githubTokenFile()); err == nil {
req.Header.Set("Authorization", "token "+strings.TrimSpace(string(token)))
res, err := http.DefaultClient.Do(req)
if err != nil {
return nil, err
defer res.Body.Close()
if res.StatusCode != 200 {
return nil, fmt.Errorf("%s: %v", jsonURL, res.Status)
body, err := ioutil.ReadAll(res.Body)
if err != nil {
return nil, fmt.Errorf("%s: %v", jsonURL, err)
jres := &GitHubInfo{}
if err := json.Unmarshal(body, jres); err != nil {
return nil, fmt.Errorf("%s: %v", jsonURL, err)
if jres.ID == 0 {
return nil, fmt.Errorf("%s: malformed response", jsonURL)
os.MkdirAll(cacheDir, 0700)
ioutil.WriteFile(cacheFile, body, 0600)
return jres, nil
// githubCacheDir returns the directory used to cache GitHub API responses:
// the "updatecontrib-github" subdirectory of os.UserCacheDir. The directory
// is not created here. If the user cache directory cannot be determined, the
// error from os.UserCacheDir is returned with an empty path.
func githubCacheDir() (string, error) {
	userCacheDir, err := os.UserCacheDir()
	if err != nil {
		return "", err
	}
	return filepath.Join(userCacheDir, "updatecontrib-github"), nil
}
// githubTokenFile returns the path of the file holding the GitHub API token:
// ".github-updatecontrib-token" inside the directory named by the HOME
// environment variable. When HOME is unset or empty the result is the bare
// relative file name. The file is not required to exist.
func githubTokenFile() string {
	return filepath.Join(os.Getenv("HOME"), ".github-updatecontrib-token")
}
type acFile struct {
name string
lines []*acLine
byEmail map[string]*acLine // emailNorm(email) to line
byName map[string]*acLine // nameNorm(name) to line
func (f *acFile) Contains(who *acLine) bool {
for _, email := range {
if _, ok := f.byEmail[emailNorm(email)]; ok {
return true
if != "" {
if _, ok := f.byName[nameNorm(]; ok {
return true
return false
// emailNorm returns the lookup key for email address e: e lower-cased with
// every "." removed. Dots are removed from the whole address, domain part
// included, so the result is only useful as a comparison key.
func emailNorm(e string) string {
	lowered := strings.ToLower(e)
	return strings.Replace(lowered, ".", "", -1)
}
// nameNorm returns the lookup key for contributor name e: e lower-cased with
// every "." and "," removed. Spaces are kept.
func nameNorm(e string) string {
	key := strings.ToLower(e)
	key = strings.Replace(key, ".", "", -1)
	key = strings.Replace(key, ",", "", -1)
	return key
}
// addLine appends the textual form of line (line.String()) to a file that it
// opens write-only in append mode, and then closes that file. The zero
// permission argument is only consulted when a file is created, which these
// flags never do.
//
// NOTE(review): incomplete in the reviewed copy. The path argument of
// os.OpenFile is missing (presumably f.name), and the bodies of all three
// error checks and every closing brace are gone, so how failures are handled
// cannot be told from here. Restore from upstream before building.
func (f *acFile) addLine(line *acLine) {
of, err := os.OpenFile(, os.O_WRONLY|os.O_APPEND, 0)
if err != nil {
if _, err := io.WriteString(of, line.String()); err != nil {
if err := of.Close(); err != nil {
func (f *acFile) recordLine(ln *acLine) {
for _, email := range {
if _, ok := f.byEmail[emailNorm(email)]; !ok {
f.byEmail[emailNorm(email)] = ln
} else {
// TODO: print for debugging, shouldn't happen
if _, ok := f.byName[nameNorm(]; !ok {
f.byName[nameNorm(] = ln
} else {
// TODO: print for debugging, shouldn't happen
f.lines = append(f.lines, ln)
// acLine is one contributor entry: a name with its email addresses, plus
// bookkeeping about where the contributor was found in git history.
type acLine struct {
	name        string          // contributor name
	email       []string        // email addresses, without angle brackets
	repos       map[string]bool // repositories in which this contributor's email was seen
	firstRepo   string          // repository being scanned when the email was first seen
	firstCommit string          // abbreviated hash of the commit where the email was first seen
}

// firstEmail returns the first email address of w, or "" if it has none.
//
// NOTE(review): the operand of len and the non-empty return were missing in
// the reviewed copy; restored as w.email / w.email[0]. Confirm against
// upstream.
func (w *acLine) firstEmail() string {
	if len(w.email) > 0 {
		return w.email[0]
	}
	return ""
}

// String formats w as a contributors-file line: the name followed by each
// email address as " <addr>", terminated by a newline.
//
// NOTE(review): the initial value of the line and the range operand were
// missing in the reviewed copy; restored as w.name and w.email. Confirm
// against upstream.
func (w *acLine) String() string {
	var b strings.Builder
	b.WriteString(w.name)
	for _, email := range w.email {
		b.WriteString(" <")
		b.WriteString(email)
		b.WriteString(">")
	}
	b.WriteString("\n")
	return b.String()
}
func (w *acLine) Debug() string {
repos := make([]string, 0, len(w.repos))
for k := range w.repos {
k = path.Base(k)
repos = append(repos, k)
githubOrg, githubRepo := githubOrgRepo(w.firstRepo)
email := w.firstEmail()
if len( > 1 {
email = fmt.Sprint(
return fmt.Sprintf("%s <%s> %v",, email,
githubOrg, githubRepo, w.firstCommit, repos)
// githubOrgRepo maps an import path (from the forms in the repos global
// variable) to the GitHub organization and repository that host it.
//
// "go" is the main repository, golang/go. Two Google-hosted import paths have
// explicit mappings. Every other path is assumed to live in the golang
// organization under the last element of the import path.
func githubOrgRepo(repo string) (githubOrg, githubRepo string) {
	switch repo {
	case "go":
		return "golang", "go"
	// NOTE(review): the next two case labels were both empty strings in the
	// reviewed copy (a duplicate case, and one no import path could match).
	// They are restored to the import paths of the two repositories named in
	// the returns. Confirm against upstream.
	case "google.golang.org/api":
		return "google", "google-api-go-client"
	case "cloud.google.com/go":
		return "GoogleCloudPlatform", "google-cloud-go"
	}
	return "golang", path.Base(repo)
}
// emailRx matches one angle-bracketed span, such as "<user@example.com>",
// brackets included.
var emailRx = regexp.MustCompile(`<[^>]+>`)
// file opens the named file and scans it line by line, building a new acFile
// whose byName and byEmail indexes start out empty. Each line is trimmed of
// surrounding whitespace; blank lines and lines starting with '#' are singled
// out by a dedicated test. For the remaining lines every emailRx match is
// stripped of its angle brackets and replaced with the empty string, and the
// text that is left is whitespace-trimmed.
//
// NOTE(review): incomplete in the reviewed copy. The bodies of the error
// checks and of the blank/comment test (presumably a fatal log call and
// continue), the assignment targets for the parsed name and addresses
// (presumably ln.name and ln.email), the statement that stores ln in acf
// (presumably acf.recordLine(ln)) and all closing braces are missing. Restore
// from upstream before building.
func file(name string) *acFile {
f, err := os.Open(name)
if err != nil {
defer f.Close()
s := bufio.NewScanner(f)
acf := &acFile{
name: name,
byName: make(map[string]*acLine),
byEmail: make(map[string]*acLine),
for s.Scan() {
t := strings.TrimSpace(s.Text())
if t == "" || t[0] == '#' {
ln := new(acLine) = strings.TrimSpace(emailRx.ReplaceAllStringFunc(t, func(email string) string {
email = strings.Trim(email, "<>") = append(, email)
return ""
if err := s.Err(); err != nil {
return acf
// repos is a list of all the repositories that are fetched (if missing),
// updated, and used to find contributors to add to the CONTRIBUTORS file.
// It includes "go", which represents the main Go repository,
// and an import path corresponding to each subrepository root.
//
// NOTE(review): in the reviewed copy the import paths are blank (two entries
// are empty strings), further entries may have been dropped entirely, and the
// closing brace is missing. Restore the list from upstream before building.
var repos = []string{
"go", // main repo
"", // The canonical import path for gddo is on GitHub.
"", // It doesn't have an /x/ vanity import path.
// gitAuthorEmails scans the git history of every entry in repos and returns
// one acLine per distinct author email, in the order the emails are first
// encountered.
//
// As far as this copy shows, for each repo it:
//   - uses GOPATH/src/<repo> as the working directory (the current directory
//     for "go") and runs "go get -d" when that directory does not exist,
//     tolerating a failure whose stderr contains " no Go files ";
//   - for "go", checks out origin/master's CONTRIBUTORS and runs "git reset";
//   - runs "git fetch", aborting the program on failure;
//   - reads "git log --format=%ae/%h/%an origin/master" and splits every line
//     into author email, abbreviated hash and author name.
//
// Each commit is tested with uselessCommit and skipEmail, logged when the raw
// line contains a debugPeople phrase, and has its email and name mapped
// through emailFix and nameFix. The first time an email is seen a new acLine
// records the name, email, repo and commit; later sightings only add the repo
// to that email's repository set.
//
// NOTE(review): incomplete in the reviewed copy. Closing braces and the
// bodies of most error checks and skip tests (presumably fatal log calls and
// continue) are missing, so the exact nesting described above is inferred
// from statement order. Restore from upstream before building.
func gitAuthorEmails() []*acLine {
goPath, err := goPath()
if err != nil {
var ret []*acLine
seen := map[string]map[string]bool{} // email -> repo -> true
for _, repo := range repos {
log.Printf("Processing repo: %s", repo)
// An empty dir makes the git commands below run in the current directory.
dir := ""
if repo != "go" {
dir = filepath.Join(goPath, "src", repo)
if _, err := os.Stat(dir); os.IsNotExist(err) {
log.Printf("go get -d %s ...", repo)
cmd := exec.Command("go", "get", "-d", repo)
// Stderr is both shown to the user and captured so the error text can be inspected.
var stderr bytes.Buffer
cmd.Stderr = io.MultiWriter(os.Stderr, &stderr)
if err := cmd.Run(); err != nil && !bytes.Contains(stderr.Bytes(), []byte(" no Go files ")) {
if repo == "go" {
// The results of these two commands are not checked.
exec.Command("git", "checkout", "origin/master", "--", "CONTRIBUTORS").Run()
exec.Command("git", "reset").Run()
cmd := exec.Command("git", "fetch")
cmd.Dir = dir
if out, err := cmd.CombinedOutput(); err != nil {
log.Fatalf("Error updating repo %q: %v, %s", repo, err, out)
cmd = exec.Command("git", "log", "--format=%ae/%h/%an", "origin/master") //, "HEAD@{5 years ago}..HEAD")
cmd.Dir = dir
cmd.Stderr = os.Stderr
out, err := cmd.StdoutPipe()
if err != nil {
if err := cmd.Start(); err != nil {
s := bufio.NewScanner(out)
for s.Scan() {
line := s.Text()
// Fields follow the --format string above: email/hash/name. The name is
// last so that any further "/" stays inside it.
// NOTE(review): f is indexed without a length check; a line with fewer
// than two "/" would panic. Confirm git always emits all three fields.
f := strings.SplitN(line, "/", 3)
email, commit, name := f[0], f[1], f[2]
if uselessCommit(commit) {
for _, phrase := range debugPeople {
if strings.Contains(line, phrase) {
log.Printf("DEBUG(%q): Repo %q, email %q, commit %s, name %q", phrase, repo, email, commit, name)
if skipEmail[email] {
if v, ok := emailFix[email]; ok {
email = v
if v, ok := nameFix[name]; ok {
name = v
// The second result of the map lookup is true when the email was already
// seen, so !first selects emails that are new.
if userRepos, first := seen[email]; !first {
userRepos = map[string]bool{repo: true}
seen[email] = userRepos
ret = append(ret, &acLine{
name: name,
email: []string{email},
repos: userRepos,
firstRepo: repo,
firstCommit: commit,
} else {
userRepos[repo] = true
if err := s.Err(); err != nil {
if err := cmd.Wait(); err != nil {
return ret
// goPath returns the output of running go env GOPATH, with surrounding
// whitespace removed. It returns an error if the command fails or prints
// nothing.
func goPath() (string, error) {
	out, err := exec.Command("go", "env", "GOPATH").Output()
	if err != nil {
		return "", err
	}
	goPath := string(bytes.TrimSpace(out))
	if goPath == "" {
		return "", fmt.Errorf("no GOPATH")
	}
	return goPath, nil
}
// sortACFile sorts the named file in place.
func sortACFile(path string) error {
f, err := os.Open(path)
if err != nil {
return err
sorted, err := sortAC(f)
if err != nil {
return err
err = ioutil.WriteFile(path, sorted, 0644)
return err
func sortAC(r io.Reader) ([]byte, error) {
bs := bufio.NewScanner(r)
var header []string
var lines []string
for bs.Scan() {
t := bs.Text()
lines = append(lines, t)
if t == "# Please keep the list sorted." {
header = lines
lines = nil
if err := bs.Err(); err != nil {
return nil, err
var out bytes.Buffer
c := collate.New(language.Und, collate.Loose)
for _, l := range header {
fmt.Fprintln(&out, l)
for _, l := range lines {
fmt.Fprintln(&out, l)
return out.Bytes(), nil
// uselessCommit reports whether commit is on the fixed list of commits that
// must not be used as evidence of a contribution. Only the first seven
// characters of the hash are compared, so longer abbreviations and full
// hashes match too. A hash shorter than seven characters cannot match and
// yields false.
func uselessCommit(commit string) bool {
	// Guard the slice below: hashes shorter than seven characters used to
	// panic.
	if len(commit) < 7 {
		return false
	}
	switch commit[:7] {
	case "0d51c71":
		// I (Brad) forgot to accept a CLA for typo?
		return true
	case "ad051cf":
		// I (Brad) forgot to accept a CLA for typo? 2014.
		return true
	case "661ac69", "fd68af8", "b036f29":
		// Motorola sent but never did CLA so it was reverted.
		return true
	case "a51e4cc":
		// khr <>
		return true
	case "198c542":
		// adg forgot to check CLA, before the Google CLA bot handled it?
		// Load proxy variables from the environment
		return true
	case "2cfa4c7":
		// nf (adg) forgot to check CLA, before the Google CLA bot handled it?
		return true
	case "834a0af":
		// garyburd never checked CLA, prior to Google taking over the project (no CLA checks then)
		return true
	case "da10956":
		// Actually useless contribution, but no CLA on file under either Owner nor Author mail:
		return true
	case "0b6b69c":
		// googlebot approved it, but we can't find the record. Small change.
		return true
	}
	return false
}
// useGitHubName is the set of GitHub logins whose profile never carries a
// decent name. For a login listed here the "GitHub User @foo" placeholder
// name is forced.
// TODO: override or warn when they add a good name.
var useGitHubName = map[string]bool{
	// No logins are forced at present.
}
// skipEmail is a set of author emails, keyed by the raw address taken from
// git log. gitAuthorEmails tests every commit's author email against it
// before emailFix is applied (presumably to skip the commit; that branch body
// is missing in this copy).
//
// NOTE(review): two keys are empty strings in the reviewed copy (duplicate
// keys in a map literal do not compile) and the closing brace is missing. The
// original addresses cannot be recovered from here; restore from upstream.
var skipEmail = map[string]bool{
"": true,
// Easter egg commits.
"": true,
"research!bwk": true,
"bwk": true,
// TODO(dmitshur): Use package to perform some of
// the name and email fixes, eliminating the need for current entries in nameFix and emailFix.
// (NOTE(review): the package path in the TODO above is missing in this copy.)
// nameFix is a map of name -> new name replacements to make.
// For example, "named Gopher": "Named Gopher".
// It is applied to author names read from git log and to names fetched from
// GitHub profiles.
//
// NOTE(review): the closing brace is missing in the reviewed copy and further
// entries may have been dropped; restore from upstream.
var nameFix = map[string]string{
"Emmanuel T Odeke": "Emmanuel Odeke", // to prevent a duplicate "Emmanuel T Odeke <>" entry, since "Emmanuel Odeke <> <>" already exists
// emailFix is a map of email -> new email replacements to make.
// For example, "": "".
// It is applied to author emails read from git log, after the skipEmail test.
//
// NOTE(review): every key and value is an empty string in the reviewed copy
// (the addresses were lost, and the duplicate keys do not compile), and the
// closing brace is missing. Restore from upstream.
var emailFix = map[string]string{
"": "", // to prevent a duplicate "GitHub User @haya14busa (3797062) <>" entry, since "Toshiki Shima <>" already exists
"": "", // to prevent a duplicate "Stephen L <>" entry, since "Stephen Lu <>" already exists
var (
	// validNames overrides the verdict of validName for specific names: a
	// true entry accepts the name, a false entry rejects it.
	validNames = map[string]bool{}

	// debugPeople lists phrases to trace: gitAuthorEmails logs a DEBUG line
	// for every git log line that contains one of them.
	debugPeople = []string{}
)