Brad Fitzpatrick | b3a49f9 | 2017-03-20 19:58:04 +0000 | [diff] [blame] | 1 | // Copyright 2017 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Brad Fitzpatrick | 1a1ef8e | 2017-04-29 21:15:37 +0000 | [diff] [blame] | 5 | // Package godata loads the Go project's corpus of Git, Github, and |
| 6 | // Gerrit activity into memory to allow easy analysis without worrying |
| 7 | // about APIs and their pagination, quotas, and other nuisances and |
| 8 | // limitations. |
Brad Fitzpatrick | b3a49f9 | 2017-03-20 19:58:04 +0000 | [diff] [blame] | 9 | package godata |
| 10 | |
| 11 | import ( |
| 12 | "context" |
Brad Fitzpatrick | 04f8c52 | 2017-04-29 07:39:57 +0000 | [diff] [blame] | 13 | "log" |
Brad Fitzpatrick | b3a49f9 | 2017-03-20 19:58:04 +0000 | [diff] [blame] | 14 | "os" |
Brad Fitzpatrick | 04f8c52 | 2017-04-29 07:39:57 +0000 | [diff] [blame] | 15 | "os/user" |
Brad Fitzpatrick | b3a49f9 | 2017-03-20 19:58:04 +0000 | [diff] [blame] | 16 | "path/filepath" |
Brad Fitzpatrick | 04f8c52 | 2017-04-29 07:39:57 +0000 | [diff] [blame] | 17 | "runtime" |
Brad Fitzpatrick | b3a49f9 | 2017-03-20 19:58:04 +0000 | [diff] [blame] | 18 | |
| 19 | "golang.org/x/build/maintner" |
| 20 | ) |
| 21 | |
// Server is the URL of the Go project's production maintner log. Get
// passes it to maintner.NewNetworkMutationSource as the source of
// mutations.
const Server = "https://maintner.golang.org/logs"
| 24 | |
Brad Fitzpatrick | 1a1ef8e | 2017-04-29 21:15:37 +0000 | [diff] [blame] | 25 | // Get returns the Go project's corpus, containing all Git commits, |
| 26 | // Github activity, and Gerrit activity and metadata since the |
| 27 | // beginning of the project. |
| 28 | // |
Chris Broadfoot | 90850ed | 2017-07-08 09:08:42 -0700 | [diff] [blame] | 29 | // Use Corpus.Update to keep the corpus up-to-date. If you do this, you must |
| 30 | // hold the read lock if reading and updating concurrently. |
| 31 | // |
Dmitri Shuralyov | 36ea946 | 2022-03-30 18:41:57 -0400 | [diff] [blame] | 32 | // The initial call to Get will download a few gigabytes of data |
| 33 | // into a directory "golang-maintner" under your operating |
Brad Fitzpatrick | 1a1ef8e | 2017-04-29 21:15:37 +0000 | [diff] [blame] | 34 | // system's user cache directory. Subsequent calls will only download |
| 35 | // what's changed since the previous call. |
| 36 | // |
| 37 | // Even with all the data already cached on local disk, a call to Get |
Dmitri Shuralyov | 36ea946 | 2022-03-30 18:41:57 -0400 | [diff] [blame] | 38 | // takes approximately 15 seconds per gigabyte of mutation log data |
| 39 | // to load it into memory. |
Brad Fitzpatrick | 1a1ef8e | 2017-04-29 21:15:37 +0000 | [diff] [blame] | 40 | // For daemons, use Corpus.Update to incrementally update an |
| 41 | // already-loaded Corpus. |
| 42 | // |
| 43 | // The in-memory representation is about 25% larger than its on-disk |
Dmitri Shuralyov | 36ea946 | 2022-03-30 18:41:57 -0400 | [diff] [blame] | 44 | // size. In April 2022, it's under 4 GB. |
Brad Fitzpatrick | 1a1ef8e | 2017-04-29 21:15:37 +0000 | [diff] [blame] | 45 | // |
Dmitri Shuralyov | 36ea946 | 2022-03-30 18:41:57 -0400 | [diff] [blame] | 46 | // See https://pkg.go.dev/golang.org/x/build/maintner#Corpus for how |
| 47 | // to walk the data structure. |
Brad Fitzpatrick | b3a49f9 | 2017-03-20 19:58:04 +0000 | [diff] [blame] | 48 | func Get(ctx context.Context) (*maintner.Corpus, error) { |
Brad Fitzpatrick | 5dcf3c4 | 2017-05-18 18:29:05 +0000 | [diff] [blame] | 49 | targetDir := Dir() |
Brad Fitzpatrick | 04f8c52 | 2017-04-29 07:39:57 +0000 | [diff] [blame] | 50 | if err := os.MkdirAll(targetDir, 0700); err != nil { |
| 51 | return nil, err |
| 52 | } |
Brad Fitzpatrick | c1b987d | 2019-12-06 04:32:09 +0000 | [diff] [blame] | 53 | mutSrc := maintner.NewNetworkMutationSource(Server, targetDir) |
Brad Fitzpatrick | 04f8c52 | 2017-04-29 07:39:57 +0000 | [diff] [blame] | 54 | corpus := new(maintner.Corpus) |
| 55 | if err := corpus.Initialize(ctx, mutSrc); err != nil { |
Brad Fitzpatrick | b3a49f9 | 2017-03-20 19:58:04 +0000 | [diff] [blame] | 56 | return nil, err |
| 57 | } |
| 58 | return corpus, nil |
| 59 | } |
Brad Fitzpatrick | 04f8c52 | 2017-04-29 07:39:57 +0000 | [diff] [blame] | 60 | |
Brad Fitzpatrick | 5dcf3c4 | 2017-05-18 18:29:05 +0000 | [diff] [blame] | 61 | // Dir returns the directory containing the cached mutation logs. |
| 62 | func Dir() string { |
Jude Pereira | 11e039e | 2017-09-24 11:32:22 +0530 | [diff] [blame] | 63 | return filepath.Join(XdgCacheDir(), "golang-maintner") |
Brad Fitzpatrick | 5dcf3c4 | 2017-05-18 18:29:05 +0000 | [diff] [blame] | 64 | } |
| 65 | |
Jude Pereira | 11e039e | 2017-09-24 11:32:22 +0530 | [diff] [blame] | 66 | // XdgCacheDir returns the XDG Base Directory Specification cache |
Brad Fitzpatrick | 04f8c52 | 2017-04-29 07:39:57 +0000 | [diff] [blame] | 67 | // directory. |
Jude Pereira | 11e039e | 2017-09-24 11:32:22 +0530 | [diff] [blame] | 68 | func XdgCacheDir() string { |
Brad Fitzpatrick | 04f8c52 | 2017-04-29 07:39:57 +0000 | [diff] [blame] | 69 | cache := os.Getenv("XDG_CACHE_HOME") |
| 70 | if cache != "" { |
| 71 | return cache |
| 72 | } |
| 73 | home := homeDir() |
| 74 | // Not XDG but standard for OS X. |
| 75 | if runtime.GOOS == "darwin" { |
| 76 | return filepath.Join(home, "Library/Caches") |
| 77 | } |
| 78 | return filepath.Join(home, ".cache") |
| 79 | } |
| 80 | |
// homeDir returns the current user's home directory.
//
// On Windows it is %HOMEDRIVE%%HOMEPATH%. When both are unset, it falls
// back to %USERPROFILE% rather than returning an empty string, which
// would turn the cache location into a path relative to the working
// directory.
//
// On other systems it is $HOME, or, when $HOME is empty, the home
// directory reported by user.Current. If the current user cannot be
// determined either, the process is terminated with log.Fatalf.
func homeDir() string {
	if runtime.GOOS == "windows" {
		// Keep the historical value whenever it is available so that
		// an existing cache location does not move.
		if home := os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH"); home != "" {
			return home
		}
		return os.Getenv("USERPROFILE")
	}
	if home := os.Getenv("HOME"); home != "" {
		return home
	}
	u, err := user.Current()
	if err != nil {
		log.Fatalf("failed to get home directory or current user: %v", err)
	}
	return u.HomeDir
}