blob: 55bb334e4f6ade93536c1f548e8666a944b1a781 [file] [log] [blame]
Brad Fitzpatrickb3a49f92017-03-20 19:58:04 +00001// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
// Package godata loads the Go project's corpus of Git, GitHub, and
// Gerrit activity into memory to allow easy analysis without worrying
// about APIs and their pagination, quotas, and other nuisances and
// limitations.
Brad Fitzpatrickb3a49f92017-03-20 19:58:04 +00009package godata
10
11import (
12 "context"
Brad Fitzpatrick04f8c522017-04-29 07:39:57 +000013 "log"
Brad Fitzpatrickb3a49f92017-03-20 19:58:04 +000014 "os"
Brad Fitzpatrick04f8c522017-04-29 07:39:57 +000015 "os/user"
Brad Fitzpatrickb3a49f92017-03-20 19:58:04 +000016 "path/filepath"
Brad Fitzpatrick04f8c522017-04-29 07:39:57 +000017 "runtime"
Brad Fitzpatrickb3a49f92017-03-20 19:58:04 +000018
19 "golang.org/x/build/maintner"
20)
21
// Server is the URL of the Go project's production maintner log.
// Get passes it to maintner.NewNetworkMutationSource as the source
// of mutation data.
const Server = "https://maintner.golang.org/logs"
24
Brad Fitzpatrick1a1ef8e2017-04-29 21:15:37 +000025// Get returns the Go project's corpus, containing all Git commits,
26// Github activity, and Gerrit activity and metadata since the
27// beginning of the project.
28//
Chris Broadfoot90850ed2017-07-08 09:08:42 -070029// Use Corpus.Update to keep the corpus up-to-date. If you do this, you must
30// hold the read lock if reading and updating concurrently.
31//
Dmitri Shuralyov36ea9462022-03-30 18:41:57 -040032// The initial call to Get will download a few gigabytes of data
33// into a directory "golang-maintner" under your operating
Brad Fitzpatrick1a1ef8e2017-04-29 21:15:37 +000034// system's user cache directory. Subsequent calls will only download
35// what's changed since the previous call.
36//
37// Even with all the data already cached on local disk, a call to Get
Dmitri Shuralyov36ea9462022-03-30 18:41:57 -040038// takes approximately 15 seconds per gigabyte of mutation log data
39// to load it into memory.
Brad Fitzpatrick1a1ef8e2017-04-29 21:15:37 +000040// For daemons, use Corpus.Update to incrementally update an
41// already-loaded Corpus.
42//
43// The in-memory representation is about 25% larger than its on-disk
Dmitri Shuralyov36ea9462022-03-30 18:41:57 -040044// size. In April 2022, it's under 4 GB.
Brad Fitzpatrick1a1ef8e2017-04-29 21:15:37 +000045//
Dmitri Shuralyov36ea9462022-03-30 18:41:57 -040046// See https://pkg.go.dev/golang.org/x/build/maintner#Corpus for how
47// to walk the data structure.
Brad Fitzpatrickb3a49f92017-03-20 19:58:04 +000048func Get(ctx context.Context) (*maintner.Corpus, error) {
Brad Fitzpatrick5dcf3c42017-05-18 18:29:05 +000049 targetDir := Dir()
Brad Fitzpatrick04f8c522017-04-29 07:39:57 +000050 if err := os.MkdirAll(targetDir, 0700); err != nil {
51 return nil, err
52 }
Brad Fitzpatrickc1b987d2019-12-06 04:32:09 +000053 mutSrc := maintner.NewNetworkMutationSource(Server, targetDir)
Brad Fitzpatrick04f8c522017-04-29 07:39:57 +000054 corpus := new(maintner.Corpus)
55 if err := corpus.Initialize(ctx, mutSrc); err != nil {
Brad Fitzpatrickb3a49f92017-03-20 19:58:04 +000056 return nil, err
57 }
58 return corpus, nil
59}
Brad Fitzpatrick04f8c522017-04-29 07:39:57 +000060
Brad Fitzpatrick5dcf3c42017-05-18 18:29:05 +000061// Dir returns the directory containing the cached mutation logs.
62func Dir() string {
Jude Pereira11e039e2017-09-24 11:32:22 +053063 return filepath.Join(XdgCacheDir(), "golang-maintner")
Brad Fitzpatrick5dcf3c42017-05-18 18:29:05 +000064}
65
Jude Pereira11e039e2017-09-24 11:32:22 +053066// XdgCacheDir returns the XDG Base Directory Specification cache
Brad Fitzpatrick04f8c522017-04-29 07:39:57 +000067// directory.
Jude Pereira11e039e2017-09-24 11:32:22 +053068func XdgCacheDir() string {
Brad Fitzpatrick04f8c522017-04-29 07:39:57 +000069 cache := os.Getenv("XDG_CACHE_HOME")
70 if cache != "" {
71 return cache
72 }
73 home := homeDir()
74 // Not XDG but standard for OS X.
75 if runtime.GOOS == "darwin" {
76 return filepath.Join(home, "Library/Caches")
77 }
78 return filepath.Join(home, ".cache")
79}
80
// homeDir returns the current user's home directory.
//
// On Windows it returns HOMEDRIVE+HOMEPATH when that is non-empty and
// otherwise falls back to USERPROFILE. On all other systems it
// returns HOME when that is non-empty. If none of the environment
// variables yields a path, the home directory reported by
// user.Current is used.
//
// homeDir never returns an error: if user.Current fails as well, it
// logs the failure and terminates the process via log.Fatalf.
func homeDir() string {
	if runtime.GOOS == "windows" {
		if home := os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH"); home != "" {
			return home
		}
		// HOMEDRIVE/HOMEPATH can be unset (for example in stripped-down
		// service environments). Returning "" here would make callers
		// build a cache path relative to the working directory, so try
		// USERPROFILE before falling through to user.Current below.
		if home := os.Getenv("USERPROFILE"); home != "" {
			return home
		}
	} else if home := os.Getenv("HOME"); home != "" {
		return home
	}

	u, err := user.Current()
	if err != nil {
		log.Fatalf("failed to get home directory or current user: %v", err)
	}
	return u.HomeDir
}