blob: 3faae671423fcf4f15c26d539198280559cd6e55 [file] [log] [blame]
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package sourcecache provides a cache of code found in Git repositories.
// It talks directly to the Gerrit instance at go.googlesource.com.
// If RegisterGitMirrorDial is called, it will first try to get code from gitmirror before falling back on Gerrit.
package sourcecache
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/ioutil"
"log"
"net"
"net/http"
"time"
"golang.org/x/build/internal/lru"
"golang.org/x/build/internal/singleflight"
"golang.org/x/build/internal/spanlog"
)
var (
	// processStartTime is captured at package initialization; it is used to
	// report process uptime when logging gitmirror fetch failures.
	processStartTime = time.Now()

	// sourceGroup is the singleflight group through which GetSourceTgz runs
	// its cache lookup and fetch, keyed by "repo-rev".
	sourceGroup singleflight.Group

	// sourceCache maps "repo-rev" keys to source values.
	sourceCache = lru.New(40)
)

// source is the value type stored in sourceCache.
type source struct {
	// Tgz holds the bytes of the compressed source tarball.
	// It is left nil when TooBig is set.
	Tgz []byte

	// TooBig records that the server sent more than maxSize bytes for the
	// repository, in which case the tarball was not kept.
	TooBig bool
}
// GetSourceTgz returns a Reader over the gzipped tarball of the requested
// source revision.
// repo is a go.googlesource.com repo ("go", "net", and so on).
// rev is a git revision.
//
// Results are looked up in, and on success stored in, sourceCache under the
// key "repo-rev"; the lookup and the fetch both run via sourceGroup.Do with
// that same key. When a gitmirror client has been registered it is tried
// first, and Gerrit is used if that attempt fails or no client is registered.
//
// An error of type TooBigError is returned if the compressed tarball exceeds
// a size that, on 2021-11-22, was judged sufficient to cover expected
// legitimate future needs for a while.
// See golang.org/issue/46379.
func GetSourceTgz(sl spanlog.Logger, repo, rev string) (tgz io.Reader, err error) {
	span := sl.CreateSpan("get_source", repo+"@"+rev)
	// The closure reads the named result so the span records the final error.
	defer func() { span.Done(err) }()

	cacheKey := fmt.Sprintf("%v-%v", repo, rev)

	// fetch yields the source for cacheKey: from the cache if present,
	// otherwise from gitmirror (when configured), otherwise from Gerrit.
	fetch := func() (interface{}, error) {
		if cached, hit := sourceCache.Get(cacheKey); hit {
			return cached, nil
		}

		if gitMirrorClient != nil {
			mirrorSpan := sl.CreateSpan("get_source_from_gitmirror")
			mirrored, mirrorErr := getSourceTgzFromGitMirror(repo, rev)
			if mirrorErr == nil {
				sourceCache.Add(cacheKey, mirrored)
				mirrorSpan.Done(nil)
				return mirrored, nil
			}
			log.Printf("Error fetching source %s/%s from gitmirror (after %v uptime): %v",
				repo, rev, time.Since(processStartTime), mirrorErr)
			// NOTE(review): the span is closed with a generic "timeout" error
			// whatever the real failure was; the actual error is only logged.
			mirrorSpan.Done(errors.New("timeout"))
		}

		gerritSpan := sl.CreateSpan("get_source_from_gerrit", fmt.Sprintf("%v from gerrit", cacheKey))
		fetched, fetchErr := getSourceTgzFromGerrit(repo, rev)
		gerritSpan.Done(fetchErr)
		if fetchErr == nil {
			// Only successful fetches are cached, so failures are retried
			// on the next call.
			sourceCache.Add(cacheKey, fetched)
		}
		return fetched, fetchErr
	}

	result, err, _ := sourceGroup.Do(cacheKey, fetch)
	if err != nil {
		return nil, err
	}

	src := result.(source)
	if src.TooBig {
		return nil, TooBigError{Repo: repo, Rev: rev, Limit: maxSize(repo)}
	}
	return bytes.NewReader(src.Tgz), nil
}
// TooBigError is the error reported when the compressed tarball of a source
// revision is larger than this package is willing to accept.
type TooBigError struct {
	Repo  string // Repository name under go.googlesource.com.
	Rev   string // Git revision that was requested.
	Limit int64  // Maximum accepted size, in bytes.
}

// Error implements the error interface. The limit is reported in whole
// megabytes (Limit divided by 1024 twice, using integer division).
func (e TooBigError) Error() string {
	const bytesPerMB = 1024 * 1024
	limitMB := e.Limit / bytesPerMB
	return fmt.Sprintf(
		"rejected because compressed tarball of repository go.googlesource.com/%s at revision %s exceeded a limit of %d MB; see golang.org/issue/46379",
		e.Repo, e.Rev, limitMB,
	)
}
// gitMirrorClient is the HTTP client used to reach gitmirror.
// It stays nil until RegisterGitMirrorDial is called.
var gitMirrorClient *http.Client

// RegisterGitMirrorDial installs dial as the way to open connections to
// gitmirror. Every connection made by the resulting client goes through
// dial; the network and address of the request are ignored.
//
// If used, this function must be called before GetSourceTgz.
func RegisterGitMirrorDial(dial func(context.Context) (net.Conn, error)) {
	// The same duration bounds a whole request and the lifetime of an
	// idle connection.
	const timeout = 30 * time.Second

	transport := &http.Transport{IdleConnTimeout: timeout}
	transport.DialContext = func(ctx context.Context, _, _ string) (net.Conn, error) {
		return dial(ctx)
	}

	gitMirrorClient = &http.Client{
		Timeout:   timeout,
		Transport: transport,
	}
}
// gerritHTTPClient is the HTTP client used for requests to Gerrit.
// Each request is bounded by a 30 second timeout.
var gerritHTTPClient = &http.Client{Timeout: 30 * time.Second}

// getSourceTgzFromGerrit fetches the tarball of repo at rev from the
// "+archive" endpoint of go.googlesource.com.
func getSourceTgzFromGerrit(repo, rev string) (source, error) {
	archiveURL := "https://go.googlesource.com/" + repo + "/+archive/" + rev + ".tar.gz"
	return getSourceTgzFromURL(gerritHTTPClient, "gerrit", repo, rev, archiveURL)
}
// getSourceTgzFromGitMirror fetches the tarball of repo at rev from gitmirror
// using gitMirrorClient, which must be non-nil.
//
// It makes up to two attempts, one second apart. After a failed attempt it
// closes gitMirrorClient's idle connections so that the next request has to
// dial again instead of reusing a kept-alive connection. If both attempts
// fail, the error from the last one is returned.
func getSourceTgzFromGitMirror(repo, rev string) (src source, err error) {
	for i := 0; i < 2; i++ { // two tries; different pods maybe?
		if i > 0 {
			time.Sleep(1 * time.Second)
		}
		// The "gitmirror" hostname is unused: the client's dialer ignores
		// the request address.
		src, err = getSourceTgzFromURL(gitMirrorClient, "gitmirror", repo, rev, "http://gitmirror/"+repo+".tar.gz?rev="+rev)
		if err == nil {
			return src, nil
		}
		// gitMirrorClient has its own Transport, so the idle connections must
		// be closed on the client itself; closing those of
		// http.DefaultTransport would not affect the retry.
		gitMirrorClient.CloseIdleConnections()
	}
	return source{}, err
}
// getSourceTgzFromURL fetches a source tarball from url using hc.
//
// service, repo and rev are used to describe the request in error messages;
// repo also selects the size limit through maxSize.
//
// If url serves more than maxSize(repo) bytes, it stops short and returns a
// source with TooBig set and a nil error. A non-2xx response is reported as
// an error that includes the status and up to 4 KiB of the response body.
// Transport and read errors are wrapped, so callers can inspect them with
// errors.Is and errors.As.
func getSourceTgzFromURL(hc *http.Client, service, repo, rev, url string) (source, error) {
	res, err := hc.Get(url)
	if err != nil {
		return source{}, fmt.Errorf("fetching %s/%s from %s: %w", repo, rev, service, err)
	}
	defer res.Body.Close()
	if res.StatusCode/100 != 2 {
		// Best effort: the body snippet is only context for the error
		// message, so a failure while reading it is deliberately ignored.
		slurp, _ := ioutil.ReadAll(io.LimitReader(res.Body, 4<<10))
		return source{}, fmt.Errorf("fetching %s/%s from %s: %v; body: %s", repo, rev, service, res.Status, slurp)
	}
	// See golang.org/issue/11224 for a discussion on tree filtering.
	limit := maxSize(repo)
	// Read at most one byte past the limit: that is enough to tell an
	// oversized response from one that is exactly at the limit.
	b, err := ioutil.ReadAll(io.LimitReader(res.Body, limit+1))
	if err != nil {
		return source{}, fmt.Errorf("reading %s/%s from %s: %w", repo, rev, service, err)
	}
	if int64(len(b)) > limit {
		return source{TooBig: true}, nil
	}
	return source{Tgz: b}, nil
}
// maxSize reports the artificial upper bound, in bytes, on the size of a
// compressed source tarball that this package will accept for repo.
// The limits are expected to need a human's attention every couple of years
// as the needs of the Go project evolve, and ideally not much more often.
//
// repo is a go.googlesource.com repo ("go", "net", and so on).
func maxSize(repo string) int64 {
	const (
		// As of 2021-11-22, a compressed tarball of Go source is 23 MB,
		// x/net is 1.2 MB,
		// x/build is 1.1 MB,
		// x/tools is 2.9 MB.
		defaultLimit = 100 << 20

		// In 2021, all content in x/blog (52 MB) and x/talks (74 MB) moved
		// to x/website. This makes x/website an outlier, with a compressed
		// tarball size of 135 MB. Give it some room to grow from there.
		websiteLimit = 200 << 20
	)
	if repo == "website" {
		return websiteLimit
	}
	return defaultLimit
}