// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package gitrepo provides a Git-based implementation of codehost.Repo.
package gitrepo

import (
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"cmd/go/internal/modfetch/codehost"
	"cmd/go/internal/par"
)

// Repo returns the code repository for the Git remote reference remote.
// Paths on the local file system are rejected; use LocalRepo to allow them.
// Results are shared with other callers through newRepoCached.
func Repo(remote string) (codehost.Repo, error) {
	const localOK = false
	return newRepoCached(remote, localOK)
}

// LocalRepo is like Repo, except that in addition to Git remote references
// it also accepts a path to a repository directory on the local file system.
func LocalRepo(remote string) (codehost.Repo, error) {
	const localOK = true
	return newRepoCached(remote, localOK)
}

// workDirType is the work directory type passed to codehost.WorkDir
// when newRepo asks for the local directory backing a remote repository.
const workDirType = "git2"

// repoCache holds the outcome of newRepo (the repo or the error) for each
// (remote, localOK) pair requested through newRepoCached.
var repoCache par.Cache

// newRepoCached is newRepo with its result memoized in repoCache.
// The cache key is the (remote, localOK) pair, and both the repository
// and the error from newRepo are stored, so a failure is remembered
// and returned again on later calls with the same arguments.
func newRepoCached(remote string, localOK bool) (codehost.Repo, error) {
	type cacheKey struct {
		remote  string
		localOK bool
	}
	type result struct {
		repo codehost.Repo
		err  error
	}

	open := func() interface{} {
		var res result
		res.repo, res.err = newRepo(remote, localOK)
		return res
	}

	res := repoCache.Do(cacheKey{remote: remote, localOK: localOK}, open).(result)
	return res.repo, res.err
}

// newRepo returns a repo for the given remote.
//
// If remote contains "://" it is treated as a remote URL: the repository is
// backed by a bare Git repository in the work directory that codehost.WorkDir
// returns for it, which is initialized (and given a remote named "origin")
// the first time it is seen. If either setup command fails, the work
// directory is removed and the error is returned.
//
// Otherwise remote is treated as a path on the local file system. That is
// only permitted when localOK is true, the path must not contain a colon,
// and it must name an existing directory; git commands then run directly
// in that directory.
func newRepo(remote string, localOK bool) (codehost.Repo, error) {
	r := &repo{remote: remote}
	if strings.Contains(remote, "://") {
		// This is a remote path.
		dir, err := codehost.WorkDir(workDirType, r.remote)
		if err != nil {
			return nil, err
		}
		r.dir = dir
		if _, err := os.Stat(filepath.Join(dir, "objects")); err != nil {
			if _, err := codehost.Run(dir, "git", "init", "--bare"); err != nil {
				os.RemoveAll(dir)
				return nil, err
			}
			// We could just say git fetch https://whatever later,
			// but this lets us say git fetch origin instead, which
			// is a little nicer. More importantly, using a named remote
			// avoids a problem with Git LFS. See golang.org/issue/25605.
			if _, err := codehost.Run(dir, "git", "remote", "add", "origin", r.remote); err != nil {
				os.RemoveAll(dir)
				return nil, err
			}
		}
		// Use the named remote whether the work directory was created just
		// now or is being reused: a reused directory had "origin" added when
		// it was first set up, and falling back to the raw URL there would
		// bring back the Git LFS problem described above.
		r.remote = "origin"
		return r, nil
	}

	// Local path.
	// Disallow colon (not in ://) because sometimes
	// that's rcp-style host:path syntax and sometimes it's not (c:\work).
	// The go command has always insisted on URL syntax for ssh.
	if strings.Contains(remote, ":") {
		return nil, fmt.Errorf("git remote cannot use host:path syntax")
	}
	if !localOK {
		return nil, fmt.Errorf("git remote must not be local directory")
	}
	r.local = true
	info, err := os.Stat(remote)
	if err != nil {
		return nil, err
	}
	if !info.IsDir() {
		return nil, fmt.Errorf("%s exists but is not a directory", remote)
	}
	r.dir = remote
	return r, nil
}

// repo is the Git implementation of codehost.Repo returned by newRepo.
type repo struct {
	// remote is what git commands are given to name the remote: the string
	// passed to newRepo, or "origin" once newRepo has set up a named remote.
	// local reports whether the repository is a directory on the local file
	// system rather than a remote URL.
	// dir is the directory in which git commands are run.
	remote string
	local  bool
	dir    string

	// fetchLevel records how much has been fetched so far (fetchNone,
	// fetchSome or fetchAll); stat holds mu while it reads and updates it.
	mu         sync.Mutex // protects fetchLevel, some git repo state
	fetchLevel int

	// statCache memoizes the result of stat for each rev passed to Stat.
	statCache par.Cache

	// refs maps remote ref names (HEAD, refs/heads/..., refs/tags/...) to
	// commit hashes. It is filled in by loadRefs, run at most once through
	// refsOnce; refsErr is the error from that load, if any.
	refsOnce sync.Once
	refs     map[string]string
	refsErr  error

	// localTags is the set of tag names present in the local repository,
	// filled in by loadLocalTags, run at most once through localTagsOnce.
	localTagsOnce sync.Once
	localTags     map[string]bool
}

// Values for repo.fetchLevel. They are ordered from least to most fetched,
// so stat can compare them with < and <=.
const (
	// How much have we fetched into the git repo (in this process)?
	fetchNone = iota // nothing yet
	fetchSome        // shallow fetches of individual hashes
	fetchAll         // "fetch -t origin": get all remote branches and tags
)

// loadLocalTags loads tag references from the local git cache
// into the map r.localTags.
// Should only be called as r.localTagsOnce.Do(r.loadLocalTags).
//
// If "git tag -l" fails, r.localTags is left nil, so every lookup
// in it reports that the tag is not known locally.
func (r *repo) loadLocalTags() {
	// Unlike loadRefs this does not talk to the remote: it only lists
	// the tags already stored in the repository in r.dir.
	out, err := codehost.Run(r.dir, "git", "tag", "-l")
	if err != nil {
		return
	}

	tags := make(map[string]bool)
	for _, name := range strings.Split(string(out), "\n") {
		if name == "" {
			continue
		}
		tags[name] = true
	}
	r.localTags = tags
}

// loadRefs loads heads and tags references from the remote into the map r.refs.
// Should only be called as r.refsOnce.Do(r.loadRefs).
//
// On failure the error is stored in r.refsErr and r.refs is left nil.
// On success r.refs maps "HEAD", "refs/heads/..." and "refs/tags/..." names
// to hashes; entries reported with a "^{}" suffix (unwrapped annotated tags)
// replace the hash of the tag they belong to and are not kept under the
// suffixed name.
func (r *repo) loadRefs() {
	// The git protocol sends all known refs and ls-remote filters them on the client side,
	// so we might as well record both heads and tags in one shot.
	// Most of the time we only care about tags but sometimes we care about heads too.
	out, err := codehost.Run(r.dir, "git", "ls-remote", "-q", r.remote)
	if err != nil {
		r.refsErr = err
		return
	}

	const peeledSuffix = "^{}"
	refs := make(map[string]string)
	peeled := make(map[string]string) // tag ref name -> hash of the unwrapped tag
	for _, line := range strings.Split(string(out), "\n") {
		fields := strings.Fields(line)
		if len(fields) != 2 {
			continue
		}
		hash, name := fields[0], fields[1]
		if name != "HEAD" && !strings.HasPrefix(name, "refs/heads/") && !strings.HasPrefix(name, "refs/tags/") {
			continue
		}
		if strings.HasSuffix(name, peeledSuffix) {
			peeled[strings.TrimSuffix(name, peeledSuffix)] = hash
			continue
		}
		refs[name] = hash
	}

	// Record unwrapped annotated tag as value of tag, regardless of the
	// order in which the two lines appeared in the output.
	for name, hash := range peeled {
		refs[name] = hash
	}
	r.refs = refs
}

// Tags returns the names of the remote's tags that begin with prefix,
// in sorted order and without the leading "refs/tags/".
// It returns the error from loading the remote refs, if there was one.
// When no tag matches, the result is an empty, non-nil slice.
func (r *repo) Tags(prefix string) ([]string, error) {
	r.refsOnce.Do(r.loadRefs)
	if err := r.refsErr; err != nil {
		return nil, err
	}

	const tagRefPrefix = "refs/tags/"
	want := tagRefPrefix + prefix

	matched := []string{}
	for ref := range r.refs {
		if strings.HasPrefix(ref, want) {
			matched = append(matched, ref[len(tagRefPrefix):])
		}
	}
	sort.Strings(matched)
	return matched, nil
}

// Latest returns information about the commit that the remote's HEAD
// points to. It returns the error from loading the remote refs, if there
// was one, and a "no commits" error if the remote did not report a HEAD.
func (r *repo) Latest() (*codehost.RevInfo, error) {
	r.refsOnce.Do(r.loadRefs)
	if err := r.refsErr; err != nil {
		return nil, err
	}

	head := r.refs["HEAD"]
	if head == "" {
		return nil, fmt.Errorf("no commits")
	}
	return r.Stat(head)
}

// findRef finds some ref name for the given hash,
// for use when the server requires giving a ref instead of a hash.
// Several ref names may point at the same hash; in that case
// any one of them is returned - it doesn't matter which.
// ok is false when no loaded ref has that hash.
func (r *repo) findRef(hash string) (ref string, ok bool) {
	r.refsOnce.Do(r.loadRefs)
	for name, h := range r.refs {
		if h != hash {
			continue
		}
		return name, true
	}
	return "", false
}

// unshallow returns the extra "git fetch" arguments needed to turn the
// repository in gitDir into a full clone: "--unshallow" if a file named
// "shallow" can be statted in gitDir, and no arguments otherwise.
// The result is never nil.
func unshallow(gitDir string) []string {
	flags := []string{}
	if _, err := os.Stat(filepath.Join(gitDir, "shallow")); err == nil {
		flags = append(flags, "--unshallow")
	}
	return flags
}

// minHashDigits is the minimum number of digits to require
// before accepting a hex digit sequence as potentially identifying
// a specific commit in a git repo. (Of course, users can always
// specify more digits, and many will paste in all 40 digits,
// but many of git's commands default to printing short hashes
// as 7 digits.)
// stat treats a rev as a possible hash only if it is all hex digits
// and its length is between minHashDigits and 40.
const minHashDigits = 7

// stat stats the given rev in the local repository,
// or else it fetches more info from the remote repository and tries again.
//
// For a local repository it simply consults the repository. Otherwise it
// tries, in order:
//
//  1. rev as a hash (or hash prefix) already present locally;
//  2. rev as a tag already present locally;
//  3. rev resolved against the remote's refs (tag, branch, HEAD, or hash
//     prefix of a ref), followed by a shallow fetch of that one commit;
//  4. a full fetch of all heads and tags, followed by a local lookup.
//
// If the remote's refs cannot be loaded, that error is returned rather than
// being reported as an unknown revision. A rev that is neither a known ref
// name nor a plausible hash yields an "unknown revision" error, and a hash
// prefix matching refs with different hashes yields an "ambiguous revision"
// error naming the prefix.
func (r *repo) stat(rev string) (*codehost.RevInfo, error) {
	if r.local {
		return r.statLocal(rev, rev)
	}

	// Fast path: maybe rev is a hash we already have locally.
	if len(rev) >= minHashDigits && len(rev) <= 40 && codehost.AllHex(rev) {
		if info, err := r.statLocal(rev, rev); err == nil {
			return info, nil
		}
	}

	// Maybe rev is a tag we already have locally.
	// (Note that we're excluding branches, which can be stale.)
	r.localTagsOnce.Do(r.loadLocalTags)
	if r.localTags[rev] {
		return r.statLocal(rev, "refs/tags/"+rev)
	}

	// Maybe rev is the name of a tag or branch on the remote server.
	// Or maybe it's the prefix of a hash of a named ref.
	// Try to resolve to both a ref (git name) and full (40-hex-digit) commit hash.
	r.refsOnce.Do(r.loadRefs)
	if r.refsErr != nil {
		// Like Tags and Latest, report the failure to list the remote's
		// refs directly instead of treating the ref list as empty.
		return nil, r.refsErr
	}
	var ref, hash string
	switch {
	case r.refs["refs/tags/"+rev] != "":
		ref = "refs/tags/" + rev
		hash = r.refs[ref]
		// Keep rev as is: tags are assumed not to change meaning.
	case r.refs["refs/heads/"+rev] != "":
		ref = "refs/heads/" + rev
		hash = r.refs[ref]
		rev = hash // Replace rev, because meaning of refs/heads/foo can change.
	case rev == "HEAD" && r.refs["HEAD"] != "":
		ref = "HEAD"
		hash = r.refs[ref]
		rev = hash // Replace rev, because meaning of HEAD can change.
	case len(rev) >= minHashDigits && len(rev) <= 40 && codehost.AllHex(rev):
		// At the least, we have a hash prefix we can look up after the fetch below.
		// Maybe we can map it to a full hash using the known refs.
		prefix := rev
		// Check whether rev is prefix of known ref hash.
		for k, h := range r.refs {
			if strings.HasPrefix(h, prefix) {
				if hash != "" && hash != h {
					// Hash is an ambiguous hash prefix.
					// More information will not change that.
					// Report the prefix the caller gave: rev has already
					// been replaced by the first matching full hash.
					return nil, fmt.Errorf("ambiguous revision %s", prefix)
				}
				if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash.
					ref = k
				}
				rev = h
				hash = h
			}
		}
		if hash == "" && len(rev) == 40 { // Didn't find a ref, but rev is a full hash.
			hash = rev
		}
	default:
		return nil, fmt.Errorf("unknown revision %s", rev)
	}

	// Protect r.fetchLevel and the "fetch more and more" sequence.
	// TODO(rsc): Add codehost.LockDir and use it for protecting that
	// sequence, so that multiple processes don't collide in their
	// git commands.
	r.mu.Lock()
	defer r.mu.Unlock()

	// If we know a specific commit we need, fetch it.
	if r.fetchLevel <= fetchSome && hash != "" {
		r.fetchLevel = fetchSome
		var refspec string
		if ref != "" {
			// If we do know the ref name, save the mapping locally
			// so that (if it is a tag) it can show up in localTags
			// on a future call. Also, some servers refuse to allow
			// full hashes in ref specs, so prefer a ref name if known.
			refspec = ref + ":" + ref
		} else {
			ref = hash
			refspec = hash
		}
		_, err := codehost.Run(r.dir, "git", "fetch", "-f", "--depth=1", r.remote, refspec)
		if err == nil {
			return r.statLocal(rev, ref)
		}
		// Only these failures fall through to the full fetch below;
		// any other error is returned as is.
		if !strings.Contains(err.Error(), "unadvertised object") && !strings.Contains(err.Error(), "no such remote ref") && !strings.Contains(err.Error(), "does not support shallow") {
			return nil, err
		}
	}

	// Last resort.
	// Fetch all heads and tags and hope the hash we want is in the history.
	if r.fetchLevel < fetchAll {
		r.fetchLevel = fetchAll

		// To work around a protocol version 2 bug that breaks --unshallow,
		// add -c protocol.version=0.
		// TODO(rsc): The bug is believed to be server-side, meaning only
		// on Google's Git servers. Once the servers are fixed, drop the
		// protocol.version=0. See Google-internal bug b/110495752.
		var protoFlag []string
		unshallowFlag := unshallow(r.dir)
		if len(unshallowFlag) > 0 {
			protoFlag = []string{"-c", "protocol.version=0"}
		}
		if _, err := codehost.Run(r.dir, "git", protoFlag, "fetch", unshallowFlag, "-f", "-t", r.remote, "refs/heads/*:refs/heads/*"); err != nil {
			return nil, err
		}
	}

	return r.statLocal(rev, rev)
}

// statLocal returns a codehost.RevInfo describing rev in the local git repository.
// It uses version as info.Version, except that when version is a prefix of
// the commit's full hash, the full hash is used instead.
//
// The commit hash and commit time come from "git log -n1". If that command
// fails, the error reports an unknown hash (when rev is all hex digits) or
// an unknown revision, without the underlying git error.
func (r *repo) statLocal(version, rev string) (*codehost.RevInfo, error) {
	out, err := codehost.Run(r.dir, "git", "log", "-n1", "--format=format:%H %ct", rev)
	if err != nil {
		if codehost.AllHex(rev) {
			return nil, fmt.Errorf("unknown hash %s", rev)
		}
		return nil, fmt.Errorf("unknown revision %s", rev)
	}

	// Expect exactly two fields: the full hash and the commit time in seconds.
	fields := strings.Fields(string(out))
	if len(fields) != 2 {
		return nil, fmt.Errorf("unexpected response from git log: %q", out)
	}
	fullHash, stamp := fields[0], fields[1]

	secs, err := strconv.ParseInt(stamp, 10, 64)
	if err != nil {
		return nil, fmt.Errorf("invalid time from git log: %q", out)
	}

	if strings.HasPrefix(fullHash, version) {
		version = fullHash // extend to full hash
	}

	return &codehost.RevInfo{
		Name:    fullHash,
		Short:   codehost.ShortenSHA1(fullHash),
		Time:    time.Unix(secs, 0).UTC(),
		Version: version,
	}, nil
}

// Stat returns information about the revision rev.
// The work is done by r.stat; its result, including any error, is memoized
// in r.statCache under rev, so repeated calls for the same rev return the
// same answer without running stat again.
func (r *repo) Stat(rev string) (*codehost.RevInfo, error) {
	type result struct {
		info *codehost.RevInfo
		err  error
	}

	lookup := func() interface{} {
		var res result
		res.info, res.err = r.stat(rev)
		return res
	}

	res := r.statCache.Do(rev, lookup).(result)
	return res.info, res.err
}

// ReadFile returns the contents of file as of revision rev.
// It first resolves rev with Stat, returning any error from that step.
// If "git cat-file" then fails for any reason, the error is os.ErrNotExist.
// maxSize is currently not consulted.
func (r *repo) ReadFile(rev, file string, maxSize int64) ([]byte, error) {
	// TODO: Could use git cat-file --batch.
	info, err := r.Stat(rev) // download rev into local git repo
	if err != nil {
		return nil, err
	}

	object := info.Name + ":" + file
	data, err := codehost.Run(r.dir, "git", "cat-file", "blob", object)
	if err != nil {
		return nil, os.ErrNotExist
	}
	return data, nil
}

// ReadZip returns a zip archive of the tree at revision rev, produced by
// "git archive" with every entry placed under the "prefix/" directory.
// If subdir is non-empty, only that subdirectory is archived.
//
// It first resolves rev with Stat, returning any error from that step.
// If git reports that subdir did not match any files, the error is
// os.ErrNotExist; any other git failure is returned unchanged.
// The returned actualSubdir is always the empty string.
// maxSize is currently not consulted.
func (r *repo) ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, actualSubdir string, err error) {
	// TODO: Use maxSize or drop it.
	args := []string{}
	if subdir != "" {
		args = append(args, "--", subdir)
	}
	info, err := r.Stat(rev) // download rev into local git repo
	if err != nil {
		return nil, "", err
	}
	archive, err := codehost.Run(r.dir, "git", "archive", "--format=zip", "--prefix=prefix/", info.Name, args)
	if err != nil {
		// Only inspect stderr when the error really is a *codehost.RunError;
		// an unchecked type assertion would panic on any other error type.
		if runErr, ok := err.(*codehost.RunError); ok && bytes.Contains(runErr.Stderr, []byte("did not match any files")) {
			return nil, "", os.ErrNotExist
		}
		return nil, "", err
	}

	return ioutil.NopCloser(bytes.NewReader(archive)), "", nil
}
