zip: add CreateFromVCS, which creates a module zip from vcs

Updates golang/go#37413

Change-Id: I5ea07a6e4eedc6cb215e4893944f1ab215ea8f2b
Reviewed-on: https://go-review.googlesource.com/c/mod/+/330769
Trust: Jean de Klerk <deklerk@google.com>
Trust: Jay Conrod <jayconrod@google.com>
Run-TryBot: Jean de Klerk <deklerk@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Jay Conrod <jayconrod@google.com>
diff --git a/zip/zip.go b/zip/zip.go
index 0f0e6a3..40606d6 100644
--- a/zip/zip.go
+++ b/zip/zip.go
@@ -53,6 +53,7 @@
 	"io"
 	"io/ioutil"
 	"os"
+	"os/exec"
 	"path"
 	"path/filepath"
 	"strings"
@@ -555,7 +556,7 @@
 		if zerr, ok := err.(*zipError); ok {
 			zerr.path = dir
 		} else if err != nil {
-			err = &zipError{verb: "create zip", path: dir, err: err}
+			err = &zipError{verb: "create zip from directory", path: dir, err: err}
 		}
 	}()
 
@@ -567,6 +568,116 @@
 	return Create(w, m, files)
 }
 
+// CreateFromVCS creates a module zip file for module m from the contents of a
+// VCS repository stored locally. The zip content is written to w.
+//
+// repo must be an absolute path to the base of the repository, such as
+// "/Users/some-user/my-repo".
+//
+// revision is the revision of the repository to create the zip from. Examples
+// include HEAD or SHA sums for git repositories.
+//
+// subdir must be the relative path from the base of the repository, such as
+// "sub/dir". To create a zip from the base of the repository, pass an empty
+// string.
+func CreateFromVCS(w io.Writer, m module.Version, repo, revision, subdir string) (err error) {
+	defer func() {
+		if zerr, ok := err.(*zipError); ok {
+			zerr.path = repo
+		} else if err != nil {
+			err = &zipError{verb: "create zip from version control system", path: repo, err: err}
+		}
+	}()
+
+	var filesToCreate []File
+
+	switch {
+	case isGitRepo(repo):
+		files, err := filesInGitRepo(repo, revision, subdir)
+		if err != nil {
+			return err
+		}
+
+		filesToCreate = files
+	default:
+		return fmt.Errorf("%q does not use a recognised version control system", repo)
+	}
+
+	return Create(w, m, filesToCreate)
+}
+
+// filesInGitRepo returns the files that git archive includes for revision rev of the repo at dir (limited to subdir if set).
+func filesInGitRepo(dir, rev, subdir string) ([]File, error) {
+	stderr := bytes.Buffer{}
+	stdout := bytes.Buffer{}
+
+	// Incredibly, git produces different archives depending on whether
+	// it is running on a Windows system or not, in an attempt to normalize
+	// text file line endings. Setting -c core.autocrlf=input means only
+	// translate files on the way into the repo, not on the way out (archive).
+	// The -c core.eol=lf should be unnecessary but set it anyway.
+	//
+	// Note: We use git archive to understand which files are actually included,
+	// ignoring things like .gitignore'd files. We could also use other
+	// techniques like git ls-files, but this approach most closely matches what
+	// the Go command does, which is beneficial.
+	//
+	// Note: some of this code was copied from https://go.googlesource.com/go/+/refs/tags/go1.16.5/src/cmd/go/internal/modfetch/codehost/git.go#826.
+	cmd := exec.Command("git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", rev)
+	if subdir != "" {
+		cmd.Args = append(cmd.Args, subdir)
+	}
+	cmd.Dir = dir
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+	if err := cmd.Run(); err != nil {
+		return nil, fmt.Errorf("error running `git archive`: %w, %s", err, stderr.String())
+	}
+
+	rawReader := bytes.NewReader(stdout.Bytes())
+	zipReader, err := zip.NewReader(rawReader, int64(stdout.Len()))
+	if err != nil {
+		return nil, err
+	}
+
+	var fs []File
+	for _, zf := range zipReader.File {
+		if !strings.HasPrefix(zf.Name, subdir) || strings.HasSuffix(zf.Name, "/") {
+			continue
+		}
+
+		n := strings.TrimPrefix(zf.Name, subdir)
+		if n == "" {
+			continue
+		}
+		n = strings.TrimPrefix(n, string(filepath.Separator))
+
+		fs = append(fs, zipFile{
+			name: n,
+			f:    zf,
+		})
+	}
+
+	return fs, nil
+}
+
+// isGitRepo reports whether the given directory is a git repo.
+func isGitRepo(dir string) bool {
+	stdout := &bytes.Buffer{}
+	cmd := exec.Command("git", "rev-parse", "--git-dir")
+	cmd.Dir = dir
+	cmd.Stdout = stdout
+	if err := cmd.Run(); err != nil {
+		return false
+	}
+	gitDir := strings.TrimSpace(string(stdout.Bytes()))
+	if !filepath.IsAbs(gitDir) {
+		gitDir = filepath.Join(dir, gitDir)
+	}
+	wantDir := filepath.Join(dir, ".git")
+	return wantDir == gitDir
+}
+
 type dirFile struct {
 	filePath, slashPath string
 	info                os.FileInfo
@@ -576,6 +687,15 @@
 func (f dirFile) Lstat() (os.FileInfo, error)  { return f.info, nil }
 func (f dirFile) Open() (io.ReadCloser, error) { return os.Open(f.filePath) }
 
+type zipFile struct {
+	name string
+	f    *zip.File
+}
+
+func (f zipFile) Path() string                 { return f.name }
+func (f zipFile) Lstat() (os.FileInfo, error)  { return f.f.FileInfo(), nil }
+func (f zipFile) Open() (io.ReadCloser, error) { return f.f.Open() }
+
 // isVendoredPackage attempts to report whether the given filename is contained
 // in a package whose import path contains (but does not end with) the component
 // "vendor".
diff --git a/zip/zip_test.go b/zip/zip_test.go
index 2444de3..93e60fb 100644
--- a/zip/zip_test.go
+++ b/zip/zip_test.go
@@ -78,26 +78,29 @@
 	return test, nil
 }
 
-func extractTxtarToTempDir(arc *txtar.Archive) (dir string, err error) {
+func extractTxtarToTempDir(arc *txtar.Archive) (dir string, cleanup func(), err error) {
 	dir, err = ioutil.TempDir("", "zip_test-*")
 	if err != nil {
-		return "", err
+		return "", func() {}, err
+	}
+	cleanup = func() {
+		os.RemoveAll(dir)
 	}
 	defer func() {
 		if err != nil {
-			os.RemoveAll(dir)
+			cleanup()
 		}
 	}()
 	for _, f := range arc.Files {
 		filePath := filepath.Join(dir, f.Name)
 		if err := os.MkdirAll(filepath.Dir(filePath), 0777); err != nil {
-			return "", err
+			return "", func() {}, err
 		}
 		if err := ioutil.WriteFile(filePath, f.Data, 0666); err != nil {
-			return "", err
+			return "", func() {}, err
 		}
 	}
-	return dir, nil
+	return dir, cleanup, nil
 }
 
 func extractTxtarToTempZip(arc *txtar.Archive) (zipPath string, err error) {
@@ -269,15 +272,11 @@
 					break
 				}
 			}
-			tmpDir, err := extractTxtarToTempDir(test.archive)
+			tmpDir, cleanup, err := extractTxtarToTempDir(test.archive)
 			if err != nil {
 				t.Fatal(err)
 			}
-			defer func() {
-				if err := os.RemoveAll(tmpDir); err != nil {
-					t.Errorf("removing temp directory: %v", err)
-				}
-			}()
+			defer cleanup()
 
 			// Check the directory.
 			cf, err := modzip.CheckDir(tmpDir)
@@ -461,15 +460,11 @@
 			}
 
 			// Write files to a temporary directory.
-			tmpDir, err := extractTxtarToTempDir(test.archive)
+			tmpDir, cleanup, err := extractTxtarToTempDir(test.archive)
 			if err != nil {
 				t.Fatal(err)
 			}
-			defer func() {
-				if err := os.RemoveAll(tmpDir); err != nil {
-					t.Errorf("removing temp directory: %v", err)
-				}
-			}()
+			defer cleanup()
 
 			// Create zip from the directory.
 			tmpZip, err := ioutil.TempFile("", "TestCreateFromDir-*.zip")
@@ -1468,3 +1463,304 @@
 func (f zipFile) Path() string                 { return f.name }
 func (f zipFile) Lstat() (os.FileInfo, error)  { return f.f.FileInfo(), nil }
 func (f zipFile) Open() (io.ReadCloser, error) { return f.f.Open() }
+
+func TestCreateFromVCS_basic(t *testing.T) {
+	// Write files to a temporary directory.
+	tmpDir, cleanup, err := extractTxtarToTempDir(txtar.Parse([]byte(`-- go.mod --
+module example.com/foo/bar
+
+go 1.12
+-- a.go --
+package a
+
+var A = 5
+-- b.go --
+package a
+
+var B = 5
+-- c/c.go --
+package c
+
+var C = 5
+-- d/d.go --
+package c
+
+var D = 5
+-- .gitignore --
+b.go
+c/`)))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cleanup()
+
+	gitInit(t, tmpDir)
+	gitCommit(t, tmpDir)
+
+	for _, tc := range []struct {
+		desc      string
+		subdir    string
+		wantFiles []string
+	}{
+		{
+			desc:      "from root",
+			subdir:    "",
+			wantFiles: []string{"go.mod", "a.go", "d/d.go", ".gitignore"},
+		},
+		{
+			desc:   "from subdir",
+			subdir: "d/",
+			// Note: File paths are zipped as if the subdir were the root, i.e. d.go instead of d/d.go.
+			wantFiles: []string{"d.go"},
+		},
+	} {
+		t.Run(tc.desc, func(t *testing.T) {
+			// Create zip from the directory.
+			tmpZip := &bytes.Buffer{}
+
+			m := module.Version{Path: "example.com/foo/bar", Version: "v0.0.1"}
+
+			if err := modzip.CreateFromVCS(tmpZip, m, tmpDir, "HEAD", tc.subdir); err != nil {
+				t.Fatal(err)
+			}
+
+			readerAt := bytes.NewReader(tmpZip.Bytes())
+			r, err := zip.NewReader(readerAt, int64(tmpZip.Len()))
+			if err != nil {
+				t.Fatal(err)
+			}
+			var gotFiles []string
+			gotMap := map[string]bool{}
+			for _, f := range r.File {
+				gotMap[f.Name] = true
+				gotFiles = append(gotFiles, f.Name)
+			}
+			wantMap := map[string]bool{}
+			for _, f := range tc.wantFiles {
+				p := filepath.Join("example.com", "foo", "bar@v0.0.1", f)
+				wantMap[p] = true
+			}
+
+			// The things that shouldn't be there.
+			for f := range gotMap {
+				if !wantMap[f] {
+					t.Errorf("CreatedFromVCS: zipped file contains %s, but expected it not to", f)
+				}
+			}
+
+			// The things that are missing.
+			for f := range wantMap {
+				if !gotMap[f] {
+					t.Errorf("CreatedFromVCS: zipped file doesn't contain %s, but expected it to. all files: %v", f, gotFiles)
+				}
+			}
+		})
+	}
+}
+
+// Test what the experience of creating a zip from a v2 module is like.
+func TestCreateFromVCS_v2(t *testing.T) {
+	// Write files to a temporary directory.
+	tmpDir, cleanup, err := extractTxtarToTempDir(txtar.Parse([]byte(`-- go.mod --
+module example.com/foo/bar
+
+go 1.12
+-- a.go --
+package a
+
+var A = 5
+-- b.go --
+package a
+
+var B = 5
+-- go.mod --
+module example.com/foo/bar
+
+go 1.12
+-- gaz/v2/a_2.go --
+package a
+
+var C = 9
+-- gaz/v2/b_2.go --
+package a
+
+var B = 11
+-- gaz/v2/go.mod --
+module example.com/foo/bar/v2
+
+go 1.12
+-- .gitignore --
+`)))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cleanup()
+
+	gitInit(t, tmpDir)
+	gitCommit(t, tmpDir)
+
+	// Create zip from the directory.
+	tmpZip := &bytes.Buffer{}
+
+	m := module.Version{Path: "example.com/foo/bar/v2", Version: "v2.0.0"}
+
+	if err := modzip.CreateFromVCS(tmpZip, m, tmpDir, "HEAD", "gaz/v2"); err != nil {
+		t.Fatal(err)
+	}
+
+	readerAt := bytes.NewReader(tmpZip.Bytes())
+	r, err := zip.NewReader(readerAt, int64(tmpZip.Len()))
+	if err != nil {
+		t.Fatal(err)
+	}
+	var gotFiles []string
+	gotMap := map[string]bool{}
+	for _, f := range r.File {
+		gotMap[f.Name] = true
+		gotFiles = append(gotFiles, f.Name)
+	}
+	wantMap := map[string]bool{
+		"example.com/foo/bar/v2@v2.0.0/a_2.go": true,
+		"example.com/foo/bar/v2@v2.0.0/b_2.go": true,
+		"example.com/foo/bar/v2@v2.0.0/go.mod": true,
+	}
+
+	// The things that shouldn't be there.
+	for f := range gotMap {
+		if !wantMap[f] {
+			t.Errorf("CreatedFromVCS: zipped file contains %s, but expected it not to", f)
+		}
+	}
+
+	// The things that are missing.
+	for f := range wantMap {
+		if !gotMap[f] {
+			t.Errorf("CreatedFromVCS: zipped file doesn't contain %s, but expected it to. all files: %v", f, gotFiles)
+		}
+	}
+}
+
+func TestCreateFromVCS_nonGitDir(t *testing.T) {
+	// Write files to a temporary directory.
+	tmpDir, cleanup, err := extractTxtarToTempDir(txtar.Parse([]byte(`-- go.mod --
+module example.com/foo/bar
+
+go 1.12
+-- a.go --
+package a
+
+var A = 5
+`)))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cleanup()
+
+	// Create zip from the directory.
+	tmpZip, err := ioutil.TempFile("", "TestCreateFromDir-*.zip")
+	if err != nil {
+		t.Fatal(err)
+	}
+	tmpZipPath := tmpZip.Name()
+	defer func() {
+		tmpZip.Close()
+		os.Remove(tmpZipPath)
+	}()
+
+	m := module.Version{Path: "example.com/foo/bar", Version: "v0.0.1"}
+
+	if err := modzip.CreateFromVCS(tmpZip, m, tmpDir, "HEAD", ""); err == nil {
+		t.Error("CreateFromVCS: expected error, got nil")
+	}
+}
+
+func TestCreateFromVCS_zeroCommitsGitDir(t *testing.T) {
+	// Write files to a temporary directory.
+	tmpDir, cleanup, err := extractTxtarToTempDir(txtar.Parse([]byte(`-- go.mod --
+module example.com/foo/bar
+
+go 1.12
+-- a.go --
+package a
+
+var A = 5
+`)))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cleanup()
+
+	gitInit(t, tmpDir)
+
+	// Create zip from the directory.
+	tmpZip, err := ioutil.TempFile("", "TestCreateFromDir-*.zip")
+	if err != nil {
+		t.Fatal(err)
+	}
+	tmpZipPath := tmpZip.Name()
+	defer func() {
+		tmpZip.Close()
+		os.Remove(tmpZipPath)
+	}()
+
+	m := module.Version{Path: "example.com/foo/bar", Version: "v0.0.1"}
+
+	if err := modzip.CreateFromVCS(tmpZip, m, tmpDir, "HEAD", ""); err == nil {
+		t.Error("CreateFromVCS: expected error, got nil")
+	}
+}
+
+// gitInit runs "git init" at the specified dir.
+//
+// Note: some environments - and trybots - don't have git installed. This
+// function will cause the calling test to be skipped if that's the case.
+func gitInit(t *testing.T, dir string) {
+	t.Helper()
+
+	if _, err := exec.LookPath("git"); err != nil {
+		t.Skip("PATH does not contain git")
+	}
+
+	cmd := exec.Command("git", "init")
+	cmd.Dir = dir
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		t.Fatal(err)
+	}
+
+	cmd = exec.Command("git", "config", "user.email", "testing@golangtests.com")
+	cmd.Dir = dir
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		t.Fatal(err)
+	}
+
+	cmd = exec.Command("git", "config", "user.name", "This is the zip Go tests")
+	cmd.Dir = dir
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func gitCommit(t *testing.T, dir string) {
+	t.Helper()
+
+	if _, err := exec.LookPath("git"); err != nil {
+		t.Skip("PATH does not contain git")
+	}
+
+	cmd := exec.Command("git", "add", "-A")
+	cmd.Dir = dir
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		t.Skip("git executable is not available on this machine")
+	}
+
+	cmd = exec.Command("git", "commit", "-m", "some commit")
+	cmd.Dir = dir
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		t.Fatal(err)
+	}
+}