cmd/go/internal/modcmd: drop test sources and data from mod -vendor

The module support redefines the package pattern "all" to mean
all packages in the main module, plus their dependencies and
test dependencies, recursively. The idea was to make "go test all"
useful: it tests everything in the main module plus everything
the main module depends on, and so on. A complex project
is probably creating a combination of dependency versions
that has never existed before, so it is only prudent to make it
easy to test those dependencies in that configuration.

The vendor operation also applies to that same "all" set of packages.

But vendoring is freezing a combination that you're happy
with and hopefully have already tested. At that point the rationale
for including all the tests of dependencies is weakened, and
there is a significant space savings from not copying them (and
possibly-large associated test data) into the vendor directory.

This CL changes vendor to apply to all packages in the main
module, plus the (direct and indirect) dependencies of those
packages and their tests. Vendor no longer copies the dependencies'
*_test.go files into the vendor directory at all, nor the packages
imported only by those test files.

Also copy legal files (LICENSE etc) when vendoring.

Fixes golang/go#25672.

Change-Id: I0713d4c751145df827460707e4085801fffa2a51
Reviewed-on: https://go-review.googlesource.com/122256
Reviewed-by: Bryan C. Mills <bcmills@google.com>
diff --git a/vendor/cmd/go/internal/modcmd/mod.go b/vendor/cmd/go/internal/modcmd/mod.go
index 8206971..efdcd8a 100644
--- a/vendor/cmd/go/internal/modcmd/mod.go
+++ b/vendor/cmd/go/internal/modcmd/mod.go
@@ -154,8 +154,8 @@
 don't provide any relevant packages.
 
 The -vendor flag resets the module's vendor directory to include all
-packages needed to build and test all the module's packages and
-their dependencies.
+packages needed to build and test all the module's packages.
+It does not include any test code for the vendored packages.
 
 The -verify flag checks that the dependencies of the current module,
 which are stored in a local downloaded source cache, have not been
diff --git a/vendor/cmd/go/internal/modcmd/vendor.go b/vendor/cmd/go/internal/modcmd/vendor.go
index 6dc1f4a..6a17dc1 100644
--- a/vendor/cmd/go/internal/modcmd/vendor.go
+++ b/vendor/cmd/go/internal/modcmd/vendor.go
@@ -18,10 +18,8 @@
 	"cmd/go/internal/vgo"
 )
 
-var copiedDir map[string]bool
-
 func runVendor() {
-	pkgs := vgo.LoadALL()
+	pkgs := vgo.LoadVendor()
 
 	vdir := filepath.Join(vgo.ModRoot, "vendor")
 	if err := os.RemoveAll(vdir); err != nil {
@@ -38,7 +36,6 @@
 	}
 
 	var buf bytes.Buffer
-	copiedDir = make(map[string]bool)
 	for _, m := range vgo.BuildList()[1:] {
 		if pkgs := modpkgs[m]; len(pkgs) > 0 {
 			repl := ""
@@ -81,28 +78,29 @@
 	if src == "" {
 		fmt.Fprintf(os.Stderr, "internal error: no pkg for %s -> %s\n", pkg, realPath)
 	}
-
-	copyDir(dst, src, false)
+	copyDir(dst, src, matchNonTest)
 	if m := vgo.PackageModule(realPath); m.Path != "" {
-		copyTestdata(m.Path, realPath, dst, src)
+		copyMetadata(m.Path, realPath, dst, src)
 	}
 }
 
-// Copy the testdata directories in parent directories.
-// If the package being vendored is a/b/c,
-// try to copy a/b/c/testdata, a/b/testdata and a/testdata to vendor directory,
-// up to the module root.
-func copyTestdata(modPath, pkg, dst, src string) {
-	testdata := func(dir string) string {
-		return filepath.Join(dir, "testdata")
-	}
-	for {
-		if copiedDir[dst] {
+type metakey struct {
+	modPath string
+	dst     string
+}
+
+var copiedMetadata = make(map[metakey]bool)
+
+// copyMetadata copies metadata files from parents of src to parents of dst,
+// stopping after processing the src parent for modPath.
+func copyMetadata(modPath, pkg, dst, src string) {
+	for parent := 0; ; parent++ {
+		if copiedMetadata[metakey{modPath, dst}] {
 			break
 		}
-		copiedDir[dst] = true
-		if info, err := os.Stat(testdata(src)); err == nil && info.IsDir() {
-			copyDir(testdata(dst), testdata(src), true)
+		copiedMetadata[metakey{modPath, dst}] = true
+		if parent > 0 {
+			copyDir(dst, src, matchMetadata)
 		}
 		if modPath == pkg {
 			break
@@ -113,7 +111,43 @@
 	}
 }
 
-func copyDir(dst, src string, recursive bool) {
+// metaPrefixes is the list of metadata file prefixes.
+// Vendoring copies metadata files from parents of copied directories.
+// Note that this list could be arbitrarily extended, and it is longer
+// in other tools (such as godep or dep). By using this limited set of
+// prefixes and also insisting on capitalized file names, we are trying
+// to nudge people toward more agreement on the naming
+// and also trying to avoid false positives.
+var metaPrefixes = []string{
+	"AUTHORS",
+	"CONTRIBUTORS",
+	"COPYLEFT",
+	"COPYING",
+	"COPYRIGHT",
+	"LEGAL",
+	"LICENSE",
+	"NOTICE",
+	"PATENTS",
+}
+
+// matchMetadata reports whether info's name begins with one of the metaPrefixes.
+func matchMetadata(info os.FileInfo) bool {
+	name := info.Name()
+	for _, p := range metaPrefixes {
+		if strings.HasPrefix(name, p) {
+			return true
+		}
+	}
+	return false
+}
+
+// matchNonTest reports whether info is any non-test file (including non-Go files).
+func matchNonTest(info os.FileInfo) bool {
+	return !strings.HasSuffix(info.Name(), "_test.go")
+}
+
+// copyDir copies all regular files satisfying match(info) from src to dst.
+func copyDir(dst, src string, match func(os.FileInfo) bool) {
 	files, err := ioutil.ReadDir(src)
 	if err != nil {
 		base.Fatalf("vgo vendor: %v", err)
@@ -122,13 +156,7 @@
 		base.Fatalf("vgo vendor: %v", err)
 	}
 	for _, file := range files {
-		if file.IsDir() {
-			if recursive || file.Name() == "testdata" {
-				copyDir(filepath.Join(dst, file.Name()), filepath.Join(src, file.Name()), true)
-			}
-			continue
-		}
-		if !file.Mode().IsRegular() {
+		if file.IsDir() || !file.Mode().IsRegular() || !match(file) {
 			continue
 		}
 		r, err := os.Open(filepath.Join(src, file.Name()))
@@ -148,19 +176,3 @@
 		}
 	}
 }
-
-// hasPathPrefix reports whether the path s begins with the
-// elements in prefix.
-func hasPathPrefix(s, prefix string) bool {
-	switch {
-	default:
-		return false
-	case len(s) == len(prefix):
-		return s == prefix
-	case len(s) > len(prefix):
-		if prefix != "" && prefix[len(prefix)-1] == '/' {
-			return strings.HasPrefix(s, prefix)
-		}
-		return s[len(prefix)] == '/' && s[:len(prefix)] == prefix
-	}
-}
diff --git a/vendor/cmd/go/internal/vgo/load.go b/vendor/cmd/go/internal/vgo/load.go
index 0924483..81861e3 100644
--- a/vendor/cmd/go/internal/vgo/load.go
+++ b/vendor/cmd/go/internal/vgo/load.go
@@ -191,18 +191,33 @@
 // and their dependencies in any other modules, without filtering
 // due to build tags, except "+build ignore".
 // It adds modules to the build list as needed to satisfy new imports.
-// This set is useful for identifying which packages to include in a vendor directory
-// or deciding whether a particular import appears anywhere in a module.
+// This set is useful for deciding whether a particular import is needed
+// anywhere in a module.
 func LoadALL() []string {
+	return loadAll(true)
+}
+
+// LoadVendor is like LoadALL but only follows test dependencies
+// for tests in the main module. Tests in dependency modules are
+// ignored completely.
+// This set is useful for identifying which packages to include in a vendor directory.
+func LoadVendor() []string {
+	return loadAll(false)
+}
+
+func loadAll(testAll bool) []string {
 	if Init(); !Enabled() {
-		panic("vgo: misuse of LoadALL")
+		panic("vgo: misuse of LoadALL/LoadVendor")
 	}
 	InitMod()
 
 	loaded = newLoader()
 	loaded.isALL = true
 	loaded.tags = anyTags
-	loaded.testAll = true
+	loaded.testAll = testAll
+	if !testAll {
+		loaded.testRoots = true
+	}
 	all := TargetPackages()
 	loaded.load(func() []string { return all })
 	WriteGoMod()
@@ -650,7 +665,9 @@
 // scanDir is like imports.ScanDir but elides known magic imports from the list,
 // so that vgo does not go looking for packages that don't really exist.
 //
-// The only known magic imports are appengine and appengine/*.
+// The standard magic import is "C", for cgo.
+//
+// The only other known magic imports are appengine and appengine/*.
 // These are so old that they predate "go get" and did not use URL-like paths.
 // Most code today now uses google.golang.org/appengine instead,
 // but not all code has been so updated. When we mostly ignore build tags
@@ -663,7 +680,7 @@
 	filter := func(x []string) []string {
 		w := 0
 		for _, pkg := range x {
-			if pkg != "appengine" && !strings.HasPrefix(pkg, "appengine/") &&
+			if pkg != "C" && pkg != "appengine" && !strings.HasPrefix(pkg, "appengine/") &&
 				pkg != "appengine_internal" && !strings.HasPrefix(pkg, "appengine_internal/") {
 				x[w] = pkg
 				w++
diff --git a/vendor/cmd/go/testdata/vendormod/go.mod b/vendor/cmd/go/testdata/vendormod/go.mod
index d0ccb06..74aaa3b 100644
--- a/vendor/cmd/go/testdata/vendormod/go.mod
+++ b/vendor/cmd/go/testdata/vendormod/go.mod
@@ -1,19 +1,19 @@
 module m
 
-replace a v1.0.0 => ./a
-
-replace x v1.0.0 => ./x
-
-replace y v1.0.0 => ./y
-
-replace z v1.0.0 => ./z
-
-replace w v1.0.0 => ./w
-
 require (
 	a v1.0.0
+	mysite/myname/mypkg v1.0.0
 	w v1.0.0 // indirect
 	x v1.0.0
 	y v1.0.0
 	z v1.0.0
 )
+
+replace (
+	a v1.0.0 => ./a
+	mysite/myname/mypkg v1.0.0 => ./mypkg
+	w v1.0.0 => ./w
+	x v1.0.0 => ./x
+	y v1.0.0 => ./y
+	z v1.0.0 => ./z
+)
diff --git a/vendor/cmd/go/testdata/vendormod/mypkg/go.mod b/vendor/cmd/go/testdata/vendormod/mypkg/go.mod
new file mode 100644
index 0000000..311f721
--- /dev/null
+++ b/vendor/cmd/go/testdata/vendormod/mypkg/go.mod
@@ -0,0 +1 @@
+module me
diff --git a/vendor/cmd/go/testdata/vendormod/mypkg/mydir/d.go b/vendor/cmd/go/testdata/vendormod/mypkg/mydir/d.go
new file mode 100644
index 0000000..49d990f
--- /dev/null
+++ b/vendor/cmd/go/testdata/vendormod/mypkg/mydir/d.go
@@ -0,0 +1 @@
+package mydir
diff --git a/vendor/cmd/go/testdata/vendormod/subdir/v1_test.go b/vendor/cmd/go/testdata/vendormod/subdir/v1_test.go
new file mode 100644
index 0000000..eb2863e
--- /dev/null
+++ b/vendor/cmd/go/testdata/vendormod/subdir/v1_test.go
@@ -0,0 +1,3 @@
+package m
+
+import _ "mysite/myname/mypkg/mydir"
diff --git a/vendor/cmd/go/testdata/vendormod/x/x2/dummy.txt b/vendor/cmd/go/testdata/vendormod/x/x2/dummy.txt
new file mode 100644
index 0000000..421376d
--- /dev/null
+++ b/vendor/cmd/go/testdata/vendormod/x/x2/dummy.txt
@@ -0,0 +1 @@
+dummy
diff --git a/vendor/cmd/go/testdata/vendormod/x/x_test.go b/vendor/cmd/go/testdata/vendormod/x/x_test.go
new file mode 100644
index 0000000..7f3de9f
--- /dev/null
+++ b/vendor/cmd/go/testdata/vendormod/x/x_test.go
@@ -0,0 +1,3 @@
+package x
+
+import _ "w"
diff --git a/vendor/cmd/go/vgo_test.go b/vendor/cmd/go/vgo_test.go
index 52a6135..30ef894 100644
--- a/vendor/cmd/go/vgo_test.go
+++ b/vendor/cmd/go/vgo_test.go
@@ -617,6 +617,39 @@
 	tg.run("-vgo", "list", "-f={{.Dir}}", "x")
 	tg.grepStdout(`vendormod[/\\]x$`, "expected x in vendormod/x")
 
+	var toRemove []string
+	defer func() {
+		for _, file := range toRemove {
+			os.Remove(file)
+		}
+	}()
+
+	write := func(name string) {
+		file := filepath.Join(wd, "testdata/vendormod", name)
+		toRemove = append(toRemove, file)
+		tg.must(ioutil.WriteFile(file, []byte("file!"), 0666))
+	}
+	mustHaveVendor := func(name string) {
+		t.Helper()
+		tg.mustExist(filepath.Join(wd, "testdata/vendormod/vendor", name))
+	}
+	mustNotHaveVendor := func(name string) {
+		t.Helper()
+		tg.mustNotExist(filepath.Join(wd, "testdata/vendormod/vendor", name))
+	}
+
+	write("a/foo/AUTHORS.txt")
+	write("a/foo/CONTRIBUTORS")
+	write("a/foo/LICENSE")
+	write("a/foo/PATENTS")
+	write("a/foo/COPYING")
+	write("a/foo/COPYLEFT")
+	write("a/foo/licensed-to-kill")
+	write("w/LICENSE")
+	write("x/NOTICE!")
+	write("x/x2/LICENSE")
+	write("mypkg/LICENSE.txt")
+
 	tg.run("-vgo", "mod", "-vendor", "-v")
 	tg.grepStderr(`^# x v1.0.0 => ./x`, "expected to see module x with replacement")
 	tg.grepStderr(`^x`, "expected to see package x")
@@ -649,6 +682,24 @@
 	tg.runFail("-vgo", "list", "-getmode=local", "-f={{.Dir}}", "newpkg")
 	tg.grepStderr(`disabled by -getmode=local`, "expected -getmode=local to avoid network")
 
+	mustNotHaveVendor("x/testdata")
+	mustNotHaveVendor("a/foo/bar/b/main_test.go")
+
+	mustHaveVendor("a/foo/AUTHORS.txt")
+	mustHaveVendor("a/foo/CONTRIBUTORS")
+	mustHaveVendor("a/foo/LICENSE")
+	mustHaveVendor("a/foo/PATENTS")
+	mustHaveVendor("a/foo/COPYING")
+	mustHaveVendor("a/foo/COPYLEFT")
+	mustHaveVendor("x/NOTICE!")
+	mustHaveVendor("mysite/myname/mypkg/LICENSE.txt")
+
+	mustNotHaveVendor("a/foo/licensed-to-kill")
+	mustNotHaveVendor("w")
+	mustNotHaveVendor("w/LICENSE") // w wasn't copied at all
+	mustNotHaveVendor("x/x2")
+	mustNotHaveVendor("x/x2/LICENSE") // x/x2 wasn't copied at all
+
 	if !testing.Short() {
 		tg.run("-vgo", "build")
 		tg.run("-vgo", "build", "-getmode=vendor")