internal/modules: exclude vendor directories in downloaded modules

For modules that have vendor directories, ecosystem metrics analysis
always results in a loading error. 1% of the whole ecosystem has
vendored dependencies.

Vendor directories in modules downloaded from the module proxy can have
only one file: modules.txt. When the package loading logic sees a
vendor directory, it assumes the dependencies are there. Because they
are in fact not there, package loading fails.

We hence remove the vendor directories altogether. This also makes
sense because, starting with go1.24, vendor directories in modules
will in principle be empty and hence will not even appear in the
downloaded zip files.

This change skips unzipping the vendor directory when the module is
downloaded. An alternative approach is to explicitly delete the vendor
directory when analyzing the module. However, that has experimentally
proven unsuccessful, likely because of a file permission error.

Change-Id: I49d1f60e0e1679e586b14724f9fb729b2a8738df
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/608095
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Zvonimir Pavlinovic <zpavlinovic@google.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
diff --git a/internal/modules/modules.go b/internal/modules/modules.go
index 2589209..7c1693d 100644
--- a/internal/modules/modules.go
+++ b/internal/modules/modules.go
@@ -45,6 +45,14 @@
 		if !strings.HasPrefix(fpath, filepath.Clean(destination)+string(os.PathSeparator)) {
 			return fmt.Errorf("%s is an illegal filepath", fpath)
 		}
+
+		// Do not include vendor directories. They currently contain only modules.txt,
+		// not the dependencies. This makes package loading fail. Starting with go1.24,
+		// there likely won't be any vendor directories at all.
+		if vendored(name) {
+			continue
+		}
+
 		if f.FileInfo().IsDir() {
 			if err := os.MkdirAll(fpath, os.ModePerm); err != nil {
 				return err
@@ -74,3 +82,7 @@
 	}
 	return nil
 }
+
+func vendored(path string) bool {
+	return path == "vendor" || strings.HasPrefix(path, "vendor"+string(os.PathSeparator))
+}
diff --git a/internal/modules/modules_test.go b/internal/modules/modules_test.go
index fbda4bd..802aa94 100644
--- a/internal/modules/modules_test.go
+++ b/internal/modules/modules_test.go
@@ -7,6 +7,7 @@
 import (
 	"archive/zip"
 	"bytes"
+	"os"
 	"path/filepath"
 	"testing"
 )
@@ -20,6 +21,8 @@
 	}{
 		{filepath.Join("golang.org@v0.0.0", "README"), "This is a readme."},
 		{filepath.Join("golang.org@v0.0.0", "main"), "package main"},
+		{filepath.Join("golang.org@v0.0.0", "vendor", "modules.txt"), "# golang.org v1.1.1"},
+		{filepath.Join("golang.org@v0.0.0", "vendorius"), "This is some file with vendor in its name"},
 	}
 	for _, file := range files {
 		f, err := w.Create(file.Name)
@@ -44,10 +47,21 @@
 	}
 
 	tempDir := t.TempDir()
-	if err := writeZip(r, tempDir, ""); err != nil {
+	if err := writeZip(r, tempDir, "golang.org@v0.0.0/"); err != nil {
 		t.Error(err)
 	}
-	if err := writeZip(r, tempDir, "golang.org@v0.0.0"); err != nil {
+	// make sure there are no vendor files
+	fs, err := os.ReadDir(tempDir)
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, f := range fs {
+		if f.IsDir() && f.Name() == "vendor" {
+			t.Errorf("found unexpected vendor file or dir: %s", f.Name())
+		}
+	}
+
+	if err := writeZip(r, tempDir, ""); err != nil {
 		t.Error(err)
 	}
 }