internal/licenses: work on content directory

Change the license detector so it works on the content directory of
the zip, not the root.

Preserve NewDetector's behavior because it's used elsewhere.

For golang/go#47834

Change-Id: I42ba0c95cf19dc735b08a342f48323490d21aa0c
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/343958
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jamal Carvalho <jamal@golang.org>
diff --git a/internal/fetch/fetch.go b/internal/fetch/fetch.go
index c9fe09b..e0365c8 100644
--- a/internal/fetch/fetch.go
+++ b/internal/fetch/fetch.go
@@ -348,7 +348,11 @@
 	logf := func(format string, args ...interface{}) {
 		log.Infof(ctx, format, args...)
 	}
-	d := licenses.NewDetectorFS(modulePath, v, fsys, logf)
+	contentDir, err := fs.Sub(fsys, moduleVersionDir(modulePath, v))
+	if err != nil {
+		return nil, nil, err
+	}
+	d := licenses.NewDetectorFS(modulePath, v, contentDir, logf)
 	allLicenses := d.AllLicenses()
 	packages, packageVersionStates, err := extractPackages(ctx, modulePath, resolvedVersion, requestedVersion, fsys, d, sourceInfo)
 	if errors.Is(err, ErrModuleContainsNoPackages) || errors.Is(err, errMalformedZip) {
diff --git a/internal/licenses/licenses.go b/internal/licenses/licenses.go
index fea9866..34996fe 100644
--- a/internal/licenses/licenses.go
+++ b/internal/licenses/licenses.go
@@ -310,11 +310,16 @@
 // logf is for logging; if nil, no logging is done.
 // Deprecated: use NewDetectorFS.
 func NewDetector(modulePath, version string, zr *zip.Reader, logf func(string, ...interface{})) *Detector {
-	return NewDetectorFS(modulePath, version, zr, logf)
+	sub, err := fs.Sub(zr, modulePath+"@"+version)
+	// This should only fail if the prefix is not a valid path, which shouldn't be possible.
+	if err != nil && logf != nil {
+		logf("fs.Sub: %v", err)
+	}
+	return NewDetectorFS(modulePath, version, sub, logf)
 }
 
 // NewDetectorFS returns a Detector for the given module and version.
-// fsys should hold the module's files.
+// fsys should represent the content directory of the module (not the zip root).
 // logf is for logging; if nil, no logging is done.
 func NewDetectorFS(modulePath, version string, fsys fs.FS, logf func(string, ...interface{})) *Detector {
 	if logf == nil {
@@ -417,16 +422,11 @@
 // The which argument determines the location of the files considered.
 // If paths encounters an error, it logs it and returns nil.
 func (d *Detector) paths(which WhichFiles) []string {
-	// TODO(golang/go#47834): remove references to cdir when higher levels
-	// do so.
-	cdir := contentsDir(d.modulePath, d.version)
-	fsys, err := fs.Sub(d.fsys, cdir)
-	if err != nil {
-		d.logf("Paths: %v", err)
+	if d.fsys == nil {
 		return nil
 	}
 	var paths []string
-	err = fs.WalkDir(fsys, ".", func(pathname string, de fs.DirEntry, err error) error {
+	err := fs.WalkDir(d.fsys, ".", func(pathname string, de fs.DirEntry, err error) error {
 		if err != nil {
 			return err
 		}
@@ -457,11 +457,11 @@
 			d.logf("module.CheckFilePath(%q): %v", pathname, err)
 			return nil
 		}
-		paths = append(paths, path.Join(cdir, pathname))
+		paths = append(paths, pathname)
 		return nil
 	})
 	if err != nil {
-		d.logf("Paths: %v", err)
+		d.logf("licenses.Detector.paths: %v", err)
 		return nil
 	}
 	return paths
@@ -490,7 +490,6 @@
 // If a file cannot be read, the error is logged and a license
 // of type unknown is added.
 func (d *Detector) detectFiles(pathnames []string) []*License {
-	prefix := pathPrefix(contentsDir(d.modulePath, d.version))
 	var licenses []*License
 	for _, p := range pathnames {
 		bytes, err := d.readFile(p)
@@ -499,7 +498,7 @@
 			licenses = append(licenses, &License{
 				Metadata: &Metadata{
 					Types:    []string{unknownLicenseType},
-					FilePath: strings.TrimPrefix(p, prefix),
+					FilePath: p,
 				},
 			})
 			continue
@@ -508,7 +507,7 @@
 		licenses = append(licenses, &License{
 			Metadata: &Metadata{
 				Types:    types,
-				FilePath: strings.TrimPrefix(p, prefix),
+				FilePath: p,
 				Coverage: cov,
 			},
 			Contents: bytes,
@@ -596,15 +595,3 @@
 	sort.Strings(s)
 	return s
 }
-
-func contentsDir(modulePath, version string) string {
-	return modulePath + "@" + version
-}
-
-// pathPrefix appends a "/" to its argument if the argument is non-empty.
-func pathPrefix(s string) string {
-	if s != "" {
-		return s + "/"
-	}
-	return ""
-}
diff --git a/internal/licenses/licenses_test.go b/internal/licenses/licenses_test.go
index e7ccfde..3bf06e3 100644
--- a/internal/licenses/licenses_test.go
+++ b/internal/licenses/licenses_test.go
@@ -9,6 +9,7 @@
 	"bytes"
 	"fmt"
 	"io"
+	"io/fs"
 	"io/ioutil"
 	"log"
 	"math"
@@ -370,22 +371,22 @@
 	}{
 		{
 			RootFiles,
-			[]string{"m@v1/LICENSE", "m@v1/LICENCE", "m@v1/License", "m@v1/COPYING", "m@v1/LICENSE.md",
-				"m@v1/liCeNse"},
+			[]string{"LICENSE", "LICENCE", "License", "COPYING", "LICENSE.md",
+				"liCeNse"},
 		},
 		{
 			NonRootFiles,
 			[]string{
-				"m@v1/foo/LICENSE", "m@v1/foo/LICENSE.md", "m@v1/foo/LICENCE", "m@v1/foo/License",
-				"m@v1/foo/COPYING", "m@v1/pkg/vendor/LICENSE", "m@v1/foo/license",
+				"foo/LICENSE", "foo/LICENSE.md", "foo/LICENCE", "foo/License",
+				"foo/COPYING", "pkg/vendor/LICENSE", "foo/license",
 			},
 		},
 		{
 			AllFiles,
 			[]string{
-				"m@v1/LICENSE", "m@v1/LICENCE", "m@v1/License", "m@v1/COPYING", "m@v1/LICENSE.md",
-				"m@v1/liCeNse", "m@v1/foo/LICENSE", "m@v1/foo/LICENSE.md", "m@v1/foo/LICENCE", "m@v1/foo/License",
-				"m@v1/foo/license", "m@v1/foo/COPYING", "m@v1/pkg/vendor/LICENSE",
+				"LICENSE", "LICENCE", "License", "COPYING", "LICENSE.md",
+				"liCeNse", "foo/LICENSE", "foo/LICENSE.md", "foo/LICENCE", "foo/License",
+				"foo/license", "foo/COPYING", "pkg/vendor/LICENSE",
 			},
 		},
 	} {
@@ -659,7 +660,7 @@
 		},
 	} {
 		t.Run(test.name, func(t *testing.T) {
-			zr := newZipReader(t, contentsDir(module, version), test.contents)
+			zr := newZipReader(t, module+"@"+version, test.contents)
 			d := NewDetector(module, version, zr, nil)
 			gotRedist, gotLics := d.PackageInfo("dir/pkg")
 			if gotRedist != test.wantRedist {
@@ -680,6 +681,24 @@
 	}
 }
 
+func TestInvalidContentDirPath(t *testing.T) {
+	// Make sure we don't crash if the zip's content directory path is invalid according to fs.ValidPath.
+	invalidPath := "a//v1.0.0"
+	if fs.ValidPath(invalidPath) {
+		t.Fatalf("%q is valid", invalidPath)
+	}
+	zr := newZipReader(t, invalidPath, map[string]string{"a": "b"})
+	d := NewDetector("a", "v1.0.0", zr, nil)
+	got := d.ModuleLicenses()
+	if len(got) != 0 {
+		t.Errorf("got %v, want nothing", got)
+	}
+	got = d.AllLicenses()
+	if len(got) != 0 {
+		t.Errorf("got %v, want nothing", got)
+	}
+}
+
 // newZipReader creates an in-memory zip of the given contents and returns a reader to it.
 func newZipReader(t *testing.T, contentsDir string, contents map[string]string) *zip.Reader {
 	var buf bytes.Buffer