internal/fetch: list known alternative module paths

Some modules don't have go.mod files. Nonetheless there is clearly a
canonical module path, and various forks that should be marked as
alternative. A major example is github.com/Azure/azure-sdk-for-go,
the canonical path, and forks like gopkg.in/Azure/azure-sdk-for-go.
For large modules like the Azure SDK, these forks consume a lot of
processing bandwidth.

Although we already have a mechanism for this (see largefork.go), it
requires both contents and version numbers to match a known list of
modules and versions.  The list can get out of date, and it doesn't
include every possible version. For instance, the gopkg.in Azure fork
consists of many pseudo-versions that aren't in the proxy list.

This CL adds a list of module paths that are known to be forks.
They will be marked as alternative modules regardless of version
or contents.

Fixes golang/go#52329.

Change-Id: Ie374da67f0f42f50be551a1078b667686453b447
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/400357
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Julie Qiu <julieqiu@google.com>
diff --git a/internal/fetch/fetch.go b/internal/fetch/fetch.go
index 60158a7..6c8d34c 100644
--- a/internal/fetch/fetch.go
+++ b/internal/fetch/fetch.go
@@ -108,11 +108,16 @@
 		return err
 	}
 
-	// If there is no go.mod file in the zip, try another way to detect
-	// alternative modules: compare the zip signature to a list of known ones to
-	// see if this is a fork. The intent is to avoid processing certain known
-	// large modules, not to find every fork.
+	// If there is no go.mod file in the zip, try other ways to detect
+	// alternative modules:
+	// 1. Compare the module path to a list of known alternative module paths.
+	// 2. Compare the zip signature to a list of known ones to see if this is a
+	//    fork. The intent is to avoid processing certain known large modules, not
+	//    to find every fork.
 	if !fr.HasGoMod {
+		if modPath := knownAlternativeFor(fr.ModulePath); modPath != "" {
+			return fmt.Errorf("known alternative to %s: %w", modPath, derrors.AlternativeModule)
+		}
 		forkedModule, err := forkedFrom(contentDir, fr.ModulePath, fr.ResolvedVersion)
 		if err != nil {
 			return err
diff --git a/internal/fetch/fetch_test.go b/internal/fetch/fetch_test.go
index 49a7230..237d897 100644
--- a/internal/fetch/fetch_test.go
+++ b/internal/fetch/fetch_test.go
@@ -191,6 +191,13 @@
 			wantHasGoMod:  true,
 		},
 		{
+			name:          "known alternative",
+			mod:           moduleKnownAlternative,
+			wantErr:       derrors.AlternativeModule,
+			wantGoModPath: "github.com/msopentech/azure-sdk-for-go",
+			wantHasGoMod:  false,
+		},
+		{
 			name:          "empty module",
 			mod:           moduleEmpty,
 			wantErr:       derrors.BadModule,
diff --git a/internal/fetch/fetchdata_test.go b/internal/fetch/fetchdata_test.go
index 7ce0058..e74fc6d 100644
--- a/internal/fetch/fetchdata_test.go
+++ b/internal/fetch/fetchdata_test.go
@@ -825,6 +825,16 @@
 	},
 }
 
+// moduleKnownAlternative has a path listed in knownAlternatives; see known_alternatives.go.
+var moduleKnownAlternative = &testModule{
+	mod: &proxytest.Module{
+		ModulePath: "github.com/msopentech/azure-sdk-for-go",
+	},
+	fr: &FetchResult{
+		Status: derrors.ToStatus(derrors.AlternativeModule),
+	},
+}
+
 var moduleStdMaster = &testModule{
 	mod: &proxytest.Module{
 		ModulePath: stdlib.ModulePath,
diff --git a/internal/fetch/gen_zip_signatures.go b/internal/fetch/gen_zip_signatures.go
index 91d026e..7222d73 100644
--- a/internal/fetch/gen_zip_signatures.go
+++ b/internal/fetch/gen_zip_signatures.go
@@ -45,7 +45,7 @@
 }{
 	{"github.com/aws/aws-sdk-go", "v1.14.30"},
 	{"github.com/kubernetes/kubernetes", "v1.15.0-alpha.0"},
-	{"github.com/Azure/azure-sdk-for-go", "v57.2.0"},
+	{"github.com/Azure/azure-sdk-for-go", "v63.2.0"},
 	{"github.com/ethereum/go-ethereum", "v1.9.7"},
 	{"github.com/moby/moby", "v20.10.8"},
 	{"github.com/influxdata/influxdb", "v1.7.9"},
diff --git a/internal/fetch/known_alternatives.go b/internal/fetch/known_alternatives.go
new file mode 100644
index 0000000..5207eeb
--- /dev/null
+++ b/internal/fetch/known_alternatives.go
@@ -0,0 +1,38 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package fetch
+
+import "golang.org/x/mod/module"
+
+// knownAlternatives maps module paths that are known to be forks of other
+// modules to the module path they are an alternative to.
+// For example, github.com/msopentech/azure-sdk-for-go
+// is an alternative to github.com/Azure/azure-sdk-for-go.
+// Keys are matched exactly (case-sensitive) and should not include a final
+// major version like "/v3", or ".v3" for gopkg.in paths.
+//
+// When a module has a go.mod file, we can detect alternatives by comparing the
+// module path with the path in the go.mod file. This list is for modules
+// without go.mod files.
+var knownAlternatives = map[string]string{
+	"github.com/msopentech/azure-sdk-for-go":       "github.com/Azure/azure-sdk-for-go",
+	"github.com/MSOpenTech/azure-sdk-for-go":       "github.com/Azure/azure-sdk-for-go",
+	"gopkg.in/Azure/azure-sdk-for-go":              "github.com/Azure/azure-sdk-for-go",
+	"github.com/masslessparticle/azure-sdk-for-go": "github.com/Azure/azure-sdk-for-go",
+	"github.com/aliyun/alibaba-cloud-sdk-go":       "github.com/Azure/azure-sdk-for-go", // NOTE(review): path does not look like an azure-sdk-for-go fork; confirm this mapping.
+	"github.com/johnstairs/azure-sdk-for-go":       "github.com/Azure/azure-sdk-for-go",
+	"github.com/shopify/sarama":                    "github.com/Shopify/sarama",
+}
+
+// knownAlternativeFor reports which module modulePath is a known alternative
+// to, or "" if it has no entry in knownAlternatives.
+//
+// Any major-version suffix is split off modulePath before the lookup; a path
+// that module.SplitPathVersion rejects also yields "".
+func knownAlternativeFor(modulePath string) string {
+	if prefix, _, ok := module.SplitPathVersion(modulePath); ok {
+		return knownAlternatives[prefix]
+	}
+	return ""
+}