internal/fetch: keep packages with too-large doc

Instead of returning an error if a package's documentation is too
large, replace the documentation with a short string that says it's
too big, and proceed.

Fixes b/158556376.

Change-Id: Ib9754da73eea4b60bcbe422e122b2188b6ae668a
Reviewed-on: https://team-review.git.corp.google.com/c/golang/discovery/+/766359
CI-Result: Cloud Build <devtools-proctor-result-processor@system.gserviceaccount.com>
Reviewed-by: Julie Qiu <julieqiu@google.com>
diff --git a/internal/fetch/fetch.go b/internal/fetch/fetch.go
index e170994..38b248f 100644
--- a/internal/fetch/fetch.go
+++ b/internal/fetch/fetch.go
@@ -388,8 +388,7 @@
 			incompleteDirs[innerPath] = true
 			status = derrors.PackageInvalidContents
 			errMsg = err.Error()
-		} else if lpe := (*LargePackageError)(nil); errors.As(err, &lpe) {
-			incompleteDirs[innerPath] = true
+		} else if errors.Is(err, dochtml.ErrTooLarge) {
 			status = derrors.PackageDocumentationHTMLTooLarge
 			errMsg = err.Error()
 		} else if err != nil {
@@ -472,14 +471,6 @@
 	return false
 }
 
-// LargePackageError represents an error where the rendered
-// documentation HTML for a package is excessively large.
-type LargePackageError struct {
-	Err error // Not nil.
-}
-
-func (lpe *LargePackageError) Error() string { return lpe.Err.Error() }
-
 // BadPackageError represents an error loading a package
 // because its contents do not make up a valid package.
 //
@@ -504,16 +495,19 @@
 // several build contexts in turn. The first build context in the list to produce
 // a non-empty package is used. If none of them result in a package, then
 // loadPackage returns nil, nil.
+//
+// If the package is fine except that its documentation is too large, loadPackage
+// returns both a package and a non-nil error with dochtml.ErrTooLarge in its chain.
 func loadPackage(ctx context.Context, zipGoFiles []*zip.File, innerPath, modulePath string, sourceInfo *source.Info) (*internal.LegacyPackage, error) {
 	ctx, span := trace.StartSpan(ctx, "fetch.loadPackage")
 	defer span.End()
 	for _, env := range goEnvs {
 		pkg, err := loadPackageWithBuildContext(ctx, env.GOOS, env.GOARCH, zipGoFiles, innerPath, modulePath, sourceInfo)
-		if err != nil {
+		if err != nil && !errors.Is(err, dochtml.ErrTooLarge) {
 			return nil, err
 		}
 		if pkg != nil {
-			return pkg, nil
+			return pkg, err
 		}
 	}
 	return nil, nil
@@ -522,6 +516,8 @@
 // httpPost allows package fetch tests to stub out playground URL fetches.
 var httpPost = http.Post
 
+const docTooLargeReplacement = `<p>Documentation is too large to display.</p>`
+
 // loadPackageWithBuildContext loads a Go package made of .go files in zipGoFiles
 // using a build context constructed from the given GOOS and GOARCH values.
 // modulePath is stdlib.ModulePath for the Go standard library and the module
@@ -535,8 +531,6 @@
 //
 // It returns a nil LegacyPackage if the directory doesn't contain a Go package
 // or all .go files have been excluded by constraints.
-// A *LargePackageError error is returned if the rendered
-// package documentation HTML exceeds a limit.
 // A *BadPackageError error is returned if the directory
 // contains .go files but do not make up a valid package.
 func loadPackageWithBuildContext(ctx context.Context, goos, goarch string, zipGoFiles []*zip.File, innerPath, modulePath string, sourceInfo *source.Info) (_ *internal.LegacyPackage, err error) {
@@ -664,10 +658,10 @@
 	docHTML, err := dochtml.Render(fset, d, dochtml.RenderOptions{
 		SourceLinkFunc: sourceLinkFunc,
 		PlayURLFunc:    playURLFunc,
-		Limit:          MaxDocumentationHTML,
+		Limit:          int64(MaxDocumentationHTML),
 	})
 	if errors.Is(err, dochtml.ErrTooLarge) {
-		return nil, &LargePackageError{Err: err}
+		docHTML = docTooLargeReplacement
 	} else if err != nil {
 		return nil, fmt.Errorf("dochtml.Render: %v", err)
 	}
@@ -685,7 +679,7 @@
 		DocumentationHTML: docHTML,
 		GOOS:              goos,
 		GOARCH:            goarch,
-	}, nil
+	}, err
 }
 
 // matchingFiles returns a map from file names to their contents, read from zipGoFiles.
diff --git a/internal/fetch/fetch_test.go b/internal/fetch/fetch_test.go
index f2f83aa..0ad0c49 100644
--- a/internal/fetch/fetch_test.go
+++ b/internal/fetch/fetch_test.go
@@ -54,6 +54,9 @@
 	}
 	defer func() { httpPost = origPost }()
 
+	defer func(oldmax int) { MaxDocumentationHTML = oldmax }(MaxDocumentationHTML)
+	MaxDocumentationHTML = 1 * megabyte
+
 	for _, test := range []struct {
 		name string
 		mod  *testModule
@@ -66,6 +69,7 @@
 		{name: "module with build constraints", mod: moduleBuildConstraints},
 		{name: "module with packages with bad import paths", mod: moduleBadImportPath},
 		{name: "module with documentation", mod: moduleDocTest},
+		{name: "documentation too large", mod: moduleDocTooLarge},
 		{name: "module with package-level example", mod: modulePackageExample},
 		{name: "module with function example", mod: moduleFuncExample},
 		{name: "module with type example", mod: moduleTypeExample},
@@ -90,7 +94,6 @@
 			if got.Error != nil {
 				t.Fatal(got.Error)
 			}
-
 			d := licenseDetector(ctx, t, modulePath, version, proxyClient)
 			fr := cleanFetchResult(test.mod.fr, d)
 			sortFetchResult(fr)
diff --git a/internal/fetch/fetchdata_test.go b/internal/fetch/fetchdata_test.go
index 7c03d32..ff412c0 100644
--- a/internal/fetch/fetchdata_test.go
+++ b/internal/fetch/fetchdata_test.go
@@ -6,6 +6,7 @@
 
 import (
 	"net/http"
+	"strings"
 
 	"golang.org/x/pkgsite/internal"
 	"golang.org/x/pkgsite/internal/derrors"
@@ -546,6 +547,51 @@
 	},
 }
 
+var moduleDocTooLarge = &testModule{
+	mod: &proxy.TestModule{
+		ModulePath: "bigdoc.test",
+		Files: map[string]string{
+			"LICENSE": testhelper.BSD0License,
+			"doc.go": "// This documentation is big.\n" +
+				strings.Repeat("// Too big.\n", 200_000) +
+				"package bigdoc",
+		},
+	},
+	fr: &FetchResult{
+		Status:    derrors.ToHTTPStatus(derrors.HasIncompletePackages),
+		GoModPath: "bigdoc.test",
+		Module: &internal.Module{
+			LegacyModuleInfo: internal.LegacyModuleInfo{
+				ModuleInfo: internal.ModuleInfo{
+					ModulePath: "bigdoc.test",
+					HasGoMod:   false,
+				},
+			},
+			Directories: []*internal.DirectoryNew{
+				{
+					Path:   "bigdoc.test",
+					V1Path: "bigdoc.test",
+					Package: &internal.PackageNew{
+						Name: "bigdoc",
+						Documentation: &internal.Documentation{
+							Synopsis: "This documentation is big.",
+							HTML:     docTooLargeReplacement,
+						},
+					},
+				},
+			},
+		},
+		PackageVersionStates: []*internal.PackageVersionState{
+			{
+				PackagePath: "bigdoc.test",
+				ModulePath:  "bigdoc.test",
+				Version:     "v1.0.0",
+				Status:      derrors.ToHTTPStatus(derrors.PackageDocumentationHTMLTooLarge),
+			},
+		},
+	},
+}
+
 var moduleWasm = &testModule{
 	mod: &proxy.TestModule{
 		ModulePath: "github.com/my/module/js",
diff --git a/internal/fetch/limit.go b/internal/fetch/limit.go
index e6a9112..c8c4aaf 100644
--- a/internal/fetch/limit.go
+++ b/internal/fetch/limit.go
@@ -13,13 +13,14 @@
 	// The fetch process should fail if it encounters a file exceeding
 	// this limit.
 	MaxFileSize = 30 * megabyte
-
-	// MaxDocumentationHTML is a limit on the rendered documentation
-	// HTML size.
-	//
-	// The current limit of 10 MB was based on the largest packages that
-	// gddo has encountered. See https://github.com/golang/gddo/issues/635.
-	MaxDocumentationHTML = 10 * megabyte
 )
 
+// MaxDocumentationHTML is a limit on the rendered documentation HTML size.
+//
+// The current limit of is based on the largest packages that
+// gddo has encountered. See https://github.com/golang/gddo/issues/635.
+//
+// It is a variable for testing.
+var MaxDocumentationHTML = 10 * megabyte
+
 const megabyte = 1000 * 1000