internal/fetch: keep packages with too-large doc
Instead of returning an error if a package's documentation is too
large, replace the documentation with a short string that says it's
too big, and proceed.
Fixes b/158556376.
Change-Id: Ib9754da73eea4b60bcbe422e122b2188b6ae668a
Reviewed-on: https://team-review.git.corp.google.com/c/golang/discovery/+/766359
CI-Result: Cloud Build <devtools-proctor-result-processor@system.gserviceaccount.com>
Reviewed-by: Julie Qiu <julieqiu@google.com>
diff --git a/internal/fetch/fetch.go b/internal/fetch/fetch.go
index e170994..38b248f 100644
--- a/internal/fetch/fetch.go
+++ b/internal/fetch/fetch.go
@@ -388,8 +388,7 @@
incompleteDirs[innerPath] = true
status = derrors.PackageInvalidContents
errMsg = err.Error()
- } else if lpe := (*LargePackageError)(nil); errors.As(err, &lpe) {
- incompleteDirs[innerPath] = true
+ } else if errors.Is(err, dochtml.ErrTooLarge) {
status = derrors.PackageDocumentationHTMLTooLarge
errMsg = err.Error()
} else if err != nil {
@@ -472,14 +471,6 @@
return false
}
-// LargePackageError represents an error where the rendered
-// documentation HTML for a package is excessively large.
-type LargePackageError struct {
- Err error // Not nil.
-}
-
-func (lpe *LargePackageError) Error() string { return lpe.Err.Error() }
-
// BadPackageError represents an error loading a package
// because its contents do not make up a valid package.
//
@@ -504,16 +495,19 @@
// several build contexts in turn. The first build context in the list to produce
// a non-empty package is used. If none of them result in a package, then
// loadPackage returns nil, nil.
+//
+// If the package is fine except that its documentation is too large, loadPackage
+// returns both a package and a non-nil error with dochtml.ErrTooLarge in its chain.
func loadPackage(ctx context.Context, zipGoFiles []*zip.File, innerPath, modulePath string, sourceInfo *source.Info) (*internal.LegacyPackage, error) {
ctx, span := trace.StartSpan(ctx, "fetch.loadPackage")
defer span.End()
for _, env := range goEnvs {
pkg, err := loadPackageWithBuildContext(ctx, env.GOOS, env.GOARCH, zipGoFiles, innerPath, modulePath, sourceInfo)
- if err != nil {
+ if err != nil && !errors.Is(err, dochtml.ErrTooLarge) {
return nil, err
}
if pkg != nil {
- return pkg, nil
+ return pkg, err
}
}
return nil, nil
@@ -522,6 +516,8 @@
// httpPost allows package fetch tests to stub out playground URL fetches.
var httpPost = http.Post
+const docTooLargeReplacement = `<p>Documentation is too large to display.</p>`
+
// loadPackageWithBuildContext loads a Go package made of .go files in zipGoFiles
// using a build context constructed from the given GOOS and GOARCH values.
// modulePath is stdlib.ModulePath for the Go standard library and the module
@@ -535,8 +531,6 @@
//
// It returns a nil LegacyPackage if the directory doesn't contain a Go package
// or all .go files have been excluded by constraints.
-// A *LargePackageError error is returned if the rendered
-// package documentation HTML exceeds a limit.
// A *BadPackageError error is returned if the directory
// contains .go files but do not make up a valid package.
func loadPackageWithBuildContext(ctx context.Context, goos, goarch string, zipGoFiles []*zip.File, innerPath, modulePath string, sourceInfo *source.Info) (_ *internal.LegacyPackage, err error) {
@@ -664,10 +658,10 @@
docHTML, err := dochtml.Render(fset, d, dochtml.RenderOptions{
SourceLinkFunc: sourceLinkFunc,
PlayURLFunc: playURLFunc,
- Limit: MaxDocumentationHTML,
+ Limit: int64(MaxDocumentationHTML),
})
if errors.Is(err, dochtml.ErrTooLarge) {
- return nil, &LargePackageError{Err: err}
+ docHTML = docTooLargeReplacement
} else if err != nil {
return nil, fmt.Errorf("dochtml.Render: %v", err)
}
@@ -685,7 +679,7 @@
DocumentationHTML: docHTML,
GOOS: goos,
GOARCH: goarch,
- }, nil
+ }, err
}
// matchingFiles returns a map from file names to their contents, read from zipGoFiles.
diff --git a/internal/fetch/fetch_test.go b/internal/fetch/fetch_test.go
index f2f83aa..0ad0c49 100644
--- a/internal/fetch/fetch_test.go
+++ b/internal/fetch/fetch_test.go
@@ -54,6 +54,9 @@
}
defer func() { httpPost = origPost }()
+ defer func(oldmax int) { MaxDocumentationHTML = oldmax }(MaxDocumentationHTML)
+ MaxDocumentationHTML = 1 * megabyte
+
for _, test := range []struct {
name string
mod *testModule
@@ -66,6 +69,7 @@
{name: "module with build constraints", mod: moduleBuildConstraints},
{name: "module with packages with bad import paths", mod: moduleBadImportPath},
{name: "module with documentation", mod: moduleDocTest},
+ {name: "documentation too large", mod: moduleDocTooLarge},
{name: "module with package-level example", mod: modulePackageExample},
{name: "module with function example", mod: moduleFuncExample},
{name: "module with type example", mod: moduleTypeExample},
@@ -90,7 +94,6 @@
if got.Error != nil {
t.Fatal(got.Error)
}
-
d := licenseDetector(ctx, t, modulePath, version, proxyClient)
fr := cleanFetchResult(test.mod.fr, d)
sortFetchResult(fr)
diff --git a/internal/fetch/fetchdata_test.go b/internal/fetch/fetchdata_test.go
index 7c03d32..ff412c0 100644
--- a/internal/fetch/fetchdata_test.go
+++ b/internal/fetch/fetchdata_test.go
@@ -6,6 +6,7 @@
import (
"net/http"
+ "strings"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/derrors"
@@ -546,6 +547,51 @@
},
}
+var moduleDocTooLarge = &testModule{
+ mod: &proxy.TestModule{
+ ModulePath: "bigdoc.test",
+ Files: map[string]string{
+ "LICENSE": testhelper.BSD0License,
+ "doc.go": "// This documentation is big.\n" +
+ strings.Repeat("// Too big.\n", 200_000) +
+ "package bigdoc",
+ },
+ },
+ fr: &FetchResult{
+ Status: derrors.ToHTTPStatus(derrors.HasIncompletePackages),
+ GoModPath: "bigdoc.test",
+ Module: &internal.Module{
+ LegacyModuleInfo: internal.LegacyModuleInfo{
+ ModuleInfo: internal.ModuleInfo{
+ ModulePath: "bigdoc.test",
+ HasGoMod: false,
+ },
+ },
+ Directories: []*internal.DirectoryNew{
+ {
+ Path: "bigdoc.test",
+ V1Path: "bigdoc.test",
+ Package: &internal.PackageNew{
+ Name: "bigdoc",
+ Documentation: &internal.Documentation{
+ Synopsis: "This documentation is big.",
+ HTML: docTooLargeReplacement,
+ },
+ },
+ },
+ },
+ },
+ PackageVersionStates: []*internal.PackageVersionState{
+ {
+ PackagePath: "bigdoc.test",
+ ModulePath: "bigdoc.test",
+ Version: "v1.0.0",
+ Status: derrors.ToHTTPStatus(derrors.PackageDocumentationHTMLTooLarge),
+ },
+ },
+ },
+}
+
var moduleWasm = &testModule{
mod: &proxy.TestModule{
ModulePath: "github.com/my/module/js",
diff --git a/internal/fetch/limit.go b/internal/fetch/limit.go
index e6a9112..c8c4aaf 100644
--- a/internal/fetch/limit.go
+++ b/internal/fetch/limit.go
@@ -13,13 +13,14 @@
// The fetch process should fail if it encounters a file exceeding
// this limit.
MaxFileSize = 30 * megabyte
-
- // MaxDocumentationHTML is a limit on the rendered documentation
- // HTML size.
- //
- // The current limit of 10 MB was based on the largest packages that
- // gddo has encountered. See https://github.com/golang/gddo/issues/635.
- MaxDocumentationHTML = 10 * megabyte
)
+// MaxDocumentationHTML is a limit on the rendered documentation HTML size.
+//
+// The current limit of is based on the largest packages that
+// gddo has encountered. See https://github.com/golang/gddo/issues/635.
+//
+// It is a variable for testing.
+var MaxDocumentationHTML = 10 * megabyte
+
const megabyte = 1000 * 1000