Merge pull request #231 from garyburd/master
Improve handling of vendored packages, fix issues
diff --git a/database/index.go b/database/index.go
index 91511fb..3986cdf 100644
--- a/database/index.go
+++ b/database/index.go
@@ -103,23 +103,22 @@
return result
}
-func isExcludedPath(path string) bool {
- if strings.HasSuffix(path, ".go") ||
- strings.HasPrefix(path, "gist.github.com/") ||
- strings.Contains(path, "/internal/") ||
- strings.HasSuffix(path, "/internal") ||
- strings.Contains(path, "/third_party/") {
- return true
- }
- return false
-}
+// vendorPat matches the path of a vendored package.
+var vendorPat = regexp.MustCompile(
+ // match directories used by tools to vendor packages.
+ `/(?:_?third_party|vendors|Godeps/_workspace/src)/` +
+ // match a domain name.
+ `[^./]+\.[^/]+`)
func documentScore(pdoc *doc.Package) float64 {
if pdoc.Name == "" ||
- pdoc.IsCmd ||
pdoc.DeadEndFork ||
len(pdoc.Errors) > 0 ||
- isExcludedPath(pdoc.ImportPath) {
+ strings.HasSuffix(pdoc.ImportPath, ".go") ||
+ strings.HasPrefix(pdoc.ImportPath, "gist.github.com/") ||
+ strings.HasSuffix(pdoc.ImportPath, "/internal") ||
+ strings.Contains(pdoc.ImportPath, "/internal/") ||
+ vendorPat.MatchString(pdoc.ImportPath) {
return 0
}
@@ -129,31 +128,48 @@
}
}
- if !pdoc.IsCmd && !pdoc.Truncated &&
- len(pdoc.Consts) == 0 &&
- len(pdoc.Vars) == 0 &&
- len(pdoc.Funcs) == 0 &&
- len(pdoc.Types) == 0 &&
- len(pdoc.Examples) == 0 {
- return 0
- }
-
r := 1.0
- if pdoc.IsCmd && !importsGoPackages(pdoc) {
- // Penalize commands that don't use the "go/*" packages.
- r *= 0.9
- }
- if pdoc.Doc == "" || pdoc.Synopsis == "" {
- r *= 0.95
- }
- if path.Base(pdoc.ImportPath) != pdoc.Name {
- r *= 0.9
- }
- for i := 0; i < strings.Count(pdoc.ImportPath[len(pdoc.ProjectRoot):], "/"); i++ {
- r *= 0.99
- }
- if strings.Index(pdoc.ImportPath[len(pdoc.ProjectRoot):], "/src/") > 0 {
- r *= 0.95
+ if pdoc.IsCmd {
+ if pdoc.Doc == "" {
+ // Do not include command in index if it does not have documentation.
+ return 0
+ }
+ if !importsGoPackages(pdoc) {
+ // Penalize commands that don't use the "go/*" packages.
+ r *= 0.9
+ }
+ } else {
+ if !pdoc.Truncated &&
+ len(pdoc.Consts) == 0 &&
+ len(pdoc.Vars) == 0 &&
+ len(pdoc.Funcs) == 0 &&
+ len(pdoc.Types) == 0 &&
+ len(pdoc.Examples) == 0 {
+ // Do not include package in index if it does not have exports.
+ return 0
+ }
+ if pdoc.Doc == "" {
+ // Penalty for no documentation.
+ r *= 0.95
+ }
+ if path.Base(pdoc.ImportPath) != pdoc.Name {
+ // Penalty for last element of path != package name.
+ r *= 0.9
+ }
+ for i := 0; i < strings.Count(pdoc.ImportPath[len(pdoc.ProjectRoot):], "/"); i++ {
+ // Penalty for deeply nested packages.
+ r *= 0.99
+ }
+ if strings.Index(pdoc.ImportPath[len(pdoc.ProjectRoot):], "/src/") > 0 {
+ r *= 0.95
+ }
+ for _, p := range pdoc.Imports {
+ if vendorPat.MatchString(p) {
+ // Penalize packages that import vendored packages.
+ r *= 0.1
+ break
+ }
+ }
}
return r
}
diff --git a/database/index_test.go b/database/index_test.go
index 7b13dbf..ea40f69 100644
--- a/database/index_test.go
+++ b/database/index_test.go
@@ -100,21 +100,30 @@
}
}
-func TestExcludedPath(t *testing.T) {
- tests := []struct {
- path string
- expected bool
- }{
- {"code.google.com/p/go.text/internal/ucd", true},
- {"code.google.com/p/go.text/internal", true},
- {"camlistore.org/third_party/bazil.org/fuse", true},
- {"bazil.org/fuse", false},
- }
+var vendorPatTests = []struct {
+ path string
+ match bool
+}{
+ {"camlistore.org/third_party/github.com/user/repo", true},
+ {"camlistore.org/third_party/dir", false},
+ {"camlistore.org/third_party", false},
+ {"camlistore.org/xthird_party/github.com/user/repo", false},
+ {"camlistore.org/third_partyx/github.com/user/repo", false},
- for _, tt := range tests {
- actual := isExcludedPath(tt.path)
- if actual != tt.expected {
- t.Errorf("isExcludedPath=%t, want %t for %s", actual, tt.expected, tt.path)
+ {"example.org/_third_party/github.com/user/repo/dir", true},
+ {"example.org/_third_party/dir", false},
+
+ {"github.com/user/repo/Godeps/_workspace/src/github.com/user/repo", true},
+ {"github.com/user/repo/Godeps/_workspace/src/dir", false},
+
+ {"github.com/user/repo", false},
+}
+
+func TestVendorPat(t *testing.T) {
+ for _, tt := range vendorPatTests {
+ match := vendorPat.MatchString(tt.path)
+ if match != tt.match {
+ t.Errorf("match(%q) = %v, want %v", tt.path, match, match)
}
}
}
diff --git a/gddo-server/crawl.go b/gddo-server/crawl.go
index 1a8dd50..5df4a57 100644
--- a/gddo-server/crawl.go
+++ b/gddo-server/crawl.go
@@ -8,7 +8,6 @@
import (
"log"
- "regexp"
"strings"
"time"
@@ -16,16 +15,6 @@
"github.com/golang/gddo/gosrc"
)
-var nestedProjectPat = regexp.MustCompile(`/(?:github\.com|launchpad\.net|code\.google\.com/p|bitbucket\.org|labix\.org)/`)
-
-func exists(path string) bool {
- b, err := db.Exists(path)
- if err != nil {
- b = false
- }
- return b
-}
-
// crawlDoc fetches the package documentation from the VCS and updates the database.
func crawlDoc(source string, importPath string, pdoc *doc.Package, hasSubdirs bool, nextCrawl time.Time) (*doc.Package, error) {
message := []interface{}{source}
@@ -61,9 +50,6 @@
// Old import path for Go sub-repository.
pdoc = nil
err = gosrc.NotFoundError{Message: "old Go sub-repo", Redirect: "golang.org/x/" + importPath[len("code.google.com/p/go."):]}
- } else if m := nestedProjectPat.FindStringIndex(importPath); m != nil && exists(importPath[m[0]+1:]) {
- pdoc = nil
- err = gosrc.NotFoundError{Message: "copy of other project."}
} else if blocked, e := db.IsBlocked(importPath); blocked && e == nil {
pdoc = nil
err = gosrc.NotFoundError{Message: "blocked."}
diff --git a/gosrc/path.go b/gosrc/path.go
index c162df6..e10529a 100644
--- a/gosrc/path.go
+++ b/gosrc/path.go
@@ -15,7 +15,7 @@
)
var validHost = regexp.MustCompile(`^[-a-z0-9]+(?:\.[-a-z0-9]+)+$`)
-var validPathElement = regexp.MustCompile(`^[-A-Za-z0-9~+][-A-Za-z0-9_.]*$`)
+var validPathElement = regexp.MustCompile(`^[-A-Za-z0-9~+_][-A-Za-z0-9_.]*$`)
func isValidPathElement(s string) bool {
return validPathElement.MatchString(s) && s != "testdata"
diff --git a/gosrc/path_test.go b/gosrc/path_test.go
index 024c186..8cad7a7 100644
--- a/gosrc/path_test.go
+++ b/gosrc/path_test.go
@@ -19,6 +19,7 @@
"example.com/foo.git",
"launchpad.net/~user/foo/trunk",
"launchpad.net/~user/+junk/version",
+ "github.com/user/repo/_ok/x",
}
var badImportPaths = []string{
@@ -28,7 +29,6 @@
"favicon.ico",
"exmpple.com",
"github.com/user/repo/testdata/x",
- "github.com/user/repo/_ignore/x",
"github.com/user/repo/.ignore/x",
}