gosrc: Archive noise packages instead of deleting them.
gddo should let users visit any go-gettable package. This change removes
noise packages (dead-end forks, quick forks, and inactive packages) from
the search index but keeps them in the database.
Fixes #420
Change-Id: Ifb1d731655ca5e6d31bfc0bd4c7657ec165b9902
Reviewed-on: https://go-review.googlesource.com/24793
Reviewed-by: Andrew Gerrand <adg@golang.org>
Reviewed-by: Alan Donovan <adonovan@google.com>
diff --git a/database/database.go b/database/database.go
index d0c827d..1a82e02 100644
--- a/database/database.go
+++ b/database/database.go
@@ -280,12 +280,19 @@
return err
}
ctx := bgCtx()
- if err := PutIndex(ctx, pdoc, id, score, n); err != nil {
- log.Printf("Cannot put %q in index: %v", pdoc.ImportPath, err)
- }
- if old != nil {
- if err := updateImportsIndex(c, ctx, old, pdoc); err != nil {
+ if score > 0 {
+ if err := PutIndex(ctx, pdoc, id, score, n); err != nil {
+ log.Printf("Cannot put %q in index: %v", pdoc.ImportPath, err)
+ }
+
+ if old != nil {
+ if err := updateImportsIndex(c, ctx, old, pdoc); err != nil {
+ return err
+ }
+ }
+ } else {
+ if err := deleteIndex(ctx, id); err != nil {
return err
}
}
@@ -369,26 +376,24 @@
return nil
}
-var setNextCrawlEtagScript = redis.NewScript(0, `
- local root = ARGV[1]
- local etag = ARGV[2]
- local nextCrawl = ARGV[3]
+var setNextCrawlScript = redis.NewScript(0, `
+ local path = ARGV[1]
+ local nextCrawl = ARGV[2]
- local pkgs = redis.call('SORT', 'index:project:' .. root, 'GET', '#', 'GET', 'pkg:*->etag')
-
- for i=1,#pkgs,2 do
- if pkgs[i+1] == etag then
- redis.call('ZADD', 'nextCrawl', nextCrawl, pkgs[i])
- redis.call('HSET', 'pkg:' .. pkgs[i], 'crawl', nextCrawl)
- end
+ local id = redis.call('HGET', 'ids', path)
+ if not id then
+ return false
end
+
+ redis.call('ZADD', 'nextCrawl', nextCrawl, id)
+ redis.call('HSET', 'pkg:' .. id, 'crawl', nextCrawl)
`)
-// SetNextCrawlEtag sets the next crawl time for all packages in the project with the given etag.
-func (db *Database) SetNextCrawlEtag(projectRoot string, etag string, t time.Time) error {
+// SetNextCrawl sets the next crawl time for a package.
+func (db *Database) SetNextCrawl(path string, t time.Time) error {
c := db.Pool.Get()
defer c.Close()
- _, err := setNextCrawlEtagScript.Do(c, normalizeProjectRoot(projectRoot), etag, t.Unix())
+ _, err := setNextCrawlScript.Do(c, path, t.Unix())
return err
}
diff --git a/database/index.go b/database/index.go
index 149a480..6fa30b6 100644
--- a/database/index.go
+++ b/database/index.go
@@ -160,7 +160,7 @@
func documentScore(pdoc *doc.Package) float64 {
if pdoc.Name == "" ||
- pdoc.DeadEndFork ||
+ pdoc.Status != gosrc.Active ||
len(pdoc.Errors) > 0 ||
strings.HasSuffix(pdoc.ImportPath, ".go") ||
strings.HasPrefix(pdoc.ImportPath, "gist.github.com/") ||
diff --git a/doc/builder.go b/doc/builder.go
index d60646c..8a723d1 100644
--- a/doc/builder.go
+++ b/doc/builder.go
@@ -373,7 +373,7 @@
}
// PackageVersion is modified when previously stored packages are invalid.
-const PackageVersion = "7"
+const PackageVersion = "8"
type Package struct {
// The import path for this package.
@@ -397,8 +397,8 @@
// Version control system: git, hg, bzr, ...
VCS string
- // Version control: belongs to a dead end fork
- DeadEndFork bool
+ // Version control: active or suppressed.
+ Status gosrc.DirectoryStatus
// Whether the package is a fork of another one.
Fork bool
@@ -503,7 +503,7 @@
BrowseURL: dir.BrowseURL,
Etag: PackageVersion + "-" + dir.Etag,
VCS: dir.VCS,
- DeadEndFork: dir.DeadEndFork,
+ Status: dir.Status,
Subdirectories: dir.Subdirectories,
Fork: dir.Fork,
Stars: dir.Stars,
diff --git a/gddo-server/assets/templates/common.html b/gddo-server/assets/templates/common.html
index a38dee5..bcbf071 100644
--- a/gddo-server/assets/templates/common.html
+++ b/gddo-server/assets/templates/common.html
@@ -105,7 +105,7 @@
{{if not .Updated.IsZero}}Updated <span class="timeago" title="{{.Updated.Format "2006-01-02T15:04:05Z"}}">{{.Updated.Format "2006-01-02"}}</span>{{if or (equal .GOOS "windows") (equal .GOOS "darwin")}} with GOOS={{.GOOS}}{{end}}.{{end}}
<a href="javascript:document.getElementsByName('x-refresh')[0].submit();" title="Refresh this page from the source.">Refresh now</a>.
<a href="?tools">Tools</a> for package owners.
- {{if .DeadEndFork}}This is a dead-end fork (no commits since the fork).{{end}}
+ {{.StatusDescription}}
{{end}}
{{with $.pdoc.Errors}}
<p>The <a href="http://golang.org/cmd/go/#Download_and_install_packages_and_dependencies">go get</a>
diff --git a/gddo-server/background.go b/gddo-server/background.go
index 9b3c8c1..bf96593 100644
--- a/gddo-server/background.go
+++ b/gddo-server/background.go
@@ -86,8 +86,8 @@
}
if _, err = crawlDoc("crawl", pdoc.ImportPath, pdoc, len(pkgs) > 0, nextCrawl); err != nil {
// Touch package so that crawl advances to next package.
- if err := db.SetNextCrawlEtag(pdoc.ProjectRoot, pdoc.Etag, time.Now().Add(*maxAge/3)); err != nil {
- log.Printf("ERROR db.TouchLastCrawl(%q): %v", pdoc.ImportPath, err)
+ if err := db.SetNextCrawl(pdoc.ImportPath, time.Now().Add(*maxAge/3)); err != nil {
+ log.Printf("ERROR db.SetNextCrawl(%q): %v", pdoc.ImportPath, err)
}
}
return nil
diff --git a/gddo-server/crawl.go b/gddo-server/crawl.go
index 6fd960d..d110f18 100644
--- a/gddo-server/crawl.go
+++ b/gddo-server/crawl.go
@@ -7,6 +7,7 @@
package main
import (
+ "fmt"
"log"
"regexp"
"strings"
@@ -77,30 +78,28 @@
if err == nil {
message = append(message, "put:", pdoc.Etag)
- if err := db.Put(pdoc, nextCrawl, false); err != nil {
- log.Printf("ERROR db.Put(%q): %v", importPath, err)
+ if err := put(pdoc, nextCrawl); err != nil {
+ log.Println(err)
}
return pdoc, nil
} else if e, ok := err.(gosrc.NotModifiedError); ok {
- if !pdoc.IsCmd && isInactivePkg(importPath, e.Since) {
- message = append(message, "delete inactive")
- if err := db.Delete(importPath); err != nil {
- log.Printf("ERROR db.Delete(%q): %v", importPath, err)
+ if pdoc.Status == gosrc.Active && !isActivePkg(importPath, e.Status) {
+ if e.Status == gosrc.NoRecentCommits {
+ e.Status = gosrc.Inactive
}
- return nil, e
- }
- // Touch the package without updating and move on to next one.
- message = append(message, "touch")
- if err := db.SetNextCrawlEtag(pdoc.ProjectRoot, pdoc.Etag, nextCrawl); err != nil {
- log.Printf("ERROR db.SetNextCrawlEtag(%q): %v", importPath, err)
+ message = append(message, "archive", e)
+ pdoc.Status = e.Status
+ if err := db.Put(pdoc, nextCrawl, false); err != nil {
+ log.Printf("ERROR db.Put(%q): %v", importPath, err)
+ }
+ } else {
+ // Touch the package without updating and move on to next one.
+ message = append(message, "touch")
+ if err := db.SetNextCrawl(importPath, nextCrawl); err != nil {
+ log.Printf("ERROR db.SetNextCrawl(%q): %v", importPath, err)
+ }
}
return pdoc, nil
- } else if err == gosrc.ErrQuickFork {
- message = append(message, "delete", err)
- if err := db.Delete(importPath); err != nil {
- log.Printf("ERROR db.Delete(%q): %v", importPath, err)
- }
- return nil, err
} else if e, ok := err.(gosrc.NotFoundError); ok {
message = append(message, "notfound:", e)
if err := db.Delete(importPath); err != nil {
@@ -113,16 +112,30 @@
}
}
-// isInactivePkg reports whether the specified package is not imported
-// and has not been modified in 2 years.
-func isInactivePkg(pkg string, lastCommitted time.Time) bool {
- if lastCommitted.IsZero() ||
- time.Now().Before(lastCommitted.Add(2*365*24*time.Hour)) {
- return false
+func put(pdoc *doc.Package, nextCrawl time.Time) error {
+ if pdoc.Status == gosrc.NoRecentCommits &&
+ isActivePkg(pdoc.ImportPath, gosrc.NoRecentCommits) {
+ pdoc.Status = gosrc.Active
}
- n, err := db.ImporterCount(pkg)
- if err != nil {
- log.Printf("ERROR db.ImporterCount(%q): %v", pkg, err)
+ if err := db.Put(pdoc, nextCrawl, false); err != nil {
+ return fmt.Errorf("ERROR db.Put(%q): %v", pdoc.ImportPath, err)
}
- return n == 0
+ return nil
+}
+
+// isActivePkg reports whether a package is considered active,
+// either because its directory is active or because it is imported by another package.
+func isActivePkg(pkg string, status gosrc.DirectoryStatus) bool {
+ switch status {
+ case gosrc.Active:
+ return true
+ case gosrc.NoRecentCommits:
+ // It should be inactive only if it has no imports as well.
+ n, err := db.ImporterCount(pkg)
+ if err != nil {
+ log.Printf("ERROR db.ImporterCount(%q): %v", pkg, err)
+ }
+ return n > 0
+ }
+ return false
}
diff --git a/gddo-server/template.go b/gddo-server/template.go
index 2da07a0..d9032ac 100644
--- a/gddo-server/template.go
+++ b/gddo-server/template.go
@@ -270,6 +270,19 @@
return htemp.HTML(buf.String())
}
+func (pdoc *tdoc) StatusDescription() htemp.HTML {
+ desc := ""
+ switch pdoc.Package.Status {
+ case gosrc.DeadEndFork:
+ desc = "This is a dead-end fork (no commits since the fork)."
+ case gosrc.QuickFork:
+ desc = "This is a quick bug-fix fork (has fewer than three commits, and only during the week it was created)."
+ case gosrc.Inactive:
+ desc = "This is an inactive package (no imports and no commits in at least two years)."
+ }
+ return htemp.HTML(desc)
+}
+
func formatPathFrag(path, fragment string) string {
if len(path) > 0 && path[0] != '/' {
path = "/" + path
diff --git a/gosrc/bitbucket.go b/gosrc/bitbucket.go
index 76cea46..375e1f9 100644
--- a/gosrc/bitbucket.go
+++ b/gosrc/bitbucket.go
@@ -120,6 +120,11 @@
return nil, err
}
+ status := Active
+ if isBitbucketDeadEndFork(repo) {
+ status = DeadEndFork
+ }
+
return &Directory{
BrowseURL: expand("https://bitbucket.org/{owner}/{repo}/src/{tag}{dir}", match),
Etag: etag,
@@ -130,7 +135,7 @@
ProjectURL: expand("https://bitbucket.org/{owner}/{repo}/", match),
Subdirectories: contents.Directories,
VCS: match["vcs"],
- DeadEndFork: isBitbucketDeadEndFork(repo),
+ Status: status,
Fork: repo.IsFork,
Stars: repo.Followers,
}, nil
diff --git a/gosrc/github.go b/gosrc/github.go
index 49a00df..c1818ee 100644
--- a/gosrc/github.go
+++ b/gosrc/github.go
@@ -72,23 +72,34 @@
return nil, err
}
+ status := Active
var commits []*githubCommit
url := expand("https://api.github.com/repos/{owner}/{repo}/commits", match)
- url += fmt.Sprintf("?since=%s", repo.CreatedAt.Format(time.RFC3339))
if match["dir"] != "" {
- url += fmt.Sprintf("&path=%s", match["dir"])
+ url += fmt.Sprintf("?path=%s", match["dir"])
}
if _, err := c.getJSON(url, &commits); err != nil {
return nil, err
}
- if repo.Fork && isQuickFork(commits, repo.CreatedAt) {
- return nil, ErrQuickFork
- }
if len(commits) == 0 {
return nil, NotFoundError{Message: "package directory changed or removed"}
}
+
+ lastCommitted := commits[0].Commit.Committer.Date
+ if lastCommitted.Add(ExpiresAfter).Before(time.Now()) {
+ status = NoRecentCommits
+ } else if repo.Fork {
+ if repo.PushedAt.Before(repo.CreatedAt) {
+ status = DeadEndFork
+ } else if isQuickFork(commits, repo.CreatedAt) {
+ status = QuickFork
+ }
+ }
if commits[0].ID == savedEtag {
- return nil, NotModifiedError{Since: commits[0].Commit.Committer.Date}
+ return nil, NotModifiedError{
+ Since: lastCommitted,
+ Status: status,
+ }
}
var contents []*struct {
@@ -150,8 +161,6 @@
browseURL = expand("https://github.com/{owner}/{repo}/tree{dir}", match)
}
- isDeadEndFork := repo.Fork && repo.PushedAt.Before(repo.CreatedAt)
-
return &Directory{
BrowseURL: browseURL,
Etag: commits[0].ID,
@@ -162,7 +171,7 @@
ProjectURL: expand("https://github.com/{owner}/{repo}", match),
Subdirectories: subdirs,
VCS: "git",
- DeadEndFork: isDeadEndFork,
+ Status: status,
Fork: repo.Fork,
Stars: repo.Stars,
}, nil
@@ -171,19 +180,18 @@
// isQuickFork reports whether the repository is a "quick fork":
// it has fewer than 3 commits, all within a week of the repo creation, createdAt.
func isQuickFork(commits []*githubCommit, createdAt time.Time) bool {
- if len(commits) > 2 {
- return false
- }
oneWeekOld := createdAt.Add(7 * 24 * time.Hour)
if oneWeekOld.After(time.Now()) {
return false // a newborn baby of a repository
}
+ n := 0
for _, commit := range commits {
if commit.Commit.Committer.Date.After(oneWeekOld) {
return false
}
+ n++
}
- return true
+ return n < 3
}
func getGitHubPresentation(client *http.Client, match map[string]string) (*Presentation, error) {
diff --git a/gosrc/gosrc.go b/gosrc/gosrc.go
index df33ee1..966821f 100644
--- a/gosrc/gosrc.go
+++ b/gosrc/gosrc.go
@@ -19,6 +19,8 @@
"time"
)
+const ExpiresAfter = 2 * 365 * 24 * time.Hour // Package with no commits and imports expires.
+
// File represents a file.
type File struct {
// File name with no directory.
@@ -31,6 +33,19 @@
BrowseURL string
}
+type DirectoryStatus int
+
+const (
+ Active DirectoryStatus = iota
+ DeadEndFork // Forks with no commits
+ QuickFork // Forks with less than 3 commits, all within a week from creation
+ NoRecentCommits // No commits for ExpiresAfter
+
+ // No commits for ExpiresAfter and no imports.
+ // This is a status derived from NoRecentCommits and the imports count information in the db.
+ Inactive
+)
+
// Directory describes a directory on a version control service.
type Directory struct {
// The import path for this package.
@@ -51,8 +66,8 @@
// Version control system: git, hg, bzr, ...
VCS string
- // Version control: belongs to a dead end fork
- DeadEndFork bool
+ // Version control: active or should be suppressed.
+ Status DirectoryStatus
// Cache validation tag. This tag is not necessarily an HTTP entity tag.
// The tag is "" if there is no meaningful cache validation for the VCS.
@@ -114,15 +129,21 @@
}
type NotModifiedError struct {
- Since time.Time
+ Since time.Time
+ Status DirectoryStatus
}
func (e NotModifiedError) Error() string {
- return fmt.Sprintf("package not modified since %s", e.Since.Format(time.RFC1123))
+ msg := "package not modified"
+ if !e.Since.IsZero() {
+ msg += fmt.Sprintf(" since %s", e.Since.Format(time.RFC1123))
+ }
+ if e.Status == QuickFork {
+ msg += " (package is a quick fork)"
+ }
+ return msg
}
-var ErrQuickFork = errors.New("package is a quick bug-fix fork")
-
var errNoMatch = errors.New("no match")
// service represents a source code control service.