gosrc: Archive noise packages instead of deleting them.

gddo should let users visit any go-gettable package. This change
removes noise packages from the search index but keeps them in the db.

This fixes #420

Change-Id: Ifb1d731655ca5e6d31bfc0bd4c7657ec165b9902
Reviewed-on: https://go-review.googlesource.com/24793
Reviewed-by: Andrew Gerrand <adg@golang.org>
Reviewed-by: Alan Donovan <adonovan@google.com>
diff --git a/database/database.go b/database/database.go
index d0c827d..1a82e02 100644
--- a/database/database.go
+++ b/database/database.go
@@ -280,12 +280,19 @@
 			return err
 		}
 		ctx := bgCtx()
-		if err := PutIndex(ctx, pdoc, id, score, n); err != nil {
-			log.Printf("Cannot put %q in index: %v", pdoc.ImportPath, err)
-		}
 
-		if old != nil {
-			if err := updateImportsIndex(c, ctx, old, pdoc); err != nil {
+		if score > 0 {
+			if err := PutIndex(ctx, pdoc, id, score, n); err != nil {
+				log.Printf("Cannot put %q in index: %v", pdoc.ImportPath, err)
+			}
+
+			if old != nil {
+				if err := updateImportsIndex(c, ctx, old, pdoc); err != nil {
+					return err
+				}
+			}
+		} else {
+			if err := deleteIndex(ctx, id); err != nil {
 				return err
 			}
 		}
@@ -369,26 +376,24 @@
 	return nil
 }
 
-var setNextCrawlEtagScript = redis.NewScript(0, `
-    local root = ARGV[1]
-    local etag = ARGV[2]
-    local nextCrawl = ARGV[3]
+var setNextCrawlScript = redis.NewScript(0, `
+    local path = ARGV[1]
+    local nextCrawl = ARGV[2]
 
-    local pkgs = redis.call('SORT', 'index:project:' .. root, 'GET', '#',  'GET', 'pkg:*->etag')
-
-    for i=1,#pkgs,2 do
-        if pkgs[i+1] == etag then
-            redis.call('ZADD', 'nextCrawl', nextCrawl, pkgs[i])
-            redis.call('HSET', 'pkg:' .. pkgs[i], 'crawl', nextCrawl)
-        end
+    local id = redis.call('HGET', 'ids', path)
+    if not id then
+        return false
     end
+
+    redis.call('ZADD', 'nextCrawl', nextCrawl, id)
+    redis.call('HSET', 'pkg:' .. id, 'crawl', nextCrawl)
 `)
 
-// SetNextCrawlEtag sets the next crawl time for all packages in the project with the given etag.
-func (db *Database) SetNextCrawlEtag(projectRoot string, etag string, t time.Time) error {
+// SetNextCrawl sets the next crawl time for a package.
+func (db *Database) SetNextCrawl(path string, t time.Time) error {
 	c := db.Pool.Get()
 	defer c.Close()
-	_, err := setNextCrawlEtagScript.Do(c, normalizeProjectRoot(projectRoot), etag, t.Unix())
+	_, err := setNextCrawlScript.Do(c, path, t.Unix())
 	return err
 }
 
diff --git a/database/index.go b/database/index.go
index 149a480..6fa30b6 100644
--- a/database/index.go
+++ b/database/index.go
@@ -160,7 +160,7 @@
 
 func documentScore(pdoc *doc.Package) float64 {
 	if pdoc.Name == "" ||
-		pdoc.DeadEndFork ||
+		pdoc.Status != gosrc.Active ||
 		len(pdoc.Errors) > 0 ||
 		strings.HasSuffix(pdoc.ImportPath, ".go") ||
 		strings.HasPrefix(pdoc.ImportPath, "gist.github.com/") ||
diff --git a/doc/builder.go b/doc/builder.go
index d60646c..8a723d1 100644
--- a/doc/builder.go
+++ b/doc/builder.go
@@ -373,7 +373,7 @@
 }
 
 // PackageVersion is modified when previously stored packages are invalid.
-const PackageVersion = "7"
+const PackageVersion = "8"
 
 type Package struct {
 	// The import path for this package.
@@ -397,8 +397,8 @@
 	// Version control system: git, hg, bzr, ...
 	VCS string
 
-	// Version control: belongs to a dead end fork
-	DeadEndFork bool
+	// Version control: active or suppressed.
+	Status gosrc.DirectoryStatus
 
 	// Whether the package is a fork of another one.
 	Fork bool
@@ -503,7 +503,7 @@
 		BrowseURL:      dir.BrowseURL,
 		Etag:           PackageVersion + "-" + dir.Etag,
 		VCS:            dir.VCS,
-		DeadEndFork:    dir.DeadEndFork,
+		Status:         dir.Status,
 		Subdirectories: dir.Subdirectories,
 		Fork:           dir.Fork,
 		Stars:          dir.Stars,
diff --git a/gddo-server/assets/templates/common.html b/gddo-server/assets/templates/common.html
index a38dee5..bcbf071 100644
--- a/gddo-server/assets/templates/common.html
+++ b/gddo-server/assets/templates/common.html
@@ -105,7 +105,7 @@
   {{if not .Updated.IsZero}}Updated <span class="timeago" title="{{.Updated.Format "2006-01-02T15:04:05Z"}}">{{.Updated.Format "2006-01-02"}}</span>{{if or (equal .GOOS "windows") (equal .GOOS "darwin")}} with GOOS={{.GOOS}}{{end}}.{{end}}
   <a href="javascript:document.getElementsByName('x-refresh')[0].submit();" title="Refresh this page from the source.">Refresh now</a>.
   <a href="?tools">Tools</a> for package owners.
-  {{if .DeadEndFork}}This is a dead-end fork (no commits since the fork).{{end}}
+  {{.StatusDescription}}
 {{end}}
 {{with $.pdoc.Errors}}
     <p>The <a href="http://golang.org/cmd/go/#Download_and_install_packages_and_dependencies">go get</a>
diff --git a/gddo-server/background.go b/gddo-server/background.go
index 9b3c8c1..bf96593 100644
--- a/gddo-server/background.go
+++ b/gddo-server/background.go
@@ -86,8 +86,8 @@
 	}
 	if _, err = crawlDoc("crawl", pdoc.ImportPath, pdoc, len(pkgs) > 0, nextCrawl); err != nil {
 		// Touch package so that crawl advances to next package.
-		if err := db.SetNextCrawlEtag(pdoc.ProjectRoot, pdoc.Etag, time.Now().Add(*maxAge/3)); err != nil {
-			log.Printf("ERROR db.TouchLastCrawl(%q): %v", pdoc.ImportPath, err)
+		if err := db.SetNextCrawl(pdoc.ImportPath, time.Now().Add(*maxAge/3)); err != nil {
+			log.Printf("ERROR db.SetNextCrawl(%q): %v", pdoc.ImportPath, err)
 		}
 	}
 	return nil
diff --git a/gddo-server/crawl.go b/gddo-server/crawl.go
index 6fd960d..d110f18 100644
--- a/gddo-server/crawl.go
+++ b/gddo-server/crawl.go
@@ -7,6 +7,7 @@
 package main
 
 import (
+	"fmt"
 	"log"
 	"regexp"
 	"strings"
@@ -77,30 +78,28 @@
 
 	if err == nil {
 		message = append(message, "put:", pdoc.Etag)
-		if err := db.Put(pdoc, nextCrawl, false); err != nil {
-			log.Printf("ERROR db.Put(%q): %v", importPath, err)
+		if err := put(pdoc, nextCrawl); err != nil {
+			log.Println(err)
 		}
 		return pdoc, nil
 	} else if e, ok := err.(gosrc.NotModifiedError); ok {
-		if !pdoc.IsCmd && isInactivePkg(importPath, e.Since) {
-			message = append(message, "delete inactive")
-			if err := db.Delete(importPath); err != nil {
-				log.Printf("ERROR db.Delete(%q): %v", importPath, err)
+		if pdoc.Status == gosrc.Active && !isActivePkg(importPath, e.Status) {
+			if e.Status == gosrc.NoRecentCommits {
+				e.Status = gosrc.Inactive
 			}
-			return nil, e
-		}
-		// Touch the package without updating and move on to next one.
-		message = append(message, "touch")
-		if err := db.SetNextCrawlEtag(pdoc.ProjectRoot, pdoc.Etag, nextCrawl); err != nil {
-			log.Printf("ERROR db.SetNextCrawlEtag(%q): %v", importPath, err)
+			message = append(message, "archive", e)
+			pdoc.Status = e.Status
+			if err := db.Put(pdoc, nextCrawl, false); err != nil {
+				log.Printf("ERROR db.Put(%q): %v", importPath, err)
+			}
+		} else {
+			// Touch the package without updating and move on to next one.
+			message = append(message, "touch")
+			if err := db.SetNextCrawl(importPath, nextCrawl); err != nil {
+				log.Printf("ERROR db.SetNextCrawl(%q): %v", importPath, err)
+			}
 		}
 		return pdoc, nil
-	} else if err == gosrc.ErrQuickFork {
-		message = append(message, "delete", err)
-		if err := db.Delete(importPath); err != nil {
-			log.Printf("ERROR db.Delete(%q): %v", importPath, err)
-		}
-		return nil, err
 	} else if e, ok := err.(gosrc.NotFoundError); ok {
 		message = append(message, "notfound:", e)
 		if err := db.Delete(importPath); err != nil {
@@ -113,16 +112,30 @@
 	}
 }
 
-// isInactivePkg reports whether the specified package is not imported
-// and has not been modified in 2 years.
-func isInactivePkg(pkg string, lastCommitted time.Time) bool {
-	if lastCommitted.IsZero() ||
-		time.Now().Before(lastCommitted.Add(2*365*24*time.Hour)) {
-		return false
+func put(pdoc *doc.Package, nextCrawl time.Time) error {
+	if pdoc.Status == gosrc.NoRecentCommits &&
+		isActivePkg(pdoc.ImportPath, gosrc.NoRecentCommits) {
+		pdoc.Status = gosrc.Active
 	}
-	n, err := db.ImporterCount(pkg)
-	if err != nil {
-		log.Printf("ERROR db.ImporterCount(%q): %v", pkg, err)
+	if err := db.Put(pdoc, nextCrawl, false); err != nil {
+		return fmt.Errorf("ERROR db.Put(%q): %v", pdoc.ImportPath, err)
 	}
-	return n == 0
+	return nil
+}
+
+// isActivePkg reports whether a package is considered active,
+// either because its directory is active or because it is imported by another package.
+func isActivePkg(pkg string, status gosrc.DirectoryStatus) bool {
+	switch status {
+	case gosrc.Active:
+		return true
+	case gosrc.NoRecentCommits:
+		// It is inactive only if no other package imports it either.
+		n, err := db.ImporterCount(pkg)
+		if err != nil {
+			log.Printf("ERROR db.ImporterCount(%q): %v", pkg, err)
+		}
+		return n > 0
+	}
+	return false
 }
diff --git a/gddo-server/template.go b/gddo-server/template.go
index 2da07a0..d9032ac 100644
--- a/gddo-server/template.go
+++ b/gddo-server/template.go
@@ -270,6 +270,19 @@
 	return htemp.HTML(buf.String())
 }
 
+func (pdoc *tdoc) StatusDescription() htemp.HTML {
+	desc := ""
+	switch pdoc.Package.Status {
+	case gosrc.DeadEndFork:
+		desc = "This is a dead-end fork (no commits since the fork)."
+	case gosrc.QuickFork:
+		desc = "This is a quick bug-fix fork (has fewer than three commits, and only during the week it was created)."
+	case gosrc.Inactive:
+		desc = "This is an inactive package (no imports and no commits in at least two years)."
+	}
+	return htemp.HTML(desc)
+}
+
 func formatPathFrag(path, fragment string) string {
 	if len(path) > 0 && path[0] != '/' {
 		path = "/" + path
diff --git a/gosrc/bitbucket.go b/gosrc/bitbucket.go
index 76cea46..375e1f9 100644
--- a/gosrc/bitbucket.go
+++ b/gosrc/bitbucket.go
@@ -120,6 +120,11 @@
 		return nil, err
 	}
 
+	status := Active
+	if isBitbucketDeadEndFork(repo) {
+		status = DeadEndFork
+	}
+
 	return &Directory{
 		BrowseURL:      expand("https://bitbucket.org/{owner}/{repo}/src/{tag}{dir}", match),
 		Etag:           etag,
@@ -130,7 +135,7 @@
 		ProjectURL:     expand("https://bitbucket.org/{owner}/{repo}/", match),
 		Subdirectories: contents.Directories,
 		VCS:            match["vcs"],
-		DeadEndFork:    isBitbucketDeadEndFork(repo),
+		Status:         status,
 		Fork:           repo.IsFork,
 		Stars:          repo.Followers,
 	}, nil
diff --git a/gosrc/github.go b/gosrc/github.go
index 49a00df..c1818ee 100644
--- a/gosrc/github.go
+++ b/gosrc/github.go
@@ -72,23 +72,34 @@
 		return nil, err
 	}
 
+	status := Active
 	var commits []*githubCommit
 	url := expand("https://api.github.com/repos/{owner}/{repo}/commits", match)
-	url += fmt.Sprintf("?since=%s", repo.CreatedAt.Format(time.RFC3339))
 	if match["dir"] != "" {
-		url += fmt.Sprintf("&path=%s", match["dir"])
+		url += fmt.Sprintf("?path=%s", match["dir"])
 	}
 	if _, err := c.getJSON(url, &commits); err != nil {
 		return nil, err
 	}
-	if repo.Fork && isQuickFork(commits, repo.CreatedAt) {
-		return nil, ErrQuickFork
-	}
 	if len(commits) == 0 {
 		return nil, NotFoundError{Message: "package directory changed or removed"}
 	}
+
+	lastCommitted := commits[0].Commit.Committer.Date
+	if lastCommitted.Add(ExpiresAfter).Before(time.Now()) {
+		status = NoRecentCommits
+	} else if repo.Fork {
+		if repo.PushedAt.Before(repo.CreatedAt) {
+			status = DeadEndFork
+		} else if isQuickFork(commits, repo.CreatedAt) {
+			status = QuickFork
+		}
+	}
 	if commits[0].ID == savedEtag {
-		return nil, NotModifiedError{Since: commits[0].Commit.Committer.Date}
+		return nil, NotModifiedError{
+			Since:  lastCommitted,
+			Status: status,
+		}
 	}
 
 	var contents []*struct {
@@ -150,8 +161,6 @@
 		browseURL = expand("https://github.com/{owner}/{repo}/tree{dir}", match)
 	}
 
-	isDeadEndFork := repo.Fork && repo.PushedAt.Before(repo.CreatedAt)
-
 	return &Directory{
 		BrowseURL:      browseURL,
 		Etag:           commits[0].ID,
@@ -162,7 +171,7 @@
 		ProjectURL:     expand("https://github.com/{owner}/{repo}", match),
 		Subdirectories: subdirs,
 		VCS:            "git",
-		DeadEndFork:    isDeadEndFork,
+		Status:         status,
 		Fork:           repo.Fork,
 		Stars:          repo.Stars,
 	}, nil
@@ -171,19 +180,18 @@
 // isQuickFork reports whether the repository is a "quick fork":
 // it has fewer than 3 commits, all within a week of the repo creation, createdAt.
 func isQuickFork(commits []*githubCommit, createdAt time.Time) bool {
-	if len(commits) > 2 {
-		return false
-	}
 	oneWeekOld := createdAt.Add(7 * 24 * time.Hour)
 	if oneWeekOld.After(time.Now()) {
 		return false // a newborn baby of a repository
 	}
+	n := 0
 	for _, commit := range commits {
 		if commit.Commit.Committer.Date.After(oneWeekOld) {
 			return false
 		}
+		n++
 	}
-	return true
+	return n < 3
 }
 
 func getGitHubPresentation(client *http.Client, match map[string]string) (*Presentation, error) {
diff --git a/gosrc/gosrc.go b/gosrc/gosrc.go
index df33ee1..966821f 100644
--- a/gosrc/gosrc.go
+++ b/gosrc/gosrc.go
@@ -19,6 +19,8 @@
 	"time"
 )
 
+const ExpiresAfter = 2 * 365 * 24 * time.Hour // A package with no commits for this long and no importers expires.
+
 // File represents a file.
 type File struct {
 	// File name with no directory.
@@ -31,6 +33,19 @@
 	BrowseURL string
 }
 
+type DirectoryStatus int
+
+const (
+	Active          DirectoryStatus = iota
+	DeadEndFork                     // Forks with no commits
+	QuickFork                       // Forks with less than 3 commits, all within a week from creation
+	NoRecentCommits                 // No commits for ExpiresAfter
+
+	// No commits for ExpiresAfter and no imports.
+	// This is a status derived from NoRecentCommits and the imports count information in the db.
+	Inactive
+)
+
 // Directory describes a directory on a version control service.
 type Directory struct {
 	// The import path for this package.
@@ -51,8 +66,8 @@
 	// Version control system: git, hg, bzr, ...
 	VCS string
 
-	// Version control: belongs to a dead end fork
-	DeadEndFork bool
+	// Version control: active or should be suppressed.
+	Status DirectoryStatus
 
 	// Cache validation tag. This tag is not necessarily an HTTP entity tag.
 	// The tag is "" if there is no meaningful cache validation for the VCS.
@@ -114,15 +129,21 @@
 }
 
 type NotModifiedError struct {
-	Since time.Time
+	Since  time.Time
+	Status DirectoryStatus
 }
 
 func (e NotModifiedError) Error() string {
-	return fmt.Sprintf("package not modified since %s", e.Since.Format(time.RFC1123))
+	msg := "package not modified"
+	if !e.Since.IsZero() {
+		msg += fmt.Sprintf(" since %s", e.Since.Format(time.RFC1123))
+	}
+	if e.Status == QuickFork {
+		msg += " (package is a quick fork)"
+	}
+	return msg
 }
 
-var ErrQuickFork = errors.New("package is a quick bug-fix fork")
-
 var errNoMatch = errors.New("no match")
 
 // service represents a source code control service.