blob: f72a23e4691aa778f1f0513a782174ac03bd173c [file] [log] [blame]
// Copyright 2013 The Go Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd.
package main
import (
"context"
"encoding/json"
"fmt"
"log"
"regexp"
"strings"
"time"
"cloud.google.com/go/pubsub"
"github.com/golang/gddo/doc"
"github.com/golang/gddo/gosrc"
)
var (
testdataPat = regexp.MustCompile(`/testdata(?:/|$)`)
)
// crawlNote is a message sent to Pub/Sub when a crawl occurs.
// It is encoded as JSON, so changes should match its
// compatibility requirements.
type crawlNote struct {
ImportPath string
}
func (s *server) publishCrawl(ctx context.Context, importPath string) {
if s.crawlTopic == nil {
return
}
note := &crawlNote{ImportPath: importPath}
b, err := json.Marshal(note)
if err != nil {
log.Printf("Encoding crawlNote: %v", err)
return
}
s.crawlTopic.Publish(ctx, &pubsub.Message{Data: b})
}
// crawlDoc fetches the package documentation from the VCS and updates the database.
func (s *server) crawlDoc(ctx context.Context, source string, importPath string, pdoc *doc.Package, hasSubdirs bool, nextCrawl time.Time) (*doc.Package, error) {
message := []interface{}{source}
defer func() {
message = append(message, importPath)
log.Println(message...)
}()
if !nextCrawl.IsZero() {
d := time.Since(nextCrawl) / time.Hour
if d > 0 {
message = append(message, "late:", int64(d))
}
}
etag := ""
if pdoc != nil {
etag = pdoc.Etag
message = append(message, "etag:", etag)
}
start := time.Now()
var err error
if strings.HasPrefix(importPath, "code.google.com/p/go.") {
// Old import path for Go sub-repository.
pdoc = nil
err = gosrc.NotFoundError{Message: "old Go sub-repo", Redirect: "golang.org/x/" + importPath[len("code.google.com/p/go."):]}
} else if blocked, e := s.db.IsBlocked(importPath); blocked && e == nil {
pdoc = nil
err = gosrc.NotFoundError{Message: "blocked."}
} else if testdataPat.MatchString(importPath) {
pdoc = nil
err = gosrc.NotFoundError{Message: "testdata."}
} else {
var pdocNew *doc.Package
pdocNew, err = doc.Get(ctx, s.httpClient, importPath, etag)
message = append(message, "fetch:", int64(time.Since(start)/time.Millisecond))
if err == nil && pdocNew.Name == "" && !hasSubdirs {
for _, e := range pdocNew.Errors {
message = append(message, "err:", e)
}
pdoc = nil
err = gosrc.NotFoundError{Message: "no Go files or subdirs"}
} else if _, ok := err.(gosrc.NotModifiedError); !ok {
pdoc = pdocNew
}
}
maxAge := s.v.GetDuration(ConfigMaxAge)
nextCrawl = start.Add(maxAge)
switch {
case strings.HasPrefix(importPath, "github.com/") || (pdoc != nil && len(pdoc.Errors) > 0):
nextCrawl = start.Add(maxAge * 7)
case strings.HasPrefix(importPath, "gist.github.com/"):
// Don't spend time on gists. It's silly thing to do.
nextCrawl = start.Add(maxAge * 30)
}
if err == nil {
message = append(message, "put:", pdoc.Etag)
if err := s.put(ctx, pdoc, nextCrawl); err != nil {
log.Println(err)
}
s.publishCrawl(ctx, importPath)
return pdoc, nil
} else if e, ok := err.(gosrc.NotModifiedError); ok {
if pdoc.Status == gosrc.Active && !s.isActivePkg(importPath, e.Status) {
if e.Status == gosrc.NoRecentCommits {
e.Status = gosrc.Inactive
}
message = append(message, "archive", e)
pdoc.Status = e.Status
if err := s.db.Put(ctx, pdoc, nextCrawl, false); err != nil {
log.Printf("ERROR db.Put(%q): %v", importPath, err)
}
} else {
// Touch the package without updating and move on to next one.
message = append(message, "touch")
if err := s.db.SetNextCrawl(importPath, nextCrawl); err != nil {
log.Printf("ERROR db.SetNextCrawl(%q): %v", importPath, err)
}
}
s.publishCrawl(ctx, importPath)
return pdoc, nil
} else if e, ok := err.(gosrc.NotFoundError); ok {
message = append(message, "notfound:", e)
if err := s.db.Delete(ctx, importPath); err != nil {
log.Printf("ERROR db.Delete(%q): %v", importPath, err)
}
return nil, e
} else {
message = append(message, "ERROR:", err)
return nil, err
}
}
func (s *server) put(ctx context.Context, pdoc *doc.Package, nextCrawl time.Time) error {
if pdoc.Status == gosrc.NoRecentCommits &&
s.isActivePkg(pdoc.ImportPath, gosrc.NoRecentCommits) {
pdoc.Status = gosrc.Active
}
if err := s.db.Put(ctx, pdoc, nextCrawl, false); err != nil {
return fmt.Errorf("ERROR db.Put(%q): %v", pdoc.ImportPath, err)
}
return nil
}
// isActivePkg reports whether a package is considered active,
// either because its directory is active or because it is imported by another package.
func (s *server) isActivePkg(pkg string, status gosrc.DirectoryStatus) bool {
switch status {
case gosrc.Active:
return true
case gosrc.NoRecentCommits:
// It should be inactive only if it has no imports as well.
n, err := s.db.ImporterCount(pkg)
if err != nil {
log.Printf("ERROR db.ImporterCount(%q): %v", pkg, err)
}
return n > 0
}
return false
}