gosrc: include package updated time in the request header.

gddo-server: pass pdoc instead of nil in the refresh call.

The package's last-updated time is now sent in the request header so
that we can check whether the package has been updated recently when
requesting from the VCS. Currently we use the HTTP ETag as a general way
to detect changes, but that still counts against the API rate limit when
requesting from GitHub. GitHub, however, accepts conditional requests
using "If-Modified-Since", and such a request does not count against the
rate limit if a 304 is received.

The package document is now passed to the refresh call to prevent
unlimited manual refreshing. The current design, which lets a manual
refresh force an update of the package, seems unnecessary.

These changes aim to reduce our GitHub API request rate, giving us more
headroom when we run batch updates for type analysis.

Change-Id: I4ec81c92662cd23dc96e5a5e141f1945b127db2c
Reviewed-on: https://go-review.googlesource.com/22506
Reviewed-by: Alan Donovan <adonovan@google.com>
Reviewed-by: Andrew Gerrand <adg@golang.org>
diff --git a/doc/get.go b/doc/get.go
index 611bfd0..977e3fb 100644
--- a/doc/get.go
+++ b/doc/get.go
@@ -12,9 +12,10 @@
 	"go/doc"
 	"net/http"
 	"strings"
+	"time"
 )
 
-func Get(client *http.Client, importPath string, etag string) (*Package, error) {
+func Get(client *http.Client, importPath string, etag string, updated time.Time) (*Package, error) {
 
 	const versionPrefix = PackageVersion + "-"
 
@@ -24,7 +25,7 @@
 		etag = ""
 	}
 
-	dir, err := gosrc.Get(client, importPath, etag)
+	dir, err := gosrc.Get(client, importPath, etag, updated)
 	if err != nil {
 		return nil, err
 	}
diff --git a/doc/print.go b/doc/print.go
index b199bbb..6950c79 100644
--- a/doc/print.go
+++ b/doc/print.go
@@ -41,8 +41,7 @@
 	if *local {
 		gosrc.SetLocalDevMode(os.Getenv("GOPATH"))
 	}
-	pdoc, err = doc.Get(http.DefaultClient, path, *etag)
-	//}
+	pdoc, err = doc.Get(http.DefaultClient, path, *etag, pdoc.Updated)
 	if err != nil {
 		log.Fatal(err)
 	}
diff --git a/gddo-server/crawl.go b/gddo-server/crawl.go
index fb23fa7..23516c3 100644
--- a/gddo-server/crawl.go
+++ b/gddo-server/crawl.go
@@ -53,7 +53,11 @@
 		err = gosrc.NotFoundError{Message: "testdata."}
 	} else {
 		var pdocNew *doc.Package
-		pdocNew, err = doc.Get(httpClient, importPath, etag)
+		updated := time.Time{}
+		if pdoc != nil {
+			updated = pdoc.Updated
+		}
+		pdocNew, err = doc.Get(httpClient, importPath, etag, updated)
 		message = append(message, "fetch:", int64(time.Since(start)/time.Millisecond))
 		if err == nil && pdocNew.Name == "" && !hasSubdirs {
 			for _, e := range pdocNew.Errors {
@@ -84,6 +88,11 @@
 		return pdoc, nil
 	case err == gosrc.ErrNotModified:
 		message = append(message, "touch")
+		// If the package is not modified, set the updated time
+		pdoc.Updated = time.Now().UTC()
+		if err := db.Put(pdoc, nextCrawl, false); err != nil {
+			log.Printf("ERROR db.Put(%q): %v", importPath, err)
+		}
 		if err := db.SetNextCrawlEtag(pdoc.ProjectRoot, pdoc.Etag, nextCrawl); err != nil {
 			log.Printf("ERROR db.SetNextCrawlEtag(%q): %v", importPath, err)
 		}
diff --git a/gddo-server/main.go b/gddo-server/main.go
index 5cb3ed5..77785a7 100644
--- a/gddo-server/main.go
+++ b/gddo-server/main.go
@@ -302,7 +302,9 @@
 				log.Printf("ERROR db.IncrementPopularScore(%s): %v", pdoc.ImportPath, err)
 			}
 		}
-		gceLogger.LogEvent(resp, req, nil)
+		if gceLogger != nil {
+			gceLogger.LogEvent(resp, req, nil)
+		}
 
 		template := "dir"
 		switch {
@@ -417,13 +419,13 @@
 
 func serveRefresh(resp http.ResponseWriter, req *http.Request) error {
 	importPath := req.Form.Get("path")
-	_, pkgs, _, err := db.Get(importPath)
+	pdoc, pkgs, _, err := db.Get(importPath)
 	if err != nil {
 		return err
 	}
 	c := make(chan error, 1)
 	go func() {
-		_, err := crawlDoc("rfrsh", importPath, nil, len(pkgs) > 0, time.Time{})
+		_, err := crawlDoc("rfrsh", importPath, pdoc, len(pkgs) > 0, time.Time{})
 		c <- err
 	}()
 	select {
diff --git a/gosrc/bitbucket.go b/gosrc/bitbucket.go
index 437349c..219e87d 100644
--- a/gosrc/bitbucket.go
+++ b/gosrc/bitbucket.go
@@ -32,7 +32,7 @@
 	} `json:"fork_of"`
 }
 
-func getBitbucketDir(client *http.Client, match map[string]string, savedEtag string) (*Directory, error) {
+func getBitbucketDir(client *http.Client, match map[string]string, savedEtag string, updated time.Time) (*Directory, error) {
 	var repo *bitbucketRepo
 	c := &httpClient{client: client}
 
diff --git a/gosrc/github.go b/gosrc/github.go
index 5ae969f..32e5e84 100644
--- a/gosrc/github.go
+++ b/gosrc/github.go
@@ -48,9 +48,17 @@
 	return &RemoteError{resp.Request.URL.Host, fmt.Errorf("%d: (%s)", resp.StatusCode, resp.Request.URL.String())}
 }
 
-func getGitHubDir(client *http.Client, match map[string]string, savedEtag string) (*Directory, error) {
+func getGitHubDir(client *http.Client, match map[string]string, savedEtag string, updated time.Time) (*Directory, error) {
 
-	c := &httpClient{client: client, errFn: gitHubError}
+	c := &httpClient{
+		client: client,
+		errFn:  gitHubError,
+	}
+	if !updated.IsZero() {
+		// http.TimeFormat is used since GitHub API will count against our rate limit
+		// if we use any timezone other than "GMT".
+		c.header = http.Header{"If-Modified-Since": {updated.Format(http.TimeFormat)}}
+	}
 
 	type refJSON struct {
 		Object struct {
@@ -65,6 +73,11 @@
 
 	resp, err := c.getJSON(expand("https://api.github.com/repos/{owner}/{repo}/git/refs", match), &refs)
 	if err != nil {
+		if resp != nil {
+			if last, err := time.Parse(http.TimeFormat, resp.Header.Get("Last-Modified")); err == nil && last.Before(updated) {
+				return nil, ErrNotModified
+			}
+		}
 		return nil, err
 	}
 
@@ -284,8 +297,14 @@
 	}, nil
 }
 
-func getGistDir(client *http.Client, match map[string]string, savedEtag string) (*Directory, error) {
-	c := &httpClient{client: client, errFn: gitHubError}
+func getGistDir(client *http.Client, match map[string]string, savedEtag string, updated time.Time) (*Directory, error) {
+	c := &httpClient{
+		client: client,
+		errFn:  gitHubError,
+	}
+	if !updated.IsZero() {
+		c.header = http.Header{"If-Modified-Since": {updated.Format(http.TimeFormat)}}
+	}
 
 	var gist struct {
 		Files map[string]struct {
diff --git a/gosrc/google.go b/gosrc/google.go
index 1a47b59..d52d403 100644
--- a/gosrc/google.go
+++ b/gosrc/google.go
@@ -12,6 +12,7 @@
 	"net/url"
 	"regexp"
 	"strings"
+	"time"
 )
 
 func init() {
@@ -48,7 +49,7 @@
 	return c.err(resp)
 }
 
-func getGoogleDir(client *http.Client, match map[string]string, savedEtag string) (*Directory, error) {
+func getGoogleDir(client *http.Client, match map[string]string, savedEtag string, updated time.Time) (*Directory, error) {
 	setupGoogleMatch(match)
 	c := &httpClient{client: client}
 
diff --git a/gosrc/gosrc.go b/gosrc/gosrc.go
index 1505737..7953ea8 100644
--- a/gosrc/gosrc.go
+++ b/gosrc/gosrc.go
@@ -16,6 +16,7 @@
 	"path"
 	"regexp"
 	"strings"
+	"time"
 )
 
 // File represents a file.
@@ -114,7 +115,7 @@
 type service struct {
 	pattern         *regexp.Regexp
 	prefix          string
-	get             func(*http.Client, map[string]string, string) (*Directory, error)
+	get             func(*http.Client, map[string]string, string, time.Time) (*Directory, error)
 	getPresentation func(*http.Client, map[string]string) (*Presentation, error)
 	getProject      func(*http.Client, map[string]string) (*Project, error)
 }
@@ -304,7 +305,7 @@
 // getVCSDirFn is called by getDynamic to fetch source using VCS commands. The
 // default value here does nothing. If the code is not built for App Engine,
 // then getvCSDirFn is set getVCSDir, the function that actually does the work.
-var getVCSDirFn = func(client *http.Client, m map[string]string, etag string) (*Directory, error) {
+var getVCSDirFn = func(client *http.Client, m map[string]string, etag string, updated time.Time) (*Directory, error) {
 	return nil, errNoMatch
 }
 
@@ -340,7 +341,7 @@
 	dirName := importPath[len(im.projectRoot):]
 
 	resolvedPath := repo + dirName
-	dir, err := getStatic(client, resolvedPath, etag)
+	dir, err := getStatic(client, resolvedPath, etag, time.Time{})
 	if err == errNoMatch {
 		resolvedPath = repo + "." + im.vcs + dirName
 		match := map[string]string{
@@ -351,7 +352,7 @@
 			"scheme":     proto,
 			"vcs":        im.vcs,
 		}
-		dir, err = getVCSDirFn(client, match, etag)
+		dir, err = getVCSDirFn(client, match, etag, time.Time{})
 	}
 	if err != nil || dir == nil {
 		return nil, err
@@ -406,7 +407,7 @@
 
 // getStatic gets a diretory from a statically known service. getStatic
 // returns errNoMatch if the import path is not recognized.
-func getStatic(client *http.Client, importPath, etag string) (*Directory, error) {
+func getStatic(client *http.Client, importPath, etag string, updated time.Time) (*Directory, error) {
 	for _, s := range services {
 		if s.get == nil {
 			continue
@@ -416,7 +417,7 @@
 			return nil, err
 		}
 		if match != nil {
-			dir, err := s.get(client, match, etag)
+			dir, err := s.get(client, match, etag, updated)
 			if dir != nil {
 				dir.ImportPath = importPath
 				dir.ResolvedPath = importPath
@@ -427,14 +428,14 @@
 	return nil, errNoMatch
 }
 
-func Get(client *http.Client, importPath string, etag string) (dir *Directory, err error) {
+func Get(client *http.Client, importPath string, etag string, updated time.Time) (dir *Directory, err error) {
 	switch {
 	case localPath != "":
 		dir, err = getLocal(importPath)
 	case IsGoRepoPath(importPath):
 		dir, err = getStandardDir(client, importPath, etag)
 	case IsValidRemotePath(importPath):
-		dir, err = getStatic(client, importPath, etag)
+		dir, err = getStatic(client, importPath, etag, updated)
 		if err == errNoMatch {
 			dir, err = getDynamic(client, importPath, etag)
 		}
diff --git a/gosrc/launchpad.go b/gosrc/launchpad.go
index 5cda037..fe3586b 100644
--- a/gosrc/launchpad.go
+++ b/gosrc/launchpad.go
@@ -18,6 +18,7 @@
 	"regexp"
 	"sort"
 	"strings"
+	"time"
 )
 
 func init() {
@@ -41,7 +42,7 @@
 	copy(p[j*md5.Size:], temp[:])
 }
 
-func getLaunchpadDir(client *http.Client, match map[string]string, savedEtag string) (*Directory, error) {
+func getLaunchpadDir(client *http.Client, match map[string]string, savedEtag string, updated time.Time) (*Directory, error) {
 	c := &httpClient{client: client}
 
 	if match["project"] != "" && match["series"] != "" {
diff --git a/gosrc/vcs.go b/gosrc/vcs.go
index 6fbb545..e2750f0 100644
--- a/gosrc/vcs.go
+++ b/gosrc/vcs.go
@@ -245,7 +245,7 @@
 	return "", NotFoundError{Message: "Last changed revision not found"}
 }
 
-func getVCSDir(client *http.Client, match map[string]string, etagSaved string) (*Directory, error) {
+func getVCSDir(client *http.Client, match map[string]string, etagSaved string, updated time.Time) (*Directory, error) {
 	cmd := vcsCmds[match["vcs"]]
 	if cmd == nil {
 		return nil, NotFoundError{Message: expand("VCS not supported: {vcs}", match)}