godoc: init corpus in a separate goroutine in http mode

Currently, in http mode the server blocks until the corpus
has been initialized. This can cause considerable delay
if the user workspace is significantly large and the files
are not present in the buffer cache.

This CL spawns off the initialization in a separate goroutine
if httpMode is set and turns on a flag when it's done.
The http handler checks the flag and returns an error response
if it has not been set.

The check is only performed for the path prefixes handled by the
handlerServer struct. Other paths do not call the GetPageInfo() function
and hence can return immediately. This preserves maximum responsiveness
of the server.

Also adds an additional print statement in verbose mode.

Note: This is a re-do of a previous CL golang.org/cl/88695 which was
incorrectly committed without running tests. This CL fixes that test.

Fixes golang/go#13278

Change-Id: I80c801f32af007312090d3783a2ea2c6f92cad66
Reviewed-on: https://go-review.googlesource.com/93215
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/cmd/godoc/godoc_test.go b/cmd/godoc/godoc_test.go
index fe87128..a2552d9 100644
--- a/cmd/godoc/godoc_test.go
+++ b/cmd/godoc/godoc_test.go
@@ -154,19 +154,32 @@
 	waitForServer(t,
 		fmt.Sprintf("http://%v/", addr),
 		"The Go Programming Language",
-		15*time.Second)
+		15*time.Second,
+		false)
 }
 
 func waitForSearchReady(t *testing.T, addr string) {
 	waitForServer(t,
 		fmt.Sprintf("http://%v/search?q=FALLTHROUGH", addr),
 		"The list of tokens.",
-		2*time.Minute)
+		2*time.Minute,
+		false)
+}
+
+func waitUntilScanComplete(t *testing.T, addr string) {
+	waitForServer(t,
+		fmt.Sprintf("http://%v/pkg", addr),
+		"Scan is not yet complete",
+		2*time.Minute,
+		true,
+	)
+	// setting reverse as true, which means this waits
+	// until the string is not returned in the response anymore
 }
 
 const pollInterval = 200 * time.Millisecond
 
-func waitForServer(t *testing.T, url, match string, timeout time.Duration) {
+func waitForServer(t *testing.T, url, match string, timeout time.Duration, reverse bool) {
 	// "health check" duplicated from x/tools/cmd/tipgodoc/tip.go
 	deadline := time.Now().Add(timeout)
 	for time.Now().Before(deadline) {
@@ -177,9 +190,13 @@
 		}
 		rbody, err := ioutil.ReadAll(res.Body)
 		res.Body.Close()
-		if err == nil && res.StatusCode == http.StatusOK &&
-			bytes.Contains(rbody, []byte(match)) {
-			return
+		if err == nil && res.StatusCode == http.StatusOK {
+			if bytes.Contains(rbody, []byte(match)) && !reverse {
+				return
+			}
+			if !bytes.Contains(rbody, []byte(match)) && reverse {
+				return
+			}
 		}
 	}
 	t.Fatalf("Server failed to respond in %v", timeout)
@@ -229,6 +246,7 @@
 		waitForSearchReady(t, addr)
 	} else {
 		waitForServerReady(t, addr)
+		waitUntilScanComplete(t, addr)
 	}
 
 	tests := []struct {
diff --git a/cmd/godoc/main.go b/cmd/godoc/main.go
index 227e29b..901e992 100644
--- a/cmd/godoc/main.go
+++ b/cmd/godoc/main.go
@@ -153,6 +153,13 @@
 	log.Fatalf("too many redirects")
 }
 
+func initCorpus(corpus *godoc.Corpus) {
+	err := corpus.Init()
+	if err != nil {
+		log.Fatal(err)
+	}
+}
+
 func main() {
 	flag.Usage = usage
 	flag.Parse()
@@ -231,8 +238,10 @@
 		corpus.IndexEnabled = true
 	}
 	if *writeIndex || httpMode || *urlFlag != "" {
-		if err := corpus.Init(); err != nil {
-			log.Fatal(err)
+		if httpMode {
+			go initCorpus(corpus)
+		} else {
+			initCorpus(corpus)
 		}
 	}
 
@@ -323,6 +332,9 @@
 		}
 
 		// Start http server.
+		if *verbose {
+			log.Println("starting HTTP server")
+		}
 		if err := http.ListenAndServe(*httpAddr, handler); err != nil {
 			log.Fatalf("ListenAndServe %s: %v", *httpAddr, err)
 		}
diff --git a/godoc/corpus.go b/godoc/corpus.go
index f2c7ebb..8e38365 100644
--- a/godoc/corpus.go
+++ b/godoc/corpus.go
@@ -7,6 +7,7 @@
 import (
 	"errors"
 	pathpkg "path"
+	"sync"
 	"time"
 
 	"golang.org/x/tools/godoc/analysis"
@@ -103,6 +104,10 @@
 
 	// Analysis is the result of type and pointer analysis.
 	Analysis analysis.Result
+
+	// flag to check whether a corpus is initialized or not
+	initMu   sync.RWMutex
+	initDone bool
 }
 
 // NewCorpus returns a new Corpus from a filesystem.
@@ -136,13 +141,15 @@
 // Init initializes Corpus, once options on Corpus are set.
 // It must be called before any subsequent method calls.
 func (c *Corpus) Init() error {
-	// TODO(bradfitz): do this in a goroutine because newDirectory might block for a long time?
-	// It used to be sometimes done in a goroutine before, at least in HTTP server mode.
 	if err := c.initFSTree(); err != nil {
 		return err
 	}
 	c.updateMetadata()
 	go c.refreshMetadataLoop()
+
+	c.initMu.Lock()
+	c.initDone = true
+	c.initMu.Unlock()
 	return nil
 }
 
diff --git a/godoc/server.go b/godoc/server.go
index 3b452e5..3ebff5c 100644
--- a/godoc/server.go
+++ b/godoc/server.go
@@ -7,6 +7,7 @@
 import (
 	"bytes"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"go/ast"
 	"go/build"
@@ -248,6 +249,12 @@
 	}
 
 	relpath := pathpkg.Clean(r.URL.Path[len(h.stripPrefix)+1:])
+
+	if !h.corpusInitialized() {
+		h.p.ServeError(w, r, relpath, errors.New("Scan is not yet complete. Please retry after a few moments"))
+		return
+	}
+
 	abspath := pathpkg.Join(h.fsRoot, relpath)
 	mode := h.p.GetPageInfoMode(r)
 	if relpath == builtinPkgPath {
@@ -322,6 +329,12 @@
 	})
 }
 
+func (h *handlerServer) corpusInitialized() bool {
+	h.c.initMu.RLock()
+	defer h.c.initMu.RUnlock()
+	return h.c.initDone
+}
+
 type PageInfoMode uint
 
 const (