database: Redesign search functions with GAE Search API.

This change adds new indexing and search functions, built on the GAE
Search API, in indexae.go. The gae_search flag (declared in the database
package and checked by the main package) controls whether the new search
functions are used.

The final goal is to replace index.go with the new methods, deprecating
the current stemming algorithm in favor of the one provided by the
Search API. Functions that replace existing ones carry a "2" suffix in
their names (e.g. parseQuery2, isTermSep2).

Local development is unaffected when the gae_search flag is turned off.
Running locally with the GAE Search API requires dev_appserver and an
app.yaml file, which is included. Instructions on how to do so will be
added to the wiki page when the code is submitted.

Change-Id: Ia889684176bafb2d6eac075061c06a733667c914
Reviewed-on: https://go-review.googlesource.com/23794
Reviewed-by: Alan Donovan <adonovan@google.com>
Reviewed-by: Andrew Gerrand <adg@golang.org>
diff --git a/database/database.go b/database/database.go
index 16abbac..100bf62 100644
--- a/database/database.go
+++ b/database/database.go
@@ -46,9 +46,12 @@
 	"time"
 
 	"github.com/garyburd/redigo/redis"
+	"github.com/golang/snappy"
+	"golang.org/x/net/context"
+	"google.golang.org/appengine"
+
 	"github.com/golang/gddo/doc"
 	"github.com/golang/gddo/gosrc"
-	"github.com/golang/snappy"
 )
 
 type Database struct {
@@ -72,6 +75,7 @@
 	redisServer      = flag.String("db-server", "redis://127.0.0.1:6379", "URI of Redis server.")
 	redisIdleTimeout = flag.Duration("db-idle-timeout", 250*time.Second, "Close Redis connections after remaining idle for this duration.")
 	redisLog         = flag.Bool("db-log", false, "Log database commands")
+	GAESearch        = flag.Bool("gae_search", false, "Use GAE Search API in the search function.")
 )
 
 func dialDb() (c redis.Conn, err error) {
@@ -198,6 +202,8 @@
 	return err
 }
 
+var bgCtx = appengine.BackgroundContext // replaced by tests
+
 // Put adds the package documentation to the database.
 func (db *Database) Put(pdoc *doc.Package, nextCrawl time.Time, hide bool) error {
 	c := db.Pool.Get()
@@ -246,11 +252,35 @@
 		t = nextCrawl.Unix()
 	}
 
-	_, err := putScript.Do(c, pdoc.ImportPath, pdoc.Synopsis, score, gobBytes, strings.Join(terms, " "), pdoc.Etag, kind, t)
+	// Get the old version of the package to extract its imports.
+	// If the package does not exist, both old and err will be nil.
+	old, _, err := db.getDoc(c, pdoc.ImportPath)
 	if err != nil {
 		return err
 	}
 
+	_, err = putScript.Do(c, pdoc.ImportPath, pdoc.Synopsis, score, gobBytes, strings.Join(terms, " "), pdoc.Etag, kind, t)
+	if err != nil {
+		return err
+	}
+
+	if *GAESearch {
+		id, n, err := pkgIDAndImportCount(c, pdoc.ImportPath)
+		if err != nil {
+			return err
+		}
+		ctx := bgCtx()
+		if err := PutIndex(ctx, pdoc, id, score, n); err != nil {
+			log.Printf("Cannot put %q in index: %v", pdoc.ImportPath, err)
+		}
+
+		if old != nil {
+			if err := updateImportsIndex(c, ctx, old, pdoc); err != nil {
+				return err
+			}
+		}
+	}
+
 	if nextCrawl.IsZero() {
 		// Skip crawling related packages if this is not a full save.
 		return nil
@@ -287,6 +317,48 @@
 	return err
 }
 
+// pkgIDAndImportCount returns the ID and import count of a specified package.
+func pkgIDAndImportCount(c redis.Conn, path string) (id string, numImported int, err error) {
+	numImported, err = redis.Int(c.Do("SCARD", "index:import:"+path))
+	if err != nil {
+		return
+	}
+	id, err = redis.String(c.Do("HGET", "ids", path))
+	if err == redis.ErrNil {
+		return "", 0, nil
+	}
+	return id, numImported, nil
+}
+
+func updateImportsIndex(c redis.Conn, ctx context.Context, oldDoc, newDoc *doc.Package) error {
+	// Collect the imports that oldDoc has but newDoc no longer has (removed imports).
+	changes := make(map[string]bool)
+	for _, p := range oldDoc.Imports {
+		if gosrc.IsValidRemotePath(p) {
+			changes[p] = true
+		}
+	}
+	for _, p := range newDoc.Imports {
+		if gosrc.IsValidRemotePath(p) {
+			delete(changes, p)
+		}
+	}
+
+	// For each import change, re-index that package with its updated ImportCount.
+	// In practice this should not happen often, and when it does, the changes
+	// are likely to be few.
+	for p, _ := range changes {
+		id, n, err := pkgIDAndImportCount(c, p)
+		if err != nil {
+			return err
+		}
+		if id != "" {
+			PutIndex(ctx, nil, id, -1, n)
+		}
+	}
+	return nil
+}
+
 var setNextCrawlEtagScript = redis.NewScript(0, `
     local root = ARGV[1]
     local etag = ARGV[2]
diff --git a/database/database_test.go b/database/database_test.go
index 5d21e78..fe5a1da 100644
--- a/database/database_test.go
+++ b/database/database_test.go
@@ -14,6 +14,9 @@
 	"time"
 
 	"github.com/garyburd/redigo/redis"
+	"golang.org/x/net/context"
+	"google.golang.org/appengine/aetest"
+
 	"github.com/golang/gddo/doc"
 )
 
@@ -35,7 +38,7 @@
 	defer c.Close()
 	n, err := redis.Int(c.Do("DBSIZE"))
 	if n != 0 || err != nil {
-		t.Fatalf("DBSIZE returned %d, %v", n, err)
+		t.Errorf("DBSIZE returned %d, %v", n, err)
 	}
 	return &Database{Pool: p}
 }
@@ -48,6 +51,14 @@
 
 func TestPutGet(t *testing.T) {
 	var nextCrawl = time.Unix(time.Now().Add(time.Hour).Unix(), 0).UTC()
+	ctx, done, err := aetest.NewContext()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer done()
+	bgCtx = func() context.Context {
+		return ctx
+	}
 
 	db := newDB(t)
 	defer closeDB(db)
diff --git a/database/index.go b/database/index.go
index 05f5864..149a480 100644
--- a/database/index.go
+++ b/database/index.go
@@ -21,7 +21,9 @@
 }
 
 func isTermSep(r rune) bool {
-	return unicode.IsSpace(r) || unicode.IsPunct(r) && r != '.' || unicode.IsSymbol(r)
+	return unicode.IsSpace(r) ||
+		r != '.' && unicode.IsPunct(r) ||
+		unicode.IsSymbol(r)
 }
 
 func normalizeProjectRoot(projectRoot string) string {
@@ -45,7 +47,7 @@
 	}
 
 	// Trim the trailing period at the end of any sentence.
-	return stem(strings.Trim(s, "."))
+	return stem(strings.TrimSuffix(s, "."))
 }
 
 var httpPat = regexp.MustCompile(`https?://\S+`)
diff --git a/database/indexae.go b/database/indexae.go
new file mode 100644
index 0000000..8000761
--- /dev/null
+++ b/database/indexae.go
@@ -0,0 +1,120 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd.
+
+package database
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"strings"
+	"unicode"
+
+	"golang.org/x/net/context"
+	"google.golang.org/appengine/search"
+
+	"github.com/golang/gddo/doc"
+)
+
+// PackageDocument defines the data structure used to represent a package document
+// in the search index.
+type PackageDocument struct {
+	Name        search.Atom
+	Path        string
+	Synopsis    string
+	Score       float64
+	ImportCount float64
+}
+
+// PutIndex creates or updates a package entry in the search index. id identifies the document in the index.
+// If pdoc is non-nil, PutIndex will update the package's name, path and synopsis supplied by pdoc.
+// pdoc must be non-nil for a package's first call to PutIndex.
+// PutIndex updates the Score to score, if non-negative.
+func PutIndex(c context.Context, pdoc *doc.Package, id string, score float64, importCount int) error {
+	if id == "" {
+		return errors.New("indexae: no id assigned")
+	}
+	idx, err := search.Open("packages")
+	if err != nil {
+		return err
+	}
+
+	var pkg PackageDocument
+	if err := idx.Get(c, id, &pkg); err != nil {
+		if err != search.ErrNoSuchDocument {
+			return err
+		} else if pdoc == nil {
+			// Cannot update a non-existing document.
+			return errors.New("indexae: cannot create new document with nil pdoc")
+		}
+		// No such document in the index, fall through.
+	}
+
+	// Update document information accordingly.
+	if pdoc != nil {
+		pkg.Name = search.Atom(pdoc.ProjectName)
+		pkg.Path = pdoc.ImportPath
+		pkg.Synopsis = pdoc.Synopsis
+	}
+	if score >= 0 {
+		pkg.Score = score
+	}
+	pkg.ImportCount = float64(importCount)
+
+	if _, err := idx.Put(c, id, &pkg); err != nil {
+		return err
+	}
+	return nil
+}
+
+// Search searches the packages index for a given query. Path-like terms
+// (containing '.' or '/') are quoted, whereas single words are stemmed.
+func Search(c context.Context, q string) ([]Package, error) {
+	index, err := search.Open("packages")
+	if err != nil {
+		return nil, err
+	}
+	var pkgs []Package
+	opt := &search.SearchOptions{
+		Sort: &search.SortOptions{
+			Expressions: []search.SortExpression{
+				{Expr: "Score * log(10 + ImportCount)"},
+			},
+		},
+	}
+	for it := index.Search(c, parseQuery2(q), opt); ; {
+		var pkg PackageDocument
+		_, err := it.Next(&pkg)
+		if err == search.Done {
+			break
+		}
+		if err != nil {
+			return nil, err
+		}
+		pkgs = append(pkgs, Package{pkg.Path, pkg.Synopsis})
+	}
+	return pkgs, nil
+}
+
+func parseQuery2(q string) string {
+	var buf bytes.Buffer
+	for _, s := range strings.FieldsFunc(q, isTermSep2) {
+		if strings.ContainsAny(s, "./") {
+			// Quote terms with / or . for path like query.
+			fmt.Fprintf(&buf, "%q ", s)
+		} else {
+			// Stem for single word terms.
+			fmt.Fprintf(&buf, "~%v ", s)
+		}
+	}
+	return buf.String()
+}
+
+func isTermSep2(r rune) bool {
+	return unicode.IsSpace(r) ||
+		r != '.' && r != '/' && unicode.IsPunct(r) ||
+		unicode.IsSymbol(r)
+}
diff --git a/database/indexae_test.go b/database/indexae_test.go
new file mode 100644
index 0000000..84a636a
--- /dev/null
+++ b/database/indexae_test.go
@@ -0,0 +1,122 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd.
+
+package database
+
+import (
+	"math"
+	"strconv"
+	"testing"
+
+	"google.golang.org/appengine/aetest"
+	"google.golang.org/appengine/search"
+
+	"github.com/golang/gddo/doc"
+)
+
+var pdoc = &doc.Package{
+	ImportPath:  "github.com/golang/test",
+	ProjectName: "test",
+	Synopsis:    "This is a test package.",
+}
+
+func TestPutIndexWithEmptyId(t *testing.T) {
+	c, done, err := aetest.NewContext()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer done()
+
+	if err := PutIndex(c, nil, "", 0, 0); err == nil {
+		t.Errorf("PutIndex succeeded unexpectedly")
+	}
+}
+
+func TestPutIndexCreateNilDoc(t *testing.T) {
+	c, done, err := aetest.NewContext()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer done()
+
+	if err := PutIndex(c, nil, "12345", -1, 2); err == nil {
+		t.Errorf("PutIndex succeeded unexpectedly")
+	}
+}
+
+func TestPutIndexNewPackageAndUpdate(t *testing.T) {
+	c, done, err := aetest.NewContext()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer done()
+
+	// Put a new package into search index.
+	if err := PutIndex(c, pdoc, "12345", 0.99, 1); err != nil {
+		t.Fatal(err)
+	}
+
+	// Verify that the package that was put in is as expected.
+	idx, err := search.Open("packages")
+	if err != nil {
+		t.Fatal(err)
+	}
+	var got PackageDocument
+	if err = idx.Get(c, "12345", &got); err != nil && err != search.ErrNoSuchDocument {
+		t.Fatal(err)
+	}
+	wanted := PackageDocument{
+		Name:        search.Atom(pdoc.ProjectName),
+		Path:        pdoc.ImportPath,
+		Synopsis:    pdoc.Synopsis,
+		Score:       0.99,
+		ImportCount: 1,
+	}
+	if got != wanted {
+		t.Errorf("PutIndex got %v, want %v", got, wanted)
+	}
+
+	// Update the import count of the package.
+	if err := PutIndex(c, nil, "12345", -1, 2); err != nil {
+		t.Fatal(err)
+	}
+	if err := idx.Get(c, "12345", &got); err != nil && err != search.ErrNoSuchDocument {
+		t.Fatal(err)
+	}
+	wanted.ImportCount = 2
+	if got != wanted {
+		t.Errorf("PutIndex got %v, want %v", got, wanted)
+	}
+}
+
+func TestSearchResultSorted(t *testing.T) {
+	c, done, err := aetest.NewContext()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer done()
+
+	// Put multiple packages into the search index and the search result
+	// should be sorted properly.
+	id := "1"
+	for i := 2; i < 6; i++ {
+		id += strconv.Itoa(i)
+		pdoc.Synopsis = id
+		if err := PutIndex(c, pdoc, id, math.Pow(0.95, float64(i)), 10*i); err != nil {
+			t.Fatal(err)
+		}
+	}
+	got, err := Search(c, "test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	wanted := []string{"1234", "12345", "123", "12"}
+	for i, p := range got {
+		if p.Synopsis != wanted[i] {
+			t.Errorf("PutIndex got %v, want %v", p, wanted[i])
+		}
+	}
+}
diff --git a/gddo-server/app.yaml b/gddo-server/app.yaml
new file mode 100644
index 0000000..8994fbc
--- /dev/null
+++ b/gddo-server/app.yaml
@@ -0,0 +1,10 @@
+# This YAML file is used for local deployment with the GAE development environment.
+runtime: go
+vm: true
+api_version: 1
+threadsafe: true
+
+handlers:
+- url: /.*
+  script: IGNORED
+  secure: always
diff --git a/gddo-server/main.go b/gddo-server/main.go
index 0b0ef45..a68edb3 100644
--- a/gddo-server/main.go
+++ b/gddo-server/main.go
@@ -31,6 +31,7 @@
 
 	"golang.org/x/net/context"
 	"golang.org/x/oauth2/google"
+	"google.golang.org/appengine"
 	"google.golang.org/cloud"
 	"google.golang.org/cloud/compute/metadata"
 	"google.golang.org/cloud/logging"
@@ -572,7 +573,16 @@
 		}
 	}
 
-	pkgs, err := db.Query(q)
+	var (
+		pkgs []database.Package
+		err  error
+	)
+	if *database.GAESearch {
+		ctx := appengine.NewContext(req)
+		pkgs, err = database.Search(ctx, q)
+	} else {
+		pkgs, err = db.Query(q)
+	}
 	if err != nil {
 		return err
 	}
@@ -1009,5 +1019,10 @@
 		gceLogger = newGCELogger(logc)
 	}
 
-	log.Fatal(http.ListenAndServe(*httpAddr, root))
+	if *database.GAESearch {
+		http.Handle("/", root)
+		appengine.Main()
+	} else {
+		log.Fatal(http.ListenAndServe(*httpAddr, root))
+	}
 }