Merge pull request #327 from garyburd/impl

Improve synopsis text indexing
diff --git a/database/index.go b/database/index.go
index 3986cdf..69279bb 100644
--- a/database/index.go
+++ b/database/index.go
@@ -48,6 +48,60 @@
 
 var httpPat = regexp.MustCompile(`https?://\S+`)
 
+func collectSynopsisTerms(terms map[string]bool, synopsis string) {
+	// Adds the index terms of a synopsis to terms (modified in place), skipping stop words and leading boilerplate.
+	synopsis = httpPat.ReplaceAllLiteralString(synopsis, "") // remove text matching httpPat (http/https URLs) before splitting
+
+	fields := strings.FieldsFunc(synopsis, isTermSep)
+	for i := range fields {
+		fields[i] = strings.ToLower(fields[i]) // every comparison below is against lower-case words
+	}
+
+	// Ignore boilerplate in the following common patterns:
+	//  Package foo ...
+	//  Command foo ...
+	//  Package foo implements ... (and provides, contains)
+	//  The foo package ...
+	//  The foo package implements ...
+	//  The foo command ...
+
+	checkPackageVerb := false // set only by the "package" forms; enables the verb check after the switch
+	switch {
+	case len(fields) >= 1 && fields[0] == "package":
+		fields = fields[1:]
+		checkPackageVerb = true
+	case len(fields) >= 1 && fields[0] == "command":
+		fields = fields[1:] // NOTE(review): checkPackageVerb stays false, so "Command foo implements ..." keeps the verb as a term unless it is a stop word — confirm intended
+	case len(fields) >= 3 && fields[0] == "the" && fields[2] == "package":
+		fields[2] = fields[1] // overwrite "package" with the name so that fields[2:] starts at the name
+		fields = fields[2:]
+		checkPackageVerb = true
+	case len(fields) >= 3 && fields[0] == "the" && fields[2] == "command":
+		fields[2] = fields[1] // same shuffle as the package form: keep the name, drop "the" and "command"
+		fields = fields[2:]
+	}
+
+	if checkPackageVerb && len(fields) >= 2 &&
+		(fields[1] == "implements" || fields[1] == "provides" || fields[1] == "contains") {
+		fields[1] = fields[0] // fields[0] is the word kept from the boilerplate (foo above); copy it over the verb, then drop slot 0
+		fields = fields[1:]
+	}
+
+	for _, s := range fields {
+		if !stopWord[s] {
+			terms[term(s)] = true // term is defined elsewhere in the package; presumably it stems the word — TODO confirm
+		}
+	}
+}
+
+func termSlice(terms map[string]bool) []string { // returns every key of terms as a slice, regardless of the key's bool value
+	result := make([]string, 0, len(terms)) // capacity is known up front; the result is non-nil even when terms is empty
+	for term := range terms { // map iteration order is unspecified, so callers must sort if order matters
+		result = append(result, term) // note: the loop variable term shadows the package's term function inside this loop
+	}
+	return result
+}
+
 func documentTerms(pdoc *doc.Package, score float64) []string {
 
 	terms := make(map[string]bool)
@@ -87,20 +141,11 @@
 
 		// Synopsis
 
-		synopsis := httpPat.ReplaceAllLiteralString(pdoc.Synopsis, "")
-		for i, s := range strings.FieldsFunc(synopsis, isTermSep) {
-			s = strings.ToLower(s)
-			if !stopWord[s] && (i > 3 || s != "package") {
-				terms[term(s)] = true
-			}
-		}
+		collectSynopsisTerms(terms, pdoc.Synopsis)
+
 	}
 
-	result := make([]string, 0, len(terms))
-	for term := range terms {
-		result = append(result, term)
-	}
-	return result
+	return termSlice(terms)
 }
 
 // vendorPat matches the path of a vendored package.
diff --git a/database/index_test.go b/database/index_test.go
index ea40f69..e0092bf 100644
--- a/database/index_test.go
+++ b/database/index_test.go
@@ -95,7 +95,7 @@
 		sort.Strings(terms)
 		sort.Strings(tt.terms)
 		if !reflect.DeepEqual(terms, tt.terms) {
-			t.Errorf("documentTerms(%s)=%#v, want %#v", tt.pdoc.ImportPath, terms, tt.terms)
+			t.Errorf("documentTerms(%s) ->\n got: %#v\nwant: %#v", tt.pdoc.ImportPath, terms, tt.terms)
 		}
 	}
 }
@@ -127,3 +127,76 @@
 		}
 	}
 }
+
+var synopsisTermTests = []struct { // table of cases consumed by TestSynopsisTerms
+	synopsis string   // input passed to collectSynopsisTerms
+	terms    []string // terms expected afterwards, in any order (the test sorts both sides before comparing)
+}{
+	{
+		"Package foo implements bar.",
+		[]string{"bar", "foo"},
+	},
+	{
+		"Package foo provides bar.",
+		[]string{"bar", "foo"},
+	},
+	{
+		"The foo package provides bar.",
+		[]string{"bar", "foo"},
+	},
+	{
+		"Package foo contains an implementation of bar.",
+		[]string{"bar", "foo", "impl"}, // "implementation" is expected back as "impl"; "an" and "of" are expected to be dropped
+	},
+	{
+		"Package foo is awesome",
+		[]string{"awesom", "foo"},
+	},
+	{
+		"The foo package is awesome",
+		[]string{"awesom", "foo"},
+	},
+	{
+		"The foo command is awesome",
+		[]string{"awesom", "foo"},
+	},
+	{
+		"Command foo is awesome",
+		[]string{"awesom", "foo"},
+	},
+	{
+		"The foo package",
+		[]string{"foo"},
+	},
+	{
+		"Package foo",
+		[]string{"foo"},
+	},
+	{
+		"Command foo",
+		[]string{"foo"},
+	},
+	{
+		"Package",
+		[]string{}, // the lone boilerplate word is stripped, leaving no terms
+	},
+	{
+		"Command",
+		[]string{},
+	},
+}
+
+func TestSynopsisTerms(t *testing.T) {
+	for _, tt := range synopsisTermTests {
+		terms := make(map[string]bool) // fresh map per case so terms cannot leak between cases
+		collectSynopsisTerms(terms, tt.synopsis)
+		// termSlice ranges over a map, so its order is unspecified; sort both sides before comparing.
+		actual := termSlice(terms)
+		expected := tt.terms // shares the table entry's backing array, so the sort below reorders tt.terms in place
+		sort.Strings(actual)
+		sort.Strings(expected)
+		if !reflect.DeepEqual(actual, expected) { // DeepEqual distinguishes nil from empty slices; termSlice returns a non-nil empty slice, matching []string{} in the table
+			t.Errorf("%q ->\n got: %#v\nwant: %#v", tt.synopsis, actual, expected)
+		}
+	}
+}
diff --git a/database/stop.go b/database/stop.go
index 3b23e34..2a519fd 100644
--- a/database/stop.go
+++ b/database/stop.go
@@ -73,8 +73,6 @@
 how
 i
 if
-implement
-implements
 in
 into
 is