Merge pull request #327 from garyburd/impl
Improve synopsis text indexing
diff --git a/database/index.go b/database/index.go
index 3986cdf..69279bb 100644
--- a/database/index.go
+++ b/database/index.go
@@ -48,6 +48,60 @@
var httpPat = regexp.MustCompile(`https?://\S+`)
+func collectSynopsisTerms(terms map[string]bool, synopsis string) {
+ // collectSynopsisTerms adds the index terms found in synopsis to the terms set, which is mutated in place.
+ synopsis = httpPat.ReplaceAllLiteralString(synopsis, "") // delete every httpPat match (http/https URLs) before splitting
+ // Split into words using isTermSep (defined elsewhere in this package) and lowercase each word in place.
+ fields := strings.FieldsFunc(synopsis, isTermSep)
+ for i := range fields {
+ fields[i] = strings.ToLower(fields[i])
+ }
+
+ // Ignore boilerplate in the following common patterns:
+ // Package foo ...
+ // Command foo ...
+ // Package foo implements ... (and provides, contains)
+ // The foo package ...
+ // The foo package implements ...
+ // The foo command ...
+ // In every pattern the name ("foo") is kept; only the surrounding boilerplate words are dropped.
+ checkPackageVerb := false // set when a "package" form matches; enables the verb check after the switch
+ switch {
+ case len(fields) >= 1 && fields[0] == "package":
+ fields = fields[1:]
+ checkPackageVerb = true
+ case len(fields) >= 1 && fields[0] == "command":
+ fields = fields[1:]
+ case len(fields) >= 3 && fields[0] == "the" && fields[2] == "package":
+ fields[2] = fields[1] // copy the name over "package" so that fields[2:] starts at the name
+ fields = fields[2:]
+ checkPackageVerb = true
+ case len(fields) >= 3 && fields[0] == "the" && fields[2] == "command":
+ fields[2] = fields[1] // copy the name over "command" so that fields[2:] starts at the name
+ fields = fields[2:]
+ }
+ // After a "package" form, also drop the verb (implements, provides, contains) when it directly follows the name.
+ if checkPackageVerb && len(fields) >= 2 &&
+ (fields[1] == "implements" || fields[1] == "provides" || fields[1] == "contains") {
+ fields[1] = fields[0] // copy the name over the verb so that fields[1:] starts at the name
+ fields = fields[1:]
+ }
+ // Record each remaining word that is not in stopWord. term is defined elsewhere; presumably it normalizes/stems the word (confirm).
+ for _, s := range fields {
+ if !stopWord[s] {
+ terms[term(s)] = true
+ }
+ }
+}
+// termSlice returns the keys of terms as a new slice; the order follows map iteration and is therefore unspecified.
+func termSlice(terms map[string]bool) []string {
+ result := make([]string, 0, len(terms)) // pre-sized: exactly one element is appended per map key
+ for term := range terms {
+ result = append(result, term)
+ }
+ return result
+}
+
func documentTerms(pdoc *doc.Package, score float64) []string {
terms := make(map[string]bool)
@@ -87,20 +141,11 @@
// Synopsis
- synopsis := httpPat.ReplaceAllLiteralString(pdoc.Synopsis, "")
- for i, s := range strings.FieldsFunc(synopsis, isTermSep) {
- s = strings.ToLower(s)
- if !stopWord[s] && (i > 3 || s != "package") {
- terms[term(s)] = true
- }
- }
+ collectSynopsisTerms(terms, pdoc.Synopsis)
+
}
- result := make([]string, 0, len(terms))
- for term := range terms {
- result = append(result, term)
- }
- return result
+ return termSlice(terms)
}
// vendorPat matches the path of a vendored package.
diff --git a/database/index_test.go b/database/index_test.go
index ea40f69..e0092bf 100644
--- a/database/index_test.go
+++ b/database/index_test.go
@@ -95,7 +95,7 @@
sort.Strings(terms)
sort.Strings(tt.terms)
if !reflect.DeepEqual(terms, tt.terms) {
- t.Errorf("documentTerms(%s)=%#v, want %#v", tt.pdoc.ImportPath, terms, tt.terms)
+ t.Errorf("documentTerms(%s) ->\n got: %#v\nwant: %#v", tt.pdoc.ImportPath, terms, tt.terms)
}
}
}
@@ -127,3 +127,76 @@
}
}
}
+// synopsisTermTests pairs each synopsis with the terms TestSynopsisTerms expects collectSynopsisTerms to produce for it.
+var synopsisTermTests = []struct {
+ synopsis string // input passed to collectSynopsisTerms
+ terms []string // expected terms; order does not matter because the test sorts both sides
+}{
+ {
+ "Package foo implements bar.",
+ []string{"bar", "foo"},
+ },
+ {
+ "Package foo provides bar.",
+ []string{"bar", "foo"},
+ },
+ {
+ "The foo package provides bar.",
+ []string{"bar", "foo"},
+ },
+ {
+ "Package foo contains an implementation of bar.",
+ []string{"bar", "foo", "impl"},
+ },
+ {
+ "Package foo is awesome",
+ []string{"awesom", "foo"},
+ },
+ {
+ "The foo package is awesome",
+ []string{"awesom", "foo"},
+ },
+ {
+ "The foo command is awesome",
+ []string{"awesom", "foo"},
+ },
+ {
+ "Command foo is awesome",
+ []string{"awesom", "foo"},
+ },
+ {
+ "The foo package",
+ []string{"foo"},
+ },
+ {
+ "Package foo",
+ []string{"foo"},
+ },
+ {
+ "Command foo",
+ []string{"foo"},
+ },
+ {
+ "Package",
+ []string{},
+ },
+ {
+ "Command",
+ []string{},
+ },
+}
+// TestSynopsisTerms runs collectSynopsisTerms on every synopsisTermTests entry and compares the sorted result with the sorted expected terms.
+func TestSynopsisTerms(t *testing.T) {
+ for _, tt := range synopsisTermTests {
+ terms := make(map[string]bool)
+ collectSynopsisTerms(terms, tt.synopsis)
+ // Sort both sides before comparing. expected shares its backing array with tt.terms, so the table entry is sorted in place.
+ actual := termSlice(terms)
+ expected := tt.terms
+ sort.Strings(actual)
+ sort.Strings(expected)
+ if !reflect.DeepEqual(actual, expected) {
+ t.Errorf("%q ->\n got: %#v\nwant: %#v", tt.synopsis, actual, expected)
+ }
+ }
+}
diff --git a/database/stop.go b/database/stop.go
index 3b23e34..2a519fd 100644
--- a/database/stop.go
+++ b/database/stop.go
@@ -73,8 +73,6 @@
how
i
if
-implement
-implements
in
into
is