blob: 43e629d51ef127fafc9666496950c025d8128eb5 [file] [log] [blame]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package fuzzy_test
import (
"go/ast"
"go/token"
"sort"
"testing"
"golang.org/x/tools/go/packages"
. "golang.org/x/tools/internal/fuzzy"
)
func TestSymbolMatchIndex(t *testing.T) {
tests := []struct {
pattern, input string
want int
}{
{"test", "foo.TestFoo", 4},
{"test", "test", 0},
{"test", "Test", 0},
{"test", "est", -1},
{"t", "shortest", 7},
{"", "foo", -1},
{"", string([]rune{0}), -1}, // verify that we don't default to an empty pattern.
{"anything", "", -1},
}
for _, test := range tests {
matcher := NewSymbolMatcher(test.pattern)
if got, _ := matcher.Match([]string{test.input}); got != test.want {
t.Errorf("NewSymbolMatcher(%q).Match(%q) = %v, _, want %v, _", test.pattern, test.input, got, test.want)
}
}
}
func TestSymbolRanking(t *testing.T) {
// query -> symbols to match, in ascending order of score
queryRanks := map[string][]string{
"test": {
"this.is.better.than.most",
"test.foo.bar",
"thebest",
"atest",
"test.foo",
"testage",
"tTest",
"foo.test",
},
"parseside": { // golang/go#60201
"yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE",
"parseContext.parse_sidebyside",
},
"cvb": {
"filecache_test.testIPCValueB",
"cover.Boundary",
},
"dho": {
"gocommand.DebugHangingGoCommands",
"protocol.DocumentHighlightOptions",
},
"flg": {
"completion.FALLTHROUGH",
"main.flagGoCmd",
},
"fvi": {
"godoc.fileIndexVersion",
"macho.FlagSubsectionsViaSymbols",
},
}
for query, symbols := range queryRanks {
t.Run(query, func(t *testing.T) {
matcher := NewSymbolMatcher(query)
prev := 0.0
for _, sym := range symbols {
_, score := matcher.Match([]string{sym})
t.Logf("Match(%q) = %v", sym, score)
if score <= prev {
t.Errorf("Match(%q) = _, %v, want > %v", sym, score, prev)
}
prev = score
}
})
}
}
func TestMatcherSimilarities(t *testing.T) {
// This test compares the fuzzy matcher with the symbol matcher on a corpus
// of qualified identifiers extracted from x/tools.
//
// These two matchers are not expected to agree, but inspecting differences
// can be useful for finding interesting ranking edge cases.
t.Skip("unskip this test to compare matchers")
idents := collectIdentifiers(t)
t.Logf("collected %d unique identifiers", len(idents))
// TODO: use go1.21 slices.MaxFunc.
topMatch := func(score func(string) float64) string {
top := ""
topScore := 0.0
for _, cand := range idents {
if s := score(cand); s > topScore {
top = cand
topScore = s
}
}
return top
}
agreed := 0
total := 0
bad := 0
patterns := generatePatterns()
for _, pattern := range patterns {
total++
fm := NewMatcher(pattern)
topFuzzy := topMatch(func(input string) float64 {
return float64(fm.Score(input))
})
sm := NewSymbolMatcher(pattern)
topSymbol := topMatch(func(input string) float64 {
_, score := sm.Match([]string{input})
return score
})
switch {
case topFuzzy == "" && topSymbol != "":
if false {
// The fuzzy matcher has a bug where it misses some matches; for this
// test we only care about the symbol matcher.
t.Logf("%q matched %q but no fuzzy match", pattern, topSymbol)
}
total--
bad++
case topFuzzy != "" && topSymbol == "":
t.Fatalf("%q matched %q but no symbol match", pattern, topFuzzy)
case topFuzzy == topSymbol:
agreed++
default:
// Enable this log to see mismatches.
if false {
t.Logf("mismatch for %q: fuzzy: %q, symbol: %q", pattern, topFuzzy, topSymbol)
}
}
}
t.Logf("fuzzy matchers agreed on %d out of %d queries (%d bad)", agreed, total, bad)
}
func collectIdentifiers(tb testing.TB) []string {
cfg := &packages.Config{
Mode: packages.NeedName | packages.NeedSyntax | packages.NeedFiles,
Tests: true,
}
pkgs, err := packages.Load(cfg, "golang.org/x/tools/...")
if err != nil {
tb.Fatal(err)
}
uniqueIdents := make(map[string]bool)
decls := 0
for _, pkg := range pkgs {
for _, f := range pkg.Syntax {
for _, decl := range f.Decls {
decls++
switch decl := decl.(type) {
case *ast.GenDecl:
for _, spec := range decl.Specs {
switch decl.Tok {
case token.IMPORT:
case token.TYPE:
name := spec.(*ast.TypeSpec).Name.Name
qualified := pkg.Name + "." + name
uniqueIdents[qualified] = true
case token.CONST, token.VAR:
for _, n := range spec.(*ast.ValueSpec).Names {
qualified := pkg.Name + "." + n.Name
uniqueIdents[qualified] = true
}
}
}
}
}
}
}
var idents []string
for k := range uniqueIdents {
idents = append(idents, k)
}
sort.Strings(idents)
return idents
}
func generatePatterns() []string {
var patterns []string
for x := 'a'; x <= 'z'; x++ {
for y := 'a'; y <= 'z'; y++ {
for z := 'a'; z <= 'z'; z++ {
patterns = append(patterns, string(x)+string(y)+string(z))
}
}
}
return patterns
}
// Test that we strongly prefer exact matches.
//
// In golang/go#60027, we preferred "Runner" for the query "rune" over several
// results containing the word "rune" exactly. Following this observation,
// scoring was tweaked to more strongly emphasize sequential characters and
// exact matches.
func TestSymbolRanking_Issue60027(t *testing.T) {
matcher := NewSymbolMatcher("rune")
// symbols to match, in ascending order of ranking.
symbols := []string{
"Runner",
"singleRuneParam",
"Config.ifsRune",
"Parser.rune",
}
prev := 0.0
for _, sym := range symbols {
_, score := matcher.Match([]string{sym})
t.Logf("Match(%q) = %v", sym, score)
if score < prev {
t.Errorf("Match(%q) = _, %v, want > %v", sym, score, prev)
}
prev = score
}
}
func TestChunkedMatch(t *testing.T) {
matcher := NewSymbolMatcher("test")
_, want := matcher.Match([]string{"test"})
chunked := [][]string{
{"", "test"},
{"test", ""},
{"te", "st"},
}
for _, chunks := range chunked {
offset, score := matcher.Match(chunks)
if offset != 0 || score != want {
t.Errorf("Match(%v) = %v, %v, want 0, 1.0", chunks, offset, score)
}
}
}