| // Copyright 2021 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package fuzzy_test |
| |
| import ( |
| "go/ast" |
| "go/token" |
| "sort" |
| "testing" |
| |
| "golang.org/x/tools/go/packages" |
| . "golang.org/x/tools/internal/fuzzy" |
| ) |
| |
| func TestSymbolMatchIndex(t *testing.T) { |
| tests := []struct { |
| pattern, input string |
| want int |
| }{ |
| {"test", "foo.TestFoo", 4}, |
| {"test", "test", 0}, |
| {"test", "Test", 0}, |
| {"test", "est", -1}, |
| {"t", "shortest", 7}, |
| {"", "foo", -1}, |
| {"", string([]rune{0}), -1}, // verify that we don't default to an empty pattern. |
| {"anything", "", -1}, |
| } |
| |
| for _, test := range tests { |
| matcher := NewSymbolMatcher(test.pattern) |
| if got, _ := matcher.Match([]string{test.input}); got != test.want { |
| t.Errorf("NewSymbolMatcher(%q).Match(%q) = %v, _, want %v, _", test.pattern, test.input, got, test.want) |
| } |
| } |
| } |
| |
| func TestSymbolRanking(t *testing.T) { |
| |
| // query -> symbols to match, in ascending order of score |
| queryRanks := map[string][]string{ |
| "test": { |
| "this.is.better.than.most", |
| "test.foo.bar", |
| "thebest", |
| "atest", |
| "test.foo", |
| "testage", |
| "tTest", |
| "foo.test", |
| }, |
| "parseside": { // golang/go#60201 |
| "yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE", |
| "parseContext.parse_sidebyside", |
| }, |
| "cvb": { |
| "filecache_test.testIPCValueB", |
| "cover.Boundary", |
| }, |
| "dho": { |
| "gocommand.DebugHangingGoCommands", |
| "protocol.DocumentHighlightOptions", |
| }, |
| "flg": { |
| "completion.FALLTHROUGH", |
| "main.flagGoCmd", |
| }, |
| "fvi": { |
| "godoc.fileIndexVersion", |
| "macho.FlagSubsectionsViaSymbols", |
| }, |
| } |
| |
| for query, symbols := range queryRanks { |
| t.Run(query, func(t *testing.T) { |
| matcher := NewSymbolMatcher(query) |
| prev := 0.0 |
| for _, sym := range symbols { |
| _, score := matcher.Match([]string{sym}) |
| t.Logf("Match(%q) = %v", sym, score) |
| if score <= prev { |
| t.Errorf("Match(%q) = _, %v, want > %v", sym, score, prev) |
| } |
| prev = score |
| } |
| }) |
| } |
| } |
| |
| func TestMatcherSimilarities(t *testing.T) { |
| // This test compares the fuzzy matcher with the symbol matcher on a corpus |
| // of qualified identifiers extracted from x/tools. |
| // |
| // These two matchers are not expected to agree, but inspecting differences |
| // can be useful for finding interesting ranking edge cases. |
| t.Skip("unskip this test to compare matchers") |
| |
| idents := collectIdentifiers(t) |
| t.Logf("collected %d unique identifiers", len(idents)) |
| |
| // TODO: use go1.21 slices.MaxFunc. |
| topMatch := func(score func(string) float64) string { |
| top := "" |
| topScore := 0.0 |
| for _, cand := range idents { |
| if s := score(cand); s > topScore { |
| top = cand |
| topScore = s |
| } |
| } |
| return top |
| } |
| |
| agreed := 0 |
| total := 0 |
| bad := 0 |
| patterns := generatePatterns() |
| for _, pattern := range patterns { |
| total++ |
| |
| fm := NewMatcher(pattern) |
| topFuzzy := topMatch(func(input string) float64 { |
| return float64(fm.Score(input)) |
| }) |
| sm := NewSymbolMatcher(pattern) |
| topSymbol := topMatch(func(input string) float64 { |
| _, score := sm.Match([]string{input}) |
| return score |
| }) |
| switch { |
| case topFuzzy == "" && topSymbol != "": |
| if false { |
| // The fuzzy matcher has a bug where it misses some matches; for this |
| // test we only care about the symbol matcher. |
| t.Logf("%q matched %q but no fuzzy match", pattern, topSymbol) |
| } |
| total-- |
| bad++ |
| case topFuzzy != "" && topSymbol == "": |
| t.Fatalf("%q matched %q but no symbol match", pattern, topFuzzy) |
| case topFuzzy == topSymbol: |
| agreed++ |
| default: |
| // Enable this log to see mismatches. |
| if false { |
| t.Logf("mismatch for %q: fuzzy: %q, symbol: %q", pattern, topFuzzy, topSymbol) |
| } |
| } |
| } |
| t.Logf("fuzzy matchers agreed on %d out of %d queries (%d bad)", agreed, total, bad) |
| } |
| |
| func collectIdentifiers(tb testing.TB) []string { |
| cfg := &packages.Config{ |
| Mode: packages.NeedName | packages.NeedSyntax | packages.NeedFiles, |
| Tests: true, |
| } |
| pkgs, err := packages.Load(cfg, "golang.org/x/tools/...") |
| if err != nil { |
| tb.Fatal(err) |
| } |
| uniqueIdents := make(map[string]bool) |
| decls := 0 |
| for _, pkg := range pkgs { |
| for _, f := range pkg.Syntax { |
| for _, decl := range f.Decls { |
| decls++ |
| switch decl := decl.(type) { |
| case *ast.GenDecl: |
| for _, spec := range decl.Specs { |
| switch decl.Tok { |
| case token.IMPORT: |
| case token.TYPE: |
| name := spec.(*ast.TypeSpec).Name.Name |
| qualified := pkg.Name + "." + name |
| uniqueIdents[qualified] = true |
| case token.CONST, token.VAR: |
| for _, n := range spec.(*ast.ValueSpec).Names { |
| qualified := pkg.Name + "." + n.Name |
| uniqueIdents[qualified] = true |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| var idents []string |
| for k := range uniqueIdents { |
| idents = append(idents, k) |
| } |
| sort.Strings(idents) |
| return idents |
| } |
| |
| func generatePatterns() []string { |
| var patterns []string |
| for x := 'a'; x <= 'z'; x++ { |
| for y := 'a'; y <= 'z'; y++ { |
| for z := 'a'; z <= 'z'; z++ { |
| patterns = append(patterns, string(x)+string(y)+string(z)) |
| } |
| } |
| } |
| return patterns |
| } |
| |
| // Test that we strongly prefer exact matches. |
| // |
| // In golang/go#60027, we preferred "Runner" for the query "rune" over several |
| // results containing the word "rune" exactly. Following this observation, |
| // scoring was tweaked to more strongly emphasize sequential characters and |
| // exact matches. |
| func TestSymbolRanking_Issue60027(t *testing.T) { |
| matcher := NewSymbolMatcher("rune") |
| |
| // symbols to match, in ascending order of ranking. |
| symbols := []string{ |
| "Runner", |
| "singleRuneParam", |
| "Config.ifsRune", |
| "Parser.rune", |
| } |
| prev := 0.0 |
| for _, sym := range symbols { |
| _, score := matcher.Match([]string{sym}) |
| t.Logf("Match(%q) = %v", sym, score) |
| if score < prev { |
| t.Errorf("Match(%q) = _, %v, want > %v", sym, score, prev) |
| } |
| prev = score |
| } |
| } |
| |
| func TestChunkedMatch(t *testing.T) { |
| matcher := NewSymbolMatcher("test") |
| _, want := matcher.Match([]string{"test"}) |
| chunked := [][]string{ |
| {"", "test"}, |
| {"test", ""}, |
| {"te", "st"}, |
| } |
| |
| for _, chunks := range chunked { |
| offset, score := matcher.Match(chunks) |
| if offset != 0 || score != want { |
| t.Errorf("Match(%v) = %v, %v, want 0, 1.0", chunks, offset, score) |
| } |
| } |
| } |