internal/fuzzy/symbol_test.go - tools.git - Git at Google

 // Copyright 2021 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package fuzzy_test

 import (
 	"go/ast"
 	"go/token"
 	"sort"
 	"testing"

 	"golang.org/x/tools/go/packages"
 	. "golang.org/x/tools/internal/fuzzy"
 )

 func TestSymbolMatchIndex(t *testing.T) {
 	tests := []struct {
 		pattern, input string
 		want           int
 	}{
 		{"test", "foo.TestFoo", 4},
 		{"test", "test", 0},
 		{"test", "Test", 0},
 		{"test", "est", -1},
 		{"t", "shortest", 7},
 		{"", "foo", -1},
 		{"", string([]rune{0}), -1}, // verify that we don't default to an empty pattern.
 		{"anything", "", -1},
 	}

 	for _, test := range tests {
 		matcher := NewSymbolMatcher(test.pattern)
 		if got, _ := matcher.Match([]string{test.input}); got != test.want {
 			t.Errorf("NewSymbolMatcher(%q).Match(%q) = %v, _, want %v, _", test.pattern, test.input, got, test.want)
 		}
 	}
 }

 func TestSymbolRanking(t *testing.T) {

 	// query -> symbols to match, in ascending order of score
 	queryRanks := map[string][]string{
 		"test": {
 			"this.is.better.than.most",
 			"test.foo.bar",
 			"thebest",
 			"atest",
 			"test.foo",
 			"testage",
 			"tTest",
 			"foo.test",
 		},
 		"parseside": { // golang/go#60201
 			"yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE",
 			"parseContext.parse_sidebyside",
 		},
 		"cvb": {
 			"filecache_test.testIPCValueB",
 			"cover.Boundary",
 		},
 		"dho": {
 			"gocommand.DebugHangingGoCommands",
 			"protocol.DocumentHighlightOptions",
 		},
 		"flg": {
 			"completion.FALLTHROUGH",
 			"main.flagGoCmd",
 		},
 		"fvi": {
 			"godoc.fileIndexVersion",
 			"macho.FlagSubsectionsViaSymbols",
 		},
 	}

 	for query, symbols := range queryRanks {
 		t.Run(query, func(t *testing.T) {
 			matcher := NewSymbolMatcher(query)
 			prev := 0.0
 			for _, sym := range symbols {
 				_, score := matcher.Match([]string{sym})
 				t.Logf("Match(%q) = %v", sym, score)
 				if score <= prev {
 					t.Errorf("Match(%q) = _, %v, want > %v", sym, score, prev)
 				}
 				prev = score
 			}
 		})
 	}
 }

 func TestMatcherSimilarities(t *testing.T) {
 	// This test compares the fuzzy matcher with the symbol matcher on a corpus
 	// of qualified identifiers extracted from x/tools.
 	//
 	// These two matchers are not expected to agree, but inspecting differences
 	// can be useful for finding interesting ranking edge cases.
 	t.Skip("unskip this test to compare matchers")

 	idents := collectIdentifiers(t)
 	t.Logf("collected %d unique identifiers", len(idents))

 	// TODO: use go1.21 slices.MaxFunc.
 	topMatch := func(score func(string) float64) string {
 		top := ""
 		topScore := 0.0
 		for _, cand := range idents {
 			if s := score(cand); s > topScore {
 				top = cand
 				topScore = s
 			}
 		}
 		return top
 	}

 	agreed := 0
 	total := 0
 	bad := 0
 	patterns := generatePatterns()
 	for _, pattern := range patterns {
 		total++

 		fm := NewMatcher(pattern)
 		topFuzzy := topMatch(func(input string) float64 {
 			return float64(fm.Score(input))
 		})
 		sm := NewSymbolMatcher(pattern)
 		topSymbol := topMatch(func(input string) float64 {
 			_, score := sm.Match([]string{input})
 			return score
 		})
 		switch {
 		case topFuzzy == "" && topSymbol != "":
 			if false {
 				// The fuzzy matcher has a bug where it misses some matches; for this
 				// test we only care about the symbol matcher.
 				t.Logf("%q matched %q but no fuzzy match", pattern, topSymbol)
 			}
 			total--
 			bad++
 		case topFuzzy != "" && topSymbol == "":
 			t.Fatalf("%q matched %q but no symbol match", pattern, topFuzzy)
 		case topFuzzy == topSymbol:
 			agreed++
 		default:
 			// Enable this log to see mismatches.
 			if false {
 				t.Logf("mismatch for %q: fuzzy: %q, symbol: %q", pattern, topFuzzy, topSymbol)
 			}
 		}
 	}
 	t.Logf("fuzzy matchers agreed on %d out of %d queries (%d bad)", agreed, total, bad)
 }

 func collectIdentifiers(tb testing.TB) []string {
 	cfg := &packages.Config{
 		Mode:  packages.NeedName | packages.NeedSyntax | packages.NeedFiles,
 		Tests: true,
 	}
 	pkgs, err := packages.Load(cfg, "golang.org/x/tools/...")
 	if err != nil {
 		tb.Fatal(err)
 	}
 	uniqueIdents := make(map[string]bool)
 	decls := 0
 	for _, pkg := range pkgs {
 		for _, f := range pkg.Syntax {
 			for _, decl := range f.Decls {
 				decls++
 				switch decl := decl.(type) {
 				case *ast.GenDecl:
 					for _, spec := range decl.Specs {
 						switch decl.Tok {
 						case token.IMPORT:
 						case token.TYPE:
 							name := spec.(*ast.TypeSpec).Name.Name
 							qualified := pkg.Name + "." + name
 							uniqueIdents[qualified] = true
 						case token.CONST, token.VAR:
 							for _, n := range spec.(*ast.ValueSpec).Names {
 								qualified := pkg.Name + "." + n.Name
 								uniqueIdents[qualified] = true
 							}
 						}
 					}
 				}
 			}
 		}
 	}
 	var idents []string
 	for k := range uniqueIdents {
 		idents = append(idents, k)
 	}
 	sort.Strings(idents)
 	return idents
 }

 func generatePatterns() []string {
 	var patterns []string
 	for x := 'a'; x <= 'z'; x++ {
 		for y := 'a'; y <= 'z'; y++ {
 			for z := 'a'; z <= 'z'; z++ {
 				patterns = append(patterns, string(x)+string(y)+string(z))
 			}
 		}
 	}
 	return patterns
 }

 // Test that we strongly prefer exact matches.
 //
 // In golang/go#60027, we preferred "Runner" for the query "rune" over several
 // results containing the word "rune" exactly. Following this observation,
 // scoring was tweaked to more strongly emphasize sequential characters and
 // exact matches.
 func TestSymbolRanking_Issue60027(t *testing.T) {
 	matcher := NewSymbolMatcher("rune")

 	// symbols to match, in ascending order of ranking.
 	symbols := []string{
 		"Runner",
 		"singleRuneParam",
 		"Config.ifsRune",
 		"Parser.rune",
 	}
 	prev := 0.0
 	for _, sym := range symbols {
 		_, score := matcher.Match([]string{sym})
 		t.Logf("Match(%q) = %v", sym, score)
 		if score < prev {
 			t.Errorf("Match(%q) = _, %v, want > %v", sym, score, prev)
 		}
 		prev = score
 	}
 }

 func TestChunkedMatch(t *testing.T) {
 	matcher := NewSymbolMatcher("test")
 	_, want := matcher.Match([]string{"test"})
 	chunked := [][]string{
 		{"", "test"},
 		{"test", ""},
 		{"te", "st"},
 	}

 	for _, chunks := range chunked {
 		offset, score := matcher.Match(chunks)
 		if offset != 0 || score != want {
 			t.Errorf("Match(%v) = %v, %v, want 0, 1.0", chunks, offset, score)
 		}
 	}
 }
	// Copyright 2021 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package fuzzy_test

	import (
	"go/ast"
	"go/token"
	"sort"
	"testing"

	"golang.org/x/tools/go/packages"
	. "golang.org/x/tools/internal/fuzzy"
	)

	func TestSymbolMatchIndex(t *testing.T) {
	tests := []struct {
	pattern, input string
	want int
	}{
	{"test", "foo.TestFoo", 4},
	{"test", "test", 0},
	{"test", "Test", 0},
	{"test", "est", -1},
	{"t", "shortest", 7},
	{"", "foo", -1},
	{"", string([]rune{0}), -1}, // verify that we don't default to an empty pattern.
	{"anything", "", -1},
	}

	for _, test := range tests {
	matcher := NewSymbolMatcher(test.pattern)
	if got, _ := matcher.Match([]string{test.input}); got != test.want {
	t.Errorf("NewSymbolMatcher(%q).Match(%q) = %v, _, want %v, _", test.pattern, test.input, got, test.want)
	}
	}
	}

	func TestSymbolRanking(t *testing.T) {

	// query -> symbols to match, in ascending order of score
	queryRanks := map[string][]string{
	"test": {
	"this.is.better.than.most",
	"test.foo.bar",
	"thebest",
	"atest",
	"test.foo",
	"testage",
	"tTest",
	"foo.test",
	},
	"parseside": { // golang/go#60201
	"yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE",
	"parseContext.parse_sidebyside",
	},
	"cvb": {
	"filecache_test.testIPCValueB",
	"cover.Boundary",
	},
	"dho": {
	"gocommand.DebugHangingGoCommands",
	"protocol.DocumentHighlightOptions",
	},
	"flg": {
	"completion.FALLTHROUGH",
	"main.flagGoCmd",
	},
	"fvi": {
	"godoc.fileIndexVersion",
	"macho.FlagSubsectionsViaSymbols",
	},
	}

	for query, symbols := range queryRanks {
	t.Run(query, func(t *testing.T) {
	matcher := NewSymbolMatcher(query)
	prev := 0.0
	for _, sym := range symbols {
	_, score := matcher.Match([]string{sym})
	t.Logf("Match(%q) = %v", sym, score)
	if score <= prev {
	t.Errorf("Match(%q) = _, %v, want > %v", sym, score, prev)
	}
	prev = score
	}
	})
	}
	}

	func TestMatcherSimilarities(t *testing.T) {
	// This test compares the fuzzy matcher with the symbol matcher on a corpus
	// of qualified identifiers extracted from x/tools.
	//
	// These two matchers are not expected to agree, but inspecting differences
	// can be useful for finding interesting ranking edge cases.
	t.Skip("unskip this test to compare matchers")

	idents := collectIdentifiers(t)
	t.Logf("collected %d unique identifiers", len(idents))

	// TODO: use go1.21 slices.MaxFunc.
	topMatch := func(score func(string) float64) string {
	top := ""
	topScore := 0.0
	for _, cand := range idents {
	if s := score(cand); s > topScore {
	top = cand
	topScore = s
	}
	}
	return top
	}

	agreed := 0
	total := 0
	bad := 0
	patterns := generatePatterns()
	for _, pattern := range patterns {
	total++

	fm := NewMatcher(pattern)
	topFuzzy := topMatch(func(input string) float64 {
	return float64(fm.Score(input))
	})
	sm := NewSymbolMatcher(pattern)
	topSymbol := topMatch(func(input string) float64 {
	_, score := sm.Match([]string{input})
	return score
	})
	switch {
	case topFuzzy == "" && topSymbol != "":
	if false {
	// The fuzzy matcher has a bug where it misses some matches; for this
	// test we only care about the symbol matcher.
	t.Logf("%q matched %q but no fuzzy match", pattern, topSymbol)
	}
	total--
	bad++
	case topFuzzy != "" && topSymbol == "":
	t.Fatalf("%q matched %q but no symbol match", pattern, topFuzzy)
	case topFuzzy == topSymbol:
	agreed++
	default:
	// Enable this log to see mismatches.
	if false {
	t.Logf("mismatch for %q: fuzzy: %q, symbol: %q", pattern, topFuzzy, topSymbol)
	}
	}
	}
	t.Logf("fuzzy matchers agreed on %d out of %d queries (%d bad)", agreed, total, bad)
	}

	func collectIdentifiers(tb testing.TB) []string {
	cfg := &packages.Config{
	Mode: packages.NeedName \| packages.NeedSyntax \| packages.NeedFiles,
	Tests: true,
	}
	pkgs, err := packages.Load(cfg, "golang.org/x/tools/...")
	if err != nil {
	tb.Fatal(err)
	}
	uniqueIdents := make(map[string]bool)
	decls := 0
	for _, pkg := range pkgs {
	for _, f := range pkg.Syntax {
	for _, decl := range f.Decls {
	decls++
	switch decl := decl.(type) {
	case *ast.GenDecl:
	for _, spec := range decl.Specs {
	switch decl.Tok {
	case token.IMPORT:
	case token.TYPE:
	name := spec.(*ast.TypeSpec).Name.Name
	qualified := pkg.Name + "." + name
	uniqueIdents[qualified] = true
	case token.CONST, token.VAR:
	for _, n := range spec.(*ast.ValueSpec).Names {
	qualified := pkg.Name + "." + n.Name
	uniqueIdents[qualified] = true
	}
	}
	}
	}
	}
	}
	}
	var idents []string
	for k := range uniqueIdents {
	idents = append(idents, k)
	}
	sort.Strings(idents)
	return idents
	}

	func generatePatterns() []string {
	var patterns []string
	for x := 'a'; x <= 'z'; x++ {
	for y := 'a'; y <= 'z'; y++ {
	for z := 'a'; z <= 'z'; z++ {
	patterns = append(patterns, string(x)+string(y)+string(z))
	}
	}
	}
	return patterns
	}

	// Test that we strongly prefer exact matches.
	//
	// In golang/go#60027, we preferred "Runner" for the query "rune" over several
	// results containing the word "rune" exactly. Following this observation,
	// scoring was tweaked to more strongly emphasize sequential characters and
	// exact matches.
	func TestSymbolRanking_Issue60027(t *testing.T) {
	matcher := NewSymbolMatcher("rune")

	// symbols to match, in ascending order of ranking.
	symbols := []string{
	"Runner",
	"singleRuneParam",
	"Config.ifsRune",
	"Parser.rune",
	}
	prev := 0.0
	for _, sym := range symbols {
	_, score := matcher.Match([]string{sym})
	t.Logf("Match(%q) = %v", sym, score)
	if score < prev {
	t.Errorf("Match(%q) = _, %v, want > %v", sym, score, prev)
	}
	prev = score
	}
	}

	func TestChunkedMatch(t *testing.T) {
	matcher := NewSymbolMatcher("test")
	_, want := matcher.Match([]string{"test"})
	chunked := [][]string{
	{"", "test"},
	{"test", ""},
	{"te", "st"},
	}

	for _, chunks := range chunked {
	offset, score := matcher.Match(chunks)
	if offset != 0 \|\| score != want {
	t.Errorf("Match(%v) = %v, %v, want 0, 1.0", chunks, offset, score)
	}
	}
	}