internal/lsp: update the fuzzy matcher to operate on chunks
We can avoid allocating strings when performing workspace symbol search
by having the fuzzy match operate directly on chunks.
When operating on a single string, this slows down the matcher slightly
(perhaps 10%) due to copying bytes rather than accessing the string
directly. We could work around this using unsafe, but this could also be
mitigated by generics.
Benchmark ("test" in x/tools): 48ms->46ms
Benchmark ("test" in kubernetes): 868ms->857ms
Change-Id: Icf0f15aaa5cc3c875cf157a7b90db801045d9ed4
Reviewed-on: https://go-review.googlesource.com/c/tools/+/338694
Trust: Robert Findley <rfindley@google.com>
Run-TryBot: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
diff --git a/internal/lsp/fuzzy/input.go b/internal/lsp/fuzzy/input.go
index ac37703..c103816 100644
--- a/internal/lsp/fuzzy/input.go
+++ b/internal/lsp/fuzzy/input.go
@@ -27,23 +27,23 @@
// RuneRoles detects the roles of each byte rune in an input string and stores it in the output
// slice. The rune role depends on the input type. Stops when it parsed all the runes in the string
// or when it filled the output. If output is nil, then it gets created.
-func RuneRoles(str string, reuse []RuneRole) []RuneRole {
+func RuneRoles(candidate []byte, reuse []RuneRole) []RuneRole {
var output []RuneRole
- if cap(reuse) < len(str) {
- output = make([]RuneRole, 0, len(str))
+ if cap(reuse) < len(candidate) {
+ output = make([]RuneRole, 0, len(candidate))
} else {
output = reuse[:0]
}
prev, prev2 := rtNone, rtNone
- for i := 0; i < len(str); i++ {
- r := rune(str[i])
+ for i := 0; i < len(candidate); i++ {
+ r := rune(candidate[i])
role := RNone
curr := rtLower
- if str[i] <= unicode.MaxASCII {
- curr = runeType(rt[str[i]] - '0')
+ if candidate[i] <= unicode.MaxASCII {
+ curr = runeType(rt[candidate[i]] - '0')
}
if curr == rtLower {
@@ -58,7 +58,7 @@
if prev == rtUpper {
// This and previous characters are both upper case.
- if i+1 == len(str) {
+ if i+1 == len(candidate) {
// This is last character, previous was also uppercase -> this is UCTail
// i.e., (current char is C): aBC / BC / ABC
role = RUCTail
@@ -118,11 +118,26 @@
return input[start+1 : end+1]
}
-// ToLower transforms the input string to lower case, which is stored in the output byte slice.
+// fromChunks copies string chunks into the given buffer.
+func fromChunks(chunks []string, buffer []byte) []byte {
+ ii := 0
+ for _, chunk := range chunks {
+ for i := 0; i < len(chunk); i++ {
+ if ii >= cap(buffer) {
+ break
+ }
+ buffer[ii] = chunk[i]
+ ii++
+ }
+ }
+ return buffer[:ii]
+}
+
+// toLower transforms the input string to lower case, which is stored in the output byte slice.
// The lower casing considers only ASCII values - non ASCII values are left unmodified.
// Stops when parsed all input or when it filled the output slice. If output is nil, then it gets
// created.
-func ToLower(input string, reuse []byte) []byte {
+func toLower(input []byte, reuse []byte) []byte {
output := reuse
if cap(reuse) < len(input) {
output = make([]byte, len(input))
@@ -130,7 +145,7 @@
for i := 0; i < len(input); i++ {
r := rune(input[i])
- if r <= unicode.MaxASCII {
+ if input[i] <= unicode.MaxASCII {
if 'A' <= r && r <= 'Z' {
r += 'a' - 'A'
}
diff --git a/internal/lsp/fuzzy/input_test.go b/internal/lsp/fuzzy/input_test.go
index dffafa5..0228347 100644
--- a/internal/lsp/fuzzy/input_test.go
+++ b/internal/lsp/fuzzy/input_test.go
@@ -36,7 +36,7 @@
func TestRoles(t *testing.T) {
for _, tc := range rolesTests {
gotRoles := make([]fuzzy.RuneRole, len(tc.str))
- fuzzy.RuneRoles(tc.str, gotRoles)
+ fuzzy.RuneRoles([]byte(tc.str), gotRoles)
got := rolesString(gotRoles)
if got != tc.want {
t.Errorf("roles(%s) = %v; want %v", tc.str, got, tc.want)
@@ -68,7 +68,7 @@
func TestWordSplit(t *testing.T) {
for _, tc := range wordSplitTests {
- roles := fuzzy.RuneRoles(tc.input, nil)
+ roles := fuzzy.RuneRoles([]byte(tc.input), nil)
var got []string
consumer := func(i, j int) {
@@ -120,7 +120,7 @@
func TestLastSegment(t *testing.T) {
for _, tc := range lastSegmentSplitTests {
- roles := fuzzy.RuneRoles(tc.str, nil)
+ roles := fuzzy.RuneRoles([]byte(tc.str), nil)
got := fuzzy.LastSegment(tc.str, roles)
@@ -135,7 +135,7 @@
out := make([]fuzzy.RuneRole, len(str))
for i := 0; i < b.N; i++ {
- fuzzy.RuneRoles(str, out)
+ fuzzy.RuneRoles([]byte(str), out)
}
b.SetBytes(int64(len(str)))
}
diff --git a/internal/lsp/fuzzy/matcher.go b/internal/lsp/fuzzy/matcher.go
index 16a6430..265cdcf 100644
--- a/internal/lsp/fuzzy/matcher.go
+++ b/internal/lsp/fuzzy/matcher.go
@@ -51,8 +51,12 @@
lastCandidateLen int // in bytes
lastCandidateMatched bool
- // Here we save the last candidate in lower-case. This is basically a byte slice we reuse for
- // performance reasons, so the slice is not reallocated for every candidate.
+ // Reusable buffers to avoid allocating for every candidate.
+ // - inputBuf stores the concatenated input chunks
+ // - lowerBuf stores the last candidate in lower-case
+ // - rolesBuf stores the calculated roles for each rune in the last
+ // candidate.
+ inputBuf [MaxInputSize]byte
lowerBuf [MaxInputSize]byte
rolesBuf [MaxInputSize]RuneRole
}
@@ -72,7 +76,7 @@
m := &Matcher{
pattern: pattern,
- patternLower: ToLower(pattern, nil),
+ patternLower: toLower([]byte(pattern), nil),
}
for i, c := range m.patternLower {
@@ -88,7 +92,7 @@
m.patternShort = m.patternLower
}
- m.patternRoles = RuneRoles(pattern, nil)
+ m.patternRoles = RuneRoles([]byte(pattern), nil)
if len(pattern) > 0 {
maxCharScore := 4
@@ -102,10 +106,15 @@
// This is not designed for parallel use. Multiple candidates must be scored sequentially.
// Returns a score between 0 and 1 (0 - no match, 1 - perfect match).
func (m *Matcher) Score(candidate string) float32 {
+ return m.ScoreChunks([]string{candidate})
+}
+
+func (m *Matcher) ScoreChunks(chunks []string) float32 {
+ candidate := fromChunks(chunks, m.inputBuf[:])
if len(candidate) > MaxInputSize {
candidate = candidate[:MaxInputSize]
}
- lower := ToLower(candidate, m.lowerBuf[:])
+ lower := toLower(candidate, m.lowerBuf[:])
m.lastCandidateLen = len(candidate)
if len(m.pattern) == 0 {
@@ -174,7 +183,7 @@
return ret
}
-func (m *Matcher) match(candidate string, candidateLower []byte) bool {
+func (m *Matcher) match(candidate []byte, candidateLower []byte) bool {
i, j := 0, 0
for ; i < len(candidateLower) && j < len(m.patternLower); i++ {
if candidateLower[i] == m.patternLower[j] {
@@ -192,7 +201,7 @@
return true
}
-func (m *Matcher) computeScore(candidate string, candidateLower []byte) int {
+func (m *Matcher) computeScore(candidate []byte, candidateLower []byte) int {
pattLen, candLen := len(m.pattern), len(candidate)
for j := 0; j <= len(m.pattern); j++ {
diff --git a/internal/lsp/source/workspace_symbol.go b/internal/lsp/source/workspace_symbol.go
index 0661bc5..3591b08 100644
--- a/internal/lsp/source/workspace_symbol.go
+++ b/internal/lsp/source/workspace_symbol.go
@@ -233,8 +233,7 @@
default:
fm := fuzzy.NewMatcher(field)
f = func(chunks []string) (int, float64) {
- s := strings.Join(chunks, "")
- score := float64(fm.Score(s))
+ score := float64(fm.ScoreChunks(chunks))
ranges := fm.MatchedRanges()
if len(ranges) > 0 {
return ranges[0], score