Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 1 | // Copyright 2020 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package source |
| 6 | |
| 7 | import ( |
| 8 | "context" |
| 9 | "fmt" |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 10 | "path" |
| 11 | "path/filepath" |
| 12 | "regexp" |
| 13 | "runtime" |
| 14 | "sort" |
| 15 | "strings" |
| 16 | "unicode" |
| 17 | |
Alan Donovan | 26a95e6 | 2022-10-07 10:40:32 -0400 | [diff] [blame] | 18 | "golang.org/x/tools/gopls/internal/lsp/protocol" |
| 19 | "golang.org/x/tools/gopls/internal/span" |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 20 | "golang.org/x/tools/internal/event" |
| 21 | "golang.org/x/tools/internal/fuzzy" |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 22 | ) |
| 23 | |
| 24 | // Symbol holds a precomputed symbol value. Note: we avoid using the |
| 25 | // protocol.SymbolInformation struct here in order to reduce the size of each |
| 26 | // symbol. |
| 27 | type Symbol struct { |
| 28 | Name string |
| 29 | Kind protocol.SymbolKind |
| 30 | Range protocol.Range |
| 31 | } |
| 32 | |
| 33 | // maxSymbols defines the maximum number of symbol results that should ever be |
| 34 | // sent in response to a client. |
| 35 | const maxSymbols = 100 |
| 36 | |
| 37 | // WorkspaceSymbols matches symbols across all views using the given query, |
| 38 | // according to the match semantics parameterized by matcherType and style. |
| 39 | // |
| 40 | // The workspace symbol method is defined in the spec as follows: |
| 41 | // |
| 42 | // The workspace symbol request is sent from the client to the server to |
| 43 | // list project-wide symbols matching the query string. |
| 44 | // |
| 45 | // It is unclear what "project-wide" means here, but given the parameters of |
| 46 | // workspace/symbol do not include any workspace identifier, then it has to be |
| 47 | // assumed that "project-wide" means "across all workspaces". Hence why |
| 48 | // WorkspaceSymbols receives the views []View. |
| 49 | // |
| 50 | // However, it then becomes unclear what it would mean to call WorkspaceSymbols |
| 51 | // with a different configured SymbolMatcher per View. Therefore we assume that |
| 52 | // Session level configuration will define the SymbolMatcher to be used for the |
| 53 | // WorkspaceSymbols method. |
| 54 | func WorkspaceSymbols(ctx context.Context, matcher SymbolMatcher, style SymbolStyle, views []View, query string) ([]protocol.SymbolInformation, error) { |
| 55 | ctx, done := event.Start(ctx, "source.WorkspaceSymbols") |
| 56 | defer done() |
| 57 | if query == "" { |
| 58 | return nil, nil |
| 59 | } |
| 60 | |
| 61 | var s symbolizer |
| 62 | switch style { |
| 63 | case DynamicSymbols: |
| 64 | s = dynamicSymbolMatch |
| 65 | case FullyQualifiedSymbols: |
| 66 | s = fullyQualifiedSymbolMatch |
| 67 | case PackageQualifiedSymbols: |
| 68 | s = packageSymbolMatch |
| 69 | default: |
| 70 | panic(fmt.Errorf("unknown symbol style: %v", style)) |
| 71 | } |
| 72 | |
| 73 | return collectSymbols(ctx, views, matcher, s, query) |
| 74 | } |
| 75 | |
| 76 | // A matcherFunc returns the index and score of a symbol match. |
| 77 | // |
| 78 | // See the comment for symbolCollector for more information. |
| 79 | type matcherFunc func(chunks []string) (int, float64) |
| 80 | |
| 81 | // A symbolizer returns the best symbol match for a name with pkg, according to |
| 82 | // some heuristic. The symbol name is passed as the slice nameParts of logical |
| 83 | // name pieces. For example, for myType.field the caller can pass either |
| 84 | // []string{"myType.field"} or []string{"myType.", "field"}. |
| 85 | // |
| 86 | // See the comment for symbolCollector for more information. |
| 87 | // |
| 88 | // The space argument is an empty slice with spare capacity that may be used |
| 89 | // to allocate the result. |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 90 | type symbolizer func(space []string, name string, pkg *Metadata, m matcherFunc) ([]string, float64) |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 91 | |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 92 | func fullyQualifiedSymbolMatch(space []string, name string, pkg *Metadata, matcher matcherFunc) ([]string, float64) { |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 93 | if _, score := dynamicSymbolMatch(space, name, pkg, matcher); score > 0 { |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 94 | return append(space, string(pkg.PkgPath), ".", name), score |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 95 | } |
| 96 | return nil, 0 |
| 97 | } |
| 98 | |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 99 | func dynamicSymbolMatch(space []string, name string, pkg *Metadata, matcher matcherFunc) ([]string, float64) { |
| 100 | if IsCommandLineArguments(pkg.ID) { |
Robert Findley | b253314 | 2022-10-10 13:50:45 -0400 | [diff] [blame] | 101 | // command-line-arguments packages have a non-sensical package path, so |
| 102 | // just use their package name. |
| 103 | return packageSymbolMatch(space, name, pkg, matcher) |
| 104 | } |
| 105 | |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 106 | var score float64 |
| 107 | |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 108 | endsInPkgName := strings.HasSuffix(string(pkg.PkgPath), string(pkg.Name)) |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 109 | |
| 110 | // If the package path does not end in the package name, we need to check the |
| 111 | // package-qualified symbol as an extra pass first. |
| 112 | if !endsInPkgName { |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 113 | pkgQualified := append(space, string(pkg.Name), ".", name) |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 114 | idx, score := matcher(pkgQualified) |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 115 | nameStart := len(pkg.Name) + 1 |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 116 | if score > 0 { |
| 117 | // If our match is contained entirely within the unqualified portion, |
| 118 | // just return that. |
| 119 | if idx >= nameStart { |
| 120 | return append(space, name), score |
| 121 | } |
| 122 | // Lower the score for matches that include the package name. |
| 123 | return pkgQualified, score * 0.8 |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | // Now try matching the fully qualified symbol. |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 128 | fullyQualified := append(space, string(pkg.PkgPath), ".", name) |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 129 | idx, score := matcher(fullyQualified) |
| 130 | |
| 131 | // As above, check if we matched just the unqualified symbol name. |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 132 | nameStart := len(pkg.PkgPath) + 1 |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 133 | if idx >= nameStart { |
| 134 | return append(space, name), score |
| 135 | } |
| 136 | |
| 137 | // If our package path ends in the package name, we'll have skipped the |
| 138 | // initial pass above, so check if we matched just the package-qualified |
| 139 | // name. |
| 140 | if endsInPkgName && idx >= 0 { |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 141 | pkgStart := len(pkg.PkgPath) - len(pkg.Name) |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 142 | if idx >= pkgStart { |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 143 | return append(space, string(pkg.Name), ".", name), score |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 144 | } |
| 145 | } |
| 146 | |
| 147 | // Our match was not contained within the unqualified or package qualified |
| 148 | // symbol. Return the fully qualified symbol but discount the score. |
| 149 | return fullyQualified, score * 0.6 |
| 150 | } |
| 151 | |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 152 | func packageSymbolMatch(space []string, name string, pkg *Metadata, matcher matcherFunc) ([]string, float64) { |
| 153 | qualified := append(space, string(pkg.Name), ".", name) |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 154 | if _, s := matcher(qualified); s > 0 { |
| 155 | return qualified, s |
| 156 | } |
| 157 | return nil, 0 |
| 158 | } |
| 159 | |
| 160 | func buildMatcher(matcher SymbolMatcher, query string) matcherFunc { |
| 161 | switch matcher { |
| 162 | case SymbolFuzzy: |
| 163 | return parseQuery(query, newFuzzyMatcher) |
| 164 | case SymbolFastFuzzy: |
| 165 | return parseQuery(query, func(query string) matcherFunc { |
| 166 | return fuzzy.NewSymbolMatcher(query).Match |
| 167 | }) |
| 168 | case SymbolCaseSensitive: |
| 169 | return matchExact(query) |
| 170 | case SymbolCaseInsensitive: |
| 171 | q := strings.ToLower(query) |
| 172 | exact := matchExact(q) |
| 173 | wrapper := []string{""} |
| 174 | return func(chunks []string) (int, float64) { |
| 175 | s := strings.Join(chunks, "") |
| 176 | wrapper[0] = strings.ToLower(s) |
| 177 | return exact(wrapper) |
| 178 | } |
| 179 | } |
| 180 | panic(fmt.Errorf("unknown symbol matcher: %v", matcher)) |
| 181 | } |
| 182 | |
| 183 | func newFuzzyMatcher(query string) matcherFunc { |
| 184 | fm := fuzzy.NewMatcher(query) |
| 185 | return func(chunks []string) (int, float64) { |
| 186 | score := float64(fm.ScoreChunks(chunks)) |
| 187 | ranges := fm.MatchedRanges() |
| 188 | if len(ranges) > 0 { |
| 189 | return ranges[0], score |
| 190 | } |
| 191 | return -1, score |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | // parseQuery parses a field-separated symbol query, extracting the special |
| 196 | // characters listed below, and returns a matcherFunc corresponding to the AND |
| 197 | // of all field queries. |
| 198 | // |
| 199 | // Special characters: |
| 200 | // |
| 201 | // ^ match exact prefix |
| 202 | // $ match exact suffix |
| 203 | // ' match exact |
| 204 | // |
| 205 | // In all three of these special queries, matches are 'smart-cased', meaning |
| 206 | // they are case sensitive if the symbol query contains any upper-case |
| 207 | // characters, and case insensitive otherwise. |
| 208 | func parseQuery(q string, newMatcher func(string) matcherFunc) matcherFunc { |
| 209 | fields := strings.Fields(q) |
| 210 | if len(fields) == 0 { |
| 211 | return func([]string) (int, float64) { return -1, 0 } |
| 212 | } |
| 213 | var funcs []matcherFunc |
| 214 | for _, field := range fields { |
| 215 | var f matcherFunc |
| 216 | switch { |
| 217 | case strings.HasPrefix(field, "^"): |
| 218 | prefix := field[1:] |
| 219 | f = smartCase(prefix, func(chunks []string) (int, float64) { |
| 220 | s := strings.Join(chunks, "") |
| 221 | if strings.HasPrefix(s, prefix) { |
| 222 | return 0, 1 |
| 223 | } |
| 224 | return -1, 0 |
| 225 | }) |
| 226 | case strings.HasPrefix(field, "'"): |
| 227 | exact := field[1:] |
| 228 | f = smartCase(exact, matchExact(exact)) |
| 229 | case strings.HasSuffix(field, "$"): |
| 230 | suffix := field[0 : len(field)-1] |
| 231 | f = smartCase(suffix, func(chunks []string) (int, float64) { |
| 232 | s := strings.Join(chunks, "") |
| 233 | if strings.HasSuffix(s, suffix) { |
| 234 | return len(s) - len(suffix), 1 |
| 235 | } |
| 236 | return -1, 0 |
| 237 | }) |
| 238 | default: |
| 239 | f = newMatcher(field) |
| 240 | } |
| 241 | funcs = append(funcs, f) |
| 242 | } |
| 243 | if len(funcs) == 1 { |
| 244 | return funcs[0] |
| 245 | } |
| 246 | return comboMatcher(funcs).match |
| 247 | } |
| 248 | |
| 249 | func matchExact(exact string) matcherFunc { |
| 250 | return func(chunks []string) (int, float64) { |
| 251 | s := strings.Join(chunks, "") |
| 252 | if idx := strings.LastIndex(s, exact); idx >= 0 { |
| 253 | return idx, 1 |
| 254 | } |
| 255 | return -1, 0 |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | // smartCase returns a matcherFunc that is case-sensitive if q contains any |
| 260 | // upper-case characters, and case-insensitive otherwise. |
| 261 | func smartCase(q string, m matcherFunc) matcherFunc { |
| 262 | insensitive := strings.ToLower(q) == q |
| 263 | wrapper := []string{""} |
| 264 | return func(chunks []string) (int, float64) { |
| 265 | s := strings.Join(chunks, "") |
| 266 | if insensitive { |
| 267 | s = strings.ToLower(s) |
| 268 | } |
| 269 | wrapper[0] = s |
| 270 | return m(wrapper) |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | type comboMatcher []matcherFunc |
| 275 | |
| 276 | func (c comboMatcher) match(chunks []string) (int, float64) { |
| 277 | score := 1.0 |
| 278 | first := 0 |
| 279 | for _, f := range c { |
| 280 | idx, s := f(chunks) |
| 281 | if idx < first { |
| 282 | first = idx |
| 283 | } |
| 284 | score *= s |
| 285 | } |
| 286 | return first, score |
| 287 | } |
| 288 | |
| 289 | // collectSymbols calls snapshot.Symbols to walk the syntax trees of |
| 290 | // all files in the views' current snapshots, and returns a sorted, |
| 291 | // scored list of symbols that best match the parameters. |
| 292 | // |
| 293 | // How it matches symbols is parameterized by two interfaces: |
| 294 | // - A matcherFunc determines how well a string symbol matches a query. It |
| 295 | // returns a non-negative score indicating the quality of the match. A score |
| 296 | // of zero indicates no match. |
| 297 | // - A symbolizer determines how we extract the symbol for an object. This |
| 298 | // enables the 'symbolStyle' configuration option. |
| 299 | func collectSymbols(ctx context.Context, views []View, matcherType SymbolMatcher, symbolizer symbolizer, query string) ([]protocol.SymbolInformation, error) { |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 300 | // Extract symbols from all files. |
| 301 | var work []symbolFile |
| 302 | var roots []string |
| 303 | seen := make(map[span.URI]bool) |
| 304 | // TODO(adonovan): opt: parallelize this loop? How often is len > 1? |
| 305 | for _, v := range views { |
Robert Findley | c4c6aa6 | 2023-01-19 20:24:55 -0500 | [diff] [blame] | 306 | snapshot, release, err := v.Snapshot() |
| 307 | if err != nil { |
| 308 | continue // view is shut down; continue with others |
| 309 | } |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 310 | defer release() |
| 311 | |
| 312 | // Use the root view URIs for determining (lexically) |
| 313 | // whether a URI is in any open workspace. |
| 314 | roots = append(roots, strings.TrimRight(string(v.Folder()), "/")) |
| 315 | |
| 316 | filters := v.Options().DirectoryFilters |
| 317 | filterer := NewFilterer(filters) |
| 318 | folder := filepath.ToSlash(v.Folder().Filename()) |
Rob Findley | 8e9b185 | 2023-05-01 12:44:43 -0400 | [diff] [blame] | 319 | |
| 320 | workspaceOnly := true |
| 321 | if v.Options().SymbolScope == AllSymbolScope { |
| 322 | workspaceOnly = false |
| 323 | } |
| 324 | symbols, err := snapshot.Symbols(ctx, workspaceOnly) |
Robert Findley | e5b9948 | 2023-02-15 22:26:54 -0500 | [diff] [blame] | 325 | if err != nil { |
| 326 | return nil, err |
| 327 | } |
Rob Findley | 8e9b185 | 2023-05-01 12:44:43 -0400 | [diff] [blame] | 328 | |
Robert Findley | e5b9948 | 2023-02-15 22:26:54 -0500 | [diff] [blame] | 329 | for uri, syms := range symbols { |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 330 | norm := filepath.ToSlash(uri.Filename()) |
| 331 | nm := strings.TrimPrefix(norm, folder) |
| 332 | if filterer.Disallow(nm) { |
| 333 | continue |
| 334 | } |
| 335 | // Only scan each file once. |
| 336 | if seen[uri] { |
| 337 | continue |
| 338 | } |
Alan Donovan | b35949e | 2023-04-20 14:53:41 -0400 | [diff] [blame] | 339 | meta, err := NarrowestMetadataForFile(ctx, snapshot, uri) |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 340 | if err != nil { |
| 341 | event.Error(ctx, fmt.Sprintf("missing metadata for %q", uri), err) |
| 342 | continue |
| 343 | } |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 344 | seen[uri] = true |
Alan Donovan | b35949e | 2023-04-20 14:53:41 -0400 | [diff] [blame] | 345 | work = append(work, symbolFile{uri, meta, syms}) |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 346 | } |
| 347 | } |
| 348 | |
| 349 | // Match symbols in parallel. |
| 350 | // Each worker has its own symbolStore, |
| 351 | // which we merge at the end. |
| 352 | nmatchers := runtime.GOMAXPROCS(-1) // matching is CPU bound |
| 353 | results := make(chan *symbolStore) |
| 354 | for i := 0; i < nmatchers; i++ { |
| 355 | go func(i int) { |
| 356 | matcher := buildMatcher(matcherType, query) |
| 357 | store := new(symbolStore) |
| 358 | // Assign files to workers in round-robin fashion. |
| 359 | for j := i; j < len(work); j += nmatchers { |
| 360 | matchFile(store, symbolizer, matcher, roots, work[j]) |
| 361 | } |
| 362 | results <- store |
| 363 | }(i) |
| 364 | } |
| 365 | |
| 366 | // Gather and merge results as they arrive. |
| 367 | var unified symbolStore |
| 368 | for i := 0; i < nmatchers; i++ { |
| 369 | store := <-results |
| 370 | for _, syms := range store.res { |
| 371 | unified.store(syms) |
| 372 | } |
| 373 | } |
| 374 | return unified.results(), nil |
| 375 | } |
| 376 | |
| 377 | type Filterer struct { |
| 378 | // Whether a filter is excluded depends on the operator (first char of the raw filter). |
| 379 | // Slices filters and excluded then should have the same length. |
| 380 | filters []*regexp.Regexp |
| 381 | excluded []bool |
| 382 | } |
| 383 | |
| 384 | // NewFilterer computes regular expression form of all raw filters |
| 385 | func NewFilterer(rawFilters []string) *Filterer { |
| 386 | var f Filterer |
| 387 | for _, filter := range rawFilters { |
| 388 | filter = path.Clean(filepath.ToSlash(filter)) |
Alan Donovan | f1c8f7f | 2022-10-28 12:07:01 -0400 | [diff] [blame] | 389 | // TODO(dungtuanle): fix: validate [+-] prefix. |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 390 | op, prefix := filter[0], filter[1:] |
| 391 | // convertFilterToRegexp adds "/" at the end of prefix to handle cases where a filter is a prefix of another filter. |
| 392 | // For example, it prevents [+foobar, -foo] from excluding "foobar". |
| 393 | f.filters = append(f.filters, convertFilterToRegexp(filepath.ToSlash(prefix))) |
| 394 | f.excluded = append(f.excluded, op == '-') |
| 395 | } |
| 396 | |
| 397 | return &f |
| 398 | } |
| 399 | |
| 400 | // Disallow return true if the path is excluded from the filterer's filters. |
| 401 | func (f *Filterer) Disallow(path string) bool { |
Alan Donovan | f1c8f7f | 2022-10-28 12:07:01 -0400 | [diff] [blame] | 402 | // Ensure trailing but not leading slash. |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 403 | path = strings.TrimPrefix(path, "/") |
Alan Donovan | f1c8f7f | 2022-10-28 12:07:01 -0400 | [diff] [blame] | 404 | if !strings.HasSuffix(path, "/") { |
| 405 | path += "/" |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 406 | } |
| 407 | |
Alan Donovan | f1c8f7f | 2022-10-28 12:07:01 -0400 | [diff] [blame] | 408 | // TODO(adonovan): opt: iterate in reverse and break at first match. |
| 409 | excluded := false |
| 410 | for i, filter := range f.filters { |
| 411 | if filter.MatchString(path) { |
| 412 | excluded = f.excluded[i] // last match wins |
| 413 | } |
| 414 | } |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 415 | return excluded |
| 416 | } |
| 417 | |
| 418 | // convertFilterToRegexp replaces glob-like operator substrings in a string file path to their equivalent regex forms. |
| 419 | // Supporting glob-like operators: |
| 420 | // - **: match zero or more complete path segments |
| 421 | func convertFilterToRegexp(filter string) *regexp.Regexp { |
| 422 | if filter == "" { |
| 423 | return regexp.MustCompile(".*") |
| 424 | } |
| 425 | var ret strings.Builder |
| 426 | ret.WriteString("^") |
| 427 | segs := strings.Split(filter, "/") |
| 428 | for _, seg := range segs { |
Alan Donovan | f1c8f7f | 2022-10-28 12:07:01 -0400 | [diff] [blame] | 429 | // Inv: seg != "" since path is clean. |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 430 | if seg == "**" { |
| 431 | ret.WriteString(".*") |
| 432 | } else { |
| 433 | ret.WriteString(regexp.QuoteMeta(seg)) |
| 434 | } |
| 435 | ret.WriteString("/") |
| 436 | } |
Alan Donovan | f1c8f7f | 2022-10-28 12:07:01 -0400 | [diff] [blame] | 437 | pattern := ret.String() |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 438 | |
Alan Donovan | f1c8f7f | 2022-10-28 12:07:01 -0400 | [diff] [blame] | 439 | // Remove unnecessary "^.*" prefix, which increased |
| 440 | // BenchmarkWorkspaceSymbols time by ~20% (even though |
| 441 | // filter CPU time increased by only by ~2.5%) when the |
| 442 | // default filter was changed to "**/node_modules". |
| 443 | pattern = strings.TrimPrefix(pattern, "^.*") |
| 444 | |
| 445 | return regexp.MustCompile(pattern) |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 446 | } |
| 447 | |
| 448 | // symbolFile holds symbol information for a single file. |
| 449 | type symbolFile struct { |
| 450 | uri span.URI |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 451 | md *Metadata |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 452 | syms []Symbol |
| 453 | } |
| 454 | |
| 455 | // matchFile scans a symbol file and adds matching symbols to the store. |
| 456 | func matchFile(store *symbolStore, symbolizer symbolizer, matcher matcherFunc, roots []string, i symbolFile) { |
| 457 | space := make([]string, 0, 3) |
| 458 | for _, sym := range i.syms { |
| 459 | symbolParts, score := symbolizer(space, sym.Name, i.md, matcher) |
| 460 | |
| 461 | // Check if the score is too low before applying any downranking. |
| 462 | if store.tooLow(score) { |
| 463 | continue |
| 464 | } |
| 465 | |
| 466 | // Factors to apply to the match score for the purpose of downranking |
| 467 | // results. |
| 468 | // |
| 469 | // These numbers were crudely calibrated based on trial-and-error using a |
| 470 | // small number of sample queries. Adjust as necessary. |
| 471 | // |
| 472 | // All factors are multiplicative, meaning if more than one applies they are |
| 473 | // multiplied together. |
| 474 | const ( |
Alan Donovan | e8f417a | 2023-04-21 14:54:28 -0400 | [diff] [blame] | 475 | // nonWorkspaceFactor is applied to symbols outside the workspace. |
| 476 | // Developers are less likely to want to jump to code that they |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 477 | // are not actively working on. |
| 478 | nonWorkspaceFactor = 0.5 |
Alan Donovan | e8f417a | 2023-04-21 14:54:28 -0400 | [diff] [blame] | 479 | // nonWorkspaceUnexportedFactor is applied to unexported symbols outside |
| 480 | // the workspace. Since one wouldn't usually jump to unexported |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 481 | // symbols to understand a package API, they are particularly irrelevant. |
| 482 | nonWorkspaceUnexportedFactor = 0.5 |
| 483 | // every field or method nesting level to access the field decreases |
| 484 | // the score by a factor of 1.0 - depth*depthFactor, up to a depth of |
| 485 | // 3. |
Rob Findley | ddfa220 | 2023-05-08 17:03:33 -0400 | [diff] [blame] | 486 | // |
| 487 | // Use a small constant here, as this exists mostly to break ties |
| 488 | // (e.g. given a type Foo and a field x.Foo, prefer Foo). |
| 489 | depthFactor = 0.01 |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 490 | ) |
| 491 | |
| 492 | startWord := true |
| 493 | exported := true |
| 494 | depth := 0.0 |
| 495 | for _, r := range sym.Name { |
| 496 | if startWord && !unicode.IsUpper(r) { |
| 497 | exported = false |
| 498 | } |
| 499 | if r == '.' { |
| 500 | startWord = true |
| 501 | depth++ |
| 502 | } else { |
| 503 | startWord = false |
| 504 | } |
| 505 | } |
| 506 | |
Rob Findley | 8e9b185 | 2023-05-01 12:44:43 -0400 | [diff] [blame] | 507 | // TODO(rfindley): use metadata to determine if the file is in a workspace |
| 508 | // package, rather than this heuristic. |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 509 | inWorkspace := false |
| 510 | for _, root := range roots { |
| 511 | if strings.HasPrefix(string(i.uri), root) { |
| 512 | inWorkspace = true |
| 513 | break |
| 514 | } |
| 515 | } |
| 516 | |
| 517 | // Apply downranking based on workspace position. |
| 518 | if !inWorkspace { |
| 519 | score *= nonWorkspaceFactor |
| 520 | if !exported { |
| 521 | score *= nonWorkspaceUnexportedFactor |
| 522 | } |
| 523 | } |
| 524 | |
| 525 | // Apply downranking based on symbol depth. |
| 526 | if depth > 3 { |
| 527 | depth = 3 |
| 528 | } |
| 529 | score *= 1.0 - depth*depthFactor |
| 530 | |
| 531 | if store.tooLow(score) { |
| 532 | continue |
| 533 | } |
| 534 | |
| 535 | si := symbolInformation{ |
| 536 | score: score, |
| 537 | symbol: strings.Join(symbolParts, ""), |
| 538 | kind: sym.Kind, |
| 539 | uri: i.uri, |
| 540 | rng: sym.Range, |
Alan Donovan | 85bf7a8 | 2022-11-18 12:03:11 -0500 | [diff] [blame] | 541 | container: string(i.md.PkgPath), |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 542 | } |
| 543 | store.store(si) |
| 544 | } |
| 545 | } |
| 546 | |
| 547 | type symbolStore struct { |
| 548 | res [maxSymbols]symbolInformation |
| 549 | } |
| 550 | |
| 551 | // store inserts si into the sorted results, if si has a high enough score. |
| 552 | func (sc *symbolStore) store(si symbolInformation) { |
| 553 | if sc.tooLow(si.score) { |
| 554 | return |
| 555 | } |
| 556 | insertAt := sort.Search(len(sc.res), func(i int) bool { |
| 557 | // Sort by score, then symbol length, and finally lexically. |
| 558 | if sc.res[i].score != si.score { |
| 559 | return sc.res[i].score < si.score |
| 560 | } |
| 561 | if len(sc.res[i].symbol) != len(si.symbol) { |
| 562 | return len(sc.res[i].symbol) > len(si.symbol) |
| 563 | } |
| 564 | return sc.res[i].symbol > si.symbol |
| 565 | }) |
| 566 | if insertAt < len(sc.res)-1 { |
| 567 | copy(sc.res[insertAt+1:], sc.res[insertAt:len(sc.res)-1]) |
| 568 | } |
| 569 | sc.res[insertAt] = si |
| 570 | } |
| 571 | |
| 572 | func (sc *symbolStore) tooLow(score float64) bool { |
| 573 | return score <= sc.res[len(sc.res)-1].score |
| 574 | } |
| 575 | |
| 576 | func (sc *symbolStore) results() []protocol.SymbolInformation { |
| 577 | var res []protocol.SymbolInformation |
| 578 | for _, si := range sc.res { |
| 579 | if si.score <= 0 { |
| 580 | return res |
| 581 | } |
| 582 | res = append(res, si.asProtocolSymbolInformation()) |
| 583 | } |
| 584 | return res |
| 585 | } |
| 586 | |
Robert Findley | b15dac2 | 2022-08-30 14:40:12 -0400 | [diff] [blame] | 587 | // symbolInformation is a cut-down version of protocol.SymbolInformation that |
| 588 | // allows struct values of this type to be used as map keys. |
| 589 | type symbolInformation struct { |
| 590 | score float64 |
| 591 | symbol string |
| 592 | container string |
| 593 | kind protocol.SymbolKind |
| 594 | uri span.URI |
| 595 | rng protocol.Range |
| 596 | } |
| 597 | |
| 598 | // asProtocolSymbolInformation converts s to a protocol.SymbolInformation value. |
| 599 | // |
| 600 | // TODO: work out how to handle tags if/when they are needed. |
| 601 | func (s symbolInformation) asProtocolSymbolInformation() protocol.SymbolInformation { |
| 602 | return protocol.SymbolInformation{ |
| 603 | Name: s.symbol, |
| 604 | Kind: s.kind, |
| 605 | Location: protocol.Location{ |
| 606 | URI: protocol.URIFromSpanURI(s.uri), |
| 607 | Range: s.rng, |
| 608 | }, |
| 609 | ContainerName: s.container, |
| 610 | } |
| 611 | } |