blob: f29784095d3885e18baae32d28ee595eedd24b83 [file] [log] [blame]
Alan Donovan2fa6ca12022-12-29 13:04:08 -05001// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package methodsets defines an incremental, serializable index of
6// method-set information that allows efficient 'implements' queries
7// across packages of the workspace without using the type checker.
8//
9// This package provides only the "global" (all workspace) search; the
10// "local" search within a given package uses a different
11// implementation based on type-checker data structures for a single
12// package plus variants; see ../implementation2.go.
13// The local algorithm is more precise as it tests function-local types too.
14//
15// A global index of function-local types is challenging since they
16// may reference other local types, for which we would need to invent
17// stable names, an unsolved problem described in passing in Go issue
18// 57497. The global algorithm also does not index anonymous interface
19// types, even outside function bodies.
20//
21// Consequently, global results are not symmetric: applying the
22// operation twice may not get you back where you started.
23package methodsets
24
25// DESIGN
26//
27// See https://go.dev/cl/452060 for a minimal exposition of the algorithm.
28//
29// For each method, we compute a fingerprint: a string representing
30// the method name and type such that equal fingerprint strings mean
31// identical method types.
32//
33// For efficiency, the fingerprint is reduced to a single bit
34// of a uint64, so that the method set can be represented as
35// the union of those method bits (a uint64 bitmask).
36// Assignability thus reduces to a subset check on bitmasks
37// followed by equality checks on fingerprints.
38//
39// In earlier experiments, using 128-bit masks instead of 64 reduced
40// the number of candidates by about 2x. Using (like a Bloom filter) a
41// different hash function to compute a second 64-bit mask and
42// performing a second mask test reduced it by about 4x.
43// Neither had much effect on the running time, presumably because a
44// single 64-bit mask is quite effective. See CL 452060 for details.
45
46import (
47 "fmt"
48 "go/token"
49 "go/types"
50 "hash/crc32"
51 "strconv"
52 "strings"
53
Alan Donovan37c69d82023-01-20 17:57:21 -050054 "golang.org/x/tools/go/types/objectpath"
Alan Donovan2fa6ca12022-12-29 13:04:08 -050055 "golang.org/x/tools/gopls/internal/lsp/safetoken"
56 "golang.org/x/tools/internal/typeparams"
Alan Donovanf98fce22023-02-23 13:49:41 -050057 "golang.org/x/tools/internal/typesinternal"
Alan Donovan2fa6ca12022-12-29 13:04:08 -050058)
59
60// An Index records the non-empty method sets of all package-level
61// types in a package in a form that permits assignability queries
62// without the type checker.
63type Index struct {
64 pkg gobPackage
65}
66
67// NewIndex returns a new index of method-set information for all
68// package-level types in the specified package.
69func NewIndex(fset *token.FileSet, pkg *types.Package) *Index {
70 return new(indexBuilder).build(fset, pkg)
71}
72
73// A Location records the extent of an identifier in byte-offset form.
74//
75// Conversion to protocol (UTF-16) form is done by the caller after a
76// search, not during index construction.
Alan Donovan2fa6ca12022-12-29 13:04:08 -050077type Location struct {
78 Filename string
79 Start, End int // byte offsets
80}
81
82// A Key represents the method set of a given type in a form suitable
83// to pass to the (*Index).Search method of many different Indexes.
84type Key struct {
85 mset gobMethodSet // note: lacks position information
86}
87
88// KeyOf returns the search key for the method sets of a given type.
89// It returns false if the type has no methods.
90func KeyOf(t types.Type) (Key, bool) {
Alan Donovan37c69d82023-01-20 17:57:21 -050091 mset := methodSetInfo(t, nil)
Alan Donovan2fa6ca12022-12-29 13:04:08 -050092 if mset.Mask == 0 {
93 return Key{}, false // no methods
94 }
95 return Key{mset}, true
96}
97
Alan Donovan37c69d82023-01-20 17:57:21 -050098// A Result reports a matching type or method in a method-set search.
99type Result struct {
100 Location Location // location of the type or method
101
102 // methods only:
103 PkgPath string // path of declaring package (may differ due to embedding)
104 ObjectPath objectpath.Path // path of method within declaring package
105}
106
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500107// Search reports each type that implements (or is implemented by) the
108// type that produced the search key. If methodID is nonempty, only
109// that method of each type is reported.
Alan Donovan37c69d82023-01-20 17:57:21 -0500110//
111// The result does not include the error.Error method.
112// TODO(adonovan): give this special case a more systematic treatment.
113func (index *Index) Search(key Key, methodID string) []Result {
114 var results []Result
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500115 for _, candidate := range index.pkg.MethodSets {
116 // Traditionally this feature doesn't report
117 // interface/interface elements of the relation.
118 // I think that's a mistake.
119 // TODO(adonovan): UX: change it, here and in the local implementation.
120 if candidate.IsInterface && key.mset.IsInterface {
121 continue
122 }
123 if !satisfies(candidate, key.mset) && !satisfies(key.mset, candidate) {
124 continue
125 }
126
127 if candidate.Tricky {
128 // If any interface method is tricky then extra
129 // checking may be needed to eliminate a false positive.
130 // TODO(adonovan): implement it.
131 }
132
133 if methodID == "" {
Alan Donovan37c69d82023-01-20 17:57:21 -0500134 results = append(results, Result{Location: index.location(candidate.Posn)})
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500135 } else {
136 for _, m := range candidate.Methods {
137 // Here we exploit knowledge of the shape of the fingerprint string.
138 if strings.HasPrefix(m.Fingerprint, methodID) &&
139 m.Fingerprint[len(methodID)] == '(' {
Alan Donovan37c69d82023-01-20 17:57:21 -0500140
141 // Don't report error.Error among the results:
142 // it has no true source location, no package,
143 // and is excluded from the xrefs index.
144 if m.PkgPath == 0 || m.ObjectPath == 0 {
145 if methodID != "Error" {
146 panic("missing info for" + methodID)
147 }
148 continue
149 }
150
151 results = append(results, Result{
152 Location: index.location(m.Posn),
153 PkgPath: index.pkg.Strings[m.PkgPath],
154 ObjectPath: objectpath.Path(index.pkg.Strings[m.ObjectPath]),
155 })
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500156 break
157 }
158 }
159 }
160 }
Alan Donovan37c69d82023-01-20 17:57:21 -0500161 return results
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500162}
163
164// satisfies does a fast check for whether x satisfies y.
165func satisfies(x, y gobMethodSet) bool {
166 return y.IsInterface && x.Mask&y.Mask == y.Mask && subset(y, x)
167}
168
169// subset reports whether method set x is a subset of y.
170func subset(x, y gobMethodSet) bool {
171outer:
172 for _, mx := range x.Methods {
173 for _, my := range y.Methods {
174 if mx.Sum == my.Sum && mx.Fingerprint == my.Fingerprint {
175 continue outer // found; try next x method
176 }
177 }
178 return false // method of x not found in y
179 }
180 return true // all methods of x found in y
181}
182
183func (index *Index) location(posn gobPosition) Location {
184 return Location{
Alan Donovan37c69d82023-01-20 17:57:21 -0500185 Filename: index.pkg.Strings[posn.File],
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500186 Start: posn.Offset,
187 End: posn.Offset + posn.Len,
188 }
189}
190
191// An indexBuilder builds an index for a single package.
192type indexBuilder struct {
193 gobPackage
Alan Donovan37c69d82023-01-20 17:57:21 -0500194 stringIndex map[string]int
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500195}
196
197// build adds to the index all package-level named types of the specified package.
198func (b *indexBuilder) build(fset *token.FileSet, pkg *types.Package) *Index {
Alan Donovan37c69d82023-01-20 17:57:21 -0500199 _ = b.string("") // 0 => ""
200
201 objectPos := func(obj types.Object) gobPosition {
202 posn := safetoken.StartPosition(fset, obj.Pos())
203 return gobPosition{b.string(posn.Filename), posn.Offset, len(obj.Name())}
204 }
205
Alan Donovanf98fce22023-02-23 13:49:41 -0500206 objectpathFor := typesinternal.NewObjectpathFunc()
207
Alan Donovan37c69d82023-01-20 17:57:21 -0500208 // setindexInfo sets the (Posn, PkgPath, ObjectPath) fields for each method declaration.
209 setIndexInfo := func(m *gobMethod, method *types.Func) {
210 // error.Error has empty Position, PkgPath, and ObjectPath.
211 if method.Pkg() == nil {
212 return
213 }
214
215 m.Posn = objectPos(method)
216 m.PkgPath = b.string(method.Pkg().Path())
217
218 // Instantiations of generic methods don't have an
219 // object path, so we use the generic.
Alan Donovanf98fce22023-02-23 13:49:41 -0500220 if p, err := objectpathFor(typeparams.OriginMethod(method)); err != nil {
Alan Donovan37c69d82023-01-20 17:57:21 -0500221 panic(err) // can't happen for a method of a package-level type
222 } else {
223 m.ObjectPath = b.string(string(p))
224 }
225 }
226
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500227 // We ignore aliases, though in principle they could define a
228 // struct{...} or interface{...} type, or an instantiation of
229 // a generic, that has a novel method set.
230 scope := pkg.Scope()
231 for _, name := range scope.Names() {
232 if tname, ok := scope.Lookup(name).(*types.TypeName); ok && !tname.IsAlias() {
Alan Donovan37c69d82023-01-20 17:57:21 -0500233 if mset := methodSetInfo(tname.Type(), setIndexInfo); mset.Mask != 0 {
234 mset.Posn = objectPos(tname)
235 // Only record types with non-trivial method sets.
236 b.MethodSets = append(b.MethodSets, mset)
237 }
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500238 }
239 }
240
241 return &Index{pkg: b.gobPackage}
242}
243
Alan Donovan37c69d82023-01-20 17:57:21 -0500244// string returns a small integer that encodes the string.
245func (b *indexBuilder) string(s string) int {
246 i, ok := b.stringIndex[s]
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500247 if !ok {
Alan Donovan37c69d82023-01-20 17:57:21 -0500248 i = len(b.Strings)
249 if b.stringIndex == nil {
250 b.stringIndex = make(map[string]int)
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500251 }
Alan Donovan37c69d82023-01-20 17:57:21 -0500252 b.stringIndex[s] = i
253 b.Strings = append(b.Strings, s)
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500254 }
255 return i
256}
257
Alan Donovan37c69d82023-01-20 17:57:21 -0500258// methodSetInfo returns the method-set fingerprint of a type.
259// It calls the optional setIndexInfo function for each gobMethod.
260// This is used during index construction, but not search (KeyOf),
261// to store extra information.
262func methodSetInfo(t types.Type, setIndexInfo func(*gobMethod, *types.Func)) gobMethodSet {
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500263 // For non-interface types, use *T
264 // (if T is not already a pointer)
265 // since it may have more methods.
266 mset := types.NewMethodSet(EnsurePointer(t))
267
268 // Convert the method set into a compact summary.
269 var mask uint64
270 tricky := false
271 methods := make([]gobMethod, mset.Len())
272 for i := 0; i < mset.Len(); i++ {
273 m := mset.At(i).Obj().(*types.Func)
274 fp, isTricky := fingerprint(m)
275 if isTricky {
276 tricky = true
277 }
278 sum := crc32.ChecksumIEEE([]byte(fp))
Alan Donovan37c69d82023-01-20 17:57:21 -0500279 methods[i] = gobMethod{Fingerprint: fp, Sum: sum}
280 if setIndexInfo != nil {
281 setIndexInfo(&methods[i], m) // set Position, PkgPath, ObjectPath
282 }
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500283 mask |= 1 << uint64(((sum>>24)^(sum>>16)^(sum>>8)^sum)&0x3f)
284 }
Alan Donovan37c69d82023-01-20 17:57:21 -0500285 return gobMethodSet{
286 IsInterface: types.IsInterface(t),
287 Tricky: tricky,
288 Mask: mask,
289 Methods: methods,
290 }
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500291}
292
293// EnsurePointer wraps T in a types.Pointer if T is a named, non-interface type.
294// This is useful to make sure you consider a named type's full method set.
295func EnsurePointer(T types.Type) types.Type {
296 if _, ok := T.(*types.Named); ok && !types.IsInterface(T) {
297 return types.NewPointer(T)
298 }
299
300 return T
301}
302
303// fingerprint returns an encoding of a method signature such that two
304// methods with equal encodings have identical types, except for a few
305// tricky types whose encodings may spuriously match and whose exact
306// identity computation requires the type checker to eliminate false
307// positives (which are rare). The boolean result indicates whether
308// the result was one of these tricky types.
309//
310// In the standard library, 99.8% of package-level types have a
311// non-tricky method-set. The most common exceptions are due to type
312// parameters.
313//
314// The fingerprint string starts with method.Id() + "(".
315func fingerprint(method *types.Func) (string, bool) {
316 var buf strings.Builder
317 tricky := false
318 var fprint func(t types.Type)
319 fprint = func(t types.Type) {
320 switch t := t.(type) {
321 case *types.Named:
322 tname := t.Obj()
323 if tname.Pkg() != nil {
324 buf.WriteString(strconv.Quote(tname.Pkg().Path()))
325 buf.WriteByte('.')
326 } else if tname.Name() != "error" {
327 panic(tname) // error is the only named type with no package
328 }
329 buf.WriteString(tname.Name())
330
331 case *types.Array:
332 fmt.Fprintf(&buf, "[%d]", t.Len())
333 fprint(t.Elem())
334
335 case *types.Slice:
336 buf.WriteString("[]")
337 fprint(t.Elem())
338
339 case *types.Pointer:
340 buf.WriteByte('*')
341 fprint(t.Elem())
342
343 case *types.Map:
344 buf.WriteString("map[")
345 fprint(t.Key())
346 buf.WriteByte(']')
347 fprint(t.Elem())
348
349 case *types.Chan:
350 switch t.Dir() {
351 case types.SendRecv:
352 buf.WriteString("chan ")
353 case types.SendOnly:
354 buf.WriteString("<-chan ")
355 case types.RecvOnly:
356 buf.WriteString("chan<- ")
357 }
358 fprint(t.Elem())
359
360 case *types.Tuple:
361 buf.WriteByte('(')
362 for i := 0; i < t.Len(); i++ {
363 if i > 0 {
364 buf.WriteByte(',')
365 }
366 fprint(t.At(i).Type())
367 }
368 buf.WriteByte(')')
369
370 case *types.Basic:
371 // Use canonical names for uint8 and int32 aliases.
372 switch t.Kind() {
373 case types.Byte:
374 buf.WriteString("byte")
375 case types.Rune:
376 buf.WriteString("rune")
377 default:
378 buf.WriteString(t.String())
379 }
380
381 case *types.Signature:
382 buf.WriteString("func")
383 fprint(t.Params())
384 if t.Variadic() {
385 buf.WriteString("...") // not quite Go syntax
386 }
387 fprint(t.Results())
388
389 case *types.Struct:
390 // Non-empty unnamed struct types in method
391 // signatures are vanishingly rare.
392 buf.WriteString("struct{")
393 for i := 0; i < t.NumFields(); i++ {
394 if i > 0 {
395 buf.WriteByte(';')
396 }
397 f := t.Field(i)
398 // This isn't quite right for embedded type aliases.
399 // (See types.TypeString(StructType) and #44410 for context.)
400 // But this is vanishingly rare.
401 if !f.Embedded() {
402 buf.WriteString(f.Id())
403 buf.WriteByte(' ')
404 }
405 fprint(f.Type())
406 if tag := t.Tag(i); tag != "" {
407 buf.WriteByte(' ')
408 buf.WriteString(strconv.Quote(tag))
409 }
410 }
411 buf.WriteString("}")
412
413 case *types.Interface:
414 if t.NumMethods() == 0 {
415 buf.WriteString("any") // common case
416 } else {
417 // Interface assignability is particularly
418 // tricky due to the possibility of recursion.
419 tricky = true
420 // We could still give more disambiguating precision
421 // than "..." if we wanted to.
422 buf.WriteString("interface{...}")
423 }
424
425 case *typeparams.TypeParam:
426 tricky = true
427 // TODO(adonovan): refine this by adding a numeric suffix
428 // indicating the index among the receiver type's parameters.
429 buf.WriteByte('?')
430
431 default: // incl. *types.Union
432 panic(t)
433 }
434 }
435
436 buf.WriteString(method.Id()) // e.g. "pkg.Type"
437 sig := method.Type().(*types.Signature)
438 fprint(sig.Params())
439 fprint(sig.Results())
440 return buf.String(), tricky
441}
442
443// -- serial format of index --
444
445// The cost of gob encoding and decoding for most packages in x/tools
446// is under 50us, with occasional peaks of around 1-3ms.
447// The encoded indexes are around 1KB-50KB.
448
449// A gobPackage records the method set of each package-level type for a single package.
450type gobPackage struct {
Alan Donovan37c69d82023-01-20 17:57:21 -0500451 Strings []string // index of strings used by gobPosition.File, gobMethod.{Pkg,Object}Path
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500452 MethodSets []gobMethodSet
453}
454
455// A gobMethodSet records the method set of a single type.
456type gobMethodSet struct {
457 Posn gobPosition
458 IsInterface bool
459 Tricky bool // at least one method is tricky; assignability requires go/types
460 Mask uint64 // mask with 1 bit from each of methods[*].sum
461 Methods []gobMethod
462}
463
464// A gobMethod records the name, type, and position of a single method.
465type gobMethod struct {
Alan Donovan37c69d82023-01-20 17:57:21 -0500466 Fingerprint string // string of form "methodID(params...)(results)"
467 Sum uint32 // checksum of fingerprint
468
469 // index records only (zero in KeyOf; also for index of error.Error).
470 Posn gobPosition // location of method declaration
471 PkgPath int // path of package containing method declaration
472 ObjectPath int // object path of method relative to PkgPath
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500473}
474
475// A gobPosition records the file, offset, and length of an identifier.
476type gobPosition struct {
Alan Donovan37c69d82023-01-20 17:57:21 -0500477 File int // index into gopPackage.Strings
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500478 Offset, Len int // in bytes
479}