blob: 1ade7402421470c830508ee9e0d5d44e9b219f67 [file] [log] [blame]
Alan Donovan2fa6ca12022-12-29 13:04:08 -05001// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package methodsets defines an incremental, serializable index of
6// method-set information that allows efficient 'implements' queries
7// across packages of the workspace without using the type checker.
8//
9// This package provides only the "global" (all workspace) search; the
10// "local" search within a given package uses a different
11// implementation based on type-checker data structures for a single
Alan Donovan537c4aa2023-03-13 17:42:29 -040012// package plus variants; see ../implementation.go.
Alan Donovan2fa6ca12022-12-29 13:04:08 -050013// The local algorithm is more precise as it tests function-local types too.
14//
15// A global index of function-local types is challenging since they
16// may reference other local types, for which we would need to invent
17// stable names, an unsolved problem described in passing in Go issue
18// 57497. The global algorithm also does not index anonymous interface
19// types, even outside function bodies.
20//
21// Consequently, global results are not symmetric: applying the
22// operation twice may not get you back where you started.
23package methodsets
24
25// DESIGN
26//
27// See https://go.dev/cl/452060 for a minimal exposition of the algorithm.
28//
29// For each method, we compute a fingerprint: a string representing
30// the method name and type such that equal fingerprint strings mean
31// identical method types.
32//
33// For efficiency, the fingerprint is reduced to a single bit
34// of a uint64, so that the method set can be represented as
35// the union of those method bits (a uint64 bitmask).
36// Assignability thus reduces to a subset check on bitmasks
37// followed by equality checks on fingerprints.
38//
39// In earlier experiments, using 128-bit masks instead of 64 reduced
40// the number of candidates by about 2x. Using (like a Bloom filter) a
41// different hash function to compute a second 64-bit mask and
42// performing a second mask test reduced it by about 4x.
43// Neither had much effect on the running time, presumably because a
44// single 64-bit mask is quite effective. See CL 452060 for details.
45
46import (
47 "fmt"
48 "go/token"
49 "go/types"
50 "hash/crc32"
51 "strconv"
52 "strings"
53
Alan Donovan37c69d82023-01-20 17:57:21 -050054 "golang.org/x/tools/go/types/objectpath"
Alan Donovan03275ec2023-07-17 17:59:35 -040055 "golang.org/x/tools/gopls/internal/lsp/frob"
Alan Donovan2fa6ca12022-12-29 13:04:08 -050056 "golang.org/x/tools/gopls/internal/lsp/safetoken"
57 "golang.org/x/tools/internal/typeparams"
58)
59
60// An Index records the non-empty method sets of all package-level
61// types in a package in a form that permits assignability queries
62// without the type checker.
63type Index struct {
64 pkg gobPackage
65}
66
Robert Findley21d22562023-02-21 12:26:27 -050067// Decode decodes the given gob-encoded data as an Index.
68func Decode(data []byte) *Index {
69 var pkg gobPackage
Alan Donovan03275ec2023-07-17 17:59:35 -040070 packageCodec.Decode(data, &pkg)
Robert Findley21d22562023-02-21 12:26:27 -050071 return &Index{pkg}
72}
73
74// Encode encodes the receiver as gob-encoded data.
75func (index *Index) Encode() []byte {
Alan Donovan03275ec2023-07-17 17:59:35 -040076 return packageCodec.Encode(index.pkg)
Robert Findley21d22562023-02-21 12:26:27 -050077}
78
Alan Donovan2fa6ca12022-12-29 13:04:08 -050079// NewIndex returns a new index of method-set information for all
80// package-level types in the specified package.
81func NewIndex(fset *token.FileSet, pkg *types.Package) *Index {
82 return new(indexBuilder).build(fset, pkg)
83}
84
85// A Location records the extent of an identifier in byte-offset form.
86//
87// Conversion to protocol (UTF-16) form is done by the caller after a
88// search, not during index construction.
Alan Donovan2fa6ca12022-12-29 13:04:08 -050089type Location struct {
90 Filename string
91 Start, End int // byte offsets
92}
93
94// A Key represents the method set of a given type in a form suitable
95// to pass to the (*Index).Search method of many different Indexes.
96type Key struct {
97 mset gobMethodSet // note: lacks position information
98}
99
100// KeyOf returns the search key for the method sets of a given type.
101// It returns false if the type has no methods.
102func KeyOf(t types.Type) (Key, bool) {
Alan Donovan37c69d82023-01-20 17:57:21 -0500103 mset := methodSetInfo(t, nil)
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500104 if mset.Mask == 0 {
105 return Key{}, false // no methods
106 }
107 return Key{mset}, true
108}
109
Alan Donovan37c69d82023-01-20 17:57:21 -0500110// A Result reports a matching type or method in a method-set search.
111type Result struct {
112 Location Location // location of the type or method
113
114 // methods only:
115 PkgPath string // path of declaring package (may differ due to embedding)
116 ObjectPath objectpath.Path // path of method within declaring package
117}
118
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500119// Search reports each type that implements (or is implemented by) the
120// type that produced the search key. If methodID is nonempty, only
121// that method of each type is reported.
Alan Donovan37c69d82023-01-20 17:57:21 -0500122//
123// The result does not include the error.Error method.
124// TODO(adonovan): give this special case a more systematic treatment.
125func (index *Index) Search(key Key, methodID string) []Result {
126 var results []Result
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500127 for _, candidate := range index.pkg.MethodSets {
128 // Traditionally this feature doesn't report
129 // interface/interface elements of the relation.
130 // I think that's a mistake.
131 // TODO(adonovan): UX: change it, here and in the local implementation.
132 if candidate.IsInterface && key.mset.IsInterface {
133 continue
134 }
135 if !satisfies(candidate, key.mset) && !satisfies(key.mset, candidate) {
136 continue
137 }
138
139 if candidate.Tricky {
140 // If any interface method is tricky then extra
141 // checking may be needed to eliminate a false positive.
142 // TODO(adonovan): implement it.
143 }
144
145 if methodID == "" {
Alan Donovan37c69d82023-01-20 17:57:21 -0500146 results = append(results, Result{Location: index.location(candidate.Posn)})
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500147 } else {
148 for _, m := range candidate.Methods {
149 // Here we exploit knowledge of the shape of the fingerprint string.
150 if strings.HasPrefix(m.Fingerprint, methodID) &&
151 m.Fingerprint[len(methodID)] == '(' {
Alan Donovan37c69d82023-01-20 17:57:21 -0500152
153 // Don't report error.Error among the results:
154 // it has no true source location, no package,
155 // and is excluded from the xrefs index.
156 if m.PkgPath == 0 || m.ObjectPath == 0 {
157 if methodID != "Error" {
158 panic("missing info for" + methodID)
159 }
160 continue
161 }
162
163 results = append(results, Result{
164 Location: index.location(m.Posn),
165 PkgPath: index.pkg.Strings[m.PkgPath],
166 ObjectPath: objectpath.Path(index.pkg.Strings[m.ObjectPath]),
167 })
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500168 break
169 }
170 }
171 }
172 }
Alan Donovan37c69d82023-01-20 17:57:21 -0500173 return results
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500174}
175
176// satisfies does a fast check for whether x satisfies y.
177func satisfies(x, y gobMethodSet) bool {
178 return y.IsInterface && x.Mask&y.Mask == y.Mask && subset(y, x)
179}
180
181// subset reports whether method set x is a subset of y.
182func subset(x, y gobMethodSet) bool {
183outer:
184 for _, mx := range x.Methods {
185 for _, my := range y.Methods {
186 if mx.Sum == my.Sum && mx.Fingerprint == my.Fingerprint {
187 continue outer // found; try next x method
188 }
189 }
190 return false // method of x not found in y
191 }
192 return true // all methods of x found in y
193}
194
195func (index *Index) location(posn gobPosition) Location {
196 return Location{
Alan Donovan37c69d82023-01-20 17:57:21 -0500197 Filename: index.pkg.Strings[posn.File],
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500198 Start: posn.Offset,
199 End: posn.Offset + posn.Len,
200 }
201}
202
203// An indexBuilder builds an index for a single package.
204type indexBuilder struct {
205 gobPackage
Alan Donovan37c69d82023-01-20 17:57:21 -0500206 stringIndex map[string]int
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500207}
208
209// build adds to the index all package-level named types of the specified package.
210func (b *indexBuilder) build(fset *token.FileSet, pkg *types.Package) *Index {
Alan Donovan37c69d82023-01-20 17:57:21 -0500211 _ = b.string("") // 0 => ""
212
213 objectPos := func(obj types.Object) gobPosition {
214 posn := safetoken.StartPosition(fset, obj.Pos())
215 return gobPosition{b.string(posn.Filename), posn.Offset, len(obj.Name())}
216 }
217
Alan Donovana5338c92023-02-23 13:49:41 -0500218 objectpathFor := new(objectpath.Encoder).For
Alan Donovanf98fce22023-02-23 13:49:41 -0500219
Alan Donovan37c69d82023-01-20 17:57:21 -0500220 // setindexInfo sets the (Posn, PkgPath, ObjectPath) fields for each method declaration.
221 setIndexInfo := func(m *gobMethod, method *types.Func) {
222 // error.Error has empty Position, PkgPath, and ObjectPath.
223 if method.Pkg() == nil {
224 return
225 }
226
227 m.Posn = objectPos(method)
228 m.PkgPath = b.string(method.Pkg().Path())
229
230 // Instantiations of generic methods don't have an
231 // object path, so we use the generic.
Alan Donovanf98fce22023-02-23 13:49:41 -0500232 if p, err := objectpathFor(typeparams.OriginMethod(method)); err != nil {
Alan Donovan37c69d82023-01-20 17:57:21 -0500233 panic(err) // can't happen for a method of a package-level type
234 } else {
235 m.ObjectPath = b.string(string(p))
236 }
237 }
238
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500239 // We ignore aliases, though in principle they could define a
240 // struct{...} or interface{...} type, or an instantiation of
241 // a generic, that has a novel method set.
242 scope := pkg.Scope()
243 for _, name := range scope.Names() {
244 if tname, ok := scope.Lookup(name).(*types.TypeName); ok && !tname.IsAlias() {
Alan Donovan37c69d82023-01-20 17:57:21 -0500245 if mset := methodSetInfo(tname.Type(), setIndexInfo); mset.Mask != 0 {
246 mset.Posn = objectPos(tname)
247 // Only record types with non-trivial method sets.
248 b.MethodSets = append(b.MethodSets, mset)
249 }
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500250 }
251 }
252
253 return &Index{pkg: b.gobPackage}
254}
255
Alan Donovan37c69d82023-01-20 17:57:21 -0500256// string returns a small integer that encodes the string.
257func (b *indexBuilder) string(s string) int {
258 i, ok := b.stringIndex[s]
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500259 if !ok {
Alan Donovan37c69d82023-01-20 17:57:21 -0500260 i = len(b.Strings)
261 if b.stringIndex == nil {
262 b.stringIndex = make(map[string]int)
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500263 }
Alan Donovan37c69d82023-01-20 17:57:21 -0500264 b.stringIndex[s] = i
265 b.Strings = append(b.Strings, s)
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500266 }
267 return i
268}
269
Alan Donovan37c69d82023-01-20 17:57:21 -0500270// methodSetInfo returns the method-set fingerprint of a type.
271// It calls the optional setIndexInfo function for each gobMethod.
272// This is used during index construction, but not search (KeyOf),
273// to store extra information.
274func methodSetInfo(t types.Type, setIndexInfo func(*gobMethod, *types.Func)) gobMethodSet {
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500275 // For non-interface types, use *T
276 // (if T is not already a pointer)
277 // since it may have more methods.
278 mset := types.NewMethodSet(EnsurePointer(t))
279
280 // Convert the method set into a compact summary.
281 var mask uint64
282 tricky := false
283 methods := make([]gobMethod, mset.Len())
284 for i := 0; i < mset.Len(); i++ {
285 m := mset.At(i).Obj().(*types.Func)
286 fp, isTricky := fingerprint(m)
287 if isTricky {
288 tricky = true
289 }
290 sum := crc32.ChecksumIEEE([]byte(fp))
Alan Donovan37c69d82023-01-20 17:57:21 -0500291 methods[i] = gobMethod{Fingerprint: fp, Sum: sum}
292 if setIndexInfo != nil {
293 setIndexInfo(&methods[i], m) // set Position, PkgPath, ObjectPath
294 }
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500295 mask |= 1 << uint64(((sum>>24)^(sum>>16)^(sum>>8)^sum)&0x3f)
296 }
Alan Donovan37c69d82023-01-20 17:57:21 -0500297 return gobMethodSet{
298 IsInterface: types.IsInterface(t),
299 Tricky: tricky,
300 Mask: mask,
301 Methods: methods,
302 }
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500303}
304
305// EnsurePointer wraps T in a types.Pointer if T is a named, non-interface type.
306// This is useful to make sure you consider a named type's full method set.
307func EnsurePointer(T types.Type) types.Type {
308 if _, ok := T.(*types.Named); ok && !types.IsInterface(T) {
309 return types.NewPointer(T)
310 }
311
312 return T
313}
314
315// fingerprint returns an encoding of a method signature such that two
316// methods with equal encodings have identical types, except for a few
317// tricky types whose encodings may spuriously match and whose exact
318// identity computation requires the type checker to eliminate false
319// positives (which are rare). The boolean result indicates whether
320// the result was one of these tricky types.
321//
322// In the standard library, 99.8% of package-level types have a
323// non-tricky method-set. The most common exceptions are due to type
324// parameters.
325//
326// The fingerprint string starts with method.Id() + "(".
327func fingerprint(method *types.Func) (string, bool) {
328 var buf strings.Builder
329 tricky := false
330 var fprint func(t types.Type)
331 fprint = func(t types.Type) {
332 switch t := t.(type) {
333 case *types.Named:
334 tname := t.Obj()
335 if tname.Pkg() != nil {
336 buf.WriteString(strconv.Quote(tname.Pkg().Path()))
337 buf.WriteByte('.')
Rob Findley947adca2023-05-31 22:10:59 -0400338 } else if tname.Name() != "error" && tname.Name() != "comparable" {
339 panic(tname) // error and comparable the only named types with no package
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500340 }
341 buf.WriteString(tname.Name())
342
343 case *types.Array:
344 fmt.Fprintf(&buf, "[%d]", t.Len())
345 fprint(t.Elem())
346
347 case *types.Slice:
348 buf.WriteString("[]")
349 fprint(t.Elem())
350
351 case *types.Pointer:
352 buf.WriteByte('*')
353 fprint(t.Elem())
354
355 case *types.Map:
356 buf.WriteString("map[")
357 fprint(t.Key())
358 buf.WriteByte(']')
359 fprint(t.Elem())
360
361 case *types.Chan:
362 switch t.Dir() {
363 case types.SendRecv:
364 buf.WriteString("chan ")
365 case types.SendOnly:
366 buf.WriteString("<-chan ")
367 case types.RecvOnly:
368 buf.WriteString("chan<- ")
369 }
370 fprint(t.Elem())
371
372 case *types.Tuple:
373 buf.WriteByte('(')
374 for i := 0; i < t.Len(); i++ {
375 if i > 0 {
376 buf.WriteByte(',')
377 }
378 fprint(t.At(i).Type())
379 }
380 buf.WriteByte(')')
381
382 case *types.Basic:
383 // Use canonical names for uint8 and int32 aliases.
384 switch t.Kind() {
385 case types.Byte:
386 buf.WriteString("byte")
387 case types.Rune:
388 buf.WriteString("rune")
389 default:
390 buf.WriteString(t.String())
391 }
392
393 case *types.Signature:
394 buf.WriteString("func")
395 fprint(t.Params())
396 if t.Variadic() {
397 buf.WriteString("...") // not quite Go syntax
398 }
399 fprint(t.Results())
400
401 case *types.Struct:
402 // Non-empty unnamed struct types in method
403 // signatures are vanishingly rare.
404 buf.WriteString("struct{")
405 for i := 0; i < t.NumFields(); i++ {
406 if i > 0 {
407 buf.WriteByte(';')
408 }
409 f := t.Field(i)
410 // This isn't quite right for embedded type aliases.
411 // (See types.TypeString(StructType) and #44410 for context.)
412 // But this is vanishingly rare.
413 if !f.Embedded() {
414 buf.WriteString(f.Id())
415 buf.WriteByte(' ')
416 }
417 fprint(f.Type())
418 if tag := t.Tag(i); tag != "" {
419 buf.WriteByte(' ')
420 buf.WriteString(strconv.Quote(tag))
421 }
422 }
423 buf.WriteString("}")
424
425 case *types.Interface:
426 if t.NumMethods() == 0 {
427 buf.WriteString("any") // common case
428 } else {
429 // Interface assignability is particularly
430 // tricky due to the possibility of recursion.
431 tricky = true
432 // We could still give more disambiguating precision
433 // than "..." if we wanted to.
434 buf.WriteString("interface{...}")
435 }
436
437 case *typeparams.TypeParam:
438 tricky = true
439 // TODO(adonovan): refine this by adding a numeric suffix
440 // indicating the index among the receiver type's parameters.
441 buf.WriteByte('?')
442
443 default: // incl. *types.Union
444 panic(t)
445 }
446 }
447
448 buf.WriteString(method.Id()) // e.g. "pkg.Type"
449 sig := method.Type().(*types.Signature)
450 fprint(sig.Params())
451 fprint(sig.Results())
452 return buf.String(), tricky
453}
454
455// -- serial format of index --
456
Alan Donovan03275ec2023-07-17 17:59:35 -0400457// (The name says gob but in fact we use frob.)
458// var packageCodec = frob.For[gobPackage]()
459var packageCodec = frob.CodecFor117(new(gobPackage))
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500460
461// A gobPackage records the method set of each package-level type for a single package.
462type gobPackage struct {
Alan Donovan37c69d82023-01-20 17:57:21 -0500463 Strings []string // index of strings used by gobPosition.File, gobMethod.{Pkg,Object}Path
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500464 MethodSets []gobMethodSet
465}
466
467// A gobMethodSet records the method set of a single type.
468type gobMethodSet struct {
469 Posn gobPosition
470 IsInterface bool
471 Tricky bool // at least one method is tricky; assignability requires go/types
472 Mask uint64 // mask with 1 bit from each of methods[*].sum
473 Methods []gobMethod
474}
475
476// A gobMethod records the name, type, and position of a single method.
477type gobMethod struct {
Alan Donovan37c69d82023-01-20 17:57:21 -0500478 Fingerprint string // string of form "methodID(params...)(results)"
479 Sum uint32 // checksum of fingerprint
480
481 // index records only (zero in KeyOf; also for index of error.Error).
482 Posn gobPosition // location of method declaration
483 PkgPath int // path of package containing method declaration
484 ObjectPath int // object path of method relative to PkgPath
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500485}
486
487// A gobPosition records the file, offset, and length of an identifier.
488type gobPosition struct {
Robert Findley21d22562023-02-21 12:26:27 -0500489 File int // index into gobPackage.Strings
Alan Donovan2fa6ca12022-12-29 13:04:08 -0500490 Offset, Len int // in bytes
491}