blob: 81d5d0070dab5d33413861e3ad375f7e48a01ca9 [file] [log] [blame]
Alan Donovan2fa6ca12022-12-29 13:04:08 -05001// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package methodsets defines an incremental, serializable index of
6// method-set information that allows efficient 'implements' queries
7// across packages of the workspace without using the type checker.
8//
9// This package provides only the "global" (all workspace) search; the
10// "local" search within a given package uses a different
11// implementation based on type-checker data structures for a single
12// package plus variants; see ../implementation2.go.
13// The local algorithm is more precise as it tests function-local types too.
14//
15// A global index of function-local types is challenging since they
16// may reference other local types, for which we would need to invent
17// stable names, an unsolved problem described in passing in Go issue
18// 57497. The global algorithm also does not index anonymous interface
19// types, even outside function bodies.
20//
21// Consequently, global results are not symmetric: applying the
22// operation twice may not get you back where you started.
23package methodsets
24
25// DESIGN
26//
27// See https://go.dev/cl/452060 for a minimal exposition of the algorithm.
28//
29// For each method, we compute a fingerprint: a string representing
30// the method name and type such that equal fingerprint strings mean
31// identical method types.
32//
33// For efficiency, the fingerprint is reduced to a single bit
34// of a uint64, so that the method set can be represented as
35// the union of those method bits (a uint64 bitmask).
36// Assignability thus reduces to a subset check on bitmasks
37// followed by equality checks on fingerprints.
38//
39// In earlier experiments, using 128-bit masks instead of 64 reduced
40// the number of candidates by about 2x. Using (like a Bloom filter) a
41// different hash function to compute a second 64-bit mask and
42// performing a second mask test reduced it by about 4x.
43// Neither had much effect on the running time, presumably because a
44// single 64-bit mask is quite effective. See CL 452060 for details.
45
46import (
47 "fmt"
48 "go/token"
49 "go/types"
50 "hash/crc32"
51 "strconv"
52 "strings"
53
54 "golang.org/x/tools/gopls/internal/lsp/safetoken"
55 "golang.org/x/tools/internal/typeparams"
56)
57
58// An Index records the non-empty method sets of all package-level
59// types in a package in a form that permits assignability queries
60// without the type checker.
61type Index struct {
62 pkg gobPackage
63}
64
65// NewIndex returns a new index of method-set information for all
66// package-level types in the specified package.
67func NewIndex(fset *token.FileSet, pkg *types.Package) *Index {
68 return new(indexBuilder).build(fset, pkg)
69}
70
71// A Location records the extent of an identifier in byte-offset form.
72//
73// Conversion to protocol (UTF-16) form is done by the caller after a
74// search, not during index construction.
75// TODO(adonovan): opt: reconsider this choice, if FileHandles, not
76// ParsedGoFiles were to provide ColumnMapper-like functionality.
77// (Column mapping is currently associated with parsing,
78// but non-parsed and even non-Go files need it too.)
79// Since type checking requires reading (but not parsing) all
80// dependencies' Go files, we could do the conversion at type-checking
81// time at little extra cost in that case.
82type Location struct {
83 Filename string
84 Start, End int // byte offsets
85}
86
87// A Key represents the method set of a given type in a form suitable
88// to pass to the (*Index).Search method of many different Indexes.
89type Key struct {
90 mset gobMethodSet // note: lacks position information
91}
92
93// KeyOf returns the search key for the method sets of a given type.
94// It returns false if the type has no methods.
95func KeyOf(t types.Type) (Key, bool) {
96 mset := methodSetInfo(func(types.Object) (_ gobPosition) { return }, t, gobPosition{})
97 if mset.Mask == 0 {
98 return Key{}, false // no methods
99 }
100 return Key{mset}, true
101}
102
103// Search reports each type that implements (or is implemented by) the
104// type that produced the search key. If methodID is nonempty, only
105// that method of each type is reported.
106// The result is the location of each type or method.
107func (index *Index) Search(key Key, methodID string) []Location {
108 var locs []Location
109 for _, candidate := range index.pkg.MethodSets {
110 // Traditionally this feature doesn't report
111 // interface/interface elements of the relation.
112 // I think that's a mistake.
113 // TODO(adonovan): UX: change it, here and in the local implementation.
114 if candidate.IsInterface && key.mset.IsInterface {
115 continue
116 }
117 if !satisfies(candidate, key.mset) && !satisfies(key.mset, candidate) {
118 continue
119 }
120
121 if candidate.Tricky {
122 // If any interface method is tricky then extra
123 // checking may be needed to eliminate a false positive.
124 // TODO(adonovan): implement it.
125 }
126
127 if methodID == "" {
128 locs = append(locs, index.location(candidate.Posn))
129 } else {
130 for _, m := range candidate.Methods {
131 // Here we exploit knowledge of the shape of the fingerprint string.
132 if strings.HasPrefix(m.Fingerprint, methodID) &&
133 m.Fingerprint[len(methodID)] == '(' {
134 locs = append(locs, index.location(m.Posn))
135 break
136 }
137 }
138 }
139 }
140 return locs
141}
142
143// satisfies does a fast check for whether x satisfies y.
144func satisfies(x, y gobMethodSet) bool {
145 return y.IsInterface && x.Mask&y.Mask == y.Mask && subset(y, x)
146}
147
148// subset reports whether method set x is a subset of y.
149func subset(x, y gobMethodSet) bool {
150outer:
151 for _, mx := range x.Methods {
152 for _, my := range y.Methods {
153 if mx.Sum == my.Sum && mx.Fingerprint == my.Fingerprint {
154 continue outer // found; try next x method
155 }
156 }
157 return false // method of x not found in y
158 }
159 return true // all methods of x found in y
160}
161
162func (index *Index) location(posn gobPosition) Location {
163 return Location{
164 Filename: index.pkg.Filenames[posn.File],
165 Start: posn.Offset,
166 End: posn.Offset + posn.Len,
167 }
168}
169
170// An indexBuilder builds an index for a single package.
171type indexBuilder struct {
172 gobPackage
173 filenameIndex map[string]int
174}
175
176// build adds to the index all package-level named types of the specified package.
177func (b *indexBuilder) build(fset *token.FileSet, pkg *types.Package) *Index {
178 // We ignore aliases, though in principle they could define a
179 // struct{...} or interface{...} type, or an instantiation of
180 // a generic, that has a novel method set.
181 scope := pkg.Scope()
182 for _, name := range scope.Names() {
183 if tname, ok := scope.Lookup(name).(*types.TypeName); ok && !tname.IsAlias() {
184 b.add(fset, tname)
185 }
186 }
187
188 return &Index{pkg: b.gobPackage}
189}
190
191func (b *indexBuilder) add(fset *token.FileSet, tname *types.TypeName) {
192 objectPos := func(obj types.Object) gobPosition {
193 posn := safetoken.StartPosition(fset, obj.Pos())
194 return gobPosition{b.fileIndex(posn.Filename), posn.Offset, len(obj.Name())}
195 }
196 if mset := methodSetInfo(objectPos, tname.Type(), objectPos(tname)); mset.Mask != 0 {
197 // Only record types with non-trivial method sets.
198 b.MethodSets = append(b.MethodSets, mset)
199 }
200}
201
202// fileIndex returns a small integer that encodes the file name.
203func (b *indexBuilder) fileIndex(filename string) int {
204 i, ok := b.filenameIndex[filename]
205 if !ok {
206 i = len(b.Filenames)
207 if b.filenameIndex == nil {
208 b.filenameIndex = make(map[string]int)
209 }
210 b.filenameIndex[filename] = i
211 b.Filenames = append(b.Filenames, filename)
212 }
213 return i
214}
215
216// methodSetInfo returns the method-set fingerprint
217// of a type and records its position (typePosn)
218// and the position of each of its methods m,
219// as provided by objectPos(m).
220func methodSetInfo(objectPos func(types.Object) gobPosition, t types.Type, typePosn gobPosition) gobMethodSet {
221 // For non-interface types, use *T
222 // (if T is not already a pointer)
223 // since it may have more methods.
224 mset := types.NewMethodSet(EnsurePointer(t))
225
226 // Convert the method set into a compact summary.
227 var mask uint64
228 tricky := false
229 methods := make([]gobMethod, mset.Len())
230 for i := 0; i < mset.Len(); i++ {
231 m := mset.At(i).Obj().(*types.Func)
232 fp, isTricky := fingerprint(m)
233 if isTricky {
234 tricky = true
235 }
236 sum := crc32.ChecksumIEEE([]byte(fp))
237 methods[i] = gobMethod{fp, sum, objectPos(m)}
238 mask |= 1 << uint64(((sum>>24)^(sum>>16)^(sum>>8)^sum)&0x3f)
239 }
240 return gobMethodSet{typePosn, types.IsInterface(t), tricky, mask, methods}
241}
242
243// EnsurePointer wraps T in a types.Pointer if T is a named, non-interface type.
244// This is useful to make sure you consider a named type's full method set.
245func EnsurePointer(T types.Type) types.Type {
246 if _, ok := T.(*types.Named); ok && !types.IsInterface(T) {
247 return types.NewPointer(T)
248 }
249
250 return T
251}
252
253// fingerprint returns an encoding of a method signature such that two
254// methods with equal encodings have identical types, except for a few
255// tricky types whose encodings may spuriously match and whose exact
256// identity computation requires the type checker to eliminate false
257// positives (which are rare). The boolean result indicates whether
258// the result was one of these tricky types.
259//
260// In the standard library, 99.8% of package-level types have a
261// non-tricky method-set. The most common exceptions are due to type
262// parameters.
263//
264// The fingerprint string starts with method.Id() + "(".
265func fingerprint(method *types.Func) (string, bool) {
266 var buf strings.Builder
267 tricky := false
268 var fprint func(t types.Type)
269 fprint = func(t types.Type) {
270 switch t := t.(type) {
271 case *types.Named:
272 tname := t.Obj()
273 if tname.Pkg() != nil {
274 buf.WriteString(strconv.Quote(tname.Pkg().Path()))
275 buf.WriteByte('.')
276 } else if tname.Name() != "error" {
277 panic(tname) // error is the only named type with no package
278 }
279 buf.WriteString(tname.Name())
280
281 case *types.Array:
282 fmt.Fprintf(&buf, "[%d]", t.Len())
283 fprint(t.Elem())
284
285 case *types.Slice:
286 buf.WriteString("[]")
287 fprint(t.Elem())
288
289 case *types.Pointer:
290 buf.WriteByte('*')
291 fprint(t.Elem())
292
293 case *types.Map:
294 buf.WriteString("map[")
295 fprint(t.Key())
296 buf.WriteByte(']')
297 fprint(t.Elem())
298
299 case *types.Chan:
300 switch t.Dir() {
301 case types.SendRecv:
302 buf.WriteString("chan ")
303 case types.SendOnly:
304 buf.WriteString("<-chan ")
305 case types.RecvOnly:
306 buf.WriteString("chan<- ")
307 }
308 fprint(t.Elem())
309
310 case *types.Tuple:
311 buf.WriteByte('(')
312 for i := 0; i < t.Len(); i++ {
313 if i > 0 {
314 buf.WriteByte(',')
315 }
316 fprint(t.At(i).Type())
317 }
318 buf.WriteByte(')')
319
320 case *types.Basic:
321 // Use canonical names for uint8 and int32 aliases.
322 switch t.Kind() {
323 case types.Byte:
324 buf.WriteString("byte")
325 case types.Rune:
326 buf.WriteString("rune")
327 default:
328 buf.WriteString(t.String())
329 }
330
331 case *types.Signature:
332 buf.WriteString("func")
333 fprint(t.Params())
334 if t.Variadic() {
335 buf.WriteString("...") // not quite Go syntax
336 }
337 fprint(t.Results())
338
339 case *types.Struct:
340 // Non-empty unnamed struct types in method
341 // signatures are vanishingly rare.
342 buf.WriteString("struct{")
343 for i := 0; i < t.NumFields(); i++ {
344 if i > 0 {
345 buf.WriteByte(';')
346 }
347 f := t.Field(i)
348 // This isn't quite right for embedded type aliases.
349 // (See types.TypeString(StructType) and #44410 for context.)
350 // But this is vanishingly rare.
351 if !f.Embedded() {
352 buf.WriteString(f.Id())
353 buf.WriteByte(' ')
354 }
355 fprint(f.Type())
356 if tag := t.Tag(i); tag != "" {
357 buf.WriteByte(' ')
358 buf.WriteString(strconv.Quote(tag))
359 }
360 }
361 buf.WriteString("}")
362
363 case *types.Interface:
364 if t.NumMethods() == 0 {
365 buf.WriteString("any") // common case
366 } else {
367 // Interface assignability is particularly
368 // tricky due to the possibility of recursion.
369 tricky = true
370 // We could still give more disambiguating precision
371 // than "..." if we wanted to.
372 buf.WriteString("interface{...}")
373 }
374
375 case *typeparams.TypeParam:
376 tricky = true
377 // TODO(adonovan): refine this by adding a numeric suffix
378 // indicating the index among the receiver type's parameters.
379 buf.WriteByte('?')
380
381 default: // incl. *types.Union
382 panic(t)
383 }
384 }
385
386 buf.WriteString(method.Id()) // e.g. "pkg.Type"
387 sig := method.Type().(*types.Signature)
388 fprint(sig.Params())
389 fprint(sig.Results())
390 return buf.String(), tricky
391}
392
393// -- serial format of index --
394
395// The cost of gob encoding and decoding for most packages in x/tools
396// is under 50us, with occasional peaks of around 1-3ms.
397// The encoded indexes are around 1KB-50KB.
398
399// A gobPackage records the method set of each package-level type for a single package.
400type gobPackage struct {
401 Filenames []string // see gobPosition.File
402 MethodSets []gobMethodSet
403}
404
405// A gobMethodSet records the method set of a single type.
406type gobMethodSet struct {
407 Posn gobPosition
408 IsInterface bool
409 Tricky bool // at least one method is tricky; assignability requires go/types
410 Mask uint64 // mask with 1 bit from each of methods[*].sum
411 Methods []gobMethod
412}
413
414// A gobMethod records the name, type, and position of a single method.
415type gobMethod struct {
416 Fingerprint string // string of form "methodID(params...)(results)"
417 Sum uint32 // checksum of fingerprint
418 Posn gobPosition // location of method declaration
419}
420
421// A gobPosition records the file, offset, and length of an identifier.
422type gobPosition struct {
423 File int // index into Index.filenames
424 Offset, Len int // in bytes
425}