Alan Donovan | 2fa6ca1 | 2022-12-29 13:04:08 -0500 | [diff] [blame] | 1 | // Copyright 2023 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Package methodsets defines an incremental, serializable index of |
| 6 | // method-set information that allows efficient 'implements' queries |
| 7 | // across packages of the workspace without using the type checker. |
| 8 | // |
| 9 | // This package provides only the "global" (all workspace) search; the |
| 10 | // "local" search within a given package uses a different |
| 11 | // implementation based on type-checker data structures for a single |
| 12 | // package plus variants; see ../implementation2.go. |
| 13 | // The local algorithm is more precise as it tests function-local types too. |
| 14 | // |
| 15 | // A global index of function-local types is challenging since they |
| 16 | // may reference other local types, for which we would need to invent |
| 17 | // stable names, an unsolved problem described in passing in Go issue |
| 18 | // 57497. The global algorithm also does not index anonymous interface |
| 19 | // types, even outside function bodies. |
| 20 | // |
| 21 | // Consequently, global results are not symmetric: applying the |
| 22 | // operation twice may not get you back where you started. |
| 23 | package methodsets |
| 24 | |
| 25 | // DESIGN |
| 26 | // |
| 27 | // See https://go.dev/cl/452060 for a minimal exposition of the algorithm. |
| 28 | // |
| 29 | // For each method, we compute a fingerprint: a string representing |
| 30 | // the method name and type such that equal fingerprint strings mean |
| 31 | // identical method types. |
| 32 | // |
| 33 | // For efficiency, the fingerprint is reduced to a single bit |
| 34 | // of a uint64, so that the method set can be represented as |
| 35 | // the union of those method bits (a uint64 bitmask). |
| 36 | // Assignability thus reduces to a subset check on bitmasks |
| 37 | // followed by equality checks on fingerprints. |
| 38 | // |
| 39 | // In earlier experiments, using 128-bit masks instead of 64 reduced |
| 40 | // the number of candidates by about 2x. Using (like a Bloom filter) a |
| 41 | // different hash function to compute a second 64-bit mask and |
| 42 | // performing a second mask test reduced it by about 4x. |
| 43 | // Neither had much effect on the running time, presumably because a |
| 44 | // single 64-bit mask is quite effective. See CL 452060 for details. |
| 45 | |
| 46 | import ( |
| 47 | "fmt" |
| 48 | "go/token" |
| 49 | "go/types" |
| 50 | "hash/crc32" |
| 51 | "strconv" |
| 52 | "strings" |
| 53 | |
| 54 | "golang.org/x/tools/gopls/internal/lsp/safetoken" |
| 55 | "golang.org/x/tools/internal/typeparams" |
| 56 | ) |
| 57 | |
| 58 | // An Index records the non-empty method sets of all package-level |
| 59 | // types in a package in a form that permits assignability queries |
| 60 | // without the type checker. |
| 61 | type Index struct { |
| 62 | pkg gobPackage |
| 63 | } |
| 64 | |
| 65 | // NewIndex returns a new index of method-set information for all |
| 66 | // package-level types in the specified package. |
| 67 | func NewIndex(fset *token.FileSet, pkg *types.Package) *Index { |
| 68 | return new(indexBuilder).build(fset, pkg) |
| 69 | } |
| 70 | |
| 71 | // A Location records the extent of an identifier in byte-offset form. |
| 72 | // |
| 73 | // Conversion to protocol (UTF-16) form is done by the caller after a |
| 74 | // search, not during index construction. |
| 75 | // TODO(adonovan): opt: reconsider this choice, if FileHandles, not |
| 76 | // ParsedGoFiles were to provide ColumnMapper-like functionality. |
| 77 | // (Column mapping is currently associated with parsing, |
| 78 | // but non-parsed and even non-Go files need it too.) |
| 79 | // Since type checking requires reading (but not parsing) all |
| 80 | // dependencies' Go files, we could do the conversion at type-checking |
| 81 | // time at little extra cost in that case. |
| 82 | type Location struct { |
| 83 | Filename string |
| 84 | Start, End int // byte offsets |
| 85 | } |
| 86 | |
| 87 | // A Key represents the method set of a given type in a form suitable |
| 88 | // to pass to the (*Index).Search method of many different Indexes. |
| 89 | type Key struct { |
| 90 | mset gobMethodSet // note: lacks position information |
| 91 | } |
| 92 | |
| 93 | // KeyOf returns the search key for the method sets of a given type. |
| 94 | // It returns false if the type has no methods. |
| 95 | func KeyOf(t types.Type) (Key, bool) { |
| 96 | mset := methodSetInfo(func(types.Object) (_ gobPosition) { return }, t, gobPosition{}) |
| 97 | if mset.Mask == 0 { |
| 98 | return Key{}, false // no methods |
| 99 | } |
| 100 | return Key{mset}, true |
| 101 | } |
| 102 | |
| 103 | // Search reports each type that implements (or is implemented by) the |
| 104 | // type that produced the search key. If methodID is nonempty, only |
| 105 | // that method of each type is reported. |
| 106 | // The result is the location of each type or method. |
| 107 | func (index *Index) Search(key Key, methodID string) []Location { |
| 108 | var locs []Location |
| 109 | for _, candidate := range index.pkg.MethodSets { |
| 110 | // Traditionally this feature doesn't report |
| 111 | // interface/interface elements of the relation. |
| 112 | // I think that's a mistake. |
| 113 | // TODO(adonovan): UX: change it, here and in the local implementation. |
| 114 | if candidate.IsInterface && key.mset.IsInterface { |
| 115 | continue |
| 116 | } |
| 117 | if !satisfies(candidate, key.mset) && !satisfies(key.mset, candidate) { |
| 118 | continue |
| 119 | } |
| 120 | |
| 121 | if candidate.Tricky { |
| 122 | // If any interface method is tricky then extra |
| 123 | // checking may be needed to eliminate a false positive. |
| 124 | // TODO(adonovan): implement it. |
| 125 | } |
| 126 | |
| 127 | if methodID == "" { |
| 128 | locs = append(locs, index.location(candidate.Posn)) |
| 129 | } else { |
| 130 | for _, m := range candidate.Methods { |
| 131 | // Here we exploit knowledge of the shape of the fingerprint string. |
| 132 | if strings.HasPrefix(m.Fingerprint, methodID) && |
| 133 | m.Fingerprint[len(methodID)] == '(' { |
| 134 | locs = append(locs, index.location(m.Posn)) |
| 135 | break |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | } |
| 140 | return locs |
| 141 | } |
| 142 | |
| 143 | // satisfies does a fast check for whether x satisfies y. |
| 144 | func satisfies(x, y gobMethodSet) bool { |
| 145 | return y.IsInterface && x.Mask&y.Mask == y.Mask && subset(y, x) |
| 146 | } |
| 147 | |
| 148 | // subset reports whether method set x is a subset of y. |
| 149 | func subset(x, y gobMethodSet) bool { |
| 150 | outer: |
| 151 | for _, mx := range x.Methods { |
| 152 | for _, my := range y.Methods { |
| 153 | if mx.Sum == my.Sum && mx.Fingerprint == my.Fingerprint { |
| 154 | continue outer // found; try next x method |
| 155 | } |
| 156 | } |
| 157 | return false // method of x not found in y |
| 158 | } |
| 159 | return true // all methods of x found in y |
| 160 | } |
| 161 | |
| 162 | func (index *Index) location(posn gobPosition) Location { |
| 163 | return Location{ |
| 164 | Filename: index.pkg.Filenames[posn.File], |
| 165 | Start: posn.Offset, |
| 166 | End: posn.Offset + posn.Len, |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | // An indexBuilder builds an index for a single package. |
| 171 | type indexBuilder struct { |
| 172 | gobPackage |
| 173 | filenameIndex map[string]int |
| 174 | } |
| 175 | |
| 176 | // build adds to the index all package-level named types of the specified package. |
| 177 | func (b *indexBuilder) build(fset *token.FileSet, pkg *types.Package) *Index { |
| 178 | // We ignore aliases, though in principle they could define a |
| 179 | // struct{...} or interface{...} type, or an instantiation of |
| 180 | // a generic, that has a novel method set. |
| 181 | scope := pkg.Scope() |
| 182 | for _, name := range scope.Names() { |
| 183 | if tname, ok := scope.Lookup(name).(*types.TypeName); ok && !tname.IsAlias() { |
| 184 | b.add(fset, tname) |
| 185 | } |
| 186 | } |
| 187 | |
| 188 | return &Index{pkg: b.gobPackage} |
| 189 | } |
| 190 | |
| 191 | func (b *indexBuilder) add(fset *token.FileSet, tname *types.TypeName) { |
| 192 | objectPos := func(obj types.Object) gobPosition { |
| 193 | posn := safetoken.StartPosition(fset, obj.Pos()) |
| 194 | return gobPosition{b.fileIndex(posn.Filename), posn.Offset, len(obj.Name())} |
| 195 | } |
| 196 | if mset := methodSetInfo(objectPos, tname.Type(), objectPos(tname)); mset.Mask != 0 { |
| 197 | // Only record types with non-trivial method sets. |
| 198 | b.MethodSets = append(b.MethodSets, mset) |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | // fileIndex returns a small integer that encodes the file name. |
| 203 | func (b *indexBuilder) fileIndex(filename string) int { |
| 204 | i, ok := b.filenameIndex[filename] |
| 205 | if !ok { |
| 206 | i = len(b.Filenames) |
| 207 | if b.filenameIndex == nil { |
| 208 | b.filenameIndex = make(map[string]int) |
| 209 | } |
| 210 | b.filenameIndex[filename] = i |
| 211 | b.Filenames = append(b.Filenames, filename) |
| 212 | } |
| 213 | return i |
| 214 | } |
| 215 | |
| 216 | // methodSetInfo returns the method-set fingerprint |
| 217 | // of a type and records its position (typePosn) |
| 218 | // and the position of each of its methods m, |
| 219 | // as provided by objectPos(m). |
| 220 | func methodSetInfo(objectPos func(types.Object) gobPosition, t types.Type, typePosn gobPosition) gobMethodSet { |
| 221 | // For non-interface types, use *T |
| 222 | // (if T is not already a pointer) |
| 223 | // since it may have more methods. |
| 224 | mset := types.NewMethodSet(EnsurePointer(t)) |
| 225 | |
| 226 | // Convert the method set into a compact summary. |
| 227 | var mask uint64 |
| 228 | tricky := false |
| 229 | methods := make([]gobMethod, mset.Len()) |
| 230 | for i := 0; i < mset.Len(); i++ { |
| 231 | m := mset.At(i).Obj().(*types.Func) |
| 232 | fp, isTricky := fingerprint(m) |
| 233 | if isTricky { |
| 234 | tricky = true |
| 235 | } |
| 236 | sum := crc32.ChecksumIEEE([]byte(fp)) |
| 237 | methods[i] = gobMethod{fp, sum, objectPos(m)} |
| 238 | mask |= 1 << uint64(((sum>>24)^(sum>>16)^(sum>>8)^sum)&0x3f) |
| 239 | } |
| 240 | return gobMethodSet{typePosn, types.IsInterface(t), tricky, mask, methods} |
| 241 | } |
| 242 | |
| 243 | // EnsurePointer wraps T in a types.Pointer if T is a named, non-interface type. |
| 244 | // This is useful to make sure you consider a named type's full method set. |
| 245 | func EnsurePointer(T types.Type) types.Type { |
| 246 | if _, ok := T.(*types.Named); ok && !types.IsInterface(T) { |
| 247 | return types.NewPointer(T) |
| 248 | } |
| 249 | |
| 250 | return T |
| 251 | } |
| 252 | |
| 253 | // fingerprint returns an encoding of a method signature such that two |
| 254 | // methods with equal encodings have identical types, except for a few |
| 255 | // tricky types whose encodings may spuriously match and whose exact |
| 256 | // identity computation requires the type checker to eliminate false |
| 257 | // positives (which are rare). The boolean result indicates whether |
| 258 | // the result was one of these tricky types. |
| 259 | // |
| 260 | // In the standard library, 99.8% of package-level types have a |
| 261 | // non-tricky method-set. The most common exceptions are due to type |
| 262 | // parameters. |
| 263 | // |
| 264 | // The fingerprint string starts with method.Id() + "(". |
| 265 | func fingerprint(method *types.Func) (string, bool) { |
| 266 | var buf strings.Builder |
| 267 | tricky := false |
| 268 | var fprint func(t types.Type) |
| 269 | fprint = func(t types.Type) { |
| 270 | switch t := t.(type) { |
| 271 | case *types.Named: |
| 272 | tname := t.Obj() |
| 273 | if tname.Pkg() != nil { |
| 274 | buf.WriteString(strconv.Quote(tname.Pkg().Path())) |
| 275 | buf.WriteByte('.') |
| 276 | } else if tname.Name() != "error" { |
| 277 | panic(tname) // error is the only named type with no package |
| 278 | } |
| 279 | buf.WriteString(tname.Name()) |
| 280 | |
| 281 | case *types.Array: |
| 282 | fmt.Fprintf(&buf, "[%d]", t.Len()) |
| 283 | fprint(t.Elem()) |
| 284 | |
| 285 | case *types.Slice: |
| 286 | buf.WriteString("[]") |
| 287 | fprint(t.Elem()) |
| 288 | |
| 289 | case *types.Pointer: |
| 290 | buf.WriteByte('*') |
| 291 | fprint(t.Elem()) |
| 292 | |
| 293 | case *types.Map: |
| 294 | buf.WriteString("map[") |
| 295 | fprint(t.Key()) |
| 296 | buf.WriteByte(']') |
| 297 | fprint(t.Elem()) |
| 298 | |
| 299 | case *types.Chan: |
| 300 | switch t.Dir() { |
| 301 | case types.SendRecv: |
| 302 | buf.WriteString("chan ") |
| 303 | case types.SendOnly: |
| 304 | buf.WriteString("<-chan ") |
| 305 | case types.RecvOnly: |
| 306 | buf.WriteString("chan<- ") |
| 307 | } |
| 308 | fprint(t.Elem()) |
| 309 | |
| 310 | case *types.Tuple: |
| 311 | buf.WriteByte('(') |
| 312 | for i := 0; i < t.Len(); i++ { |
| 313 | if i > 0 { |
| 314 | buf.WriteByte(',') |
| 315 | } |
| 316 | fprint(t.At(i).Type()) |
| 317 | } |
| 318 | buf.WriteByte(')') |
| 319 | |
| 320 | case *types.Basic: |
| 321 | // Use canonical names for uint8 and int32 aliases. |
| 322 | switch t.Kind() { |
| 323 | case types.Byte: |
| 324 | buf.WriteString("byte") |
| 325 | case types.Rune: |
| 326 | buf.WriteString("rune") |
| 327 | default: |
| 328 | buf.WriteString(t.String()) |
| 329 | } |
| 330 | |
| 331 | case *types.Signature: |
| 332 | buf.WriteString("func") |
| 333 | fprint(t.Params()) |
| 334 | if t.Variadic() { |
| 335 | buf.WriteString("...") // not quite Go syntax |
| 336 | } |
| 337 | fprint(t.Results()) |
| 338 | |
| 339 | case *types.Struct: |
| 340 | // Non-empty unnamed struct types in method |
| 341 | // signatures are vanishingly rare. |
| 342 | buf.WriteString("struct{") |
| 343 | for i := 0; i < t.NumFields(); i++ { |
| 344 | if i > 0 { |
| 345 | buf.WriteByte(';') |
| 346 | } |
| 347 | f := t.Field(i) |
| 348 | // This isn't quite right for embedded type aliases. |
| 349 | // (See types.TypeString(StructType) and #44410 for context.) |
| 350 | // But this is vanishingly rare. |
| 351 | if !f.Embedded() { |
| 352 | buf.WriteString(f.Id()) |
| 353 | buf.WriteByte(' ') |
| 354 | } |
| 355 | fprint(f.Type()) |
| 356 | if tag := t.Tag(i); tag != "" { |
| 357 | buf.WriteByte(' ') |
| 358 | buf.WriteString(strconv.Quote(tag)) |
| 359 | } |
| 360 | } |
| 361 | buf.WriteString("}") |
| 362 | |
| 363 | case *types.Interface: |
| 364 | if t.NumMethods() == 0 { |
| 365 | buf.WriteString("any") // common case |
| 366 | } else { |
| 367 | // Interface assignability is particularly |
| 368 | // tricky due to the possibility of recursion. |
| 369 | tricky = true |
| 370 | // We could still give more disambiguating precision |
| 371 | // than "..." if we wanted to. |
| 372 | buf.WriteString("interface{...}") |
| 373 | } |
| 374 | |
| 375 | case *typeparams.TypeParam: |
| 376 | tricky = true |
| 377 | // TODO(adonovan): refine this by adding a numeric suffix |
| 378 | // indicating the index among the receiver type's parameters. |
| 379 | buf.WriteByte('?') |
| 380 | |
| 381 | default: // incl. *types.Union |
| 382 | panic(t) |
| 383 | } |
| 384 | } |
| 385 | |
| 386 | buf.WriteString(method.Id()) // e.g. "pkg.Type" |
| 387 | sig := method.Type().(*types.Signature) |
| 388 | fprint(sig.Params()) |
| 389 | fprint(sig.Results()) |
| 390 | return buf.String(), tricky |
| 391 | } |
| 392 | |
| 393 | // -- serial format of index -- |
| 394 | |
| 395 | // The cost of gob encoding and decoding for most packages in x/tools |
| 396 | // is under 50us, with occasional peaks of around 1-3ms. |
| 397 | // The encoded indexes are around 1KB-50KB. |
| 398 | |
| 399 | // A gobPackage records the method set of each package-level type for a single package. |
| 400 | type gobPackage struct { |
| 401 | Filenames []string // see gobPosition.File |
| 402 | MethodSets []gobMethodSet |
| 403 | } |
| 404 | |
| 405 | // A gobMethodSet records the method set of a single type. |
| 406 | type gobMethodSet struct { |
| 407 | Posn gobPosition |
| 408 | IsInterface bool |
| 409 | Tricky bool // at least one method is tricky; assignability requires go/types |
| 410 | Mask uint64 // mask with 1 bit from each of methods[*].sum |
| 411 | Methods []gobMethod |
| 412 | } |
| 413 | |
| 414 | // A gobMethod records the name, type, and position of a single method. |
| 415 | type gobMethod struct { |
| 416 | Fingerprint string // string of form "methodID(params...)(results)" |
| 417 | Sum uint32 // checksum of fingerprint |
| 418 | Posn gobPosition // location of method declaration |
| 419 | } |
| 420 | |
| 421 | // A gobPosition records the file, offset, and length of an identifier. |
| 422 | type gobPosition struct { |
| 423 | File int // index into Index.filenames |
| 424 | Offset, Len int // in bytes |
| 425 | } |