| // Copyright 2020 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package intern lets you make smaller comparable values by boxing |
| // a larger comparable value (such as a 16 byte string header) down |
| // into a globally unique 8 byte pointer. |
| // |
| // The globally unique pointers are garbage collected with weak |
| // references and finalizers. This package hides that. |
| package intern |
| |
| import ( |
| "internal/godebug" |
| "runtime" |
| "sync" |
| "unsafe" |
| ) |
| |
| // A Value pointer is the handle to an underlying comparable value. |
| // See func Get for how Value pointers may be used. |
| type Value struct { |
| _ [0]func() // prevent people from accidentally using value type as comparable |
| cmpVal any |
| // resurrected is guarded by mu (for all instances of Value). |
| // It is set true whenever v is synthesized from a uintptr. |
| resurrected bool |
| } |
| |
| // Get returns the comparable value passed to the Get func |
| // that returned v. |
| func (v *Value) Get() any { return v.cmpVal } |
| |
| // key is a key in our global value map. |
| // It contains type-specialized fields to avoid allocations |
| // when converting common types to empty interfaces. |
| type key struct { |
| s string |
| cmpVal any |
| // isString reports whether key contains a string. |
| // Without it, the zero value of key is ambiguous. |
| isString bool |
| } |
| |
| // keyFor returns a key to use with cmpVal. |
| func keyFor(cmpVal any) key { |
| if s, ok := cmpVal.(string); ok { |
| return key{s: s, isString: true} |
| } |
| return key{cmpVal: cmpVal} |
| } |
| |
| // Value returns a *Value built from k. |
| func (k key) Value() *Value { |
| if k.isString { |
| return &Value{cmpVal: k.s} |
| } |
| return &Value{cmpVal: k.cmpVal} |
| } |
| |
| var ( |
| // mu guards valMap, a weakref map of *Value by underlying value. |
| // It also guards the resurrected field of all *Values. |
| mu sync.Mutex |
| valMap = map[key]uintptr{} // to uintptr(*Value) |
| valSafe = safeMap() // non-nil in safe+leaky mode |
| ) |
| |
| // safeMap returns a non-nil map if we're in safe-but-leaky mode, |
| // as controlled by GODEBUG=intern=leaky |
| func safeMap() map[key]*Value { |
| if godebug.Get("intern") == "leaky" { |
| return map[key]*Value{} |
| } |
| return nil |
| } |
| |
| // Get returns a pointer representing the comparable value cmpVal. |
| // |
| // The returned pointer will be the same for Get(v) and Get(v2) |
| // if and only if v == v2, and can be used as a map key. |
| func Get(cmpVal any) *Value { |
| return get(keyFor(cmpVal)) |
| } |
| |
| // GetByString is identical to Get, except that it is specialized for strings. |
| // This avoids an allocation from putting a string into an interface{} |
| // to pass as an argument to Get. |
| func GetByString(s string) *Value { |
| return get(key{s: s, isString: true}) |
| } |
| |
| // We play unsafe games that violate Go's rules (and assume a non-moving |
| // collector). So we quiet Go here. |
| // See the comment below Get for more implementation details. |
| // |
| //go:nocheckptr |
| func get(k key) *Value { |
| mu.Lock() |
| defer mu.Unlock() |
| |
| var v *Value |
| if valSafe != nil { |
| v = valSafe[k] |
| } else if addr, ok := valMap[k]; ok { |
| v = (*Value)(unsafe.Pointer(addr)) |
| v.resurrected = true |
| } |
| if v != nil { |
| return v |
| } |
| v = k.Value() |
| if valSafe != nil { |
| valSafe[k] = v |
| } else { |
| // SetFinalizer before uintptr conversion (theoretical concern; |
| // see https://github.com/go4org/intern/issues/13) |
| runtime.SetFinalizer(v, finalize) |
| valMap[k] = uintptr(unsafe.Pointer(v)) |
| } |
| return v |
| } |
| |
| func finalize(v *Value) { |
| mu.Lock() |
| defer mu.Unlock() |
| if v.resurrected { |
| // We lost the race. Somebody resurrected it while we |
| // were about to finalize it. Try again next round. |
| v.resurrected = false |
| runtime.SetFinalizer(v, finalize) |
| return |
| } |
| delete(valMap, keyFor(v.cmpVal)) |
| } |
| |
| // Interning is simple if you don't require that unused values be |
| // garbage collectable. But we do require that; we don't want to be |
| // DOS vector. We do this by using a uintptr to hide the pointer from |
| // the garbage collector, and using a finalizer to eliminate the |
| // pointer when no other code is using it. |
| // |
| // The obvious implementation of this is to use a |
| // map[interface{}]uintptr-of-*interface{}, and set up a finalizer to |
| // delete from the map. Unfortunately, this is racy. Because pointers |
| // are being created in violation of Go's unsafety rules, it's |
| // possible to create a pointer to a value concurrently with the GC |
| // concluding that the value can be collected. There are other races |
| // that break the equality invariant as well, but the use-after-free |
| // will cause a runtime crash. |
| // |
| // To make this work, the finalizer needs to know that no references |
| // have been unsafely created since the finalizer was set up. To do |
| // this, values carry a "resurrected" sentinel, which gets set |
| // whenever a pointer is unsafely created. If the finalizer encounters |
| // the sentinel, it clears the sentinel and delays collection for one |
| // additional GC cycle, by re-installing itself as finalizer. This |
| // ensures that the unsafely created pointer is visible to the GC, and |
| // will correctly prevent collection. |
| // |
| // This technique does mean that interned values that get reused take |
| // at least 3 GC cycles to fully collect (1 to clear the sentinel, 1 |
| // to clean up the unsafe map, 1 to be actually deleted). |
| // |
| // @ianlancetaylor commented in |
| // https://github.com/golang/go/issues/41303#issuecomment-717401656 |
| // that it is possible to implement weak references in terms of |
| // finalizers without unsafe. Unfortunately, the approach he outlined |
| // does not work here, for two reasons. First, there is no way to |
| // construct a strong pointer out of a weak pointer; our map stores |
| // weak pointers, but we must return strong pointers to callers. |
| // Second, and more fundamentally, we must return not just _a_ strong |
| // pointer to callers, but _the same_ strong pointer to callers. In |
| // order to return _the same_ strong pointer to callers, we must track |
| // it, which is exactly what we cannot do with strong pointers. |
| // |
| // See https://github.com/inetaf/netaddr/issues/53 for more |
| // discussion, and https://github.com/go4org/intern/issues/2 for an |
| // illustration of the subtleties at play. |