compiler, runtime, reflect: generate unique type descriptors

Currently, the compiler already generates common symbols for type
descriptors, so the type descriptors are unique. However, when a
type is created through reflection, it is not deduplicated with
compiler-generated types. As a consequence, we cannot assume type
descriptors are unique, and cannot use pointer equality to
compare them. Also, when constructing a reflect.Type, it has to
go through a canonicalization map, which introduces overhead to
reflect.TypeOf, and lock contention in concurrent programs.

In order for the reflect package to deduplicate types with
compiler-created types, we register all the compiler-created type
descriptors at startup time. The reflect package, when it needs
to create a type, looks up the registry of compiler-created types
before creating a new one. There is no lock contention since the
registry is read-only after initialization.

This lets us get rid of the canonicalization map, and also makes
it possible to compare type descriptors with pointer equality.

Change-Id: Ia828bba0694e9b67398269959ed7b65b4b1b43b7
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/179598
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/go/gogo.cc b/go/gogo.cc
index b97367c..ce9bffb 100644
--- a/go/gogo.cc
+++ b/go/gogo.cc
@@ -64,6 +64,7 @@
     named_types_are_converted_(false),
     analysis_sets_(),
     gc_roots_(),
+    type_descriptors_(),
     imported_inlinable_functions_(),
     imported_inline_functions_()
 {
@@ -903,6 +904,139 @@
   init_stmts.push_back(this->backend()->expression_statement(init_bfn, bcall));
 }
 
+// Build the list of type descriptors defined in this package. This is to help
+// the reflect package to find compiler-generated types.
+
+// type typeDescriptorList struct {
+// 	 count int
+// 	 types [...]unsafe.Pointer
+// }
+
+static Struct_type*
+type_descriptor_list_type(unsigned long len)
+{
+  Location builtin_loc = Linemap::predeclared_location();
+  Type* int_type = Type::lookup_integer_type("int");
+  Type* ptr_type = Type::make_pointer_type(Type::make_void_type());
+  // Avoid creating zero-length type.
+  unsigned long nelems = (len != 0 ? len : 1);
+  Expression* len_expr = Expression::make_integer_ul(nelems, NULL,
+                                                     builtin_loc);
+  Array_type* array_type = Type::make_array_type(ptr_type, len_expr);
+  array_type->set_is_array_incomparable();
+  Struct_type* list_type =
+    Type::make_builtin_struct_type(2, "count", int_type,
+                                   "types", array_type);
+  return list_type;
+}
+
+void
+Gogo::build_type_descriptor_list()
+{
+  // Create the list type
+  Location builtin_loc = Linemap::predeclared_location();
+  unsigned long len = this->type_descriptors_.size();
+  Struct_type* list_type = type_descriptor_list_type(len);
+  Btype* bt = list_type->get_backend(this);
+  Btype* bat = list_type->field(1)->type()->get_backend(this);
+
+  // Create the variable
+  std::string name = this->type_descriptor_list_symbol(this->package_);
+  Bvariable* bv = this->backend()->implicit_variable(name, name, bt,
+                                                     false, true, false,
+                                                     0);
+
+  // Build the initializer
+  std::vector<unsigned long> indexes;
+  std::vector<Bexpression*> vals;
+  std::vector<Type*>::iterator p = this->type_descriptors_.begin();
+  for (unsigned long i = 0; i < len; ++i, ++p)
+    {
+      Bexpression* bexpr = (*p)->type_descriptor_pointer(this,
+                                                         builtin_loc);
+      indexes.push_back(i);
+      vals.push_back(bexpr);
+    }
+  Bexpression* barray =
+    this->backend()->array_constructor_expression(bat, indexes, vals,
+                                                  builtin_loc);
+
+  Translate_context context(this, NULL, NULL, NULL);
+  std::vector<Bexpression*> fields;
+  Expression* len_expr = Expression::make_integer_ul(len, NULL,
+                                                     builtin_loc);
+  fields.push_back(len_expr->get_backend(&context));
+  fields.push_back(barray);
+  Bexpression* binit =
+    this->backend()->constructor_expression(bt, fields, builtin_loc);
+
+  this->backend()->implicit_variable_set_init(bv, name, bt, false,
+                                              true, false, binit);
+}
+
+// Register the type descriptors with the runtime.  This is to help
+// the reflect package to find compiler-generated types.
+
+void
+Gogo::register_type_descriptors(std::vector<Bstatement*>& init_stmts,
+                                Bfunction* init_bfn)
+{
+  // Create the list type
+  Location builtin_loc = Linemap::predeclared_location();
+  Struct_type* list_type = type_descriptor_list_type(1);
+  Btype* bt = list_type->get_backend(this);
+
+  // Build a list of lists.
+  std::vector<unsigned long> indexes;
+  std::vector<Bexpression*> vals;
+  unsigned long i = 0;
+  for (Packages::iterator it = this->packages_.begin();
+       it != this->packages_.end();
+       ++it)
+    {
+      if (it->second->pkgpath() == "unsafe")
+        continue;
+
+      std::string name = this->type_descriptor_list_symbol(it->second);
+      Bvariable* bv =
+        this->backend()->implicit_variable_reference(name, name, bt);
+      Bexpression* bexpr = this->backend()->var_expression(bv, builtin_loc);
+      bexpr = this->backend()->address_expression(bexpr, builtin_loc);
+
+      indexes.push_back(i);
+      vals.push_back(bexpr);
+      i++;
+    }
+  Expression* len_expr = Expression::make_integer_ul(i, NULL, builtin_loc);
+  Type* list_ptr_type = Type::make_pointer_type(list_type);
+  Type* list_array_type = Type::make_array_type(list_ptr_type, len_expr);
+  Btype* bat = list_array_type->get_backend(this);
+  Bexpression* barray =
+    this->backend()->array_constructor_expression(bat, indexes, vals,
+                                                  builtin_loc);
+
+  // Create a variable holding the list.
+  std::string name = this->typelists_symbol();
+  Bvariable* bv = this->backend()->implicit_variable(name, name, bat,
+                                                     true, true, false,
+                                                     0);
+  this->backend()->implicit_variable_set_init(bv, name, bat, true, true,
+                                              false, barray);
+
+  // Build the call in main package's init function.
+  Translate_context context(this, NULL, NULL, NULL);
+  Bexpression* bexpr = this->backend()->var_expression(bv, builtin_loc);
+  bexpr = this->backend()->address_expression(bexpr, builtin_loc);
+  Type* array_ptr_type = Type::make_pointer_type(list_array_type);
+  Expression* expr = Expression::make_backend(bexpr, array_ptr_type,
+                                              builtin_loc);
+  expr = Runtime::make_call(Runtime::REGISTER_TYPE_DESCRIPTORS,
+                            builtin_loc, 2, len_expr->copy(), expr);
+  Bexpression* bcall = expr->get_backend(&context);
+  init_stmts.push_back(this->backend()->expression_statement(init_bfn,
+                                                             bcall));
+}
+
 // Build the decl for the initialization function.
 
 Named_object*
@@ -1411,7 +1545,6 @@
     {
       init_fndecl = this->initialization_function_decl();
       init_bfn = init_fndecl->func_value()->get_or_make_decl(this, init_fndecl);
-      this->init_imports(init_stmts, init_bfn);
     }
 
   // A list of variable initializations.
@@ -1585,6 +1718,22 @@
        ++p)
     (*p)->get_backend(this, const_decls, type_decls, func_decls);
 
+  // Build the list of type descriptors.
+  this->build_type_descriptor_list();
+
+  if (this->is_main_package())
+    {
+      // Register the type descriptor lists, so that at run time
+      // the reflect package can find compiler-created types, and
+      // deduplicate if the same type is created with reflection.
+      // This needs to be done before calling any package's init
+      // function, as it may create types through reflection.
+      this->register_type_descriptors(init_stmts, init_bfn);
+
+      // Initialize imported packages.
+      this->init_imports(init_stmts, init_bfn);
+    }
+
   // Register global variables with the garbage collector.
   this->register_gc_vars(var_gc, init_stmts, init_bfn);
 
diff --git a/go/gogo.h b/go/gogo.h
index 5b77d6d..91e3bdf 100644
--- a/go/gogo.h
+++ b/go/gogo.h
@@ -617,6 +617,11 @@
     this->gc_roots_.push_back(expr);
   }
 
+  // Add a type to the descriptor list.
+  void
+  add_type_descriptor(Type* type)
+  { this->type_descriptors_.push_back(type); }
+
   // Traverse the tree.  See the Traverse class.
   void
   traverse(Traverse*);
@@ -901,6 +906,14 @@
   std::string
   type_descriptor_name(Type*, Named_type*);
 
+  // Return the name of the type descriptor list symbol of a package.
+  std::string
+  type_descriptor_list_symbol(Package*);
+
+  // Return the name of the list of all type descriptor lists.
+  std::string
+  typelists_symbol();
+
   // Return the assembler name for the GC symbol for a type.
   std::string
   gc_symbol_name(Type*);
@@ -967,6 +980,15 @@
                    std::vector<Bstatement*>&,
                    Bfunction* init_bfunction);
 
+  // Build the list of type descriptors.
+  void
+  build_type_descriptor_list();
+
+  // Register the type descriptors with the runtime.
+  void
+  register_type_descriptors(std::vector<Bstatement*>&,
+                            Bfunction* init_bfunction);
+
   void
   propagate_writebarrierrec();
 
@@ -1108,6 +1130,8 @@
   std::vector<Analysis_set> analysis_sets_;
   // A list of objects to add to the GC roots.
   std::vector<Expression*> gc_roots_;
+  // A list of type descriptors that we need to register.
+  std::vector<Type*> type_descriptors_;
   // A list of function declarations with imported bodies that we may
   // want to inline.
   std::vector<Named_object*> imported_inlinable_functions_;
diff --git a/go/names.cc b/go/names.cc
index d9ae591..e109cfc 100644
--- a/go/names.cc
+++ b/go/names.cc
@@ -146,6 +146,12 @@
 // and is named __go_init_main.  For other packages it is
 // PKGPATH..import.
 //
+// In each package there is a list of all the type descriptors defined
+// in this package.  The name of the list is PKGPATH..types.
+//
+// The main package gathers all the type descriptor lists into a
+// single list, named go..typelists.
+//
 // The type literal encoding is essentially a single line version of
 // the type literal, such as "struct { pkgpath.i int; J int }".  In
 // this representation unexported names use their pkgpath, exported
@@ -985,6 +991,23 @@
   return ret;
 }
 
+// Return the name of the type descriptor list symbol of a package.
+
+std::string
+Gogo::type_descriptor_list_symbol(Package* pkg)
+{
+  return pkg->pkgpath_symbol() + "..types";
+}
+
+// Return the name of the list of all type descriptor lists.  This is
+// only used in the main package.
+
+std::string
+Gogo::typelists_symbol()
+{
+  return "go..typelists";
+}
+
 // Return the name for the GC symbol for a type.  This is used to
 // initialize the gcdata field of a type descriptor.  This is a local
 // name never referenced outside of this assembly file.  (Note that
diff --git a/go/runtime.def b/go/runtime.def
index 226eeac..b0e6861 100644
--- a/go/runtime.def
+++ b/go/runtime.def
@@ -212,6 +212,10 @@
 // Register roots (global variables) for the garbage collector.
 DEF_GO_RUNTIME(REGISTER_GC_ROOTS, "runtime.registerGCRoots", P1(POINTER), R0())
 
+// Register type descriptors.
+DEF_GO_RUNTIME(REGISTER_TYPE_DESCRIPTORS, "runtime.registerTypeDescriptors",
+               P2(INT, POINTER), R0())
+
 
 // Allocate memory.
 DEF_GO_RUNTIME(NEW, "runtime.newobject", P1(TYPE), R1(POINTER))
diff --git a/go/types.cc b/go/types.cc
index 1b96dc1..011a7af 100644
--- a/go/types.cc
+++ b/go/types.cc
@@ -1413,6 +1413,23 @@
 					     var_name, false, is_common,
 					     initializer_btype, loc,
 					     binitializer);
+
+  // For types that may be created by reflection, add the type to
+  // the list of types whose descriptors we will register with the
+  // runtime.
+  // Do not add generated incomparable array/struct types, see
+  // issue #22605.
+  if (is_common
+      && (this->points_to() != NULL
+          || this->channel_type() != NULL
+          || this->map_type() != NULL
+          || this->function_type() != NULL
+          || this->is_slice_type()
+          || (this->struct_type() != NULL
+              && !this->struct_type()->is_struct_incomparable())
+          || (this->array_type() != NULL
+              && !this->array_type()->is_array_incomparable())))
+  gogo->add_type_descriptor(this);
 }
 
 // Return true if this type descriptor is defined in a different
diff --git a/libgo/go/reflect/type.go b/libgo/go/reflect/type.go
index fb2e5d4..8493d87 100644
--- a/libgo/go/reflect/type.go
+++ b/libgo/go/reflect/type.go
@@ -1105,15 +1105,14 @@
 		return &pi.(*ptrType).rtype
 	}
 
+	// Look in known types.
 	s := "*" + *t.string
-
-	canonicalTypeLock.RLock()
-	r, ok := canonicalType[s]
-	canonicalTypeLock.RUnlock()
-	if ok {
-		p := (*ptrType)(unsafe.Pointer(r.(*rtype)))
-		pi, _ := ptrMap.LoadOrStore(t, p)
-		return &pi.(*ptrType).rtype
+	if tt := lookupType(s); tt != nil {
+		p := (*ptrType)(unsafe.Pointer(tt))
+		if p.elem == t {
+			pi, _ := ptrMap.LoadOrStore(t, p)
+			return &pi.(*ptrType).rtype
+		}
 	}
 
 	// Create a new ptrType starting with the description
@@ -1138,10 +1137,7 @@
 	pp.ptrToThis = nil
 	pp.elem = t
 
-	q := canonicalize(&pp.rtype)
-	p := (*ptrType)(unsafe.Pointer(q.(*rtype)))
-
-	pi, _ := ptrMap.LoadOrStore(t, p)
+	pi, _ := ptrMap.LoadOrStore(t, &pp)
 	return &pi.(*ptrType).rtype
 }
 
@@ -1447,6 +1443,13 @@
 	case BothDir:
 		s = "chan " + *typ.string
 	}
+	if tt := lookupType(s); tt != nil {
+		ch := (*chanType)(unsafe.Pointer(tt))
+		if ch.elem == typ && ch.dir == uintptr(dir) {
+			ti, _ := lookupCache.LoadOrStore(ckey, tt)
+			return ti.(Type)
+		}
+	}
 
 	// Make a channel type.
 	var ichan interface{} = (chan unsafe.Pointer)(nil)
@@ -1472,10 +1475,8 @@
 	ch.uncommonType = nil
 	ch.ptrToThis = nil
 
-	// Canonicalize before storing in lookupCache
-	ti := toType(&ch.rtype)
-	lookupCache.Store(ckey, ti.(*rtype))
-	return ti
+	ti, _ := lookupCache.LoadOrStore(ckey, &ch.rtype)
+	return ti.(Type)
 }
 
 func ismapkey(*rtype) bool // implemented in runtime
@@ -1502,6 +1503,13 @@
 
 	// Look in known types.
 	s := "map[" + *ktyp.string + "]" + *etyp.string
+	if tt := lookupType(s); tt != nil {
+		mt := (*mapType)(unsafe.Pointer(tt))
+		if mt.key == ktyp && mt.elem == etyp {
+			ti, _ := lookupCache.LoadOrStore(ckey, tt)
+			return ti.(Type)
+		}
+	}
 
 	// Make a map type.
 	// Note: flag values must match those used in the TMAP case
@@ -1544,10 +1552,8 @@
 		mt.flags |= 16
 	}
 
-	// Canonicalize before storing in lookupCache
-	ti := toType(&mt.rtype)
-	lookupCache.Store(ckey, ti.(*rtype))
-	return ti
+	ti, _ := lookupCache.LoadOrStore(ckey, &mt.rtype)
+	return ti.(Type)
 }
 
 // FuncOf returns the function type with the given argument and result types.
@@ -1625,15 +1631,17 @@
 	}
 
 	str := funcStr(ft)
+	if tt := lookupType(str); tt != nil {
+		if haveIdenticalUnderlyingType(&ft.rtype, tt, true) {
+			return addToCache(tt)
+		}
+	}
 
 	// Populate the remaining fields of ft and store in cache.
 	ft.string = &str
 	ft.uncommonType = nil
 	ft.ptrToThis = nil
-
-	// Canonicalize before storing in funcLookupCache
-	tc := toType(&ft.rtype)
-	return addToCache(tc.(*rtype))
+	return addToCache(&ft.rtype)
 }
 
 // funcStr builds a string representation of a funcType.
@@ -1873,6 +1881,13 @@
 
 	// Look in known types.
 	s := "[]" + *typ.string
+	if tt := lookupType(s); tt != nil {
+		slice := (*sliceType)(unsafe.Pointer(tt))
+		if slice.elem == typ {
+			ti, _ := lookupCache.LoadOrStore(ckey, tt)
+			return ti.(Type)
+		}
+	}
 
 	// Make a slice type.
 	var islice interface{} = ([]unsafe.Pointer)(nil)
@@ -1888,10 +1903,8 @@
 	slice.uncommonType = nil
 	slice.ptrToThis = nil
 
-	// Canonicalize before storing in lookupCache
-	ti := toType(&slice.rtype)
-	lookupCache.Store(ckey, ti.(*rtype))
-	return ti
+	ti, _ := lookupCache.LoadOrStore(ckey, &slice.rtype)
+	return ti.(Type)
 }
 
 // The structLookupCache caches StructOf lookups.
@@ -2106,6 +2119,13 @@
 		return t
 	}
 
+	// Look in known types.
+	if tt := lookupType(str); tt != nil {
+		if haveIdenticalUnderlyingType(&typ.rtype, tt, true) {
+			return addToCache(tt)
+		}
+	}
+
 	typ.string = &str
 	typ.hash = hash
 	typ.size = size
@@ -2214,10 +2234,7 @@
 
 	typ.uncommonType = nil
 	typ.ptrToThis = nil
-
-	// Canonicalize before storing in structLookupCache
-	ti := toType(&typ.rtype)
-	return addToCache(ti.(*rtype))
+	return addToCache(&typ.rtype)
 }
 
 func runtimeStructField(field StructField) structField {
@@ -2300,6 +2317,13 @@
 
 	// Look in known types.
 	s := "[" + strconv.Itoa(count) + "]" + *typ.string
+	if tt := lookupType(s); tt != nil {
+		array := (*arrayType)(unsafe.Pointer(tt))
+		if array.elem == typ {
+			ti, _ := lookupCache.LoadOrStore(ckey, tt)
+			return ti.(Type)
+		}
+	}
 
 	// Make an array type.
 	var iarray interface{} = [1]unsafe.Pointer{}
@@ -2451,10 +2475,8 @@
 		}
 	}
 
-	// Canonicalize before storing in lookupCache
-	ti := toType(&array.rtype)
-	lookupCache.Store(ckey, ti.(*rtype))
-	return ti
+	ti, _ := lookupCache.LoadOrStore(ckey, &array.rtype)
+	return ti.(Type)
 }
 
 func appendVarint(x []byte, v uintptr) []byte {
@@ -2466,42 +2488,19 @@
 }
 
 // toType converts from a *rtype to a Type that can be returned
-// to the client of package reflect. In gc, the only concern is that
-// a nil *rtype must be replaced by a nil Type, but in gccgo this
-// function takes care of ensuring that multiple *rtype for the same
-// type are coalesced into a single Type.
-var canonicalType = make(map[string]Type)
-
-var canonicalTypeLock sync.RWMutex
-
-func canonicalize(t Type) Type {
-	if t == nil {
-		return nil
-	}
-	s := t.rawString()
-	canonicalTypeLock.RLock()
-	if r, ok := canonicalType[s]; ok {
-		canonicalTypeLock.RUnlock()
-		return r
-	}
-	canonicalTypeLock.RUnlock()
-	canonicalTypeLock.Lock()
-	if r, ok := canonicalType[s]; ok {
-		canonicalTypeLock.Unlock()
-		return r
-	}
-	canonicalType[s] = t
-	canonicalTypeLock.Unlock()
-	return t
-}
-
+// to the client of package reflect. The only concern is that
+// a nil *rtype must be replaced by a nil Type.
 func toType(p *rtype) Type {
 	if p == nil {
 		return nil
 	}
-	return canonicalize(p)
+	return p
 }
 
+// Look up a compiler-generated type descriptor.
+// Implemented in runtime.
+func lookupType(s string) *rtype
+
 // ifaceIndir reports whether t is stored indirectly in an interface value.
 func ifaceIndir(t *rtype) bool {
 	return t.kind&kindDirectIface == 0
diff --git a/libgo/go/runtime/type.go b/libgo/go/runtime/type.go
index 5cafa38..3bdb8f1 100644
--- a/libgo/go/runtime/type.go
+++ b/libgo/go/runtime/type.go
@@ -6,7 +6,11 @@
 
 package runtime
 
-import "unsafe"
+import (
+	"runtime/internal/atomic"
+	"runtime/internal/sys"
+	"unsafe"
+)
 
 type _type struct {
 	size       uintptr
@@ -45,19 +49,8 @@
 }
 
 // Return whether two type descriptors are equal.
-// This is gccgo-specific, as gccgo, unlike gc, permits multiple
-// independent descriptors for a single type.
 func eqtype(t1, t2 *_type) bool {
-	switch {
-	case t1 == t2:
-		return true
-	case t1 == nil || t2 == nil:
-		return false
-	case t1.kind != t2.kind || t1.hash != t2.hash:
-		return false
-	default:
-		return t1.string() == t2.string()
-	}
+	return t1 == t2
 }
 
 type method struct {
@@ -164,3 +157,62 @@
 	typ    _type
 	fields []structfield
 }
+
+// typeDescriptorList holds a list of type descriptors generated
+// by the compiler. This is used by the compiler to register
+// type descriptors with the runtime.
+// The layout is known to the compiler.
+//go:notinheap
+type typeDescriptorList struct {
+	count int
+	types [1]uintptr // variable length
+}
+
+// typelist holds all type descriptors generated by the compiler.
+// This is for the reflect package to deduplicate type descriptors
+// when it creates a type that is also a compiler-generated type.
+var typelist struct {
+	initialized uint32
+	lists       []*typeDescriptorList // one element per package
+	types       map[string]uintptr    // map from a type's string to *_type, lazily populated
+	// TODO: use a sorted array instead?
+}
+var typelistLock mutex
+
+// The compiler generates a call of this function in the main
+// package's init function, to register compiler-generated
+// type descriptors.
+// p points to a list of *typeDescriptorList, n is the length
+// of the list.
+//go:linkname registerTypeDescriptors runtime.registerTypeDescriptors
+func registerTypeDescriptors(n int, p unsafe.Pointer) {
+	*(*slice)(unsafe.Pointer(&typelist.lists)) = slice{p, n, n}
+}
+
+// The reflect package uses this function to look up a compiler-
+// generated type descriptor.
+//go:linkname reflect_lookupType reflect.lookupType
+func reflect_lookupType(s string) *_type {
+	// Lazy initialization. We don't need to do this if we never create
+	// types through reflection.
+	if atomic.Load(&typelist.initialized) == 0 {
+		lock(&typelistLock)
+		if atomic.Load(&typelist.initialized) == 0 {
+			n := 0
+			for _, list := range typelist.lists {
+				n += list.count
+			}
+			typelist.types = make(map[string]uintptr, n)
+			for _, list := range typelist.lists {
+				for i := 0; i < list.count; i++ {
+					typ := *(**_type)(add(unsafe.Pointer(&list.types), uintptr(i)*sys.PtrSize))
+					typelist.types[typ.string()] = uintptr(unsafe.Pointer(typ))
+				}
+			}
+			atomic.Store(&typelist.initialized, 1)
+		}
+		unlock(&typelistLock)
+	}
+
+	return (*_type)(unsafe.Pointer(typelist.types[s]))
+}