go/types: fix cycle detection

For Go 1.13, we rewrote the go/types cycle detection scheme. Unfortunately,
it was a bit too clever and introduced a bug (#34333). Here's an example:

type A struct {
	f1 *B
	f2 B
}

type B A

When type-checking this code, the first cycle A->*B->B->A (via field f1)
is ok because there's a pointer indirection. Though in the process B is
considered "type-checked" (and painted/marked from "grey" to black").
When type-checking f2, since B is already completely set up, go/types
doesn't complain about the invalid cycle A->B->A (via field f2) anymore.
On the other hand, with the fields f1, f2 swapped:

type A struct {
	f2 B
	f1 *B
}

go/types reports an error because the cycle A->B->A is type-checked first.
In general, we cannot know the "right" order in which types need to be
type-checked.

This CL fixes the issue as follows:

1) The global object path cycle detection does not take (pointer, function,
   reference type) indirections into account anymore for cycle detection.
   That mechanism was incorrect to start with and the primary cause for this
   issue. As a consequence we don't need Checker.indirectType and indir anymore.

2) After processing type declarations, Checker.validType is called to
   verify that a type doesn't expand indefinitively. This corresponds
   essentially to cmd/compile's dowidth computation (without size computation).

3) Cycles involving only defined types (e.g.: type (A B; B C; C A))
   require separate attention as those must now be detected when resolving
   "forward chains" of type declarations. Checker.underlying was changed
   to detect these cycles.

All three cycle detection mechanism use an object path ([]Object) to
report cycles. The cycle error reporting mechanism is now factored out
into Checker.cycleError and used by all three mechanisms. It also makes
an attempt to report the cycle starting with the "first" (earliest in the
source) object.

Fixes #34333.

Change-Id: I2c6446445e47344cc2cd034d3c74b1c345b8c1e6
Reviewed-on: https://go-review.googlesource.com/c/go/+/196338
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
diff --git a/src/go/types/decl.go b/src/go/types/decl.go
index 11d2ee4..4485ea8 100644
--- a/src/go/types/decl.go
+++ b/src/go/types/decl.go
@@ -53,11 +53,11 @@
 // For the meaning of def, see Checker.definedType, in typexpr.go.
 func (check *Checker) objDecl(obj Object, def *Named) {
 	if trace {
-		check.trace(obj.Pos(), "-- checking %s %s (objPath = %s)", obj.color(), obj, pathString(check.objPath))
+		check.trace(obj.Pos(), "-- checking %s (%s, objPath = %s)", obj, obj.color(), pathString(check.objPath))
 		check.indent++
 		defer func() {
 			check.indent--
-			check.trace(obj.Pos(), "=> %s", obj)
+			check.trace(obj.Pos(), "=> %s (%s)", obj, obj.color())
 		}()
 	}
 
@@ -198,13 +198,6 @@
 	}
 }
 
-// indir is a sentinel type name that is pushed onto the object path
-// to indicate an "indirection" in the dependency from one type name
-// to the next. For instance, for "type p *p" the object path contains
-// p followed by indir, indicating that there's an indirection *p.
-// Indirections are used to break type cycles.
-var indir = NewTypeName(token.NoPos, nil, "*", nil)
-
 // typeCycle checks if the cycle starting with obj is valid and
 // reports an error if it is not.
 // TODO(gri) rename s/typeCycle/cycle/ once we don't need the other
@@ -221,52 +214,34 @@
 		}
 	}
 
-	// Given the number of constants and variables (nval) in the cycle
-	// and the cycle length (ncycle = number of named objects in the cycle),
-	// we distinguish between cycles involving only constants and variables
-	// (nval = ncycle), cycles involving types (and functions) only
-	// (nval == 0), and mixed cycles (nval != 0 && nval != ncycle).
-	// We ignore functions at the moment (taking them into account correctly
-	// is complicated and it doesn't improve error reporting significantly).
-	//
-	// A cycle must have at least one indirection and one type definition
-	// to be permitted: If there is no indirection, the size of the type
-	// cannot be computed (it's either infinite or 0); if there is no type
-	// definition, we have a sequence of alias type names which will expand
-	// ad infinitum.
-	var nval, ncycle int
-	var hasIndir, hasTDef bool
+	// Count cycle objects.
 	assert(obj.color() >= grey)
 	start := obj.color() - grey // index of obj in objPath
 	cycle := check.objPath[start:]
-	ncycle = len(cycle) // including indirections
+	nval := 0 // number of (constant or variable) values in the cycle
+	ndef := 0 // number of type definitions in the cycle
 	for _, obj := range cycle {
 		switch obj := obj.(type) {
 		case *Const, *Var:
 			nval++
 		case *TypeName:
-			if obj == indir {
-				ncycle-- // don't count (indirections are not objects)
-				hasIndir = true
+			// Determine if the type name is an alias or not. For
+			// package-level objects, use the object map which
+			// provides syntactic information (which doesn't rely
+			// on the order in which the objects are set up). For
+			// local objects, we can rely on the order, so use
+			// the object's predicate.
+			// TODO(gri) It would be less fragile to always access
+			// the syntactic information. We should consider storing
+			// this information explicitly in the object.
+			var alias bool
+			if d := check.objMap[obj]; d != nil {
+				alias = d.alias // package-level object
 			} else {
-				// Determine if the type name is an alias or not. For
-				// package-level objects, use the object map which
-				// provides syntactic information (which doesn't rely
-				// on the order in which the objects are set up). For
-				// local objects, we can rely on the order, so use
-				// the object's predicate.
-				// TODO(gri) It would be less fragile to always access
-				// the syntactic information. We should consider storing
-				// this information explicitly in the object.
-				var alias bool
-				if d := check.objMap[obj]; d != nil {
-					alias = d.alias // package-level object
-				} else {
-					alias = obj.IsAlias() // function local object
-				}
-				if !alias {
-					hasTDef = true
-				}
+				alias = obj.IsAlias() // function local object
+			}
+			if !alias {
+				ndef++
 			}
 		case *Func:
 			// ignored for now
@@ -276,8 +251,8 @@
 	}
 
 	if trace {
-		check.trace(obj.Pos(), "## cycle detected: objPath = %s->%s (len = %d)", pathString(cycle), obj.Name(), ncycle)
-		check.trace(obj.Pos(), "## cycle contains: %d values, has indirection = %v, has type definition = %v", nval, hasIndir, hasTDef)
+		check.trace(obj.Pos(), "## cycle detected: objPath = %s->%s (len = %d)", pathString(cycle), obj.Name(), len(cycle))
+		check.trace(obj.Pos(), "## cycle contains: %d values, %d type definitions", nval, ndef)
 		defer func() {
 			if isCycle {
 				check.trace(obj.Pos(), "=> error: cycle is invalid")
@@ -288,32 +263,110 @@
 	// A cycle involving only constants and variables is invalid but we
 	// ignore them here because they are reported via the initialization
 	// cycle check.
-	if nval == ncycle {
+	if nval == len(cycle) {
 		return false
 	}
 
-	// A cycle involving only types (and possibly functions) must have at
-	// least one indirection and one type definition to be permitted: If
-	// there is no indirection, the size of the type cannot be computed
-	// (it's either infinite or 0); if there is no type definition, we
+	// A cycle involving only types (and possibly functions) must have at least
+	// one type definition to be permitted: If there is no type definition, we
 	// have a sequence of alias type names which will expand ad infinitum.
-	if nval == 0 && hasIndir && hasTDef {
+	if nval == 0 && ndef > 0 {
 		return false // cycle is permitted
 	}
 
-	// report cycle
-	check.errorf(obj.Pos(), "illegal cycle in declaration of %s", obj.Name())
-	for _, obj := range cycle {
-		if obj == indir {
-			continue // don't print indir sentinels
-		}
-		check.errorf(obj.Pos(), "\t%s refers to", obj.Name()) // secondary error, \t indented
-	}
-	check.errorf(obj.Pos(), "\t%s", obj.Name())
+	check.cycleError(cycle)
 
 	return true
 }
 
+type typeInfo uint
+
+// validType verifies that the given type does not "expand" infinitely
+// producing a cycle in the type graph. Cycles are detected by marking
+// defined types.
+// (Cycles involving alias types, as in "type A = [10]A" are detected
+// earlier, via the objDecl cycle detection mechanism.)
+func (check *Checker) validType(typ Type, path []Object) typeInfo {
+	const (
+		unknown typeInfo = iota
+		marked
+		valid
+		invalid
+	)
+
+	switch t := typ.(type) {
+	case *Array:
+		return check.validType(t.elem, path)
+
+	case *Struct:
+		for _, f := range t.fields {
+			if check.validType(f.typ, path) == invalid {
+				return invalid
+			}
+		}
+
+	case *Interface:
+		for _, etyp := range t.embeddeds {
+			if check.validType(etyp, path) == invalid {
+				return invalid
+			}
+		}
+
+	case *Named:
+		switch t.info {
+		case unknown:
+			t.info = marked
+			t.info = check.validType(t.underlying, append(path, t.obj))
+		case marked:
+			// cycle detected
+			for i, tn := range path {
+				if tn == t.obj {
+					check.cycleError(path[i:])
+					t.info = invalid
+					t.underlying = Typ[Invalid]
+					return t.info
+				}
+			}
+			panic("internal error: cycle start not found")
+		}
+		return t.info
+	}
+
+	return valid
+}
+
+// cycleError reports a declaration cycle starting with
+// the object in cycle that is "first" in the source.
+func (check *Checker) cycleError(cycle []Object) {
+	// TODO(gri) Should we start with the last (rather than the first) object in the cycle
+	//           since that is the earliest point in the source where we start seeing the
+	//           cycle? That would be more consistent with other error messages.
+	i := firstInSrc(cycle)
+	obj := cycle[i]
+	check.errorf(obj.Pos(), "illegal cycle in declaration of %s", obj.Name())
+	for range cycle {
+		check.errorf(obj.Pos(), "\t%s refers to", obj.Name()) // secondary error, \t indented
+		i++
+		if i >= len(cycle) {
+			i = 0
+		}
+		obj = cycle[i]
+	}
+	check.errorf(obj.Pos(), "\t%s", obj.Name())
+}
+
+// firstInSrc reports the index of the object with the "smallest"
+// source position in path. path must not be empty.
+func firstInSrc(path []Object) int {
+	fst, pos := 0, path[0].Pos()
+	for i, t := range path[1:] {
+		if t.Pos() < pos {
+			fst, pos = i+1, t.Pos()
+		}
+	}
+	return fst
+}
+
 func (check *Checker) constDecl(obj *Const, typ, init ast.Expr) {
 	assert(obj.typ == nil)
 
@@ -409,15 +462,53 @@
 
 // underlying returns the underlying type of typ; possibly by following
 // forward chains of named types. Such chains only exist while named types
-// are incomplete.
-func underlying(typ Type) Type {
+// are incomplete. If an underlying type is found, resolve the chain by
+// setting the underlying type for each defined type in the chain before
+// returning it.
+//
+// If no underlying type is found, a cycle error is reported and Typ[Invalid]
+// is used as underlying type for each defined type in the chain and returned
+// as result.
+func (check *Checker) underlying(typ Type) Type {
+	// If typ is not a defined type, its underlying type is itself.
+	n0, _ := typ.(*Named)
+	if n0 == nil {
+		return typ // nothing to do
+	}
+
+	// If the underlying type of a defined type is not a defined
+	// type, then that is the desired underlying type.
+	typ = n0.underlying
+	n, _ := typ.(*Named)
+	if n == nil {
+		return typ // common case
+	}
+
+	// Otherwise, follow the forward chain.
+	seen := map[*Named]int{n0: 0, n: 1}
+	path := []Object{n0.obj, n.obj}
 	for {
-		n, _ := typ.(*Named)
+		typ = n.underlying
+		n, _ = typ.(*Named)
 		if n == nil {
+			break // end of chain
+		}
+
+		if i, ok := seen[n]; ok {
+			// cycle
+			check.cycleError(path[i:])
+			typ = Typ[Invalid]
 			break
 		}
-		typ = n.underlying
+
+		seen[n] = len(seen)
+		path = append(path, n.obj)
 	}
+
+	for n := range seen {
+		n.underlying = typ
+	}
+
 	return typ
 }
 
@@ -430,6 +521,10 @@
 func (check *Checker) typeDecl(obj *TypeName, typ ast.Expr, def *Named, alias bool) {
 	assert(obj.typ == nil)
 
+	check.later(func() {
+		check.validType(obj.typ, nil)
+	})
+
 	if alias {
 
 		obj.typ = Typ[Invalid]
@@ -456,8 +551,8 @@
 		// The type of C is the (named) type of A which is incomplete,
 		// and which has as its underlying type the named type B.
 		// Determine the (final, unnamed) underlying type by resolving
-		// any forward chain (they always end in an unnamed type).
-		named.underlying = underlying(named.underlying)
+		// any forward chain.
+		named.underlying = check.underlying(named)
 
 	}
 
diff --git a/src/go/types/expr.go b/src/go/types/expr.go
index 0edd278..d49ccdf 100644
--- a/src/go/types/expr.go
+++ b/src/go/types/expr.go
@@ -1157,12 +1157,9 @@
 			}
 
 		case *Array:
-			// Prevent crash if the array referred to is not yet set up.
-			// This is a stop-gap solution; a better approach would use the mechanism of
-			// Checker.ident (typexpr.go) using a path of types. But that would require
-			// passing the path everywhere (all expression-checking methods, not just
-			// type expression checking), and we're not set up for that (quite possibly
-			// an indication that cycle detection needs to be rethought). Was issue #18643.
+			// Prevent crash if the array referred to is not yet set up. Was issue #18643.
+			// This is a stop-gap solution. Should use Checker.objPath to report entire
+			// path starting with earliest declaration in the source. TODO(gri) fix this.
 			if utyp.elem == nil {
 				check.error(e.Pos(), "illegal cycle in type declaration")
 				goto Error
diff --git a/src/go/types/testdata/cycles.src b/src/go/types/testdata/cycles.src
index a9af46a..7f9fc89 100644
--- a/src/go/types/testdata/cycles.src
+++ b/src/go/types/testdata/cycles.src
@@ -23,8 +23,10 @@
 	A0 /* ERROR cycle */ [10]A0
 	A1 [10]*A1
 
-	A2 /* ERROR cycle */ [10]A3
-	A3 [10]A4
+	// TODO(gri) It would be nicer to report the cycle starting
+	//           with A2 (also below, for S4). See issue #34771.
+	A2 [10]A3
+	A3 /* ERROR cycle */ [10]A4
 	A4 A2
 
 	A5 [10]A6
@@ -39,8 +41,8 @@
 	S2 struct{ _ *S2 }
 	S3 struct{ *S3 }
 
-	S4 /* ERROR cycle */ struct{ S5 }
-	S5 struct{ S6 }
+	S4 struct{ S5 }
+	S5 /* ERROR cycle */ struct{ S6 }
 	S6 S4
 
 	// pointers
@@ -147,7 +149,7 @@
 // test cases for issue 18643
 // (type cycle detection when non-type expressions are involved)
 type (
-	T14 /* ERROR cycle */ [len(T14{})]int
+	T14 [len(T14 /* ERROR cycle */ {})]int
 	T15 [][len(T15 /* ERROR cycle */ {})]int
 	T16 map[[len(T16 /* ERROR cycle */ {1:2})]int]int
 	T17 map[int][len(T17 /* ERROR cycle */ {1:2})]int
diff --git a/src/go/types/testdata/cycles5.src b/src/go/types/testdata/cycles5.src
index aa6528a..397adcc 100644
--- a/src/go/types/testdata/cycles5.src
+++ b/src/go/types/testdata/cycles5.src
@@ -188,3 +188,13 @@
 
 var c14 /* ERROR cycle */ T14
 type T14 [uintptr(unsafe.Sizeof(&c14))]byte
+
+// issue #34333
+type T15 /* ERROR cycle */ struct {
+	f func() T16
+	b T16
+}
+
+type T16 struct {
+	T15
+}
\ No newline at end of file
diff --git a/src/go/types/testdata/decls0.src b/src/go/types/testdata/decls0.src
index 5b84722..5501b65 100644
--- a/src/go/types/testdata/decls0.src
+++ b/src/go/types/testdata/decls0.src
@@ -72,10 +72,6 @@
 	a /* ERROR "illegal cycle" */ a
 	a /* ERROR "redeclared" */ int
 
-	// where the cycle error appears depends on the
-	// order in which declarations are processed
-	// (which depends on the order in which a map
-	// is iterated through)
 	b /* ERROR "illegal cycle" */ c
 	c d
 	d e
diff --git a/src/go/types/type.go b/src/go/types/type.go
index 6263da0..a490d92 100644
--- a/src/go/types/type.go
+++ b/src/go/types/type.go
@@ -448,6 +448,7 @@
 
 // A Named represents a named type.
 type Named struct {
+	info       typeInfo  // for cycle detection
 	obj        *TypeName // corresponding declared object
 	underlying Type      // possibly a *Named during setup; never a *Named once set up completely
 	methods    []*Func   // methods declared for this type (not the method set of this type); signatures are type-checked lazily
diff --git a/src/go/types/typexpr.go b/src/go/types/typexpr.go
index b0d04f5..d5837c4 100644
--- a/src/go/types/typexpr.go
+++ b/src/go/types/typexpr.go
@@ -142,16 +142,6 @@
 	return
 }
 
-// indirectType is like typ but it also breaks the (otherwise) infinite size of recursive
-// types by introducing an indirection. It should be called for components of types that
-// are not laid out in place in memory, such as pointer base types, slice or map element
-// types, function parameter types, etc.
-func (check *Checker) indirectType(e ast.Expr) Type {
-	check.push(indir)
-	defer check.pop()
-	return check.definedType(e, nil)
-}
-
 // funcType type-checks a function or method type.
 func (check *Checker) funcType(sig *Signature, recvPar *ast.FieldList, ftyp *ast.FuncType) {
 	scope := NewScope(check.scope, token.NoPos, token.NoPos, "function")
@@ -273,7 +263,7 @@
 		} else {
 			typ := new(Slice)
 			def.setUnderlying(typ)
-			typ.elem = check.indirectType(e.Elt)
+			typ.elem = check.typ(e.Elt)
 			return typ
 		}
 
@@ -286,7 +276,7 @@
 	case *ast.StarExpr:
 		typ := new(Pointer)
 		def.setUnderlying(typ)
-		typ.base = check.indirectType(e.X)
+		typ.base = check.typ(e.X)
 		return typ
 
 	case *ast.FuncType:
@@ -305,8 +295,8 @@
 		typ := new(Map)
 		def.setUnderlying(typ)
 
-		typ.key = check.indirectType(e.Key)
-		typ.elem = check.indirectType(e.Value)
+		typ.key = check.typ(e.Key)
+		typ.elem = check.typ(e.Value)
 
 		// spec: "The comparison operators == and != must be fully defined
 		// for operands of the key type; thus the key type must not be a
@@ -340,7 +330,7 @@
 		}
 
 		typ.dir = dir
-		typ.elem = check.indirectType(e.Value)
+		typ.elem = check.typ(e.Value)
 		return typ
 
 	default:
@@ -421,7 +411,7 @@
 				// ignore ... and continue
 			}
 		}
-		typ := check.indirectType(ftype)
+		typ := check.typ(ftype)
 		// The parser ensures that f.Tag is nil and we don't
 		// care if a constructed AST contains a non-nil tag.
 		if len(field.Names) > 0 {
@@ -483,7 +473,7 @@
 				continue // ignore
 			}
 
-			typ := check.indirectType(f.Type)
+			typ := check.typ(f.Type)
 			sig, _ := typ.(*Signature)
 			if sig == nil {
 				if typ != Typ[Invalid] {
@@ -508,8 +498,9 @@
 			// it if it's a valid interface.
 			typ := check.typ(f.Type)
 
-			if _, ok := underlying(typ).(*Interface); !ok {
-				if typ != Typ[Invalid] {
+			utyp := check.underlying(typ)
+			if _, ok := utyp.(*Interface); !ok {
+				if utyp != Typ[Invalid] {
 					check.errorf(f.Type.Pos(), "%s is not an interface", typ)
 				}
 				continue
@@ -555,7 +546,12 @@
 		}()
 	}
 
-	ityp.allMethods = markComplete // avoid infinite recursion
+	// An infinitely expanding interface (due to a cycle) is detected
+	// elsewhere (Checker.validType), so here we simply assume we only
+	// have valid interfaces. Mark the interface as complete to avoid
+	// infinite recursion if the validType check occurs later for some
+	// reason.
+	ityp.allMethods = markComplete
 
 	// Methods of embedded interfaces are collected unchanged; i.e., the identity
 	// of a method I.m's Func Object of an interface I is the same as that of
@@ -599,7 +595,12 @@
 	posList := check.posMap[ityp]
 	for i, typ := range ityp.embeddeds {
 		pos := posList[i] // embedding position
-		typ := underlying(typ).(*Interface)
+		typ, ok := check.underlying(typ).(*Interface)
+		if !ok {
+			// An error was reported when collecting the embedded types.
+			// Ignore it.
+			continue
+		}
 		check.completeInterface(typ)
 		for _, m := range typ.allMethods {
 			addMethod(pos, m, false) // use embedding position pos rather than m.pos