internal/astutil: preserve [...] composite literals in PurgeFuncBodies

PurgeFuncBodies deletes the contents of every outermost {...} region
except struct/interface type bodies. That includes the bodies of
[...]T composite literals, whose element count is part of the type:
purging [...]int{1,2,3} to [...]int{} changes the type from [3]int to
[0]int, and similarly affects constant values such as
len([...]int{1,2,3}).

This was harmless for the existing callers (typerefs and unimported
completion both ignore composite-literal elements anyway), but it
prevents using the purged source as a type-shape fingerprint for
caching.

Add a single ellipsis flag to preserve [...] literal bodies verbatim.
The flag is set when "...]" is seen, consumed by the next "{" that
does not open a struct/interface type body, and cleared at any
semicolon. All other composite literals are still purged, since their
contents don't affect the type of the enclosing declaration.

Add a table-driven test for the cases that motivated the change, and
update the property test's reference oracle to match.

Updates golang/go#69523

Change-Id: I52b3d41848d5c674685cd90f2be57e94dd5e0bfc
Reviewed-on: https://go-review.googlesource.com/c/tools/+/766140
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Madeline Kalil <mkalil@google.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
Auto-Submit: Robert Findley <rfindley@golang.org>
diff --git a/internal/astutil/purge.go b/internal/astutil/purge.go
index 81ac46a..d0a3941 100644
--- a/internal/astutil/purge.go
+++ b/internal/astutil/purge.go
@@ -12,9 +12,14 @@
 )
 
 // PurgeFuncBodies returns a copy of src in which the contents of each
-// outermost {...} region except struct and interface types have been
-// deleted. This reduces the amount of work required to parse the
-// top-level declarations.
+// outermost {...} region have been deleted, except for struct and
+// interface type bodies and the bodies of length-elided array
+// literals ([...]T), whose element count is part of the type. The
+// deleted regions include function bodies, function-literal bodies,
+// and the bodies of slice, map, and explicitly-sized array composite
+// literals (whose contents don't affect the type of the enclosing
+// declaration). This reduces the amount of work required to parse
+// the top-level declarations.
 //
 // PurgeFuncBodies does not preserve newlines or position information.
 // Also, if the input is invalid, parsing the output of
@@ -22,11 +27,12 @@
 // on parser error recovery.
 func PurgeFuncBodies(src []byte) []byte {
 	// Destroy the content of any {...}-bracketed regions that are
-	// not immediately preceded by a "struct" or "interface"
-	// token.  That includes function bodies, composite literals,
-	// switch/select bodies, and all blocks of statements.
-	// This will lead to non-void functions that don't have return
-	// statements, which of course is a type error, but that's ok.
+	// not immediately preceded by a "struct" or "interface" token,
+	// and that are not the body of a length-elided array literal.
+	// That includes function bodies, switch/select bodies, and most
+	// composite literals; this will lead to non-void functions that
+	// don't have return statements, which of course is a type error,
+	// but that's ok.
 
 	var out bytes.Buffer
 	file := token.NewFileSet().AddFile("", -1, len(src))
@@ -34,7 +40,8 @@
 	sc.Init(file, src, nil, 0)
 	var prev token.Token
 	var cursor int         // last consumed src offset
-	var braces []token.Pos // stack of unclosed braces or -1 for struct/interface type
+	var braces []token.Pos // stack of unclosed braces, or -1 for a region we preserve
+	var ellipsis bool      // saw "[...]" not yet consumed by a literal-body "{"
 	for {
 		pos, tok, _ := sc.Scan()
 		if tok == token.EOF {
@@ -44,9 +51,23 @@
 		case token.COMMENT:
 			// TODO(adonovan): opt: skip, to save an estimated 20% of time.
 
+		case token.SEMICOLON:
+			ellipsis = false
+
+		case token.RBRACK:
+			// "...]" occurs only in the array-type prefix of a
+			// composite literal; variadic "..." is followed by
+			// a type or ")", never "]".
+			if prev == token.ELLIPSIS {
+				ellipsis = true
+			}
+
 		case token.LBRACE:
 			if prev == token.STRUCT || prev == token.INTERFACE {
-				pos = -1
+				pos = -1 // type body: preserve (don't consume ellipsis)
+			} else if ellipsis {
+				pos = -1 // [...]T literal body: preserve
+				ellipsis = false
 			}
 			braces = append(braces, pos)
 
@@ -55,7 +76,7 @@
 				top := braces[last]
 				braces = braces[:last]
 				if top < 0 {
-					// struct/interface type: leave alone
+					// preserve
 				} else if len(braces) == 0 { // toplevel only
 					// Delete {...} body.
 					start := file.Offset(top)
diff --git a/internal/astutil/purge_test.go b/internal/astutil/purge_test.go
index 3de6bdc..6db426d 100644
--- a/internal/astutil/purge_test.go
+++ b/internal/astutil/purge_test.go
@@ -17,6 +17,133 @@
 	"golang.org/x/tools/internal/testenv"
 )
 
+func TestPurgeFuncBodiesCases(t *testing.T) {
+	tests := []struct {
+		name string
+		in   string
+		want string
+	}{
+		{
+			name: "func decl",
+			in:   "package p\nfunc F(x int) string { return fmt.Sprint(x) }\n",
+			want: "package p\nfunc F(x int) string {}\n",
+		},
+		{
+			name: "method decl",
+			in:   "package p\nfunc (r R) M() int { return r.x }\n",
+			want: "package p\nfunc (r R) M() int {}\n",
+		},
+		{
+			name: "generic func decl",
+			in:   "package p\nfunc F[T any](x T) T { return x }\n",
+			want: "package p\nfunc F[T any](x T) T {}\n",
+		},
+		{
+			name: "no body (assembly)",
+			in:   "package p\nfunc F()\nfunc G() { x() }\n",
+			want: "package p\nfunc F()\nfunc G() {}\n",
+		},
+		// Result types containing braces: type literal preserved,
+		// only the body is purged.
+		{
+			name: "struct result type",
+			in:   "package p\nfunc F() struct{A int} { panic(0) }\n",
+			want: "package p\nfunc F() struct{A int} {}\n",
+		},
+		{
+			name: "interface result type",
+			in:   "package p\nfunc F() interface{M()} { return nil }\n",
+			want: "package p\nfunc F() interface{M()} {}\n",
+		},
+		{
+			name: "ptr struct result type",
+			in:   "package p\nfunc F() *struct{A int} { return nil }\n",
+			want: "package p\nfunc F() *struct{A int} {}\n",
+		},
+		{
+			name: "func type result",
+			in:   "package p\nfunc F() func() int { return nil }\n",
+			want: "package p\nfunc F() func() int {}\n",
+		},
+		// Length-elided array literals ([...]T) are preserved: the
+		// element count is part of the type. (Nested elements are
+		// preserved verbatim along with the outer body.)
+		{
+			name: "auto-sized array",
+			in:   "package p\nvar X = [...]int{1, 2, 3}\n",
+			want: "package p\nvar X = [...]int{1, 2, 3}\n",
+		},
+		{
+			name: "auto-sized array with key",
+			in:   "package p\nvar X = [...]int{5: 0}\n",
+			want: "package p\nvar X = [...]int{5: 0}\n",
+		},
+		{
+			name: "auto-sized array of struct",
+			in:   "package p\nvar X = [...]struct{A int}{{1}, {2}}\n",
+			want: "package p\nvar X = [...]struct{A int}{{1}, {2}}\n",
+		},
+		{
+			name: "const from auto-sized array",
+			in:   "package p\nconst N = len([...]int{1, 2, 3})\n",
+			want: "package p\nconst N = len([...]int{1, 2, 3})\n",
+		},
+		{
+			name: "auto-sized array then unrelated literal",
+			in:   "package p\nvar X = [...]int{1}\nvar Y = T{2}\n",
+			want: "package p\nvar X = [...]int{1}\nvar Y = T{}\n",
+		},
+		// Other composite literals are purged: their contents don't
+		// affect the type of the enclosing declaration.
+		{
+			name: "slice composite literal",
+			in:   "package p\nvar X = []int{1, 2, 3}\n",
+			want: "package p\nvar X = []int{}\n",
+		},
+		{
+			name: "fixed-size array composite literal",
+			in:   "package p\nvar X = [3]int{1, 2, 3}\n",
+			want: "package p\nvar X = [3]int{}\n",
+		},
+		{
+			name: "map composite literal",
+			in:   "package p\nvar X = map[string]func(){\"a\": f}\n",
+			want: "package p\nvar X = map[string]func(){}\n",
+		},
+		{
+			name: "struct composite literal",
+			in:   "package p\nvar X = struct{Y int}{Y: 1}\n",
+			want: "package p\nvar X = struct{Y int}{}\n",
+		},
+		// Func-literal bodies are purged.
+		{
+			name: "func literal in initializer",
+			in:   "package p\nvar F = func() int { return 1 }\n",
+			want: "package p\nvar F = func() int {}\n",
+		},
+		// Type-declaration bodies are preserved.
+		{
+			name: "type decl struct body",
+			in:   "package p\ntype T struct{ X int }\n",
+			want: "package p\ntype T struct{ X int }\n",
+		},
+		// "..." in non-array contexts does not trigger preservation.
+		{
+			name: "variadic param",
+			in:   "package p\nfunc F(x ...int) { y(x) }\n",
+			want: "package p\nfunc F(x ...int) {}\n",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := string(astutil.PurgeFuncBodies([]byte(tt.in)))
+			if got != tt.want {
+				t.Errorf("PurgeFuncBodies:\n in: %q\ngot: %q\nwant: %q", tt.in, got, tt.want)
+			}
+		})
+	}
+}
+
 // TestPurgeFuncBodies tests PurgeFuncBodies by comparing it against a
 // (less efficient) reference implementation that purges after parsing.
 func TestPurgeFuncBodies(t *testing.T) {
@@ -60,6 +187,14 @@
 				case *ast.FuncLit:
 					n.Body.List = nil
 				case *ast.CompositeLit:
+					if at, _ := n.Type.(*ast.ArrayType); at != nil {
+						if _, ok := at.Len.(*ast.Ellipsis); ok {
+							// [...]T literal: preserve verbatim
+							// (don't recur, since nested elements
+							// are preserved too).
+							return false
+						}
+					}
 					n.Elts = nil
 				}
 				return true