internal/astutil: preserve [...] composite literals in PurgeFuncBodies
PurgeFuncBodies deletes the contents of every outermost {...} region
except struct/interface type bodies. That includes the bodies of
[...]T composite literals, whose element count is part of the type:
purging [...]int{1,2,3} to [...]int{} changes the type from [3]int to
[0]int, and similarly affects constant values such as
len([...]int{1,2,3}).
This was harmless for the existing callers (typerefs and unimported
completion both ignore composite-literal elements anyway), but it
prevents using the purged source as a type-shape fingerprint for
caching.
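Add a single ellipsis flag to preserve [...] literal bodies verbatim.
The flag is set when "...]" is seen, and cleared either when it is
consumed by the next "{" that does not open a struct/interface type
body (that "{" begins the literal body to preserve) or at the next
semicolon. All other composite literals are still purged, since their
contents don't affect the type of the enclosing declaration.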
Add a table-driven test for the cases that motivated the change, and
update the property test's reference oracle to match.
Updates golang/go#69523
Change-Id: I52b3d41848d5c674685cd90f2be57e94dd5e0bfc
Reviewed-on: https://go-review.googlesource.com/c/tools/+/766140
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Madeline Kalil <mkalil@google.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
Auto-Submit: Robert Findley <rfindley@golang.org>
diff --git a/internal/astutil/purge.go b/internal/astutil/purge.go
index 81ac46a..d0a3941 100644
--- a/internal/astutil/purge.go
+++ b/internal/astutil/purge.go
@@ -12,9 +12,14 @@
)
// PurgeFuncBodies returns a copy of src in which the contents of each
-// outermost {...} region except struct and interface types have been
-// deleted. This reduces the amount of work required to parse the
-// top-level declarations.
+// outermost {...} region have been deleted, except for struct and
+// interface type bodies and the bodies of length-elided array
+// literals ([...]T), whose element count is part of the type. The
+// deleted regions include function bodies, function-literal bodies,
+// and the bodies of all other composite literals (struct, slice, map,
+// and explicitly-sized array), whose contents don't affect the type
+// of the enclosing declaration. This reduces the amount of work
+// required to parse the top-level declarations.
//
// PurgeFuncBodies does not preserve newlines or position information.
// Also, if the input is invalid, parsing the output of
@@ -22,11 +27,12 @@
// on parser error recovery.
func PurgeFuncBodies(src []byte) []byte {
// Destroy the content of any {...}-bracketed regions that are
- // not immediately preceded by a "struct" or "interface"
- // token. That includes function bodies, composite literals,
- // switch/select bodies, and all blocks of statements.
- // This will lead to non-void functions that don't have return
- // statements, which of course is a type error, but that's ok.
+ // not immediately preceded by a "struct" or "interface" token,
+ // and that are not the body of a length-elided array literal.
+ // That includes function bodies, switch/select bodies, and most
+ // composite literals; this will lead to non-void functions that
+ // don't have return statements, which of course is a type error,
+ // but that's ok.
var out bytes.Buffer
file := token.NewFileSet().AddFile("", -1, len(src))
@@ -34,7 +40,8 @@
sc.Init(file, src, nil, 0)
var prev token.Token
var cursor int // last consumed src offset
- var braces []token.Pos // stack of unclosed braces or -1 for struct/interface type
+ var braces []token.Pos // stack of unclosed braces, or -1 for a region we preserve
+ var ellipsis bool // saw "[...]" not yet consumed by a literal-body "{"
for {
pos, tok, _ := sc.Scan()
if tok == token.EOF {
@@ -44,9 +51,23 @@
case token.COMMENT:
// TODO(adonovan): opt: skip, to save an estimated 20% of time.
+ case token.SEMICOLON:
+ ellipsis = false
+
+ case token.RBRACK:
+ // "...]" occurs only in the array-type prefix of a
+ // composite literal; variadic "..." is followed by
+ // a type or ")", never "]".
+ if prev == token.ELLIPSIS {
+ ellipsis = true
+ }
+
case token.LBRACE:
if prev == token.STRUCT || prev == token.INTERFACE {
- pos = -1
+ pos = -1 // type body: preserve (don't consume ellipsis)
+ } else if ellipsis {
+ pos = -1 // [...]T literal body: preserve
+ ellipsis = false
}
braces = append(braces, pos)
@@ -55,7 +76,7 @@
top := braces[last]
braces = braces[:last]
if top < 0 {
- // struct/interface type: leave alone
+ // preserve
} else if len(braces) == 0 { // toplevel only
// Delete {...} body.
start := file.Offset(top)
diff --git a/internal/astutil/purge_test.go b/internal/astutil/purge_test.go
index 3de6bdc..6db426d 100644
--- a/internal/astutil/purge_test.go
+++ b/internal/astutil/purge_test.go
@@ -17,6 +17,133 @@
"golang.org/x/tools/internal/testenv"
)
+func TestPurgeFuncBodiesCases(t *testing.T) {
+ tests := []struct {
+ name string
+ in string
+ want string
+ }{
+ {
+ name: "func decl",
+ in: "package p\nfunc F(x int) string { return fmt.Sprint(x) }\n",
+ want: "package p\nfunc F(x int) string {}\n",
+ },
+ {
+ name: "method decl",
+ in: "package p\nfunc (r R) M() int { return r.x }\n",
+ want: "package p\nfunc (r R) M() int {}\n",
+ },
+ {
+ name: "generic func decl",
+ in: "package p\nfunc F[T any](x T) T { return x }\n",
+ want: "package p\nfunc F[T any](x T) T {}\n",
+ },
+ {
+ name: "no body (assembly)",
+ in: "package p\nfunc F()\nfunc G() { x() }\n",
+ want: "package p\nfunc F()\nfunc G() {}\n",
+ },
+ // Result types containing braces: type literal preserved,
+ // only the body is purged.
+ {
+ name: "struct result type",
+ in: "package p\nfunc F() struct{A int} { panic(0) }\n",
+ want: "package p\nfunc F() struct{A int} {}\n",
+ },
+ {
+ name: "interface result type",
+ in: "package p\nfunc F() interface{M()} { return nil }\n",
+ want: "package p\nfunc F() interface{M()} {}\n",
+ },
+ {
+ name: "ptr struct result type",
+ in: "package p\nfunc F() *struct{A int} { return nil }\n",
+ want: "package p\nfunc F() *struct{A int} {}\n",
+ },
+ {
+ name: "func type result",
+ in: "package p\nfunc F() func() int { return nil }\n",
+ want: "package p\nfunc F() func() int {}\n",
+ },
+ // Length-elided array literals ([...]T) are preserved: the
+ // element count is part of the type. (Nested elements are
+ // preserved verbatim along with the outer body.)
+ {
+ name: "auto-sized array",
+ in: "package p\nvar X = [...]int{1, 2, 3}\n",
+ want: "package p\nvar X = [...]int{1, 2, 3}\n",
+ },
+ {
+ name: "auto-sized array with key",
+ in: "package p\nvar X = [...]int{5: 0}\n",
+ want: "package p\nvar X = [...]int{5: 0}\n",
+ },
+ {
+ name: "auto-sized array of struct",
+ in: "package p\nvar X = [...]struct{A int}{{1}, {2}}\n",
+ want: "package p\nvar X = [...]struct{A int}{{1}, {2}}\n",
+ },
+ {
+ name: "const from auto-sized array",
+ in: "package p\nconst N = len([...]int{1, 2, 3})\n",
+ want: "package p\nconst N = len([...]int{1, 2, 3})\n",
+ },
+ {
+ name: "auto-sized array then unrelated literal",
+ in: "package p\nvar X = [...]int{1}\nvar Y = T{2}\n",
+ want: "package p\nvar X = [...]int{1}\nvar Y = T{}\n",
+ },
+ // Other composite literals are purged: their contents don't
+ // affect the type of the enclosing declaration.
+ {
+ name: "slice composite literal",
+ in: "package p\nvar X = []int{1, 2, 3}\n",
+ want: "package p\nvar X = []int{}\n",
+ },
+ {
+ name: "fixed-size array composite literal",
+ in: "package p\nvar X = [3]int{1, 2, 3}\n",
+ want: "package p\nvar X = [3]int{}\n",
+ },
+ {
+ name: "map composite literal",
+ in: "package p\nvar X = map[string]func(){\"a\": f}\n",
+ want: "package p\nvar X = map[string]func(){}\n",
+ },
+ {
+ name: "struct composite literal",
+ in: "package p\nvar X = struct{Y int}{Y: 1}\n",
+ want: "package p\nvar X = struct{Y int}{}\n",
+ },
+ // Func-literal bodies are purged.
+ {
+ name: "func literal in initializer",
+ in: "package p\nvar F = func() int { return 1 }\n",
+ want: "package p\nvar F = func() int {}\n",
+ },
+ // Type-declaration bodies are preserved.
+ {
+ name: "type decl struct body",
+ in: "package p\ntype T struct{ X int }\n",
+ want: "package p\ntype T struct{ X int }\n",
+ },
+ // "..." in non-array contexts does not trigger preservation.
+ {
+ name: "variadic param",
+ in: "package p\nfunc F(x ...int) { y(x) }\n",
+ want: "package p\nfunc F(x ...int) {}\n",
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := string(astutil.PurgeFuncBodies([]byte(tt.in)))
+ if got != tt.want {
+ t.Errorf("PurgeFuncBodies:\n in: %q\ngot: %q\nwant: %q", tt.in, got, tt.want)
+ }
+ })
+ }
+}
+
// TestPurgeFuncBodies tests PurgeFuncBodies by comparing it against a
// (less efficient) reference implementation that purges after parsing.
func TestPurgeFuncBodies(t *testing.T) {
@@ -60,6 +187,14 @@
case *ast.FuncLit:
n.Body.List = nil
case *ast.CompositeLit:
+ if at, _ := n.Type.(*ast.ArrayType); at != nil {
+ if _, ok := at.Len.(*ast.Ellipsis); ok {
+ // [...]T literal: preserve verbatim
+ // (don't recur, since nested elements
+ // are preserved too).
+ return false
+ }
+ }
n.Elts = nil
}
return true