internal/memoize: switch from GC-driven to explicit deletion The GC-based cache has given us a number of problems. First, memory leaks driven by reference cycles: the Go runtime cannot collect cycles involving finalizers, which prevents us from writing natural code in Bind callbacks. If we screw it up, we get a mysterious leak that takes a long time to track down. Second, the behavior is generally mysterious; it's hard to predict how long a value lasts, and harder to tell if a value being live is a bug. Third, we think that it may be interacting poorly with the GC, resulting in unnecessary memory usage. The structure of the values we put in the cache is not actually that complicated -- there are only 5 significant types: parse, typecheck, analyze, parse mod, and analyze mod. Managing them manually should not be conceptually difficult, and in fact we already do most of the work in (*snapshot).clone. In this CL the cache adds the concept of "generations", which function as reference counts on cache entries. Entries are still global and shared across generations, but will be explicitly deleted once no generations refer to them. The idea is that each snapshot is a new generation, and can inherit entries from the previous snapshot or leave them behind to be deleted. One obvious risk of this scheme is that we'll leave dangling references to values without actually inheriting them across generations. To prevent that, getting a value requires passing in the generation at which it's being read, and an error will be returned if that generation is dead. Change-Id: I4b30891efd7be4e10f2b84f4c067b0dee43dcf9c Reviewed-on: https://go-review.googlesource.com/c/tools/+/242838 Run-TryBot: Heschi Kreinick <heschi@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Rebecca Stambler <rstambler@golang.org> Reviewed-by: Robert Findley <rfindley@google.com>

commit: c1903db4dbfe5ab8e2ec704e203535dae53c2adc [log] [tgz]
author: Heschi Kreinick <heschi@google.com> Fri Jul 24 17:17:13 2020 -0400
committer: Heschi Kreinick <heschi@google.com> Mon Aug 10 19:02:17 2020 +0000
tree: c14a32f8294f2396cb37e5947287e93c31e74040
parent: 74a6bbb3463be9c30a133a80d4e5bbdbd0b6ee2c [diff] [blame]
diff --git a/internal/memoize/memoize_test.go b/internal/memoize/memoize_test.go
index 00d003a..e6e7b0b 100644
--- a/internal/memoize/memoize_test.go
+++ b/internal/memoize/memoize_test.go

@@ -5,230 +5,65 @@
 package memoize_test
 
 import (
-	"bytes"
 	"context"
-	"fmt"
-	"io"
-	"runtime"
 	"strings"
 	"testing"
-	"time"
 
 	"golang.org/x/tools/internal/memoize"
 )
 
-func TestStore(t *testing.T) {
-	ctx := context.Background()
+func TestGet(t *testing.T) {
 	s := &memoize.Store{}
-	logBuffer := &bytes.Buffer{}
-	ctx = context.WithValue(ctx, "logger", logBuffer)
+	g := s.Generation("x")
 
-	// These tests check the behavior of the Bind and Get functions.
-	// They confirm that the functions only ever run once for a given value.
-	for _, test := range []struct {
-		name, key, want string
-	}{
-		{
-			name: "nothing",
-		},
-		{
-			name: "get 1",
-			key:  "_1",
-			want: `
-start @1
-simple a = A
-simple b = B
-simple c = C
-end @1 =  A B C
-`[1:],
-		},
-		{
-			name: "redo 1",
-			key:  "_1",
-			want: ``,
-		},
-		{
-			name: "get 2",
-			key:  "_2",
-			want: `
-start @2
-simple d = D
-simple e = E
-simple f = F
-end @2 =  D E F
-`[1:],
-		},
-		{
-			name: "redo 2",
-			key:  "_2",
-			want: ``,
-		},
-		{
-			name: "get 3",
-			key:  "_3",
-			want: `
-start @3
-end @3 =  @1[ A B C] @2[ D E F]
-`[1:],
-		},
-		{
-			name: "get 4",
-			key:  "_4",
-			want: `
-start @3
-simple g = G
-error ERR = fail
-simple h = H
-end @3 =  G !fail H
-`[1:],
-		},
-	} {
-		s.Bind(test.key, generate(s, test.key)).Get(ctx, nil)
-		got := logBuffer.String()
-		if got != test.want {
-			t.Errorf("at %q expected:\n%v\ngot:\n%s", test.name, test.want, got)
-		}
-		logBuffer.Reset()
-	}
+	evaled := 0
 
-	// This test checks that values are garbage collected and removed from the
-	// store when they are no longer used.
-
-	pinned := []string{"b", "_1", "_3"}             // keys to pin in memory
-	unpinned := []string{"a", "c", "d", "_2", "_4"} // keys to garbage collect
-
-	// Handles maintain a strong reference to their values.
-	// By generating handles for the pinned keys and keeping the pins alive in memory,
-	// we ensure these keys stay cached.
-	var pins []*memoize.Handle
-	for _, key := range pinned {
-		h := s.Bind(key, generate(s, key))
-		h.Get(ctx, nil)
-		pins = append(pins, h)
-	}
-
-	// Force the garbage collector to run.
-	runAllFinalizers(t)
-
-	// Confirm our expectation that pinned values should remain cached,
-	// and unpinned values should be garbage collected.
-	for _, k := range pinned {
-		if v := s.Find(k); v == nil {
-			t.Errorf("pinned value %q was nil", k)
-		}
-	}
-	for _, k := range unpinned {
-		if v := s.Find(k); v != nil {
-			t.Errorf("unpinned value %q should be nil, was %v", k, v)
-		}
-	}
-
-	// This forces the pins to stay alive until this point in the function.
-	runtime.KeepAlive(pins)
-}
-
-func runAllFinalizers(t *testing.T) {
-	// The following is very tricky, so be very when careful changing it.
-	// It relies on behavior of finalizers that is not guaranteed.
-
-	// First, run the GC to queue the finalizers.
-	runtime.GC()
-
-	// wait is used to signal that the finalizers are all done.
-	wait := make(chan struct{})
-
-	// Register a finalizer against an immediately collectible object.
-	//
-	// The finalizer will signal on the wait channel once it executes,
-	// and it was the most recently registered finalizer,
-	// so the wait channel will be closed when all of the finalizers have run.
-	runtime.SetFinalizer(&struct{ s string }{"obj"}, func(_ interface{}) { close(wait) })
-
-	// Now, run the GC again to pick up the tracker object above.
-	runtime.GC()
-
-	// Wait for the finalizers to run or a timeout.
-	select {
-	case <-wait:
-	case <-time.Tick(time.Second):
-		t.Fatalf("finalizers had not run after 1 second")
+	h := g.Bind("key", func(context.Context, memoize.Arg) interface{} {
+		evaled++
+		return "res"
+	})
+	expectGet(t, h, g, "res")
+	expectGet(t, h, g, "res")
+	if evaled != 1 {
+		t.Errorf("got %v calls to function, wanted 1", evaled)
 	}
 }
 
-type stringOrError struct {
-	value string
-	err   error
-}
-
-func (v *stringOrError) String() string {
-	if v.err != nil {
-		return v.err.Error()
-	}
-	return v.value
-}
-
-func asValue(v interface{}) *stringOrError {
-	if v == nil {
-		return nil
-	}
-	return v.(*stringOrError)
-}
-
-func generate(s *memoize.Store, key interface{}) memoize.Function {
-	return func(ctx context.Context, _ memoize.Arg) interface{} {
-		name := key.(string)
-		switch name {
-		case "":
-			return nil
-		case "err":
-			return logGenerator(ctx, s, "ERR", "", fmt.Errorf("fail"))
-		case "_1":
-			return joinValues(ctx, s, "@1", "a", "b", "c")
-		case "_2":
-			return joinValues(ctx, s, "@2", "d", "e", "f")
-		case "_3":
-			return joinValues(ctx, s, "@3", "_1", "_2")
-		case "_4":
-			return joinValues(ctx, s, "@3", "g", "err", "h")
-		default:
-			return logGenerator(ctx, s, name, strings.ToUpper(name), nil)
-		}
+func expectGet(t *testing.T, h *memoize.Handle, g *memoize.Generation, wantV interface{}) {
+	gotV, gotErr := h.Get(context.Background(), g, nil)
+	if gotV != wantV || gotErr != nil {
+		t.Fatalf("Get() = %v, %v, wanted %v, nil", gotV, gotErr, wantV)
 	}
 }
 
-// logGenerator generates a *stringOrError value, while logging to the store's logger.
-func logGenerator(ctx context.Context, s *memoize.Store, name string, v string, err error) *stringOrError {
-	// Get the logger from the context.
-	w := ctx.Value("logger").(io.Writer)
-
-	if err != nil {
-		fmt.Fprintf(w, "error %v = %v\n", name, err)
-	} else {
-		fmt.Fprintf(w, "simple %v = %v\n", name, v)
+func expectGetError(t *testing.T, h *memoize.Handle, g *memoize.Generation, substr string) {
+	gotV, gotErr := h.Get(context.Background(), g, nil)
+	if gotErr == nil || !strings.Contains(gotErr.Error(), substr) {
+		t.Fatalf("Get() = %v, %v, wanted err %q", gotV, gotErr, substr)
 	}
-	return &stringOrError{value: v, err: err}
 }
+func TestGenerations(t *testing.T) {
+	s := &memoize.Store{}
+	// Evaluate key in g1.
+	g1 := s.Generation("g1")
+	h1 := g1.Bind("key", func(context.Context, memoize.Arg) interface{} { return "res" })
+	expectGet(t, h1, g1, "res")
 
-// joinValues binds a list of keys to their values, while logging to the store's logger.
-func joinValues(ctx context.Context, s *memoize.Store, name string, keys ...string) *stringOrError {
-	// Get the logger from the context.
-	w := ctx.Value("logger").(io.Writer)
+	// Get key in g2. It should inherit the value from g1.
+	g2 := s.Generation("g2")
+	h2 := g2.Bind("key", func(context.Context, memoize.Arg) interface{} {
+		t.Fatal("h2 should not need evaluation")
+		return "error"
+	})
+	expectGet(t, h2, g2, "res")
 
-	fmt.Fprintf(w, "start %v\n", name)
-	value := ""
-	for _, key := range keys {
-		i, err := s.Bind(key, generate(s, key)).Get(ctx, nil)
-		if err != nil {
-			return &stringOrError{err: err}
-		}
-		if v := asValue(i); v == nil {
-			value = value + " <nil>"
-		} else if v.err != nil {
-			value = value + " !" + v.err.Error()
-		} else {
-			value = value + " " + v.value
-		}
-	}
-	fmt.Fprintf(w, "end %v = %v\n", name, value)
-	return &stringOrError{value: fmt.Sprintf("%s[%v]", name, value)}
+	// With g1 destroyed, g2 should still work.
+	g1.Destroy()
+	expectGet(t, h2, g2, "res")
+
+	// With all generations destroyed, key should be re-evaluated.
+	g2.Destroy()
+	g3 := s.Generation("g3")
+	h3 := g3.Bind("key", func(context.Context, memoize.Arg) interface{} { return "new res" })
+	expectGet(t, h3, g3, "new res")
 }
commit	c1903db4dbfe5ab8e2ec704e203535dae53c2adc	[log] [tgz]
author	Heschi Kreinick <heschi@google.com>	Fri Jul 24 17:17:13 2020 -0400
committer	Heschi Kreinick <heschi@google.com>	Mon Aug 10 19:02:17 2020 +0000
tree	c14a32f8294f2396cb37e5947287e93c31e74040
parent	74a6bbb3463be9c30a133a80d4e5bbdbd0b6ee2c [diff] [blame]