go/packages: deduplicate file parsing

When loading packages from source, many files are being parsed
repeatedly, for example due to test variants. While the median number of
times a file gets parsed is 2, it is significantly higher (up to 28
times) when parsing the standard library, because of test variant
shenanigans.

By caching file contents and their parsed representations we can cut
down on processing time and garbage produced. When loading individual
packages or 3rd party projects, the effect is rather small. However when
loading the entire standard library, the effect is substantial.

name       old time/op    new time/op    delta
Jaeger-8      2.95s ± 7%     2.84s ± 8%     ~     (p=0.089 n=10+10)
Std-8         4.96s ± 7%     4.23s ± 3%  -14.62%  (p=0.000 n=9+9)
Strconv-8     892ms ±34%     877ms ±21%     ~     (p=0.853 n=10+10)

name       old alloc/op   new alloc/op   delta
Jaeger-8     1.22GB ± 0%    1.21GB ± 0%   -0.84%  (p=0.000 n=10+10)
Std-8        2.57GB ± 0%    2.20GB ± 0%  -14.61%  (p=0.000 n=10+8)
Strconv-8     201MB ± 1%     200MB ± 1%     ~     (p=0.105 n=10+10)

name       old allocs/op  new allocs/op  delta
Jaeger-8      12.7M ± 0%     12.4M ± 0%   -1.82%  (p=0.000 n=9+10)
Std-8         26.4M ± 0%     17.3M ± 0%  -34.62%  (p=0.000 n=9+9)
Strconv-8     1.94M ± 0%     1.91M ± 0%   -1.50%  (p=0.000 n=10+10)

When loading std, peak RSS decreases from 1.96 GB to 1.57 GB.

While we're here, we simplify our ParseFile implementation. The contract
of ParseFile specifies that implementations must use src for parsing,
and use filename only for display purposes. As such, we mustn't ever
call it with a nil src, making the check for a nil src in our own
implementation superfluous.

Change-Id: I33daac20fc52ccdb3187a336633f712d01b71d86
Reviewed-on: https://go-review.googlesource.com/c/tools/+/171377
Run-TryBot: Ian Cottrell <iancottrell@google.com>
Run-TryBot: Michael Matloob <matloob@golang.org>
Reviewed-by: Michael Matloob <matloob@golang.org>
diff --git a/go/packages/packages.go b/go/packages/packages.go
index 4639fcd..eedd43b 100644
--- a/go/packages/packages.go
+++ b/go/packages/packages.go
@@ -418,8 +418,10 @@
 type loader struct {
 	pkgs map[string]*loaderPackage
 	Config
-	sizes    types.Sizes
-	exportMu sync.Mutex // enforces mutual exclusion of exportdata operations
+	sizes        types.Sizes
+	parseCache   map[string]*parseValue
+	parseCacheMu sync.Mutex
+	exportMu     sync.Mutex // enforces mutual exclusion of exportdata operations
 
 	// TODO(matloob): Add an implied mode here and use that instead of mode.
 	// Implied mode would contain all the fields we need the data for so we can
@@ -428,8 +430,16 @@
 	// where we need certain modes right.
 }
 
+type parseValue struct {
+	f     *ast.File
+	err   error
+	ready chan struct{}
+}
+
 func newLoader(cfg *Config) *loader {
-	ld := &loader{}
+	ld := &loader{
+		parseCache: map[string]*parseValue{},
+	}
 	if cfg != nil {
 		ld.Config = *cfg
 	}
@@ -457,12 +467,8 @@
 		// because we load source if export data is missing.
 		if ld.ParseFile == nil {
 			ld.ParseFile = func(fset *token.FileSet, filename string, src []byte) (*ast.File, error) {
-				var isrc interface{}
-				if src != nil {
-					isrc = src
-				}
 				const mode = parser.AllErrors | parser.ParseComments
-				return parser.ParseFile(fset, filename, isrc, mode)
+				return parser.ParseFile(fset, filename, src, mode)
 			}
 		}
 	}
@@ -864,6 +870,42 @@
 // the number of parallel I/O calls per process.
 var ioLimit = make(chan bool, 20)
 
+func (ld *loader) parseFile(filename string) (*ast.File, error) {
+	ld.parseCacheMu.Lock()
+	v, ok := ld.parseCache[filename]
+	if ok {
+		// cache hit
+		ld.parseCacheMu.Unlock()
+		<-v.ready
+	} else {
+		// cache miss
+		v = &parseValue{ready: make(chan struct{})}
+		ld.parseCache[filename] = v
+		ld.parseCacheMu.Unlock()
+
+		var src []byte
+		for f, contents := range ld.Config.Overlay {
+			if sameFile(f, filename) {
+				src = contents
+			}
+		}
+		var err error
+		if src == nil {
+			ioLimit <- true // wait
+			src, err = ioutil.ReadFile(filename)
+			<-ioLimit // signal
+		}
+		if err != nil {
+			v.err = err
+		} else {
+			v.f, v.err = ld.ParseFile(ld.Fset, filename, src)
+		}
+
+		close(v.ready)
+	}
+	return v.f, v.err
+}
+
 // parseFiles reads and parses the Go source files and returns the ASTs
 // of the ones that could be at least partially parsed, along with a
 // list of I/O and parse errors encountered.
@@ -884,24 +926,7 @@
 		}
 		wg.Add(1)
 		go func(i int, filename string) {
-			ioLimit <- true // wait
-			// ParseFile may return both an AST and an error.
-			var src []byte
-			for f, contents := range ld.Config.Overlay {
-				if sameFile(f, filename) {
-					src = contents
-				}
-			}
-			var err error
-			if src == nil {
-				src, err = ioutil.ReadFile(filename)
-			}
-			if err != nil {
-				parsed[i], errors[i] = nil, err
-			} else {
-				parsed[i], errors[i] = ld.ParseFile(ld.Fset, filename, src)
-			}
-			<-ioLimit // signal
+			parsed[i], errors[i] = ld.parseFile(filename)
 			wg.Done()
 		}(i, file)
 	}