godoc: fix runaway goroutine use

Previously we would spin up a new goroutine for every directory
we needed to parse. Since parent directories cannot exit until
children have exited, in large GOPATH directories we could have 8000+
goroutines evaluating directories concurrently. The race detector uses
tsan, and tsan can handle a maximum of 8192 concurrent threads at one
time.

Limit the number of concurrent threads. It is difficult to do this
in a way that does not involve a child being blocked on a parent
completing to spin up goroutines for itself to complete. Solve this by
completing work on the main thread if there are no additional workers
available to help complete work.

I was expecting limiting the concurrency to hurt performance, but it
actually significantly improves it. Benchmarks were performed using CL
94904 at tip, and with goroutines at 2, 4, and 8 times the number of
CPU's.

$ benchstat tip.benchmark gated-2.benchmark gated-4.benchmark gated-8.benchmark
name \ time/op    tip.benchmark  gated-2.benchmark  gated-4.benchmark  gated-8.benchmark
NewDirectory-4       293ms ± 2%         262ms ± 4%         252ms ± 4%         253ms ± 2%

name \ alloc/op   tip.benchmark  gated-2.benchmark  gated-4.benchmark  gated-8.benchmark
NewDirectory-4       218MB ± 0%         218MB ± 0%         218MB ± 0%         218MB ± 0%

name \ allocs/op  tip.benchmark  gated-2.benchmark  gated-4.benchmark  gated-8.benchmark
NewDirectory-4        513k ± 0%          508k ± 0%          509k ± 0%          510k ± 0%

Fixes golang/go#22110.

Change-Id: If01f78f1fc53cd195e4f8f6988c3c39b3c275992
Reviewed-on: https://go-review.googlesource.com/94955
Reviewed-by: Yury Smolsky <yury@smolsky.by>
Reviewed-by: Kevin Burke <kev@inburke.com>
diff --git a/godoc/dirtrees.go b/godoc/dirtrees.go
index 2b14a41..fed6ee1 100644
--- a/godoc/dirtrees.go
+++ b/godoc/dirtrees.go
@@ -13,6 +13,7 @@
 	"log"
 	"os"
 	pathpkg "path"
+	"runtime"
 	"strings"
 )
 
@@ -55,7 +56,13 @@
 
 // ioGate is a semaphore controlling VFS activity (ReadDir, parseFile, etc).
 // Send before an operation and receive after.
-var ioGate = make(chan bool, 20)
+var ioGate = make(chan struct{}, 20)
+
+// workGate controls the number of concurrent workers. Too many concurrent
+// workers and performance degrades and the race detector gets overwhelmed. If
+// we cannot check out a concurrent worker, work is performed by the main thread
+// instead of spinning up another goroutine.
+var workGate = make(chan struct{}, runtime.NumCPU()*4)
 
 func (b *treeBuilder) newDirTree(fset *token.FileSet, path, name string, depth int) *Directory {
 	if name == testdataDirName {
@@ -88,7 +95,7 @@
 		}
 	}
 
-	ioGate <- true
+	ioGate <- struct{}{}
 	list, err := b.c.fs.ReadDir(path)
 	<-ioGate
 	if err != nil {
@@ -101,23 +108,34 @@
 
 	// determine number of subdirectories and if there are package files
 	var dirchs []chan *Directory
+	var dirs []*Directory
 
 	for _, d := range list {
 		filename := pathpkg.Join(path, d.Name())
 		switch {
 		case isPkgDir(d):
-			ch := make(chan *Directory, 1)
-			dirchs = append(dirchs, ch)
 			name := d.Name()
-			go func() {
-				ch <- b.newDirTree(fset, filename, name, depth+1)
-			}()
+			select {
+			case workGate <- struct{}{}:
+				ch := make(chan *Directory, 1)
+				dirchs = append(dirchs, ch)
+				go func() {
+					ch <- b.newDirTree(fset, filename, name, depth+1)
+					<-workGate
+				}()
+			default:
+				// no free workers, do work synchronously
+				dir := b.newDirTree(fset, filename, name, depth+1)
+				if dir != nil {
+					dirs = append(dirs, dir)
+				}
+			}
 		case !haveSummary && isPkgFile(d):
 			// looks like a package file, but may just be a file ending in ".go";
 			// don't just count it yet (otherwise we may end up with hasPkgFiles even
 			// though the directory doesn't contain any real package files - was bug)
 			// no "optimal" package synopsis yet; continue to collect synopses
-			ioGate <- true
+			ioGate <- struct{}{}
 			const flags = parser.ParseComments | parser.PackageClauseOnly
 			file, err := b.c.parseFile(fset, filename, flags)
 			<-ioGate
@@ -149,7 +167,6 @@
 	}
 
 	// create subdirectory tree
-	var dirs []*Directory
 	for _, ch := range dirchs {
 		if d := <-ch; d != nil {
 			dirs = append(dirs, d)