cmd/go/internal/test: scale the WaitDelay for test subprocesses Prior to CL 456116 we had an arbitrary 5-second delay after a test times out before we kill the test. In CL 456116, I reused that arbitrary 5-second delay as the WaitDelay as well, but on slower builders it does not seem to be generous enough. Instead of hard-coding the delay, for tests with a finite timout we now use a hard-coded fraction of the overall timeout. That will probably give delays that are longer than strictly necessary for very long timeouts, but if the user is willing to wait for a very long timeout they can probably wait a little longer for I/O too. Fixes #58230. Updates #24050. Change-Id: Ifbf3e576c034c721aa00cd17bf88563474b09955 Reviewed-on: https://go-review.googlesource.com/c/go/+/464555 TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com> Run-TryBot: Bryan Mills <bcmills@google.com> Auto-Submit: Bryan Mills <bcmills@google.com>

commit: 88a36c9e9a511ec6ad218633bce1e82f25e54d35 [log] [tgz]
author: Bryan C. Mills <bcmills@google.com> Wed Feb 01 16:44:21 2023 -0500
committer: Gopher Robot <gobot@golang.org> Thu Feb 02 20:23:27 2023 +0000
tree: f04169d114d8d943ea595b0ae0d261845aa0e181
parent: fc86770d87360ddf0346bf407a80935fe2dddec8 [diff]
diff --git a/src/cmd/go/internal/test/test.go b/src/cmd/go/internal/test/test.go
index 48760ba..2500461 100644
--- a/src/cmd/go/internal/test/test.go
+++ b/src/cmd/go/internal/test/test.go

@@ -580,6 +580,7 @@
 	testHelp bool // -help option passed to test via -args
 
 	testKillTimeout = 100 * 365 * 24 * time.Hour // backup alarm; defaults to about a century if no timeout is set
+	testWaitDelay   time.Duration                // how long to wait for output to close after a test binary exits; zero means unlimited
 	testCacheExpire time.Time                    // ignore cached test results before this time
 
 	testBlockProfile, testCPUProfile, testMemProfile, testMutexProfile, testTrace string // profiling flag that limits test to one package
@@ -754,7 +755,35 @@
 	// Don't set this if fuzzing, since it should be able to run
 	// indefinitely.
 	if testTimeout > 0 && testFuzz == "" {
-		testKillTimeout = testTimeout + 1*time.Minute
+		// The WaitDelay for the test process depends on both the OS I/O and
+		// scheduling overhead and the amount of I/O generated by the test just
+		// before it exits. We set the minimum at 5 seconds to account for the OS
+		// overhead, and scale it up from there proportional to the overall test
+		// timeout on the assumption that the time to write and read a goroutine
+		// dump from a timed-out test process scales roughly with the overall
+		// running time of the test.
+		//
+		// This is probably too generous when the timeout is very long, but it seems
+		// better to hard-code a scale factor than to hard-code a constant delay.
+		if wd := testTimeout / 10; wd < 5*time.Second {
+			testWaitDelay = 5 * time.Second
+		} else {
+			testWaitDelay = wd
+		}
+
+		// We expect the test binary to terminate itself (and dump stacks) after
+		// exactly testTimeout. We give it up to one WaitDelay or one minute,
+		// whichever is longer, to finish dumping stacks before we send it an
+		// external signal: if the process has a lot of goroutines, dumping stacks
+		// after the timeout can take a while.
+		//
+		// After the signal is delivered, the test process may have up to one
+		// additional WaitDelay to finish writing its output streams.
+		if testWaitDelay < 1*time.Minute {
+			testKillTimeout = testTimeout + 1*time.Minute
+		} else {
+			testKillTimeout = testTimeout + testWaitDelay
+		}
 	}
 
 	// Read testcache expiration time, if present.
@@ -1336,8 +1365,7 @@
 		}
 		return err
 	}
-	// Give the test five seconds to exit after the signal before resorting to Kill.
-	cmd.WaitDelay = 5 * time.Second
+	cmd.WaitDelay = testWaitDelay
 
 	base.StartSigHandlers()
 	t0 := time.Now()
@@ -1375,6 +1403,8 @@
 			fmt.Fprintf(cmd.Stdout, "*** Test killed with %v: ran too long (%v).\n", base.SignalTrace, testKillTimeout)
 		} else if cancelKilled {
 			fmt.Fprintf(cmd.Stdout, "*** Test killed: ran too long (%v).\n", testKillTimeout)
+		} else if errors.Is(err, exec.ErrWaitDelay) {
+			fmt.Fprintf(cmd.Stdout, "*** Test I/O incomplete %v after exiting.\n", cmd.WaitDelay)
 		}
 		var ee *exec.ExitError
 		if len(out) == 0 || !errors.As(err, &ee) || !ee.Exited() {

diff --git a/src/cmd/go/testdata/script/test_timeout_stdin.txt b/src/cmd/go/testdata/script/test_timeout_stdin.txt
index 606ffa7..f2de0a6 100644
--- a/src/cmd/go/testdata/script/test_timeout_stdin.txt
+++ b/src/cmd/go/testdata/script/test_timeout_stdin.txt

@@ -1,7 +1,7 @@
 # Regression test for https://go.dev/issue/24050:
 # a test that exits with an I/O stream held open
 # should fail after a reasonable delay, not wait forever.
-# (As of the time of writing, that delay is 5 seconds,
+# (As of the time of writing, that delay is 10% of the timeout,
 # but this test does not depend on its specific value.)
 
 [short] skip 'runs a test that hangs until its WaitDelay expires'
@@ -18,7 +18,7 @@
 	# exit due to a failed write to that pipe.
 
 stdout '^--- PASS: TestOrphanCmd .*\nPASS\nstdin closed'
-stdout '^exec: WaitDelay expired before I/O complete\nFAIL\s+example\s+\d+(\.\d+)?s'
+stdout '^\*\*\* Test I/O incomplete \d+.* after exiting\.\nexec: WaitDelay expired before I/O complete\nFAIL\s+example\s+\d+(\.\d+)?s'
 
 -- go.mod --
 module example
commit	88a36c9e9a511ec6ad218633bce1e82f25e54d35	[log] [tgz]
author	Bryan C. Mills <bcmills@google.com>	Wed Feb 01 16:44:21 2023 -0500
committer	Gopher Robot <gobot@golang.org>	Thu Feb 02 20:23:27 2023 +0000
tree	f04169d114d8d943ea595b0ae0d261845aa0e181
parent	fc86770d87360ddf0346bf407a80935fe2dddec8 [diff]