syscall: improve TestSetuidEtc() /proc/ parsing against races TestSetuidEtc() was failing sporadically on linux-ppc64. From the three https://build.golang.org/ logs, it looked like the logged errors could be associated with threads dying, but proc reads were, in some way, racing with their demise. Exploring ways to increase thread demise, revealed that races of this type can happen on non-ppc64 systems, and that os.IsNotExist(err) was not a sufficient error condition test for a thread's status file disappearing. This change includes a fix for that to. The actual issue on linux-ppc64 appears to be tied to PID reaping and reuse latency on whatever the build test environment is for linux-ppc64-buildlet. I suspect this can happen on any linux system, however, especially where the container has a limited PID range. The fix for this, limited to the test (the runtime syscall support is unchanged), is to confirm that the Pid for the interrogated thread's /proc/<TID>/status file confirms that it is still associated with the test-process' PID. linux-ppc64-buildlet: go/bin/go test syscall -run=TestSetuidEtc -count=10000 ok syscall 104.285s Fixes #42462 Change-Id: I55c84ab8361003570a405fa52ffec4949bf91113 Reviewed-on: https://go-review.googlesource.com/c/go/+/268717 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Go Bot <gobot@golang.org> Trust: Tobias Klauser <tobias.klauser@gmail.com>

commit: f2e58c6d4239f27db284dfe442fa62bb3c0c5b23 [log] [tgz]
author: Andrew G. Morgan <agm@google.com> Mon Nov 09 19:28:04 2020 -0800
committer: Ian Lance Taylor <iant@golang.org> Wed Nov 11 20:49:53 2020 +0000
tree: d1cfc9fe7d8db9b92c0af870b34cd5db685f878f
parent: 4c174a7ba66724f8f9a1915c8f4868a8b3aaf219 [diff]
diff --git a/misc/cgo/test/issue1435.go b/misc/cgo/test/issue1435.go
index 155d33b..a1c7cac 100644
--- a/misc/cgo/test/issue1435.go
+++ b/misc/cgo/test/issue1435.go

@@ -62,28 +62,60 @@
 // compareStatus is used to confirm the contents of the thread
 // specific status files match expectations.
 func compareStatus(filter, expect string) error {
-	expected := filter + "\t" + expect
+	expected := filter + expect
 	pid := syscall.Getpid()
 	fs, err := ioutil.ReadDir(fmt.Sprintf("/proc/%d/task", pid))
 	if err != nil {
 		return fmt.Errorf("unable to find %d tasks: %v", pid, err)
 	}
+	expectedProc := fmt.Sprintf("Pid:\t%d", pid)
+	foundAThread := false
 	for _, f := range fs {
 		tf := fmt.Sprintf("/proc/%s/status", f.Name())
 		d, err := ioutil.ReadFile(tf)
 		if err != nil {
-			return fmt.Errorf("unable to read %q: %v", tf, err)
+			// There are a surprising number of ways this
+			// can error out on linux.  We've seen all of
+			// the following, so treat any error here as
+			// equivalent to the "process is gone":
+			//    os.IsNotExist(err),
+			//    "... : no such process",
+			//    "... : bad file descriptor.
+			continue
 		}
 		lines := strings.Split(string(d), "\n")
 		for _, line := range lines {
+			// Different kernel vintages pad differently.
+			line = strings.TrimSpace(line)
+			if strings.HasPrefix(line, "Pid:\t") {
+				// On loaded systems, it is possible
+				// for a TID to be reused really
+				// quickly. As such, we need to
+				// validate that the thread status
+				// info we just read is a task of the
+				// same process PID as we are
+				// currently running, and not a
+				// recently terminated thread
+				// resurfaced in a different process.
+				if line != expectedProc {
+					break
+				}
+				// Fall through in the unlikely case
+				// that filter at some point is
+				// "Pid:\t".
+			}
 			if strings.HasPrefix(line, filter) {
 				if line != expected {
-					return fmt.Errorf("%s %s (bad)\n", tf, line)
+					return fmt.Errorf("%q got:%q want:%q (bad) [pid=%d file:'%s' %v]\n", tf, line, expected, pid, string(d), expectedProc)
 				}
+				foundAThread = true
 				break
 			}
 		}
 	}
+	if !foundAThread {
+		return fmt.Errorf("found no thread /proc/<TID>/status files for process %q", expectedProc)
+	}
 	return nil
 }
 
@@ -110,34 +142,34 @@
 		fn             func() error
 		filter, expect string
 	}{
-		{call: "Setegid(1)", fn: func() error { return syscall.Setegid(1) }, filter: "Gid:", expect: "0\t1\t0\t1"},
-		{call: "Setegid(0)", fn: func() error { return syscall.Setegid(0) }, filter: "Gid:", expect: "0\t0\t0\t0"},
+		{call: "Setegid(1)", fn: func() error { return syscall.Setegid(1) }, filter: "Gid:", expect: "\t0\t1\t0\t1"},
+		{call: "Setegid(0)", fn: func() error { return syscall.Setegid(0) }, filter: "Gid:", expect: "\t0\t0\t0\t0"},
 
-		{call: "Seteuid(1)", fn: func() error { return syscall.Seteuid(1) }, filter: "Uid:", expect: "0\t1\t0\t1"},
-		{call: "Setuid(0)", fn: func() error { return syscall.Setuid(0) }, filter: "Uid:", expect: "0\t0\t0\t0"},
+		{call: "Seteuid(1)", fn: func() error { return syscall.Seteuid(1) }, filter: "Uid:", expect: "\t0\t1\t0\t1"},
+		{call: "Setuid(0)", fn: func() error { return syscall.Setuid(0) }, filter: "Uid:", expect: "\t0\t0\t0\t0"},
 
-		{call: "Setgid(1)", fn: func() error { return syscall.Setgid(1) }, filter: "Gid:", expect: "1\t1\t1\t1"},
-		{call: "Setgid(0)", fn: func() error { return syscall.Setgid(0) }, filter: "Gid:", expect: "0\t0\t0\t0"},
+		{call: "Setgid(1)", fn: func() error { return syscall.Setgid(1) }, filter: "Gid:", expect: "\t1\t1\t1\t1"},
+		{call: "Setgid(0)", fn: func() error { return syscall.Setgid(0) }, filter: "Gid:", expect: "\t0\t0\t0\t0"},
 
-		{call: "Setgroups([]int{0,1,2,3})", fn: func() error { return syscall.Setgroups([]int{0, 1, 2, 3}) }, filter: "Groups:", expect: "0 1 2 3 "},
-		{call: "Setgroups(nil)", fn: func() error { return syscall.Setgroups(nil) }, filter: "Groups:", expect: " "},
-		{call: "Setgroups([]int{0})", fn: func() error { return syscall.Setgroups([]int{0}) }, filter: "Groups:", expect: "0 "},
+		{call: "Setgroups([]int{0,1,2,3})", fn: func() error { return syscall.Setgroups([]int{0, 1, 2, 3}) }, filter: "Groups:", expect: "\t0 1 2 3"},
+		{call: "Setgroups(nil)", fn: func() error { return syscall.Setgroups(nil) }, filter: "Groups:", expect: ""},
+		{call: "Setgroups([]int{0})", fn: func() error { return syscall.Setgroups([]int{0}) }, filter: "Groups:", expect: "\t0"},
 
-		{call: "Setregid(101,0)", fn: func() error { return syscall.Setregid(101, 0) }, filter: "Gid:", expect: "101\t0\t0\t0"},
-		{call: "Setregid(0,102)", fn: func() error { return syscall.Setregid(0, 102) }, filter: "Gid:", expect: "0\t102\t102\t102"},
-		{call: "Setregid(0,0)", fn: func() error { return syscall.Setregid(0, 0) }, filter: "Gid:", expect: "0\t0\t0\t0"},
+		{call: "Setregid(101,0)", fn: func() error { return syscall.Setregid(101, 0) }, filter: "Gid:", expect: "\t101\t0\t0\t0"},
+		{call: "Setregid(0,102)", fn: func() error { return syscall.Setregid(0, 102) }, filter: "Gid:", expect: "\t0\t102\t102\t102"},
+		{call: "Setregid(0,0)", fn: func() error { return syscall.Setregid(0, 0) }, filter: "Gid:", expect: "\t0\t0\t0\t0"},
 
-		{call: "Setreuid(1,0)", fn: func() error { return syscall.Setreuid(1, 0) }, filter: "Uid:", expect: "1\t0\t0\t0"},
-		{call: "Setreuid(0,2)", fn: func() error { return syscall.Setreuid(0, 2) }, filter: "Uid:", expect: "0\t2\t2\t2"},
-		{call: "Setreuid(0,0)", fn: func() error { return syscall.Setreuid(0, 0) }, filter: "Uid:", expect: "0\t0\t0\t0"},
+		{call: "Setreuid(1,0)", fn: func() error { return syscall.Setreuid(1, 0) }, filter: "Uid:", expect: "\t1\t0\t0\t0"},
+		{call: "Setreuid(0,2)", fn: func() error { return syscall.Setreuid(0, 2) }, filter: "Uid:", expect: "\t0\t2\t2\t2"},
+		{call: "Setreuid(0,0)", fn: func() error { return syscall.Setreuid(0, 0) }, filter: "Uid:", expect: "\t0\t0\t0\t0"},
 
-		{call: "Setresgid(101,0,102)", fn: func() error { return syscall.Setresgid(101, 0, 102) }, filter: "Gid:", expect: "101\t0\t102\t0"},
-		{call: "Setresgid(0,102,101)", fn: func() error { return syscall.Setresgid(0, 102, 101) }, filter: "Gid:", expect: "0\t102\t101\t102"},
-		{call: "Setresgid(0,0,0)", fn: func() error { return syscall.Setresgid(0, 0, 0) }, filter: "Gid:", expect: "0\t0\t0\t0"},
+		{call: "Setresgid(101,0,102)", fn: func() error { return syscall.Setresgid(101, 0, 102) }, filter: "Gid:", expect: "\t101\t0\t102\t0"},
+		{call: "Setresgid(0,102,101)", fn: func() error { return syscall.Setresgid(0, 102, 101) }, filter: "Gid:", expect: "\t0\t102\t101\t102"},
+		{call: "Setresgid(0,0,0)", fn: func() error { return syscall.Setresgid(0, 0, 0) }, filter: "Gid:", expect: "\t0\t0\t0\t0"},
 
-		{call: "Setresuid(1,0,2)", fn: func() error { return syscall.Setresuid(1, 0, 2) }, filter: "Uid:", expect: "1\t0\t2\t0"},
-		{call: "Setresuid(0,2,1)", fn: func() error { return syscall.Setresuid(0, 2, 1) }, filter: "Uid:", expect: "0\t2\t1\t2"},
-		{call: "Setresuid(0,0,0)", fn: func() error { return syscall.Setresuid(0, 0, 0) }, filter: "Uid:", expect: "0\t0\t0\t0"},
+		{call: "Setresuid(1,0,2)", fn: func() error { return syscall.Setresuid(1, 0, 2) }, filter: "Uid:", expect: "\t1\t0\t2\t0"},
+		{call: "Setresuid(0,2,1)", fn: func() error { return syscall.Setresuid(0, 2, 1) }, filter: "Uid:", expect: "\t0\t2\t1\t2"},
+		{call: "Setresuid(0,0,0)", fn: func() error { return syscall.Setresuid(0, 0, 0) }, filter: "Uid:", expect: "\t0\t0\t0\t0"},
 	}
 
 	for i, v := range vs {

diff --git a/src/syscall/syscall_linux_test.go b/src/syscall/syscall_linux_test.go
index 41ae8cc..9276432 100644
--- a/src/syscall/syscall_linux_test.go
+++ b/src/syscall/syscall_linux_test.go

@@ -547,30 +547,54 @@
 	if err != nil {
 		return fmt.Errorf("unable to find %d tasks: %v", pid, err)
 	}
+	expectedProc := fmt.Sprintf("Pid:\t%d", pid)
+	foundAThread := false
 	for _, f := range fs {
 		tf := fmt.Sprintf("/proc/%s/status", f.Name())
 		d, err := ioutil.ReadFile(tf)
-		if os.IsNotExist(err) {
-			// We are racing against threads dying, which
-			// is out of our control, so ignore the
-			// missing file and skip to the next one.
-			continue
-		}
 		if err != nil {
-			return fmt.Errorf("unable to read %q: %v", tf, err)
+			// There are a surprising number of ways this
+			// can error out on linux.  We've seen all of
+			// the following, so treat any error here as
+			// equivalent to the "process is gone":
+			//    os.IsNotExist(err),
+			//    "... : no such process",
+			//    "... : bad file descriptor.
+			continue
 		}
 		lines := strings.Split(string(d), "\n")
 		for _, line := range lines {
 			// Different kernel vintages pad differently.
 			line = strings.TrimSpace(line)
+			if strings.HasPrefix(line, "Pid:\t") {
+				// On loaded systems, it is possible
+				// for a TID to be reused really
+				// quickly. As such, we need to
+				// validate that the thread status
+				// info we just read is a task of the
+				// same process PID as we are
+				// currently running, and not a
+				// recently terminated thread
+				// resurfaced in a different process.
+				if line != expectedProc {
+					break
+				}
+				// Fall through in the unlikely case
+				// that filter at some point is
+				// "Pid:\t".
+			}
 			if strings.HasPrefix(line, filter) {
 				if line != expected {
-					return fmt.Errorf("%q got:%q want:%q (bad)\n", tf, line, expected)
+					return fmt.Errorf("%q got:%q want:%q (bad) [pid=%d file:'%s' %v]\n", tf, line, expected, pid, string(d), expectedProc)
 				}
+				foundAThread = true
 				break
 			}
 		}
 	}
+	if !foundAThread {
+		return fmt.Errorf("found no thread /proc/<TID>/status files for process %q", expectedProc)
+	}
 	return nil
 }
commit	f2e58c6d4239f27db284dfe442fa62bb3c0c5b23	[log] [tgz]
author	Andrew G. Morgan <agm@google.com>	Mon Nov 09 19:28:04 2020 -0800
committer	Ian Lance Taylor <iant@golang.org>	Wed Nov 11 20:49:53 2020 +0000
tree	d1cfc9fe7d8db9b92c0af870b34cd5db685f878f
parent	4c174a7ba66724f8f9a1915c8f4868a8b3aaf219 [diff]