| // Copyright 2015 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build linux |
| |
| package syscall_test |
| |
| import ( |
| "bytes" |
| "errors" |
| "flag" |
| "fmt" |
| "internal/testenv" |
| "io" |
| "io/fs" |
| "os" |
| "os/exec" |
| "os/user" |
| "path" |
| "path/filepath" |
| "runtime" |
| "strconv" |
| "strings" |
| "syscall" |
| "testing" |
| "unsafe" |
| ) |
| |
| // isNotSupported reports whether err may indicate that a system call is |
| // not supported by the current platform or execution environment. |
| func isNotSupported(err error) bool { |
| if err == nil { |
| return false |
| } |
| |
| var errno syscall.Errno |
| if errors.As(err, &errno) { |
| switch errno { |
| case syscall.ENOSYS, syscall.ENOTSUP: |
| // Explicitly not supported. |
| return true |
| case syscall.EPERM, syscall.EROFS: |
| // User lacks permission: either the call requires root permission and the |
| // user is not root, or the call is denied by a container security policy. |
| return true |
| case syscall.EINVAL: |
| // Some containers return EINVAL instead of EPERM if a system call is |
| // denied by security policy. |
| return true |
| } |
| } |
| |
| if errors.Is(err, fs.ErrPermission) { |
| return true |
| } |
| |
| // TODO(#41198): Also return true if errors.Is(err, errors.ErrUnsupported). |
| |
| return false |
| } |
| |
| // whoamiNEWUSER returns a command that runs "whoami" with CLONE_NEWUSER, |
| // mapping uid and gid 0 to the actual uid and gid of the test. |
| func whoamiNEWUSER(t *testing.T, uid, gid int, setgroups bool) *exec.Cmd { |
| t.Helper() |
| testenv.MustHaveExecPath(t, "whoami") |
| cmd := testenv.Command(t, "whoami") |
| cmd.SysProcAttr = &syscall.SysProcAttr{ |
| Cloneflags: syscall.CLONE_NEWUSER, |
| UidMappings: []syscall.SysProcIDMap{ |
| {ContainerID: 0, HostID: uid, Size: 1}, |
| }, |
| GidMappings: []syscall.SysProcIDMap{ |
| {ContainerID: 0, HostID: gid, Size: 1}, |
| }, |
| GidMappingsEnableSetgroups: setgroups, |
| } |
| return cmd |
| } |
| |
| func TestCloneNEWUSERAndRemap(t *testing.T) { |
| for _, setgroups := range []bool{false, true} { |
| setgroups := setgroups |
| t.Run(fmt.Sprintf("setgroups=%v", setgroups), func(t *testing.T) { |
| uid := os.Getuid() |
| gid := os.Getgid() |
| |
| cmd := whoamiNEWUSER(t, uid, gid, setgroups) |
| out, err := cmd.CombinedOutput() |
| t.Logf("%v: %v", cmd, err) |
| |
| if uid != 0 && setgroups { |
| t.Logf("as non-root, expected permission error due to unprivileged gid_map") |
| if !os.IsPermission(err) { |
| if err == nil { |
| t.Skipf("unexpected success: probably old kernel without security fix?") |
| } |
| if isNotSupported(err) { |
| t.Skipf("skipping: CLONE_NEWUSER appears to be unsupported") |
| } |
| t.Fatalf("got non-permission error") // Already logged above. |
| } |
| return |
| } |
| |
| if err != nil { |
| if isNotSupported(err) { |
| // May be inside a container that disallows CLONE_NEWUSER. |
| t.Skipf("skipping: CLONE_NEWUSER appears to be unsupported") |
| } |
| t.Fatalf("unexpected command failure; output:\n%s", out) |
| } |
| |
| sout := strings.TrimSpace(string(out)) |
| want := "root" |
| if sout != want { |
| t.Fatalf("whoami = %q; want %q", out, want) |
| } |
| }) |
| } |
| } |
| |
| func TestEmptyCredGroupsDisableSetgroups(t *testing.T) { |
| cmd := whoamiNEWUSER(t, os.Getuid(), os.Getgid(), false) |
| cmd.SysProcAttr.Credential = &syscall.Credential{} |
| if err := cmd.Run(); err != nil { |
| if isNotSupported(err) { |
| t.Skipf("skipping: %v: %v", cmd, err) |
| } |
| t.Fatal(err) |
| } |
| } |
| |
| func TestUnshare(t *testing.T) { |
| path := "/proc/net/dev" |
| if _, err := os.Stat(path); err != nil { |
| if os.IsNotExist(err) { |
| t.Skip("kernel doesn't support proc filesystem") |
| } |
| if os.IsPermission(err) { |
| t.Skip("unable to test proc filesystem due to permissions") |
| } |
| t.Fatal(err) |
| } |
| |
| orig, err := os.ReadFile(path) |
| if err != nil { |
| t.Fatal(err) |
| } |
| origLines := strings.Split(strings.TrimSpace(string(orig)), "\n") |
| |
| cmd := testenv.Command(t, "cat", path) |
| cmd.SysProcAttr = &syscall.SysProcAttr{ |
| Unshareflags: syscall.CLONE_NEWNET, |
| } |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| if isNotSupported(err) { |
| // CLONE_NEWNET does not appear to be supported. |
| t.Skipf("skipping due to permission error: %v", err) |
| } |
| t.Fatalf("Cmd failed with err %v, output: %s", err, out) |
| } |
| |
| // Check there is only the local network interface |
| sout := strings.TrimSpace(string(out)) |
| if !strings.Contains(sout, "lo:") { |
| t.Fatalf("Expected lo network interface to exist, got %s", sout) |
| } |
| |
| lines := strings.Split(sout, "\n") |
| if len(lines) >= len(origLines) { |
| t.Fatalf("Got %d lines of output, want <%d", len(lines), len(origLines)) |
| } |
| } |
| |
| func TestGroupCleanup(t *testing.T) { |
| testenv.MustHaveExecPath(t, "id") |
| cmd := testenv.Command(t, "id") |
| cmd.SysProcAttr = &syscall.SysProcAttr{ |
| Credential: &syscall.Credential{ |
| Uid: 0, |
| Gid: 0, |
| }, |
| } |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| if isNotSupported(err) { |
| t.Skipf("skipping: %v: %v", cmd, err) |
| } |
| t.Fatalf("Cmd failed with err %v, output: %s", err, out) |
| } |
| strOut := strings.TrimSpace(string(out)) |
| t.Logf("id: %s", strOut) |
| |
| expected := "uid=0(root) gid=0(root)" |
| // Just check prefix because some distros reportedly output a |
| // context parameter; see https://golang.org/issue/16224. |
| // Alpine does not output groups; see https://golang.org/issue/19938. |
| if !strings.HasPrefix(strOut, expected) { |
| t.Errorf("expected prefix: %q", expected) |
| } |
| } |
| |
| func TestGroupCleanupUserNamespace(t *testing.T) { |
| testenv.MustHaveExecPath(t, "id") |
| cmd := testenv.Command(t, "id") |
| uid, gid := os.Getuid(), os.Getgid() |
| cmd.SysProcAttr = &syscall.SysProcAttr{ |
| Cloneflags: syscall.CLONE_NEWUSER, |
| Credential: &syscall.Credential{ |
| Uid: uint32(uid), |
| Gid: uint32(gid), |
| }, |
| UidMappings: []syscall.SysProcIDMap{ |
| {ContainerID: 0, HostID: uid, Size: 1}, |
| }, |
| GidMappings: []syscall.SysProcIDMap{ |
| {ContainerID: 0, HostID: gid, Size: 1}, |
| }, |
| } |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| if isNotSupported(err) { |
| t.Skipf("skipping: %v: %v", cmd, err) |
| } |
| t.Fatalf("Cmd failed with err %v, output: %s", err, out) |
| } |
| strOut := strings.TrimSpace(string(out)) |
| t.Logf("id: %s", strOut) |
| |
| // As in TestGroupCleanup, just check prefix. |
| // The actual groups and contexts seem to vary from one distro to the next. |
| expected := "uid=0(root) gid=0(root) groups=0(root)" |
| if !strings.HasPrefix(strOut, expected) { |
| t.Errorf("expected prefix: %q", expected) |
| } |
| } |
| |
| // Test for https://go.dev/issue/19661: unshare fails because systemd |
| // has forced / to be shared |
| func TestUnshareMountNameSpace(t *testing.T) { |
| testenv.MustHaveExec(t) |
| |
| if os.Getenv("GO_WANT_HELPER_PROCESS") == "1" { |
| dir := flag.Args()[0] |
| err := syscall.Mount("none", dir, "proc", 0, "") |
| if err != nil { |
| fmt.Fprintf(os.Stderr, "unshare: mount %v failed: %#v", dir, err) |
| os.Exit(2) |
| } |
| os.Exit(0) |
| } |
| |
| exe, err := os.Executable() |
| if err != nil { |
| t.Fatal(err) |
| } |
| |
| d := t.TempDir() |
| t.Cleanup(func() { |
| // If the subprocess fails to unshare the parent directory, force-unmount it |
| // so that the test can clean it up. |
| if _, err := os.Stat(d); err == nil { |
| syscall.Unmount(d, syscall.MNT_FORCE) |
| } |
| }) |
| cmd := testenv.Command(t, exe, "-test.run=TestUnshareMountNameSpace", d) |
| cmd.Env = append(cmd.Environ(), "GO_WANT_HELPER_PROCESS=1") |
| cmd.SysProcAttr = &syscall.SysProcAttr{Unshareflags: syscall.CLONE_NEWNS} |
| |
| o, err := cmd.CombinedOutput() |
| if err != nil { |
| if isNotSupported(err) { |
| t.Skipf("skipping: could not start process with CLONE_NEWNS: %v", err) |
| } |
| t.Fatalf("unshare failed: %v\n%s", err, o) |
| } |
| |
| // How do we tell if the namespace was really unshared? It turns out |
| // to be simple: just try to remove the directory. If it's still mounted |
| // on the rm will fail with EBUSY. |
| if err := os.Remove(d); err != nil { |
| t.Errorf("rmdir failed on %v: %v", d, err) |
| } |
| } |
| |
| // Test for Issue 20103: unshare fails when chroot is used |
| func TestUnshareMountNameSpaceChroot(t *testing.T) { |
| if os.Getenv("GO_WANT_HELPER_PROCESS") == "1" { |
| dir := flag.Args()[0] |
| err := syscall.Mount("none", dir, "proc", 0, "") |
| if err != nil { |
| fmt.Fprintf(os.Stderr, "unshare: mount %v failed: %#v", dir, err) |
| os.Exit(2) |
| } |
| os.Exit(0) |
| } |
| |
| d := t.TempDir() |
| |
| // Since we are doing a chroot, we need the binary there, |
| // and it must be statically linked. |
| testenv.MustHaveGoBuild(t) |
| x := filepath.Join(d, "syscall.test") |
| t.Cleanup(func() { |
| // If the subprocess fails to unshare the parent directory, force-unmount it |
| // so that the test can clean it up. |
| if _, err := os.Stat(d); err == nil { |
| syscall.Unmount(d, syscall.MNT_FORCE) |
| } |
| }) |
| |
| cmd := testenv.Command(t, testenv.GoToolPath(t), "test", "-c", "-o", x, "syscall") |
| cmd.Env = append(cmd.Environ(), "CGO_ENABLED=0") |
| if o, err := cmd.CombinedOutput(); err != nil { |
| t.Fatalf("Build of syscall in chroot failed, output %v, err %v", o, err) |
| } |
| |
| cmd = testenv.Command(t, "/syscall.test", "-test.run=TestUnshareMountNameSpaceChroot", "/") |
| cmd.Env = append(cmd.Environ(), "GO_WANT_HELPER_PROCESS=1") |
| cmd.SysProcAttr = &syscall.SysProcAttr{Chroot: d, Unshareflags: syscall.CLONE_NEWNS} |
| |
| o, err := cmd.CombinedOutput() |
| if err != nil { |
| if isNotSupported(err) { |
| t.Skipf("skipping: could not start process with CLONE_NEWNS and Chroot %q: %v", d, err) |
| } |
| t.Fatalf("unshare failed: %v\n%s", err, o) |
| } |
| |
| // How do we tell if the namespace was really unshared? It turns out |
| // to be simple: just try to remove the executable. If it's still mounted |
| // on, the rm will fail. |
| if err := os.Remove(x); err != nil { |
| t.Errorf("rm failed on %v: %v", x, err) |
| } |
| if err := os.Remove(d); err != nil { |
| t.Errorf("rmdir failed on %v: %v", d, err) |
| } |
| } |
| |
| // Test for Issue 29789: unshare fails when uid/gid mapping is specified |
| func TestUnshareUidGidMapping(t *testing.T) { |
| if os.Getenv("GO_WANT_HELPER_PROCESS") == "1" { |
| defer os.Exit(0) |
| if err := syscall.Chroot(os.TempDir()); err != nil { |
| fmt.Fprintln(os.Stderr, err) |
| os.Exit(2) |
| } |
| } |
| |
| if os.Getuid() == 0 { |
| t.Skip("test exercises unprivileged user namespace, fails with privileges") |
| } |
| |
| testenv.MustHaveExec(t) |
| exe, err := os.Executable() |
| if err != nil { |
| t.Fatal(err) |
| } |
| |
| cmd := testenv.Command(t, exe, "-test.run=TestUnshareUidGidMapping") |
| cmd.Env = append(cmd.Environ(), "GO_WANT_HELPER_PROCESS=1") |
| cmd.SysProcAttr = &syscall.SysProcAttr{ |
| Unshareflags: syscall.CLONE_NEWNS | syscall.CLONE_NEWUSER, |
| GidMappingsEnableSetgroups: false, |
| UidMappings: []syscall.SysProcIDMap{ |
| { |
| ContainerID: 0, |
| HostID: syscall.Getuid(), |
| Size: 1, |
| }, |
| }, |
| GidMappings: []syscall.SysProcIDMap{ |
| { |
| ContainerID: 0, |
| HostID: syscall.Getgid(), |
| Size: 1, |
| }, |
| }, |
| } |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| if isNotSupported(err) { |
| t.Skipf("skipping: could not start process with CLONE_NEWNS and CLONE_NEWUSER: %v", err) |
| } |
| t.Fatalf("Cmd failed with err %v, output: %s", err, out) |
| } |
| } |
| |
| func prepareCgroupFD(t *testing.T) (int, string) { |
| t.Helper() |
| |
| const O_PATH = 0x200000 // Same for all architectures, but for some reason not defined in syscall for 386||amd64. |
| |
| // Requires cgroup v2. |
| const prefix = "/sys/fs/cgroup" |
| selfCg, err := os.ReadFile("/proc/self/cgroup") |
| if err != nil { |
| if os.IsNotExist(err) || os.IsPermission(err) { |
| t.Skip(err) |
| } |
| t.Fatal(err) |
| } |
| |
| // Expect a single line like this: |
| // 0::/user.slice/user-1000.slice/user@1000.service/app.slice/vte-spawn-891992a2-efbb-4f28-aedb-b24f9e706770.scope |
| // Otherwise it's either cgroup v1 or a hybrid hierarchy. |
| if bytes.Count(selfCg, []byte("\n")) > 1 { |
| t.Skip("cgroup v2 not available") |
| } |
| cg := bytes.TrimPrefix(selfCg, []byte("0::")) |
| if len(cg) == len(selfCg) { // No prefix found. |
| t.Skipf("cgroup v2 not available (/proc/self/cgroup contents: %q)", selfCg) |
| } |
| |
| // Need clone3 with CLONE_INTO_CGROUP support. |
| _, err = syscall.ForkExec("non-existent binary", nil, &syscall.ProcAttr{ |
| Sys: &syscall.SysProcAttr{ |
| UseCgroupFD: true, |
| CgroupFD: -1, |
| }, |
| }) |
| if isNotSupported(err) { |
| t.Skipf("clone3 with CLONE_INTO_CGROUP not available: %v", err) |
| } |
| |
| // Need an ability to create a sub-cgroup. |
| subCgroup, err := os.MkdirTemp(prefix+string(bytes.TrimSpace(cg)), "subcg-") |
| if err != nil { |
| // ErrPermission or EROFS (#57262) when running in an unprivileged container. |
| // ErrNotExist when cgroupfs is not mounted in chroot/schroot. |
| if os.IsNotExist(err) || isNotSupported(err) { |
| t.Skipf("skipping: %v", err) |
| } |
| t.Fatal(err) |
| } |
| t.Cleanup(func() { syscall.Rmdir(subCgroup) }) |
| |
| cgroupFD, err := syscall.Open(subCgroup, O_PATH, 0) |
| if err != nil { |
| t.Fatal(&os.PathError{Op: "open", Path: subCgroup, Err: err}) |
| } |
| t.Cleanup(func() { syscall.Close(cgroupFD) }) |
| |
| return cgroupFD, "/" + path.Base(subCgroup) |
| } |
| |
| func TestUseCgroupFD(t *testing.T) { |
| testenv.MustHaveExec(t) |
| exe, err := os.Executable() |
| if err != nil { |
| t.Fatal(err) |
| } |
| |
| fd, suffix := prepareCgroupFD(t) |
| |
| cmd := testenv.Command(t, exe, "-test.run=TestUseCgroupFDHelper") |
| cmd.Env = append(cmd.Environ(), "GO_WANT_HELPER_PROCESS=1") |
| cmd.SysProcAttr = &syscall.SysProcAttr{ |
| UseCgroupFD: true, |
| CgroupFD: fd, |
| } |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| t.Fatalf("Cmd failed with err %v, output: %s", err, out) |
| } |
| // NB: this wouldn't work with cgroupns. |
| if !bytes.HasSuffix(bytes.TrimSpace(out), []byte(suffix)) { |
| t.Fatalf("got: %q, want: a line that ends with %q", out, suffix) |
| } |
| } |
| |
| func TestUseCgroupFDHelper(*testing.T) { |
| if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" { |
| return |
| } |
| defer os.Exit(0) |
| // Read and print own cgroup path. |
| selfCg, err := os.ReadFile("/proc/self/cgroup") |
| if err != nil { |
| fmt.Fprintln(os.Stderr, err) |
| os.Exit(2) |
| } |
| fmt.Print(string(selfCg)) |
| } |
| |
| type capHeader struct { |
| version uint32 |
| pid int32 |
| } |
| |
| type capData struct { |
| effective uint32 |
| permitted uint32 |
| inheritable uint32 |
| } |
| |
| const CAP_SYS_TIME = 25 |
| const CAP_SYSLOG = 34 |
| |
| type caps struct { |
| hdr capHeader |
| data [2]capData |
| } |
| |
| func getCaps() (caps, error) { |
| var c caps |
| |
| // Get capability version |
| if _, _, errno := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(&c.hdr)), uintptr(unsafe.Pointer(nil)), 0); errno != 0 { |
| return c, fmt.Errorf("SYS_CAPGET: %v", errno) |
| } |
| |
| // Get current capabilities |
| if _, _, errno := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(&c.hdr)), uintptr(unsafe.Pointer(&c.data[0])), 0); errno != 0 { |
| return c, fmt.Errorf("SYS_CAPGET: %v", errno) |
| } |
| |
| return c, nil |
| } |
| |
| func TestAmbientCaps(t *testing.T) { |
| testAmbientCaps(t, false) |
| } |
| |
| func TestAmbientCapsUserns(t *testing.T) { |
| testAmbientCaps(t, true) |
| } |
| |
| func testAmbientCaps(t *testing.T, userns bool) { |
| if os.Getenv("GO_WANT_HELPER_PROCESS") == "1" { |
| caps, err := getCaps() |
| if err != nil { |
| fmt.Fprintln(os.Stderr, err) |
| os.Exit(2) |
| } |
| if caps.data[0].effective&(1<<uint(CAP_SYS_TIME)) == 0 { |
| fmt.Fprintln(os.Stderr, "CAP_SYS_TIME unexpectedly not in the effective capability mask") |
| os.Exit(2) |
| } |
| if caps.data[1].effective&(1<<uint(CAP_SYSLOG&31)) == 0 { |
| fmt.Fprintln(os.Stderr, "CAP_SYSLOG unexpectedly not in the effective capability mask") |
| os.Exit(2) |
| } |
| os.Exit(0) |
| } |
| |
| // skip on android, due to lack of lookup support |
| if runtime.GOOS == "android" { |
| t.Skip("skipping test on android; see Issue 27327") |
| } |
| |
| u, err := user.Lookup("nobody") |
| if err != nil { |
| t.Fatal(err) |
| } |
| uid, err := strconv.ParseInt(u.Uid, 0, 32) |
| if err != nil { |
| t.Fatal(err) |
| } |
| gid, err := strconv.ParseInt(u.Gid, 0, 32) |
| if err != nil { |
| t.Fatal(err) |
| } |
| |
| // Copy the test binary to a temporary location which is readable by nobody. |
| f, err := os.CreateTemp("", "gotest") |
| if err != nil { |
| t.Fatal(err) |
| } |
| t.Cleanup(func() { |
| f.Close() |
| os.Remove(f.Name()) |
| }) |
| |
| testenv.MustHaveExec(t) |
| exe, err := os.Executable() |
| if err != nil { |
| t.Fatal(err) |
| } |
| |
| e, err := os.Open(exe) |
| if err != nil { |
| t.Fatal(err) |
| } |
| defer e.Close() |
| if _, err := io.Copy(f, e); err != nil { |
| t.Fatal(err) |
| } |
| if err := f.Chmod(0755); err != nil { |
| t.Fatal(err) |
| } |
| if err := f.Close(); err != nil { |
| t.Fatal(err) |
| } |
| |
| cmd := testenv.Command(t, f.Name(), "-test.run="+t.Name()) |
| cmd.Env = append(cmd.Environ(), "GO_WANT_HELPER_PROCESS=1") |
| cmd.Stdout = os.Stdout |
| cmd.Stderr = os.Stderr |
| cmd.SysProcAttr = &syscall.SysProcAttr{ |
| Credential: &syscall.Credential{ |
| Uid: uint32(uid), |
| Gid: uint32(gid), |
| }, |
| AmbientCaps: []uintptr{CAP_SYS_TIME, CAP_SYSLOG}, |
| } |
| if userns { |
| cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWUSER |
| const nobody = 65534 |
| uid := os.Getuid() |
| gid := os.Getgid() |
| cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{{ |
| ContainerID: int(nobody), |
| HostID: int(uid), |
| Size: int(1), |
| }} |
| cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{{ |
| ContainerID: int(nobody), |
| HostID: int(gid), |
| Size: int(1), |
| }} |
| |
| // Set credentials to run as user and group nobody. |
| cmd.SysProcAttr.Credential = &syscall.Credential{ |
| Uid: nobody, |
| Gid: nobody, |
| } |
| } |
| if err := cmd.Run(); err != nil { |
| if isNotSupported(err) { |
| t.Skipf("skipping: %v: %v", cmd, err) |
| } |
| t.Fatal(err.Error()) |
| } |
| } |