From d1b1145cace8b968307f9311ff611e4bb810710c Mon Sep 17 00:00:00 2001 From: "Andrew G. Morgan" Date: Mon, 9 Dec 2019 21:50:16 -0800 Subject: syscall: support POSIX semantics for Linux syscalls This change adds two new methods for invoking system calls under Linux: syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6(). These system call wrappers ensure that all OSThreads mirror a common system call. The wrappers serialize execution of the runtime to ensure no race conditions where any Go code observes a non-atomic OS state change. As such, the syscalls have higher runtime overhead than regular system calls, and only need to be used where such thread (or 'm' in the parlance of the runtime sources) consistency is required. The new support is used to enable these functions under Linux: syscall.Setegid(), syscall.Seteuid(), syscall.Setgroups(), syscall.Setgid(), syscall.Setregid(), syscall.Setreuid(), syscall.Setresgid(), syscall.Setresuid() and syscall.Setuid(). They work identically to their glibc counterparts. Extensive discussion of the background issue addressed in this patch can be found here: https://github.com/golang/go/issues/1435 In the case where cgo is used, the C runtime can launch pthreads that are not managed by the Go runtime. As such, the added syscall.AllThreadsSyscall*() return ENOTSUP when cgo is enabled. However, for the 9 syscall.Set*() functions listed above, when cgo is active, these functions redirect to invoke their C.set*() equivalents in glibc, which wraps the raw system calls with a nptl:setxid fixup mechanism. This achieves POSIX semantics for these functions in the combined Go and C runtime. As a side note, the glibc/nptl:setxid support (2019-11-30) does not extend to all security related system calls under Linux so using native Go (CGO_ENABLED=0) and these AllThreadsSyscall*()s, where needed, will yield more well defined/consistent behavior over all threads of a Go program. 
That is, using the syscall.AllThreadsSyscall*() wrappers for things like setting state through SYS_PRCTL and SYS_CAPSET etc. Fixes #1435 Change-Id: Ib1a3e16b9180f64223196a32fc0f9dce14d9105c Reviewed-on: https://go-review.googlesource.com/c/go/+/210639 Trust: Emmanuel Odeke Trust: Ian Lance Taylor Trust: Michael Pratt Run-TryBot: Emmanuel Odeke Reviewed-by: Michael Pratt Reviewed-by: Austin Clements --- src/syscall/syscall_linux_amd64.go | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src/syscall/syscall_linux_amd64.go') diff --git a/src/syscall/syscall_linux_amd64.go b/src/syscall/syscall_linux_amd64.go index bf340d9996..5518f44a07 100644 --- a/src/syscall/syscall_linux_amd64.go +++ b/src/syscall/syscall_linux_amd64.go @@ -30,11 +30,7 @@ const _SYS_setgroups = SYS_SETGROUPS //sys sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) //sys Setfsgid(gid int) (err error) //sys Setfsuid(uid int) (err error) -//sysnb Setregid(rgid int, egid int) (err error) -//sysnb Setresgid(rgid int, egid int, sgid int) (err error) -//sysnb Setresuid(ruid int, euid int, suid int) (err error) //sysnb Setrlimit(resource int, rlim *Rlimit) (err error) -//sysnb Setreuid(ruid int, euid int) (err error) //sys Shutdown(fd int, how int) (err error) //sys Splice(rfd int, roff *int64, wfd int, woff *int64, len int, flags int) (n int64, err error) //sys Statfs(path string, buf *Statfs_t) (err error) @@ -47,7 +43,6 @@ const _SYS_setgroups = SYS_SETGROUPS //sys connect(s int, addr unsafe.Pointer, addrlen _Socklen) (err error) //sys fstatat(fd int, path string, stat *Stat_t, flags int) (err error) = SYS_NEWFSTATAT //sysnb getgroups(n int, list *_Gid_t) (nn int, err error) -//sysnb setgroups(n int, list *_Gid_t) (err error) //sys getsockopt(s int, level int, name int, val unsafe.Pointer, vallen *_Socklen) (err error) //sys setsockopt(s int, level int, name int, val unsafe.Pointer, vallen uintptr) (err error) //sysnb socket(domain int, typ int, proto int) (fd int, err 
error) -- cgit v1.3 From 3a819e8998af1db3bdd34eb2ab059a3c534c6def Mon Sep 17 00:00:00 2001 From: "Andrew G. Morgan" Date: Wed, 28 Oct 2020 13:35:57 -0700 Subject: syscall: handle undefined r2 value on linux-ppc64x This change fixes two failing tests on linux-ppc64x: - TestAllThreadsSyscall() exposed a real bug in the ppc64x support: - It turns out that the r2 syscall return value is not defined on all architectures. Notably, it is undefined on linux-ppc64x, so address that by introducing a private architectural constant in the syscall package, archHonorsR2: true if r2 has a deterministic value. - TestSetuidEtc() was sensitive to /proc/<PID>/status content: - The amount of padding space has changed with kernel vintage. - Stress testing revealed a race with /proc files disappearing. Fixes #42178 Change-Id: Ie6fc0b8f2f94a409ac0e5756e73bfce113274709 Reviewed-on: https://go-review.googlesource.com/c/go/+/266202 Run-TryBot: Ian Lance Taylor Reviewed-by: Emmanuel Odeke Reviewed-by: Ian Lance Taylor TryBot-Result: Go Bot --- src/syscall/syscall_linux.go | 8 +++-- src/syscall/syscall_linux_386.go | 6 ++++ src/syscall/syscall_linux_amd64.go | 6 ++++ src/syscall/syscall_linux_arm.go | 6 ++++ src/syscall/syscall_linux_arm64.go | 6 ++++ src/syscall/syscall_linux_mips64x.go | 6 ++++ src/syscall/syscall_linux_mipsx.go | 6 ++++ src/syscall/syscall_linux_ppc64x.go | 6 ++++ src/syscall/syscall_linux_riscv64.go | 6 ++++ src/syscall/syscall_linux_s390x.go | 6 ++++ src/syscall/syscall_linux_test.go | 60 +++++++++++++++++++----------------- 11 files changed, 91 insertions(+), 31 deletions(-) (limited to 'src/syscall/syscall_linux_amd64.go') diff --git a/src/syscall/syscall_linux.go b/src/syscall/syscall_linux.go index 54e5cfc2f2..3041f6f8fc 100644 --- a/src/syscall/syscall_linux.go +++ b/src/syscall/syscall_linux.go @@ -1003,7 +1003,9 @@ func (pc *allThreadsCaller) doSyscall(initial bool) bool { pc.r1 = r1 pc.r2 = r2 pc.err = err - } else if pc.r1 != r1 ||
(archHonorsR2 && pc.r2 != r2) || pc.err != err { + print("trap:", pc.trap, ", a123=[", pc.a1, ",", pc.a2, ",", pc.a3, "]\n") + print("results: got {r1=", r1, ",r2=", r2, ",err=", err, "}, want {r1=", pc.r1, ",r2=", pc.r2, ",r3=", pc.err, "}\n") panic("AllThreadsSyscall results differ between threads; runtime corrupted") } return err == 0 @@ -1019,7 +1021,9 @@ func (pc *allThreadsCaller) doSyscall6(initial bool) bool { pc.r1 = r1 pc.r2 = r2 pc.err = err - } else if pc.r1 != r1 || pc.r2 != r2 || pc.err != err { + } else if pc.r1 != r1 || (archHonorsR2 && pc.r2 != r2) || pc.err != err { + print("trap:", pc.trap, ", a123456=[", pc.a1, ",", pc.a2, ",", pc.a3, ",", pc.a4, ",", pc.a5, ",", pc.a6, "]\n") + print("results: got {r1=", r1, ",r2=", r2, ",err=", err, "}, want {r1=", pc.r1, ",r2=", pc.r2, ",r3=", pc.err, "}\n") panic("AllThreadsSyscall6 results differ between threads; runtime corrupted") } return err == 0 diff --git a/src/syscall/syscall_linux_386.go b/src/syscall/syscall_linux_386.go index dd5f2735d8..ed52647403 100644 --- a/src/syscall/syscall_linux_386.go +++ b/src/syscall/syscall_linux_386.go @@ -6,6 +6,12 @@ package syscall import "unsafe" +// archHonorsR2 captures the fact that r2 is honored by the +// runtime.GOARCH. Syscall conventions are generally r1, r2, err := +// syscall(trap, ...). Not all architectures define r2 in their +// ABI. See "man syscall". +const archHonorsR2 = true + const _SYS_setgroups = SYS_SETGROUPS32 func setTimespec(sec, nsec int64) Timespec { diff --git a/src/syscall/syscall_linux_amd64.go b/src/syscall/syscall_linux_amd64.go index 5518f44a07..5df3f796d1 100644 --- a/src/syscall/syscall_linux_amd64.go +++ b/src/syscall/syscall_linux_amd64.go @@ -4,6 +4,12 @@ package syscall +// archHonorsR2 captures the fact that r2 is honored by the +// runtime.GOARCH. Syscall conventions are generally r1, r2, err := +// syscall(trap, ...). Not all architectures define r2 in their +// ABI. See "man syscall". 
+const archHonorsR2 = true + const _SYS_setgroups = SYS_SETGROUPS //sys Dup2(oldfd int, newfd int) (err error) diff --git a/src/syscall/syscall_linux_arm.go b/src/syscall/syscall_linux_arm.go index 61133a59fb..4a3729f898 100644 --- a/src/syscall/syscall_linux_arm.go +++ b/src/syscall/syscall_linux_arm.go @@ -6,6 +6,12 @@ package syscall import "unsafe" +// archHonorsR2 captures the fact that r2 is honored by the +// runtime.GOARCH. Syscall conventions are generally r1, r2, err := +// syscall(trap, ...). Not all architectures define r2 in their +// ABI. See "man syscall". [EABI assumed.] +const archHonorsR2 = true + const _SYS_setgroups = SYS_SETGROUPS32 func setTimespec(sec, nsec int64) Timespec { diff --git a/src/syscall/syscall_linux_arm64.go b/src/syscall/syscall_linux_arm64.go index 16382102c8..f575c84c93 100644 --- a/src/syscall/syscall_linux_arm64.go +++ b/src/syscall/syscall_linux_arm64.go @@ -6,6 +6,12 @@ package syscall import "unsafe" +// archHonorsR2 captures the fact that r2 is honored by the +// runtime.GOARCH. Syscall conventions are generally r1, r2, err := +// syscall(trap, ...). Not all architectures define r2 in their +// ABI. See "man syscall". +const archHonorsR2 = true + const _SYS_setgroups = SYS_SETGROUPS func EpollCreate(size int) (fd int, err error) { diff --git a/src/syscall/syscall_linux_mips64x.go b/src/syscall/syscall_linux_mips64x.go index 4986baa319..ab25b7be6f 100644 --- a/src/syscall/syscall_linux_mips64x.go +++ b/src/syscall/syscall_linux_mips64x.go @@ -7,6 +7,12 @@ package syscall +// archHonorsR2 captures the fact that r2 is honored by the +// runtime.GOARCH. Syscall conventions are generally r1, r2, err := +// syscall(trap, ...). Not all architectures define r2 in their +// ABI. See "man syscall". 
+const archHonorsR2 = true + const _SYS_setgroups = SYS_SETGROUPS //sys Dup2(oldfd int, newfd int) (err error) diff --git a/src/syscall/syscall_linux_mipsx.go b/src/syscall/syscall_linux_mipsx.go index 5126b0e43c..377946fc92 100644 --- a/src/syscall/syscall_linux_mipsx.go +++ b/src/syscall/syscall_linux_mipsx.go @@ -9,6 +9,12 @@ package syscall import "unsafe" +// archHonorsR2 captures the fact that r2 is honored by the +// runtime.GOARCH. Syscall conventions are generally r1, r2, err := +// syscall(trap, ...). Not all architectures define r2 in their +// ABI. See "man syscall". +const archHonorsR2 = true + const _SYS_setgroups = SYS_SETGROUPS func Syscall9(trap, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, err Errno) diff --git a/src/syscall/syscall_linux_ppc64x.go b/src/syscall/syscall_linux_ppc64x.go index bb2d904b5f..45bf667407 100644 --- a/src/syscall/syscall_linux_ppc64x.go +++ b/src/syscall/syscall_linux_ppc64x.go @@ -7,6 +7,12 @@ package syscall +// archHonorsR2 captures the fact that r2 is honored by the +// runtime.GOARCH. Syscall conventions are generally r1, r2, err := +// syscall(trap, ...). Not all architectures define r2 in their +// ABI. See "man syscall". +const archHonorsR2 = false + const _SYS_setgroups = SYS_SETGROUPS //sys Dup2(oldfd int, newfd int) (err error) diff --git a/src/syscall/syscall_linux_riscv64.go b/src/syscall/syscall_linux_riscv64.go index aa1014f8ae..2a0fe64d25 100644 --- a/src/syscall/syscall_linux_riscv64.go +++ b/src/syscall/syscall_linux_riscv64.go @@ -6,6 +6,12 @@ package syscall import "unsafe" +// archHonorsR2 captures the fact that r2 is honored by the +// runtime.GOARCH. Syscall conventions are generally r1, r2, err := +// syscall(trap, ...). Not all architectures define r2 in their +// ABI. See "man syscall". 
+const archHonorsR2 = true + const _SYS_setgroups = SYS_SETGROUPS func EpollCreate(size int) (fd int, err error) { diff --git a/src/syscall/syscall_linux_s390x.go b/src/syscall/syscall_linux_s390x.go index dc97d5c65a..0f6f6277bb 100644 --- a/src/syscall/syscall_linux_s390x.go +++ b/src/syscall/syscall_linux_s390x.go @@ -6,6 +6,12 @@ package syscall import "unsafe" +// archHonorsR2 captures the fact that r2 is honored by the +// runtime.GOARCH. Syscall conventions are generally r1, r2, err := +// syscall(trap, ...). Not all architectures define r2 in their +// ABI. See "man syscall". +const archHonorsR2 = true + const _SYS_setgroups = SYS_SETGROUPS //sys Dup2(oldfd int, newfd int) (err error) diff --git a/src/syscall/syscall_linux_test.go b/src/syscall/syscall_linux_test.go index 0742ef5b07..41ae8cc5a1 100644 --- a/src/syscall/syscall_linux_test.go +++ b/src/syscall/syscall_linux_test.go @@ -410,9 +410,6 @@ const ( // syscalls that execute on all OSThreads - with which to support // POSIX semantics for security state changes. func TestAllThreadsSyscall(t *testing.T) { - if runtime.GOARCH == "ppc64" { - t.Skip("skipping on linux/ppc64; see issue #42178") - } if _, _, err := syscall.AllThreadsSyscall(syscall.SYS_PRCTL, PR_SET_KEEPCAPS, 0, 0); err == syscall.ENOTSUP { t.Skip("AllThreadsSyscall disabled with cgo") } @@ -544,7 +541,7 @@ func TestAllThreadsSyscall(t *testing.T) { // compareStatus is used to confirm the contents of the thread // specific status files match expectations. 
func compareStatus(filter, expect string) error { - expected := filter + "\t" + expect + expected := filter + expect pid := syscall.Getpid() fs, err := ioutil.ReadDir(fmt.Sprintf("/proc/%d/task", pid)) if err != nil { @@ -553,14 +550,22 @@ func compareStatus(filter, expect string) error { for _, f := range fs { tf := fmt.Sprintf("/proc/%s/status", f.Name()) d, err := ioutil.ReadFile(tf) + if os.IsNotExist(err) { + // We are racing against threads dying, which + // is out of our control, so ignore the + // missing file and skip to the next one. + continue + } if err != nil { return fmt.Errorf("unable to read %q: %v", tf, err) } lines := strings.Split(string(d), "\n") for _, line := range lines { + // Different kernel vintages pad differently. + line = strings.TrimSpace(line) if strings.HasPrefix(line, filter) { if line != expected { - return fmt.Errorf("%s %s (bad)\n", tf, line) + return fmt.Errorf("%q got:%q want:%q (bad)\n", tf, line, expected) } break } @@ -580,9 +585,6 @@ func compareStatus(filter, expect string) error { // the syscalls. Care should be taken to mirror any enhancements to // this test here in that file too. 
func TestSetuidEtc(t *testing.T) { - if runtime.GOARCH == "ppc64" { - t.Skip("skipping on linux/ppc64; see issue #42178") - } if syscall.Getuid() != 0 { t.Skip("skipping root only test") } @@ -591,34 +593,34 @@ func TestSetuidEtc(t *testing.T) { fn func() error filter, expect string }{ - {call: "Setegid(1)", fn: func() error { return syscall.Setegid(1) }, filter: "Gid:", expect: "0\t1\t0\t1"}, - {call: "Setegid(0)", fn: func() error { return syscall.Setegid(0) }, filter: "Gid:", expect: "0\t0\t0\t0"}, + {call: "Setegid(1)", fn: func() error { return syscall.Setegid(1) }, filter: "Gid:", expect: "\t0\t1\t0\t1"}, + {call: "Setegid(0)", fn: func() error { return syscall.Setegid(0) }, filter: "Gid:", expect: "\t0\t0\t0\t0"}, - {call: "Seteuid(1)", fn: func() error { return syscall.Seteuid(1) }, filter: "Uid:", expect: "0\t1\t0\t1"}, - {call: "Setuid(0)", fn: func() error { return syscall.Setuid(0) }, filter: "Uid:", expect: "0\t0\t0\t0"}, + {call: "Seteuid(1)", fn: func() error { return syscall.Seteuid(1) }, filter: "Uid:", expect: "\t0\t1\t0\t1"}, + {call: "Setuid(0)", fn: func() error { return syscall.Setuid(0) }, filter: "Uid:", expect: "\t0\t0\t0\t0"}, - {call: "Setgid(1)", fn: func() error { return syscall.Setgid(1) }, filter: "Gid:", expect: "1\t1\t1\t1"}, - {call: "Setgid(0)", fn: func() error { return syscall.Setgid(0) }, filter: "Gid:", expect: "0\t0\t0\t0"}, + {call: "Setgid(1)", fn: func() error { return syscall.Setgid(1) }, filter: "Gid:", expect: "\t1\t1\t1\t1"}, + {call: "Setgid(0)", fn: func() error { return syscall.Setgid(0) }, filter: "Gid:", expect: "\t0\t0\t0\t0"}, - {call: "Setgroups([]int{0,1,2,3})", fn: func() error { return syscall.Setgroups([]int{0, 1, 2, 3}) }, filter: "Groups:", expect: "0 1 2 3 "}, - {call: "Setgroups(nil)", fn: func() error { return syscall.Setgroups(nil) }, filter: "Groups:", expect: " "}, - {call: "Setgroups([]int{0})", fn: func() error { return syscall.Setgroups([]int{0}) }, filter: "Groups:", expect: "0 "}, + {call: 
"Setgroups([]int{0,1,2,3})", fn: func() error { return syscall.Setgroups([]int{0, 1, 2, 3}) }, filter: "Groups:", expect: "\t0 1 2 3"}, + {call: "Setgroups(nil)", fn: func() error { return syscall.Setgroups(nil) }, filter: "Groups:", expect: ""}, + {call: "Setgroups([]int{0})", fn: func() error { return syscall.Setgroups([]int{0}) }, filter: "Groups:", expect: "\t0"}, - {call: "Setregid(101,0)", fn: func() error { return syscall.Setregid(101, 0) }, filter: "Gid:", expect: "101\t0\t0\t0"}, - {call: "Setregid(0,102)", fn: func() error { return syscall.Setregid(0, 102) }, filter: "Gid:", expect: "0\t102\t102\t102"}, - {call: "Setregid(0,0)", fn: func() error { return syscall.Setregid(0, 0) }, filter: "Gid:", expect: "0\t0\t0\t0"}, + {call: "Setregid(101,0)", fn: func() error { return syscall.Setregid(101, 0) }, filter: "Gid:", expect: "\t101\t0\t0\t0"}, + {call: "Setregid(0,102)", fn: func() error { return syscall.Setregid(0, 102) }, filter: "Gid:", expect: "\t0\t102\t102\t102"}, + {call: "Setregid(0,0)", fn: func() error { return syscall.Setregid(0, 0) }, filter: "Gid:", expect: "\t0\t0\t0\t0"}, - {call: "Setreuid(1,0)", fn: func() error { return syscall.Setreuid(1, 0) }, filter: "Uid:", expect: "1\t0\t0\t0"}, - {call: "Setreuid(0,2)", fn: func() error { return syscall.Setreuid(0, 2) }, filter: "Uid:", expect: "0\t2\t2\t2"}, - {call: "Setreuid(0,0)", fn: func() error { return syscall.Setreuid(0, 0) }, filter: "Uid:", expect: "0\t0\t0\t0"}, + {call: "Setreuid(1,0)", fn: func() error { return syscall.Setreuid(1, 0) }, filter: "Uid:", expect: "\t1\t0\t0\t0"}, + {call: "Setreuid(0,2)", fn: func() error { return syscall.Setreuid(0, 2) }, filter: "Uid:", expect: "\t0\t2\t2\t2"}, + {call: "Setreuid(0,0)", fn: func() error { return syscall.Setreuid(0, 0) }, filter: "Uid:", expect: "\t0\t0\t0\t0"}, - {call: "Setresgid(101,0,102)", fn: func() error { return syscall.Setresgid(101, 0, 102) }, filter: "Gid:", expect: "101\t0\t102\t0"}, - {call: "Setresgid(0,102,101)", fn: func() 
error { return syscall.Setresgid(0, 102, 101) }, filter: "Gid:", expect: "0\t102\t101\t102"}, - {call: "Setresgid(0,0,0)", fn: func() error { return syscall.Setresgid(0, 0, 0) }, filter: "Gid:", expect: "0\t0\t0\t0"}, + {call: "Setresgid(101,0,102)", fn: func() error { return syscall.Setresgid(101, 0, 102) }, filter: "Gid:", expect: "\t101\t0\t102\t0"}, + {call: "Setresgid(0,102,101)", fn: func() error { return syscall.Setresgid(0, 102, 101) }, filter: "Gid:", expect: "\t0\t102\t101\t102"}, + {call: "Setresgid(0,0,0)", fn: func() error { return syscall.Setresgid(0, 0, 0) }, filter: "Gid:", expect: "\t0\t0\t0\t0"}, - {call: "Setresuid(1,0,2)", fn: func() error { return syscall.Setresuid(1, 0, 2) }, filter: "Uid:", expect: "1\t0\t2\t0"}, - {call: "Setresuid(0,2,1)", fn: func() error { return syscall.Setresuid(0, 2, 1) }, filter: "Uid:", expect: "0\t2\t1\t2"}, - {call: "Setresuid(0,0,0)", fn: func() error { return syscall.Setresuid(0, 0, 0) }, filter: "Uid:", expect: "0\t0\t0\t0"}, + {call: "Setresuid(1,0,2)", fn: func() error { return syscall.Setresuid(1, 0, 2) }, filter: "Uid:", expect: "\t1\t0\t2\t0"}, + {call: "Setresuid(0,2,1)", fn: func() error { return syscall.Setresuid(0, 2, 1) }, filter: "Uid:", expect: "\t0\t2\t1\t2"}, + {call: "Setresuid(0,0,0)", fn: func() error { return syscall.Setresuid(0, 0, 0) }, filter: "Uid:", expect: "\t0\t0\t0\t0"}, } for i, v := range vs { -- cgit v1.3