about summary refs log tree commit diff
path: root/src/syscall
diff options
context:
space:
mode:
author Michael Pratt <mpratt@google.com> 2022-02-04 17:15:28 -0500
committer Michael Pratt <mpratt@google.com> 2022-02-15 15:40:35 +0000
commit 0a5fae2a0e965024f692b95f7e857904a274fcb6 (patch)
tree 393819d9f85f5be1f54bc480f7c6763859bc8997 /src/syscall
parent 0b321c9a7c0055dfd3f875dea930a28690659211 (diff)
download go-0a5fae2a0e965024f692b95f7e857904a274fcb6.tar.xz
runtime, syscall: reimplement AllThreadsSyscall using only signals.

In issue 50113, we see that a thread blocked in a system call can result
in a hang of AllThreadsSyscall. To resolve this, we must send a signal
to these threads to knock them out of the system call long enough to run
the per-thread syscall.

Stepping back, if we need to send signals anyway, it should be possible
to implement this entire mechanism on top of signals. This CL does so,
vastly simplifying the mechanism, both as a direct result of
newly-unnecessary code as well as some ancillary simplifications to make
things simpler to follow.

Major changes:

* The rest of the mechanism is moved to os_linux.go, with fields in mOS
  instead of m itself.
* 'Fixup' fields and functions are renamed to 'perThreadSyscall' so they
  are more precise about their purpose.
* Rather than getting passed a closure, doAllThreadsSyscall takes the
  syscall number and arguments. This avoids a lot of hairy behavior:
  * The closure may potentially only be live in fields in the M,
    hidden from the GC. Not necessary with no closure.
  * The need to loan out the race context. A direct RawSyscall6 call
    does not require any race context.
  * The closure previously conditionally panicked in strange
    locations, like a signal handler. Now we simply throw.
* All manual fixup synchronization with mPark, sysmon, templateThread,
  sigqueue, etc is gone.

The core approach is much simpler: doAllThreadsSyscall sends a signal to
every thread in allm, which executes the system call from the signal
handler. We use (SIGRTMIN + 1), aka SIGSETXID, the same signal used by
glibc for this purpose. As such, we are careful to only handle this
signal on non-cgo binaries.

Synchronization with thread creation is a key part of this CL. The
comment near the top of doAllThreadsSyscall describes the required
synchronization semantics and how they are achieved.

Note that current use of allocmLock protects the state mutations of allm
that are also protected by sched.lock. allocmLock is used instead of
sched.lock simply to avoid holding sched.lock for so long.

Fixes #50113

Change-Id: Ic7ea856dc66cf711731540a54996e08fc986ce84
Reviewed-on: https://go-review.googlesource.com/c/go/+/383434
Reviewed-by: Austin Clements <austin@google.com>
Trust: Michael Pratt <mpratt@google.com>
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>

Diffstat (limited to 'src/syscall')
-rw-r--r-- src/syscall/syscall_linux.go 90
-rw-r--r-- src/syscall/syscall_linux_386.go 6
-rw-r--r-- src/syscall/syscall_linux_amd64.go 6
-rw-r--r-- src/syscall/syscall_linux_arm.go 6
-rw-r--r-- src/syscall/syscall_linux_arm64.go 6
-rw-r--r-- src/syscall/syscall_linux_mips64x.go 6
-rw-r--r-- src/syscall/syscall_linux_mipsx.go 6
-rw-r--r-- src/syscall/syscall_linux_ppc64x.go 6
-rw-r--r-- src/syscall/syscall_linux_riscv64.go 6
-rw-r--r-- src/syscall/syscall_linux_s390x.go 6
-rw-r--r-- src/syscall/syscall_linux_test.go 71
11 files changed, 80 insertions, 135 deletions
diff --git a/src/syscall/syscall_linux.go b/src/syscall/syscall_linux.go
index abcf1d5dfe..e3891b0855 100644
--- a/src/syscall/syscall_linux.go
+++ b/src/syscall/syscall_linux.go
@@ -958,62 +958,11 @@ func Getpgrp() (pid int) {
//sysnb Setsid() (pid int, err error)
//sysnb Settimeofday(tv *Timeval) (err error)
-// allThreadsCaller holds the input and output state for performing a
-// allThreadsSyscall that needs to synchronize all OS thread state. Linux
-// generally does not always support this natively, so we have to
-// manipulate the runtime to fix things up.
-type allThreadsCaller struct {
- // arguments
- trap, a1, a2, a3, a4, a5, a6 uintptr
-
- // return values (only set by 0th invocation)
- r1, r2 uintptr
-
- // err is the error code
- err Errno
-}
-
-// doSyscall is a callback for executing a syscall on the current m
-// (OS thread).
-//go:nosplit
-//go:norace
-func (pc *allThreadsCaller) doSyscall(initial bool) bool {
- r1, r2, err := RawSyscall(pc.trap, pc.a1, pc.a2, pc.a3)
- if initial {
- pc.r1 = r1
- pc.r2 = r2
- pc.err = err
- } else if pc.r1 != r1 || (archHonorsR2 && pc.r2 != r2) || pc.err != err {
- print("trap:", pc.trap, ", a123=[", pc.a1, ",", pc.a2, ",", pc.a3, "]\n")
- print("results: got {r1=", r1, ",r2=", r2, ",err=", err, "}, want {r1=", pc.r1, ",r2=", pc.r2, ",r3=", pc.err, "}\n")
- panic("AllThreadsSyscall results differ between threads; runtime corrupted")
- }
- return err == 0
-}
-
-// doSyscall6 is a callback for executing a syscall6 on the current m
-// (OS thread).
-//go:nosplit
-//go:norace
-func (pc *allThreadsCaller) doSyscall6(initial bool) bool {
- r1, r2, err := RawSyscall6(pc.trap, pc.a1, pc.a2, pc.a3, pc.a4, pc.a5, pc.a6)
- if initial {
- pc.r1 = r1
- pc.r2 = r2
- pc.err = err
- } else if pc.r1 != r1 || (archHonorsR2 && pc.r2 != r2) || pc.err != err {
- print("trap:", pc.trap, ", a123456=[", pc.a1, ",", pc.a2, ",", pc.a3, ",", pc.a4, ",", pc.a5, ",", pc.a6, "]\n")
- print("results: got {r1=", r1, ",r2=", r2, ",err=", err, "}, want {r1=", pc.r1, ",r2=", pc.r2, ",r3=", pc.err, "}\n")
- panic("AllThreadsSyscall6 results differ between threads; runtime corrupted")
- }
- return err == 0
-}
-
-// Provided by runtime.syscall_runtime_doAllThreadsSyscall which
-// serializes the world and invokes the fn on each OS thread (what the
-// runtime refers to as m's). Once this function returns, all threads
-// are in sync.
-func runtime_doAllThreadsSyscall(fn func(bool) bool)
+// Provided by runtime.syscall_runtime_doAllThreadsSyscall which stops the
+// world and invokes the syscall on each OS thread. Once this function returns,
+// all threads are in sync.
+//go:uintptrescapes
+func runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr)
// AllThreadsSyscall performs a syscall on each OS thread of the Go
// runtime. It first invokes the syscall on one thread. Should that
@@ -1035,17 +984,8 @@ func AllThreadsSyscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno) {
if cgo_libc_setegid != nil {
return minus1, minus1, ENOTSUP
}
- pc := &allThreadsCaller{
- trap: trap,
- a1: a1,
- a2: a2,
- a3: a3,
- }
- runtime_doAllThreadsSyscall(pc.doSyscall)
- r1 = pc.r1
- r2 = pc.r2
- err = pc.err
- return
+ r1, r2, errno := runtime_doAllThreadsSyscall(trap, a1, a2, a3, 0, 0, 0)
+ return r1, r2, Errno(errno)
}
// AllThreadsSyscall6 is like AllThreadsSyscall, but extended to six
@@ -1055,20 +995,8 @@ func AllThreadsSyscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, e
if cgo_libc_setegid != nil {
return minus1, minus1, ENOTSUP
}
- pc := &allThreadsCaller{
- trap: trap,
- a1: a1,
- a2: a2,
- a3: a3,
- a4: a4,
- a5: a5,
- a6: a6,
- }
- runtime_doAllThreadsSyscall(pc.doSyscall6)
- r1 = pc.r1
- r2 = pc.r2
- err = pc.err
- return
+ r1, r2, errno := runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6)
+ return r1, r2, Errno(errno)
}
// linked by runtime.cgocall.go
diff --git a/src/syscall/syscall_linux_386.go b/src/syscall/syscall_linux_386.go
index 98442055d8..a3a5870a17 100644
--- a/src/syscall/syscall_linux_386.go
+++ b/src/syscall/syscall_linux_386.go
@@ -6,12 +6,6 @@ package syscall
import "unsafe"
-// archHonorsR2 captures the fact that r2 is honored by the
-// runtime.GOARCH. Syscall conventions are generally r1, r2, err :=
-// syscall(trap, ...). Not all architectures define r2 in their
-// ABI. See "man syscall".
-const archHonorsR2 = true
-
const _SYS_setgroups = SYS_SETGROUPS32
func setTimespec(sec, nsec int64) Timespec {
diff --git a/src/syscall/syscall_linux_amd64.go b/src/syscall/syscall_linux_amd64.go
index 04acd063fa..26b40ffe9b 100644
--- a/src/syscall/syscall_linux_amd64.go
+++ b/src/syscall/syscall_linux_amd64.go
@@ -4,12 +4,6 @@
package syscall
-// archHonorsR2 captures the fact that r2 is honored by the
-// runtime.GOARCH. Syscall conventions are generally r1, r2, err :=
-// syscall(trap, ...). Not all architectures define r2 in their
-// ABI. See "man syscall".
-const archHonorsR2 = true
-
const _SYS_setgroups = SYS_SETGROUPS
//sys Dup2(oldfd int, newfd int) (err error)
diff --git a/src/syscall/syscall_linux_arm.go b/src/syscall/syscall_linux_arm.go
index f2f342e7ed..58f376f350 100644
--- a/src/syscall/syscall_linux_arm.go
+++ b/src/syscall/syscall_linux_arm.go
@@ -6,12 +6,6 @@ package syscall
import "unsafe"
-// archHonorsR2 captures the fact that r2 is honored by the
-// runtime.GOARCH. Syscall conventions are generally r1, r2, err :=
-// syscall(trap, ...). Not all architectures define r2 in their
-// ABI. See "man syscall". [EABI assumed.]
-const archHonorsR2 = true
-
const _SYS_setgroups = SYS_SETGROUPS32
func setTimespec(sec, nsec int64) Timespec {
diff --git a/src/syscall/syscall_linux_arm64.go b/src/syscall/syscall_linux_arm64.go
index 990e732f35..f3c6c48d06 100644
--- a/src/syscall/syscall_linux_arm64.go
+++ b/src/syscall/syscall_linux_arm64.go
@@ -6,12 +6,6 @@ package syscall
import "unsafe"
-// archHonorsR2 captures the fact that r2 is honored by the
-// runtime.GOARCH. Syscall conventions are generally r1, r2, err :=
-// syscall(trap, ...). Not all architectures define r2 in their
-// ABI. See "man syscall".
-const archHonorsR2 = true
-
const _SYS_setgroups = SYS_SETGROUPS
func EpollCreate(size int) (fd int, err error) {
diff --git a/src/syscall/syscall_linux_mips64x.go b/src/syscall/syscall_linux_mips64x.go
index 7c9dd80614..7be1664637 100644
--- a/src/syscall/syscall_linux_mips64x.go
+++ b/src/syscall/syscall_linux_mips64x.go
@@ -6,12 +6,6 @@
package syscall
-// archHonorsR2 captures the fact that r2 is honored by the
-// runtime.GOARCH. Syscall conventions are generally r1, r2, err :=
-// syscall(trap, ...). Not all architectures define r2 in their
-// ABI. See "man syscall".
-const archHonorsR2 = true
-
const _SYS_setgroups = SYS_SETGROUPS
//sys Dup2(oldfd int, newfd int) (err error)
diff --git a/src/syscall/syscall_linux_mipsx.go b/src/syscall/syscall_linux_mipsx.go
index 741eeb14bb..97188d3895 100644
--- a/src/syscall/syscall_linux_mipsx.go
+++ b/src/syscall/syscall_linux_mipsx.go
@@ -8,12 +8,6 @@ package syscall
import "unsafe"
-// archHonorsR2 captures the fact that r2 is honored by the
-// runtime.GOARCH. Syscall conventions are generally r1, r2, err :=
-// syscall(trap, ...). Not all architectures define r2 in their
-// ABI. See "man syscall".
-const archHonorsR2 = true
-
const _SYS_setgroups = SYS_SETGROUPS
func Syscall9(trap, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, err Errno)
diff --git a/src/syscall/syscall_linux_ppc64x.go b/src/syscall/syscall_linux_ppc64x.go
index cc1b72e0e7..ac42b20598 100644
--- a/src/syscall/syscall_linux_ppc64x.go
+++ b/src/syscall/syscall_linux_ppc64x.go
@@ -6,12 +6,6 @@
package syscall
-// archHonorsR2 captures the fact that r2 is honored by the
-// runtime.GOARCH. Syscall conventions are generally r1, r2, err :=
-// syscall(trap, ...). Not all architectures define r2 in their
-// ABI. See "man syscall".
-const archHonorsR2 = false
-
const _SYS_setgroups = SYS_SETGROUPS
//sys Dup2(oldfd int, newfd int) (err error)
diff --git a/src/syscall/syscall_linux_riscv64.go b/src/syscall/syscall_linux_riscv64.go
index bcb89c6e9a..4331a19e8d 100644
--- a/src/syscall/syscall_linux_riscv64.go
+++ b/src/syscall/syscall_linux_riscv64.go
@@ -6,12 +6,6 @@ package syscall
import "unsafe"
-// archHonorsR2 captures the fact that r2 is honored by the
-// runtime.GOARCH. Syscall conventions are generally r1, r2, err :=
-// syscall(trap, ...). Not all architectures define r2 in their
-// ABI. See "man syscall".
-const archHonorsR2 = true
-
const _SYS_setgroups = SYS_SETGROUPS
func EpollCreate(size int) (fd int, err error) {
diff --git a/src/syscall/syscall_linux_s390x.go b/src/syscall/syscall_linux_s390x.go
index 123664f5b2..ff99024788 100644
--- a/src/syscall/syscall_linux_s390x.go
+++ b/src/syscall/syscall_linux_s390x.go
@@ -6,12 +6,6 @@ package syscall
import "unsafe"
-// archHonorsR2 captures the fact that r2 is honored by the
-// runtime.GOARCH. Syscall conventions are generally r1, r2, err :=
-// syscall(trap, ...). Not all architectures define r2 in their
-// ABI. See "man syscall".
-const archHonorsR2 = true
-
const _SYS_setgroups = SYS_SETGROUPS
//sys Dup2(oldfd int, newfd int) (err error)
diff --git a/src/syscall/syscall_linux_test.go b/src/syscall/syscall_linux_test.go
index 8d828be015..0444b64266 100644
--- a/src/syscall/syscall_linux_test.go
+++ b/src/syscall/syscall_linux_test.go
@@ -15,6 +15,7 @@ import (
"sort"
"strconv"
"strings"
+ "sync"
"syscall"
"testing"
"unsafe"
@@ -565,3 +566,73 @@ func TestSetuidEtc(t *testing.T) {
}
}
}
+
+// TestAllThreadsSyscallError verifies that errors are properly returned when
+// the syscall fails on the original thread.
+func TestAllThreadsSyscallError(t *testing.T) {
+ // SYS_CAPGET takes pointers as the first two arguments. Since we pass
+ // 0, we expect to get EFAULT back.
+ r1, r2, err := syscall.AllThreadsSyscall(syscall.SYS_CAPGET, 0, 0, 0)
+ if err == syscall.ENOTSUP {
+ t.Skip("AllThreadsSyscall disabled with cgo")
+ }
+ if err != syscall.EFAULT {
+ t.Errorf("AllThreadSyscall(SYS_CAPGET) got %d, %d, %v, want err %v", r1, r2, err, syscall.EFAULT)
+ }
+}
+
+// TestAllThreadsSyscallBlockedSyscall confirms that AllThreadsSyscall
+// can interrupt threads in long-running system calls. This test will
+// deadlock if this doesn't work correctly.
+func TestAllThreadsSyscallBlockedSyscall(t *testing.T) {
+ if _, _, err := syscall.AllThreadsSyscall(syscall.SYS_PRCTL, PR_SET_KEEPCAPS, 0, 0); err == syscall.ENOTSUP {
+ t.Skip("AllThreadsSyscall disabled with cgo")
+ }
+
+ rd, wr, err := os.Pipe()
+ if err != nil {
+ t.Fatalf("unable to obtain a pipe: %v", err)
+ }
+
+ // Perform a blocking read on the pipe.
+ var wg sync.WaitGroup
+ ready := make(chan bool)
+ wg.Add(1)
+ go func() {
+ data := make([]byte, 1)
+
+ // To narrow the window we have to wait for this
+ // goroutine to block in read, synchronize just before
+ // calling read.
+ ready <- true
+
+ // We use syscall.Read directly to avoid the poller.
+ // This will return when the write side is closed.
+ n, err := syscall.Read(int(rd.Fd()), data)
+ if !(n == 0 && err == nil) {
+ t.Errorf("expected read to return 0, got %d, %s", n, err)
+ }
+
+ // Clean up rd and also ensure rd stays reachable so
+ // it doesn't get closed by GC.
+ rd.Close()
+ wg.Done()
+ }()
+ <-ready
+
+ // Loop here to give the goroutine more time to block in read.
+ // Generally this will trigger on the first iteration anyway.
+ pid := syscall.Getpid()
+ for i := 0; i < 100; i++ {
+ if id, _, e := syscall.AllThreadsSyscall(syscall.SYS_GETPID, 0, 0, 0); e != 0 {
+ t.Errorf("[%d] getpid failed: %v", i, e)
+ } else if int(id) != pid {
+ t.Errorf("[%d] getpid got=%d, want=%d", i, id, pid)
+ }
+ // Provide an explicit opportunity for this goroutine
+ // to change Ms.
+ runtime.Gosched()
+ }
+ wr.Close()
+ wg.Wait()
+}