about | summary | refs | log | tree | commit | diff
path: root/src/syscall/exec_linux.go
diff options
context:
space:
mode:
author Kir Kolyshkin <kolyshkin@gmail.com> 2023-08-16 19:20:54 -0700
committer Gopher Robot <gobot@golang.org> 2023-09-07 19:11:15 +0000
commit da7ee57f40069eda3488b2e51dfa878bdd5816af (patch)
tree d0830dc406737df376008d1ff52f684a66ed1039 /src/syscall/exec_linux.go
parent 584d646559eb6c5942410f2ba2d2806f2627c2a2 (diff)
download go-da7ee57f40069eda3488b2e51dfa878bdd5816af.tar.xz
syscall: add support to get pidfd from ForkExec on Linux

Add PidFD support, so that if the PidFD pointer in SysProcAttr is not nil, ForkExec (and thus all its users) obtains a pidfd from the kernel during clone(), and writes the result (or -1, if the functionality is not supported by the kernel) into *PidFD.

The functionality to get pidfd is implemented for both clone3 and clone. For the latter, an extra argument to rawVforkSyscall is needed, thus the change in asm files.

Add a trivial test case checking the obtained pidfd can be used to send a signal to a process, using pidfd_send_signal. To test clone3 code path, add a flag available to tests only.

Updates #51246.

Change-Id: I2212b69e1a657163c31b4a6245b076bc495777a3
Reviewed-on: https://go-review.googlesource.com/c/go/+/520266
Auto-Submit: Ian Lance Taylor <iant@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Kirill Kolyshkin <kolyshkin@gmail.com>

Diffstat (limited to 'src/syscall/exec_linux.go')
-rw-r--r-- src/syscall/exec_linux.go 35
1 files changed, 25 insertions, 10 deletions
diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go
index dfbb38ac16..791b263b2a 100644
--- a/src/syscall/exec_linux.go
+++ b/src/syscall/exec_linux.go
@@ -101,11 +101,17 @@ type SysProcAttr struct {
AmbientCaps []uintptr // Ambient capabilities (Linux only)
UseCgroupFD bool // Whether to make use of the CgroupFD field.
CgroupFD int // File descriptor of a cgroup to put the new process into.
+ // PidFD, if not nil, is used to store the pidfd of a child, if the
+ // functionality is supported by the kernel, or -1. Note *PidFD is
+ // changed only if the process starts successfully.
+ PidFD *int
}
var (
none = [...]byte{'n', 'o', 'n', 'e', 0}
slash = [...]byte{'/', 0}
+
+ forceClone3 = false // Used by unit tests only.
)
// Implemented in runtime package.
@@ -235,6 +241,7 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att
uidmap, setgroups, gidmap []byte
clone3 *cloneArgs
pgrp int32
+ pidfd _C_int = -1
dirfd int
cred *Credential
ngroups, groups uintptr
@@ -289,18 +296,22 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att
if sys.Cloneflags&CLONE_NEWUSER == 0 && sys.Unshareflags&CLONE_NEWUSER == 0 {
flags |= CLONE_VFORK | CLONE_VM
}
+ if sys.PidFD != nil {
+ flags |= CLONE_PIDFD
+ }
// Whether to use clone3.
- if sys.UseCgroupFD {
- clone3 = &cloneArgs{
- flags: uint64(flags) | CLONE_INTO_CGROUP,
- exitSignal: uint64(SIGCHLD),
- cgroup: uint64(sys.CgroupFD),
- }
- } else if flags&CLONE_NEWTIME != 0 {
+ if sys.UseCgroupFD || flags&CLONE_NEWTIME != 0 || forceClone3 {
clone3 = &cloneArgs{
flags: uint64(flags),
exitSignal: uint64(SIGCHLD),
}
+ if sys.UseCgroupFD {
+ clone3.flags |= CLONE_INTO_CGROUP
+ clone3.cgroup = uint64(sys.CgroupFD)
+ }
+ if sys.PidFD != nil {
+ clone3.pidFD = uint64(uintptr(unsafe.Pointer(&pidfd)))
+ }
}
// About to call fork.
@@ -308,14 +319,14 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att
runtime_BeforeFork()
locked = true
if clone3 != nil {
- pid, err1 = rawVforkSyscall(_SYS_clone3, uintptr(unsafe.Pointer(clone3)), unsafe.Sizeof(*clone3))
+ pid, err1 = rawVforkSyscall(_SYS_clone3, uintptr(unsafe.Pointer(clone3)), unsafe.Sizeof(*clone3), 0)
} else {
flags |= uintptr(SIGCHLD)
if runtime.GOARCH == "s390x" {
// On Linux/s390, the first two arguments of clone(2) are swapped.
- pid, err1 = rawVforkSyscall(SYS_CLONE, 0, flags)
+ pid, err1 = rawVforkSyscall(SYS_CLONE, 0, flags, uintptr(unsafe.Pointer(&pidfd)))
} else {
- pid, err1 = rawVforkSyscall(SYS_CLONE, flags, 0)
+ pid, err1 = rawVforkSyscall(SYS_CLONE, flags, 0, uintptr(unsafe.Pointer(&pidfd)))
}
}
if err1 != 0 || pid != 0 {
@@ -330,6 +341,10 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att
// Fork succeeded, now in child.
+ if sys.PidFD != nil {
+ *sys.PidFD = int(pidfd)
+ }
+
// Enable the "keep capabilities" flag to set ambient capabilities later.
if len(sys.AmbientCaps) > 0 {
_, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_KEEPCAPS, 1, 0, 0, 0, 0)