diff options
| author | Kir Kolyshkin <kolyshkin@gmail.com> | 2023-08-16 19:20:54 -0700 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2023-09-07 19:11:15 +0000 |
| commit | da7ee57f40069eda3488b2e51dfa878bdd5816af (patch) | |
| tree | d0830dc406737df376008d1ff52f684a66ed1039 /src/syscall/exec_linux.go | |
| parent | 584d646559eb6c5942410f2ba2d2806f2627c2a2 (diff) | |
| download | go-da7ee57f40069eda3488b2e51dfa878bdd5816af.tar.xz | |
syscall: add support to get pidfd from ForkExec on Linux
Add PidFD support, so that if the PidFD pointer in SysProcAttr is not
nil, ForkExec (and thus all its users) obtains a pidfd from the kernel
during clone(), and writes the result (or -1, if the functionality
is not supported by the kernel) into *PidFD.
The functionality to get pidfd is implemented for both clone3 and clone.
For the latter, an extra argument to rawVforkSyscall is needed, thus the
change in asm files.
Add a trivial test case checking that the obtained pidfd can be used to
send a signal to a process, using pidfd_send_signal. To test the clone3
code path, add a flag available to tests only.
Updates #51246.
Change-Id: I2212b69e1a657163c31b4a6245b076bc495777a3
Reviewed-on: https://go-review.googlesource.com/c/go/+/520266
Auto-Submit: Ian Lance Taylor <iant@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Kirill Kolyshkin <kolyshkin@gmail.com>
Diffstat (limited to 'src/syscall/exec_linux.go')
| -rw-r--r-- | src/syscall/exec_linux.go | 35 |
1 file changed, 25 insertions, 10 deletions
diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go index dfbb38ac16..791b263b2a 100644 --- a/src/syscall/exec_linux.go +++ b/src/syscall/exec_linux.go @@ -101,11 +101,17 @@ type SysProcAttr struct { AmbientCaps []uintptr // Ambient capabilities (Linux only) UseCgroupFD bool // Whether to make use of the CgroupFD field. CgroupFD int // File descriptor of a cgroup to put the new process into. + // PidFD, if not nil, is used to store the pidfd of a child, if the + // functionality is supported by the kernel, or -1. Note *PidFD is + // changed only if the process starts successfully. + PidFD *int } var ( none = [...]byte{'n', 'o', 'n', 'e', 0} slash = [...]byte{'/', 0} + + forceClone3 = false // Used by unit tests only. ) // Implemented in runtime package. @@ -235,6 +241,7 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att uidmap, setgroups, gidmap []byte clone3 *cloneArgs pgrp int32 + pidfd _C_int = -1 dirfd int cred *Credential ngroups, groups uintptr @@ -289,18 +296,22 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att if sys.Cloneflags&CLONE_NEWUSER == 0 && sys.Unshareflags&CLONE_NEWUSER == 0 { flags |= CLONE_VFORK | CLONE_VM } + if sys.PidFD != nil { + flags |= CLONE_PIDFD + } // Whether to use clone3. - if sys.UseCgroupFD { - clone3 = &cloneArgs{ - flags: uint64(flags) | CLONE_INTO_CGROUP, - exitSignal: uint64(SIGCHLD), - cgroup: uint64(sys.CgroupFD), - } - } else if flags&CLONE_NEWTIME != 0 { + if sys.UseCgroupFD || flags&CLONE_NEWTIME != 0 || forceClone3 { clone3 = &cloneArgs{ flags: uint64(flags), exitSignal: uint64(SIGCHLD), } + if sys.UseCgroupFD { + clone3.flags |= CLONE_INTO_CGROUP + clone3.cgroup = uint64(sys.CgroupFD) + } + if sys.PidFD != nil { + clone3.pidFD = uint64(uintptr(unsafe.Pointer(&pidfd))) + } } // About to call fork. 
@@ -308,14 +319,14 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att runtime_BeforeFork() locked = true if clone3 != nil { - pid, err1 = rawVforkSyscall(_SYS_clone3, uintptr(unsafe.Pointer(clone3)), unsafe.Sizeof(*clone3)) + pid, err1 = rawVforkSyscall(_SYS_clone3, uintptr(unsafe.Pointer(clone3)), unsafe.Sizeof(*clone3), 0) } else { flags |= uintptr(SIGCHLD) if runtime.GOARCH == "s390x" { // On Linux/s390, the first two arguments of clone(2) are swapped. - pid, err1 = rawVforkSyscall(SYS_CLONE, 0, flags) + pid, err1 = rawVforkSyscall(SYS_CLONE, 0, flags, uintptr(unsafe.Pointer(&pidfd))) } else { - pid, err1 = rawVforkSyscall(SYS_CLONE, flags, 0) + pid, err1 = rawVforkSyscall(SYS_CLONE, flags, 0, uintptr(unsafe.Pointer(&pidfd))) } } if err1 != 0 || pid != 0 { @@ -330,6 +341,10 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att // Fork succeeded, now in child. + if sys.PidFD != nil { + *sys.PidFD = int(pidfd) + } + // Enable the "keep capabilities" flag to set ambient capabilities later. if len(sys.AmbientCaps) > 0 { _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_KEEPCAPS, 1, 0, 0, 0, 0) |
