diff options
Diffstat (limited to 'src/syscall/exec_linux.go')
| -rw-r--r-- | src/syscall/exec_linux.go | 35 |
1 files changed, 25 insertions, 10 deletions
diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go index dfbb38ac16..791b263b2a 100644 --- a/src/syscall/exec_linux.go +++ b/src/syscall/exec_linux.go @@ -101,11 +101,17 @@ type SysProcAttr struct { AmbientCaps []uintptr // Ambient capabilities (Linux only) UseCgroupFD bool // Whether to make use of the CgroupFD field. CgroupFD int // File descriptor of a cgroup to put the new process into. + // PidFD, if not nil, is used to store the pidfd of a child, if the + // functionality is supported by the kernel, or -1. Note *PidFD is + // changed only if the process starts successfully. + PidFD *int } var ( none = [...]byte{'n', 'o', 'n', 'e', 0} slash = [...]byte{'/', 0} + + forceClone3 = false // Used by unit tests only. ) // Implemented in runtime package. @@ -235,6 +241,7 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att uidmap, setgroups, gidmap []byte clone3 *cloneArgs pgrp int32 + pidfd _C_int = -1 dirfd int cred *Credential ngroups, groups uintptr @@ -289,18 +296,22 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att if sys.Cloneflags&CLONE_NEWUSER == 0 && sys.Unshareflags&CLONE_NEWUSER == 0 { flags |= CLONE_VFORK | CLONE_VM } + if sys.PidFD != nil { + flags |= CLONE_PIDFD + } // Whether to use clone3. - if sys.UseCgroupFD { - clone3 = &cloneArgs{ - flags: uint64(flags) | CLONE_INTO_CGROUP, - exitSignal: uint64(SIGCHLD), - cgroup: uint64(sys.CgroupFD), - } - } else if flags&CLONE_NEWTIME != 0 { + if sys.UseCgroupFD || flags&CLONE_NEWTIME != 0 || forceClone3 { clone3 = &cloneArgs{ flags: uint64(flags), exitSignal: uint64(SIGCHLD), } + if sys.UseCgroupFD { + clone3.flags |= CLONE_INTO_CGROUP + clone3.cgroup = uint64(sys.CgroupFD) + } + if sys.PidFD != nil { + clone3.pidFD = uint64(uintptr(unsafe.Pointer(&pidfd))) + } } // About to call fork. @@ -308,14 +319,14 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att runtime_BeforeFork() locked = true if clone3 != nil { - pid, err1 = rawVforkSyscall(_SYS_clone3, uintptr(unsafe.Pointer(clone3)), unsafe.Sizeof(*clone3)) + pid, err1 = rawVforkSyscall(_SYS_clone3, uintptr(unsafe.Pointer(clone3)), unsafe.Sizeof(*clone3), 0) } else { flags |= uintptr(SIGCHLD) if runtime.GOARCH == "s390x" { // On Linux/s390, the first two arguments of clone(2) are swapped. - pid, err1 = rawVforkSyscall(SYS_CLONE, 0, flags) + pid, err1 = rawVforkSyscall(SYS_CLONE, 0, flags, uintptr(unsafe.Pointer(&pidfd))) } else { - pid, err1 = rawVforkSyscall(SYS_CLONE, flags, 0) + pid, err1 = rawVforkSyscall(SYS_CLONE, flags, 0, uintptr(unsafe.Pointer(&pidfd))) } } if err1 != 0 || pid != 0 { @@ -330,6 +341,10 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att // Fork succeeded, now in child. + if sys.PidFD != nil { + *sys.PidFD = int(pidfd) + } + // Enable the "keep capabilities" flag to set ambient capabilities later. if len(sys.AmbientCaps) > 0 { _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_KEEPCAPS, 1, 0, 0, 0, 0) |
