diff options
Diffstat (limited to 'src/syscall/exec_linux.go')
| -rw-r--r-- | src/syscall/exec_linux.go | 70 |
1 files changed, 45 insertions, 25 deletions
diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go index e631cd6470..db52360717 100644 --- a/src/syscall/exec_linux.go +++ b/src/syscall/exec_linux.go @@ -63,15 +63,46 @@ func runtime_AfterForkInChild() // functions that do not grow the stack. //go:norace func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) { + // Set up and fork. This returns immediately in the parent or + // if there's an error. + r1, err1, p, locked := forkAndExecInChild1(argv0, argv, envv, chroot, dir, attr, sys, pipe) + if locked { + runtime_AfterFork() + } + if err1 != 0 { + return 0, err1 + } + + // parent; return PID + pid = int(r1) + + if sys.UidMappings != nil || sys.GidMappings != nil { + Close(p[0]) + err := writeUidGidMappings(pid, sys) + var err2 Errno + if err != nil { + err2 = err.(Errno) + } + RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2)) + Close(p[1]) + } + + return pid, 0 +} + +//go:norace +func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (r1 uintptr, err1 Errno, p [2]int, locked bool) { + // vfork requires that the child not touch any of the parent's + // active stack frames. Hence, the child does all post-fork + // processing in this stack frame and never returns, while the + // parent returns immediately from this frame and does all + // post-fork processing in the outer frame. // Declare all variables at top in case any // declarations require heap allocation (e.g., err1). var ( - r1 uintptr - err1 Errno err2 Errno nextfd int i int - p [2]int ) // Record parent PID so child can test if it has died. @@ -94,13 +125,15 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr // synchronizing writing of User ID/Group ID mappings. if sys.UidMappings != nil || sys.GidMappings != nil { if err := forkExecPipe(p[:]); err != nil { - return 0, err.(Errno) + err1 = err.(Errno) + return } } // About to call fork. // No more allocation or calls of non-assembly functions. runtime_BeforeFork() + locked = true switch { case runtime.GOARCH == "amd64" && sys.Cloneflags&CLONE_NEWUSER == 0: r1, err1 = rawVforkSyscall(SYS_CLONE, uintptr(SIGCHLD|CLONE_VFORK|CLONE_VM)|sys.Cloneflags) @@ -109,27 +142,14 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr default: r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0) } - if err1 != 0 { - runtime_AfterFork() - return 0, err1 - } - - if r1 != 0 { - // parent; return PID - runtime_AfterFork() - pid = int(r1) - - if sys.UidMappings != nil || sys.GidMappings != nil { - Close(p[0]) - err := writeUidGidMappings(pid, sys) - if err != nil { - err2 = err.(Errno) - } - RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2)) - Close(p[1]) - } - - return pid, 0 + if err1 != 0 || r1 != 0 { + // If we're in the parent, we must return immediately + // so we're not in the same stack frame as the child. + // This can at most use the return PC, which the child + // will not modify, and the results of + // rawVforkSyscall, which must have been written after + // the child was replaced. + return } // Fork succeeded, now in child. |
