From 30c18878730434027dbefd343aad74963a1fdc48 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 1 Oct 2020 17:22:38 -0400 Subject: runtime,cmd/cgo: simplify C -> Go call path This redesigns the way calls work from C to exported Go functions. It removes several steps from the call path, makes cmd/cgo no longer sensitive to the Go calling convention, and eliminates the use of reflectcall from cgo. In order to avoid generating a large amount of FFI glue between the C and Go ABIs, the cgo tool has long depended on generating a C function that marshals the arguments into a struct, and then the actual ABI switch happens in functions with fixed signatures that simply take a pointer to this struct. In a way, this CL simply pushes this idea further. Currently, the cgo tool generates this argument struct in the exact layout of the Go stack frame and depends on reflectcall to unpack it into the appropriate Go call (even though it's actually reflectcall'ing a function generated by cgo). In this CL, we decouple this struct from the Go stack layout. Instead, cgo generates a Go function that takes the struct, unpacks it, and calls the exported function. Since this generated function has a generic signature (like the rest of the call path), we don't need reflectcall and can instead depend on the Go compiler itself to implement the call to the exported Go function. One complication is that syscall.NewCallback on Windows, which converts a Go function into a C function pointer, depends on cgocallback's current dynamic calling approach since the signatures of the callbacks aren't known statically. For this specific case, we continue to depend on reflectcall. Really, the current approach makes some overly simplistic assumptions about translating the C ABI to the Go ABI. Now we're at least in a much better position to do a proper ABI translation. For comparison, the current cgo call path looks like: GoF (generated C function) -> crosscall2 (in cgo/asm_*.s) -> _cgoexp_GoF (generated Go function) -> cgocallback (in asm_*.s) -> cgocallback_gofunc (in asm_*.s) -> cgocallbackg (in cgocall.go) -> cgocallbackg1 (in cgocall.go) -> reflectcall (in asm_*.s) -> _cgoexpwrap_GoF (generated Go function) -> p.GoF Now the call path looks like: GoF (generated C function) -> crosscall2 (in cgo/asm_*.s) -> cgocallback (in asm_*.s) -> cgocallbackg (in cgocall.go) -> cgocallbackg1 (in cgocall.go) -> _cgoexp_GoF (generated Go function) -> p.GoF Notably: 1. We combine _cgoexp_GoF and _cgoexpwrap_GoF and move the combined operation to the end of the sequence. This combined function also handles reflectcall's previous role. 2. We combined cgocallback and cgocallback_gofunc since the only purpose of having both was to convert a raw PC into a Go function value. We instead construct the Go function value in cgocallbackg1. 3. cgocallbackg1 no longer reaches backwards through the stack to get the arguments to cgocallback_gofunc. Instead, we just pass the arguments down. 4. Currently, we need an explicit msanwrite to mark the results struct as written because reflectcall doesn't do this. Now, the results are written by regular Go assignments, so the Go compiler generates the necessary MSAN annotations. This also means we no longer need to track the size of the arguments frame. Updates #40724, since now we don't need to teach cgo about the register ABI or change how it uses reflectcall. Change-Id: I7840489a2597962aeb670e0c1798a16a7359c94f Reviewed-on: https://go-review.googlesource.com/c/go/+/258938 Trust: Austin Clements Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/runtime/asm_386.s | 56 +++++++++++++++++++-------------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) (limited to 'src/runtime/asm_386.s') diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s index 11863fba39..a54b68e03d 100644 --- a/src/runtime/asm_386.s +++ b/src/runtime/asm_386.s @@ -702,25 +702,9 @@ nosave: MOVL AX, ret+8(FP) RET -// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt) -// Turn the fn into a Go func (by taking its address) and call -// cgocallback_gofunc. -TEXT runtime·cgocallback(SB),NOSPLIT,$16-16 - LEAL fn+0(FP), AX - MOVL AX, 0(SP) - MOVL frame+4(FP), AX - MOVL AX, 4(SP) - MOVL framesize+8(FP), AX - MOVL AX, 8(SP) - MOVL ctxt+12(FP), AX - MOVL AX, 12(SP) - MOVL $runtime·cgocallback_gofunc(SB), AX - CALL AX - RET - -// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt) +// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16 +TEXT ·cgocallback(SB),NOSPLIT,$16-12 // Frame size must match commented places below NO_LOCAL_POINTERS // If g is nil, Go did not create the current thread. @@ -780,34 +764,36 @@ havem: // save that information (m->curg->sched) so we can restore it. // We can restore m->curg->sched.sp easily, because calling // runtime.cgocallbackg leaves SP unchanged upon return. - // To save m->curg->sched.pc, we push it onto the stack. - // This has the added benefit that it looks to the traceback - // routine like cgocallbackg is going to return to that - // PC (because the frame we allocate below has the same - // size as cgocallback_gofunc's frame declared above) - // so that the traceback will seamlessly trace back into - // the earlier calls. + // To save m->curg->sched.pc, we push it onto the curg stack and + // open a frame the same size as cgocallback's g0 frame. + // Once we switch to the curg stack, the pushed PC will appear + // to be the return PC of cgocallback, so that the traceback + // will seamlessly trace back into the earlier calls. // - // In the new goroutine, 4(SP) holds the saved oldm (DX) register. - // 8(SP) is unused. + // In the new goroutine, 12(SP) holds the saved oldm (DX) register. MOVL m_curg(BP), SI MOVL SI, g(CX) MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI MOVL (g_sched+gobuf_pc)(SI), BP - MOVL BP, -4(DI) - MOVL ctxt+12(FP), CX - LEAL -(4+12)(DI), SP - MOVL DX, 4(SP) - MOVL CX, 0(SP) + MOVL BP, -4(DI) // "push" return PC on the g stack + // Gather our arguments into registers. + MOVL fn+0(FP), AX + MOVL frame+4(FP), BX + MOVL ctxt+8(FP), CX + LEAL -(4+16)(DI), SP // Must match declared frame size + MOVL DX, 12(SP) + MOVL AX, 0(SP) + MOVL BX, 4(SP) + MOVL CX, 8(SP) CALL runtime·cgocallbackg(SB) - MOVL 4(SP), DX + MOVL 12(SP), DX // Restore g->sched (== m->curg->sched) from saved values. get_tls(CX) MOVL g(CX), SI - MOVL 12(SP), BP + MOVL 16(SP), BP // Must match declared frame size MOVL BP, (g_sched+gobuf_pc)(SI) - LEAL (12+4)(SP), DI + LEAL (16+4)(SP), DI // Must match declared frame size MOVL DI, (g_sched+gobuf_sp)(SI) // Switch back to m->g0's stack and restore m->g0->sched.sp. -- cgit v1.3 From c91dffbc9aeaacd087eb0c0c3f718739bc5f8c4a Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 7 Oct 2020 22:53:52 -0400 Subject: runtime: tidy cgocallback On amd64 and 386, we have a very roundabout way of remembering that we need to dropm on return that currently involves saving a zero to needm's argument slot and later bringing it back. Just store the zero. This also makes amd64 and 386 more consistent with cgocallback on all other platforms: rather than saving the old M to the G stack, they now save it to a named slot on the G0 stack. The needm function no longer needs a dummy argument to get the SP, so we drop that. Change-Id: I7e84bb4a5ff9552de70dcf41d8accf02310535e7 Reviewed-on: https://go-review.googlesource.com/c/go/+/263268 Trust: Austin Clements Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/runtime/asm_386.s | 18 +++++++----------- src/runtime/asm_amd64.s | 16 ++++++---------- src/runtime/proc.go | 6 +++--- src/runtime/signal_unix.go | 6 +++--- 4 files changed, 19 insertions(+), 27 deletions(-) (limited to 'src/runtime/asm_386.s') diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s index a54b68e03d..fa3b1be339 100644 --- a/src/runtime/asm_386.s +++ b/src/runtime/asm_386.s @@ -704,7 +704,7 @@ nosave: // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback(SB),NOSPLIT,$16-12 // Frame size must match commented places below +TEXT ·cgocallback(SB),NOSPLIT,$12-12 // Frame size must match commented places below NO_LOCAL_POINTERS // If g is nil, Go did not create the current thread. @@ -722,13 +722,12 @@ TEXT ·cgocallback(SB),NOSPLIT,$16-12 // Frame size must match commented places CMPL BP, $0 JEQ needm MOVL g_m(BP), BP - MOVL BP, DX // saved copy of oldm + MOVL BP, savedm-4(SP) // saved copy of oldm JMP havem needm: - MOVL $0, 0(SP) MOVL $runtime·needm(SB), AX CALL AX - MOVL 0(SP), DX + MOVL $0, savedm-4(SP) // dropm on return get_tls(CX) MOVL g(CX), BP MOVL g_m(BP), BP @@ -769,8 +768,6 @@ havem: // Once we switch to the curg stack, the pushed PC will appear // to be the return PC of cgocallback, so that the traceback // will seamlessly trace back into the earlier calls. - // - // In the new goroutine, 12(SP) holds the saved oldm (DX) register. MOVL m_curg(BP), SI MOVL SI, g(CX) MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI @@ -780,20 +777,18 @@ havem: MOVL fn+0(FP), AX MOVL frame+4(FP), BX MOVL ctxt+8(FP), CX - LEAL -(4+16)(DI), SP // Must match declared frame size - MOVL DX, 12(SP) + LEAL -(4+12)(DI), SP // Must match declared frame size MOVL AX, 0(SP) MOVL BX, 4(SP) MOVL CX, 8(SP) CALL runtime·cgocallbackg(SB) - MOVL 12(SP), DX // Restore g->sched (== m->curg->sched) from saved values. get_tls(CX) MOVL g(CX), SI - MOVL 16(SP), BP // Must match declared frame size + MOVL 12(SP), BP // Must match declared frame size MOVL BP, (g_sched+gobuf_pc)(SI) - LEAL (16+4)(SP), DI // Must match declared frame size + LEAL (12+4)(SP), DI // Must match declared frame size MOVL DI, (g_sched+gobuf_sp)(SI) // Switch back to m->g0's stack and restore m->g0->sched.sp. @@ -809,6 +804,7 @@ havem: // If the m on entry was nil, we called needm above to borrow an m // for the duration of the call. Since the call is over, return it with dropm. + MOVL savedm-4(SP), DX CMPL DX, $0 JNE 3(PC) MOVL $runtime·dropm(SB), AX diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 3d5d9c4d58..19a3bb2d7d 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -693,7 +693,7 @@ nosave: // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback(SB),NOSPLIT,$32-24 +TEXT ·cgocallback(SB),NOSPLIT,$24-24 NO_LOCAL_POINTERS // If g is nil, Go did not create the current thread. @@ -711,13 +711,12 @@ TEXT ·cgocallback(SB),NOSPLIT,$32-24 CMPQ BX, $0 JEQ needm MOVQ g_m(BX), BX - MOVQ BX, R8 // holds oldm until end of function + MOVQ BX, savedm-8(SP) // saved copy of oldm JMP havem needm: - MOVQ $0, 0(SP) - MOVQ $runtime·needm(SB), AX + MOVQ $runtime·needm(SB), AX CALL AX - MOVQ 0(SP), R8 + MOVQ $0, savedm-8(SP) // dropm on return get_tls(CX) MOVQ g(CX), BX MOVQ g_m(BX), BX @@ -758,8 +757,6 @@ havem: // Once we switch to the curg stack, the pushed PC will appear // to be the return PC of cgocallback, so that the traceback // will seamlessly trace back into the earlier calls. - // - // In the new goroutine, 24(SP) holds the saved R8. MOVQ m_curg(BX), SI MOVQ SI, g(CX) MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI @@ -776,12 +773,10 @@ havem: SUBQ AX, DI // Allocate the same frame size on the g stack MOVQ DI, SP - MOVQ R8, 24(SP) MOVQ BX, 0(SP) MOVQ CX, 8(SP) MOVQ DX, 16(SP) CALL runtime·cgocallbackg(SB) - MOVQ 24(SP), R8 // Compute the size of the frame again. FP and SP have // completely different values here than they did above, @@ -811,7 +806,8 @@ havem: // If the m on entry was nil, we called needm above to borrow an m // for the duration of the call. Since the call is over, return it with dropm. - CMPQ R8, $0 + MOVQ savedm-8(SP), BX + CMPQ BX, $0 JNE 3(PC) MOVQ $runtime·dropm(SB), AX CALL AX diff --git a/src/runtime/proc.go b/src/runtime/proc.go index c629fd45f0..ec4e6d8751 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1695,7 +1695,7 @@ func allocm(_p_ *p, fn func(), id int64) *m { // When the callback is done with the m, it calls dropm to // put the m back on the list. //go:nosplit -func needm(x byte) { +func needm() { if (iscgo || GOOS == "windows") && !cgoHasExtraM { // Can happen if C/C++ code calls Go from a global ctor. // Can also happen on Windows if a global ctor uses a @@ -1740,8 +1740,8 @@ func needm(x byte) { // which is more than enough for us. setg(mp.g0) _g_ := getg() - _g_.stack.hi = uintptr(noescape(unsafe.Pointer(&x))) + 1024 - _g_.stack.lo = uintptr(noescape(unsafe.Pointer(&x))) - 32*1024 + _g_.stack.hi = getcallersp() + 1024 + _g_.stack.lo = getcallersp() - 32*1024 _g_.stackguard0 = _g_.stack.lo + _StackGuard // Initialize this thread to use the m. diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index e8b6f95d8f..9318a9b8bc 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -504,14 +504,14 @@ func adjustSignalStack(sig uint32, mp *m, gsigStack *gsignalStack) bool { sigaltstack(nil, &st) if st.ss_flags&_SS_DISABLE != 0 { setg(nil) - needm(0) + needm() noSignalStack(sig) dropm() } stsp := uintptr(unsafe.Pointer(st.ss_sp)) if sp < stsp || sp >= stsp+st.ss_size { setg(nil) - needm(0) + needm() sigNotOnStack(sig) dropm() } @@ -951,7 +951,7 @@ func badsignal(sig uintptr, c *sigctxt) { exit(2) *(*uintptr)(unsafe.Pointer(uintptr(123))) = 2 } - needm(0) + needm() if !sigsend(uint32(sig)) { // A foreign thread received the signal sig, and the // Go code does not want to handle it. -- cgit v1.3