From 30c18878730434027dbefd343aad74963a1fdc48 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 1 Oct 2020 17:22:38 -0400 Subject: runtime,cmd/cgo: simplify C -> Go call path This redesigns the way calls work from C to exported Go functions. It removes several steps from the call path, makes cmd/cgo no longer sensitive to the Go calling convention, and eliminates the use of reflectcall from cgo. In order to avoid generating a large amount of FFI glue between the C and Go ABIs, the cgo tool has long depended on generating a C function that marshals the arguments into a struct, and then the actual ABI switch happens in functions with fixed signatures that simply take a pointer to this struct. In a way, this CL simply pushes this idea further. Currently, the cgo tool generates this argument struct in the exact layout of the Go stack frame and depends on reflectcall to unpack it into the appropriate Go call (even though it's actually reflectcall'ing a function generated by cgo). In this CL, we decouple this struct from the Go stack layout. Instead, cgo generates a Go function that takes the struct, unpacks it, and calls the exported function. Since this generated function has a generic signature (like the rest of the call path), we don't need reflectcall and can instead depend on the Go compiler itself to implement the call to the exported Go function. One complication is that syscall.NewCallback on Windows, which converts a Go function into a C function pointer, depends on cgocallback's current dynamic calling approach since the signatures of the callbacks aren't known statically. For this specific case, we continue to depend on reflectcall. Really, the current approach makes some overly simplistic assumptions about translating the C ABI to the Go ABI. Now we're at least in a much better position to do a proper ABI translation. For comparison, the current cgo call path looks like: GoF (generated C function) -> crosscall2 (in cgo/asm_*.s) -> _cgoexp_GoF (generated Go function) -> cgocallback (in asm_*.s) -> cgocallback_gofunc (in asm_*.s) -> cgocallbackg (in cgocall.go) -> cgocallbackg1 (in cgocall.go) -> reflectcall (in asm_*.s) -> _cgoexpwrap_GoF (generated Go function) -> p.GoF Now the call path looks like: GoF (generated C function) -> crosscall2 (in cgo/asm_*.s) -> cgocallback (in asm_*.s) -> cgocallbackg (in cgocall.go) -> cgocallbackg1 (in cgocall.go) -> _cgoexp_GoF (generated Go function) -> p.GoF Notably: 1. We combine _cgoexp_GoF and _cgoexpwrap_GoF and move the combined operation to the end of the sequence. This combined function also handles reflectcall's previous role. 2. We combined cgocallback and cgocallback_gofunc since the only purpose of having both was to convert a raw PC into a Go function value. We instead construct the Go function value in cgocallbackg1. 3. cgocallbackg1 no longer reaches backwards through the stack to get the arguments to cgocallback_gofunc. Instead, we just pass the arguments down. 4. Currently, we need an explicit msanwrite to mark the results struct as written because reflectcall doesn't do this. Now, the results are written by regular Go assignments, so the Go compiler generates the necessary MSAN annotations. This also means we no longer need to track the size of the arguments frame. Updates #40724, since now we don't need to teach cgo about the register ABI or change how it uses reflectcall. Change-Id: I7840489a2597962aeb670e0c1798a16a7359c94f Reviewed-on: https://go-review.googlesource.com/c/go/+/258938 Trust: Austin Clements Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/runtime/syscall_windows.go | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'src/runtime/syscall_windows.go') diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index 0e2fcfb02d..ff43e7cbed 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -5,6 +5,7 @@ package runtime import ( + "runtime/internal/sys" "unsafe" ) @@ -22,10 +23,7 @@ func (c *wincallbackcontext) setCleanstack(cleanstack bool) { c.cleanstack = cleanstack } -var ( - cbs callbacks - cbctxts **wincallbackcontext = &cbs.ctxt[0] // to simplify access to cbs.ctxt in sys_windows_*.s -) +var cbs callbacks func callbackasm() @@ -53,6 +51,8 @@ func callbackasmAddr(i int) uintptr { return funcPC(callbackasm) + uintptr(i*entrySize) } +const callbackMaxArgs = 64 + //go:linkname compileCallback syscall.compileCallback func compileCallback(fn eface, cleanstack bool) (code uintptr) { if fn._type == nil || (fn._type.kind&kindMask) != kindFunc { @@ -66,6 +66,9 @@ func compileCallback(fn eface, cleanstack bool) (code uintptr) { if ft.out()[0].size != uintptrSize { panic("compileCallback: expected function with one uintptr-sized result") } + if len(ft.in()) > callbackMaxArgs { + panic("compileCallback: too many function arguments") + } argsize := uintptr(0) for _, t := range ft.in() { if t.size > uintptrSize { @@ -106,6 +109,37 @@ func compileCallback(fn eface, cleanstack bool) (code uintptr) { return r } +type callbackArgs struct { + index uintptr + args *uintptr // Arguments in stdcall/cdecl convention, with registers spilled + // Below are out-args from callbackWrap + result uintptr + retPop uintptr // For 386 cdecl, how many bytes to pop on return +} + +// callbackWrap is called by callbackasm to invoke a registered C callback. +func callbackWrap(a *callbackArgs) { + c := cbs.ctxt[a.index] + a.retPop = c.restorestack + + // Convert from stdcall to Go ABI. We assume the stack layout + // is the same, and we just need to make room for the result. + // + // TODO: This isn't a good assumption. For example, a function + // that takes two uint16 arguments will be laid out + // differently by the stdcall and Go ABIs. We should implement + // proper ABI conversion. + var frame [callbackMaxArgs + 1]uintptr + memmove(unsafe.Pointer(&frame), unsafe.Pointer(a.args), c.argsize) + + // Even though this is copying back results, we can pass a nil + // type because those results must not require write barriers. + reflectcall(nil, c.gobody, noescape(unsafe.Pointer(&frame)), sys.PtrSize+uint32(c.argsize), uint32(c.argsize)) + + // Extract the result. + a.result = frame[c.argsize/sys.PtrSize] +} + const _LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 // When available, this function will use LoadLibraryEx with the filename -- cgit v1.3 From bda37a0b8a4e89318901a68492b79cf6531fa2ff Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sat, 3 Oct 2020 19:52:08 -0400 Subject: runtime: tidy compileCallback This makes a few minor cleanups and simplifications to compileCallback. Change-Id: Ibebf4b5ed66fb68bba7c84129c127cd4d8a691fe Reviewed-on: https://go-review.googlesource.com/c/go/+/263269 Trust: Austin Clements Trust: Alex Brainman Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Alex Brainman --- src/runtime/runtime2.go | 8 ---- src/runtime/syscall_windows.go | 86 ++++++++++++++++++++++++++---------------- 2 files changed, 53 insertions(+), 41 deletions(-) (limited to 'src/runtime/syscall_windows.go') diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 21dd7b3949..7bac5fd38d 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -387,14 +387,6 @@ type libcall struct { err uintptr // error number } -// describes how to handle callback -type wincallbackcontext struct { - gobody unsafe.Pointer // go function to call - argsize uintptr // callback arguments size (in bytes) - restorestack uintptr // adjust stack on return by (in bytes) (386 only) - cleanstack bool -} - // Stack describes a Go execution stack. // The bounds of the stack are exactly [lo, hi), // with no implicit data structures on either side. diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index ff43e7cbed..3a34d9ddba 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -9,22 +9,26 @@ import ( "unsafe" ) -type callbacks struct { - lock mutex - ctxt [cb_max]*wincallbackcontext - n int +// cbs stores all registered Go callbacks. +var cbs struct { + lock mutex + ctxt [cb_max]winCallback + index map[winCallbackKey]int + n int } -func (c *wincallbackcontext) isCleanstack() bool { - return c.cleanstack +// winCallback records information about a registered Go callback. +type winCallback struct { + fn *funcval // Go function + argsize uintptr // Callback arguments size (in bytes) + cdecl bool // C function uses cdecl calling convention } -func (c *wincallbackcontext) setCleanstack(cleanstack bool) { - c.cleanstack = cleanstack +type winCallbackKey struct { + fn *funcval + cdecl bool } -var cbs callbacks - func callbackasm() // callbackasmAddr returns address of runtime.callbackasm @@ -53,8 +57,20 @@ func callbackasmAddr(i int) uintptr { const callbackMaxArgs = 64 +// compileCallback converts a Go function fn into a C function pointer +// that can be passed to Windows APIs. +// +// On 386, if cdecl is true, the returned C function will use the +// cdecl calling convention; otherwise, it will use stdcall. On amd64, +// it always uses fastcall. On arm, it always uses the ARM convention. +// //go:linkname compileCallback syscall.compileCallback -func compileCallback(fn eface, cleanstack bool) (code uintptr) { +func compileCallback(fn eface, cdecl bool) (code uintptr) { + if GOARCH != "386" { + // cdecl is only meaningful on 386. + cdecl = false + } + if fn._type == nil || (fn._type.kind&kindMask) != kindFunc { panic("compileCallback: expected function with one uintptr-sized result") } @@ -77,36 +93,32 @@ func compileCallback(fn eface, cleanstack bool) (code uintptr) { argsize += uintptrSize } + key := winCallbackKey{(*funcval)(fn.data), cdecl} + lock(&cbs.lock) // We don't unlock this in a defer because this is used from the system stack. - n := cbs.n - for i := 0; i < n; i++ { - if cbs.ctxt[i].gobody == fn.data && cbs.ctxt[i].isCleanstack() == cleanstack { - r := callbackasmAddr(i) - unlock(&cbs.lock) - return r - } - } - if n >= cb_max { + // Check if this callback is already registered. + if n, ok := cbs.index[key]; ok { unlock(&cbs.lock) - throw("too many callback functions") + return callbackasmAddr(n) } - c := new(wincallbackcontext) - c.gobody = fn.data - c.argsize = argsize - c.setCleanstack(cleanstack) - if cleanstack && argsize != 0 { - c.restorestack = argsize - } else { - c.restorestack = 0 + // Register the callback. + if cbs.index == nil { + cbs.index = make(map[winCallbackKey]int) } + n := cbs.n + if n >= len(cbs.ctxt) { + unlock(&cbs.lock) + throw("too many callback functions") + } + c := winCallback{key.fn, argsize, cdecl} cbs.ctxt[n] = c + cbs.index[key] = n cbs.n++ - r := callbackasmAddr(n) unlock(&cbs.lock) - return r + return callbackasmAddr(n) } type callbackArgs struct { @@ -120,7 +132,15 @@ type callbackArgs struct { // callbackWrap is called by callbackasm to invoke a registered C callback. func callbackWrap(a *callbackArgs) { c := cbs.ctxt[a.index] - a.retPop = c.restorestack + if GOARCH == "386" { + if c.cdecl { + // In cdecl, the callee is responsible for + // popping its arguments. + a.retPop = c.argsize + } else { + a.retPop = 0 + } + } // Convert from stdcall to Go ABI. We assume the stack layout // is the same, and we just need to make room for the result. @@ -134,7 +154,7 @@ func callbackWrap(a *callbackArgs) { // Even though this is copying back results, we can pass a nil // type because those results must not require write barriers. - reflectcall(nil, c.gobody, noescape(unsafe.Pointer(&frame)), sys.PtrSize+uint32(c.argsize), uint32(c.argsize)) + reflectcall(nil, unsafe.Pointer(c.fn), noescape(unsafe.Pointer(&frame)), sys.PtrSize+uint32(c.argsize), uint32(c.argsize)) // Extract the result. a.result = frame[c.argsize/sys.PtrSize] -- cgit v1.3 From 532c199ee56cdbc2cfd12da1c1cfb3359b122c7c Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sat, 17 Oct 2020 18:42:03 -0400 Subject: runtime: fix sub-uintptr-sized Windows callback arguments The Windows callback support accepts Go functions with arguments that are uintptr-sized or smaller. However, it doesn't implement smaller arguments correctly. It assumes the Windows arguments layout is equivalent to the Go argument layout. This is often true, but because Windows C ABIs pad arguments to word size, while Go packs arguments, the layout is different if there are multiple sub-word-size arguments in a row. For example, a function with two uint16 arguments will have a two-word C argument frame, but only a 4 byte Go argument frame. There are also subtleties surrounding floating-point register arguments that it doesn't handle correctly. To fix this, when constructing a callback, we examine the Go function's signature to construct a mapping between the C argument frame and the Go argument frame. When the callback is invoked, we use this mapping to build the Go argument frame and copy the result back. This adds several test cases to TestStdcallAndCDeclCallbacks that exercise more complex function signatures. These all fail with the current code, but work with this CL. In addition to fixing these callback types, this is also a step toward the Go register ABI (#40724), which is going to make the ABI translation more complex. Change-Id: I19fb1681b659d9fd528ffd5e88912bebb95da052 Reviewed-on: https://go-review.googlesource.com/c/go/+/263271 Trust: Austin Clements Trust: Alex Brainman Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Michael Knyszek Reviewed-by: Alex Brainman --- src/runtime/syscall_windows.go | 143 +++++++++++++++++++++++++++--------- src/runtime/syscall_windows_test.go | 32 +++++++- 2 files changed, 136 insertions(+), 39 deletions(-) (limited to 'src/runtime/syscall_windows.go') diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index 3a34d9ddba..21f2452b5a 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -19,9 +19,32 @@ var cbs struct { // winCallback records information about a registered Go callback. type winCallback struct { - fn *funcval // Go function - argsize uintptr // Callback arguments size (in bytes) - cdecl bool // C function uses cdecl calling convention + fn *funcval // Go function + retPop uintptr // For 386 cdecl, how many bytes to pop on return + + // abiMap specifies how to translate from a C frame to a Go + // frame. This does not specify how to translate back because + // the result is always a uintptr. If the C ABI is fastcall, + // this assumes the four fastcall registers were first spilled + // to the shadow space. + abiMap []abiPart + // retOffset is the offset of the uintptr-sized result in the Go + // frame. + retOffset uintptr +} + +// abiPart encodes a step in translating between calling ABIs. +type abiPart struct { + src, dst uintptr + len uintptr +} + +func (a *abiPart) tryMerge(b abiPart) bool { + if a.src+a.len == b.src && a.dst+a.len == b.dst { + a.len += b.len + return true + } + return false } type winCallbackKey struct { @@ -55,7 +78,7 @@ func callbackasmAddr(i int) uintptr { return funcPC(callbackasm) + uintptr(i*entrySize) } -const callbackMaxArgs = 64 +const callbackMaxFrame = 64 * sys.PtrSize // compileCallback converts a Go function fn into a C function pointer // that can be passed to Windows APIs. @@ -75,22 +98,81 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { panic("compileCallback: expected function with one uintptr-sized result") } ft := (*functype)(unsafe.Pointer(fn._type)) + + // Check arguments and construct ABI translation. + var abiMap []abiPart + var src, dst uintptr + for _, t := range ft.in() { + if t.size > sys.PtrSize { + // We don't support this right now. In + // stdcall/cdecl, 64-bit ints and doubles are + // passed as two words (little endian); and + // structs are pushed on the stack. In + // fastcall, arguments larger than the word + // size are passed by reference. + panic("compileCallback: argument size is larger than uintptr") + } + if k := t.kind & kindMask; GOARCH == "amd64" && (k == kindFloat32 || k == kindFloat64) { + // In fastcall, floating-point arguments in + // the first four positions are passed in + // floating-point registers, which we don't + // currently spill. + panic("compileCallback: float arguments not supported") + } + + // The Go ABI aligns arguments. + dst = alignUp(dst, uintptr(t.align)) + // In the C ABI, we're already on a word boundary. + // Also, sub-word-sized fastcall register arguments + // are stored to the least-significant bytes of the + // argument word and all supported Windows + // architectures are little endian, so src is already + // pointing to the right place for smaller arguments. + + // Copy just the size of the argument. Note that this + // could be a small by-value struct, but C and Go + // struct layouts are compatible, so we can copy these + // directly, too. + part := abiPart{src, dst, t.size} + // Add this step to the adapter. + if len(abiMap) == 0 || !abiMap[len(abiMap)-1].tryMerge(part) { + abiMap = append(abiMap, part) + } + + // cdecl, stdcall, and fastcall pad arguments to word size. + src += sys.PtrSize + // The Go ABI packs arguments. + dst += t.size + } + // The Go ABI aligns the result to the word size. src is + // already aligned. + dst = alignUp(dst, sys.PtrSize) + retOffset := dst + if len(ft.out()) != 1 { panic("compileCallback: expected function with one uintptr-sized result") } - uintptrSize := unsafe.Sizeof(uintptr(0)) - if ft.out()[0].size != uintptrSize { + if ft.out()[0].size != sys.PtrSize { panic("compileCallback: expected function with one uintptr-sized result") } - if len(ft.in()) > callbackMaxArgs { - panic("compileCallback: too many function arguments") + if k := ft.out()[0].kind & kindMask; k == kindFloat32 || k == kindFloat64 { + // In cdecl and stdcall, float results are returned in + // ST(0). In fastcall, they're returned in XMM0. + // Either way, it's not AX. + panic("compileCallback: float results not supported") } - argsize := uintptr(0) - for _, t := range ft.in() { - if t.size > uintptrSize { - panic("compileCallback: argument size is larger than uintptr") - } - argsize += uintptrSize + // Make room for the uintptr-sized result. + dst += sys.PtrSize + + if dst > callbackMaxFrame { + panic("compileCallback: function argument frame too large") + } + + // For cdecl, the callee is responsible for popping its + // arguments from the C stack. + var retPop uintptr + if cdecl { + retPop = src } key := winCallbackKey{(*funcval)(fn.data), cdecl} @@ -112,7 +194,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { unlock(&cbs.lock) throw("too many callback functions") } - c := winCallback{key.fn, argsize, cdecl} + c := winCallback{key.fn, retPop, abiMap, retOffset} cbs.ctxt[n] = c cbs.index[key] = n cbs.n++ @@ -123,7 +205,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { type callbackArgs struct { index uintptr - args *uintptr // Arguments in stdcall/cdecl convention, with registers spilled + args unsafe.Pointer // Arguments in stdcall/cdecl convention, with registers spilled // Below are out-args from callbackWrap result uintptr retPop uintptr // For 386 cdecl, how many bytes to pop on return @@ -132,32 +214,21 @@ type callbackArgs struct { // callbackWrap is called by callbackasm to invoke a registered C callback. func callbackWrap(a *callbackArgs) { c := cbs.ctxt[a.index] - if GOARCH == "386" { - if c.cdecl { - // In cdecl, the callee is responsible for - // popping its arguments. - a.retPop = c.argsize - } else { - a.retPop = 0 - } - } + a.retPop = c.retPop - // Convert from stdcall to Go ABI. We assume the stack layout - // is the same, and we just need to make room for the result. - // - // TODO: This isn't a good assumption. For example, a function - // that takes two uint16 arguments will be laid out - // differently by the stdcall and Go ABIs. We should implement - // proper ABI conversion. - var frame [callbackMaxArgs + 1]uintptr - memmove(unsafe.Pointer(&frame), unsafe.Pointer(a.args), c.argsize) + // Convert from stdcall to Go ABI. + var frame [callbackMaxFrame]byte + goArgs := unsafe.Pointer(&frame) + for _, part := range c.abiMap { + memmove(add(goArgs, part.dst), add(a.args, part.src), part.len) + } // Even though this is copying back results, we can pass a nil // type because those results must not require write barriers. - reflectcall(nil, unsafe.Pointer(c.fn), noescape(unsafe.Pointer(&frame)), sys.PtrSize+uint32(c.argsize), uint32(c.argsize)) + reflectcall(nil, unsafe.Pointer(c.fn), noescape(goArgs), uint32(c.retOffset)+sys.PtrSize, uint32(c.retOffset)) // Extract the result. - a.result = frame[c.argsize/sys.PtrSize] + a.result = *(*uintptr)(unsafe.Pointer(&frame[c.retOffset])) } const _LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go index cb942beb3e..7705d2a017 100644 --- a/src/runtime/syscall_windows_test.go +++ b/src/runtime/syscall_windows_test.go @@ -317,9 +317,13 @@ func (f cbFunc) cSrc(w io.Writer, cdecl bool) { cArgs := make([]string, t.NumIn()) for i := range cTypes { // We included stdint.h, so this works for all sized - // integer types. + // integer types, and uint8Pair_t. cTypes[i] = t.In(i).Name() + "_t" - cArgs[i] = fmt.Sprintf("%d", i+1) + if t.In(i).Name() == "uint8Pair" { + cArgs[i] = fmt.Sprintf("(uint8Pair_t){%d,1}", i) + } else { + cArgs[i] = fmt.Sprintf("%d", i+1) + } } fmt.Fprintf(w, ` typedef uintptr_t %s (*%s)(%s); @@ -341,6 +345,8 @@ func (f cbFunc) testOne(t *testing.T, dll *syscall.DLL, cdecl bool, cb uintptr) } } +type uint8Pair struct{ x, y uint8 } + var cbFuncs = []cbFunc{ {func(i1, i2 uintptr) uintptr { return i1 + i2 @@ -366,6 +372,23 @@ var cbFuncs = []cbFunc{ {func(i1, i2, i3, i4, i5, i6, i7, i8, i9 uintptr) uintptr { return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 }}, + + // Non-uintptr parameters. + {func(i1, i2, i3, i4, i5, i6, i7, i8, i9 uint8) uintptr { + return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9) + }}, + {func(i1, i2, i3, i4, i5, i6, i7, i8, i9 uint16) uintptr { + return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9) + }}, + {func(i1, i2, i3, i4, i5, i6, i7, i8, i9 int8) uintptr { + return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9) + }}, + {func(i1 int8, i2 int16, i3 int32, i4, i5 uintptr) uintptr { + return uintptr(i1) + uintptr(i2) + uintptr(i3) + i4 + i5 + }}, + {func(i1, i2, i3, i4, i5 uint8Pair) uintptr { + return uintptr(i1.x + i1.y + i2.x + i2.y + i3.x + i3.y + i4.x + i4.y + i5.x + i5.y) + }}, } type cbDLL struct { @@ -380,7 +403,10 @@ func (d *cbDLL) makeSrc(t *testing.T, path string) { } defer f.Close() - fmt.Fprintf(f, "#include \n\n") + fmt.Fprint(f, ` +#include +typedef struct { uint8_t x, y; } uint8Pair_t; +`) for _, cbf := range cbFuncs { cbf.cSrc(f, false) cbf.cSrc(f, true) -- cgit v1.3 From 4d048194cd0323e1deffce96e88e8a672a08732d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 18 Nov 2020 21:38:57 +0100 Subject: runtime: support new callbackasm1 calling convention on windows/arm This updates the callbacks implementation on windows/arm for the changes made in CL 258938. At the time, that was left as a TODO. At the same time, it also extends the previous support for only 4 arguments to also support additional arguments on the stack. This is required for functions like SetWinEventHook, which take 7 arguments. It does this by pushing r0-r3 onto the stack before the normal prologue, and then pointing the args struct to that location. This is derived from CL 270077 and CL 270078. Updates #40724. Fixes #42591. Change-Id: Icc199e7f2c24205e41be4e00015283c7e2a9b797 Reviewed-on: https://go-review.googlesource.com/c/go/+/271178 Run-TryBot: Jason A. Donenfeld Trust: Jason A. Donenfeld Reviewed-by: Austin Clements --- src/runtime/sys_windows_arm.s | 72 +++++++++++++++++++----------------------- src/runtime/syscall_windows.go | 29 +++++++++++++---- 2 files changed, 56 insertions(+), 45 deletions(-) (limited to 'src/runtime/syscall_windows.go') diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s index 3fc6d27cb0..fe267080cc 100644 --- a/src/runtime/sys_windows_arm.s +++ b/src/runtime/sys_windows_arm.s @@ -314,48 +314,42 @@ TEXT runtime·externalthreadhandler(SB),NOSPLIT|NOFRAME,$0 GLOBL runtime·cbctxts(SB), NOPTR, $4 TEXT runtime·callbackasm1(SB),NOSPLIT|NOFRAME,$0 - // TODO(austin): This needs to be converted to match changes - // in cgocallback, but I have no way to test. See CL 258938, - // and callbackasm1 on amd64 and 386. - MOVM.DB.W [R4-R11, R14], (R13) // push {r4-r11, lr} - SUB $36, R13 // space for locals - - // save callback arguments to stack. We currently support up to 4 arguments - ADD $16, R13, R4 - MOVM.IA [R0-R3], (R4) - - // load cbctxts[i]. The trampoline in zcallback_windows.s puts the callback - // index in R12 - MOVW runtime·cbctxts(SB), R4 - MOVW R12<<2(R4), R4 // R4 holds pointer to wincallbackcontext structure - - // extract callback context - MOVW wincallbackcontext_argsize(R4), R5 - MOVW wincallbackcontext_gobody(R4), R4 - - // we currently support up to 4 arguments - CMP $(4 * 4), R5 - BL.GT runtime·abort(SB) - - // extend argsize by size of return value - ADD $4, R5 - - // Build 'type args struct' - MOVW R4, 4(R13) // fn - ADD $16, R13, R0 // arg (points to r0-r3, ret on stack) - MOVW R0, 8(R13) - MOVW R5, 12(R13) // argsize + // On entry, the trampoline in zcallback_windows_arm.s left + // the callback index in R12 (which is volatile in the C ABI). + + // Push callback register arguments r0-r3. We do this first so + // they're contiguous with stack arguments. + MOVM.DB.W [R0-R3], (R13) + // Push C callee-save registers r4-r11 and lr. + MOVM.DB.W [R4-R11, R14], (R13) + SUB $(16 + callbackArgs__size), R13 // space for locals + + // Create a struct callbackArgs on our stack. + MOVW R12, (16+callbackArgs_index)(R13) // callback index + MOVW $(16+callbackArgs__size+4*9)(R13), R0 + MOVW R0, (16+callbackArgs_args)(R13) // address of args vector + MOVW $0, R0 + MOVW R0, (16+callbackArgs_result)(R13) // result + // Prepare for entry to Go. BL runtime·load_g(SB) - BL runtime·cgocallback_gofunc(SB) - ADD $16, R13, R0 // load arg - MOVW 12(R13), R1 // load argsize - SUB $4, R1 // offset to return value - MOVW R1<<0(R0), R0 // load return value - - ADD $36, R13 // free locals - MOVM.IA.W (R13), [R4-R11, R15] // pop {r4-r11, pc} + // Call cgocallback, which will call callbackWrap(frame). + MOVW $0, R0 + MOVW R0, 12(R13) // context + MOVW $16(R13), R1 // R1 = &callbackArgs{...} + MOVW R1, 8(R13) // frame (address of callbackArgs) + MOVW $·callbackWrap(SB), R1 + MOVW R1, 4(R13) // PC of function to call + BL runtime·cgocallback(SB) + + // Get callback result. + MOVW (16+callbackArgs_result)(R13), R0 + + ADD $(16 + callbackArgs__size), R13 // free locals + MOVM.IA.W (R13), [R4-R11, R12] // pop {r4-r11, lr=>r12} + ADD $(4*4), R13 // skip r0-r3 + B (R12) // return // uint32 tstart_stdcall(M *newm); TEXT runtime·tstart_stdcall(SB),NOSPLIT|NOFRAME,$0 diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index 21f2452b5a..7835b492f7 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -109,14 +109,19 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { // passed as two words (little endian); and // structs are pushed on the stack. In // fastcall, arguments larger than the word - // size are passed by reference. + // size are passed by reference. On arm, + // 8-byte aligned arguments round up to the + // next even register and can be split across + // registers and the stack. panic("compileCallback: argument size is larger than uintptr") } - if k := t.kind & kindMask; GOARCH == "amd64" && (k == kindFloat32 || k == kindFloat64) { + if k := t.kind & kindMask; (GOARCH == "amd64" || GOARCH == "arm") && (k == kindFloat32 || k == kindFloat64) { // In fastcall, floating-point arguments in // the first four positions are passed in // floating-point registers, which we don't - // currently spill. + // currently spill. arm passes floating-point + // arguments in VFP registers, which we also + // don't support. panic("compileCallback: float arguments not supported") } @@ -128,6 +133,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { // argument word and all supported Windows // architectures are little endian, so src is already // pointing to the right place for smaller arguments. + // The same is true on arm. // Copy just the size of the argument. Note that this // could be a small by-value struct, but C and Go @@ -139,7 +145,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { abiMap = append(abiMap, part) } - // cdecl, stdcall, and fastcall pad arguments to word size. + // cdecl, stdcall, fastcall, and arm pad arguments to word size. src += sys.PtrSize // The Go ABI packs arguments. dst += t.size @@ -205,7 +211,18 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { type callbackArgs struct { index uintptr - args unsafe.Pointer // Arguments in stdcall/cdecl convention, with registers spilled + // args points to the argument block. + // + // For cdecl and stdcall, all arguments are on the stack. + // + // For fastcall, the trampoline spills register arguments to + // the reserved spill slots below the stack arguments, + // resulting in a layout equivalent to stdcall. + // + // For arm, the trampoline stores the register arguments just + // below the stack arguments, so again we can treat it as one + // big stack arguments frame. + args unsafe.Pointer // Below are out-args from callbackWrap result uintptr retPop uintptr // For 386 cdecl, how many bytes to pop on return @@ -216,7 +233,7 @@ func callbackWrap(a *callbackArgs) { c := cbs.ctxt[a.index] a.retPop = c.retPop - // Convert from stdcall to Go ABI. + // Convert from C to Go ABI. var frame [callbackMaxFrame]byte goArgs := unsafe.Pointer(&frame) for _, part := range c.abiMap { -- cgit v1.3