diff options
| author | Cherry Mui <cherryyz@google.com> | 2025-09-03 11:19:43 -0400 |
|---|---|---|
| committer | Cherry Mui <cherryyz@google.com> | 2025-09-03 11:19:43 -0400 |
| commit | 7c8b9115bc578a1f6577d2239fd5e9d8db100531 (patch) | |
| tree | 2acffa10f5d90ab07d62eb4cd0670a979bf1062a /src/runtime | |
| parent | 91253515831d1d51f9a998a743309c94e1fc4e1e (diff) | |
| parent | 4c4cefc19a16924f3aa7135d3fdc6d1687fe26c7 (diff) | |
| download | go-7c8b9115bc578a1f6577d2239fd5e9d8db100531.tar.xz | |
[dev.simd] all: merge master (4c4cefc) into dev.simd
Merge List:
+ 2025-09-03 4c4cefc19a cmd/gofmt: simplify logic to process arguments
+ 2025-09-03 925a3cdcd1 unicode/utf8: make DecodeRune{,InString} inlineable
+ 2025-09-03 3e596d448f math: rename Modf parameter int to integer
+ 2025-09-02 2a7f1d47b0 runtime: use one more address bit for tagged pointers
+ 2025-09-02 b09068041a cmd/dist: run racebench tests only in longtest mode
+ 2025-09-02 355370ac52 runtime: add comment for concatstring2
+ 2025-09-02 1eec830f54 go/doc: linkify interface methods
+ 2025-08-31 7bba745820 cmd/compile: use generated loops instead of DUFFZERO on loong64
+ 2025-08-31 882335e2cb cmd/internal/obj/loong64: add LDPTR.{W/D} and STPTR.{W/D} instructions support
+ 2025-08-31 d4b17f5869 internal/runtime/atomic: reset wrong jump target in Cas{,64} on loong64
+ 2025-08-31 6a08e80399 net/http: skip redirecting in ServeMux when URL path for CONNECT is empty
+ 2025-08-29 8bcda6c79d runtime/race: add race detector support for linux/riscv64
+ 2025-08-29 8377adafc5 cmd/cgo: split loadDWARF into two parts
+ 2025-08-29 a7d9d5a80a cmd/cgo: move typedefs and typedefList out of Package
+ 2025-08-29 1d459c4357 all: delete more windows/arm remnants
+ 2025-08-29 27ce6e4e26 cmd/compile: remove sign extension before MULW on riscv64
+ 2025-08-29 84b070bfb1 cmd/compile/internal/ssa: make oneBit function generic
+ 2025-08-29 fe42628dae internal/cpu: inline DebugOptions
+ 2025-08-29 94b7d519bd net: update document on limitation of iprawsock on Windows
+ 2025-08-29 ba9e1ddccf testing: allow specify temp dir by GOTMPDIR environment variable
+ 2025-08-29 9f6936b8da cmd/link: disallow linkname of runtime.addmoduledata
+ 2025-08-29 89d41d254a bytes, strings: speed up TrimSpace
+ 2025-08-29 38204e0872 testing/synctest: call out common issues with tests
+ 2025-08-29 252c901125 os,syscall: pass file flags to CreateFile on Windows
+ 2025-08-29 53515fb0a9 crypto/tls: use hash.Cloner
+ 2025-08-28 13bb48e6fb go/constant: fix complex != unknown comparison
+ 2025-08-28 ba1109feb5 net: remove redundant cgoLookupCNAME return parameter
+ 2025-08-28 f74ed44ed9 net/http/httputil: remove redundant pw.Close() call in DumpRequestOut
+ 2025-08-28 a9689d2e0b time: skip TestLongAdjustTimers in short mode on single CPU systems
+ 2025-08-28 ebc763f76d syscall: only get parent PID if SysProcAttr.Pdeathsig is set
+ 2025-08-28 7f1864b0a8 strings: remove redundant "runs" from strings.Fields docstring
+ 2025-08-28 90c21fa5b6 net/textproto: eliminate some bounds checks
+ 2025-08-27 e47d88beae os: return nil slice when ReadDir is used with a file on file_windows
+ 2025-08-27 6b837a64db cmd/internal/obj/loong64: simplify buildop
+ 2025-08-27 765905e3bd debug/elf: don't panic if symtab too small
+ 2025-08-27 2ee4b31242 net/http: Ensure that CONNECT proxied requests respect MaxResponseHeaderBytes
+ 2025-08-27 b21867b1a2 net/http: require exact match for CrossSiteProtection bypass patterns
+ 2025-08-27 d19e377f6e cmd/cgo: make it safe to run gcc in parallel
+ 2025-08-27 49a2f3ed87 net: allow zero value destination address in WriteMsgUDPAddrPort
+ 2025-08-26 afc51ed007 internal/poll: remove bufs field from Windows' poll.operation
+ 2025-08-26 801b74eb95 internal/poll: remove rsa field from Windows' poll.operation
+ 2025-08-26 fa18c547cd syscall: sort Windows env block in StartProcess
+ 2025-08-26 bfd130db02 internal/poll: don't use stack-allocated WSAMsg parameters
+ 2025-08-26 dae9e456ae runtime: identify virtual memory layout for riscv64
+ 2025-08-25 25c2d4109f math: use Trunc to implement Modf
+ 2025-08-25 4e05a070c4 math: implement IsInf using Abs
+ 2025-08-25 1eed4f32a0 math: optimize Signbit implementation slightly
+ 2025-08-25 bd71b94659 cmd/compile/internal: optimizing add+sll rule using ALSLV instruction on loong64
+ 2025-08-25 ea55ca3600 runtime: skip doInit of plugins in runtime.main
+ 2025-08-25 9ae2f1fb57 internal/trace: skip async preempt off tests on low end systems
+ 2025-08-25 bbd5342a62 net: fix cgoResSearch
+ 2025-08-25 ed7f804775 os: set full name for Roots created with Root.OpenRoot
+ 2025-08-25 a21249436b internal/poll: use fdMutex to provide read/write locking on Windows
+ 2025-08-24 44c5956bf7 test/codegen: add Mul2 and DivPow2 test for loong64
+ 2025-08-24 0aa8019e94 test/codegen: add Mul* test for loong64
+ 2025-08-24 83420974b7 test/codegen: add sqrt* abs and copysign test for loong64
+ 2025-08-23 f2db0dca0b net/http/httptest: redirect example.com requests to server
+ 2025-08-22 d86ec92499 internal/syscall/windows: increase internal Windows O_ flags values
+ 2025-08-22 9d3f7fda70 crypto/tls: fix quic comment typo
+ 2025-08-22 78a05c541f internal/poll: don't pass non-nil WSAMsg.Name with 0 namelen on windows
+ 2025-08-22 52c3f73fda runtime/metrics: improve doc
+ 2025-08-22 a076f49757 os: fix Root.MkdirAll to handle race of directory creation
+ 2025-08-22 98238fd495 all: delete remaining windows/arm code
+ 2025-08-21 1ad30844d9 cmd/asm: process forward jump to PCALIGN
+ 2025-08-21 13c082601d internal/poll: permit nil destination address in WriteMsg{Inet4,Inet6}
+ 2025-08-21 9b0a507735 runtime: remove remaining windows/arm files and comments
+ 2025-08-21 1843f1e9c0 cmd/compile: use zero register instead of specialized *zero instructions on loong64
+ 2025-08-21 e0870a0a12 cmd/compile: simplify zerorange on loong64
+ 2025-08-21 fb8bbe46d5 cmd/compile/internal/ssa: eliminate unnecessary extension operations
+ 2025-08-21 9632ba8160 cmd/compile: optimize some patterns into revb2h/revb4h instruction on loong64
+ 2025-08-21 8dcab6f450 syscall: simplify execve handling on libc platforms
+ 2025-08-21 ba840c1bf9 cmd/compile: deduplication in the source code generated by mknode
+ 2025-08-21 fa706ea50f cmd/compile: optimize rule (x + x) << c to x << c+1 on loong64
+ 2025-08-21 ffc85ee1f1 cmd/internal/objabi,cmd/link: add support for additional riscv64 relocations
Change-Id: I3896f74b1a3cc0a52b29ca48767bb0ba84620f71
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/defs_windows_arm.go | 123 | ||||
| -rw-r--r-- | src/runtime/malloc.go | 21 | ||||
| -rw-r--r-- | src/runtime/memclr_arm.s | 1 | ||||
| -rw-r--r-- | src/runtime/metrics/description.go | 2 | ||||
| -rw-r--r-- | src/runtime/metrics/doc.go | 8 | ||||
| -rw-r--r-- | src/runtime/os_windows_arm.go | 22 | ||||
| -rw-r--r-- | src/runtime/proc.go | 10 | ||||
| -rw-r--r-- | src/runtime/race/README | 1 | ||||
| -rw-r--r-- | src/runtime/race/race.go | 2 | ||||
| -rw-r--r-- | src/runtime/race/race_linux_riscv64.syso | bin | 0 -> 1423920 bytes | |||
| -rw-r--r-- | src/runtime/race_riscv64.s | 551 | ||||
| -rw-r--r-- | src/runtime/signal_windows_test.go | 11 | ||||
| -rw-r--r-- | src/runtime/string.go | 6 | ||||
| -rw-r--r-- | src/runtime/sys_windows_arm.s | 218 | ||||
| -rw-r--r-- | src/runtime/syscall_windows.go | 21 | ||||
| -rw-r--r-- | src/runtime/tagptr_64bit.go | 9 |
16 files changed, 607 insertions, 399 deletions
diff --git a/src/runtime/defs_windows_arm.go b/src/runtime/defs_windows_arm.go deleted file mode 100644 index 6416086f9f..0000000000 --- a/src/runtime/defs_windows_arm.go +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package runtime - -import ( - "internal/goarch" - "unsafe" -) - -// NOTE(rsc): _CONTEXT_CONTROL is actually 0x200001 and should include PC, SP, and LR. -// However, empirically, LR doesn't come along on Windows 10 -// unless you also set _CONTEXT_INTEGER (0x200002). -// Without LR, we skip over the next-to-bottom function in profiles -// when the bottom function is frameless. -// So we set both here, to make a working _CONTEXT_CONTROL. -const _CONTEXT_CONTROL = 0x200003 - -type neon128 struct { - low uint64 - high int64 -} - -type context struct { - contextflags uint32 - r0 uint32 - r1 uint32 - r2 uint32 - r3 uint32 - r4 uint32 - r5 uint32 - r6 uint32 - r7 uint32 - r8 uint32 - r9 uint32 - r10 uint32 - r11 uint32 - r12 uint32 - - spr uint32 - lrr uint32 - pc uint32 - cpsr uint32 - - fpscr uint32 - padding uint32 - - floatNeon [16]neon128 - - bvr [8]uint32 - bcr [8]uint32 - wvr [1]uint32 - wcr [1]uint32 - padding2 [2]uint32 -} - -func (c *context) ip() uintptr { return uintptr(c.pc) } -func (c *context) sp() uintptr { return uintptr(c.spr) } -func (c *context) lr() uintptr { return uintptr(c.lrr) } - -func (c *context) set_ip(x uintptr) { c.pc = uint32(x) } -func (c *context) set_sp(x uintptr) { c.spr = uint32(x) } -func (c *context) set_lr(x uintptr) { c.lrr = uint32(x) } - -// arm does not have frame pointer register. -func (c *context) set_fp(x uintptr) {} - -func (c *context) pushCall(targetPC, resumePC uintptr) { - // Push LR. The injected call is responsible - // for restoring LR. gentraceback is aware of - // this extra slot. See sigctxt.pushCall in - // signal_arm.go. 
- sp := c.sp() - goarch.StackAlign - c.set_sp(sp) - *(*uint32)(unsafe.Pointer(sp)) = uint32(c.lr()) - c.set_lr(resumePC) - c.set_ip(targetPC) -} - -func prepareContextForSigResume(c *context) { - c.r0 = c.spr - c.r1 = c.pc -} - -func dumpregs(r *context) { - print("r0 ", hex(r.r0), "\n") - print("r1 ", hex(r.r1), "\n") - print("r2 ", hex(r.r2), "\n") - print("r3 ", hex(r.r3), "\n") - print("r4 ", hex(r.r4), "\n") - print("r5 ", hex(r.r5), "\n") - print("r6 ", hex(r.r6), "\n") - print("r7 ", hex(r.r7), "\n") - print("r8 ", hex(r.r8), "\n") - print("r9 ", hex(r.r9), "\n") - print("r10 ", hex(r.r10), "\n") - print("r11 ", hex(r.r11), "\n") - print("r12 ", hex(r.r12), "\n") - print("sp ", hex(r.spr), "\n") - print("lr ", hex(r.lrr), "\n") - print("pc ", hex(r.pc), "\n") - print("cpsr ", hex(r.cpsr), "\n") -} - -func stackcheck() { - // TODO: not implemented on ARM -} - -type _DISPATCHER_CONTEXT struct { - controlPc uint32 - imageBase uint32 - functionEntry uintptr - establisherFrame uint32 - targetIp uint32 - context *context - languageHandler uintptr - handlerData uintptr -} - -func (c *_DISPATCHER_CONTEXT) ctx() *context { - return c.context -} diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index d21b2c49b5..ec5f0765ba 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -580,9 +580,28 @@ func mallocinit() { randHeapBasePrefix = byte(randHeapBase >> (randHeapAddrBits - 8)) } + var vmaSize int + if GOARCH == "riscv64" { + // Identify which memory layout is in use based on the system + // stack address, knowing that the bottom half of virtual memory + // is user space. This should result in 39, 48 or 57. It may be + // possible to use RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS at some + // point in the future - for now use the system stack address. 
+ vmaSize = sys.Len64(uint64(getg().m.g0.stack.hi)) + 1 + if raceenabled && vmaSize != 39 && vmaSize != 48 { + println("vma size = ", vmaSize) + throw("riscv64 vma size is unknown and race mode is enabled") + } + } + for i := 0x7f; i >= 0; i-- { var p uintptr switch { + case raceenabled && GOARCH == "riscv64" && vmaSize == 39: + p = uintptr(i)<<28 | uintptrMask&(0x0013<<28) + if p >= uintptrMask&0x000f00000000 { + continue + } case raceenabled: // The TSAN runtime requires the heap // to be in the range [0x00c000000000, @@ -598,6 +617,8 @@ func mallocinit() { p = uintptr(i)<<40 | uintptrMask&(0x0013<<28) case GOARCH == "arm64": p = uintptr(i)<<40 | uintptrMask&(0x0040<<32) + case GOARCH == "riscv64" && vmaSize == 39: + p = uintptr(i)<<32 | uintptrMask&(0x0013<<28) case GOOS == "aix": if i == 0 { // We don't use addresses directly after 0x0A00000000000000 diff --git a/src/runtime/memclr_arm.s b/src/runtime/memclr_arm.s index f02d058ead..f113a1aa2d 100644 --- a/src/runtime/memclr_arm.s +++ b/src/runtime/memclr_arm.s @@ -33,7 +33,6 @@ // See memclrNoHeapPointers Go doc for important implementation constraints. // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) -// Also called from assembly in sys_windows_arm.s without g (but using Go stack convention). TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-8 MOVW ptr+0(FP), TO MOVW n+4(FP), N diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index c8c5bf9888..b9a6ab5fea 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -126,7 +126,7 @@ var allDesc = []Description{ { Name: "/cpu/classes/scavenge/assist:cpu-seconds", Description: "Estimated total CPU time spent returning unused memory to the " + - "underlying platform in response eagerly in response to memory pressure. " + + "underlying platform in response eagerly to memory pressure. " + "This metric is an overestimate, and not directly comparable to " + "system CPU time measurements. 
Compare only with other /cpu/classes " + "metrics.", diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index 00ce60dde1..e40ce25ff9 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -104,10 +104,10 @@ Below is the full list of supported metrics, ordered lexicographically. /cpu/classes/scavenge/assist:cpu-seconds Estimated total CPU time spent returning unused memory to the - underlying platform in response eagerly in response to memory - pressure. This metric is an overestimate, and not directly - comparable to system CPU time measurements. Compare only with - other /cpu/classes metrics. + underlying platform in response eagerly to memory pressure. This + metric is an overestimate, and not directly comparable to system + CPU time measurements. Compare only with other /cpu/classes + metrics. /cpu/classes/scavenge/background:cpu-seconds Estimated total CPU time spent performing background tasks to diff --git a/src/runtime/os_windows_arm.go b/src/runtime/os_windows_arm.go deleted file mode 100644 index bc29843241..0000000000 --- a/src/runtime/os_windows_arm.go +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package runtime - -import "unsafe" - -//go:nosplit -func cputicks() int64 { - var counter int64 - stdcall(_QueryPerformanceCounter, uintptr(unsafe.Pointer(&counter))) - return counter -} - -func checkgoarm() { - if goarm < 7 { - print("Need atomic synchronization instructions, coprocessor ", - "access instructions. Recompile using GOARM=7.\n") - exit(1) - } -} diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 1d597d59c2..fd47976591 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -252,8 +252,12 @@ func main() { // by package plugin). Run through the modules in dependency // order (the order they are initialized by the dynamic // loader, i.e. 
they are added to the moduledata linked list). - for m := &firstmoduledata; m != nil; m = m.next { + last := lastmoduledatap // grab before loop starts. Any added modules after this point will do their own doInit calls. + for m := &firstmoduledata; true; m = m.next { doInit(m.inittasks) + if m == last { + break + } } // Disable init tracing after main init done to avoid overhead @@ -756,10 +760,6 @@ const ( // cpuinit sets up CPU feature flags and calls internal/cpu.Initialize. env should be the complete // value of the GODEBUG environment variable. func cpuinit(env string) { - switch GOOS { - case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux": - cpu.DebugOptions = true - } cpu.Initialize(env) // Support cpu feature variables are used in code generated by the compiler diff --git a/src/runtime/race/README b/src/runtime/race/README index def7bfec86..a65d463e10 100644 --- a/src/runtime/race/README +++ b/src/runtime/race/README @@ -15,4 +15,5 @@ race_darwin_arm64.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 race_linux_arm64.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 and Go e7d582b55dda36e76ce4d0ce770139ca0915b7c5. race_linux_loong64.syso built with LLVM 83fe85115da9dc25fa270d2ea8140113c8d49670 and Go 037112464b4439571b45536de9ebe4bc9e10ecb7. race_linux_ppc64le.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 and Go e7d582b55dda36e76ce4d0ce770139ca0915b7c5. +race_linux_riscv64.syso built with LLVM c3c24be13f7928460ca1e2fe613a1146c868854e and Go a21249436b6e1fd47356361d53dc053bbc074f90. race_linux_s390x.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 and Go e7d582b55dda36e76ce4d0ce770139ca0915b7c5. 
diff --git a/src/runtime/race/race.go b/src/runtime/race/race.go index 9fd75424ca..c2c5913966 100644 --- a/src/runtime/race/race.go +++ b/src/runtime/race/race.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build race && ((linux && (amd64 || arm64 || loong64 || ppc64le || s390x)) || ((freebsd || netbsd || openbsd || windows) && amd64)) +//go:build race && ((linux && (amd64 || arm64 || loong64 || ppc64le || riscv64 || s390x)) || ((freebsd || netbsd || openbsd || windows) && amd64)) package race diff --git a/src/runtime/race/race_linux_riscv64.syso b/src/runtime/race/race_linux_riscv64.syso Binary files differnew file mode 100644 index 0000000000..e5c5b88498 --- /dev/null +++ b/src/runtime/race/race_linux_riscv64.syso diff --git a/src/runtime/race_riscv64.s b/src/runtime/race_riscv64.s new file mode 100644 index 0000000000..9992a519eb --- /dev/null +++ b/src/runtime/race_riscv64.s @@ -0,0 +1,551 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build race + +#include "go_asm.h" +#include "funcdata.h" +#include "textflag.h" + +// The following thunks allow calling the gcc-compiled race runtime directly +// from Go code without going all the way through cgo. +// First, it's much faster (up to 50% speedup for real Go programs). +// Second, it eliminates race-related special cases from cgocall and scheduler. +// Third, in long-term it will allow to remove cyclic runtime/race dependency on cmd/go. + +// A brief recap of the riscv C calling convention. +// Arguments are passed in X10...X17 +// Callee-saved registers are: X8, X9, X18..X27 +// Temporary registers are: X5..X7, X28..X31 + +// When calling racecalladdr, X11 is the call target address. + +// The race ctx, ThreadState *thr below, is passed in X10 and loaded in racecalladdr. 
+ +// func runtime·raceread(addr uintptr) +// Called from instrumented code. +TEXT runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8 + // void __tsan_read(ThreadState *thr, void *addr, void *pc); + MOV $__tsan_read(SB), X5 + MOV X10, X11 + MOV X1, X12 + JMP racecalladdr<>(SB) + +// func runtime·RaceRead(addr uintptr) +TEXT runtime·RaceRead(SB), NOSPLIT, $0-8 + // This needs to be a tail call, because raceread reads caller pc. + JMP runtime·raceread(SB) + +// func runtime·racereadpc(void *addr, void *callpc, void *pc) +TEXT runtime·racereadpc(SB), NOSPLIT, $0-24 + // void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc); + MOV $__tsan_read_pc(SB), X5 + MOV addr+0(FP), X11 + MOV callpc+8(FP), X12 + MOV pc+16(FP), X13 + JMP racecalladdr<>(SB) + +// func runtime·racewrite(addr uintptr) +// Called from instrumented code. +TEXT runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8 + // void __tsan_write(ThreadState *thr, void *addr, void *pc); + MOV $__tsan_write(SB), X5 + MOV X10, X11 + MOV X1, X12 + JMP racecalladdr<>(SB) + +// func runtime·RaceWrite(addr uintptr) +TEXT runtime·RaceWrite(SB), NOSPLIT, $0-8 + // This needs to be a tail call, because racewrite reads caller pc. + JMP runtime·racewrite(SB) + +// func runtime·racewritepc(void *addr, void *callpc, void *pc) +TEXT runtime·racewritepc(SB), NOSPLIT, $0-24 + // void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc); + MOV $__tsan_write_pc(SB), X5 + MOV addr+0(FP), X11 + MOV callpc+8(FP), X12 + MOV pc+16(FP), X13 + JMP racecalladdr<>(SB) + +// func runtime·racereadrange(addr, size uintptr) +// Called from instrumented code. 
+TEXT runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16 + // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc); + MOV $__tsan_read_range(SB), X5 + MOV X11, X12 + MOV X10, X11 + MOV X1, X13 + JMP racecalladdr<>(SB) + +// func runtime·RaceReadRange(addr, size uintptr) +TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16 + // This needs to be a tail call, because racereadrange reads caller pc. + JMP runtime·racereadrange(SB) + +// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc) +TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24 + // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc); + MOV $__tsan_read_range(SB), X5 + MOV addr+0(FP), X11 + MOV size+8(FP), X12 + MOV pc+16(FP), X13 + + // pc is an interceptor address, but TSan expects it to point to the + // middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW). + ADD $4, X13 + JMP racecalladdr<>(SB) + +// func runtime·racewriterange(addr, size uintptr) +// Called from instrumented code. +TEXT runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16 + // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc); + MOV $__tsan_write_range(SB), X5 + MOV X11, X12 + MOV X10, X11 + MOV X1, X13 + JMP racecalladdr<>(SB) + +// func runtime·RaceWriteRange(addr, size uintptr) +TEXT runtime·RaceWriteRange(SB), NOSPLIT, $0-16 + // This needs to be a tail call, because racewriterange reads caller pc. + JMP runtime·racewriterange(SB) + +// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc) +TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24 + // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc); + MOV $__tsan_write_range(SB), X5 + MOV addr+0(FP), X11 + MOV size+8(FP), X12 + MOV pc+16(FP), X13 + // pc is an interceptor address, but TSan expects it to point to the + // middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW). 
+ ADD $4, X13 + JMP racecalladdr<>(SB) + +// If addr (X11) is out of range, do nothing. Otherwise, setup goroutine context and +// invoke racecall. Other arguments are already set. +TEXT racecalladdr<>(SB), NOSPLIT, $0-0 + MOV runtime·racearenastart(SB), X7 + BLT X11, X7, data // Before racearena start? + MOV runtime·racearenaend(SB), X7 + BLT X11, X7, call // Before racearena end? +data: + MOV runtime·racedatastart(SB), X7 + BLT X11, X7, ret // Before racedata start? + MOV runtime·racedataend(SB), X7 + BGE X11, X7, ret // At or after racedata end? +call: + MOV g_racectx(g), X10 + JMP racecall<>(SB) +ret: + RET + +// func runtime·racefuncenter(pc uintptr) +// Called from instrumented code. +TEXT runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8 + MOV $__tsan_func_enter(SB), X5 + MOV X10, X11 + MOV g_racectx(g), X10 + JMP racecall<>(SB) + +// Common code for racefuncenter +// X1 = caller's return address +TEXT racefuncenter<>(SB), NOSPLIT, $0-0 + // void __tsan_func_enter(ThreadState *thr, void *pc); + MOV $__tsan_func_enter(SB), X5 + MOV g_racectx(g), X10 + MOV X1, X11 + JMP racecall<>(SB) + +// func runtime·racefuncexit() +// Called from instrumented code. +TEXT runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0 + // void __tsan_func_exit(ThreadState *thr); + MOV $__tsan_func_exit(SB), X5 + MOV g_racectx(g), X10 + JMP racecall<>(SB) + +// Atomic operations for sync/atomic package. 
+ +// Load + +TEXT sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12 + GO_ARGS + MOV $__tsan_go_atomic32_load(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16 + GO_ARGS + MOV $__tsan_go_atomic64_load(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12 + GO_ARGS + JMP sync∕atomic·LoadInt32(SB) + +TEXT sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16 + GO_ARGS + JMP sync∕atomic·LoadInt64(SB) + +TEXT sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16 + GO_ARGS + JMP sync∕atomic·LoadInt64(SB) + +TEXT sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16 + GO_ARGS + JMP sync∕atomic·LoadInt64(SB) + +// Store + +TEXT sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12 + GO_ARGS + MOV $__tsan_go_atomic32_store(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16 + GO_ARGS + MOV $__tsan_go_atomic64_store(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12 + GO_ARGS + JMP sync∕atomic·StoreInt32(SB) + +TEXT sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16 + GO_ARGS + JMP sync∕atomic·StoreInt64(SB) + +TEXT sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16 + GO_ARGS + JMP sync∕atomic·StoreInt64(SB) + +// Swap + +TEXT sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20 + GO_ARGS + MOV $__tsan_go_atomic32_exchange(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24 + GO_ARGS + MOV $__tsan_go_atomic64_exchange(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20 + GO_ARGS + JMP sync∕atomic·SwapInt32(SB) + +TEXT sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24 + GO_ARGS + JMP sync∕atomic·SwapInt64(SB) + +TEXT sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24 + GO_ARGS + JMP sync∕atomic·SwapInt64(SB) + +// Add + +TEXT sync∕atomic·AddInt32(SB), NOSPLIT, $0-20 + GO_ARGS + MOV $__tsan_go_atomic32_fetch_add(SB), X5 + CALL racecallatomic<>(SB) + // TSan performed fetch_add, but Go needs add_fetch. 
+ MOVW add+8(FP), X5 + MOVW ret+16(FP), X6 + ADD X5, X6, X5 + MOVW X5, ret+16(FP) + RET + +TEXT sync∕atomic·AddInt64(SB), NOSPLIT, $0-24 + GO_ARGS + MOV $__tsan_go_atomic64_fetch_add(SB), X5 + CALL racecallatomic<>(SB) + // TSan performed fetch_add, but Go needs add_fetch. + MOV add+8(FP), X5 + MOV ret+16(FP), X6 + ADD X5, X6, X5 + MOV X5, ret+16(FP) + RET + +TEXT sync∕atomic·AddUint32(SB), NOSPLIT, $0-20 + GO_ARGS + JMP sync∕atomic·AddInt32(SB) + +TEXT sync∕atomic·AddUint64(SB), NOSPLIT, $0-24 + GO_ARGS + JMP sync∕atomic·AddInt64(SB) + +TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24 + GO_ARGS + JMP sync∕atomic·AddInt64(SB) + +// And +TEXT sync∕atomic·AndInt32(SB), NOSPLIT, $0-20 + GO_ARGS + MOV $__tsan_go_atomic32_fetch_and(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·AndInt64(SB), NOSPLIT, $0-24 + GO_ARGS + MOV $__tsan_go_atomic64_fetch_and(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·AndUint32(SB), NOSPLIT, $0-20 + GO_ARGS + JMP sync∕atomic·AndInt32(SB) + +TEXT sync∕atomic·AndUint64(SB), NOSPLIT, $0-24 + GO_ARGS + JMP sync∕atomic·AndInt64(SB) + +TEXT sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24 + GO_ARGS + JMP sync∕atomic·AndInt64(SB) + +// Or +TEXT sync∕atomic·OrInt32(SB), NOSPLIT, $0-20 + GO_ARGS + MOV $__tsan_go_atomic32_fetch_or(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·OrInt64(SB), NOSPLIT, $0-24 + GO_ARGS + MOV $__tsan_go_atomic64_fetch_or(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·OrUint32(SB), NOSPLIT, $0-20 + GO_ARGS + JMP sync∕atomic·OrInt32(SB) + +TEXT sync∕atomic·OrUint64(SB), NOSPLIT, $0-24 + GO_ARGS + JMP sync∕atomic·OrInt64(SB) + +TEXT sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24 + GO_ARGS + JMP sync∕atomic·OrInt64(SB) + +// CompareAndSwap + +TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17 + GO_ARGS + MOV $__tsan_go_atomic32_compare_exchange(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25 + GO_ARGS + MOV 
$__tsan_go_atomic64_compare_exchange(SB), X5 + CALL racecallatomic<>(SB) + RET + +TEXT sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17 + GO_ARGS + JMP sync∕atomic·CompareAndSwapInt32(SB) + +TEXT sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25 + GO_ARGS + JMP sync∕atomic·CompareAndSwapInt64(SB) + +TEXT sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25 + GO_ARGS + JMP sync∕atomic·CompareAndSwapInt64(SB) + +// Generic atomic operation implementation. +// X5 = addr of target function +TEXT racecallatomic<>(SB), NOSPLIT, $0 + // Set up these registers + // X10 = *ThreadState + // X11 = caller pc + // X12 = pc + // X13 = addr of incoming arg list + + // Trigger SIGSEGV early. + MOV 24(X2), X6 // 1st arg is addr. after two times CALL, get it at 24(X2) + MOVB (X6), X0 // segv here if addr is bad + // Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend). + MOV runtime·racearenastart(SB), X7 + BLT X6, X7, racecallatomic_data + MOV runtime·racearenaend(SB), X7 + BLT X6, X7, racecallatomic_ok +racecallatomic_data: + MOV runtime·racedatastart(SB), X7 + BLT X6, X7, racecallatomic_ignore + MOV runtime·racedataend(SB), X7 + BGE X6, X7, racecallatomic_ignore +racecallatomic_ok: + // Addr is within the good range, call the atomic function. + MOV g_racectx(g), X10 // goroutine context + MOV 8(X2), X11 // caller pc + MOV X1, X12 // pc + ADD $24, X2, X13 + CALL racecall<>(SB) + RET +racecallatomic_ignore: + // Addr is outside the good range. + // Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op. + // An attempt to synchronize on the address would cause crash. + MOV X1, X20 // save PC + MOV X5, X21 // save target function + MOV $__tsan_go_ignore_sync_begin(SB), X5 + MOV g_racectx(g), X10 // goroutine context + CALL racecall<>(SB) + MOV X21, X5 // restore the target function + // Call the atomic function. 
+ MOV g_racectx(g), X10 // goroutine context + MOV 8(X2), X11 // caller pc + MOV X20, X12 // pc + ADD $24, X2, X13 // arguments + CALL racecall<>(SB) + // Call __tsan_go_ignore_sync_end. + MOV $__tsan_go_ignore_sync_end(SB), X5 + MOV g_racectx(g), X10 // goroutine context + CALL racecall<>(SB) + RET + +// func runtime·racecall(void(*f)(...), ...) +// Calls C function f from race runtime and passes up to 4 arguments to it. +// The arguments are never heap-object-preserving pointers, so we pretend there +// are no arguments. +TEXT runtime·racecall(SB), NOSPLIT, $0-0 + MOV fn+0(FP), X5 + MOV arg0+8(FP), X10 + MOV arg1+16(FP), X11 + MOV arg2+24(FP), X12 + MOV arg3+32(FP), X13 + JMP racecall<>(SB) + +// Switches SP to g0 stack and calls X5. Arguments are already set. +TEXT racecall<>(SB), NOSPLIT|NOFRAME, $0-0 + MOV X1, X18 // Save RA in callee save register + MOV X2, X19 // Save SP in callee save register + CALL runtime·save_g(SB) // Save g for callbacks + + MOV g_m(g), X6 + + // Switch to g0 stack if we aren't already on g0 or gsignal. + MOV m_gsignal(X6), X7 + BEQ X7, g, call + MOV m_g0(X6), X7 + BEQ X7, g, call + + MOV (g_sched+gobuf_sp)(X7), X2 // Switch to g0 stack +call: + JALR RA, (X5) // Call C function + MOV X19, X2 // Restore SP + JMP (X18) // Return to Go. + +// C->Go callback thunk that allows to call runtime·racesymbolize from C code. +// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g. +// The overall effect of Go->C->Go call chain is similar to that of mcall. +// R0 contains command code. R1 contains command-specific context. +// See racecallback for command codes. +TEXT runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0 + // Handle command raceGetProcCmd (0) here. + // First, code below assumes that we are on curg, while raceGetProcCmd + // can be executed on g0. Second, it is called frequently, so will + // benefit from this fast path. 
+ BNEZ X10, rest + MOV X1, X5 + MOV g, X6 + CALL runtime·load_g(SB) + MOV g_m(g), X7 + MOV m_p(X7), X7 + MOV p_raceprocctx(X7), X7 + MOV X7, (X11) + MOV X6, g + JMP (X5) +rest: + // Save callee-save registers (X8, X9, X18..X27, F8, F9, F18..F27), + // since Go code will not respect this. + // 8(X2) and 16(X2) are for args passed to racecallback + SUB $(27*8), X2 + MOV X1, (0*8)(X2) + MOV X8, (3*8)(X2) + MOV X9, (4*8)(X2) + MOV X18, (5*8)(X2) + MOV X19, (6*8)(X2) + MOV X20, (7*8)(X2) + MOV X21, (8*8)(X2) + MOV X22, (9*8)(X2) + MOV X23, (10*8)(X2) + MOV X24, (11*8)(X2) + MOV X25, (12*8)(X2) + MOV X26, (13*8)(X2) + MOV g, (14*8)(X2) + MOVD F8, (15*8)(X2) + MOVD F9, (16*8)(X2) + MOVD F18, (17*8)(X2) + MOVD F19, (18*8)(X2) + MOVD F20, (19*8)(X2) + MOVD F21, (20*8)(X2) + MOVD F22, (21*8)(X2) + MOVD F23, (22*8)(X2) + MOVD F24, (23*8)(X2) + MOVD F25, (24*8)(X2) + MOVD F26, (25*8)(X2) + MOVD F27, (26*8)(X2) + + // Set g = g0. + CALL runtime·load_g(SB) + MOV g_m(g), X5 + MOV m_g0(X5), X6 + BEQ X6, g, noswitch // branch if already on g0 + MOV X6, g + + MOV X10, 8(X2) // func arg + MOV X11, 16(X2) // func arg + CALL runtime·racecallback(SB) + + // All registers are smashed after Go code, reload. + MOV g_m(g), X5 + MOV m_curg(X5), g // g = m->curg +ret: + // Restore callee-save registers. 
+ MOV (0*8)(X2), X1 + MOV (3*8)(X2), X8 + MOV (4*8)(X2), X9 + MOV (5*8)(X2), X18 + MOV (6*8)(X2), X19 + MOV (7*8)(X2), X20 + MOV (8*8)(X2), X21 + MOV (9*8)(X2), X22 + MOV (10*8)(X2), X23 + MOV (11*8)(X2), X24 + MOV (12*8)(X2), X25 + MOV (13*8)(X2), X26 + MOV (14*8)(X2), g + MOVD (15*8)(X2), F8 + MOVD (16*8)(X2), F9 + MOVD (17*8)(X2), F18 + MOVD (18*8)(X2), F19 + MOVD (19*8)(X2), F20 + MOVD (20*8)(X2), F21 + MOVD (21*8)(X2), F22 + MOVD (22*8)(X2), F23 + MOVD (23*8)(X2), F24 + MOVD (24*8)(X2), F25 + MOVD (25*8)(X2), F26 + MOVD (26*8)(X2), F27 + + ADD $(27*8), X2 + JMP (X1) + +noswitch: + // already on g0 + MOV X10, 8(X2) // func arg + MOV X11, 16(X2) // func arg + CALL runtime·racecallback(SB) + JMP ret diff --git a/src/runtime/signal_windows_test.go b/src/runtime/signal_windows_test.go index 9318ff9c00..7a9afcce22 100644 --- a/src/runtime/signal_windows_test.go +++ b/src/runtime/signal_windows_test.go @@ -79,12 +79,7 @@ func TestVectoredHandlerDontCrashOnLibrary(t *testing.T) { if *flagQuick { t.Skip("-quick") } - if runtime.GOARCH == "arm" { - //TODO: remove this skip and update testwinlib/main.c - // once windows/arm supports c-shared buildmode. - // See go.dev/issues/43800. - t.Skip("this test can't run on windows/arm") - } + testenv.MustHaveGoBuild(t) testenv.MustHaveCGO(t) testenv.MustHaveExecPath(t, "gcc") @@ -115,8 +110,8 @@ func TestVectoredHandlerDontCrashOnLibrary(t *testing.T) { t.Fatalf("failure while running executable: %s\n%s", err, out) } var expectedOutput string - if runtime.GOARCH == "arm64" || runtime.GOARCH == "arm" { - // TODO: remove when windows/arm64 and windows/arm support SEH stack unwinding. + if runtime.GOARCH == "arm64" { + // TODO: remove when windows/arm64 support SEH stack unwinding. 
expectedOutput = "exceptionCount: 1\ncontinueCount: 1\nunhandledCount: 0\n" } else { expectedOutput = "exceptionCount: 1\ncontinueCount: 1\nunhandledCount: 1\n" diff --git a/src/runtime/string.go b/src/runtime/string.go index 44d586bc53..3726d9235b 100644 --- a/src/runtime/string.go +++ b/src/runtime/string.go @@ -59,6 +59,9 @@ func concatstrings(buf *tmpBuf, a []string) string { return s } +// concatstring2 helps make the callsite smaller (compared to concatstrings), +// and we think this is currently more valuable than omitting one call in the +// chain, the same goes for concatstring{3,4,5}. func concatstring2(buf *tmpBuf, a0, a1 string) string { return concatstrings(buf, []string{a0, a1}) } @@ -108,6 +111,9 @@ func concatbytes(buf *tmpBuf, a []string) []byte { return b } +// concatbyte2 helps make the callsite smaller (compared to concatbytes), +// and we think this is currently more valuable than omitting one call in +// the chain, the same goes for concatbyte{3,4,5}. func concatbyte2(buf *tmpBuf, a0, a1 string) []byte { return concatbytes(buf, []string{a0, a1}) } diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s deleted file mode 100644 index c7f2369e57..0000000000 --- a/src/runtime/sys_windows_arm.s +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "go_asm.h" -#include "go_tls.h" -#include "textflag.h" -#include "time_windows.h" - -// Note: For system ABI, R0-R3 are args, R4-R11 are callee-save. - -TEXT runtime·getlasterror(SB),NOSPLIT,$0 - MRC 15, 0, R0, C13, C0, 2 - MOVW 0x34(R0), R0 - MOVW R0, ret+0(FP) - RET - -// Called by Windows as a Vectored Exception Handler (VEH). -// R0 is pointer to struct containing -// exception record and context pointers. -// R1 is the kind of sigtramp function. -// Return value of sigtrampgo is stored in R0. 
-TEXT sigtramp<>(SB),NOSPLIT|NOFRAME,$0 - MOVM.DB.W [R4-R11, R14], (R13) // push {r4-r11, lr} (SP-=40) - SUB $(16), R13 // reserve space for parameters/retval to go call - - MOVW R0, R6 // Save param0 - MOVW R1, R7 // Save param1 - BL runtime·load_g(SB) // Clobbers R0 - - MOVW $0, R4 - MOVW R4, 0(R13) // No saved link register. - MOVW R6, 4(R13) // Move arg0 into position - MOVW R7, 8(R13) // Move arg1 into position - BL runtime·sigtrampgo(SB) - MOVW 12(R13), R0 // Fetch return value from stack - - ADD $(16), R13 // free locals - MOVM.IA.W (R13), [R4-R11, R14] // pop {r4-r11, lr} - - B (R14) // return - -// Trampoline to resume execution from exception handler. -// This is part of the control flow guard workaround. -// It switches stacks and jumps to the continuation address. -// R0 and R1 are set above at the end of sigtrampgo -// in the context that starts executing at sigresume. -TEXT runtime·sigresume(SB),NOSPLIT|NOFRAME,$0 - // Important: do not smash LR, - // which is set to a live value when handling - // a signal by pushing a call to sigpanic onto the stack. - MOVW R0, R13 - B (R1) - -TEXT runtime·exceptiontramp(SB),NOSPLIT|NOFRAME,$0 - MOVW $const_callbackVEH, R1 - B sigtramp<>(SB) - -TEXT runtime·firstcontinuetramp(SB),NOSPLIT|NOFRAME,$0 - MOVW $const_callbackFirstVCH, R1 - B sigtramp<>(SB) - -TEXT runtime·lastcontinuetramp(SB),NOSPLIT|NOFRAME,$0 - MOVW $const_callbackLastVCH, R1 - B sigtramp<>(SB) - -TEXT runtime·callbackasm1(SB),NOSPLIT|NOFRAME,$0 - // On entry, the trampoline in zcallback_windows_arm.s left - // the callback index in R12 (which is volatile in the C ABI). - - // Push callback register arguments r0-r3. We do this first so - // they're contiguous with stack arguments. - MOVM.DB.W [R0-R3], (R13) - // Push C callee-save registers r4-r11 and lr. - MOVM.DB.W [R4-R11, R14], (R13) - SUB $(16 + callbackArgs__size), R13 // space for locals - - // Create a struct callbackArgs on our stack. 
- MOVW R12, (16+callbackArgs_index)(R13) // callback index - MOVW $(16+callbackArgs__size+4*9)(R13), R0 - MOVW R0, (16+callbackArgs_args)(R13) // address of args vector - MOVW $0, R0 - MOVW R0, (16+callbackArgs_result)(R13) // result - - // Prepare for entry to Go. - BL runtime·load_g(SB) - - // Call cgocallback, which will call callbackWrap(frame). - MOVW $0, R0 - MOVW R0, 12(R13) // context - MOVW $16(R13), R1 // R1 = &callbackArgs{...} - MOVW R1, 8(R13) // frame (address of callbackArgs) - MOVW $·callbackWrap(SB), R1 - MOVW R1, 4(R13) // PC of function to call - BL runtime·cgocallback(SB) - - // Get callback result. - MOVW (16+callbackArgs_result)(R13), R0 - - ADD $(16 + callbackArgs__size), R13 // free locals - MOVM.IA.W (R13), [R4-R11, R12] // pop {r4-r11, lr=>r12} - ADD $(4*4), R13 // skip r0-r3 - B (R12) // return - -// uint32 tstart_stdcall(M *newm); -TEXT runtime·tstart_stdcall(SB),NOSPLIT|NOFRAME,$0 - MOVM.DB.W [R4-R11, R14], (R13) // push {r4-r11, lr} - - MOVW m_g0(R0), g - MOVW R0, g_m(g) - BL runtime·save_g(SB) - - // Layout new m scheduler stack on os stack. - MOVW R13, R0 - MOVW R0, g_stack+stack_hi(g) - SUB $(64*1024), R0 - MOVW R0, (g_stack+stack_lo)(g) - MOVW R0, g_stackguard0(g) - MOVW R0, g_stackguard1(g) - - BL runtime·emptyfunc(SB) // fault if stack check is wrong - BL runtime·mstart(SB) - - // Exit the thread. 
- MOVW $0, R0 - MOVM.IA.W (R13), [R4-R11, R15] // pop {r4-r11, pc} - -TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0 - B runtime·armPublicationBarrier(SB) - -// never called (this is a GOARM=7 platform) -TEXT runtime·read_tls_fallback(SB),NOSPLIT,$0 - MOVW $0xabcd, R0 - MOVW R0, (R0) - RET - -TEXT runtime·nanotime1(SB),NOSPLIT,$0-8 - MOVW $_INTERRUPT_TIME, R3 -loop: - MOVW time_hi1(R3), R1 - DMB MB_ISH - MOVW time_lo(R3), R0 - DMB MB_ISH - MOVW time_hi2(R3), R2 - CMP R1, R2 - BNE loop - - // wintime = R1:R0, multiply by 100 - MOVW $100, R2 - MULLU R0, R2, (R4, R3) // R4:R3 = R1:R0 * R2 - MULA R1, R2, R4, R4 - - // wintime*100 = R4:R3 - MOVW R3, ret_lo+0(FP) - MOVW R4, ret_hi+4(FP) - RET - -// save_g saves the g register (R10) into thread local memory -// so that we can call externally compiled -// ARM code that will overwrite those registers. -// NOTE: runtime.gogo assumes that R1 is preserved by this function. -// runtime.mcall assumes this function only clobbers R0 and R11. -// Returns with g in R0. -// Save the value in the _TEB->TlsSlots array. -// Effectively implements TlsSetValue(). -// tls_g stores the TLS slot allocated TlsAlloc(). -TEXT runtime·save_g(SB),NOSPLIT,$0 - MRC 15, 0, R0, C13, C0, 2 - ADD $0xe10, R0 - MOVW $runtime·tls_g(SB), R11 - MOVW (R11), R11 - MOVW g, R11<<2(R0) - MOVW g, R0 // preserve R0 across call to setg<> - RET - -// load_g loads the g register from thread-local memory, -// for use after calling externally compiled -// ARM code that overwrote those registers. -// Get the value from the _TEB->TlsSlots array. -// Effectively implements TlsGetValue(). -TEXT runtime·load_g(SB),NOSPLIT,$0 - MRC 15, 0, R0, C13, C0, 2 - ADD $0xe10, R0 - MOVW $runtime·tls_g(SB), g - MOVW (g), g - MOVW g<<2(R0), g - RET - -// This is called from rt0_go, which runs on the system stack -// using the initial stack allocated by the OS. -// It calls back into standard C using the BL below. -// To do that, the stack pointer must be 8-byte-aligned. 
-TEXT runtime·_initcgo(SB),NOSPLIT|NOFRAME,$0 - MOVM.DB.W [R4, R14], (R13) // push {r4, lr} - - // Ensure stack is 8-byte aligned before calling C code - MOVW R13, R4 - BIC $0x7, R13 - - // Allocate a TLS slot to hold g across calls to external code - MOVW $runtime·_TlsAlloc(SB), R0 - MOVW (R0), R0 - BL (R0) - - // Assert that slot is less than 64 so we can use _TEB->TlsSlots - CMP $64, R0 - MOVW $runtime·abort(SB), R1 - BL.GE (R1) - - // Save Slot into tls_g - MOVW $runtime·tls_g(SB), R1 - MOVW R0, (R1) - - MOVW R4, R13 - MOVM.IA.W (R13), [R4, R15] // pop {r4, pc} - -// Holds the TLS Slot, which was allocated by TlsAlloc() -GLOBL runtime·tls_g+0(SB), NOPTR, $4 diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index e86ebf41c7..9ad21c8649 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -98,19 +98,14 @@ func (p *abiDesc) assignArg(t *_type) { // passed as two words (little endian); and // structs are pushed on the stack. In // fastcall, arguments larger than the word - // size are passed by reference. On arm, - // 8-byte aligned arguments round up to the - // next even register and can be split across - // registers and the stack. + // size are passed by reference. panic("compileCallback: argument size is larger than uintptr") } if k := t.Kind(); GOARCH != "386" && (k == abi.Float32 || k == abi.Float64) { // In fastcall, floating-point arguments in // the first four positions are passed in // floating-point registers, which we don't - // currently spill. arm passes floating-point - // arguments in VFP registers, which we also - // don't support. + // currently spill. // So basically we only support 386. panic("compileCallback: float arguments not supported") } @@ -127,7 +122,7 @@ func (p *abiDesc) assignArg(t *_type) { // argument word and all supported Windows // architectures are little endian, so srcStackOffset // is already pointing to the right place for smaller - // arguments. 
The same is true on arm. + // arguments. oldParts := p.parts if p.tryRegAssignArg(t, 0) { @@ -163,8 +158,8 @@ func (p *abiDesc) assignArg(t *_type) { p.dstStackSize += t.Size_ } - // cdecl, stdcall, fastcall, and arm pad arguments to word size. - // TODO(rsc): On arm and arm64 do we need to skip the caller's saved LR? + // cdecl, stdcall, and fastcall pad arguments to word size. + // TODO(rsc): On arm64 do we need to skip the caller's saved LR? p.srcStackSize += goarch.PtrSize } @@ -261,7 +256,7 @@ const callbackMaxFrame = 64 * goarch.PtrSize // // On 386, if cdecl is true, the returned C function will use the // cdecl calling convention; otherwise, it will use stdcall. On amd64, -// it always uses fastcall. On arm, it always uses the ARM convention. +// it always uses fastcall. // //go:linkname compileCallback syscall.compileCallback func compileCallback(fn eface, cdecl bool) (code uintptr) { @@ -356,10 +351,6 @@ type callbackArgs struct { // For fastcall, the trampoline spills register arguments to // the reserved spill slots below the stack arguments, // resulting in a layout equivalent to stdcall. - // - // For arm, the trampoline stores the register arguments just - // below the stack arguments, so again we can treat it as one - // big stack arguments frame. args unsafe.Pointer // Below are out-args from callbackWrap result uintptr diff --git a/src/runtime/tagptr_64bit.go b/src/runtime/tagptr_64bit.go index 3d79332e2d..76733cc1d6 100644 --- a/src/runtime/tagptr_64bit.go +++ b/src/runtime/tagptr_64bit.go @@ -22,10 +22,17 @@ const ( // On AMD64, virtual addresses are 48-bit (or 57-bit) sign-extended. // Other archs are 48-bit zero-extended. // + // We use one extra bit to placate systems which simulate amd64 binaries on + // an arm64 host. Allocated arm64 addresses could be as high as 1<<48-1, + // which would be invalid if we assumed 48-bit sign-extended addresses. + // See issue 69255. 
+ // (Note that this does not help the other way around, simulating arm64 + // on amd64, but we don't have that problem at the moment.) + // // On s390x, virtual addresses are 64-bit. There's not much we // can do about this, so we just hope that the kernel doesn't // get to really high addresses and panic if it does. - defaultAddrBits = 48 + defaultAddrBits = 48 + 1 // On AIX, 64-bit addresses are split into 36-bit segment number and 28-bit // offset in segment. Segment numbers in the range 0x0A0000000-0x0AFFFFFFF(LSA) |
