diff options
| author | Cherry Zhang <cherryyz@google.com> | 2018-02-12 15:27:02 -0500 |
|---|---|---|
| committer | Cherry Zhang <cherryyz@google.com> | 2018-05-03 21:35:01 +0000 |
| commit | 1b6fec862cbe890ef0abea99827a587ffbe2e0f1 (patch) | |
| tree | 89f6499fff153fc963057b04efce44d9c65829ca /src/runtime | |
| parent | 4a1baf8bd11c8804a22aa8364028ce8d81b6e1f3 (diff) | |
| download | go-1b6fec862cbe890ef0abea99827a587ffbe2e0f1.tar.xz | |
sync/atomic: redirect many functions to runtime/internal/atomic
The implementation of atomics is inherently tricky. It would
be good to have them implemented in a single place, instead of
multiple copies.
Mostly a simple redirect.
On 386, some functions in sync/atomic have better implementations,
which are moved to runtime/internal/atomic.
On ARM, some functions in sync/atomic have better implementations.
They are dropped by this CL, but restored with an improved
version in a follow-up CL. On linux/arm, 64-bit CAS kernel helper
is dropped, as we're trying to move away from kernel helpers.
Fixes #23778.
Change-Id: Icb9e1039acc92adbb2a371c34baaf0b79551c3ea
Reviewed-on: https://go-review.googlesource.com/93637
Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/internal/atomic/asm_386.s | 68 | ||||
| -rw-r--r-- | src/runtime/internal/atomic/asm_amd64p32.s | 3 | ||||
| -rw-r--r-- | src/runtime/internal/atomic/atomic_386.go | 24 | ||||
| -rw-r--r-- | src/runtime/internal/atomic/sys_linux_arm.s | 19 |
4 files changed, 90 insertions, 24 deletions
diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s index c3ef79f913..86a3ef33b9 100644 --- a/src/runtime/internal/atomic/asm_386.s +++ b/src/runtime/internal/atomic/asm_386.s @@ -94,6 +94,42 @@ TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-12 MOVL AX, ret+8(FP) RET +TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-20 + // no XADDQ so use CMPXCHG8B loop + MOVL ptr+0(FP), BP + TESTL $7, BP + JZ 2(PC) + MOVL 0, AX // crash when unaligned + // DI:SI = delta + MOVL delta_lo+4(FP), SI + MOVL delta_hi+8(FP), DI + // DX:AX = *addr + MOVL 0(BP), AX + MOVL 4(BP), DX +addloop: + // CX:BX = DX:AX (*addr) + DI:SI (delta) + MOVL AX, BX + MOVL DX, CX + ADDL SI, BX + ADCL DI, CX + + // if *addr == DX:AX { + // *addr = CX:BX + // } else { + // DX:AX = *addr + // } + // all in one instruction + LOCK + CMPXCHG8B 0(BP) + + JNZ addloop + + // success + // return CX:BX + MOVL BX, ret_lo+12(FP) + MOVL CX, ret_hi+16(FP) + RET + TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12 MOVL ptr+0(FP), BX MOVL new+4(FP), AX @@ -104,6 +140,33 @@ TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12 TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-12 JMP runtime∕internal∕atomic·Xchg(SB) +TEXT runtime∕internal∕atomic·Xchg64(SB),NOSPLIT,$0-20 + // no XCHGQ so use CMPXCHG8B loop + MOVL ptr+0(FP), BP + TESTL $7, BP + JZ 2(PC) + MOVL 0, AX // crash when unaligned + // CX:BX = new + MOVL new_lo+4(FP), BX + MOVL new_hi+8(FP), CX + // DX:AX = *addr + MOVL 0(BP), AX + MOVL 4(BP), DX +swaploop: + // if *addr == DX:AX + // *addr = CX:BX + // else + // DX:AX = *addr + // all in one instruction + LOCK + CMPXCHG8B 0(BP) + JNZ swaploop + + // success + // return DX:AX + MOVL AX, ret_lo+12(FP) + MOVL DX, ret_hi+16(FP) + RET TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-8 MOVL ptr+0(FP), BX @@ -123,9 +186,8 @@ TEXT runtime∕internal∕atomic·Load64(SB), NOSPLIT, $0-12 TESTL $7, AX JZ 2(PC) MOVL 0, AX // crash with nil ptr deref - LEAL ret_lo+4(FP), 
BX MOVQ (AX), M0 - MOVQ M0, (BX) + MOVQ M0, ret+4(FP) EMMS RET @@ -141,7 +203,7 @@ TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-12 EMMS // This is essentially a no-op, but it provides required memory fencing. // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2). - MOVL $0, AX + XORL AX, AX LOCK XADDL AX, (SP) RET diff --git a/src/runtime/internal/atomic/asm_amd64p32.s b/src/runtime/internal/atomic/asm_amd64p32.s index 87f7a079ca..ff590e601b 100644 --- a/src/runtime/internal/atomic/asm_amd64p32.s +++ b/src/runtime/internal/atomic/asm_amd64p32.s @@ -108,6 +108,9 @@ TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12 TEXT runtime∕internal∕atomic·Xchg64(SB), NOSPLIT, $0-24 MOVL ptr+0(FP), BX MOVQ new+8(FP), AX + TESTL $7, BX + JZ 2(PC) + MOVL 0, BX // crash when unaligned XCHGQ AX, 0(BX) MOVQ AX, ret+16(FP) RET diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go index 23a8479515..4284d2bd7d 100644 --- a/src/runtime/internal/atomic/atomic_386.go +++ b/src/runtime/internal/atomic/atomic_386.go @@ -20,33 +20,19 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer { return *(*unsafe.Pointer)(ptr) } -//go:nosplit -func Xadd64(ptr *uint64, delta int64) uint64 { - for { - old := *ptr - if Cas64(ptr, old, old+uint64(delta)) { - return old + uint64(delta) - } - } -} +//go:noescape +func Xadd64(ptr *uint64, delta int64) uint64 //go:noescape func Xadduintptr(ptr *uintptr, delta uintptr) uintptr -//go:nosplit -func Xchg64(ptr *uint64, new uint64) uint64 { - for { - old := *ptr - if Cas64(ptr, old, new) { - return old - } - } -} - //go:noescape func Xadd(ptr *uint32, delta int32) uint32 //go:noescape +func Xchg64(ptr *uint64, new uint64) uint64 + +//go:noescape func Xchg(ptr *uint32, new uint32) uint32 //go:noescape diff --git a/src/runtime/internal/atomic/sys_linux_arm.s b/src/runtime/internal/atomic/sys_linux_arm.s index 1d6439a6ba..60f28e7216 100644 --- 
a/src/runtime/internal/atomic/sys_linux_arm.s +++ b/src/runtime/internal/atomic/sys_linux_arm.s @@ -4,8 +4,23 @@ #include "textflag.h" -// Use kernel version instead of native armcas in asm_arm.s. -// See ../../../sync/atomic/asm_linux_arm.s for details. +// Linux/ARM atomic operations. + +// Because there is so much variation in ARM devices, +// the Linux kernel provides an appropriate compare-and-swap +// implementation at address 0xffff0fc0. Caller sets: +// R0 = old value +// R1 = new value +// R2 = addr +// LR = return address +// The function returns with CS true if the swap happened. +// http://lxr.linux.no/linux+v2.6.37.2/arch/arm/kernel/entry-armv.S#L850 +// On older kernels (before 2.6.24) the function can incorrectly +// report a conflict, so we have to double-check the compare ourselves +// and retry if necessary. +// +// http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=b49c0f24cf6744a3f4fd09289fe7cade349dead5 +// TEXT cas<>(SB),NOSPLIT,$0 MOVW $0xffff0fc0, R15 // R15 is hardware PC. |
