From 1b6fec862cbe890ef0abea99827a587ffbe2e0f1 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Mon, 12 Feb 2018 15:27:02 -0500 Subject: sync/atomic: redirect many functions to runtime/internal/atomic The implementation of atomics is inherently tricky. It would be good to have them implemented in a single place, instead of multiple copies. Mostly a simple redirect. On 386, some functions in sync/atomic have better implementations, which are moved to runtime/internal/atomic. On ARM, some functions in sync/atomic have better implementations. They are dropped by this CL, but restored with an improved version in a follow-up CL. On linux/arm, the 64-bit CAS kernel helper is dropped, as we're trying to move away from kernel helpers. Fixes #23778. Change-Id: Icb9e1039acc92adbb2a371c34baaf0b79551c3ea Reviewed-on: https://go-review.googlesource.com/93637 Reviewed-by: Austin Clements --- src/runtime/internal/atomic/asm_386.s | 68 +++++++++++++++++++++++++++-- src/runtime/internal/atomic/asm_amd64p32.s | 3 ++ src/runtime/internal/atomic/atomic_386.go | 24 +++------- src/runtime/internal/atomic/sys_linux_arm.s | 19 +++++++- 4 files changed, 90 insertions(+), 24 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s index c3ef79f913..86a3ef33b9 100644 --- a/src/runtime/internal/atomic/asm_386.s +++ b/src/runtime/internal/atomic/asm_386.s @@ -94,6 +94,42 @@ TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-12 MOVL AX, ret+8(FP) RET +TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-20 + // no XADDQ so use CMPXCHG8B loop + MOVL ptr+0(FP), BP + TESTL $7, BP + JZ 2(PC) + MOVL 0, AX // crash when unaligned + // DI:SI = delta + MOVL delta_lo+4(FP), SI + MOVL delta_hi+8(FP), DI + // DX:AX = *addr + MOVL 0(BP), AX + MOVL 4(BP), DX +addloop: + // CX:BX = DX:AX (*addr) + DI:SI (delta) + MOVL AX, BX + MOVL DX, CX + ADDL SI, BX + ADCL DI, CX + + // if *addr == DX:AX { + // *addr = CX:BX + // } else { + // DX:AX = 
*addr + // } + // all in one instruction + LOCK + CMPXCHG8B 0(BP) + + JNZ addloop + + // success + // return CX:BX + MOVL BX, ret_lo+12(FP) + MOVL CX, ret_hi+16(FP) + RET + TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12 MOVL ptr+0(FP), BX MOVL new+4(FP), AX @@ -104,6 +140,33 @@ TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12 TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-12 JMP runtime∕internal∕atomic·Xchg(SB) +TEXT runtime∕internal∕atomic·Xchg64(SB),NOSPLIT,$0-20 + // no XCHGQ so use CMPXCHG8B loop + MOVL ptr+0(FP), BP + TESTL $7, BP + JZ 2(PC) + MOVL 0, AX // crash when unaligned + // CX:BX = new + MOVL new_lo+4(FP), BX + MOVL new_hi+8(FP), CX + // DX:AX = *addr + MOVL 0(BP), AX + MOVL 4(BP), DX +swaploop: + // if *addr == DX:AX + // *addr = CX:BX + // else + // DX:AX = *addr + // all in one instruction + LOCK + CMPXCHG8B 0(BP) + JNZ swaploop + + // success + // return DX:AX + MOVL AX, ret_lo+12(FP) + MOVL DX, ret_hi+16(FP) + RET TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-8 MOVL ptr+0(FP), BX @@ -123,9 +186,8 @@ TEXT runtime∕internal∕atomic·Load64(SB), NOSPLIT, $0-12 TESTL $7, AX JZ 2(PC) MOVL 0, AX // crash with nil ptr deref - LEAL ret_lo+4(FP), BX MOVQ (AX), M0 - MOVQ M0, (BX) + MOVQ M0, ret+4(FP) EMMS RET @@ -141,7 +203,7 @@ TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-12 EMMS // This is essentially a no-op, but it provides required memory fencing. // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2). 
- MOVL $0, AX + XORL AX, AX LOCK XADDL AX, (SP) RET diff --git a/src/runtime/internal/atomic/asm_amd64p32.s b/src/runtime/internal/atomic/asm_amd64p32.s index 87f7a079ca..ff590e601b 100644 --- a/src/runtime/internal/atomic/asm_amd64p32.s +++ b/src/runtime/internal/atomic/asm_amd64p32.s @@ -108,6 +108,9 @@ TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12 TEXT runtime∕internal∕atomic·Xchg64(SB), NOSPLIT, $0-24 MOVL ptr+0(FP), BX MOVQ new+8(FP), AX + TESTL $7, BX + JZ 2(PC) + MOVL 0, BX // crash when unaligned XCHGQ AX, 0(BX) MOVQ AX, ret+16(FP) RET diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go index 23a8479515..4284d2bd7d 100644 --- a/src/runtime/internal/atomic/atomic_386.go +++ b/src/runtime/internal/atomic/atomic_386.go @@ -20,32 +20,18 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer { return *(*unsafe.Pointer)(ptr) } -//go:nosplit -func Xadd64(ptr *uint64, delta int64) uint64 { - for { - old := *ptr - if Cas64(ptr, old, old+uint64(delta)) { - return old + uint64(delta) - } - } -} +//go:noescape +func Xadd64(ptr *uint64, delta int64) uint64 //go:noescape func Xadduintptr(ptr *uintptr, delta uintptr) uintptr -//go:nosplit -func Xchg64(ptr *uint64, new uint64) uint64 { - for { - old := *ptr - if Cas64(ptr, old, new) { - return old - } - } -} - //go:noescape func Xadd(ptr *uint32, delta int32) uint32 +//go:noescape +func Xchg64(ptr *uint64, new uint64) uint64 + //go:noescape func Xchg(ptr *uint32, new uint32) uint32 diff --git a/src/runtime/internal/atomic/sys_linux_arm.s b/src/runtime/internal/atomic/sys_linux_arm.s index 1d6439a6ba..60f28e7216 100644 --- a/src/runtime/internal/atomic/sys_linux_arm.s +++ b/src/runtime/internal/atomic/sys_linux_arm.s @@ -4,8 +4,23 @@ #include "textflag.h" -// Use kernel version instead of native armcas in asm_arm.s. -// See ../../../sync/atomic/asm_linux_arm.s for details. +// Linux/ARM atomic operations. 
+ +// Because there is so much variation in ARM devices, +// the Linux kernel provides an appropriate compare-and-swap +// implementation at address 0xffff0fc0. Caller sets: +// R0 = old value +// R1 = new value +// R2 = addr +// LR = return address +// The function returns with CS true if the swap happened. +// http://lxr.linux.no/linux+v2.6.37.2/arch/arm/kernel/entry-armv.S#L850 +// On older kernels (before 2.6.24) the function can incorrectly +// report a conflict, so we have to double-check the compare ourselves +// and retry if necessary. +// +// http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=b49c0f24cf6744a3f4fd09289fe7cade349dead5 +// TEXT cas<>(SB),NOSPLIT,$0 MOVW $0xffff0fc0, R15 // R15 is hardware PC. -- cgit v1.3-5-g9baa