about summary refs log tree commit diff
path: root/src/runtime
diff options
context:
space:
mode:
author	Cherry Zhang <cherryyz@google.com>	2018-02-12 15:27:02 -0500
committer	Cherry Zhang <cherryyz@google.com>	2018-05-03 21:35:01 +0000
commit	1b6fec862cbe890ef0abea99827a587ffbe2e0f1 (patch)
tree	89f6499fff153fc963057b04efce44d9c65829ca /src/runtime
parent	4a1baf8bd11c8804a22aa8364028ce8d81b6e1f3 (diff)
download	go-1b6fec862cbe890ef0abea99827a587ffbe2e0f1.tar.xz
sync/atomic: redirect many functions to runtime/internal/atomic
The implementation of atomics is inherently tricky. It would be good to have them implemented in a single place, instead of multiple copies. Mostly a simple redirect. On 386, some functions in sync/atomic have better implementations, which are moved to runtime/internal/atomic. On ARM, some functions in sync/atomic have better implementations. They are dropped by this CL, but restored with an improved version in a follow-up CL. On linux/arm, the 64-bit CAS kernel helper is dropped, as we're trying to move away from kernel helpers. Fixes #23778. Change-Id: Icb9e1039acc92adbb2a371c34baaf0b79551c3ea Reviewed-on: https://go-review.googlesource.com/93637 Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/runtime')
-rw-r--r--	src/runtime/internal/atomic/asm_386.s	68
-rw-r--r--	src/runtime/internal/atomic/asm_amd64p32.s	3
-rw-r--r--	src/runtime/internal/atomic/atomic_386.go	24
-rw-r--r--	src/runtime/internal/atomic/sys_linux_arm.s	19
4 files changed, 90 insertions, 24 deletions
diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s
index c3ef79f913..86a3ef33b9 100644
--- a/src/runtime/internal/atomic/asm_386.s
+++ b/src/runtime/internal/atomic/asm_386.s
@@ -94,6 +94,42 @@ TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-12
MOVL AX, ret+8(FP)
RET
+TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-20
+ // no XADDQ so use CMPXCHG8B loop
+ MOVL ptr+0(FP), BP
+ TESTL $7, BP
+ JZ 2(PC)
+ MOVL 0, AX // crash when unaligned
+ // DI:SI = delta
+ MOVL delta_lo+4(FP), SI
+ MOVL delta_hi+8(FP), DI
+ // DX:AX = *addr
+ MOVL 0(BP), AX
+ MOVL 4(BP), DX
+addloop:
+ // CX:BX = DX:AX (*addr) + DI:SI (delta)
+ MOVL AX, BX
+ MOVL DX, CX
+ ADDL SI, BX
+ ADCL DI, CX
+
+ // if *addr == DX:AX {
+ // *addr = CX:BX
+ // } else {
+ // DX:AX = *addr
+ // }
+ // all in one instruction
+ LOCK
+ CMPXCHG8B 0(BP)
+
+ JNZ addloop
+
+ // success
+ // return CX:BX
+ MOVL BX, ret_lo+12(FP)
+ MOVL CX, ret_hi+16(FP)
+ RET
+
TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12
MOVL ptr+0(FP), BX
MOVL new+4(FP), AX
@@ -104,6 +140,33 @@ TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12
TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-12
JMP runtime∕internal∕atomic·Xchg(SB)
+TEXT runtime∕internal∕atomic·Xchg64(SB),NOSPLIT,$0-20
+ // no XCHGQ so use CMPXCHG8B loop
+ MOVL ptr+0(FP), BP
+ TESTL $7, BP
+ JZ 2(PC)
+ MOVL 0, AX // crash when unaligned
+ // CX:BX = new
+ MOVL new_lo+4(FP), BX
+ MOVL new_hi+8(FP), CX
+ // DX:AX = *addr
+ MOVL 0(BP), AX
+ MOVL 4(BP), DX
+swaploop:
+ // if *addr == DX:AX
+ // *addr = CX:BX
+ // else
+ // DX:AX = *addr
+ // all in one instruction
+ LOCK
+ CMPXCHG8B 0(BP)
+ JNZ swaploop
+
+ // success
+ // return DX:AX
+ MOVL AX, ret_lo+12(FP)
+ MOVL DX, ret_hi+16(FP)
+ RET
TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-8
MOVL ptr+0(FP), BX
@@ -123,9 +186,8 @@ TEXT runtime∕internal∕atomic·Load64(SB), NOSPLIT, $0-12
TESTL $7, AX
JZ 2(PC)
MOVL 0, AX // crash with nil ptr deref
- LEAL ret_lo+4(FP), BX
MOVQ (AX), M0
- MOVQ M0, (BX)
+ MOVQ M0, ret+4(FP)
EMMS
RET
@@ -141,7 +203,7 @@ TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-12
EMMS
// This is essentially a no-op, but it provides required memory fencing.
// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
- MOVL $0, AX
+ XORL AX, AX
LOCK
XADDL AX, (SP)
RET
diff --git a/src/runtime/internal/atomic/asm_amd64p32.s b/src/runtime/internal/atomic/asm_amd64p32.s
index 87f7a079ca..ff590e601b 100644
--- a/src/runtime/internal/atomic/asm_amd64p32.s
+++ b/src/runtime/internal/atomic/asm_amd64p32.s
@@ -108,6 +108,9 @@ TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12
TEXT runtime∕internal∕atomic·Xchg64(SB), NOSPLIT, $0-24
MOVL ptr+0(FP), BX
MOVQ new+8(FP), AX
+ TESTL $7, BX
+ JZ 2(PC)
+ MOVL 0, BX // crash when unaligned
XCHGQ AX, 0(BX)
MOVQ AX, ret+16(FP)
RET
diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go
index 23a8479515..4284d2bd7d 100644
--- a/src/runtime/internal/atomic/atomic_386.go
+++ b/src/runtime/internal/atomic/atomic_386.go
@@ -20,33 +20,19 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer {
return *(*unsafe.Pointer)(ptr)
}
-//go:nosplit
-func Xadd64(ptr *uint64, delta int64) uint64 {
- for {
- old := *ptr
- if Cas64(ptr, old, old+uint64(delta)) {
- return old + uint64(delta)
- }
- }
-}
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
//go:noescape
func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
-//go:nosplit
-func Xchg64(ptr *uint64, new uint64) uint64 {
- for {
- old := *ptr
- if Cas64(ptr, old, new) {
- return old
- }
- }
-}
-
//go:noescape
func Xadd(ptr *uint32, delta int32) uint32
//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
func Xchg(ptr *uint32, new uint32) uint32
//go:noescape
diff --git a/src/runtime/internal/atomic/sys_linux_arm.s b/src/runtime/internal/atomic/sys_linux_arm.s
index 1d6439a6ba..60f28e7216 100644
--- a/src/runtime/internal/atomic/sys_linux_arm.s
+++ b/src/runtime/internal/atomic/sys_linux_arm.s
@@ -4,8 +4,23 @@
#include "textflag.h"
-// Use kernel version instead of native armcas in asm_arm.s.
-// See ../../../sync/atomic/asm_linux_arm.s for details.
+// Linux/ARM atomic operations.
+
+// Because there is so much variation in ARM devices,
+// the Linux kernel provides an appropriate compare-and-swap
+// implementation at address 0xffff0fc0. Caller sets:
+// R0 = old value
+// R1 = new value
+// R2 = addr
+// LR = return address
+// The function returns with CS true if the swap happened.
+// http://lxr.linux.no/linux+v2.6.37.2/arch/arm/kernel/entry-armv.S#L850
+// On older kernels (before 2.6.24) the function can incorrectly
+// report a conflict, so we have to double-check the compare ourselves
+// and retry if necessary.
+//
+// http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=b49c0f24cf6744a3f4fd09289fe7cade349dead5
+//
TEXT cas<>(SB),NOSPLIT,$0
MOVW $0xffff0fc0, R15 // R15 is hardware PC.