about summary refs log tree commit diff
path: root/src/runtime
diff options
context:
space:
mode:
author	Cherry Zhang <cherryyz@google.com>	2018-02-12 15:27:02 -0500
committer	Cherry Zhang <cherryyz@google.com>	2018-05-03 21:35:01 +0000
commit	1b6fec862cbe890ef0abea99827a587ffbe2e0f1 (patch)
tree	89f6499fff153fc963057b04efce44d9c65829ca /src/runtime
parent	4a1baf8bd11c8804a22aa8364028ce8d81b6e1f3 (diff)
download	go-1b6fec862cbe890ef0abea99827a587ffbe2e0f1.tar.xz
sync/atomic: redirect many functions to runtime/internal/atomic
The implementation of atomics is inherently tricky. It would be good to have them implemented in a single place, instead of multiple copies. Mostly a simple redirect. On 386, some functions in sync/atomic have better implementations, which are moved to runtime/internal/atomic. On ARM, some functions in sync/atomic have better implementations. They are dropped by this CL, but restored with an improved version in a follow-up CL. On linux/arm, the 64-bit CAS kernel helper is dropped, as we're trying to move away from kernel helpers. Fixes #23778. Change-Id: Icb9e1039acc92adbb2a371c34baaf0b79551c3ea Reviewed-on: https://go-review.googlesource.com/93637 Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/runtime')
-rw-r--r--	src/runtime/internal/atomic/asm_386.s	68
-rw-r--r--	src/runtime/internal/atomic/asm_amd64p32.s	3
-rw-r--r--	src/runtime/internal/atomic/atomic_386.go	24
-rw-r--r--	src/runtime/internal/atomic/sys_linux_arm.s	19
4 files changed, 90 insertions, 24 deletions
diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s
index c3ef79f913..86a3ef33b9 100644
--- a/src/runtime/internal/atomic/asm_386.s
+++ b/src/runtime/internal/atomic/asm_386.s
@@ -94,6 +94,42 @@ TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-12
MOVL AX, ret+8(FP)
RET
+TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-20
+ // no XADDQ so use CMPXCHG8B loop
+ MOVL ptr+0(FP), BP
+ TESTL $7, BP
+ JZ 2(PC)
+ MOVL 0, AX // crash when unaligned
+ // DI:SI = delta
+ MOVL delta_lo+4(FP), SI
+ MOVL delta_hi+8(FP), DI
+ // DX:AX = *addr
+ MOVL 0(BP), AX
+ MOVL 4(BP), DX
+addloop:
+ // CX:BX = DX:AX (*addr) + DI:SI (delta)
+ MOVL AX, BX
+ MOVL DX, CX
+ ADDL SI, BX
+ ADCL DI, CX
+
+ // if *addr == DX:AX {
+ // *addr = CX:BX
+ // } else {
+ // DX:AX = *addr
+ // }
+ // all in one instruction
+ LOCK
+ CMPXCHG8B 0(BP)
+
+ JNZ addloop
+
+ // success
+ // return CX:BX
+ MOVL BX, ret_lo+12(FP)
+ MOVL CX, ret_hi+16(FP)
+ RET
+
TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12
MOVL ptr+0(FP), BX
MOVL new+4(FP), AX
@@ -104,6 +140,33 @@ TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12
TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-12
JMP runtime∕internal∕atomic·Xchg(SB)
+TEXT runtime∕internal∕atomic·Xchg64(SB),NOSPLIT,$0-20
+ // no XCHGQ so use CMPXCHG8B loop
+ MOVL ptr+0(FP), BP
+ TESTL $7, BP
+ JZ 2(PC)
+ MOVL 0, AX // crash when unaligned
+ // CX:BX = new
+ MOVL new_lo+4(FP), BX
+ MOVL new_hi+8(FP), CX
+ // DX:AX = *addr
+ MOVL 0(BP), AX
+ MOVL 4(BP), DX
+swaploop:
+ // if *addr == DX:AX
+ // *addr = CX:BX
+ // else
+ // DX:AX = *addr
+ // all in one instruction
+ LOCK
+ CMPXCHG8B 0(BP)
+ JNZ swaploop
+
+ // success
+ // return DX:AX
+ MOVL AX, ret_lo+12(FP)
+ MOVL DX, ret_hi+16(FP)
+ RET
TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-8
MOVL ptr+0(FP), BX
@@ -123,9 +186,8 @@ TEXT runtime∕internal∕atomic·Load64(SB), NOSPLIT, $0-12
TESTL $7, AX
JZ 2(PC)
MOVL 0, AX // crash with nil ptr deref
- LEAL ret_lo+4(FP), BX
MOVQ (AX), M0
- MOVQ M0, (BX)
+ MOVQ M0, ret+4(FP)
EMMS
RET
@@ -141,7 +203,7 @@ TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-12
EMMS
// This is essentially a no-op, but it provides required memory fencing.
// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
- MOVL $0, AX
+ XORL AX, AX
LOCK
XADDL AX, (SP)
RET
diff --git a/src/runtime/internal/atomic/asm_amd64p32.s b/src/runtime/internal/atomic/asm_amd64p32.s
index 87f7a079ca..ff590e601b 100644
--- a/src/runtime/internal/atomic/asm_amd64p32.s
+++ b/src/runtime/internal/atomic/asm_amd64p32.s
@@ -108,6 +108,9 @@ TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12
TEXT runtime∕internal∕atomic·Xchg64(SB), NOSPLIT, $0-24
MOVL ptr+0(FP), BX
MOVQ new+8(FP), AX
+ TESTL $7, BX
+ JZ 2(PC)
+ MOVL 0, BX // crash when unaligned
XCHGQ AX, 0(BX)
MOVQ AX, ret+16(FP)
RET
diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go
index 23a8479515..4284d2bd7d 100644
--- a/src/runtime/internal/atomic/atomic_386.go
+++ b/src/runtime/internal/atomic/atomic_386.go
@@ -20,33 +20,19 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer {
return *(*unsafe.Pointer)(ptr)
}
-//go:nosplit
-func Xadd64(ptr *uint64, delta int64) uint64 {
- for {
- old := *ptr
- if Cas64(ptr, old, old+uint64(delta)) {
- return old + uint64(delta)
- }
- }
-}
+//go:noescape
+func Xadd64(ptr *uint64, delta int64) uint64
//go:noescape
func Xadduintptr(ptr *uintptr, delta uintptr) uintptr
-//go:nosplit
-func Xchg64(ptr *uint64, new uint64) uint64 {
- for {
- old := *ptr
- if Cas64(ptr, old, new) {
- return old
- }
- }
-}
-
//go:noescape
func Xadd(ptr *uint32, delta int32) uint32
//go:noescape
+func Xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
func Xchg(ptr *uint32, new uint32) uint32
//go:noescape
diff --git a/src/runtime/internal/atomic/sys_linux_arm.s b/src/runtime/internal/atomic/sys_linux_arm.s
index 1d6439a6ba..60f28e7216 100644
--- a/src/runtime/internal/atomic/sys_linux_arm.s
+++ b/src/runtime/internal/atomic/sys_linux_arm.s
@@ -4,8 +4,23 @@
#include "textflag.h"
-// Use kernel version instead of native armcas in asm_arm.s.
-// See ../../../sync/atomic/asm_linux_arm.s for details.
+// Linux/ARM atomic operations.
+
+// Because there is so much variation in ARM devices,
+// the Linux kernel provides an appropriate compare-and-swap
+// implementation at address 0xffff0fc0. Caller sets:
+// R0 = old value
+// R1 = new value
+// R2 = addr
+// LR = return address
+// The function returns with CS true if the swap happened.
+// http://lxr.linux.no/linux+v2.6.37.2/arch/arm/kernel/entry-armv.S#L850
+// On older kernels (before 2.6.24) the function can incorrectly
+// report a conflict, so we have to double-check the compare ourselves
+// and retry if necessary.
+//
+// http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=b49c0f24cf6744a3f4fd09289fe7cade349dead5
+//
TEXT cas<>(SB),NOSPLIT,$0
MOVW $0xffff0fc0, R15 // R15 is hardware PC.