aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMauri de Souza Meneguzzo <mauri870@gmail.com>2024-10-23 17:06:39 +0000
committerCherry Mui <cherryyz@google.com>2024-10-28 13:37:30 +0000
commitc5d424bfca065fe6c92e384df61d84cfade2bfed (patch)
tree817b7e5c1950ab722f5cf42425d90e5264c42850
parentbbdc65bb3825dfde90603d1b26a2034e8963356c (diff)
downloadgo-c5d424bfca065fe6c92e384df61d84cfade2bfed.tar.xz
internal/runtime/atomic: add arm native implementations of And8/Or8
With LDREXB/STREXB now available for the arm assembler we can implement these operations natively. The instructions are armv6k+ but for simplicity I only use them on armv7. Benchmark results for a raspberry Pi 3 model B+: goos: linux goarch: arm pkg: internal/runtime/atomic cpu: ARMv7 Processor rev 4 (v7l) │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ And8-4 127.65n ± 0% 68.74n ± 0% -46.15% (p=0.000 n=10) Change-Id: Ic87f307c35f7d7f56010980302f253056f6d54dc GitHub-Last-Rev: a7351802fd212704712b37d183435ab14e58f885 GitHub-Pull-Request: golang/go#70002 Cq-Include-Trybots: luci.golang.try:gotip-linux-arm Reviewed-on: https://go-review.googlesource.com/c/go/+/622075 Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Keith Randall <khr@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
-rw-r--r--src/internal/runtime/atomic/atomic_arm.go10
-rw-r--r--src/internal/runtime/atomic/atomic_arm.s64
2 files changed, 72 insertions, 2 deletions
diff --git a/src/internal/runtime/atomic/atomic_arm.go b/src/internal/runtime/atomic/atomic_arm.go
index b58f643ca3..0909d224fc 100644
--- a/src/internal/runtime/atomic/atomic_arm.go
+++ b/src/internal/runtime/atomic/atomic_arm.go
@@ -159,8 +159,11 @@ func goStore64(addr *uint64, v uint64) {
addrLock(addr).unlock()
}
+//go:noescape
+func Or8(addr *uint8, v uint8)
+
//go:nosplit
-func Or8(addr *uint8, v uint8) {
+func goOr8(addr *uint8, v uint8) {
// Align down to 4 bytes and use 32-bit CAS.
uaddr := uintptr(unsafe.Pointer(addr))
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
@@ -173,8 +176,11 @@ func Or8(addr *uint8, v uint8) {
}
}
+//go:noescape
+func And8(addr *uint8, v uint8)
+
//go:nosplit
-func And8(addr *uint8, v uint8) {
+func goAnd8(addr *uint8, v uint8) {
// Align down to 4 bytes and use 32-bit CAS.
uaddr := uintptr(unsafe.Pointer(addr))
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
diff --git a/src/internal/runtime/atomic/atomic_arm.s b/src/internal/runtime/atomic/atomic_arm.s
index 1cf7d8f6ef..93214da826 100644
--- a/src/internal/runtime/atomic/atomic_arm.s
+++ b/src/internal/runtime/atomic/atomic_arm.s
@@ -228,6 +228,42 @@ store64loop:
DMB MB_ISH
RET
+TEXT armAnd8<>(SB),NOSPLIT,$0-5
+ // addr is already in R1
+ MOVB v+4(FP), R2
+
+and8loop:
+ LDREXB (R1), R6
+
+ DMB MB_ISHST
+
+ AND R2, R6
+ STREXB R6, (R1), R0
+ CMP $0, R0
+ BNE and8loop
+
+ DMB MB_ISH
+
+ RET
+
+TEXT armOr8<>(SB),NOSPLIT,$0-5
+ // addr is already in R1
+ MOVB v+4(FP), R2
+
+or8loop:
+ LDREXB (R1), R6
+
+ DMB MB_ISHST
+
+ ORR R2, R6
+ STREXB R6, (R1), R0
+ CMP $0, R0
+ BNE or8loop
+
+ DMB MB_ISH
+
+ RET
+
// The following functions all panic if their address argument isn't
// 8-byte aligned. Since we're calling back into Go code to do this,
// we have to cooperate with stack unwinding. In the normal case, the
@@ -310,3 +346,31 @@ TEXT ·Store64(SB),NOSPLIT,$-4-12
JMP ·goStore64(SB)
#endif
JMP armStore64<>(SB)
+
+TEXT ·And8(SB),NOSPLIT,$-4-5
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+
+// Uses STREXB/LDREXB that is armv6k or later.
+// For simplicity we only enable this on armv7.
+#ifndef GOARM_7
+ MOVB internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11
+ CMP $1, R11
+ BEQ 2(PC)
+ JMP ·goAnd8(SB)
+#endif
+ JMP armAnd8<>(SB)
+
+TEXT ·Or8(SB),NOSPLIT,$-4-5
+ NO_LOCAL_POINTERS
+ MOVW addr+0(FP), R1
+
+// Uses STREXB/LDREXB that is armv6k or later.
+// For simplicity we only enable this on armv7.
+#ifndef GOARM_7
+ MOVB internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11
+ CMP $1, R11
+ BEQ 2(PC)
+ JMP ·goOr8(SB)
+#endif
+ JMP armOr8<>(SB)