diff options
| author | Cherry Zhang <cherryyz@google.com> | 2018-02-14 14:51:39 -0500 |
|---|---|---|
| committer | Cherry Zhang <cherryyz@google.com> | 2018-05-03 21:35:39 +0000 |
| commit | 150b728675c64addd24d79ad3bb68fec4c137940 (patch) | |
| tree | a17449aedd1204ab7c05d3cef5a2bf2e9a83f814 /src/runtime | |
| parent | 1b6fec862cbe890ef0abea99827a587ffbe2e0f1 (diff) | |
| download | go-150b728675c64addd24d79ad3bb68fec4c137940.tar.xz | |
runtime: use native CAS and memory barrier on ARMv7
This gets us around the kernel helpers on ARMv7.
It is slightly faster than using the kernel helper.
name old time/op new time/op delta
AtomicLoad-4 72.5ns ± 0% 69.5ns ± 0% -4.08% (p=0.000 n=9+9)
AtomicStore-4 57.6ns ± 1% 54.4ns ± 0% -5.58% (p=0.000 n=10+9)
[Geo mean] 64.6ns 61.5ns -4.83%
If performance is really critical, we can even do compiler intrinsics
on GOARM=7.
Fixes #23792.
Change-Id: I36497d880890b26bdf01e048b542bd5fd7b17d23
Reviewed-on: https://go-review.googlesource.com/94076
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/internal/atomic/bench_test.go | 16 | ||||
| -rw-r--r-- | src/runtime/internal/atomic/sys_linux_arm.s | 9 | ||||
| -rw-r--r-- | src/runtime/sys_linux_arm.s | 13 |
3 files changed, 33 insertions, 5 deletions
diff --git a/src/runtime/internal/atomic/bench_test.go b/src/runtime/internal/atomic/bench_test.go index 47010e32d5..2a22e88fb8 100644 --- a/src/runtime/internal/atomic/bench_test.go +++ b/src/runtime/internal/atomic/bench_test.go @@ -26,3 +26,19 @@ func BenchmarkAtomicStore64(b *testing.B) { atomic.Store64(&x, 0) } } + +func BenchmarkAtomicLoad(b *testing.B) { + var x uint32 + sink = &x + for i := 0; i < b.N; i++ { + _ = atomic.Load(&x) + } +} + +func BenchmarkAtomicStore(b *testing.B) { + var x uint32 + sink = &x + for i := 0; i < b.N; i++ { + atomic.Store(&x, 0) + } +} diff --git a/src/runtime/internal/atomic/sys_linux_arm.s b/src/runtime/internal/atomic/sys_linux_arm.s index 60f28e7216..7e234d8f26 100644 --- a/src/runtime/internal/atomic/sys_linux_arm.s +++ b/src/runtime/internal/atomic/sys_linux_arm.s @@ -24,7 +24,14 @@ TEXT cas<>(SB),NOSPLIT,$0 MOVW $0xffff0fc0, R15 // R15 is hardware PC. -TEXT runtime∕internal∕atomic·Cas(SB),NOSPLIT,$0 +TEXT runtime∕internal∕atomic·Cas(SB),NOSPLIT|NOFRAME,$0 + MOVB runtime·goarm(SB), R11 + CMP $7, R11 + BLT 2(PC) + JMP ·armcas(SB) + JMP ·kernelcas<>(SB) + +TEXT runtime∕internal∕atomic·kernelcas<>(SB),NOSPLIT,$0 MOVW ptr+0(FP), R2 // trigger potential paging fault here, // because we don't know how to traceback through __kuser_cmpxchg diff --git a/src/runtime/sys_linux_arm.s b/src/runtime/sys_linux_arm.s index fc9dc9bbb8..aa39732cfb 100644 --- a/src/runtime/sys_linux_arm.s +++ b/src/runtime/sys_linux_arm.s @@ -489,13 +489,18 @@ TEXT runtime·usleep(SB),NOSPLIT,$12 // even on single-core devices. The kernel helper takes care of all of // this for us. -TEXT publicationBarrier<>(SB),NOSPLIT,$0 +TEXT kernelPublicationBarrier<>(SB),NOSPLIT,$0 // void __kuser_memory_barrier(void); - MOVW $0xffff0fa0, R15 // R15 is hardware PC. + MOVW $0xffff0fa0, R11 + CALL (R11) + RET TEXT ·publicationBarrier(SB),NOSPLIT,$0 - BL publicationBarrier<>(SB) - RET + MOVB ·goarm(SB), R11 + CMP $7, R11 + BLT 2(PC) + JMP ·armPublicationBarrier(SB) + JMP kernelPublicationBarrier<>(SB) // extra layer so this function is leaf and no SP adjustment on GOARM=7 TEXT runtime·osyield(SB),NOSPLIT,$0 MOVW $SYS_sched_yield, R7 |
