diff options
| author | Carlos Eduardo Seo <cseo@linux.vnet.ibm.com> | 2018-01-04 18:23:27 -0200 |
|---|---|---|
| committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2018-03-22 14:13:01 +0000 |
| commit | 6633bb2aa7c8ab53dc6cc8a4ef8c4fef7a439cee (patch) | |
| tree | 6eca9e0103b8a0257eeb34d339fd5166f61ca3a9 /src/runtime/internal/atomic | |
| parent | a3d8326993f25869ffb01f27a085abcfabbb42ef (diff) | |
| download | go-6633bb2aa7c8ab53dc6cc8a4ef8c4fef7a439cee.tar.xz | |
cmd/compile/internal/ppc64, runtime internal/atomic, sync/atomic: implement faster atomics for ppc64x
This change implements faster atomics for ppc64x based on the ISA 2.07B,
Appendix B.2 recommendations, replacing SYNC/ISYNC by LWSYNC in some
cases.
Updates #21348
name old time/op new time/op delta
Cond1-16 955ns 856ns -10.33%
Cond2-16 2.38µs 2.03µs -14.59%
Cond4-16 5.90µs 5.44µs -7.88%
Cond8-16 12.1µs 11.1µs -8.42%
Cond16-16 27.0µs 25.1µs -7.04%
Cond32-16 59.1µs 55.5µs -6.14%
LoadMostlyHits/*sync_test.DeepCopyMap-16 22.1ns 24.1ns +9.02%
LoadMostlyHits/*sync_test.RWMutexMap-16 252ns 249ns -1.20%
LoadMostlyHits/*sync.Map-16 16.2ns 16.3ns ~
LoadMostlyMisses/*sync_test.DeepCopyMap-16 22.3ns 22.6ns ~
LoadMostlyMisses/*sync_test.RWMutexMap-16 249ns 247ns -0.51%
LoadMostlyMisses/*sync.Map-16 12.7ns 12.7ns ~
LoadOrStoreBalanced/*sync_test.RWMutexMap-16 1.27µs 1.17µs -7.54%
LoadOrStoreBalanced/*sync.Map-16 1.12µs 1.10µs -2.35%
LoadOrStoreUnique/*sync_test.RWMutexMap-16 1.75µs 1.68µs -3.84%
LoadOrStoreUnique/*sync.Map-16 2.07µs 1.97µs -5.13%
LoadOrStoreCollision/*sync_test.DeepCopyMap-16 15.8ns 15.9ns ~
LoadOrStoreCollision/*sync_test.RWMutexMap-16 496ns 424ns -14.48%
LoadOrStoreCollision/*sync.Map-16 6.07ns 6.07ns ~
Range/*sync_test.DeepCopyMap-16 1.65µs 1.64µs ~
Range/*sync_test.RWMutexMap-16 278µs 288µs +3.75%
Range/*sync.Map-16 2.00µs 2.01µs ~
AdversarialAlloc/*sync_test.DeepCopyMap-16 3.45µs 3.44µs ~
AdversarialAlloc/*sync_test.RWMutexMap-16 226ns 227ns ~
AdversarialAlloc/*sync.Map-16 1.09µs 1.07µs -2.36%
AdversarialDelete/*sync_test.DeepCopyMap-16 553ns 550ns -0.57%
AdversarialDelete/*sync_test.RWMutexMap-16 273ns 274ns ~
AdversarialDelete/*sync.Map-16 247ns 249ns ~
UncontendedSemaphore-16 79.0ns 65.5ns -17.11%
ContendedSemaphore-16 112ns 97ns -13.77%
MutexUncontended-16 3.34ns 2.51ns -24.69%
Mutex-16 266ns 191ns -28.26%
MutexSlack-16 226ns 159ns -29.55%
MutexWork-16 377ns 338ns -10.14%
MutexWorkSlack-16 335ns 308ns -8.20%
MutexNoSpin-16 196ns 184ns -5.91%
MutexSpin-16 710ns 666ns -6.21%
Once-16 1.29ns 1.29ns ~
Pool-16 8.64ns 8.71ns ~
PoolOverflow-16 1.60µs 1.44µs -10.25%
SemaUncontended-16 5.39ns 4.42ns -17.96%
SemaSyntNonblock-16 539ns 483ns -10.42%
SemaSyntBlock-16 413ns 354ns -14.20%
SemaWorkNonblock-16 305ns 258ns -15.36%
SemaWorkBlock-16 266ns 229ns -14.06%
RWMutexUncontended-16 12.9ns 9.7ns -24.80%
RWMutexWrite100-16 203ns 147ns -27.47%
RWMutexWrite10-16 177ns 119ns -32.74%
RWMutexWorkWrite100-16 435ns 403ns -7.39%
RWMutexWorkWrite10-16 642ns 611ns -4.79%
WaitGroupUncontended-16 4.67ns 3.70ns -20.92%
WaitGroupAddDone-16 402ns 355ns -11.54%
WaitGroupAddDoneWork-16 208ns 250ns +20.09%
WaitGroupWait-16 1.21ns 1.21ns ~
WaitGroupWaitWork-16 5.91ns 5.87ns -0.81%
WaitGroupActuallyWait-16 92.2ns 85.8ns -6.91%
Updates #21348
Change-Id: Ibb9b271d11b308264103829e176c6d9fe8f867d3
Reviewed-on: https://go-review.googlesource.com/95175
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Diffstat (limited to 'src/runtime/internal/atomic')
| -rw-r--r-- | src/runtime/internal/atomic/asm_ppc64x.s | 24 |
1 files changed, 10 insertions, 14 deletions
diff --git a/src/runtime/internal/atomic/asm_ppc64x.s b/src/runtime/internal/atomic/asm_ppc64x.s index 7117aef158..a2ed4adc91 100644 --- a/src/runtime/internal/atomic/asm_ppc64x.s +++ b/src/runtime/internal/atomic/asm_ppc64x.s @@ -17,7 +17,7 @@ TEXT runtime∕internal∕atomic·Cas(SB), NOSPLIT, $0-17 MOVD ptr+0(FP), R3 MOVWZ old+8(FP), R4 MOVWZ new+12(FP), R5 - SYNC + LWSYNC cas_again: LWAR (R3), R6 CMPW R6, R4 @@ -25,7 +25,7 @@ cas_again: STWCCC R5, (R3) BNE cas_again MOVD $1, R3 - ISYNC + LWSYNC MOVB R3, ret+16(FP) RET cas_fail: @@ -44,7 +44,7 @@ TEXT runtime∕internal∕atomic·Cas64(SB), NOSPLIT, $0-25 MOVD ptr+0(FP), R3 MOVD old+8(FP), R4 MOVD new+16(FP), R5 - SYNC + LWSYNC cas64_again: LDAR (R3), R6 CMP R6, R4 @@ -52,7 +52,7 @@ cas64_again: STDCCC R5, (R3) BNE cas64_again MOVD $1, R3 - ISYNC + LWSYNC MOVB R3, ret+24(FP) RET cas64_fail: @@ -97,31 +97,29 @@ TEXT runtime∕internal∕atomic·Casp1(SB), NOSPLIT, $0-25 TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-20 MOVD ptr+0(FP), R4 MOVW delta+8(FP), R5 - SYNC + LWSYNC LWAR (R4), R3 ADD R5, R3 STWCCC R3, (R4) BNE -3(PC) - ISYNC MOVW R3, ret+16(FP) RET TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-24 MOVD ptr+0(FP), R4 MOVD delta+8(FP), R5 - SYNC + LWSYNC LDAR (R4), R3 ADD R5, R3 STDCCC R3, (R4) BNE -3(PC) - ISYNC MOVD R3, ret+16(FP) RET TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-20 MOVD ptr+0(FP), R4 MOVW new+8(FP), R5 - SYNC + LWSYNC LWAR (R4), R3 STWCCC R5, (R4) BNE -2(PC) @@ -132,7 +130,7 @@ TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-20 TEXT runtime∕internal∕atomic·Xchg64(SB), NOSPLIT, $0-24 MOVD ptr+0(FP), R4 MOVD new+8(FP), R5 - SYNC + LWSYNC LDAR (R4), R3 STDCCC R5, (R4) BNE -2(PC) @@ -165,24 +163,22 @@ TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-16 TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-9 MOVD ptr+0(FP), R3 MOVBZ val+8(FP), R4 - SYNC + LWSYNC again: LBAR (R3), R6 OR R4, R6 STBCCC R6, (R3) BNE again - ISYNC RET // void runtime∕internal∕atomic·And8(byte volatile*, byte); TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9 MOVD ptr+0(FP), R3 MOVBZ val+8(FP), R4 - SYNC + LWSYNC again: LBAR (R3),R6 AND R4,R6 STBCCC R6,(R3) BNE again - ISYNC RET |
