diff options
| author | Guoqi Chen <chenguoqi@loongson.cn> | 2024-10-30 19:11:49 +0800 |
|---|---|---|
| committer | abner chenc <chenguoqi@loongson.cn> | 2024-11-20 02:58:50 +0000 |
| commit | fe7d97d0322c283462b38c0f53bc340b642fe1cb (patch) | |
| tree | f57ba217713b40b7f73dd618b32876ed9afac45e /src/internal | |
| parent | a3c068c57ae3f71a7720fe68da379143bb579362 (diff) | |
| download | go-fe7d97d0322c283462b38c0f53bc340b642fe1cb.tar.xz | |
cmd/compile, internal/runtime/atomic: add Xchg8 for loong64
In Loongson's new microarchitecture LA664 (Loongson-3A6000) and later, the atomic
instruction AMSWAP[DB]{B,H} [1] is supported. Therefore, the implementation of
the atomic exchange operation can be selected according to the CPUCFG flag LAM_BH:
the AMSWAPDBB (full barrier) instruction is used on new microarchitectures, and traditional
LL-SC is used on LA464 (Loongson-3A5000) and older microarchitectures. This can
significantly improve the performance of Go programs on new microarchitectures.
Because the traditional LL-SC implementation of Xchg8 uses too many temporary
registers, it is not suitable for use as a compiler intrinsic.
goos: linux
goarch: loong64
pkg: internal/runtime/atomic
cpu: Loongson-3A6000 @ 2500.00MHz
BenchmarkXchg8 100000000 10.41 ns/op
BenchmarkXchg8-2 100000000 10.41 ns/op
BenchmarkXchg8-4 100000000 10.41 ns/op
BenchmarkXchg8Parallel 96647592 12.41 ns/op
BenchmarkXchg8Parallel-2 58376136 20.60 ns/op
BenchmarkXchg8Parallel-4 78458899 17.97 ns/op
goos: linux
goarch: loong64
pkg: internal/runtime/atomic
cpu: Loongson-3A5000-HV @ 2500.00MHz
BenchmarkXchg8 38323825 31.23 ns/op
BenchmarkXchg8-2 38368219 31.23 ns/op
BenchmarkXchg8-4 37154156 31.26 ns/op
BenchmarkXchg8Parallel 37908301 31.63 ns/op
BenchmarkXchg8Parallel-2 30413440 39.42 ns/op
BenchmarkXchg8Parallel-4 30737626 39.03 ns/op
For #69735
[1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html
Change-Id: I02ba68f66a2210b6902344fdc9975eb62de728ab
Reviewed-on: https://go-review.googlesource.com/c/go/+/623058
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Mauri de Souza Meneguzzo <mauri870@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Diffstat (limited to 'src/internal')
| -rw-r--r-- | src/internal/runtime/atomic/atomic_loong64.go | 3 | ||||
| -rw-r--r-- | src/internal/runtime/atomic/atomic_loong64.s | 38 | ||||
| -rw-r--r-- | src/internal/runtime/atomic/xchg8_test.go | 2 |
3 files changed, 42 insertions, 1 deletions
diff --git a/src/internal/runtime/atomic/atomic_loong64.go b/src/internal/runtime/atomic/atomic_loong64.go index 6586ad2f6c..1fa1a9fa5a 100644 --- a/src/internal/runtime/atomic/atomic_loong64.go +++ b/src/internal/runtime/atomic/atomic_loong64.go @@ -26,6 +26,9 @@ func Xadd64(ptr *uint64, delta int64) uint64 func Xadduintptr(ptr *uintptr, delta uintptr) uintptr //go:noescape +func Xchg8(ptr *uint8, new uint8) uint8 + +//go:noescape func Xchg(ptr *uint32, new uint32) uint32 //go:noescape diff --git a/src/internal/runtime/atomic/atomic_loong64.s b/src/internal/runtime/atomic/atomic_loong64.s index d67300afc4..5222b77e77 100644 --- a/src/internal/runtime/atomic/atomic_loong64.s +++ b/src/internal/runtime/atomic/atomic_loong64.s @@ -150,6 +150,44 @@ TEXT ·Xadd64(SB), NOSPLIT, $0-24 MOVV R4, ret+16(FP) RET +// uint8 Xchg8(ptr *uint8, new uint8) +// Atomically: +// old := *ptr; +// *ptr = new; +// return old; +TEXT ·Xchg8(SB), NOSPLIT, $0-17 + MOVV ptr+0(FP), R4 + MOVBU new+8(FP), R5 + + // R6 = ((ptr & 3) * 8) + AND $3, R4, R6 + SLLV $3, R6 + + // R7 = ((0xFF) << R6) ^ (-1) + MOVV $0xFF, R8 + SLLV R6, R8, R7 + XOR $-1, R7 + + // R4 = ptr & (~3) + MOVV $~3, R8 + AND R8, R4 + + // R5 = ((val) << R6) + SLLV R6, R5 + + DBAR $0x14 // LoadAcquire barrier +_xchg8_again: + LL (R4), R8 + MOVV R8, R9 // backup old val + AND R7, R8 + OR R5, R8 + SC R8, (R4) + BEQ R8, _xchg8_again + DBAR $0x12 // StoreRelease barrier + SRLV R6, R9, R9 + MOVBU R9, ret+16(FP) + RET + // func Xchg(ptr *uint32, new uint32) uint32 TEXT ·Xchg(SB), NOSPLIT, $0-20 MOVV ptr+0(FP), R4 diff --git a/src/internal/runtime/atomic/xchg8_test.go b/src/internal/runtime/atomic/xchg8_test.go index d9c0a8dd24..016ce819b0 100644 --- a/src/internal/runtime/atomic/xchg8_test.go +++ b/src/internal/runtime/atomic/xchg8_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-//go:build 386 || amd64 || arm || arm64 || ppc64 || ppc64le +//go:build 386 || amd64 || arm || arm64 || loong64 || ppc64 || ppc64le package atomic_test |
