diff options
| author | Guoqi Chen <chenguoqi@loongson.cn> | 2024-10-18 16:31:29 +0800 |
|---|---|---|
| committer | abner chenc <chenguoqi@loongson.cn> | 2024-11-12 00:48:04 +0000 |
| commit | fb9b946adcc8389aafaa43866f3cc26b12411439 (patch) | |
| tree | ecdcf4a724f222908c28c4d757f933b46c7c2526 /test/codegen | |
| parent | 4c8ab993cd881d7eb1b8264f0b716c7cdd638f71 (diff) | |
| download | go-fb9b946adcc8389aafaa43866f3cc26b12411439.tar.xz | |
cmd/compile: optimize math/bits.OnesCount{16,32,64} implementation on loong64
Use the VPCNT instruction from Loong64's LSX extension to implement
math/bits.OnesCount{16,32,64} as compiler intrinsics.
Benchmark results on Loongson 3A5000 and 3A6000 machines:
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000-HV @ 2500.00MHz
| | bench.old (sec/op) | bench.new (sec/op) | vs base |
|---|---|---|---|
| OnesCount | 4.413n ± 0% | 1.401n ± 0% | -68.25% (p=0.000 n=10) |
| OnesCount8 | 1.364n ± 0% | 1.363n ± 0% | ~ (p=0.130 n=10) |
| OnesCount16 | 2.112n ± 0% | 1.534n ± 0% | -27.37% (p=0.000 n=10) |
| OnesCount32 | 4.533n ± 0% | 1.529n ± 0% | -66.27% (p=0.000 n=10) |
| OnesCount64 | 4.565n ± 0% | 1.531n ± 1% | -66.46% (p=0.000 n=10) |
| geomean | 3.048n | 1.470n | -51.78% |
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000 @ 2500.00MHz
| | bench.old (sec/op) | bench.new (sec/op) | vs base |
|---|---|---|---|
| OnesCount | 3.553n ± 0% | 1.201n ± 0% | -66.20% (p=0.000 n=10) |
| OnesCount8 | 0.8021n ± 0% | 0.8004n ± 0% | -0.21% (p=0.000 n=10) |
| OnesCount16 | 1.216n ± 0% | 1.000n ± 0% | -17.76% (p=0.000 n=10) |
| OnesCount32 | 3.006n ± 0% | 1.035n ± 0% | -65.57% (p=0.000 n=10) |
| OnesCount64 | 3.503n ± 0% | 1.035n ± 0% | -70.45% (p=0.000 n=10) |
| geomean | 2.053n | 1.006n | -51.01% |
Change-Id: I07a5b8da2bb48711b896387ec7625145804affc8
Reviewed-on: https://go-review.googlesource.com/c/go/+/620978
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'test/codegen')
| -rw-r--r-- | test/codegen/mathbits.go | 4 |
1 file changed, 4 insertions, 0 deletions
```diff
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index a3d1143424..f258ab9162 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -156,6 +156,7 @@ func OnesCount(n uint) int {
 	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
 	// amd64:"POPCNTQ"
 	// arm64:"VCNT","VUADDLV"
+	// loong64:"VPCNTV"
 	// s390x:"POPCNT"
 	// ppc64x:"POPCNTD"
 	// wasm:"I64Popcnt"
@@ -166,6 +167,7 @@ func OnesCount64(n uint64) int {
 	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
 	// amd64:"POPCNTQ"
 	// arm64:"VCNT","VUADDLV"
+	// loong64:"VPCNTV"
 	// s390x:"POPCNT"
 	// ppc64x:"POPCNTD"
 	// wasm:"I64Popcnt"
@@ -176,6 +178,7 @@ func OnesCount32(n uint32) int {
 	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
 	// amd64:"POPCNTL"
 	// arm64:"VCNT","VUADDLV"
+	// loong64:"VPCNTW"
 	// s390x:"POPCNT"
 	// ppc64x:"POPCNTW"
 	// wasm:"I64Popcnt"
@@ -186,6 +189,7 @@ func OnesCount16(n uint16) int {
 	// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
 	// amd64:"POPCNTL"
 	// arm64:"VCNT","VUADDLV"
+	// loong64:"VPCNTH"
 	// s390x:"POPCNT"
 	// ppc64x:"POPCNTW"
 	// wasm:"I64Popcnt"
```
