about | summary | refs | log | tree | commit | diff
path: root/test/codegen
diff options
context:
space:
mode:
author Joel Sing <joel@sing.id.au> 2025-02-23 22:17:53 +1100
committer Joel Sing <joel@sing.id.au> 2025-03-15 19:07:53 -0700
commit 6fb7bdc96d0398fab313586fba6fdc89cc14c679 (patch)
tree 15df5dd694e56f854bdc97d74372d61575857808 /test/codegen
parent e6ffe764cf1f35e96a116ca144733a5fba02578e (diff)
download go-6fb7bdc96d0398fab313586fba6fdc89cc14c679.tar.xz
cmd/compile: intrinsify math/bits.TrailingZeros on riscv64
For riscv64/rva22u64 and above, we can intrinsify math/bits.TrailingZeros
using the CTZ/CTZW machine instructions.

On a StarFive VisionFive 2 with GORISCV64=rva22u64:

                    │   ctz.b.1    │               ctz.b.2               │
                    │    sec/op    │    sec/op     vs base               │
TrailingZeros-4       25.500n ± 0%    8.052n ± 0%  -68.42% (p=0.000 n=10)
TrailingZeros8-4       14.76n ± 0%    10.74n ± 0%  -27.24% (p=0.000 n=10)
TrailingZeros16-4      26.84n ± 0%    10.74n ± 0%  -59.99% (p=0.000 n=10)
TrailingZeros32-4     25.500n ± 0%    8.052n ± 0%  -68.42% (p=0.000 n=10)
TrailingZeros64-4     25.500n ± 0%    8.052n ± 0%  -68.42% (p=0.000 n=10)
geomean                23.09n         9.035n       -60.88%

Change-Id: I71edf2b988acb7a68e797afda4ee66d7a57d587e
Reviewed-on: https://go-review.googlesource.com/c/go/+/652320
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
Diffstat (limited to 'test/codegen')
-rw-r--r-- test/codegen/mathbits.go 25
1 files changed, 16 insertions, 9 deletions
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 1cee39283d..786fad3bd9 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -356,28 +356,30 @@ func RotateLeftVariable32(n uint32, m int) uint32 {
// ------------------------ //
func TrailingZeros(n uint) int {
+ // 386:"BSFL"
// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
// amd64/v3:"TZCNTQ"
- // 386:"BSFL"
// arm:"CLZ"
// arm64:"RBIT","CLZ"
// loong64:"CTZV"
- // s390x:"FLOGR"
// ppc64x/power8:"ANDN","POPCNTD"
// ppc64x/power9: "CNTTZD"
+ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
+ // s390x:"FLOGR"
// wasm:"I64Ctz"
return bits.TrailingZeros(n)
}
func TrailingZeros64(n uint64) int {
+ // 386:"BSFL","JNE"
// amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
// amd64/v3:"TZCNTQ"
- // 386:"BSFL","JNE"
// arm64:"RBIT","CLZ"
// loong64:"CTZV"
- // s390x:"FLOGR"
// ppc64x/power8:"ANDN","POPCNTD"
// ppc64x/power9: "CNTTZD"
+ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
+ // s390x:"FLOGR"
// wasm:"I64Ctz"
return bits.TrailingZeros64(n)
}
@@ -389,40 +391,43 @@ func TrailingZeros64Subtract(n uint64) int {
}
func TrailingZeros32(n uint32) int {
+ // 386:"BSFL"
// amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ"
// amd64/v3:"TZCNTL"
- // 386:"BSFL"
// arm:"CLZ"
// arm64:"RBITW","CLZW"
// loong64:"CTZW"
- // s390x:"FLOGR","MOVWZ"
// ppc64x/power8:"ANDN","POPCNTW"
// ppc64x/power9: "CNTTZW"
+ // riscv64/rva22u64,riscv64/rva23u64: "CTZW"
+ // s390x:"FLOGR","MOVWZ"
// wasm:"I64Ctz"
return bits.TrailingZeros32(n)
}
func TrailingZeros16(n uint16) int {
- // amd64:"BSFL","ORL\\t\\$65536"
// 386:"BSFL\t"
+ // amd64:"BSFL","ORL\\t\\$65536"
// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
// loong64:"CTZV"
- // s390x:"FLOGR","OR\t\\$65536"
// ppc64x/power8:"POPCNTW","ADD\t\\$-1"
// ppc64x/power9:"CNTTZD","ORIS\\t\\$1"
+ // riscv64/rva22u64,riscv64/rva23u64: "ORI\t\\$65536","CTZW"
+ // s390x:"FLOGR","OR\t\\$65536"
// wasm:"I64Ctz"
return bits.TrailingZeros16(n)
}
func TrailingZeros8(n uint8) int {
- // amd64:"BSFL","ORL\\t\\$256"
// 386:"BSFL"
+ // amd64:"BSFL","ORL\\t\\$256"
// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
// loong64:"CTZV"
// ppc64x/power8:"POPCNTB","ADD\t\\$-1"
// ppc64x/power9:"CNTTZD","OR\t\\$256"
+ // riscv64/rva22u64,riscv64/rva23u64: "ORI\t\\$256","CTZW"
// s390x:"FLOGR","OR\t\\$256"
// wasm:"I64Ctz"
return bits.TrailingZeros8(n)
@@ -469,6 +474,7 @@ func IterateBits16(n uint16) int {
// amd64/v1,amd64/v2:"BSFL",-"BTSL"
// amd64/v3:"TZCNTL"
// arm64:"RBITW","CLZW",-"ORR"
+ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t",-"ORR"
i += bits.TrailingZeros16(n)
n &= n - 1
}
@@ -481,6 +487,7 @@ func IterateBits8(n uint8) int {
// amd64/v1,amd64/v2:"BSFL",-"BTSL"
// amd64/v3:"TZCNTL"
// arm64:"RBITW","CLZW",-"ORR"
+ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t",-"ORR"
i += bits.TrailingZeros8(n)
n &= n - 1
}