about | summary | refs | log | tree | commit | diff
path: root/test/codegen
diff options
context:
space:
mode:
author: Joel Sing <joel@sing.id.au> 2025-02-24 00:27:34 +1100
committer: Joel Sing <joel@sing.id.au> 2025-03-21 18:21:44 -0700
commit: b70244ff7a043786c211775b68259de6104ff91c (patch)
tree: 98e9bbd2cd4601d55478e67cd9072174787fbd64 /test/codegen
parent: af133d86e4de00e65581799c155659ce9c8c556c (diff)
download: go-b70244ff7a043786c211775b68259de6104ff91c.tar.xz
cmd/compile: intrinsify math/bits.Len on riscv64
For riscv64/rva22u64 and above, we can intrinsify math/bits.Len using the
CLZ/CLZW machine instructions.

On a StarFive VisionFive 2 with GORISCV64=rva22u64:

                 │   clz.b.1   │               clz.b.2               │
                 │   sec/op    │   sec/op     vs base                │
LeadingZeros-4     28.89n ± 0%   12.08n ± 0%  -58.19% (p=0.000 n=10)
LeadingZeros8-4    18.79n ± 0%   14.76n ± 0%  -21.45% (p=0.000 n=10)
LeadingZeros16-4   25.27n ± 0%   14.76n ± 0%  -41.59% (p=0.000 n=10)
LeadingZeros32-4   25.12n ± 0%   12.08n ± 0%  -51.92% (p=0.000 n=10)
LeadingZeros64-4   25.89n ± 0%   12.08n ± 0%  -53.35% (p=0.000 n=10)
geomean            24.55n        13.09n       -46.70%

Change-Id: I0dda684713dbdf5336af393f5ccbdae861c4f694
Reviewed-on: https://go-review.googlesource.com/c/go/+/652321
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Diffstat (limited to 'test/codegen')
-rw-r--r-- test/codegen/mathbits.go 83
1 files changed, 53 insertions, 30 deletions
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 786fad3bd9..a9cf466780 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -15,60 +15,70 @@ import "math/bits"
func LeadingZeros(n uint) int {
// amd64/v1,amd64/v2:"BSRQ"
// amd64/v3:"LZCNTQ", -"BSRQ"
- // s390x:"FLOGR"
- // arm:"CLZ" arm64:"CLZ"
+ // arm64:"CLZ"
+ // arm:"CLZ"
// loong64:"CLZV",-"SUB"
// mips:"CLZ"
- // wasm:"I64Clz"
// ppc64x:"CNTLZD"
+ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"SUB"
+ // s390x:"FLOGR"
+ // wasm:"I64Clz"
return bits.LeadingZeros(n)
}
func LeadingZeros64(n uint64) int {
// amd64/v1,amd64/v2:"BSRQ"
// amd64/v3:"LZCNTQ", -"BSRQ"
- // s390x:"FLOGR"
- // arm:"CLZ" arm64:"CLZ"
+ // arm:"CLZ"
+ // arm64:"CLZ"
// loong64:"CLZV",-"SUB"
// mips:"CLZ"
- // wasm:"I64Clz"
// ppc64x:"CNTLZD"
+ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"ADDI"
+ // s390x:"FLOGR"
+ // wasm:"I64Clz"
return bits.LeadingZeros64(n)
}
func LeadingZeros32(n uint32) int {
// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
// amd64/v3: "LZCNTL",- "BSRL"
- // s390x:"FLOGR"
- // arm:"CLZ" arm64:"CLZW"
+ // arm:"CLZ"
+ // arm64:"CLZW"
// loong64:"CLZW",-"SUB"
// mips:"CLZ"
- // wasm:"I64Clz"
// ppc64x:"CNTLZW"
+ // riscv64/rva22u64,riscv64/rva23u64:"CLZW",-"ADDI"
+ // s390x:"FLOGR"
+ // wasm:"I64Clz"
return bits.LeadingZeros32(n)
}
func LeadingZeros16(n uint16) int {
// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
// amd64/v3: "LZCNTL",- "BSRL"
- // s390x:"FLOGR"
- // arm:"CLZ" arm64:"CLZ"
+ // arm64:"CLZ"
+ // arm:"CLZ"
// loong64:"CLZV"
// mips:"CLZ"
- // wasm:"I64Clz"
// ppc64x:"CNTLZD"
+ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-48",-"NEG"
+ // s390x:"FLOGR"
+ // wasm:"I64Clz"
return bits.LeadingZeros16(n)
}
func LeadingZeros8(n uint8) int {
// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
// amd64/v3: "LZCNTL",- "BSRL"
- // s390x:"FLOGR"
- // arm:"CLZ" arm64:"CLZ"
+ // arm64:"CLZ"
+ // arm:"CLZ"
// loong64:"CLZV"
// mips:"CLZ"
- // wasm:"I64Clz"
// ppc64x:"CNTLZD"
+ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-56",-"NEG"
+ // s390x:"FLOGR"
+ // wasm:"I64Clz"
return bits.LeadingZeros8(n)
}
@@ -79,30 +89,35 @@ func LeadingZeros8(n uint8) int {
func Len(n uint) int {
// amd64/v1,amd64/v2:"BSRQ"
// amd64/v3: "LZCNTQ"
- // s390x:"FLOGR"
- // arm:"CLZ" arm64:"CLZ"
+ // arm64:"CLZ"
+ // arm:"CLZ"
// loong64:"CLZV"
// mips:"CLZ"
- // wasm:"I64Clz"
// ppc64x:"SUBC","CNTLZD"
+ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+ // s390x:"FLOGR"
+ // wasm:"I64Clz"
return bits.Len(n)
}
func Len64(n uint64) int {
// amd64/v1,amd64/v2:"BSRQ"
// amd64/v3: "LZCNTQ"
- // s390x:"FLOGR"
- // arm:"CLZ" arm64:"CLZ"
+ // arm64:"CLZ"
+ // arm:"CLZ"
// loong64:"CLZV"
// mips:"CLZ"
- // wasm:"I64Clz"
// ppc64x:"SUBC","CNTLZD"
+ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+ // s390x:"FLOGR"
+ // wasm:"I64Clz"
return bits.Len64(n)
}
func SubFromLen64(n uint64) int {
// loong64:"CLZV",-"ADD"
// ppc64x:"CNTLZD",-"SUBC"
+ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"ADDI",-"NEG"
return 64 - bits.Len64(n)
}
@@ -114,36 +129,42 @@ func CompareWithLen64(n uint64) bool {
func Len32(n uint32) int {
// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
// amd64/v3: "LZCNTL"
- // s390x:"FLOGR"
- // arm:"CLZ" arm64:"CLZ"
+ // arm64:"CLZ"
+ // arm:"CLZ"
// loong64:"CLZW"
// mips:"CLZ"
- // wasm:"I64Clz"
// ppc64x: "CNTLZW"
+ // riscv64/rva22u64,riscv64/rva23u64:"CLZW","ADDI\t\\$-32"
+ // s390x:"FLOGR"
+ // wasm:"I64Clz"
return bits.Len32(n)
}
func Len16(n uint16) int {
// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
// amd64/v3: "LZCNTL"
- // s390x:"FLOGR"
- // arm:"CLZ" arm64:"CLZ"
+ // arm64:"CLZ"
+ // arm:"CLZ"
// loong64:"CLZV"
// mips:"CLZ"
- // wasm:"I64Clz"
// ppc64x:"SUBC","CNTLZD"
+ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+ // s390x:"FLOGR"
+ // wasm:"I64Clz"
return bits.Len16(n)
}
func Len8(n uint8) int {
// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
// amd64/v3: "LZCNTL"
- // s390x:"FLOGR"
- // arm:"CLZ" arm64:"CLZ"
+ // arm64:"CLZ"
+ // arm:"CLZ"
// loong64:"CLZV"
// mips:"CLZ"
- // wasm:"I64Clz"
// ppc64x:"SUBC","CNTLZD"
+ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+ // s390x:"FLOGR"
+ // wasm:"I64Clz"
return bits.Len8(n)
}
@@ -451,6 +472,7 @@ func IterateBits64(n uint64) int {
for n != 0 {
// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
// amd64/v3:"TZCNTQ"
+ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
i += bits.TrailingZeros64(n)
n &= n - 1
}
@@ -462,6 +484,7 @@ func IterateBits32(n uint32) int {
for n != 0 {
// amd64/v1,amd64/v2:"BSFL",-"BTSQ"
// amd64/v3:"TZCNTL"
+ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
i += bits.TrailingZeros32(n)
n &= n - 1
}