diff options
| author | Joel Sing <joel@sing.id.au> | 2025-02-24 00:27:34 +1100 |
|---|---|---|
| committer | Joel Sing <joel@sing.id.au> | 2025-03-21 18:21:44 -0700 |
| commit | b70244ff7a043786c211775b68259de6104ff91c (patch) | |
| tree | 98e9bbd2cd4601d55478e67cd9072174787fbd64 /test/codegen | |
| parent | af133d86e4de00e65581799c155659ce9c8c556c (diff) | |
| download | go-b70244ff7a043786c211775b68259de6104ff91c.tar.xz | |
cmd/compile: intrinsify math/bits.Len on riscv64
For riscv64/rva22u64 and above, we can intrinsify math/bits.Len using the
CLZ/CLZW machine instructions.
On a StarFive VisionFive 2 with GORISCV64=rva22u64:
                 │   clz.b.1   │              clz.b.2               │
                 │   sec/op    │   sec/op     vs base               │
LeadingZeros-4     28.89n ± 0%   12.08n ± 0%  -58.19% (p=0.000 n=10)
LeadingZeros8-4    18.79n ± 0%   14.76n ± 0%  -21.45% (p=0.000 n=10)
LeadingZeros16-4   25.27n ± 0%   14.76n ± 0%  -41.59% (p=0.000 n=10)
LeadingZeros32-4   25.12n ± 0%   12.08n ± 0%  -51.92% (p=0.000 n=10)
LeadingZeros64-4   25.89n ± 0%   12.08n ± 0%  -53.35% (p=0.000 n=10)
geomean            24.55n        13.09n       -46.70%
Change-Id: I0dda684713dbdf5336af393f5ccbdae861c4f694
Reviewed-on: https://go-review.googlesource.com/c/go/+/652321
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Diffstat (limited to 'test/codegen')
| -rw-r--r-- | test/codegen/mathbits.go | 83 |
1 files changed, 53 insertions, 30 deletions
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 786fad3bd9..a9cf466780 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -15,60 +15,70 @@ import "math/bits"
 func LeadingZeros(n uint) int {
 	// amd64/v1,amd64/v2:"BSRQ"
 	// amd64/v3:"LZCNTQ", -"BSRQ"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV",-"SUB"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"SUB"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.LeadingZeros(n)
 }
 
 func LeadingZeros64(n uint64) int {
 	// amd64/v1,amd64/v2:"BSRQ"
 	// amd64/v3:"LZCNTQ", -"BSRQ"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm:"CLZ"
+	// arm64:"CLZ"
 	// loong64:"CLZV",-"SUB"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"ADDI"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.LeadingZeros64(n)
 }
 
 func LeadingZeros32(n uint32) int {
 	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL",- "BSRL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZW"
+	// arm:"CLZ"
+	// arm64:"CLZW"
 	// loong64:"CLZW",-"SUB"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"CNTLZW"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZW",-"ADDI"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.LeadingZeros32(n)
 }
 
 func LeadingZeros16(n uint16) int {
 	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL",- "BSRL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-48",-"NEG"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.LeadingZeros16(n)
 }
 
 func LeadingZeros8(n uint8) int {
 	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL",- "BSRL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-56",-"NEG"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.LeadingZeros8(n)
 }
 
@@ -79,30 +89,35 @@ func LeadingZeros8(n uint8) int {
 func Len(n uint) int {
 	// amd64/v1,amd64/v2:"BSRQ"
 	// amd64/v3: "LZCNTQ"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.Len(n)
 }
 
 func Len64(n uint64) int {
 	// amd64/v1,amd64/v2:"BSRQ"
 	// amd64/v3: "LZCNTQ"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.Len64(n)
 }
 
 func SubFromLen64(n uint64) int {
 	// loong64:"CLZV",-"ADD"
 	// ppc64x:"CNTLZD",-"SUBC"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"ADDI",-"NEG"
 	return 64 - bits.Len64(n)
 }
 
@@ -114,36 +129,42 @@ func CompareWithLen64(n uint64) bool {
 func Len32(n uint32) int {
 	// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZW"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x: "CNTLZW"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZW","ADDI\t\\$-32"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.Len32(n)
 }
 
 func Len16(n uint16) int {
 	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.Len16(n)
 }
 
 func Len8(n uint8) int {
 	// amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
 	// amd64/v3: "LZCNTL"
-	// s390x:"FLOGR"
-	// arm:"CLZ" arm64:"CLZ"
+	// arm64:"CLZ"
+	// arm:"CLZ"
 	// loong64:"CLZV"
 	// mips:"CLZ"
-	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"
+	// riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64"
+	// s390x:"FLOGR"
+	// wasm:"I64Clz"
 	return bits.Len8(n)
 }
 
@@ -451,6 +472,7 @@ func IterateBits64(n uint64) int {
 	for n != 0 {
 		// amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
 		// amd64/v3:"TZCNTQ"
+		// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
 		i += bits.TrailingZeros64(n)
 		n &= n - 1
 	}
@@ -462,6 +484,7 @@ func IterateBits32(n uint32) int {
 	for n != 0 {
 		// amd64/v1,amd64/v2:"BSFL",-"BTSQ"
 		// amd64/v3:"TZCNTL"
+		// riscv64/rva22u64,riscv64/rva23u64: "CTZ\t"
 		i += bits.TrailingZeros32(n)
 		n &= n - 1
 	}
