diff options
| author | Joel Sing <joel@sing.id.au> | 2025-03-20 01:09:23 +1100 |
|---|---|---|
| committer | Joel Sing <joel@sing.id.au> | 2025-05-01 05:57:13 -0700 |
| commit | 90e8b8cdaeb76a57604a461a138c59340daed7ef (patch) | |
| tree | f1eb63347d046865f3c6537fa8f617cb26fe8017 /test/codegen | |
| parent | 6fc1e341001e10430251ca90e80a022d95b0a5cf (diff) | |
| download | go-90e8b8cdaeb76a57604a461a138c59340daed7ef.tar.xz | |
cmd/compile: intrinsify math/bits.Bswap on riscv64
For riscv64/rva22u64 and above, we can intrinsify math/bits.Bswap
using the REV8 machine instruction.
On a StarFive VisionFive 2 with GORISCV64=rva22u64:
                 │     rb.1     │                rb.2                 │
                 │    sec/op    │   sec/op     vs base                │
ReverseBytes-4     18.790n ± 0%   4.026n ± 0%  -78.57% (p=0.000 n=10)
ReverseBytes16-4    6.710n ± 0%   5.368n ± 0%  -20.00% (p=0.000 n=10)
ReverseBytes32-4   13.420n ± 0%   5.368n ± 0%  -60.00% (p=0.000 n=10)
ReverseBytes64-4   17.450n ± 0%   4.026n ± 0%  -76.93% (p=0.000 n=10)
geomean             13.11n        4.649n       -64.54%
Change-Id: I26eee34270b1721f7304bb1cddb0fda129b20ece
Reviewed-on: https://go-review.googlesource.com/c/go/+/660855
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Diffstat (limited to 'test/codegen')
| -rw-r--r-- | test/codegen/mathbits.go | 20 |
1 files changed, 12 insertions, 8 deletions
```diff
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 873354b838..e9dfbb1443 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -261,42 +261,46 @@ func Reverse8(n uint8) uint8 {
 // ----------------------- //
 
 func ReverseBytes(n uint) uint {
-	// amd64:"BSWAPQ"
 	// 386:"BSWAPL"
-	// s390x:"MOVDBR"
+	// amd64:"BSWAPQ"
 	// arm64:"REV"
 	// loong64:"REVBV"
+	// riscv64/rva22u64,riscv64/rva23u64:"REV8"
+	// s390x:"MOVDBR"
 	return bits.ReverseBytes(n)
 }
 
 func ReverseBytes64(n uint64) uint64 {
-	// amd64:"BSWAPQ"
 	// 386:"BSWAPL"
-	// s390x:"MOVDBR"
+	// amd64:"BSWAPQ"
 	// arm64:"REV"
-	// ppc64x/power10: "BRD"
 	// loong64:"REVBV"
+	// ppc64x/power10: "BRD"
+	// riscv64/rva22u64,riscv64/rva23u64:"REV8"
+	// s390x:"MOVDBR"
 	return bits.ReverseBytes64(n)
 }
 
 func ReverseBytes32(n uint32) uint32 {
-	// amd64:"BSWAPL"
 	// 386:"BSWAPL"
-	// s390x:"MOVWBR"
+	// amd64:"BSWAPL"
 	// arm64:"REVW"
 	// loong64:"REVB2W"
 	// ppc64x/power10: "BRW"
+	// riscv64/rva22u64,riscv64/rva23u64:"REV8","SRLI\t\\$32"
+	// s390x:"MOVWBR"
 	return bits.ReverseBytes32(n)
 }
 
 func ReverseBytes16(n uint16) uint16 {
 	// amd64:"ROLW"
-	// arm64:"REV16W",-"UBFX",-"ORR"
 	// arm/5:"SLL","SRL","ORR"
 	// arm/6:"REV16"
 	// arm/7:"REV16"
+	// arm64:"REV16W",-"UBFX",-"ORR"
 	// loong64:"REVB2H"
 	// ppc64x/power10: "BRH"
+	// riscv64/rva22u64,riscv64/rva23u64:"REV8","SRLI\t\\$48"
 	return bits.ReverseBytes16(n)
 }
```
