aboutsummaryrefslogtreecommitdiff
path: root/src/cmd
diff options
context:
space:
mode:
authorJoel Sing <joel@sing.id.au>2025-02-24 00:27:34 +1100
committerJoel Sing <joel@sing.id.au>2025-03-21 18:21:44 -0700
commitb70244ff7a043786c211775b68259de6104ff91c (patch)
tree98e9bbd2cd4601d55478e67cd9072174787fbd64 /src/cmd
parentaf133d86e4de00e65581799c155659ce9c8c556c (diff)
downloadgo-b70244ff7a043786c211775b68259de6104ff91c.tar.xz
cmd/compile: intrinsify math/bits.Len on riscv64
For riscv64/rva22u64 and above, we can intrinsify math/bits.Len using the CLZ/CLZW machine instructions. On a StarFive VisionFive 2 with GORISCV64=rva22u64: │ clz.b.1 │ clz.b.2 │ │ sec/op │ sec/op vs base │ LeadingZeros-4 28.89n ± 0% 12.08n ± 0% -58.19% (p=0.000 n=10) LeadingZeros8-4 18.79n ± 0% 14.76n ± 0% -21.45% (p=0.000 n=10) LeadingZeros16-4 25.27n ± 0% 14.76n ± 0% -41.59% (p=0.000 n=10) LeadingZeros32-4 25.12n ± 0% 12.08n ± 0% -51.92% (p=0.000 n=10) LeadingZeros64-4 25.89n ± 0% 12.08n ± 0% -53.35% (p=0.000 n=10) geomean 24.55n 13.09n -46.70% Change-Id: I0dda684713dbdf5336af393f5ccbdae861c4f694 Reviewed-on: https://go-review.googlesource.com/c/go/+/652321 Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Mark Ryan <markdryan@rivosinc.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
Diffstat (limited to 'src/cmd')
-rw-r--r--src/cmd/compile/internal/riscv64/ssa.go2
-rw-r--r--src/cmd/compile/internal/ssa/_gen/RISCV64.rules6
-rw-r--r--src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go2
-rw-r--r--src/cmd/compile/internal/ssa/opGen.go28
-rw-r--r--src/cmd/compile/internal/ssa/rewriteRISCV64.go74
-rw-r--r--src/cmd/compile/internal/ssagen/intrinsics.go24
-rw-r--r--src/cmd/compile/internal/ssagen/intrinsics_test.go7
7 files changed, 142 insertions, 1 deletions
diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go
index 4392081f6e..952a2050a0 100644
--- a/src/cmd/compile/internal/riscv64/ssa.go
+++ b/src/cmd/compile/internal/riscv64/ssa.go
@@ -419,7 +419,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX,
ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD,
- ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW:
+ ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
index 016eb53f04..96b9b11cf9 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
@@ -225,6 +225,12 @@
(Ctz16 x) => (CTZW (ORI <typ.UInt32> [1<<16] x))
(Ctz8 x) => (CTZW (ORI <typ.UInt32> [1<<8] x))
+// Bit length (note that these will only be emitted for rva22u64 and above).
+(BitLen64 <t> x) => (SUB (MOVDconst [64]) (CLZ <t> x))
+(BitLen32 <t> x) => (SUB (MOVDconst [32]) (CLZW <t> x))
+(BitLen16 x) => (BitLen64 (ZeroExt16to64 x))
+(BitLen8 x) => (BitLen64 (ZeroExt8to64 x))
+
(Less64 ...) => (SLT ...)
(Less32 x y) => (SLT (SignExt32to64 x) (SignExt32to64 y))
(Less16 x y) => (SLT (SignExt16to64 x) (SignExt16to64 y))
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
index 85e9e47e82..cc2302ff37 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
@@ -229,6 +229,8 @@ func init() {
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // ^arg0 & arg1
{name: "ANDI", argLength: 1, reg: gp11, asm: "ANDI", aux: "Int64"}, // arg0 & auxint
+ {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zeros
+ {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zeros of least significant word
{name: "CTZ", argLength: 1, reg: gp11, asm: "CTZ"}, // count trailing zeros
{name: "CTZW", argLength: 1, reg: gp11, asm: "CTZW"}, // count trailing zeros of least significant word
{name: "NOT", argLength: 1, reg: gp11, asm: "NOT"}, // ^arg0
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 3fd5b310ac..0ae88ab246 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2523,6 +2523,8 @@ const (
OpRISCV64AND
OpRISCV64ANDN
OpRISCV64ANDI
+ OpRISCV64CLZ
+ OpRISCV64CLZW
OpRISCV64CTZ
OpRISCV64CTZW
OpRISCV64NOT
@@ -34005,6 +34007,32 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "CLZ",
+ argLen: 1,
+ asm: riscv.ACLZ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
+ name: "CLZW",
+ argLen: 1,
+ asm: riscv.ACLZW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ outputs: []outputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
name: "CTZ",
argLen: 1,
asm: riscv.ACTZ,
diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
index ab93309680..b2318e711b 100644
--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go
+++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
@@ -102,6 +102,14 @@ func rewriteValueRISCV64(v *Value) bool {
return true
case OpAvg64u:
return rewriteValueRISCV64_OpAvg64u(v)
+ case OpBitLen16:
+ return rewriteValueRISCV64_OpBitLen16(v)
+ case OpBitLen32:
+ return rewriteValueRISCV64_OpBitLen32(v)
+ case OpBitLen64:
+ return rewriteValueRISCV64_OpBitLen64(v)
+ case OpBitLen8:
+ return rewriteValueRISCV64_OpBitLen8(v)
case OpClosureCall:
v.Op = OpRISCV64CALLclosure
return true
@@ -928,6 +936,72 @@ func rewriteValueRISCV64_OpAvg64u(v *Value) bool {
return true
}
}
+func rewriteValueRISCV64_OpBitLen16(v *Value) bool {
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (BitLen16 x)
+ // result: (BitLen64 (ZeroExt16to64 x))
+ for {
+ x := v_0
+ v.reset(OpBitLen64)
+ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+}
+func rewriteValueRISCV64_OpBitLen32(v *Value) bool {
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (BitLen32 <t> x)
+ // result: (SUB (MOVDconst [32]) (CLZW <t> x))
+ for {
+ t := v.Type
+ x := v_0
+ v.reset(OpRISCV64SUB)
+ v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
+ v0.AuxInt = int64ToAuxInt(32)
+ v1 := b.NewValue0(v.Pos, OpRISCV64CLZW, t)
+ v1.AddArg(x)
+ v.AddArg2(v0, v1)
+ return true
+ }
+}
+func rewriteValueRISCV64_OpBitLen64(v *Value) bool {
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (BitLen64 <t> x)
+ // result: (SUB (MOVDconst [64]) (CLZ <t> x))
+ for {
+ t := v.Type
+ x := v_0
+ v.reset(OpRISCV64SUB)
+ v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
+ v0.AuxInt = int64ToAuxInt(64)
+ v1 := b.NewValue0(v.Pos, OpRISCV64CLZ, t)
+ v1.AddArg(x)
+ v.AddArg2(v0, v1)
+ return true
+ }
+}
+func rewriteValueRISCV64_OpBitLen8(v *Value) bool {
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (BitLen8 x)
+ // result: (BitLen64 (ZeroExt8to64 x))
+ for {
+ x := v_0
+ v.reset(OpBitLen64)
+ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueRISCV64_OpConst16(v *Value) bool {
// match: (Const16 [val])
// result: (MOVDconst [int64(val)])
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index f2b13045eb..eaced0b277 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics.go
@@ -962,6 +962,30 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
},
sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm)
+
+ if cfg.goriscv64 >= 22 {
+ addF("math/bits", "Len64",
+ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
+ },
+ sys.RISCV64)
+ addF("math/bits", "Len32",
+ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
+ },
+ sys.RISCV64)
+ addF("math/bits", "Len16",
+ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0])
+ },
+ sys.RISCV64)
+ addF("math/bits", "Len8",
+ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
+ },
+ sys.RISCV64)
+ }
+
alias("math/bits", "Len", "math/bits", "Len64", p8...)
alias("math/bits", "Len", "math/bits", "Len32", p4...)
diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go
index a06fdeedb2..230a7bdf67 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics_test.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go
@@ -1110,6 +1110,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"riscv64", "internal/runtime/sys", "GetCallerPC"}: struct{}{},
{"riscv64", "internal/runtime/sys", "GetCallerSP"}: struct{}{},
{"riscv64", "internal/runtime/sys", "GetClosurePtr"}: struct{}{},
+ {"riscv64", "internal/runtime/sys", "Len64"}: struct{}{},
+ {"riscv64", "internal/runtime/sys", "Len8"}: struct{}{},
{"riscv64", "internal/runtime/sys", "TrailingZeros32"}: struct{}{},
{"riscv64", "internal/runtime/sys", "TrailingZeros64"}: struct{}{},
{"riscv64", "internal/runtime/sys", "TrailingZeros8"}: struct{}{},
@@ -1120,6 +1122,11 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"riscv64", "math/big", "mulWW"}: struct{}{},
{"riscv64", "math/bits", "Add"}: struct{}{},
{"riscv64", "math/bits", "Add64"}: struct{}{},
+ {"riscv64", "math/bits", "Len"}: struct{}{},
+ {"riscv64", "math/bits", "Len16"}: struct{}{},
+ {"riscv64", "math/bits", "Len32"}: struct{}{},
+ {"riscv64", "math/bits", "Len64"}: struct{}{},
+ {"riscv64", "math/bits", "Len8"}: struct{}{},
{"riscv64", "math/bits", "Mul"}: struct{}{},
{"riscv64", "math/bits", "Mul64"}: struct{}{},
{"riscv64", "math/bits", "RotateLeft"}: struct{}{},