aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: limeidan <limeidan@loongson.cn> 2025-08-07 11:23:55 +0800
committer: abner chenc <chenguoqi@loongson.cn> 2025-08-12 23:01:49 -0700
commit: 90b7d7aaa27b5a1d7a69f76ed82ed2e88ae84c31 (patch)
tree: b65223d70b0176bb0a59fb77a752cfc053a5cb72 /src
parent: 1b263fc6042ed763319f59fd2b87df23e53c9f6f (diff)
download: go-90b7d7aaa27b5a1d7a69f76ed82ed2e88ae84c31.tar.xz
cmd/compile/internal: optimize multiplication using the new operation 'ADDshiftLLV' on loong64
goos: linux
goarch: loong64
pkg: cmd/compile/internal/test
cpu: Loongson-3A6000-HV @ 2500.00MHz
                   │     old       │                 new                  │
                   │    sec/op     │    sec/op      vs base               │
MulconstI32/3        0.8004n ± 0%    0.4247n ± 2%   -46.94% (p=0.000 n=10)
MulconstI32/5        0.8005n ± 0%    0.4256n ± 1%   -46.83% (p=0.000 n=10)
MulconstI32/12       1.2010n ± 0%    0.8005n ± 0%   -33.35% (p=0.000 n=10)
MulconstI32/120      0.8090n ± 0%    0.8067n ± 0%    -0.28% (p=0.007 n=10)
MulconstI32/-120     0.8109n ± 0%    0.8072n ± 0%    -0.47% (p=0.000 n=10)
MulconstI32/65537    0.8004n ± 0%    0.8004n ± 0%         ~ (p=1.000 n=10)
MulconstI32/65538    0.8005n ± 0%    0.8005n ± 0%         ~ (p=0.265 n=10)
MulconstI64/3        0.8005n ± 0%    0.4241n ± 1%   -47.02% (p=0.000 n=10)
MulconstI64/5        0.8004n ± 0%    0.4249n ± 1%   -46.91% (p=0.000 n=10)
MulconstI64/12       1.2010n ± 0%    0.8004n ± 0%   -33.36% (p=0.000 n=10)
MulconstI64/120      0.8005n ± 0%    0.8005n ± 0%         ~ (p=0.635 n=10)
MulconstI64/-120     0.8005n ± 0%    0.8005n ± 0%         ~ (p=0.837 n=10)
MulconstI64/65537    0.8005n ± 0%    0.8005n ± 0%         ~ (p=0.837 n=10)
MulconstI64/65538    0.8096n ± 0%    0.8004n ± 0%    -1.14% (p=0.000 n=10)
MulconstU32/3        0.8004n ± 0%    0.4263n ± 1%   -46.75% (p=0.000 n=10)
MulconstU32/5        0.8005n ± 0%    0.4262n ± 1%   -46.76% (p=0.000 n=10)
MulconstU32/12       1.2010n ± 0%    0.8005n ± 0%   -33.35% (p=0.000 n=10)
MulconstU32/120      0.8105n ± 0%    0.8096n ± 0%         ~ (p=0.183 n=10)
MulconstU32/65537    0.8004n ± 0%    0.8004n ± 0%         ~ (p=1.000 n=10)
MulconstU32/65538    0.8005n ± 0%    0.8005n ± 0%         ~ (p=1.000 n=10)
MulconstU64/3        0.8004n ± 0%    0.4265n ± 4%   -46.71% (p=0.000 n=10)
MulconstU64/5        0.8004n ± 0%    0.4256n ± 0%   -46.82% (p=0.000 n=10)
MulconstU64/12       1.2010n ± 0%    0.8004n ± 0%   -33.36% (p=0.000 n=10)
MulconstU64/120      0.8005n ± 0%    0.8005n ± 0%         ~ (p=0.387 n=10)
MulconstU64/65537    0.8005n ± 0%    0.8005n ± 0%         ~ (p=0.265 n=10)
MulconstU64/65538    0.8080n ± 0%    0.8004n ± 0%    -0.93% (p=0.000 n=10)
geomean              0.8539n         0.6597n        -22.74%

Change-Id: Ie33e88985d7639f481bbba540bc917b9f185c357
Reviewed-on: https://go-review.googlesource.com/c/go/+/693855
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src')
-rw-r--r-- src/cmd/compile/internal/loong64/ssa.go 11
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go 2
-rw-r--r-- src/cmd/compile/internal/ssa/config.go 13
-rw-r--r-- src/cmd/compile/internal/ssa/opGen.go 16
-rw-r--r-- src/cmd/internal/obj/loong64/asm.go 4
5 files changed, 42 insertions, 4 deletions
diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go
index f8ecebb350..c7fb903d5d 100644
--- a/src/cmd/compile/internal/loong64/ssa.go
+++ b/src/cmd/compile/internal/loong64/ssa.go
@@ -1065,6 +1065,17 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
{Type: obj.TYPE_CONST, Offset: int64((v.AuxInt >> 0) & 0x1f)},
})
+ case ssa.OpLOONG64ADDshiftLLV:
+ // ADDshiftLLV Rarg0, Rarg1, $shift
+ // ALSLV $shift, Rarg1, Rarg0, Rout
+ p := s.Prog(v.Op.Asm())
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = v.AuxInt
+ p.Reg = v.Args[1].Reg()
+ p.AddRestSourceReg(v.Args[0].Reg())
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg()
+
case ssa.OpClobber, ssa.OpClobberReg:
// TODO: implement for clobberdead experiment. Nop is ok for now.
default:
diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
index 75429cbffd..d6818e8592 100644
--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
@@ -577,6 +577,8 @@ func init() {
// is $hint and bit[41:5] is $n.
{name: "PRELD", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELD", hasSideEffects: true},
{name: "PRELDX", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELDX", hasSideEffects: true},
+
+ {name: "ADDshiftLLV", argLength: 2, aux: "Int64", reg: gp21, asm: "ALSLV"}, // arg0 + arg1<<auxInt, the value of auxInt should be in the range [1, 4].
}
blocks := []blockData{
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index 50ec2ec177..f2097170f4 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -566,7 +566,7 @@ func (c *Config) buildRecipes(arch string) {
}
case "loong64":
// - multiply is 4 cycles.
- // - add/sub/shift are 1 cycle.
+ // - add/sub/shift/alsl are 1 cycle.
// On loong64, using a multiply also needs to load the constant into a register.
// TODO: figure out a happy medium.
mulCost = 45
@@ -601,6 +601,15 @@ func (c *Config) buildRecipes(arch string) {
return m.Block.NewValue1I(m.Pos, OpLOONG64SLLVconst, m.Type, int64(i), x)
})
}
+
+ // ADDshiftLLV
+ for i := 1; i < 5; i++ {
+ c := 10
+ r(1, 1<<i, c,
+ func(m, x, y *Value) *Value {
+ return m.Block.NewValue2I(m.Pos, OpLOONG64ADDshiftLLV, m.Type, int64(i), x, y)
+ })
+ }
}
c.mulRecipes = map[int64]mulRecipe{}
@@ -718,7 +727,7 @@ func (c *Config) buildRecipes(arch string) {
// Currently:
// len(c.mulRecipes) == 5984 on arm64
// 680 on amd64
- // 5984 on loong64
+ // 9738 on loong64
// This function takes ~2.5ms on arm64.
//println(len(c.mulRecipes))
}
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 06a05c6e3f..e155eca5ff 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1970,6 +1970,7 @@ const (
OpLOONG64LoweredPanicBoundsCC
OpLOONG64PRELD
OpLOONG64PRELDX
+ OpLOONG64ADDshiftLLV
OpMIPSADD
OpMIPSADDconst
@@ -26527,6 +26528,21 @@ var opcodeTable = [...]opInfo{
},
},
},
+ {
+ name: "ADDshiftLLV",
+ auxType: auxInt64,
+ argLen: 2,
+ asm: loong64.AALSLV,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ outputs: []outputInfo{
+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+ },
+ },
+ },
{
name: "ADD",
diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go
index 76ad8e8779..ffd1177350 100644
--- a/src/cmd/internal/obj/loong64/asm.go
+++ b/src/cmd/internal/obj/loong64/asm.go
@@ -2743,8 +2743,8 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
case 64: // alsl rd, rj, rk, sa2
sa := p.From.Offset - 1
- if sa > 3 {
- c.ctxt.Diag("The shift amount is too large.")
+ if sa < 0 || sa > 3 {
+ c.ctxt.Diag("%v: shift amount out of range[1, 4].\n", p)
}
r := p.GetFrom3().Reg
o1 = OP_2IRRR(c.opirrr(p.As), uint32(sa), uint32(r), uint32(p.Reg), uint32(p.To.Reg))