diff options
| author | limeidan <limeidan@loongson.cn> | 2025-08-07 11:23:55 +0800 |
|---|---|---|
| committer | abner chenc <chenguoqi@loongson.cn> | 2025-08-12 23:01:49 -0700 |
| commit | 90b7d7aaa27b5a1d7a69f76ed82ed2e88ae84c31 (patch) | |
| tree | b65223d70b0176bb0a59fb77a752cfc053a5cb72 /src/cmd/compile | |
| parent | 1b263fc6042ed763319f59fd2b87df23e53c9f6f (diff) | |
| download | go-90b7d7aaa27b5a1d7a69f76ed82ed2e88ae84c31.tar.xz | |
cmd/compile/internal: optimize multiplication using the new operation 'ADDshiftLLV' on loong64
goos: linux
goarch: loong64
pkg: cmd/compile/internal/test
cpu: Loongson-3A6000-HV @ 2500.00MHz
│ old │ new │
│ sec/op │ sec/op vs base │
MulconstI32/3 0.8004n ± 0% 0.4247n ± 2% -46.94% (p=0.000 n=10)
MulconstI32/5 0.8005n ± 0% 0.4256n ± 1% -46.83% (p=0.000 n=10)
MulconstI32/12 1.2010n ± 0% 0.8005n ± 0% -33.35% (p=0.000 n=10)
MulconstI32/120 0.8090n ± 0% 0.8067n ± 0% -0.28% (p=0.007 n=10)
MulconstI32/-120 0.8109n ± 0% 0.8072n ± 0% -0.47% (p=0.000 n=10)
MulconstI32/65537 0.8004n ± 0% 0.8004n ± 0% ~ (p=1.000 n=10)
MulconstI32/65538 0.8005n ± 0% 0.8005n ± 0% ~ (p=0.265 n=10)
MulconstI64/3 0.8005n ± 0% 0.4241n ± 1% -47.02% (p=0.000 n=10)
MulconstI64/5 0.8004n ± 0% 0.4249n ± 1% -46.91% (p=0.000 n=10)
MulconstI64/12 1.2010n ± 0% 0.8004n ± 0% -33.36% (p=0.000 n=10)
MulconstI64/120 0.8005n ± 0% 0.8005n ± 0% ~ (p=0.635 n=10)
MulconstI64/-120 0.8005n ± 0% 0.8005n ± 0% ~ (p=0.837 n=10)
MulconstI64/65537 0.8005n ± 0% 0.8005n ± 0% ~ (p=0.837 n=10)
MulconstI64/65538 0.8096n ± 0% 0.8004n ± 0% -1.14% (p=0.000 n=10)
MulconstU32/3 0.8004n ± 0% 0.4263n ± 1% -46.75% (p=0.000 n=10)
MulconstU32/5 0.8005n ± 0% 0.4262n ± 1% -46.76% (p=0.000 n=10)
MulconstU32/12 1.2010n ± 0% 0.8005n ± 0% -33.35% (p=0.000 n=10)
MulconstU32/120 0.8105n ± 0% 0.8096n ± 0% ~ (p=0.183 n=10)
MulconstU32/65537 0.8004n ± 0% 0.8004n ± 0% ~ (p=1.000 n=10)
MulconstU32/65538 0.8005n ± 0% 0.8005n ± 0% ~ (p=1.000 n=10)
MulconstU64/3 0.8004n ± 0% 0.4265n ± 4% -46.71% (p=0.000 n=10)
MulconstU64/5 0.8004n ± 0% 0.4256n ± 0% -46.82% (p=0.000 n=10)
MulconstU64/12 1.2010n ± 0% 0.8004n ± 0% -33.36% (p=0.000 n=10)
MulconstU64/120 0.8005n ± 0% 0.8005n ± 0% ~ (p=0.387 n=10)
MulconstU64/65537 0.8005n ± 0% 0.8005n ± 0% ~ (p=0.265 n=10)
MulconstU64/65538 0.8080n ± 0% 0.8004n ± 0% -0.93% (p=0.000 n=10)
geomean 0.8539n 0.6597n -22.74%
Change-Id: Ie33e88985d7639f481bbba540bc917b9f185c357
Reviewed-on: https://go-review.googlesource.com/c/go/+/693855
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd/compile')
| -rw-r--r-- | src/cmd/compile/internal/loong64/ssa.go | 11 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go | 2 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/config.go | 13 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/opGen.go | 16 |
4 files changed, 40 insertions, 2 deletions
diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go index f8ecebb350..c7fb903d5d 100644 --- a/src/cmd/compile/internal/loong64/ssa.go +++ b/src/cmd/compile/internal/loong64/ssa.go @@ -1065,6 +1065,17 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { {Type: obj.TYPE_CONST, Offset: int64((v.AuxInt >> 0) & 0x1f)}, }) + case ssa.OpLOONG64ADDshiftLLV: + // ADDshiftLLV Rarg0, Rarg1, $shift + // ALSLV $shift, Rarg1, Rarg0, Rtmp + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_CONST + p.From.Offset = v.AuxInt + p.Reg = v.Args[1].Reg() + p.AddRestSourceReg(v.Args[0].Reg()) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + case ssa.OpClobber, ssa.OpClobberReg: // TODO: implement for clobberdead experiment. Nop is ok for now. default: diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go index 75429cbffd..d6818e8592 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go @@ -577,6 +577,8 @@ func init() { // is $hint and bit[41:5] is $n. {name: "PRELD", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELD", hasSideEffects: true}, {name: "PRELDX", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELDX", hasSideEffects: true}, + + {name: "ADDshiftLLV", argLength: 2, aux: "Int64", reg: gp21, asm: "ALSLV"}, // arg0 + arg1<<auxInt, the value of auxInt should be in the range [1, 4]. } blocks := []blockData{ diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index 50ec2ec177..f2097170f4 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -566,7 +566,7 @@ func (c *Config) buildRecipes(arch string) { } case "loong64": // - multiply is 4 cycles. - // - add/sub/shift are 1 cycle. + // - add/sub/shift/alsl are 1 cycle. // On loong64, using a multiply also needs to load the constant into a register. // TODO: figure out a happy medium. 
mulCost = 45 @@ -601,6 +601,15 @@ func (c *Config) buildRecipes(arch string) { return m.Block.NewValue1I(m.Pos, OpLOONG64SLLVconst, m.Type, int64(i), x) }) } + + // ADDshiftLLV + for i := 1; i < 5; i++ { + c := 10 + r(1, 1<<i, c, + func(m, x, y *Value) *Value { + return m.Block.NewValue2I(m.Pos, OpLOONG64ADDshiftLLV, m.Type, int64(i), x, y) + }) + } } c.mulRecipes = map[int64]mulRecipe{} @@ -718,7 +727,7 @@ func (c *Config) buildRecipes(arch string) { // Currently: // len(c.mulRecipes) == 5984 on arm64 // 680 on amd64 - // 5984 on loong64 + // 9738 on loong64 // This function takes ~2.5ms on arm64. //println(len(c.mulRecipes)) } diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 06a05c6e3f..e155eca5ff 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1970,6 +1970,7 @@ const ( OpLOONG64LoweredPanicBoundsCC OpLOONG64PRELD OpLOONG64PRELDX + OpLOONG64ADDshiftLLV OpMIPSADD OpMIPSADDconst @@ -26527,6 +26528,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ADDshiftLLV", + auxType: auxInt64, + argLen: 2, + asm: loong64.AALSLV, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, { name: "ADD", |
