diff options
| author | Guoqi Chen <chenguoqi@loongson.cn> | 2026-03-11 10:11:35 +0800 |
|---|---|---|
| committer | abner chenc <chenguoqi@loongson.cn> | 2026-03-17 17:58:13 -0700 |
| commit | a92edd97bfb9b383acb934fa0564bcad948bbb20 (patch) | |
| tree | 3852c3b9755ce3c099f2f8798676b029dac8e4a2 /src | |
| parent | 5978090f9d4c45b63c2c749174e04594ca39b064 (diff) | |
| download | go-a92edd97bfb9b383acb934fa0564bcad948bbb20.tar.xz | |
cmd/compile: simplify the implementation of LoweredZeroLoop on loong64
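Removes a total of 6484 instructions from the Go toolchain binaries on loong64 (2592 of them from the go binary itself); per-binary instruction counts follow.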
binary before after delta
asm 561517 561225 -292
cgo 480929 480493 -436
compile 2887121 2886277 -844
cover 530429 530125 -304
fix 851649 851093 -556
link 728361 727813 -548
preprofile 240713 240545 -168
vet 824869 824297 -572
go 1638645 1636053 -2592
gofmt 320213 320041 -172
Change-Id: I15da6e94a4166aed03373657fa4785063aa32f35
Reviewed-on: https://go-review.googlesource.com/c/go/+/754000
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Diffstat (limited to 'src')
| -rw-r--r-- | src/cmd/compile/internal/loong64/ssa.go | 84 |
1 files changed, 37 insertions, 47 deletions
diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go index a060cbe474..7b3d314a15 100644 --- a/src/cmd/compile/internal/loong64/ssa.go +++ b/src/cmd/compile/internal/loong64/ssa.go @@ -576,7 +576,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { } case ssa.OpLOONG64LoweredZeroLoop: ptrReg := v.Args[0].Reg() - countReg := v.RegTmp() + endReg := v.RegTmp() flagReg := int16(loong64.REGTMP) var off int64 n := v.AuxInt @@ -593,25 +593,24 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { v.Fatalf("ZeroLoop size too small %d", n) } - // MOVV $n/loopSize, countReg + // ADDV n - n%loopSize, ptrReg, endReg // MOVBU ir.Syms.Loong64HasLSX, flagReg // BNE flagReg, lsxInit - // genericInit: + // genericLoop: // for off = 0; off < loopSize; off += 8 { // zero8(s, ptrReg, off) // } // ADDV $loopSize, ptrReg - // SUBV $1, countReg - // BNE countReg, genericInit + // BNE endReg, ptrReg, genericLoop // JMP tail // lsxInit: - // VXORV V31, V31, V31, v31 = 0 + // VXORV V31, V31, V31 + // lsxLoop: // for off = 0; off < loopSize; off += 16 { // zero16(s, V31, ptrReg, off) // } // ADDV $loopSize, ptrReg - // SUBV $1, countReg - // BNE countReg, lsxInit + // BNE endReg, ptrReg, lsxLoop // tail: // n %= loopSize // for off = 0; n >= 8; off += 8, n -= 8 { @@ -622,11 +621,12 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { // zero8(s, ptrReg, off+n-8) // } - p1 := s.Prog(loong64.AMOVV) + p1 := s.Prog(loong64.AADDV) p1.From.Type = obj.TYPE_CONST - p1.From.Offset = n / loopSize + p1.From.Offset = n - n%loopSize + p1.Reg = ptrReg p1.To.Type = obj.TYPE_REG - p1.To.Reg = countReg + p1.To.Reg = endReg p2 := s.Prog(loong64.AMOVBU) p2.From.Type = obj.TYPE_MEM @@ -650,52 +650,42 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p4.To.Type = obj.TYPE_REG p4.To.Reg = ptrReg - p5 := s.Prog(loong64.ASUBV) - p5.From.Type = obj.TYPE_CONST - p5.From.Offset = 1 - p5.To.Type = obj.TYPE_REG - p5.To.Reg = countReg + p5 := s.Prog(loong64.ABNE) + 
p5.From.Type = obj.TYPE_REG + p5.From.Reg = endReg + p5.Reg = ptrReg + p5.To.Type = obj.TYPE_BRANCH + p5.To.SetTarget(p3.Link) - p6 := s.Prog(loong64.ABNE) - p6.From.Type = obj.TYPE_REG - p6.From.Reg = countReg + p6 := s.Prog(obj.AJMP) p6.To.Type = obj.TYPE_BRANCH - p6.To.SetTarget(p3.Link) - - p7 := s.Prog(obj.AJMP) - p7.To.Type = obj.TYPE_BRANCH - p8 := s.Prog(loong64.AVXORV) - p8.From.Type = obj.TYPE_REG - p8.From.Reg = loong64.REG_V31 - p8.To.Type = obj.TYPE_REG - p8.To.Reg = loong64.REG_V31 - p3.To.SetTarget(p8) + p7 := s.Prog(loong64.AVXORV) + p7.From.Type = obj.TYPE_REG + p7.From.Reg = loong64.REG_V31 + p7.To.Type = obj.TYPE_REG + p7.To.Reg = loong64.REG_V31 + p3.To.SetTarget(p7) for off = 0; off < loopSize; off += 16 { zero16(s, loong64.REG_V31, ptrReg, off) } - p9 := s.Prog(loong64.AADDV) - p9.From.Type = obj.TYPE_CONST - p9.From.Offset = loopSize - p9.To.Type = obj.TYPE_REG - p9.To.Reg = ptrReg - - p10 := s.Prog(loong64.ASUBV) - p10.From.Type = obj.TYPE_CONST - p10.From.Offset = 1 - p10.To.Type = obj.TYPE_REG - p10.To.Reg = countReg + p8 := s.Prog(loong64.AADDV) + p8.From.Type = obj.TYPE_CONST + p8.From.Offset = loopSize + p8.To.Type = obj.TYPE_REG + p8.To.Reg = ptrReg - p11 := s.Prog(loong64.ABNE) - p11.From.Type = obj.TYPE_REG - p11.From.Reg = countReg - p11.To.Type = obj.TYPE_BRANCH - p11.To.SetTarget(p8.Link) + p9 := s.Prog(loong64.ABNE) + p9.From.Type = obj.TYPE_REG + p9.From.Reg = endReg + p9.Reg = ptrReg + p9.To.Type = obj.TYPE_BRANCH + p9.To.SetTarget(p7.Link) - p12 := s.Prog(obj.ANOP) - p7.To.SetTarget(p12) + p10 := s.Prog(obj.ANOP) + p6.To.SetTarget(p10) // Multiples of the loop size are now done. n %= loopSize |
