aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile
diff options
context:
space:
mode:
author: Guoqi Chen <chenguoqi@loongson.cn> 2026-03-11 10:11:35 +0800
committer: abner chenc <chenguoqi@loongson.cn> 2026-03-17 17:58:13 -0700
commit: a92edd97bfb9b383acb934fa0564bcad948bbb20 (patch)
tree: 3852c3b9755ce3c099f2f8798676b029dac8e4a2 /src/cmd/compile
parent: 5978090f9d4c45b63c2c749174e04594ca39b064 (diff)
download: go-a92edd97bfb9b383acb934fa0564bcad948bbb20.tar.xz
cmd/compile: simplify the implementation of LoweredZeroLoop on loong64
Removes 6484 instructions from the go binary on loong64.

              before     after    delta
asm           561517    561225     -292
cgo           480929    480493     -436
compile      2887121   2886277     -844
cover         530429    530125     -304
fix           851649    851093     -556
link          728361    727813     -548
preprofile    240713    240545     -168
vet           824869    824297     -572
go           1638645   1636053    -2592
gofmt         320213    320041     -172

Change-Id: I15da6e94a4166aed03373657fa4785063aa32f35
Reviewed-on: https://go-review.googlesource.com/c/go/+/754000
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Diffstat (limited to 'src/cmd/compile')
-rw-r--r--src/cmd/compile/internal/loong64/ssa.go84
1 files changed, 37 insertions, 47 deletions
diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go
index a060cbe474..7b3d314a15 100644
--- a/src/cmd/compile/internal/loong64/ssa.go
+++ b/src/cmd/compile/internal/loong64/ssa.go
@@ -576,7 +576,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
}
case ssa.OpLOONG64LoweredZeroLoop:
ptrReg := v.Args[0].Reg()
- countReg := v.RegTmp()
+ endReg := v.RegTmp()
flagReg := int16(loong64.REGTMP)
var off int64
n := v.AuxInt
@@ -593,25 +593,24 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
v.Fatalf("ZeroLoop size too small %d", n)
}
- // MOVV $n/loopSize, countReg
+ // ADDV n - n%loopSize, ptrReg, endReg
// MOVBU ir.Syms.Loong64HasLSX, flagReg
// BNE flagReg, lsxInit
- // genericInit:
+ // genericLoop:
// for off = 0; off < loopSize; off += 8 {
// zero8(s, ptrReg, off)
// }
// ADDV $loopSize, ptrReg
- // SUBV $1, countReg
- // BNE countReg, genericInit
+ // BNE endReg, ptrReg, genericLoop
// JMP tail
// lsxInit:
- // VXORV V31, V31, V31, v31 = 0
+ // VXORV V31, V31, V31
+ // lsxLoop:
// for off = 0; off < loopSize; off += 16 {
// zero16(s, V31, ptrReg, off)
// }
// ADDV $loopSize, ptrReg
- // SUBV $1, countReg
- // BNE countReg, lsxInit
+ // BNE endReg, ptrReg, lsxLoop
// tail:
// n %= loopSize
// for off = 0; n >= 8; off += 8, n -= 8 {
@@ -622,11 +621,12 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// zero8(s, ptrReg, off+n-8)
// }
- p1 := s.Prog(loong64.AMOVV)
+ p1 := s.Prog(loong64.AADDV)
p1.From.Type = obj.TYPE_CONST
- p1.From.Offset = n / loopSize
+ p1.From.Offset = n - n%loopSize
+ p1.Reg = ptrReg
p1.To.Type = obj.TYPE_REG
- p1.To.Reg = countReg
+ p1.To.Reg = endReg
p2 := s.Prog(loong64.AMOVBU)
p2.From.Type = obj.TYPE_MEM
@@ -650,52 +650,42 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p4.To.Type = obj.TYPE_REG
p4.To.Reg = ptrReg
- p5 := s.Prog(loong64.ASUBV)
- p5.From.Type = obj.TYPE_CONST
- p5.From.Offset = 1
- p5.To.Type = obj.TYPE_REG
- p5.To.Reg = countReg
+ p5 := s.Prog(loong64.ABNE)
+ p5.From.Type = obj.TYPE_REG
+ p5.From.Reg = endReg
+ p5.Reg = ptrReg
+ p5.To.Type = obj.TYPE_BRANCH
+ p5.To.SetTarget(p3.Link)
- p6 := s.Prog(loong64.ABNE)
- p6.From.Type = obj.TYPE_REG
- p6.From.Reg = countReg
+ p6 := s.Prog(obj.AJMP)
p6.To.Type = obj.TYPE_BRANCH
- p6.To.SetTarget(p3.Link)
-
- p7 := s.Prog(obj.AJMP)
- p7.To.Type = obj.TYPE_BRANCH
- p8 := s.Prog(loong64.AVXORV)
- p8.From.Type = obj.TYPE_REG
- p8.From.Reg = loong64.REG_V31
- p8.To.Type = obj.TYPE_REG
- p8.To.Reg = loong64.REG_V31
- p3.To.SetTarget(p8)
+ p7 := s.Prog(loong64.AVXORV)
+ p7.From.Type = obj.TYPE_REG
+ p7.From.Reg = loong64.REG_V31
+ p7.To.Type = obj.TYPE_REG
+ p7.To.Reg = loong64.REG_V31
+ p3.To.SetTarget(p7)
for off = 0; off < loopSize; off += 16 {
zero16(s, loong64.REG_V31, ptrReg, off)
}
- p9 := s.Prog(loong64.AADDV)
- p9.From.Type = obj.TYPE_CONST
- p9.From.Offset = loopSize
- p9.To.Type = obj.TYPE_REG
- p9.To.Reg = ptrReg
-
- p10 := s.Prog(loong64.ASUBV)
- p10.From.Type = obj.TYPE_CONST
- p10.From.Offset = 1
- p10.To.Type = obj.TYPE_REG
- p10.To.Reg = countReg
+ p8 := s.Prog(loong64.AADDV)
+ p8.From.Type = obj.TYPE_CONST
+ p8.From.Offset = loopSize
+ p8.To.Type = obj.TYPE_REG
+ p8.To.Reg = ptrReg
- p11 := s.Prog(loong64.ABNE)
- p11.From.Type = obj.TYPE_REG
- p11.From.Reg = countReg
- p11.To.Type = obj.TYPE_BRANCH
- p11.To.SetTarget(p8.Link)
+ p9 := s.Prog(loong64.ABNE)
+ p9.From.Type = obj.TYPE_REG
+ p9.From.Reg = endReg
+ p9.Reg = ptrReg
+ p9.To.Type = obj.TYPE_BRANCH
+ p9.To.SetTarget(p7.Link)
- p12 := s.Prog(obj.ANOP)
- p7.To.SetTarget(p12)
+ p10 := s.Prog(obj.ANOP)
+ p6.To.SetTarget(p10)
// Multiples of the loop size are now done.
n %= loopSize