aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/cmd/compile/internal/riscv64/ssa.go100
-rw-r--r--src/cmd/compile/internal/ssa/_gen/RISCV64.rules32
-rw-r--r--src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go41
-rw-r--r--src/cmd/compile/internal/ssa/opGen.go24
-rw-r--r--src/cmd/compile/internal/ssa/rewriteRISCV64.go127
5 files changed, 150 insertions, 174 deletions
diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go
index 88733b0d64..da28197490 100644
--- a/src/cmd/compile/internal/riscv64/ssa.go
+++ b/src/cmd/compile/internal/riscv64/ssa.go
@@ -181,6 +181,8 @@ func largestMove(alignment int64) (obj.As, int64) {
}
}
+var fracMovOps = []obj.As{riscv.AMOVB, riscv.AMOVH, riscv.AMOVW, riscv.AMOV}
+
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
// RISC-V has no flags, so this is a no-op.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {}
@@ -738,30 +740,86 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.RegTo2 = riscv.REG_ZERO
case ssa.OpRISCV64LoweredZero:
- mov, sz := largestMove(v.AuxInt)
+ ptr := v.Args[0].Reg()
+ sc := v.AuxValAndOff()
+ n := sc.Val64()
- // mov ZERO, (Rarg0)
- // ADD $sz, Rarg0
- // BGEU Rarg1, Rarg0, -2(PC)
+ mov, sz := largestMove(sc.Off64())
- p := s.Prog(mov)
- p.From.Type = obj.TYPE_REG
- p.From.Reg = riscv.REG_ZERO
- p.To.Type = obj.TYPE_MEM
- p.To.Reg = v.Args[0].Reg()
+ // mov ZERO, (offset)(Rarg0)
+ var off int64
+ for n >= sz {
+ zeroOp(s, mov, ptr, off)
+ off += sz
+ n -= sz
+ }
+
+ for i := len(fracMovOps) - 1; i >= 0; i-- {
+ tsz := int64(1 << i)
+ if n < tsz {
+ continue
+ }
+ zeroOp(s, fracMovOps[i], ptr, off)
+ off += tsz
+ n -= tsz
+ }
+
+ case ssa.OpRISCV64LoweredZeroLoop:
+ ptr := v.Args[0].Reg()
+ sc := v.AuxValAndOff()
+ n := sc.Val64()
+ mov, sz := largestMove(sc.Off64())
+ chunk := 8 * sz
+
+ if n <= 3*chunk {
+ v.Fatalf("ZeroLoop too small:%d, expect:%d", n, 3*chunk)
+ }
+
+ tmp := v.RegTmp()
+
+ p := s.Prog(riscv.AADD)
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = n - n%chunk
+ p.Reg = ptr
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = tmp
+
+ for i := int64(0); i < 8; i++ {
+ zeroOp(s, mov, ptr, sz*i)
+ }
p2 := s.Prog(riscv.AADD)
p2.From.Type = obj.TYPE_CONST
- p2.From.Offset = sz
+ p2.From.Offset = chunk
p2.To.Type = obj.TYPE_REG
- p2.To.Reg = v.Args[0].Reg()
+ p2.To.Reg = ptr
- p3 := s.Prog(riscv.ABGEU)
- p3.To.Type = obj.TYPE_BRANCH
- p3.Reg = v.Args[0].Reg()
+ p3 := s.Prog(riscv.ABNE)
+ p3.From.Reg = tmp
p3.From.Type = obj.TYPE_REG
- p3.From.Reg = v.Args[1].Reg()
- p3.To.SetTarget(p)
+ p3.Reg = ptr
+ p3.To.Type = obj.TYPE_BRANCH
+ p3.To.SetTarget(p.Link)
+
+ n %= chunk
+
+ // mov ZERO, (offset)(Rarg0)
+ var off int64
+ for n >= sz {
+ zeroOp(s, mov, ptr, off)
+ off += sz
+ n -= sz
+ }
+
+ for i := len(fracMovOps) - 1; i >= 0; i-- {
+ tsz := int64(1 << i)
+ if n < tsz {
+ continue
+ }
+ zeroOp(s, fracMovOps[i], ptr, off)
+ off += tsz
+ n -= tsz
+ }
case ssa.OpRISCV64LoweredMove:
mov, sz := largestMove(v.AuxInt)
@@ -955,3 +1013,13 @@ func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg in
p.Pos = p.Pos.WithNotStmt()
return p
}
+
+func zeroOp(s *ssagen.State, mov obj.As, reg int16, off int64) {
+ p := s.Prog(mov)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = riscv.REG_ZERO
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = reg
+ p.To.Offset = off
+ return
+}
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
index 821f822746..9382877795 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
@@ -373,36 +373,14 @@
(MOVHstore [4] ptr (MOVDconst [0])
(MOVHstore [2] ptr (MOVDconst [0])
(MOVHstore ptr (MOVDconst [0]) mem)))
-(Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 =>
- (MOVWstore [8] ptr (MOVDconst [0])
- (MOVWstore [4] ptr (MOVDconst [0])
- (MOVWstore ptr (MOVDconst [0]) mem)))
-(Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 =>
- (MOVDstore [8] ptr (MOVDconst [0])
- (MOVDstore ptr (MOVDconst [0]) mem))
-(Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 =>
- (MOVDstore [16] ptr (MOVDconst [0])
- (MOVDstore [8] ptr (MOVDconst [0])
- (MOVDstore ptr (MOVDconst [0]) mem)))
-(Zero [32] {t} ptr mem) && t.Alignment()%8 == 0 =>
- (MOVDstore [24] ptr (MOVDconst [0])
- (MOVDstore [16] ptr (MOVDconst [0])
- (MOVDstore [8] ptr (MOVDconst [0])
- (MOVDstore ptr (MOVDconst [0]) mem))))
-// Medium 8-aligned zeroing uses a Duff's device
-// 8 and 128 are magic constants, see runtime/mkduff.go
-(Zero [s] {t} ptr mem)
- && s%8 == 0 && s <= 8*128
- && t.Alignment()%8 == 0 =>
- (DUFFZERO [8 * (128 - s/8)] ptr mem)
+// Unroll zeroing in medium size (at most 192 bytes i.e. 3 cachelines)
+(Zero [s] {t} ptr mem) && s <= 24*moveSize(t.Alignment(), config) =>
+ (LoweredZero [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
// Generic zeroing uses a loop
-(Zero [s] {t} ptr mem) =>
- (LoweredZero [t.Alignment()]
- ptr
- (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)]))
- mem)
+(Zero [s] {t} ptr mem) && s > 24*moveSize(t.Alignment(), config) =>
+ (LoweredZeroLoop [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
// Checks
(IsNonNil ...) => (SNEZ ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
index 0bccaf63bc..8e2f85b8d7 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
@@ -317,25 +317,40 @@ func init() {
// Generic moves and zeros
- // general unaligned zeroing
- // arg0 = address of memory to zero (in X5, changed as side effect)
- // arg1 = address of the last element to zero (inclusive)
- // arg2 = mem
- // auxint = element size
+ // general unrolled zeroing
+ // arg0 = address of memory to zero
+ // arg1 = mem
+ // auxint = element size and type alignment
// returns mem
- // mov ZERO, (X5)
- // ADD $sz, X5
- // BGEU Rarg1, X5, -2(PC)
+ // mov ZERO, (OFFSET)(Rarg0)
{
- name: "LoweredZero",
- aux: "Int64",
- argLength: 3,
+ name: "LoweredZero",
+ aux: "SymValAndOff",
+ typ: "Mem",
+ argLength: 2,
+ symEffect: "Write",
+ faultOnNilArg0: true,
reg: regInfo{
- inputs: []regMask{regNamed["X5"], gpMask},
- clobbers: regNamed["X5"],
+ inputs: []regMask{gpMask},
},
+ },
+	// general zeroing loop
+	// arg0 = address of memory to zero (clobbered as a side effect)
+	// arg1 = mem
+ // auxint = element size and type alignment
+ // returns mem
+ {
+ name: "LoweredZeroLoop",
+ aux: "SymValAndOff",
typ: "Mem",
+ argLength: 2,
+ symEffect: "Write",
+ needIntTemp: true,
faultOnNilArg0: true,
+ reg: regInfo{
+ inputs: []regMask{gpMask},
+ clobbersArg0: true,
+ },
},
// general unaligned move
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 60ac188e1e..5f9572d675 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2569,6 +2569,7 @@ const (
OpRISCV64DUFFZERO
OpRISCV64DUFFCOPY
OpRISCV64LoweredZero
+ OpRISCV64LoweredZeroLoop
OpRISCV64LoweredMove
OpRISCV64LoweredAtomicLoad8
OpRISCV64LoweredAtomicLoad32
@@ -34558,15 +34559,28 @@ var opcodeTable = [...]opInfo{
},
{
name: "LoweredZero",
- auxType: auxInt64,
- argLen: 3,
+ auxType: auxSymValAndOff,
+ argLen: 2,
faultOnNilArg0: true,
+ symEffect: SymWrite,
reg: regInfo{
inputs: []inputInfo{
- {0, 16}, // X5
- {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+ },
+ },
+ },
+ {
+ name: "LoweredZeroLoop",
+ auxType: auxSymValAndOff,
+ argLen: 2,
+ needIntTemp: true,
+ faultOnNilArg0: true,
+ symEffect: SymWrite,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
- clobbers: 16, // X5
+ clobbersArg0: true,
},
},
{
diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
index e2c400b0c5..faa465b9db 100644
--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go
+++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
@@ -9925,138 +9925,39 @@ func rewriteValueRISCV64_OpZero(v *Value) bool {
v.AddArg3(ptr, v0, v1)
return true
}
- // match: (Zero [12] {t} ptr mem)
- // cond: t.Alignment()%4 == 0
- // result: (MOVWstore [8] ptr (MOVDconst [0]) (MOVWstore [4] ptr (MOVDconst [0]) (MOVWstore ptr (MOVDconst [0]) mem)))
- for {
- if auxIntToInt64(v.AuxInt) != 12 {
- break
- }
- t := auxToType(v.Aux)
- ptr := v_0
- mem := v_1
- if !(t.Alignment()%4 == 0) {
- break
- }
- v.reset(OpRISCV64MOVWstore)
- v.AuxInt = int32ToAuxInt(8)
- v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
- v0.AuxInt = int64ToAuxInt(0)
- v1 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem)
- v1.AuxInt = int32ToAuxInt(4)
- v2 := b.NewValue0(v.Pos, OpRISCV64MOVWstore, types.TypeMem)
- v2.AddArg3(ptr, v0, mem)
- v1.AddArg3(ptr, v0, v2)
- v.AddArg3(ptr, v0, v1)
- return true
- }
- // match: (Zero [16] {t} ptr mem)
- // cond: t.Alignment()%8 == 0
- // result: (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))
- for {
- if auxIntToInt64(v.AuxInt) != 16 {
- break
- }
- t := auxToType(v.Aux)
- ptr := v_0
- mem := v_1
- if !(t.Alignment()%8 == 0) {
- break
- }
- v.reset(OpRISCV64MOVDstore)
- v.AuxInt = int32ToAuxInt(8)
- v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
- v0.AuxInt = int64ToAuxInt(0)
- v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
- v1.AddArg3(ptr, v0, mem)
- v.AddArg3(ptr, v0, v1)
- return true
- }
- // match: (Zero [24] {t} ptr mem)
- // cond: t.Alignment()%8 == 0
- // result: (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem)))
- for {
- if auxIntToInt64(v.AuxInt) != 24 {
- break
- }
- t := auxToType(v.Aux)
- ptr := v_0
- mem := v_1
- if !(t.Alignment()%8 == 0) {
- break
- }
- v.reset(OpRISCV64MOVDstore)
- v.AuxInt = int32ToAuxInt(16)
- v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
- v0.AuxInt = int64ToAuxInt(0)
- v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
- v1.AuxInt = int32ToAuxInt(8)
- v2 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
- v2.AddArg3(ptr, v0, mem)
- v1.AddArg3(ptr, v0, v2)
- v.AddArg3(ptr, v0, v1)
- return true
- }
- // match: (Zero [32] {t} ptr mem)
- // cond: t.Alignment()%8 == 0
- // result: (MOVDstore [24] ptr (MOVDconst [0]) (MOVDstore [16] ptr (MOVDconst [0]) (MOVDstore [8] ptr (MOVDconst [0]) (MOVDstore ptr (MOVDconst [0]) mem))))
- for {
- if auxIntToInt64(v.AuxInt) != 32 {
- break
- }
- t := auxToType(v.Aux)
- ptr := v_0
- mem := v_1
- if !(t.Alignment()%8 == 0) {
- break
- }
- v.reset(OpRISCV64MOVDstore)
- v.AuxInt = int32ToAuxInt(24)
- v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
- v0.AuxInt = int64ToAuxInt(0)
- v1 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
- v1.AuxInt = int32ToAuxInt(16)
- v2 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
- v2.AuxInt = int32ToAuxInt(8)
- v3 := b.NewValue0(v.Pos, OpRISCV64MOVDstore, types.TypeMem)
- v3.AddArg3(ptr, v0, mem)
- v2.AddArg3(ptr, v0, v3)
- v1.AddArg3(ptr, v0, v2)
- v.AddArg3(ptr, v0, v1)
- return true
- }
// match: (Zero [s] {t} ptr mem)
- // cond: s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0
- // result: (DUFFZERO [8 * (128 - s/8)] ptr mem)
+ // cond: s <= 24*moveSize(t.Alignment(), config)
+ // result: (LoweredZero [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
for {
s := auxIntToInt64(v.AuxInt)
t := auxToType(v.Aux)
ptr := v_0
mem := v_1
- if !(s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0) {
+ if !(s <= 24*moveSize(t.Alignment(), config)) {
break
}
- v.reset(OpRISCV64DUFFZERO)
- v.AuxInt = int64ToAuxInt(8 * (128 - s/8))
+ v.reset(OpRISCV64LoweredZero)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(s), int32(t.Alignment())))
v.AddArg2(ptr, mem)
return true
}
// match: (Zero [s] {t} ptr mem)
- // result: (LoweredZero [t.Alignment()] ptr (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)])) mem)
+ // cond: s > 24*moveSize(t.Alignment(), config)
+ // result: (LoweredZeroLoop [makeValAndOff(int32(s),int32(t.Alignment()))] ptr mem)
for {
s := auxIntToInt64(v.AuxInt)
t := auxToType(v.Aux)
ptr := v_0
mem := v_1
- v.reset(OpRISCV64LoweredZero)
- v.AuxInt = int64ToAuxInt(t.Alignment())
- v0 := b.NewValue0(v.Pos, OpRISCV64ADD, ptr.Type)
- v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64)
- v1.AuxInt = int64ToAuxInt(s - moveSize(t.Alignment(), config))
- v0.AddArg2(ptr, v1)
- v.AddArg3(ptr, v0, mem)
+ if !(s > 24*moveSize(t.Alignment(), config)) {
+ break
+ }
+ v.reset(OpRISCV64LoweredZeroLoop)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(s), int32(t.Alignment())))
+ v.AddArg2(ptr, mem)
return true
}
+ return false
}
func rewriteBlockRISCV64(b *Block) bool {
typ := &b.Func.Config.Types