diff options
| author | Alexander Musman <alexander.musman@gmail.com> | 2026-01-21 22:40:14 +0300 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2026-03-06 14:54:59 -0800 |
| commit | ec3373e379696c5d9ba6874caed34a309a76181b (patch) | |
| tree | 1c6efdac55a24eb7c947f9e3fb1b4331a269fa2d /src | |
| parent | 3a29ebeef985efb12d2b8670f50b146e9a2815ca (diff) | |
| download | go-ec3373e379696c5d9ba6874caed34a309a76181b.tar.xz | |
cmd/compile: arm64 add 128-bit vector load/store SSA ops
Add OpARM64FMOVQload, OpARM64FMOVQstore, OpARM64FLDPQ, and
OpARM64FSTPQ for loading and storing Vec128 values.
Includes offset folding and address combining rules.
These ops will be used by subsequent CLs.
Change-Id: I4ac86a0a31f878411f49d390cb8df01f81cfc4d6
Reviewed-on: https://go-review.googlesource.com/c/go/+/738260
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Diffstat (limited to 'src')
| -rw-r--r-- | src/cmd/compile/internal/arm64/ssa.go | 8 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/ARM64.rules | 28 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/ARM64Ops.go | 4 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/opGen.go | 66 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteARM64.go | 217 |
5 files changed, 320 insertions, 3 deletions
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 941c2f2b8f..e32bf9695e 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -510,14 +510,15 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpARM64MOVWUload, ssa.OpARM64MOVDload, ssa.OpARM64FMOVSload, - ssa.OpARM64FMOVDload: + ssa.OpARM64FMOVDload, + ssa.OpARM64FMOVQload: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() ssagen.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() - case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS: + case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS, ssa.OpARM64FLDPQ: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() @@ -560,6 +561,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpARM64MOVDstore, ssa.OpARM64FMOVSstore, ssa.OpARM64FMOVDstore, + ssa.OpARM64FMOVQstore, ssa.OpARM64STLRB, ssa.OpARM64STLR, ssa.OpARM64STLRW: @@ -584,7 +586,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() - case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS: + case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS, ssa.OpARM64FSTPQ: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REGREG p.From.Reg = v.Args[1].Reg() diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index c1ccc12ade..8e2b4f66d2 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -694,6 +694,12 @@ (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVDload [off1+int32(off2)] {sym} ptr mem) +(FMOVQload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FMOVQload [off1+int32(off2)] {sym} ptr mem) +(FLDPQ [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FLDPQ [off1+int32(off2)] {sym} ptr mem) // register indexed load (MOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVDloadidx ptr idx mem) @@ -779,6 +785,12 @@ (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVDstore [off1+int32(off2)] {sym} ptr val mem) +(FMOVQstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FMOVQstore [off1+int32(off2)] {sym} ptr val mem) +(FSTPQ [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(int64(off1)+off2) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FSTPQ [off1+int32(off2)] {sym} ptr val1 val2 mem) // register indexed store (MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem) @@ -865,6 +877,14 @@ && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) +(FMOVQload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FMOVQload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) +(FLDPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FLDPQ [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) @@ -894,6 +914,14 @@ && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) +(FMOVQstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) + && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FMOVQstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) +(FSTPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem) + && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FSTPQ [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem) // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) // these seem to have bad interaction with other rules, resulting in slower code diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go index 10feb99211..3314695ad0 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go @@ -396,6 +396,7 @@ func init() { {name: "MOVDload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVD", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. + {name: "FMOVQload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVQ", typ: "Vec128", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. // LDP instructions load the contents of two adjacent locations in memory into registers. // Address to start loading is addr = arg0 + auxInt + aux. @@ -408,6 +409,7 @@ func init() { {name: "LDPSW", argLength: 2, reg: gpload2, aux: "SymOff", asm: "LDPSW", typ: "(Int32,Int32)", faultOnNilArg0: true, symEffect: "Read"}, // T=int32 (gp reg destination) signed extension {name: "FLDPD", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPD", typ: "(Float64,Float64)", faultOnNilArg0: true, symEffect: "Read"}, // T=float64 (fp reg destination) {name: "FLDPS", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPS", typ: "(Float32,Float32)", faultOnNilArg0: true, symEffect: "Read"}, // T=float32 (fp reg destination) + {name: "FLDPQ", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPQ", typ: "(Vec128,Vec128)", faultOnNilArg0: true, symEffect: "Read"}, // T=vec128 (fp reg destination) // register indexed load {name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit dword from arg0 + arg1, arg2 = mem. @@ -435,6 +437,7 @@ func init() { {name: "MOVDstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. + {name: "FMOVQstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVQ", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. // STP instructions store the contents of two registers to adjacent locations in memory. // Address to start storing is addr = arg0 + auxInt + aux. @@ -445,6 +448,7 @@ func init() { {name: "STPW", argLength: 4, reg: gpstore2, aux: "SymOff", asm: "STPW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=int32 (gp reg source) {name: "FSTPD", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=float64 (fp reg source) {name: "FSTPS", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=float32 (fp reg source) + {name: "FSTPQ", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPQ", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=vec128 (fp reg source) // register indexed store {name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem"}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem. diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 031d87bcf9..96bb288a96 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -4319,11 +4319,13 @@ const ( OpARM64MOVDload OpARM64FMOVSload OpARM64FMOVDload + OpARM64FMOVQload OpARM64LDP OpARM64LDPW OpARM64LDPSW OpARM64FLDPD OpARM64FLDPS + OpARM64FLDPQ OpARM64MOVDloadidx OpARM64MOVWloadidx OpARM64MOVWUloadidx @@ -4346,10 +4348,12 @@ const ( OpARM64MOVDstore OpARM64FMOVSstore OpARM64FMOVDstore + OpARM64FMOVQstore OpARM64STP OpARM64STPW OpARM64FSTPD OpARM64FSTPS + OpARM64FSTPQ OpARM64MOVBstoreidx OpARM64MOVHstoreidx OpARM64MOVWstoreidx @@ -67512,6 +67516,22 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "FMOVQload", + auxType: auxSymOff, + argLen: 2, + faultOnNilArg0: true, + symEffect: SymRead, + asm: arm64.AFMOVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, + { name: "LDP", auxType: auxSymOff, argLen: 2, @@ -67597,6 +67617,23 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "FLDPQ", + auxType: auxSymOff, + argLen: 2, + faultOnNilArg0: true, + symEffect: SymRead, + asm: arm64.AFLDPQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, + { name: "MOVDloadidx", argLen: 3, asm: arm64.AMOVD, @@ -67905,6 +67942,20 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "FMOVQstore", + auxType: auxSymOff, + argLen: 3, + faultOnNilArg0: true, + symEffect: SymWrite, + asm: arm64.AFMOVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, + { name: "STP", auxType: auxSymOff, argLen: 4, @@ -67965,6 +68016,21 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "FSTPQ", + auxType: auxSymOff, + argLen: 4, + faultOnNilArg0: true, + symEffect: SymWrite, + asm: arm64.AFSTPQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + {2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, + { name: "MOVBstoreidx", argLen: 4, asm: arm64.AMOVB, diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 735cda5db0..d243274732 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -108,6 +108,8 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpARM64FCMPS(v) case OpARM64FCVTDS: return rewriteValueARM64_OpARM64FCVTDS(v) + case OpARM64FLDPQ: + return rewriteValueARM64_OpARM64FLDPQ(v) case OpARM64FMOVDfpgp: return rewriteValueARM64_OpARM64FMOVDfpgp(v) case OpARM64FMOVDgpfp: @@ -124,6 +126,10 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpARM64FMOVDstoreidx(v) case OpARM64FMOVDstoreidx8: return rewriteValueARM64_OpARM64FMOVDstoreidx8(v) + case OpARM64FMOVQload: + return rewriteValueARM64_OpARM64FMOVQload(v) + case OpARM64FMOVQstore: + return rewriteValueARM64_OpARM64FMOVQstore(v) case OpARM64FMOVSload: return rewriteValueARM64_OpARM64FMOVSload(v) case OpARM64FMOVSloadidx: @@ -148,6 +154,8 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpARM64FNMULD(v) case OpARM64FNMULS: return rewriteValueARM64_OpARM64FNMULS(v) + case OpARM64FSTPQ: + return rewriteValueARM64_OpARM64FSTPQ(v) case OpARM64FSUBD: return rewriteValueARM64_OpARM64FSUBD(v) case OpARM64FSUBS: @@ -4904,6 +4912,56 @@ func rewriteValueARM64_OpARM64FCVTDS(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64FLDPQ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FLDPQ [off1] {sym} (ADDconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FLDPQ [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDconst { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FLDPQ) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + // match: (FLDPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FLDPQ [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FLDPQ) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(ptr, mem) + return true + } + return false +} func rewriteValueARM64_OpARM64FMOVDfpgp(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -5352,6 +5410,109 @@ func rewriteValueARM64_OpARM64FMOVDstoreidx8(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64FMOVQload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FMOVQload [off1] {sym} (ADDconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVQload [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDconst { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FMOVQload) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + // match: (FMOVQload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVQload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FMOVQload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64FMOVQstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FMOVQstore [off1] {sym} (ADDconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVQstore [off1+int32(off2)] {sym} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDconst { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FMOVQstore) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, val, mem) + return true + } + // match: (FMOVQstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVQstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FMOVQstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(ptr, val, mem) + return true + } + return false +} func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -5894,6 +6055,62 @@ func rewriteValueARM64_OpARM64FNMULS(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64FSTPQ(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FSTPQ [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) + // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FSTPQ [off1+int32(off2)] {sym} ptr val1 val2 mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDconst { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + val1 := v_1 + val2 := v_2 + mem := v_3 + if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FSTPQ) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg4(ptr, val1, val2, mem) + return true + } + // match: (FSTPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FSTPQ [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + val1 := v_1 + val2 := v_2 + mem := v_3 + if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FSTPQ) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg4(ptr, val1, val2, mem) + return true + } + return false +} func rewriteValueARM64_OpARM64FSUBD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] |
