aboutsummaryrefslogtreecommitdiff
path: root/src/cmd
diff options
context:
space:
mode:
author Alexander Musman <alexander.musman@gmail.com> 2026-01-21 22:40:14 +0300
committer Gopher Robot <gobot@golang.org> 2026-03-06 14:54:59 -0800
commit ec3373e379696c5d9ba6874caed34a309a76181b (patch)
tree 1c6efdac55a24eb7c947f9e3fb1b4331a269fa2d /src/cmd
parent 3a29ebeef985efb12d2b8670f50b146e9a2815ca (diff)
download go-ec3373e379696c5d9ba6874caed34a309a76181b.tar.xz
cmd/compile: arm64 add 128-bit vector load/store SSA ops
Add OpARM64FMOVQload, OpARM64FMOVQstore, OpARM64FLDPQ, and OpARM64FSTPQ for loading and storing Vec128 values. Includes offset folding and address combining rules. These ops will be used by subsequent CLs.

Change-Id: I4ac86a0a31f878411f49d390cb8df01f81cfc4d6
Reviewed-on: https://go-review.googlesource.com/c/go/+/738260
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/cmd')
-rw-r--r-- src/cmd/compile/internal/arm64/ssa.go 8
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/ARM64.rules 28
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/ARM64Ops.go 4
-rw-r--r-- src/cmd/compile/internal/ssa/opGen.go 66
-rw-r--r-- src/cmd/compile/internal/ssa/rewriteARM64.go 217
5 files changed, 320 insertions, 3 deletions
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 941c2f2b8f..e32bf9695e 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -510,14 +510,15 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64MOVWUload,
ssa.OpARM64MOVDload,
ssa.OpARM64FMOVSload,
- ssa.OpARM64FMOVDload:
+ ssa.OpARM64FMOVDload,
+ ssa.OpARM64FMOVQload:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
ssagen.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
- case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS:
+ case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS, ssa.OpARM64FLDPQ:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
@@ -560,6 +561,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64MOVDstore,
ssa.OpARM64FMOVSstore,
ssa.OpARM64FMOVDstore,
+ ssa.OpARM64FMOVQstore,
ssa.OpARM64STLRB,
ssa.OpARM64STLR,
ssa.OpARM64STLRW:
@@ -584,7 +586,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[2].Reg()
- case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS:
+ case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS, ssa.OpARM64FSTPQ:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REGREG
p.From.Reg = v.Args[1].Reg()
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
index c1ccc12ade..8e2b4f66d2 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
@@ -694,6 +694,12 @@
(FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVDload [off1+int32(off2)] {sym} ptr mem)
+(FMOVQload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
+ && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
+ (FMOVQload [off1+int32(off2)] {sym} ptr mem)
+(FLDPQ [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
+ && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
+ (FLDPQ [off1+int32(off2)] {sym} ptr mem)
// register indexed load
(MOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVDloadidx ptr idx mem)
@@ -779,6 +785,12 @@
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVDstore [off1+int32(off2)] {sym} ptr val mem)
+(FMOVQstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
+ && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
+ (FMOVQstore [off1+int32(off2)] {sym} ptr val mem)
+(FSTPQ [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(int64(off1)+off2)
+ && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
+ (FSTPQ [off1+int32(off2)] {sym} ptr val1 val2 mem)
// register indexed store
(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem)
@@ -865,6 +877,14 @@
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(FMOVQload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+ && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
+ && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
+ (FMOVQload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(FLDPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+ && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
+ && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
+ (FLDPQ [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
@@ -894,6 +914,14 @@
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+(FMOVQstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+ && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
+ && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
+ (FMOVQstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+(FSTPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
+ && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
+ && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
+ (FSTPQ [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
// these seem to have bad interaction with other rules, resulting in slower code
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
index 10feb99211..3314695ad0 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go
@@ -396,6 +396,7 @@ func init() {
{name: "MOVDload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVD", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
{name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
{name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
+ {name: "FMOVQload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVQ", typ: "Vec128", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
// LDP instructions load the contents of two adjacent locations in memory into registers.
// Address to start loading is addr = arg0 + auxInt + aux.
@@ -408,6 +409,7 @@ func init() {
{name: "LDPSW", argLength: 2, reg: gpload2, aux: "SymOff", asm: "LDPSW", typ: "(Int32,Int32)", faultOnNilArg0: true, symEffect: "Read"}, // T=int32 (gp reg destination) signed extension
{name: "FLDPD", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPD", typ: "(Float64,Float64)", faultOnNilArg0: true, symEffect: "Read"}, // T=float64 (fp reg destination)
{name: "FLDPS", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPS", typ: "(Float32,Float32)", faultOnNilArg0: true, symEffect: "Read"}, // T=float32 (fp reg destination)
+ {name: "FLDPQ", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPQ", typ: "(Vec128,Vec128)", faultOnNilArg0: true, symEffect: "Read"}, // T=vec128 (fp reg destination)
// register indexed load
{name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit dword from arg0 + arg1, arg2 = mem.
@@ -435,6 +437,7 @@ func init() {
{name: "MOVDstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
+ {name: "FMOVQstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVQ", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
// STP instructions store the contents of two registers to adjacent locations in memory.
// Address to start storing is addr = arg0 + auxInt + aux.
@@ -445,6 +448,7 @@ func init() {
{name: "STPW", argLength: 4, reg: gpstore2, aux: "SymOff", asm: "STPW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=int32 (gp reg source)
{name: "FSTPD", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=float64 (fp reg source)
{name: "FSTPS", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=float32 (fp reg source)
+ {name: "FSTPQ", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPQ", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=vec128 (fp reg source)
// register indexed store
{name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem"}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 031d87bcf9..96bb288a96 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -4319,11 +4319,13 @@ const (
OpARM64MOVDload
OpARM64FMOVSload
OpARM64FMOVDload
+ OpARM64FMOVQload
OpARM64LDP
OpARM64LDPW
OpARM64LDPSW
OpARM64FLDPD
OpARM64FLDPS
+ OpARM64FLDPQ
OpARM64MOVDloadidx
OpARM64MOVWloadidx
OpARM64MOVWUloadidx
@@ -4346,10 +4348,12 @@ const (
OpARM64MOVDstore
OpARM64FMOVSstore
OpARM64FMOVDstore
+ OpARM64FMOVQstore
OpARM64STP
OpARM64STPW
OpARM64FSTPD
OpARM64FSTPS
+ OpARM64FSTPQ
OpARM64MOVBstoreidx
OpARM64MOVHstoreidx
OpARM64MOVWstoreidx
@@ -67512,6 +67516,22 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "FMOVQload",
+ auxType: auxSymOff,
+ argLen: 2,
+ faultOnNilArg0: true,
+ symEffect: SymRead,
+ asm: arm64.AFMOVQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
name: "LDP",
auxType: auxSymOff,
argLen: 2,
@@ -67597,6 +67617,23 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "FLDPQ",
+ auxType: auxSymOff,
+ argLen: 2,
+ faultOnNilArg0: true,
+ symEffect: SymRead,
+ asm: arm64.AFLDPQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
name: "MOVDloadidx",
argLen: 3,
asm: arm64.AMOVD,
@@ -67905,6 +67942,20 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "FMOVQstore",
+ auxType: auxSymOff,
+ argLen: 3,
+ faultOnNilArg0: true,
+ symEffect: SymWrite,
+ asm: arm64.AFMOVQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
name: "STP",
auxType: auxSymOff,
argLen: 4,
@@ -67965,6 +68016,21 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "FSTPQ",
+ auxType: auxSymOff,
+ argLen: 4,
+ faultOnNilArg0: true,
+ symEffect: SymWrite,
+ asm: arm64.AFSTPQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
name: "MOVBstoreidx",
argLen: 4,
asm: arm64.AMOVB,
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 735cda5db0..d243274732 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -108,6 +108,8 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FCMPS(v)
case OpARM64FCVTDS:
return rewriteValueARM64_OpARM64FCVTDS(v)
+ case OpARM64FLDPQ:
+ return rewriteValueARM64_OpARM64FLDPQ(v)
case OpARM64FMOVDfpgp:
return rewriteValueARM64_OpARM64FMOVDfpgp(v)
case OpARM64FMOVDgpfp:
@@ -124,6 +126,10 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FMOVDstoreidx(v)
case OpARM64FMOVDstoreidx8:
return rewriteValueARM64_OpARM64FMOVDstoreidx8(v)
+ case OpARM64FMOVQload:
+ return rewriteValueARM64_OpARM64FMOVQload(v)
+ case OpARM64FMOVQstore:
+ return rewriteValueARM64_OpARM64FMOVQstore(v)
case OpARM64FMOVSload:
return rewriteValueARM64_OpARM64FMOVSload(v)
case OpARM64FMOVSloadidx:
@@ -148,6 +154,8 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FNMULD(v)
case OpARM64FNMULS:
return rewriteValueARM64_OpARM64FNMULS(v)
+ case OpARM64FSTPQ:
+ return rewriteValueARM64_OpARM64FSTPQ(v)
case OpARM64FSUBD:
return rewriteValueARM64_OpARM64FSUBD(v)
case OpARM64FSUBS:
@@ -4904,6 +4912,56 @@ func rewriteValueARM64_OpARM64FCVTDS(v *Value) bool {
}
return false
}
+func rewriteValueARM64_OpARM64FLDPQ(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ config := b.Func.Config
+ // match: (FLDPQ [off1] {sym} (ADDconst [off2] ptr) mem)
+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+ // result: (FLDPQ [off1+int32(off2)] {sym} ptr mem)
+ for {
+ off1 := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ if v_0.Op != OpARM64ADDconst {
+ break
+ }
+ off2 := auxIntToInt64(v_0.AuxInt)
+ ptr := v_0.Args[0]
+ mem := v_1
+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+ break
+ }
+ v.reset(OpARM64FLDPQ)
+ v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+ v.Aux = symToAux(sym)
+ v.AddArg2(ptr, mem)
+ return true
+ }
+ // match: (FLDPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+ // result: (FLDPQ [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+ for {
+ off1 := auxIntToInt32(v.AuxInt)
+ sym1 := auxToSym(v.Aux)
+ if v_0.Op != OpARM64MOVDaddr {
+ break
+ }
+ off2 := auxIntToInt32(v_0.AuxInt)
+ sym2 := auxToSym(v_0.Aux)
+ ptr := v_0.Args[0]
+ mem := v_1
+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+ break
+ }
+ v.reset(OpARM64FLDPQ)
+ v.AuxInt = int32ToAuxInt(off1 + off2)
+ v.Aux = symToAux(mergeSym(sym1, sym2))
+ v.AddArg2(ptr, mem)
+ return true
+ }
+ return false
+}
func rewriteValueARM64_OpARM64FMOVDfpgp(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@@ -5352,6 +5410,109 @@ func rewriteValueARM64_OpARM64FMOVDstoreidx8(v *Value) bool {
}
return false
}
+func rewriteValueARM64_OpARM64FMOVQload(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ config := b.Func.Config
+ // match: (FMOVQload [off1] {sym} (ADDconst [off2] ptr) mem)
+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+ // result: (FMOVQload [off1+int32(off2)] {sym} ptr mem)
+ for {
+ off1 := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ if v_0.Op != OpARM64ADDconst {
+ break
+ }
+ off2 := auxIntToInt64(v_0.AuxInt)
+ ptr := v_0.Args[0]
+ mem := v_1
+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+ break
+ }
+ v.reset(OpARM64FMOVQload)
+ v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+ v.Aux = symToAux(sym)
+ v.AddArg2(ptr, mem)
+ return true
+ }
+ // match: (FMOVQload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+ // result: (FMOVQload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+ for {
+ off1 := auxIntToInt32(v.AuxInt)
+ sym1 := auxToSym(v.Aux)
+ if v_0.Op != OpARM64MOVDaddr {
+ break
+ }
+ off2 := auxIntToInt32(v_0.AuxInt)
+ sym2 := auxToSym(v_0.Aux)
+ ptr := v_0.Args[0]
+ mem := v_1
+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+ break
+ }
+ v.reset(OpARM64FMOVQload)
+ v.AuxInt = int32ToAuxInt(off1 + off2)
+ v.Aux = symToAux(mergeSym(sym1, sym2))
+ v.AddArg2(ptr, mem)
+ return true
+ }
+ return false
+}
+func rewriteValueARM64_OpARM64FMOVQstore(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ config := b.Func.Config
+ // match: (FMOVQstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+ // result: (FMOVQstore [off1+int32(off2)] {sym} ptr val mem)
+ for {
+ off1 := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ if v_0.Op != OpARM64ADDconst {
+ break
+ }
+ off2 := auxIntToInt64(v_0.AuxInt)
+ ptr := v_0.Args[0]
+ val := v_1
+ mem := v_2
+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+ break
+ }
+ v.reset(OpARM64FMOVQstore)
+ v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+ v.Aux = symToAux(sym)
+ v.AddArg3(ptr, val, mem)
+ return true
+ }
+ // match: (FMOVQstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+ // result: (FMOVQstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+ for {
+ off1 := auxIntToInt32(v.AuxInt)
+ sym1 := auxToSym(v.Aux)
+ if v_0.Op != OpARM64MOVDaddr {
+ break
+ }
+ off2 := auxIntToInt32(v_0.AuxInt)
+ sym2 := auxToSym(v_0.Aux)
+ ptr := v_0.Args[0]
+ val := v_1
+ mem := v_2
+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+ break
+ }
+ v.reset(OpARM64FMOVQstore)
+ v.AuxInt = int32ToAuxInt(off1 + off2)
+ v.Aux = symToAux(mergeSym(sym1, sym2))
+ v.AddArg3(ptr, val, mem)
+ return true
+ }
+ return false
+}
func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -5894,6 +6055,62 @@ func rewriteValueARM64_OpARM64FNMULS(v *Value) bool {
}
return false
}
+func rewriteValueARM64_OpARM64FSTPQ(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ config := b.Func.Config
+ // match: (FSTPQ [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem)
+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+ // result: (FSTPQ [off1+int32(off2)] {sym} ptr val1 val2 mem)
+ for {
+ off1 := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ if v_0.Op != OpARM64ADDconst {
+ break
+ }
+ off2 := auxIntToInt64(v_0.AuxInt)
+ ptr := v_0.Args[0]
+ val1 := v_1
+ val2 := v_2
+ mem := v_3
+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+ break
+ }
+ v.reset(OpARM64FSTPQ)
+ v.AuxInt = int32ToAuxInt(off1 + int32(off2))
+ v.Aux = symToAux(sym)
+ v.AddArg4(ptr, val1, val2, mem)
+ return true
+ }
+ // match: (FSTPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
+ // result: (FSTPQ [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
+ for {
+ off1 := auxIntToInt32(v.AuxInt)
+ sym1 := auxToSym(v.Aux)
+ if v_0.Op != OpARM64MOVDaddr {
+ break
+ }
+ off2 := auxIntToInt32(v_0.AuxInt)
+ sym2 := auxToSym(v_0.Aux)
+ ptr := v_0.Args[0]
+ val1 := v_1
+ val2 := v_2
+ mem := v_3
+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
+ break
+ }
+ v.reset(OpARM64FSTPQ)
+ v.AuxInt = int32ToAuxInt(off1 + off2)
+ v.Aux = symToAux(mergeSym(sym1, sym2))
+ v.AddArg4(ptr, val1, val2, mem)
+ return true
+ }
+ return false
+}
func rewriteValueARM64_OpARM64FSUBD(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]