aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ben Shi <powerman1st@163.com> 2018-06-29 02:11:53 +0000
committer Ben Shi <powerman1st@163.com> 2018-08-24 23:38:25 +0000
commit 3bc34385faacbcbefb2b4abc0e280b709aab03c9 (patch)
tree a08dee18062c12e69dec69fe34abd66eb5960d53
parent aacc891df29f742a6a128069256436fa369696c2 (diff)
download go-3bc34385faacbcbefb2b4abc0e280b709aab03c9.tar.xz
cmd/compile: introduce more read-modify-write operations for amd64
Add support for read-modify-write for ADD/SUB/AND/OR/XOR on amd64. 1. The total size of pkg/linux_amd64 decreases by about 4KB, excluding cmd/compile. 2. The go1 benchmark shows a little improvement, excluding noise. name old time/op new time/op delta BinaryTree17-4 2.63s ± 3% 2.65s ± 4% +1.01% (p=0.037 n=35+35) Fannkuch11-4 2.33s ± 2% 2.39s ± 2% +2.49% (p=0.000 n=35+35) FmtFprintfEmpty-4 45.4ns ± 5% 40.8ns ± 6% -10.09% (p=0.000 n=35+35) FmtFprintfString-4 73.3ns ± 4% 70.9ns ± 3% -3.23% (p=0.000 n=30+35) FmtFprintfInt-4 79.9ns ± 4% 79.5ns ± 3% ~ (p=0.736 n=34+35) FmtFprintfIntInt-4 126ns ± 4% 125ns ± 4% ~ (p=0.083 n=35+35) FmtFprintfPrefixedInt-4 152ns ± 6% 152ns ± 3% ~ (p=0.855 n=34+35) FmtFprintfFloat-4 215ns ± 4% 213ns ± 4% ~ (p=0.066 n=35+35) FmtManyArgs-4 522ns ± 3% 506ns ± 3% -3.15% (p=0.000 n=35+35) GobDecode-4 6.45ms ± 8% 6.51ms ± 7% +0.96% (p=0.026 n=35+35) GobEncode-4 6.10ms ± 6% 6.02ms ± 8% ~ (p=0.160 n=35+35) Gzip-4 228ms ± 3% 221ms ± 3% -2.92% (p=0.000 n=35+35) Gunzip-4 37.5ms ± 4% 37.2ms ± 3% -0.78% (p=0.036 n=35+35) HTTPClientServer-4 58.7µs ± 2% 59.2µs ± 1% +0.80% (p=0.000 n=33+33) JSONEncode-4 12.0ms ± 3% 12.2ms ± 3% +1.84% (p=0.008 n=35+35) JSONDecode-4 57.0ms ± 4% 56.6ms ± 3% ~ (p=0.320 n=35+35) Mandelbrot200-4 3.82ms ± 3% 3.79ms ± 3% ~ (p=0.074 n=35+35) GoParse-4 3.21ms ± 5% 3.24ms ± 4% ~ (p=0.119 n=35+35) RegexpMatchEasy0_32-4 76.3ns ± 4% 75.4ns ± 4% -1.14% (p=0.014 n=34+33) RegexpMatchEasy0_1K-4 251ns ± 4% 254ns ± 3% +1.28% (p=0.016 n=35+35) RegexpMatchEasy1_32-4 69.6ns ± 3% 70.1ns ± 3% +0.82% (p=0.005 n=35+35) RegexpMatchEasy1_1K-4 367ns ± 4% 376ns ± 4% +2.47% (p=0.000 n=35+35) RegexpMatchMedium_32-4 108ns ± 5% 104ns ± 4% -3.18% (p=0.000 n=35+35) RegexpMatchMedium_1K-4 33.8µs ± 3% 32.7µs ± 3% -3.27% (p=0.000 n=35+35) RegexpMatchHard_32-4 1.55µs ± 3% 1.52µs ± 3% -1.64% (p=0.000 n=35+35) RegexpMatchHard_1K-4 46.6µs ± 3% 46.6µs ± 4% ~ (p=0.149 n=35+35) Revcomp-4 416ms ± 7% 412ms ± 6% -0.95% (p=0.033 n=33+35) Template-4 64.3ms ± 3% 62.4ms ± 7% -2.94% 
(p=0.000 n=35+35) TimeParse-4 320ns ± 2% 322ns ± 3% ~ (p=0.589 n=35+35) TimeFormat-4 300ns ± 3% 300ns ± 3% ~ (p=0.597 n=35+35) [Geo mean] 47.4µs 47.0µs -0.86% name old speed new speed delta GobDecode-4 119MB/s ± 7% 118MB/s ± 7% -0.96% (p=0.027 n=35+35) GobEncode-4 126MB/s ± 7% 127MB/s ± 6% ~ (p=0.157 n=34+34) Gzip-4 85.3MB/s ± 3% 87.9MB/s ± 3% +3.02% (p=0.000 n=35+35) Gunzip-4 518MB/s ± 4% 522MB/s ± 3% +0.79% (p=0.037 n=35+35) JSONEncode-4 162MB/s ± 3% 159MB/s ± 3% -1.81% (p=0.009 n=35+35) JSONDecode-4 34.1MB/s ± 4% 34.3MB/s ± 3% ~ (p=0.318 n=35+35) GoParse-4 18.0MB/s ± 5% 17.9MB/s ± 4% ~ (p=0.117 n=35+35) RegexpMatchEasy0_32-4 419MB/s ± 3% 425MB/s ± 4% +1.46% (p=0.003 n=32+33) RegexpMatchEasy0_1K-4 4.07GB/s ± 4% 4.02GB/s ± 3% -1.28% (p=0.014 n=35+35) RegexpMatchEasy1_32-4 460MB/s ± 3% 456MB/s ± 4% -0.82% (p=0.004 n=35+35) RegexpMatchEasy1_1K-4 2.79GB/s ± 4% 2.72GB/s ± 4% -2.39% (p=0.000 n=35+35) RegexpMatchMedium_32-4 9.23MB/s ± 4% 9.53MB/s ± 4% +3.16% (p=0.000 n=35+35) RegexpMatchMedium_1K-4 30.3MB/s ± 3% 31.3MB/s ± 3% +3.38% (p=0.000 n=35+35) RegexpMatchHard_32-4 20.7MB/s ± 3% 21.0MB/s ± 3% +1.67% (p=0.000 n=35+35) RegexpMatchHard_1K-4 22.0MB/s ± 3% 21.9MB/s ± 4% ~ (p=0.277 n=35+33) Revcomp-4 612MB/s ± 7% 618MB/s ± 6% +0.96% (p=0.034 n=33+35) Template-4 30.2MB/s ± 3% 31.1MB/s ± 6% +3.05% (p=0.000 n=35+35) [Geo mean] 123MB/s 124MB/s +0.64% Change-Id: Ia025da272e07d0069413824bfff3471b106d6280 Reviewed-on: https://go-review.googlesource.com/121535 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ilya Tocar <ilya.tocar@intel.com> Reviewed-by: Keith Randall <khr@golang.org>
-rw-r--r-- src/cmd/compile/internal/amd64/ssa.go 4
-rw-r--r-- src/cmd/compile/internal/ssa/gen/AMD64.rules 16
-rw-r--r-- src/cmd/compile/internal/ssa/gen/AMD64Ops.go 12
-rw-r--r-- src/cmd/compile/internal/ssa/opGen.go 160
-rw-r--r-- src/cmd/compile/internal/ssa/rewriteAMD64.go 1677
-rw-r--r-- test/codegen/arithmetic.go 2
6 files changed, 1865 insertions, 6 deletions
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 4ecdb769f3..ae6141dd12 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -699,7 +699,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
- case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
+ case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
+ ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
+ ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index eab66d17ab..10d917632e 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1045,6 +1045,10 @@
((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+ ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {sym} base val mem)
+((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+ ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
// Fold constants into stores.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
@@ -1091,6 +1095,12 @@
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+ ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+ ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
// generating indexed loads and stores
(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
@@ -2276,6 +2286,12 @@
((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
+(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
+(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
+ ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
+ ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
// Merge ADDQconst and LEAQ into atomic loads.
(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 4735ea1bc0..512df99694 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -346,6 +346,18 @@ func init() {
{name: "XORQload", argLength: 3, reg: gp21load, asm: "XORQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
{name: "XORLload", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
+ // direct binary-op on memory (read-modify-write)
+ {name: "ADDQmodify", argLength: 3, reg: gpstore, asm: "ADDQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) += arg1, arg2=mem
+ {name: "SUBQmodify", argLength: 3, reg: gpstore, asm: "SUBQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) -= arg1, arg2=mem
+ {name: "ANDQmodify", argLength: 3, reg: gpstore, asm: "ANDQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) &= arg1, arg2=mem
+ {name: "ORQmodify", argLength: 3, reg: gpstore, asm: "ORQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) |= arg1, arg2=mem
+ {name: "XORQmodify", argLength: 3, reg: gpstore, asm: "XORQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) ^= arg1, arg2=mem
+ {name: "ADDLmodify", argLength: 3, reg: gpstore, asm: "ADDL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) += arg1, arg2=mem
+ {name: "SUBLmodify", argLength: 3, reg: gpstore, asm: "SUBL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) -= arg1, arg2=mem
+ {name: "ANDLmodify", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) &= arg1, arg2=mem
+ {name: "ORLmodify", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) |= arg1, arg2=mem
+ {name: "XORLmodify", argLength: 3, reg: gpstore, asm: "XORL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) ^= arg1, arg2=mem
+
// unary ops
{name: "NEGQ", argLength: 1, reg: gp11, asm: "NEGQ", resultInArg0: true, clobberFlags: true}, // -arg0
{name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 704792c9af..374949c602 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -600,6 +600,16 @@ const (
OpAMD64ORLload
OpAMD64XORQload
OpAMD64XORLload
+ OpAMD64ADDQmodify
+ OpAMD64SUBQmodify
+ OpAMD64ANDQmodify
+ OpAMD64ORQmodify
+ OpAMD64XORQmodify
+ OpAMD64ADDLmodify
+ OpAMD64SUBLmodify
+ OpAMD64ANDLmodify
+ OpAMD64ORLmodify
+ OpAMD64XORLmodify
OpAMD64NEGQ
OpAMD64NEGL
OpAMD64NOTQ
@@ -7662,6 +7672,156 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "ADDQmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.AADDQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "SUBQmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ASUBQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "ANDQmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.AANDQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "ORQmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.AORQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "XORQmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.AXORQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "ADDLmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.AADDL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "SUBLmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ASUBL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "ANDLmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.AANDL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "ORLmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.AORL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "XORLmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.AXORL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
name: "NEGQ",
argLen: 1,
resultInArg0: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 245f795d90..e592610c26 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -23,6 +23,8 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64ADDLconstmodify_0(v)
case OpAMD64ADDLload:
return rewriteValueAMD64_OpAMD64ADDLload_0(v)
+ case OpAMD64ADDLmodify:
+ return rewriteValueAMD64_OpAMD64ADDLmodify_0(v)
case OpAMD64ADDQ:
return rewriteValueAMD64_OpAMD64ADDQ_0(v) || rewriteValueAMD64_OpAMD64ADDQ_10(v) || rewriteValueAMD64_OpAMD64ADDQ_20(v)
case OpAMD64ADDQconst:
@@ -31,6 +33,8 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64ADDQconstmodify_0(v)
case OpAMD64ADDQload:
return rewriteValueAMD64_OpAMD64ADDQload_0(v)
+ case OpAMD64ADDQmodify:
+ return rewriteValueAMD64_OpAMD64ADDQmodify_0(v)
case OpAMD64ADDSD:
return rewriteValueAMD64_OpAMD64ADDSD_0(v)
case OpAMD64ADDSDload:
@@ -47,6 +51,8 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64ANDLconstmodify_0(v)
case OpAMD64ANDLload:
return rewriteValueAMD64_OpAMD64ANDLload_0(v)
+ case OpAMD64ANDLmodify:
+ return rewriteValueAMD64_OpAMD64ANDLmodify_0(v)
case OpAMD64ANDQ:
return rewriteValueAMD64_OpAMD64ANDQ_0(v)
case OpAMD64ANDQconst:
@@ -55,6 +61,8 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64ANDQconstmodify_0(v)
case OpAMD64ANDQload:
return rewriteValueAMD64_OpAMD64ANDQload_0(v)
+ case OpAMD64ANDQmodify:
+ return rewriteValueAMD64_OpAMD64ANDQmodify_0(v)
case OpAMD64BSFQ:
return rewriteValueAMD64_OpAMD64BSFQ_0(v)
case OpAMD64BTLconst:
@@ -224,7 +232,7 @@ func rewriteValueAMD64(v *Value) bool {
case OpAMD64MOVLloadidx8:
return rewriteValueAMD64_OpAMD64MOVLloadidx8_0(v)
case OpAMD64MOVLstore:
- return rewriteValueAMD64_OpAMD64MOVLstore_0(v) || rewriteValueAMD64_OpAMD64MOVLstore_10(v)
+ return rewriteValueAMD64_OpAMD64MOVLstore_0(v) || rewriteValueAMD64_OpAMD64MOVLstore_10(v) || rewriteValueAMD64_OpAMD64MOVLstore_20(v) || rewriteValueAMD64_OpAMD64MOVLstore_30(v)
case OpAMD64MOVLstoreconst:
return rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v)
case OpAMD64MOVLstoreconstidx1:
@@ -254,7 +262,7 @@ func rewriteValueAMD64(v *Value) bool {
case OpAMD64MOVQloadidx8:
return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v)
case OpAMD64MOVQstore:
- return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v)
+ return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v) || rewriteValueAMD64_OpAMD64MOVQstore_20(v)
case OpAMD64MOVQstoreconst:
return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v)
case OpAMD64MOVQstoreconstidx1:
@@ -345,6 +353,8 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64ORLconstmodify_0(v)
case OpAMD64ORLload:
return rewriteValueAMD64_OpAMD64ORLload_0(v)
+ case OpAMD64ORLmodify:
+ return rewriteValueAMD64_OpAMD64ORLmodify_0(v)
case OpAMD64ORQ:
return rewriteValueAMD64_OpAMD64ORQ_0(v) || rewriteValueAMD64_OpAMD64ORQ_10(v) || rewriteValueAMD64_OpAMD64ORQ_20(v) || rewriteValueAMD64_OpAMD64ORQ_30(v) || rewriteValueAMD64_OpAMD64ORQ_40(v) || rewriteValueAMD64_OpAMD64ORQ_50(v) || rewriteValueAMD64_OpAMD64ORQ_60(v) || rewriteValueAMD64_OpAMD64ORQ_70(v) || rewriteValueAMD64_OpAMD64ORQ_80(v) || rewriteValueAMD64_OpAMD64ORQ_90(v) || rewriteValueAMD64_OpAMD64ORQ_100(v) || rewriteValueAMD64_OpAMD64ORQ_110(v) || rewriteValueAMD64_OpAMD64ORQ_120(v) || rewriteValueAMD64_OpAMD64ORQ_130(v) || rewriteValueAMD64_OpAMD64ORQ_140(v) || rewriteValueAMD64_OpAMD64ORQ_150(v) || rewriteValueAMD64_OpAMD64ORQ_160(v)
case OpAMD64ORQconst:
@@ -353,6 +363,8 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64ORQconstmodify_0(v)
case OpAMD64ORQload:
return rewriteValueAMD64_OpAMD64ORQload_0(v)
+ case OpAMD64ORQmodify:
+ return rewriteValueAMD64_OpAMD64ORQmodify_0(v)
case OpAMD64ROLB:
return rewriteValueAMD64_OpAMD64ROLB_0(v)
case OpAMD64ROLBconst:
@@ -467,12 +479,16 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64SUBLconst_0(v)
case OpAMD64SUBLload:
return rewriteValueAMD64_OpAMD64SUBLload_0(v)
+ case OpAMD64SUBLmodify:
+ return rewriteValueAMD64_OpAMD64SUBLmodify_0(v)
case OpAMD64SUBQ:
return rewriteValueAMD64_OpAMD64SUBQ_0(v)
case OpAMD64SUBQconst:
return rewriteValueAMD64_OpAMD64SUBQconst_0(v)
case OpAMD64SUBQload:
return rewriteValueAMD64_OpAMD64SUBQload_0(v)
+ case OpAMD64SUBQmodify:
+ return rewriteValueAMD64_OpAMD64SUBQmodify_0(v)
case OpAMD64SUBSD:
return rewriteValueAMD64_OpAMD64SUBSD_0(v)
case OpAMD64SUBSDload:
@@ -513,6 +529,8 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64XORLconstmodify_0(v)
case OpAMD64XORLload:
return rewriteValueAMD64_OpAMD64XORLload_0(v)
+ case OpAMD64XORLmodify:
+ return rewriteValueAMD64_OpAMD64XORLmodify_0(v)
case OpAMD64XORQ:
return rewriteValueAMD64_OpAMD64XORQ_0(v) || rewriteValueAMD64_OpAMD64XORQ_10(v)
case OpAMD64XORQconst:
@@ -521,6 +539,8 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64XORQconstmodify_0(v)
case OpAMD64XORQload:
return rewriteValueAMD64_OpAMD64XORQload_0(v)
+ case OpAMD64XORQmodify:
+ return rewriteValueAMD64_OpAMD64XORQmodify_0(v)
case OpAdd16:
return rewriteValueAMD64_OpAdd16_0(v)
case OpAdd32:
@@ -2038,6 +2058,62 @@ func rewriteValueAMD64_OpAMD64ADDLload_0(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64ADDLmodify_0(v *Value) bool {
+ // match: (ADDLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (ADDLmodify [off1+off2] {sym} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64ADDLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (ADDLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (ADDLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64ADDLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64ADDQ_0(v *Value) bool {
// match: (ADDQ x (MOVQconst [c]))
// cond: is32Bit(c)
@@ -2902,6 +2978,62 @@ func rewriteValueAMD64_OpAMD64ADDQload_0(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64ADDQmodify_0(v *Value) bool {
+ // match: (ADDQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (ADDQmodify [off1+off2] {sym} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64ADDQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (ADDQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (ADDQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64ADDQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64ADDSD_0(v *Value) bool {
// match: (ADDSD x l:(MOVSDload [off] {sym} ptr mem))
// cond: canMergeLoad(v, l, x) && clobber(l)
@@ -3643,6 +3775,62 @@ func rewriteValueAMD64_OpAMD64ANDLload_0(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64ANDLmodify_0(v *Value) bool {
+ // match: (ANDLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (ANDLmodify [off1+off2] {sym} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64ANDLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (ANDLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (ANDLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64ANDLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool {
b := v.Block
_ = b
@@ -4108,6 +4296,62 @@ func rewriteValueAMD64_OpAMD64ANDQload_0(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64ANDQmodify_0(v *Value) bool {
+ // match: (ANDQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (ANDQmodify [off1+off2] {sym} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64ANDQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (ANDQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (ANDQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64ANDQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64BSFQ_0(v *Value) bool {
b := v.Block
_ = b
@@ -14574,6 +14818,548 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
v.AddArg(mem)
return true
}
+ // match: (MOVLstore {sym} [off] ptr y:(ADDLload x [off] {sym} ptr mem) mem)
+ // cond: y.Uses==1 && clobber(y)
+ // result: (ADDLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ADDLload {
+ break
+ }
+ if y.AuxInt != off {
+ break
+ }
+ if y.Aux != sym {
+ break
+ }
+ _ = y.Args[2]
+ x := y.Args[0]
+ if ptr != y.Args[1] {
+ break
+ }
+ mem := y.Args[2]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && clobber(y)) {
+ break
+ }
+ v.reset(OpAMD64ADDLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(ANDLload x [off] {sym} ptr mem) mem)
+ // cond: y.Uses==1 && clobber(y)
+ // result: (ANDLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ANDLload {
+ break
+ }
+ if y.AuxInt != off {
+ break
+ }
+ if y.Aux != sym {
+ break
+ }
+ _ = y.Args[2]
+ x := y.Args[0]
+ if ptr != y.Args[1] {
+ break
+ }
+ mem := y.Args[2]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && clobber(y)) {
+ break
+ }
+ v.reset(OpAMD64ANDLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(ORLload x [off] {sym} ptr mem) mem)
+ // cond: y.Uses==1 && clobber(y)
+ // result: (ORLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ORLload {
+ break
+ }
+ if y.AuxInt != off {
+ break
+ }
+ if y.Aux != sym {
+ break
+ }
+ _ = y.Args[2]
+ x := y.Args[0]
+ if ptr != y.Args[1] {
+ break
+ }
+ mem := y.Args[2]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && clobber(y)) {
+ break
+ }
+ v.reset(OpAMD64ORLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(XORLload x [off] {sym} ptr mem) mem)
+ // cond: y.Uses==1 && clobber(y)
+ // result: (XORLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64XORLload {
+ break
+ }
+ if y.AuxInt != off {
+ break
+ }
+ if y.Aux != sym {
+ break
+ }
+ _ = y.Args[2]
+ x := y.Args[0]
+ if ptr != y.Args[1] {
+ break
+ }
+ mem := y.Args[2]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && clobber(y)) {
+ break
+ }
+ v.reset(OpAMD64XORLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(ADDL l:(MOVLload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ADDLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ADDL {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64ADDLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(ADDL x l:(MOVLload [off] {sym} ptr mem)) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ADDLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ADDL {
+ break
+ }
+ _ = y.Args[1]
+ x := y.Args[0]
+ l := y.Args[1]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64ADDLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64MOVLstore_20(v *Value) bool {
+ // match: (MOVLstore {sym} [off] ptr y:(SUBL l:(MOVLload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (SUBLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64SUBL {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64SUBLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(ANDL l:(MOVLload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ANDLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ANDL {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64ANDLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(ANDL x l:(MOVLload [off] {sym} ptr mem)) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ANDLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ANDL {
+ break
+ }
+ _ = y.Args[1]
+ x := y.Args[0]
+ l := y.Args[1]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64ANDLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(ORL l:(MOVLload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ORLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ORL {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64ORLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(ORL x l:(MOVLload [off] {sym} ptr mem)) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ORLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ORL {
+ break
+ }
+ _ = y.Args[1]
+ x := y.Args[0]
+ l := y.Args[1]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64ORLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(XORL l:(MOVLload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (XORLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64XORL {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64XORLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(XORL x l:(MOVLload [off] {sym} ptr mem)) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (XORLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64XORL {
+ break
+ }
+ _ = y.Args[1]
+ x := y.Args[0]
+ l := y.Args[1]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64XORLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
// match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (ADDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
@@ -14691,6 +15477,9 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
v.AddArg(mem)
return true
}
+ return false
+}
+func rewriteValueAMD64_OpAMD64MOVLstore_30(v *Value) bool {
// match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (XORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
@@ -16677,6 +17466,551 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
v.AddArg(mem)
return true
}
+ // match: (MOVQstore {sym} [off] ptr y:(ADDQload x [off] {sym} ptr mem) mem)
+ // cond: y.Uses==1 && clobber(y)
+ // result: (ADDQmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ADDQload {
+ break
+ }
+ if y.AuxInt != off {
+ break
+ }
+ if y.Aux != sym {
+ break
+ }
+ _ = y.Args[2]
+ x := y.Args[0]
+ if ptr != y.Args[1] {
+ break
+ }
+ mem := y.Args[2]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && clobber(y)) {
+ break
+ }
+ v.reset(OpAMD64ADDQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(ANDQload x [off] {sym} ptr mem) mem)
+ // cond: y.Uses==1 && clobber(y)
+ // result: (ANDQmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ANDQload {
+ break
+ }
+ if y.AuxInt != off {
+ break
+ }
+ if y.Aux != sym {
+ break
+ }
+ _ = y.Args[2]
+ x := y.Args[0]
+ if ptr != y.Args[1] {
+ break
+ }
+ mem := y.Args[2]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && clobber(y)) {
+ break
+ }
+ v.reset(OpAMD64ANDQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstore_10(v *Value) bool { // tries ten MOVQstore -> *Qmodify rules in order; returns true as soon as one rewrites v in place, false if none match
+ // match: (MOVQstore {sym} [off] ptr y:(ORQload x [off] {sym} ptr mem) mem)
+ // cond: y.Uses==1 && clobber(y)
+ // result: (ORQmodify [off] {sym} ptr x mem)
+ for { // not a real loop: every path ends in break (rule rejected, try the next one) or return true
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2] // touches the last arg up front; presumably a bounds-check hint — confirm against the rule generator
+ ptr := v.Args[0]
+ y := v.Args[1] // the value being stored
+ if y.Op != OpAMD64ORQload {
+ break
+ }
+ if y.AuxInt != off { // the load must use the same offset as the store ...
+ break
+ }
+ if y.Aux != sym { // ... the same symbol ...
+ break
+ }
+ _ = y.Args[2]
+ x := y.Args[0]
+ if ptr != y.Args[1] { // ... and the identical pointer value
+ break
+ }
+ mem := y.Args[2]
+ if mem != v.Args[2] { // load and store must take the same memory argument
+ break
+ }
+ if !(y.Uses == 1 && clobber(y)) { // y.Uses == 1: presumably this store is y's only user; clobber is defined elsewhere
+ break
+ }
+ v.reset(OpAMD64ORQmodify) // v becomes the read-modify-write op; aux fields and args are set again below
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x) // only the non-memory operand is kept; the load y is no longer referenced
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(XORQload x [off] {sym} ptr mem) mem)
+ // cond: y.Uses==1 && clobber(y)
+ // result: (XORQmodify [off] {sym} ptr x mem)
+ for { // same shape as the ORQload rule above, for XORQload -> XORQmodify
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64XORQload {
+ break
+ }
+ if y.AuxInt != off {
+ break
+ }
+ if y.Aux != sym {
+ break
+ }
+ _ = y.Args[2]
+ x := y.Args[0]
+ if ptr != y.Args[1] {
+ break
+ }
+ mem := y.Args[2]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && clobber(y)) {
+ break
+ }
+ v.reset(OpAMD64XORQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(ADDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ADDQmodify [off] {sym} ptr x mem)
+ for { // ADDQ with the load as its first operand
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1] // the arithmetic result being stored
+ if y.Op != OpAMD64ADDQ {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0] // candidate load feeding the arithmetic op
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off { // load must read the same offset the store writes
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] { // identical pointer value required (plain !=, no alias analysis here)
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1] // the other operand, kept as the modify op's value argument
+ if mem != v.Args[2] { // load and store must take the same memory argument
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { // both y and l must have exactly one use; clobber is defined elsewhere
+ break
+ }
+ v.reset(OpAMD64ADDQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(ADDQ x l:(MOVQload [off] {sym} ptr mem)) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ADDQmodify [off] {sym} ptr x mem)
+ for { // ADDQ again, with the load as the second operand instead of the first
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ADDQ {
+ break
+ }
+ _ = y.Args[1]
+ x := y.Args[0]
+ l := y.Args[1]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64ADDQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(SUBQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (SUBQmodify [off] {sym} ptr x mem)
+ for { // SUBQ: only the (load, x) operand order is matched; this function has no swapped-operand SUBQ rule
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64SUBQ {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64SUBQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(ANDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ANDQmodify [off] {sym} ptr x mem)
+ for { // ANDQ with the load as its first operand
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ANDQ {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64ANDQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(ANDQ x l:(MOVQload [off] {sym} ptr mem)) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ANDQmodify [off] {sym} ptr x mem)
+ for { // ANDQ with the load as its second operand
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ANDQ {
+ break
+ }
+ _ = y.Args[1]
+ x := y.Args[0]
+ l := y.Args[1]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64ANDQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(ORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ORQmodify [off] {sym} ptr x mem)
+ for { // ORQ with the load as its first operand
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ORQ {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64ORQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(ORQ x l:(MOVQload [off] {sym} ptr mem)) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (ORQmodify [off] {sym} ptr x mem)
+ for { // ORQ with the load as its second operand
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64ORQ {
+ break
+ }
+ _ = y.Args[1]
+ x := y.Args[0]
+ l := y.Args[1]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64ORQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(XORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (XORQmodify [off] {sym} ptr x mem)
+ for { // XORQ with the load as its first operand
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64XORQ {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64XORQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ return false // none of the ten rules matched; v is left untouched
+}
+func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool {
+ // match: (MOVQstore {sym} [off] ptr y:(XORQ x l:(MOVQload [off] {sym} ptr mem)) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (XORQmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64XORQ {
+ break
+ }
+ _ = y.Args[1]
+ x := y.Args[0]
+ l := y.Args[1]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64XORQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
// match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
@@ -16755,9 +18089,6 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
v.AddArg(mem)
return true
}
- return false
-}
-func rewriteValueAMD64_OpAMD64MOVQstore_10(v *Value) bool {
// match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
// result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
@@ -31479,6 +32810,62 @@ func rewriteValueAMD64_OpAMD64ORLload_0(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64ORLmodify_0(v *Value) bool { // tries the two address-folding rules for ORLmodify; returns true if v was rewritten in place, false otherwise
+ // match: (ORLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (ORLmodify [off1+off2] {sym} base val mem)
+ for { // not a real loop: every path ends in break (rule rejected) or return true
+ off1 := v.AuxInt // offset already carried by the ORLmodify
+ sym := v.Aux
+ _ = v.Args[2] // touches the last arg up front; presumably a bounds-check hint — confirm against the rule generator
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst { // the address argument must be an ADDQconst
+ break
+ }
+ off2 := v_0.AuxInt // constant carried by the ADDQconst
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) { // fold only when the summed offset passes is32Bit
+ break
+ }
+ v.reset(OpAMD64ORLmodify) // same op as before; aux fields and args are set again below
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base) // address now uses base directly instead of the ADDQconst
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (ORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (ORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for { // same fold for a LEAQ address, which additionally carries a symbol
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { // offsets must sum within is32Bit and the two symbols must be mergeable (helpers defined elsewhere)
+ break
+ }
+ v.reset(OpAMD64ORLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false // neither rule matched; v is left untouched
+}
func rewriteValueAMD64_OpAMD64ORQ_0(v *Value) bool {
b := v.Block
_ = b
@@ -42440,6 +43827,62 @@ func rewriteValueAMD64_OpAMD64ORQload_0(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64ORQmodify_0(v *Value) bool { // tries the two address-folding rules for ORQmodify; returns true if v was rewritten in place, false otherwise
+ // match: (ORQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (ORQmodify [off1+off2] {sym} base val mem)
+ for { // not a real loop: every path ends in break (rule rejected) or return true
+ off1 := v.AuxInt // offset already carried by the ORQmodify
+ sym := v.Aux
+ _ = v.Args[2] // touches the last arg up front; presumably a bounds-check hint — confirm against the rule generator
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst { // the address argument must be an ADDQconst
+ break
+ }
+ off2 := v_0.AuxInt // constant carried by the ADDQconst
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) { // fold only when the summed offset passes is32Bit
+ break
+ }
+ v.reset(OpAMD64ORQmodify) // same op as before; aux fields and args are set again below
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base) // address now uses base directly instead of the ADDQconst
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (ORQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (ORQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for { // same fold for a LEAQ address, which additionally carries a symbol
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { // offsets must sum within is32Bit and the two symbols must be mergeable (helpers defined elsewhere)
+ break
+ }
+ v.reset(OpAMD64ORQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false // neither rule matched; v is left untouched
+}
func rewriteValueAMD64_OpAMD64ROLB_0(v *Value) bool {
// match: (ROLB x (NEGQ y))
// cond:
@@ -51150,6 +52593,62 @@ func rewriteValueAMD64_OpAMD64SUBLload_0(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64SUBLmodify_0(v *Value) bool { // tries the two address-folding rules for SUBLmodify; returns true if v was rewritten in place, false otherwise
+ // match: (SUBLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (SUBLmodify [off1+off2] {sym} base val mem)
+ for { // not a real loop: every path ends in break (rule rejected) or return true
+ off1 := v.AuxInt // offset already carried by the SUBLmodify
+ sym := v.Aux
+ _ = v.Args[2] // touches the last arg up front; presumably a bounds-check hint — confirm against the rule generator
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst { // the address argument must be an ADDQconst
+ break
+ }
+ off2 := v_0.AuxInt // constant carried by the ADDQconst
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) { // fold only when the summed offset passes is32Bit
+ break
+ }
+ v.reset(OpAMD64SUBLmodify) // same op as before; aux fields and args are set again below
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base) // address now uses base directly instead of the ADDQconst
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (SUBLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (SUBLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for { // same fold for a LEAQ address, which additionally carries a symbol
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { // offsets must sum within is32Bit and the two symbols must be mergeable (helpers defined elsewhere)
+ break
+ }
+ v.reset(OpAMD64SUBLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false // neither rule matched; v is left untouched
+}
func rewriteValueAMD64_OpAMD64SUBQ_0(v *Value) bool {
b := v.Block
_ = b
@@ -51388,6 +52887,62 @@ func rewriteValueAMD64_OpAMD64SUBQload_0(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64SUBQmodify_0(v *Value) bool { // tries the two address-folding rules for SUBQmodify; returns true if v was rewritten in place, false otherwise
+ // match: (SUBQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (SUBQmodify [off1+off2] {sym} base val mem)
+ for { // not a real loop: every path ends in break (rule rejected) or return true
+ off1 := v.AuxInt // offset already carried by the SUBQmodify
+ sym := v.Aux
+ _ = v.Args[2] // touches the last arg up front; presumably a bounds-check hint — confirm against the rule generator
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst { // the address argument must be an ADDQconst
+ break
+ }
+ off2 := v_0.AuxInt // constant carried by the ADDQconst
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) { // fold only when the summed offset passes is32Bit
+ break
+ }
+ v.reset(OpAMD64SUBQmodify) // same op as before; aux fields and args are set again below
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base) // address now uses base directly instead of the ADDQconst
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (SUBQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (SUBQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for { // same fold for a LEAQ address, which additionally carries a symbol
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { // offsets must sum within is32Bit and the two symbols must be mergeable (helpers defined elsewhere)
+ break
+ }
+ v.reset(OpAMD64SUBQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false // neither rule matched; v is left untouched
+}
func rewriteValueAMD64_OpAMD64SUBSD_0(v *Value) bool {
// match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem))
// cond: canMergeLoad(v, l, x) && clobber(l)
@@ -52988,6 +54543,62 @@ func rewriteValueAMD64_OpAMD64XORLload_0(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64XORLmodify_0(v *Value) bool { // tries the two address-folding rules for XORLmodify; returns true if v was rewritten in place, false otherwise
+ // match: (XORLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (XORLmodify [off1+off2] {sym} base val mem)
+ for { // not a real loop: every path ends in break (rule rejected) or return true
+ off1 := v.AuxInt // offset already carried by the XORLmodify
+ sym := v.Aux
+ _ = v.Args[2] // touches the last arg up front; presumably a bounds-check hint — confirm against the rule generator
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst { // the address argument must be an ADDQconst
+ break
+ }
+ off2 := v_0.AuxInt // constant carried by the ADDQconst
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) { // fold only when the summed offset passes is32Bit
+ break
+ }
+ v.reset(OpAMD64XORLmodify) // same op as before; aux fields and args are set again below
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base) // address now uses base directly instead of the ADDQconst
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (XORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (XORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for { // same fold for a LEAQ address, which additionally carries a symbol
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { // offsets must sum within is32Bit and the two symbols must be mergeable (helpers defined elsewhere)
+ break
+ }
+ v.reset(OpAMD64XORLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false // neither rule matched; v is left untouched
+}
func rewriteValueAMD64_OpAMD64XORQ_0(v *Value) bool {
b := v.Block
_ = b
@@ -53454,6 +55065,62 @@ func rewriteValueAMD64_OpAMD64XORQload_0(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64XORQmodify_0(v *Value) bool { // tries the two address-folding rules for XORQmodify; returns true if v was rewritten in place, false otherwise
+ // match: (XORQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (XORQmodify [off1+off2] {sym} base val mem)
+ for { // not a real loop: every path ends in break (rule rejected) or return true
+ off1 := v.AuxInt // offset already carried by the XORQmodify
+ sym := v.Aux
+ _ = v.Args[2] // touches the last arg up front; presumably a bounds-check hint — confirm against the rule generator
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst { // the address argument must be an ADDQconst
+ break
+ }
+ off2 := v_0.AuxInt // constant carried by the ADDQconst
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) { // fold only when the summed offset passes is32Bit
+ break
+ }
+ v.reset(OpAMD64XORQmodify) // same op as before; aux fields and args are set again below
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base) // address now uses base directly instead of the ADDQconst
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (XORQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (XORQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for { // same fold for a LEAQ address, which additionally carries a symbol
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { // offsets must sum within is32Bit and the two symbols must be mergeable (helpers defined elsewhere)
+ break
+ }
+ v.reset(OpAMD64XORQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false // neither rule matched; v is left untouched
+}
func rewriteValueAMD64_OpAdd16_0(v *Value) bool {
// match: (Add16 x y)
// cond:
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
index 32efcaaa3f..09a2fa091e 100644
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -16,8 +16,10 @@ package codegen
func SubMem(arr []int, b int) int {
// 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
+ // amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
arr[2] -= b
// 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
+ // amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
arr[3] -= b
// 386:`DECL\s16\([A-Z]+\)`
arr[4]--