aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBen Shi <powerman1st@163.com>2018-06-21 10:14:18 +0000
committerBen Shi <powerman1st@163.com>2018-08-22 02:47:49 +0000
commit705f3c74e6af802fcf54f402e6d0862832249712 (patch)
tree06846a38a5a26e8a7c7b3411154c42a77b9ea98e /src
parent64f3d75bc288679e9e18cb9513897e7139f3fc3b (diff)
downloadgo-705f3c74e6af802fcf54f402e6d0862832249712.tar.xz
cmd/compile: optimize AMD64 with DIVSSload and DIVSDload
DIVSSload & DIVSDload directly operate on a memory operand. And binary size can be reduced by them, while the performance is not affected. The total size of pkg/linux_amd64 (excluding cmd/compile) decreases about 6KB. There is little regression in the go1 benchmark test (excluding noise). name old time/op new time/op delta BinaryTree17-4 2.63s ± 4% 2.62s ± 4% ~ (p=0.809 n=30+30) Fannkuch11-4 2.40s ± 2% 2.40s ± 2% ~ (p=0.109 n=30+30) FmtFprintfEmpty-4 43.1ns ± 4% 43.2ns ± 9% ~ (p=0.168 n=30+30) FmtFprintfString-4 73.6ns ± 4% 74.1ns ± 4% ~ (p=0.069 n=30+30) FmtFprintfInt-4 81.0ns ± 3% 81.4ns ± 5% ~ (p=0.350 n=30+30) FmtFprintfIntInt-4 127ns ± 4% 129ns ± 4% +0.99% (p=0.021 n=30+30) FmtFprintfPrefixedInt-4 156ns ± 4% 155ns ± 4% ~ (p=0.415 n=30+30) FmtFprintfFloat-4 219ns ± 4% 218ns ± 4% ~ (p=0.071 n=30+30) FmtManyArgs-4 522ns ± 3% 518ns ± 3% -0.68% (p=0.034 n=30+30) GobDecode-4 6.49ms ± 6% 6.52ms ± 6% ~ (p=0.832 n=30+30) GobEncode-4 6.10ms ± 9% 6.14ms ± 7% ~ (p=0.485 n=30+30) Gzip-4 227ms ± 1% 224ms ± 4% ~ (p=0.484 n=24+30) Gunzip-4 37.2ms ± 3% 36.8ms ± 4% ~ (p=0.889 n=30+30) HTTPClientServer-4 58.9µs ± 1% 58.7µs ± 2% -0.42% (p=0.003 n=28+28) JSONEncode-4 12.0ms ± 3% 12.0ms ± 4% ~ (p=0.523 n=30+30) JSONDecode-4 54.6ms ± 4% 54.5ms ± 4% ~ (p=0.708 n=30+30) Mandelbrot200-4 3.78ms ± 4% 3.81ms ± 3% +0.99% (p=0.016 n=30+30) GoParse-4 3.20ms ± 4% 3.20ms ± 5% ~ (p=0.994 n=30+30) RegexpMatchEasy0_32-4 77.0ns ± 4% 75.9ns ± 3% -1.39% (p=0.006 n=29+30) RegexpMatchEasy0_1K-4 255ns ± 4% 253ns ± 4% ~ (p=0.091 n=30+30) RegexpMatchEasy1_32-4 69.7ns ± 3% 70.3ns ± 4% ~ (p=0.120 n=30+30) RegexpMatchEasy1_1K-4 373ns ± 2% 378ns ± 3% +1.43% (p=0.000 n=21+26) RegexpMatchMedium_32-4 107ns ± 2% 108ns ± 4% +1.50% (p=0.012 n=22+30) RegexpMatchMedium_1K-4 34.0µs ± 1% 34.3µs ± 3% +1.08% (p=0.008 n=24+30) RegexpMatchHard_32-4 1.53µs ± 3% 1.54µs ± 3% ~ (p=0.234 n=30+30) RegexpMatchHard_1K-4 46.7µs ± 4% 47.0µs ± 4% ~ (p=0.420 n=30+30) Revcomp-4 411ms ± 7% 415ms ± 6% ~ (p=0.059 n=30+30) Template-4 65.5ms ± 5% 66.9ms ± 4% +2.21% (p=0.001 n=30+30) TimeParse-4 317ns ± 3% 311ns ± 3% -1.97% (p=0.000 n=30+30) TimeFormat-4 293ns ± 3% 294ns ± 3% ~ (p=0.243 n=30+30) [Geo mean] 47.4µs 47.5µs +0.17% name old speed new speed delta GobDecode-4 118MB/s ± 5% 118MB/s ± 6% ~ (p=0.832 n=30+30) GobEncode-4 125MB/s ± 7% 125MB/s ± 7% ~ (p=0.625 n=29+30) Gzip-4 85.3MB/s ± 1% 86.6MB/s ± 4% ~ (p=0.486 n=24+30) Gunzip-4 522MB/s ± 3% 527MB/s ± 4% ~ (p=0.889 n=30+30) JSONEncode-4 162MB/s ± 3% 162MB/s ± 4% ~ (p=0.520 n=30+30) JSONDecode-4 35.5MB/s ± 4% 35.6MB/s ± 4% ~ (p=0.701 n=30+30) GoParse-4 18.1MB/s ± 4% 18.1MB/s ± 4% ~ (p=0.891 n=29+30) RegexpMatchEasy0_32-4 416MB/s ± 4% 422MB/s ± 3% +1.43% (p=0.005 n=29+30) RegexpMatchEasy0_1K-4 4.01GB/s ± 4% 4.04GB/s ± 4% ~ (p=0.091 n=30+30) RegexpMatchEasy1_32-4 460MB/s ± 3% 456MB/s ± 5% ~ (p=0.123 n=30+30) RegexpMatchEasy1_1K-4 2.74GB/s ± 2% 2.70GB/s ± 3% -1.33% (p=0.000 n=22+26) RegexpMatchMedium_32-4 9.39MB/s ± 3% 9.19MB/s ± 4% -2.06% (p=0.001 n=28+30) RegexpMatchMedium_1K-4 30.1MB/s ± 1% 29.8MB/s ± 3% -1.04% (p=0.008 n=24+30) RegexpMatchHard_32-4 20.9MB/s ± 3% 20.8MB/s ± 3% ~ (p=0.234 n=30+30) RegexpMatchHard_1K-4 21.9MB/s ± 4% 21.8MB/s ± 4% ~ (p=0.420 n=30+30) Revcomp-4 619MB/s ± 7% 612MB/s ± 7% ~ (p=0.059 n=30+30) Template-4 29.6MB/s ± 4% 29.0MB/s ± 4% -2.16% (p=0.002 n=30+30) [Geo mean] 123MB/s 123MB/s -0.33% Change-Id: Ia59e077feae4f2824df79059daea4d0f678e3e4c Reviewed-on: https://go-review.googlesource.com/120275 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/cmd/compile/internal/amd64/ssa.go3
-rw-r--r--src/cmd/compile/internal/ssa/gen/AMD64.rules20
-rw-r--r--src/cmd/compile/internal/ssa/gen/AMD64Ops.go2
-rw-r--r--src/cmd/compile/internal/ssa/opGen.go38
-rw-r--r--src/cmd/compile/internal/ssa/rewriteAMD64.go178
5 files changed, 230 insertions, 11 deletions
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 584cc4c4bd..4ecdb769f3 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -837,7 +837,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
- ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload:
+ ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
+ ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[1].Reg()
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index bf3a116045..eab66d17ab 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1037,10 +1037,10 @@
((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {sym} val base mem)
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
-((ADD|SUB|MUL)SSload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
- ((ADD|SUB|MUL)SSload [off1+off2] {sym} val base mem)
-((ADD|SUB|MUL)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
- ((ADD|SUB|MUL)SDload [off1+off2] {sym} val base mem)
+((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
+ ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
+((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
+ ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
@@ -1079,12 +1079,12 @@
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-((ADD|SUB|MUL)SSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|MUL)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-((ADD|SUB|MUL)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+ ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|MUL)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+ ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
@@ -2274,8 +2274,8 @@
// TODO: add indexed variants?
((ADD|SUB|AND|OR|XOR)Q x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Qload x [off] {sym} ptr mem)
((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem)
-((ADD|SUB|MUL)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL)SDload x [off] {sym} ptr mem)
-((ADD|SUB|MUL)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL)SSload x [off] {sym} ptr mem)
+((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
+((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
// Merge ADDQconst and LEAQ into atomic loads.
(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 1140958670..4735ea1bc0 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -189,6 +189,8 @@ func init() {
{name: "SUBSDload", argLength: 3, reg: fp21load, asm: "SUBSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
{name: "MULSSload", argLength: 3, reg: fp21load, asm: "MULSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
{name: "MULSDload", argLength: 3, reg: fp21load, asm: "MULSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
+ {name: "DIVSSload", argLength: 3, reg: fp21load, asm: "DIVSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 / tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
+ {name: "DIVSDload", argLength: 3, reg: fp21load, asm: "DIVSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 / tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
// binary ops
{name: "ADDQ", argLength: 2, reg: gp21sp, asm: "ADDQ", commutative: true, clobberFlags: true}, // arg0 + arg1
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 34b1d8a4bb..3554623840 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -462,6 +462,8 @@ const (
OpAMD64SUBSDload
OpAMD64MULSSload
OpAMD64MULSDload
+ OpAMD64DIVSSload
+ OpAMD64DIVSDload
OpAMD64ADDQ
OpAMD64ADDL
OpAMD64ADDQconst
@@ -5544,6 +5546,42 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "DIVSSload",
+ auxType: auxSymOff,
+ argLen: 3,
+ resultInArg0: true,
+ faultOnNilArg1: true,
+ symEffect: SymRead,
+ asm: x86.ADIVSS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ },
+ },
+ {
+ name: "DIVSDload",
+ auxType: auxSymOff,
+ argLen: 3,
+ resultInArg0: true,
+ faultOnNilArg1: true,
+ symEffect: SymRead,
+ asm: x86.ADIVSD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ },
+ },
+ {
name: "ADDQ",
argLen: 2,
commutative: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 4ffd6b5d18..245f795d90 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -157,6 +157,14 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64CMPXCHGLlock_0(v)
case OpAMD64CMPXCHGQlock:
return rewriteValueAMD64_OpAMD64CMPXCHGQlock_0(v)
+ case OpAMD64DIVSD:
+ return rewriteValueAMD64_OpAMD64DIVSD_0(v)
+ case OpAMD64DIVSDload:
+ return rewriteValueAMD64_OpAMD64DIVSDload_0(v)
+ case OpAMD64DIVSS:
+ return rewriteValueAMD64_OpAMD64DIVSS_0(v)
+ case OpAMD64DIVSSload:
+ return rewriteValueAMD64_OpAMD64DIVSSload_0(v)
case OpAMD64LEAL:
return rewriteValueAMD64_OpAMD64LEAL_0(v)
case OpAMD64LEAL1:
@@ -8808,6 +8816,176 @@ func rewriteValueAMD64_OpAMD64CMPXCHGQlock_0(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64DIVSD_0(v *Value) bool {
+ // match: (DIVSD x l:(MOVSDload [off] {sym} ptr mem))
+ // cond: canMergeLoad(v, l, x) && clobber(l)
+ // result: (DIVSDload x [off] {sym} ptr mem)
+ for {
+ _ = v.Args[1]
+ x := v.Args[0]
+ l := v.Args[1]
+ if l.Op != OpAMD64MOVSDload {
+ break
+ }
+ off := l.AuxInt
+ sym := l.Aux
+ _ = l.Args[1]
+ ptr := l.Args[0]
+ mem := l.Args[1]
+ if !(canMergeLoad(v, l, x) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64DIVSDload)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(x)
+ v.AddArg(ptr)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64DIVSDload_0(v *Value) bool {
+ // match: (DIVSDload [off1] {sym} val (ADDQconst [off2] base) mem)
+ // cond: is32Bit(off1+off2)
+ // result: (DIVSDload [off1+off2] {sym} val base mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ val := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_1.AuxInt
+ base := v_1.Args[0]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64DIVSDload)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(val)
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (DIVSDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (DIVSDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ val := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_1.AuxInt
+ sym2 := v_1.Aux
+ base := v_1.Args[0]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64DIVSDload)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(val)
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64DIVSS_0(v *Value) bool {
+ // match: (DIVSS x l:(MOVSSload [off] {sym} ptr mem))
+ // cond: canMergeLoad(v, l, x) && clobber(l)
+ // result: (DIVSSload x [off] {sym} ptr mem)
+ for {
+ _ = v.Args[1]
+ x := v.Args[0]
+ l := v.Args[1]
+ if l.Op != OpAMD64MOVSSload {
+ break
+ }
+ off := l.AuxInt
+ sym := l.Aux
+ _ = l.Args[1]
+ ptr := l.Args[0]
+ mem := l.Args[1]
+ if !(canMergeLoad(v, l, x) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64DIVSSload)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(x)
+ v.AddArg(ptr)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64DIVSSload_0(v *Value) bool {
+ // match: (DIVSSload [off1] {sym} val (ADDQconst [off2] base) mem)
+ // cond: is32Bit(off1+off2)
+ // result: (DIVSSload [off1+off2] {sym} val base mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ val := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_1.AuxInt
+ base := v_1.Args[0]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64DIVSSload)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(val)
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (DIVSSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (DIVSSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ val := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_1.AuxInt
+ sym2 := v_1.Aux
+ base := v_1.Args[0]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64DIVSSload)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(val)
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64LEAL_0(v *Value) bool {
// match: (LEAL [c] {s} (ADDLconst [d] x))
// cond: is32Bit(c+d)