diff options
| author | Josh Bleecher Snyder <josharian@gmail.com> | 2018-02-26 07:05:19 -0800 |
|---|---|---|
| committer | Josh Bleecher Snyder <josharian@gmail.com> | 2018-05-07 22:09:18 +0000 |
| commit | b1df8d6ffa2c4c5be567934bd44432fff8f3c4a7 (patch) | |
| tree | 3da1f21f9fd3cc1f3f2102b6bf889e5d6bbe1743 /src | |
| parent | 44286b17c5ca6673648ba57b4a9d49ab8dffedf6 (diff) | |
| download | go-b1df8d6ffa2c4c5be567934bd44432fff8f3c4a7.tar.xz | |
cmd/compile: add some LEAL{1,2,4,8} rewrite rules for AMD64
This should improve some 32-bit arithmetic operations.
During make.bash, this increases the number of
rules firing by 15518:
$ wc -l rulelog-*
13490514 rulelog-head
13474996 rulelog-master
compress/flate benchmarks:
name old time/op new time/op delta
Decode/Digits/Huffman/1e4-8 103µs ± 4% 102µs ± 0% -0.95% (p=0.000 n=30+27)
Decode/Digits/Huffman/1e5-8 962µs ± 2% 954µs ± 1% -0.80% (p=0.000 n=25+25)
Decode/Digits/Huffman/1e6-8 9.55ms ± 1% 9.50ms ± 1% -0.57% (p=0.000 n=29+29)
Decode/Digits/Speed/1e4-8 110µs ± 2% 110µs ± 2% -0.41% (p=0.003 n=28+30)
Decode/Digits/Speed/1e5-8 1.15ms ± 1% 1.14ms ± 1% -0.85% (p=0.000 n=29+28)
Decode/Digits/Speed/1e6-8 11.5ms ± 2% 11.4ms ± 1% -1.26% (p=0.000 n=28+27)
Decode/Digits/Default/1e4-8 113µs ± 1% 112µs ± 1% -0.49% (p=0.001 n=27+30)
Decode/Digits/Default/1e5-8 1.13ms ± 0% 1.12ms ± 1% -0.75% (p=0.000 n=26+24)
Decode/Digits/Default/1e6-8 11.1ms ± 1% 11.1ms ± 1% -0.47% (p=0.000 n=28+27)
Decode/Digits/Compression/1e4-8 113µs ± 1% 112µs ± 1% -0.70% (p=0.000 n=28+29)
Decode/Digits/Compression/1e5-8 1.13ms ± 2% 1.12ms ± 1% -1.41% (p=0.000 n=28+26)
Decode/Digits/Compression/1e6-8 11.1ms ± 1% 11.1ms ± 1% -0.33% (p=0.002 n=29+27)
Decode/Twain/Huffman/1e4-8 115µs ± 1% 115µs ± 1% -0.40% (p=0.000 n=28+26)
Decode/Twain/Huffman/1e5-8 1.05ms ± 1% 1.04ms ± 0% -0.41% (p=0.000 n=27+25)
Decode/Twain/Huffman/1e6-8 10.4ms ± 1% 10.4ms ± 1% ~ (p=0.993 n=28+24)
Decode/Twain/Speed/1e4-8 118µs ± 2% 116µs ± 1% -1.08% (p=0.000 n=27+29)
Decode/Twain/Speed/1e5-8 1.07ms ± 1% 1.07ms ± 1% -0.23% (p=0.041 n=26+27)
Decode/Twain/Speed/1e6-8 10.6ms ± 1% 10.5ms ± 0% -0.68% (p=0.000 n=29+27)
Decode/Twain/Default/1e4-8 110µs ± 1% 109µs ± 0% -0.49% (p=0.000 n=29+26)
Decode/Twain/Default/1e5-8 906µs ± 1% 902µs ± 1% -0.48% (p=0.000 n=27+28)
Decode/Twain/Default/1e6-8 8.75ms ± 1% 8.68ms ± 2% -0.73% (p=0.000 n=28+28)
Decode/Twain/Compression/1e4-8 110µs ± 1% 109µs ± 1% -0.80% (p=0.000 n=27+28)
Decode/Twain/Compression/1e5-8 905µs ± 1% 906µs ± 5% ~ (p=0.065 n=27+29)
Decode/Twain/Compression/1e6-8 8.75ms ± 2% 8.68ms ± 1% -0.76% (p=0.000 n=26+26)
Encode/Digits/Huffman/1e4-8 31.8µs ± 1% 32.3µs ± 2% +1.43% (p=0.000 n=28+27)
Encode/Digits/Huffman/1e5-8 299µs ± 2% 296µs ± 1% -1.05% (p=0.000 n=29+29)
Encode/Digits/Huffman/1e6-8 2.99ms ± 3% 2.96ms ± 1% -1.00% (p=0.000 n=29+28)
Encode/Digits/Speed/1e4-8 149µs ± 1% 152µs ± 4% +2.18% (p=0.000 n=30+30)
Encode/Digits/Speed/1e5-8 1.39ms ± 1% 1.40ms ± 2% +1.02% (p=0.000 n=27+27)
Encode/Digits/Speed/1e6-8 13.7ms ± 0% 13.8ms ± 1% +0.81% (p=0.000 n=27+27)
Encode/Digits/Default/1e4-8 297µs ± 7% 297µs ± 7% ~ (p=1.000 n=30+30)
Encode/Digits/Default/1e5-8 4.51ms ± 1% 4.42ms ± 1% -2.06% (p=0.000 n=29+29)
Encode/Digits/Default/1e6-8 47.5ms ± 1% 46.6ms ± 1% -1.90% (p=0.000 n=27+25)
Encode/Digits/Compression/1e4-8 302µs ± 7% 303µs ± 9% ~ (p=0.854 n=30+30)
Encode/Digits/Compression/1e5-8 4.52ms ± 1% 4.43ms ± 2% -1.91% (p=0.000 n=26+25)
Encode/Digits/Compression/1e6-8 47.5ms ± 1% 46.7ms ± 1% -1.70% (p=0.000 n=26+27)
Encode/Twain/Huffman/1e4-8 46.6µs ± 2% 46.8µs ± 2% ~ (p=0.114 n=30+30)
Encode/Twain/Huffman/1e5-8 357µs ± 3% 352µs ± 2% -1.13% (p=0.000 n=29+28)
Encode/Twain/Huffman/1e6-8 3.58ms ± 4% 3.52ms ± 1% -1.43% (p=0.003 n=30+28)
Encode/Twain/Speed/1e4-8 173µs ± 1% 174µs ± 1% +0.65% (p=0.000 n=27+28)
Encode/Twain/Speed/1e5-8 1.39ms ± 1% 1.40ms ± 1% +0.92% (p=0.000 n=28+27)
Encode/Twain/Speed/1e6-8 13.6ms ± 1% 13.7ms ± 1% +0.51% (p=0.000 n=25+26)
Encode/Twain/Default/1e4-8 364µs ± 5% 361µs ± 5% ~ (p=0.219 n=30+30)
Encode/Twain/Default/1e5-8 5.41ms ± 1% 5.43ms ± 5% ~ (p=0.655 n=27+27)
Encode/Twain/Default/1e6-8 57.2ms ± 1% 58.4ms ± 4% +2.15% (p=0.000 n=22+28)
Encode/Twain/Compression/1e4-8 371µs ± 9% 373µs ± 6% ~ (p=0.503 n=30+29)
Encode/Twain/Compression/1e5-8 5.97ms ± 2% 5.92ms ± 1% -0.75% (p=0.000 n=28+26)
Encode/Twain/Compression/1e6-8 64.0ms ± 1% 63.8ms ± 1% -0.36% (p=0.036 n=27+25)
[Geo mean] 1.37ms 1.36ms -0.38%
Change-Id: I3df4de63f06eaf121c38821bd889453a8de1b199
Reviewed-on: https://go-review.googlesource.com/101276
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src')
| -rw-r--r-- | src/cmd/compile/internal/ssa/gen/AMD64.rules | 140 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteAMD64.go | 1420 |
2 files changed, 1458 insertions, 102 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index bd36e60f6e..45c82a0cd7 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -852,10 +852,8 @@ (CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))])) // Using MOVZX instead of AND is cheaper. -(ANDLconst [0xFF] x) -> (MOVBQZX x) -(ANDLconst [0xFFFF] x) -> (MOVWQZX x) -(ANDQconst [0xFF] x) -> (MOVBQZX x) -(ANDQconst [0xFFFF] x) -> (MOVWQZX x) +(AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x) +(AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x) (ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x) // strength reduction @@ -867,75 +865,75 @@ // which can require a register-register move // to preserve the original value, // so it must be used with care. -(MULQconst [-9] x) -> (NEGQ (LEAQ8 <v.Type> x x)) -(MULQconst [-5] x) -> (NEGQ (LEAQ4 <v.Type> x x)) -(MULQconst [-3] x) -> (NEGQ (LEAQ2 <v.Type> x x)) -(MULQconst [-1] x) -> (NEGQ x) -(MULQconst [0] _) -> (MOVQconst [0]) -(MULQconst [1] x) -> x -(MULQconst [3] x) -> (LEAQ2 x x) -(MULQconst [5] x) -> (LEAQ4 x x) -(MULQconst [7] x) -> (LEAQ2 x (LEAQ2 <v.Type> x x)) -(MULQconst [9] x) -> (LEAQ8 x x) -(MULQconst [11] x) -> (LEAQ2 x (LEAQ4 <v.Type> x x)) -(MULQconst [13] x) -> (LEAQ4 x (LEAQ2 <v.Type> x x)) -(MULQconst [19] x) -> (LEAQ2 x (LEAQ8 <v.Type> x x)) -(MULQconst [21] x) -> (LEAQ4 x (LEAQ4 <v.Type> x x)) -(MULQconst [25] x) -> (LEAQ8 x (LEAQ2 <v.Type> x x)) -(MULQconst [27] x) -> (LEAQ8 (LEAQ2 <v.Type> x x) (LEAQ2 <v.Type> x x)) -(MULQconst [37] x) -> (LEAQ4 x (LEAQ8 <v.Type> x x)) -(MULQconst [41] x) -> (LEAQ8 x (LEAQ4 <v.Type> x x)) -(MULQconst [45] x) -> (LEAQ8 (LEAQ4 <v.Type> x x) (LEAQ4 <v.Type> x x)) -(MULQconst [73] x) -> (LEAQ8 x (LEAQ8 <v.Type> x x)) -(MULQconst [81] x) -> (LEAQ8 (LEAQ8 <v.Type> x x) (LEAQ8 <v.Type> x x)) +(MUL(Q|L)const [-9] x) -> (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x)) +(MUL(Q|L)const [-5] x) -> (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x)) 
+(MUL(Q|L)const [-3] x) -> (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [-1] x) -> (NEG(Q|L) x) +(MUL(Q|L)const [ 0] _) -> (MOV(Q|L)const [0]) +(MUL(Q|L)const [ 1] x) -> x +(MUL(Q|L)const [ 3] x) -> (LEA(Q|L)2 x x) +(MUL(Q|L)const [ 5] x) -> (LEA(Q|L)4 x x) +(MUL(Q|L)const [ 7] x) -> (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [ 9] x) -> (LEA(Q|L)8 x x) +(MUL(Q|L)const [11] x) -> (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x)) +(MUL(Q|L)const [13] x) -> (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [19] x) -> (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x)) +(MUL(Q|L)const [21] x) -> (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x)) +(MUL(Q|L)const [25] x) -> (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [27] x) -> (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [37] x) -> (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x)) +(MUL(Q|L)const [41] x) -> (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x)) +(MUL(Q|L)const [45] x) -> (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x)) +(MUL(Q|L)const [73] x) -> (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x)) +(MUL(Q|L)const [81] x) -> (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x)) -(MULQconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x) -(MULQconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x) -(MULQconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x) -(MULQconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x) -(MULQconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x) -(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x)) -(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x)) -(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x)) +(MUL(Q|L)const [c] x) && 
isPowerOfTwo(c+1) && c >= 15 -> (SUB(Q|L) (SHL(Q|L)const <v.Type> [log2(c+1)] x) x) +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [log2(c-1)] x) x) +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [log2(c-2)] x) x) +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [log2(c-4)] x) x) +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [log2(c-8)] x) x) +(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHL(Q|L)const [log2(c/3)] (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHL(Q|L)const [log2(c/5)] (LEA(Q|L)4 <v.Type> x x)) +(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHL(Q|L)const [log2(c/9)] (LEA(Q|L)8 <v.Type> x x)) -// combine add/shift into LEAQ -(ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y) -(ADDQ x (SHLQconst [2] y)) -> (LEAQ4 x y) -(ADDQ x (SHLQconst [1] y)) -> (LEAQ2 x y) -(ADDQ x (ADDQ y y)) -> (LEAQ2 x y) -(ADDQ x (ADDQ x y)) -> (LEAQ2 y x) +// combine add/shift into LEAQ/LEAL +(ADD(L|Q) x (SHL(L|Q)const [3] y)) -> (LEA(L|Q)8 x y) +(ADD(L|Q) x (SHL(L|Q)const [2] y)) -> (LEA(L|Q)4 x y) +(ADD(L|Q) x (SHL(L|Q)const [1] y)) -> (LEA(L|Q)2 x y) +(ADD(L|Q) x (ADD(L|Q) y y)) -> (LEA(L|Q)2 x y) +(ADD(L|Q) x (ADD(L|Q) x y)) -> (LEA(L|Q)2 y x) -// combine ADDQ/ADDQconst into LEAQ1 -(ADDQconst [c] (ADDQ x y)) -> (LEAQ1 [c] x y) -(ADDQ (ADDQconst [c] x) y) -> (LEAQ1 [c] x y) +// combine ADDQ/ADDQconst into LEAQ1/LEAL1 +(ADD(Q|L)const [c] (ADD(Q|L) x y)) -> (LEA(Q|L)1 [c] x y) +(ADD(Q|L) (ADD(Q|L)const [c] x) y) -> (LEA(Q|L)1 [c] x y) (ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) -> (LEA(Q|L)1 [c] x x) -// fold ADDQ into LEAQ -(ADDQconst [c] (LEAQ [d] {s} x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x) -(LEAQ [c] {s} (ADDQconst [d] x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x) -(LEAQ [c] {s} (ADDQ x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} 
x y) -(ADDQ x (LEAQ [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y) +// fold ADDQ/ADDL into LEAQ/LEAL +(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x) +(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x) +(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y) +(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y) -// fold ADDQconst into LEAQx -(ADDQconst [c] (LEAQ1 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ1 [c+d] {s} x y) -(ADDQconst [c] (LEAQ2 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ2 [c+d] {s} x y) -(ADDQconst [c] (LEAQ4 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ4 [c+d] {s} x y) -(ADDQconst [c] (LEAQ8 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ8 [c+d] {s} x y) -(LEAQ1 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ1 [c+d] {s} x y) -(LEAQ2 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ2 [c+d] {s} x y) -(LEAQ2 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEAQ2 [c+2*d] {s} x y) -(LEAQ4 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ4 [c+d] {s} x y) -(LEAQ4 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEAQ4 [c+4*d] {s} x y) -(LEAQ8 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ8 [c+d] {s} x y) -(LEAQ8 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEAQ8 [c+8*d] {s} x y) +// fold ADDQconst/ADDLconst into LEAQx/LEALx +(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)1 [c+d] {s} x y) +(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)2 [c+d] {s} x y) +(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)4 [c+d] {s} x y) +(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)8 [c+d] {s} x y) +(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)1 [c+d] {s} x 
y) +(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)2 [c+d] {s} x y) +(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEA(Q|L)2 [c+2*d] {s} x y) +(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)4 [c+d] {s} x y) +(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEA(Q|L)4 [c+4*d] {s} x y) +(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)8 [c+d] {s} x y) +(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEA(Q|L)8 [c+8*d] {s} x y) -// fold shifts into LEAQx -(LEAQ1 [c] {s} x (SHLQconst [1] y)) -> (LEAQ2 [c] {s} x y) -(LEAQ1 [c] {s} x (SHLQconst [2] y)) -> (LEAQ4 [c] {s} x y) -(LEAQ1 [c] {s} x (SHLQconst [3] y)) -> (LEAQ8 [c] {s} x y) -(LEAQ2 [c] {s} x (SHLQconst [1] y)) -> (LEAQ4 [c] {s} x y) -(LEAQ2 [c] {s} x (SHLQconst [2] y)) -> (LEAQ8 [c] {s} x y) -(LEAQ4 [c] {s} x (SHLQconst [1] y)) -> (LEAQ8 [c] {s} x y) +// fold shifts into LEAQx/LEALx +(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)2 [c] {s} x y) +(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)4 [c] {s} x y) +(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) -> (LEA(Q|L)8 [c] {s} x y) +(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)4 [c] {s} x y) +(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)8 [c] {s} x y) +(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)8 [c] {s} x y) // reverse ordering of compare instruction (SETL (InvertFlags x)) -> (SETG x) @@ -2219,12 +2217,6 @@ && clobber(mem2) -> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem) -// amd64p32 rules -// same as the rules above, but with 32 instead of 64 bit pointer arithmetic. 
-// LEAQ,ADDQ -> LEAL,ADDL -(ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x) -(LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x) - (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> @@ -2410,8 +2402,8 @@ (MOVLi2f <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym}) // LEAQ is rematerializeable, so this helps to avoid register spill. -// See isuue 22947 for details -(ADDQconst [off] x:(SP)) -> (LEAQ [off] x) +// See issue 22947 for details +(ADD(Q|L)const [off] x:(SP)) -> (LEA(Q|L) [off] x) // Fold loads into compares // Note: these may be undone by the flagalloc pass. diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 2fce1e2221..3ff7e48765 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -16,9 +16,9 @@ var _ = types.TypeMem // in case not otherwise used func rewriteValueAMD64(v *Value) bool { switch v.Op { case OpAMD64ADDL: - return rewriteValueAMD64_OpAMD64ADDL_0(v) || rewriteValueAMD64_OpAMD64ADDL_10(v) + return rewriteValueAMD64_OpAMD64ADDL_0(v) || rewriteValueAMD64_OpAMD64ADDL_10(v) || rewriteValueAMD64_OpAMD64ADDL_20(v) case OpAMD64ADDLconst: - return rewriteValueAMD64_OpAMD64ADDLconst_0(v) + return rewriteValueAMD64_OpAMD64ADDLconst_0(v) || rewriteValueAMD64_OpAMD64ADDLconst_10(v) case OpAMD64ADDLconstmem: return rewriteValueAMD64_OpAMD64ADDLconstmem_0(v) case OpAMD64ADDLmem: @@ -155,6 +155,14 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64CMPXCHGQlock_0(v) case OpAMD64LEAL: return rewriteValueAMD64_OpAMD64LEAL_0(v) + case OpAMD64LEAL1: + return rewriteValueAMD64_OpAMD64LEAL1_0(v) + case OpAMD64LEAL2: + return rewriteValueAMD64_OpAMD64LEAL2_0(v) + 
case OpAMD64LEAL4: + return rewriteValueAMD64_OpAMD64LEAL4_0(v) + case OpAMD64LEAL8: + return rewriteValueAMD64_OpAMD64LEAL8_0(v) case OpAMD64LEAQ: return rewriteValueAMD64_OpAMD64LEAQ_0(v) case OpAMD64LEAQ1: @@ -296,7 +304,7 @@ func rewriteValueAMD64(v *Value) bool { case OpAMD64MULL: return rewriteValueAMD64_OpAMD64MULL_0(v) case OpAMD64MULLconst: - return rewriteValueAMD64_OpAMD64MULLconst_0(v) + return rewriteValueAMD64_OpAMD64MULLconst_0(v) || rewriteValueAMD64_OpAMD64MULLconst_10(v) || rewriteValueAMD64_OpAMD64MULLconst_20(v) || rewriteValueAMD64_OpAMD64MULLconst_30(v) case OpAMD64MULQ: return rewriteValueAMD64_OpAMD64MULQ_0(v) case OpAMD64MULQconst: @@ -1239,6 +1247,328 @@ func rewriteValueAMD64_OpAMD64ADDL_0(v *Value) bool { v.AddArg(x) return true } + // match: (ADDL x (SHLLconst [3] y)) + // cond: + // result: (LEAL8 x y) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 3 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL8) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL (SHLLconst [3] y) x) + // cond: + // result: (LEAL8 x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 3 { + break + } + y := v_0.Args[0] + x := v.Args[1] + v.reset(OpAMD64LEAL8) + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64ADDL_10(v *Value) bool { + // match: (ADDL x (SHLLconst [2] y)) + // cond: + // result: (LEAL4 x y) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 2 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL4) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL (SHLLconst [2] y) x) + // cond: + // result: (LEAL4 x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 2 { + break + } + y := v_0.Args[0] + x := v.Args[1] + 
v.reset(OpAMD64LEAL4) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL x (SHLLconst [1] y)) + // cond: + // result: (LEAL2 x y) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 1 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL2) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL (SHLLconst [1] y) x) + // cond: + // result: (LEAL2 x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 1 { + break + } + y := v_0.Args[0] + x := v.Args[1] + v.reset(OpAMD64LEAL2) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL x (ADDL y y)) + // cond: + // result: (LEAL2 x y) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDL { + break + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y != v_1.Args[1] { + break + } + v.reset(OpAMD64LEAL2) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL (ADDL y y) x) + // cond: + // result: (LEAL2 x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDL { + break + } + _ = v_0.Args[1] + y := v_0.Args[0] + if y != v_0.Args[1] { + break + } + x := v.Args[1] + v.reset(OpAMD64LEAL2) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL x (ADDL x y)) + // cond: + // result: (LEAL2 y x) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDL { + break + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpAMD64LEAL2) + v.AddArg(y) + v.AddArg(x) + return true + } + // match: (ADDL x (ADDL y x)) + // cond: + // result: (LEAL2 y x) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDL { + break + } + _ = v_1.Args[1] + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpAMD64LEAL2) + v.AddArg(y) + v.AddArg(x) + return true + } + // match: (ADDL (ADDL x y) x) + // cond: + // result: (LEAL2 y x) + for 
{ + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDL { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpAMD64LEAL2) + v.AddArg(y) + v.AddArg(x) + return true + } + // match: (ADDL (ADDL y x) x) + // cond: + // result: (LEAL2 y x) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDL { + break + } + _ = v_0.Args[1] + y := v_0.Args[0] + x := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpAMD64LEAL2) + v.AddArg(y) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64ADDL_20(v *Value) bool { + // match: (ADDL (ADDLconst [c] x) y) + // cond: + // result: (LEAL1 [c] x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + c := v_0.AuxInt + x := v_0.Args[0] + y := v.Args[1] + v.reset(OpAMD64LEAL1) + v.AuxInt = c + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL y (ADDLconst [c] x)) + // cond: + // result: (LEAL1 [c] x y) + for { + _ = v.Args[1] + y := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDLconst { + break + } + c := v_1.AuxInt + x := v_1.Args[0] + v.reset(OpAMD64LEAL1) + v.AuxInt = c + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL x (LEAL [c] {s} y)) + // cond: x.Op != OpSB && y.Op != OpSB + // result: (LEAL1 [c] {s} x y) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64LEAL { + break + } + c := v_1.AuxInt + s := v_1.Aux + y := v_1.Args[0] + if !(x.Op != OpSB && y.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL (LEAL [c] {s} y) x) + // cond: x.Op != OpSB && y.Op != OpSB + // result: (LEAL1 [c] {s} x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + c := v_0.AuxInt + s := v_0.Aux + y := v_0.Args[0] + x := v.Args[1] + if !(x.Op != OpSB && y.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt 
= c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } // match: (ADDL x (NEGL y)) // cond: // result: (SUBL x y) @@ -1271,9 +1601,6 @@ func rewriteValueAMD64_OpAMD64ADDL_0(v *Value) bool { v.AddArg(y) return true } - return false -} -func rewriteValueAMD64_OpAMD64ADDL_10(v *Value) bool { // match: (ADDL x l:(MOVLload [off] {sym} ptr mem)) // cond: canMergeLoad(v, l, x) && clobber(l) // result: (ADDLmem x [off] {sym} ptr mem) @@ -1329,6 +1656,24 @@ func rewriteValueAMD64_OpAMD64ADDL_10(v *Value) bool { return false } func rewriteValueAMD64_OpAMD64ADDLconst_0(v *Value) bool { + // match: (ADDLconst [c] (ADDL x y)) + // cond: + // result: (LEAL1 [c] x y) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDL { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpAMD64LEAL1) + v.AuxInt = c + v.AddArg(x) + v.AddArg(y) + return true + } // match: (ADDLconst [c] (SHLLconst [1] x)) // cond: // result: (LEAL1 [c] x x) @@ -1348,6 +1693,123 @@ func rewriteValueAMD64_OpAMD64ADDLconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (ADDLconst [c] (LEAL [d] {s} x)) + // cond: is32Bit(c+d) + // result: (LEAL [c+d] {s} x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + d := v_0.AuxInt + s := v_0.Aux + x := v_0.Args[0] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + return true + } + // match: (ADDLconst [c] (LEAL1 [d] {s} x y)) + // cond: is32Bit(c+d) + // result: (LEAL1 [c+d] {s} x y) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL1 { + break + } + d := v_0.AuxInt + s := v_0.Aux + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDLconst [c] (LEAL2 [d] {s} x y)) + // cond: is32Bit(c+d) + // result: (LEAL2 [c+d] {s} x y) + for { + c := v.AuxInt + v_0 
:= v.Args[0] + if v_0.Op != OpAMD64LEAL2 { + break + } + d := v_0.AuxInt + s := v_0.Aux + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL2) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDLconst [c] (LEAL4 [d] {s} x y)) + // cond: is32Bit(c+d) + // result: (LEAL4 [c+d] {s} x y) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL4 { + break + } + d := v_0.AuxInt + s := v_0.Aux + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL4) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDLconst [c] (LEAL8 [d] {s} x y)) + // cond: is32Bit(c+d) + // result: (LEAL8 [c+d] {s} x y) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL8 { + break + } + d := v_0.AuxInt + s := v_0.Aux + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL8) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } // match: (ADDLconst [c] x) // cond: int32(c)==0 // result: x @@ -1392,24 +1854,20 @@ func rewriteValueAMD64_OpAMD64ADDLconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (ADDLconst [c] (LEAL [d] {s} x)) - // cond: is32Bit(c+d) - // result: (LEAL [c+d] {s} x) + return false +} +func rewriteValueAMD64_OpAMD64ADDLconst_10(v *Value) bool { + // match: (ADDLconst [off] x:(SP)) + // cond: + // result: (LEAL [off] x) for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64LEAL { - break - } - d := v_0.AuxInt - s := v_0.Aux - x := v_0.Args[0] - if !(is32Bit(c + d)) { + off := v.AuxInt + x := v.Args[0] + if x.Op != OpSP { break } v.reset(OpAMD64LEAL) - v.AuxInt = c + d - v.Aux = s + v.AuxInt = off v.AddArg(x) return true } @@ -3022,7 +3480,7 @@ func rewriteValueAMD64_OpAMD64ANDLconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (ANDLconst 
[0xFF] x) + // match: (ANDLconst [ 0xFF] x) // cond: // result: (MOVBQZX x) for { @@ -3425,7 +3883,7 @@ func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (ANDQconst [0xFF] x) + // match: (ANDQconst [ 0xFF] x) // cond: // result: (MOVBQZX x) for { @@ -8322,6 +8780,440 @@ func rewriteValueAMD64_OpAMD64LEAL_0(v *Value) bool { v.AddArg(x) return true } + // match: (LEAL [c] {s} (ADDL x y)) + // cond: x.Op != OpSB && y.Op != OpSB + // result: (LEAL1 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDL { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if !(x.Op != OpSB && y.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64LEAL1_0(v *Value) bool { + // match: (LEAL1 [c] {s} (ADDLconst [d] x) y) + // cond: is32Bit(c+d) && x.Op != OpSB + // result: (LEAL1 [c+d] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + y := v.Args[1] + if !(is32Bit(c+d) && x.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} y (ADDLconst [d] x)) + // cond: is32Bit(c+d) && x.Op != OpSB + // result: (LEAL1 [c+d] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + y := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDLconst { + break + } + d := v_1.AuxInt + x := v_1.Args[0] + if !(is32Bit(c+d) && x.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} x (SHLLconst [1] y)) + // cond: + // result: (LEAL2 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + 
break + } + if v_1.AuxInt != 1 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL2) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} (SHLLconst [1] y) x) + // cond: + // result: (LEAL2 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 1 { + break + } + y := v_0.Args[0] + x := v.Args[1] + v.reset(OpAMD64LEAL2) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} x (SHLLconst [2] y)) + // cond: + // result: (LEAL4 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 2 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL4) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} (SHLLconst [2] y) x) + // cond: + // result: (LEAL4 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 2 { + break + } + y := v_0.Args[0] + x := v.Args[1] + v.reset(OpAMD64LEAL4) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} x (SHLLconst [3] y)) + // cond: + // result: (LEAL8 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 3 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL8) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} (SHLLconst [3] y) x) + // cond: + // result: (LEAL8 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 3 { + break + } + y := v_0.Args[0] + x := v.Args[1] + v.reset(OpAMD64LEAL8) + v.AuxInt = c + 
v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64LEAL2_0(v *Value) bool { + // match: (LEAL2 [c] {s} (ADDLconst [d] x) y) + // cond: is32Bit(c+d) && x.Op != OpSB + // result: (LEAL2 [c+d] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + y := v.Args[1] + if !(is32Bit(c+d) && x.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL2) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL2 [c] {s} x (ADDLconst [d] y)) + // cond: is32Bit(c+2*d) && y.Op != OpSB + // result: (LEAL2 [c+2*d] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDLconst { + break + } + d := v_1.AuxInt + y := v_1.Args[0] + if !(is32Bit(c+2*d) && y.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL2) + v.AuxInt = c + 2*d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL2 [c] {s} x (SHLLconst [1] y)) + // cond: + // result: (LEAL4 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 1 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL4) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL2 [c] {s} x (SHLLconst [2] y)) + // cond: + // result: (LEAL8 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 2 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL8) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64LEAL4_0(v *Value) bool { + // match: (LEAL4 [c] {s} (ADDLconst [d] x) y) + // cond: is32Bit(c+d) && x.Op != OpSB + // result: (LEAL4 [c+d] {s} x y) + for { + c := 
// rewriteValueAMD64_OpAMD64LEAL4_0 rewrites a LEAL4 value (32-bit address
// arithmetic of the form base + 4*index + c, with symbol s): it folds an
// ADDLconst on either operand into the displacement and absorbs a doubled
// index (SHLLconst [1]) into the wider LEAL8 scale. Rules are tried in
// order; it reports whether one fired.
func rewriteValueAMD64_OpAMD64LEAL4_0(v *Value) bool {
	// match: (LEAL4 [c] {s} (ADDLconst [d] x) y)
	// cond: is32Bit(c+d) && x.Op != OpSB
	// result: (LEAL4 [c+d] {s} x y)
	for {
		c := v.AuxInt
		s := v.Aux
		_ = v.Args[1]
		v_0 := v.Args[0]
		if v_0.Op != OpAMD64ADDLconst {
			break
		}
		d := v_0.AuxInt
		x := v_0.Args[0]
		y := v.Args[1]
		// NOTE(review): the x.Op != OpSB guard presumably keeps the static
		// base pseudo-register out of a position where it is not encodable
		// — confirm against the rule file's other LEA rules.
		if !(is32Bit(c+d) && x.Op != OpSB) {
			break
		}
		v.reset(OpAMD64LEAL4)
		v.AuxInt = c + d
		v.Aux = s
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (LEAL4 [c] {s} x (ADDLconst [d] y))
	// cond: is32Bit(c+4*d) && y.Op != OpSB
	// result: (LEAL4 [c+4*d] {s} x y)
	// The index operand is scaled by 4, so the folded constant is 4*d.
	for {
		c := v.AuxInt
		s := v.Aux
		_ = v.Args[1]
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpAMD64ADDLconst {
			break
		}
		d := v_1.AuxInt
		y := v_1.Args[0]
		if !(is32Bit(c+4*d) && y.Op != OpSB) {
			break
		}
		v.reset(OpAMD64LEAL4)
		v.AuxInt = c + 4*d
		v.Aux = s
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (LEAL4 [c] {s} x (SHLLconst [1] y))
	// cond:
	// result: (LEAL8 [c] {s} x y)
	// 4*(y<<1) == 8*y, so the shift becomes the wider addressing scale.
	for {
		c := v.AuxInt
		s := v.Aux
		_ = v.Args[1]
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpAMD64SHLLconst {
			break
		}
		if v_1.AuxInt != 1 {
			break
		}
		y := v_1.Args[0]
		v.reset(OpAMD64LEAL8)
		v.AuxInt = c
		v.Aux = s
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	return false
}

// rewriteValueAMD64_OpAMD64LEAL8_0 rewrites a LEAL8 value (32-bit address
// arithmetic of the form base + 8*index + c, with symbol s): it folds an
// ADDLconst on either operand into the displacement. 8 is the largest
// x86 addressing scale, so no shift-absorbing rule exists here.
// Reports whether a rule fired.
func rewriteValueAMD64_OpAMD64LEAL8_0(v *Value) bool {
	// match: (LEAL8 [c] {s} (ADDLconst [d] x) y)
	// cond: is32Bit(c+d) && x.Op != OpSB
	// result: (LEAL8 [c+d] {s} x y)
	for {
		c := v.AuxInt
		s := v.Aux
		_ = v.Args[1]
		v_0 := v.Args[0]
		if v_0.Op != OpAMD64ADDLconst {
			break
		}
		d := v_0.AuxInt
		x := v_0.Args[0]
		y := v.Args[1]
		if !(is32Bit(c+d) && x.Op != OpSB) {
			break
		}
		v.reset(OpAMD64LEAL8)
		v.AuxInt = c + d
		v.Aux = s
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (LEAL8 [c] {s} x (ADDLconst [d] y))
	// cond: is32Bit(c+8*d) && y.Op != OpSB
	// result: (LEAL8 [c+8*d] {s} x y)
	// The index operand is scaled by 8, so the folded constant is 8*d.
	for {
		c := v.AuxInt
		s := v.Aux
		_ = v.Args[1]
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpAMD64ADDLconst {
			break
		}
		d := v_1.AuxInt
		y := v_1.Args[0]
		if !(is32Bit(c+8*d) && y.Op != OpSB) {
			break
		}
		v.reset(OpAMD64LEAL8)
		v.AuxInt = c + 8*d
		v.Aux = s
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	return false
}
// rewriteValueAMD64_OpAMD64MULLconst_10 continues the MULLconst (32-bit
// multiply-by-constant) rules: it strength-reduces multiplies by the
// constants 9, 11, 13, 19, 21, 25, 27, 37, 41 and 45 into one or two
// LEAL{2,4,8} instructions (LEALk a b computes a + k*b).
// Reports whether a rule fired.
func rewriteValueAMD64_OpAMD64MULLconst_10(v *Value) bool {
	b := v.Block
	_ = b
	// match: (MULLconst [ 9] x)
	// cond:
	// result: (LEAL8 x x)
	// 9*x = x + 8*x
	for {
		if v.AuxInt != 9 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v.AddArg(x)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [11] x)
	// cond:
	// result: (LEAL2 x (LEAL4 <v.Type> x x))
	// 11*x = x + 2*(5*x)
	for {
		if v.AuxInt != 11 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL2)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [13] x)
	// cond:
	// result: (LEAL4 x (LEAL2 <v.Type> x x))
	// 13*x = x + 4*(3*x)
	for {
		if v.AuxInt != 13 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL4)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [19] x)
	// cond:
	// result: (LEAL2 x (LEAL8 <v.Type> x x))
	// 19*x = x + 2*(9*x)
	for {
		if v.AuxInt != 19 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL2)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [21] x)
	// cond:
	// result: (LEAL4 x (LEAL4 <v.Type> x x))
	// 21*x = x + 4*(5*x)
	for {
		if v.AuxInt != 21 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL4)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [25] x)
	// cond:
	// result: (LEAL8 x (LEAL2 <v.Type> x x))
	// 25*x = x + 8*(3*x)
	for {
		if v.AuxInt != 25 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [27] x)
	// cond:
	// result: (LEAL8 (LEAL2 <v.Type> x x) (LEAL2 <v.Type> x x))
	// 27*x = 3*x + 8*(3*x)
	for {
		if v.AuxInt != 27 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
		v1.AddArg(x)
		v1.AddArg(x)
		v.AddArg(v1)
		return true
	}
	// match: (MULLconst [37] x)
	// cond:
	// result: (LEAL4 x (LEAL8 <v.Type> x x))
	// 37*x = x + 4*(9*x)
	for {
		if v.AuxInt != 37 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL4)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [41] x)
	// cond:
	// result: (LEAL8 x (LEAL4 <v.Type> x x))
	// 41*x = x + 8*(5*x)
	for {
		if v.AuxInt != 41 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [45] x)
	// cond:
	// result: (LEAL8 (LEAL4 <v.Type> x x) (LEAL4 <v.Type> x x))
	// 45*x = 5*x + 8*(5*x)
	for {
		if v.AuxInt != 45 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v1.AddArg(x)
		v1.AddArg(x)
		v.AddArg(v1)
		return true
	}
	return false
}

// rewriteValueAMD64_OpAMD64MULLconst_20 continues the MULLconst rules with
// the fixed constants 73 and 81, then parameterized families: constants
// adjacent to a power of two (2^k - 1, 2^k + {1,2,4,8}) become a shift
// combined with a SUBL or a LEAL{1,2,4,8}, and constants of the form
// {3,5,9} * 2^k become a LEAL{2,4,8} followed by a shift.
// Rules are tried in order; it reports whether one fired.
func rewriteValueAMD64_OpAMD64MULLconst_20(v *Value) bool {
	b := v.Block
	_ = b
	// match: (MULLconst [73] x)
	// cond:
	// result: (LEAL8 x (LEAL8 <v.Type> x x))
	// 73*x = x + 8*(9*x)
	for {
		if v.AuxInt != 73 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [81] x)
	// cond:
	// result: (LEAL8 (LEAL8 <v.Type> x x) (LEAL8 <v.Type> x x))
	// 81*x = 9*x + 8*(9*x)
	for {
		if v.AuxInt != 81 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v1.AddArg(x)
		v1.AddArg(x)
		v.AddArg(v1)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: isPowerOfTwo(c+1) && c >= 15
	// result: (SUBL (SHLLconst <v.Type> [log2(c+1)] x) x)
	// c = 2^k - 1: (x<<k) - x. The c >= 15 floor leaves smaller
	// constants to the earlier fixed-constant rules.
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(isPowerOfTwo(c+1) && c >= 15) {
			break
		}
		v.reset(OpAMD64SUBL)
		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
		v0.AuxInt = log2(c + 1)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: isPowerOfTwo(c-1) && c >= 17
	// result: (LEAL1 (SHLLconst <v.Type> [log2(c-1)] x) x)
	// c = 2^k + 1: (x<<k) + x
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(isPowerOfTwo(c-1) && c >= 17) {
			break
		}
		v.reset(OpAMD64LEAL1)
		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
		v0.AuxInt = log2(c - 1)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: isPowerOfTwo(c-2) && c >= 34
	// result: (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
	// c = 2^k + 2: (x<<k) + 2*x
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(isPowerOfTwo(c-2) && c >= 34) {
			break
		}
		v.reset(OpAMD64LEAL2)
		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
		v0.AuxInt = log2(c - 2)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: isPowerOfTwo(c-4) && c >= 68
	// result: (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
	// c = 2^k + 4: (x<<k) + 4*x
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(isPowerOfTwo(c-4) && c >= 68) {
			break
		}
		v.reset(OpAMD64LEAL4)
		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
		v0.AuxInt = log2(c - 4)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: isPowerOfTwo(c-8) && c >= 136
	// result: (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
	// c = 2^k + 8: (x<<k) + 8*x
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(isPowerOfTwo(c-8) && c >= 136) {
			break
		}
		v.reset(OpAMD64LEAL8)
		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
		v0.AuxInt = log2(c - 8)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: c%3 == 0 && isPowerOfTwo(c/3)
	// result: (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
	// c = 3 * 2^k: (3*x) << k
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(c%3 == 0 && isPowerOfTwo(c/3)) {
			break
		}
		v.reset(OpAMD64SHLLconst)
		v.AuxInt = log2(c / 3)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: c%5 == 0 && isPowerOfTwo(c/5)
	// result: (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
	// c = 5 * 2^k: (5*x) << k
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(c%5 == 0 && isPowerOfTwo(c/5)) {
			break
		}
		v.reset(OpAMD64SHLLconst)
		v.AuxInt = log2(c / 5)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: c%9 == 0 && isPowerOfTwo(c/9)
	// result: (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
	// c = 9 * 2^k: (9*x) << k
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(c%9 == 0 && isPowerOfTwo(c/9)) {
			break
		}
		v.reset(OpAMD64SHLLconst)
		v.AuxInt = log2(c / 9)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	return false
}
0] _) // cond: // result: (MOVQconst [0]) for { @@ -20012,7 +21376,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool { v.AuxInt = 0 return true } - // match: (MULQconst [1] x) + // match: (MULQconst [ 1] x) // cond: // result: x for { @@ -20025,7 +21389,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (MULQconst [3] x) + // match: (MULQconst [ 3] x) // cond: // result: (LEAQ2 x x) for { @@ -20038,7 +21402,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (MULQconst [5] x) + // match: (MULQconst [ 5] x) // cond: // result: (LEAQ4 x x) for { @@ -20051,7 +21415,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (MULQconst [7] x) + // match: (MULQconst [ 7] x) // cond: // result: (LEAQ2 x (LEAQ2 <v.Type> x x)) for { @@ -20072,7 +21436,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool { func rewriteValueAMD64_OpAMD64MULQconst_10(v *Value) bool { b := v.Block _ = b - // match: (MULQconst [9] x) + // match: (MULQconst [ 9] x) // cond: // result: (LEAQ8 x x) for { |
