diff options
| author | Josh Bleecher Snyder <josharian@gmail.com> | 2018-02-26 07:05:19 -0800 |
|---|---|---|
| committer | Josh Bleecher Snyder <josharian@gmail.com> | 2018-05-07 22:09:18 +0000 |
| commit | b1df8d6ffa2c4c5be567934bd44432fff8f3c4a7 (patch) | |
| tree | 3da1f21f9fd3cc1f3f2102b6bf889e5d6bbe1743 /src | |
| parent | 44286b17c5ca6673648ba57b4a9d49ab8dffedf6 (diff) | |
| download | go-b1df8d6ffa2c4c5be567934bd44432fff8f3c4a7.tar.xz | |
cmd/compile: add some LEAL{1,2,4,8} rewrite rules for AMD64
This should improve some 32-bit arithmetic operations.
During make.bash, this increases the number of
rules firing by 15518:
$ wc -l rulelog-*
13490514 rulelog-head
13474996 rulelog-master
compress/flate benchmarks:
name old time/op new time/op delta
Decode/Digits/Huffman/1e4-8 103µs ± 4% 102µs ± 0% -0.95% (p=0.000 n=30+27)
Decode/Digits/Huffman/1e5-8 962µs ± 2% 954µs ± 1% -0.80% (p=0.000 n=25+25)
Decode/Digits/Huffman/1e6-8 9.55ms ± 1% 9.50ms ± 1% -0.57% (p=0.000 n=29+29)
Decode/Digits/Speed/1e4-8 110µs ± 2% 110µs ± 2% -0.41% (p=0.003 n=28+30)
Decode/Digits/Speed/1e5-8 1.15ms ± 1% 1.14ms ± 1% -0.85% (p=0.000 n=29+28)
Decode/Digits/Speed/1e6-8 11.5ms ± 2% 11.4ms ± 1% -1.26% (p=0.000 n=28+27)
Decode/Digits/Default/1e4-8 113µs ± 1% 112µs ± 1% -0.49% (p=0.001 n=27+30)
Decode/Digits/Default/1e5-8 1.13ms ± 0% 1.12ms ± 1% -0.75% (p=0.000 n=26+24)
Decode/Digits/Default/1e6-8 11.1ms ± 1% 11.1ms ± 1% -0.47% (p=0.000 n=28+27)
Decode/Digits/Compression/1e4-8 113µs ± 1% 112µs ± 1% -0.70% (p=0.000 n=28+29)
Decode/Digits/Compression/1e5-8 1.13ms ± 2% 1.12ms ± 1% -1.41% (p=0.000 n=28+26)
Decode/Digits/Compression/1e6-8 11.1ms ± 1% 11.1ms ± 1% -0.33% (p=0.002 n=29+27)
Decode/Twain/Huffman/1e4-8 115µs ± 1% 115µs ± 1% -0.40% (p=0.000 n=28+26)
Decode/Twain/Huffman/1e5-8 1.05ms ± 1% 1.04ms ± 0% -0.41% (p=0.000 n=27+25)
Decode/Twain/Huffman/1e6-8 10.4ms ± 1% 10.4ms ± 1% ~ (p=0.993 n=28+24)
Decode/Twain/Speed/1e4-8 118µs ± 2% 116µs ± 1% -1.08% (p=0.000 n=27+29)
Decode/Twain/Speed/1e5-8 1.07ms ± 1% 1.07ms ± 1% -0.23% (p=0.041 n=26+27)
Decode/Twain/Speed/1e6-8 10.6ms ± 1% 10.5ms ± 0% -0.68% (p=0.000 n=29+27)
Decode/Twain/Default/1e4-8 110µs ± 1% 109µs ± 0% -0.49% (p=0.000 n=29+26)
Decode/Twain/Default/1e5-8 906µs ± 1% 902µs ± 1% -0.48% (p=0.000 n=27+28)
Decode/Twain/Default/1e6-8 8.75ms ± 1% 8.68ms ± 2% -0.73% (p=0.000 n=28+28)
Decode/Twain/Compression/1e4-8 110µs ± 1% 109µs ± 1% -0.80% (p=0.000 n=27+28)
Decode/Twain/Compression/1e5-8 905µs ± 1% 906µs ± 5% ~ (p=0.065 n=27+29)
Decode/Twain/Compression/1e6-8 8.75ms ± 2% 8.68ms ± 1% -0.76% (p=0.000 n=26+26)
Encode/Digits/Huffman/1e4-8 31.8µs ± 1% 32.3µs ± 2% +1.43% (p=0.000 n=28+27)
Encode/Digits/Huffman/1e5-8 299µs ± 2% 296µs ± 1% -1.05% (p=0.000 n=29+29)
Encode/Digits/Huffman/1e6-8 2.99ms ± 3% 2.96ms ± 1% -1.00% (p=0.000 n=29+28)
Encode/Digits/Speed/1e4-8 149µs ± 1% 152µs ± 4% +2.18% (p=0.000 n=30+30)
Encode/Digits/Speed/1e5-8 1.39ms ± 1% 1.40ms ± 2% +1.02% (p=0.000 n=27+27)
Encode/Digits/Speed/1e6-8 13.7ms ± 0% 13.8ms ± 1% +0.81% (p=0.000 n=27+27)
Encode/Digits/Default/1e4-8 297µs ± 7% 297µs ± 7% ~ (p=1.000 n=30+30)
Encode/Digits/Default/1e5-8 4.51ms ± 1% 4.42ms ± 1% -2.06% (p=0.000 n=29+29)
Encode/Digits/Default/1e6-8 47.5ms ± 1% 46.6ms ± 1% -1.90% (p=0.000 n=27+25)
Encode/Digits/Compression/1e4-8 302µs ± 7% 303µs ± 9% ~ (p=0.854 n=30+30)
Encode/Digits/Compression/1e5-8 4.52ms ± 1% 4.43ms ± 2% -1.91% (p=0.000 n=26+25)
Encode/Digits/Compression/1e6-8 47.5ms ± 1% 46.7ms ± 1% -1.70% (p=0.000 n=26+27)
Encode/Twain/Huffman/1e4-8 46.6µs ± 2% 46.8µs ± 2% ~ (p=0.114 n=30+30)
Encode/Twain/Huffman/1e5-8 357µs ± 3% 352µs ± 2% -1.13% (p=0.000 n=29+28)
Encode/Twain/Huffman/1e6-8 3.58ms ± 4% 3.52ms ± 1% -1.43% (p=0.003 n=30+28)
Encode/Twain/Speed/1e4-8 173µs ± 1% 174µs ± 1% +0.65% (p=0.000 n=27+28)
Encode/Twain/Speed/1e5-8 1.39ms ± 1% 1.40ms ± 1% +0.92% (p=0.000 n=28+27)
Encode/Twain/Speed/1e6-8 13.6ms ± 1% 13.7ms ± 1% +0.51% (p=0.000 n=25+26)
Encode/Twain/Default/1e4-8 364µs ± 5% 361µs ± 5% ~ (p=0.219 n=30+30)
Encode/Twain/Default/1e5-8 5.41ms ± 1% 5.43ms ± 5% ~ (p=0.655 n=27+27)
Encode/Twain/Default/1e6-8 57.2ms ± 1% 58.4ms ± 4% +2.15% (p=0.000 n=22+28)
Encode/Twain/Compression/1e4-8 371µs ± 9% 373µs ± 6% ~ (p=0.503 n=30+29)
Encode/Twain/Compression/1e5-8 5.97ms ± 2% 5.92ms ± 1% -0.75% (p=0.000 n=28+26)
Encode/Twain/Compression/1e6-8 64.0ms ± 1% 63.8ms ± 1% -0.36% (p=0.036 n=27+25)
[Geo mean] 1.37ms 1.36ms -0.38%
Change-Id: I3df4de63f06eaf121c38821bd889453a8de1b199
Reviewed-on: https://go-review.googlesource.com/101276
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src')
| -rw-r--r-- | src/cmd/compile/internal/ssa/gen/AMD64.rules | 140 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteAMD64.go | 1420 |
2 files changed, 1458 insertions, 102 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index bd36e60f6e..45c82a0cd7 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -852,10 +852,8 @@ (CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))])) // Using MOVZX instead of AND is cheaper. -(ANDLconst [0xFF] x) -> (MOVBQZX x) -(ANDLconst [0xFFFF] x) -> (MOVWQZX x) -(ANDQconst [0xFF] x) -> (MOVBQZX x) -(ANDQconst [0xFFFF] x) -> (MOVWQZX x) +(AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x) +(AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x) (ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x) // strength reduction @@ -867,75 +865,75 @@ // which can require a register-register move // to preserve the original value, // so it must be used with care. -(MULQconst [-9] x) -> (NEGQ (LEAQ8 <v.Type> x x)) -(MULQconst [-5] x) -> (NEGQ (LEAQ4 <v.Type> x x)) -(MULQconst [-3] x) -> (NEGQ (LEAQ2 <v.Type> x x)) -(MULQconst [-1] x) -> (NEGQ x) -(MULQconst [0] _) -> (MOVQconst [0]) -(MULQconst [1] x) -> x -(MULQconst [3] x) -> (LEAQ2 x x) -(MULQconst [5] x) -> (LEAQ4 x x) -(MULQconst [7] x) -> (LEAQ2 x (LEAQ2 <v.Type> x x)) -(MULQconst [9] x) -> (LEAQ8 x x) -(MULQconst [11] x) -> (LEAQ2 x (LEAQ4 <v.Type> x x)) -(MULQconst [13] x) -> (LEAQ4 x (LEAQ2 <v.Type> x x)) -(MULQconst [19] x) -> (LEAQ2 x (LEAQ8 <v.Type> x x)) -(MULQconst [21] x) -> (LEAQ4 x (LEAQ4 <v.Type> x x)) -(MULQconst [25] x) -> (LEAQ8 x (LEAQ2 <v.Type> x x)) -(MULQconst [27] x) -> (LEAQ8 (LEAQ2 <v.Type> x x) (LEAQ2 <v.Type> x x)) -(MULQconst [37] x) -> (LEAQ4 x (LEAQ8 <v.Type> x x)) -(MULQconst [41] x) -> (LEAQ8 x (LEAQ4 <v.Type> x x)) -(MULQconst [45] x) -> (LEAQ8 (LEAQ4 <v.Type> x x) (LEAQ4 <v.Type> x x)) -(MULQconst [73] x) -> (LEAQ8 x (LEAQ8 <v.Type> x x)) -(MULQconst [81] x) -> (LEAQ8 (LEAQ8 <v.Type> x x) (LEAQ8 <v.Type> x x)) +(MUL(Q|L)const [-9] x) -> (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x)) +(MUL(Q|L)const [-5] x) -> (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x)) 
+(MUL(Q|L)const [-3] x) -> (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [-1] x) -> (NEG(Q|L) x) +(MUL(Q|L)const [ 0] _) -> (MOV(Q|L)const [0]) +(MUL(Q|L)const [ 1] x) -> x +(MUL(Q|L)const [ 3] x) -> (LEA(Q|L)2 x x) +(MUL(Q|L)const [ 5] x) -> (LEA(Q|L)4 x x) +(MUL(Q|L)const [ 7] x) -> (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [ 9] x) -> (LEA(Q|L)8 x x) +(MUL(Q|L)const [11] x) -> (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x)) +(MUL(Q|L)const [13] x) -> (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [19] x) -> (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x)) +(MUL(Q|L)const [21] x) -> (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x)) +(MUL(Q|L)const [25] x) -> (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [27] x) -> (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [37] x) -> (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x)) +(MUL(Q|L)const [41] x) -> (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x)) +(MUL(Q|L)const [45] x) -> (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x)) +(MUL(Q|L)const [73] x) -> (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x)) +(MUL(Q|L)const [81] x) -> (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x)) -(MULQconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x) -(MULQconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x) -(MULQconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x) -(MULQconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x) -(MULQconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x) -(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x)) -(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x)) -(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x)) +(MUL(Q|L)const [c] x) && 
isPowerOfTwo(c+1) && c >= 15 -> (SUB(Q|L) (SHL(Q|L)const <v.Type> [log2(c+1)] x) x) +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [log2(c-1)] x) x) +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [log2(c-2)] x) x) +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [log2(c-4)] x) x) +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [log2(c-8)] x) x) +(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHL(Q|L)const [log2(c/3)] (LEA(Q|L)2 <v.Type> x x)) +(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHL(Q|L)const [log2(c/5)] (LEA(Q|L)4 <v.Type> x x)) +(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHL(Q|L)const [log2(c/9)] (LEA(Q|L)8 <v.Type> x x)) -// combine add/shift into LEAQ -(ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y) -(ADDQ x (SHLQconst [2] y)) -> (LEAQ4 x y) -(ADDQ x (SHLQconst [1] y)) -> (LEAQ2 x y) -(ADDQ x (ADDQ y y)) -> (LEAQ2 x y) -(ADDQ x (ADDQ x y)) -> (LEAQ2 y x) +// combine add/shift into LEAQ/LEAL +(ADD(L|Q) x (SHL(L|Q)const [3] y)) -> (LEA(L|Q)8 x y) +(ADD(L|Q) x (SHL(L|Q)const [2] y)) -> (LEA(L|Q)4 x y) +(ADD(L|Q) x (SHL(L|Q)const [1] y)) -> (LEA(L|Q)2 x y) +(ADD(L|Q) x (ADD(L|Q) y y)) -> (LEA(L|Q)2 x y) +(ADD(L|Q) x (ADD(L|Q) x y)) -> (LEA(L|Q)2 y x) -// combine ADDQ/ADDQconst into LEAQ1 -(ADDQconst [c] (ADDQ x y)) -> (LEAQ1 [c] x y) -(ADDQ (ADDQconst [c] x) y) -> (LEAQ1 [c] x y) +// combine ADDQ/ADDQconst into LEAQ1/LEAL1 +(ADD(Q|L)const [c] (ADD(Q|L) x y)) -> (LEA(Q|L)1 [c] x y) +(ADD(Q|L) (ADD(Q|L)const [c] x) y) -> (LEA(Q|L)1 [c] x y) (ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) -> (LEA(Q|L)1 [c] x x) -// fold ADDQ into LEAQ -(ADDQconst [c] (LEAQ [d] {s} x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x) -(LEAQ [c] {s} (ADDQconst [d] x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x) -(LEAQ [c] {s} (ADDQ x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} 
x y) -(ADDQ x (LEAQ [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y) +// fold ADDQ/ADDL into LEAQ/LEAL +(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x) +(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x) +(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y) +(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y) -// fold ADDQconst into LEAQx -(ADDQconst [c] (LEAQ1 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ1 [c+d] {s} x y) -(ADDQconst [c] (LEAQ2 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ2 [c+d] {s} x y) -(ADDQconst [c] (LEAQ4 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ4 [c+d] {s} x y) -(ADDQconst [c] (LEAQ8 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ8 [c+d] {s} x y) -(LEAQ1 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ1 [c+d] {s} x y) -(LEAQ2 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ2 [c+d] {s} x y) -(LEAQ2 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEAQ2 [c+2*d] {s} x y) -(LEAQ4 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ4 [c+d] {s} x y) -(LEAQ4 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEAQ4 [c+4*d] {s} x y) -(LEAQ8 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ8 [c+d] {s} x y) -(LEAQ8 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEAQ8 [c+8*d] {s} x y) +// fold ADDQconst/ADDLconst into LEAQx/LEALx +(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)1 [c+d] {s} x y) +(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)2 [c+d] {s} x y) +(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)4 [c+d] {s} x y) +(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)8 [c+d] {s} x y) +(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)1 [c+d] {s} x 
y) +(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)2 [c+d] {s} x y) +(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEA(Q|L)2 [c+2*d] {s} x y) +(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)4 [c+d] {s} x y) +(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEA(Q|L)4 [c+4*d] {s} x y) +(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)8 [c+d] {s} x y) +(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEA(Q|L)8 [c+8*d] {s} x y) -// fold shifts into LEAQx -(LEAQ1 [c] {s} x (SHLQconst [1] y)) -> (LEAQ2 [c] {s} x y) -(LEAQ1 [c] {s} x (SHLQconst [2] y)) -> (LEAQ4 [c] {s} x y) -(LEAQ1 [c] {s} x (SHLQconst [3] y)) -> (LEAQ8 [c] {s} x y) -(LEAQ2 [c] {s} x (SHLQconst [1] y)) -> (LEAQ4 [c] {s} x y) -(LEAQ2 [c] {s} x (SHLQconst [2] y)) -> (LEAQ8 [c] {s} x y) -(LEAQ4 [c] {s} x (SHLQconst [1] y)) -> (LEAQ8 [c] {s} x y) +// fold shifts into LEAQx/LEALx +(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)2 [c] {s} x y) +(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)4 [c] {s} x y) +(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) -> (LEA(Q|L)8 [c] {s} x y) +(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)4 [c] {s} x y) +(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)8 [c] {s} x y) +(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)8 [c] {s} x y) // reverse ordering of compare instruction (SETL (InvertFlags x)) -> (SETG x) @@ -2219,12 +2217,6 @@ && clobber(mem2) -> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem) -// amd64p32 rules -// same as the rules above, but with 32 instead of 64 bit pointer arithmetic. 
-// LEAQ,ADDQ -> LEAL,ADDL -(ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x) -(LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x) - (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> @@ -2410,8 +2402,8 @@ (MOVLi2f <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym}) // LEAQ is rematerializeable, so this helps to avoid register spill. -// See isuue 22947 for details -(ADDQconst [off] x:(SP)) -> (LEAQ [off] x) +// See issue 22947 for details +(ADD(Q|L)const [off] x:(SP)) -> (LEA(Q|L) [off] x) // Fold loads into compares // Note: these may be undone by the flagalloc pass. diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 2fce1e2221..3ff7e48765 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -16,9 +16,9 @@ var _ = types.TypeMem // in case not otherwise used func rewriteValueAMD64(v *Value) bool { switch v.Op { case OpAMD64ADDL: - return rewriteValueAMD64_OpAMD64ADDL_0(v) || rewriteValueAMD64_OpAMD64ADDL_10(v) + return rewriteValueAMD64_OpAMD64ADDL_0(v) || rewriteValueAMD64_OpAMD64ADDL_10(v) || rewriteValueAMD64_OpAMD64ADDL_20(v) case OpAMD64ADDLconst: - return rewriteValueAMD64_OpAMD64ADDLconst_0(v) + return rewriteValueAMD64_OpAMD64ADDLconst_0(v) || rewriteValueAMD64_OpAMD64ADDLconst_10(v) case OpAMD64ADDLconstmem: return rewriteValueAMD64_OpAMD64ADDLconstmem_0(v) case OpAMD64ADDLmem: @@ -155,6 +155,14 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64CMPXCHGQlock_0(v) case OpAMD64LEAL: return rewriteValueAMD64_OpAMD64LEAL_0(v) + case OpAMD64LEAL1: + return rewriteValueAMD64_OpAMD64LEAL1_0(v) + case OpAMD64LEAL2: + return rewriteValueAMD64_OpAMD64LEAL2_0(v) + 
case OpAMD64LEAL4: + return rewriteValueAMD64_OpAMD64LEAL4_0(v) + case OpAMD64LEAL8: + return rewriteValueAMD64_OpAMD64LEAL8_0(v) case OpAMD64LEAQ: return rewriteValueAMD64_OpAMD64LEAQ_0(v) case OpAMD64LEAQ1: @@ -296,7 +304,7 @@ func rewriteValueAMD64(v *Value) bool { case OpAMD64MULL: return rewriteValueAMD64_OpAMD64MULL_0(v) case OpAMD64MULLconst: - return rewriteValueAMD64_OpAMD64MULLconst_0(v) + return rewriteValueAMD64_OpAMD64MULLconst_0(v) || rewriteValueAMD64_OpAMD64MULLconst_10(v) || rewriteValueAMD64_OpAMD64MULLconst_20(v) || rewriteValueAMD64_OpAMD64MULLconst_30(v) case OpAMD64MULQ: return rewriteValueAMD64_OpAMD64MULQ_0(v) case OpAMD64MULQconst: @@ -1239,6 +1247,328 @@ func rewriteValueAMD64_OpAMD64ADDL_0(v *Value) bool { v.AddArg(x) return true } + // match: (ADDL x (SHLLconst [3] y)) + // cond: + // result: (LEAL8 x y) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 3 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL8) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL (SHLLconst [3] y) x) + // cond: + // result: (LEAL8 x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 3 { + break + } + y := v_0.Args[0] + x := v.Args[1] + v.reset(OpAMD64LEAL8) + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64ADDL_10(v *Value) bool { + // match: (ADDL x (SHLLconst [2] y)) + // cond: + // result: (LEAL4 x y) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 2 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL4) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL (SHLLconst [2] y) x) + // cond: + // result: (LEAL4 x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 2 { + break + } + y := v_0.Args[0] + x := v.Args[1] + 
v.reset(OpAMD64LEAL4) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL x (SHLLconst [1] y)) + // cond: + // result: (LEAL2 x y) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 1 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL2) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL (SHLLconst [1] y) x) + // cond: + // result: (LEAL2 x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 1 { + break + } + y := v_0.Args[0] + x := v.Args[1] + v.reset(OpAMD64LEAL2) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL x (ADDL y y)) + // cond: + // result: (LEAL2 x y) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDL { + break + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y != v_1.Args[1] { + break + } + v.reset(OpAMD64LEAL2) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL (ADDL y y) x) + // cond: + // result: (LEAL2 x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDL { + break + } + _ = v_0.Args[1] + y := v_0.Args[0] + if y != v_0.Args[1] { + break + } + x := v.Args[1] + v.reset(OpAMD64LEAL2) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL x (ADDL x y)) + // cond: + // result: (LEAL2 y x) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDL { + break + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpAMD64LEAL2) + v.AddArg(y) + v.AddArg(x) + return true + } + // match: (ADDL x (ADDL y x)) + // cond: + // result: (LEAL2 y x) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDL { + break + } + _ = v_1.Args[1] + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpAMD64LEAL2) + v.AddArg(y) + v.AddArg(x) + return true + } + // match: (ADDL (ADDL x y) x) + // cond: + // result: (LEAL2 y x) + for 
{ + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDL { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpAMD64LEAL2) + v.AddArg(y) + v.AddArg(x) + return true + } + // match: (ADDL (ADDL y x) x) + // cond: + // result: (LEAL2 y x) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDL { + break + } + _ = v_0.Args[1] + y := v_0.Args[0] + x := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpAMD64LEAL2) + v.AddArg(y) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64ADDL_20(v *Value) bool { + // match: (ADDL (ADDLconst [c] x) y) + // cond: + // result: (LEAL1 [c] x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + c := v_0.AuxInt + x := v_0.Args[0] + y := v.Args[1] + v.reset(OpAMD64LEAL1) + v.AuxInt = c + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL y (ADDLconst [c] x)) + // cond: + // result: (LEAL1 [c] x y) + for { + _ = v.Args[1] + y := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDLconst { + break + } + c := v_1.AuxInt + x := v_1.Args[0] + v.reset(OpAMD64LEAL1) + v.AuxInt = c + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL x (LEAL [c] {s} y)) + // cond: x.Op != OpSB && y.Op != OpSB + // result: (LEAL1 [c] {s} x y) + for { + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64LEAL { + break + } + c := v_1.AuxInt + s := v_1.Aux + y := v_1.Args[0] + if !(x.Op != OpSB && y.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDL (LEAL [c] {s} y) x) + // cond: x.Op != OpSB && y.Op != OpSB + // result: (LEAL1 [c] {s} x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + c := v_0.AuxInt + s := v_0.Aux + y := v_0.Args[0] + x := v.Args[1] + if !(x.Op != OpSB && y.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt 
= c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } // match: (ADDL x (NEGL y)) // cond: // result: (SUBL x y) @@ -1271,9 +1601,6 @@ func rewriteValueAMD64_OpAMD64ADDL_0(v *Value) bool { v.AddArg(y) return true } - return false -} -func rewriteValueAMD64_OpAMD64ADDL_10(v *Value) bool { // match: (ADDL x l:(MOVLload [off] {sym} ptr mem)) // cond: canMergeLoad(v, l, x) && clobber(l) // result: (ADDLmem x [off] {sym} ptr mem) @@ -1329,6 +1656,24 @@ func rewriteValueAMD64_OpAMD64ADDL_10(v *Value) bool { return false } func rewriteValueAMD64_OpAMD64ADDLconst_0(v *Value) bool { + // match: (ADDLconst [c] (ADDL x y)) + // cond: + // result: (LEAL1 [c] x y) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDL { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpAMD64LEAL1) + v.AuxInt = c + v.AddArg(x) + v.AddArg(y) + return true + } // match: (ADDLconst [c] (SHLLconst [1] x)) // cond: // result: (LEAL1 [c] x x) @@ -1348,6 +1693,123 @@ func rewriteValueAMD64_OpAMD64ADDLconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (ADDLconst [c] (LEAL [d] {s} x)) + // cond: is32Bit(c+d) + // result: (LEAL [c+d] {s} x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + d := v_0.AuxInt + s := v_0.Aux + x := v_0.Args[0] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + return true + } + // match: (ADDLconst [c] (LEAL1 [d] {s} x y)) + // cond: is32Bit(c+d) + // result: (LEAL1 [c+d] {s} x y) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL1 { + break + } + d := v_0.AuxInt + s := v_0.Aux + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDLconst [c] (LEAL2 [d] {s} x y)) + // cond: is32Bit(c+d) + // result: (LEAL2 [c+d] {s} x y) + for { + c := v.AuxInt + v_0 
:= v.Args[0] + if v_0.Op != OpAMD64LEAL2 { + break + } + d := v_0.AuxInt + s := v_0.Aux + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL2) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDLconst [c] (LEAL4 [d] {s} x y)) + // cond: is32Bit(c+d) + // result: (LEAL4 [c+d] {s} x y) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL4 { + break + } + d := v_0.AuxInt + s := v_0.Aux + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL4) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDLconst [c] (LEAL8 [d] {s} x y)) + // cond: is32Bit(c+d) + // result: (LEAL8 [c+d] {s} x y) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL8 { + break + } + d := v_0.AuxInt + s := v_0.Aux + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL8) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } // match: (ADDLconst [c] x) // cond: int32(c)==0 // result: x @@ -1392,24 +1854,20 @@ func rewriteValueAMD64_OpAMD64ADDLconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (ADDLconst [c] (LEAL [d] {s} x)) - // cond: is32Bit(c+d) - // result: (LEAL [c+d] {s} x) + return false +} +func rewriteValueAMD64_OpAMD64ADDLconst_10(v *Value) bool { + // match: (ADDLconst [off] x:(SP)) + // cond: + // result: (LEAL [off] x) for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64LEAL { - break - } - d := v_0.AuxInt - s := v_0.Aux - x := v_0.Args[0] - if !(is32Bit(c + d)) { + off := v.AuxInt + x := v.Args[0] + if x.Op != OpSP { break } v.reset(OpAMD64LEAL) - v.AuxInt = c + d - v.Aux = s + v.AuxInt = off v.AddArg(x) return true } @@ -3022,7 +3480,7 @@ func rewriteValueAMD64_OpAMD64ANDLconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (ANDLconst 
[0xFF] x) + // match: (ANDLconst [ 0xFF] x) // cond: // result: (MOVBQZX x) for { @@ -3425,7 +3883,7 @@ func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (ANDQconst [0xFF] x) + // match: (ANDQconst [ 0xFF] x) // cond: // result: (MOVBQZX x) for { @@ -8322,6 +8780,440 @@ func rewriteValueAMD64_OpAMD64LEAL_0(v *Value) bool { v.AddArg(x) return true } + // match: (LEAL [c] {s} (ADDL x y)) + // cond: x.Op != OpSB && y.Op != OpSB + // result: (LEAL1 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDL { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + if !(x.Op != OpSB && y.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64LEAL1_0(v *Value) bool { + // match: (LEAL1 [c] {s} (ADDLconst [d] x) y) + // cond: is32Bit(c+d) && x.Op != OpSB + // result: (LEAL1 [c+d] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + y := v.Args[1] + if !(is32Bit(c+d) && x.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} y (ADDLconst [d] x)) + // cond: is32Bit(c+d) && x.Op != OpSB + // result: (LEAL1 [c+d] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + y := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDLconst { + break + } + d := v_1.AuxInt + x := v_1.Args[0] + if !(is32Bit(c+d) && x.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL1) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} x (SHLLconst [1] y)) + // cond: + // result: (LEAL2 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + 
break + } + if v_1.AuxInt != 1 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL2) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} (SHLLconst [1] y) x) + // cond: + // result: (LEAL2 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 1 { + break + } + y := v_0.Args[0] + x := v.Args[1] + v.reset(OpAMD64LEAL2) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} x (SHLLconst [2] y)) + // cond: + // result: (LEAL4 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 2 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL4) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} (SHLLconst [2] y) x) + // cond: + // result: (LEAL4 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 2 { + break + } + y := v_0.Args[0] + x := v.Args[1] + v.reset(OpAMD64LEAL4) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} x (SHLLconst [3] y)) + // cond: + // result: (LEAL8 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 3 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL8) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL1 [c] {s} (SHLLconst [3] y) x) + // cond: + // result: (LEAL8 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHLLconst { + break + } + if v_0.AuxInt != 3 { + break + } + y := v_0.Args[0] + x := v.Args[1] + v.reset(OpAMD64LEAL8) + v.AuxInt = c + 
v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64LEAL2_0(v *Value) bool { + // match: (LEAL2 [c] {s} (ADDLconst [d] x) y) + // cond: is32Bit(c+d) && x.Op != OpSB + // result: (LEAL2 [c+d] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + y := v.Args[1] + if !(is32Bit(c+d) && x.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL2) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL2 [c] {s} x (ADDLconst [d] y)) + // cond: is32Bit(c+2*d) && y.Op != OpSB + // result: (LEAL2 [c+2*d] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64ADDLconst { + break + } + d := v_1.AuxInt + y := v_1.Args[0] + if !(is32Bit(c+2*d) && y.Op != OpSB) { + break + } + v.reset(OpAMD64LEAL2) + v.AuxInt = c + 2*d + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL2 [c] {s} x (SHLLconst [1] y)) + // cond: + // result: (LEAL4 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 1 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL4) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (LEAL2 [c] {s} x (SHLLconst [2] y)) + // cond: + // result: (LEAL8 [c] {s} x y) + for { + c := v.AuxInt + s := v.Aux + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHLLconst { + break + } + if v_1.AuxInt != 2 { + break + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL8) + v.AuxInt = c + v.Aux = s + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64LEAL4_0(v *Value) bool { + // match: (LEAL4 [c] {s} (ADDLconst [d] x) y) + // cond: is32Bit(c+d) && x.Op != OpSB + // result: (LEAL4 [c+d] {s} x y) + for { + c := 
// rewriteValueAMD64_OpAMD64LEAL4_0 rewrites a LEAL4 value (32-bit address
// arithmetic of the form base + 4*index + c, with symbol s): it folds an
// ADDLconst on either operand into the displacement and absorbs a doubled
// index (SHLLconst [1]) into the wider LEAL8 scale. Rules are tried in
// order; it reports whether one fired.
func rewriteValueAMD64_OpAMD64LEAL4_0(v *Value) bool {
	// match: (LEAL4 [c] {s} (ADDLconst [d] x) y)
	// cond: is32Bit(c+d) && x.Op != OpSB
	// result: (LEAL4 [c+d] {s} x y)
	for {
		c := v.AuxInt
		s := v.Aux
		_ = v.Args[1]
		v_0 := v.Args[0]
		if v_0.Op != OpAMD64ADDLconst {
			break
		}
		d := v_0.AuxInt
		x := v_0.Args[0]
		y := v.Args[1]
		// NOTE(review): the x.Op != OpSB guard presumably keeps the static
		// base pseudo-register out of a position where it is not encodable
		// — confirm against the rule file's other LEA rules.
		if !(is32Bit(c+d) && x.Op != OpSB) {
			break
		}
		v.reset(OpAMD64LEAL4)
		v.AuxInt = c + d
		v.Aux = s
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (LEAL4 [c] {s} x (ADDLconst [d] y))
	// cond: is32Bit(c+4*d) && y.Op != OpSB
	// result: (LEAL4 [c+4*d] {s} x y)
	// The index operand is scaled by 4, so the folded constant is 4*d.
	for {
		c := v.AuxInt
		s := v.Aux
		_ = v.Args[1]
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpAMD64ADDLconst {
			break
		}
		d := v_1.AuxInt
		y := v_1.Args[0]
		if !(is32Bit(c+4*d) && y.Op != OpSB) {
			break
		}
		v.reset(OpAMD64LEAL4)
		v.AuxInt = c + 4*d
		v.Aux = s
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (LEAL4 [c] {s} x (SHLLconst [1] y))
	// cond:
	// result: (LEAL8 [c] {s} x y)
	// 4*(y<<1) == 8*y, so the shift becomes the wider addressing scale.
	for {
		c := v.AuxInt
		s := v.Aux
		_ = v.Args[1]
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpAMD64SHLLconst {
			break
		}
		if v_1.AuxInt != 1 {
			break
		}
		y := v_1.Args[0]
		v.reset(OpAMD64LEAL8)
		v.AuxInt = c
		v.Aux = s
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	return false
}

// rewriteValueAMD64_OpAMD64LEAL8_0 rewrites a LEAL8 value (32-bit address
// arithmetic of the form base + 8*index + c, with symbol s): it folds an
// ADDLconst on either operand into the displacement. 8 is the largest
// x86 addressing scale, so no shift-absorbing rule exists here.
// Reports whether a rule fired.
func rewriteValueAMD64_OpAMD64LEAL8_0(v *Value) bool {
	// match: (LEAL8 [c] {s} (ADDLconst [d] x) y)
	// cond: is32Bit(c+d) && x.Op != OpSB
	// result: (LEAL8 [c+d] {s} x y)
	for {
		c := v.AuxInt
		s := v.Aux
		_ = v.Args[1]
		v_0 := v.Args[0]
		if v_0.Op != OpAMD64ADDLconst {
			break
		}
		d := v_0.AuxInt
		x := v_0.Args[0]
		y := v.Args[1]
		if !(is32Bit(c+d) && x.Op != OpSB) {
			break
		}
		v.reset(OpAMD64LEAL8)
		v.AuxInt = c + d
		v.Aux = s
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	// match: (LEAL8 [c] {s} x (ADDLconst [d] y))
	// cond: is32Bit(c+8*d) && y.Op != OpSB
	// result: (LEAL8 [c+8*d] {s} x y)
	// The index operand is scaled by 8, so the folded constant is 8*d.
	for {
		c := v.AuxInt
		s := v.Aux
		_ = v.Args[1]
		x := v.Args[0]
		v_1 := v.Args[1]
		if v_1.Op != OpAMD64ADDLconst {
			break
		}
		d := v_1.AuxInt
		y := v_1.Args[0]
		if !(is32Bit(c+8*d) && y.Op != OpSB) {
			break
		}
		v.reset(OpAMD64LEAL8)
		v.AuxInt = c + 8*d
		v.Aux = s
		v.AddArg(x)
		v.AddArg(y)
		return true
	}
	return false
}
// rewriteValueAMD64_OpAMD64MULLconst_10 continues the MULLconst (32-bit
// multiply-by-constant) rules: it strength-reduces multiplies by the
// constants 9, 11, 13, 19, 21, 25, 27, 37, 41 and 45 into one or two
// LEAL{2,4,8} instructions (LEALk a b computes a + k*b).
// Reports whether a rule fired.
func rewriteValueAMD64_OpAMD64MULLconst_10(v *Value) bool {
	b := v.Block
	_ = b
	// match: (MULLconst [ 9] x)
	// cond:
	// result: (LEAL8 x x)
	// 9*x = x + 8*x
	for {
		if v.AuxInt != 9 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v.AddArg(x)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [11] x)
	// cond:
	// result: (LEAL2 x (LEAL4 <v.Type> x x))
	// 11*x = x + 2*(5*x)
	for {
		if v.AuxInt != 11 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL2)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [13] x)
	// cond:
	// result: (LEAL4 x (LEAL2 <v.Type> x x))
	// 13*x = x + 4*(3*x)
	for {
		if v.AuxInt != 13 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL4)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [19] x)
	// cond:
	// result: (LEAL2 x (LEAL8 <v.Type> x x))
	// 19*x = x + 2*(9*x)
	for {
		if v.AuxInt != 19 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL2)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [21] x)
	// cond:
	// result: (LEAL4 x (LEAL4 <v.Type> x x))
	// 21*x = x + 4*(5*x)
	for {
		if v.AuxInt != 21 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL4)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [25] x)
	// cond:
	// result: (LEAL8 x (LEAL2 <v.Type> x x))
	// 25*x = x + 8*(3*x)
	for {
		if v.AuxInt != 25 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [27] x)
	// cond:
	// result: (LEAL8 (LEAL2 <v.Type> x x) (LEAL2 <v.Type> x x))
	// 27*x = 3*x + 8*(3*x)
	for {
		if v.AuxInt != 27 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
		v1.AddArg(x)
		v1.AddArg(x)
		v.AddArg(v1)
		return true
	}
	// match: (MULLconst [37] x)
	// cond:
	// result: (LEAL4 x (LEAL8 <v.Type> x x))
	// 37*x = x + 4*(9*x)
	for {
		if v.AuxInt != 37 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL4)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [41] x)
	// cond:
	// result: (LEAL8 x (LEAL4 <v.Type> x x))
	// 41*x = x + 8*(5*x)
	for {
		if v.AuxInt != 41 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [45] x)
	// cond:
	// result: (LEAL8 (LEAL4 <v.Type> x x) (LEAL4 <v.Type> x x))
	// 45*x = 5*x + 8*(5*x)
	for {
		if v.AuxInt != 45 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v1.AddArg(x)
		v1.AddArg(x)
		v.AddArg(v1)
		return true
	}
	return false
}

// rewriteValueAMD64_OpAMD64MULLconst_20 continues the MULLconst rules with
// the fixed constants 73 and 81, then parameterized families: constants
// adjacent to a power of two (2^k - 1, 2^k + {1,2,4,8}) become a shift
// combined with a SUBL or a LEAL{1,2,4,8}, and constants of the form
// {3,5,9} * 2^k become a LEAL{2,4,8} followed by a shift.
// Rules are tried in order; it reports whether one fired.
func rewriteValueAMD64_OpAMD64MULLconst_20(v *Value) bool {
	b := v.Block
	_ = b
	// match: (MULLconst [73] x)
	// cond:
	// result: (LEAL8 x (LEAL8 <v.Type> x x))
	// 73*x = x + 8*(9*x)
	for {
		if v.AuxInt != 73 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v.AddArg(x)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [81] x)
	// cond:
	// result: (LEAL8 (LEAL8 <v.Type> x x) (LEAL8 <v.Type> x x))
	// 81*x = 9*x + 8*(9*x)
	for {
		if v.AuxInt != 81 {
			break
		}
		x := v.Args[0]
		v.reset(OpAMD64LEAL8)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		v1 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v1.AddArg(x)
		v1.AddArg(x)
		v.AddArg(v1)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: isPowerOfTwo(c+1) && c >= 15
	// result: (SUBL (SHLLconst <v.Type> [log2(c+1)] x) x)
	// c = 2^k - 1: (x<<k) - x. The c >= 15 floor leaves smaller
	// constants to the earlier fixed-constant rules.
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(isPowerOfTwo(c+1) && c >= 15) {
			break
		}
		v.reset(OpAMD64SUBL)
		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
		v0.AuxInt = log2(c + 1)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: isPowerOfTwo(c-1) && c >= 17
	// result: (LEAL1 (SHLLconst <v.Type> [log2(c-1)] x) x)
	// c = 2^k + 1: (x<<k) + x
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(isPowerOfTwo(c-1) && c >= 17) {
			break
		}
		v.reset(OpAMD64LEAL1)
		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
		v0.AuxInt = log2(c - 1)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: isPowerOfTwo(c-2) && c >= 34
	// result: (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
	// c = 2^k + 2: (x<<k) + 2*x
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(isPowerOfTwo(c-2) && c >= 34) {
			break
		}
		v.reset(OpAMD64LEAL2)
		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
		v0.AuxInt = log2(c - 2)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: isPowerOfTwo(c-4) && c >= 68
	// result: (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
	// c = 2^k + 4: (x<<k) + 4*x
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(isPowerOfTwo(c-4) && c >= 68) {
			break
		}
		v.reset(OpAMD64LEAL4)
		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
		v0.AuxInt = log2(c - 4)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: isPowerOfTwo(c-8) && c >= 136
	// result: (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
	// c = 2^k + 8: (x<<k) + 8*x
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(isPowerOfTwo(c-8) && c >= 136) {
			break
		}
		v.reset(OpAMD64LEAL8)
		v0 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type)
		v0.AuxInt = log2(c - 8)
		v0.AddArg(x)
		v.AddArg(v0)
		v.AddArg(x)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: c%3 == 0 && isPowerOfTwo(c/3)
	// result: (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
	// c = 3 * 2^k: (3*x) << k
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(c%3 == 0 && isPowerOfTwo(c/3)) {
			break
		}
		v.reset(OpAMD64SHLLconst)
		v.AuxInt = log2(c / 3)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL2, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: c%5 == 0 && isPowerOfTwo(c/5)
	// result: (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
	// c = 5 * 2^k: (5*x) << k
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(c%5 == 0 && isPowerOfTwo(c/5)) {
			break
		}
		v.reset(OpAMD64SHLLconst)
		v.AuxInt = log2(c / 5)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL4, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	// match: (MULLconst [c] x)
	// cond: c%9 == 0 && isPowerOfTwo(c/9)
	// result: (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
	// c = 9 * 2^k: (9*x) << k
	for {
		c := v.AuxInt
		x := v.Args[0]
		if !(c%9 == 0 && isPowerOfTwo(c/9)) {
			break
		}
		v.reset(OpAMD64SHLLconst)
		v.AuxInt = log2(c / 9)
		v0 := b.NewValue0(v.Pos, OpAMD64LEAL8, v.Type)
		v0.AddArg(x)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
	return false
}
0] _) // cond: // result: (MOVQconst [0]) for { @@ -20012,7 +21376,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool { v.AuxInt = 0 return true } - // match: (MULQconst [1] x) + // match: (MULQconst [ 1] x) // cond: // result: x for { @@ -20025,7 +21389,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (MULQconst [3] x) + // match: (MULQconst [ 3] x) // cond: // result: (LEAQ2 x x) for { @@ -20038,7 +21402,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (MULQconst [5] x) + // match: (MULQconst [ 5] x) // cond: // result: (LEAQ4 x x) for { @@ -20051,7 +21415,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool { v.AddArg(x) return true } - // match: (MULQconst [7] x) + // match: (MULQconst [ 7] x) // cond: // result: (LEAQ2 x (LEAQ2 <v.Type> x x)) for { @@ -20072,7 +21436,7 @@ func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool { func rewriteValueAMD64_OpAMD64MULQconst_10(v *Value) bool { b := v.Block _ = b - // match: (MULQconst [9] x) + // match: (MULQconst [ 9] x) // cond: // result: (LEAQ8 x x) for { |
