diff options
| author | Ben Shi <powerman1st@163.com> | 2018-09-17 02:05:22 +0000 |
|---|---|---|
| committer | Ben Shi <powerman1st@163.com> | 2018-09-19 03:00:58 +0000 |
| commit | c6bf9a81095bdbd3631cedc66ce690fa070d1a38 (patch) | |
| tree | c2159c3ba7a2222b3ad51e5cd2fddba5822b451b | |
| parent | 713edf8b31de04dce26f603576f074133e95de47 (diff) | |
| download | go-c6bf9a81095bdbd3631cedc66ce690fa070d1a38.tar.xz | |
cmd/compile: optimize AMD64's bitwise operations
Currently "arr[idx] |= 0x80" is compiled to MOVLload->BTSL->MOVLstore.
This CL optimizes it to a single BTSLconstmodify. Other bitwise
operations with a direct memory operand are also implemented.
1. The size of the executable bin/go decreases about 4KB, and the total size
of pkg/linux_amd64 (excluding cmd/compile) decreases about 0.6KB.
2. There is a small improvement in the go1 benchmark test (excluding noise).
name old time/op new time/op delta
BinaryTree17-4 2.66s ± 4% 2.66s ± 3% ~ (p=0.596 n=49+49)
Fannkuch11-4 2.38s ± 2% 2.32s ± 2% -2.69% (p=0.000 n=50+50)
FmtFprintfEmpty-4 42.7ns ± 4% 43.2ns ± 7% +1.31% (p=0.009 n=50+50)
FmtFprintfString-4 71.0ns ± 5% 72.0ns ± 3% +1.33% (p=0.000 n=50+50)
FmtFprintfInt-4 80.7ns ± 4% 80.6ns ± 3% ~ (p=0.931 n=50+50)
FmtFprintfIntInt-4 125ns ± 3% 126ns ± 4% ~ (p=0.051 n=50+50)
FmtFprintfPrefixedInt-4 158ns ± 1% 142ns ± 3% -9.84% (p=0.000 n=36+50)
FmtFprintfFloat-4 215ns ± 4% 212ns ± 4% -1.23% (p=0.002 n=50+50)
FmtManyArgs-4 519ns ± 3% 510ns ± 3% -1.77% (p=0.000 n=50+50)
GobDecode-4 6.49ms ± 6% 6.52ms ± 5% ~ (p=0.866 n=50+50)
GobEncode-4 5.93ms ± 8% 6.01ms ± 7% ~ (p=0.076 n=50+50)
Gzip-4 222ms ± 4% 224ms ± 8% +0.80% (p=0.001 n=50+50)
Gunzip-4 36.6ms ± 5% 36.4ms ± 4% ~ (p=0.093 n=50+50)
HTTPClientServer-4 59.1µs ± 1% 58.9µs ± 2% -0.24% (p=0.039 n=49+48)
JSONEncode-4 9.23ms ± 4% 9.21ms ± 5% ~ (p=0.244 n=50+50)
JSONDecode-4 48.8ms ± 4% 48.7ms ± 4% ~ (p=0.653 n=50+50)
Mandelbrot200-4 3.81ms ± 4% 3.80ms ± 3% ~ (p=0.834 n=50+50)
GoParse-4 3.20ms ± 5% 3.19ms ± 5% ~ (p=0.494 n=50+50)
RegexpMatchEasy0_32-4 78.1ns ± 2% 77.4ns ± 3% -0.86% (p=0.005 n=50+50)
RegexpMatchEasy0_1K-4 233ns ± 3% 233ns ± 3% ~ (p=0.074 n=50+50)
RegexpMatchEasy1_32-4 74.2ns ± 3% 73.4ns ± 3% -1.06% (p=0.000 n=50+50)
RegexpMatchEasy1_1K-4 369ns ± 2% 364ns ± 4% -1.41% (p=0.000 n=36+50)
RegexpMatchMedium_32-4 109ns ± 4% 107ns ± 3% -2.06% (p=0.001 n=50+50)
RegexpMatchMedium_1K-4 31.5µs ± 3% 30.8µs ± 3% -2.20% (p=0.000 n=50+50)
RegexpMatchHard_32-4 1.57µs ± 3% 1.56µs ± 2% -0.57% (p=0.016 n=50+50)
RegexpMatchHard_1K-4 47.4µs ± 4% 47.0µs ± 3% -0.82% (p=0.008 n=50+50)
Revcomp-4 414ms ± 7% 412ms ± 7% ~ (p=0.285 n=50+50)
Template-4 64.3ms ± 4% 62.7ms ± 3% -2.44% (p=0.000 n=50+50)
TimeParse-4 316ns ± 3% 313ns ± 3% ~ (p=0.122 n=50+50)
TimeFormat-4 291ns ± 3% 293ns ± 3% +0.80% (p=0.001 n=50+50)
[Geo mean] 46.5µs 46.2µs -0.81%
name old speed new speed delta
GobDecode-4 118MB/s ± 6% 118MB/s ± 5% ~ (p=0.863 n=50+50)
GobEncode-4 130MB/s ± 9% 128MB/s ± 8% ~ (p=0.076 n=50+50)
Gzip-4 87.4MB/s ± 4% 86.8MB/s ± 7% -0.78% (p=0.002 n=50+50)
Gunzip-4 531MB/s ± 5% 533MB/s ± 4% ~ (p=0.093 n=50+50)
JSONEncode-4 210MB/s ± 4% 211MB/s ± 5% ~ (p=0.247 n=50+50)
JSONDecode-4 39.8MB/s ± 4% 39.9MB/s ± 4% ~ (p=0.654 n=50+50)
GoParse-4 18.1MB/s ± 5% 18.2MB/s ± 5% ~ (p=0.493 n=50+50)
RegexpMatchEasy0_32-4 410MB/s ± 2% 413MB/s ± 3% +0.86% (p=0.004 n=50+50)
RegexpMatchEasy0_1K-4 4.39GB/s ± 3% 4.38GB/s ± 3% ~ (p=0.063 n=50+50)
RegexpMatchEasy1_32-4 432MB/s ± 3% 436MB/s ± 3% +1.07% (p=0.000 n=50+50)
RegexpMatchEasy1_1K-4 2.77GB/s ± 2% 2.81GB/s ± 4% +1.46% (p=0.000 n=36+50)
RegexpMatchMedium_32-4 9.16MB/s ± 3% 9.35MB/s ± 4% +2.09% (p=0.001 n=50+50)
RegexpMatchMedium_1K-4 32.5MB/s ± 3% 33.2MB/s ± 3% +2.25% (p=0.000 n=50+50)
RegexpMatchHard_32-4 20.4MB/s ± 3% 20.5MB/s ± 2% +0.56% (p=0.017 n=50+50)
RegexpMatchHard_1K-4 21.6MB/s ± 4% 21.8MB/s ± 3% +0.83% (p=0.008 n=50+50)
Revcomp-4 613MB/s ± 4% 618MB/s ± 7% ~ (p=0.152 n=48+50)
Template-4 30.2MB/s ± 4% 30.9MB/s ± 3% +2.49% (p=0.000 n=50+50)
[Geo mean] 127MB/s 128MB/s +0.64%
Change-Id: If405198283855d75697f66cf894b2bef458f620e
Reviewed-on: https://go-review.googlesource.com/135422
Reviewed-by: Keith Randall <khr@golang.org>
| -rw-r--r-- | src/cmd/compile/internal/amd64/ssa.go | 4 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/gen/AMD64.rules | 68 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 14 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/opGen.go | 186 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteAMD64.go | 1621 | ||||
| -rw-r--r-- | test/codegen/bits.go | 6 |
6 files changed, 1852 insertions, 47 deletions
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 818bc35941..b4c4b1f4cd 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -695,6 +695,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore, + ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify, ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify, ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify: p := s.Prog(v.Op.Asm()) @@ -763,7 +764,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { } fallthrough case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify, - ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify: + ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify, + ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify: sc := v.AuxValAndOff() off := sc.Off() val := sc.Val() diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 803b8896b0..76a4fc9ab7 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -709,7 +709,17 @@ (ANDL x (MOVLconst [c])) -> (ANDLconst [c] x) (AND(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [c & d] x) +(BTR(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [d &^ 1<<uint32(c)] x) +(AND(L|Q)const [c] (BTR(L|Q)const [d] x)) -> (AND(L|Q)const [c &^ 1<<uint32(d)] x) +(BTR(L|Q)const [c] 
(BTR(L|Q)const [d] x)) -> (AND(L|Q)const [^(1<<uint32(c) | 1<<uint32(d))] x) (XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ d] x) +(BTC(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [d ^ 1<<uint32(c)] x) +(XOR(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ 1<<uint32(d)] x) +(BTC(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [1<<uint32(c) ^ 1<<uint32(d)] x) +(OR(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [c | d] x) +(OR(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [c | 1<<uint32(d)] x) +(BTS(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [d | 1<<uint32(c)] x) +(BTS(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [1<<uint32(d) | 1<<uint32(c)] x) (MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x) (MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x) @@ -1051,14 +1061,14 @@ ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem) ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) -> ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem) -((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) -> - ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) -((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) -> - ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) -((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) -> - ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {sym} base val mem) -((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) -> - ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem) +((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) -> + ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify 
[ValAndOff(valoff1).add(off2)] {sym} base mem) +((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) -> + ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) +((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) -> + ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem) +((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) -> + ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem) // Fold constants into stores. (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) -> @@ -1106,18 +1116,18 @@ ((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) -((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) +((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) -> - ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) -((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) +((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) -> - ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) -((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) +((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} 
base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) -((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) +((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) // generating indexed loads and stores (MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> @@ -1424,6 +1434,12 @@ (XORLconst [c] (MOVLconst [d])) -> (MOVLconst [c^d]) (NOTQ (MOVQconst [c])) -> (MOVQconst [^c]) (NOTL (MOVLconst [c])) -> (MOVLconst [^c]) +(BTSQconst [c] (MOVQconst [d])) -> (MOVQconst [d|(1<<uint32(c))]) +(BTSLconst [c] (MOVLconst [d])) -> (MOVLconst [d|(1<<uint32(c))]) +(BTRQconst [c] (MOVQconst [d])) -> (MOVQconst [d&^(1<<uint32(c))]) +(BTRLconst [c] (MOVLconst [d])) -> (MOVLconst [d&^(1<<uint32(c))]) +(BTCQconst [c] (MOVQconst [d])) -> (MOVQconst [d^(1<<uint32(c))]) +(BTCLconst [c] (MOVLconst [d])) -> (MOVLconst [d^(1<<uint32(c))]) // generic simplifications // TODO: more of this @@ -2304,11 +2320,11 @@ ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem) ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem) (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) -(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && 
l.Uses==1 && clobber(y) && clobber(l) -> - ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) +(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) -> + ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off] {sym} ptr x mem) (MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) -(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) -> - ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) +(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) -> + ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off] {sym} ptr x mem) // Merge ADDQconst and LEAQ into atomic loads. (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) -> @@ -2392,12 +2408,12 @@ (MOVWQZX (MOVBQZX x)) -> (MOVBQZX x) (MOVBQZX (MOVBQZX x)) -> (MOVBQZX x) -(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) -> - ((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem) -(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) - && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) -> - ((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem) +(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) -> + ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem) +(MOVLstore [off] {sym} ptr 
a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) + && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) -> + ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem) // float <-> int register moves, with no conversion. // These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}. diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index 68facebe47..017c07071d 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -289,6 +289,20 @@ func init() { {name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32 {name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64 + // direct bit operation on memory operand + {name: "BTCQmodify", argLength: 3, reg: gpstore, asm: "BTCQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 64-bit arg0+auxint+aux, arg2=mem + {name: "BTCLmodify", argLength: 3, reg: gpstore, asm: "BTCL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 32-bit arg0+auxint+aux, arg2=mem + {name: "BTSQmodify", argLength: 3, reg: gpstore, asm: "BTSQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 64-bit arg0+auxint+aux, arg2=mem + {name: "BTSLmodify", argLength: 3, reg: gpstore, asm: "BTSL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 32-bit arg0+auxint+aux, arg2=mem + {name: "BTRQmodify", argLength: 3, reg: gpstore, asm: "BTRQ", aux: "SymOff", typ: "Mem", 
clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 64-bit arg0+auxint+aux, arg2=mem + {name: "BTRLmodify", argLength: 3, reg: gpstore, asm: "BTRL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 32-bit arg0+auxint+aux, arg2=mem + {name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem + {name: "BTCLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem + {name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem + {name: "BTSLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem + {name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem + {name: "BTRLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem + {name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0 {name: "TESTL", 
argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0 {name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 77b9875fd6..fe63633750 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -550,6 +550,18 @@ const ( OpAMD64BTRQconst OpAMD64BTSLconst OpAMD64BTSQconst + OpAMD64BTCQmodify + OpAMD64BTCLmodify + OpAMD64BTSQmodify + OpAMD64BTSLmodify + OpAMD64BTRQmodify + OpAMD64BTRLmodify + OpAMD64BTCQconstmodify + OpAMD64BTCLconstmodify + OpAMD64BTSQconstmodify + OpAMD64BTSLconstmodify + OpAMD64BTRQconstmodify + OpAMD64BTRLconstmodify OpAMD64TESTQ OpAMD64TESTL OpAMD64TESTW @@ -6902,6 +6914,180 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "BTCQmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTCQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "BTCLmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTCL, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "BTSQmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTSQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: 
"BTSLmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTSL, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "BTRQmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTRQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "BTRLmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTRL, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "BTCQconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTCQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "BTCLconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTCL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "BTSQconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTSQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: 
"BTSLconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTSL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "BTRQconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTRQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "BTRLconstmodify", + auxType: auxSymValAndOff, + argLen: 2, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ABTRL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { name: "TESTQ", argLen: 2, commutative: true, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 98b36a96a0..cd82a5642c 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -65,18 +65,46 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64ANDQmodify_0(v) case OpAMD64BSFQ: return rewriteValueAMD64_OpAMD64BSFQ_0(v) + case OpAMD64BTCLconst: + return rewriteValueAMD64_OpAMD64BTCLconst_0(v) + case OpAMD64BTCLconstmodify: + return rewriteValueAMD64_OpAMD64BTCLconstmodify_0(v) + case OpAMD64BTCLmodify: + return rewriteValueAMD64_OpAMD64BTCLmodify_0(v) + case OpAMD64BTCQconst: + return rewriteValueAMD64_OpAMD64BTCQconst_0(v) + case OpAMD64BTCQconstmodify: + return rewriteValueAMD64_OpAMD64BTCQconstmodify_0(v) + case OpAMD64BTCQmodify: + return rewriteValueAMD64_OpAMD64BTCQmodify_0(v) case OpAMD64BTLconst: return rewriteValueAMD64_OpAMD64BTLconst_0(v) case OpAMD64BTQconst: return rewriteValueAMD64_OpAMD64BTQconst_0(v) case OpAMD64BTRLconst: return 
rewriteValueAMD64_OpAMD64BTRLconst_0(v) + case OpAMD64BTRLconstmodify: + return rewriteValueAMD64_OpAMD64BTRLconstmodify_0(v) + case OpAMD64BTRLmodify: + return rewriteValueAMD64_OpAMD64BTRLmodify_0(v) case OpAMD64BTRQconst: return rewriteValueAMD64_OpAMD64BTRQconst_0(v) + case OpAMD64BTRQconstmodify: + return rewriteValueAMD64_OpAMD64BTRQconstmodify_0(v) + case OpAMD64BTRQmodify: + return rewriteValueAMD64_OpAMD64BTRQmodify_0(v) case OpAMD64BTSLconst: return rewriteValueAMD64_OpAMD64BTSLconst_0(v) + case OpAMD64BTSLconstmodify: + return rewriteValueAMD64_OpAMD64BTSLconstmodify_0(v) + case OpAMD64BTSLmodify: + return rewriteValueAMD64_OpAMD64BTSLmodify_0(v) case OpAMD64BTSQconst: return rewriteValueAMD64_OpAMD64BTSQconst_0(v) + case OpAMD64BTSQconstmodify: + return rewriteValueAMD64_OpAMD64BTSQconstmodify_0(v) + case OpAMD64BTSQmodify: + return rewriteValueAMD64_OpAMD64BTSQmodify_0(v) case OpAMD64CMOVLCC: return rewriteValueAMD64_OpAMD64CMOVLCC_0(v) case OpAMD64CMOVLCS: @@ -278,7 +306,7 @@ func rewriteValueAMD64(v *Value) bool { case OpAMD64MOVQloadidx8: return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v) case OpAMD64MOVQstore: - return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v) || rewriteValueAMD64_OpAMD64MOVQstore_20(v) + return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v) || rewriteValueAMD64_OpAMD64MOVQstore_20(v) || rewriteValueAMD64_OpAMD64MOVQstore_30(v) case OpAMD64MOVQstoreconst: return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v) case OpAMD64MOVQstoreconstidx1: @@ -3590,6 +3618,22 @@ func rewriteValueAMD64_OpAMD64ANDLconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (ANDLconst [c] (BTRLconst [d] x)) + // cond: + // result: (ANDLconst [c &^ 1<<uint32(d)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTRLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ANDLconst) + v.AuxInt = c &^ 1 << uint32(d) + v.AddArg(x) + return 
true + } // match: (ANDLconst [ 0xFF] x) // cond: // result: (MOVBQZX x) @@ -4101,6 +4145,22 @@ func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (ANDQconst [c] (BTRQconst [d] x)) + // cond: + // result: (ANDQconst [c &^ 1<<uint32(d)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTRQconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ANDQconst) + v.AuxInt = c &^ 1 << uint32(d) + v.AddArg(x) + return true + } // match: (ANDQconst [ 0xFF] x) // cond: // result: (MOVBQZX x) @@ -4429,6 +4489,320 @@ func rewriteValueAMD64_OpAMD64BSFQ_0(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64BTCLconst_0(v *Value) bool { + // match: (BTCLconst [c] (XORLconst [d] x)) + // cond: + // result: (XORLconst [d ^ 1<<uint32(c)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64XORLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64XORLconst) + v.AuxInt = d ^ 1<<uint32(c) + v.AddArg(x) + return true + } + // match: (BTCLconst [c] (BTCLconst [d] x)) + // cond: + // result: (XORLconst [1<<uint32(c) ^ 1<<uint32(d)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTCLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64XORLconst) + v.AuxInt = 1<<uint32(c) ^ 1<<uint32(d) + v.AddArg(x) + return true + } + // match: (BTCLconst [c] (MOVLconst [d])) + // cond: + // result: (MOVLconst [d^(1<<uint32(c))]) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64MOVLconst { + break + } + d := v_0.AuxInt + v.reset(OpAMD64MOVLconst) + v.AuxInt = d ^ (1 << uint32(c)) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTCLconstmodify_0(v *Value) bool { + // match: (BTCLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) + // result: (BTCLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + _ = 
v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2)) { + break + } + v.reset(OpAMD64BTCLconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = sym + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (BTCLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) + // result: (BTCLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) + for { + valoff1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTCLconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTCLmodify_0(v *Value) bool { + // match: (BTCLmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (BTCLmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64BTCLmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (BTCLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (BTCLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base 
:= v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTCLmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTCQconst_0(v *Value) bool { + // match: (BTCQconst [c] (XORQconst [d] x)) + // cond: + // result: (XORQconst [d ^ 1<<uint32(c)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64XORQconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64XORQconst) + v.AuxInt = d ^ 1<<uint32(c) + v.AddArg(x) + return true + } + // match: (BTCQconst [c] (BTCQconst [d] x)) + // cond: + // result: (XORQconst [1<<uint32(c) ^ 1<<uint32(d)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTCQconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64XORQconst) + v.AuxInt = 1<<uint32(c) ^ 1<<uint32(d) + v.AddArg(x) + return true + } + // match: (BTCQconst [c] (MOVQconst [d])) + // cond: + // result: (MOVQconst [d^(1<<uint32(c))]) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64MOVQconst { + break + } + d := v_0.AuxInt + v.reset(OpAMD64MOVQconst) + v.AuxInt = d ^ (1 << uint32(c)) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTCQconstmodify_0(v *Value) bool { + // match: (BTCQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) + // result: (BTCQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2)) { + break + } + v.reset(OpAMD64BTCQconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = sym + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (BTCQconstmodify [valoff1] 
{sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) + // result: (BTCQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) + for { + valoff1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTCQconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTCQmodify_0(v *Value) bool { + // match: (BTCQmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (BTCQmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64BTCQmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (BTCQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (BTCQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTCQmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64BTLconst_0(v *Value) bool { // match: (BTLconst 
[c] (SHRQconst [d] x)) // cond: (c+d)<64 @@ -4643,6 +5017,160 @@ func rewriteValueAMD64_OpAMD64BTRLconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (BTRLconst [c] (ANDLconst [d] x)) + // cond: + // result: (ANDLconst [d &^ (1<<uint32(c))] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64ANDLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ANDLconst) + v.AuxInt = d &^ (1 << uint32(c)) + v.AddArg(x) + return true + } + // match: (BTRLconst [c] (BTRLconst [d] x)) + // cond: + // result: (ANDLconst [^(1<<uint32(c) | 1<<uint32(d))] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTRLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ANDLconst) + v.AuxInt = ^(1<<uint32(c) | 1<<uint32(d)) + v.AddArg(x) + return true + } + // match: (BTRLconst [c] (MOVLconst [d])) + // cond: + // result: (MOVLconst [d&^(1<<uint32(c))]) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64MOVLconst { + break + } + d := v_0.AuxInt + v.reset(OpAMD64MOVLconst) + v.AuxInt = d &^ (1 << uint32(c)) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTRLconstmodify_0(v *Value) bool { + // match: (BTRLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) + // result: (BTRLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2)) { + break + } + v.reset(OpAMD64BTRLconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = sym + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (BTRLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) + // result: (BTRLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} 
base mem) + for { + valoff1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTRLconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTRLmodify_0(v *Value) bool { + // match: (BTRLmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (BTRLmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64BTRLmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (BTRLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (BTRLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTRLmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64BTRQconst_0(v *Value) bool { @@ -4682,6 +5210,160 @@ func rewriteValueAMD64_OpAMD64BTRQconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (BTRQconst [c] (ANDQconst [d] x)) + // cond: + // result: (ANDQconst [d &^ (1<<uint32(c))] x) + 
for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64ANDQconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ANDQconst) + v.AuxInt = d &^ (1 << uint32(c)) + v.AddArg(x) + return true + } + // match: (BTRQconst [c] (BTRQconst [d] x)) + // cond: + // result: (ANDQconst [^(1<<uint32(c) | 1<<uint32(d))] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTRQconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ANDQconst) + v.AuxInt = ^(1<<uint32(c) | 1<<uint32(d)) + v.AddArg(x) + return true + } + // match: (BTRQconst [c] (MOVQconst [d])) + // cond: + // result: (MOVQconst [d&^(1<<uint32(c))]) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64MOVQconst { + break + } + d := v_0.AuxInt + v.reset(OpAMD64MOVQconst) + v.AuxInt = d &^ (1 << uint32(c)) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTRQconstmodify_0(v *Value) bool { + // match: (BTRQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) + // result: (BTRQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2)) { + break + } + v.reset(OpAMD64BTRQconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = sym + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (BTRQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) + // result: (BTRQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) + for { + valoff1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2) && 
canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTRQconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTRQmodify_0(v *Value) bool { + // match: (BTRQmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (BTRQmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64BTRQmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (BTRQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (BTRQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTRQmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64BTSLconst_0(v *Value) bool { @@ -4721,6 +5403,160 @@ func rewriteValueAMD64_OpAMD64BTSLconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (BTSLconst [c] (ORLconst [d] x)) + // cond: + // result: (ORLconst [d | 1<<uint32(c)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64ORLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ORLconst) + v.AuxInt = d | 1<<uint32(c) + v.AddArg(x) + return true + } + // match: (BTSLconst [c] (BTSLconst [d] x)) 
+ // cond: + // result: (ORLconst [1<<uint32(d) | 1<<uint32(c)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTSLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ORLconst) + v.AuxInt = 1<<uint32(d) | 1<<uint32(c) + v.AddArg(x) + return true + } + // match: (BTSLconst [c] (MOVLconst [d])) + // cond: + // result: (MOVLconst [d|(1<<uint32(c))]) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64MOVLconst { + break + } + d := v_0.AuxInt + v.reset(OpAMD64MOVLconst) + v.AuxInt = d | (1 << uint32(c)) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTSLconstmodify_0(v *Value) bool { + // match: (BTSLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) + // result: (BTSLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2)) { + break + } + v.reset(OpAMD64BTSLconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = sym + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (BTSLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) + // result: (BTSLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) + for { + valoff1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTSLconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTSLmodify_0(v *Value) bool { 
+ // match: (BTSLmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (BTSLmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64BTSLmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (BTSLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (BTSLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTSLmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64BTSQconst_0(v *Value) bool { @@ -4760,6 +5596,160 @@ func rewriteValueAMD64_OpAMD64BTSQconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (BTSQconst [c] (ORQconst [d] x)) + // cond: + // result: (ORQconst [d | 1<<uint32(c)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64ORQconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ORQconst) + v.AuxInt = d | 1<<uint32(c) + v.AddArg(x) + return true + } + // match: (BTSQconst [c] (BTSQconst [d] x)) + // cond: + // result: (ORQconst [1<<uint32(d) | 1<<uint32(c)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTSQconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ORQconst) + v.AuxInt = 1<<uint32(d) | 1<<uint32(c) + v.AddArg(x) 
+ return true + } + // match: (BTSQconst [c] (MOVQconst [d])) + // cond: + // result: (MOVQconst [d|(1<<uint32(c))]) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64MOVQconst { + break + } + d := v_0.AuxInt + v.reset(OpAMD64MOVQconst) + v.AuxInt = d | (1 << uint32(c)) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTSQconstmodify_0(v *Value) bool { + // match: (BTSQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) + // result: (BTSQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2)) { + break + } + v.reset(OpAMD64BTSQconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = sym + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (BTSQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) + // result: (BTSQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) + for { + valoff1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTSQconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64BTSQmodify_0(v *Value) bool { + // match: (BTSQmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (BTSQmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + 
break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64BTSQmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (BTSQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (BTSQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64BTSQmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64CMOVLCC_0(v *Value) bool { @@ -15876,8 +16866,140 @@ func rewriteValueAMD64_OpAMD64MOVLstore_20(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVLstore {sym} [off] ptr y:(BTCL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (BTCLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64BTCL { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64BTCLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(BTRL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: 
y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (BTRLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64BTRL { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64BTRLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(BTSL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (BTSLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64BTSL { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64BTSLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64MOVLstore_30(v *Value) bool { // match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) // result: (ADDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt @@ -15905,7 +17027,7 @@ func 
rewriteValueAMD64_OpAMD64MOVLstore_20(v *Value) bool { if mem != v.Args[2] { break } - if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) { + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { break } v.reset(OpAMD64ADDLconstmodify) @@ -15916,7 +17038,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore_20(v *Value) bool { return true } // match: (MOVLstore [off] {sym} ptr a:(ANDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) // result: (ANDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt @@ -15944,7 +17066,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore_20(v *Value) bool { if mem != v.Args[2] { break } - if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) { + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { break } v.reset(OpAMD64ANDLconstmodify) @@ -15955,7 +17077,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore_20(v *Value) bool { return true } // match: (MOVLstore [off] {sym} ptr a:(ORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) // result: (ORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt @@ -15983,7 +17105,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore_20(v *Value) bool { if mem != v.Args[2] { break } - if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) { + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { break } v.reset(OpAMD64ORLconstmodify) 
@@ -15993,11 +17115,8 @@ func rewriteValueAMD64_OpAMD64MOVLstore_20(v *Value) bool { v.AddArg(mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVLstore_30(v *Value) bool { // match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) // result: (XORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt @@ -16025,7 +17144,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore_30(v *Value) bool { if mem != v.Args[2] { break } - if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) { + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { break } v.reset(OpAMD64XORLconstmodify) @@ -16035,6 +17154,123 @@ func rewriteValueAMD64_OpAMD64MOVLstore_30(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVLstore [off] {sym} ptr a:(BTCLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) + // result: (BTCLconstmodify {sym} [makeValAndOff(c,off)] ptr mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + a := v.Args[1] + if a.Op != OpAMD64BTCLconst { + break + } + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + ptr2 := l.Args[0] + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { + break + } + v.reset(OpAMD64BTCLconstmodify) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVLstore [off] {sym} ptr 
a:(BTRLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) + // result: (BTRLconstmodify {sym} [makeValAndOff(c,off)] ptr mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + a := v.Args[1] + if a.Op != OpAMD64BTRLconst { + break + } + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + ptr2 := l.Args[0] + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { + break + } + v.reset(OpAMD64BTRLconstmodify) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVLstore [off] {sym} ptr a:(BTSLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) + // result: (BTSLconstmodify {sym} [makeValAndOff(c,off)] ptr mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + a := v.Args[1] + if a.Op != OpAMD64BTSLconst { + break + } + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + ptr2 := l.Args[0] + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { + break + } + v.reset(OpAMD64BTSLconstmodify) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } // match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem) // cond: // result: (MOVSSstore [off] {sym} ptr val mem) @@ -18527,8 +19763,137 @@ func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool { v.AddArg(mem) 
return true } + // match: (MOVQstore {sym} [off] ptr y:(BTCQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (BTCQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64BTCQ { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64BTCQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore {sym} [off] ptr y:(BTRQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (BTRQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64BTRQ { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64BTRQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore {sym} [off] ptr y:(BTSQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (BTSQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64BTSQ { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != 
OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64BTSQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) // result: (ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt @@ -18556,7 +19921,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool { if mem != v.Args[2] { break } - if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) { + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { break } v.reset(OpAMD64ADDQconstmodify) @@ -18567,7 +19932,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool { return true } // match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) // result: (ANDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt @@ -18595,7 +19960,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool { if mem != v.Args[2] { break } - if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) { + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { break } v.reset(OpAMD64ANDQconstmodify) 
@@ -18606,7 +19971,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool { return true } // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) // result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt @@ -18634,7 +19999,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool { if mem != v.Args[2] { break } - if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) { + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { break } v.reset(OpAMD64ORQconstmodify) @@ -18645,7 +20010,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool { return true } // match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) // result: (XORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt @@ -18673,7 +20038,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool { if mem != v.Args[2] { break } - if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) { + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { break } v.reset(OpAMD64XORQconstmodify) @@ -18683,6 +20048,126 @@ func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVQstore [off] {sym} ptr a:(BTCQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) + 
// result: (BTCQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + a := v.Args[1] + if a.Op != OpAMD64BTCQconst { + break + } + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + ptr2 := l.Args[0] + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { + break + } + v.reset(OpAMD64BTCQconstmodify) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVQstore [off] {sym} ptr a:(BTRQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) + // result: (BTRQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + a := v.Args[1] + if a.Op != OpAMD64BTRQconst { + break + } + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + ptr2 := l.Args[0] + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { + break + } + v.reset(OpAMD64BTRQconstmodify) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64MOVQstore_30(v *Value) bool { + // match: (MOVQstore [off] {sym} ptr a:(BTSQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) + // result: (BTSQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) + for { + off := v.AuxInt + sym := 
v.Aux + _ = v.Args[2] + ptr := v.Args[0] + a := v.Args[1] + if a.Op != OpAMD64BTSQconst { + break + } + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + ptr2 := l.Args[0] + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) { + break + } + v.reset(OpAMD64BTSQconstmodify) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem) // cond: // result: (MOVSDstore [off] {sym} ptr val mem) @@ -33141,6 +34626,38 @@ func rewriteValueAMD64_OpAMD64ORLconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (ORLconst [c] (ORLconst [d] x)) + // cond: + // result: (ORLconst [c | d] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64ORLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ORLconst) + v.AuxInt = c | d + v.AddArg(x) + return true + } + // match: (ORLconst [c] (BTSLconst [d] x)) + // cond: + // result: (ORLconst [c | 1<<uint32(d)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTSLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ORLconst) + v.AuxInt = c | 1<<uint32(d) + v.AddArg(x) + return true + } // match: (ORLconst [c] x) // cond: int32(c)==0 // result: x @@ -44160,6 +45677,38 @@ func rewriteValueAMD64_OpAMD64ORQconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (ORQconst [c] (ORQconst [d] x)) + // cond: + // result: (ORQconst [c | d] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64ORQconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ORQconst) + v.AuxInt = c | d + v.AddArg(x) + return true + } + // match: (ORQconst [c] (BTSQconst [d] x)) + // cond: + // result: (ORQconst [c | 1<<uint32(d)] x) + for { + 
c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTSQconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ORQconst) + v.AuxInt = c | 1<<uint32(d) + v.AddArg(x) + return true + } // match: (ORQconst [0] x) // cond: // result: x @@ -54886,6 +56435,22 @@ func rewriteValueAMD64_OpAMD64XORLconst_10(v *Value) bool { v.AddArg(x) return true } + // match: (XORLconst [c] (BTCLconst [d] x)) + // cond: + // result: (XORLconst [c ^ 1<<uint32(d)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTCLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64XORLconst) + v.AuxInt = c ^ 1<<uint32(d) + v.AddArg(x) + return true + } // match: (XORLconst [c] x) // cond: int32(c)==0 // result: x @@ -55409,6 +56974,22 @@ func rewriteValueAMD64_OpAMD64XORQconst_0(v *Value) bool { v.AddArg(x) return true } + // match: (XORQconst [c] (BTCQconst [d] x)) + // cond: + // result: (XORQconst [c ^ 1<<uint32(d)] x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64BTCQconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64XORQconst) + v.AuxInt = c ^ 1<<uint32(d) + v.AddArg(x) + return true + } // match: (XORQconst [0] x) // cond: // result: x diff --git a/test/codegen/bits.go b/test/codegen/bits.go index e95e3f64cd..65d57c8f9f 100644 --- a/test/codegen/bits.go +++ b/test/codegen/bits.go @@ -270,6 +270,12 @@ func bitOpOnMem(a []uint32) { a[1] |= 220 // amd64:`XORL\s[$]240,\s8\([A-Z]+\)` a[2] ^= 240 + // amd64:`BTRL\s[$]15,\s12\([A-Z]+\)`,-`ANDL` + a[3] &= 0xffff7fff + // amd64:`BTSL\s[$]14,\s16\([A-Z]+\)`,-`ORL` + a[4] |= 0x4000 + // amd64:`BTCL\s[$]13,\s20\([A-Z]+\)`,-`XORL` + a[5] ^= 0x2000 } // Check AND masking on arm64 (Issue #19857) |
