diff options
| author | Rick Hudson <rlh@golang.org> | 2016-04-27 18:19:16 -0400 |
|---|---|---|
| committer | Rick Hudson <rlh@golang.org> | 2016-04-27 18:46:52 -0400 |
| commit | 23aeb34df172b17b7bfaa85fb59ca64bef9073bb (patch) | |
| tree | a8ab866f1e50f0059856ce628f036d93ab620155 /src/cmd/compile/internal/ssa | |
| parent | 1354b32cd70f2702381764fd595dd2faa996840c (diff) | |
| parent | d3c79d324acd7300b6f705e66af8ca711af00d9f (diff) | |
| download | go-23aeb34df172b17b7bfaa85fb59ca64bef9073bb.tar.xz | |
[dev.garbage] Merge remote-tracking branch 'origin/master' into HEAD
Change-Id: I282fd9ce9db435dfd35e882a9502ab1abc185297
Diffstat (limited to 'src/cmd/compile/internal/ssa')
39 files changed, 5209 insertions, 4009 deletions
diff --git a/src/cmd/compile/internal/ssa/TODO b/src/cmd/compile/internal/ssa/TODO index e081856bd3..dad4880994 100644 --- a/src/cmd/compile/internal/ssa/TODO +++ b/src/cmd/compile/internal/ssa/TODO @@ -41,8 +41,6 @@ Future/other ------------ - Start another architecture (arm?) - 64-bit ops on 32-bit machines -- Investigate type equality. During SSA generation, should we use n.Type or (say) TypeBool? - Should we get rid of named types in favor of underlying types during SSA generation? -- Should we introduce a new type equality routine that is less strict than the frontend's? - Infrastructure for enabling/disabling/configuring passes - Modify logging for at least pass=1, to be Warnl compatible diff --git a/src/cmd/compile/internal/ssa/check.go b/src/cmd/compile/internal/ssa/check.go index 5a17735304..4a10606d3c 100644 --- a/src/cmd/compile/internal/ssa/check.go +++ b/src/cmd/compile/internal/ssa/check.go @@ -162,7 +162,7 @@ func checkFunc(f *Func) { // variable length args) nArgs := opcodeTable[v.Op].argLen if nArgs != -1 && int32(len(v.Args)) != nArgs { - f.Fatalf("value %v has %d args, expected %d", v.LongString(), + f.Fatalf("value %s has %d args, expected %d", v.LongString(), len(v.Args), nArgs) } @@ -193,6 +193,8 @@ func checkFunc(f *Func) { canHaveAuxInt = true case auxInt64, auxFloat64: canHaveAuxInt = true + case auxInt128: + // AuxInt must be zero, so leave canHaveAuxInt set to false. case auxFloat32: canHaveAuxInt = true if !isExactFloat32(v) { @@ -203,19 +205,25 @@ func checkFunc(f *Func) { case auxSymOff, auxSymValAndOff: canHaveAuxInt = true canHaveAux = true + case auxSymInt32: + if v.AuxInt != int64(int32(v.AuxInt)) { + f.Fatalf("bad int32 AuxInt value for %v", v) + } + canHaveAuxInt = true + canHaveAux = true default: f.Fatalf("unknown aux type for %s", v.Op) } if !canHaveAux && v.Aux != nil { - f.Fatalf("value %v has an Aux value %v but shouldn't", v.LongString(), v.Aux) + f.Fatalf("value %s has an Aux value %v but shouldn't", v.LongString(), v.Aux) } if !canHaveAuxInt && v.AuxInt != 0 { - f.Fatalf("value %v has an AuxInt value %d but shouldn't", v.LongString(), v.AuxInt) + f.Fatalf("value %s has an AuxInt value %d but shouldn't", v.LongString(), v.AuxInt) } for _, arg := range v.Args { if arg == nil { - f.Fatalf("value %v has nil arg", v.LongString()) + f.Fatalf("value %s has nil arg", v.LongString()) } } @@ -271,7 +279,7 @@ func checkFunc(f *Func) { for _, v := range b.Values { for i, a := range v.Args { if !valueMark[a.ID] { - f.Fatalf("%v, arg %d of %v, is missing", a, i, v) + f.Fatalf("%v, arg %d of %s, is missing", a, i, v.LongString()) } } } @@ -338,7 +346,7 @@ func checkFunc(f *Func) { // domCheck reports whether x dominates y (including x==y). func domCheck(f *Func, sdom sparseTree, x, y *Block) bool { - if !sdom.isAncestorEq(y, f.Entry) { + if !sdom.isAncestorEq(f.Entry, y) { // unreachable - ignore return true } diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index d52ae9c6da..bc9c830ee9 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -230,9 +230,10 @@ var passes = [...]pass{ {name: "early deadcode", fn: deadcode}, // remove generated dead code to avoid doing pointless work during opt {name: "short circuit", fn: shortcircuit}, {name: "decompose user", fn: decomposeUser, required: true}, - {name: "opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules - {name: "zero arg cse", fn: zcse, required: true}, // required to merge OpSB values - {name: "opt deadcode", fn: deadcode}, // remove any blocks orphaned during opt + {name: "opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules + {name: "zero arg cse", fn: zcse, required: true}, // required to merge OpSB values + {name: "opt deadcode", fn: deadcode, required: true}, // remove any blocks orphaned during opt + {name: "generic domtree", fn: domTree}, {name: "generic cse", fn: cse}, {name: "phiopt", fn: phiopt}, {name: "nilcheckelim", fn: nilcheckelim}, @@ -288,6 +289,12 @@ var passOrder = [...]constraint{ {"opt", "nilcheckelim"}, // tighten should happen before lowering to avoid splitting naturally paired instructions such as CMP/SET {"tighten", "lower"}, + // cse, phiopt, nilcheckelim, prove and loopbce share idom. + {"generic domtree", "generic cse"}, + {"generic domtree", "phiopt"}, + {"generic domtree", "nilcheckelim"}, + {"generic domtree", "prove"}, + {"generic domtree", "loopbce"}, // tighten will be most effective when as many values have been removed as possible {"generic deadcode", "tighten"}, {"generic cse", "tighten"}, diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index 33357124fc..a60291ea53 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -103,6 +103,7 @@ type Frontend interface { SplitInterface(LocalSlot) (LocalSlot, LocalSlot) SplitSlice(LocalSlot) (LocalSlot, LocalSlot, LocalSlot) SplitComplex(LocalSlot) (LocalSlot, LocalSlot) + SplitStruct(LocalSlot, int) LocalSlot // Line returns a string describing the given line number. Line(int32) string @@ -115,6 +116,12 @@ type GCNode interface { String() string } +// GCSym is an interface that *gc.Sym implements. +// Using *gc.Sym directly would lead to import cycles. +type GCSym interface { + IsRuntimeCall(name string) bool +} + // NewConfig returns a new configuration object for the given architecture. func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config { c := &Config{arch: arch, fe: fe} diff --git a/src/cmd/compile/internal/ssa/cse.go b/src/cmd/compile/internal/ssa/cse.go index 1ec5712be0..d501f75e02 100644 --- a/src/cmd/compile/internal/ssa/cse.go +++ b/src/cmd/compile/internal/ssa/cse.go @@ -108,7 +108,7 @@ func cse(f *Func) { break } } - if !equivalent || !v.Type.Equal(w.Type) { + if !equivalent || v.Type.Compare(w.Type) != CMPeq { // w is not equivalent to v. // move it to the end and shrink e. e[j], e[len(e)-1] = e[len(e)-1], e[j] @@ -131,33 +131,36 @@ func cse(f *Func) { } } - // Compute dominator tree - idom := dominators(f) - sdom := newSparseTree(f, idom) + // Dominator tree (f.sdom) is computed by the generic domtree pass. // Compute substitutions we would like to do. We substitute v for w // if v and w are in the same equivalence class and v dominates w. rewrite := make([]*Value, f.NumValues()) for _, e := range partition { - for len(e) > 1 { - // Find a maximal dominant element in e - v := e[0] - for _, w := range e[1:] { - if sdom.isAncestorEq(w.Block, v.Block) { - v = w - } + sort.Sort(sortbyentry{e, f.sdom}) + for i := 0; i < len(e)-1; i++ { + // e is sorted by entry value so maximal dominant element should be + // found first in the slice + v := e[i] + if v == nil { + continue } + e[i] = nil // Replace all elements of e which v dominates - for i := 0; i < len(e); { - w := e[i] - if w == v { - e, e[i] = e[:len(e)-1], e[len(e)-1] - } else if sdom.isAncestorEq(v.Block, w.Block) { + for j := i + 1; j < len(e); j++ { + w := e[j] + if w == nil { + continue + } + if f.sdom.isAncestorEq(v.Block, w.Block) { rewrite[w.ID] = v - e, e[i] = e[:len(e)-1], e[len(e)-1] + e[j] = nil } else { - i++ + // since the blocks are assorted in ascending order by entry number + // once we know that we don't dominate a block we can't dominate any + // 'later' block + break } } } @@ -255,6 +258,14 @@ func cmpVal(v, w *Value, auxIDs auxmap, depth int) Cmp { return lt2Cmp(v.Block.ID < w.Block.ID) } + switch v.Op { + case OpStaticCall, OpAMD64CALLstatic, OpARMCALLstatic: + sym := v.Aux.(GCSym) + if sym.IsRuntimeCall("newobject") { + return lt2Cmp(v.ID < w.ID) + } + } + if tc := v.Type.Compare(w.Type); tc != CMPeq { return tc } @@ -302,3 +313,16 @@ func (sv sortvalues) Less(i, j int) bool { // Sort by value ID last to keep the sort result deterministic. return v.ID < w.ID } + +type sortbyentry struct { + a []*Value // array of values + sdom sparseTree +} + +func (sv sortbyentry) Len() int { return len(sv.a) } +func (sv sortbyentry) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] } +func (sv sortbyentry) Less(i, j int) bool { + v := sv.a[i] + w := sv.a[j] + return sv.sdom.maxdomorder(v.Block) < sv.sdom.maxdomorder(w.Block) +} diff --git a/src/cmd/compile/internal/ssa/cse_test.go b/src/cmd/compile/internal/ssa/cse_test.go index 905939fc32..d5be2b52ec 100644 --- a/src/cmd/compile/internal/ssa/cse_test.go +++ b/src/cmd/compile/internal/ssa/cse_test.go @@ -44,6 +44,7 @@ func TestCSEAuxPartitionBug(t *testing.T) { Exit("rstore"))) CheckFunc(fun.f) + domTree(fun.f) cse(fun.f) deadcode(fun.f) CheckFunc(fun.f) diff --git a/src/cmd/compile/internal/ssa/decompose.go b/src/cmd/compile/internal/ssa/decompose.go index eab9974106..53116ba593 100644 --- a/src/cmd/compile/internal/ssa/decompose.go +++ b/src/cmd/compile/internal/ssa/decompose.go @@ -21,6 +21,7 @@ func decomposeBuiltIn(f *Func) { // NOTE: the component values we are making are dead at this point. // We must do the opt pass before any deadcode elimination or we will // lose the name->value correspondence. + var newNames []LocalSlot for _, name := range f.Names { t := name.Type switch { @@ -32,29 +33,31 @@ func decomposeBuiltIn(f *Func) { elemType = f.Config.fe.TypeFloat32() } rName, iName := f.Config.fe.SplitComplex(name) - f.Names = append(f.Names, rName, iName) + newNames = append(newNames, rName, iName) for _, v := range f.NamedValues[name] { r := v.Block.NewValue1(v.Line, OpComplexReal, elemType, v) i := v.Block.NewValue1(v.Line, OpComplexImag, elemType, v) f.NamedValues[rName] = append(f.NamedValues[rName], r) f.NamedValues[iName] = append(f.NamedValues[iName], i) } + delete(f.NamedValues, name) case t.IsString(): ptrType := f.Config.fe.TypeBytePtr() lenType := f.Config.fe.TypeInt() ptrName, lenName := f.Config.fe.SplitString(name) - f.Names = append(f.Names, ptrName, lenName) + newNames = append(newNames, ptrName, lenName) for _, v := range f.NamedValues[name] { ptr := v.Block.NewValue1(v.Line, OpStringPtr, ptrType, v) len := v.Block.NewValue1(v.Line, OpStringLen, lenType, v) f.NamedValues[ptrName] = append(f.NamedValues[ptrName], ptr) f.NamedValues[lenName] = append(f.NamedValues[lenName], len) } + delete(f.NamedValues, name) case t.IsSlice(): ptrType := f.Config.fe.TypeBytePtr() lenType := f.Config.fe.TypeInt() ptrName, lenName, capName := f.Config.fe.SplitSlice(name) - f.Names = append(f.Names, ptrName, lenName, capName) + newNames = append(newNames, ptrName, lenName, capName) for _, v := range f.NamedValues[name] { ptr := v.Block.NewValue1(v.Line, OpSlicePtr, ptrType, v) len := v.Block.NewValue1(v.Line, OpSliceLen, lenType, v) @@ -63,20 +66,25 @@ func decomposeBuiltIn(f *Func) { f.NamedValues[lenName] = append(f.NamedValues[lenName], len) f.NamedValues[capName] = append(f.NamedValues[capName], cap) } + delete(f.NamedValues, name) case t.IsInterface(): ptrType := f.Config.fe.TypeBytePtr() typeName, dataName := f.Config.fe.SplitInterface(name) - f.Names = append(f.Names, typeName, dataName) + newNames = append(newNames, typeName, dataName) for _, v := range f.NamedValues[name] { typ := v.Block.NewValue1(v.Line, OpITab, ptrType, v) data := v.Block.NewValue1(v.Line, OpIData, ptrType, v) f.NamedValues[typeName] = append(f.NamedValues[typeName], typ) f.NamedValues[dataName] = append(f.NamedValues[dataName], data) } + delete(f.NamedValues, name) case t.Size() > f.Config.IntSize: - f.Unimplementedf("undecomposed named type %s", t) + f.Unimplementedf("undecomposed named type %s %s", name, t) + default: + newNames = append(newNames, name) } } + f.Names = newNames } func decomposeBuiltInPhi(v *Value) { @@ -181,25 +189,32 @@ func decomposeUser(f *Func) { // We must do the opt pass before any deadcode elimination or we will // lose the name->value correspondence. i := 0 + var fnames []LocalSlot + var newNames []LocalSlot for _, name := range f.Names { t := name.Type switch { case t.IsStruct(): n := t.NumFields() + fnames = fnames[:0] + for i := 0; i < n; i++ { + fnames = append(fnames, f.Config.fe.SplitStruct(name, i)) + } for _, v := range f.NamedValues[name] { for i := 0; i < n; i++ { - fname := LocalSlot{name.N, t.FieldType(i), name.Off + t.FieldOff(i)} // TODO: use actual field name? x := v.Block.NewValue1I(v.Line, OpStructSelect, t.FieldType(i), int64(i), v) - f.NamedValues[fname] = append(f.NamedValues[fname], x) + f.NamedValues[fnames[i]] = append(f.NamedValues[fnames[i]], x) } } delete(f.NamedValues, name) + newNames = append(newNames, fnames...) default: f.Names[i] = name i++ } } f.Names = f.Names[:i] + f.Names = append(f.Names, newNames...) } func decomposeUserPhi(v *Value) { diff --git a/src/cmd/compile/internal/ssa/dom.go b/src/cmd/compile/internal/ssa/dom.go index 0fffcdc2af..c0a4bb4188 100644 --- a/src/cmd/compile/internal/ssa/dom.go +++ b/src/cmd/compile/internal/ssa/dom.go @@ -5,11 +5,13 @@ package ssa // mark values +type markKind uint8 + const ( - notFound = 0 // block has not been discovered yet - notExplored = 1 // discovered and in queue, outedges not processed yet - explored = 2 // discovered and in queue, outedges processed - done = 3 // all done, in output ordering + notFound markKind = 0 // block has not been discovered yet + notExplored markKind = 1 // discovered and in queue, outedges not processed yet + explored markKind = 2 // discovered and in queue, outedges processed + done markKind = 3 // all done, in output ordering ) // This file contains code to compute the dominator tree @@ -18,7 +20,10 @@ const ( // postorder computes a postorder traversal ordering for the // basic blocks in f. Unreachable blocks will not appear. func postorder(f *Func) []*Block { - mark := make([]byte, f.NumBlocks()) + return postorderWithNumbering(f, []int{}) +} +func postorderWithNumbering(f *Func, ponums []int) []*Block { + mark := make([]markKind, f.NumBlocks()) // result ordering var order []*Block @@ -34,6 +39,9 @@ func postorder(f *Func) []*Block { // Children have all been visited. Pop & output block. s = s[:len(s)-1] mark[b.ID] = done + if len(ponums) > 0 { + ponums[b.ID] = len(order) + } order = append(order, b) case notExplored: // Children have not been visited yet. Mark as explored @@ -54,14 +62,14 @@ func postorder(f *Func) []*Block { type linkedBlocks func(*Block) []*Block -const nscratchslices = 8 +const nscratchslices = 7 // experimentally, functions with 512 or fewer blocks account // for 75% of memory (size) allocation for dominator computation // in make.bash. const minscratchblocks = 512 -func (cfg *Config) scratchBlocksForDom(maxBlockID int) (a, b, c, d, e, f, g, h []ID) { +func (cfg *Config) scratchBlocksForDom(maxBlockID int) (a, b, c, d, e, f, g []ID) { tot := maxBlockID * nscratchslices scratch := cfg.domblockstore if len(scratch) < tot { @@ -88,213 +96,143 @@ func (cfg *Config) scratchBlocksForDom(maxBlockID int) (a, b, c, d, e, f, g, h [ e = scratch[4*maxBlockID : 5*maxBlockID] f = scratch[5*maxBlockID : 6*maxBlockID] g = scratch[6*maxBlockID : 7*maxBlockID] - h = scratch[7*maxBlockID : 8*maxBlockID] return } -// dfs performs a depth first search over the blocks starting at the set of -// blocks in the entries list (in arbitrary order). dfnum contains a mapping -// from block id to an int indicating the order the block was reached or -// notFound if the block was not reached. order contains a mapping from dfnum -// to block. -func (f *Func) dfs(entries []*Block, succFn linkedBlocks, dfnum, order, parent []ID) (fromID []*Block) { - maxBlockID := entries[0].Func.NumBlocks() - - fromID = make([]*Block, maxBlockID) - - for _, entry := range entries[0].Func.Blocks { - eid := entry.ID - if fromID[eid] != nil { - panic("Colliding entry IDs") - } - fromID[eid] = entry - } - - n := ID(0) - s := make([]*Block, 0, 256) - for _, entry := range entries { - if dfnum[entry.ID] != notFound { - continue // already found from a previous entry - } - s = append(s, entry) - parent[entry.ID] = entry.ID - for len(s) > 0 { - node := s[len(s)-1] - s = s[:len(s)-1] - - n++ - for _, w := range succFn(node) { - // if it has a dfnum, we've already visited it - if dfnum[w.ID] == notFound { - s = append(s, w) - parent[w.ID] = node.ID - dfnum[w.ID] = notExplored - } - } - dfnum[node.ID] = n - order[n] = node.ID - } - } - - return -} - -// dominators computes the dominator tree for f. It returns a slice -// which maps block ID to the immediate dominator of that block. -// Unreachable blocks map to nil. The entry block maps to nil. func dominators(f *Func) []*Block { preds := func(b *Block) []*Block { return b.Preds } succs := func(b *Block) []*Block { return b.Succs } //TODO: benchmark and try to find criteria for swapping between // dominatorsSimple and dominatorsLT - return f.dominatorsLT([]*Block{f.Entry}, preds, succs) + return f.dominatorsLTOrig(f.Entry, preds, succs) } -// postDominators computes the post-dominator tree for f. -func postDominators(f *Func) []*Block { - preds := func(b *Block) []*Block { return b.Preds } - succs := func(b *Block) []*Block { return b.Succs } - - if len(f.Blocks) == 0 { - return nil - } - - // find the exit blocks - var exits []*Block - for _, b := range f.Blocks { - switch b.Kind { - case BlockExit, BlockRet, BlockRetJmp, BlockCall, BlockCheck: - exits = append(exits, b) - } - } - - // infinite loop with no exit - if exits == nil { - return make([]*Block, f.NumBlocks()) - } - return f.dominatorsLT(exits, succs, preds) -} - -// dominatorsLt runs Lengauer-Tarjan to compute a dominator tree starting at +// dominatorsLTOrig runs Lengauer-Tarjan to compute a dominator tree starting at // entry and using predFn/succFn to find predecessors/successors to allow // computing both dominator and post-dominator trees. -func (f *Func) dominatorsLT(entries []*Block, predFn linkedBlocks, succFn linkedBlocks) []*Block { - // Based on Lengauer-Tarjan from Modern Compiler Implementation in C - - // Appel with optimizations from Finding Dominators in Practice - - // Georgiadis +func (f *Func) dominatorsLTOrig(entry *Block, predFn linkedBlocks, succFn linkedBlocks) []*Block { + // Adapted directly from the original TOPLAS article's "simple" algorithm - maxBlockID := entries[0].Func.NumBlocks() + maxBlockID := entry.Func.NumBlocks() + semi, vertex, label, parent, ancestor, bucketHead, bucketLink := f.Config.scratchBlocksForDom(maxBlockID) - dfnum, vertex, parent, semi, samedom, ancestor, best, bucket := f.Config.scratchBlocksForDom(maxBlockID) - - // dfnum := make([]ID, maxBlockID) // conceptually int32, but punning for allocation purposes. - // vertex := make([]ID, maxBlockID) - // parent := make([]ID, maxBlockID) - - // semi := make([]ID, maxBlockID) - // samedom := make([]ID, maxBlockID) - // ancestor := make([]ID, maxBlockID) - // best := make([]ID, maxBlockID) - // bucket := make([]ID, maxBlockID) + // This version uses integers for most of the computation, + // to make the work arrays smaller and pointer-free. + // fromID translates from ID to *Block where that is needed. + fromID := make([]*Block, maxBlockID) + for _, v := range f.Blocks { + fromID[v.ID] = v + } + idom := make([]*Block, maxBlockID) // Step 1. Carry out a depth first search of the problem graph. Number // the vertices from 1 to n as they are reached during the search. - fromID := f.dfs(entries, succFn, dfnum, vertex, parent) + n := f.dfsOrig(entry, succFn, semi, vertex, label, parent) - idom := make([]*Block, maxBlockID) - - // Step 2. Compute the semidominators of all vertices by applying - // Theorem 4. Carry out the computation vertex by vertex in decreasing - // order by number. - for i := maxBlockID - 1; i > 0; i-- { + for i := n; i >= 2; i-- { w := vertex[i] - if w == 0 { - continue - } - - if dfnum[w] == notFound { - // skip unreachable node - continue - } - - // Step 3. Implicitly define the immediate dominator of each - // vertex by applying Corollary 1. (reordered) - for v := bucket[w]; v != 0; v = bucket[v] { - u := eval(v, ancestor, semi, dfnum, best) - if semi[u] == semi[v] { - idom[v] = fromID[w] // true dominator - } else { - samedom[v] = u // v has same dominator as u - } - } - - p := parent[w] - s := p // semidominator - var sp ID - // calculate the semidominator of w + // step2 in TOPLAS paper for _, v := range predFn(fromID[w]) { - if dfnum[v.ID] == notFound { + if semi[v.ID] == 0 { // skip unreachable predecessor + // not in original, but we're using existing pred instead of building one. continue } - - if dfnum[v.ID] <= dfnum[w] { - sp = v.ID - } else { - sp = semi[eval(v.ID, ancestor, semi, dfnum, best)] - } - - if dfnum[sp] < dfnum[s] { - s = sp + u := evalOrig(v.ID, ancestor, semi, label) + if semi[u] < semi[w] { + semi[w] = semi[u] } } - // link - ancestor[w] = p - best[w] = w + // add w to bucket[vertex[semi[w]]] + // implement bucket as a linked list implemented + // in a pair of arrays. + vsw := vertex[semi[w]] + bucketLink[w] = bucketHead[vsw] + bucketHead[vsw] = w + + linkOrig(parent[w], w, ancestor) - semi[w] = s - if semi[s] != parent[s] { - bucket[w] = bucket[s] - bucket[s] = w + // step3 in TOPLAS paper + for v := bucketHead[parent[w]]; v != 0; v = bucketLink[v] { + u := evalOrig(v, ancestor, semi, label) + if semi[u] < semi[v] { + idom[v] = fromID[u] + } else { + idom[v] = fromID[parent[w]] + } } } - - // Final pass of step 3 - for v := bucket[0]; v != 0; v = bucket[v] { - idom[v] = fromID[bucket[0]] + // step 4 in toplas paper + for i := ID(2); i <= n; i++ { + w := vertex[i] + if idom[w].ID != vertex[semi[w]] { + idom[w] = idom[idom[w].ID] + } } - // Step 4. Explicitly define the immediate dominator of each vertex, - // carrying out the computation vertex by vertex in increasing order by - // number. - for i := 1; i < maxBlockID-1; i++ { - w := vertex[i] - if w == 0 { - continue + return idom +} + +// dfs performs a depth first search over the blocks starting at entry block +// (in arbitrary order). This is a de-recursed version of dfs from the +// original Tarjan-Lengauer TOPLAS article. It's important to return the +// same values for parent as the original algorithm. +func (f *Func) dfsOrig(entry *Block, succFn linkedBlocks, semi, vertex, label, parent []ID) ID { + n := ID(0) + s := make([]*Block, 0, 256) + s = append(s, entry) + + for len(s) > 0 { + v := s[len(s)-1] + s = s[:len(s)-1] + // recursing on v + + if semi[v.ID] != 0 { + continue // already visited } - // w has the same dominator as samedom[w] - if samedom[w] != 0 { - idom[w] = idom[samedom[w]] + n++ + semi[v.ID] = n + vertex[n] = v.ID + label[v.ID] = v.ID + // ancestor[v] already zero + for _, w := range succFn(v) { + // if it has a dfnum, we've already visited it + if semi[w.ID] == 0 { + // yes, w can be pushed multiple times. + s = append(s, w) + parent[w.ID] = v.ID // keep overwriting this till it is visited. + } } } - return idom + return n } -// eval function from LT paper with path compression -func eval(v ID, ancestor []ID, semi []ID, dfnum []ID, best []ID) ID { - a := ancestor[v] - if ancestor[a] != 0 { - bid := eval(a, ancestor, semi, dfnum, best) - ancestor[v] = ancestor[a] - if dfnum[semi[bid]] < dfnum[semi[best[v]]] { - best[v] = bid +// compressOrig is the "simple" compress function from LT paper +func compressOrig(v ID, ancestor, semi, label []ID) { + if ancestor[ancestor[v]] != 0 { + compressOrig(ancestor[v], ancestor, semi, label) + if semi[label[ancestor[v]]] < semi[label[v]] { + label[v] = label[ancestor[v]] } + ancestor[v] = ancestor[ancestor[v]] + } +} + +// evalOrig is the "simple" eval function from LT paper +func evalOrig(v ID, ancestor, semi, label []ID) ID { + if ancestor[v] == 0 { + return v } - return best[v] + compressOrig(v, ancestor, semi, label) + return label[v] +} + +func linkOrig(v, w ID, ancestor []ID) { + ancestor[w] = v } // dominators computes the dominator tree for f. It returns a slice @@ -364,3 +302,9 @@ func intersect(b, c *Block, postnum []int, idom []*Block) *Block { } return b } + +// build immediate dominators. +func domTree(f *Func) { + f.idom = dominators(f) + f.sdom = newSparseTree(f, f.idom) +} diff --git a/src/cmd/compile/internal/ssa/dom_test.go b/src/cmd/compile/internal/ssa/dom_test.go index 9741edf331..6ecbe923d4 100644 --- a/src/cmd/compile/internal/ssa/dom_test.go +++ b/src/cmd/compile/internal/ssa/dom_test.go @@ -372,32 +372,6 @@ func TestDominatorsMultPred(t *testing.T) { verifyDominators(t, fun, dominatorsSimple, doms) } -func TestPostDominators(t *testing.T) { - c := testConfig(t) - fun := Fun(c, "entry", - Bloc("entry", - Valu("mem", OpInitMem, TypeMem, 0, nil), - Valu("p", OpConstBool, TypeBool, 1, nil), - If("p", "a", "c")), - Bloc("a", - If("p", "b", "c")), - Bloc("b", - Goto("c")), - Bloc("c", - If("p", "b", "exit")), - Bloc("exit", - Exit("mem"))) - - doms := map[string]string{"entry": "c", - "a": "c", - "b": "c", - "c": "exit", - } - - CheckFunc(fun.f) - verifyDominators(t, fun, postDominators, doms) -} - func TestInfiniteLoop(t *testing.T) { c := testConfig(t) // note lack of an exit block @@ -415,8 +389,184 @@ func TestInfiniteLoop(t *testing.T) { doms := map[string]string{"a": "entry", "b": "a"} verifyDominators(t, fun, dominators, doms) +} - // no exit block, so there are no post-dominators - postDoms := map[string]string{} - verifyDominators(t, fun, postDominators, postDoms) +func TestDomTricky(t *testing.T) { + doms := map[string]string{ + "4": "1", + "2": "4", + "5": "4", + "11": "4", + "15": "4", // the incorrect answer is "5" + "10": "15", + "19": "15", + } + + if4 := [2]string{"2", "5"} + if5 := [2]string{"15", "11"} + if15 := [2]string{"19", "10"} + + for i := 0; i < 8; i++ { + a := 1 & i + b := 1 & i >> 1 + c := 1 & i >> 2 + + fun := Fun(testConfig(t), "1", + Bloc("1", + Valu("mem", OpInitMem, TypeMem, 0, nil), + Valu("p", OpConstBool, TypeBool, 1, nil), + Goto("4")), + Bloc("2", + Goto("11")), + Bloc("4", + If("p", if4[a], if4[1-a])), // 2, 5 + Bloc("5", + If("p", if5[b], if5[1-b])), //15, 11 + Bloc("10", + Exit("mem")), + Bloc("11", + Goto("15")), + Bloc("15", + If("p", if15[c], if15[1-c])), //19, 10 + Bloc("19", + Goto("10"))) + CheckFunc(fun.f) + verifyDominators(t, fun, dominators, doms) + verifyDominators(t, fun, dominatorsSimple, doms) + } +} + +// generateDominatorMap uses dominatorsSimple to obtain a +// reference dominator tree for testing faster algorithms. +func generateDominatorMap(fut fun) map[string]string { + blockNames := map[*Block]string{} + for n, b := range fut.blocks { + blockNames[b] = n + } + referenceDom := dominatorsSimple(fut.f) + doms := make(map[string]string) + for _, b := range fut.f.Blocks { + if d := referenceDom[b.ID]; d != nil { + doms[blockNames[b]] = blockNames[d] + } + } + return doms +} + +func TestDominatorsPostTricky(t *testing.T) { + c := testConfig(t) + fun := Fun(c, "b1", + Bloc("b1", + Valu("mem", OpInitMem, TypeMem, 0, nil), + Valu("p", OpConstBool, TypeBool, 1, nil), + If("p", "b3", "b2")), + Bloc("b3", + If("p", "b5", "b6")), + Bloc("b5", + Goto("b7")), + Bloc("b7", + If("p", "b8", "b11")), + Bloc("b8", + Goto("b13")), + Bloc("b13", + If("p", "b14", "b15")), + Bloc("b14", + Goto("b10")), + Bloc("b15", + Goto("b16")), + Bloc("b16", + Goto("b9")), + Bloc("b9", + Goto("b7")), + Bloc("b11", + Goto("b12")), + Bloc("b12", + If("p", "b10", "b8")), + Bloc("b10", + Goto("b6")), + Bloc("b6", + Goto("b17")), + Bloc("b17", + Goto("b18")), + Bloc("b18", + If("p", "b22", "b19")), + Bloc("b22", + Goto("b23")), + Bloc("b23", + If("p", "b21", "b19")), + Bloc("b19", + If("p", "b24", "b25")), + Bloc("b24", + Goto("b26")), + Bloc("b26", + Goto("b25")), + Bloc("b25", + If("p", "b27", "b29")), + Bloc("b27", + Goto("b30")), + Bloc("b30", + Goto("b28")), + Bloc("b29", + Goto("b31")), + Bloc("b31", + Goto("b28")), + Bloc("b28", + If("p", "b32", "b33")), + Bloc("b32", + Goto("b21")), + Bloc("b21", + Goto("b47")), + Bloc("b47", + If("p", "b45", "b46")), + Bloc("b45", + Goto("b48")), + Bloc("b48", + Goto("b49")), + Bloc("b49", + If("p", "b50", "b51")), + Bloc("b50", + Goto("b52")), + Bloc("b52", + Goto("b53")), + Bloc("b53", + Goto("b51")), + Bloc("b51", + Goto("b54")), + Bloc("b54", + Goto("b46")), + Bloc("b46", + Exit("mem")), + Bloc("b33", + Goto("b34")), + Bloc("b34", + Goto("b37")), + Bloc("b37", + If("p", "b35", "b36")), + Bloc("b35", + Goto("b38")), + Bloc("b38", + Goto("b39")), + Bloc("b39", + If("p", "b40", "b41")), + Bloc("b40", + Goto("b42")), + Bloc("b42", + Goto("b43")), + Bloc("b43", + Goto("b41")), + Bloc("b41", + Goto("b44")), + Bloc("b44", + Goto("b36")), + Bloc("b36", + Goto("b20")), + Bloc("b20", + Goto("b18")), + Bloc("b2", + Goto("b4")), + Bloc("b4", + Exit("mem"))) + CheckFunc(fun.f) + doms := generateDominatorMap(fun) + verifyDominators(t, fun, dominators, doms) } diff --git a/src/cmd/compile/internal/ssa/export_test.go b/src/cmd/compile/internal/ssa/export_test.go index ce577ef055..0a67de9f05 100644 --- a/src/cmd/compile/internal/ssa/export_test.go +++ b/src/cmd/compile/internal/ssa/export_test.go @@ -48,6 +48,9 @@ func (d DummyFrontend) SplitComplex(s LocalSlot) (LocalSlot, LocalSlot) { } return LocalSlot{s.N, d.TypeFloat32(), s.Off}, LocalSlot{s.N, d.TypeFloat32(), s.Off + 4} } +func (d DummyFrontend) SplitStruct(s LocalSlot, i int) LocalSlot { + return LocalSlot{s.N, s.Type.FieldType(i), s.Off + s.Type.FieldOff(i)} +} func (DummyFrontend) Line(line int32) string { return "unknown.go:0" } diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go index 6e47b7f19c..11ff8d3792 100644 --- a/src/cmd/compile/internal/ssa/func.go +++ b/src/cmd/compile/internal/ssa/func.go @@ -36,6 +36,9 @@ type Func struct { freeValues *Value // free Values linked by argstorage[0]. All other fields except ID are 0/nil. freeBlocks *Block // free Blocks linked by succstorage[0]. All other fields except ID are 0/nil. + idom []*Block // precomputed immediate dominators + sdom sparseTree // precomputed dominator tree + constants map[int64][]*Value // constants cache, keyed by constant value; users must check value's Op and Type } @@ -284,7 +287,10 @@ func (b *Block) NewValue2I(line int32, op Op, t Type, auxint int64, arg0, arg1 * func (b *Block) NewValue3(line int32, op Op, t Type, arg0, arg1, arg2 *Value) *Value { v := b.Func.newValue(op, t, b, line) v.AuxInt = 0 - v.Args = []*Value{arg0, arg1, arg2} + v.Args = v.argstorage[:3] + v.argstorage[0] = arg0 + v.argstorage[1] = arg1 + v.argstorage[2] = arg2 arg0.Uses++ arg1.Uses++ arg2.Uses++ @@ -295,7 +301,10 @@ func (b *Block) NewValue3(line int32, op Op, t Type, arg0, arg1, arg2 *Value) *V func (b *Block) NewValue3I(line int32, op Op, t Type, auxint int64, arg0, arg1, arg2 *Value) *Value { v := b.Func.newValue(op, t, b, line) v.AuxInt = auxint - v.Args = []*Value{arg0, arg1, arg2} + v.Args = v.argstorage[:3] + v.argstorage[0] = arg0 + v.argstorage[1] = arg1 + v.argstorage[2] = arg2 arg0.Uses++ arg1.Uses++ arg2.Uses++ @@ -309,7 +318,7 @@ func (f *Func) constVal(line int32, op Op, t Type, c int64, setAux bool) *Value } vv := f.constants[c] for _, v := range vv { - if v.Op == op && v.Type.Equal(t) { + if v.Op == op && v.Type.Compare(t) == CMPeq { if setAux && v.AuxInt != c { panic(fmt.Sprintf("cached const %s should have AuxInt of %d", v.LongString(), c)) } diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 4ad0f883b0..86123ac5c5 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -6,23 +6,23 @@ (Add64 x y) -> (ADDQ x y) (AddPtr x y) -> (ADDQ x y) (Add32 x y) -> (ADDL x y) -(Add16 x y) -> (ADDW x y) -(Add8 x y) -> (ADDB x y) +(Add16 x y) -> (ADDL x y) +(Add8 x y) -> (ADDL x y) (Add32F x y) -> (ADDSS x y) (Add64F x y) -> (ADDSD x y) (Sub64 x y) -> (SUBQ x y) (SubPtr x y) -> (SUBQ x y) (Sub32 x y) -> (SUBL x y) -(Sub16 x y) -> (SUBW x y) -(Sub8 x y) -> (SUBB x y) +(Sub16 x y) -> (SUBL x y) +(Sub8 x y) -> (SUBL x y) (Sub32F x y) -> (SUBSS x y) (Sub64F x y) -> (SUBSD x y) (Mul64 x y) -> (MULQ x y) (Mul32 x y) -> (MULL x y) -(Mul16 x y) -> (MULW x y) -(Mul8 x y) -> (MULB x y) +(Mul16 x y) -> (MULL x y) +(Mul8 x y) -> (MULL x y) (Mul32F x y) -> (MULSS x y) (Mul64F x y) -> (MULSD x y) @@ -60,30 +60,30 @@ (And64 x y) -> (ANDQ x y) (And32 x y) -> (ANDL x y) -(And16 x y) -> (ANDW x y) -(And8 x y) -> (ANDB x y) +(And16 x y) -> (ANDL x y) +(And8 x y) -> (ANDL x y) (Or64 x y) -> (ORQ x y) (Or32 x y) -> (ORL x y) -(Or16 x y) -> (ORW x y) -(Or8 x y) -> (ORB x y) +(Or16 x y) -> (ORL x y) +(Or8 x y) -> (ORL x y) (Xor64 x y) -> (XORQ x y) (Xor32 x y) -> (XORL x y) -(Xor16 x y) -> (XORW x y) -(Xor8 x y) -> (XORB x y) +(Xor16 x y) -> (XORL x y) +(Xor8 x y) -> (XORL x y) (Neg64 x) -> (NEGQ x) (Neg32 x) -> (NEGL x) -(Neg16 x) -> (NEGW x) -(Neg8 x) -> (NEGB x) +(Neg16 x) -> (NEGL x) +(Neg8 x) -> (NEGL x) (Neg32F x) -> (PXOR x (MOVSSconst <config.Frontend().TypeFloat32()> [f2i(math.Copysign(0, -1))])) (Neg64F x) -> (PXOR x (MOVSDconst <config.Frontend().TypeFloat64()> [f2i(math.Copysign(0, -1))])) (Com64 x) -> (NOTQ x) (Com32 x) -> (NOTL x) -(Com16 x) -> (NOTW x) -(Com8 x) -> (NOTB x) +(Com16 x) -> (NOTL x) +(Com8 x) -> (NOTL x) // CMPQconst 0 below is redundant because BSF sets Z but how to remove? (Ctz64 <t> x) -> (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64]) @@ -169,15 +169,15 @@ (Lsh32x16 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32]))) (Lsh32x8 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32]))) -(Lsh16x64 <t> x y) -> (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16]))) -(Lsh16x32 <t> x y) -> (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16]))) -(Lsh16x16 <t> x y) -> (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16]))) -(Lsh16x8 <t> x y) -> (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16]))) +(Lsh16x64 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32]))) +(Lsh16x32 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32]))) +(Lsh16x16 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32]))) +(Lsh16x8 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32]))) -(Lsh8x64 <t> x y) -> (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8]))) -(Lsh8x32 <t> x y) -> (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8]))) -(Lsh8x16 <t> x y) -> (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8]))) -(Lsh8x8 <t> x y) -> (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8]))) +(Lsh8x64 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32]))) +(Lsh8x32 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32]))) +(Lsh8x16 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32]))) +(Lsh8x8 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32]))) (Lrot64 <t> x [c]) -> (ROLQconst <t> [c&63] x) (Lrot32 <t> x [c]) -> (ROLLconst <t> [c&31] x) @@ -194,38 +194,38 @@ (Rsh32Ux16 <t> x y) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32]))) (Rsh32Ux8 <t> x y) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32]))) -(Rsh16Ux64 <t> x y) -> (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16]))) -(Rsh16Ux32 <t> x y) -> (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16]))) -(Rsh16Ux16 <t> x y) -> (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16]))) -(Rsh16Ux8 <t> x y) -> (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16]))) +(Rsh16Ux64 <t> x y) -> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16]))) +(Rsh16Ux32 <t> x y) -> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16]))) +(Rsh16Ux16 <t> x y) -> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16]))) +(Rsh16Ux8 <t> x y) -> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16]))) -(Rsh8Ux64 <t> x y) -> (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8]))) -(Rsh8Ux32 <t> x y) -> (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8]))) -(Rsh8Ux16 <t> x y) -> (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8]))) -(Rsh8Ux8 <t> x y) -> (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8]))) +(Rsh8Ux64 <t> x y) -> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8]))) +(Rsh8Ux32 <t> x y) -> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8]))) +(Rsh8Ux16 <t> x y) -> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8]))) +(Rsh8Ux8 <t> x y) -> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8]))) // Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value. // We implement this by setting the shift value to -1 (all ones) if the shift value is >= width. // Note: for small shift widths we generate 32 bits of mask even when we don't need it all. (Rsh64x64 <t> x y) -> (SARQ <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [64]))))) (Rsh64x32 <t> x y) -> (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [64]))))) -(Rsh64x16 <t> x y) -> (SARQ <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [64]))))) -(Rsh64x8 <t> x y) -> (SARQ <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [64]))))) +(Rsh64x16 <t> x y) -> (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [64]))))) +(Rsh64x8 <t> x y) -> (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [64]))))) (Rsh32x64 <t> x y) -> (SARL <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [32]))))) (Rsh32x32 <t> x y) -> (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [32]))))) -(Rsh32x16 <t> x y) -> (SARL <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [32]))))) -(Rsh32x8 <t> x y) -> (SARL <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32]))))) +(Rsh32x16 <t> x y) -> (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [32]))))) +(Rsh32x8 <t> x y) -> (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32]))))) (Rsh16x64 <t> x y) -> (SARW <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [16]))))) (Rsh16x32 <t> x y) -> (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [16]))))) -(Rsh16x16 <t> x y) -> (SARW <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [16]))))) -(Rsh16x8 <t> x y) -> (SARW <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16]))))) +(Rsh16x16 <t> x y) -> (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [16]))))) +(Rsh16x8 <t> x y) -> (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16]))))) (Rsh8x64 <t> x y) -> (SARB <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [8]))))) (Rsh8x32 <t> x y) -> (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [8]))))) -(Rsh8x16 <t> x y) -> (SARB <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8]))))) -(Rsh8x8 <t> x y) -> (SARB <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8]))))) +(Rsh8x16 <t> x y) -> (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8]))))) +(Rsh8x8 <t> x y) -> (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8]))))) (Less64 x y) -> (SETL (CMPQ x y)) (Less32 x y) -> (SETL (CMPL x y)) @@ -281,6 +281,7 @@ (Eq32 x y) -> (SETEQ (CMPL x y)) (Eq16 x y) -> (SETEQ (CMPW x y)) (Eq8 x y) -> (SETEQ (CMPB x y)) +(EqB x y) -> (SETEQ (CMPB x y)) (EqPtr x y) -> (SETEQ (CMPQ x y)) (Eq64F x y) -> (SETEQF (UCOMISD x y)) (Eq32F x y) -> (SETEQF (UCOMISS x y)) @@ -289,6 +290,7 @@ (Neq32 x y) -> (SETNE (CMPL x y)) (Neq16 x y) -> (SETNE (CMPW x y)) (Neq8 x y) -> (SETNE (CMPB x y)) +(NeqB x y) -> (SETNE (CMPB x y)) (NeqPtr x y) -> (SETNE (CMPQ x y)) (Neq64F x y) -> (SETNEF (UCOMISD x y)) (Neq32F x y) -> (SETNEF (UCOMISS x y)) @@ -366,19 +368,21 @@ (Move [size] dst src mem) && (size > 16*64 || config.noDuffDevice) && size%8 == 0 -> (REPMOVSQ dst src (MOVQconst [size/8]) mem) -(Not x) -> (XORBconst [1] x) +(AndB x y) -> (ANDL x y) +(OrB x y) -> (ORL x y) +(Not x) -> (XORLconst [1] x) (OffPtr [off] ptr) && is32Bit(off) -> (ADDQconst [off] ptr) (OffPtr [off] ptr) -> (ADDQ (MOVQconst [off]) ptr) -(Const8 [val]) -> (MOVBconst [val]) -(Const16 [val]) -> (MOVWconst [val]) +(Const8 [val]) -> (MOVLconst [val]) +(Const16 [val]) -> (MOVLconst [val]) (Const32 [val]) -> (MOVLconst [val]) (Const64 [val]) -> (MOVQconst [val]) (Const32F [val]) -> (MOVSSconst [val]) (Const64F [val]) -> (MOVSDconst [val]) (ConstNil) -> (MOVQconst [0]) -(ConstBool [b]) -> (MOVBconst [b]) +(ConstBool [b]) -> (MOVLconst [b]) (Addr {sym} base) -> (LEAQ {sym} base) @@ -404,22 +408,22 @@ (If cond yes no) -> (NE (TESTB cond cond) yes no) -(NE (TESTB (SETL cmp)) yes no) -> (LT cmp yes no) -(NE (TESTB (SETLE cmp)) yes no) -> (LE cmp yes no) -(NE (TESTB (SETG cmp)) yes no) -> (GT cmp yes no) -(NE (TESTB (SETGE cmp)) yes no) -> (GE cmp yes no) -(NE (TESTB (SETEQ cmp)) yes no) -> (EQ cmp yes no) -(NE (TESTB (SETNE cmp)) yes no) -> (NE cmp yes no) -(NE (TESTB (SETB cmp)) yes no) -> (ULT cmp yes no) -(NE (TESTB (SETBE cmp)) yes no) -> (ULE cmp yes no) -(NE (TESTB (SETA cmp)) yes no) -> (UGT cmp yes no) -(NE (TESTB (SETAE cmp)) yes no) -> (UGE cmp yes no) +(NE (TESTB (SETL cmp) (SETL cmp)) yes no) -> (LT cmp yes no) +(NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) -> (LE cmp yes no) +(NE (TESTB (SETG cmp) (SETG cmp)) yes no) -> (GT cmp yes no) +(NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) -> (GE cmp yes no) +(NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) -> (EQ cmp yes no) +(NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) -> (NE cmp yes no) +(NE (TESTB (SETB cmp) (SETB cmp)) yes no) -> (ULT cmp yes no) +(NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) -> (ULE cmp yes no) +(NE (TESTB (SETA cmp) (SETA cmp)) yes no) -> (UGT cmp yes no) +(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no) // Special case for floating point - LF/LEF not generated -(NE (TESTB (SETGF cmp)) yes no) -> (UGT cmp yes no) -(NE (TESTB (SETGEF cmp)) yes no) -> (UGE cmp yes no) -(NE (TESTB (SETEQF cmp)) yes no) -> (EQF cmp yes no) -(NE (TESTB (SETNEF cmp)) yes no) -> (NEF cmp yes no) +(NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) -> (UGT cmp yes no) +(NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) -> (UGE cmp yes no) +(NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) -> (EQF cmp yes no) +(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) -> (NEF cmp yes no) // Disabled because it interferes with the pattern match above and makes worse code. // (SETNEF x) -> (ORQ (SETNE <config.Frontend().TypeInt8()> x) (SETNAN <config.Frontend().TypeInt8()> x)) @@ -439,44 +443,22 @@ (ADDQ (MOVQconst [c]) x) && is32Bit(c) -> (ADDQconst [c] x) (ADDL x (MOVLconst [c])) -> (ADDLconst [c] x) (ADDL (MOVLconst [c]) x) -> (ADDLconst [c] x) -(ADDW x (MOVWconst [c])) -> (ADDWconst [c] x) -(ADDW (MOVWconst [c]) x) -> (ADDWconst [c] x) -(ADDB x (MOVBconst [c])) -> (ADDBconst [c] x) -(ADDB (MOVBconst [c]) x) -> (ADDBconst [c] x) (SUBQ x (MOVQconst [c])) && is32Bit(c) -> (SUBQconst x [c]) (SUBQ (MOVQconst [c]) x) && is32Bit(c) -> (NEGQ (SUBQconst <v.Type> x [c])) (SUBL x (MOVLconst [c])) -> (SUBLconst x [c]) (SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c])) -(SUBW x (MOVWconst [c])) -> (SUBWconst x [c]) -(SUBW (MOVWconst [c]) x) -> (NEGW (SUBWconst <v.Type> x [c])) -(SUBB x (MOVBconst [c])) -> (SUBBconst x [c]) -(SUBB (MOVBconst [c]) x) -> (NEGB (SUBBconst <v.Type> x [c])) (MULQ x (MOVQconst [c])) && is32Bit(c) -> (MULQconst [c] x) (MULQ (MOVQconst [c]) x) && is32Bit(c) -> (MULQconst [c] x) (MULL x (MOVLconst [c])) -> (MULLconst [c] x) (MULL (MOVLconst [c]) x) -> (MULLconst [c] x) -(MULW x (MOVWconst [c])) -> (MULWconst [c] x) -(MULW (MOVWconst [c]) x) -> (MULWconst [c] x) -(MULB x (MOVBconst [c])) -> (MULBconst [c] x) -(MULB (MOVBconst [c]) x) -> (MULBconst [c] x) (ANDQ x (MOVQconst [c])) && is32Bit(c) -> (ANDQconst [c] x) (ANDQ (MOVQconst [c]) x) && is32Bit(c) -> (ANDQconst [c] x) (ANDL x (MOVLconst [c])) -> (ANDLconst [c] x) (ANDL (MOVLconst [c]) x) -> (ANDLconst [c] x) -(ANDW x (MOVLconst [c])) -> (ANDWconst [c] x) -(ANDW (MOVLconst [c]) x) -> (ANDWconst [c] x) -(ANDW x (MOVWconst [c])) -> (ANDWconst [c] x) -(ANDW (MOVWconst [c]) x) -> (ANDWconst [c] x) -(ANDB x (MOVLconst [c])) -> (ANDBconst [c] x) -(ANDB (MOVLconst [c]) x) -> (ANDBconst [c] x) -(ANDB x (MOVBconst [c])) -> (ANDBconst [c] x) -(ANDB (MOVBconst [c]) x) -> (ANDBconst [c] x) -(ANDBconst [c] (ANDBconst [d] x)) -> (ANDBconst [c & d] x) -(ANDWconst [c] (ANDWconst [d] x)) -> (ANDWconst [c & d] x) (ANDLconst [c] (ANDLconst [d] x)) -> (ANDLconst [c & d] x) (ANDQconst [c] (ANDQconst [d] x)) -> (ANDQconst [c & d] x) @@ -484,108 +466,69 @@ (ORQ (MOVQconst [c]) x) && is32Bit(c) -> (ORQconst [c] x) (ORL x (MOVLconst [c])) -> (ORLconst [c] x) (ORL (MOVLconst [c]) x) -> (ORLconst [c] x) -(ORW x (MOVWconst [c])) -> (ORWconst [c] x) -(ORW (MOVWconst [c]) x) -> (ORWconst [c] x) -(ORB x (MOVBconst [c])) -> (ORBconst [c] x) -(ORB (MOVBconst [c]) x) -> (ORBconst [c] x) (XORQ x (MOVQconst [c])) && is32Bit(c) -> (XORQconst [c] x) (XORQ (MOVQconst [c]) x) && is32Bit(c) -> (XORQconst [c] x) (XORL x (MOVLconst [c])) -> (XORLconst [c] x) (XORL (MOVLconst [c]) x) -> (XORLconst [c] x) -(XORW x (MOVWconst [c])) -> (XORWconst [c] x) -(XORW (MOVWconst [c]) x) -> (XORWconst [c] x) -(XORB x (MOVBconst [c])) -> (XORBconst [c] x) -(XORB (MOVBconst [c]) x) -> (XORBconst [c] x) (SHLQ x (MOVQconst [c])) -> (SHLQconst [c&63] x) (SHLQ x (MOVLconst [c])) -> (SHLQconst [c&63] x) -(SHLQ x (MOVWconst [c])) -> (SHLQconst [c&63] x) -(SHLQ x (MOVBconst [c])) -> (SHLQconst [c&63] x) (SHLL x (MOVQconst [c])) -> (SHLLconst [c&31] x) (SHLL x (MOVLconst [c])) -> (SHLLconst [c&31] x) -(SHLL x (MOVWconst [c])) -> (SHLLconst [c&31] x) -(SHLL x (MOVBconst [c])) -> (SHLLconst [c&31] x) - -(SHLW x (MOVQconst [c])) -> (SHLWconst [c&31] x) -(SHLW x (MOVLconst [c])) -> (SHLWconst [c&31] x) -(SHLW x (MOVWconst [c])) -> (SHLWconst [c&31] x) -(SHLW x (MOVBconst [c])) -> (SHLWconst [c&31] x) - -(SHLB x (MOVQconst [c])) -> (SHLBconst [c&31] x) -(SHLB x (MOVLconst [c])) -> (SHLBconst [c&31] x) -(SHLB x (MOVWconst [c])) -> (SHLBconst [c&31] x) -(SHLB x (MOVBconst [c])) -> (SHLBconst [c&31] x) (SHRQ x (MOVQconst [c])) -> (SHRQconst [c&63] x) (SHRQ x (MOVLconst [c])) -> (SHRQconst [c&63] x) -(SHRQ x (MOVWconst [c])) -> (SHRQconst [c&63] x) -(SHRQ x (MOVBconst [c])) -> (SHRQconst [c&63] x) (SHRL x (MOVQconst [c])) -> (SHRLconst [c&31] x) (SHRL x (MOVLconst [c])) -> (SHRLconst [c&31] x) -(SHRL x (MOVWconst [c])) -> (SHRLconst [c&31] x) -(SHRL x (MOVBconst [c])) -> (SHRLconst [c&31] x) (SHRW x (MOVQconst [c])) -> (SHRWconst [c&31] x) (SHRW x (MOVLconst [c])) -> (SHRWconst [c&31] x) -(SHRW x (MOVWconst [c])) -> (SHRWconst [c&31] x) -(SHRW x (MOVBconst [c])) -> (SHRWconst [c&31] x) (SHRB x (MOVQconst [c])) -> (SHRBconst [c&31] x) (SHRB x (MOVLconst [c])) -> (SHRBconst [c&31] x) -(SHRB x (MOVWconst [c])) -> (SHRBconst [c&31] x) -(SHRB x (MOVBconst [c])) -> (SHRBconst [c&31] x) (SARQ x (MOVQconst [c])) -> (SARQconst [c&63] x) (SARQ x (MOVLconst [c])) -> (SARQconst [c&63] x) -(SARQ x (MOVWconst [c])) -> (SARQconst [c&63] x) -(SARQ x (MOVBconst [c])) -> (SARQconst [c&63] x) (SARL x (MOVQconst [c])) -> (SARLconst [c&31] x) (SARL x (MOVLconst [c])) -> (SARLconst [c&31] x) -(SARL x (MOVWconst [c])) -> (SARLconst [c&31] x) -(SARL x (MOVBconst [c])) -> (SARLconst [c&31] x) (SARW x (MOVQconst [c])) -> (SARWconst [c&31] x) (SARW x (MOVLconst [c])) -> (SARWconst [c&31] x) -(SARW x (MOVWconst [c])) -> (SARWconst [c&31] x) -(SARW x (MOVBconst [c])) -> (SARWconst [c&31] x) (SARB x (MOVQconst [c])) -> (SARBconst [c&31] x) (SARB x (MOVLconst [c])) -> (SARBconst [c&31] x) -(SARB x (MOVWconst [c])) -> (SARBconst [c&31] x) -(SARB x (MOVBconst [c])) -> (SARBconst [c&31] x) -(SARB x (ANDBconst [31] y)) -> (SARB x y) -(SARW x (ANDWconst [31] y)) -> (SARW x y) (SARL x (ANDLconst [31] y)) -> (SARL x y) (SARQ x (ANDQconst [63] y)) -> (SARQ x y) -(SHLB x (ANDBconst [31] y)) -> (SHLB x y) -(SHLW x (ANDWconst [31] y)) -> (SHLW x y) (SHLL x (ANDLconst [31] y)) -> (SHLL x y) (SHLQ x (ANDQconst [63] y)) -> (SHLQ x y) -(SHRB x (ANDBconst [31] y)) -> (SHRB x y) -(SHRW x (ANDWconst [31] y)) -> (SHRW x y) (SHRL x (ANDLconst [31] y)) -> (SHRL x y) (SHRQ x (ANDQconst [63] y)) -> (SHRQ x y) // Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits) // because the x86 instructions are defined to use all 5 bits of the shift even // for the small shifts. I don't think we'll ever generate a weird shift (e.g. -// (SHLW x (MOVWconst [24])), but just in case. +// (SHRW x (MOVLconst [24])), but just in case. (CMPQ x (MOVQconst [c])) && is32Bit(c) -> (CMPQconst x [c]) (CMPQ (MOVQconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPQconst x [c])) (CMPL x (MOVLconst [c])) -> (CMPLconst x [c]) (CMPL (MOVLconst [c]) x) -> (InvertFlags (CMPLconst x [c])) -(CMPW x (MOVWconst [c])) -> (CMPWconst x [c]) -(CMPW (MOVWconst [c]) x) -> (InvertFlags (CMPWconst x [c])) -(CMPB x (MOVBconst [c])) -> (CMPBconst x [c]) -(CMPB (MOVBconst [c]) x) -> (InvertFlags (CMPBconst x [c])) +(CMPW x (MOVLconst [c])) -> (CMPWconst x [int64(int16(c))]) +(CMPW (MOVLconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int16(c))])) +(CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))]) +(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))])) + +// Using MOVBQZX instead of ANDQ is cheaper. +(ANDQconst [0xFF] x) -> (MOVBQZX x) +(ANDQconst [0xFFFF] x) -> (MOVWQZX x) +(ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x) // strength reduction // Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf: @@ -684,18 +627,18 @@ // Make sure we don't combine these ops if the load has another use. // This prevents a single load from being split into multiple loads // which then might return different values. See test/atomicload.go. -(MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) -(MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) -(MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem) -(MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem) -(MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem) -(MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem) +(MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) +(MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) +(MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem) +(MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem) +(MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem) +(MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem) -(MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem) -(MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem) -(MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem) -(MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 -> @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem) -(MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) && x.Uses == 1 -> @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem) +(MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem) +(MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem) +(MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem) +(MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem) +(MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem) // replace load from same location as preceding store with copy (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x @@ -704,12 +647,12 @@ (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x // Fold extensions and ANDs together. -(MOVBQZX (ANDBconst [c] x)) -> (ANDQconst [c & 0xff] x) -(MOVWQZX (ANDWconst [c] x)) -> (ANDQconst [c & 0xffff] x) -(MOVLQZX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDQconst [c & 0x7fffffff] x) -(MOVBQSX (ANDBconst [c] x)) && c & 0x80 == 0 -> (ANDQconst [c & 0x7f] x) -(MOVWQSX (ANDWconst [c] x)) && c & 0x8000 == 0 -> (ANDQconst [c & 0x7fff] x) -(MOVLQSX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDQconst [c & 0x7fffffff] x) +(MOVBQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xff] x) +(MOVWQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xffff] x) +(MOVLQZX (ANDLconst [c] x)) -> (ANDLconst [c] x) +(MOVBQSX (ANDLconst [c] x)) && c & 0x80 == 0 -> (ANDLconst [c & 0x7f] x) +(MOVWQSX (ANDLconst [c] x)) && c & 0x8000 == 0 -> (ANDLconst [c & 0x7fff] x) +(MOVLQSX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDLconst [c & 0x7fffffff] x) // Don't extend before storing (MOVLstore [off] {sym} ptr (MOVLQSX x) mem) -> (MOVLstore [off] {sym} ptr x mem) @@ -745,9 +688,9 @@ (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem) (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) -> (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem) -(MOVWstore [off] {sym} ptr (MOVWconst [c]) mem) && validOff(off) -> +(MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) -> (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) -(MOVBstore [off] {sym} ptr (MOVBconst [c]) mem) && validOff(off) -> +(MOVBstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) -> (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem) // Fold address offsets into constant stores. @@ -1081,22 +1024,27 @@ (CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT) (CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT) (CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT) -(CMPWconst (MOVWconst [x]) [y]) && int16(x)==int16(y) -> (FlagEQ) -(CMPWconst (MOVWconst [x]) [y]) && int16(x)<int16(y) && uint16(x)<uint16(y) -> (FlagLT_ULT) -(CMPWconst (MOVWconst [x]) [y]) && int16(x)<int16(y) && uint16(x)>uint16(y) -> (FlagLT_UGT) -(CMPWconst (MOVWconst [x]) [y]) && int16(x)>int16(y) && uint16(x)<uint16(y) -> (FlagGT_ULT) -(CMPWconst (MOVWconst [x]) [y]) && int16(x)>int16(y) && uint16(x)>uint16(y) -> (FlagGT_UGT) -(CMPBconst (MOVBconst [x]) [y]) && int8(x)==int8(y) -> (FlagEQ) -(CMPBconst (MOVBconst [x]) [y]) && int8(x)<int8(y) && uint8(x)<uint8(y) -> (FlagLT_ULT) -(CMPBconst (MOVBconst [x]) [y]) && int8(x)<int8(y) && uint8(x)>uint8(y) -> (FlagLT_UGT) -(CMPBconst (MOVBconst [x]) [y]) && int8(x)>int8(y) && uint8(x)<uint8(y) -> (FlagGT_ULT) -(CMPBconst (MOVBconst [x]) [y]) && int8(x)>int8(y) && uint8(x)>uint8(y) -> (FlagGT_UGT) +(CMPWconst (MOVLconst [x]) [y]) && int16(x)==int16(y) -> (FlagEQ) +(CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)<uint16(y) -> (FlagLT_ULT) +(CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)>uint16(y) -> (FlagLT_UGT) +(CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)<uint16(y) -> (FlagGT_ULT) +(CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)>uint16(y) -> (FlagGT_UGT) +(CMPBconst (MOVLconst [x]) [y]) && int8(x)==int8(y) -> (FlagEQ) +(CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)<uint8(y) -> (FlagLT_ULT) +(CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)>uint8(y) -> (FlagLT_UGT) +(CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)<uint8(y) -> (FlagGT_ULT) +(CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)>uint8(y) -> (FlagGT_UGT) // Other known comparisons. +(CMPQconst (MOVBQZX _) [c]) && 0xFF < c -> (FlagLT_ULT) +(CMPQconst (MOVWQZX _) [c]) && 0xFFFF < c -> (FlagLT_ULT) +(CMPQconst (MOVLQZX _) [c]) && 0xFFFFFFFF < c -> (FlagLT_ULT) +(CMPLconst (SHRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) -> (FlagLT_ULT) +(CMPQconst (SHRQconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n) -> (FlagLT_ULT) (CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT) (CMPLconst (ANDLconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT_ULT) -(CMPWconst (ANDWconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < int16(n) -> (FlagLT_ULT) -(CMPBconst (ANDBconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < int8(n) -> (FlagLT_ULT) +(CMPWconst (ANDLconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < int16(n) -> (FlagLT_ULT) +(CMPBconst (ANDLconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < int8(n) -> (FlagLT_ULT) // TODO: DIVxU also. // Absorb flag constants into SBB ops. @@ -1173,175 +1121,140 @@ (UGE (FlagGT_UGT) yes no) -> (First nil yes no) // Absorb flag constants into SETxx ops. -(SETEQ (FlagEQ)) -> (MOVBconst [1]) -(SETEQ (FlagLT_ULT)) -> (MOVBconst [0]) -(SETEQ (FlagLT_UGT)) -> (MOVBconst [0]) -(SETEQ (FlagGT_ULT)) -> (MOVBconst [0]) -(SETEQ (FlagGT_UGT)) -> (MOVBconst [0]) +(SETEQ (FlagEQ)) -> (MOVLconst [1]) +(SETEQ (FlagLT_ULT)) -> (MOVLconst [0]) +(SETEQ (FlagLT_UGT)) -> (MOVLconst [0]) +(SETEQ (FlagGT_ULT)) -> (MOVLconst [0]) +(SETEQ (FlagGT_UGT)) -> (MOVLconst [0]) -(SETNE (FlagEQ)) -> (MOVBconst [0]) -(SETNE (FlagLT_ULT)) -> (MOVBconst [1]) -(SETNE (FlagLT_UGT)) -> (MOVBconst [1]) -(SETNE (FlagGT_ULT)) -> (MOVBconst [1]) -(SETNE (FlagGT_UGT)) -> (MOVBconst [1]) +(SETNE (FlagEQ)) -> (MOVLconst [0]) +(SETNE (FlagLT_ULT)) -> (MOVLconst [1]) +(SETNE (FlagLT_UGT)) -> (MOVLconst [1]) +(SETNE (FlagGT_ULT)) -> (MOVLconst [1]) +(SETNE (FlagGT_UGT)) -> (MOVLconst [1]) -(SETL (FlagEQ)) -> (MOVBconst [0]) -(SETL (FlagLT_ULT)) -> (MOVBconst [1]) -(SETL (FlagLT_UGT)) -> (MOVBconst [1]) -(SETL (FlagGT_ULT)) -> (MOVBconst [0]) -(SETL (FlagGT_UGT)) -> (MOVBconst [0]) +(SETL (FlagEQ)) -> (MOVLconst [0]) +(SETL (FlagLT_ULT)) -> (MOVLconst [1]) +(SETL (FlagLT_UGT)) -> (MOVLconst [1]) +(SETL (FlagGT_ULT)) -> (MOVLconst [0]) +(SETL (FlagGT_UGT)) -> (MOVLconst [0]) -(SETLE (FlagEQ)) -> (MOVBconst [1]) -(SETLE (FlagLT_ULT)) -> (MOVBconst [1]) -(SETLE (FlagLT_UGT)) -> (MOVBconst [1]) -(SETLE (FlagGT_ULT)) -> (MOVBconst [0]) -(SETLE (FlagGT_UGT)) -> (MOVBconst [0]) +(SETLE (FlagEQ)) -> (MOVLconst [1]) +(SETLE (FlagLT_ULT)) -> (MOVLconst [1]) +(SETLE (FlagLT_UGT)) -> (MOVLconst [1]) +(SETLE (FlagGT_ULT)) -> (MOVLconst [0]) +(SETLE (FlagGT_UGT)) -> (MOVLconst [0]) -(SETG (FlagEQ)) -> (MOVBconst [0]) -(SETG (FlagLT_ULT)) -> (MOVBconst [0]) -(SETG (FlagLT_UGT)) -> (MOVBconst [0]) -(SETG (FlagGT_ULT)) -> (MOVBconst [1]) -(SETG (FlagGT_UGT)) -> (MOVBconst [1]) +(SETG (FlagEQ)) -> (MOVLconst [0]) +(SETG (FlagLT_ULT)) -> (MOVLconst [0]) +(SETG (FlagLT_UGT)) -> (MOVLconst [0]) +(SETG (FlagGT_ULT)) -> (MOVLconst [1]) +(SETG (FlagGT_UGT)) -> (MOVLconst [1]) -(SETGE (FlagEQ)) -> (MOVBconst [1]) -(SETGE (FlagLT_ULT)) -> (MOVBconst [0]) -(SETGE (FlagLT_UGT)) -> (MOVBconst [0]) -(SETGE (FlagGT_ULT)) -> (MOVBconst [1]) -(SETGE (FlagGT_UGT)) -> (MOVBconst [1]) +(SETGE (FlagEQ)) -> (MOVLconst [1]) +(SETGE (FlagLT_ULT)) -> (MOVLconst [0]) +(SETGE (FlagLT_UGT)) -> (MOVLconst [0]) +(SETGE (FlagGT_ULT)) -> (MOVLconst [1]) +(SETGE (FlagGT_UGT)) -> (MOVLconst [1]) -(SETB (FlagEQ)) -> (MOVBconst [0]) -(SETB (FlagLT_ULT)) -> (MOVBconst [1]) -(SETB (FlagLT_UGT)) -> (MOVBconst [0]) -(SETB (FlagGT_ULT)) -> (MOVBconst [1]) -(SETB (FlagGT_UGT)) -> (MOVBconst [0]) +(SETB (FlagEQ)) -> (MOVLconst [0]) +(SETB (FlagLT_ULT)) -> (MOVLconst [1]) +(SETB (FlagLT_UGT)) -> (MOVLconst [0]) +(SETB (FlagGT_ULT)) -> (MOVLconst [1]) +(SETB (FlagGT_UGT)) -> (MOVLconst [0]) -(SETBE (FlagEQ)) -> (MOVBconst [1]) -(SETBE (FlagLT_ULT)) -> (MOVBconst [1]) -(SETBE (FlagLT_UGT)) -> (MOVBconst [0]) -(SETBE (FlagGT_ULT)) -> (MOVBconst [1]) -(SETBE (FlagGT_UGT)) -> (MOVBconst [0]) +(SETBE (FlagEQ)) -> (MOVLconst [1]) +(SETBE (FlagLT_ULT)) -> (MOVLconst [1]) +(SETBE (FlagLT_UGT)) -> (MOVLconst [0]) +(SETBE (FlagGT_ULT)) -> (MOVLconst [1]) +(SETBE (FlagGT_UGT)) -> (MOVLconst [0]) -(SETA (FlagEQ)) -> (MOVBconst [0]) -(SETA (FlagLT_ULT)) -> (MOVBconst [0]) -(SETA (FlagLT_UGT)) -> (MOVBconst [1]) -(SETA (FlagGT_ULT)) -> (MOVBconst [0]) -(SETA (FlagGT_UGT)) -> (MOVBconst [1]) +(SETA (FlagEQ)) -> (MOVLconst [0]) +(SETA (FlagLT_ULT)) -> (MOVLconst [0]) +(SETA (FlagLT_UGT)) -> (MOVLconst [1]) +(SETA (FlagGT_ULT)) -> (MOVLconst [0]) +(SETA (FlagGT_UGT)) -> (MOVLconst [1]) -(SETAE (FlagEQ)) -> (MOVBconst [1]) -(SETAE (FlagLT_ULT)) -> (MOVBconst [0]) -(SETAE (FlagLT_UGT)) -> (MOVBconst [1]) -(SETAE (FlagGT_ULT)) -> (MOVBconst [0]) -(SETAE (FlagGT_UGT)) -> (MOVBconst [1]) +(SETAE (FlagEQ)) -> (MOVLconst [1]) +(SETAE (FlagLT_ULT)) -> (MOVLconst [0]) +(SETAE (FlagLT_UGT)) -> (MOVLconst [1]) +(SETAE (FlagGT_ULT)) -> (MOVLconst [0]) +(SETAE (FlagGT_UGT)) -> (MOVLconst [1]) // Remove redundant *const ops (ADDQconst [0] x) -> x (ADDLconst [c] x) && int32(c)==0 -> x -(ADDWconst [c] x) && int16(c)==0 -> x -(ADDBconst [c] x) && int8(c)==0 -> x (SUBQconst [0] x) -> x (SUBLconst [c] x) && int32(c) == 0 -> x -(SUBWconst [c] x) && int16(c) == 0 -> x -(SUBBconst [c] x) && int8(c) == 0 -> x (ANDQconst [0] _) -> (MOVQconst [0]) (ANDLconst [c] _) && int32(c)==0 -> (MOVLconst [0]) -(ANDWconst [c] _) && int16(c)==0 -> (MOVWconst [0]) -(ANDBconst [c] _) && int8(c)==0 -> (MOVBconst [0]) (ANDQconst [-1] x) -> x (ANDLconst [c] x) && int32(c)==-1 -> x -(ANDWconst [c] x) && int16(c)==-1 -> x -(ANDBconst [c] x) && int8(c)==-1 -> x (ORQconst [0] x) -> x (ORLconst [c] x) && int32(c)==0 -> x -(ORWconst [c] x) && int16(c)==0 -> x -(ORBconst [c] x) && int8(c)==0 -> x (ORQconst [-1] _) -> (MOVQconst [-1]) (ORLconst [c] _) && int32(c)==-1 -> (MOVLconst [-1]) -(ORWconst [c] _) && int16(c)==-1 -> (MOVWconst [-1]) -(ORBconst [c] _) && int8(c)==-1 -> (MOVBconst [-1]) (XORQconst [0] x) -> x (XORLconst [c] x) && int32(c)==0 -> x -(XORWconst [c] x) && int16(c)==0 -> x -(XORBconst [c] x) && int8(c)==0 -> x +// TODO: since we got rid of the W/B versions, we might miss +// things like (ANDLconst [0x100] x) which were formerly +// (ANDBconst [0] x). Probably doesn't happen very often. +// If we cared, we might do: +// (ANDLconst <t> [c] x) && t.Size()==1 && int8(x)==0 -> (MOVLconst [0]) + +// Convert constant subtracts to constant adds +(SUBQconst [c] x) && c != -(1<<31) -> (ADDQconst [-c] x) +(SUBLconst [c] x) -> (ADDLconst [int64(int32(-c))] x) // generic constant folding // TODO: more of this (ADDQconst [c] (MOVQconst [d])) -> (MOVQconst [c+d]) (ADDLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c+d))]) -(ADDWconst [c] (MOVWconst [d])) -> (MOVWconst [int64(int16(c+d))]) -(ADDBconst [c] (MOVBconst [d])) -> (MOVBconst [int64(int8(c+d))]) (ADDQconst [c] (ADDQconst [d] x)) && is32Bit(c+d) -> (ADDQconst [c+d] x) (ADDLconst [c] (ADDLconst [d] x)) -> (ADDLconst [int64(int32(c+d))] x) -(ADDWconst [c] (ADDWconst [d] x)) -> (ADDWconst [int64(int16(c+d))] x) -(ADDBconst [c] (ADDBconst [d] x)) -> (ADDBconst [int64(int8(c+d))] x) (SUBQconst (MOVQconst [d]) [c]) -> (MOVQconst [d-c]) (SUBLconst (MOVLconst [d]) [c]) -> (MOVLconst [int64(int32(d-c))]) -(SUBWconst (MOVWconst [d]) [c]) -> (MOVWconst [int64(int16(d-c))]) -(SUBBconst (MOVBconst [d]) [c]) -> (MOVBconst [int64(int8(d-c))]) (SUBQconst (SUBQconst x [d]) [c]) && is32Bit(-c-d) -> (ADDQconst [-c-d] x) (SUBLconst (SUBLconst x [d]) [c]) -> (ADDLconst [int64(int32(-c-d))] x) -(SUBWconst (SUBWconst x [d]) [c]) -> (ADDWconst [int64(int16(-c-d))] x) -(SUBBconst (SUBBconst x [d]) [c]) -> (ADDBconst [int64(int8(-c-d))] x) (SARQconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)]) (SARLconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)]) (SARWconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)]) (SARBconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)]) (NEGQ (MOVQconst [c])) -> (MOVQconst [-c]) (NEGL (MOVLconst [c])) -> (MOVLconst [int64(int32(-c))]) -(NEGW (MOVWconst [c])) -> (MOVWconst [int64(int16(-c))]) -(NEGB (MOVBconst [c])) -> (MOVBconst [int64(int8(-c))]) (MULQconst [c] (MOVQconst [d])) -> (MOVQconst [c*d]) (MULLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c*d))]) -(MULWconst [c] (MOVWconst [d])) -> (MOVWconst [int64(int16(c*d))]) -(MULBconst [c] (MOVBconst [d])) -> (MOVBconst [int64(int8(c*d))]) (ANDQconst [c] (MOVQconst [d])) -> (MOVQconst [c&d]) (ANDLconst [c] (MOVLconst [d])) -> (MOVLconst [c&d]) -(ANDWconst [c] (MOVWconst [d])) -> (MOVWconst [c&d]) -(ANDBconst [c] (MOVBconst [d])) -> (MOVBconst [c&d]) (ORQconst [c] (MOVQconst [d])) -> (MOVQconst [c|d]) (ORLconst [c] (MOVLconst [d])) -> (MOVLconst [c|d]) -(ORWconst [c] (MOVWconst [d])) -> (MOVWconst [c|d]) -(ORBconst [c] (MOVBconst [d])) -> (MOVBconst [c|d]) (XORQconst [c] (MOVQconst [d])) -> (MOVQconst [c^d]) (XORLconst [c] (MOVLconst [d])) -> (MOVLconst [c^d]) -(XORWconst [c] (MOVWconst [d])) -> (MOVWconst [c^d]) -(XORBconst [c] (MOVBconst [d])) -> (MOVBconst [c^d]) (NOTQ (MOVQconst [c])) -> (MOVQconst [^c]) (NOTL (MOVLconst [c])) -> (MOVLconst [^c]) -(NOTW (MOVWconst [c])) -> (MOVWconst [^c]) -(NOTB (MOVBconst [c])) -> (MOVBconst [^c]) // generic simplifications // TODO: more of this (ADDQ x (NEGQ y)) -> (SUBQ x y) (ADDL x (NEGL y)) -> (SUBL x y) -(ADDW x (NEGW y)) -> (SUBW x y) -(ADDB x (NEGB y)) -> (SUBB x y) (SUBQ x x) -> (MOVQconst [0]) (SUBL x x) -> (MOVLconst [0]) -(SUBW x x) -> (MOVWconst [0]) -(SUBB x x) -> (MOVBconst [0]) (ANDQ x x) -> x (ANDL x x) -> x -(ANDW x x) -> x -(ANDB x x) -> x (ORQ x x) -> x (ORL x x) -> x -(ORW x x) -> x -(ORB x x) -> x (XORQ x x) -> (MOVQconst [0]) (XORL x x) -> (MOVLconst [0]) -(XORW x x) -> (MOVWconst [0]) -(XORB x x) -> (MOVBconst [0]) // checking AND against 0. (CMPQconst (ANDQ x y) [0]) -> (TESTQ x y) (CMPLconst (ANDL x y) [0]) -> (TESTL x y) -(CMPWconst (ANDW x y) [0]) -> (TESTW x y) -(CMPBconst (ANDB x y) [0]) -> (TESTB x y) +(CMPWconst (ANDL x y) [0]) -> (TESTW x y) +(CMPBconst (ANDL x y) [0]) -> (TESTB x y) (CMPQconst (ANDQconst [c] x) [0]) -> (TESTQconst [c] x) (CMPLconst (ANDLconst [c] x) [0]) -> (TESTLconst [c] x) -(CMPWconst (ANDWconst [c] x) [0]) -> (TESTWconst [c] x) -(CMPBconst (ANDBconst [c] x) [0]) -> (TESTBconst [c] x) +(CMPWconst (ANDLconst [c] x) [0]) -> (TESTWconst [int64(int16(c))] x) +(CMPBconst (ANDLconst [c] x) [0]) -> (TESTBconst [int64(int8(c))] x) // TEST %reg,%reg is shorter than CMP (CMPQconst x [0]) -> (TESTQ x x) @@ -1352,40 +1265,296 @@ // Combining byte loads into larger (unaligned) loads. // There are many ways these combinations could occur. This is // designed to match the way encoding/binary.LittleEndian does it. -(ORW x0:(MOVBload [i] {s} p mem) - (SHLWconst [8] x1:(MOVBload [i+1] {s} p mem))) && mergePoint(b,x0,x1) != nil -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem) +(ORL x0:(MOVBload [i] {s} p mem) + s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem))) + && x0.Uses == 1 + && x1.Uses == 1 + && s0.Uses == 1 + && mergePoint(b,x0,x1) != nil + && clobber(x0) + && clobber(x1) + && clobber(s0) + -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem) + +(ORL o0:(ORL o1:(ORL + x0:(MOVBload [i] {s} p mem) + s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem))) + s1:(SHLLconst [16] x2:(MOVBload [i+2] {s} p mem))) + s2:(SHLLconst [24] x3:(MOVBload [i+3] {s} p mem))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && x3.Uses == 1 + && s0.Uses == 1 + && s1.Uses == 1 + && s2.Uses == 1 + && o0.Uses == 1 + && o1.Uses == 1 + && mergePoint(b,x0,x1,x2,x3) != nil + && clobber(x0) + && clobber(x1) + && clobber(x2) + && clobber(x3) + && clobber(s0) + && clobber(s1) + && clobber(s2) + && clobber(o0) + && clobber(o1) + -> @mergePoint(b,x0,x1,x2,x3) (MOVLload [i] {s} p mem) + +(ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ + x0:(MOVBload [i] {s} p mem) + s0:(SHLQconst [8] x1:(MOVBload [i+1] {s} p mem))) + s1:(SHLQconst [16] x2:(MOVBload [i+2] {s} p mem))) + s2:(SHLQconst [24] x3:(MOVBload [i+3] {s} p mem))) + s3:(SHLQconst [32] x4:(MOVBload [i+4] {s} p mem))) + s4:(SHLQconst [40] x5:(MOVBload [i+5] {s} p mem))) + s5:(SHLQconst [48] x6:(MOVBload [i+6] {s} p mem))) + s6:(SHLQconst [56] x7:(MOVBload [i+7] {s} p mem))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && x3.Uses == 1 + && x4.Uses == 1 + && x5.Uses == 1 + && x6.Uses == 1 + && x7.Uses == 1 + && s0.Uses == 1 + && s1.Uses == 1 + && s2.Uses == 1 + && s3.Uses == 1 + && s4.Uses == 1 + && s5.Uses == 1 + && s6.Uses == 1 + && o0.Uses == 1 + && o1.Uses == 1 + && o2.Uses == 1 + && o3.Uses == 1 + && o4.Uses == 1 + && o5.Uses == 1 + && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil + && clobber(x0) + && clobber(x1) + && clobber(x2) + && clobber(x3) + && clobber(x4) + && clobber(x5) + && clobber(x6) + && clobber(x7) + && clobber(s0) + && clobber(s1) + && clobber(s2) + && clobber(s3) + && clobber(s4) + && clobber(s5) + && clobber(s6) + && clobber(o0) + && clobber(o1) + && clobber(o2) + && clobber(o3) + && clobber(o4) + && clobber(o5) + -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem) + +(ORL x0:(MOVBloadidx1 [i] {s} p idx mem) + s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) + && x0.Uses == 1 + && x1.Uses == 1 + && s0.Uses == 1 + && mergePoint(b,x0,x1) != nil + && clobber(x0) + && clobber(x1) + && clobber(s0) + -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem) + +(ORL o0:(ORL o1:(ORL + x0:(MOVBloadidx1 [i] {s} p idx mem) + s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) + s1:(SHLLconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem))) + s2:(SHLLconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && x3.Uses == 1 + && s0.Uses == 1 + && s1.Uses == 1 + && s2.Uses == 1 + && o0.Uses == 1 + && o1.Uses == 1 + && mergePoint(b,x0,x1,x2,x3) != nil + && clobber(x0) + && clobber(x1) + && clobber(x2) + && clobber(x3) + && clobber(s0) + && clobber(s1) + && clobber(s2) + && clobber(o0) + && clobber(o1) + -> @mergePoint(b,x0,x1,x2,x3) (MOVLloadidx1 <v.Type> [i] {s} p idx mem) + +(ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ + x0:(MOVBloadidx1 [i] {s} p idx mem) + s0:(SHLQconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) + s1:(SHLQconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem))) + s2:(SHLQconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem))) + s3:(SHLQconst [32] x4:(MOVBloadidx1 [i+4] {s} p idx mem))) + s4:(SHLQconst [40] x5:(MOVBloadidx1 [i+5] {s} p idx mem))) + s5:(SHLQconst [48] x6:(MOVBloadidx1 [i+6] {s} p idx mem))) + s6:(SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && x3.Uses == 1 + && x4.Uses == 1 + && x5.Uses == 1 + && x6.Uses == 1 + && x7.Uses == 1 + && s0.Uses == 1 + && s1.Uses == 1 + && s2.Uses == 1 + && s3.Uses == 1 + && s4.Uses == 1 + && s5.Uses == 1 + && s6.Uses == 1 + && o0.Uses == 1 + && o1.Uses == 1 + && o2.Uses == 1 + && o3.Uses == 1 + && o4.Uses == 1 + && o5.Uses == 1 + && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil + && clobber(x0) + && clobber(x1) + && clobber(x2) + && clobber(x3) + && clobber(x4) + && clobber(x5) + && clobber(x6) + && clobber(x7) + && clobber(s0) + && clobber(s1) + && clobber(s2) + && clobber(s3) + && clobber(s4) + && clobber(s5) + && clobber(s6) + && clobber(o0) + && clobber(o1) + && clobber(o2) + && clobber(o3) + && clobber(o4) + && clobber(o5) + -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem) + +// Combine constant stores into larger (unaligned) stores. +(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) + && x.Uses == 1 + && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() + && clobber(x) + -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) +(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) + && x.Uses == 1 + && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() + && clobber(x) + -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) +(MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem)) + && x.Uses == 1 + && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() + && clobber(x) + -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) -(ORL (ORL (ORL - x0:(MOVBload [i] {s} p mem) - (SHLLconst [8] x1:(MOVBload [i+1] {s} p mem))) - (SHLLconst [16] x2:(MOVBload [i+2] {s} p mem))) - (SHLLconst [24] x3:(MOVBload [i+3] {s} p mem))) && mergePoint(b,x0,x1,x2,x3) != nil -> @mergePoint(b,x0,x1,x2,x3) (MOVLload [i] {s} p mem) +(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem)) + && x.Uses == 1 + && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() + && clobber(x) + -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem) +(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem)) + && x.Uses == 1 + && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() + && clobber(x) + -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem) +(MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem)) + && x.Uses == 1 + && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() + && clobber(x) + -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) -(ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ - x0:(MOVBload [i] {s} p mem) - (SHLQconst [8] x1:(MOVBload [i+1] {s} p mem))) - (SHLQconst [16] x2:(MOVBload [i+2] {s} p mem))) - (SHLQconst [24] x3:(MOVBload [i+3] {s} p mem))) - (SHLQconst [32] x4:(MOVBload [i+4] {s} p mem))) - (SHLQconst [40] x5:(MOVBload [i+5] {s} p mem))) - (SHLQconst [48] x6:(MOVBload [i+6] {s} p mem))) - (SHLQconst [56] x7:(MOVBload [i+7] {s} p mem))) && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem) +(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem)) + && x.Uses == 1 + && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() + && clobber(x) + -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem) +(MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem)) + && x.Uses == 1 + && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() + && clobber(x) + -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) -(ORW x0:(MOVBloadidx1 [i] {s} p idx mem) - (SHLWconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) && mergePoint(b,x0,x1) != nil -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem) +// Combine stores into larger (unaligned) stores. +(MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstore [i-1] {s} p w mem) +(MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstore [i-1] {s} p w0 mem) +(MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstore [i-2] {s} p w mem) +(MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstore [i-2] {s} p w0 mem) +(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVQstore [i-4] {s} p w mem) +(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVQstore [i-4] {s} p w0 mem) -(ORL (ORL (ORL - x0:(MOVBloadidx1 [i] {s} p idx mem) - (SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) - (SHLLconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem))) - (SHLLconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem))) && mergePoint(b,x0,x1,x2,x3) != nil -> @mergePoint(b,x0,x1,x2,x3) (MOVLloadidx1 <v.Type> [i] {s} p idx mem) +(MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstoreidx1 [i-1] {s} p idx w mem) +(MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem) +(MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstoreidx1 [i-2] {s} p idx w mem) +(MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem) +(MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVQstoreidx1 [i-4] {s} p idx w mem) +(MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVQstoreidx1 [i-4] {s} p idx w0 mem) -(ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ - x0:(MOVBloadidx1 [i] {s} p idx mem) - (SHLQconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) - (SHLQconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem))) - (SHLQconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem))) - (SHLQconst [32] x4:(MOVBloadidx1 [i+4] {s} p idx mem))) - (SHLQconst [40] x5:(MOVBloadidx1 [i+5] {s} p idx mem))) - (SHLQconst [48] x6:(MOVBloadidx1 [i+6] {s} p idx mem))) - (SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem))) && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem) +(MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem) +(MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem) +(MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem) +(MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index b1698c0cf1..b684b9ccdf 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -111,12 +111,14 @@ func init() { // Common regInfo var ( gp01 = regInfo{inputs: []regMask{}, outputs: gponly} - gp11 = regInfo{inputs: []regMask{gpsp}, outputs: gponly, clobbers: flags} + gp11 = regInfo{inputs: []regMask{gp}, outputs: gponly, clobbers: flags} + gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly, clobbers: flags} gp11nf = regInfo{inputs: []regMask{gpsp}, outputs: gponly} // nf: no flags clobbered gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly} - gp21 = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: gponly, clobbers: flags} + gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly, clobbers: flags} + gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly, clobbers: flags} gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly} - gp21shift = regInfo{inputs: []regMask{gpsp, cx}, outputs: []regMask{gp &^ cx}, clobbers: flags} + gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}, clobbers: flags} gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx | flags} gp11hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, @@ -128,8 +130,8 @@ func init() { gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly} flagsgp = regInfo{inputs: flagsonly, outputs: gponly} - // for CMOVconst -- uses AX to hold constant temporary. AX input is moved before temp. - gp1flagsgp = regInfo{inputs: []regMask{gp, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}} + // for CMOVconst -- uses AX to hold constant temporary. + gp1flagsgp = regInfo{inputs: []regMask{gp &^ ax, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}} readflags = regInfo{inputs: flagsonly, outputs: gponly} flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}} @@ -186,32 +188,20 @@ func init() { {name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff"}, // fp64 indexed by 8i store // binary ops - {name: "ADDQ", argLength: 2, reg: gp21, asm: "ADDQ", commutative: true, resultInArg0: true}, // arg0 + arg1 - {name: "ADDL", argLength: 2, reg: gp21, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1 - {name: "ADDW", argLength: 2, reg: gp21, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1 - {name: "ADDB", argLength: 2, reg: gp21, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1 - {name: "ADDQconst", argLength: 1, reg: gp11, asm: "ADDQ", aux: "Int64", resultInArg0: true, typ: "UInt64"}, // arg0 + auxint - {name: "ADDLconst", argLength: 1, reg: gp11, asm: "ADDL", aux: "Int32", resultInArg0: true}, // arg0 + auxint - {name: "ADDWconst", argLength: 1, reg: gp11, asm: "ADDL", aux: "Int16", resultInArg0: true}, // arg0 + auxint - {name: "ADDBconst", argLength: 1, reg: gp11, asm: "ADDL", aux: "Int8", resultInArg0: true}, // arg0 + auxint + {name: "ADDQ", argLength: 2, reg: gp21sp, asm: "ADDQ", commutative: true}, // arg0 + arg1 + {name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true}, // arg0 + arg1 + {name: "ADDQconst", argLength: 1, reg: gp11sp, asm: "ADDQ", aux: "Int64", typ: "UInt64"}, // arg0 + auxint + {name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32"}, // arg0 + auxint {name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true}, // arg0 - arg1 {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true}, // arg0 - arg1 - {name: "SUBW", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true}, // arg0 - arg1 - {name: "SUBB", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true}, // arg0 - arg1 {name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int64", resultInArg0: true}, // arg0 - auxint {name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true}, // arg0 - auxint - {name: "SUBWconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int16", resultInArg0: true}, // arg0 - auxint - {name: "SUBBconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int8", resultInArg0: true}, // arg0 - auxint {name: "MULQ", argLength: 2, reg: gp21, asm: "IMULQ", commutative: true, resultInArg0: true}, // arg0 * arg1 {name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true}, // arg0 * arg1 - {name: "MULW", argLength: 2, reg: gp21, asm: "IMULW", commutative: true, resultInArg0: true}, // arg0 * arg1 - {name: "MULB", argLength: 2, reg: gp21, asm: "IMULW", commutative: true, resultInArg0: true}, // arg0 * arg1 {name: "MULQconst", argLength: 1, reg: gp11, asm: "IMULQ", aux: "Int64", resultInArg0: true}, // arg0 * auxint {name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true}, // arg0 * auxint - {name: "MULWconst", argLength: 1, reg: gp11, asm: "IMULW", aux: "Int16", resultInArg0: true}, // arg0 * auxint - {name: "MULBconst", argLength: 1, reg: gp11, asm: "IMULW", aux: "Int8", resultInArg0: true}, // arg0 * auxint {name: "HMULQ", argLength: 2, reg: gp11hmul, asm: "IMULQ"}, // (arg0 * arg1) >> width {name: "HMULL", argLength: 2, reg: gp11hmul, asm: "IMULL"}, // (arg0 * arg1) >> width @@ -240,30 +230,18 @@ func init() { {name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true}, // arg0 & arg1 {name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true}, // arg0 & arg1 - {name: "ANDW", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true}, // arg0 & arg1 - {name: "ANDB", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true}, // arg0 & arg1 {name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int64", resultInArg0: true}, // arg0 & auxint {name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true}, // arg0 & auxint - {name: "ANDWconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int16", resultInArg0: true}, // arg0 & auxint - {name: "ANDBconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int8", resultInArg0: true}, // arg0 & auxint {name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true}, // arg0 | arg1 {name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true}, // arg0 | arg1 - {name: "ORW", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true}, // arg0 | arg1 - {name: "ORB", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true}, // arg0 | arg1 {name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int64", resultInArg0: true}, // arg0 | auxint {name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true}, // arg0 | auxint - {name: "ORWconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int16", resultInArg0: true}, // arg0 | auxint - {name: "ORBconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int8", resultInArg0: true}, // arg0 | auxint {name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true}, // arg0 ^ arg1 {name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true}, // arg0 ^ arg1 - {name: "XORW", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true}, // arg0 ^ arg1 - {name: "XORB", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true}, // arg0 ^ arg1 {name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int64", resultInArg0: true}, // arg0 ^ auxint {name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true}, // arg0 ^ auxint - {name: "XORWconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int16", resultInArg0: true}, // arg0 ^ auxint - {name: "XORBconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int8", resultInArg0: true}, // arg0 ^ auxint {name: "CMPQ", argLength: 2, reg: gp2flags, asm: "CMPQ", typ: "Flags"}, // arg0 compare to arg1 {name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"}, // arg0 compare to arg1 @@ -288,12 +266,8 @@ func init() { {name: "SHLQ", argLength: 2, reg: gp21shift, asm: "SHLQ", resultInArg0: true}, // arg0 << arg1, shift amount is mod 64 {name: "SHLL", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true}, // arg0 << arg1, shift amount is mod 32 - {name: "SHLW", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true}, // arg0 << arg1, shift amount is mod 32 - {name: "SHLB", argLength: 2, reg: gp21shift, asm: "SHLL", resultInArg0: true}, // arg0 << arg1, shift amount is mod 32 {name: "SHLQconst", argLength: 1, reg: gp11, asm: "SHLQ", aux: "Int64", resultInArg0: true}, // arg0 << auxint, shift amount 0-63 {name: "SHLLconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int32", resultInArg0: true}, // arg0 << auxint, shift amount 0-31 - {name: "SHLWconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int16", resultInArg0: true}, // arg0 << auxint, shift amount 0-31 - {name: "SHLBconst", argLength: 1, reg: gp11, asm: "SHLL", aux: "Int8", resultInArg0: true}, // arg0 << auxint, shift amount 0-31 // Note: x86 is weird, the 16 and 8 byte shifts still use all 5 bits of shift amount! {name: "SHRQ", argLength: 2, reg: gp21shift, asm: "SHRQ", resultInArg0: true}, // unsigned arg0 >> arg1, shift amount is mod 64 @@ -322,13 +296,9 @@ func init() { // unary ops {name: "NEGQ", argLength: 1, reg: gp11, asm: "NEGQ", resultInArg0: true}, // -arg0 {name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true}, // -arg0 - {name: "NEGW", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true}, // -arg0 - {name: "NEGB", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true}, // -arg0 {name: "NOTQ", argLength: 1, reg: gp11, asm: "NOTQ", resultInArg0: true}, // ^arg0 {name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0 - {name: "NOTW", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0 - {name: "NOTB", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0 {name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ"}, // arg0 # of low-order zeroes ; undef if zero {name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL"}, // arg0 # of low-order zeroes ; undef if zero @@ -383,8 +353,6 @@ func init() { {name: "MOVLQSX", argLength: 1, reg: gp11nf, asm: "MOVLQSX"}, // sign extend arg0 from int32 to int64 {name: "MOVLQZX", argLength: 1, reg: gp11nf, asm: "MOVLQZX"}, // zero extend arg0 from int32 to int64 - {name: "MOVBconst", reg: gp01, asm: "MOVB", typ: "UInt8", aux: "Int8", rematerializeable: true}, // 8 low bits of auxint - {name: "MOVWconst", reg: gp01, asm: "MOVW", typ: "UInt16", aux: "Int16", rematerializeable: true}, // 16 low bits of auxint {name: "MOVLconst", reg: gp01, asm: "MOVL", typ: "UInt32", aux: "Int32", rematerializeable: true}, // 32 low bits of auxint {name: "MOVQconst", reg: gp01, asm: "MOVQ", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint @@ -471,7 +439,7 @@ func init() { clobbers: buildReg("DI FLAGS"), }, }, - {name: "MOVOconst", reg: regInfo{nil, 0, []regMask{fp}}, typ: "Int128", rematerializeable: true}, + {name: "MOVOconst", reg: regInfo{nil, 0, []regMask{fp}}, typ: "Int128", aux: "Int128", rematerializeable: true}, // arg0 = address of memory to zero // arg1 = # of 8-byte words to zero diff --git a/src/cmd/compile/internal/ssa/gen/ARMOps.go b/src/cmd/compile/internal/ssa/gen/ARMOps.go index a4f7b17e87..23e8f63471 100644 --- a/src/cmd/compile/internal/ssa/gen/ARMOps.go +++ b/src/cmd/compile/internal/ssa/gen/ARMOps.go @@ -25,13 +25,13 @@ func init() { {name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1 - {name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW"}, // load from arg0 + auxInt + aux. arg1=mem. - {name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. + {name: "MOVWload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVW"}, // load from arg0 + auxInt + aux. arg1=mem. + {name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff"}, // call static function aux.(*gc.Sym). arg0=mem, auxint=argsize, returns mem // pseudo-ops - {name: "LessThan", argLength: 2, reg: flagsgp}, // bool, 1 flags encode x<y 0 otherwise. + {name: "LessThan", argLength: 1, reg: flagsgp}, // bool, 1 flags encode x<y 0 otherwise. } blocks := []blockData{ diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules index b56e3f1b2d..b33037f100 100644 --- a/src/cmd/compile/internal/ssa/gen/generic.rules +++ b/src/cmd/compile/internal/ssa/gen/generic.rules @@ -81,13 +81,13 @@ (Rsh64Ux64 (Const64 [c]) (Const64 [d])) -> (Const64 [int64(uint64(c) >> uint64(d))]) (Lsh32x64 (Const32 [c]) (Const64 [d])) -> (Const32 [int64(int32(c) << uint64(d))]) (Rsh32x64 (Const32 [c]) (Const64 [d])) -> (Const32 [int64(int32(c) >> uint64(d))]) -(Rsh32Ux64 (Const32 [c]) (Const64 [d])) -> (Const32 [int64(uint32(c) >> uint64(d))]) +(Rsh32Ux64 (Const32 [c]) (Const64 [d])) -> (Const32 [int64(int32(uint32(c) >> uint64(d)))]) (Lsh16x64 (Const16 [c]) (Const64 [d])) -> (Const16 [int64(int16(c) << uint64(d))]) (Rsh16x64 (Const16 [c]) (Const64 [d])) -> (Const16 [int64(int16(c) >> uint64(d))]) -(Rsh16Ux64 (Const16 [c]) (Const64 [d])) -> (Const16 [int64(uint16(c) >> uint64(d))]) +(Rsh16Ux64 (Const16 [c]) (Const64 [d])) -> (Const16 [int64(int16(uint16(c) >> uint64(d)))]) (Lsh8x64 (Const8 [c]) (Const64 [d])) -> (Const8 [int64(int8(c) << uint64(d))]) (Rsh8x64 (Const8 [c]) (Const64 [d])) -> (Const8 [int64(int8(c) >> uint64(d))]) -(Rsh8Ux64 (Const8 [c]) (Const64 [d])) -> (Const8 [int64(uint8(c) >> uint64(d))]) +(Rsh8Ux64 (Const8 [c]) (Const64 [d])) -> (Const8 [int64(int8(uint8(c) >> uint64(d)))]) (Lsh64x64 (Const64 [0]) _) -> (Const64 [0]) (Rsh64x64 (Const64 [0]) _) -> (Const64 [0]) @@ -114,7 +114,7 @@ (Lsh16x16 (Rsh16Ux16 (Lsh16x16 x (Const16 [c1])) (Const16 [c2])) (Const16 [c3])) && uint16(c1) >= uint16(c2) && uint16(c3) >= uint16(c2) -> (Lsh16x16 x (Const16 <config.fe.TypeUInt16()> [int64(int16(c1-c2+c3))])) (Lsh8x8 (Rsh8Ux8 (Lsh8x8 x (Const8 [c1])) (Const8 [c2])) (Const8 [c3])) && uint8(c1) >= uint8(c2) && uint8(c3) >= uint8(c2) -> (Lsh8x8 x (Const8 <config.fe.TypeUInt8()> [int64(int8(c1-c2+c3))])) -// Fold IsInBounds when the range of the index cannot exceed the limt. +// Fold IsInBounds when the range of the index cannot exceed the limit. (IsInBounds (ZeroExt8to32 _) (Const32 [c])) && (1 << 8) <= c -> (ConstBool [1]) (IsInBounds (ZeroExt8to64 _) (Const64 [c])) && (1 << 8) <= c -> (ConstBool [1]) (IsInBounds (ZeroExt16to32 _) (Const32 [c])) && (1 << 16) <= c -> (ConstBool [1]) @@ -141,17 +141,17 @@ (Eq32 x x) -> (ConstBool [1]) (Eq16 x x) -> (ConstBool [1]) (Eq8 x x) -> (ConstBool [1]) -(Eq8 (ConstBool [c]) (ConstBool [d])) -> (ConstBool [b2i(c == d)]) -(Eq8 (ConstBool [0]) x) -> (Not x) -(Eq8 (ConstBool [1]) x) -> x +(EqB (ConstBool [c]) (ConstBool [d])) -> (ConstBool [b2i(c == d)]) +(EqB (ConstBool [0]) x) -> (Not x) +(EqB (ConstBool [1]) x) -> x (Neq64 x x) -> (ConstBool [0]) (Neq32 x x) -> (ConstBool [0]) (Neq16 x x) -> (ConstBool [0]) (Neq8 x x) -> (ConstBool [0]) -(Neq8 (ConstBool [c]) (ConstBool [d])) -> (ConstBool [b2i(c != d)]) -(Neq8 (ConstBool [0]) x) -> x -(Neq8 (ConstBool [1]) x) -> (Not x) +(NeqB (ConstBool [c]) (ConstBool [d])) -> (ConstBool [b2i(c != d)]) +(NeqB (ConstBool [0]) x) -> x +(NeqB (ConstBool [1]) x) -> (Not x) (Eq64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) -> (Eq64 (Const64 <t> [c-d]) x) (Eq32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) -> (Eq32 (Const32 <t> [int64(int32(c-d))]) x) @@ -168,13 +168,11 @@ (Eq32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Eq32 (Const32 <t> [c]) x) (Eq16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Eq16 (Const16 <t> [c]) x) (Eq8 x (Const8 <t> [c])) && x.Op != OpConst8 -> (Eq8 (Const8 <t> [c]) x) -(Eq8 x (ConstBool <t> [c])) && x.Op != OpConstBool -> (Eq8 (ConstBool <t> [c]) x) (Neq64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Neq64 (Const64 <t> [c]) x) (Neq32 x (Const32 <t> [c])) && x.Op != OpConst32 -> (Neq32 (Const32 <t> [c]) x) (Neq16 x (Const16 <t> [c])) && x.Op != OpConst16 -> (Neq16 (Const16 <t> [c]) x) (Neq8 x (Const8 <t> [c])) && x.Op != OpConst8 -> (Neq8 (Const8 <t> [c]) x) -(Neq8 x (ConstBool <t> [c])) && x.Op != OpConstBool -> (Neq8 (ConstBool <t> [c]) x) // AddPtr is not canonicalized because nilcheck ptr checks the first argument to be non-nil. (Add64 x (Const64 <t> [c])) && x.Op != OpConst64 -> (Add64 (Const64 <t> [c]) x) @@ -414,6 +412,55 @@ (Neg32 (Sub32 x y)) -> (Sub32 y x) (Neg64 (Sub64 x y)) -> (Sub64 y x) +(And64 x (And64 x y)) -> (And64 x y) +(And32 x (And32 x y)) -> (And32 x y) +(And16 x (And16 x y)) -> (And16 x y) +(And8 x (And8 x y)) -> (And8 x y) +(And64 x (And64 y x)) -> (And64 x y) +(And32 x (And32 y x)) -> (And32 x y) +(And16 x (And16 y x)) -> (And16 x y) +(And8 x (And8 y x)) -> (And8 x y) +(And64 (And64 x y) x) -> (And64 x y) +(And32 (And32 x y) x) -> (And32 x y) +(And16 (And16 x y) x) -> (And16 x y) +(And8 (And8 x y) x) -> (And8 x y) +(And64 (And64 x y) y) -> (And64 x y) +(And32 (And32 x y) y) -> (And32 x y) +(And16 (And16 x y) y) -> (And16 x y) +(And8 (And8 x y) y) -> (And8 x y) +(Or64 x (Or64 x y)) -> (Or64 x y) +(Or32 x (Or32 x y)) -> (Or32 x y) +(Or16 x (Or16 x y)) -> (Or16 x y) +(Or8 x (Or8 x y)) -> (Or8 x y) +(Or64 x (Or64 y x)) -> (Or64 x y) +(Or32 x (Or32 y x)) -> (Or32 x y) +(Or16 x (Or16 y x)) -> (Or16 x y) +(Or8 x (Or8 y x)) -> (Or8 x y) +(Or64 (Or64 x y) x) -> (Or64 x y) +(Or32 (Or32 x y) x) -> (Or32 x y) +(Or16 (Or16 x y) x) -> (Or16 x y) +(Or8 (Or8 x y) x) -> (Or8 x y) +(Or64 (Or64 x y) y) -> (Or64 x y) +(Or32 (Or32 x y) y) -> (Or32 x y) +(Or16 (Or16 x y) y) -> (Or16 x y) +(Or8 (Or8 x y) y) -> (Or8 x y) +(Xor64 x (Xor64 x y)) -> y +(Xor32 x (Xor32 x y)) -> y +(Xor16 x (Xor16 x y)) -> y +(Xor8 x (Xor8 x y)) -> y +(Xor64 x (Xor64 y x)) -> y +(Xor32 x (Xor32 y x)) -> y +(Xor16 x (Xor16 y x)) -> y +(Xor8 x (Xor8 y x)) -> y +(Xor64 (Xor64 x y) x) -> y +(Xor32 (Xor32 x y) x) -> y +(Xor16 (Xor16 x y) x) -> y +(Xor8 (Xor8 x y) x) -> y +(Xor64 (Xor64 x y) y) -> x +(Xor32 (Xor32 x y) y) -> x +(Xor16 (Xor16 x y) y) -> x +(Xor8 (Xor8 x y) y) -> x + (Trunc64to8 (And64 (Const64 [y]) x)) && y&0xFF == 0xFF -> (Trunc64to8 x) (Trunc64to16 (And64 (Const64 [y]) x)) && y&0xFFFF == 0xFFFF -> (Trunc64to16 x) (Trunc64to32 (And64 (Const64 [y]) x)) && y&0xFFFFFFFF == 0xFFFFFFFF -> (Trunc64to32 x) diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go index e6a0e8355b..88ae8b189d 100644 --- a/src/cmd/compile/internal/ssa/gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/gen/genericOps.go @@ -237,9 +237,14 @@ var genericOps = []opData{ {name: "Geq32F", argLength: 2}, {name: "Geq64F", argLength: 2}, - // 1-input ops - {name: "Not", argLength: 1}, // !arg0, boolean + // boolean ops + {name: "AndB", argLength: 2}, // arg0 && arg1 (not shortcircuited) + {name: "OrB", argLength: 2}, // arg0 || arg1 (not shortcircuited) + {name: "EqB", argLength: 2}, // arg0 == arg1 + {name: "NeqB", argLength: 2}, // arg0 != arg1 + {name: "Not", argLength: 1}, // !arg0, boolean + // 1-input ops {name: "Neg8", argLength: 1}, // -arg0 {name: "Neg16", argLength: 1}, {name: "Neg32", argLength: 1}, diff --git a/src/cmd/compile/internal/ssa/gen/main.go b/src/cmd/compile/internal/ssa/gen/main.go index db3c43d3a3..2aec4a324b 100644 --- a/src/cmd/compile/internal/ssa/gen/main.go +++ b/src/cmd/compile/internal/ssa/gen/main.go @@ -39,7 +39,7 @@ type opData struct { rematerializeable bool argLength int32 // number of arguments, if -1, then this operation has a variable number of arguments commutative bool // this operation is commutative (e.g. addition) - resultInArg0 bool // prefer v and v.Args[0] to be allocated to the same register + resultInArg0 bool // v and v.Args[0] must be allocated to the same register } type blockData struct { @@ -155,6 +155,12 @@ func genOp() { } if v.resultInArg0 { fmt.Fprintln(w, "resultInArg0: true,") + if v.reg.inputs[0] != v.reg.outputs[0] { + log.Fatalf("input[0] and output registers must be equal for %s", v.name) + } + if v.commutative && v.reg.inputs[1] != v.reg.outputs[0] { + log.Fatalf("input[1] and output registers must be equal for %s", v.name) + } } if a.name == "generic" { fmt.Fprintln(w, "generic:true,") diff --git a/src/cmd/compile/internal/ssa/gen/rulegen.go b/src/cmd/compile/internal/ssa/gen/rulegen.go index b40f480f3e..5f7d1cf984 100644 --- a/src/cmd/compile/internal/ssa/gen/rulegen.go +++ b/src/cmd/compile/internal/ssa/gen/rulegen.go @@ -52,12 +52,12 @@ var ( ) type Rule struct { - rule string - lineno int + rule string + loc string // file name & line number } func (r Rule) String() string { - return fmt.Sprintf("rule %q at line %d", r.rule, r.lineno) + return fmt.Sprintf("rule %q at %s", r.rule, r.loc) } // parse returns the matching part of the rule, additional conditions, and the result. @@ -91,6 +91,7 @@ func genRules(arch arch) { scanner := bufio.NewScanner(text) rule := "" var lineno int + var ruleLineno int // line number of "->" for scanner.Scan() { lineno++ line := scanner.Text() @@ -107,6 +108,9 @@ func genRules(arch arch) { if !strings.Contains(rule, "->") { continue } + if ruleLineno == 0 { + ruleLineno = lineno + } if strings.HasSuffix(rule, "->") { continue } @@ -117,18 +121,20 @@ func genRules(arch arch) { if op[len(op)-1] == ')' { op = op[:len(op)-1] // rule has only opcode, e.g. (ConstNil) -> ... } + loc := fmt.Sprintf("%s.rules:%d", arch.name, ruleLineno) if isBlock(op, arch) { - blockrules[op] = append(blockrules[op], Rule{rule: rule, lineno: lineno}) + blockrules[op] = append(blockrules[op], Rule{rule: rule, loc: loc}) } else { - oprules[op] = append(oprules[op], Rule{rule: rule, lineno: lineno}) + oprules[op] = append(oprules[op], Rule{rule: rule, loc: loc}) } rule = "" + ruleLineno = 0 } if err := scanner.Err(); err != nil { log.Fatalf("scanner failed: %v\n", err) } if unbalanced(rule) { - log.Fatalf("unbalanced rule at line %d: %v\n", lineno, rule) + log.Fatalf("%s.rules:%d: unbalanced rule: %v\n", arch.name, lineno, rule) } // Order all the ops. @@ -174,15 +180,15 @@ func genRules(arch arch) { fmt.Fprintf(w, "// result: %s\n", result) fmt.Fprintf(w, "for {\n") - genMatch(w, arch, match) + genMatch(w, arch, match, rule.loc) if cond != "" { fmt.Fprintf(w, "if !(%s) {\nbreak\n}\n", cond) } - genResult(w, arch, result) + genResult(w, arch, result, rule.loc) if *genLog { - fmt.Fprintf(w, "fmt.Println(\"rewrite %s.rules:%d\")\n", arch.name, rule.lineno) + fmt.Fprintf(w, "fmt.Println(\"rewrite %s\")\n", rule.loc) } fmt.Fprintf(w, "return true\n") @@ -217,7 +223,7 @@ func genRules(arch arch) { if s[1] != "nil" { fmt.Fprintf(w, "v := b.Control\n") if strings.Contains(s[1], "(") { - genMatch0(w, arch, s[1], "v", map[string]struct{}{}, false) + genMatch0(w, arch, s[1], "v", map[string]struct{}{}, false, rule.loc) } else { fmt.Fprintf(w, "%s := b.Control\n", s[1]) } @@ -266,7 +272,7 @@ func genRules(arch arch) { if t[1] == "nil" { fmt.Fprintf(w, "b.SetControl(nil)\n") } else { - fmt.Fprintf(w, "b.SetControl(%s)\n", genResult0(w, arch, t[1], new(int), false, false)) + fmt.Fprintf(w, "b.SetControl(%s)\n", genResult0(w, arch, t[1], new(int), false, false, rule.loc)) } if len(newsuccs) < len(succs) { fmt.Fprintf(w, "b.Succs = b.Succs[:%d]\n", len(newsuccs)) @@ -289,7 +295,7 @@ func genRules(arch arch) { } if *genLog { - fmt.Fprintf(w, "fmt.Println(\"rewrite %s.rules:%d\")\n", arch.name, rule.lineno) + fmt.Fprintf(w, "fmt.Println(\"rewrite %s\")\n", rule.loc) } fmt.Fprintf(w, "return true\n") @@ -315,11 +321,11 @@ func genRules(arch arch) { } } -func genMatch(w io.Writer, arch arch, match string) { - genMatch0(w, arch, match, "v", map[string]struct{}{}, true) +func genMatch(w io.Writer, arch arch, match string, loc string) { + genMatch0(w, arch, match, "v", map[string]struct{}{}, true, loc) } -func genMatch0(w io.Writer, arch arch, match, v string, m map[string]struct{}, top bool) { +func genMatch0(w io.Writer, arch arch, match, v string, m map[string]struct{}, top bool, loc string) { if match[0] != '(' || match[len(match)-1] != ')' { panic("non-compound expr in genMatch0: " + match) } @@ -328,6 +334,24 @@ func genMatch0(w io.Writer, arch arch, match, v string, m map[string]struct{}, t // contained in () or {}. s := split(match[1 : len(match)-1]) // remove parens, then split + // Find op record + var op opData + for _, x := range genericOps { + if x.name == s[0] { + op = x + break + } + } + for _, x := range arch.ops { + if x.name == s[0] { + op = x + break + } + } + if op.name == "" { + log.Fatalf("%s: unknown op %s", loc, s[0]) + } + // check op if !top { fmt.Fprintf(w, "if %s.Op != %s {\nbreak\n}\n", v, opName(s[0], arch)) @@ -354,6 +378,11 @@ func genMatch0(w io.Writer, arch arch, match, v string, m map[string]struct{}, t } } else if a[0] == '[' { // auxint restriction + switch op.aux { + case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64", "SymOff", "SymValAndOff", "SymInt32": + default: + log.Fatalf("%s: op %s %s can't have auxint", loc, op.name, op.aux) + } x := a[1 : len(a)-1] // remove [] if !isVariable(x) { // code @@ -368,7 +397,12 @@ func genMatch0(w io.Writer, arch arch, match, v string, m map[string]struct{}, t } } } else if a[0] == '{' { - // auxint restriction + // aux restriction + switch op.aux { + case "String", "Sym", "SymOff", "SymValAndOff", "SymInt32": + default: + log.Fatalf("%s: op %s %s can't have aux", loc, op.name, op.aux) + } x := a[1 : len(a)-1] // remove {} if !isVariable(x) { // code @@ -412,30 +446,18 @@ func genMatch0(w io.Writer, arch arch, match, v string, m map[string]struct{}, t argname = fmt.Sprintf("%s_%d", v, argnum) } fmt.Fprintf(w, "%s := %s.Args[%d]\n", argname, v, argnum) - genMatch0(w, arch, a, argname, m, false) + genMatch0(w, arch, a, argname, m, false, loc) argnum++ } } - - variableLength := false - for _, op := range genericOps { - if op.name == s[0] && op.argLength == -1 { - variableLength = true - break - } - } - for _, op := range arch.ops { - if op.name == s[0] && op.argLength == -1 { - variableLength = true - break - } - } - if variableLength { + if op.argLength == -1 { fmt.Fprintf(w, "if len(%s.Args) != %d {\nbreak\n}\n", v, argnum) + } else if int(op.argLength) != argnum { + log.Fatalf("%s: op %s should have %d args, has %d", loc, op.name, op.argLength, argnum) } } -func genResult(w io.Writer, arch arch, result string) { +func genResult(w io.Writer, arch arch, result string, loc string) { move := false if result[0] == '@' { // parse @block directive @@ -444,9 +466,9 @@ func genResult(w io.Writer, arch arch, result string) { result = s[1] move = true } - genResult0(w, arch, result, new(int), true, move) + genResult0(w, arch, result, new(int), true, move, loc) } -func genResult0(w io.Writer, arch arch, result string, alloc *int, top, move bool) string { +func genResult0(w io.Writer, arch arch, result string, alloc *int, top, move bool, loc string) string { // TODO: when generating a constant result, use f.constVal to avoid // introducing copies just to clean them up again. if result[0] != '(' { @@ -464,6 +486,24 @@ func genResult0(w io.Writer, arch arch, result string, alloc *int, top, move boo s := split(result[1 : len(result)-1]) // remove parens, then split + // Find op record + var op opData + for _, x := range genericOps { + if x.name == s[0] { + op = x + break + } + } + for _, x := range arch.ops { + if x.name == s[0] { + op = x + break + } + } + if op.name == "" { + log.Fatalf("%s: unknown op %s", loc, s[0]) + } + // Find the type of the variable. var opType string var typeOverride bool @@ -512,23 +552,38 @@ func genResult0(w io.Writer, arch arch, result string, alloc *int, top, move boo fmt.Fprintf(w, "v.AddArg(%s)\n", v) } } + argnum := 0 for _, a := range s[1:] { if a[0] == '<' { // type restriction, handled above } else if a[0] == '[' { // auxint restriction + switch op.aux { + case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64", "SymOff", "SymValAndOff", "SymInt32": + default: + log.Fatalf("%s: op %s %s can't have auxint", loc, op.name, op.aux) + } x := a[1 : len(a)-1] // remove [] fmt.Fprintf(w, "%s.AuxInt = %s\n", v, x) } else if a[0] == '{' { // aux restriction + switch op.aux { + case "String", "Sym", "SymOff", "SymValAndOff", "SymInt32": + default: + log.Fatalf("%s: op %s %s can't have aux", loc, op.name, op.aux) + } x := a[1 : len(a)-1] // remove {} fmt.Fprintf(w, "%s.Aux = %s\n", v, x) } else { // regular argument (sexpr or variable) - x := genResult0(w, arch, a, alloc, false, move) + x := genResult0(w, arch, a, alloc, false, move, loc) fmt.Fprintf(w, "%s.AddArg(%s)\n", v, x) + argnum++ } } + if op.argLength != -1 && int(op.argLength) != argnum { + log.Fatalf("%s: op %s should have %d args, has %d", loc, op.name, op.argLength, argnum) + } return v } diff --git a/src/cmd/compile/internal/ssa/id.go b/src/cmd/compile/internal/ssa/id.go index 367e687abf..725279e9fd 100644 --- a/src/cmd/compile/internal/ssa/id.go +++ b/src/cmd/compile/internal/ssa/id.go @@ -11,7 +11,7 @@ type idAlloc struct { last ID } -// get allocates an ID and returns it. +// get allocates an ID and returns it. IDs are always > 0. func (a *idAlloc) get() ID { x := a.last x++ diff --git a/src/cmd/compile/internal/ssa/likelyadjust.go b/src/cmd/compile/internal/ssa/likelyadjust.go index 76251bdd14..2f52c4c6e6 100644 --- a/src/cmd/compile/internal/ssa/likelyadjust.go +++ b/src/cmd/compile/internal/ssa/likelyadjust.go @@ -11,11 +11,24 @@ import ( type loop struct { header *Block // The header node of this (reducible) loop outer *loop // loop containing this loop - // Next two fields not currently used, but cheap to maintain, - // and aid in computation of inner-ness and list of blocks. - nBlocks int32 // Number of blocks in this loop but not within inner loops - isInner bool // True if never discovered to contain a loop - containsCall bool // if any block in this loop or any loop it contains is a BlockCall or BlockDefer + + // By default, children exits, and depth are not initialized. + children []*loop // loops nested directly within this loop. Initialized by assembleChildren(). + exits []*Block // exits records blocks reached by exits from this loop. Initialized by findExits(). + + // Loops aren't that common, so rather than force regalloc to keep + // a map or slice for its data, just put it here. + spills []*Value + scratch int32 + + // Next three fields used by regalloc and/or + // aid in computation of inner-ness and list of blocks. + nBlocks int32 // Number of blocks in this loop but not within inner loops + depth int16 // Nesting depth of the loop; 1 is outermost. Initialized by calculateDepths(). + isInner bool // True if never discovered to contain a loop + + // register allocation uses this. + containsCall bool // if any block in this loop or any loop it contains is a BlockCall or BlockDefer } // outerinner records that outer contains inner @@ -48,6 +61,9 @@ type loopnest struct { po []*Block sdom sparseTree loops []*loop + + // Record which of the lazily initialized fields have actually been initialized. + initializedChildren, initializedDepth, initializedExits bool } func min8(a, b int8) int8 { @@ -295,6 +311,35 @@ func loopnestfor(f *Func) *loopnest { innermost.nBlocks++ } } + + ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops} + + // Curious about the loopiness? "-d=ssa/likelyadjust/stats" + if f.pass.stats > 0 && len(loops) > 0 { + ln.assembleChildren() + ln.calculateDepths() + ln.findExits() + + // Note stats for non-innermost loops are slightly flawed because + // they don't account for inner loop exits that span multiple levels. + + for _, l := range loops { + x := len(l.exits) + cf := 0 + if !l.containsCall { + cf = 1 + } + inner := 0 + if l.isInner { + inner++ + } + + f.logStat("loopstats:", + l.depth, "depth", x, "exits", + inner, "is_inner", cf, "is_callfree", l.nBlocks, "n_blocks") + } + } + if f.pass.debug > 1 && len(loops) > 0 { fmt.Printf("Loops in %s:\n", f.Name) for _, l := range loops { @@ -314,5 +359,90 @@ func loopnestfor(f *Func) *loopnest { } fmt.Print("\n") } - return &loopnest{f, b2l, po, sdom, loops} + return ln +} + +// assembleChildren initializes the children field of each +// loop in the nest. Loop A is a child of loop B if A is +// directly nested within B (based on the reducible-loops +// detection above) +func (ln *loopnest) assembleChildren() { + if ln.initializedChildren { + return + } + for _, l := range ln.loops { + if l.outer != nil { + l.outer.children = append(l.outer.children, l) + } + } + ln.initializedChildren = true +} + +// calculateDepths uses the children field of loops +// to determine the nesting depth (outer=1) of each +// loop. This is helpful for finding exit edges. +func (ln *loopnest) calculateDepths() { + if ln.initializedDepth { + return + } + ln.assembleChildren() + for _, l := range ln.loops { + if l.outer == nil { + l.setDepth(1) + } + } + ln.initializedDepth = true +} + +// findExits uses loop depth information to find the +// exits from a loop. +func (ln *loopnest) findExits() { + if ln.initializedExits { + return + } + ln.calculateDepths() + b2l := ln.b2l + for _, b := range ln.po { + l := b2l[b.ID] + if l != nil && len(b.Succs) == 2 { + sl := b2l[b.Succs[0].ID] + if recordIfExit(l, sl, b.Succs[0]) { + continue + } + sl = b2l[b.Succs[1].ID] + if recordIfExit(l, sl, b.Succs[1]) { + continue + } + } + } + ln.initializedExits = true +} + +// recordIfExit checks sl (the loop containing b) to see if it +// is outside of loop l, and if so, records b as an exit block +// from l and returns true. +func recordIfExit(l, sl *loop, b *Block) bool { + if sl != l { + if sl == nil || sl.depth <= l.depth { + l.exits = append(l.exits, b) + return true + } + // sl is not nil, and is deeper than l + // it's possible for this to be a goto into an irreducible loop made from gotos. + for sl.depth > l.depth { + sl = sl.outer + } + if sl != l { + l.exits = append(l.exits, b) + return true + } + } + return false +} + +func (l *loop) setDepth(d int16) { + l.depth = d + for _, c := range l.children { + c.setDepth(d + 1) + } } diff --git a/src/cmd/compile/internal/ssa/loopbce.go b/src/cmd/compile/internal/ssa/loopbce.go index 17486ac49f..9bd2d3f0de 100644 --- a/src/cmd/compile/internal/ssa/loopbce.go +++ b/src/cmd/compile/internal/ssa/loopbce.go @@ -31,7 +31,7 @@ type indVar struct { // // // TODO: handle 32 bit operations -func findIndVar(f *Func, sdom sparseTree) []indVar { +func findIndVar(f *Func) []indVar { var iv []indVar nextb: @@ -110,7 +110,7 @@ nextb: // Second condition: b.Succs[entry] dominates nxt so that // nxt is computed when inc < max, meaning nxt <= max. - if !sdom.isAncestorEq(b.Succs[entry], nxt.Block) { + if !f.sdom.isAncestorEq(b.Succs[entry], nxt.Block) { // inc+ind can only be reached through the branch that enters the loop. continue } @@ -160,20 +160,18 @@ nextb: // loopbce performs loop based bounds check elimination. func loopbce(f *Func) { - idom := dominators(f) - sdom := newSparseTree(f, idom) - ivList := findIndVar(f, sdom) + ivList := findIndVar(f) m := make(map[*Value]indVar) for _, iv := range ivList { m[iv.ind] = iv } - removeBoundsChecks(f, sdom, m) + removeBoundsChecks(f, m) } // removesBoundsChecks remove IsInBounds and IsSliceInBounds based on the induction variables. -func removeBoundsChecks(f *Func, sdom sparseTree, m map[*Value]indVar) { +func removeBoundsChecks(f *Func, m map[*Value]indVar) { for _, b := range f.Blocks { if b.Kind != BlockIf { continue @@ -202,7 +200,7 @@ func removeBoundsChecks(f *Func, sdom sparseTree, m map[*Value]indVar) { goto skip1 } - if iv, has := m[ind]; has && sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) { + if iv, has := m[ind]; has && f.sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) { if v.Args[1] == iv.max { if f.pass.debug > 0 { f.Config.Warnl(b.Line, "Found redundant %s", v.Op) @@ -229,7 +227,7 @@ func removeBoundsChecks(f *Func, sdom sparseTree, m map[*Value]indVar) { goto skip2 } - if iv, has := m[ind]; has && sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) { + if iv, has := m[ind]; has && f.sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) { if v.Args[1].Op == OpSliceCap && iv.max.Op == OpSliceLen && v.Args[1].Args[0] == iv.max.Args[0] { if f.pass.debug > 0 { f.Config.Warnl(b.Line, "Found redundant %s (len promoted to cap)", v.Op) @@ -240,6 +238,37 @@ func removeBoundsChecks(f *Func, sdom sparseTree, m map[*Value]indVar) { } skip2: + // Simplify + // (IsInBounds (Add64 ind) (Const64 [c])) where 0 <= min <= ind < max <= (Const64 [c]) + // (IsSliceInBounds ind (Const64 [c])) where 0 <= min <= ind < max <= (Const64 [c]) + if v.Op == OpIsInBounds || v.Op == OpIsSliceInBounds { + ind, add := dropAdd64(v.Args[0]) + if ind.Op != OpPhi { + goto skip3 + } + + // ind + add >= 0 <-> min + add >= 0 <-> min >= -add + if iv, has := m[ind]; has && f.sdom.isAncestorEq(iv.entry, b) && isGreaterOrEqualThan(iv.min, -add) { + if !v.Args[1].isGenericIntConst() || !iv.max.isGenericIntConst() { + goto skip3 + } + + limit := v.Args[1].AuxInt + if v.Op == OpIsSliceInBounds { + // If limit++ overflows signed integer then 0 <= max && max <= limit will be false. + limit++ + } + + if max := iv.max.AuxInt + add; 0 <= max && max <= limit { // handle overflow + if f.pass.debug > 0 { + f.Config.Warnl(b.Line, "Found redundant (%s ind %d), ind < %d", v.Op, v.Args[1].AuxInt, iv.max.AuxInt+add) + } + goto simplify + } + } + } + skip3: + continue simplify: @@ -258,3 +287,13 @@ func dropAdd64(v *Value) (*Value, int64) { } return v, 0 } + +func isGreaterOrEqualThan(v *Value, c int64) bool { + if c == 0 { + return isNonNegative(v) + } + if v.isGenericIntConst() && v.AuxInt >= c { + return true + } + return false +} diff --git a/src/cmd/compile/internal/ssa/nilcheck.go b/src/cmd/compile/internal/ssa/nilcheck.go index 881e3b2eff..62eb0c8ea6 100644 --- a/src/cmd/compile/internal/ssa/nilcheck.go +++ b/src/cmd/compile/internal/ssa/nilcheck.go @@ -4,14 +4,12 @@ package ssa -// TODO: return value from newobject/newarray is non-nil. - // nilcheckelim eliminates unnecessary nil checks. func nilcheckelim(f *Func) { // A nil check is redundant if the same nil check was successful in a // dominating block. The efficacy of this pass depends heavily on the // efficacy of the cse pass. - idom := dominators(f) + idom := f.idom domTree := make([][]*Block, f.NumBlocks()) // Create a block ID -> [dominees] mapping diff --git a/src/cmd/compile/internal/ssa/nilcheck_test.go b/src/cmd/compile/internal/ssa/nilcheck_test.go index d1f38b6951..af6cbe864a 100644 --- a/src/cmd/compile/internal/ssa/nilcheck_test.go +++ b/src/cmd/compile/internal/ssa/nilcheck_test.go @@ -49,6 +49,7 @@ func benchmarkNilCheckDeep(b *testing.B, depth int) { b.ReportAllocs() for i := 0; i < b.N; i++ { + domTree(fun.f) nilcheckelim(fun.f) } } @@ -83,6 +84,7 @@ func TestNilcheckSimple(t *testing.T) { Exit("mem"))) CheckFunc(fun.f) + domTree(fun.f) nilcheckelim(fun.f) // clean up the removed nil check @@ -120,6 +122,7 @@ func TestNilcheckDomOrder(t *testing.T) { Goto("exit"))) CheckFunc(fun.f) + domTree(fun.f) nilcheckelim(fun.f) // clean up the removed nil check @@ -153,6 +156,7 @@ func TestNilcheckAddr(t *testing.T) { Exit("mem"))) CheckFunc(fun.f) + domTree(fun.f) nilcheckelim(fun.f) // clean up the removed nil check @@ -187,6 +191,7 @@ func TestNilcheckAddPtr(t *testing.T) { Exit("mem"))) CheckFunc(fun.f) + domTree(fun.f) nilcheckelim(fun.f) // clean up the removed nil check @@ -231,6 +236,7 @@ func TestNilcheckPhi(t *testing.T) { Exit("mem"))) CheckFunc(fun.f) + domTree(fun.f) nilcheckelim(fun.f) // clean up the removed nil check @@ -272,6 +278,7 @@ func TestNilcheckKeepRemove(t *testing.T) { Exit("mem"))) CheckFunc(fun.f) + domTree(fun.f) nilcheckelim(fun.f) // clean up the removed nil check @@ -319,6 +326,7 @@ func TestNilcheckInFalseBranch(t *testing.T) { Exit("mem"))) CheckFunc(fun.f) + domTree(fun.f) nilcheckelim(fun.f) // clean up the removed nil check @@ -370,6 +378,7 @@ func TestNilcheckUser(t *testing.T) { CheckFunc(fun.f) // we need the opt here to rewrite the user nilcheck opt(fun.f) + domTree(fun.f) nilcheckelim(fun.f) // clean up the removed nil check @@ -409,11 +418,12 @@ func TestNilcheckBug(t *testing.T) { Goto("exit")), Bloc("exit", Valu("phi", OpPhi, TypeMem, 0, nil, "mem", "store"), - Exit("mem"))) + Exit("phi"))) CheckFunc(fun.f) // we need the opt here to rewrite the user nilcheck opt(fun.f) + domTree(fun.f) nilcheckelim(fun.f) // clean up the removed nil check diff --git a/src/cmd/compile/internal/ssa/op.go b/src/cmd/compile/internal/ssa/op.go index d10ea230ff..cadbc7cd7a 100644 --- a/src/cmd/compile/internal/ssa/op.go +++ b/src/cmd/compile/internal/ssa/op.go @@ -26,7 +26,7 @@ type opInfo struct { generic bool // this is a generic (arch-independent) opcode rematerializeable bool // this op is rematerializeable commutative bool // this operation is commutative (e.g. addition) - resultInArg0 bool // prefer v and v.Args[0] to be allocated to the same register + resultInArg0 bool // v and v.Args[0] must be allocated to the same register } type inputInfo struct { @@ -49,9 +49,10 @@ const ( auxInt16 // auxInt is a 16-bit integer auxInt32 // auxInt is a 32-bit integer auxInt64 // auxInt is a 64-bit integer + auxInt128 // auxInt represents a 128-bit integer. Always 0. auxFloat32 // auxInt is a float32 (encoded with math.Float64bits) auxFloat64 // auxInt is a float64 (encoded with math.Float64bits) - auxString // auxInt is a string + auxString // aux is a string auxSym // aux is a symbol auxSymOff // aux is a symbol, auxInt is an offset auxSymValAndOff // aux is a symbol, auxInt is a ValAndOff diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 5465d7f5ed..9ab9be769c 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -120,28 +120,16 @@ const ( OpAMD64MOVSDstoreidx8 OpAMD64ADDQ OpAMD64ADDL - OpAMD64ADDW - OpAMD64ADDB OpAMD64ADDQconst OpAMD64ADDLconst - OpAMD64ADDWconst - OpAMD64ADDBconst OpAMD64SUBQ OpAMD64SUBL - OpAMD64SUBW - OpAMD64SUBB OpAMD64SUBQconst OpAMD64SUBLconst - OpAMD64SUBWconst - OpAMD64SUBBconst OpAMD64MULQ OpAMD64MULL - OpAMD64MULW - OpAMD64MULB OpAMD64MULQconst OpAMD64MULLconst - OpAMD64MULWconst - OpAMD64MULBconst OpAMD64HMULQ OpAMD64HMULL OpAMD64HMULW @@ -165,28 +153,16 @@ const ( OpAMD64MODWU OpAMD64ANDQ OpAMD64ANDL - OpAMD64ANDW - OpAMD64ANDB OpAMD64ANDQconst OpAMD64ANDLconst - OpAMD64ANDWconst - OpAMD64ANDBconst OpAMD64ORQ OpAMD64ORL - OpAMD64ORW - OpAMD64ORB OpAMD64ORQconst OpAMD64ORLconst - OpAMD64ORWconst - OpAMD64ORBconst OpAMD64XORQ OpAMD64XORL - OpAMD64XORW - OpAMD64XORB OpAMD64XORQconst OpAMD64XORLconst - OpAMD64XORWconst - OpAMD64XORBconst OpAMD64CMPQ OpAMD64CMPL OpAMD64CMPW @@ -207,12 +183,8 @@ const ( OpAMD64TESTBconst OpAMD64SHLQ OpAMD64SHLL - OpAMD64SHLW - OpAMD64SHLB OpAMD64SHLQconst OpAMD64SHLLconst - OpAMD64SHLWconst - OpAMD64SHLBconst OpAMD64SHRQ OpAMD64SHRL OpAMD64SHRW @@ -235,12 +207,8 @@ const ( OpAMD64ROLBconst OpAMD64NEGQ OpAMD64NEGL - OpAMD64NEGW - OpAMD64NEGB OpAMD64NOTQ OpAMD64NOTL - OpAMD64NOTW - OpAMD64NOTB OpAMD64BSFQ OpAMD64BSFL OpAMD64BSFW @@ -280,8 +248,6 @@ const ( OpAMD64MOVWQZX OpAMD64MOVLQSX OpAMD64MOVLQZX - OpAMD64MOVBconst - OpAMD64MOVWconst OpAMD64MOVLconst OpAMD64MOVQconst OpAMD64CVTTSD2SL @@ -537,6 +503,10 @@ const ( OpGeq64U OpGeq32F OpGeq64F + OpAndB + OpOrB + OpEqB + OpNeqB OpNot OpNeg8 OpNeg16 @@ -971,81 +941,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "ADDQ", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AADDQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ADDL", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AADDL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ADDW", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AADDL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ADDB", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AADDL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ADDQconst", - auxType: auxInt64, - argLen: 1, - resultInArg0: true, - asm: x86.AADDQ, + name: "ADDQ", + argLen: 2, + commutative: true, + asm: x86.AADDQ, reg: regInfo{ inputs: []inputInfo{ + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS @@ -1055,13 +957,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "ADDLconst", - auxType: auxInt32, - argLen: 1, - resultInArg0: true, - asm: x86.AADDL, + name: "ADDL", + argLen: 2, + commutative: true, + asm: x86.AADDL, reg: regInfo{ inputs: []inputInfo{ + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS @@ -1071,11 +973,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "ADDWconst", - auxType: auxInt16, - argLen: 1, - resultInArg0: true, - asm: x86.AADDL, + name: "ADDQconst", + auxType: auxInt64, + argLen: 1, + asm: x86.AADDQ, reg: regInfo{ inputs: []inputInfo{ {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 @@ -1087,11 +988,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "ADDBconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - asm: x86.AADDL, + name: "ADDLconst", + auxType: auxInt32, + argLen: 1, + asm: x86.AADDL, reg: regInfo{ inputs: []inputInfo{ {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 @@ -1109,8 +1009,8 @@ var opcodeTable = [...]opInfo{ asm: x86.ASUBQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1125,40 +1025,8 @@ var opcodeTable = [...]opInfo{ asm: x86.ASUBL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "SUBW", - argLen: 2, - resultInArg0: true, - asm: x86.ASUBL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "SUBB", - argLen: 2, - resultInArg0: true, - asm: x86.ASUBL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1174,7 +1042,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASUBQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1190,39 +1058,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASUBL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "SUBWconst", - auxType: auxInt16, - argLen: 1, - resultInArg0: true, - asm: x86.ASUBL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "SUBBconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - asm: x86.ASUBL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1238,8 +1074,8 @@ var opcodeTable = [...]opInfo{ asm: x86.AIMULQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1255,42 +1091,8 @@ var opcodeTable = [...]opInfo{ asm: x86.AIMULL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "MULW", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AIMULW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "MULB", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AIMULW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1306,7 +1108,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AIMULQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1322,39 +1124,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AIMULL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "MULWconst", - auxType: auxInt16, - argLen: 1, - resultInArg0: true, - asm: x86.AIMULW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "MULBconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - asm: x86.AIMULW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1489,8 +1259,8 @@ var opcodeTable = [...]opInfo{ resultInArg0: true, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1686,8 +1456,8 @@ var opcodeTable = [...]opInfo{ asm: x86.AANDQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1703,42 +1473,8 @@ var opcodeTable = [...]opInfo{ asm: x86.AANDL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ANDW", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AANDL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ANDB", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AANDL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1754,7 +1490,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AANDQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1770,39 +1506,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AANDL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ANDWconst", - auxType: auxInt16, - argLen: 1, - resultInArg0: true, - asm: x86.AANDL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ANDBconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - asm: x86.AANDL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1818,8 +1522,8 @@ var opcodeTable = [...]opInfo{ asm: x86.AORQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1835,42 +1539,8 @@ var opcodeTable = [...]opInfo{ asm: x86.AORL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ORW", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AORL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ORB", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AORL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1886,7 +1556,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AORQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1902,39 +1572,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AORL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ORWconst", - auxType: auxInt16, - argLen: 1, - resultInArg0: true, - asm: x86.AORL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "ORBconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - asm: x86.AORL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1950,8 +1588,8 @@ var opcodeTable = [...]opInfo{ asm: x86.AXORQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -1967,42 +1605,8 @@ var opcodeTable = [...]opInfo{ asm: x86.AXORL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "XORW", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AXORL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "XORB", - argLen: 2, - commutative: true, - resultInArg0: true, - asm: x86.AXORL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2018,7 +1622,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AXORQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2034,39 +1638,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AXORL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "XORWconst", - auxType: auxInt16, - argLen: 1, - resultInArg0: true, - asm: x86.AXORL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "XORBconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - asm: x86.AXORL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2334,11 +1906,11 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, }, }, @@ -2350,43 +1922,11 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "SHLW", - argLen: 2, - resultInArg0: true, - asm: x86.ASHLL, - reg: regInfo{ - inputs: []inputInfo{ - {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "SHLB", - argLen: 2, - resultInArg0: true, - asm: x86.ASHLL, - reg: regInfo{ - inputs: []inputInfo{ - {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, }, }, @@ -2398,7 +1938,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASHLQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2414,39 +1954,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASHLL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "SHLWconst", - auxType: auxInt16, - argLen: 1, - resultInArg0: true, - asm: x86.ASHLL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "SHLBconst", - auxType: auxInt8, - argLen: 1, - resultInArg0: true, - asm: x86.ASHLL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2462,11 +1970,11 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, }, }, @@ -2478,11 +1986,11 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, }, }, @@ -2494,11 +2002,11 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, }, }, @@ -2510,11 +2018,11 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, }, }, @@ -2526,7 +2034,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASHRQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2542,7 +2050,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASHRL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2558,7 +2066,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASHRW, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2574,7 +2082,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASHRB, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2590,11 +2098,11 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, }, }, @@ -2606,11 +2114,11 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, }, }, @@ -2622,11 +2130,11 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, }, }, @@ -2638,11 +2146,11 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 2}, // CX - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ - 65517, // AX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, }, }, @@ -2654,7 +2162,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASARQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2670,7 +2178,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASARL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2686,7 +2194,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASARW, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2702,7 +2210,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ASARB, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2718,7 +2226,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AROLQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2734,7 +2242,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AROLL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2750,7 +2258,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AROLW, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2766,7 +2274,7 @@ var opcodeTable = [...]opInfo{ asm: x86.AROLB, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2781,7 +2289,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ANEGQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2796,37 +2304,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ANEGL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "NEGW", - argLen: 1, - resultInArg0: true, - asm: x86.ANEGL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "NEGB", - argLen: 1, - resultInArg0: true, - asm: x86.ANEGL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2841,7 +2319,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ANOTQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2856,37 +2334,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ANOTL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "NOTW", - argLen: 1, - resultInArg0: true, - asm: x86.ANOTL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - clobbers: 8589934592, // FLAGS - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "NOTB", - argLen: 1, - resultInArg0: true, - asm: x86.ANOTL, - reg: regInfo{ - inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2900,7 +2348,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ABSFQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2914,7 +2362,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ABSFL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2928,7 +2376,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ABSFW, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2942,7 +2390,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ABSRQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2956,7 +2404,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ABSRL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2970,7 +2418,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ABSRW, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -2987,7 +2435,7 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 8589934592}, // FLAGS - {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934593, // AX FLAGS outputs: []regMask{ @@ -3004,7 +2452,7 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 8589934592}, // FLAGS - {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934593, // AX FLAGS outputs: []regMask{ @@ -3021,7 +2469,7 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 8589934592}, // FLAGS - {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934593, // AX FLAGS outputs: []regMask{ @@ -3038,7 +2486,7 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 8589934592}, // FLAGS - {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934593, // AX FLAGS outputs: []regMask{ @@ -3055,7 +2503,7 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 8589934592}, // FLAGS - {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934593, // AX FLAGS outputs: []regMask{ @@ -3072,7 +2520,7 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 8589934592}, // FLAGS - {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934593, // AX FLAGS outputs: []regMask{ @@ -3087,7 +2535,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ABSWAPQ, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -3102,7 +2550,7 @@ var opcodeTable = [...]opInfo{ asm: x86.ABSWAPL, reg: regInfo{ inputs: []inputInfo{ - {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 }, clobbers: 8589934592, // FLAGS outputs: []regMask{ @@ -3438,30 +2886,6 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "MOVBconst", - auxType: auxInt8, - argLen: 0, - rematerializeable: true, - asm: x86.AMOVB, - reg: regInfo{ - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { - name: "MOVWconst", - auxType: auxInt16, - argLen: 0, - rematerializeable: true, - asm: x86.AMOVW, - reg: regInfo{ - outputs: []regMask{ - 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 - }, - }, - }, - { name: "MOVLconst", auxType: auxInt32, argLen: 0, @@ -4211,6 +3635,7 @@ var opcodeTable = [...]opInfo{ }, { name: "MOVOconst", + auxType: auxInt128, argLen: 0, rematerializeable: true, reg: regInfo{ @@ -4430,9 +3855,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "MOVWload", - argLen: 2, - asm: arm.AMOVW, + name: "MOVWload", + auxType: auxSymOff, + argLen: 2, + asm: arm.AMOVW, reg: regInfo{ inputs: []inputInfo{ {0, 31}, // R0 R1 R2 R3 SP @@ -4443,9 +3869,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "MOVWstore", - argLen: 3, - asm: arm.AMOVW, + name: "MOVWstore", + auxType: auxSymOff, + argLen: 3, + asm: arm.AMOVW, reg: regInfo{ inputs: []inputInfo{ {0, 31}, // R0 R1 R2 R3 SP @@ -4463,7 +3890,7 @@ var opcodeTable = [...]opInfo{ }, { name: "LessThan", - argLen: 2, + argLen: 1, reg: regInfo{ inputs: []inputInfo{ {0, 32}, // FLAGS @@ -5354,6 +4781,26 @@ var opcodeTable = [...]opInfo{ generic: true, }, { + name: "AndB", + argLen: 2, + generic: true, + }, + { + name: "OrB", + argLen: 2, + generic: true, + }, + { + name: "EqB", + argLen: 2, + generic: true, + }, + { + name: "NeqB", + argLen: 2, + generic: true, + }, + { name: "Not", argLen: 1, generic: true, diff --git a/src/cmd/compile/internal/ssa/phielim.go b/src/cmd/compile/internal/ssa/phielim.go index ce3b5a199a..77013c6481 100644 --- a/src/cmd/compile/internal/ssa/phielim.go +++ b/src/cmd/compile/internal/ssa/phielim.go @@ -40,11 +40,7 @@ func phielimValue(v *Value) bool { // are not v itself, then the phi must remain. // Otherwise, we can replace it with a copy. var w *Value - for i, x := range v.Args { - if b := v.Block.Preds[i]; b.Kind == BlockFirst && b.Succs[1] == v.Block { - // This branch is never taken so we can just eliminate it. - continue - } + for _, x := range v.Args { if x == v { continue } diff --git a/src/cmd/compile/internal/ssa/phiopt.go b/src/cmd/compile/internal/ssa/phiopt.go index 2d0a45733a..3b6728ca86 100644 --- a/src/cmd/compile/internal/ssa/phiopt.go +++ b/src/cmd/compile/internal/ssa/phiopt.go @@ -26,6 +26,7 @@ package ssa func phiopt(f *Func) { for _, b := range f.Blocks { if len(b.Preds) != 2 || len(b.Values) == 0 { + // TODO: handle more than 2 predecessors, e.g. a || b || c. continue } @@ -45,44 +46,67 @@ func phiopt(f *Func) { } // b0 is the if block giving the boolean value. - var reverse bool + // reverse is the predecessor from which the truth value comes. + var reverse int if b0.Succs[0] == pb0 && b0.Succs[1] == pb1 { - reverse = false + reverse = 0 } else if b0.Succs[0] == pb1 && b0.Succs[1] == pb0 { - reverse = true + reverse = 1 } else { b.Fatalf("invalid predecessors\n") } for _, v := range b.Values { - if v.Op != OpPhi || !v.Type.IsBoolean() || v.Args[0].Op != OpConstBool || v.Args[1].Op != OpConstBool { + if v.Op != OpPhi || !v.Type.IsBoolean() { continue } - ok, isCopy := false, false - if v.Args[0].AuxInt == 1 && v.Args[1].AuxInt == 0 { - ok, isCopy = true, !reverse - } else if v.Args[0].AuxInt == 0 && v.Args[1].AuxInt == 1 { - ok, isCopy = true, reverse + // Replaces + // if a { x = true } else { x = false } with x = a + // and + // if a { x = false } else { x = true } with x = !a + if v.Args[0].Op == OpConstBool && v.Args[1].Op == OpConstBool { + if v.Args[reverse].AuxInt != v.Args[1-reverse].AuxInt { + ops := [2]Op{OpNot, OpCopy} + v.reset(ops[v.Args[reverse].AuxInt]) + v.AddArg(b0.Control) + if f.pass.debug > 0 { + f.Config.Warnl(b.Line, "converted OpPhi to %v", v.Op) + } + continue + } } - // (Phi (ConstBool [x]) (ConstBool [x])) is already handled by opt / phielim. - - if ok && isCopy { - if f.pass.debug > 0 { - f.Config.Warnl(b.Line, "converted OpPhi to OpCopy") + // Replaces + // if a { x = true } else { x = value } with x = a || value. + // Requires that value dominates x, meaning that regardless of a, + // value is always computed. This guarantees that the side effects + // of value are not seen if a is false. + if v.Args[reverse].Op == OpConstBool && v.Args[reverse].AuxInt == 1 { + if tmp := v.Args[1-reverse]; f.sdom.isAncestorEq(tmp.Block, b) { + v.reset(OpOrB) + v.SetArgs2(b0.Control, tmp) + if f.pass.debug > 0 { + f.Config.Warnl(b.Line, "converted OpPhi to %v", v.Op) + } + continue } - v.reset(OpCopy) - v.AddArg(b0.Control) - continue } - if ok && !isCopy { - if f.pass.debug > 0 { - f.Config.Warnl(b.Line, "converted OpPhi to OpNot") + + // Replaces + // if a { x = value } else { x = false } with x = a && value. + // Requires that value dominates x, meaning that regardless of a, + // value is always computed. This guarantees that the side effects + // of value are not seen if a is false. + if v.Args[1-reverse].Op == OpConstBool && v.Args[1-reverse].AuxInt == 0 { + if tmp := v.Args[reverse]; f.sdom.isAncestorEq(tmp.Block, b) { + v.reset(OpAndB) + v.SetArgs2(b0.Control, tmp) + if f.pass.debug > 0 { + f.Config.Warnl(b.Line, "converted OpPhi to %v", v.Op) + } + continue } - v.reset(OpNot) - v.AddArg(b0.Control) - continue } } } diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go index a12a996263..f4a10b508a 100644 --- a/src/cmd/compile/internal/ssa/prove.go +++ b/src/cmd/compile/internal/ssa/prove.go @@ -445,9 +445,6 @@ var ( // else branch of the first comparison is executed, we already know that i < len(a). // The code for the second panic can be removed. func prove(f *Func) { - idom := dominators(f) - sdom := newSparseTree(f, idom) - // current node state type walkState int const ( @@ -471,8 +468,8 @@ func prove(f *Func) { for len(work) > 0 { node := work[len(work)-1] work = work[:len(work)-1] - parent := idom[node.block.ID] - branch := getBranch(sdom, parent, node.block) + parent := f.idom[node.block.ID] + branch := getBranch(f.sdom, parent, node.block) switch node.state { case descend: @@ -491,7 +488,7 @@ func prove(f *Func) { block: node.block, state: simplify, }) - for s := sdom.Child(node.block); s != nil; s = sdom.Sibling(s) { + for s := f.sdom.Child(node.block); s != nil; s = f.sdom.Sibling(s) { work = append(work, bp{ block: s, state: descend, diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index 22b9d12c19..65c25dfc5a 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -91,6 +91,18 @@ // will have no use (so don't run deadcode after regalloc!). // TODO: maybe we should introduce these extra phis? +// Additional not-quite-SSA output occurs when spills are sunk out +// of loops to the targets of exit edges from the loop. Before sinking, +// there is one spill site (one StoreReg) targeting stack slot X, after +// sinking there may be multiple spill sites targeting stack slot X, +// with no phi functions at any join points reachable by the multiple +// spill sites. In addition, uses of the spill from copies of the original +// will not name the copy in their reference; instead they will name +// the original, though both will have the same spill location. The +// first sunk spill will be the original, but moved, to an exit block, +// thus ensuring that there is a definition somewhere corresponding to +// the original spill's uses. + package ssa import ( @@ -100,7 +112,8 @@ import ( ) const ( - logSpills = iota + moveSpills = iota + logSpills regDebug stackDebug ) @@ -176,10 +189,9 @@ type valState struct { uses *use // list of uses in this block spill *Value // spilled copy of the Value spillUsed bool - needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags() - rematerializeable bool // cached value of v.rematerializeable() - desired register // register we want value to be in, if any - avoid regMask // registers to avoid if we can + spillUsedShuffle bool // true if used in shuffling, after ordinary uses + needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags() + rematerializeable bool // cached value of v.rematerializeable() } type regState struct { @@ -191,10 +203,11 @@ type regState struct { type regAllocState struct { f *Func - registers []Register - numRegs register - SPReg register - SBReg register + registers []Register + numRegs register + SPReg register + SBReg register + allocatable regMask // for each block, its primary predecessor. // A predecessor of b is primary if it is the closest @@ -206,6 +219,11 @@ type regAllocState struct { // which are live at the end of b, together with a count of how many instructions // forward to the next use. live [][]liveInfo + // desired register assignments at the end of each block. + // Note that this is a static map computed before allocation occurs. Dynamic + // register desires (from partially completed allocations) will trump + // this information. + desired []desiredState // current state of each (preregalloc) Value values []valState @@ -243,6 +261,15 @@ type regAllocState struct { loopnest *loopnest } +type spillToSink struct { + spill *Value // Spill instruction to move (a StoreReg) + dests int32 // Bitmask indicating exit blocks from loop in which spill/val is defined. 1<<i set means val is live into loop.exitBlocks[i] +} + +func (sts *spillToSink) spilledValue() *Value { + return sts.spill.Args[0] +} + type endReg struct { r register v *Value // pre-regalloc value held in this register (TODO: can we use ID here?) @@ -310,6 +337,7 @@ func (s *regAllocState) assignReg(r register, v *Value, c *Value) { // If there is no unused register, a Value will be kicked out of // a register to make room. func (s *regAllocState) allocReg(v *Value, mask regMask) register { + mask &= s.allocatable mask &^= s.nospill if mask == 0 { s.f.Fatalf("no register available") @@ -317,20 +345,7 @@ func (s *regAllocState) allocReg(v *Value, mask regMask) register { // Pick an unused register if one is available. if mask&^s.used != 0 { - mask &^= s.used - - // Use desired register if we can. - d := s.values[v.ID].desired - if d != noRegister && mask>>d&1 != 0 { - mask = regMask(1) << d - } - - // Avoid avoidable registers if we can. - if mask&^s.values[v.ID].avoid != 0 { - mask &^= s.values[v.ID].avoid - } - - return pickReg(mask) + return pickReg(mask &^ s.used) } // Pick a value to spill. Spill the value with the @@ -340,10 +355,6 @@ func (s *regAllocState) allocReg(v *Value, mask regMask) register { // TODO: if a single value is in multiple registers, spill one of them // before spilling a value in just a single register. - // SP and SB are allocated specially. No regular value should - // be allocated to them. - mask &^= 1<<s.SPReg | 1<<s.SBReg - // Find a register to spill. We spill the register containing the value // whose next use is as far in the future as possible. // https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm @@ -389,14 +400,6 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, line return s.regs[r].c } - if v.Op != OpSP { - mask &^= 1 << s.SPReg // dont' spill SP - } - if v.Op != OpSB { - mask &^= 1 << s.SBReg // don't spill SB - } - mask &^= s.reserved() - // Allocate a register. r := s.allocReg(v, mask) @@ -417,7 +420,7 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, line // Load v from its spill location. case vi.spill != nil: if s.f.pass.debug > logSpills { - s.f.Config.Warnl(vi.spill.Line, "load spill") + s.f.Config.Warnl(vi.spill.Line, "load spill for %v from %v", v, vi.spill) } c = s.curBlock.NewValue1(line, OpLoadReg, v.Type, vi.spill) vi.spillUsed = true @@ -434,6 +437,7 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, line } func (s *regAllocState) init(f *Func) { + s.f = f s.registers = f.Config.registers s.numRegs = register(len(s.registers)) if s.numRegs > noRegister || s.numRegs > register(unsafe.Sizeof(regMask(0))*8) { @@ -448,7 +452,17 @@ func (s *regAllocState) init(f *Func) { } } - s.f = f + // Figure out which registers we're allowed to use. + s.allocatable = regMask(1)<<s.numRegs - 1 + s.allocatable &^= 1 << s.SPReg + s.allocatable &^= 1 << s.SBReg + if obj.Framepointer_enabled != 0 { + s.allocatable &^= 1 << 5 // BP + } + if s.f.Config.ctxt.Flag_dynlink { + s.allocatable &^= 1 << 15 // R15 + } + s.regs = make([]regState, s.numRegs) s.values = make([]valState, f.NumValues()) s.orig = make([]*Value, f.NumValues()) @@ -457,7 +471,6 @@ func (s *regAllocState) init(f *Func) { if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() { s.values[v.ID].needReg = true s.values[v.ID].rematerializeable = v.rematerializeable() - s.values[v.ID].desired = noRegister s.orig[v.ID] = v } } @@ -527,6 +540,18 @@ func (s *regAllocState) advanceUses(v *Value) { } } +// liveAfterCurrentInstruction reports whether v is live after +// the current instruction is completed. v must be used by the +// current instruction. +func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool { + u := s.values[v.ID].uses + d := u.dist + for u != nil && u.dist == d { + u = u.next + } + return u != nil && u.dist > d +} + // Sets the state of the registers to that encoded in regs. func (s *regAllocState) setState(regs []endReg) { s.freeRegs(s.used) @@ -541,9 +566,25 @@ func (s *regAllocState) compatRegs(t Type) regMask { if t.IsFloat() || t == TypeInt128 { m = 0xffff << 16 // X0-X15 } else { - m = 0xffef << 0 // AX-R15, except SP + m = 0xffff << 0 // AX-R15 + } + return m & s.allocatable +} + +// loopForBlock returns the loop containing block b, +// provided that the loop is "interesting" for purposes +// of improving register allocation (= is inner, and does +// not contain a call) +func (s *regAllocState) loopForBlock(b *Block) *loop { + loop := s.loopnest.b2l[b.ID] + + // Minor for-the-time-being optimization: nothing happens + // unless a loop is both inner and call-free, therefore + // don't bother with other loops. + if loop != nil && (loop.containsCall || !loop.isInner) { + loop = nil } - return m &^ s.reserved() + return loop } func (s *regAllocState) regalloc(f *Func) { @@ -554,12 +595,46 @@ func (s *regAllocState) regalloc(f *Func) { var phiRegs []register var args []*Value + // statistics + var nSpills int // # of spills remaining + var nSpillsInner int // # of spills remaining in inner loops + var nSpillsSunk int // # of sunk spills remaining + var nSpillsChanged int // # of sunk spills lost because of register use change + var nSpillsSunkUnused int // # of spills not sunk because they were removed completely + var nSpillsNotSunkLateUse int // # of spills not sunk because of very late use (in shuffle) + + // Data structure used for computing desired registers. + var desired desiredState + + // Desired registers for inputs & outputs for each instruction in the block. + type dentry struct { + out [4]register // desired output registers + in [3][4]register // desired input registers (for inputs 0,1, and 2) + } + var dinfo []dentry + if f.Entry != f.Blocks[0] { f.Fatalf("entry block must be first") } + // Get loop nest so that spills in inner loops can be + // tracked. When the last block of a loop is processed, + // attempt to move spills out of the loop. + s.loopnest.findExits() + + // Spills are moved from one block's slice of values to another's. + // This confuses register allocation if it occurs before it is + // complete, so candidates are recorded, then rechecked and + // moved after all allocation (register and stack) is complete. + // Because movement is only within a stack slot's lifetime, it + // is safe to do this. + var toSink []spillToSink + // Will be used to figure out live inputs to exit blocks of inner loops. + entryCandidates := newSparseMap(f.NumValues()) + for _, b := range f.Blocks { s.curBlock = b + loop := s.loopForBlock(b) // Initialize liveSet and uses fields for this block. // Walk backwards through the block doing liveness analysis. @@ -739,6 +814,11 @@ func (s *regAllocState) regalloc(f *Func) { s.setOrig(spill, v) s.values[v.ID].spill = spill s.values[v.ID].spillUsed = false + if loop != nil { + loop.spills = append(loop.spills, v) + nSpillsInner++ + } + nSpills++ } // Save the starting state for use by merge edges. @@ -765,26 +845,27 @@ func (s *regAllocState) regalloc(f *Func) { } } - // Compute preferred registers for each value using a backwards pass. + // Allocate space to record the desired registers for each value. + dinfo = dinfo[:0] + for i := 0; i < len(oldSched); i++ { + dinfo = append(dinfo, dentry{}) + } + + // Load static desired register info at the end of the block. + desired.copy(&s.desired[b.ID]) + + // Check actual assigned registers at the start of the next block(s). + // Dynamically assigned registers will trump the static + // desired registers computed during liveness analysis. // Note that we do this phase after startRegs is set above, so that // we get the right behavior for a block which branches to itself. for _, succ := range b.Succs { - // TODO: prioritize likely successor. + // TODO: prioritize likely successor? for _, x := range s.startRegs[succ.ID] { - v := s.orig[x.vid] - s.values[v.ID].desired = x.r - } - // Process phi ops in succ - i := -1 - for j, p := range succ.Preds { - if p == b { - i = j - break - } - } - if i == -1 { - s.f.Fatalf("can't find predecssor %s of %s\n", b, succ) + desired.add(x.vid, x.r) } + // Process phi ops in succ. + pidx := predIdx(succ, b) for _, v := range succ.Values { if v.Op != OpPhi { break @@ -792,47 +873,44 @@ func (s *regAllocState) regalloc(f *Func) { if !s.values[v.ID].needReg { continue } - r, ok := s.f.getHome(v.ID).(*Register) + rp, ok := s.f.getHome(v.ID).(*Register) if !ok { continue } - a := s.orig[v.Args[i].ID] - s.values[a.ID].desired = register(r.Num) + desired.add(v.Args[pidx].ID, register(rp.Num)) } } - - // Set avoid fields to help desired register availability. - liveSet.clear() - for _, e := range s.live[b.ID] { - liveSet.add(e.ID) - } - if v := b.Control; v != nil && s.values[v.ID].needReg { - liveSet.add(v.ID) - } + // Walk values backwards computing desired register info. + // See computeLive for more comments. for i := len(oldSched) - 1; i >= 0; i-- { v := oldSched[i] - liveSet.remove(v.ID) - - r := s.values[v.ID].desired - if r != noRegister { - m := regMask(1) << r - // All live values should avoid this register so - // it will be available at this point. - for _, w := range liveSet.contents() { - s.values[w].avoid |= m + prefs := desired.remove(v.ID) + desired.clobber(opcodeTable[v.Op].reg.clobbers) + for _, j := range opcodeTable[v.Op].reg.inputs { + if countRegs(j.regs) != 1 { + continue } + desired.clobber(j.regs) + desired.add(v.Args[j.idx].ID, pickReg(j.regs)) } - - for _, a := range v.Args { - if !s.values[a.ID].needReg { - continue + if opcodeTable[v.Op].resultInArg0 { + if opcodeTable[v.Op].commutative { + desired.addList(v.Args[1].ID, prefs) } - liveSet.add(a.ID) + desired.addList(v.Args[0].ID, prefs) + } + // Save desired registers for this value. + dinfo[i].out = prefs + for j, a := range v.Args { + if j >= len(dinfo[i].in) { + break + } + dinfo[i].in[j] = desired.get(a.ID) } } // Process all the non-phi values. - for _, v := range oldSched { + for idx, v := range oldSched { if s.f.pass.debug > regDebug { fmt.Printf(" processing %s\n", v.LongString()) } @@ -880,15 +958,132 @@ func (s *regAllocState) regalloc(f *Func) { continue } + if s.f.pass.debug > regDebug { + fmt.Printf("value %s\n", v.LongString()) + fmt.Printf(" out:") + for _, r := range dinfo[idx].out { + if r != noRegister { + fmt.Printf(" %s", s.registers[r].Name()) + } + } + fmt.Println() + for i := 0; i < len(v.Args) && i < 3; i++ { + fmt.Printf(" in%d:", i) + for _, r := range dinfo[idx].in[i] { + if r != noRegister { + fmt.Printf(" %s", s.registers[r].Name()) + } + } + fmt.Println() + } + } + // Move arguments to registers. Process in an ordering defined // by the register specification (most constrained first). args = append(args[:0], v.Args...) for _, i := range regspec.inputs { - if i.regs == flagRegMask { + mask := i.regs + if mask == flagRegMask { // TODO: remove flag input from regspec.inputs. continue } - args[i.idx] = s.allocValToReg(v.Args[i.idx], i.regs, true, v.Line) + if mask&s.values[args[i.idx].ID].regs == 0 { + // Need a new register for the input. + mask &= s.allocatable + mask &^= s.nospill + // Used desired register if available. + if i.idx < 3 { + for _, r := range dinfo[idx].in[i.idx] { + if r != noRegister && (mask&^s.used)>>r&1 != 0 { + // Desired register is allowed and unused. + mask = regMask(1) << r + break + } + } + } + // Avoid registers we're saving for other values. + if mask&^desired.avoid != 0 { + mask &^= desired.avoid + } + } + args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Line) + } + + // If the output clobbers the input register, make sure we have + // at least two copies of the input register so we don't + // have to reload the value from the spill location. + if opcodeTable[v.Op].resultInArg0 { + var m regMask + if !s.liveAfterCurrentInstruction(v.Args[0]) { + // arg0 is dead. We can clobber its register. + goto ok + } + if countRegs(s.values[v.Args[0].ID].regs) >= 2 { + // we have at least 2 copies of arg0. We can afford to clobber one. + goto ok + } + if opcodeTable[v.Op].commutative { + if !s.liveAfterCurrentInstruction(v.Args[1]) { + args[0], args[1] = args[1], args[0] + goto ok + } + if countRegs(s.values[v.Args[1].ID].regs) >= 2 { + args[0], args[1] = args[1], args[0] + goto ok + } + } + + // We can't overwrite arg0 (or arg1, if commutative). So we + // need to make a copy of an input so we have a register we can modify. + + // Possible new registers to copy into. + m = s.compatRegs(v.Args[0].Type) &^ s.used + if m == 0 { + // No free registers. In this case we'll just clobber + // an input and future uses of that input must use a restore. + // TODO(khr): We should really do this like allocReg does it, + // spilling the value with the most distant next use. + goto ok + } + + // Try to move an input to the desired output. + for _, r := range dinfo[idx].out { + if r != noRegister && m>>r&1 != 0 { + m = regMask(1) << r + args[0] = s.allocValToReg(v.Args[0], m, true, v.Line) + // Note: we update args[0] so the instruction will + // use the register copy we just made. + goto ok + } + } + // Try to copy input to its desired location & use its old + // location as the result register. + for _, r := range dinfo[idx].in[0] { + if r != noRegister && m>>r&1 != 0 { + m = regMask(1) << r + s.allocValToReg(v.Args[0], m, true, v.Line) + // Note: no update to args[0] so the instruction will + // use the original copy. + goto ok + } + } + if opcodeTable[v.Op].commutative { + for _, r := range dinfo[idx].in[1] { + if r != noRegister && m>>r&1 != 0 { + m = regMask(1) << r + s.allocValToReg(v.Args[1], m, true, v.Line) + args[0], args[1] = args[1], args[0] + goto ok + } + } + } + // Avoid future fixed uses if we can. + if m&^desired.avoid != 0 { + m &^= desired.avoid + } + // Save input 0 to a new register so we can clobber it. + s.allocValToReg(v.Args[0], m, true, v.Line) + ok: } // Now that all args are in regs, we're ready to issue the value itself. @@ -903,24 +1098,44 @@ func (s *regAllocState) regalloc(f *Func) { // Pick register for output. if s.values[v.ID].needReg { - mask := regspec.outputs[0] &^ s.reserved() - if mask>>33&1 != 0 { - s.f.Fatalf("bad mask %s\n", v.LongString()) - } + mask := regspec.outputs[0] & s.allocatable if opcodeTable[v.Op].resultInArg0 { - r := register(s.f.getHome(args[0].ID).(*Register).Num) - if (mask&^s.used)>>r&1 != 0 { + if !opcodeTable[v.Op].commutative { + // Output must use the same register as input 0. + r := register(s.f.getHome(args[0].ID).(*Register).Num) mask = regMask(1) << r - } - if opcodeTable[v.Op].commutative { - r := register(s.f.getHome(args[1].ID).(*Register).Num) - if (mask&^s.used)>>r&1 != 0 { - mask = regMask(1) << r + } else { + // Output must use the same register as input 0 or 1. + r0 := register(s.f.getHome(args[0].ID).(*Register).Num) + r1 := register(s.f.getHome(args[1].ID).(*Register).Num) + // Check r0 and r1 for desired output register. + found := false + for _, r := range dinfo[idx].out { + if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 { + mask = regMask(1) << r + found = true + if r == r1 { + args[0], args[1] = args[1], args[0] + } + break + } + } + if !found { + // Neither are desired, pick r0. + mask = regMask(1) << r0 } } - // TODO: enforce resultInArg0 always, instead of treating it - // as a hint. Then we don't need the special cases adding - // moves all throughout ssa.go:genValue. + } + for _, r := range dinfo[idx].out { + if r != noRegister && (mask&^s.used)>>r&1 != 0 { + // Desired register is allowed and unused. + mask = regMask(1) << r + break + } + } + // Avoid registers we're saving for other values. + if mask&^desired.avoid != 0 { + mask &^= desired.avoid } r := s.allocReg(v, mask) s.assignReg(r, v, v) @@ -947,6 +1162,11 @@ func (s *regAllocState) regalloc(f *Func) { s.setOrig(spill, v) s.values[v.ID].spill = spill s.values[v.ID].spillUsed = false + if loop != nil { + loop.spills = append(loop.spills, v) + nSpillsInner++ + } + nSpills++ } } @@ -993,6 +1213,9 @@ func (s *regAllocState) regalloc(f *Func) { } v := s.orig[vid] m := s.compatRegs(v.Type) &^ s.used + if m&^desired.avoid != 0 { + m &^= desired.avoid + } if m != 0 { s.allocValToReg(v, m, false, b.Line) } @@ -1056,6 +1279,69 @@ func (s *regAllocState) regalloc(f *Func) { s.values[e.ID].spillUsed = true } + // Keep track of values that are spilled in the loop, but whose spill + // is not used in the loop. It may be possible to move ("sink") the + // spill out of the loop into one or more exit blocks. + if loop != nil { + loop.scratch++ // increment count of blocks in this loop that have been processed + if loop.scratch == loop.nBlocks { // just processed last block of loop, if it is an inner loop. + // This check is redundant with code at the top of the loop. + // This is definitive; the one at the top of the loop is an optimization. + if loop.isInner && // Common case, easier, most likely to be profitable + !loop.containsCall && // Calls force spills, also lead to puzzling spill info. + len(loop.exits) <= 32 { // Almost no inner loops have more than 32 exits, + // and this allows use of a bitvector and a sparseMap. + + // TODO: exit calculation is messed up for non-inner loops + // because of multilevel exits that are not part of the "exit" + // count. + + // Compute the set of spill-movement candidates live at entry to exit blocks. + // isLoopSpillCandidate filters for + // (1) defined in appropriate loop + // (2) needs a register + // (3) spill not already used (in the loop) + // Condition (3) === "in a register at all loop exits" + + entryCandidates.clear() + + for whichExit, ss := range loop.exits { + // Start with live at end. + for _, li := range s.live[ss.ID] { + if s.isLoopSpillCandidate(loop, s.orig[li.ID]) { + entryCandidates.setBit(li.ID, uint(whichExit)) + } + } + // Control can also be live. + if ss.Control != nil && s.isLoopSpillCandidate(loop, ss.Control) { + entryCandidates.setBit(ss.Control.ID, uint(whichExit)) + } + // Walk backwards, filling in locally live values, removing those defined. + for i := len(ss.Values) - 1; i >= 0; i-- { + v := ss.Values[i] + entryCandidates.remove(v.ID) // Cannot be an issue, only keeps the sets smaller. + for _, a := range v.Args { + if s.isLoopSpillCandidate(loop, a) { + entryCandidates.setBit(a.ID, uint(whichExit)) + } + } + } + } + + for _, e := range loop.spills { + whichblocks := entryCandidates.get(e.ID) + oldSpill := s.values[e.ID].spill + if whichblocks != 0 && whichblocks != -1 { // -1 = not in map. + toSink = append(toSink, spillToSink{spill: oldSpill, dests: whichblocks}) + } + } + + } // loop is inner etc + loop.scratch = 0 // Don't leave a mess, just in case. + loop.spills = nil + } // if scratch == nBlocks + } // if loop is not nil + // Clear any final uses. // All that is left should be the pseudo-uses added for values which // are live at the end of b. @@ -1078,7 +1364,7 @@ func (s *regAllocState) regalloc(f *Func) { vi := s.values[i] if vi.spillUsed { if s.f.pass.debug > logSpills { - s.f.Config.Warnl(vi.spill.Line, "spilled value") + s.f.Config.Warnl(vi.spill.Line, "spilled value at %v remains", vi.spill) } continue } @@ -1087,9 +1373,16 @@ func (s *regAllocState) regalloc(f *Func) { // Constants, SP, SB, ... continue } + loop := s.loopForBlock(spill.Block) + if loop != nil { + nSpillsInner-- + } + spill.Args[0].Uses-- f.freeValue(spill) + nSpills-- } + for _, b := range f.Blocks { i := 0 for _, v := range b.Values { @@ -1104,12 +1397,161 @@ func (s *regAllocState) regalloc(f *Func) { // Not important now because this is the last phase that manipulates Values } + // Must clear these out before any potential recycling, though that's + // not currently implemented. + for i, ts := range toSink { + vsp := ts.spill + if vsp.Op == OpInvalid { // This spill was completely eliminated + toSink[i].spill = nil + } + } + // Anything that didn't get a register gets a stack location here. // (StoreReg, stack-based phis, inputs, ...) stacklive := stackalloc(s.f, s.spillLive) // Fix up all merge edges. s.shuffle(stacklive) + + // Insert moved spills (that have not been marked invalid above) + // at start of appropriate block and remove the originals from their + // location within loops. Notice that this can break SSA form; + // if a spill is sunk to multiple exits, there will be no phi for that + // spill at a join point downstream of those two exits, though the + // two spills will target the same stack slot. Notice also that this + // takes place after stack allocation, so the stack allocator does + // not need to process these malformed flow graphs. +sinking: + for _, ts := range toSink { + vsp := ts.spill + if vsp == nil { // This spill was completely eliminated + nSpillsSunkUnused++ + continue sinking + } + e := ts.spilledValue() + if s.values[e.ID].spillUsedShuffle { + nSpillsNotSunkLateUse++ + continue sinking + } + + // move spills to a better (outside of loop) block. + // This would be costly if it occurred very often, but it doesn't. + b := vsp.Block + loop := s.loopnest.b2l[b.ID] + dests := ts.dests + + // Pre-check to be sure that spilled value is still in expected register on all exits where live. + check_val_still_in_reg: + for i := uint(0); i < 32 && dests != 0; i++ { + + if dests&(1<<i) == 0 { + continue + } + dests ^= 1 << i + d := loop.exits[i] + if len(d.Preds) > 1 { + panic("Should be impossible given critical edges removed") + } + p := d.Preds[0] // block in loop exiting to d. + + endregs := s.endRegs[p.ID] + for _, regrec := range endregs { + if regrec.v == e && regrec.r != noRegister && regrec.c == e { // TODO: regrec.c != e implies different spill possible. + continue check_val_still_in_reg + } + } + // If here, the register assignment was lost down at least one exit and it can't be sunk + if s.f.pass.debug > moveSpills { + s.f.Config.Warnl(e.Line, "lost register assignment for spill %v in %v at exit %v to %v", + vsp, b, p, d) + } + nSpillsChanged++ + continue sinking + } + + nSpillsSunk++ + nSpillsInner-- + // don't update nSpills, since spill is only moved, and if it is duplicated, the spills-on-a-path is not increased. + + dests = ts.dests + + // remove vsp from b.Values + i := 0 + for _, w := range b.Values { + if vsp == w { + continue + } + b.Values[i] = w + i++ + } + b.Values = b.Values[:i] + + first := true + for i := uint(0); i < 32 && dests != 0; i++ { + + if dests&(1<<i) == 0 { + continue + } + + dests ^= 1 << i + + d := loop.exits[i] + vspnew := vsp // reuse original for first sunk spill, saves tracking down and renaming uses + if !first { // any sunk spills after first must make a copy + vspnew = d.NewValue1(e.Line, OpStoreReg, e.Type, e) + f.setHome(vspnew, f.getHome(vsp.ID)) // copy stack home + if s.f.pass.debug > moveSpills { + s.f.Config.Warnl(e.Line, "copied spill %v in %v for %v to %v in %v", + vsp, b, e, vspnew, d) + } + } else { + first = false + vspnew.Block = d + d.Values = append(d.Values, vspnew) + if s.f.pass.debug > moveSpills { + s.f.Config.Warnl(e.Line, "moved spill %v in %v for %v to %v in %v", + vsp, b, e, vspnew, d) + } + } + + // shuffle vspnew to the beginning of its block + copy(d.Values[1:], d.Values[0:len(d.Values)-1]) + d.Values[0] = vspnew + + } + } + + if f.pass.stats > 0 { + f.logStat("spills_info", + nSpills, "spills", nSpillsInner, "inner_spills_remaining", nSpillsSunk, "inner_spills_sunk", nSpillsSunkUnused, "inner_spills_unused", nSpillsNotSunkLateUse, "inner_spills_shuffled", nSpillsChanged, "inner_spills_changed") + } +} + +// isLoopSpillCandidate indicates whether the spill for v satisfies preliminary +// spill-sinking conditions just after the last block of loop has been processed. +// In particular: +// v needs a register. +// v's spill is not (YET) used. +// v's definition is within loop. +// The spill may be used in the future, either by an outright use +// in the code, or by shuffling code inserted after stack allocation. +// Outright uses cause sinking; shuffling (within the loop) inhibits it. +func (s *regAllocState) isLoopSpillCandidate(loop *loop, v *Value) bool { + return s.values[v.ID].needReg && !s.values[v.ID].spillUsed && s.loopnest.b2l[v.Block.ID] == loop +} + +// lateSpillUse notes a late (after stack allocation) use of the spill of value with ID vid. +// This will inhibit spill sinking. +func (s *regAllocState) lateSpillUse(vid ID) { + // TODO investigate why this is necessary. + // It appears that an outside-the-loop use of + // an otherwise sinkable spill makes the spill + // a candidate for shuffling, when it would not + // otherwise have been the case (spillUsed was not + // true when isLoopSpillCandidate was called, yet + // it was shuffled). Such shuffling cuts the amount + // of spill sinking by more than half (in make.bash) + s.values[vid].spillUsedShuffle = true } // shuffle fixes up all the merge edges (those going into blocks of indegree > 1). @@ -1284,6 +1726,7 @@ func (e *edgeState) process() { if _, isReg := loc.(*Register); isReg { c = e.p.NewValue1(c.Line, OpCopy, c.Type, c) } else { + e.s.lateSpillUse(vid) c = e.p.NewValue1(c.Line, OpLoadReg, c.Type, c) } e.set(r, vid, c, false) @@ -1372,6 +1815,7 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value) bool { } } else { if dstReg { + e.s.lateSpillUse(vid) x = e.p.NewValue1(c.Line, OpLoadReg, c.Type, c) } else { // mem->mem. Use temp register. @@ -1389,6 +1833,7 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value) bool { e.erase(loc) r := e.findRegFor(c.Type) + e.s.lateSpillUse(vid) t := e.p.NewValue1(c.Line, OpLoadReg, c.Type, c) e.set(r, vid, t, false) x = e.p.NewValue1(c.Line, OpStoreReg, loc.(LocalSlot).Type, t) @@ -1554,24 +1999,36 @@ func (v *Value) rematerializeable() bool { } type liveInfo struct { - ID ID // ID of variable + ID ID // ID of value dist int32 // # of instructions before next use } +// dblock contains information about desired & avoid registers at the end of a block. +type dblock struct { + prefers []desiredStateEntry + avoid regMask +} + // computeLive computes a map from block ID to a list of value IDs live at the end // of that block. Together with the value ID is a count of how many instructions -// to the next use of that value. The resulting map is stored at s.live. +// to the next use of that value. The resulting map is stored in s.live. +// computeLive also computes the desired register information at the end of each block. +// This desired register information is stored in s.desired. // TODO: this could be quadratic if lots of variables are live across lots of // basic blocks. Figure out a way to make this function (or, more precisely, the user // of this function) require only linear size & time. func (s *regAllocState) computeLive() { f := s.f s.live = make([][]liveInfo, f.NumBlocks()) + s.desired = make([]desiredState, f.NumBlocks()) var phis []*Value live := newSparseMap(f.NumValues()) t := newSparseMap(f.NumValues()) + // Keep track of which value we want in each register. + var desired desiredState + // Instead of iterating over f.Blocks, iterate over their postordering. // Liveness information flows backward, so starting at the end // increases the probability that we will stabilize quickly. @@ -1594,7 +2051,7 @@ func (s *regAllocState) computeLive() { d := int32(len(b.Values)) if b.Kind == BlockCall || b.Kind == BlockDefer { // Because we keep no values in registers across a call, - // make every use past a call very far away. + // make every use past a call appear very far away. d += unlikelyDistance } for _, e := range s.live[b.ID] { @@ -1623,6 +2080,35 @@ func (s *regAllocState) computeLive() { } } } + // Propagate desired registers backwards. + desired.copy(&s.desired[b.ID]) + for i := len(b.Values) - 1; i >= 0; i-- { + v := b.Values[i] + prefs := desired.remove(v.ID) + if v.Op == OpPhi { + // TODO: if v is a phi, save desired register for phi inputs. + // For now, we just drop it and don't propagate + // desired registers back though phi nodes. + continue + } + // Cancel desired registers if they get clobbered. + desired.clobber(opcodeTable[v.Op].reg.clobbers) + // Update desired registers if there are any fixed register inputs. + for _, j := range opcodeTable[v.Op].reg.inputs { + if countRegs(j.regs) != 1 { + continue + } + desired.clobber(j.regs) + desired.add(v.Args[j.idx].ID, pickReg(j.regs)) + } + // Set desired register of input 0 if this is a 2-operand instruction. + if opcodeTable[v.Op].resultInArg0 { + if opcodeTable[v.Op].commutative { + desired.addList(v.Args[1].ID, prefs) + } + desired.addList(v.Args[0].ID, prefs) + } + } // For each predecessor of b, expand its list of live-at-end values. // invariant: live contains the values live at the start of b (excluding phi inputs) @@ -1642,6 +2128,9 @@ func (s *regAllocState) computeLive() { } } + // Update any desired registers at the end of p. + s.desired[p.ID].merge(&desired) + // Start t off with the previously known live values at the end of p. t.clear() for _, e := range s.live[p.ID] { @@ -1662,7 +2151,7 @@ func (s *regAllocState) computeLive() { // simultaneously happening at the start of the block). for _, v := range phis { id := v.Args[i].ID - if s.values[id].needReg && !t.contains(id) || delta < t.get(id) { + if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) { update = true t.set(id, delta) } @@ -1694,20 +2183,152 @@ func (s *regAllocState) computeLive() { fmt.Printf(" %s:", b) for _, x := range s.live[b.ID] { fmt.Printf(" v%d", x.ID) + for _, e := range s.desired[b.ID].entries { + if e.ID != x.ID { + continue + } + fmt.Printf("[") + first := true + for _, r := range e.regs { + if r == noRegister { + continue + } + if !first { + fmt.Printf(",") + } + fmt.Print(s.registers[r].Name()) + first = false + } + fmt.Printf("]") + } } + fmt.Printf(" avoid=%x", int64(s.desired[b.ID].avoid)) fmt.Println() } } } -// reserved returns a mask of reserved registers. -func (s *regAllocState) reserved() regMask { - var m regMask - if obj.Framepointer_enabled != 0 { - m |= 1 << 5 // BP +// A desiredState represents desired register assignments. +type desiredState struct { + // Desired assignments will be small, so we just use a list + // of valueID+registers entries. + entries []desiredStateEntry + // Registers that other values want to be in. This value will + // contain at least the union of the regs fields of entries, but + // may contain additional entries for values that were once in + // this data structure but are no longer. + avoid regMask +} +type desiredStateEntry struct { + // (pre-regalloc) value + ID ID + // Registers it would like to be in, in priority order. + // Unused slots are filled with noRegister. + regs [4]register +} + +func (d *desiredState) clear() { + d.entries = d.entries[:0] + d.avoid = 0 +} + +// get returns a list of desired registers for value vid. +func (d *desiredState) get(vid ID) [4]register { + for _, e := range d.entries { + if e.ID == vid { + return e.regs + } } - if s.f.Config.ctxt.Flag_dynlink { - m |= 1 << 15 // R15 + return [4]register{noRegister, noRegister, noRegister, noRegister} +} + +// add records that we'd like value vid to be in register r. +func (d *desiredState) add(vid ID, r register) { + d.avoid |= regMask(1) << r + for i := range d.entries { + e := &d.entries[i] + if e.ID != vid { + continue + } + if e.regs[0] == r { + // Already known and highest priority + return + } + for j := 1; j < len(e.regs); j++ { + if e.regs[j] == r { + // Move from lower priority to top priority + copy(e.regs[1:], e.regs[:j]) + e.regs[0] = r + return + } + } + copy(e.regs[1:], e.regs[:]) + e.regs[0] = r + return + } + d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}}) +} + +func (d *desiredState) addList(vid ID, regs [4]register) { + // regs is in priority order, so iterate in reverse order. + for i := len(regs) - 1; i >= 0; i-- { + r := regs[i] + if r != noRegister { + d.add(vid, r) + } + } +} + +// clobber erases any desired registers in the set m. +func (d *desiredState) clobber(m regMask) { + for i := 0; i < len(d.entries); { + e := &d.entries[i] + j := 0 + for _, r := range e.regs { + if r != noRegister && m>>r&1 == 0 { + e.regs[j] = r + j++ + } + } + if j == 0 { + // No more desired registers for this value. + d.entries[i] = d.entries[len(d.entries)-1] + d.entries = d.entries[:len(d.entries)-1] + continue + } + for ; j < len(e.regs); j++ { + e.regs[j] = noRegister + } + i++ + } + d.avoid &^= m +} + +// copy copies a desired state from another desiredState x. +func (d *desiredState) copy(x *desiredState) { + d.entries = append(d.entries[:0], x.entries...) + d.avoid = x.avoid +} + +// remove removes the desired registers for vid and returns them. +func (d *desiredState) remove(vid ID) [4]register { + for i := range d.entries { + if d.entries[i].ID == vid { + regs := d.entries[i].regs + d.entries[i] = d.entries[len(d.entries)-1] + d.entries = d.entries[:len(d.entries)-1] + return regs + } + } + return [4]register{noRegister, noRegister, noRegister, noRegister} +} + +// merge merges another desired state x into d. +func (d *desiredState) merge(x *desiredState) { + d.avoid |= x.avoid + // There should only be a few desired registers, so + // linear insert is ok. + for _, e := range x.entries { + d.addList(e.ID, e.regs) } - return m } diff --git a/src/cmd/compile/internal/ssa/regalloc_test.go b/src/cmd/compile/internal/ssa/regalloc_test.go index 6f3f690f1e..cf8f452d12 100644 --- a/src/cmd/compile/internal/ssa/regalloc_test.go +++ b/src/cmd/compile/internal/ssa/regalloc_test.go @@ -11,8 +11,8 @@ func TestLiveControlOps(t *testing.T) { f := Fun(c, "entry", Bloc("entry", Valu("mem", OpInitMem, TypeMem, 0, nil), - Valu("x", OpAMD64MOVBconst, TypeInt8, 1, nil), - Valu("y", OpAMD64MOVBconst, TypeInt8, 2, nil), + Valu("x", OpAMD64MOVLconst, TypeInt8, 1, nil), + Valu("y", OpAMD64MOVLconst, TypeInt8, 2, nil), Valu("a", OpAMD64TESTB, TypeFlags, 0, nil, "x", "y"), Valu("b", OpAMD64TESTB, TypeFlags, 0, nil, "y", "x"), Eq("a", "if", "exit"), diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index e0cb7f517b..e9b408a86c 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -40,9 +40,44 @@ func applyRewrite(f *Func, rb func(*Block) bool, rv func(*Value, *Config) bool) } curb = nil for _, v := range b.Values { - change = copyelimValue(v) || change change = phielimValue(v) || change + // Eliminate copy inputs. + // If any copy input becomes unused, mark it + // as invalid and discard its argument. Repeat + // recursively on the discarded argument. + // This phase helps remove phantom "dead copy" uses + // of a value so that a x.Uses==1 rule condition + // fires reliably. + for i, a := range v.Args { + if a.Op != OpCopy { + continue + } + x := a.Args[0] + // Rewriting can generate OpCopy loops. + // They are harmless (see removePredecessor), + // but take care to stop if we find a cycle. + slow := x // advances every other iteration + var advance bool + for x.Op == OpCopy { + x = x.Args[0] + if slow == x { + break + } + if advance { + slow = slow.Args[0] + } + advance = !advance + } + v.SetArg(i, x) + change = true + for a.Uses == 0 { + b := a.Args[0] + a.reset(OpInvalid) + a = b + } + } + // apply rewrite function curv = v if rv(v, config) { @@ -52,7 +87,28 @@ func applyRewrite(f *Func, rb func(*Block) bool, rv func(*Value, *Config) bool) } } if !change { - return + break + } + } + // remove clobbered values + for _, b := range f.Blocks { + j := 0 + for i, v := range b.Values { + if v.Op == OpInvalid { + f.freeValue(v) + continue + } + if i != j { + b.Values[j] = v + } + j++ + } + if j != len(b.Values) { + tail := b.Values[j:] + for j := range tail { + tail[j] = nil + } + b.Values = b.Values[:j] } } } @@ -311,3 +367,13 @@ found: } return nil // too far away } + +// clobber invalidates v. Returns true. +// clobber is used by rewrite rules to: +// A) make sure v is really dead and never used again. +// B) decrement use counts of v's args. +func clobber(v *Value) bool { + v.reset(OpInvalid) + // Note: leave v.Block intact. The Block field is used after clobber. + return true +} diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 11c2de391c..c26aeb0bd0 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -8,10 +8,6 @@ import "math" var _ = math.MinInt8 // in case not otherwise used func rewriteValueAMD64(v *Value, config *Config) bool { switch v.Op { - case OpAMD64ADDB: - return rewriteValueAMD64_OpAMD64ADDB(v, config) - case OpAMD64ADDBconst: - return rewriteValueAMD64_OpAMD64ADDBconst(v, config) case OpAMD64ADDL: return rewriteValueAMD64_OpAMD64ADDL(v, config) case OpAMD64ADDLconst: @@ -20,14 +16,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAMD64ADDQ(v, config) case OpAMD64ADDQconst: return rewriteValueAMD64_OpAMD64ADDQconst(v, config) - case OpAMD64ADDW: - return rewriteValueAMD64_OpAMD64ADDW(v, config) - case OpAMD64ADDWconst: - return rewriteValueAMD64_OpAMD64ADDWconst(v, config) - case OpAMD64ANDB: - return rewriteValueAMD64_OpAMD64ANDB(v, config) - case OpAMD64ANDBconst: - return rewriteValueAMD64_OpAMD64ANDBconst(v, config) case OpAMD64ANDL: return rewriteValueAMD64_OpAMD64ANDL(v, config) case OpAMD64ANDLconst: @@ -36,10 +24,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAMD64ANDQ(v, config) case OpAMD64ANDQconst: return rewriteValueAMD64_OpAMD64ANDQconst(v, config) - case OpAMD64ANDW: - return rewriteValueAMD64_OpAMD64ANDW(v, config) - case OpAMD64ANDWconst: - return rewriteValueAMD64_OpAMD64ANDWconst(v, config) case OpAdd16: return rewriteValueAMD64_OpAdd16(v, config) case OpAdd32: @@ -64,6 +48,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAnd64(v, config) case OpAnd8: return rewriteValueAMD64_OpAnd8(v, config) + case OpAndB: + return rewriteValueAMD64_OpAndB(v, config) case OpAvg64u: return rewriteValueAMD64_OpAvg64u(v, config) case OpBswap32: @@ -180,6 +166,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpEq64F(v, config) case OpEq8: return rewriteValueAMD64_OpEq8(v, config) + case OpEqB: + return rewriteValueAMD64_OpEqB(v, config) case OpEqPtr: return rewriteValueAMD64_OpEqPtr(v, config) case OpGeq16: @@ -458,10 +446,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAMD64MOVWstoreidx1(v, config) case OpAMD64MOVWstoreidx2: return rewriteValueAMD64_OpAMD64MOVWstoreidx2(v, config) - case OpAMD64MULB: - return rewriteValueAMD64_OpAMD64MULB(v, config) - case OpAMD64MULBconst: - return rewriteValueAMD64_OpAMD64MULBconst(v, config) case OpAMD64MULL: return rewriteValueAMD64_OpAMD64MULL(v, config) case OpAMD64MULLconst: @@ -470,10 +454,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAMD64MULQ(v, config) case OpAMD64MULQconst: return rewriteValueAMD64_OpAMD64MULQconst(v, config) - case OpAMD64MULW: - return rewriteValueAMD64_OpAMD64MULW(v, config) - case OpAMD64MULWconst: - return rewriteValueAMD64_OpAMD64MULWconst(v, config) case OpMod16: return rewriteValueAMD64_OpMod16(v, config) case OpMod16u: @@ -504,22 +484,14 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpMul64F(v, config) case OpMul8: return rewriteValueAMD64_OpMul8(v, config) - case OpAMD64NEGB: - return rewriteValueAMD64_OpAMD64NEGB(v, config) case OpAMD64NEGL: return rewriteValueAMD64_OpAMD64NEGL(v, config) case OpAMD64NEGQ: return rewriteValueAMD64_OpAMD64NEGQ(v, config) - case OpAMD64NEGW: - return rewriteValueAMD64_OpAMD64NEGW(v, config) - case OpAMD64NOTB: - return rewriteValueAMD64_OpAMD64NOTB(v, config) case OpAMD64NOTL: return rewriteValueAMD64_OpAMD64NOTL(v, config) case OpAMD64NOTQ: return rewriteValueAMD64_OpAMD64NOTQ(v, config) - case OpAMD64NOTW: - return rewriteValueAMD64_OpAMD64NOTW(v, config) case OpNeg16: return rewriteValueAMD64_OpNeg16(v, config) case OpNeg32: @@ -544,16 +516,14 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpNeq64F(v, config) case OpNeq8: return rewriteValueAMD64_OpNeq8(v, config) + case OpNeqB: + return rewriteValueAMD64_OpNeqB(v, config) case OpNeqPtr: return rewriteValueAMD64_OpNeqPtr(v, config) case OpNilCheck: return rewriteValueAMD64_OpNilCheck(v, config) case OpNot: return rewriteValueAMD64_OpNot(v, config) - case OpAMD64ORB: - return rewriteValueAMD64_OpAMD64ORB(v, config) - case OpAMD64ORBconst: - return rewriteValueAMD64_OpAMD64ORBconst(v, config) case OpAMD64ORL: return rewriteValueAMD64_OpAMD64ORL(v, config) case OpAMD64ORLconst: @@ -562,10 +532,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAMD64ORQ(v, config) case OpAMD64ORQconst: return rewriteValueAMD64_OpAMD64ORQconst(v, config) - case OpAMD64ORW: - return rewriteValueAMD64_OpAMD64ORW(v, config) - case OpAMD64ORWconst: - return rewriteValueAMD64_OpAMD64ORWconst(v, config) case OpOffPtr: return rewriteValueAMD64_OpOffPtr(v, config) case OpOr16: @@ -576,6 +542,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpOr64(v, config) case OpOr8: return rewriteValueAMD64_OpOr8(v, config) + case OpOrB: + return rewriteValueAMD64_OpOrB(v, config) case OpRsh16Ux16: return rewriteValueAMD64_OpRsh16Ux16(v, config) case OpRsh16Ux32: @@ -680,14 +648,10 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAMD64SETLE(v, config) case OpAMD64SETNE: return rewriteValueAMD64_OpAMD64SETNE(v, config) - case OpAMD64SHLB: - return rewriteValueAMD64_OpAMD64SHLB(v, config) case OpAMD64SHLL: return rewriteValueAMD64_OpAMD64SHLL(v, config) case OpAMD64SHLQ: return rewriteValueAMD64_OpAMD64SHLQ(v, config) - case OpAMD64SHLW: - return rewriteValueAMD64_OpAMD64SHLW(v, config) case OpAMD64SHRB: return rewriteValueAMD64_OpAMD64SHRB(v, config) case OpAMD64SHRL: @@ -696,10 +660,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAMD64SHRQ(v, config) case OpAMD64SHRW: return rewriteValueAMD64_OpAMD64SHRW(v, config) - case OpAMD64SUBB: - return rewriteValueAMD64_OpAMD64SUBB(v, config) - case OpAMD64SUBBconst: - return rewriteValueAMD64_OpAMD64SUBBconst(v, config) case OpAMD64SUBL: return rewriteValueAMD64_OpAMD64SUBL(v, config) case OpAMD64SUBLconst: @@ -708,10 +668,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAMD64SUBQ(v, config) case OpAMD64SUBQconst: return rewriteValueAMD64_OpAMD64SUBQconst(v, config) - case OpAMD64SUBW: - return rewriteValueAMD64_OpAMD64SUBW(v, config) - case OpAMD64SUBWconst: - return rewriteValueAMD64_OpAMD64SUBWconst(v, config) case OpSignExt16to32: return rewriteValueAMD64_OpSignExt16to32(v, config) case OpSignExt16to64: @@ -756,10 +712,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpTrunc64to32(v, config) case OpTrunc64to8: return rewriteValueAMD64_OpTrunc64to8(v, config) - case OpAMD64XORB: - return rewriteValueAMD64_OpAMD64XORB(v, config) - case OpAMD64XORBconst: - return rewriteValueAMD64_OpAMD64XORBconst(v, config) case OpAMD64XORL: return rewriteValueAMD64_OpAMD64XORL(v, config) case OpAMD64XORLconst: @@ -768,10 +720,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAMD64XORQ(v, config) case OpAMD64XORQconst: return rewriteValueAMD64_OpAMD64XORQconst(v, config) - case OpAMD64XORW: - return rewriteValueAMD64_OpAMD64XORW(v, config) - case OpAMD64XORWconst: - return rewriteValueAMD64_OpAMD64XORWconst(v, config) case OpXor16: return rewriteValueAMD64_OpXor16(v, config) case OpXor32: @@ -797,105 +745,6 @@ func rewriteValueAMD64(v *Value, config *Config) bool { } return false } -func rewriteValueAMD64_OpAMD64ADDB(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ADDB x (MOVBconst [c])) - // cond: - // result: (ADDBconst [c] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64ADDBconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ADDB (MOVBconst [c]) x) - // cond: - // result: (ADDBconst [c] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { - break - } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64ADDBconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ADDB x (NEGB y)) - // cond: - // result: (SUBB x y) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64NEGB { - break - } - y := v_1.Args[0] - v.reset(OpAMD64SUBB) - v.AddArg(x) - v.AddArg(y) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64ADDBconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ADDBconst [c] x) - // cond: int8(c)==0 - // result: x - for { - c := v.AuxInt - x := v.Args[0] - if !(int8(c) == 0) { - break - } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - // match: (ADDBconst [c] (MOVBconst [d])) - // cond: - // result: (MOVBconst [int64(int8(c+d))]) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { - break - } - d := v_0.AuxInt - v.reset(OpAMD64MOVBconst) - v.AuxInt = int64(int8(c + d)) - return true - } - // match: (ADDBconst [c] (ADDBconst [d] x)) - // cond: - // result: (ADDBconst [int64(int8(c+d))] x) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64ADDBconst { - break - } - d := v_0.AuxInt - x := v_0.Args[0] - v.reset(OpAMD64ADDBconst) - v.AuxInt = int64(int8(c + d)) - v.AddArg(x) - return true - } - return false -} func rewriteValueAMD64_OpAMD64ADDL(v *Value, config *Config) bool { b := v.Block _ = b @@ -1418,244 +1267,6 @@ func rewriteValueAMD64_OpAMD64ADDQconst(v *Value, config *Config) bool { } return false } -func rewriteValueAMD64_OpAMD64ADDW(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ADDW x (MOVWconst [c])) - // cond: - // result: (ADDWconst [c] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64ADDWconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ADDW (MOVWconst [c]) x) - // cond: - // result: (ADDWconst [c] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64ADDWconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ADDW x (NEGW y)) - // cond: - // result: (SUBW x y) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64NEGW { - break - } - y := v_1.Args[0] - v.reset(OpAMD64SUBW) - v.AddArg(x) - v.AddArg(y) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64ADDWconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ADDWconst [c] x) - // cond: int16(c)==0 - // result: x - for { - c := v.AuxInt - x := v.Args[0] - if !(int16(c) == 0) { - break - } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - // match: (ADDWconst [c] (MOVWconst [d])) - // cond: - // result: (MOVWconst [int64(int16(c+d))]) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - d := v_0.AuxInt - v.reset(OpAMD64MOVWconst) - v.AuxInt = int64(int16(c + d)) - return true - } - // match: (ADDWconst [c] (ADDWconst [d] x)) - // cond: - // result: (ADDWconst [int64(int16(c+d))] x) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64ADDWconst { - break - } - d := v_0.AuxInt - x := v_0.Args[0] - v.reset(OpAMD64ADDWconst) - v.AuxInt = int64(int16(c + d)) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64ANDB(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ANDB x (MOVLconst [c])) - // cond: - // result: (ANDBconst [c] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVLconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64ANDBconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ANDB (MOVLconst [c]) x) - // cond: - // result: (ANDBconst [c] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVLconst { - break - } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64ANDBconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ANDB x (MOVBconst [c])) - // cond: - // result: (ANDBconst [c] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64ANDBconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ANDB (MOVBconst [c]) x) - // cond: - // result: (ANDBconst [c] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { - break - } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64ANDBconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ANDB x x) - // cond: - // result: x - for { - x := v.Args[0] - if x != v.Args[1] { - break - } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64ANDBconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ANDBconst [c] (ANDBconst [d] x)) - // cond: - // result: (ANDBconst [c & d] x) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDBconst { - break - } - d := v_0.AuxInt - x := v_0.Args[0] - v.reset(OpAMD64ANDBconst) - v.AuxInt = c & d - v.AddArg(x) - return true - } - // match: (ANDBconst [c] _) - // cond: int8(c)==0 - // result: (MOVBconst [0]) - for { - c := v.AuxInt - if !(int8(c) == 0) { - break - } - v.reset(OpAMD64MOVBconst) - v.AuxInt = 0 - return true - } - // match: (ANDBconst [c] x) - // cond: int8(c)==-1 - // result: x - for { - c := v.AuxInt - x := v.Args[0] - if !(int8(c) == -1) { - break - } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - // match: (ANDBconst [c] (MOVBconst [d])) - // cond: - // result: (MOVBconst [c&d]) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { - break - } - d := v_0.AuxInt - v.reset(OpAMD64MOVBconst) - v.AuxInt = c & d - return true - } - return false -} func rewriteValueAMD64_OpAMD64ANDL(v *Value, config *Config) bool { b := v.Block _ = b @@ -1838,180 +1449,77 @@ func rewriteValueAMD64_OpAMD64ANDQconst(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (ANDQconst [0] _) - // cond: - // result: (MOVQconst [0]) - for { - if v.AuxInt != 0 { - break - } - v.reset(OpAMD64MOVQconst) - v.AuxInt = 0 - return true - } - // match: (ANDQconst [-1] x) + // match: (ANDQconst [0xFF] x) // cond: - // result: x + // result: (MOVBQZX x) for { - if v.AuxInt != -1 { + if v.AuxInt != 0xFF { break } x := v.Args[0] - v.reset(OpCopy) - v.Type = x.Type + v.reset(OpAMD64MOVBQZX) v.AddArg(x) return true } - // match: (ANDQconst [c] (MOVQconst [d])) + // match: (ANDQconst [0xFFFF] x) // cond: - // result: (MOVQconst [c&d]) + // result: (MOVWQZX x) for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVQconst { + if v.AuxInt != 0xFFFF { break } - d := v_0.AuxInt - v.reset(OpAMD64MOVQconst) - v.AuxInt = c & d - return true - } - return false -} -func rewriteValueAMD64_OpAMD64ANDW(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ANDW x (MOVLconst [c])) - // cond: - // result: (ANDWconst [c] x) - for { x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVLconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64ANDWconst) - v.AuxInt = c + v.reset(OpAMD64MOVWQZX) v.AddArg(x) return true } - // match: (ANDW (MOVLconst [c]) x) + // match: (ANDQconst [0xFFFFFFFF] x) // cond: - // result: (ANDWconst [c] x) + // result: (MOVLQZX x) for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVLconst { + if v.AuxInt != 0xFFFFFFFF { break } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64ANDWconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ANDW x (MOVWconst [c])) - // cond: - // result: (ANDWconst [c] x) - for { x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64ANDWconst) - v.AuxInt = c + v.reset(OpAMD64MOVLQZX) v.AddArg(x) return true } - // match: (ANDW (MOVWconst [c]) x) + // match: (ANDQconst [0] _) // cond: - // result: (ANDWconst [c] x) + // result: (MOVQconst [0]) for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { + if v.AuxInt != 0 { break } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64ANDWconst) - v.AuxInt = c - v.AddArg(x) + v.reset(OpAMD64MOVQconst) + v.AuxInt = 0 return true } - // match: (ANDW x x) + // match: (ANDQconst [-1] x) // cond: // result: x for { - x := v.Args[0] - if x != v.Args[1] { - break - } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64ANDWconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ANDWconst [c] (ANDWconst [d] x)) - // cond: - // result: (ANDWconst [c & d] x) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDWconst { - break - } - d := v_0.AuxInt - x := v_0.Args[0] - v.reset(OpAMD64ANDWconst) - v.AuxInt = c & d - v.AddArg(x) - return true - } - // match: (ANDWconst [c] _) - // cond: int16(c)==0 - // result: (MOVWconst [0]) - for { - c := v.AuxInt - if !(int16(c) == 0) { + if v.AuxInt != -1 { break } - v.reset(OpAMD64MOVWconst) - v.AuxInt = 0 - return true - } - // match: (ANDWconst [c] x) - // cond: int16(c)==-1 - // result: x - for { - c := v.AuxInt x := v.Args[0] - if !(int16(c) == -1) { - break - } v.reset(OpCopy) v.Type = x.Type v.AddArg(x) return true } - // match: (ANDWconst [c] (MOVWconst [d])) + // match: (ANDQconst [c] (MOVQconst [d])) // cond: - // result: (MOVWconst [c&d]) + // result: (MOVQconst [c&d]) for { c := v.AuxInt v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { + if v_0.Op != OpAMD64MOVQconst { break } d := v_0.AuxInt - v.reset(OpAMD64MOVWconst) + v.reset(OpAMD64MOVQconst) v.AuxInt = c & d return true } @@ -2022,11 +1530,11 @@ func rewriteValueAMD64_OpAdd16(v *Value, config *Config) bool { _ = b // match: (Add16 x y) // cond: - // result: (ADDW x y) + // result: (ADDL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ADDW) + v.reset(OpAMD64ADDL) v.AddArg(x) v.AddArg(y) return true @@ -2102,11 +1610,11 @@ func rewriteValueAMD64_OpAdd8(v *Value, config *Config) bool { _ = b // match: (Add8 x y) // cond: - // result: (ADDB x y) + // result: (ADDL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ADDB) + v.reset(OpAMD64ADDL) v.AddArg(x) v.AddArg(y) return true @@ -2150,11 +1658,11 @@ func rewriteValueAMD64_OpAnd16(v *Value, config *Config) bool { _ = b // match: (And16 x y) // cond: - // result: (ANDW x y) + // result: (ANDL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDW) + v.reset(OpAMD64ANDL) v.AddArg(x) v.AddArg(y) return true @@ -2198,11 +1706,27 @@ func rewriteValueAMD64_OpAnd8(v *Value, config *Config) bool { _ = b // match: (And8 x y) // cond: - // result: (ANDB x y) + // result: (ANDL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDB) + v.reset(OpAMD64ANDL) + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueAMD64_OpAndB(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (AndB x y) + // cond: + // result: (ANDL x y) + for { + x := v.Args[0] + y := v.Args[1] + v.reset(OpAMD64ANDL) v.AddArg(x) v.AddArg(y) return true @@ -2529,27 +2053,27 @@ func rewriteValueAMD64_OpAMD64CMOVWEQconst(v *Value, config *Config) bool { func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool { b := v.Block _ = b - // match: (CMPB x (MOVBconst [c])) + // match: (CMPB x (MOVLconst [c])) // cond: - // result: (CMPBconst x [c]) + // result: (CMPBconst x [int64(int8(c))]) for { x := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { + if v_1.Op != OpAMD64MOVLconst { break } c := v_1.AuxInt v.reset(OpAMD64CMPBconst) v.AddArg(x) - v.AuxInt = c + v.AuxInt = int64(int8(c)) return true } - // match: (CMPB (MOVBconst [c]) x) + // match: (CMPB (MOVLconst [c]) x) // cond: - // result: (InvertFlags (CMPBconst x [c])) + // result: (InvertFlags (CMPBconst x [int64(int8(c))])) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { + if v_0.Op != OpAMD64MOVLconst { break } c := v_0.AuxInt @@ -2557,7 +2081,7 @@ func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool { v.reset(OpAMD64InvertFlags) v0 := b.NewValue0(v.Line, OpAMD64CMPBconst, TypeFlags) v0.AddArg(x) - v0.AuxInt = c + v0.AuxInt = int64(int8(c)) v.AddArg(v0) return true } @@ -2566,12 +2090,12 @@ func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool { func rewriteValueAMD64_OpAMD64CMPBconst(v *Value, config *Config) bool { b := v.Block _ = b - // match: (CMPBconst (MOVBconst [x]) [y]) + // match: (CMPBconst (MOVLconst [x]) [y]) // cond: int8(x)==int8(y) // result: (FlagEQ) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { + if v_0.Op != OpAMD64MOVLconst { break } x := v_0.AuxInt @@ -2582,12 +2106,12 @@ func rewriteValueAMD64_OpAMD64CMPBconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagEQ) return true } - // match: (CMPBconst (MOVBconst [x]) [y]) + // match: (CMPBconst (MOVLconst [x]) [y]) // cond: int8(x)<int8(y) && uint8(x)<uint8(y) // result: (FlagLT_ULT) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { + if v_0.Op != OpAMD64MOVLconst { break } x := v_0.AuxInt @@ -2598,12 +2122,12 @@ func rewriteValueAMD64_OpAMD64CMPBconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagLT_ULT) return true } - // match: (CMPBconst (MOVBconst [x]) [y]) + // match: (CMPBconst (MOVLconst [x]) [y]) // cond: int8(x)<int8(y) && uint8(x)>uint8(y) // result: (FlagLT_UGT) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { + if v_0.Op != OpAMD64MOVLconst { break } x := v_0.AuxInt @@ -2614,12 +2138,12 @@ func rewriteValueAMD64_OpAMD64CMPBconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagLT_UGT) return true } - // match: (CMPBconst (MOVBconst [x]) [y]) + // match: (CMPBconst (MOVLconst [x]) [y]) // cond: int8(x)>int8(y) && uint8(x)<uint8(y) // result: (FlagGT_ULT) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { + if v_0.Op != OpAMD64MOVLconst { break } x := v_0.AuxInt @@ -2630,12 +2154,12 @@ func rewriteValueAMD64_OpAMD64CMPBconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagGT_ULT) return true } - // match: (CMPBconst (MOVBconst [x]) [y]) + // match: (CMPBconst (MOVLconst [x]) [y]) // cond: int8(x)>int8(y) && uint8(x)>uint8(y) // result: (FlagGT_UGT) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { + if v_0.Op != OpAMD64MOVLconst { break } x := v_0.AuxInt @@ -2646,12 +2170,12 @@ func rewriteValueAMD64_OpAMD64CMPBconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagGT_UGT) return true } - // match: (CMPBconst (ANDBconst _ [m]) [n]) + // match: (CMPBconst (ANDLconst _ [m]) [n]) // cond: 0 <= int8(m) && int8(m) < int8(n) // result: (FlagLT_ULT) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDBconst { + if v_0.Op != OpAMD64ANDLconst { break } m := v_0.AuxInt @@ -2662,12 +2186,12 @@ func rewriteValueAMD64_OpAMD64CMPBconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagLT_ULT) return true } - // match: (CMPBconst (ANDB x y) [0]) + // match: (CMPBconst (ANDL x y) [0]) // cond: // result: (TESTB x y) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDB { + if v_0.Op != OpAMD64ANDL { break } x := v_0.Args[0] @@ -2680,12 +2204,12 @@ func rewriteValueAMD64_OpAMD64CMPBconst(v *Value, config *Config) bool { v.AddArg(y) return true } - // match: (CMPBconst (ANDBconst [c] x) [0]) + // match: (CMPBconst (ANDLconst [c] x) [0]) // cond: - // result: (TESTBconst [c] x) + // result: (TESTBconst [int64(int8(c))] x) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDBconst { + if v_0.Op != OpAMD64ANDLconst { break } c := v_0.AuxInt @@ -2694,7 +2218,7 @@ func rewriteValueAMD64_OpAMD64CMPBconst(v *Value, config *Config) bool { break } v.reset(OpAMD64TESTBconst) - v.AuxInt = c + v.AuxInt = int64(int8(c)) v.AddArg(x) return true } @@ -2833,6 +2357,22 @@ func rewriteValueAMD64_OpAMD64CMPLconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagGT_UGT) return true } + // match: (CMPLconst (SHRLconst _ [c]) [n]) + // cond: 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) + // result: (FlagLT_ULT) + for { + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHRLconst { + break + } + c := v_0.AuxInt + n := v.AuxInt + if !(0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)) { + break + } + v.reset(OpAMD64FlagLT_ULT) + return true + } // match: (CMPLconst (ANDLconst _ [m]) [n]) // cond: 0 <= int32(m) && int32(m) < int32(n) // result: (FlagLT_ULT) @@ -3026,6 +2566,67 @@ func rewriteValueAMD64_OpAMD64CMPQconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagGT_UGT) return true } + // match: (CMPQconst (MOVBQZX _) [c]) + // cond: 0xFF < c + // result: (FlagLT_ULT) + for { + v_0 := v.Args[0] + if v_0.Op != OpAMD64MOVBQZX { + break + } + c := v.AuxInt + if !(0xFF < c) { + break + } + v.reset(OpAMD64FlagLT_ULT) + return true + } + // match: (CMPQconst (MOVWQZX _) [c]) + // cond: 0xFFFF < c + // result: (FlagLT_ULT) + for { + v_0 := v.Args[0] + if v_0.Op != OpAMD64MOVWQZX { + break + } + c := v.AuxInt + if !(0xFFFF < c) { + break + } + v.reset(OpAMD64FlagLT_ULT) + return true + } + // match: (CMPQconst (MOVLQZX _) [c]) + // cond: 0xFFFFFFFF < c + // result: (FlagLT_ULT) + for { + v_0 := v.Args[0] + if v_0.Op != OpAMD64MOVLQZX { + break + } + c := v.AuxInt + if !(0xFFFFFFFF < c) { + break + } + v.reset(OpAMD64FlagLT_ULT) + return true + } + // match: (CMPQconst (SHRQconst _ [c]) [n]) + // cond: 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n) + // result: (FlagLT_ULT) + for { + v_0 := v.Args[0] + if v_0.Op != OpAMD64SHRQconst { + break + } + c := v_0.AuxInt + n := v.AuxInt + if !(0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)) { + break + } + v.reset(OpAMD64FlagLT_ULT) + return true + } // match: (CMPQconst (ANDQconst _ [m]) [n]) // cond: 0 <= m && m < n // result: (FlagLT_ULT) @@ -3096,27 +2697,27 @@ func rewriteValueAMD64_OpAMD64CMPQconst(v *Value, config *Config) bool { func rewriteValueAMD64_OpAMD64CMPW(v *Value, config *Config) bool { b := v.Block _ = b - // match: (CMPW x (MOVWconst [c])) + // match: (CMPW x (MOVLconst [c])) // cond: - // result: (CMPWconst x [c]) + // result: (CMPWconst x [int64(int16(c))]) for { x := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { + if v_1.Op != OpAMD64MOVLconst { break } c := v_1.AuxInt v.reset(OpAMD64CMPWconst) v.AddArg(x) - v.AuxInt = c + v.AuxInt = int64(int16(c)) return true } - // match: (CMPW (MOVWconst [c]) x) + // match: (CMPW (MOVLconst [c]) x) // cond: - // result: (InvertFlags (CMPWconst x [c])) + // result: (InvertFlags (CMPWconst x [int64(int16(c))])) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { + if v_0.Op != OpAMD64MOVLconst { break } c := v_0.AuxInt @@ -3124,7 +2725,7 @@ func rewriteValueAMD64_OpAMD64CMPW(v *Value, config *Config) bool { v.reset(OpAMD64InvertFlags) v0 := b.NewValue0(v.Line, OpAMD64CMPWconst, TypeFlags) v0.AddArg(x) - v0.AuxInt = c + v0.AuxInt = int64(int16(c)) v.AddArg(v0) return true } @@ -3133,12 +2734,12 @@ func rewriteValueAMD64_OpAMD64CMPW(v *Value, config *Config) bool { func rewriteValueAMD64_OpAMD64CMPWconst(v *Value, config *Config) bool { b := v.Block _ = b - // match: (CMPWconst (MOVWconst [x]) [y]) + // match: (CMPWconst (MOVLconst [x]) [y]) // cond: int16(x)==int16(y) // result: (FlagEQ) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { + if v_0.Op != OpAMD64MOVLconst { break } x := v_0.AuxInt @@ -3149,12 +2750,12 @@ func rewriteValueAMD64_OpAMD64CMPWconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagEQ) return true } - // match: (CMPWconst (MOVWconst [x]) [y]) + // match: (CMPWconst (MOVLconst [x]) [y]) // cond: int16(x)<int16(y) && uint16(x)<uint16(y) // result: (FlagLT_ULT) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { + if v_0.Op != OpAMD64MOVLconst { break } x := v_0.AuxInt @@ -3165,12 +2766,12 @@ func rewriteValueAMD64_OpAMD64CMPWconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagLT_ULT) return true } - // match: (CMPWconst (MOVWconst [x]) [y]) + // match: (CMPWconst (MOVLconst [x]) [y]) // cond: int16(x)<int16(y) && uint16(x)>uint16(y) // result: (FlagLT_UGT) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { + if v_0.Op != OpAMD64MOVLconst { break } x := v_0.AuxInt @@ -3181,12 +2782,12 @@ func rewriteValueAMD64_OpAMD64CMPWconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagLT_UGT) return true } - // match: (CMPWconst (MOVWconst [x]) [y]) + // match: (CMPWconst (MOVLconst [x]) [y]) // cond: int16(x)>int16(y) && uint16(x)<uint16(y) // result: (FlagGT_ULT) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { + if v_0.Op != OpAMD64MOVLconst { break } x := v_0.AuxInt @@ -3197,12 +2798,12 @@ func rewriteValueAMD64_OpAMD64CMPWconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagGT_ULT) return true } - // match: (CMPWconst (MOVWconst [x]) [y]) + // match: (CMPWconst (MOVLconst [x]) [y]) // cond: int16(x)>int16(y) && uint16(x)>uint16(y) // result: (FlagGT_UGT) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { + if v_0.Op != OpAMD64MOVLconst { break } x := v_0.AuxInt @@ -3213,12 +2814,12 @@ func rewriteValueAMD64_OpAMD64CMPWconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagGT_UGT) return true } - // match: (CMPWconst (ANDWconst _ [m]) [n]) + // match: (CMPWconst (ANDLconst _ [m]) [n]) // cond: 0 <= int16(m) && int16(m) < int16(n) // result: (FlagLT_ULT) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDWconst { + if v_0.Op != OpAMD64ANDLconst { break } m := v_0.AuxInt @@ -3229,12 +2830,12 @@ func rewriteValueAMD64_OpAMD64CMPWconst(v *Value, config *Config) bool { v.reset(OpAMD64FlagLT_ULT) return true } - // match: (CMPWconst (ANDW x y) [0]) + // match: (CMPWconst (ANDL x y) [0]) // cond: // result: (TESTW x y) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDW { + if v_0.Op != OpAMD64ANDL { break } x := v_0.Args[0] @@ -3247,12 +2848,12 @@ func rewriteValueAMD64_OpAMD64CMPWconst(v *Value, config *Config) bool { v.AddArg(y) return true } - // match: (CMPWconst (ANDWconst [c] x) [0]) + // match: (CMPWconst (ANDLconst [c] x) [0]) // cond: - // result: (TESTWconst [c] x) + // result: (TESTWconst [int64(int16(c))] x) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDWconst { + if v_0.Op != OpAMD64ANDLconst { break } c := v_0.AuxInt @@ -3261,7 +2862,7 @@ func rewriteValueAMD64_OpAMD64CMPWconst(v *Value, config *Config) bool { break } v.reset(OpAMD64TESTWconst) - v.AuxInt = c + v.AuxInt = int64(int16(c)) v.AddArg(x) return true } @@ -3305,10 +2906,10 @@ func rewriteValueAMD64_OpCom16(v *Value, config *Config) bool { _ = b // match: (Com16 x) // cond: - // result: (NOTW x) + // result: (NOTL x) for { x := v.Args[0] - v.reset(OpAMD64NOTW) + v.reset(OpAMD64NOTL) v.AddArg(x) return true } @@ -3347,10 +2948,10 @@ func rewriteValueAMD64_OpCom8(v *Value, config *Config) bool { _ = b // match: (Com8 x) // cond: - // result: (NOTB x) + // result: (NOTL x) for { x := v.Args[0] - v.reset(OpAMD64NOTB) + v.reset(OpAMD64NOTL) v.AddArg(x) return true } @@ -3361,10 +2962,10 @@ func rewriteValueAMD64_OpConst16(v *Value, config *Config) bool { _ = b // match: (Const16 [val]) // cond: - // result: (MOVWconst [val]) + // result: (MOVLconst [val]) for { val := v.AuxInt - v.reset(OpAMD64MOVWconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = val return true } @@ -3431,10 +3032,10 @@ func rewriteValueAMD64_OpConst8(v *Value, config *Config) bool { _ = b // match: (Const8 [val]) // cond: - // result: (MOVBconst [val]) + // result: (MOVLconst [val]) for { val := v.AuxInt - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = val return true } @@ -3445,10 +3046,10 @@ func rewriteValueAMD64_OpConstBool(v *Value, config *Config) bool { _ = b // match: (ConstBool [b]) // cond: - // result: (MOVBconst [b]) + // result: (MOVLconst [b]) for { b := v.AuxInt - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = b return true } @@ -3983,6 +3584,24 @@ func rewriteValueAMD64_OpEq8(v *Value, config *Config) bool { } return false } +func rewriteValueAMD64_OpEqB(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (EqB x y) + // cond: + // result: (SETEQ (CMPB x y)) + for { + x := v.Args[0] + y := v.Args[1] + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Line, OpAMD64CMPB, TypeFlags) + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + return false +} func rewriteValueAMD64_OpEqPtr(v *Value, config *Config) bool { b := v.Block _ = b @@ -5842,20 +5461,20 @@ func rewriteValueAMD64_OpLsh16x16(v *Value, config *Config) bool { _ = b // match: (Lsh16x16 <t> x y) // cond: - // result: (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16]))) + // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDW) - v0 := b.NewValue0(v.Line, OpAMD64SHLW, t) + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Line, OpAMD64SHLL, t) v0.AddArg(x) v0.AddArg(y) v.AddArg(v0) v1 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, t) v2 := b.NewValue0(v.Line, OpAMD64CMPWconst, TypeFlags) v2.AddArg(y) - v2.AuxInt = 16 + v2.AuxInt = 32 v1.AddArg(v2) v.AddArg(v1) return true @@ -5867,20 +5486,20 @@ func rewriteValueAMD64_OpLsh16x32(v *Value, config *Config) bool { _ = b // match: (Lsh16x32 <t> x y) // cond: - // result: (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16]))) + // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDW) - v0 := b.NewValue0(v.Line, OpAMD64SHLW, t) + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Line, OpAMD64SHLL, t) v0.AddArg(x) v0.AddArg(y) v.AddArg(v0) v1 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, t) v2 := b.NewValue0(v.Line, OpAMD64CMPLconst, TypeFlags) v2.AddArg(y) - v2.AuxInt = 16 + v2.AuxInt = 32 v1.AddArg(v2) v.AddArg(v1) return true @@ -5892,20 +5511,20 @@ func rewriteValueAMD64_OpLsh16x64(v *Value, config *Config) bool { _ = b // match: (Lsh16x64 <t> x y) // cond: - // result: (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16]))) + // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDW) - v0 := b.NewValue0(v.Line, OpAMD64SHLW, t) + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Line, OpAMD64SHLL, t) v0.AddArg(x) v0.AddArg(y) v.AddArg(v0) v1 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, t) v2 := b.NewValue0(v.Line, OpAMD64CMPQconst, TypeFlags) v2.AddArg(y) - v2.AuxInt = 16 + v2.AuxInt = 32 v1.AddArg(v2) v.AddArg(v1) return true @@ -5917,20 +5536,20 @@ func rewriteValueAMD64_OpLsh16x8(v *Value, config *Config) bool { _ = b // match: (Lsh16x8 <t> x y) // cond: - // result: (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16]))) + // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDW) - v0 := b.NewValue0(v.Line, OpAMD64SHLW, t) + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Line, OpAMD64SHLL, t) v0.AddArg(x) v0.AddArg(y) v.AddArg(v0) v1 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, t) v2 := b.NewValue0(v.Line, OpAMD64CMPBconst, TypeFlags) v2.AddArg(y) - v2.AuxInt = 16 + v2.AuxInt = 32 v1.AddArg(v2) v.AddArg(v1) return true @@ -6142,20 +5761,20 @@ func rewriteValueAMD64_OpLsh8x16(v *Value, config *Config) bool { _ = b // match: (Lsh8x16 <t> x y) // cond: - // result: (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8]))) + // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDB) - v0 := b.NewValue0(v.Line, OpAMD64SHLB, t) + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Line, OpAMD64SHLL, t) v0.AddArg(x) v0.AddArg(y) v.AddArg(v0) v1 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, t) v2 := b.NewValue0(v.Line, OpAMD64CMPWconst, TypeFlags) v2.AddArg(y) - v2.AuxInt = 8 + v2.AuxInt = 32 v1.AddArg(v2) v.AddArg(v1) return true @@ -6167,20 +5786,20 @@ func rewriteValueAMD64_OpLsh8x32(v *Value, config *Config) bool { _ = b // match: (Lsh8x32 <t> x y) // cond: - // result: (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8]))) + // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDB) - v0 := b.NewValue0(v.Line, OpAMD64SHLB, t) + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Line, OpAMD64SHLL, t) v0.AddArg(x) v0.AddArg(y) v.AddArg(v0) v1 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, t) v2 := b.NewValue0(v.Line, OpAMD64CMPLconst, TypeFlags) v2.AddArg(y) - v2.AuxInt = 8 + v2.AuxInt = 32 v1.AddArg(v2) v.AddArg(v1) return true @@ -6192,20 +5811,20 @@ func rewriteValueAMD64_OpLsh8x64(v *Value, config *Config) bool { _ = b // match: (Lsh8x64 <t> x y) // cond: - // result: (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8]))) + // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDB) - v0 := b.NewValue0(v.Line, OpAMD64SHLB, t) + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Line, OpAMD64SHLL, t) v0.AddArg(x) v0.AddArg(y) v.AddArg(v0) v1 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, t) v2 := b.NewValue0(v.Line, OpAMD64CMPQconst, TypeFlags) v2.AddArg(y) - v2.AuxInt = 8 + v2.AuxInt = 32 v1.AddArg(v2) v.AddArg(v1) return true @@ -6217,20 +5836,20 @@ func rewriteValueAMD64_OpLsh8x8(v *Value, config *Config) bool { _ = b // match: (Lsh8x8 <t> x y) // cond: - // result: (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8]))) + // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDB) - v0 := b.NewValue0(v.Line, OpAMD64SHLB, t) + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Line, OpAMD64SHLL, t) v0.AddArg(x) v0.AddArg(y) v.AddArg(v0) v1 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, t) v2 := b.NewValue0(v.Line, OpAMD64CMPBconst, TypeFlags) v2.AddArg(y) - v2.AuxInt = 8 + v2.AuxInt = 32 v1.AddArg(v2) v.AddArg(v1) return true @@ -6241,7 +5860,7 @@ func rewriteValueAMD64_OpAMD64MOVBQSX(v *Value, config *Config) bool { b := v.Block _ = b // match: (MOVBQSX x:(MOVBload [off] {sym} ptr mem)) - // cond: x.Uses == 1 + // cond: x.Uses == 1 && clobber(x) // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) for { x := v.Args[0] @@ -6252,7 +5871,7 @@ func rewriteValueAMD64_OpAMD64MOVBQSX(v *Value, config *Config) bool { sym := x.Aux ptr := x.Args[0] mem := x.Args[1] - if !(x.Uses == 1) { + if !(x.Uses == 1 && clobber(x)) { break } b = x.Block @@ -6265,12 +5884,12 @@ func rewriteValueAMD64_OpAMD64MOVBQSX(v *Value, config *Config) bool { v0.AddArg(mem) return true } - // match: (MOVBQSX (ANDBconst [c] x)) + // match: (MOVBQSX (ANDLconst [c] x)) // cond: c & 0x80 == 0 - // result: (ANDQconst [c & 0x7f] x) + // result: (ANDLconst [c & 0x7f] x) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDBconst { + if v_0.Op != OpAMD64ANDLconst { break } c := v_0.AuxInt @@ -6278,7 +5897,7 @@ func rewriteValueAMD64_OpAMD64MOVBQSX(v *Value, config *Config) bool { if !(c&0x80 == 0) { break } - v.reset(OpAMD64ANDQconst) + v.reset(OpAMD64ANDLconst) v.AuxInt = c & 0x7f v.AddArg(x) return true @@ -6318,7 +5937,7 @@ func rewriteValueAMD64_OpAMD64MOVBQZX(v *Value, config *Config) bool { b := v.Block _ = b // match: (MOVBQZX x:(MOVBload [off] {sym} ptr mem)) - // cond: x.Uses == 1 + // cond: x.Uses == 1 && clobber(x) // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) for { x := v.Args[0] @@ -6329,7 +5948,7 @@ func rewriteValueAMD64_OpAMD64MOVBQZX(v *Value, config *Config) bool { sym := x.Aux ptr := x.Args[0] mem := x.Args[1] - if !(x.Uses == 1) { + if !(x.Uses == 1 && clobber(x)) { break } b = x.Block @@ -6343,7 +5962,7 @@ func rewriteValueAMD64_OpAMD64MOVBQZX(v *Value, config *Config) bool { return true } // match: (MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 + // cond: x.Uses == 1 && clobber(x) // result: @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem) for { x := v.Args[0] @@ -6355,7 +5974,7 @@ func rewriteValueAMD64_OpAMD64MOVBQZX(v *Value, config *Config) bool { ptr := x.Args[0] idx := x.Args[1] mem := x.Args[2] - if !(x.Uses == 1) { + if !(x.Uses == 1 && clobber(x)) { break } b = x.Block @@ -6369,17 +5988,17 @@ func rewriteValueAMD64_OpAMD64MOVBQZX(v *Value, config *Config) bool { v0.AddArg(mem) return true } - // match: (MOVBQZX (ANDBconst [c] x)) + // match: (MOVBQZX (ANDLconst [c] x)) // cond: - // result: (ANDQconst [c & 0xff] x) + // result: (ANDLconst [c & 0xff] x) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDBconst { + if v_0.Op != OpAMD64ANDLconst { break } c := v_0.AuxInt x := v_0.Args[0] - v.reset(OpAMD64ANDQconst) + v.reset(OpAMD64ANDLconst) v.AuxInt = c & 0xff v.AddArg(x) return true @@ -6630,7 +6249,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value, config *Config) bool { v.AddArg(mem) return true } - // match: (MOVBstore [off] {sym} ptr (MOVBconst [c]) mem) + // match: (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem) // cond: validOff(off) // result: (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem) for { @@ -6638,7 +6257,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value, config *Config) bool { sym := v.Aux ptr := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { + if v_1.Op != OpAMD64MOVLconst { break } c := v_1.AuxInt @@ -6733,6 +6352,97 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHRQconst { + break + } + if v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpAMD64MOVBstore { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if w != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(p) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w0 mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHRQconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpAMD64MOVBstore { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + w0 := x.Args[1] + if w0.Op != OpAMD64SHRQconst { + break + } + if w0.AuxInt != j-8 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(p) + v.AddArg(w0) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value, config *Config) bool { @@ -6832,6 +6542,35 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) + for { + c := v.AuxInt + s := v.Aux + p := v.Args[0] + x := v.Args[1] + if x.Op != OpAMD64MOVBstoreconst { + break + } + a := x.AuxInt + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + mem := x.Args[1] + if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) + v.Aux = s + v.AddArg(p) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVBstoreconstidx1(v *Value, config *Config) bool { @@ -6881,6 +6620,40 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconstidx1(v *Value, config *Config) bool v.AddArg(mem) return true } + // match: (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem) + for { + c := v.AuxInt + s := v.Aux + p := v.Args[0] + i := v.Args[1] + x := v.Args[2] + if x.Op != OpAMD64MOVBstoreconstidx1 { + break + } + a := x.AuxInt + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if i != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstoreconstidx1) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) + v.Aux = s + v.AddArg(p) + v.AddArg(i) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value, config *Config) bool { @@ -6934,13 +6707,114 @@ func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i-1] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpAMD64SHRQconst { + break + } + if v_2.AuxInt != 8 { + break + } + w := v_2.Args[0] + x := v.Args[3] + if x.Op != OpAMD64MOVBstoreidx1 { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstoreidx1) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpAMD64SHRQconst { + break + } + j := v_2.AuxInt + w := v_2.Args[0] + x := v.Args[3] + if x.Op != OpAMD64MOVBstoreidx1 { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + w0 := x.Args[2] + if w0.Op != OpAMD64SHRQconst { + break + } + if w0.AuxInt != j-8 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstoreidx1) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w0) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value, config *Config) bool { b := v.Block _ = b // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 + // cond: x.Uses == 1 && clobber(x) // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem) for { x := v.Args[0] @@ -6951,7 +6825,7 @@ func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value, config *Config) bool { sym := x.Aux ptr := x.Args[0] mem := x.Args[1] - if !(x.Uses == 1) { + if !(x.Uses == 1 && clobber(x)) { break } b = x.Block @@ -6966,7 +6840,7 @@ func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value, config *Config) bool { } // match: (MOVLQSX (ANDLconst [c] x)) // cond: c & 0x80000000 == 0 - // result: (ANDQconst [c & 0x7fffffff] x) + // result: (ANDLconst [c & 0x7fffffff] x) for { v_0 := v.Args[0] if v_0.Op != OpAMD64ANDLconst { @@ -6977,7 +6851,7 @@ func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value, config *Config) bool { if !(c&0x80000000 == 0) { break } - v.reset(OpAMD64ANDQconst) + v.reset(OpAMD64ANDLconst) v.AuxInt = c & 0x7fffffff v.AddArg(x) return true @@ -7017,7 +6891,7 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value, config *Config) bool { b := v.Block _ = b // match: (MOVLQZX x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 + // cond: x.Uses == 1 && clobber(x) // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem) for { x := v.Args[0] @@ -7028,7 +6902,7 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value, config *Config) bool { sym := x.Aux ptr := x.Args[0] mem := x.Args[1] - if !(x.Uses == 1) { + if !(x.Uses == 1 && clobber(x)) { break } b = x.Block @@ -7042,7 +6916,7 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value, config *Config) bool { return true } // match: (MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 + // cond: x.Uses == 1 && clobber(x) // result: @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem) for { x := v.Args[0] @@ -7054,7 +6928,7 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value, config *Config) bool { ptr := x.Args[0] idx := x.Args[1] mem := x.Args[2] - if !(x.Uses == 1) { + if !(x.Uses == 1 && clobber(x)) { break } b = x.Block @@ -7069,7 +6943,7 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value, config *Config) bool { return true } // match: (MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 + // cond: x.Uses == 1 && clobber(x) // result: @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem) for { x := v.Args[0] @@ -7081,7 +6955,7 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value, config *Config) bool { ptr := x.Args[0] idx := x.Args[1] mem := x.Args[2] - if !(x.Uses == 1) { + if !(x.Uses == 1 && clobber(x)) { break } b = x.Block @@ -7096,8 +6970,8 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value, config *Config) bool { return true } // match: (MOVLQZX (ANDLconst [c] x)) - // cond: c & 0x80000000 == 0 - // result: (ANDQconst [c & 0x7fffffff] x) + // cond: + // result: (ANDLconst [c] x) for { v_0 := v.Args[0] if v_0.Op != OpAMD64ANDLconst { @@ -7105,11 +6979,8 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value, config *Config) bool { } c := v_0.AuxInt x := v_0.Args[0] - if !(c&0x80000000 == 0) { - break - } - v.reset(OpAMD64ANDQconst) - v.AuxInt = c & 0x7fffffff + v.reset(OpAMD64ANDLconst) + v.AuxInt = c v.AddArg(x) return true } @@ -7589,6 +7460,97 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVQstore [i-4] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHRQconst { + break + } + if v_1.AuxInt != 32 { + break + } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpAMD64MOVLstore { + break + } + if x.AuxInt != i-4 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if w != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg(p) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVQstore [i-4] {s} p w0 mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHRQconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpAMD64MOVLstore { + break + } + if x.AuxInt != i-4 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + w0 := x.Args[1] + if w0.Op != OpAMD64SHRQconst { + break + } + if w0.AuxInt != j-32 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg(p) + v.AddArg(w0) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value, config *Config) bool { @@ -7714,6 +7676,38 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) + // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) + for { + c := v.AuxInt + s := v.Aux + p := v.Args[0] + x := v.Args[1] + if x.Op != OpAMD64MOVLstoreconst { + break + } + a := x.AuxInt + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + mem := x.Args[1] + if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = ValAndOff(a).Off() + v.Aux = s + v.AddArg(p) + v0 := b.NewValue0(v.Line, OpAMD64MOVQconst, config.fe.TypeUInt64()) + v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 + v.AddArg(v0) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1(v *Value, config *Config) bool { @@ -7787,6 +7781,43 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1(v *Value, config *Config) bool v.AddArg(mem) return true } + // match: (MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) + // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) + for { + c := v.AuxInt + s := v.Aux + p := v.Args[0] + i := v.Args[1] + x := v.Args[2] + if x.Op != OpAMD64MOVLstoreconstidx1 { + break + } + a := x.AuxInt + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if i != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(OpAMD64MOVQstoreidx1) + v.AuxInt = ValAndOff(a).Off() + v.Aux = s + v.AddArg(p) + v.AddArg(i) + v0 := b.NewValue0(v.Line, OpAMD64MOVQconst, config.fe.TypeUInt64()) + v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 + v.AddArg(v0) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVLstoreconstidx4(v *Value, config *Config) bool { @@ -7836,6 +7867,46 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx4(v *Value, config *Config) bool v.AddArg(mem) return true } + // match: (MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) + // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) + for { + c := v.AuxInt + s := v.Aux + p := v.Args[0] + i := v.Args[1] + x := v.Args[2] + if x.Op != OpAMD64MOVLstoreconstidx4 { + break + } + a := x.AuxInt + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if i != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(OpAMD64MOVQstoreidx1) + v.AuxInt = ValAndOff(a).Off() + v.Aux = s + v.AddArg(p) + v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, i.Type) + v0.AuxInt = 2 + v0.AddArg(i) + v.AddArg(v0) + v1 := b.NewValue0(v.Line, OpAMD64MOVQconst, config.fe.TypeUInt64()) + v1.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 + v.AddArg(v1) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVLstoreidx1(v *Value, config *Config) bool { @@ -7915,6 +7986,107 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVQstoreidx1 [i-4] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpAMD64SHRQconst { + break + } + if v_2.AuxInt != 32 { + break + } + w := v_2.Args[0] + x := v.Args[3] + if x.Op != OpAMD64MOVLstoreidx1 { + break + } + if x.AuxInt != i-4 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVQstoreidx1) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVQstoreidx1 [i-4] {s} p idx w0 mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpAMD64SHRQconst { + break + } + j := v_2.AuxInt + w := v_2.Args[0] + x := v.Args[3] + if x.Op != OpAMD64MOVLstoreidx1 { + break + } + if x.AuxInt != i-4 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + w0 := x.Args[2] + if w0.Op != OpAMD64SHRQconst { + break + } + if w0.AuxInt != j-32 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVQstoreidx1) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w0) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVLstoreidx4(v *Value, config *Config) bool { @@ -7968,6 +8140,113 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx4(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpAMD64SHRQconst { + break + } + if v_2.AuxInt != 32 { + break + } + w := v_2.Args[0] + x := v.Args[3] + if x.Op != OpAMD64MOVLstoreidx4 { + break + } + if x.AuxInt != i-4 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVQstoreidx1) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg(p) + v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, idx.Type) + v0.AuxInt = 2 + v0.AddArg(idx) + v.AddArg(v0) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpAMD64SHRQconst { + break + } + j := v_2.AuxInt + w := v_2.Args[0] + x := v.Args[3] + if x.Op != OpAMD64MOVLstoreidx4 { + break + } + if x.AuxInt != i-4 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + w0 := x.Args[2] + if w0.Op != OpAMD64SHRQconst { + break + } + if w0.AuxInt != j-32 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVQstoreidx1) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg(p) + v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, idx.Type) + v0.AuxInt = 2 + v0.AddArg(idx) + v.AddArg(v0) + v.AddArg(w0) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVOload(v *Value, config *Config) bool { @@ -9835,7 +10114,7 @@ func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value, config *Config) bool { b := v.Block _ = b // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem)) - // cond: x.Uses == 1 + // cond: x.Uses == 1 && clobber(x) // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem) for { x := v.Args[0] @@ -9846,7 +10125,7 @@ func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value, config *Config) bool { sym := x.Aux ptr := x.Args[0] mem := x.Args[1] - if !(x.Uses == 1) { + if !(x.Uses == 1 && clobber(x)) { break } b = x.Block @@ -9859,12 +10138,12 @@ func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value, config *Config) bool { v0.AddArg(mem) return true } - // match: (MOVWQSX (ANDWconst [c] x)) + // match: (MOVWQSX (ANDLconst [c] x)) // cond: c & 0x8000 == 0 - // result: (ANDQconst [c & 0x7fff] x) + // result: (ANDLconst [c & 0x7fff] x) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDWconst { + if v_0.Op != OpAMD64ANDLconst { break } c := v_0.AuxInt @@ -9872,7 +10151,7 @@ func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value, config *Config) bool { if !(c&0x8000 == 0) { break } - v.reset(OpAMD64ANDQconst) + v.reset(OpAMD64ANDLconst) v.AuxInt = c & 0x7fff v.AddArg(x) return true @@ -9912,7 +10191,7 @@ func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value, config *Config) bool { b := v.Block _ = b // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem)) - // cond: x.Uses == 1 + // cond: x.Uses == 1 && clobber(x) // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem) for { x := v.Args[0] @@ -9923,7 +10202,7 @@ func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value, config *Config) bool { sym := x.Aux ptr := x.Args[0] mem := x.Args[1] - if !(x.Uses == 1) { + if !(x.Uses == 1 && clobber(x)) { break } b = x.Block @@ -9937,7 +10216,7 @@ func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value, config *Config) bool { return true } // match: (MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 + // cond: x.Uses == 1 && clobber(x) // result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem) for { x := v.Args[0] @@ -9949,7 +10228,7 @@ func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value, config *Config) bool { ptr := x.Args[0] idx := x.Args[1] mem := x.Args[2] - if !(x.Uses == 1) { + if !(x.Uses == 1 && clobber(x)) { break } b = x.Block @@ -9964,7 +10243,7 @@ func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value, config *Config) bool { return true } // match: (MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 + // cond: x.Uses == 1 && clobber(x) // result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem) for { x := v.Args[0] @@ -9976,7 +10255,7 @@ func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value, config *Config) bool { ptr := x.Args[0] idx := x.Args[1] mem := x.Args[2] - if !(x.Uses == 1) { + if !(x.Uses == 1 && clobber(x)) { break } b = x.Block @@ -9990,17 +10269,17 @@ func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value, config *Config) bool { v0.AddArg(mem) return true } - // match: (MOVWQZX (ANDWconst [c] x)) + // match: (MOVWQZX (ANDLconst [c] x)) // cond: - // result: (ANDQconst [c & 0xffff] x) + // result: (ANDLconst [c & 0xffff] x) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64ANDWconst { + if v_0.Op != OpAMD64ANDLconst { break } c := v_0.AuxInt x := v_0.Args[0] - v.reset(OpAMD64ANDQconst) + v.reset(OpAMD64ANDLconst) v.AuxInt = c & 0xffff v.AddArg(x) return true @@ -10350,7 +10629,7 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value, config *Config) bool { v.AddArg(mem) return true } - // match: (MOVWstore [off] {sym} ptr (MOVWconst [c]) mem) + // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) // cond: validOff(off) // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) for { @@ -10358,7 +10637,7 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value, config *Config) bool { sym := v.Aux ptr := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { + if v_1.Op != OpAMD64MOVLconst { break } c := v_1.AuxInt @@ -10481,6 +10760,97 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHRQconst { + break + } + if v_1.AuxInt != 16 { + break + } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpAMD64MOVWstore { + break + } + if x.AuxInt != i-2 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if w != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg(p) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w0 mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SHRQconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v.Args[2] + if x.Op != OpAMD64MOVWstore { + break + } + if x.AuxInt != i-2 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + w0 := x.Args[1] + if w0.Op != OpAMD64SHRQconst { + break + } + if w0.AuxInt != j-16 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg(p) + v.AddArg(w0) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value, config *Config) bool { @@ -10606,6 +10976,35 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) + for { + c := v.AuxInt + s := v.Aux + p := v.Args[0] + x := v.Args[1] + if x.Op != OpAMD64MOVWstoreconst { + break + } + a := x.AuxInt + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + mem := x.Args[1] + if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) + v.Aux = s + v.AddArg(p) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1(v *Value, config *Config) bool { @@ -10679,6 +11078,40 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1(v *Value, config *Config) bool v.AddArg(mem) return true } + // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem) + for { + c := v.AuxInt + s := v.Aux + p := v.Args[0] + i := v.Args[1] + x := v.Args[2] + if x.Op != OpAMD64MOVWstoreconstidx1 { + break + } + a := x.AuxInt + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if i != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstoreconstidx1) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) + v.Aux = s + v.AddArg(p) + v.AddArg(i) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVWstoreconstidx2(v *Value, config *Config) bool { @@ -10728,6 +11161,43 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconstidx2(v *Value, config *Config) bool v.AddArg(mem) return true } + // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem) + for { + c := v.AuxInt + s := v.Aux + p := v.Args[0] + i := v.Args[1] + x := v.Args[2] + if x.Op != OpAMD64MOVWstoreconstidx2 { + break + } + a := x.AuxInt + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if i != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstoreconstidx1) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) + v.Aux = s + v.AddArg(p) + v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, i.Type) + v0.AuxInt = 1 + v0.AddArg(i) + v.AddArg(v0) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVWstoreidx1(v *Value, config *Config) bool { @@ -10807,6 +11277,107 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstoreidx1 [i-2] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpAMD64SHRQconst { + break + } + if v_2.AuxInt != 16 { + break + } + w := v_2.Args[0] + x := v.Args[3] + if x.Op != OpAMD64MOVWstoreidx1 { + break + } + if x.AuxInt != i-2 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstoreidx1) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem) + for { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpAMD64SHRQconst { + break + } + j := v_2.AuxInt + w := v_2.Args[0] + x := v.Args[3] + if x.Op != OpAMD64MOVWstoreidx1 { + break + } + if x.AuxInt != i-2 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + w0 := x.Args[2] + if w0.Op != OpAMD64SHRQconst { + break + } + if w0.AuxInt != j-16 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstoreidx1) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w0) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVWstoreidx2(v *Value, config *Config) bool { @@ -10860,58 +11431,111 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx2(v *Value, config *Config) bool { v.AddArg(mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MULB(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (MULB x (MOVBconst [c])) - // cond: - // result: (MULBconst [c] x) + // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem) for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpAMD64SHRQconst { break } - c := v_1.AuxInt - v.reset(OpAMD64MULBconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (MULB (MOVBconst [c]) x) - // cond: - // result: (MULBconst [c] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { + if v_2.AuxInt != 16 { break } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64MULBconst) - v.AuxInt = c - v.AddArg(x) + w := v_2.Args[0] + x := v.Args[3] + if x.Op != OpAMD64MOVWstoreidx2 { + break + } + if x.AuxInt != i-2 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstoreidx1) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg(p) + v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, idx.Type) + v0.AuxInt = 1 + v0.AddArg(idx) + v.AddArg(v0) + v.AddArg(w) + v.AddArg(mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MULBconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (MULBconst [c] (MOVBconst [d])) - // cond: - // result: (MOVBconst [int64(int8(c*d))]) + // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem) for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { + i := v.AuxInt + s := v.Aux + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != OpAMD64SHRQconst { break } - d := v_0.AuxInt - v.reset(OpAMD64MOVBconst) - v.AuxInt = int64(int8(c * d)) + j := v_2.AuxInt + w := v_2.Args[0] + x := v.Args[3] + if x.Op != OpAMD64MOVWstoreidx2 { + break + } + if x.AuxInt != i-2 { + break + } + if x.Aux != s { + break + } + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + w0 := x.Args[2] + if w0.Op != OpAMD64SHRQconst { + break + } + if w0.AuxInt != j-16 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstoreidx1) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg(p) + v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, idx.Type) + v0.AuxInt = 1 + v0.AddArg(idx) + v.AddArg(v0) + v.AddArg(w0) + v.AddArg(mem) return true } return false @@ -11382,60 +12006,6 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value, config *Config) bool { } return false } -func rewriteValueAMD64_OpAMD64MULW(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (MULW x (MOVWconst [c])) - // cond: - // result: (MULWconst [c] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64MULWconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (MULW (MOVWconst [c]) x) - // cond: - // result: (MULWconst [c] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64MULWconst) - v.AuxInt = c - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MULWconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (MULWconst [c] (MOVWconst [d])) - // cond: - // result: (MOVWconst [int64(int16(c*d))]) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - d := v_0.AuxInt - v.reset(OpAMD64MOVWconst) - v.AuxInt = int64(int16(c * d)) - return true - } - return false -} func rewriteValueAMD64_OpMod16(v *Value, config *Config) bool { b := v.Block _ = b @@ -11931,11 +12501,11 @@ func rewriteValueAMD64_OpMul16(v *Value, config *Config) bool { _ = b // match: (Mul16 x y) // cond: - // result: (MULW x y) + // result: (MULL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64MULW) + v.reset(OpAMD64MULL) v.AddArg(x) v.AddArg(y) return true @@ -12011,35 +12581,17 @@ func rewriteValueAMD64_OpMul8(v *Value, config *Config) bool { _ = b // match: (Mul8 x y) // cond: - // result: (MULB x y) + // result: (MULL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64MULB) + v.reset(OpAMD64MULL) v.AddArg(x) v.AddArg(y) return true } return false } -func rewriteValueAMD64_OpAMD64NEGB(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (NEGB (MOVBconst [c])) - // cond: - // result: (MOVBconst [int64(int8(-c))]) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { - break - } - c := v_0.AuxInt - v.reset(OpAMD64MOVBconst) - v.AuxInt = int64(int8(-c)) - return true - } - return false -} func rewriteValueAMD64_OpAMD64NEGL(v *Value, config *Config) bool { b := v.Block _ = b @@ -12076,42 +12628,6 @@ func rewriteValueAMD64_OpAMD64NEGQ(v *Value, config *Config) bool { } return false } -func rewriteValueAMD64_OpAMD64NEGW(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (NEGW (MOVWconst [c])) - // cond: - // result: (MOVWconst [int64(int16(-c))]) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - c := v_0.AuxInt - v.reset(OpAMD64MOVWconst) - v.AuxInt = int64(int16(-c)) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64NOTB(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (NOTB (MOVBconst [c])) - // cond: - // result: (MOVBconst [^c]) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { - break - } - c := v_0.AuxInt - v.reset(OpAMD64MOVBconst) - v.AuxInt = ^c - return true - } - return false -} func rewriteValueAMD64_OpAMD64NOTL(v *Value, config *Config) bool { b := v.Block _ = b @@ -12148,33 +12664,15 @@ func rewriteValueAMD64_OpAMD64NOTQ(v *Value, config *Config) bool { } return false } -func rewriteValueAMD64_OpAMD64NOTW(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (NOTW (MOVWconst [c])) - // cond: - // result: (MOVWconst [^c]) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - c := v_0.AuxInt - v.reset(OpAMD64MOVWconst) - v.AuxInt = ^c - return true - } - return false -} func rewriteValueAMD64_OpNeg16(v *Value, config *Config) bool { b := v.Block _ = b // match: (Neg16 x) // cond: - // result: (NEGW x) + // result: (NEGL x) for { x := v.Args[0] - v.reset(OpAMD64NEGW) + v.reset(OpAMD64NEGL) v.AddArg(x) return true } @@ -12247,10 +12745,10 @@ func rewriteValueAMD64_OpNeg8(v *Value, config *Config) bool { _ = b // match: (Neg8 x) // cond: - // result: (NEGB x) + // result: (NEGL x) for { x := v.Args[0] - v.reset(OpAMD64NEGB) + v.reset(OpAMD64NEGL) v.AddArg(x) return true } @@ -12364,6 +12862,24 @@ func rewriteValueAMD64_OpNeq8(v *Value, config *Config) bool { } return false } +func rewriteValueAMD64_OpNeqB(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (NeqB x y) + // cond: + // result: (SETNE (CMPB x y)) + for { + x := v.Args[0] + y := v.Args[1] + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Line, OpAMD64CMPB, TypeFlags) + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + return false +} func rewriteValueAMD64_OpNeqPtr(v *Value, config *Config) bool { b := v.Block _ = b @@ -12403,50 +12919,50 @@ func rewriteValueAMD64_OpNot(v *Value, config *Config) bool { _ = b // match: (Not x) // cond: - // result: (XORBconst [1] x) + // result: (XORLconst [1] x) for { x := v.Args[0] - v.reset(OpAMD64XORBconst) + v.reset(OpAMD64XORLconst) v.AuxInt = 1 v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64ORB(v *Value, config *Config) bool { +func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { b := v.Block _ = b - // match: (ORB x (MOVBconst [c])) + // match: (ORL x (MOVLconst [c])) // cond: - // result: (ORBconst [c] x) + // result: (ORLconst [c] x) for { x := v.Args[0] v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { + if v_1.Op != OpAMD64MOVLconst { break } c := v_1.AuxInt - v.reset(OpAMD64ORBconst) + v.reset(OpAMD64ORLconst) v.AuxInt = c v.AddArg(x) return true } - // match: (ORB (MOVBconst [c]) x) + // match: (ORL (MOVLconst [c]) x) // cond: - // result: (ORBconst [c] x) + // result: (ORLconst [c] x) for { v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { + if v_0.Op != OpAMD64MOVLconst { break } c := v_0.AuxInt x := v.Args[1] - v.reset(OpAMD64ORBconst) + v.reset(OpAMD64ORLconst) v.AuxInt = c v.AddArg(x) return true } - // match: (ORB x x) + // match: (ORL x x) // cond: // result: x for { @@ -12459,112 +12975,67 @@ func rewriteValueAMD64_OpAMD64ORB(v *Value, config *Config) bool { v.AddArg(x) return true } - return false -} -func rewriteValueAMD64_OpAMD64ORBconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ORBconst [c] x) - // cond: int8(c)==0 - // result: x + // match: (ORL x0:(MOVBload [i] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) + // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem) for { - c := v.AuxInt - x := v.Args[0] - if !(int8(c) == 0) { + x0 := v.Args[0] + if x0.Op != OpAMD64MOVBload { break } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - // match: (ORBconst [c] _) - // cond: int8(c)==-1 - // result: (MOVBconst [-1]) - for { - c := v.AuxInt - if !(int8(c) == -1) { + i := x0.AuxInt + s := x0.Aux + p := x0.Args[0] + mem := x0.Args[1] + s0 := v.Args[1] + if s0.Op != OpAMD64SHLLconst { break } - v.reset(OpAMD64MOVBconst) - v.AuxInt = -1 - return true - } - // match: (ORBconst [c] (MOVBconst [d])) - // cond: - // result: (MOVBconst [c|d]) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { + if s0.AuxInt != 8 { break } - d := v_0.AuxInt - v.reset(OpAMD64MOVBconst) - v.AuxInt = c | d - return true - } - return false -} -func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ORL x (MOVLconst [c])) - // cond: - // result: (ORLconst [c] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVLconst { + x1 := s0.Args[0] + if x1.Op != OpAMD64MOVBload { break } - c := v_1.AuxInt - v.reset(OpAMD64ORLconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ORL (MOVLconst [c]) x) - // cond: - // result: (ORLconst [c] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVLconst { + if x1.AuxInt != i+1 { break } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64ORLconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ORL x x) - // cond: - // result: x - for { - x := v.Args[0] - if x != v.Args[1] { + if x1.Aux != s { + break + } + if p != x1.Args[0] { + break + } + if mem != x1.Args[1] { break } + if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) { + break + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(v.Line, OpAMD64MOVWload, config.fe.TypeUInt16()) v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) + v.AddArg(v0) + v0.AuxInt = i + v0.Aux = s + v0.AddArg(p) + v0.AddArg(mem) return true } - // match: (ORL (ORL (ORL x0:(MOVBload [i] {s} p mem) (SHLLconst [8] x1:(MOVBload [i+1] {s} p mem))) (SHLLconst [16] x2:(MOVBload [i+2] {s} p mem))) (SHLLconst [24] x3:(MOVBload [i+3] {s} p mem))) - // cond: mergePoint(b,x0,x1,x2,x3) != nil + // match: (ORL o0:(ORL o1:(ORL x0:(MOVBload [i] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem))) s1:(SHLLconst [16] x2:(MOVBload [i+2] {s} p mem))) s2:(SHLLconst [24] x3:(MOVBload [i+3] {s} p mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(o0) && clobber(o1) // result: @mergePoint(b,x0,x1,x2,x3) (MOVLload [i] {s} p mem) for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64ORL { + o0 := v.Args[0] + if o0.Op != OpAMD64ORL { break } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64ORL { + o1 := o0.Args[0] + if o1.Op != OpAMD64ORL { break } - x0 := v_0_0.Args[0] + x0 := o1.Args[0] if x0.Op != OpAMD64MOVBload { break } @@ -12572,14 +13043,14 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { s := x0.Aux p := x0.Args[0] mem := x0.Args[1] - v_0_0_1 := v_0_0.Args[1] - if v_0_0_1.Op != OpAMD64SHLLconst { + s0 := o1.Args[1] + if s0.Op != OpAMD64SHLLconst { break } - if v_0_0_1.AuxInt != 8 { + if s0.AuxInt != 8 { break } - x1 := v_0_0_1.Args[0] + x1 := s0.Args[0] if x1.Op != OpAMD64MOVBload { break } @@ -12595,14 +13066,14 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { if mem != x1.Args[1] { break } - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SHLLconst { + s1 := o0.Args[1] + if s1.Op != OpAMD64SHLLconst { break } - if v_0_1.AuxInt != 16 { + if s1.AuxInt != 16 { break } - x2 := v_0_1.Args[0] + x2 := s1.Args[0] if x2.Op != OpAMD64MOVBload { break } @@ -12618,14 +13089,14 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { if mem != x2.Args[1] { break } - v_1 := v.Args[1] - if v_1.Op != OpAMD64SHLLconst { + s2 := v.Args[1] + if s2.Op != OpAMD64SHLLconst { break } - if v_1.AuxInt != 24 { + if s2.AuxInt != 24 { break } - x3 := v_1.Args[0] + x3 := s2.Args[0] if x3.Op != OpAMD64MOVBload { break } @@ -12641,7 +13112,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { if mem != x3.Args[1] { break } - if !(mergePoint(b, x0, x1, x2, x3) != nil) { + if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(o0) && clobber(o1)) { break } b = mergePoint(b, x0, x1, x2, x3) @@ -12654,19 +13125,72 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { v0.AddArg(mem) return true } - // match: (ORL (ORL (ORL x0:(MOVBloadidx1 [i] {s} p idx mem) (SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) (SHLLconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem))) (SHLLconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem))) - // cond: mergePoint(b,x0,x1,x2,x3) != nil + // match: (ORL x0:(MOVBloadidx1 [i] {s} p idx mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) + // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem) + for { + x0 := v.Args[0] + if x0.Op != OpAMD64MOVBloadidx1 { + break + } + i := x0.AuxInt + s := x0.Aux + p := x0.Args[0] + idx := x0.Args[1] + mem := x0.Args[2] + s0 := v.Args[1] + if s0.Op != OpAMD64SHLLconst { + break + } + if s0.AuxInt != 8 { + break + } + x1 := s0.Args[0] + if x1.Op != OpAMD64MOVBloadidx1 { + break + } + if x1.AuxInt != i+1 { + break + } + if x1.Aux != s { + break + } + if p != x1.Args[0] { + break + } + if idx != x1.Args[1] { + break + } + if mem != x1.Args[2] { + break + } + if !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)) { + break + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(v.Line, OpAMD64MOVWloadidx1, v.Type) + v.reset(OpCopy) + v.AddArg(v0) + v0.AuxInt = i + v0.Aux = s + v0.AddArg(p) + v0.AddArg(idx) + v0.AddArg(mem) + return true + } + // match: (ORL o0:(ORL o1:(ORL x0:(MOVBloadidx1 [i] {s} p idx mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) s1:(SHLLconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem))) s2:(SHLLconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(o0) && clobber(o1) // result: @mergePoint(b,x0,x1,x2,x3) (MOVLloadidx1 <v.Type> [i] {s} p idx mem) for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64ORL { + o0 := v.Args[0] + if o0.Op != OpAMD64ORL { break } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64ORL { + o1 := o0.Args[0] + if o1.Op != OpAMD64ORL { break } - x0 := v_0_0.Args[0] + x0 := o1.Args[0] if x0.Op != OpAMD64MOVBloadidx1 { break } @@ -12675,14 +13199,14 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { p := x0.Args[0] idx := x0.Args[1] mem := x0.Args[2] - v_0_0_1 := v_0_0.Args[1] - if v_0_0_1.Op != OpAMD64SHLLconst { + s0 := o1.Args[1] + if s0.Op != OpAMD64SHLLconst { break } - if v_0_0_1.AuxInt != 8 { + if s0.AuxInt != 8 { break } - x1 := v_0_0_1.Args[0] + x1 := s0.Args[0] if x1.Op != OpAMD64MOVBloadidx1 { break } @@ -12701,14 +13225,14 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { if mem != x1.Args[2] { break } - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SHLLconst { + s1 := o0.Args[1] + if s1.Op != OpAMD64SHLLconst { break } - if v_0_1.AuxInt != 16 { + if s1.AuxInt != 16 { break } - x2 := v_0_1.Args[0] + x2 := s1.Args[0] if x2.Op != OpAMD64MOVBloadidx1 { break } @@ -12727,14 +13251,14 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { if mem != x2.Args[2] { break } - v_1 := v.Args[1] - if v_1.Op != OpAMD64SHLLconst { + s2 := v.Args[1] + if s2.Op != OpAMD64SHLLconst { break } - if v_1.AuxInt != 24 { + if s2.AuxInt != 24 { break } - x3 := v_1.Args[0] + x3 := s2.Args[0] if x3.Op != OpAMD64MOVBloadidx1 { break } @@ -12753,7 +13277,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool { if mem != x3.Args[2] { break } - if !(mergePoint(b, x0, x1, x2, x3) != nil) { + if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(o0) && clobber(o1)) { break } b = mergePoint(b, x0, x1, x2, x3) @@ -12866,35 +13390,35 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ x0:(MOVBload [i] {s} p mem) (SHLQconst [8] x1:(MOVBload [i+1] {s} p mem))) (SHLQconst [16] x2:(MOVBload [i+2] {s} p mem))) (SHLQconst [24] x3:(MOVBload [i+3] {s} p mem))) (SHLQconst [32] x4:(MOVBload [i+4] {s} p mem))) (SHLQconst [40] x5:(MOVBload [i+5] {s} p mem))) (SHLQconst [48] x6:(MOVBload [i+6] {s} p mem))) (SHLQconst [56] x7:(MOVBload [i+7] {s} p mem))) - // cond: mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil + // match: (ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ x0:(MOVBload [i] {s} p mem) s0:(SHLQconst [8] x1:(MOVBload [i+1] {s} p mem))) s1:(SHLQconst [16] x2:(MOVBload [i+2] {s} p mem))) s2:(SHLQconst [24] x3:(MOVBload [i+3] {s} p mem))) s3:(SHLQconst [32] x4:(MOVBload [i+4] {s} p mem))) s4:(SHLQconst [40] x5:(MOVBload [i+5] {s} p mem))) s5:(SHLQconst [48] x6:(MOVBload [i+6] {s} p mem))) s6:(SHLQconst [56] x7:(MOVBload [i+7] {s} p mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem) for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64ORQ { + o0 := v.Args[0] + if o0.Op != OpAMD64ORQ { break } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64ORQ { + o1 := o0.Args[0] + if o1.Op != OpAMD64ORQ { break } - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64ORQ { + o2 := o1.Args[0] + if o2.Op != OpAMD64ORQ { break } - v_0_0_0_0 := v_0_0_0.Args[0] - if v_0_0_0_0.Op != OpAMD64ORQ { + o3 := o2.Args[0] + if o3.Op != OpAMD64ORQ { break } - v_0_0_0_0_0 := v_0_0_0_0.Args[0] - if v_0_0_0_0_0.Op != OpAMD64ORQ { + o4 := o3.Args[0] + if o4.Op != OpAMD64ORQ { break } - v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0] - if v_0_0_0_0_0_0.Op != OpAMD64ORQ { + o5 := o4.Args[0] + if o5.Op != OpAMD64ORQ { break } - x0 := v_0_0_0_0_0_0.Args[0] + x0 := o5.Args[0] if x0.Op != OpAMD64MOVBload { break } @@ -12902,14 +13426,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { s := x0.Aux p := x0.Args[0] mem := x0.Args[1] - v_0_0_0_0_0_0_1 := v_0_0_0_0_0_0.Args[1] - if v_0_0_0_0_0_0_1.Op != OpAMD64SHLQconst { + s0 := o5.Args[1] + if s0.Op != OpAMD64SHLQconst { break } - if v_0_0_0_0_0_0_1.AuxInt != 8 { + if s0.AuxInt != 8 { break } - x1 := v_0_0_0_0_0_0_1.Args[0] + x1 := s0.Args[0] if x1.Op != OpAMD64MOVBload { break } @@ -12925,14 +13449,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x1.Args[1] { break } - v_0_0_0_0_0_1 := v_0_0_0_0_0.Args[1] - if v_0_0_0_0_0_1.Op != OpAMD64SHLQconst { + s1 := o4.Args[1] + if s1.Op != OpAMD64SHLQconst { break } - if v_0_0_0_0_0_1.AuxInt != 16 { + if s1.AuxInt != 16 { break } - x2 := v_0_0_0_0_0_1.Args[0] + x2 := s1.Args[0] if x2.Op != OpAMD64MOVBload { break } @@ -12948,14 +13472,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x2.Args[1] { break } - v_0_0_0_0_1 := v_0_0_0_0.Args[1] - if v_0_0_0_0_1.Op != OpAMD64SHLQconst { + s2 := o3.Args[1] + if s2.Op != OpAMD64SHLQconst { break } - if v_0_0_0_0_1.AuxInt != 24 { + if s2.AuxInt != 24 { break } - x3 := v_0_0_0_0_1.Args[0] + x3 := s2.Args[0] if x3.Op != OpAMD64MOVBload { break } @@ -12971,14 +13495,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x3.Args[1] { break } - v_0_0_0_1 := v_0_0_0.Args[1] - if v_0_0_0_1.Op != OpAMD64SHLQconst { + s3 := o2.Args[1] + if s3.Op != OpAMD64SHLQconst { break } - if v_0_0_0_1.AuxInt != 32 { + if s3.AuxInt != 32 { break } - x4 := v_0_0_0_1.Args[0] + x4 := s3.Args[0] if x4.Op != OpAMD64MOVBload { break } @@ -12994,14 +13518,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x4.Args[1] { break } - v_0_0_1 := v_0_0.Args[1] - if v_0_0_1.Op != OpAMD64SHLQconst { + s4 := o1.Args[1] + if s4.Op != OpAMD64SHLQconst { break } - if v_0_0_1.AuxInt != 40 { + if s4.AuxInt != 40 { break } - x5 := v_0_0_1.Args[0] + x5 := s4.Args[0] if x5.Op != OpAMD64MOVBload { break } @@ -13017,14 +13541,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x5.Args[1] { break } - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SHLQconst { + s5 := o0.Args[1] + if s5.Op != OpAMD64SHLQconst { break } - if v_0_1.AuxInt != 48 { + if s5.AuxInt != 48 { break } - x6 := v_0_1.Args[0] + x6 := s5.Args[0] if x6.Op != OpAMD64MOVBload { break } @@ -13040,14 +13564,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x6.Args[1] { break } - v_1 := v.Args[1] - if v_1.Op != OpAMD64SHLQconst { + s6 := v.Args[1] + if s6.Op != OpAMD64SHLQconst { break } - if v_1.AuxInt != 56 { + if s6.AuxInt != 56 { break } - x7 := v_1.Args[0] + x7 := s6.Args[0] if x7.Op != OpAMD64MOVBload { break } @@ -13063,7 +13587,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x7.Args[1] { break } - if !(mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil) { + if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) { break } b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) @@ -13076,35 +13600,35 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { v0.AddArg(mem) return true } - // match: (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ x0:(MOVBloadidx1 [i] {s} p idx mem) (SHLQconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) (SHLQconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem))) (SHLQconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem))) (SHLQconst [32] x4:(MOVBloadidx1 [i+4] {s} p idx mem))) (SHLQconst [40] x5:(MOVBloadidx1 [i+5] {s} p idx mem))) (SHLQconst [48] x6:(MOVBloadidx1 [i+6] {s} p idx mem))) (SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem))) - // cond: mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil + // match: (ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ x0:(MOVBloadidx1 [i] {s} p idx mem) s0:(SHLQconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) s1:(SHLQconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem))) s2:(SHLQconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem))) s3:(SHLQconst [32] x4:(MOVBloadidx1 [i+4] {s} p idx mem))) s4:(SHLQconst [40] x5:(MOVBloadidx1 [i+5] {s} p idx mem))) s5:(SHLQconst [48] x6:(MOVBloadidx1 [i+6] {s} p idx mem))) s6:(SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem) for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64ORQ { + o0 := v.Args[0] + if o0.Op != OpAMD64ORQ { break } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64ORQ { + o1 := o0.Args[0] + if o1.Op != OpAMD64ORQ { break } - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64ORQ { + o2 := o1.Args[0] + if o2.Op != OpAMD64ORQ { break } - v_0_0_0_0 := v_0_0_0.Args[0] - if v_0_0_0_0.Op != OpAMD64ORQ { + o3 := o2.Args[0] + if o3.Op != OpAMD64ORQ { break } - v_0_0_0_0_0 := v_0_0_0_0.Args[0] - if v_0_0_0_0_0.Op != OpAMD64ORQ { + o4 := o3.Args[0] + if o4.Op != OpAMD64ORQ { break } - v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0] - if v_0_0_0_0_0_0.Op != OpAMD64ORQ { + o5 := o4.Args[0] + if o5.Op != OpAMD64ORQ { break } - x0 := v_0_0_0_0_0_0.Args[0] + x0 := o5.Args[0] if x0.Op != OpAMD64MOVBloadidx1 { break } @@ -13113,14 +13637,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { p := x0.Args[0] idx := x0.Args[1] mem := x0.Args[2] - v_0_0_0_0_0_0_1 := v_0_0_0_0_0_0.Args[1] - if v_0_0_0_0_0_0_1.Op != OpAMD64SHLQconst { + s0 := o5.Args[1] + if s0.Op != OpAMD64SHLQconst { break } - if v_0_0_0_0_0_0_1.AuxInt != 8 { + if s0.AuxInt != 8 { break } - x1 := v_0_0_0_0_0_0_1.Args[0] + x1 := s0.Args[0] if x1.Op != OpAMD64MOVBloadidx1 { break } @@ -13139,14 +13663,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x1.Args[2] { break } - v_0_0_0_0_0_1 := v_0_0_0_0_0.Args[1] - if v_0_0_0_0_0_1.Op != OpAMD64SHLQconst { + s1 := o4.Args[1] + if s1.Op != OpAMD64SHLQconst { break } - if v_0_0_0_0_0_1.AuxInt != 16 { + if s1.AuxInt != 16 { break } - x2 := v_0_0_0_0_0_1.Args[0] + x2 := s1.Args[0] if x2.Op != OpAMD64MOVBloadidx1 { break } @@ -13165,14 +13689,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x2.Args[2] { break } - v_0_0_0_0_1 := v_0_0_0_0.Args[1] - if v_0_0_0_0_1.Op != OpAMD64SHLQconst { + s2 := o3.Args[1] + if s2.Op != OpAMD64SHLQconst { break } - if v_0_0_0_0_1.AuxInt != 24 { + if s2.AuxInt != 24 { break } - x3 := v_0_0_0_0_1.Args[0] + x3 := s2.Args[0] if x3.Op != OpAMD64MOVBloadidx1 { break } @@ -13191,14 +13715,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x3.Args[2] { break } - v_0_0_0_1 := v_0_0_0.Args[1] - if v_0_0_0_1.Op != OpAMD64SHLQconst { + s3 := o2.Args[1] + if s3.Op != OpAMD64SHLQconst { break } - if v_0_0_0_1.AuxInt != 32 { + if s3.AuxInt != 32 { break } - x4 := v_0_0_0_1.Args[0] + x4 := s3.Args[0] if x4.Op != OpAMD64MOVBloadidx1 { break } @@ -13217,14 +13741,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x4.Args[2] { break } - v_0_0_1 := v_0_0.Args[1] - if v_0_0_1.Op != OpAMD64SHLQconst { + s4 := o1.Args[1] + if s4.Op != OpAMD64SHLQconst { break } - if v_0_0_1.AuxInt != 40 { + if s4.AuxInt != 40 { break } - x5 := v_0_0_1.Args[0] + x5 := s4.Args[0] if x5.Op != OpAMD64MOVBloadidx1 { break } @@ -13243,14 +13767,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x5.Args[2] { break } - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SHLQconst { + s5 := o0.Args[1] + if s5.Op != OpAMD64SHLQconst { break } - if v_0_1.AuxInt != 48 { + if s5.AuxInt != 48 { break } - x6 := v_0_1.Args[0] + x6 := s5.Args[0] if x6.Op != OpAMD64MOVBloadidx1 { break } @@ -13269,14 +13793,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x6.Args[2] { break } - v_1 := v.Args[1] - if v_1.Op != OpAMD64SHLQconst { + s6 := v.Args[1] + if s6.Op != OpAMD64SHLQconst { break } - if v_1.AuxInt != 56 { + if s6.AuxInt != 56 { break } - x7 := v_1.Args[0] + x7 := s6.Args[0] if x7.Op != OpAMD64MOVBloadidx1 { break } @@ -13295,7 +13819,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool { if mem != x7.Args[2] { break } - if !(mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil) { + if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)) { break } b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) @@ -13354,200 +13878,6 @@ func rewriteValueAMD64_OpAMD64ORQconst(v *Value, config *Config) bool { } return false } -func rewriteValueAMD64_OpAMD64ORW(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ORW x (MOVWconst [c])) - // cond: - // result: (ORWconst [c] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64ORWconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ORW (MOVWconst [c]) x) - // cond: - // result: (ORWconst [c] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64ORWconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (ORW x x) - // cond: - // result: x - for { - x := v.Args[0] - if x != v.Args[1] { - break - } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - // match: (ORW x0:(MOVBload [i] {s} p mem) (SHLWconst [8] x1:(MOVBload [i+1] {s} p mem))) - // cond: mergePoint(b,x0,x1) != nil - // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem) - for { - x0 := v.Args[0] - if x0.Op != OpAMD64MOVBload { - break - } - i := x0.AuxInt - s := x0.Aux - p := x0.Args[0] - mem := x0.Args[1] - v_1 := v.Args[1] - if v_1.Op != OpAMD64SHLWconst { - break - } - if v_1.AuxInt != 8 { - break - } - x1 := v_1.Args[0] - if x1.Op != OpAMD64MOVBload { - break - } - if x1.AuxInt != i+1 { - break - } - if x1.Aux != s { - break - } - if p != x1.Args[0] { - break - } - if mem != x1.Args[1] { - break - } - if !(mergePoint(b, x0, x1) != nil) { - break - } - b = mergePoint(b, x0, x1) - v0 := b.NewValue0(v.Line, OpAMD64MOVWload, config.fe.TypeUInt16()) - v.reset(OpCopy) - v.AddArg(v0) - v0.AuxInt = i - v0.Aux = s - v0.AddArg(p) - v0.AddArg(mem) - return true - } - // match: (ORW x0:(MOVBloadidx1 [i] {s} p idx mem) (SHLWconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) - // cond: mergePoint(b,x0,x1) != nil - // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem) - for { - x0 := v.Args[0] - if x0.Op != OpAMD64MOVBloadidx1 { - break - } - i := x0.AuxInt - s := x0.Aux - p := x0.Args[0] - idx := x0.Args[1] - mem := x0.Args[2] - v_1 := v.Args[1] - if v_1.Op != OpAMD64SHLWconst { - break - } - if v_1.AuxInt != 8 { - break - } - x1 := v_1.Args[0] - if x1.Op != OpAMD64MOVBloadidx1 { - break - } - if x1.AuxInt != i+1 { - break - } - if x1.Aux != s { - break - } - if p != x1.Args[0] { - break - } - if idx != x1.Args[1] { - break - } - if mem != x1.Args[2] { - break - } - if !(mergePoint(b, x0, x1) != nil) { - break - } - b = mergePoint(b, x0, x1) - v0 := b.NewValue0(v.Line, OpAMD64MOVWloadidx1, v.Type) - v.reset(OpCopy) - v.AddArg(v0) - v0.AuxInt = i - v0.Aux = s - v0.AddArg(p) - v0.AddArg(idx) - v0.AddArg(mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64ORWconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (ORWconst [c] x) - // cond: int16(c)==0 - // result: x - for { - c := v.AuxInt - x := v.Args[0] - if !(int16(c) == 0) { - break - } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - // match: (ORWconst [c] _) - // cond: int16(c)==-1 - // result: (MOVWconst [-1]) - for { - c := v.AuxInt - if !(int16(c) == -1) { - break - } - v.reset(OpAMD64MOVWconst) - v.AuxInt = -1 - return true - } - // match: (ORWconst [c] (MOVWconst [d])) - // cond: - // result: (MOVWconst [c|d]) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - d := v_0.AuxInt - v.reset(OpAMD64MOVWconst) - v.AuxInt = c | d - return true - } - return false -} func rewriteValueAMD64_OpOffPtr(v *Value, config *Config) bool { b := v.Block _ = b @@ -13585,11 +13915,11 @@ func rewriteValueAMD64_OpOr16(v *Value, config *Config) bool { _ = b // match: (Or16 x y) // cond: - // result: (ORW x y) + // result: (ORL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ORW) + v.reset(OpAMD64ORL) v.AddArg(x) v.AddArg(y) return true @@ -13633,11 +13963,27 @@ func rewriteValueAMD64_OpOr8(v *Value, config *Config) bool { _ = b // match: (Or8 x y) // cond: - // result: (ORB x y) + // result: (ORL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ORB) + v.reset(OpAMD64ORL) + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueAMD64_OpOrB(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (OrB x y) + // cond: + // result: (ORL x y) + for { + x := v.Args[0] + y := v.Args[1] + v.reset(OpAMD64ORL) v.AddArg(x) v.AddArg(y) return true @@ -13649,12 +13995,12 @@ func rewriteValueAMD64_OpRsh16Ux16(v *Value, config *Config) bool { _ = b // match: (Rsh16Ux16 <t> x y) // cond: - // result: (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16]))) + // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDW) + v.reset(OpAMD64ANDL) v0 := b.NewValue0(v.Line, OpAMD64SHRW, t) v0.AddArg(x) v0.AddArg(y) @@ -13674,12 +14020,12 @@ func rewriteValueAMD64_OpRsh16Ux32(v *Value, config *Config) bool { _ = b // match: (Rsh16Ux32 <t> x y) // cond: - // result: (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16]))) + // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDW) + v.reset(OpAMD64ANDL) v0 := b.NewValue0(v.Line, OpAMD64SHRW, t) v0.AddArg(x) v0.AddArg(y) @@ -13699,12 +14045,12 @@ func rewriteValueAMD64_OpRsh16Ux64(v *Value, config *Config) bool { _ = b // match: (Rsh16Ux64 <t> x y) // cond: - // result: (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16]))) + // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDW) + v.reset(OpAMD64ANDL) v0 := b.NewValue0(v.Line, OpAMD64SHRW, t) v0.AddArg(x) v0.AddArg(y) @@ -13724,12 +14070,12 @@ func rewriteValueAMD64_OpRsh16Ux8(v *Value, config *Config) bool { _ = b // match: (Rsh16Ux8 <t> x y) // cond: - // result: (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16]))) + // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDW) + v.reset(OpAMD64ANDL) v0 := b.NewValue0(v.Line, OpAMD64SHRW, t) v0.AddArg(x) v0.AddArg(y) @@ -13749,7 +14095,7 @@ func rewriteValueAMD64_OpRsh16x16(v *Value, config *Config) bool { _ = b // match: (Rsh16x16 <t> x y) // cond: - // result: (SARW <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [16]))))) + // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [16]))))) for { t := v.Type x := v.Args[0] @@ -13757,7 +14103,7 @@ func rewriteValueAMD64_OpRsh16x16(v *Value, config *Config) bool { v.reset(OpAMD64SARW) v.Type = t v.AddArg(x) - v0 := b.NewValue0(v.Line, OpAMD64ORW, y.Type) + v0 := b.NewValue0(v.Line, OpAMD64ORL, y.Type) v0.AddArg(y) v1 := b.NewValue0(v.Line, OpAMD64NOTL, y.Type) v2 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, y.Type) @@ -13833,7 +14179,7 @@ func rewriteValueAMD64_OpRsh16x8(v *Value, config *Config) bool { _ = b // match: (Rsh16x8 <t> x y) // cond: - // result: (SARW <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16]))))) + // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16]))))) for { t := v.Type x := v.Args[0] @@ -13841,7 +14187,7 @@ func rewriteValueAMD64_OpRsh16x8(v *Value, config *Config) bool { v.reset(OpAMD64SARW) v.Type = t v.AddArg(x) - v0 := b.NewValue0(v.Line, OpAMD64ORB, y.Type) + v0 := b.NewValue0(v.Line, OpAMD64ORL, y.Type) v0.AddArg(y) v1 := b.NewValue0(v.Line, OpAMD64NOTL, y.Type) v2 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, y.Type) @@ -13961,7 +14307,7 @@ func rewriteValueAMD64_OpRsh32x16(v *Value, config *Config) bool { _ = b // match: (Rsh32x16 <t> x y) // cond: - // result: (SARL <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [32]))))) + // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [32]))))) for { t := v.Type x := v.Args[0] @@ -13969,7 +14315,7 @@ func rewriteValueAMD64_OpRsh32x16(v *Value, config *Config) bool { v.reset(OpAMD64SARL) v.Type = t v.AddArg(x) - v0 := b.NewValue0(v.Line, OpAMD64ORW, y.Type) + v0 := b.NewValue0(v.Line, OpAMD64ORL, y.Type) v0.AddArg(y) v1 := b.NewValue0(v.Line, OpAMD64NOTL, y.Type) v2 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, y.Type) @@ -14045,7 +14391,7 @@ func rewriteValueAMD64_OpRsh32x8(v *Value, config *Config) bool { _ = b // match: (Rsh32x8 <t> x y) // cond: - // result: (SARL <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32]))))) + // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32]))))) for { t := v.Type x := v.Args[0] @@ -14053,7 +14399,7 @@ func rewriteValueAMD64_OpRsh32x8(v *Value, config *Config) bool { v.reset(OpAMD64SARL) v.Type = t v.AddArg(x) - v0 := b.NewValue0(v.Line, OpAMD64ORB, y.Type) + v0 := b.NewValue0(v.Line, OpAMD64ORL, y.Type) v0.AddArg(y) v1 := b.NewValue0(v.Line, OpAMD64NOTL, y.Type) v2 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, y.Type) @@ -14173,7 +14519,7 @@ func rewriteValueAMD64_OpRsh64x16(v *Value, config *Config) bool { _ = b // match: (Rsh64x16 <t> x y) // cond: - // result: (SARQ <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [64]))))) + // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [64]))))) for { t := v.Type x := v.Args[0] @@ -14181,7 +14527,7 @@ func rewriteValueAMD64_OpRsh64x16(v *Value, config *Config) bool { v.reset(OpAMD64SARQ) v.Type = t v.AddArg(x) - v0 := b.NewValue0(v.Line, OpAMD64ORW, y.Type) + v0 := b.NewValue0(v.Line, OpAMD64ORL, y.Type) v0.AddArg(y) v1 := b.NewValue0(v.Line, OpAMD64NOTL, y.Type) v2 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, y.Type) @@ -14257,7 +14603,7 @@ func rewriteValueAMD64_OpRsh64x8(v *Value, config *Config) bool { _ = b // match: (Rsh64x8 <t> x y) // cond: - // result: (SARQ <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [64]))))) + // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [64]))))) for { t := v.Type x := v.Args[0] @@ -14265,7 +14611,7 @@ func rewriteValueAMD64_OpRsh64x8(v *Value, config *Config) bool { v.reset(OpAMD64SARQ) v.Type = t v.AddArg(x) - v0 := b.NewValue0(v.Line, OpAMD64ORB, y.Type) + v0 := b.NewValue0(v.Line, OpAMD64ORL, y.Type) v0.AddArg(y) v1 := b.NewValue0(v.Line, OpAMD64NOTL, y.Type) v2 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, y.Type) @@ -14285,12 +14631,12 @@ func rewriteValueAMD64_OpRsh8Ux16(v *Value, config *Config) bool { _ = b // match: (Rsh8Ux16 <t> x y) // cond: - // result: (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8]))) + // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDB) + v.reset(OpAMD64ANDL) v0 := b.NewValue0(v.Line, OpAMD64SHRB, t) v0.AddArg(x) v0.AddArg(y) @@ -14310,12 +14656,12 @@ func rewriteValueAMD64_OpRsh8Ux32(v *Value, config *Config) bool { _ = b // match: (Rsh8Ux32 <t> x y) // cond: - // result: (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8]))) + // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDB) + v.reset(OpAMD64ANDL) v0 := b.NewValue0(v.Line, OpAMD64SHRB, t) v0.AddArg(x) v0.AddArg(y) @@ -14335,12 +14681,12 @@ func rewriteValueAMD64_OpRsh8Ux64(v *Value, config *Config) bool { _ = b // match: (Rsh8Ux64 <t> x y) // cond: - // result: (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8]))) + // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDB) + v.reset(OpAMD64ANDL) v0 := b.NewValue0(v.Line, OpAMD64SHRB, t) v0.AddArg(x) v0.AddArg(y) @@ -14360,12 +14706,12 @@ func rewriteValueAMD64_OpRsh8Ux8(v *Value, config *Config) bool { _ = b // match: (Rsh8Ux8 <t> x y) // cond: - // result: (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8]))) + // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8]))) for { t := v.Type x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64ANDB) + v.reset(OpAMD64ANDL) v0 := b.NewValue0(v.Line, OpAMD64SHRB, t) v0.AddArg(x) v0.AddArg(y) @@ -14385,7 +14731,7 @@ func rewriteValueAMD64_OpRsh8x16(v *Value, config *Config) bool { _ = b // match: (Rsh8x16 <t> x y) // cond: - // result: (SARB <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8]))))) + // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8]))))) for { t := v.Type x := v.Args[0] @@ -14393,7 +14739,7 @@ func rewriteValueAMD64_OpRsh8x16(v *Value, config *Config) bool { v.reset(OpAMD64SARB) v.Type = t v.AddArg(x) - v0 := b.NewValue0(v.Line, OpAMD64ORW, y.Type) + v0 := b.NewValue0(v.Line, OpAMD64ORL, y.Type) v0.AddArg(y) v1 := b.NewValue0(v.Line, OpAMD64NOTL, y.Type) v2 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, y.Type) @@ -14469,7 +14815,7 @@ func rewriteValueAMD64_OpRsh8x8(v *Value, config *Config) bool { _ = b // match: (Rsh8x8 <t> x y) // cond: - // result: (SARB <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8]))))) + // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8]))))) for { t := v.Type x := v.Args[0] @@ -14477,7 +14823,7 @@ func rewriteValueAMD64_OpRsh8x8(v *Value, config *Config) bool { v.reset(OpAMD64SARB) v.Type = t v.AddArg(x) - v0 := b.NewValue0(v.Line, OpAMD64ORB, y.Type) + v0 := b.NewValue0(v.Line, OpAMD64ORL, y.Type) v0.AddArg(y) v1 := b.NewValue0(v.Line, OpAMD64NOTL, y.Type) v2 := b.NewValue0(v.Line, OpAMD64SBBLcarrymask, y.Type) @@ -14525,54 +14871,6 @@ func rewriteValueAMD64_OpAMD64SARB(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (SARB x (MOVWconst [c])) - // cond: - // result: (SARBconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SARBconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SARB x (MOVBconst [c])) - // cond: - // result: (SARBconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SARBconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SARB x (ANDBconst [31] y)) - // cond: - // result: (SARB x y) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64ANDBconst { - break - } - if v_1.AuxInt != 31 { - break - } - y := v_1.Args[0] - v.reset(OpAMD64SARB) - v.AddArg(x) - v.AddArg(y) - return true - } return false } func rewriteValueAMD64_OpAMD64SARBconst(v *Value, config *Config) bool { @@ -14627,36 +14925,6 @@ func rewriteValueAMD64_OpAMD64SARL(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (SARL x (MOVWconst [c])) - // cond: - // result: (SARLconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SARLconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SARL x (MOVBconst [c])) - // cond: - // result: (SARLconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SARLconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } // match: (SARL x (ANDLconst [31] y)) // cond: // result: (SARL x y) @@ -14729,36 +14997,6 @@ func rewriteValueAMD64_OpAMD64SARQ(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (SARQ x (MOVWconst [c])) - // cond: - // result: (SARQconst [c&63] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SARQconst) - v.AuxInt = c & 63 - v.AddArg(x) - return true - } - // match: (SARQ x (MOVBconst [c])) - // cond: - // result: (SARQconst [c&63] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SARQconst) - v.AuxInt = c & 63 - v.AddArg(x) - return true - } // match: (SARQ x (ANDQconst [63] y)) // cond: // result: (SARQ x y) @@ -14831,54 +15069,6 @@ func rewriteValueAMD64_OpAMD64SARW(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (SARW x (MOVWconst [c])) - // cond: - // result: (SARWconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SARWconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SARW x (MOVBconst [c])) - // cond: - // result: (SARWconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SARWconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SARW x (ANDWconst [31] y)) - // cond: - // result: (SARW x y) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64ANDWconst { - break - } - if v_1.AuxInt != 31 { - break - } - y := v_1.Args[0] - v.reset(OpAMD64SARW) - v.AddArg(x) - v.AddArg(y) - return true - } return false } func rewriteValueAMD64_OpAMD64SARWconst(v *Value, config *Config) bool { @@ -15048,61 +15238,61 @@ func rewriteValueAMD64_OpAMD64SETA(v *Value, config *Config) bool { } // match: (SETA (FlagEQ)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETA (FlagLT_ULT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETA (FlagLT_UGT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETA (FlagGT_ULT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETA (FlagGT_UGT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } @@ -15126,61 +15316,61 @@ func rewriteValueAMD64_OpAMD64SETAE(v *Value, config *Config) bool { } // match: (SETAE (FlagEQ)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETAE (FlagLT_ULT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETAE (FlagLT_UGT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETAE (FlagGT_ULT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETAE (FlagGT_UGT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } @@ -15204,61 +15394,61 @@ func rewriteValueAMD64_OpAMD64SETB(v *Value, config *Config) bool { } // match: (SETB (FlagEQ)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETB (FlagLT_ULT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETB (FlagLT_UGT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETB (FlagGT_ULT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETB (FlagGT_UGT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } @@ -15282,61 +15472,61 @@ func rewriteValueAMD64_OpAMD64SETBE(v *Value, config *Config) bool { } // match: (SETBE (FlagEQ)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETBE (FlagLT_ULT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETBE (FlagLT_UGT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETBE (FlagGT_ULT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETBE (FlagGT_UGT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } @@ -15360,61 +15550,61 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value, config *Config) bool { } // match: (SETEQ (FlagEQ)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETEQ (FlagLT_ULT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETEQ (FlagLT_UGT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETEQ (FlagGT_ULT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETEQ (FlagGT_UGT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } @@ -15438,61 +15628,61 @@ func rewriteValueAMD64_OpAMD64SETG(v *Value, config *Config) bool { } // match: (SETG (FlagEQ)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETG (FlagLT_ULT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETG (FlagLT_UGT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETG (FlagGT_ULT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETG (FlagGT_UGT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } @@ -15516,61 +15706,61 @@ func rewriteValueAMD64_OpAMD64SETGE(v *Value, config *Config) bool { } // match: (SETGE (FlagEQ)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETGE (FlagLT_ULT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETGE (FlagLT_UGT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETGE (FlagGT_ULT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETGE (FlagGT_UGT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } @@ -15594,61 +15784,61 @@ func rewriteValueAMD64_OpAMD64SETL(v *Value, config *Config) bool { } // match: (SETL (FlagEQ)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETL (FlagLT_ULT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETL (FlagLT_UGT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETL (FlagGT_ULT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETL (FlagGT_UGT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } @@ -15672,61 +15862,61 @@ func rewriteValueAMD64_OpAMD64SETLE(v *Value, config *Config) bool { } // match: (SETLE (FlagEQ)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETLE (FlagLT_ULT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETLE (FlagLT_UGT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETLE (FlagGT_ULT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETLE (FlagGT_UGT)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } @@ -15750,149 +15940,66 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value, config *Config) bool { } // match: (SETNE (FlagEQ)) // cond: - // result: (MOVBconst [0]) + // result: (MOVLconst [0]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 0 return true } // match: (SETNE (FlagLT_ULT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETNE (FlagLT_UGT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETNE (FlagGT_ULT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } // match: (SETNE (FlagGT_UGT)) // cond: - // result: (MOVBconst [1]) + // result: (MOVLconst [1]) for { v_0 := v.Args[0] if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64MOVBconst) + v.reset(OpAMD64MOVLconst) v.AuxInt = 1 return true } return false } -func rewriteValueAMD64_OpAMD64SHLB(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (SHLB x (MOVQconst [c])) - // cond: - // result: (SHLBconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLBconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHLB x (MOVLconst [c])) - // cond: - // result: (SHLBconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVLconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLBconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHLB x (MOVWconst [c])) - // cond: - // result: (SHLBconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLBconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHLB x (MOVBconst [c])) - // cond: - // result: (SHLBconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLBconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHLB x (ANDBconst [31] y)) - // cond: - // result: (SHLB x y) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64ANDBconst { - break - } - if v_1.AuxInt != 31 { - break - } - y := v_1.Args[0] - v.reset(OpAMD64SHLB) - v.AddArg(x) - v.AddArg(y) - return true - } - return false -} func rewriteValueAMD64_OpAMD64SHLL(v *Value, config *Config) bool { b := v.Block _ = b @@ -15926,36 +16033,6 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (SHLL x (MOVWconst [c])) - // cond: - // result: (SHLLconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLLconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHLL x (MOVBconst [c])) - // cond: - // result: (SHLLconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLLconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } // match: (SHLL x (ANDLconst [31] y)) // cond: // result: (SHLL x y) @@ -16009,36 +16086,6 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (SHLQ x (MOVWconst [c])) - // cond: - // result: (SHLQconst [c&63] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLQconst) - v.AuxInt = c & 63 - v.AddArg(x) - return true - } - // match: (SHLQ x (MOVBconst [c])) - // cond: - // result: (SHLQconst [c&63] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLQconst) - v.AuxInt = c & 63 - v.AddArg(x) - return true - } // match: (SHLQ x (ANDQconst [63] y)) // cond: // result: (SHLQ x y) @@ -16059,89 +16106,6 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value, config *Config) bool { } return false } -func rewriteValueAMD64_OpAMD64SHLW(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (SHLW x (MOVQconst [c])) - // cond: - // result: (SHLWconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLWconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHLW x (MOVLconst [c])) - // cond: - // result: (SHLWconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVLconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLWconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHLW x (MOVWconst [c])) - // cond: - // result: (SHLWconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLWconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHLW x (MOVBconst [c])) - // cond: - // result: (SHLWconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHLWconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHLW x (ANDWconst [31] y)) - // cond: - // result: (SHLW x y) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64ANDWconst { - break - } - if v_1.AuxInt != 31 { - break - } - y := v_1.Args[0] - v.reset(OpAMD64SHLW) - v.AddArg(x) - v.AddArg(y) - return true - } - return false -} func rewriteValueAMD64_OpAMD64SHRB(v *Value, config *Config) bool { b := v.Block _ = b @@ -16175,54 +16139,6 @@ func rewriteValueAMD64_OpAMD64SHRB(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (SHRB x (MOVWconst [c])) - // cond: - // result: (SHRBconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHRBconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHRB x (MOVBconst [c])) - // cond: - // result: (SHRBconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHRBconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHRB x (ANDBconst [31] y)) - // cond: - // result: (SHRB x y) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64ANDBconst { - break - } - if v_1.AuxInt != 31 { - break - } - y := v_1.Args[0] - v.reset(OpAMD64SHRB) - v.AddArg(x) - v.AddArg(y) - return true - } return false } func rewriteValueAMD64_OpAMD64SHRL(v *Value, config *Config) bool { @@ -16258,36 +16174,6 @@ func rewriteValueAMD64_OpAMD64SHRL(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (SHRL x (MOVWconst [c])) - // cond: - // result: (SHRLconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHRLconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHRL x (MOVBconst [c])) - // cond: - // result: (SHRLconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHRLconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } // match: (SHRL x (ANDLconst [31] y)) // cond: // result: (SHRL x y) @@ -16341,36 +16227,6 @@ func rewriteValueAMD64_OpAMD64SHRQ(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (SHRQ x (MOVWconst [c])) - // cond: - // result: (SHRQconst [c&63] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHRQconst) - v.AuxInt = c & 63 - v.AddArg(x) - return true - } - // match: (SHRQ x (MOVBconst [c])) - // cond: - // result: (SHRQconst [c&63] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHRQconst) - v.AuxInt = c & 63 - v.AddArg(x) - return true - } // match: (SHRQ x (ANDQconst [63] y)) // cond: // result: (SHRQ x y) @@ -16424,152 +16280,6 @@ func rewriteValueAMD64_OpAMD64SHRW(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (SHRW x (MOVWconst [c])) - // cond: - // result: (SHRWconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHRWconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHRW x (MOVBconst [c])) - // cond: - // result: (SHRWconst [c&31] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SHRWconst) - v.AuxInt = c & 31 - v.AddArg(x) - return true - } - // match: (SHRW x (ANDWconst [31] y)) - // cond: - // result: (SHRW x y) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64ANDWconst { - break - } - if v_1.AuxInt != 31 { - break - } - y := v_1.Args[0] - v.reset(OpAMD64SHRW) - v.AddArg(x) - v.AddArg(y) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64SUBB(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (SUBB x (MOVBconst [c])) - // cond: - // result: (SUBBconst x [c]) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SUBBconst) - v.AddArg(x) - v.AuxInt = c - return true - } - // match: (SUBB (MOVBconst [c]) x) - // cond: - // result: (NEGB (SUBBconst <v.Type> x [c])) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { - break - } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64NEGB) - v0 := b.NewValue0(v.Line, OpAMD64SUBBconst, v.Type) - v0.AddArg(x) - v0.AuxInt = c - v.AddArg(v0) - return true - } - // match: (SUBB x x) - // cond: - // result: (MOVBconst [0]) - for { - x := v.Args[0] - if x != v.Args[1] { - break - } - v.reset(OpAMD64MOVBconst) - v.AuxInt = 0 - return true - } - return false -} -func rewriteValueAMD64_OpAMD64SUBBconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (SUBBconst [c] x) - // cond: int8(c) == 0 - // result: x - for { - c := v.AuxInt - x := v.Args[0] - if !(int8(c) == 0) { - break - } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - // match: (SUBBconst (MOVBconst [d]) [c]) - // cond: - // result: (MOVBconst [int64(int8(d-c))]) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { - break - } - d := v_0.AuxInt - c := v.AuxInt - v.reset(OpAMD64MOVBconst) - v.AuxInt = int64(int8(d - c)) - return true - } - // match: (SUBBconst (SUBBconst x [d]) [c]) - // cond: - // result: (ADDBconst [int64(int8(-c-d))] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64SUBBconst { - break - } - x := v_0.Args[0] - d := v_0.AuxInt - c := v.AuxInt - v.reset(OpAMD64ADDBconst) - v.AuxInt = int64(int8(-c - d)) - v.AddArg(x) - return true - } return false } func rewriteValueAMD64_OpAMD64SUBL(v *Value, config *Config) bool { @@ -16638,6 +16348,17 @@ func rewriteValueAMD64_OpAMD64SUBLconst(v *Value, config *Config) bool { v.AddArg(x) return true } + // match: (SUBLconst [c] x) + // cond: + // result: (ADDLconst [int64(int32(-c))] x) + for { + c := v.AuxInt + x := v.Args[0] + v.reset(OpAMD64ADDLconst) + v.AuxInt = int64(int32(-c)) + v.AddArg(x) + return true + } // match: (SUBLconst (MOVLconst [d]) [c]) // cond: // result: (MOVLconst [int64(int32(d-c))]) @@ -16741,6 +16462,20 @@ func rewriteValueAMD64_OpAMD64SUBQconst(v *Value, config *Config) bool { v.AddArg(x) return true } + // match: (SUBQconst [c] x) + // cond: c != -(1<<31) + // result: (ADDQconst [-c] x) + for { + c := v.AuxInt + x := v.Args[0] + if !(c != -(1 << 31)) { + break + } + v.reset(OpAMD64ADDQconst) + v.AuxInt = -c + v.AddArg(x) + return true + } // match: (SUBQconst (MOVQconst [d]) [c]) // cond: // result: (MOVQconst [d-c]) @@ -16776,104 +16511,6 @@ func rewriteValueAMD64_OpAMD64SUBQconst(v *Value, config *Config) bool { } return false } -func rewriteValueAMD64_OpAMD64SUBW(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (SUBW x (MOVWconst [c])) - // cond: - // result: (SUBWconst x [c]) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64SUBWconst) - v.AddArg(x) - v.AuxInt = c - return true - } - // match: (SUBW (MOVWconst [c]) x) - // cond: - // result: (NEGW (SUBWconst <v.Type> x [c])) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64NEGW) - v0 := b.NewValue0(v.Line, OpAMD64SUBWconst, v.Type) - v0.AddArg(x) - v0.AuxInt = c - v.AddArg(v0) - return true - } - // match: (SUBW x x) - // cond: - // result: (MOVWconst [0]) - for { - x := v.Args[0] - if x != v.Args[1] { - break - } - v.reset(OpAMD64MOVWconst) - v.AuxInt = 0 - return true - } - return false -} -func rewriteValueAMD64_OpAMD64SUBWconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (SUBWconst [c] x) - // cond: int16(c) == 0 - // result: x - for { - c := v.AuxInt - x := v.Args[0] - if !(int16(c) == 0) { - break - } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - // match: (SUBWconst (MOVWconst [d]) [c]) - // cond: - // result: (MOVWconst [int64(int16(d-c))]) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - d := v_0.AuxInt - c := v.AuxInt - v.reset(OpAMD64MOVWconst) - v.AuxInt = int64(int16(d - c)) - return true - } - // match: (SUBWconst (SUBWconst x [d]) [c]) - // cond: - // result: (ADDWconst [int64(int16(-c-d))] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64SUBWconst { - break - } - x := v_0.Args[0] - d := v_0.AuxInt - c := v.AuxInt - v.reset(OpAMD64ADDWconst) - v.AuxInt = int64(int16(-c - d)) - v.AddArg(x) - return true - } - return false -} func rewriteValueAMD64_OpSignExt16to32(v *Value, config *Config) bool { b := v.Block _ = b @@ -17102,11 +16739,11 @@ func rewriteValueAMD64_OpSub16(v *Value, config *Config) bool { _ = b // match: (Sub16 x y) // cond: - // result: (SUBW x y) + // result: (SUBL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64SUBW) + v.reset(OpAMD64SUBL) v.AddArg(x) v.AddArg(y) return true @@ -17182,11 +16819,11 @@ func rewriteValueAMD64_OpSub8(v *Value, config *Config) bool { _ = b // match: (Sub8 x y) // cond: - // result: (SUBB x y) + // result: (SUBL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64SUBB) + v.reset(OpAMD64SUBL) v.AddArg(x) v.AddArg(y) return true @@ -17299,86 +16936,6 @@ func rewriteValueAMD64_OpTrunc64to8(v *Value, config *Config) bool { } return false } -func rewriteValueAMD64_OpAMD64XORB(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (XORB x (MOVBconst [c])) - // cond: - // result: (XORBconst [c] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVBconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64XORBconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (XORB (MOVBconst [c]) x) - // cond: - // result: (XORBconst [c] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { - break - } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64XORBconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (XORB x x) - // cond: - // result: (MOVBconst [0]) - for { - x := v.Args[0] - if x != v.Args[1] { - break - } - v.reset(OpAMD64MOVBconst) - v.AuxInt = 0 - return true - } - return false -} -func rewriteValueAMD64_OpAMD64XORBconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (XORBconst [c] x) - // cond: int8(c)==0 - // result: x - for { - c := v.AuxInt - x := v.Args[0] - if !(int8(c) == 0) { - break - } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - // match: (XORBconst [c] (MOVBconst [d])) - // cond: - // result: (MOVBconst [c^d]) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVBconst { - break - } - d := v_0.AuxInt - v.reset(OpAMD64MOVBconst) - v.AuxInt = c ^ d - return true - } - return false -} func rewriteValueAMD64_OpAMD64XORL(v *Value, config *Config) bool { b := v.Block _ = b @@ -17544,96 +17101,16 @@ func rewriteValueAMD64_OpAMD64XORQconst(v *Value, config *Config) bool { } return false } -func rewriteValueAMD64_OpAMD64XORW(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (XORW x (MOVWconst [c])) - // cond: - // result: (XORWconst [c] x) - for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpAMD64MOVWconst { - break - } - c := v_1.AuxInt - v.reset(OpAMD64XORWconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (XORW (MOVWconst [c]) x) - // cond: - // result: (XORWconst [c] x) - for { - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - c := v_0.AuxInt - x := v.Args[1] - v.reset(OpAMD64XORWconst) - v.AuxInt = c - v.AddArg(x) - return true - } - // match: (XORW x x) - // cond: - // result: (MOVWconst [0]) - for { - x := v.Args[0] - if x != v.Args[1] { - break - } - v.reset(OpAMD64MOVWconst) - v.AuxInt = 0 - return true - } - return false -} -func rewriteValueAMD64_OpAMD64XORWconst(v *Value, config *Config) bool { - b := v.Block - _ = b - // match: (XORWconst [c] x) - // cond: int16(c)==0 - // result: x - for { - c := v.AuxInt - x := v.Args[0] - if !(int16(c) == 0) { - break - } - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - // match: (XORWconst [c] (MOVWconst [d])) - // cond: - // result: (MOVWconst [c^d]) - for { - c := v.AuxInt - v_0 := v.Args[0] - if v_0.Op != OpAMD64MOVWconst { - break - } - d := v_0.AuxInt - v.reset(OpAMD64MOVWconst) - v.AuxInt = c ^ d - return true - } - return false -} func rewriteValueAMD64_OpXor16(v *Value, config *Config) bool { b := v.Block _ = b // match: (Xor16 x y) // cond: - // result: (XORW x y) + // result: (XORL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64XORW) + v.reset(OpAMD64XORL) v.AddArg(x) v.AddArg(y) return true @@ -17677,11 +17154,11 @@ func rewriteValueAMD64_OpXor8(v *Value, config *Config) bool { _ = b // match: (Xor8 x y) // cond: - // result: (XORB x y) + // result: (XORL x y) for { x := v.Args[0] y := v.Args[1] - v.reset(OpAMD64XORB) + v.reset(OpAMD64XORL) v.AddArg(x) v.AddArg(y) return true @@ -18849,7 +18326,7 @@ func rewriteBlockAMD64(b *Block) bool { return true } case BlockAMD64NE: - // match: (NE (TESTB (SETL cmp)) yes no) + // match: (NE (TESTB (SETL cmp) (SETL cmp)) yes no) // cond: // result: (LT cmp yes no) for { @@ -18862,6 +18339,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETL { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64LT @@ -18870,7 +18354,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETLE cmp)) yes no) + // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) // cond: // result: (LE cmp yes no) for { @@ -18883,6 +18367,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETLE { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64LE @@ -18891,7 +18382,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETG cmp)) yes no) + // match: (NE (TESTB (SETG cmp) (SETG cmp)) yes no) // cond: // result: (GT cmp yes no) for { @@ -18904,6 +18395,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETG { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64GT @@ -18912,7 +18410,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETGE cmp)) yes no) + // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) // cond: // result: (GE cmp yes no) for { @@ -18925,6 +18423,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETGE { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64GE @@ -18933,7 +18438,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETEQ cmp)) yes no) + // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) // cond: // result: (EQ cmp yes no) for { @@ -18946,6 +18451,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETEQ { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64EQ @@ -18954,7 +18466,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETNE cmp)) yes no) + // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) // cond: // result: (NE cmp yes no) for { @@ -18967,6 +18479,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETNE { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64NE @@ -18975,7 +18494,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETB cmp)) yes no) + // match: (NE (TESTB (SETB cmp) (SETB cmp)) yes no) // cond: // result: (ULT cmp yes no) for { @@ -18988,6 +18507,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETB { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64ULT @@ -18996,7 +18522,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETBE cmp)) yes no) + // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) // cond: // result: (ULE cmp yes no) for { @@ -19009,6 +18535,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETBE { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64ULE @@ -19017,7 +18550,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETA cmp)) yes no) + // match: (NE (TESTB (SETA cmp) (SETA cmp)) yes no) // cond: // result: (UGT cmp yes no) for { @@ -19030,6 +18563,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETA { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64UGT @@ -19038,7 +18578,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETAE cmp)) yes no) + // match: (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) // cond: // result: (UGE cmp yes no) for { @@ -19051,6 +18591,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETAE { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64UGE @@ -19059,7 +18606,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETGF cmp)) yes no) + // match: (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) // cond: // result: (UGT cmp yes no) for { @@ -19072,6 +18619,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETGF { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64UGT @@ -19080,7 +18634,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETGEF cmp)) yes no) + // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) // cond: // result: (UGE cmp yes no) for { @@ -19093,6 +18647,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETGEF { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64UGE @@ -19101,7 +18662,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETEQF cmp)) yes no) + // match: (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) // cond: // result: (EQF cmp yes no) for { @@ -19114,6 +18675,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETEQF { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64EQF @@ -19122,7 +18690,7 @@ func rewriteBlockAMD64(b *Block) bool { b.Succs[1] = no return true } - // match: (NE (TESTB (SETNEF cmp)) yes no) + // match: (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) // cond: // result: (NEF cmp yes no) for { @@ -19135,6 +18703,13 @@ func rewriteBlockAMD64(b *Block) bool { break } cmp := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64SETNEF { + break + } + if cmp != v_1.Args[0] { + break + } yes := b.Succs[0] no := b.Succs[1] b.Kind = BlockAMD64NEF diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 932cb42235..43e87c3bf6 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -66,6 +66,8 @@ func rewriteValuegeneric(v *Value, config *Config) bool { return rewriteValuegeneric_OpEq64(v, config) case OpEq8: return rewriteValuegeneric_OpEq8(v, config) + case OpEqB: + return rewriteValuegeneric_OpEqB(v, config) case OpEqInter: return rewriteValuegeneric_OpEqInter(v, config) case OpEqPtr: @@ -218,6 +220,8 @@ func rewriteValuegeneric(v *Value, config *Config) bool { return rewriteValuegeneric_OpNeq64(v, config) case OpNeq8: return rewriteValuegeneric_OpNeq8(v, config) + case OpNeqB: + return rewriteValuegeneric_OpNeqB(v, config) case OpNeqInter: return rewriteValuegeneric_OpNeqInter(v, config) case OpNeqPtr: @@ -732,6 +736,78 @@ func rewriteValuegeneric_OpAnd16(v *Value, config *Config) bool { v.AuxInt = 0 return true } + // match: (And16 x (And16 x y)) + // cond: + // result: (And16 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAnd16 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpAnd16) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And16 x (And16 y x)) + // cond: + // result: (And16 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAnd16 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpAnd16) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And16 (And16 x y) x) + // cond: + // result: (And16 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpAnd16 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpAnd16) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And16 (And16 x y) y) + // cond: + // result: (And16 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpAnd16 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpAnd16) + v.AddArg(x) + v.AddArg(y) + return true + } return false } func rewriteValuegeneric_OpAnd32(v *Value, config *Config) bool { @@ -803,6 +879,78 @@ func rewriteValuegeneric_OpAnd32(v *Value, config *Config) bool { v.AuxInt = 0 return true } + // match: (And32 x (And32 x y)) + // cond: + // result: (And32 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAnd32 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpAnd32) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And32 x (And32 y x)) + // cond: + // result: (And32 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAnd32 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpAnd32) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And32 (And32 x y) x) + // cond: + // result: (And32 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpAnd32 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpAnd32) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And32 (And32 x y) y) + // cond: + // result: (And32 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpAnd32 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpAnd32) + v.AddArg(x) + v.AddArg(y) + return true + } return false } func rewriteValuegeneric_OpAnd64(v *Value, config *Config) bool { @@ -874,6 +1022,78 @@ func rewriteValuegeneric_OpAnd64(v *Value, config *Config) bool { v.AuxInt = 0 return true } + // match: (And64 x (And64 x y)) + // cond: + // result: (And64 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAnd64 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpAnd64) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And64 x (And64 y x)) + // cond: + // result: (And64 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAnd64 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpAnd64) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And64 (And64 x y) x) + // cond: + // result: (And64 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpAnd64 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpAnd64) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And64 (And64 x y) y) + // cond: + // result: (And64 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpAnd64 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpAnd64) + v.AddArg(x) + v.AddArg(y) + return true + } // match: (And64 <t> (Const64 [y]) x) // cond: nlz(y) + nto(y) == 64 && nto(y) >= 32 // result: (Rsh64Ux64 (Lsh64x64 <t> x (Const64 <t> [nlz(y)])) (Const64 <t> [nlz(y)])) @@ -997,6 +1217,78 @@ func rewriteValuegeneric_OpAnd8(v *Value, config *Config) bool { v.AuxInt = 0 return true } + // match: (And8 x (And8 x y)) + // cond: + // result: (And8 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAnd8 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpAnd8) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And8 x (And8 y x)) + // cond: + // result: (And8 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAnd8 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpAnd8) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And8 (And8 x y) x) + // cond: + // result: (And8 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpAnd8 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpAnd8) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (And8 (And8 x y) y) + // cond: + // result: (And8 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpAnd8 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpAnd8) + v.AddArg(x) + v.AddArg(y) + return true + } return false } func rewriteValuegeneric_OpArg(v *Value, config *Config) bool { @@ -2060,57 +2352,6 @@ func rewriteValuegeneric_OpEq8(v *Value, config *Config) bool { v.AuxInt = 1 return true } - // match: (Eq8 (ConstBool [c]) (ConstBool [d])) - // cond: - // result: (ConstBool [b2i(c == d)]) - for { - v_0 := v.Args[0] - if v_0.Op != OpConstBool { - break - } - c := v_0.AuxInt - v_1 := v.Args[1] - if v_1.Op != OpConstBool { - break - } - d := v_1.AuxInt - v.reset(OpConstBool) - v.AuxInt = b2i(c == d) - return true - } - // match: (Eq8 (ConstBool [0]) x) - // cond: - // result: (Not x) - for { - v_0 := v.Args[0] - if v_0.Op != OpConstBool { - break - } - if v_0.AuxInt != 0 { - break - } - x := v.Args[1] - v.reset(OpNot) - v.AddArg(x) - return true - } - // match: (Eq8 (ConstBool [1]) x) - // cond: - // result: x - for { - v_0 := v.Args[0] - if v_0.Op != OpConstBool { - break - } - if v_0.AuxInt != 1 { - break - } - x := v.Args[1] - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } // match: (Eq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) // cond: // result: (Eq8 (Const8 <t> [int64(int8(c-d))]) x) @@ -2162,38 +2403,40 @@ func rewriteValuegeneric_OpEq8(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (Eq8 x (ConstBool <t> [c])) - // cond: x.Op != OpConstBool - // result: (Eq8 (ConstBool <t> [c]) x) + // match: (Eq8 (Const8 [c]) (Const8 [d])) + // cond: + // result: (ConstBool [b2i(c == d)]) for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpConstBool { + v_0 := v.Args[0] + if v_0.Op != OpConst8 { break } - t := v_1.Type - c := v_1.AuxInt - if !(x.Op != OpConstBool) { + c := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != OpConst8 { break } - v.reset(OpEq8) - v0 := b.NewValue0(v.Line, OpConstBool, t) - v0.AuxInt = c - v.AddArg(v0) - v.AddArg(x) + d := v_1.AuxInt + v.reset(OpConstBool) + v.AuxInt = b2i(c == d) return true } - // match: (Eq8 (Const8 [c]) (Const8 [d])) + return false +} +func rewriteValuegeneric_OpEqB(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (EqB (ConstBool [c]) (ConstBool [d])) // cond: // result: (ConstBool [b2i(c == d)]) for { v_0 := v.Args[0] - if v_0.Op != OpConst8 { + if v_0.Op != OpConstBool { break } c := v_0.AuxInt v_1 := v.Args[1] - if v_1.Op != OpConst8 { + if v_1.Op != OpConstBool { break } d := v_1.AuxInt @@ -2201,6 +2444,39 @@ func rewriteValuegeneric_OpEq8(v *Value, config *Config) bool { v.AuxInt = b2i(c == d) return true } + // match: (EqB (ConstBool [0]) x) + // cond: + // result: (Not x) + for { + v_0 := v.Args[0] + if v_0.Op != OpConstBool { + break + } + if v_0.AuxInt != 0 { + break + } + x := v.Args[1] + v.reset(OpNot) + v.AddArg(x) + return true + } + // match: (EqB (ConstBool [1]) x) + // cond: + // result: x + for { + v_0 := v.Args[0] + if v_0.Op != OpConstBool { + break + } + if v_0.AuxInt != 1 { + break + } + x := v.Args[1] + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } return false } func rewriteValuegeneric_OpEqInter(v *Value, config *Config) bool { @@ -5419,57 +5695,6 @@ func rewriteValuegeneric_OpNeq8(v *Value, config *Config) bool { v.AuxInt = 0 return true } - // match: (Neq8 (ConstBool [c]) (ConstBool [d])) - // cond: - // result: (ConstBool [b2i(c != d)]) - for { - v_0 := v.Args[0] - if v_0.Op != OpConstBool { - break - } - c := v_0.AuxInt - v_1 := v.Args[1] - if v_1.Op != OpConstBool { - break - } - d := v_1.AuxInt - v.reset(OpConstBool) - v.AuxInt = b2i(c != d) - return true - } - // match: (Neq8 (ConstBool [0]) x) - // cond: - // result: x - for { - v_0 := v.Args[0] - if v_0.Op != OpConstBool { - break - } - if v_0.AuxInt != 0 { - break - } - x := v.Args[1] - v.reset(OpCopy) - v.Type = x.Type - v.AddArg(x) - return true - } - // match: (Neq8 (ConstBool [1]) x) - // cond: - // result: (Not x) - for { - v_0 := v.Args[0] - if v_0.Op != OpConstBool { - break - } - if v_0.AuxInt != 1 { - break - } - x := v.Args[1] - v.reset(OpNot) - v.AddArg(x) - return true - } // match: (Neq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) // cond: // result: (Neq8 (Const8 <t> [int64(int8(c-d))]) x) @@ -5521,38 +5746,40 @@ func rewriteValuegeneric_OpNeq8(v *Value, config *Config) bool { v.AddArg(x) return true } - // match: (Neq8 x (ConstBool <t> [c])) - // cond: x.Op != OpConstBool - // result: (Neq8 (ConstBool <t> [c]) x) + // match: (Neq8 (Const8 [c]) (Const8 [d])) + // cond: + // result: (ConstBool [b2i(c != d)]) for { - x := v.Args[0] - v_1 := v.Args[1] - if v_1.Op != OpConstBool { + v_0 := v.Args[0] + if v_0.Op != OpConst8 { break } - t := v_1.Type - c := v_1.AuxInt - if !(x.Op != OpConstBool) { + c := v_0.AuxInt + v_1 := v.Args[1] + if v_1.Op != OpConst8 { break } - v.reset(OpNeq8) - v0 := b.NewValue0(v.Line, OpConstBool, t) - v0.AuxInt = c - v.AddArg(v0) - v.AddArg(x) + d := v_1.AuxInt + v.reset(OpConstBool) + v.AuxInt = b2i(c != d) return true } - // match: (Neq8 (Const8 [c]) (Const8 [d])) + return false +} +func rewriteValuegeneric_OpNeqB(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (NeqB (ConstBool [c]) (ConstBool [d])) // cond: // result: (ConstBool [b2i(c != d)]) for { v_0 := v.Args[0] - if v_0.Op != OpConst8 { + if v_0.Op != OpConstBool { break } c := v_0.AuxInt v_1 := v.Args[1] - if v_1.Op != OpConst8 { + if v_1.Op != OpConstBool { break } d := v_1.AuxInt @@ -5560,6 +5787,39 @@ func rewriteValuegeneric_OpNeq8(v *Value, config *Config) bool { v.AuxInt = b2i(c != d) return true } + // match: (NeqB (ConstBool [0]) x) + // cond: + // result: x + for { + v_0 := v.Args[0] + if v_0.Op != OpConstBool { + break + } + if v_0.AuxInt != 0 { + break + } + x := v.Args[1] + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (NeqB (ConstBool [1]) x) + // cond: + // result: (Not x) + for { + v_0 := v.Args[0] + if v_0.Op != OpConstBool { + break + } + if v_0.AuxInt != 1 { + break + } + x := v.Args[1] + v.reset(OpNot) + v.AddArg(x) + return true + } return false } func rewriteValuegeneric_OpNeqInter(v *Value, config *Config) bool { @@ -5739,6 +5999,78 @@ func rewriteValuegeneric_OpOr16(v *Value, config *Config) bool { v.AuxInt = -1 return true } + // match: (Or16 x (Or16 x y)) + // cond: + // result: (Or16 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpOr16 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpOr16) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or16 x (Or16 y x)) + // cond: + // result: (Or16 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpOr16 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpOr16) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or16 (Or16 x y) x) + // cond: + // result: (Or16 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpOr16 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpOr16) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or16 (Or16 x y) y) + // cond: + // result: (Or16 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpOr16 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpOr16) + v.AddArg(x) + v.AddArg(y) + return true + } return false } func rewriteValuegeneric_OpOr32(v *Value, config *Config) bool { @@ -5810,6 +6142,78 @@ func rewriteValuegeneric_OpOr32(v *Value, config *Config) bool { v.AuxInt = -1 return true } + // match: (Or32 x (Or32 x y)) + // cond: + // result: (Or32 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpOr32 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpOr32) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or32 x (Or32 y x)) + // cond: + // result: (Or32 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpOr32 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpOr32) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or32 (Or32 x y) x) + // cond: + // result: (Or32 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpOr32 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpOr32) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or32 (Or32 x y) y) + // cond: + // result: (Or32 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpOr32 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpOr32) + v.AddArg(x) + v.AddArg(y) + return true + } return false } func rewriteValuegeneric_OpOr64(v *Value, config *Config) bool { @@ -5881,6 +6285,78 @@ func rewriteValuegeneric_OpOr64(v *Value, config *Config) bool { v.AuxInt = -1 return true } + // match: (Or64 x (Or64 x y)) + // cond: + // result: (Or64 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpOr64 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpOr64) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or64 x (Or64 y x)) + // cond: + // result: (Or64 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpOr64 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpOr64) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or64 (Or64 x y) x) + // cond: + // result: (Or64 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpOr64 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpOr64) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or64 (Or64 x y) y) + // cond: + // result: (Or64 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpOr64 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpOr64) + v.AddArg(x) + v.AddArg(y) + return true + } return false } func rewriteValuegeneric_OpOr8(v *Value, config *Config) bool { @@ -5952,6 +6428,78 @@ func rewriteValuegeneric_OpOr8(v *Value, config *Config) bool { v.AuxInt = -1 return true } + // match: (Or8 x (Or8 x y)) + // cond: + // result: (Or8 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpOr8 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpOr8) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or8 x (Or8 y x)) + // cond: + // result: (Or8 x y) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpOr8 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpOr8) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or8 (Or8 x y) x) + // cond: + // result: (Or8 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpOr8 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpOr8) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (Or8 (Or8 x y) y) + // cond: + // result: (Or8 x y) + for { + v_0 := v.Args[0] + if v_0.Op != OpOr8 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpOr8) + v.AddArg(x) + v.AddArg(y) + return true + } return false } func rewriteValuegeneric_OpPhi(v *Value, config *Config) bool { @@ -6185,7 +6733,7 @@ func rewriteValuegeneric_OpRsh16Ux64(v *Value, config *Config) bool { _ = b // match: (Rsh16Ux64 (Const16 [c]) (Const64 [d])) // cond: - // result: (Const16 [int64(uint16(c) >> uint64(d))]) + // result: (Const16 [int64(int16(uint16(c) >> uint64(d)))]) for { v_0 := v.Args[0] if v_0.Op != OpConst16 { @@ -6198,7 +6746,7 @@ func rewriteValuegeneric_OpRsh16Ux64(v *Value, config *Config) bool { } d := v_1.AuxInt v.reset(OpConst16) - v.AuxInt = int64(uint16(c) >> uint64(d)) + v.AuxInt = int64(int16(uint16(c) >> uint64(d))) return true } // match: (Rsh16Ux64 (Const16 [0]) _) @@ -6547,7 +7095,7 @@ func rewriteValuegeneric_OpRsh32Ux64(v *Value, config *Config) bool { _ = b // match: (Rsh32Ux64 (Const32 [c]) (Const64 [d])) // cond: - // result: (Const32 [int64(uint32(c) >> uint64(d))]) + // result: (Const32 [int64(int32(uint32(c) >> uint64(d)))]) for { v_0 := v.Args[0] if v_0.Op != OpConst32 { @@ -6560,7 +7108,7 @@ func rewriteValuegeneric_OpRsh32Ux64(v *Value, config *Config) bool { } d := v_1.AuxInt v.reset(OpConst32) - v.AuxInt = int64(uint32(c) >> uint64(d)) + v.AuxInt = int64(int32(uint32(c) >> uint64(d))) return true } // match: (Rsh32Ux64 (Const32 [0]) _) @@ -7353,7 +7901,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value, config *Config) bool { _ = b // match: (Rsh8Ux64 (Const8 [c]) (Const64 [d])) // cond: - // result: (Const8 [int64(uint8(c) >> uint64(d))]) + // result: (Const8 [int64(int8(uint8(c) >> uint64(d)))]) for { v_0 := v.Args[0] if v_0.Op != OpConst8 { @@ -7366,7 +7914,7 @@ func rewriteValuegeneric_OpRsh8Ux64(v *Value, config *Config) bool { } d := v_1.AuxInt v.reset(OpConst8) - v.AuxInt = int64(uint8(c) >> uint64(d)) + v.AuxInt = int64(int8(uint8(c) >> uint64(d))) return true } // match: (Rsh8Ux64 (Const8 [0]) _) @@ -8941,6 +9489,78 @@ func rewriteValuegeneric_OpXor16(v *Value, config *Config) bool { v.AddArg(x) return true } + // match: (Xor16 x (Xor16 x y)) + // cond: + // result: y + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpXor16 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor16 x (Xor16 y x)) + // cond: + // result: y + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpXor16 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor16 (Xor16 x y) x) + // cond: + // result: y + for { + v_0 := v.Args[0] + if v_0.Op != OpXor16 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor16 (Xor16 x y) y) + // cond: + // result: x + for { + v_0 := v.Args[0] + if v_0.Op != OpXor16 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } return false } func rewriteValuegeneric_OpXor32(v *Value, config *Config) bool { @@ -8996,6 +9616,78 @@ func rewriteValuegeneric_OpXor32(v *Value, config *Config) bool { v.AddArg(x) return true } + // match: (Xor32 x (Xor32 x y)) + // cond: + // result: y + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpXor32 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor32 x (Xor32 y x)) + // cond: + // result: y + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpXor32 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor32 (Xor32 x y) x) + // cond: + // result: y + for { + v_0 := v.Args[0] + if v_0.Op != OpXor32 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor32 (Xor32 x y) y) + // cond: + // result: x + for { + v_0 := v.Args[0] + if v_0.Op != OpXor32 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } return false } func rewriteValuegeneric_OpXor64(v *Value, config *Config) bool { @@ -9051,6 +9743,78 @@ func rewriteValuegeneric_OpXor64(v *Value, config *Config) bool { v.AddArg(x) return true } + // match: (Xor64 x (Xor64 x y)) + // cond: + // result: y + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpXor64 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor64 x (Xor64 y x)) + // cond: + // result: y + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpXor64 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor64 (Xor64 x y) x) + // cond: + // result: y + for { + v_0 := v.Args[0] + if v_0.Op != OpXor64 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor64 (Xor64 x y) y) + // cond: + // result: x + for { + v_0 := v.Args[0] + if v_0.Op != OpXor64 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } return false } func rewriteValuegeneric_OpXor8(v *Value, config *Config) bool { @@ -9106,6 +9870,78 @@ func rewriteValuegeneric_OpXor8(v *Value, config *Config) bool { v.AddArg(x) return true } + // match: (Xor8 x (Xor8 x y)) + // cond: + // result: y + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpXor8 { + break + } + if x != v_1.Args[0] { + break + } + y := v_1.Args[1] + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor8 x (Xor8 y x)) + // cond: + // result: y + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpXor8 { + break + } + y := v_1.Args[0] + if x != v_1.Args[1] { + break + } + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor8 (Xor8 x y) x) + // cond: + // result: y + for { + v_0 := v.Args[0] + if v_0.Op != OpXor8 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if x != v.Args[1] { + break + } + v.reset(OpCopy) + v.Type = y.Type + v.AddArg(y) + return true + } + // match: (Xor8 (Xor8 x y) y) + // cond: + // result: x + for { + v_0 := v.Args[0] + if v_0.Op != OpXor8 { + break + } + x := v_0.Args[0] + y := v_0.Args[1] + if y != v.Args[1] { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } return false } func rewriteBlockgeneric(b *Block) bool { diff --git a/src/cmd/compile/internal/ssa/sparsemap.go b/src/cmd/compile/internal/ssa/sparsemap.go index 6c0043b230..0211a70f09 100644 --- a/src/cmd/compile/internal/ssa/sparsemap.go +++ b/src/cmd/compile/internal/ssa/sparsemap.go @@ -32,6 +32,8 @@ func (s *sparseMap) contains(k ID) bool { return i < len(s.dense) && s.dense[i].key == k } +// get returns the value for key k, or -1 if k does +// not appear in the map. func (s *sparseMap) get(k ID) int32 { i := s.sparse[k] if i < len(s.dense) && s.dense[i].key == k { @@ -50,6 +52,20 @@ func (s *sparseMap) set(k ID, v int32) { s.sparse[k] = len(s.dense) - 1 } +// setBit sets the v'th bit of k's value, where 0 <= v < 32 +func (s *sparseMap) setBit(k ID, v uint) { + if v >= 32 { + panic("bit index too large.") + } + i := s.sparse[k] + if i < len(s.dense) && s.dense[i].key == k { + s.dense[i].val |= 1 << v + return + } + s.dense = append(s.dense, sparseEntry{k, 1 << v}) + s.sparse[k] = len(s.dense) - 1 +} + func (s *sparseMap) remove(k ID) { i := s.sparse[k] if i < len(s.dense) && s.dense[i].key == k { diff --git a/src/cmd/compile/internal/ssa/sparsetree.go b/src/cmd/compile/internal/ssa/sparsetree.go index cae91e7ddb..45c7897496 100644 --- a/src/cmd/compile/internal/ssa/sparsetree.go +++ b/src/cmd/compile/internal/ssa/sparsetree.go @@ -116,6 +116,9 @@ func (t sparseTree) Child(x *Block) *Block { // isAncestorEq reports whether x is an ancestor of or equal to y. func (t sparseTree) isAncestorEq(x, y *Block) bool { + if x == y { + return true + } xx := &t[x.ID] yy := &t[y.ID] return xx.entry <= yy.entry && yy.exit <= xx.exit @@ -123,7 +126,16 @@ func (t sparseTree) isAncestorEq(x, y *Block) bool { // isAncestor reports whether x is a strict ancestor of y. func (t sparseTree) isAncestor(x, y *Block) bool { + if x == y { + return false + } xx := &t[x.ID] yy := &t[y.ID] return xx.entry < yy.entry && yy.exit < xx.exit } + +// maxdomorder returns a value to allow a maximal dominator first sort. maxdomorder(x) < maxdomorder(y) is true +// if x may dominate y, and false if x cannot dominate y. +func (t sparseTree) maxdomorder(x *Block) int32 { + return t[x.ID].entry +} diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go index 1de22dc96e..44f4096cb2 100644 --- a/src/cmd/compile/internal/ssa/stackalloc.go +++ b/src/cmd/compile/internal/ssa/stackalloc.go @@ -22,6 +22,13 @@ type stackAllocState struct { names []LocalSlot slots []int used []bool + + nArgSlot, // Number of Values sourced to arg slot + nNotNeed, // Number of Values not needing a stack slot + nNamedSlot, // Number of Values using a named stack slot + nReuse, // Number of values reusing a stack slot + nAuto, // Number of autos allocated for stack slots. + nSelfInterfere int32 // Number of self-interferences } func newStackAllocState(f *Func) *stackAllocState { @@ -54,6 +61,7 @@ func putStackAllocState(s *stackAllocState) { s.f.Config.stackAllocState = s s.f = nil s.live = nil + s.nArgSlot, s.nNotNeed, s.nNamedSlot, s.nReuse, s.nAuto, s.nSelfInterfere = 0, 0, 0, 0, 0, 0 } type stackValState struct { @@ -75,6 +83,13 @@ func stackalloc(f *Func, spillLive [][]ID) [][]ID { defer putStackAllocState(s) s.stackalloc() + if f.pass.stats > 0 { + f.logStat("stack_alloc_stats", + s.nArgSlot, "arg_slots", s.nNotNeed, "slot_not_needed", + s.nNamedSlot, "named_slots", s.nAuto, "auto_slots", + s.nReuse, "reused_slots", s.nSelfInterfere, "self_interfering") + } + return s.live } @@ -170,9 +185,11 @@ func (s *stackAllocState) stackalloc() { for _, b := range f.Blocks { for _, v := range b.Values { if !s.values[v.ID].needSlot { + s.nNotNeed++ continue } if v.Op == OpArg { + s.nArgSlot++ continue // already picked } @@ -184,18 +201,20 @@ func (s *stackAllocState) stackalloc() { } else { name = names[v.ID] } - if name.N != nil && v.Type.Equal(name.Type) { + if name.N != nil && v.Type.Compare(name.Type) == CMPeq { for _, id := range s.interfere[v.ID] { h := f.getHome(id) if h != nil && h.(LocalSlot).N == name.N && h.(LocalSlot).Off == name.Off { // A variable can interfere with itself. // It is rare, but but it can happen. + s.nSelfInterfere++ goto noname } } if f.pass.debug > stackDebug { fmt.Printf("stackalloc %s to %s\n", v, name.Name()) } + s.nNamedSlot++ f.setHome(v, name) continue } @@ -217,11 +236,13 @@ func (s *stackAllocState) stackalloc() { var i int for i = 0; i < len(locs); i++ { if !used[i] { + s.nReuse++ break } } // If there is no unused stack slot, allocate a new one. if i == len(locs) { + s.nAuto++ locs = append(locs, LocalSlot{N: f.Config.fe.Auto(v.Type), Type: v.Type, Off: 0}) locations[v.Type] = locs } @@ -351,7 +372,7 @@ func (s *stackAllocState) buildInterferenceGraph() { if s.values[v.ID].needSlot { live.remove(v.ID) for _, id := range live.contents() { - if s.values[v.ID].typ.Equal(s.values[id].typ) { + if s.values[v.ID].typ.Compare(s.values[id].typ) == CMPeq { s.interfere[v.ID] = append(s.interfere[v.ID], id) s.interfere[id] = append(s.interfere[id], v.ID) } diff --git a/src/cmd/compile/internal/ssa/type.go b/src/cmd/compile/internal/ssa/type.go index 9643b07556..91a4efe78f 100644 --- a/src/cmd/compile/internal/ssa/type.go +++ b/src/cmd/compile/internal/ssa/type.go @@ -31,16 +31,16 @@ type Type interface { ElemType() Type // given []T or *T or [n]T, return T PtrTo() Type // given T, return *T - NumFields() int // # of fields of a struct - FieldType(i int) Type // type of ith field of the struct - FieldOff(i int) int64 // offset of ith field of the struct + NumFields() int // # of fields of a struct + FieldType(i int) Type // type of ith field of the struct + FieldOff(i int) int64 // offset of ith field of the struct + FieldName(i int) string // name of ith field of the struct NumElem() int64 // # of elements of an array String() string SimpleString() string // a coarser generic description of T, e.g. T's underlying type - Equal(Type) bool - Compare(Type) Cmp // compare types, returning one of CMPlt, CMPeq, CMPgt. + Compare(Type) Cmp // compare types, returning one of CMPlt, CMPeq, CMPgt. } // Special compiler-only types. @@ -53,30 +53,31 @@ type CompilerType struct { Int128 bool } -func (t *CompilerType) Size() int64 { return t.size } // Size in bytes -func (t *CompilerType) Alignment() int64 { return 0 } -func (t *CompilerType) IsBoolean() bool { return false } -func (t *CompilerType) IsInteger() bool { return false } -func (t *CompilerType) IsSigned() bool { return false } -func (t *CompilerType) IsFloat() bool { return false } -func (t *CompilerType) IsComplex() bool { return false } -func (t *CompilerType) IsPtrShaped() bool { return false } -func (t *CompilerType) IsString() bool { return false } -func (t *CompilerType) IsSlice() bool { return false } -func (t *CompilerType) IsArray() bool { return false } -func (t *CompilerType) IsStruct() bool { return false } -func (t *CompilerType) IsInterface() bool { return false } -func (t *CompilerType) IsMemory() bool { return t.Memory } -func (t *CompilerType) IsFlags() bool { return t.Flags } -func (t *CompilerType) IsVoid() bool { return t.Void } -func (t *CompilerType) String() string { return t.Name } -func (t *CompilerType) SimpleString() string { return t.Name } -func (t *CompilerType) ElemType() Type { panic("not implemented") } -func (t *CompilerType) PtrTo() Type { panic("not implemented") } -func (t *CompilerType) NumFields() int { panic("not implemented") } -func (t *CompilerType) FieldType(i int) Type { panic("not implemented") } -func (t *CompilerType) FieldOff(i int) int64 { panic("not implemented") } -func (t *CompilerType) NumElem() int64 { panic("not implemented") } +func (t *CompilerType) Size() int64 { return t.size } // Size in bytes +func (t *CompilerType) Alignment() int64 { return 0 } +func (t *CompilerType) IsBoolean() bool { return false } +func (t *CompilerType) IsInteger() bool { return false } +func (t *CompilerType) IsSigned() bool { return false } +func (t *CompilerType) IsFloat() bool { return false } +func (t *CompilerType) IsComplex() bool { return false } +func (t *CompilerType) IsPtrShaped() bool { return false } +func (t *CompilerType) IsString() bool { return false } +func (t *CompilerType) IsSlice() bool { return false } +func (t *CompilerType) IsArray() bool { return false } +func (t *CompilerType) IsStruct() bool { return false } +func (t *CompilerType) IsInterface() bool { return false } +func (t *CompilerType) IsMemory() bool { return t.Memory } +func (t *CompilerType) IsFlags() bool { return t.Flags } +func (t *CompilerType) IsVoid() bool { return t.Void } +func (t *CompilerType) String() string { return t.Name } +func (t *CompilerType) SimpleString() string { return t.Name } +func (t *CompilerType) ElemType() Type { panic("not implemented") } +func (t *CompilerType) PtrTo() Type { panic("not implemented") } +func (t *CompilerType) NumFields() int { panic("not implemented") } +func (t *CompilerType) FieldType(i int) Type { panic("not implemented") } +func (t *CompilerType) FieldOff(i int) int64 { panic("not implemented") } +func (t *CompilerType) FieldName(i int) string { panic("not implemented") } +func (t *CompilerType) NumElem() int64 { panic("not implemented") } // Cmp is a comparison between values a and b. // -1 if a < b @@ -115,14 +116,6 @@ func (t *CompilerType) Compare(u Type) Cmp { return CMPlt } -func (t *CompilerType) Equal(u Type) bool { - x, ok := u.(*CompilerType) - if !ok { - return false - } - return x == t -} - var ( TypeInvalid = &CompilerType{Name: "invalid"} TypeMem = &CompilerType{Name: "mem", Memory: true} diff --git a/src/cmd/compile/internal/ssa/type_test.go b/src/cmd/compile/internal/ssa/type_test.go index cd80abf03f..3b1a892083 100644 --- a/src/cmd/compile/internal/ssa/type_test.go +++ b/src/cmd/compile/internal/ssa/type_test.go @@ -24,30 +24,31 @@ type TypeImpl struct { Name string } -func (t *TypeImpl) Size() int64 { return t.Size_ } -func (t *TypeImpl) Alignment() int64 { return t.Align } -func (t *TypeImpl) IsBoolean() bool { return t.Boolean } -func (t *TypeImpl) IsInteger() bool { return t.Integer } -func (t *TypeImpl) IsSigned() bool { return t.Signed } -func (t *TypeImpl) IsFloat() bool { return t.Float } -func (t *TypeImpl) IsComplex() bool { return t.Complex } -func (t *TypeImpl) IsPtrShaped() bool { return t.Ptr } -func (t *TypeImpl) IsString() bool { return t.string } -func (t *TypeImpl) IsSlice() bool { return t.slice } -func (t *TypeImpl) IsArray() bool { return t.array } -func (t *TypeImpl) IsStruct() bool { return t.struct_ } -func (t *TypeImpl) IsInterface() bool { return t.inter } -func (t *TypeImpl) IsMemory() bool { return false } -func (t *TypeImpl) IsFlags() bool { return false } -func (t *TypeImpl) IsVoid() bool { return false } -func (t *TypeImpl) String() string { return t.Name } -func (t *TypeImpl) SimpleString() string { return t.Name } -func (t *TypeImpl) ElemType() Type { return t.Elem_ } -func (t *TypeImpl) PtrTo() Type { panic("not implemented") } -func (t *TypeImpl) NumFields() int { panic("not implemented") } -func (t *TypeImpl) FieldType(i int) Type { panic("not implemented") } -func (t *TypeImpl) FieldOff(i int) int64 { panic("not implemented") } -func (t *TypeImpl) NumElem() int64 { panic("not implemented") } +func (t *TypeImpl) Size() int64 { return t.Size_ } +func (t *TypeImpl) Alignment() int64 { return t.Align } +func (t *TypeImpl) IsBoolean() bool { return t.Boolean } +func (t *TypeImpl) IsInteger() bool { return t.Integer } +func (t *TypeImpl) IsSigned() bool { return t.Signed } +func (t *TypeImpl) IsFloat() bool { return t.Float } +func (t *TypeImpl) IsComplex() bool { return t.Complex } +func (t *TypeImpl) IsPtrShaped() bool { return t.Ptr } +func (t *TypeImpl) IsString() bool { return t.string } +func (t *TypeImpl) IsSlice() bool { return t.slice } +func (t *TypeImpl) IsArray() bool { return t.array } +func (t *TypeImpl) IsStruct() bool { return t.struct_ } +func (t *TypeImpl) IsInterface() bool { return t.inter } +func (t *TypeImpl) IsMemory() bool { return false } +func (t *TypeImpl) IsFlags() bool { return false } +func (t *TypeImpl) IsVoid() bool { return false } +func (t *TypeImpl) String() string { return t.Name } +func (t *TypeImpl) SimpleString() string { return t.Name } +func (t *TypeImpl) ElemType() Type { return t.Elem_ } +func (t *TypeImpl) PtrTo() Type { panic("not implemented") } +func (t *TypeImpl) NumFields() int { panic("not implemented") } +func (t *TypeImpl) FieldType(i int) Type { panic("not implemented") } +func (t *TypeImpl) FieldOff(i int) int64 { panic("not implemented") } +func (t *TypeImpl) FieldName(i int) string { panic("not implemented") } +func (t *TypeImpl) NumElem() int64 { panic("not implemented") } func (t *TypeImpl) Equal(u Type) bool { x, ok := u.(*TypeImpl) diff --git a/src/cmd/compile/internal/ssa/value.go b/src/cmd/compile/internal/ssa/value.go index fd4eb64db1..6c364ad932 100644 --- a/src/cmd/compile/internal/ssa/value.go +++ b/src/cmd/compile/internal/ssa/value.go @@ -185,6 +185,7 @@ func (v *Value) resetArgs() { } v.argstorage[0] = nil v.argstorage[1] = nil + v.argstorage[2] = nil v.Args = v.argstorage[:0] } |
