From c140df03267ab2e73ffd076002811aaa00fdc80e Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Wed, 9 Dec 2015 15:58:18 -0800 Subject: [dev.ssa] cmd/compile: allocate the flag register in a separate pass Spilling/restoring flag values is a pain to do during regalloc. Instead, allocate the flag register in a separate pass. Regalloc then operates normally on any flag recomputation instructions. Change-Id: Ia1c3d9e6eff678861193093c0b48a00f90e4156b Reviewed-on: https://go-review.googlesource.com/17694 Reviewed-by: David Chase --- src/cmd/compile/internal/ssa/flagalloc.go | 123 ++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 src/cmd/compile/internal/ssa/flagalloc.go (limited to 'src/cmd/compile/internal/ssa/flagalloc.go') diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go new file mode 100644 index 0000000000..c088158057 --- /dev/null +++ b/src/cmd/compile/internal/ssa/flagalloc.go @@ -0,0 +1,123 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +const flagRegMask = regMask(1) << 33 // TODO: arch-specific + +// flagalloc allocates the flag register among all the flag-generating +// instructions. Flag values are recomputed if they need to be +// spilled/restored. +func flagalloc(f *Func) { + // Compute the in-register flag value we want at the end of + // each block. This is basically a best-effort live variable + // analysis, so it can be much simpler than a full analysis. + // TODO: do we really need to keep flag values live across blocks? + // Could we force the flags register to be unused at basic block + // boundaries? Then we wouldn't need this computation. + end := make([]*Value, f.NumBlocks()) + for n := 0; n < 2; n++ { + // Walk blocks backwards. Poor-man's postorder traversal. + for i := len(f.Blocks) - 1; i >= 0; i-- { + b := f.Blocks[i] + // Walk values backwards to figure out what flag + // value we want in the flag register at the start + // of the block. + flag := end[b.ID] + if b.Control != nil && b.Control.Type.IsFlags() { + flag = b.Control + } + for j := len(b.Values) - 1; j >= 0; j-- { + v := b.Values[j] + if v == flag { + flag = nil + } + if opcodeTable[v.Op].reg.clobbers&flagRegMask != 0 { + flag = nil + } + for _, a := range v.Args { + if a.Type.IsFlags() { + flag = a + } + } + } + for _, p := range b.Preds { + end[p.ID] = flag + } + } + } + // For blocks which have a flags control value, that's the only value + // we can leave in the flags register at the end of the block. (There + // is no place to put a flag regeneration instruction.) + for _, b := range f.Blocks { + v := b.Control + if v != nil && v.Type.IsFlags() && end[b.ID] != v { + end[b.ID] = nil + } + } + + // Add flag recomputations where they are needed. + // TODO: Remove original instructions if they are never used. + var oldSched []*Value + for _, b := range f.Blocks { + oldSched = append(oldSched[:0], b.Values...) + b.Values = b.Values[:0] + // The current live flag value. + var flag *Value + if len(b.Preds) > 0 { + flag = end[b.Preds[0].ID] + // Note: the following condition depends on the lack of critical edges. + for _, p := range b.Preds[1:] { + if end[p.ID] != flag { + f.Fatalf("live flag in %s's predecessors not consistent", b) + } + } + } + for _, v := range oldSched { + if v.Op == OpPhi && v.Type.IsFlags() { + f.Fatalf("phi of flags not supported: %s", v.LongString()) + } + // Make sure any flag arg of v is in the flags register. + // If not, recompute it. + for i, a := range v.Args { + if !a.Type.IsFlags() { + continue + } + if a == flag { + continue + } + // Recalculate a + c := a.copyInto(b) + // Update v. + v.SetArg(i, c) + // Remember the most-recently computed flag value. + flag = c + } + // Issue v. + b.Values = append(b.Values, v) + if opcodeTable[v.Op].reg.clobbers&flagRegMask != 0 { + flag = nil + } + if v.Type.IsFlags() { + flag = v + } + } + if v := b.Control; v != nil && v != flag && v.Type.IsFlags() { + // Recalculate control value. + c := v.copyInto(b) + b.Control = c + flag = c + } + if v := end[b.ID]; v != nil && v != flag { + // Need to reissue flag generator for use by + // subsequent blocks. + _ = v.copyInto(b) + // Note: this flag generator is not properly linked up + // with the flag users. This breaks the SSA representation. + // We could fix up the users with another pass, but for now + // we'll just leave it. (Regalloc has the same issue for + // standard regs, and it runs next.) + } + } +} -- cgit v1.3 From 498933719287fbba1015c97d177a9bd4cfb9aada Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Fri, 11 Dec 2015 14:59:01 -0800 Subject: [dev.ssa] cmd/compile: allow control values to be CSEd With the separate flagalloc pass, it should be fine to allow CSE of control values. The worst that can happen is that the comparison gets un-CSEd by flagalloc. Fix bug in flagalloc where flag restores were getting clobbered by rematerialization during register allocation. Change-Id: If476cf98b69973e8f1a8eb29441136dd12fab8ad Reviewed-on: https://go-review.googlesource.com/17760 Reviewed-by: David Chase Run-TryBot: Keith Randall --- src/cmd/compile/internal/ssa/cse.go | 11 ++++++++++- src/cmd/compile/internal/ssa/flagalloc.go | 9 +++++++++ src/cmd/compile/internal/ssa/gen/AMD64.rules | 2 +- src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 2 +- src/cmd/compile/internal/ssa/opGen.go | 2 +- src/cmd/compile/internal/ssa/rewriteAMD64.go | 10 +++++----- 6 files changed, 27 insertions(+), 9 deletions(-) (limited to 'src/cmd/compile/internal/ssa/flagalloc.go') diff --git a/src/cmd/compile/internal/ssa/cse.go b/src/cmd/compile/internal/ssa/cse.go index 25f424fbee..58c52f23e6 100644 --- a/src/cmd/compile/internal/ssa/cse.go +++ b/src/cmd/compile/internal/ssa/cse.go @@ -153,7 +153,6 @@ func cse(f *Func) { i++ } } - // TODO(khr): if value is a control value, do we need to keep it block-local? } } @@ -166,6 +165,16 @@ func cse(f *Func) { } } } + if v := b.Control; v != nil { + if x := rewrite[v.ID]; x != nil { + if v.Op == OpNilCheck { + // nilcheck pass will remove the nil checks and log + // them appropriately, so don't mess with them here. + continue + } + b.Control = x + } + } } } diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go index c088158057..714ac016a2 100644 --- a/src/cmd/compile/internal/ssa/flagalloc.go +++ b/src/cmd/compile/internal/ssa/flagalloc.go @@ -21,6 +21,15 @@ func flagalloc(f *Func) { // Walk blocks backwards. Poor-man's postorder traversal. for i := len(f.Blocks) - 1; i >= 0; i-- { b := f.Blocks[i] + if len(b.Preds) > 1 { + // Don't use any flags register at the start + // of a merge block. This causes problems + // in regalloc because some of the rematerialization + // instructions used on incoming merge edges clobber + // the flags register. + // TODO: only for architectures where this matters? + continue + } // Walk values backwards to figure out what flag // value we want in the flag register at the start // of the block. diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 7d0aa4b2d3..0edbfdaa1a 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -370,7 +370,7 @@ (If (SETGF cmp) yes no) -> (UGT cmp yes no) (If (SETGEF cmp) yes no) -> (UGE cmp yes no) (If (SETEQF cmp) yes no) -> (EQF cmp yes no) -(If (SETNEF cmp) yes no) -> (EQF cmp yes no) +(If (SETNEF cmp) yes no) -> (NEF cmp yes no) (If cond yes no) -> (NE (TESTB cond cond) yes no) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index ba53e81ddd..461026bd7b 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -433,7 +433,7 @@ func init() { name: "DUFFCOPY", reg: regInfo{ inputs: []regMask{buildReg("DI"), buildReg("SI")}, - clobbers: buildReg("DI SI X0"), // uses X0 as a temporary + clobbers: buildReg("DI SI X0 FLAGS"), // uses X0 as a temporary }, }, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 132ca83f95..bbedf2fb64 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -3177,7 +3177,7 @@ var opcodeTable = [...]opInfo{ {0, 128}, // .DI {1, 64}, // .SI }, - clobbers: 65728, // .SI .DI .X0 + clobbers: 8590000320, // .SI .DI .X0 .FLAGS }, }, { diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 3be94e37e7..5c2f3db4b2 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -14213,23 +14213,23 @@ func rewriteBlockAMD64(b *Block) bool { ; // match: (If (SETNEF cmp) yes no) // cond: - // result: (EQF cmp yes no) + // result: (NEF cmp yes no) { v := b.Control if v.Op != OpAMD64SETNEF { - goto endfe25939ca97349543bc2d2ce4f97ba41 + goto endaa989df10b5bbc5fdf8f7f0b81767e86 } cmp := v.Args[0] yes := b.Succs[0] no := b.Succs[1] - b.Kind = BlockAMD64EQF + b.Kind = BlockAMD64NEF b.Control = cmp b.Succs[0] = yes b.Succs[1] = no return true } - goto endfe25939ca97349543bc2d2ce4f97ba41 - endfe25939ca97349543bc2d2ce4f97ba41: + goto endaa989df10b5bbc5fdf8f7f0b81767e86 + endaa989df10b5bbc5fdf8f7f0b81767e86: ; // match: (If cond yes no) // cond: -- cgit v1.3 From 7d9f1067d1c8a2d0252fa2a115f1d016f94f7087 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 17 Dec 2015 10:01:24 -0800 Subject: [dev.ssa] cmd/compile: better register allocator Reorder how register & stack allocation is done. We used to allocate registers, then fix up merge edges, then allocate stack slots. This lead to lots of unnecessary copies on merge edges: v2 = LoadReg v1 v3 = StoreReg v2 If v1 and v3 are allocated to the same stack slot, then this code is unnecessary. But at regalloc time we didn't know the homes of v1 and v3. To fix this problem, allocate all the stack slots before fixing up the merge edges. That way, we know what stack slots values use so we know what copies are required. Use a good technique for shuffling values around on merge edges. Improves performance of the go1 TimeParse benchmark by ~12% Change-Id: I731f43e4ff1a7e0dc4cd4aa428fcdb97812b86fa Reviewed-on: https://go-review.googlesource.com/17915 Reviewed-by: David Chase --- src/cmd/compile/internal/ssa/compile.go | 3 - src/cmd/compile/internal/ssa/flagalloc.go | 9 - src/cmd/compile/internal/ssa/regalloc.go | 898 ++++++++++++++++++++--------- src/cmd/compile/internal/ssa/stackalloc.go | 297 +++++----- 4 files changed, 793 insertions(+), 414 deletions(-) (limited to 'src/cmd/compile/internal/ssa/flagalloc.go') diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index 767b774ab0..20af6fd5bd 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -102,7 +102,6 @@ var passes = [...]pass{ {"schedule", schedule}, // schedule values {"flagalloc", flagalloc}, // allocate flags register {"regalloc", regalloc}, - {"stackalloc", stackalloc}, } // Double-check phase ordering constraints. @@ -138,8 +137,6 @@ var passOrder = [...]constraint{ {"critical", "regalloc"}, // regalloc requires all the values in a block to be scheduled {"schedule", "regalloc"}, - // stack allocation requires register allocation - {"regalloc", "stackalloc"}, // checkLower must run after lowering & subsequent dead code elim {"lower", "checkLower"}, {"lowered deadcode", "checkLower"}, diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go index 714ac016a2..c088158057 100644 --- a/src/cmd/compile/internal/ssa/flagalloc.go +++ b/src/cmd/compile/internal/ssa/flagalloc.go @@ -21,15 +21,6 @@ func flagalloc(f *Func) { // Walk blocks backwards. Poor-man's postorder traversal. for i := len(f.Blocks) - 1; i >= 0; i-- { b := f.Blocks[i] - if len(b.Preds) > 1 { - // Don't use any flags register at the start - // of a merge block. This causes problems - // in regalloc because some of the rematerialization - // instructions used on incoming merge edges clobber - // the flags register. - // TODO: only for architectures where this matters? - continue - } // Walk values backwards to figure out what flag // value we want in the flag register at the start // of the block. diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index 2690b6188e..0f1068a337 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -99,7 +99,7 @@ import ( "unsafe" ) -const regDebug = false +const regDebug = false // TODO: compiler flag const logSpills = false // regalloc performs register allocation on f. It sets f.RegAlloc @@ -201,12 +201,12 @@ type use struct { } type valState struct { - regs regMask // the set of registers holding a Value (usually just one) - uses *use // list of uses in this block - spill *Value // spilled copy of the Value - spill2 *Value // special alternate spill location used for phi resolution - spillUsed bool - spill2used bool + regs regMask // the set of registers holding a Value (usually just one) + uses *use // list of uses in this block + spill *Value // spilled copy of the Value + spillUsed bool + needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags() + rematerializeable bool // cached value of v.rematerializeable() } type regState struct { @@ -218,10 +218,6 @@ type regState struct { type regAllocState struct { f *Func - // For each value, whether it needs a register or not. - // Cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags(). - needReg []bool - // for each block, its primary predecessor. // A predecessor of b is primary if it is the closest // predecessor that appears before b in the layout order. @@ -249,14 +245,33 @@ type regAllocState struct { // mask of registers currently in use used regMask - // Home locations (registers) for Values - home []Location - // current block we're working on curBlock *Block // cache of use records freeUseRecords *use + + // endRegs[blockid] is the register state at the end of each block. + // encoded as a set of endReg records. + endRegs [][]endReg + + // startRegs[blockid] is the register state at the start of merge blocks. + // saved state does not include the state of phi ops in the block. + startRegs [][]startReg + + // spillLive[blockid] is the set of live spills at the end of each block + spillLive [][]ID +} + +type endReg struct { + r register + v *Value // pre-regalloc value held in this register (TODO: can we use ID here?) + c *Value // cached version of the value +} + +type startReg struct { + r register + vid ID // pre-regalloc value needed in this register } // freeReg frees up register r. Any current user of r is kicked out. @@ -268,7 +283,7 @@ func (s *regAllocState) freeReg(r register) { // Mark r as unused. if regDebug { - fmt.Printf("freeReg %d (dump %s/%s)\n", r, v, s.regs[r].c) + fmt.Printf("freeReg %s (dump %s/%s)\n", registers[r].Name(), v, s.regs[r].c) } s.regs[r] = regState{} s.values[v.ID].regs &^= regMask(1) << r @@ -282,21 +297,6 @@ func (s *regAllocState) freeRegs(m regMask) { } } -func (s *regAllocState) setHome(v *Value, r register) { - // Remember assignment. - for int(v.ID) >= len(s.home) { - s.home = append(s.home, nil) - s.home = s.home[:cap(s.home)] - } - s.home[v.ID] = ®isters[r] -} -func (s *regAllocState) getHome(v *Value) register { - if int(v.ID) >= len(s.home) || s.home[v.ID] == nil { - return noRegister - } - return register(s.home[v.ID].(*Register).Num) -} - // setOrig records that c's original value is the same as // v's original value. func (s *regAllocState) setOrig(c *Value, v *Value) { @@ -313,7 +313,7 @@ func (s *regAllocState) setOrig(c *Value, v *Value) { // r must be unused. func (s *regAllocState) assignReg(r register, v *Value, c *Value) { if regDebug { - fmt.Printf("assignReg %d %s/%s\n", r, v, c) + fmt.Printf("assignReg %s %s/%s\n", registers[r].Name(), v, c) } if s.regs[r].v != nil { s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v) @@ -323,7 +323,7 @@ func (s *regAllocState) assignReg(r register, v *Value, c *Value) { s.regs[r] = regState{v, c} s.values[v.ID].regs |= regMask(1) << r s.used |= regMask(1) << r - s.setHome(c, r) + s.f.setHome(c, ®isters[r]) } // allocReg picks an unused register from regmask. If there is no unused register, @@ -361,16 +361,6 @@ func (s *regAllocState) allocReg(mask regMask) register { continue } v := s.regs[t].v - - if s.values[v.ID].uses == nil { - // No subsequent use. - // This can happen when fixing up merge blocks at the end. - // We've already run through the use lists so they are empty. - // Any register would be ok at this point. - r = t - maxuse = 0 - break - } if n := s.values[v.ID].uses.dist; n > maxuse { // v's next use is farther in the future than any value // we've seen so far. A new best spill candidate. @@ -432,12 +422,6 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool) *Val } else { switch { // Load v from its spill location. - case vi.spill2 != nil: - if logSpills { - fmt.Println("regalloc: load spill2") - } - c = s.curBlock.NewValue1(v.Line, OpLoadReg, v.Type, vi.spill2) - vi.spill2used = true case vi.spill != nil: if logSpills { fmt.Println("regalloc: load spill") @@ -462,17 +446,16 @@ func (s *regAllocState) init(f *Func) { } s.f = f - s.needReg = make([]bool, f.NumValues()) s.regs = make([]regState, numRegs) s.values = make([]valState, f.NumValues()) s.orig = make([]*Value, f.NumValues()) for _, b := range f.Blocks { for _, v := range b.Values { - if v.Type.IsMemory() || v.Type.IsVoid() || v.Type.IsFlags() { - continue + if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() { + s.values[v.ID].needReg = true + s.values[v.ID].rematerializeable = v.rematerializeable() + s.orig[v.ID] = v } - s.needReg[v.ID] = true - s.orig[v.ID] = v } } s.computeLive() @@ -498,6 +481,10 @@ func (s *regAllocState) init(f *Func) { } s.primary[b.ID] = int32(best) } + + s.endRegs = make([][]endReg, f.NumBlocks()) + s.startRegs = make([][]startReg, f.NumBlocks()) + s.spillLive = make([][]ID, f.NumBlocks()) } // Adds a use record for id at distance dist from the start of the block. @@ -521,7 +508,7 @@ func (s *regAllocState) addUse(id ID, dist int32) { // Any values which have no more uses are deallocated from registers. func (s *regAllocState) advanceUses(v *Value) { for _, a := range v.Args { - if !s.needReg[a.ID] { + if !s.values[a.ID].needReg { continue } ai := &s.values[a.ID] @@ -536,21 +523,18 @@ func (s *regAllocState) advanceUses(v *Value) { } } -// Sets the state of the registers to that encoded in state. -func (s *regAllocState) setState(state []regState) { +// Sets the state of the registers to that encoded in regs. +func (s *regAllocState) setState(regs []endReg) { s.freeRegs(s.used) - for r, x := range state { - if x.c == nil { - continue - } - s.assignReg(register(r), x.v, x.c) + for _, x := range regs { + s.assignReg(x.r, x.v, x.c) } } -// compatRegs returns the set of registers which can store v. -func (s *regAllocState) compatRegs(v *Value) regMask { +// compatRegs returns the set of registers which can store a type t. +func (s *regAllocState) compatRegs(t Type) regMask { var m regMask - if v.Type.IsFloat() { + if t.IsFloat() { m = 0xffff << 16 // X0-X15 } else { m = 0xffef << 0 // AX-R15, except SP @@ -560,11 +544,8 @@ func (s *regAllocState) compatRegs(v *Value) regMask { func (s *regAllocState) regalloc(f *Func) { liveSet := newSparseSet(f.NumValues()) - argset := newSparseSet(f.NumValues()) var oldSched []*Value var phis []*Value - var stackPhis []*Value - var regPhis []*Value var phiRegs []register var args []*Value @@ -572,11 +553,6 @@ func (s *regAllocState) regalloc(f *Func) { f.Fatalf("entry block must be first") } - // For each merge block, we record the starting register state (after phi ops) - // for that merge block. Indexed by blockid/regnum. - startRegs := make([][]*Value, f.NumBlocks()) - // end state of registers for each block, idexed by blockid/regnum. - endRegs := make([][]regState, f.NumBlocks()) for _, b := range f.Blocks { s.curBlock = b @@ -587,18 +563,21 @@ func (s *regAllocState) regalloc(f *Func) { s.addUse(e.ID, int32(len(b.Values))+e.dist) // pseudo-uses from beyond end of block liveSet.add(e.ID) } - if c := b.Control; c != nil && s.needReg[c.ID] { - s.addUse(c.ID, int32(len(b.Values))) // psuedo-use by control value - liveSet.add(c.ID) + if v := b.Control; v != nil && s.values[v.ID].needReg { + s.addUse(v.ID, int32(len(b.Values))) // psuedo-use by control value + liveSet.add(v.ID) } for i := len(b.Values) - 1; i >= 0; i-- { v := b.Values[i] + liveSet.remove(v.ID) if v.Op == OpPhi { - break // Don't process phi ops. + // Remove v from the live set, but don't add + // any inputs. This is the state the len(b.Preds)>1 + // case below desires; it wants to process phis specially. + continue } - liveSet.remove(v.ID) for _, a := range v.Args { - if !s.needReg[a.ID] { + if !s.values[a.ID].needReg { continue } s.addUse(a.ID, int32(i)) @@ -613,7 +592,7 @@ func (s *regAllocState) regalloc(f *Func) { if u == nil { continue } - fmt.Printf("v%d:", i) + fmt.Printf(" v%d:", i) for u != nil { fmt.Printf(" %d", u.dist) u = u.next @@ -643,7 +622,7 @@ func (s *regAllocState) regalloc(f *Func) { } } else if len(b.Preds) == 1 { // Start regalloc state with the end state of the previous block. - s.setState(endRegs[b.Preds[0].ID]) + s.setState(s.endRegs[b.Preds[0].ID]) if nphi > 0 { f.Fatalf("phis in single-predecessor block") } @@ -669,52 +648,83 @@ func (s *regAllocState) regalloc(f *Func) { f.Fatalf("block with no primary predecessor %s", b) } p := b.Preds[idx] - s.setState(endRegs[p.ID]) + s.setState(s.endRegs[p.ID]) + + if regDebug { + fmt.Printf("starting merge block %s with end state of %s:\n", b, p) + for _, x := range s.endRegs[p.ID] { + fmt.Printf(" %s: orig:%s cache:%s\n", registers[x.r].Name(), x.v, x.c) + } + } // Decide on registers for phi ops. Use the registers determined // by the primary predecessor if we can. // TODO: pick best of (already processed) predecessors? // Majority vote? Deepest nesting level? phiRegs = phiRegs[:0] - var used regMask + var phiUsed regMask for _, v := range phis { - if v.Type.IsMemory() { + if !s.values[v.ID].needReg { phiRegs = append(phiRegs, noRegister) continue } - regs := s.values[v.Args[idx].ID].regs - m := regs &^ used + a := v.Args[idx] + m := s.values[a.ID].regs &^ phiUsed var r register if m != 0 { r = pickReg(m) - used |= regMask(1) << r + s.freeReg(r) + phiUsed |= regMask(1) << r + phiRegs = append(phiRegs, r) } else { - r = noRegister + phiRegs = append(phiRegs, noRegister) + } + } + + // Second pass - deallocate any phi inputs which are now dead. + for _, v := range phis { + if !s.values[v.ID].needReg { + continue + } + a := v.Args[idx] + if !liveSet.contains(a.ID) { + // Input is dead beyond the phi, deallocate + // anywhere else it might live. + s.freeRegs(s.values[a.ID].regs) } - phiRegs = append(phiRegs, r) } - // Change register user from phi input to phi. Add phi spill code. + + // Third pass - pick registers for phis whose inputs + // were not in a register. for i, v := range phis { - if v.Type.IsMemory() { + if !s.values[v.ID].needReg { + continue + } + if phiRegs[i] != noRegister { + continue + } + m := s.compatRegs(v.Type) &^ phiUsed &^ s.used + if m != 0 { + r := pickReg(m) + phiRegs[i] = r + phiUsed |= regMask(1) << r + } + } + + // Set registers for phis. Add phi spill code. + for i, v := range phis { + if !s.values[v.ID].needReg { continue } r := phiRegs[i] if r == noRegister { - m := s.compatRegs(v) & ^s.used - if m == 0 { - // stack-based phi - // Spills will be inserted in all the predecessors below. - s.values[v.ID].spill = v // v starts life spilled - s.values[v.ID].spillUsed = true // use is guaranteed - continue - } - // Allocate phi to an unused register. - r = pickReg(m) - } else { - s.freeReg(r) + // stack-based phi + // Spills will be inserted in all the predecessors below. + s.values[v.ID].spill = v // v starts life spilled + s.values[v.ID].spillUsed = true // use is guaranteed + continue } // register-based phi - // Transfer ownership of register from input arg to phi. s.assignReg(r, v, v) // Spill the phi in case we need to restore it later. spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v) @@ -723,15 +733,35 @@ func (s *regAllocState) regalloc(f *Func) { s.values[v.ID].spillUsed = false } - // Save the starting state for use by incoming edges below. - startRegs[b.ID] = make([]*Value, numRegs) + // Save the starting state for use by merge edges. + var regList []startReg for r := register(0); r < numRegs; r++ { - startRegs[b.ID][r] = s.regs[r].v + v := s.regs[r].v + if v == nil { + continue + } + if phiUsed>>r&1 != 0 { + // Skip registers that phis used, we'll handle those + // specially during merge edge processing. + continue + } + regList = append(regList, startReg{r, v.ID}) + } + s.startRegs[b.ID] = regList + + if regDebug { + fmt.Printf("after phis\n") + for _, x := range s.startRegs[b.ID] { + fmt.Printf(" %s: v%d\n", registers[x.r].Name(), x.vid) + } } } // Process all the non-phi values. - for idx, v := range oldSched { + for _, v := range oldSched { + if regDebug { + fmt.Printf(" processing %s\n", v.LongString()) + } if v.Op == OpPhi { f.Fatalf("phi %s not at start of block", v) } @@ -758,9 +788,6 @@ func (s *regAllocState) regalloc(f *Func) { continue } regspec := opcodeTable[v.Op].reg - if regDebug { - fmt.Printf("%d: working on %s %s %v\n", idx, v, v.LongString(), regspec) - } if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 { // No register allocation required (or none specified yet) s.freeRegs(regspec.clobbers) @@ -768,7 +795,7 @@ func (s *regAllocState) regalloc(f *Func) { continue } - if v.rematerializeable() { + if s.values[v.ID].rematerializeable { // Value is rematerializeable, don't issue it here. // It will get issued just before each use (see // allocValueToReg). @@ -800,7 +827,7 @@ func (s *regAllocState) regalloc(f *Func) { // Pick register for output. var r register var mask regMask - if s.needReg[v.ID] { + if s.values[v.ID].needReg { mask = regspec.outputs[0] &^ s.reserved() if mask>>33&1 != 0 { s.f.Fatalf("bad mask %s\n", v.LongString()) @@ -827,7 +854,7 @@ func (s *regAllocState) regalloc(f *Func) { // f() // } // It would be good to have both spill and restore inside the IF. - if s.needReg[v.ID] { + if s.values[v.ID].needReg { spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v) s.setOrig(spill, v) s.values[v.ID].spill = spill @@ -835,21 +862,70 @@ func (s *regAllocState) regalloc(f *Func) { } } - if c := b.Control; c != nil && s.needReg[c.ID] { + if v := b.Control; v != nil && s.values[v.ID].needReg { + if regDebug { + fmt.Printf(" processing control %s\n", v.LongString()) + } // Load control value into reg. // TODO: regspec for block control values, instead of using // register set from the control op's output. - s.allocValToReg(c, opcodeTable[c.Op].reg.outputs[0], false) + s.allocValToReg(v, opcodeTable[v.Op].reg.outputs[0], false) // Remove this use from the uses list. - u := s.values[c.ID].uses - s.values[c.ID].uses = u.next + vi := &s.values[v.ID] + u := vi.uses + vi.uses = u.next + if u.next == nil { + s.freeRegs(vi.regs) // value is dead + } u.next = s.freeUseRecords s.freeUseRecords = u } - // Record endRegs - endRegs[b.ID] = make([]regState, numRegs) - copy(endRegs[b.ID], s.regs) + // Save end-of-block register state. + var regList []endReg + for r := register(0); r < numRegs; r++ { + v := s.regs[r].v + if v == nil { + continue + } + regList = append(regList, endReg{r, v, s.regs[r].c}) + } + s.endRegs[b.ID] = regList + + // Check. TODO: remove + { + liveSet.clear() + for _, x := range s.live[b.ID] { + liveSet.add(x.ID) + } + for r := register(0); r < numRegs; r++ { + v := s.regs[r].v + if v == nil { + continue + } + if !liveSet.contains(v.ID) { + s.f.Fatalf("val %s is in reg but not live at end of %s", v, b) + } + } + } + + // If a value is live at the end of the block and + // isn't in a register, remember that its spill location + // is live. We need to remember this information so that + // the liveness analysis in stackalloc correct. + for _, e := range s.live[b.ID] { + if s.values[e.ID].regs != 0 { + // in a register, we'll use that source for the merge. + continue + } + spill := s.values[e.ID].spill + if spill == nil { + // rematerializeable values will have spill==nil. + continue + } + s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID) + s.values[e.ID].spillUsed = true + } // Clear any final uses. // All that is left should be the pseudo-uses added for values which @@ -868,137 +944,6 @@ func (s *regAllocState) regalloc(f *Func) { } } - // Process merge block input edges. They are the tricky ones. - dst := make([]*Value, numRegs) - for _, b := range f.Blocks { - if len(b.Preds) <= 1 { - continue - } - for i, p := range b.Preds { - if regDebug { - fmt.Printf("processing %s->%s\n", p, b) - } - - // Find phis, separate them into stack & register classes. - stackPhis = stackPhis[:0] - regPhis = regPhis[:0] - for _, v := range b.Values { - if v.Op != OpPhi { - break - } - if v.Type.IsMemory() { - continue - } - if s.getHome(v) != noRegister { - regPhis = append(regPhis, v) - } else { - stackPhis = append(stackPhis, v) - } - } - - // Start with the state that exists at the end of the - // predecessor block. We'll be adding instructions here - // to shuffle registers & stack phis into the right spot. - s.setState(endRegs[p.ID]) - s.curBlock = p - - // Handle stack-based phi ops first. We need to handle them - // first because we need a register with which to copy them. - - // We must be careful not to overwrite any stack phis which are - // themselves args of other phis. For example: - // v1 = phi(v2, v3) : 8(SP) - // v2 = phi(v4, v5) : 16(SP) - // Here we must not write v2 until v2 is read and written to v1. - // The situation could be even more complicated, with cycles, etc. - // So in the interest of being simple, we find all the phis which - // are arguments of other phis and copy their values to a temporary - // location first. This temporary location is called "spill2" and - // represents a higher-priority but temporary spill location for the value. - // Note this is not a problem for register-based phis because - // if needed we will use the spilled location as the source, and - // the spill location is not clobbered by the code generated here. - argset.clear() - for _, v := range stackPhis { - argset.add(v.Args[i].ID) - } - for _, v := range regPhis { - argset.add(v.Args[i].ID) - } - for _, v := range stackPhis { - if !argset.contains(v.ID) { - continue - } - - // This stack-based phi is the argument of some other - // phi in this block. We must make a copy of its - // value so that we don't clobber it prematurely. - c := s.allocValToReg(v, s.compatRegs(v), false) - d := p.NewValue1(v.Line, OpStoreReg, v.Type, c) - s.setOrig(d, v) - s.values[v.ID].spill2 = d - } - - // Assign to stack-based phis. We do stack phis first because - // we might need a register to do the assignment. - for _, v := range stackPhis { - // Load phi arg into a register, then store it with a StoreReg. - // If already in a register, use that. If not, pick a compatible - // register. - w := v.Args[i] - c := s.allocValToReg(w, s.compatRegs(w), false) - v.Args[i] = p.NewValue1(v.Line, OpStoreReg, v.Type, c) - s.setOrig(v.Args[i], w) - } - // Figure out what value goes in each register. - for r := register(0); r < numRegs; r++ { - dst[r] = startRegs[b.ID][r] - } - // Handle register-based phi ops. - for _, v := range regPhis { - r := s.getHome(v) - if dst[r] != v { - f.Fatalf("dst not right") - } - v.Args[i] = s.allocValToReg(v.Args[i], regMask(1)<CX and CX->DX, do the latter first. Now if we do the - // former first then the latter must be a restore instead of a register move. - // Erase any spills we never used for i := range s.values { vi := s.values[i] @@ -1031,24 +976,450 @@ func (s *regAllocState) regalloc(f *Func) { // Not important now because this is the last phase that manipulates Values } - // Set final regalloc result. - f.RegAlloc = s.home + // Anything that didn't get a register gets a stack location here. + // (StoreReg, stack-based phis, inputs, ...) + stacklive := stackalloc(s.f, s.spillLive) + + // Fix up all merge edges. + s.shuffle(stacklive) +} + +// shuffle fixes up all the merge edges (those going into blocks of indegree > 1). +func (s *regAllocState) shuffle(stacklive [][]ID) { + var e edgeState + e.s = s + e.cache = map[ID][]*Value{} + e.contents = map[Location]contentRecord{} + if regDebug { + fmt.Printf("shuffle %s\n", s.f.Name) + fmt.Println(s.f.String()) + } + + for _, b := range s.f.Blocks { + if len(b.Preds) <= 1 { + continue + } + e.b = b + for i, p := range b.Preds { + e.p = p + e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID]) + e.process() + } + } +} + +type edgeState struct { + s *regAllocState + p, b *Block // edge goes from p->b. + + // for each pre-regalloc value, a list of equivalent cached values + cache map[ID][]*Value + + // map from location to the value it contains + contents map[Location]contentRecord + + // desired destination locations + destinations []dstRecord + extra []dstRecord + + usedRegs regMask // registers currently holding something + uniqueRegs regMask // registers holding the only copy of a value + finalRegs regMask // registers holding final target +} + +type contentRecord struct { + vid ID // pre-regalloc value + c *Value // cached value + final bool // this is a satisfied destination +} + +type dstRecord struct { + loc Location // register or stack slot + vid ID // pre-regalloc value it should contain + splice **Value // place to store reference to the generating instruction +} + +// setup initializes the edge state for shuffling. +func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) { + if regDebug { + fmt.Printf("edge %s->%s\n", e.p, e.b) + } + + // Clear state. + for k := range e.cache { + delete(e.cache, k) + } + for k := range e.contents { + delete(e.contents, k) + } + + // Live registers can be sources. + for _, x := range srcReg { + e.set(®isters[x.r], x.v.ID, x.c, false) + } + // So can all of the spill locations. + for _, spillID := range stacklive { + v := e.s.orig[spillID] + spill := e.s.values[v.ID].spill + e.set(e.s.f.getHome(spillID), v.ID, spill, false) + } + + // Figure out all the destinations we need. + dsts := e.destinations[:0] + for _, x := range dstReg { + dsts = append(dsts, dstRecord{®isters[x.r], x.vid, nil}) + } + // Phis need their args to end up in a specific location. + for _, v := range e.b.Values { + if v.Op != OpPhi { + break + } + loc := e.s.f.getHome(v.ID) + if loc == nil { + continue + } + dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx]}) + } + e.destinations = dsts + + if regDebug { + for vid, a := range e.cache { + for _, c := range a { + fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID).Name(), vid, c) + } + } + for _, d := range e.destinations { + fmt.Printf("dst %s: v%d\n", d.loc.Name(), d.vid) + } + } +} + +// process generates code to move all the values to the right destination locations. +func (e *edgeState) process() { + dsts := e.destinations + + // Process the destinations until they are all satisfied. + for len(dsts) > 0 { + i := 0 + for _, d := range dsts { + if !e.processDest(d.loc, d.vid, d.splice) { + // Failed - save for next iteration. + dsts[i] = d + i++ + } + } + if i < len(dsts) { + // Made some progress. Go around again. + dsts = dsts[:i] + + // Append any extras destinations we generated. + dsts = append(dsts, e.extra...) + e.extra = e.extra[:0] + continue + } + + // We made no progress. That means that any + // remaining unsatisfied moves are in simple cycles. + // For example, A -> B -> C -> D -> A. + // A ----> B + // ^ | + // | | + // | v + // D <---- C + + // To break the cycle, we pick an unused register, say R, + // and put a copy of B there. + // A ----> B + // ^ | + // | | + // | v + // D <---- C <---- R=copyofB + // When we resume the outer loop, the A->B move can now proceed, + // and eventually the whole cycle completes. + + // Copy any cycle location to a temp register. This duplicates + // one of the cycle entries, allowing the just duplicated value + // to be overwritten and the cycle to proceed. + loc := dsts[0].loc + vid := e.contents[loc].vid + c := e.contents[loc].c + r := e.findRegFor(c.Type) + if regDebug { + fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc.Name(), c) + } + if _, isReg := loc.(*Register); isReg { + c = e.p.NewValue1(c.Line, OpCopy, c.Type, c) + } else { + c = e.p.NewValue1(c.Line, OpLoadReg, c.Type, c) + } + e.set(r, vid, c, false) + } +} + +// processDest generates code to put value vid into location loc. Returns true +// if progress was made. +func (e *edgeState) processDest(loc Location, vid ID, splice **Value) bool { + occupant := e.contents[loc] + if occupant.vid == vid { + // Value is already in the correct place. + e.contents[loc] = contentRecord{vid, occupant.c, true} + if splice != nil { + *splice = occupant.c + } + // Note: if splice==nil then c will appear dead. This is + // non-SSA formed code, so be careful after this pass not to run + // deadcode elimination. + return true + } + + // Check if we're allowed to clobber the destination location. + if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable { + // We can't overwrite the last copy + // of a value that needs to survive. + return false + } + + // Copy from a source of v, register preferred. + v := e.s.orig[vid] + var c *Value + var src Location + if regDebug { + fmt.Printf("moving v%d to %s\n", vid, loc.Name()) + fmt.Printf("sources of v%d:", vid) + } + for _, w := range e.cache[vid] { + h := e.s.f.getHome(w.ID) + if regDebug { + fmt.Printf(" %s:%s", h.Name(), w) + } + _, isreg := h.(*Register) + if src == nil || isreg { + c = w + src = h + } + } + if regDebug { + if src != nil { + fmt.Printf(" [use %s]\n", src.Name()) + } else { + fmt.Printf(" [no source]\n") + } + } + _, dstReg := loc.(*Register) + var x *Value + if c == nil { + if !e.s.values[vid].rematerializeable { + e.s.f.Fatalf("can't find source for %s->%s: v%d\n", e.p, e.b, vid) + } + if dstReg { + x = v.copyInto(e.p) + } else { + // Rematerialize into stack slot. Need a free + // register to accomplish this. + e.erase(loc) // see pre-clobber comment below + r := e.findRegFor(v.Type) + x = v.copyInto(e.p) + e.set(r, vid, x, false) + x = e.p.NewValue1(x.Line, OpStoreReg, x.Type, x) + } + } else { + // Emit move from src to dst. + _, srcReg := src.(*Register) + if srcReg { + if dstReg { + x = e.p.NewValue1(c.Line, OpCopy, c.Type, c) + } else { + x = e.p.NewValue1(c.Line, OpStoreReg, c.Type, c) + } + } else { + if dstReg { + x = e.p.NewValue1(c.Line, OpLoadReg, c.Type, c) + } else { + // mem->mem. Use temp register. + + // Pre-clobber destination. This avoids the + // following situation: + // - v is currently held in R0 and stacktmp0. + // - We want to copy stacktmp1 to stacktmp0. + // - We choose R0 as the temporary register. + // During the copy, both R0 and stacktmp0 are + // clobbered, losing both copies of v. Oops! + // Erasing the destination early means R0 will not + // be chosen as the temp register, as it will then + // be the last copy of v. + e.erase(loc) + + r := e.findRegFor(c.Type) + t := e.p.NewValue1(c.Line, OpLoadReg, c.Type, c) + e.set(r, vid, t, false) + x = e.p.NewValue1(c.Line, OpStoreReg, c.Type, t) + } + } + } + e.set(loc, vid, x, true) + if splice != nil { + *splice = x + } + return true +} + +// set changes the contents of location loc to hold the given value and its cached representative. +func (e *edgeState) set(loc Location, vid ID, c *Value, final bool) { + e.s.f.setHome(c, loc) + e.erase(loc) + e.contents[loc] = contentRecord{vid, c, final} + a := e.cache[vid] + a = append(a, c) + e.cache[vid] = a + if r, ok := loc.(*Register); ok { + e.usedRegs |= regMask(1) << uint(r.Num) + if final { + e.finalRegs |= regMask(1) << uint(r.Num) + } + if len(a) == 1 { + e.uniqueRegs |= regMask(1) << uint(r.Num) + } + if len(a) == 2 { + if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok { + e.uniqueRegs &^= regMask(1) << uint(t.Num) + } + } + } + if regDebug { + fmt.Printf("%s\n", c.LongString()) + fmt.Printf("v%d now available in %s:%s\n", vid, loc.Name(), c) + } +} + +// erase removes any user of loc. +func (e *edgeState) erase(loc Location) { + cr := e.contents[loc] + if cr.c == nil { + return + } + vid := cr.vid + + if cr.final { + // Add a destination to move this value back into place. + // Make sure it gets added to the tail of the destination queue + // so we make progress on other moves first. + e.extra = append(e.extra, dstRecord{loc, cr.vid, nil}) + } + + // Remove c from the list of cached values. + a := e.cache[vid] + for i, c := range a { + if e.s.f.getHome(c.ID) == loc { + if regDebug { + fmt.Printf("v%d no longer available in %s:%s\n", vid, loc.Name(), c) + } + a[i], a = a[len(a)-1], a[:len(a)-1] + break + } + } + e.cache[vid] = a + + // Update register masks. + if r, ok := loc.(*Register); ok { + e.usedRegs &^= regMask(1) << uint(r.Num) + if cr.final { + e.finalRegs &^= regMask(1) << uint(r.Num) + } + } + if len(a) == 1 { + if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok { + e.uniqueRegs |= regMask(1) << uint(r.Num) + } + } +} + +// findRegFor finds a register we can use to make a temp copy of type typ. +func (e *edgeState) findRegFor(typ Type) Location { + // Which registers are possibilities. + var m regMask + if typ.IsFloat() { + m = e.s.compatRegs(e.s.f.Config.fe.TypeFloat64()) + } else { + m = e.s.compatRegs(e.s.f.Config.fe.TypeInt64()) + } + + // Pick a register. In priority order: + // 1) an unused register + // 2) a non-unique register not holding a final value + // 3) a non-unique register + x := m &^ e.usedRegs + if x != 0 { + return ®isters[pickReg(x)] + } + x = m &^ e.uniqueRegs &^ e.finalRegs + if x != 0 { + return ®isters[pickReg(x)] + } + x = m &^ e.uniqueRegs + if x != 0 { + return ®isters[pickReg(x)] + } + + // No register is available. Allocate a temp location to spill a register to. + // The type of the slot is immaterial - it will not be live across + // any safepoint. Just use a type big enough to hold any register. + typ = e.s.f.Config.fe.TypeInt64() + t := LocalSlot{e.s.f.Config.fe.Auto(typ), typ, 0} + // TODO: reuse these slots. + + // Pick a register to spill. + for vid, a := range e.cache { + for _, c := range a { + if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.Num)&1 != 0 { + x := e.p.NewValue1(c.Line, OpStoreReg, c.Type, c) + e.set(t, vid, x, false) + if regDebug { + fmt.Printf(" SPILL %s->%s %s\n", r.Name(), t.Name(), x.LongString()) + } + // r will now be overwritten by the caller. At some point + // later, the newly saved value will be moved back to its + // final destination in processDest. + return r + } + } + } + + e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b) + return nil } func (v *Value) rematerializeable() bool { // TODO: add a flags field to opInfo for this test? + regspec := opcodeTable[v.Op].reg // rematerializeable ops must be able to fill any register. - outputs := opcodeTable[v.Op].reg.outputs + outputs := regspec.outputs if len(outputs) == 0 || countRegs(outputs[0]) <= 1 { // Note: this case handles OpAMD64LoweredGetClosurePtr // which can't be moved. return false } + + // We can't rematerialize instructions which + // clobber the flags register. + if regspec.clobbers&flagRegMask != 0 { + if v.Op == OpAMD64MOVQconst && v.AuxInt != 0 || + v.Op == OpAMD64MOVLconst && int32(v.AuxInt) != 0 || + v.Op == OpAMD64MOVWconst && int16(v.AuxInt) != 0 || + v.Op == OpAMD64MOVBconst && int8(v.AuxInt) != 0 { + // These are marked as clobbering flags, but only + // the 0 versions actually do. TODO: fix MOV->XOR rewrites + // to understand when they are allowed to clobber flags? + return true + } + return false + } + if len(v.Args) == 0 { return true } if len(v.Args) == 1 && (v.Args[0].Op == OpSP || v.Args[0].Op == OpSB) { + // SP and SB (generated by OpSP and OpSB) are always available. return true } return false @@ -1084,9 +1455,6 @@ func (s *regAllocState) computeLive() { // out to all of them. po := postorder(f) for { - for _, b := range po { - f.Logf("live %s %v\n", b, s.live[b.ID]) - } changed := false for _, b := range po { @@ -1099,7 +1467,7 @@ func (s *regAllocState) computeLive() { } // Mark control value as live - if b.Control != nil && s.needReg[b.Control.ID] { + if b.Control != nil && s.values[b.Control.ID].needReg { live.set(b.Control.ID, int32(len(b.Values))) } @@ -1115,7 +1483,7 @@ func (s *regAllocState) computeLive() { continue } for _, a := range v.Args { - if s.needReg[a.ID] { + if s.values[a.ID].needReg { live.set(a.ID, int32(i)) } } @@ -1162,7 +1530,7 @@ func (s *regAllocState) computeLive() { // simultaneously happening at the start of the block). for _, v := range phis { id := v.Args[i].ID - if s.needReg[id] && !t.contains(id) || delta < t.get(id) { + if s.values[id].needReg && !t.contains(id) || delta < t.get(id) { update = true t.set(id, delta) } @@ -1185,6 +1553,16 @@ func (s *regAllocState) computeLive() { break } } + if regDebug { + fmt.Println("live values at end of each block") + for _, b := range f.Blocks { + fmt.Printf(" %s:", b) + for _, x := range s.live[b.ID] { + fmt.Printf(" v%d", x.ID) + } + fmt.Println() + } + } } // reserved returns a mask of reserved registers. diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go index 3eb5c3cf4a..797a6b05e6 100644 --- a/src/cmd/compile/internal/ssa/stackalloc.go +++ b/src/cmd/compile/internal/ssa/stackalloc.go @@ -6,55 +6,65 @@ package ssa +import "fmt" + +const stackDebug = false // TODO: compiler flag + +type stackAllocState struct { + f *Func + values []stackValState + live [][]ID // live[b.id] = live values at the end of block b. + interfere [][]ID // interfere[v.id] = values that interfere with v. +} + +type stackValState struct { + typ Type + spill *Value + needSlot bool +} + // stackalloc allocates storage in the stack frame for // all Values that did not get a register. -func stackalloc(f *Func) { - // Cache value types by ID. - types := make([]Type, f.NumValues()) - for _, b := range f.Blocks { - for _, v := range b.Values { - types[v.ID] = v.Type - } +// Returns a map from block ID to the stack values live at the end of that block. +func stackalloc(f *Func, spillLive [][]ID) [][]ID { + if stackDebug { + fmt.Println("before stackalloc") + fmt.Println(f.String()) } + var s stackAllocState + s.init(f, spillLive) + s.stackalloc() + return s.live +} - // Build interference graph among StoreReg and stack phi ops. - live := f.liveSpills() - interfere := make([][]ID, f.NumValues()) - s := newSparseSet(f.NumValues()) - for _, b := range f.Blocks { - // Start with known live values at the end of the block. - s.clear() - for i := 0; i < len(b.Succs); i++ { - s.addAll(live[b.ID][i]) - } +func (s *stackAllocState) init(f *Func, spillLive [][]ID) { + s.f = f - // Propagate backwards to the start of the block. - // Remember interfering sets. - for i := len(b.Values) - 1; i >= 0; i-- { - v := b.Values[i] - switch { - case v.Op == OpStoreReg, v.isStackPhi(): - s.remove(v.ID) - for _, id := range s.contents() { - if v.Type.Equal(types[id]) { - // Only need interferences between equivalent types. - interfere[v.ID] = append(interfere[v.ID], id) - interfere[id] = append(interfere[id], v.ID) - } - } - case v.Op == OpLoadReg: - s.add(v.Args[0].ID) - case v.Op == OpArg: - // This is an input argument which is pre-spilled. It is kind of - // like a StoreReg, but we don't remove v.ID here because we want - // this value to appear live even before this point. Being live - // all the way to the start of the entry block prevents other - // values from being allocated to the same slot and clobbering - // the input value before we have a chance to load it. + // Initialize value information. + s.values = make([]stackValState, f.NumValues()) + for _, b := range f.Blocks { + for _, v := range b.Values { + s.values[v.ID].typ = v.Type + s.values[v.ID].needSlot = !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && f.getHome(v.ID) == nil && !v.rematerializeable() + if stackDebug && s.values[v.ID].needSlot { + fmt.Printf("%s needs a stack slot\n", v) + } + if v.Op == OpStoreReg { + s.values[v.Args[0].ID].spill = v } } } + // Compute liveness info for values needing a slot. + s.computeLive(spillLive) + + // Build interference graph among values needing a slot. + s.buildInterferenceGraph() +} + +func (s *stackAllocState) stackalloc() { + f := s.f + // Build map from values to their names, if any. // A value may be associated with more than one name (e.g. after // the assignment i=j). This step picks one name per value arbitrarily. @@ -67,49 +77,41 @@ func stackalloc(f *Func) { } } - // Figure out which StoreReg ops are phi args. We don't pick slots for - // phi args because a stack phi and its args must all use the same stack slot. - phiArg := make([]bool, f.NumValues()) - for _, b := range f.Blocks { - for _, v := range b.Values { - if !v.isStackPhi() { - continue - } - for _, a := range v.Args { - phiArg[a.ID] = true - } - } - } - // Allocate args to their assigned locations. for _, v := range f.Entry.Values { if v.Op != OpArg { continue } - f.setHome(v, LocalSlot{v.Aux.(GCNode), v.Type, v.AuxInt}) + loc := LocalSlot{v.Aux.(GCNode), v.Type, v.AuxInt} + if stackDebug { + fmt.Printf("stackalloc %s to %s\n", v, loc.Name()) + } + f.setHome(v, loc) } // For each type, we keep track of all the stack slots we // have allocated for that type. + // TODO: share slots among equivalent types. We would need to + // only share among types with the same GC signature. See the + // type.Equal calls below for where this matters. locations := map[Type][]LocalSlot{} // Each time we assign a stack slot to a value v, we remember // the slot we used via an index into locations[v.Type]. - // TODO: share slots among equivalent types. slots := make([]int, f.NumValues()) for i := f.NumValues() - 1; i >= 0; i-- { slots[i] = -1 } - // Pick a stack slot for each non-phi-arg StoreReg and each stack phi. + // Pick a stack slot for each value needing one. used := make([]bool, f.NumValues()) for _, b := range f.Blocks { for _, v := range b.Values { - if v.Op != OpStoreReg && !v.isStackPhi() { + if !s.values[v.ID].needSlot { continue } - if phiArg[v.ID] { - continue + if v.Op == OpArg { + continue // already picked } // If this is a named value, try to use the name as @@ -121,7 +123,7 @@ func stackalloc(f *Func) { name = names[v.ID] } if name.N != nil && v.Type.Equal(name.Type) { - for _, id := range interfere[v.ID] { + for _, id := range s.interfere[v.ID] { h := f.getHome(id) if h != nil && h.(LocalSlot) == name { // A variable can interfere with itself. @@ -129,22 +131,10 @@ func stackalloc(f *Func) { goto noname } } - if v.Op == OpPhi { - for _, a := range v.Args { - for _, id := range interfere[a.ID] { - h := f.getHome(id) - if h != nil && h.(LocalSlot) == name { - goto noname - } - } - } + if stackDebug { + fmt.Printf("stackalloc %s to %s\n", v, name.Name()) } f.setHome(v, name) - if v.Op == OpPhi { - for _, a := range v.Args { - f.setHome(a, name) - } - } continue } @@ -155,25 +145,12 @@ func stackalloc(f *Func) { for i := 0; i < len(locs); i++ { used[i] = false } - for _, xid := range interfere[v.ID] { + for _, xid := range s.interfere[v.ID] { slot := slots[xid] if slot >= 0 { used[slot] = true } } - if v.Op == OpPhi { - // Stack phi and args must get the same stack slot, so - // anything the args interfere with is something the phi - // interferes with. - for _, a := range v.Args { - for _, xid := range interfere[a.ID] { - slot := slots[xid] - if slot >= 0 { - used[slot] = true - } - } - } - } // Find an unused stack slot. var i int for i = 0; i < len(locs); i++ { @@ -188,83 +165,80 @@ func stackalloc(f *Func) { } // Use the stack variable at that index for v. loc := locs[i] + if stackDebug { + fmt.Printf("stackalloc %s to %s\n", v, loc.Name()) + } f.setHome(v, loc) slots[v.ID] = i - if v.Op == OpPhi { - for _, a := range v.Args { - f.setHome(a, loc) - slots[a.ID] = i - } - } } } } -// live returns a map from block ID and successor edge index to a list -// of StoreReg/stackphi value IDs live on that edge. +// computeLive computes a map from block ID to a list of +// stack-slot-needing value IDs live at the end of that block. // TODO: this could be quadratic if lots of variables are live across lots of // basic blocks. Figure out a way to make this function (or, more precisely, the user // of this function) require only linear size & time. -func (f *Func) liveSpills() [][][]ID { - live := make([][][]ID, f.NumBlocks()) - for _, b := range f.Blocks { - live[b.ID] = make([][]ID, len(b.Succs)) - } +func (s *stackAllocState) computeLive(spillLive [][]ID) { + s.live = make([][]ID, s.f.NumBlocks()) var phis []*Value - - s := newSparseSet(f.NumValues()) - t := newSparseSet(f.NumValues()) + live := newSparseSet(s.f.NumValues()) + t := newSparseSet(s.f.NumValues()) // Instead of iterating over f.Blocks, iterate over their postordering. // Liveness information flows backward, so starting at the end // increases the probability that we will stabilize quickly. - po := postorder(f) + po := postorder(s.f) for { changed := false for _, b := range po { // Start with known live values at the end of the block - s.clear() - for i := 0; i < len(b.Succs); i++ { - s.addAll(live[b.ID][i]) - } + live.clear() + live.addAll(s.live[b.ID]) // Propagate backwards to the start of the block phis = phis[:0] for i := len(b.Values) - 1; i >= 0; i-- { v := b.Values[i] - switch { - case v.Op == OpStoreReg: - s.remove(v.ID) - case v.Op == OpLoadReg: - s.add(v.Args[0].ID) - case v.isStackPhi(): - s.remove(v.ID) - // save stack phi ops for later - phis = append(phis, v) + live.remove(v.ID) + if v.Op == OpPhi { + // Save phi for later. + // Note: its args might need a stack slot even though + // the phi itself doesn't. So don't use needSlot. + if !v.Type.IsMemory() && !v.Type.IsVoid() { + phis = append(phis, v) + } + continue + } + for _, a := range v.Args { + if s.values[a.ID].needSlot { + live.add(a.ID) + } } } // for each predecessor of b, expand its list of live-at-end values // invariant: s contains the values live at the start of b (excluding phi inputs) for i, p := range b.Preds { - // Find index of b in p's successors. - var j int - for j = 0; j < len(p.Succs); j++ { - if p.Succs[j] == b { - break - } - } t.clear() - t.addAll(live[p.ID][j]) - t.addAll(s.contents()) + t.addAll(s.live[p.ID]) + t.addAll(live.contents()) + t.addAll(spillLive[p.ID]) for _, v := range phis { - t.add(v.Args[i].ID) + a := v.Args[i] + if s.values[a.ID].needSlot { + t.add(a.ID) + } + if spill := s.values[a.ID].spill; spill != nil { + //TODO: remove? Subsumed by SpillUse? + t.add(spill.ID) + } } - if t.size() == len(live[p.ID][j]) { + if t.size() == len(s.live[p.ID]) { continue } // grow p's live set - live[p.ID][j] = append(live[p.ID][j][:0], t.contents()...) + s.live[p.ID] = append(s.live[p.ID][:0], t.contents()...) changed = true } } @@ -273,7 +247,11 @@ func (f *Func) liveSpills() [][][]ID { break } } - return live + if stackDebug { + for _, b := range s.f.Blocks { + fmt.Printf("stacklive %s %v\n", b, s.live[b.ID]) + } + } } func (f *Func) getHome(vid ID) Location { @@ -290,16 +268,51 @@ func (f *Func) setHome(v *Value, loc Location) { f.RegAlloc[v.ID] = loc } -func (v *Value) isStackPhi() bool { - if v.Op != OpPhi { - return false - } - if v.Type == TypeMem { - return false +func (s *stackAllocState) buildInterferenceGraph() { + f := s.f + s.interfere = make([][]ID, f.NumValues()) + live := newSparseSet(f.NumValues()) + for _, b := range f.Blocks { + // Propagate liveness backwards to the start of the block. + // Two values interfere if one is defined while the other is live. + live.clear() + live.addAll(s.live[b.ID]) + for i := len(b.Values) - 1; i >= 0; i-- { + v := b.Values[i] + if s.values[v.ID].needSlot { + live.remove(v.ID) + for _, id := range live.contents() { + if s.values[v.ID].typ.Equal(s.values[id].typ) { + s.interfere[v.ID] = append(s.interfere[v.ID], id) + s.interfere[id] = append(s.interfere[id], v.ID) + } + } + } + for _, a := range v.Args { + if s.values[a.ID].needSlot { + live.add(a.ID) + } + } + if v.Op == OpArg && s.values[v.ID].needSlot { + // OpArg is an input argument which is pre-spilled. + // We add back v.ID here because we want this value + // to appear live even before this point. Being live + // all the way to the start of the entry block prevents other + // values from being allocated to the same slot and clobbering + // the input value before we have a chance to load it. + live.add(v.ID) + } + } } - if int(v.ID) >= len(v.Block.Func.RegAlloc) { - return true + if stackDebug { + for vid, i := range s.interfere { + if len(i) > 0 { + fmt.Printf("v%d interferes with", vid) + for _, x := range i { + fmt.Printf(" v%d", x) + } + fmt.Println() + } + } } - return v.Block.Func.RegAlloc[v.ID] == nil - // TODO: use a separate opcode for StackPhi? } -- cgit v1.3 From 7b773946c09e075ed50c49e76e08f61c16616ee4 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Fri, 22 Jan 2016 13:44:58 -0800 Subject: [dev.ssa] cmd/compile: disable xor clearing when flags must be preserved The x86 backend automatically rewrites MOV $0, AX to XOR AX, AX. That rewrite isn't ok when the flags register is live across the MOV. Keep track of which moves care about preserving flags, then disable this rewrite for them. On x86, Prog.Mark was being used to hold the length of the instruction. We already store that in Prog.Isize, so no need to store it in Prog.Mark also. This frees up Prog.Mark to hold a bitmask on x86 just like all the other architectures. Update #12405 Change-Id: Ibad8a8f41fc6222bec1e4904221887d3cc3ca029 Reviewed-on: https://go-review.googlesource.com/18861 Reviewed-by: David Chase Reviewed-by: Russ Cox --- src/cmd/compile/internal/gc/ssa.go | 29 ++++++++++++++++++++++++++++ src/cmd/compile/internal/ssa/block.go | 3 +++ src/cmd/compile/internal/ssa/flagalloc.go | 5 +++++ src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 11 ++++------- src/cmd/compile/internal/ssa/opGen.go | 4 ---- src/cmd/compile/internal/ssa/regalloc.go | 9 --------- src/cmd/internal/obj/link.go | 6 +++--- src/cmd/internal/obj/pass.go | 1 - src/cmd/internal/obj/x86/a.out.go | 6 ++++++ src/cmd/internal/obj/x86/asm6.go | 9 ++++++--- src/cmd/internal/obj/x86/obj6.go | 20 +++++++++---------- 11 files changed, 66 insertions(+), 37 deletions(-) (limited to 'src/cmd/compile/internal/ssa/flagalloc.go') diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 5b8d2423d7..de00fe9651 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -3405,6 +3405,7 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) { for i, b := range f.Blocks { s.bstart[b.ID] = Pc // Emit values in block + s.markMoves(b) for _, v := range b.Values { x := Pc s.genValue(v) @@ -3864,6 +3865,11 @@ func (s *genState) genValue(v *ssa.Value) { p.From.Offset = i p.To.Type = obj.TYPE_REG p.To.Reg = x + // If flags are live at this instruction, suppress the + // MOV $0,AX -> XOR AX,AX optimization. + if v.Aux != nil { + p.Mark |= x86.PRESERVEFLAGS + } case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst: x := regnum(v) p := Prog(v.Op.Asm()) @@ -4237,6 +4243,29 @@ func (s *genState) genValue(v *ssa.Value) { } } +// markMoves marks any MOVXconst ops that need to avoid clobbering flags. +func (s *genState) markMoves(b *ssa.Block) { + flive := b.FlagsLiveAtEnd + if b.Control != nil && b.Control.Type.IsFlags() { + flive = true + } + for i := len(b.Values) - 1; i >= 0; i-- { + v := b.Values[i] + if flive && (v.Op == ssa.OpAMD64MOVWconst || v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) { + // The "mark" is any non-nil Aux value. + v.Aux = v + } + if v.Type.IsFlags() { + flive = false + } + for _, a := range v.Args { + if a.Type.IsFlags() { + flive = true + } + } + } +} + // movZero generates a register indirect move with a 0 immediate and keeps track of bytes left and next offset func movZero(as int, width int64, nbytes int64, offset int64, regnum int16) (nleft int64, noff int64) { p := Prog(as) diff --git a/src/cmd/compile/internal/ssa/block.go b/src/cmd/compile/internal/ssa/block.go index 5fb93cd5a7..02673f0650 100644 --- a/src/cmd/compile/internal/ssa/block.go +++ b/src/cmd/compile/internal/ssa/block.go @@ -50,6 +50,9 @@ type Block struct { // Ignored if len(Succs) < 2. // Fatal if not BranchUnknown and len(Succs) > 2. Likely BranchPrediction + + // After flagalloc, records whether flags are live at the end of the block. + FlagsLiveAtEnd bool } // kind control successors diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go index c088158057..f4e289e782 100644 --- a/src/cmd/compile/internal/ssa/flagalloc.go +++ b/src/cmd/compile/internal/ssa/flagalloc.go @@ -120,4 +120,9 @@ func flagalloc(f *Func) { // standard regs, and it runs next.) } } + + // Save live flag state for later. + for _, b := range f.Blocks { + b.FlagsLiveAtEnd = end[b.ID] != nil + } } diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index daee7336b0..dcffb49f63 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -93,7 +93,6 @@ func init() { // Common regInfo var ( gp01 = regInfo{inputs: []regMask{}, outputs: gponly} - gp01flags = regInfo{inputs: []regMask{}, outputs: gponly, clobbers: flags} gp11 = regInfo{inputs: []regMask{gpsp}, outputs: gponly, clobbers: flags} gp11nf = regInfo{inputs: []regMask{gpsp}, outputs: gponly} // nf: no flags clobbered gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly} @@ -340,12 +339,10 @@ func init() { {name: "MOVLQSX", reg: gp11nf, asm: "MOVLQSX"}, // sign extend arg0 from int32 to int64 {name: "MOVLQZX", reg: gp11nf, asm: "MOVLQZX"}, // zero extend arg0 from int32 to int64 - // clobbers flags as liblink will rewrite these to XOR reg, reg if the constant is zero - // TODO: revisit when issue 12405 is fixed - {name: "MOVBconst", reg: gp01flags, asm: "MOVB", typ: "UInt8"}, // 8 low bits of auxint - {name: "MOVWconst", reg: gp01flags, asm: "MOVW", typ: "UInt16"}, // 16 low bits of auxint - {name: "MOVLconst", reg: gp01flags, asm: "MOVL", typ: "UInt32"}, // 32 low bits of auxint - {name: "MOVQconst", reg: gp01flags, asm: "MOVQ", typ: "UInt64"}, // auxint + {name: "MOVBconst", reg: gp01, asm: "MOVB", typ: "UInt8"}, // 8 low bits of auxint + {name: "MOVWconst", reg: gp01, asm: "MOVW", typ: "UInt16"}, // 16 low bits of auxint + {name: "MOVLconst", reg: gp01, asm: "MOVL", typ: "UInt32"}, // 32 low bits of auxint + {name: "MOVQconst", reg: gp01, asm: "MOVQ", typ: "UInt64"}, // auxint {name: "CVTTSD2SL", reg: fpgp, asm: "CVTTSD2SL"}, // convert float64 to int32 {name: "CVTTSD2SQ", reg: fpgp, asm: "CVTTSD2SQ"}, // convert float64 to int64 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 497b690192..d391b2435e 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2694,7 +2694,6 @@ var opcodeTable = [...]opInfo{ name: "MOVBconst", asm: x86.AMOVB, reg: regInfo{ - clobbers: 8589934592, // .FLAGS outputs: []regMask{ 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 }, @@ -2704,7 +2703,6 @@ var opcodeTable = [...]opInfo{ name: "MOVWconst", asm: x86.AMOVW, reg: regInfo{ - clobbers: 8589934592, // .FLAGS outputs: []regMask{ 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 }, @@ -2714,7 +2712,6 @@ var opcodeTable = [...]opInfo{ name: "MOVLconst", asm: x86.AMOVL, reg: regInfo{ - clobbers: 8589934592, // .FLAGS outputs: []regMask{ 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 }, @@ -2724,7 +2721,6 @@ var opcodeTable = [...]opInfo{ name: "MOVQconst", asm: x86.AMOVQ, reg: regInfo{ - clobbers: 8589934592, // .FLAGS outputs: []regMask{ 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 }, diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index 27deeba718..7cbd30311f 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -1415,15 +1415,6 @@ func (v *Value) rematerializeable() bool { // We can't rematerialize instructions which // clobber the flags register. if regspec.clobbers&flagRegMask != 0 { - if v.Op == OpAMD64MOVQconst && v.AuxInt != 0 || - v.Op == OpAMD64MOVLconst && int32(v.AuxInt) != 0 || - v.Op == OpAMD64MOVWconst && int16(v.AuxInt) != 0 || - v.Op == OpAMD64MOVBconst && int8(v.AuxInt) != 0 { - // These are marked as clobbering flags, but only - // the 0 versions actually do. TODO: fix MOV->XOR rewrites - // to understand when they are allowed to clobber flags? - return true - } return false } diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index bc898235c1..f3d1a9557a 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -214,14 +214,14 @@ type Prog struct { Spadj int32 As int16 Reg int16 - RegTo2 int16 // 2nd register output operand - Mark uint16 + RegTo2 int16 // 2nd register output operand + Mark uint16 // bitmask of arch-specific items Optab uint16 Scond uint8 Back uint8 Ft uint8 Tt uint8 - Isize uint8 + Isize uint8 // size of the instruction in bytes (x86 only) Mode int8 Info ProgInfo diff --git a/src/cmd/internal/obj/pass.go b/src/cmd/internal/obj/pass.go index b92dfe23fb..14c9b6aaba 100644 --- a/src/cmd/internal/obj/pass.go +++ b/src/cmd/internal/obj/pass.go @@ -203,7 +203,6 @@ func linkpatch(ctxt *Link, sym *LSym) { } for p := sym.Text; p != nil; p = p.Link { - p.Mark = 0 /* initialization for follow */ if p.Pcond != nil { p.Pcond = brloop(ctxt, p.Pcond) if p.Pcond != nil { diff --git a/src/cmd/internal/obj/x86/a.out.go b/src/cmd/internal/obj/x86/a.out.go index 4ee8cfbc6c..f163505fd0 100644 --- a/src/cmd/internal/obj/x86/a.out.go +++ b/src/cmd/internal/obj/x86/a.out.go @@ -34,6 +34,12 @@ import "cmd/internal/obj" //go:generate go run ../stringer.go -i $GOFILE -o anames.go -p x86 +const ( + /* mark flags */ + DONE = 1 << iota + PRESERVEFLAGS // not allowed to clobber flags +) + /* * amd64 */ diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 164dbd6064..8d0f86681f 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -1748,7 +1748,7 @@ func span6(ctxt *obj.Link, s *obj.LSym) { // process forward jumps to p for q = p.Rel; q != nil; q = q.Forwd { - v = int32(p.Pc - (q.Pc + int64(q.Mark))) + v = int32(p.Pc - (q.Pc + int64(q.Isize))) if q.Back&2 != 0 { // short if v > 127 { loop++ @@ -1761,7 +1761,7 @@ func span6(ctxt *obj.Link, s *obj.LSym) { s.P[q.Pc+1] = byte(v) } } else { - bp = s.P[q.Pc+int64(q.Mark)-4:] + bp = s.P[q.Pc+int64(q.Isize)-4:] bp[0] = byte(v) bp = bp[1:] bp[0] = byte(v >> 8) @@ -1784,7 +1784,6 @@ func span6(ctxt *obj.Link, s *obj.LSym) { obj.Symgrow(ctxt, s, p.Pc+int64(m)) copy(s.P[p.Pc:][:m], ctxt.And[:m]) - p.Mark = uint16(m) c += int32(m) } @@ -2157,6 +2156,10 @@ func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int { v = int64(int32(v)) } if v == 0 { + if p.Mark&PRESERVEFLAGS != 0 { + // If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX. + return Yu7 + } return Yi0 } if v == 1 { diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index eff6c004c6..e545374828 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -1214,16 +1214,16 @@ loop: q = p.Pcond if q != nil && q.As != obj.ATEXT { /* mark instruction as done and continue layout at target of jump */ - p.Mark = 1 + p.Mark |= DONE p = q - if p.Mark == 0 { + if p.Mark&DONE == 0 { goto loop } } } - if p.Mark != 0 { + if p.Mark&DONE != 0 { /* * p goes here, but already used it elsewhere. * copy up to 4 instructions or else branch to other copy. @@ -1246,7 +1246,7 @@ loop: if nofollow(a) || pushpop(a) { break // NOTE(rsc): arm does goto copy } - if q.Pcond == nil || q.Pcond.Mark != 0 { + if q.Pcond == nil || q.Pcond.Mark&DONE != 0 { continue } if a == obj.ACALL || a == ALOOP { @@ -1260,10 +1260,10 @@ loop: q = obj.Copyp(ctxt, p) p = p.Link - q.Mark = 1 + q.Mark |= DONE (*last).Link = q *last = q - if int(q.As) != a || q.Pcond == nil || q.Pcond.Mark != 0 { + if int(q.As) != a || q.Pcond == nil || q.Pcond.Mark&DONE != 0 { continue } @@ -1273,7 +1273,7 @@ loop: q.Link = p xfol(ctxt, q.Link, last) p = q.Link - if p.Mark != 0 { + if p.Mark&DONE != 0 { return } goto loop @@ -1290,7 +1290,7 @@ loop: } /* emit p */ - p.Mark = 1 + p.Mark |= DONE (*last).Link = p *last = p @@ -1328,7 +1328,7 @@ loop: } } else { q = p.Link - if q.Mark != 0 { + if q.Mark&DONE != 0 { if a != ALOOP { p.As = relinv(int16(a)) p.Link = p.Pcond @@ -1338,7 +1338,7 @@ loop: } xfol(ctxt, p.Link, last) - if p.Pcond.Mark != 0 { + if p.Pcond.Mark&DONE != 0 { return } p = p.Pcond -- cgit v1.3 From f94e0745b3dc922ca7f3d15507e33ed6d3a65ee6 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 26 Jan 2016 15:47:08 -0800 Subject: [dev.ssa] cmd/compile: prepare for some load+op combining Rename StoreConst to ValAndOff so we can use it for other ops. Make ValAndOff print nicely. Add some notes & checks related to my aborted attempt to implement combined CMP+load ops. Change-Id: I2f901d12d42bc5a82879af0334806aa184a97e27 Reviewed-on: https://go-review.googlesource.com/18947 Run-TryBot: David Chase Reviewed-by: David Chase --- src/cmd/compile/internal/gc/ssa.go | 4 +- src/cmd/compile/internal/ssa/TODO | 7 +- src/cmd/compile/internal/ssa/flagalloc.go | 7 +- src/cmd/compile/internal/ssa/gen/AMD64.rules | 68 +++---- src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 4 +- src/cmd/compile/internal/ssa/op.go | 74 ++++---- src/cmd/compile/internal/ssa/rewriteAMD64.go | 268 +++++++++++++-------------- src/cmd/compile/internal/ssa/value.go | 31 +++- 8 files changed, 245 insertions(+), 218 deletions(-) (limited to 'src/cmd/compile/internal/ssa/flagalloc.go') diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index a05e33196a..89286f4356 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -4092,7 +4092,7 @@ func (s *genState) genValue(v *ssa.Value) { case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: p := Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST - sc := ssa.StoreConst(v.AuxInt) + sc := ssa.ValAndOff(v.AuxInt) i := sc.Val() switch v.Op { case ssa.OpAMD64MOVBstoreconst: @@ -4372,7 +4372,7 @@ func (s *genState) genValue(v *ssa.Value) { return } case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: - off := ssa.StoreConst(v.AuxInt).Off() + off := ssa.ValAndOff(v.AuxInt).Off() if w.Args[0] == v.Args[0] && w.Aux == nil && off >= 0 && off < minZeroPage { if Debug_checknil != 0 && int(v.Line) > 1 { Warnl(int(v.Line), "removed nil check") diff --git a/src/cmd/compile/internal/ssa/TODO b/src/cmd/compile/internal/ssa/TODO index 2f7973c5a3..5245753c07 100644 --- a/src/cmd/compile/internal/ssa/TODO +++ b/src/cmd/compile/internal/ssa/TODO @@ -20,7 +20,6 @@ Optimizations (better compiled code) - Expand current optimizations to all bit widths - Add a value range propagation pass (for bounds elim & bitwidth reduction) - Make dead store pass inter-block -- (x86) Combine loads into other ops - (x86) More combining address arithmetic into loads/stores - (x86) use ADDQ instead of LEAQ when we can - redundant CMP in sequences like this: @@ -38,8 +37,6 @@ Optimizations (better compiled code) Same for interfaces? - boolean logic: movb/xorb$1/testb/jeq -> movb/testb/jne - (ADDQconst (SUBQconst x)) and vice-versa -- (CMP (Load ...)) and (CMPconst (Load ...)) in one instruction - (all instructions, really) - combine LEAQs - store followed by load to same address - (CMPconst [0] (AND x y)) -> (TEST x y) @@ -50,6 +47,10 @@ Optimizations (better compiled code) - better computing of &&/|| in non-if/for contexts - OpArrayIndex should take its index in AuxInt, not a full value. - remove FLAGS from REP instruction clobbers +- (x86) Combine loads into other ops + Note that this is challenging for ops that generate flags + because flagalloc wants to move those instructions around for + flag regeneration. Optimizations (better compiler) ------------------------------- diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go index f4e289e782..85e9c4fbee 100644 --- a/src/cmd/compile/internal/ssa/flagalloc.go +++ b/src/cmd/compile/internal/ssa/flagalloc.go @@ -42,11 +42,14 @@ func flagalloc(f *Func) { } } } - for _, p := range b.Preds { - end[p.ID] = flag + if flag != nil { + for _, p := range b.Preds { + end[p.ID] = flag + } } } } + // For blocks which have a flags control value, that's the only value // we can leave in the flags register at the end of the block. (There // is no place to put a flag regeneration instruction.) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 9db3abb9f0..a6ad6c1ca0 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -556,24 +556,24 @@ (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem) -> (MOVOstore [addOff(off1, off2)] {sym} ptr val mem) // Fold constants into stores. -(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validStoreConst(c,off) -> - (MOVQstoreconst [makeStoreConst(c,off)] {sym} ptr mem) -(MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validStoreConstOff(off) -> - (MOVLstoreconst [makeStoreConst(int64(int32(c)),off)] {sym} ptr mem) -(MOVWstore [off] {sym} ptr (MOVWconst [c]) mem) && validStoreConstOff(off) -> - (MOVWstoreconst [makeStoreConst(int64(int16(c)),off)] {sym} ptr mem) -(MOVBstore [off] {sym} ptr (MOVBconst [c]) mem) && validStoreConstOff(off) -> - (MOVBstoreconst [makeStoreConst(int64(int8(c)),off)] {sym} ptr mem) +(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) -> + (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem) +(MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) -> + (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem) +(MOVWstore [off] {sym} ptr (MOVWconst [c]) mem) && validOff(off) -> + (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) +(MOVBstore [off] {sym} ptr (MOVBconst [c]) mem) && validOff(off) -> + (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem) // Fold address offsets into constant stores. -(MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && StoreConst(sc).canAdd(off) -> - (MOVQstoreconst [StoreConst(sc).add(off)] {s} ptr mem) -(MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && StoreConst(sc).canAdd(off) -> - (MOVLstoreconst [StoreConst(sc).add(off)] {s} ptr mem) -(MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && StoreConst(sc).canAdd(off) -> - (MOVWstoreconst [StoreConst(sc).add(off)] {s} ptr mem) -(MOVBstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && StoreConst(sc).canAdd(off) -> - (MOVBstoreconst [StoreConst(sc).add(off)] {s} ptr mem) +(MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> + (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) +(MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> + (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) +(MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> + (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) +(MOVBstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> + (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) // We need to fold LEAQ into the MOVx ops so that the live variable analysis knows // what variables are being read/written by the ops. @@ -607,14 +607,14 @@ (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) -> (MOVOstore [addOff(off1,off2)] {mergeSym(sym1,sym2)} base val mem) -(MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off) -> - (MOVQstoreconst [StoreConst(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) -(MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off) -> - (MOVLstoreconst [StoreConst(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) -(MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off) -> - (MOVWstoreconst [StoreConst(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) -(MOVBstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off) -> - (MOVBstoreconst [StoreConst(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) +(MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> + (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) +(MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> + (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) +(MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> + (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) +(MOVBstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> + (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) // indexed loads and stores (MOVQloadidx8 [off1] {sym} (ADDQconst [off2] ptr) idx mem) -> (MOVQloadidx8 [addOff(off1, off2)] {sym} ptr idx mem) @@ -647,16 +647,16 @@ (Zero [8] destptr mem) -> (MOVQstoreconst [0] destptr mem) (Zero [3] destptr mem) -> - (MOVBstoreconst [makeStoreConst(0,2)] destptr + (MOVBstoreconst [makeValAndOff(0,2)] destptr (MOVWstoreconst [0] destptr mem)) (Zero [5] destptr mem) -> - (MOVBstoreconst [makeStoreConst(0,4)] destptr + (MOVBstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [0] destptr mem)) (Zero [6] destptr mem) -> - (MOVWstoreconst [makeStoreConst(0,4)] destptr + (MOVWstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [0] destptr mem)) (Zero [7] destptr mem) -> - (MOVLstoreconst [makeStoreConst(0,3)] destptr + (MOVLstoreconst [makeValAndOff(0,3)] destptr (MOVLstoreconst [0] destptr mem)) // Strip off any fractional word zeroing. @@ -666,16 +666,16 @@ // Zero small numbers of words directly. (Zero [16] destptr mem) -> - (MOVQstoreconst [makeStoreConst(0,8)] destptr + (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem)) (Zero [24] destptr mem) -> - (MOVQstoreconst [makeStoreConst(0,16)] destptr - (MOVQstoreconst [makeStoreConst(0,8)] destptr + (MOVQstoreconst [makeValAndOff(0,16)] destptr + (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem))) (Zero [32] destptr mem) -> - (MOVQstoreconst [makeStoreConst(0,24)] destptr - (MOVQstoreconst [makeStoreConst(0,16)] destptr - (MOVQstoreconst [makeStoreConst(0,8)] destptr + (MOVQstoreconst [makeValAndOff(0,24)] destptr + (MOVQstoreconst [makeValAndOff(0,16)] destptr + (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem)))) // Medium zeroing uses a duff device. diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index dcffb49f63..9cf4a2e70b 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -382,8 +382,8 @@ func init() { // For storeconst ops, the AuxInt field encodes both // the value to store and an address offset of the store. - // Cast AuxInt to a StoreConst to extract Val and Off fields. - {name: "MOVBstoreconst", reg: gpstoreconst, asm: "MOVB", typ: "Mem"}, // store low byte of StoreConst(AuxInt).Val() to arg0+StoreConst(AuxInt).Off()+aux. arg1=mem + // Cast AuxInt to a ValAndOff to extract Val and Off fields. + {name: "MOVBstoreconst", reg: gpstoreconst, asm: "MOVB", typ: "Mem"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux. arg1=mem {name: "MOVWstoreconst", reg: gpstoreconst, asm: "MOVW", typ: "Mem"}, // store low 2 bytes of ... {name: "MOVLstoreconst", reg: gpstoreconst, asm: "MOVL", typ: "Mem"}, // store low 4 bytes of ... {name: "MOVQstoreconst", reg: gpstoreconst, asm: "MOVQ", typ: "Mem"}, // store 8 bytes of ... diff --git a/src/cmd/compile/internal/ssa/op.go b/src/cmd/compile/internal/ssa/op.go index 78cca9e0b8..526722f7bc 100644 --- a/src/cmd/compile/internal/ssa/op.go +++ b/src/cmd/compile/internal/ssa/op.go @@ -4,6 +4,8 @@ package ssa +import "fmt" + // An Op encodes the specific operation that a Value performs. // Opcodes' semantics can be modified by the type and aux fields of the Value. // For instance, OpAdd can be 32 or 64 bit, signed or unsigned, float or complex, depending on Value.Type. @@ -30,57 +32,67 @@ type regInfo struct { outputs []regMask // NOTE: values can only have 1 output for now. } -// A StoreConst is used by the MOVXstoreconst opcodes. It holds -// both the value to store and an offset from the store pointer. -// A StoreConst is intended to be encoded into an AuxInt field. -// The zero StoreConst encodes a value of 0 and an offset of 0. -// The high 32 bits hold a value to be stored. +// A ValAndOff is used by the several opcodes. It holds +// both a value and a pointer offset. +// A ValAndOff is intended to be encoded into an AuxInt field. +// The zero ValAndOff encodes a value of 0 and an offset of 0. +// The high 32 bits hold a value. // The low 32 bits hold a pointer offset. -type StoreConst int64 +type ValAndOff int64 -func (sc StoreConst) Val() int64 { - return int64(sc) >> 32 +func (x ValAndOff) Val() int64 { + return int64(x) >> 32 +} +func (x ValAndOff) Off() int64 { + return int64(int32(x)) } -func (sc StoreConst) Off() int64 { - return int64(int32(sc)) +func (x ValAndOff) Int64() int64 { + return int64(x) } -func (sc StoreConst) Int64() int64 { - return int64(sc) +func (x ValAndOff) String() string { + return fmt.Sprintf("val=%d,off=%d", x.Val(), x.Off()) } -// validStoreConstOff reports whether the offset can be used -// as an argument to makeStoreConst. -func validStoreConstOff(off int64) bool { +// validVal reports whether the value can be used +// as an argument to makeValAndOff. +func validVal(val int64) bool { + return val == int64(int32(val)) +} + +// validOff reports whether the offset can be used +// as an argument to makeValAndOff. +func validOff(off int64) bool { return off == int64(int32(off)) } -// validStoreConst reports whether we can fit the value and offset into -// a StoreConst value. -func validStoreConst(val, off int64) bool { - if val != int64(int32(val)) { +// validValAndOff reports whether we can fit the value and offset into +// a ValAndOff value. +func validValAndOff(val, off int64) bool { + if !validVal(val) { return false } - if !validStoreConstOff(off) { + if !validOff(off) { return false } return true } -// encode encodes a StoreConst into an int64 suitable for storing in an AuxInt field. -func makeStoreConst(val, off int64) int64 { - if !validStoreConst(val, off) { - panic("invalid makeStoreConst") +// makeValAndOff encodes a ValAndOff into an int64 suitable for storing in an AuxInt field. +func makeValAndOff(val, off int64) int64 { + if !validValAndOff(val, off) { + panic("invalid makeValAndOff") } - return StoreConst(val<<32 + int64(uint32(off))).Int64() + return ValAndOff(val<<32 + int64(uint32(off))).Int64() } -func (sc StoreConst) canAdd(off int64) bool { - newoff := sc.Off() + off +func (x ValAndOff) canAdd(off int64) bool { + newoff := x.Off() + off return newoff == int64(int32(newoff)) } -func (sc StoreConst) add(off int64) int64 { - if !sc.canAdd(off) { - panic("invalid StoreConst.add") + +func (x ValAndOff) add(off int64) int64 { + if !x.canAdd(off) { + panic("invalid ValAndOff.add") } - return makeStoreConst(sc.Val(), sc.Off()+off) + return makeValAndOff(x.Val(), x.Off()+off) } diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 3d682f0040..ec3bbe53c2 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -6059,32 +6059,32 @@ end3a2e55db7e03920700c4875f6a55de3b: ende6347ac19d0469ee59d2e7f2e18d1070: ; // match: (MOVBstore [off] {sym} ptr (MOVBconst [c]) mem) - // cond: validStoreConstOff(off) - // result: (MOVBstoreconst [makeStoreConst(int64(int8(c)),off)] {sym} ptr mem) + // cond: validOff(off) + // result: (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem) { off := v.AuxInt sym := v.Aux ptr := v.Args[0] if v.Args[1].Op != OpAMD64MOVBconst { - goto enda8ebda583a842dae6377b7f562040318 + goto endfdf24c49923451a076f1868988b8c9d9 } c := v.Args[1].AuxInt mem := v.Args[2] - if !(validStoreConstOff(off)) { - goto enda8ebda583a842dae6377b7f562040318 + if !(validOff(off)) { + goto endfdf24c49923451a076f1868988b8c9d9 } v.Op = OpAMD64MOVBstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = makeStoreConst(int64(int8(c)), off) + v.AuxInt = makeValAndOff(int64(int8(c)), off) v.Aux = sym v.AddArg(ptr) v.AddArg(mem) return true } - goto enda8ebda583a842dae6377b7f562040318 -enda8ebda583a842dae6377b7f562040318: + goto endfdf24c49923451a076f1868988b8c9d9 +endfdf24c49923451a076f1868988b8c9d9: ; // match: (MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: canMergeSym(sym1, sym2) @@ -6123,61 +6123,61 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value, config *Config) bool { b := v.Block _ = b // match: (MOVBstoreconst [sc] {s} (ADDQconst [off] ptr) mem) - // cond: StoreConst(sc).canAdd(off) - // result: (MOVBstoreconst [StoreConst(sc).add(off)] {s} ptr mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) { sc := v.AuxInt s := v.Aux if v.Args[0].Op != OpAMD64ADDQconst { - goto ende1cdf6d463f91ba4dd1956f8ba4cb128 + goto end8d35ca650b7c40bc43984d3f5925a052 } off := v.Args[0].AuxInt ptr := v.Args[0].Args[0] mem := v.Args[1] - if !(StoreConst(sc).canAdd(off)) { - goto ende1cdf6d463f91ba4dd1956f8ba4cb128 + if !(ValAndOff(sc).canAdd(off)) { + goto end8d35ca650b7c40bc43984d3f5925a052 } v.Op = OpAMD64MOVBstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = StoreConst(sc).add(off) + v.AuxInt = ValAndOff(sc).add(off) v.Aux = s v.AddArg(ptr) v.AddArg(mem) return true } - goto ende1cdf6d463f91ba4dd1956f8ba4cb128 -ende1cdf6d463f91ba4dd1956f8ba4cb128: + goto end8d35ca650b7c40bc43984d3f5925a052 +end8d35ca650b7c40bc43984d3f5925a052: ; // match: (MOVBstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off) - // result: (MOVBstoreconst [StoreConst(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) { sc := v.AuxInt sym1 := v.Aux if v.Args[0].Op != OpAMD64LEAQ { - goto end5feed29bca3ce7d5fccda89acf71c855 + goto end8deb839acf84818dd8fc827c0338f42c } off := v.Args[0].AuxInt sym2 := v.Args[0].Aux ptr := v.Args[0].Args[0] mem := v.Args[1] - if !(canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off)) { - goto end5feed29bca3ce7d5fccda89acf71c855 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { + goto end8deb839acf84818dd8fc827c0338f42c } v.Op = OpAMD64MOVBstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = StoreConst(sc).add(off) + v.AuxInt = ValAndOff(sc).add(off) v.Aux = mergeSym(sym1, sym2) v.AddArg(ptr) v.AddArg(mem) return true } - goto end5feed29bca3ce7d5fccda89acf71c855 -end5feed29bca3ce7d5fccda89acf71c855: + goto end8deb839acf84818dd8fc827c0338f42c +end8deb839acf84818dd8fc827c0338f42c: ; return false } @@ -6323,32 +6323,32 @@ end199e8c23a5e7e99728a43d6a83b2c2cf: end43bffdb8d9c1fc85a95778d4911955f1: ; // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) - // cond: validStoreConstOff(off) - // result: (MOVLstoreconst [makeStoreConst(int64(int32(c)),off)] {sym} ptr mem) + // cond: validOff(off) + // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem) { off := v.AuxInt sym := v.Aux ptr := v.Args[0] if v.Args[1].Op != OpAMD64MOVLconst { - goto end14bc0c027d67d279cf3ef2038b759ce2 + goto enda62a54c45bf42db801af4095d27faccd } c := v.Args[1].AuxInt mem := v.Args[2] - if !(validStoreConstOff(off)) { - goto end14bc0c027d67d279cf3ef2038b759ce2 + if !(validOff(off)) { + goto enda62a54c45bf42db801af4095d27faccd } v.Op = OpAMD64MOVLstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = makeStoreConst(int64(int32(c)), off) + v.AuxInt = makeValAndOff(int64(int32(c)), off) v.Aux = sym v.AddArg(ptr) v.AddArg(mem) return true } - goto end14bc0c027d67d279cf3ef2038b759ce2 -end14bc0c027d67d279cf3ef2038b759ce2: + goto enda62a54c45bf42db801af4095d27faccd +enda62a54c45bf42db801af4095d27faccd: ; // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: canMergeSym(sym1, sym2) @@ -6387,61 +6387,61 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value, config *Config) bool { b := v.Block _ = b // match: (MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem) - // cond: StoreConst(sc).canAdd(off) - // result: (MOVLstoreconst [StoreConst(sc).add(off)] {s} ptr mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) { sc := v.AuxInt s := v.Aux if v.Args[0].Op != OpAMD64ADDQconst { - goto end7665f96d0aaa57009bf98632f19bf8e7 + goto end4981598152dd0763f1d735810a7d34e8 } off := v.Args[0].AuxInt ptr := v.Args[0].Args[0] mem := v.Args[1] - if !(StoreConst(sc).canAdd(off)) { - goto end7665f96d0aaa57009bf98632f19bf8e7 + if !(ValAndOff(sc).canAdd(off)) { + goto end4981598152dd0763f1d735810a7d34e8 } v.Op = OpAMD64MOVLstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = StoreConst(sc).add(off) + v.AuxInt = ValAndOff(sc).add(off) v.Aux = s v.AddArg(ptr) v.AddArg(mem) return true } - goto end7665f96d0aaa57009bf98632f19bf8e7 -end7665f96d0aaa57009bf98632f19bf8e7: + goto end4981598152dd0763f1d735810a7d34e8 +end4981598152dd0763f1d735810a7d34e8: ; // match: (MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off) - // result: (MOVLstoreconst [StoreConst(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) { sc := v.AuxInt sym1 := v.Aux if v.Args[0].Op != OpAMD64LEAQ { - goto end1664c6056a9c65fcbe30eca273e8ee64 + goto endd579250954b5df84a77518b36f739e12 } off := v.Args[0].AuxInt sym2 := v.Args[0].Aux ptr := v.Args[0].Args[0] mem := v.Args[1] - if !(canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off)) { - goto end1664c6056a9c65fcbe30eca273e8ee64 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { + goto endd579250954b5df84a77518b36f739e12 } v.Op = OpAMD64MOVLstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = StoreConst(sc).add(off) + v.AuxInt = ValAndOff(sc).add(off) v.Aux = mergeSym(sym1, sym2) v.AddArg(ptr) v.AddArg(mem) return true } - goto end1664c6056a9c65fcbe30eca273e8ee64 -end1664c6056a9c65fcbe30eca273e8ee64: + goto endd579250954b5df84a77518b36f739e12 +endd579250954b5df84a77518b36f739e12: ; return false } @@ -6720,32 +6720,32 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value, config *Config) bool { end0a110b5e42a4576c32fda50590092848: ; // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) - // cond: validStoreConst(c,off) - // result: (MOVQstoreconst [makeStoreConst(c,off)] {sym} ptr mem) + // cond: validValAndOff(c,off) + // result: (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem) { off := v.AuxInt sym := v.Aux ptr := v.Args[0] if v.Args[1].Op != OpAMD64MOVQconst { - goto end8368f37d24b6a2f59c3d00966c4d4111 + goto endda0f4b36e19753762dbd1c6ee05e4c81 } c := v.Args[1].AuxInt mem := v.Args[2] - if !(validStoreConst(c, off)) { - goto end8368f37d24b6a2f59c3d00966c4d4111 + if !(validValAndOff(c, off)) { + goto endda0f4b36e19753762dbd1c6ee05e4c81 } v.Op = OpAMD64MOVQstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = makeStoreConst(c, off) + v.AuxInt = makeValAndOff(c, off) v.Aux = sym v.AddArg(ptr) v.AddArg(mem) return true } - goto end8368f37d24b6a2f59c3d00966c4d4111 -end8368f37d24b6a2f59c3d00966c4d4111: + goto endda0f4b36e19753762dbd1c6ee05e4c81 +endda0f4b36e19753762dbd1c6ee05e4c81: ; // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: canMergeSym(sym1, sym2) @@ -6817,61 +6817,61 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value, config *Config) bool { b := v.Block _ = b // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem) - // cond: StoreConst(sc).canAdd(off) - // result: (MOVQstoreconst [StoreConst(sc).add(off)] {s} ptr mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) { sc := v.AuxInt s := v.Aux if v.Args[0].Op != OpAMD64ADDQconst { - goto end5826e30265c68ea8c4cd595ceedf9405 + goto end3694207cd20e8e1cc719e179bdfe0c74 } off := v.Args[0].AuxInt ptr := v.Args[0].Args[0] mem := v.Args[1] - if !(StoreConst(sc).canAdd(off)) { - goto end5826e30265c68ea8c4cd595ceedf9405 + if !(ValAndOff(sc).canAdd(off)) { + goto end3694207cd20e8e1cc719e179bdfe0c74 } v.Op = OpAMD64MOVQstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = StoreConst(sc).add(off) + v.AuxInt = ValAndOff(sc).add(off) v.Aux = s v.AddArg(ptr) v.AddArg(mem) return true } - goto end5826e30265c68ea8c4cd595ceedf9405 -end5826e30265c68ea8c4cd595ceedf9405: + goto end3694207cd20e8e1cc719e179bdfe0c74 +end3694207cd20e8e1cc719e179bdfe0c74: ; // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off) - // result: (MOVQstoreconst [StoreConst(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) { sc := v.AuxInt sym1 := v.Aux if v.Args[0].Op != OpAMD64LEAQ { - goto endb9c7f7a9dbc6b885d84f851c74b018e5 + goto endf405b27b22dbf76f83abd1b5ad5e53d9 } off := v.Args[0].AuxInt sym2 := v.Args[0].Aux ptr := v.Args[0].Args[0] mem := v.Args[1] - if !(canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off)) { - goto endb9c7f7a9dbc6b885d84f851c74b018e5 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { + goto endf405b27b22dbf76f83abd1b5ad5e53d9 } v.Op = OpAMD64MOVQstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = StoreConst(sc).add(off) + v.AuxInt = ValAndOff(sc).add(off) v.Aux = mergeSym(sym1, sym2) v.AddArg(ptr) v.AddArg(mem) return true } - goto endb9c7f7a9dbc6b885d84f851c74b018e5 -endb9c7f7a9dbc6b885d84f851c74b018e5: + goto endf405b27b22dbf76f83abd1b5ad5e53d9 +endf405b27b22dbf76f83abd1b5ad5e53d9: ; return false } @@ -7567,32 +7567,32 @@ end187fe73dfaf9cf5f4c349283b4dfd9d1: endda15fdd59aa956ded0440188f38de1aa: ; // match: (MOVWstore [off] {sym} ptr (MOVWconst [c]) mem) - // cond: validStoreConstOff(off) - // result: (MOVWstoreconst [makeStoreConst(int64(int16(c)),off)] {sym} ptr mem) + // cond: validOff(off) + // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) { off := v.AuxInt sym := v.Aux ptr := v.Args[0] if v.Args[1].Op != OpAMD64MOVWconst { - goto end226f449215b8ea54ac24fb8d52356ffa + goto end60327daf9965d73a8c1971d098e1e31d } c := v.Args[1].AuxInt mem := v.Args[2] - if !(validStoreConstOff(off)) { - goto end226f449215b8ea54ac24fb8d52356ffa + if !(validOff(off)) { + goto end60327daf9965d73a8c1971d098e1e31d } v.Op = OpAMD64MOVWstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = makeStoreConst(int64(int16(c)), off) + v.AuxInt = makeValAndOff(int64(int16(c)), off) v.Aux = sym v.AddArg(ptr) v.AddArg(mem) return true } - goto end226f449215b8ea54ac24fb8d52356ffa -end226f449215b8ea54ac24fb8d52356ffa: + goto end60327daf9965d73a8c1971d098e1e31d +end60327daf9965d73a8c1971d098e1e31d: ; // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: canMergeSym(sym1, sym2) @@ -7631,61 +7631,61 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value, config *Config) bool { b := v.Block _ = b // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem) - // cond: StoreConst(sc).canAdd(off) - // result: (MOVWstoreconst [StoreConst(sc).add(off)] {s} ptr mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) { sc := v.AuxInt s := v.Aux if v.Args[0].Op != OpAMD64ADDQconst { - goto end2b764f9cf1bb32af25ba4e70a6705b91 + goto end8825edac065f0e1c615ca5e6ba40e2de } off := v.Args[0].AuxInt ptr := v.Args[0].Args[0] mem := v.Args[1] - if !(StoreConst(sc).canAdd(off)) { - goto end2b764f9cf1bb32af25ba4e70a6705b91 + if !(ValAndOff(sc).canAdd(off)) { + goto end8825edac065f0e1c615ca5e6ba40e2de } v.Op = OpAMD64MOVWstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = StoreConst(sc).add(off) + v.AuxInt = ValAndOff(sc).add(off) v.Aux = s v.AddArg(ptr) v.AddArg(mem) return true } - goto end2b764f9cf1bb32af25ba4e70a6705b91 -end2b764f9cf1bb32af25ba4e70a6705b91: + goto end8825edac065f0e1c615ca5e6ba40e2de +end8825edac065f0e1c615ca5e6ba40e2de: ; // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off) - // result: (MOVWstoreconst [StoreConst(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) { sc := v.AuxInt sym1 := v.Aux if v.Args[0].Op != OpAMD64LEAQ { - goto enda15bfd8d540015b2245c65be486d2ffd + goto endba47397e07b40a64fa4cad36ac2e32ad } off := v.Args[0].AuxInt sym2 := v.Args[0].Aux ptr := v.Args[0].Args[0] mem := v.Args[1] - if !(canMergeSym(sym1, sym2) && StoreConst(sc).canAdd(off)) { - goto enda15bfd8d540015b2245c65be486d2ffd + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { + goto endba47397e07b40a64fa4cad36ac2e32ad } v.Op = OpAMD64MOVWstoreconst v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = StoreConst(sc).add(off) + v.AuxInt = ValAndOff(sc).add(off) v.Aux = mergeSym(sym1, sym2) v.AddArg(ptr) v.AddArg(mem) return true } - goto enda15bfd8d540015b2245c65be486d2ffd -enda15bfd8d540015b2245c65be486d2ffd: + goto endba47397e07b40a64fa4cad36ac2e32ad +endba47397e07b40a64fa4cad36ac2e32ad: ; return false } @@ -14596,10 +14596,10 @@ end07aaaebfa15a48c52cd79b68e28d266f: ; // match: (Zero [3] destptr mem) // cond: - // result: (MOVBstoreconst [makeStoreConst(0,2)] destptr (MOVWstoreconst [0] destptr mem)) + // result: (MOVBstoreconst [makeValAndOff(0,2)] destptr (MOVWstoreconst [0] destptr mem)) { if v.AuxInt != 3 { - goto end03b2ae08f901891919e454f05273fb4e + goto end3bf4a24a87e0727b9bcfbb5fcd24aabe } destptr := v.Args[0] mem := v.Args[1] @@ -14607,7 +14607,7 @@ end07aaaebfa15a48c52cd79b68e28d266f: v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = makeStoreConst(0, 2) + v.AuxInt = makeValAndOff(0, 2) v.AddArg(destptr) v0 := b.NewValue0(v.Line, OpAMD64MOVWstoreconst, TypeInvalid) v0.AuxInt = 0 @@ -14617,15 +14617,15 @@ end07aaaebfa15a48c52cd79b68e28d266f: v.AddArg(v0) return true } - goto end03b2ae08f901891919e454f05273fb4e -end03b2ae08f901891919e454f05273fb4e: + goto end3bf4a24a87e0727b9bcfbb5fcd24aabe +end3bf4a24a87e0727b9bcfbb5fcd24aabe: ; // match: (Zero [5] destptr mem) // cond: - // result: (MOVBstoreconst [makeStoreConst(0,4)] destptr (MOVLstoreconst [0] destptr mem)) + // result: (MOVBstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [0] destptr mem)) { if v.AuxInt != 5 { - goto endc473059deb6291d483262b08312eab48 + goto end567e4a90c6867faf1dfc2cd57daf2ce4 } destptr := v.Args[0] mem := v.Args[1] @@ -14633,7 +14633,7 @@ end03b2ae08f901891919e454f05273fb4e: v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = makeStoreConst(0, 4) + v.AuxInt = makeValAndOff(0, 4) v.AddArg(destptr) v0 := b.NewValue0(v.Line, OpAMD64MOVLstoreconst, TypeInvalid) v0.AuxInt = 0 @@ -14643,15 +14643,15 @@ end03b2ae08f901891919e454f05273fb4e: v.AddArg(v0) return true } - goto endc473059deb6291d483262b08312eab48 -endc473059deb6291d483262b08312eab48: + goto end567e4a90c6867faf1dfc2cd57daf2ce4 +end567e4a90c6867faf1dfc2cd57daf2ce4: ; // match: (Zero [6] destptr mem) // cond: - // result: (MOVWstoreconst [makeStoreConst(0,4)] destptr (MOVLstoreconst [0] destptr mem)) + // result: (MOVWstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [0] destptr mem)) { if v.AuxInt != 6 { - goto end41b38839f25e3749384d53b5945bd56b + goto end7cddcaf215fcc2cbca9aa958147b2380 } destptr := v.Args[0] mem := v.Args[1] @@ -14659,7 +14659,7 @@ endc473059deb6291d483262b08312eab48: v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = makeStoreConst(0, 4) + v.AuxInt = makeValAndOff(0, 4) v.AddArg(destptr) v0 := b.NewValue0(v.Line, OpAMD64MOVLstoreconst, TypeInvalid) v0.AuxInt = 0 @@ -14669,15 +14669,15 @@ endc473059deb6291d483262b08312eab48: v.AddArg(v0) return true } - goto end41b38839f25e3749384d53b5945bd56b -end41b38839f25e3749384d53b5945bd56b: + goto end7cddcaf215fcc2cbca9aa958147b2380 +end7cddcaf215fcc2cbca9aa958147b2380: ; // match: (Zero [7] destptr mem) // cond: - // result: (MOVLstoreconst [makeStoreConst(0,3)] destptr (MOVLstoreconst [0] destptr mem)) + // result: (MOVLstoreconst [makeValAndOff(0,3)] destptr (MOVLstoreconst [0] destptr mem)) { if v.AuxInt != 7 { - goto end06e677d4c1ac43e08783eb8117a589b6 + goto end1b58cabccbc912ea4e1cf99be8a9fbf7 } destptr := v.Args[0] mem := v.Args[1] @@ -14685,7 +14685,7 @@ end41b38839f25e3749384d53b5945bd56b: v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = makeStoreConst(0, 3) + v.AuxInt = makeValAndOff(0, 3) v.AddArg(destptr) v0 := b.NewValue0(v.Line, OpAMD64MOVLstoreconst, TypeInvalid) v0.AuxInt = 0 @@ -14695,8 +14695,8 @@ end41b38839f25e3749384d53b5945bd56b: v.AddArg(v0) return true } - goto end06e677d4c1ac43e08783eb8117a589b6 -end06e677d4c1ac43e08783eb8117a589b6: + goto end1b58cabccbc912ea4e1cf99be8a9fbf7 +end1b58cabccbc912ea4e1cf99be8a9fbf7: ; // match: (Zero [size] destptr mem) // cond: size%8 != 0 && size > 8 @@ -14731,10 +14731,10 @@ endc8760f86b83b1372fce0042ab5200fc1: ; // match: (Zero [16] destptr mem) // cond: - // result: (MOVQstoreconst [makeStoreConst(0,8)] destptr (MOVQstoreconst [0] destptr mem)) + // result: (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem)) { if v.AuxInt != 16 { - goto endce0bdb028011236be9f04fb53462204d + goto endf1447d60cbf8025adaf1a02a2cd219c4 } destptr := v.Args[0] mem := v.Args[1] @@ -14742,7 +14742,7 @@ endc8760f86b83b1372fce0042ab5200fc1: v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = makeStoreConst(0, 8) + v.AuxInt = makeValAndOff(0, 8) v.AddArg(destptr) v0 := b.NewValue0(v.Line, OpAMD64MOVQstoreconst, TypeInvalid) v0.AuxInt = 0 @@ -14752,15 +14752,15 @@ endc8760f86b83b1372fce0042ab5200fc1: v.AddArg(v0) return true } - goto endce0bdb028011236be9f04fb53462204d -endce0bdb028011236be9f04fb53462204d: + goto endf1447d60cbf8025adaf1a02a2cd219c4 +endf1447d60cbf8025adaf1a02a2cd219c4: ; // match: (Zero [24] destptr mem) // cond: - // result: (MOVQstoreconst [makeStoreConst(0,16)] destptr (MOVQstoreconst [makeStoreConst(0,8)] destptr (MOVQstoreconst [0] destptr mem))) + // result: (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem))) { if v.AuxInt != 24 { - goto end859fe3911b36516ea096299b2a85350e + goto end57f2984a61c64f71a528e7fa75576095 } destptr := v.Args[0] mem := v.Args[1] @@ -14768,10 +14768,10 @@ endce0bdb028011236be9f04fb53462204d: v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = makeStoreConst(0, 16) + v.AuxInt = makeValAndOff(0, 16) v.AddArg(destptr) v0 := b.NewValue0(v.Line, OpAMD64MOVQstoreconst, TypeInvalid) - v0.AuxInt = makeStoreConst(0, 8) + v0.AuxInt = makeValAndOff(0, 8) v0.AddArg(destptr) v1 := b.NewValue0(v.Line, OpAMD64MOVQstoreconst, TypeInvalid) v1.AuxInt = 0 @@ -14783,15 +14783,15 @@ endce0bdb028011236be9f04fb53462204d: v.AddArg(v0) return true } - goto end859fe3911b36516ea096299b2a85350e -end859fe3911b36516ea096299b2a85350e: + goto end57f2984a61c64f71a528e7fa75576095 +end57f2984a61c64f71a528e7fa75576095: ; // match: (Zero [32] destptr mem) // cond: - // result: (MOVQstoreconst [makeStoreConst(0,24)] destptr (MOVQstoreconst [makeStoreConst(0,16)] destptr (MOVQstoreconst [makeStoreConst(0,8)] destptr (MOVQstoreconst [0] destptr mem)))) + // result: (MOVQstoreconst [makeValAndOff(0,24)] destptr (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem)))) { if v.AuxInt != 32 { - goto end2c246614f6a9a07f1a683691b3f5780f + goto end418a59f9f84dd389d37ae5c24aba2760 } destptr := v.Args[0] mem := v.Args[1] @@ -14799,13 +14799,13 @@ end859fe3911b36516ea096299b2a85350e: v.AuxInt = 0 v.Aux = nil v.resetArgs() - v.AuxInt = makeStoreConst(0, 24) + v.AuxInt = makeValAndOff(0, 24) v.AddArg(destptr) v0 := b.NewValue0(v.Line, OpAMD64MOVQstoreconst, TypeInvalid) - v0.AuxInt = makeStoreConst(0, 16) + v0.AuxInt = makeValAndOff(0, 16) v0.AddArg(destptr) v1 := b.NewValue0(v.Line, OpAMD64MOVQstoreconst, TypeInvalid) - v1.AuxInt = makeStoreConst(0, 8) + v1.AuxInt = makeValAndOff(0, 8) v1.AddArg(destptr) v2 := b.NewValue0(v.Line, OpAMD64MOVQstoreconst, TypeInvalid) v2.AuxInt = 0 @@ -14819,8 +14819,8 @@ end859fe3911b36516ea096299b2a85350e: v.AddArg(v0) return true } - goto end2c246614f6a9a07f1a683691b3f5780f -end2c246614f6a9a07f1a683691b3f5780f: + goto end418a59f9f84dd389d37ae5c24aba2760 +end418a59f9f84dd389d37ae5c24aba2760: ; // match: (Zero [size] destptr mem) // cond: size <= 1024 && size%8 == 0 && size%16 != 0 diff --git a/src/cmd/compile/internal/ssa/value.go b/src/cmd/compile/internal/ssa/value.go index 420c408e88..7e6e544e26 100644 --- a/src/cmd/compile/internal/ssa/value.go +++ b/src/cmd/compile/internal/ssa/value.go @@ -61,16 +61,22 @@ func (v *Value) String() string { func (v *Value) LongString() string { s := fmt.Sprintf("v%d = %s", v.ID, v.Op.String()) s += " <" + v.Type.String() + ">" - if v.AuxInt != 0 { - s += fmt.Sprintf(" [%d]", v.AuxInt) - - switch { - case v.Op == OpConst32F || v.Op == OpConst64F: - s += fmt.Sprintf("(%g)", math.Float64frombits(uint64(v.AuxInt))) - case v.Op == OpConstBool && v.AuxInt == 0: - s += " (false)" - case v.Op == OpConstBool && v.AuxInt == 1: - s += " (true)" + // TODO: use some operator property flags to decide + // what is encoded in the AuxInt field. + switch v.Op { + case OpConst32F, OpConst64F: + s += fmt.Sprintf(" [%g]", math.Float64frombits(uint64(v.AuxInt))) + case OpConstBool: + if v.AuxInt == 0 { + s += " [false]" + } else { + s += " [true]" + } + case OpAMD64MOVBstoreconst, OpAMD64MOVWstoreconst, OpAMD64MOVLstoreconst, OpAMD64MOVQstoreconst: + s += fmt.Sprintf(" [%s]", ValAndOff(v.AuxInt)) + default: + if v.AuxInt != 0 { + s += fmt.Sprintf(" [%d]", v.AuxInt) } } if v.Aux != nil { @@ -132,6 +138,11 @@ func (v *Value) copyInto(b *Block) *Value { c.Aux = v.Aux c.AuxInt = v.AuxInt c.AddArgs(v.Args...) + for _, a := range v.Args { + if a.Type.IsMemory() { + v.Fatalf("can't move a value with a memory arg %s", v.LongString()) + } + } return c } -- cgit v1.3 From 6d40c62732ac76333426bdd6a67f8c1457ac8334 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 4 Feb 2016 18:02:03 -0800 Subject: [dev.ssa] cmd/compile: remove redundant compare ops Flagalloc was recalculating flags is some situations when it didn't need to. Fixed by using the same name for the original flag calculation instruction throughout. Change-Id: Ic0bf58f728a8d87748434dd25a67b0708755e1f8 Reviewed-on: https://go-review.googlesource.com/19237 Run-TryBot: Keith Randall Reviewed-by: David Chase --- src/cmd/compile/internal/ssa/flagalloc.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/cmd/compile/internal/ssa/flagalloc.go') diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go index 85e9c4fbee..7ed1fe5908 100644 --- a/src/cmd/compile/internal/ssa/flagalloc.go +++ b/src/cmd/compile/internal/ssa/flagalloc.go @@ -66,7 +66,7 @@ func flagalloc(f *Func) { for _, b := range f.Blocks { oldSched = append(oldSched[:0], b.Values...) b.Values = b.Values[:0] - // The current live flag value. + // The current live flag value the pre-flagalloc copy). var flag *Value if len(b.Preds) > 0 { flag = end[b.Preds[0].ID] @@ -95,7 +95,7 @@ func flagalloc(f *Func) { // Update v. v.SetArg(i, c) // Remember the most-recently computed flag value. - flag = c + flag = a } // Issue v. b.Values = append(b.Values, v) @@ -110,7 +110,7 @@ func flagalloc(f *Func) { // Recalculate control value. c := v.copyInto(b) b.Control = c - flag = c + flag = v } if v := end[b.ID]; v != nil && v != flag { // Need to reissue flag generator for use by -- cgit v1.3