[dev.ssa] cmd/compile: better register allocator

Reorder how register & stack allocation is done. We used to allocate registers, then fix up merge edges, then allocate stack slots. This lead to lots of unnecessary copies on merge edges: v2 = LoadReg v1 v3 = StoreReg v2 If v1 and v3 are allocated to the same stack slot, then this code is unnecessary. But at regalloc time we didn't know the homes of v1 and v3. To fix this problem, allocate all the stack slots before fixing up the merge edges. That way, we know what stack slots values use so we know what copies are required. Use a good technique for shuffling values around on merge edges. Improves performance of the go1 TimeParse benchmark by ~12% Change-Id: I731f43e4ff1a7e0dc4cd4aa428fcdb97812b86fa Reviewed-on: https://go-review.googlesource.com/17915 Reviewed-by: David Chase <drchase@google.com>
author: Keith Randall <khr@golang.org> 2015-12-17 10:01:24 -0800
committer: Keith Randall <khr@golang.org> 2015-12-21 23:12:05 +0000
commit: 7d9f1067d1c8a2d0252fa2a115f1d016f94f7087 (patch)
tree: ef655a345667fd21416907cef77fb77a5a1ac4d7 /src/cmd/compile/internal/ssa/stackalloc.go
parent: 5b355a7907550d6fe457fdf6a92fc320d5a764d5 (diff)
download: go-7d9f1067d1c8a2d0252fa2a115f1d016f94f7087.tar.xz
1 files changed, 155 insertions, 142 deletions
diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go
index 3eb5c3cf4a..797a6b05e6 100644
--- a/src/cmd/compile/internal/ssa/stackalloc.go
+++ b/src/cmd/compile/internal/ssa/stackalloc.go
@@ -6,55 +6,65 @@
 
 package ssa
 
+import "fmt"
+
+const stackDebug = false // TODO: compiler flag
+
+type stackAllocState struct {
+	f         *Func
+	values    []stackValState
+	live      [][]ID // live[b.id] = live values at the end of block b.
+	interfere [][]ID // interfere[v.id] = values that interfere with v.
+}
+
+type stackValState struct {
+	typ      Type
+	spill    *Value
+	needSlot bool
+}
+
 // stackalloc allocates storage in the stack frame for
 // all Values that did not get a register.
-func stackalloc(f *Func) {
-	// Cache value types by ID.
-	types := make([]Type, f.NumValues())
-	for _, b := range f.Blocks {
-		for _, v := range b.Values {
-			types[v.ID] = v.Type
-		}
+// Returns a map from block ID to the stack values live at the end of that block.
+func stackalloc(f *Func, spillLive [][]ID) [][]ID {
+	if stackDebug {
+		fmt.Println("before stackalloc")
+		fmt.Println(f.String())
 	}
+	var s stackAllocState
+	s.init(f, spillLive)
+	s.stackalloc()
+	return s.live
+}
 
-	// Build interference graph among StoreReg and stack phi ops.
-	live := f.liveSpills()
-	interfere := make([][]ID, f.NumValues())
-	s := newSparseSet(f.NumValues())
-	for _, b := range f.Blocks {
-		// Start with known live values at the end of the block.
-		s.clear()
-		for i := 0; i < len(b.Succs); i++ {
-			s.addAll(live[b.ID][i])
-		}
+func (s *stackAllocState) init(f *Func, spillLive [][]ID) {
+	s.f = f
 
-		// Propagate backwards to the start of the block.
-		// Remember interfering sets.
-		for i := len(b.Values) - 1; i >= 0; i-- {
-			v := b.Values[i]
-			switch {
-			case v.Op == OpStoreReg, v.isStackPhi():
-				s.remove(v.ID)
-				for _, id := range s.contents() {
-					if v.Type.Equal(types[id]) {
-						// Only need interferences between equivalent types.
-						interfere[v.ID] = append(interfere[v.ID], id)
-						interfere[id] = append(interfere[id], v.ID)
-					}
-				}
-			case v.Op == OpLoadReg:
-				s.add(v.Args[0].ID)
-			case v.Op == OpArg:
-				// This is an input argument which is pre-spilled.  It is kind of
-				// like a StoreReg, but we don't remove v.ID here because we want
-				// this value to appear live even before this point.  Being live
-				// all the way to the start of the entry block prevents other
-				// values from being allocated to the same slot and clobbering
-				// the input value before we have a chance to load it.
+	// Initialize value information.
+	s.values = make([]stackValState, f.NumValues())
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			s.values[v.ID].typ = v.Type
+			s.values[v.ID].needSlot = !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && f.getHome(v.ID) == nil && !v.rematerializeable()
+			if stackDebug && s.values[v.ID].needSlot {
+				fmt.Printf("%s needs a stack slot\n", v)
+			}
+			if v.Op == OpStoreReg {
+				s.values[v.Args[0].ID].spill = v
 			}
 		}
 	}
 
+	// Compute liveness info for values needing a slot.
+	s.computeLive(spillLive)
+
+	// Build interference graph among values needing a slot.
+	s.buildInterferenceGraph()
+}
+
+func (s *stackAllocState) stackalloc() {
+	f := s.f
+
 	// Build map from values to their names, if any.
 	// A value may be associated with more than one name (e.g. after
 	// the assignment i=j). This step picks one name per value arbitrarily.
@@ -67,49 +77,41 @@ func stackalloc(f *Func) {
 		}
 	}
 
-	// Figure out which StoreReg ops are phi args.  We don't pick slots for
-	// phi args because a stack phi and its args must all use the same stack slot.
-	phiArg := make([]bool, f.NumValues())
-	for _, b := range f.Blocks {
-		for _, v := range b.Values {
-			if !v.isStackPhi() {
-				continue
-			}
-			for _, a := range v.Args {
-				phiArg[a.ID] = true
-			}
-		}
-	}
-
 	// Allocate args to their assigned locations.
 	for _, v := range f.Entry.Values {
 		if v.Op != OpArg {
 			continue
 		}
-		f.setHome(v, LocalSlot{v.Aux.(GCNode), v.Type, v.AuxInt})
+		loc := LocalSlot{v.Aux.(GCNode), v.Type, v.AuxInt}
+		if stackDebug {
+			fmt.Printf("stackalloc %s to %s\n", v, loc.Name())
+		}
+		f.setHome(v, loc)
 	}
 
 	// For each type, we keep track of all the stack slots we
 	// have allocated for that type.
+	// TODO: share slots among equivalent types.  We would need to
+	// only share among types with the same GC signature.  See the
+	// type.Equal calls below for where this matters.
 	locations := map[Type][]LocalSlot{}
 
 	// Each time we assign a stack slot to a value v, we remember
 	// the slot we used via an index into locations[v.Type].
-	// TODO: share slots among equivalent types.
 	slots := make([]int, f.NumValues())
 	for i := f.NumValues() - 1; i >= 0; i-- {
 		slots[i] = -1
 	}
 
-	// Pick a stack slot for each non-phi-arg StoreReg and each stack phi.
+	// Pick a stack slot for each value needing one.
 	used := make([]bool, f.NumValues())
 	for _, b := range f.Blocks {
 		for _, v := range b.Values {
-			if v.Op != OpStoreReg && !v.isStackPhi() {
+			if !s.values[v.ID].needSlot {
 				continue
 			}
-			if phiArg[v.ID] {
-				continue
+			if v.Op == OpArg {
+				continue // already picked
 			}
 
 			// If this is a named value, try to use the name as
@@ -121,7 +123,7 @@ func stackalloc(f *Func) {
 				name = names[v.ID]
 			}
 			if name.N != nil && v.Type.Equal(name.Type) {
-				for _, id := range interfere[v.ID] {
+				for _, id := range s.interfere[v.ID] {
 					h := f.getHome(id)
 					if h != nil && h.(LocalSlot) == name {
 						// A variable can interfere with itself.
@@ -129,22 +131,10 @@ func stackalloc(f *Func) {
 						goto noname
 					}
 				}
-				if v.Op == OpPhi {
-					for _, a := range v.Args {
-						for _, id := range interfere[a.ID] {
-							h := f.getHome(id)
-							if h != nil && h.(LocalSlot) == name {
-								goto noname
-							}
-						}
-					}
+				if stackDebug {
+					fmt.Printf("stackalloc %s to %s\n", v, name.Name())
 				}
 				f.setHome(v, name)
-				if v.Op == OpPhi {
-					for _, a := range v.Args {
-						f.setHome(a, name)
-					}
-				}
 				continue
 			}
 
@@ -155,25 +145,12 @@ func stackalloc(f *Func) {
 			for i := 0; i < len(locs); i++ {
 				used[i] = false
 			}
-			for _, xid := range interfere[v.ID] {
+			for _, xid := range s.interfere[v.ID] {
 				slot := slots[xid]
 				if slot >= 0 {
 					used[slot] = true
 				}
 			}
-			if v.Op == OpPhi {
-				// Stack phi and args must get the same stack slot, so
-				// anything the args interfere with is something the phi
-				// interferes with.
-				for _, a := range v.Args {
-					for _, xid := range interfere[a.ID] {
-						slot := slots[xid]
-						if slot >= 0 {
-							used[slot] = true
-						}
-					}
-				}
-			}
 			// Find an unused stack slot.
 			var i int
 			for i = 0; i < len(locs); i++ {
@@ -188,83 +165,80 @@ func stackalloc(f *Func) {
 			}
 			// Use the stack variable at that index for v.
 			loc := locs[i]
+			if stackDebug {
+				fmt.Printf("stackalloc %s to %s\n", v, loc.Name())
+			}
 			f.setHome(v, loc)
 			slots[v.ID] = i
-			if v.Op == OpPhi {
-				for _, a := range v.Args {
-					f.setHome(a, loc)
-					slots[a.ID] = i
-				}
-			}
 		}
 	}
 }
 
-// live returns a map from block ID and successor edge index to a list
-// of StoreReg/stackphi value IDs live on that edge.
+// computeLive computes a map from block ID to a list of
+// stack-slot-needing value IDs live at the end of that block.
 // TODO: this could be quadratic if lots of variables are live across lots of
 // basic blocks.  Figure out a way to make this function (or, more precisely, the user
 // of this function) require only linear size & time.
-func (f *Func) liveSpills() [][][]ID {
-	live := make([][][]ID, f.NumBlocks())
-	for _, b := range f.Blocks {
-		live[b.ID] = make([][]ID, len(b.Succs))
-	}
+func (s *stackAllocState) computeLive(spillLive [][]ID) {
+	s.live = make([][]ID, s.f.NumBlocks())
 	var phis []*Value
-
-	s := newSparseSet(f.NumValues())
-	t := newSparseSet(f.NumValues())
+	live := newSparseSet(s.f.NumValues())
+	t := newSparseSet(s.f.NumValues())
 
 	// Instead of iterating over f.Blocks, iterate over their postordering.
 	// Liveness information flows backward, so starting at the end
 	// increases the probability that we will stabilize quickly.
-	po := postorder(f)
+	po := postorder(s.f)
 	for {
 		changed := false
 		for _, b := range po {
 			// Start with known live values at the end of the block
-			s.clear()
-			for i := 0; i < len(b.Succs); i++ {
-				s.addAll(live[b.ID][i])
-			}
+			live.clear()
+			live.addAll(s.live[b.ID])
 
 			// Propagate backwards to the start of the block
 			phis = phis[:0]
 			for i := len(b.Values) - 1; i >= 0; i-- {
 				v := b.Values[i]
-				switch {
-				case v.Op == OpStoreReg:
-					s.remove(v.ID)
-				case v.Op == OpLoadReg:
-					s.add(v.Args[0].ID)
-				case v.isStackPhi():
-					s.remove(v.ID)
-					// save stack phi ops for later
-					phis = append(phis, v)
+				live.remove(v.ID)
+				if v.Op == OpPhi {
+					// Save phi for later.
+					// Note: its args might need a stack slot even though
+					// the phi itself doesn't.  So don't use needSlot.
+					if !v.Type.IsMemory() && !v.Type.IsVoid() {
+						phis = append(phis, v)
+					}
+					continue
+				}
+				for _, a := range v.Args {
+					if s.values[a.ID].needSlot {
+						live.add(a.ID)
+					}
 				}
 			}
 
 			// for each predecessor of b, expand its list of live-at-end values
 			// invariant: s contains the values live at the start of b (excluding phi inputs)
 			for i, p := range b.Preds {
-				// Find index of b in p's successors.
-				var j int
-				for j = 0; j < len(p.Succs); j++ {
-					if p.Succs[j] == b {
-						break
-					}
-				}
 				t.clear()
-				t.addAll(live[p.ID][j])
-				t.addAll(s.contents())
+				t.addAll(s.live[p.ID])
+				t.addAll(live.contents())
+				t.addAll(spillLive[p.ID])
 				for _, v := range phis {
-					t.add(v.Args[i].ID)
+					a := v.Args[i]
+					if s.values[a.ID].needSlot {
+						t.add(a.ID)
+					}
+					if spill := s.values[a.ID].spill; spill != nil {
+						//TODO: remove?  Subsumed by SpillUse?
+						t.add(spill.ID)
+					}
 				}
-				if t.size() == len(live[p.ID][j]) {
+				if t.size() == len(s.live[p.ID]) {
 					continue
 				}
 				// grow p's live set
-				live[p.ID][j] = append(live[p.ID][j][:0], t.contents()...)
+				s.live[p.ID] = append(s.live[p.ID][:0], t.contents()...)
 				changed = true
 			}
 		}
@@ -273,7 +247,11 @@ func (f *Func) liveSpills() [][][]ID {
 			break
 		}
 	}
-	return live
+	if stackDebug {
+		for _, b := range s.f.Blocks {
+			fmt.Printf("stacklive %s %v\n", b, s.live[b.ID])
+		}
+	}
 }
 
 func (f *Func) getHome(vid ID) Location {
@@ -290,16 +268,51 @@ func (f *Func) setHome(v *Value, loc Location) {
 	f.RegAlloc[v.ID] = loc
 }
 
-func (v *Value) isStackPhi() bool {
-	if v.Op != OpPhi {
-		return false
-	}
-	if v.Type == TypeMem {
-		return false
+func (s *stackAllocState) buildInterferenceGraph() {
+	f := s.f
+	s.interfere = make([][]ID, f.NumValues())
+	live := newSparseSet(f.NumValues())
+	for _, b := range f.Blocks {
+		// Propagate liveness backwards to the start of the block.
+		// Two values interfere if one is defined while the other is live.
+		live.clear()
+		live.addAll(s.live[b.ID])
+		for i := len(b.Values) - 1; i >= 0; i-- {
+			v := b.Values[i]
+			if s.values[v.ID].needSlot {
+				live.remove(v.ID)
+				for _, id := range live.contents() {
+					if s.values[v.ID].typ.Equal(s.values[id].typ) {
+						s.interfere[v.ID] = append(s.interfere[v.ID], id)
+						s.interfere[id] = append(s.interfere[id], v.ID)
+					}
+				}
+			}
+			for _, a := range v.Args {
+				if s.values[a.ID].needSlot {
+					live.add(a.ID)
+				}
+			}
+			if v.Op == OpArg && s.values[v.ID].needSlot {
+				// OpArg is an input argument which is pre-spilled.
+				// We add back v.ID here because we want this value
+				// to appear live even before this point.  Being live
+				// all the way to the start of the entry block prevents other
+				// values from being allocated to the same slot and clobbering
+				// the input value before we have a chance to load it.
+				live.add(v.ID)
+			}
+		}
 	}
-	if int(v.ID) >= len(v.Block.Func.RegAlloc) {
-		return true
+	if stackDebug {
+		for vid, i := range s.interfere {
+			if len(i) > 0 {
+				fmt.Printf("v%d interferes with", vid)
+				for _, x := range i {
+					fmt.Printf(" v%d", x)
+				}
+				fmt.Println()
+			}
+		}
 	}
-	return v.Block.Func.RegAlloc[v.ID] == nil
-	// TODO: use a separate opcode for StackPhi?
 }
author	Keith Randall <khr@golang.org>	2015-12-17 10:01:24 -0800
committer	Keith Randall <khr@golang.org>	2015-12-21 23:12:05 +0000
commit	7d9f1067d1c8a2d0252fa2a115f1d016f94f7087 (patch)
tree	ef655a345667fd21416907cef77fb77a5a1ac4d7 /src/cmd/compile/internal/ssa/stackalloc.go
parent	5b355a7907550d6fe457fdf6a92fc320d5a764d5 (diff)
download	go-7d9f1067d1c8a2d0252fa2a115f1d016f94f7087.tar.xz