1 files changed, 155 insertions, 142 deletions
diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go
index 3eb5c3cf4a..797a6b05e6 100644
--- a/src/cmd/compile/internal/ssa/stackalloc.go
+++ b/src/cmd/compile/internal/ssa/stackalloc.go
@@ -6,55 +6,65 @@
 
 package ssa
 
+import "fmt"
+
+const stackDebug = false // TODO: compiler flag
+
+type stackAllocState struct {
+	f         *Func
+	values    []stackValState
+	live      [][]ID // live[b.id] = live values at the end of block b.
+	interfere [][]ID // interfere[v.id] = values that interfere with v.
+}
+
+type stackValState struct {
+	typ      Type
+	spill    *Value
+	needSlot bool
+}
+
 // stackalloc allocates storage in the stack frame for
 // all Values that did not get a register.
-func stackalloc(f *Func) {
-	// Cache value types by ID.
-	types := make([]Type, f.NumValues())
-	for _, b := range f.Blocks {
-		for _, v := range b.Values {
-			types[v.ID] = v.Type
-		}
+// Returns a map from block ID to the stack values live at the end of that block.
+func stackalloc(f *Func, spillLive [][]ID) [][]ID {
+	if stackDebug {
+		fmt.Println("before stackalloc")
+		fmt.Println(f.String())
 	}
+	var s stackAllocState
+	s.init(f, spillLive)
+	s.stackalloc()
+	return s.live
+}
 
-	// Build interference graph among StoreReg and stack phi ops.
-	live := f.liveSpills()
-	interfere := make([][]ID, f.NumValues())
-	s := newSparseSet(f.NumValues())
-	for _, b := range f.Blocks {
-		// Start with known live values at the end of the block.
-		s.clear()
-		for i := 0; i < len(b.Succs); i++ {
-			s.addAll(live[b.ID][i])
-		}
+func (s *stackAllocState) init(f *Func, spillLive [][]ID) {
+	s.f = f
 
-		// Propagate backwards to the start of the block.
-		// Remember interfering sets.
-		for i := len(b.Values) - 1; i >= 0; i-- {
-			v := b.Values[i]
-			switch {
-			case v.Op == OpStoreReg, v.isStackPhi():
-				s.remove(v.ID)
-				for _, id := range s.contents() {
-					if v.Type.Equal(types[id]) {
-						// Only need interferences between equivalent types.
-						interfere[v.ID] = append(interfere[v.ID], id)
-						interfere[id] = append(interfere[id], v.ID)
-					}
-				}
-			case v.Op == OpLoadReg:
-				s.add(v.Args[0].ID)
-			case v.Op == OpArg:
-				// This is an input argument which is pre-spilled.  It is kind of
-				// like a StoreReg, but we don't remove v.ID here because we want
-				// this value to appear live even before this point.  Being live
-				// all the way to the start of the entry block prevents other
-				// values from being allocated to the same slot and clobbering
-				// the input value before we have a chance to load it.
+	// Initialize value information.
+	s.values = make([]stackValState, f.NumValues())
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			s.values[v.ID].typ = v.Type
+			s.values[v.ID].needSlot = !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && f.getHome(v.ID) == nil && !v.rematerializeable()
+			if stackDebug && s.values[v.ID].needSlot {
+				fmt.Printf("%s needs a stack slot\n", v)
+			}
+			if v.Op == OpStoreReg {
+				s.values[v.Args[0].ID].spill = v
 			}
 		}
 	}
 
+	// Compute liveness info for values needing a slot.
+	s.computeLive(spillLive)
+
+	// Build interference graph among values needing a slot.
+	s.buildInterferenceGraph()
+}
+
+func (s *stackAllocState) stackalloc() {
+	f := s.f
+
 	// Build map from values to their names, if any.
 	// A value may be associated with more than one name (e.g. after
 	// the assignment i=j). This step picks one name per value arbitrarily.
@@ -67,49 +77,41 @@ func stackalloc(f *Func) {
 		}
 	}
 
-	// Figure out which StoreReg ops are phi args.  We don't pick slots for
-	// phi args because a stack phi and its args must all use the same stack slot.
-	phiArg := make([]bool, f.NumValues())
-	for _, b := range f.Blocks {
-		for _, v := range b.Values {
-			if !v.isStackPhi() {
-				continue
-			}
-			for _, a := range v.Args {
-				phiArg[a.ID] = true
-			}
-		}
-	}
-
 	// Allocate args to their assigned locations.
 	for _, v := range f.Entry.Values {
 		if v.Op != OpArg {
 			continue
 		}
-		f.setHome(v, LocalSlot{v.Aux.(GCNode), v.Type, v.AuxInt})
+		loc := LocalSlot{v.Aux.(GCNode), v.Type, v.AuxInt}
+		if stackDebug {
+			fmt.Printf("stackalloc %s to %s\n", v, loc.Name())
+		}
+		f.setHome(v, loc)
 	}
 
 	// For each type, we keep track of all the stack slots we
 	// have allocated for that type.
+	// TODO: share slots among equivalent types.  We would need to
+	// only share among types with the same GC signature.  See the
+	// type.Equal calls below for where this matters.
 	locations := map[Type][]LocalSlot{}
 
 	// Each time we assign a stack slot to a value v, we remember
 	// the slot we used via an index into locations[v.Type].
-	// TODO: share slots among equivalent types.
 	slots := make([]int, f.NumValues())
 	for i := f.NumValues() - 1; i >= 0; i-- {
 		slots[i] = -1
 	}
 
-	// Pick a stack slot for each non-phi-arg StoreReg and each stack phi.
+	// Pick a stack slot for each value needing one.
 	used := make([]bool, f.NumValues())
 	for _, b := range f.Blocks {
 		for _, v := range b.Values {
-			if v.Op != OpStoreReg && !v.isStackPhi() {
+			if !s.values[v.ID].needSlot {
 				continue
 			}
-			if phiArg[v.ID] {
-				continue
+			if v.Op == OpArg {
+				continue // already picked
 			}
 
 			// If this is a named value, try to use the name as
@@ -121,7 +123,7 @@ func stackalloc(f *Func) {
 				name = names[v.ID]
 			}
 			if name.N != nil && v.Type.Equal(name.Type) {
-				for _, id := range interfere[v.ID] {
+				for _, id := range s.interfere[v.ID] {
 					h := f.getHome(id)
 					if h != nil && h.(LocalSlot) == name {
 						// A variable can interfere with itself.
@@ -129,22 +131,10 @@ func stackalloc(f *Func) {
 						goto noname
 					}
 				}
-				if v.Op == OpPhi {
-					for _, a := range v.Args {
-						for _, id := range interfere[a.ID] {
-							h := f.getHome(id)
-							if h != nil && h.(LocalSlot) == name {
-								goto noname
-							}
-						}
-					}
+				if stackDebug {
+					fmt.Printf("stackalloc %s to %s\n", v, name.Name())
 				}
 				f.setHome(v, name)
-				if v.Op == OpPhi {
-					for _, a := range v.Args {
-						f.setHome(a, name)
-					}
-				}
 				continue
 			}
 
@@ -155,25 +145,12 @@ func stackalloc(f *Func) {
 			for i := 0; i < len(locs); i++ {
 				used[i] = false
 			}
-			for _, xid := range interfere[v.ID] {
+			for _, xid := range s.interfere[v.ID] {
 				slot := slots[xid]
 				if slot >= 0 {
 					used[slot] = true
 				}
 			}
-			if v.Op == OpPhi {
-				// Stack phi and args must get the same stack slot, so
-				// anything the args interfere with is something the phi
-				// interferes with.
-				for _, a := range v.Args {
-					for _, xid := range interfere[a.ID] {
-						slot := slots[xid]
-						if slot >= 0 {
-							used[slot] = true
-						}
-					}
-				}
-			}
 			// Find an unused stack slot.
 			var i int
 			for i = 0; i < len(locs); i++ {
@@ -188,83 +165,80 @@ func stackalloc(f *Func) {
 			}
 			// Use the stack variable at that index for v.
 			loc := locs[i]
+			if stackDebug {
+				fmt.Printf("stackalloc %s to %s\n", v, loc.Name())
+			}
 			f.setHome(v, loc)
 			slots[v.ID] = i
-			if v.Op == OpPhi {
-				for _, a := range v.Args {
-					f.setHome(a, loc)
-					slots[a.ID] = i
-				}
-			}
 		}
 	}
 }
 
-// live returns a map from block ID and successor edge index to a list
-// of StoreReg/stackphi value IDs live on that edge.
+// computeLive computes a map from block ID to a list of
+// stack-slot-needing value IDs live at the end of that block.
 // TODO: this could be quadratic if lots of variables are live across lots of
 // basic blocks.  Figure out a way to make this function (or, more precisely, the user
 // of this function) require only linear size & time.
-func (f *Func) liveSpills() [][][]ID {
-	live := make([][][]ID, f.NumBlocks())
-	for _, b := range f.Blocks {
-		live[b.ID] = make([][]ID, len(b.Succs))
-	}
+func (s *stackAllocState) computeLive(spillLive [][]ID) {
+	s.live = make([][]ID, s.f.NumBlocks())
 	var phis []*Value
-
-	s := newSparseSet(f.NumValues())
-	t := newSparseSet(f.NumValues())
+	live := newSparseSet(s.f.NumValues())
+	t := newSparseSet(s.f.NumValues())
 
 	// Instead of iterating over f.Blocks, iterate over their postordering.
 	// Liveness information flows backward, so starting at the end
 	// increases the probability that we will stabilize quickly.
-	po := postorder(f)
+	po := postorder(s.f)
 	for {
 		changed := false
 		for _, b := range po {
 			// Start with known live values at the end of the block
-			s.clear()
-			for i := 0; i < len(b.Succs); i++ {
-				s.addAll(live[b.ID][i])
-			}
+			live.clear()
+			live.addAll(s.live[b.ID])
 
 			// Propagate backwards to the start of the block
 			phis = phis[:0]
 			for i := len(b.Values) - 1; i >= 0; i-- {
 				v := b.Values[i]
-				switch {
-				case v.Op == OpStoreReg:
-					s.remove(v.ID)
-				case v.Op == OpLoadReg:
-					s.add(v.Args[0].ID)
-				case v.isStackPhi():
-					s.remove(v.ID)
-					// save stack phi ops for later
-					phis = append(phis, v)
+				live.remove(v.ID)
+				if v.Op == OpPhi {
+					// Save phi for later.
+					// Note: its args might need a stack slot even though
+					// the phi itself doesn't.  So don't use needSlot.
+					if !v.Type.IsMemory() && !v.Type.IsVoid() {
+						phis = append(phis, v)
+					}
+					continue
+				}
+				for _, a := range v.Args {
+					if s.values[a.ID].needSlot {
+						live.add(a.ID)
+					}
 				}
 			}
 
 			// for each predecessor of b, expand its list of live-at-end values
 			// invariant: s contains the values live at the start of b (excluding phi inputs)
 			for i, p := range b.Preds {
-				// Find index of b in p's successors.
-				var j int
-				for j = 0; j < len(p.Succs); j++ {
-					if p.Succs[j] == b {
-						break
-					}
-				}
 				t.clear()
-				t.addAll(live[p.ID][j])
-				t.addAll(s.contents())
+				t.addAll(s.live[p.ID])
+				t.addAll(live.contents())
+				t.addAll(spillLive[p.ID])
 				for _, v := range phis {
-					t.add(v.Args[i].ID)
+					a := v.Args[i]
+					if s.values[a.ID].needSlot {
+						t.add(a.ID)
+					}
+					if spill := s.values[a.ID].spill; spill != nil {
+						//TODO: remove?  Subsumed by SpillUse?
+						t.add(spill.ID)
+					}
 				}
-				if t.size() == len(live[p.ID][j]) {
+				if t.size() == len(s.live[p.ID]) {
 					continue
 				}
 				// grow p's live set
-				live[p.ID][j] = append(live[p.ID][j][:0], t.contents()...)
+				s.live[p.ID] = append(s.live[p.ID][:0], t.contents()...)
 				changed = true
 			}
 		}
@@ -273,7 +247,11 @@ func (f *Func) liveSpills() [][][]ID {
 			break
 		}
 	}
-	return live
+	if stackDebug {
+		for _, b := range s.f.Blocks {
+			fmt.Printf("stacklive %s %v\n", b, s.live[b.ID])
+		}
+	}
 }
 
 func (f *Func) getHome(vid ID) Location {
@@ -290,16 +268,51 @@ func (f *Func) setHome(v *Value, loc Location) {
 	f.RegAlloc[v.ID] = loc
 }
 
-func (v *Value) isStackPhi() bool {
-	if v.Op != OpPhi {
-		return false
-	}
-	if v.Type == TypeMem {
-		return false
+func (s *stackAllocState) buildInterferenceGraph() {
+	f := s.f
+	s.interfere = make([][]ID, f.NumValues())
+	live := newSparseSet(f.NumValues())
+	for _, b := range f.Blocks {
+		// Propagate liveness backwards to the start of the block.
+		// Two values interfere if one is defined while the other is live.
+		live.clear()
+		live.addAll(s.live[b.ID])
+		for i := len(b.Values) - 1; i >= 0; i-- {
+			v := b.Values[i]
+			if s.values[v.ID].needSlot {
+				live.remove(v.ID)
+				for _, id := range live.contents() {
+					if s.values[v.ID].typ.Equal(s.values[id].typ) {
+						s.interfere[v.ID] = append(s.interfere[v.ID], id)
+						s.interfere[id] = append(s.interfere[id], v.ID)
+					}
+				}
+			}
+			for _, a := range v.Args {
+				if s.values[a.ID].needSlot {
+					live.add(a.ID)
+				}
+			}
+			if v.Op == OpArg && s.values[v.ID].needSlot {
+				// OpArg is an input argument which is pre-spilled.
+				// We add back v.ID here because we want this value
+				// to appear live even before this point.  Being live
+				// all the way to the start of the entry block prevents other
+				// values from being allocated to the same slot and clobbering
+				// the input value before we have a chance to load it.
+				live.add(v.ID)
+			}
+		}
 	}
-	if int(v.ID) >= len(v.Block.Func.RegAlloc) {
-		return true
+	if stackDebug {
+		for vid, i := range s.interfere {
+			if len(i) > 0 {
+				fmt.Printf("v%d interferes with", vid)
+				for _, x := range i {
+					fmt.Printf(" v%d", x)
+				}
+				fmt.Println()
+			}
+		}
 	}
-	return v.Block.Func.RegAlloc[v.ID] == nil
-	// TODO: use a separate opcode for StackPhi?
 }