cmd/compile: add x.Uses==1 test to load combiners

We need to make sure that when we combine loads, we only do so if there are no other uses of the load. We can't split one load into two because that can then lead to inconsistent loaded values in the presence of races. Add some aggressive copy removal code so that phantom "dead copy" uses of values are cleaned up promptly. This lets us use x.Uses==1 conditions reliably. Change-Id: I9037311db85665f3868dbeb3adb3de5c20728b38 Reviewed-on: https://go-review.googlesource.com/21853 Reviewed-by: Todd Neal <todd@tneal.org>
author: Keith Randall <khr@golang.org> 2016-04-11 21:23:11 -0700
committer: Keith Randall <khr@golang.org> 2016-04-12 15:10:16 +0000
commit: cd85f711c0b6847cbfe4e05f4402df075ea936de (patch)
tree: d9fa34021c95884eec601e218c9ff2ab03cb8d88 /src
parent: 204b6f48c5107d3132033324fd492ca0253568dc (diff)
download: go-cd85f711c0b6847cbfe4e05f4402df075ea936de.tar.xz
7 files changed, 129 insertions, 21 deletions
diff --git a/src/cmd/compile/internal/gc/ssa_test.go b/src/cmd/compile/internal/gc/ssa_test.go
index 0fb0f17778..46e1b0a7d3 100644
--- a/src/cmd/compile/internal/gc/ssa_test.go
+++ b/src/cmd/compile/internal/gc/ssa_test.go
@@ -101,3 +101,5 @@ func TestPhi(t *testing.T) { runTest(t, "phi_ssa.go") }
 func TestSlice(t *testing.T) { runTest(t, "slice.go") }
 
 func TestNamedReturn(t *testing.T) { runTest(t, "namedReturn.go") }
+
+func TestDuplicateLoad(t *testing.T) { runTest(t, "dupLoad.go") }
diff --git a/src/cmd/compile/internal/gc/testdata/dupLoad.go b/src/cmd/compile/internal/gc/testdata/dupLoad.go
new file mode 100644
index 0000000000..d12c26355a
--- /dev/null
+++ b/src/cmd/compile/internal/gc/testdata/dupLoad.go
@@ -0,0 +1,46 @@
+// run
+
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This test makes sure that we don't split a single
+// load up into two separate loads.
+
+package main
+
+import "fmt"
+
+//go:noinline
+func read(b []byte) (uint16, uint16) {
+	// There is only a single read of b[0].  The two
+	// returned values must have the same low byte.
+	v := b[0]
+	return uint16(v), uint16(v) | uint16(b[1])<<8
+}
+
+const N = 100000
+
+func main() {
+	done := make(chan struct{})
+	b := make([]byte, 2)
+	go func() {
+		for i := 0; i < N; i++ {
+			b[0] = byte(i)
+			b[1] = byte(i)
+		}
+		done <- struct{}{}
+	}()
+	go func() {
+		for i := 0; i < N; i++ {
+			x, y := read(b)
+			if byte(x) != byte(y) {
+				fmt.Printf("x=%x y=%x\n", x, y)
+				panic("bad")
+			}
+		}
+		done <- struct{}{}
+	}()
+	<-done
+	<-done
+}
diff --git a/src/cmd/compile/internal/gc/testdata/namedReturn.go b/src/cmd/compile/internal/gc/testdata/namedReturn.go
index dafb5d719f..19ef8a7e43 100644
--- a/src/cmd/compile/internal/gc/testdata/namedReturn.go
+++ b/src/cmd/compile/internal/gc/testdata/namedReturn.go
@@ -1,5 +1,9 @@
 // run
 
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
 // This test makes sure that naming named
 // return variables in a return statement works.
 // See issue #14904.
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index dcd5e6a5e1..21c74a9c1c 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1369,13 +1369,13 @@
 // There are many ways these combinations could occur.  This is
 // designed to match the way encoding/binary.LittleEndian does it.
 (ORW               x0:(MOVBload [i]   {s} p mem)
-    (SHLWconst [8] x1:(MOVBload [i+1] {s} p mem))) && mergePoint(b,x0,x1) != nil -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
+    (SHLWconst [8] x1:(MOVBload [i+1] {s} p mem))) && x0.Uses == 1 && x1.Uses == 1 && mergePoint(b,x0,x1) != nil -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
 
 (ORL (ORL (ORL
                     x0:(MOVBload [i]   {s} p mem)
     (SHLLconst [8]  x1:(MOVBload [i+1] {s} p mem)))
     (SHLLconst [16] x2:(MOVBload [i+2] {s} p mem)))
-    (SHLLconst [24] x3:(MOVBload [i+3] {s} p mem))) && mergePoint(b,x0,x1,x2,x3) != nil -> @mergePoint(b,x0,x1,x2,x3) (MOVLload [i] {s} p mem)
+    (SHLLconst [24] x3:(MOVBload [i+3] {s} p mem))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil -> @mergePoint(b,x0,x1,x2,x3) (MOVLload [i] {s} p mem)
 
 (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ
                     x0:(MOVBload [i]   {s} p mem)
@@ -1385,16 +1385,16 @@
     (SHLQconst [32] x4:(MOVBload [i+4] {s} p mem)))
     (SHLQconst [40] x5:(MOVBload [i+5] {s} p mem)))
     (SHLQconst [48] x6:(MOVBload [i+6] {s} p mem)))
-    (SHLQconst [56] x7:(MOVBload [i+7] {s} p mem))) && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem)
+    (SHLQconst [56] x7:(MOVBload [i+7] {s} p mem))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem)
 
 (ORW               x0:(MOVBloadidx1 [i]   {s} p idx mem)
-    (SHLWconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) && mergePoint(b,x0,x1) != nil -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
+    (SHLWconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) && x0.Uses == 1 && x1.Uses == 1 && mergePoint(b,x0,x1) != nil -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
 
 (ORL (ORL (ORL
                     x0:(MOVBloadidx1 [i]   {s} p idx mem)
     (SHLLconst [8]  x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
     (SHLLconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem)))
-    (SHLLconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem))) && mergePoint(b,x0,x1,x2,x3) != nil -> @mergePoint(b,x0,x1,x2,x3) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
+    (SHLLconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil -> @mergePoint(b,x0,x1,x2,x3) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
 
 (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ
                     x0:(MOVBloadidx1 [i]   {s} p idx mem)
@@ -1404,4 +1404,4 @@
     (SHLQconst [32] x4:(MOVBloadidx1 [i+4] {s} p idx mem)))
     (SHLQconst [40] x5:(MOVBloadidx1 [i+5] {s} p idx mem)))
     (SHLQconst [48] x6:(MOVBloadidx1 [i+6] {s} p idx mem)))
-    (SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem))) && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem)
+    (SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem)
diff --git a/src/cmd/compile/internal/ssa/nilcheck_test.go b/src/cmd/compile/internal/ssa/nilcheck_test.go
index c1c8f94767..af6cbe864a 100644
--- a/src/cmd/compile/internal/ssa/nilcheck_test.go
+++ b/src/cmd/compile/internal/ssa/nilcheck_test.go
@@ -418,7 +418,7 @@ func TestNilcheckBug(t *testing.T) {
 			Goto("exit")),
 		Bloc("exit",
 			Valu("phi", OpPhi, TypeMem, 0, nil, "mem", "store"),
-			Exit("mem")))
+			Exit("phi")))
 
 	CheckFunc(fun.f)
 	// we need the opt here to rewrite the user nilcheck
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index e0cb7f517b..c2f8ceadaf 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -40,9 +40,44 @@ func applyRewrite(f *Func, rb func(*Block) bool, rv func(*Value, *Config) bool)
 			}
 			curb = nil
 			for _, v := range b.Values {
-				change = copyelimValue(v) || change
 				change = phielimValue(v) || change
 
+				// Eliminate copy inputs.
+				// If any copy input becomes unused, mark it
+				// as invalid and discard its argument. Repeat
+				// recursively on the discarded argument.
+				// This phase helps remove phantom "dead copy" uses
+				// of a value so that a x.Uses==1 rule condition
+				// fires reliably.
+				for i, a := range v.Args {
+					if a.Op != OpCopy {
+						continue
+					}
+					x := a.Args[0]
+					// Rewriting can generate OpCopy loops.
+					// They are harmless (see removePredecessor),
+					// but take care to stop if we find a cycle.
+					slow := x // advances every other iteration
+					var advance bool
+					for x.Op == OpCopy {
+						x = x.Args[0]
+						if slow == x {
+							break
+						}
+						if advance {
+							slow = slow.Args[0]
+						}
+						advance = !advance
+					}
+					v.SetArg(i, x)
+					change = true
+					for a.Uses == 0 {
+						b := a.Args[0]
+						a.reset(OpInvalid)
+						a = b
+					}
+				}
+
 				// apply rewrite function
 				curv = v
 				if rv(v, config) {
@@ -52,7 +87,28 @@ func applyRewrite(f *Func, rb func(*Block) bool, rv func(*Value, *Config) bool)
 			}
 		}
 		if !change {
-			return
+			break
+		}
+	}
+	// remove clobbered copies
+	for _, b := range f.Blocks {
+		j := 0
+		for i, v := range b.Values {
+			if v.Op == OpInvalid {
+				f.freeValue(v)
+				continue
+			}
+			if i != j {
+				b.Values[j] = v
+			}
+			j++
+		}
+		if j != len(b.Values) {
+			tail := b.Values[j:]
+			for j := range tail {
+				tail[j] = nil
+			}
+			b.Values = b.Values[:j]
 		}
 	}
 }
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index a6600513fa..d1793ad8c0 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -12666,7 +12666,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool {
 		return true
 	}
 	// match: (ORL (ORL (ORL                     x0:(MOVBload [i]   {s} p mem)     (SHLLconst [8]  x1:(MOVBload [i+1] {s} p mem)))     (SHLLconst [16] x2:(MOVBload [i+2] {s} p mem)))     (SHLLconst [24] x3:(MOVBload [i+3] {s} p mem)))
-	// cond: mergePoint(b,x0,x1,x2,x3) != nil
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil
 	// result: @mergePoint(b,x0,x1,x2,x3) (MOVLload [i] {s} p mem)
 	for {
 		v_0 := v.Args[0]
@@ -12754,7 +12754,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool {
 		if mem != x3.Args[1] {
 			break
 		}
-		if !(mergePoint(b, x0, x1, x2, x3) != nil) {
+		if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil) {
 			break
 		}
 		b = mergePoint(b, x0, x1, x2, x3)
@@ -12768,7 +12768,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool {
 		return true
 	}
 	// match: (ORL (ORL (ORL                     x0:(MOVBloadidx1 [i]   {s} p idx mem)     (SHLLconst [8]  x1:(MOVBloadidx1 [i+1] {s} p idx mem)))     (SHLLconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem)))     (SHLLconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem)))
-	// cond: mergePoint(b,x0,x1,x2,x3) != nil
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil
 	// result: @mergePoint(b,x0,x1,x2,x3) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
 	for {
 		v_0 := v.Args[0]
@@ -12866,7 +12866,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool {
 		if mem != x3.Args[2] {
 			break
 		}
-		if !(mergePoint(b, x0, x1, x2, x3) != nil) {
+		if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil) {
 			break
 		}
 		b = mergePoint(b, x0, x1, x2, x3)
@@ -12980,7 +12980,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool {
 		return true
 	}
 	// match: (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ                     x0:(MOVBload [i]   {s} p mem)     (SHLQconst [8]  x1:(MOVBload [i+1] {s} p mem)))     (SHLQconst [16] x2:(MOVBload [i+2] {s} p mem)))     (SHLQconst [24] x3:(MOVBload [i+3] {s} p mem)))     (SHLQconst [32] x4:(MOVBload [i+4] {s} p mem)))     (SHLQconst [40] x5:(MOVBload [i+5] {s} p mem)))     (SHLQconst [48] x6:(MOVBload [i+6] {s} p mem)))     (SHLQconst [56] x7:(MOVBload [i+7] {s} p mem)))
-	// cond: mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
 	// result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem)
 	for {
 		v_0 := v.Args[0]
@@ -13176,7 +13176,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool {
 		if mem != x7.Args[1] {
 			break
 		}
-		if !(mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil) {
+		if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil) {
 			break
 		}
 		b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
@@ -13190,7 +13190,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool {
 		return true
 	}
 	// match: (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ                     x0:(MOVBloadidx1 [i]   {s} p idx mem)     (SHLQconst [8]  x1:(MOVBloadidx1 [i+1] {s} p idx mem)))     (SHLQconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem)))     (SHLQconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem)))     (SHLQconst [32] x4:(MOVBloadidx1 [i+4] {s} p idx mem)))     (SHLQconst [40] x5:(MOVBloadidx1 [i+5] {s} p idx mem)))     (SHLQconst [48] x6:(MOVBloadidx1 [i+6] {s} p idx mem)))     (SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem)))
-	// cond: mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
+	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
 	// result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem)
 	for {
 		v_0 := v.Args[0]
@@ -13408,7 +13408,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool {
 		if mem != x7.Args[2] {
 			break
 		}
-		if !(mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil) {
+		if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil) {
 			break
 		}
 		b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
@@ -13514,7 +13514,7 @@ func rewriteValueAMD64_OpAMD64ORW(v *Value, config *Config) bool {
 		return true
 	}
 	// match: (ORW               x0:(MOVBload [i]   {s} p mem)     (SHLWconst [8] x1:(MOVBload [i+1] {s} p mem)))
-	// cond: mergePoint(b,x0,x1) != nil
+	// cond: x0.Uses == 1 && x1.Uses == 1 && mergePoint(b,x0,x1) != nil
 	// result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
 	for {
 		x0 := v.Args[0]
@@ -13548,7 +13548,7 @@ func rewriteValueAMD64_OpAMD64ORW(v *Value, config *Config) bool {
 		if mem != x1.Args[1] {
 			break
 		}
-		if !(mergePoint(b, x0, x1) != nil) {
+		if !(x0.Uses == 1 && x1.Uses == 1 && mergePoint(b, x0, x1) != nil) {
 			break
 		}
 		b = mergePoint(b, x0, x1)
@@ -13562,7 +13562,7 @@ func rewriteValueAMD64_OpAMD64ORW(v *Value, config *Config) bool {
 		return true
 	}
 	// match: (ORW               x0:(MOVBloadidx1 [i]   {s} p idx mem)     (SHLWconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
-	// cond: mergePoint(b,x0,x1) != nil
+	// cond: x0.Uses == 1 && x1.Uses == 1 && mergePoint(b,x0,x1) != nil
 	// result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
 	for {
 		x0 := v.Args[0]
@@ -13600,7 +13600,7 @@ func rewriteValueAMD64_OpAMD64ORW(v *Value, config *Config) bool {
 		if mem != x1.Args[2] {
 			break
 		}
-		if !(mergePoint(b, x0, x1) != nil) {
+		if !(x0.Uses == 1 && x1.Uses == 1 && mergePoint(b, x0, x1) != nil) {
 			break
 		}
 		b = mergePoint(b, x0, x1)
author	Keith Randall <khr@golang.org>	2016-04-11 21:23:11 -0700
committer	Keith Randall <khr@golang.org>	2016-04-12 15:10:16 +0000
commit	cd85f711c0b6847cbfe4e05f4402df075ea936de (patch)
tree	d9fa34021c95884eec601e218c9ff2ab03cb8d88 /src
parent	204b6f48c5107d3132033324fd492ca0253568dc (diff)
download	go-cd85f711c0b6847cbfe4e05f4402df075ea936de.tar.xz