aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/loopbce.go
diff options
context:
space:
mode:
authorAlexandru Moșoi <mosoi@google.com>2016-03-02 12:58:27 +0100
committerAlexandru Moșoi <alexandru@mosoi.ro>2016-04-01 09:37:58 +0000
commitb91cc5303364c4aae758ff1f0b4efc66b7802700 (patch)
tree0e2e96fb803283c9a34c3a44de37f096afd4f596 /src/cmd/compile/internal/ssa/loopbce.go
parentea306ae625d001a43ef20163739593a21be51f97 (diff)
downloadgo-b91cc5303364c4aae758ff1f0b4efc66b7802700.tar.xz
cmd/compile/internal/ssa: BCE for induction variables
There are 5293 loop in the main go repository. A survey of the top most common for loops: 18 for __k__ := 0; i < len(sa.Addr); i++ { 19 for __k__ := 0; ; i++ { 19 for __k__ := 0; i < 16; i++ { 25 for __k__ := 0; i < length; i++ { 30 for __k__ := 0; i < 8; i++ { 49 for __k__ := 0; i < len(s); i++ { 67 for __k__ := 0; i < n; i++ { 376 for __k__ := range __slice__ { 685 for __k__, __v__ := range __slice__ { 2074 for __, __v__ := range __slice__ { The algorithm to find induction variables handles all cases with an upper limit. It currently doesn't find related induction variables such as c * ind or c + ind. 842 out of 22954 bound checks are removed for src/make.bash. 1957 out of 42952 bounds checks are removed for src/all.bash. Things to do in follow-up CLs: * Find the associated pointer for `for _, v := range a {}` * Drop the NilChecks on the pointer. * Replace the implicit induction variable by a loop over the pointer Generated garbage can be reduced if we share the sdom between passes. % benchstat old.txt new.txt name old time/op new time/op delta Template 337ms ± 3% 333ms ± 3% ~ (p=0.258 n=9+9) GoTypes 1.11s ± 2% 1.10s ± 2% ~ (p=0.912 n=10+10) Compiler 5.25s ± 1% 5.29s ± 2% ~ (p=0.077 n=9+9) MakeBash 33.5s ± 1% 34.1s ± 2% +1.85% (p=0.011 n=9+9) name old alloc/op new alloc/op delta Template 63.6MB ± 0% 63.9MB ± 0% +0.52% (p=0.000 n=10+9) GoTypes 218MB ± 0% 219MB ± 0% +0.59% (p=0.000 n=10+9) Compiler 978MB ± 0% 985MB ± 0% +0.69% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 582k ± 0% 583k ± 0% +0.10% (p=0.000 n=10+10) GoTypes 1.78M ± 0% 1.78M ± 0% +0.12% (p=0.000 n=10+10) Compiler 7.68M ± 0% 7.69M ± 0% +0.05% (p=0.000 n=10+10) name old text-bytes new text-bytes delta HelloSize 581k ± 0% 581k ± 0% -0.08% (p=0.000 n=10+10) CmdGoSize 6.40M ± 0% 6.39M ± 0% -0.08% (p=0.000 n=10+10) name old data-bytes new data-bytes delta HelloSize 3.66k ± 0% 3.66k ± 0% ~ (all samples are equal) CmdGoSize 134k ± 0% 134k ± 0% ~ (all samples are equal) name old bss-bytes new bss-bytes delta HelloSize 126k ± 0% 126k ± 0% ~ (all samples are equal) CmdGoSize 149k ± 0% 149k ± 0% ~ (all samples are equal) name old exe-bytes new exe-bytes delta HelloSize 947k ± 0% 946k ± 0% -0.01% (p=0.000 n=10+10) CmdGoSize 9.92M ± 0% 9.91M ± 0% -0.06% (p=0.000 n=10+10) Change-Id: Ie74bdff46fd602db41bb457333d3a762a0c3dc4d Reviewed-on: https://go-review.googlesource.com/20517 Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Alexandru Moșoi <alexandru@mosoi.ro>
Diffstat (limited to 'src/cmd/compile/internal/ssa/loopbce.go')
-rw-r--r--src/cmd/compile/internal/ssa/loopbce.go260
1 files changed, 260 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/loopbce.go b/src/cmd/compile/internal/ssa/loopbce.go
new file mode 100644
index 0000000000..7fbb48a7fc
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/loopbce.go
@@ -0,0 +1,260 @@
+package ssa
+
+type indVar struct {
+ ind *Value // induction variable
+ inc *Value // increment, a constant
+ nxt *Value // ind+inc variable
+ min *Value // minimum value. inclusive,
+ max *Value // maximum value. exclusive.
+ entry *Block // entry block in the loop.
+ // Invariants: for all blocks dominated by entry:
+ // min <= ind < max
+ // min <= nxt <= max
+}
+
+// findIndVar finds induction variables in a function.
+//
+// Look for variables and blocks that satisfy the following
+//
+// loop:
+// ind = (Phi min nxt),
+// if ind < max
+// then goto enter_loop
+// else goto exit_loop
+//
+// enter_loop:
+// do something
+// nxt = inc + ind
+// goto loop
+//
+// exit_loop:
+//
+//
+// TODO: handle 32 bit operations
+func findIndVar(f *Func, sdom sparseTree) []indVar {
+ var iv []indVar
+
+nextb:
+ for _, b := range f.Blocks {
+ if b.Kind != BlockIf || len(b.Preds) != 2 {
+ continue
+ }
+
+ var ind, max *Value // induction, and maximum
+ entry := -1 // which successor of b enters the loop
+
+ // Check thet the control if it either ind < max or max > ind.
+ // TODO: Handle Leq64, Geq64.
+ switch b.Control.Op {
+ case OpLess64:
+ entry = 0
+ ind, max = b.Control.Args[0], b.Control.Args[1]
+ case OpGreater64:
+ entry = 0
+ ind, max = b.Control.Args[1], b.Control.Args[0]
+ default:
+ continue nextb
+ }
+
+ // Check that the induction variable is a phi that depends on itself.
+ if ind.Op != OpPhi {
+ continue
+ }
+
+ // Extract min and nxt knowing that nxt is an addition (e.g. Add64).
+ var min, nxt *Value // minimum, and next value
+ if n := ind.Args[0]; n.Op == OpAdd64 && (n.Args[0] == ind || n.Args[1] == ind) {
+ min, nxt = ind.Args[1], n
+ } else if n := ind.Args[1]; n.Op == OpAdd64 && (n.Args[0] == ind || n.Args[1] == ind) {
+ min, nxt = ind.Args[0], n
+ } else {
+ // Not a recognized induction variable.
+ continue
+ }
+
+ var inc *Value
+ if nxt.Args[0] == ind { // nxt = ind + inc
+ inc = nxt.Args[1]
+ } else if nxt.Args[1] == ind { // nxt = inc + ind
+ inc = nxt.Args[0]
+ } else {
+ panic("unreachable") // one of the cases must be true from the above.
+ }
+
+ // Expect the increment to be a positive constant.
+ // TODO: handle negative increment.
+ if inc.Op != OpConst64 || inc.AuxInt <= 0 {
+ continue
+ }
+
+ // Up to now we extracted the induction variable (ind),
+ // the increment delta (inc), the temporary sum (nxt),
+ // the mininum value (min) and the maximum value (max).
+ //
+ // We also know that ind has the form (Phi min nxt) where
+ // nxt is (Add inc nxt) which means: 1) inc dominates nxt
+ // and 2) there is a loop starting at inc and containing nxt.
+ //
+ // We need to prove that the induction variable is incremented
+ // only when it's smaller than the maximum value.
+ // Two conditions must happen listed below to accept ind
+ // as an induction variable.
+
+ // First condition: loop entry has a single predecessor, which
+ // is the header block. This implies that b.Succs[entry] is
+ // reached iff ind < max.
+ if len(b.Succs[entry].Preds) != 1 {
+ // b.Succs[1-entry] must exit the loop.
+ continue
+ }
+
+ // Second condition: b.Succs[entry] dominates nxt so that
+ // nxt is computed when inc < max, meaning nxt <= max.
+ if !sdom.isAncestorEq(b.Succs[entry], nxt.Block) {
+ // inc+ind can only be reached through the branch that enters the loop.
+ continue
+ }
+
+ // If max is c + SliceLen with c <= 0 then we drop c.
+ // Makes sure c + SliceLen doesn't overflow when SliceLen == 0.
+ // TODO: save c as an offset from max.
+ if w, c := dropAdd64(max); (w.Op == OpStringLen || w.Op == OpSliceLen) && 0 >= c && -c >= 0 {
+ max = w
+ }
+
+ // We can only guarantee that the loops runs withing limits of induction variable
+ // if the increment is 1 or when the limits are constants.
+ if inc.AuxInt != 1 {
+ ok := false
+ if min.Op == OpConst64 && max.Op == OpConst64 {
+ if max.AuxInt > min.AuxInt && max.AuxInt%inc.AuxInt == min.AuxInt%inc.AuxInt { // handle overflow
+ ok = true
+ }
+ }
+ if !ok {
+ continue
+ }
+ }
+
+ if f.pass.debug > 1 {
+ if min.Op == OpConst64 {
+ b.Func.Config.Warnl(b.Line, "Induction variable with minimum %d and increment %d", min.AuxInt, inc.AuxInt)
+ } else {
+ b.Func.Config.Warnl(b.Line, "Induction variable with non-const minimum and increment %d", inc.AuxInt)
+ }
+ }
+
+ iv = append(iv, indVar{
+ ind: ind,
+ inc: inc,
+ nxt: nxt,
+ min: min,
+ max: max,
+ entry: b.Succs[entry],
+ })
+ b.Logf("found induction variable %v (inc = %v, min = %v, max = %v)\n", ind, inc, min, max)
+ }
+
+ return iv
+}
+
+// loopbce performs loop based bounds check elimination.
+func loopbce(f *Func) {
+ idom := dominators(f)
+ sdom := newSparseTree(f, idom)
+ ivList := findIndVar(f, sdom)
+
+ m := make(map[*Value]indVar)
+ for _, iv := range ivList {
+ m[iv.ind] = iv
+ }
+
+ removeBoundsChecks(f, sdom, m)
+}
+
+// removesBoundsChecks remove IsInBounds and IsSliceInBounds based on the induction variables.
+func removeBoundsChecks(f *Func, sdom sparseTree, m map[*Value]indVar) {
+ for _, b := range f.Blocks {
+ if b.Kind != BlockIf {
+ continue
+ }
+
+ v := b.Control
+
+ // Simplify:
+ // (IsInBounds ind max) where 0 <= const == min <= ind < max.
+ // (IsSliceInBounds ind max) where 0 <= const == min <= ind < max.
+ // Found in:
+ // for i := range a {
+ // use a[i]
+ // use a[i:]
+ // use a[:i]
+ // }
+ if v.Op == OpIsInBounds || v.Op == OpIsSliceInBounds {
+ ind, add := dropAdd64(v.Args[0])
+ if ind.Op != OpPhi {
+ goto skip1
+ }
+ if v.Op == OpIsInBounds && add != 0 {
+ goto skip1
+ }
+ if v.Op == OpIsSliceInBounds && (0 > add || add > 1) {
+ goto skip1
+ }
+
+ if iv, has := m[ind]; has && sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) {
+ if v.Args[1] == iv.max {
+ if f.pass.debug > 0 {
+ f.Config.Warnl(b.Line, "Found redundant %s", v.Op)
+ }
+ goto simplify
+ }
+ }
+ }
+ skip1:
+
+ // Simplify:
+ // (IsSliceInBounds ind (SliceCap a)) where 0 <= min <= ind < max == (SliceLen a)
+ // Found in:
+ // for i := range a {
+ // use a[:i]
+ // use a[:i+1]
+ // }
+ if v.Op == OpIsSliceInBounds {
+ ind, add := dropAdd64(v.Args[0])
+ if ind.Op != OpPhi {
+ goto skip2
+ }
+ if 0 > add || add > 1 {
+ goto skip2
+ }
+
+ if iv, has := m[ind]; has && sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) {
+ if v.Args[1].Op == OpSliceCap && iv.max.Op == OpSliceLen && v.Args[1].Args[0] == iv.max.Args[0] {
+ if f.pass.debug > 0 {
+ f.Config.Warnl(b.Line, "Found redundant %s (len promoted to cap)", v.Op)
+ }
+ goto simplify
+ }
+ }
+ }
+ skip2:
+
+ continue
+
+ simplify:
+ f.Logf("removing bounds check %v at %v in %s\n", b.Control, b, f.Name)
+ b.Kind = BlockFirst
+ b.SetControl(nil)
+ }
+}
+
+func dropAdd64(v *Value) (*Value, int64) {
+ if v.Op == OpAdd64 && v.Args[0].Op == OpConst64 {
+ return v.Args[1], v.Args[0].AuxInt
+ }
+ if v.Op == OpAdd64 && v.Args[1].Op == OpConst64 {
+ return v.Args[0], v.Args[1].AuxInt
+ }
+ return v, 0
+}