cmd/compile: use sparse algorithm for phis in large program This adds a sparse method for locating nearest ancestors in a dominator tree, and checks blocks with more than one predecessor for differences and inserts phi functions where there are. Uses reversed post order to cut number of passes, running it from first def to last use ("last use" for paramout and mem is end-of-program; last use for a phi input from a backedge is the source of the back edge) Includes a cutover from old algorithm to new to avoid paying large constant factor for small programs. This keeps normal builds running at about the same time, while not running over-long on large machine-generated inputs. Add "phase" flags for ssa/build -- ssa/build/stats prints number of blocks, values (before and after linking references and inserting phis, so expansion can be measured), and their product; the product governs the cutover, where a good value seems to be somewhere between 1 and 5 million. Among the files compiled by make.bash, this is the shape of the tail of the distribution for #blocks, #vars, and their product: #blocks #vars product max 6171 28180 173,898,780 99.9% 1641 6548 10,401,878 99% 463 1909 873,721 95% 152 639 95,235 90% 84 359 30,021 The old algorithm is indeed usually fastest, for 99%ile values of usually. The fix to LookupVarOutgoing ( https://go-review.googlesource.com/#/c/22790/ ) deals with some of the same problems addressed by this CL, but on at least one bug ( #15537 ) this change is still a significant help. With this CL: /tmp/gopath$ rm -rf pkg bin /tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \ github.com/gogo/protobuf/test/theproto3/combos/... ... real 4m35.200s user 13m16.644s sys 0m36.712s and pprof reports 3.4GB allocated in one of the larger profiles With tip: /tmp/gopath$ rm -rf pkg bin /tmp/gopath$ time go get -v -gcflags -memprofile=y.mprof \ github.com/gogo/protobuf/test/theproto3/combos/... ... real 10m36.569s user 25m52.286s sys 4m3.696s and pprof reports 8.3GB allocated in the same larger profile With this CL, most of the compilation time on the benchmarked input is spent in register/stack allocation (cumulative 53%) and in the sparse lookup algorithm itself (cumulative 20%). Fixes #15537. Change-Id: Ia0299dda6a291534d8b08e5f9883216ded677a00 Reviewed-on: https://go-review.googlesource.com/22342 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: David Chase <drchase@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>

commit: 6b99fb5bea56b9674161b6c5415c1d05f19dfdc6 [log] [tgz]
author: David Chase <drchase@google.com> Thu Apr 21 13:24:58 2016 -0400
committer: David Chase <drchase@google.com> Mon May 16 21:08:05 2016 +0000
tree: b4fb33117b40b4dda17e6573a0770c7cdf2df9ba
parent: 466cae6ca9f28c971a2d716a5a49dc76bbd1d5bb [diff] [blame]
diff --git a/src/cmd/compile/internal/ssa/redblack32_test.go b/src/cmd/compile/internal/ssa/redblack32_test.go
new file mode 100644
index 0000000..6d72a3e
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/redblack32_test.go

@@ -0,0 +1,276 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+	"fmt"
+	"testing"
+)
+
+type sstring string
+
+func (s sstring) String() string {
+	return string(s)
+}
+
+// wellFormed ensures that a red-black tree meets
+// all of its invariants and returns a string identifying
+// the first problem encountered. If there is no problem
+// then the returned string is empty. The size is also
+// returned to allow comparison of calculated tree size
+// with expected.
+func (t *RBTint32) wellFormed() (s string, i int) {
+	if t.root == nil {
+		s = ""
+		i = 0
+		return
+	}
+	return t.root.wellFormedSubtree(nil, -0x80000000, 0x7fffffff)
+}
+
+// wellFormedSubtree ensures that a red-black subtree meets
+// all of its invariants and returns a string identifying
+// the first problem encountered. If there is no problem
+// then the returned string is empty. The size is also
+// returned to allow comparison of calculated tree size
+// with expected.
+func (t *node32) wellFormedSubtree(parent *node32, min, max int32) (s string, i int) {
+	i = -1 // initialize to a failing value
+	s = "" // s is the reason for failure; empty means okay.
+
+	if t.parent != parent {
+		s = "t.parent != parent"
+		return
+	}
+
+	if min >= t.key {
+		s = "min >= t.key"
+		return
+	}
+
+	if max <= t.key {
+		s = "max <= t.key"
+		return
+	}
+
+	l := t.left
+	r := t.right
+	if l == nil && r == nil {
+		if t.rank != rankLeaf {
+			s = "leaf rank wrong"
+			return
+		}
+	}
+	if l != nil {
+		if t.rank < l.rank {
+			s = "t.rank < l.rank"
+		} else if t.rank > 1+l.rank {
+			s = "t.rank > 1+l.rank"
+		} else if t.rank <= l.maxChildRank() {
+			s = "t.rank <= l.maxChildRank()"
+		} else if t.key <= l.key {
+			s = "t.key <= l.key"
+		}
+		if s != "" {
+			return
+		}
+	} else {
+		if t.rank != 1 {
+			s = "t w/ left nil has rank != 1"
+			return
+		}
+	}
+	if r != nil {
+		if t.rank < r.rank {
+			s = "t.rank < r.rank"
+		} else if t.rank > 1+r.rank {
+			s = "t.rank > 1+r.rank"
+		} else if t.rank <= r.maxChildRank() {
+			s = "t.rank <= r.maxChildRank()"
+		} else if t.key >= r.key {
+			s = "t.key >= r.key"
+		}
+		if s != "" {
+			return
+		}
+	} else {
+		if t.rank != 1 {
+			s = "t w/ right nil has rank != 1"
+			return
+		}
+	}
+	ii := 1
+	if l != nil {
+		res, il := l.wellFormedSubtree(t, min, t.key)
+		if res != "" {
+			s = "L." + res
+			return
+		}
+		ii += il
+	}
+	if r != nil {
+		res, ir := r.wellFormedSubtree(t, t.key, max)
+		if res != "" {
+			s = "R." + res
+			return
+		}
+		ii += ir
+	}
+	i = ii
+	return
+}
+
+func (t *RBTint32) DebugString() string {
+	if t.root == nil {
+		return ""
+	}
+	return t.root.DebugString()
+}
+
+// DebugString prints the tree with nested information
+// to allow an eyeball check on the tree balance.
+func (t *node32) DebugString() string {
+	s := ""
+	if t.left != nil {
+		s = s + "["
+		s = s + t.left.DebugString()
+		s = s + "]"
+	}
+	s = s + fmt.Sprintf("%v=%v:%d", t.key, t.data, t.rank)
+	if t.right != nil {
+		s = s + "["
+		s = s + t.right.DebugString()
+		s = s + "]"
+	}
+	return s
+}
+
+func allRBT32Ops(te *testing.T, x []int32) {
+	t := &RBTint32{}
+	for i, d := range x {
+		x[i] = d + d // Double everything for glb/lub testing
+	}
+
+	// fmt.Printf("Inserting double of %v", x)
+	k := 0
+	min := int32(0x7fffffff)
+	max := int32(-0x80000000)
+	for _, d := range x {
+		if d < min {
+			min = d
+		}
+
+		if d > max {
+			max = d
+		}
+
+		t.Insert(d, sstring(fmt.Sprintf("%v", d)))
+		k++
+		s, i := t.wellFormed()
+		if i != k {
+			te.Errorf("Wrong tree size %v, expected %v for %v", i, k, t.DebugString())
+		}
+		if s != "" {
+			te.Errorf("Tree consistency problem at %v", s)
+			return
+		} else {
+			// fmt.Printf("%s", t.DebugString())
+		}
+	}
+
+	oops := false
+
+	for _, d := range x {
+		s := fmt.Sprintf("%v", d)
+		f := t.Find(d)
+
+		// data
+		if s != fmt.Sprintf("%v", f) {
+			te.Errorf("s(%v) != f(%v)", s, f)
+			oops = true
+		}
+	}
+
+	if !oops {
+		for _, d := range x {
+			s := fmt.Sprintf("%v", d)
+
+			kg, g := t.Glb(d + 1)
+			kge, ge := t.GlbEq(d)
+			kl, l := t.Lub(d - 1)
+			kle, le := t.LubEq(d)
+
+			// keys
+			if d != kg {
+				te.Errorf("d(%v) != kg(%v)", d, kg)
+			}
+			if d != kl {
+				te.Errorf("d(%v) != kl(%v)", d, kl)
+			}
+			if d != kge {
+				te.Errorf("d(%v) != kge(%v)", d, kge)
+			}
+			if d != kle {
+				te.Errorf("d(%v) != kle(%v)", d, kle)
+			}
+			// data
+			if s != fmt.Sprintf("%v", g) {
+				te.Errorf("s(%v) != g(%v)", s, g)
+			}
+			if s != fmt.Sprintf("%v", l) {
+				te.Errorf("s(%v) != l(%v)", s, l)
+			}
+			if s != fmt.Sprintf("%v", ge) {
+				te.Errorf("s(%v) != ge(%v)", s, ge)
+			}
+			if s != fmt.Sprintf("%v", le) {
+				te.Errorf("s(%v) != le(%v)", s, le)
+			}
+		}
+
+		for _, d := range x {
+			s := fmt.Sprintf("%v", d)
+			kge, ge := t.GlbEq(d + 1)
+			kle, le := t.LubEq(d - 1)
+			if d != kge {
+				te.Errorf("d(%v) != kge(%v)", d, kge)
+			}
+			if d != kle {
+				te.Errorf("d(%v) != kle(%v)", d, kle)
+			}
+			if s != fmt.Sprintf("%v", ge) {
+				te.Errorf("s(%v) != ge(%v)", s, ge)
+			}
+			if s != fmt.Sprintf("%v", le) {
+				te.Errorf("s(%v) != le(%v)", s, le)
+			}
+		}
+
+		kg, g := t.Glb(min)
+		kge, ge := t.GlbEq(min - 1)
+		kl, l := t.Lub(max)
+		kle, le := t.LubEq(max + 1)
+		fmin := t.Find(min - 1)
+		fmax := t.Find(min + 11)
+
+		if kg != 0 || kge != 0 || kl != 0 || kle != 0 {
+			te.Errorf("Got non-zero-key for missing query")
+		}
+
+		if g != nil || ge != nil || l != nil || le != nil || fmin != nil || fmax != nil {
+			te.Errorf("Got non-error-data for missing query")
+		}
+
+	}
+}
+
+func TestAllRBTreeOps(t *testing.T) {
+	allRBT32Ops(t, []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25})
+	allRBT32Ops(t, []int32{22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 3, 2, 1, 25, 24, 23, 12, 11, 10, 9, 8, 7, 6, 5, 4})
+	allRBT32Ops(t, []int32{25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1})
+	allRBT32Ops(t, []int32{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24})
+	allRBT32Ops(t, []int32{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2})
+	allRBT32Ops(t, []int32{24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25})
+}
commit	6b99fb5bea56b9674161b6c5415c1d05f19dfdc6	[log] [tgz]
author	David Chase <drchase@google.com>	Thu Apr 21 13:24:58 2016 -0400
committer	David Chase <drchase@google.com>	Mon May 16 21:08:05 2016 +0000
tree	b4fb33117b40b4dda17e6573a0770c7cdf2df9ba
parent	466cae6ca9f28c971a2d716a5a49dc76bbd1d5bb [diff] [blame]