[dev.ssa] cmd/compile: allocate the flag register in a separate pass
Spilling/restoring flag values is a pain to do during regalloc.
Instead, allocate the flag register in a separate pass. Regalloc then
operates normally on any flag recomputation instructions.
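For reference, a minimal self-contained sketch of the recomputation idea
(toy types and ops, not the compiler's *Value/copyInto; the real pass below
also tracks flag liveness across blocks and the control value):

    package main

    import "fmt"

    type value struct {
        op       string
        isFlags  bool // this op produces a flags value
        clobbers bool // issuing this op clobbers the flags register
        args     []*value
    }

    // copyOf re-issues a flag generator (analogous to Value.copyInto).
    func copyOf(v *value) *value { c := *v; return &c }

    func main() {
        x := &value{op: "MOVBconst"}
        y := &value{op: "MOVBconst"}
        cmp := &value{op: "TESTB", isFlags: true, args: []*value{x, y}}
        clob := &value{op: "ADDQ", clobbers: true}
        use := &value{op: "SETEQ", args: []*value{cmp}}

        var out []*value
        var live *value // flag value currently in the flags register
        for _, v := range []*value{x, y, cmp, clob, use} {
            for i, a := range v.args {
                if a.isFlags && a != live {
                    c := copyOf(a) // recompute instead of spill/restore
                    out = append(out, c)
                    v.args[i] = c
                    live = c
                }
            }
            out = append(out, v)
            if v.clobbers {
                live = nil
            }
            if v.isFlags {
                live = v
            }
        }
        for _, v := range out {
            fmt.Println(v.op) // TESTB is re-issued before SETEQ
        }
    }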
Change-Id: Ia1c3d9e6eff678861193093c0b48a00f90e4156b
Reviewed-on: https://go-review.googlesource.com/17694
Reviewed-by: David Chase <drchase@google.com>
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index 01238f2..767b774 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -97,9 +97,10 @@
{"lowered cse", cse},
{"lowered deadcode", deadcode},
{"checkLower", checkLower},
- {"critical", critical}, // remove critical edges
- {"layout", layout}, // schedule blocks
- {"schedule", schedule}, // schedule values
+ {"critical", critical}, // remove critical edges
+ {"layout", layout}, // schedule blocks
+ {"schedule", schedule}, // schedule values
+ {"flagalloc", flagalloc}, // allocate flags register
{"regalloc", regalloc},
{"stackalloc", stackalloc},
}
@@ -142,6 +143,10 @@
// checkLower must run after lowering & subsequent dead code elim
{"lower", "checkLower"},
{"lowered deadcode", "checkLower"},
+ // flagalloc needs instructions to be scheduled.
+ {"schedule", "flagalloc"},
+ // regalloc needs flags to be allocated first.
+ {"flagalloc", "regalloc"},
}
func init() {
diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go
new file mode 100644
index 0000000..c088158
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/flagalloc.go
@@ -0,0 +1,123 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+const flagRegMask = regMask(1) << 33 // TODO: arch-specific
+
+// flagalloc allocates the flag register among all the flag-generating
+// instructions. Flag values are recomputed if they need to be
+// spilled/restored.
+func flagalloc(f *Func) {
+ // Compute the in-register flag value we want at the end of
+ // each block. This is basically a best-effort live variable
+ // analysis, so it can be much simpler than a full analysis.
+ // TODO: do we really need to keep flag values live across blocks?
+ // Could we force the flags register to be unused at basic block
+ // boundaries? Then we wouldn't need this computation.
+ end := make([]*Value, f.NumBlocks())
+ for n := 0; n < 2; n++ {
+ // Walk blocks backwards. Poor-man's postorder traversal.
+ for i := len(f.Blocks) - 1; i >= 0; i-- {
+ b := f.Blocks[i]
+ // Walk values backwards to figure out what flag
+ // value we want in the flag register at the start
+ // of the block.
+ flag := end[b.ID]
+ if b.Control != nil && b.Control.Type.IsFlags() {
+ flag = b.Control
+ }
+ for j := len(b.Values) - 1; j >= 0; j-- {
+ v := b.Values[j]
+ if v == flag {
+ flag = nil
+ }
+ if opcodeTable[v.Op].reg.clobbers&flagRegMask != 0 {
+ flag = nil
+ }
+ for _, a := range v.Args {
+ if a.Type.IsFlags() {
+ flag = a
+ }
+ }
+ }
+ for _, p := range b.Preds {
+ end[p.ID] = flag
+ }
+ }
+ }
+ // For blocks which have a flags control value, that's the only value
+ // we can leave in the flags register at the end of the block. (There
+ // is no place to put a flag regeneration instruction.)
+ for _, b := range f.Blocks {
+ v := b.Control
+ if v != nil && v.Type.IsFlags() && end[b.ID] != v {
+ end[b.ID] = nil
+ }
+ }
+
+ // Add flag recomputations where they are needed.
+ // TODO: Remove original instructions if they are never used.
+ var oldSched []*Value
+ for _, b := range f.Blocks {
+ oldSched = append(oldSched[:0], b.Values...)
+ b.Values = b.Values[:0]
+ // The current live flag value.
+ var flag *Value
+ if len(b.Preds) > 0 {
+ flag = end[b.Preds[0].ID]
+ // Note: the following condition depends on the lack of critical edges.
+ for _, p := range b.Preds[1:] {
+ if end[p.ID] != flag {
+ f.Fatalf("live flag in %s's predecessors not consistent", b)
+ }
+ }
+ }
+ for _, v := range oldSched {
+ if v.Op == OpPhi && v.Type.IsFlags() {
+ f.Fatalf("phi of flags not supported: %s", v.LongString())
+ }
+ // Make sure any flag arg of v is in the flags register.
+ // If not, recompute it.
+ for i, a := range v.Args {
+ if !a.Type.IsFlags() {
+ continue
+ }
+ if a == flag {
+ continue
+ }
+ // Recalculate a
+ c := a.copyInto(b)
+ // Update v.
+ v.SetArg(i, c)
+ // Remember the most-recently computed flag value.
+ flag = c
+ }
+ // Issue v.
+ b.Values = append(b.Values, v)
+ if opcodeTable[v.Op].reg.clobbers&flagRegMask != 0 {
+ flag = nil
+ }
+ if v.Type.IsFlags() {
+ flag = v
+ }
+ }
+ if v := b.Control; v != nil && v != flag && v.Type.IsFlags() {
+ // Recalculate control value.
+ c := v.copyInto(b)
+ b.Control = c
+ flag = c
+ }
+ if v := end[b.ID]; v != nil && v != flag {
+ // Need to reissue flag generator for use by
+ // subsequent blocks.
+ _ = v.copyInto(b)
+ // Note: this flag generator is not properly linked up
+ // with the flag users. This breaks the SSA representation.
+ // We could fix up the users with another pass, but for now
+ // we'll just leave it. (Regalloc has the same issue for
+ // standard regs, and it runs next.)
+ }
+ }
+}
diff --git a/src/cmd/compile/internal/ssa/func_test.go b/src/cmd/compile/internal/ssa/func_test.go
index d35690a..1dc134d 100644
--- a/src/cmd/compile/internal/ssa/func_test.go
+++ b/src/cmd/compile/internal/ssa/func_test.go
@@ -232,6 +232,11 @@
return ctrl{BlockExit, arg, []string{}}
}
+// Eq specifies a BlockAMD64EQ.
+func Eq(cond, sub, alt string) ctrl {
+ return ctrl{BlockAMD64EQ, cond, []string{sub, alt}}
+}
+
// bloc, ctrl, and valu are internal structures used by Bloc, Valu, Goto,
// If, and Exit to help define blocks.
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index 535885a..2690b61 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -38,12 +38,6 @@
// x3 can then be used wherever x is referenced again.
// If the spill (x2) is never used, it will be removed at the end of regalloc.
//
-// Flags values are special. Instead of attempting to spill and restore the flags
-// register, we recalculate it if needed.
-// There are more efficient schemes (see the discussion in CL 13844),
-// but flag restoration is empirically rare, and this approach is simple
-// and architecture-independent.
-//
// Phi values are special, as always. We define two kinds of phis, those
// where the merge happens in a register (a "register" phi) and those where
// the merge happens in a stack location (a "stack" phi).
@@ -173,7 +167,6 @@
Register{30, "X14"},
Register{31, "X15"},
Register{32, "SB"}, // pseudo-register for global base pointer (aka %rip)
- Register{33, "FLAGS"},
// TODO: make arch-dependent
}
@@ -226,7 +219,7 @@
f *Func
// For each value, whether it needs a register or not.
- // Cached value of !v.Type.IsMemory() && !v.Type.IsVoid().
+ // Cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags().
needReg []bool
// for each block, its primary predecessor.
@@ -435,40 +428,9 @@
c = s.curBlock.NewValue1(v.Line, OpCopy, v.Type, s.regs[r2].c)
} else if v.rematerializeable() {
// Rematerialize instead of loading from the spill location.
- c = s.curBlock.NewValue0(v.Line, v.Op, v.Type)
- c.Aux = v.Aux
- c.AuxInt = v.AuxInt
- c.AddArgs(v.Args...)
+ c = v.copyInto(s.curBlock)
} else {
switch {
- // It is difficult to spill and reload flags on many architectures.
- // Instead, we regenerate the flags register by issuing the same instruction again.
- // This requires (possibly) spilling and reloading that instruction's args.
- case v.Type.IsFlags():
- if logSpills {
- fmt.Println("regalloc: regenerating flags")
- }
- ns := s.nospill
- // Place v's arguments in registers, spilling and loading as needed
- args := make([]*Value, 0, len(v.Args))
- regspec := opcodeTable[v.Op].reg
- for _, i := range regspec.inputs {
- // Extract the original arguments to v
- a := s.orig[v.Args[i.idx].ID]
- if a.Type.IsFlags() {
- s.f.Fatalf("cannot load flags value with flags arg: %v has unwrapped arg %v", v.LongString(), a.LongString())
- }
- cc := s.allocValToReg(a, i.regs, true)
- args = append(args, cc)
- }
- s.nospill = ns
- // Recalculate v
- c = s.curBlock.NewValue0(v.Line, v.Op, v.Type)
- c.Aux = v.Aux
- c.AuxInt = v.AuxInt
- c.resetArgs()
- c.AddArgs(args...)
-
// Load v from its spill location.
case vi.spill2 != nil:
if logSpills {
@@ -506,7 +468,7 @@
s.orig = make([]*Value, f.NumValues())
for _, b := range f.Blocks {
for _, v := range b.Values {
- if v.Type.IsMemory() || v.Type.IsVoid() {
+ if v.Type.IsMemory() || v.Type.IsVoid() || v.Type.IsFlags() {
continue
}
s.needReg[v.ID] = true
@@ -818,6 +780,10 @@
// by the register specification (most constrained first).
args = append(args[:0], v.Args...)
for _, i := range regspec.inputs {
+ if i.regs == flagRegMask {
+ // TODO: remove flag input from regspec.inputs.
+ continue
+ }
args[i.idx] = s.allocValToReg(v.Args[i.idx], i.regs, true)
}
@@ -834,8 +800,11 @@
// Pick register for output.
var r register
var mask regMask
- if len(regspec.outputs) > 0 {
+ if s.needReg[v.ID] {
mask = regspec.outputs[0] &^ s.reserved()
+ if mask>>33&1 != 0 {
+ s.f.Fatalf("bad mask %s\n", v.LongString())
+ }
}
if mask != 0 {
r = s.allocReg(mask)
@@ -858,7 +827,7 @@
// f()
// }
// It would be good to have both spill and restore inside the IF.
- if !v.Type.IsFlags() {
+ if s.needReg[v.ID] {
spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v)
s.setOrig(spill, v)
s.values[v.ID].spill = spill
diff --git a/src/cmd/compile/internal/ssa/regalloc_test.go b/src/cmd/compile/internal/ssa/regalloc_test.go
index 08260fb..596a920 100644
--- a/src/cmd/compile/internal/ssa/regalloc_test.go
+++ b/src/cmd/compile/internal/ssa/regalloc_test.go
@@ -13,12 +13,12 @@
Valu("mem", OpInitMem, TypeMem, 0, ".mem"),
Valu("x", OpAMD64MOVBconst, TypeInt8, 0, 1),
Valu("y", OpAMD64MOVBconst, TypeInt8, 0, 2),
- Valu("a", OpAMD64TESTB, TypeBool, 0, nil, "x", "y"),
- Valu("b", OpAMD64TESTB, TypeBool, 0, nil, "y", "x"),
- If("a", "if", "exit"),
+ Valu("a", OpAMD64TESTB, TypeFlags, 0, nil, "x", "y"),
+ Valu("b", OpAMD64TESTB, TypeFlags, 0, nil, "y", "x"),
+ Eq("a", "if", "exit"),
),
Bloc("if",
- If("b", "plain", "exit"),
+ Eq("b", "plain", "exit"),
),
Bloc("plain",
Goto("exit"),
@@ -27,6 +27,7 @@
Exit("mem"),
),
)
+ flagalloc(f.f)
regalloc(f.f)
checkFunc(f.f)
}
diff --git a/src/cmd/compile/internal/ssa/value.go b/src/cmd/compile/internal/ssa/value.go
index 661a059..fc31863 100644
--- a/src/cmd/compile/internal/ssa/value.go
+++ b/src/cmd/compile/internal/ssa/value.go
@@ -126,6 +126,15 @@
v.Args = v.argstorage[:0]
}
+// copyInto makes a new value identical to v and adds it to the end of b.
+func (v *Value) copyInto(b *Block) *Value {
+ c := b.NewValue0(v.Line, v.Op, v.Type)
+ c.Aux = v.Aux
+ c.AuxInt = v.AuxInt
+ c.AddArgs(v.Args...)
+ return c
+}
+
func (v *Value) Logf(msg string, args ...interface{}) { v.Block.Logf(msg, args...) }
func (v *Value) Fatalf(msg string, args ...interface{}) { v.Block.Fatalf(msg, args...) }
func (v *Value) Unimplementedf(msg string, args ...interface{}) { v.Block.Unimplementedf(msg, args...) }