[dev.ssa] cmd/compile: optimize nil checks
Use faulting loads instead of test/jeq to do nil checks.
Fold nil checks into a following load/store if possible.
Makes binaries about 2% smaller.
Change-Id: I54af0f0a93c853f37e34e0ce7e3f01dd2ac87f64
Reviewed-on: https://go-review.googlesource.com/16287
Reviewed-by: David Chase <drchase@google.com>
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 8939f14..50fc935 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -18,6 +18,9 @@
"cmd/internal/obj/x86"
)
+// Smallest possible faulting page at address zero.
+const minZeroPage = 4096
+
// buildssa builds an SSA function
// and reports whether it should be used.
// Once the SSA implementation is complete,
@@ -2428,21 +2431,12 @@
if Disable_checknil != 0 {
return
}
- c := s.newValue1(ssa.OpIsNonNil, Types[TBOOL], ptr)
+ chk := s.newValue2(ssa.OpNilCheck, ssa.TypeVoid, ptr, s.mem())
b := s.endBlock()
- b.Kind = ssa.BlockIf
- b.Control = c
- b.Likely = ssa.BranchLikely
+ b.Kind = ssa.BlockCheck
+ b.Control = chk
bNext := s.f.NewBlock(ssa.BlockPlain)
- bPanic := s.f.NewBlock(ssa.BlockPlain)
b.AddEdgeTo(bNext)
- b.AddEdgeTo(bPanic)
- s.startBlock(bPanic)
- // TODO: implicit nil checks somehow?
- chk := s.newValue2(ssa.OpPanicNilCheck, ssa.TypeMem, ptr, s.mem())
- s.endBlock()
- bPanic.Kind = ssa.BlockExit
- bPanic.Control = chk
s.startBlock(bNext)
}
@@ -3827,18 +3821,6 @@
case ssa.OpArg:
// memory arg needs no code
// TODO: check that only mem arg goes here.
- case ssa.OpAMD64LoweredPanicNilCheck:
- if Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
- Warnl(int(v.Line), "generated nil check")
- }
- // Write to memory address 0. It doesn't matter what we write; use AX.
- // Input 0 is the pointer we just checked, use it as the destination.
- r := regnum(v.Args[0])
- q := Prog(x86.AMOVL)
- q.From.Type = obj.TYPE_REG
- q.From.Reg = x86.REG_AX
- q.To.Type = obj.TYPE_MEM
- q.To.Reg = r
case ssa.OpAMD64LoweredGetClosurePtr:
// Output is hardwired to DX only,
// and DX contains the closure pointer on
@@ -3986,6 +3968,44 @@
Gvardef(v.Aux.(*Node))
case ssa.OpVarKill:
gvarkill(v.Aux.(*Node))
+ case ssa.OpAMD64LoweredNilCheck:
+ // Optimization - if the subsequent block has a load or store
+ // at the same address, we don't need to issue this instruction.
+ for _, w := range v.Block.Succs[0].Values {
+ if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() {
+ // w doesn't use a store - can't be a memory op.
+ continue
+ }
+ if w.Args[len(w.Args)-1] != v.Args[1] {
+ v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w)
+ }
+ switch w.Op {
+ case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload,
+ ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore:
+ if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
+ return
+ }
+ }
+ if w.Type.IsMemory() {
+ // We can't delay the nil check past the next store.
+ break
+ }
+ }
+ // Issue a load which will fault if the input is nil.
+ // TODO: We currently use the 2-byte instruction TESTB AX, (reg).
+ // Should we use the 3-byte TESTB $0, (reg) instead? It is larger
+ // but it doesn't have false dependency on AX.
+ // Or maybe allocate an output register and use MOVL (reg),reg2 ?
+ // That trades clobbering flags for clobbering a register.
+ p := Prog(x86.ATESTB)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x86.REG_AX
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = regnum(v.Args[0])
+ addAux(&p.To, v)
+ if Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
+ Warnl(int(v.Line), "generated nil check")
+ }
default:
v.Unimplementedf("genValue not implemented: %s", v.LongString())
}
@@ -4088,7 +4108,7 @@
lineno = b.Line
switch b.Kind {
- case ssa.BlockPlain, ssa.BlockCall:
+ case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck:
if b.Succs[0] != next {
p := Prog(obj.AJMP)
p.To.Type = obj.TYPE_BRANCH