cmd/compile: inline closures with captures
When inlining a closure with captured variables, walk up the
param chain to find the one that is defined inside the scope
into which the function is being inlined, and map occurrences
of the captures to temporary inlvars, similarly to what is
done for function parameters.
No noticeable impact on compilation speed and binary size.
Minor improvements to go1 benchmarks on darwin/amd64
name old time/op new time/op delta
BinaryTree17-4 2.59s ± 3% 2.58s ± 1% ~ (p=0.470 n=19+19)
Fannkuch11-4 3.15s ± 2% 3.15s ± 1% ~ (p=0.647 n=20+19)
FmtFprintfEmpty-4 43.7ns ± 3% 43.4ns ± 4% ~ (p=0.178 n=18+20)
FmtFprintfString-4 74.0ns ± 2% 77.1ns ± 7% +4.13% (p=0.000 n=20+20)
FmtFprintfInt-4 77.2ns ± 3% 79.2ns ± 6% +2.53% (p=0.000 n=20+20)
FmtFprintfIntInt-4 112ns ± 4% 112ns ± 2% ~ (p=0.672 n=20+19)
FmtFprintfPrefixedInt-4 136ns ± 1% 135ns ± 2% ~ (p=0.827 n=16+20)
FmtFprintfFloat-4 232ns ± 2% 233ns ± 1% ~ (p=0.194 n=20+20)
FmtManyArgs-4 490ns ± 2% 484ns ± 2% -1.28% (p=0.001 n=20+20)
GobDecode-4 6.68ms ± 2% 6.72ms ± 2% ~ (p=0.113 n=20+19)
GobEncode-4 5.62ms ± 2% 5.71ms ± 2% +1.64% (p=0.000 n=20+19)
Gzip-4 235ms ± 3% 236ms ± 2% ~ (p=0.607 n=20+19)
Gunzip-4 37.1ms ± 2% 36.8ms ± 3% ~ (p=0.060 n=20+20)
HTTPClientServer-4 61.9µs ± 2% 62.7µs ± 4% +1.24% (p=0.007 n=18+19)
JSONEncode-4 12.5ms ± 2% 12.4ms ± 3% ~ (p=0.192 n=20+20)
JSONDecode-4 51.6ms ± 3% 51.0ms ± 3% -1.19% (p=0.008 n=20+19)
Mandelbrot200-4 4.12ms ± 6% 4.06ms ± 5% ~ (p=0.063 n=20+20)
GoParse-4 3.12ms ± 5% 3.10ms ± 2% ~ (p=0.402 n=19+19)
RegexpMatchEasy0_32-4 80.7ns ± 2% 75.1ns ± 9% -6.94% (p=0.000 n=17+20)
RegexpMatchEasy0_1K-4 197ns ± 2% 186ns ± 2% -5.43% (p=0.000 n=20+20)
RegexpMatchEasy1_32-4 77.5ns ± 4% 71.9ns ± 7% -7.25% (p=0.000 n=20+18)
RegexpMatchEasy1_1K-4 341ns ± 3% 341ns ± 3% ~ (p=0.732 n=20+20)
RegexpMatchMedium_32-4 113ns ± 2% 112ns ± 3% ~ (p=0.102 n=20+20)
RegexpMatchMedium_1K-4 36.6µs ± 2% 35.8µs ± 2% -2.26% (p=0.000 n=18+20)
RegexpMatchHard_32-4 1.75µs ± 3% 1.74µs ± 2% ~ (p=0.473 n=20+19)
RegexpMatchHard_1K-4 52.6µs ± 2% 52.0µs ± 3% -1.15% (p=0.005 n=20+20)
Revcomp-4 381ms ± 4% 377ms ± 2% ~ (p=0.067 n=20+18)
Template-4 57.3ms ± 2% 57.7ms ± 2% ~ (p=0.108 n=20+20)
TimeParse-4 291ns ± 3% 292ns ± 2% ~ (p=0.585 n=20+20)
TimeFormat-4 314ns ± 3% 315ns ± 1% ~ (p=0.681 n=20+20)
[Geo mean] 47.4µs 47.1µs -0.73%
name old speed new speed delta
GobDecode-4 115MB/s ± 2% 114MB/s ± 2% ~ (p=0.115 n=20+19)
GobEncode-4 137MB/s ± 2% 134MB/s ± 2% -1.63% (p=0.000 n=20+19)
Gzip-4 82.5MB/s ± 3% 82.4MB/s ± 2% ~ (p=0.612 n=20+19)
Gunzip-4 523MB/s ± 2% 528MB/s ± 3% ~ (p=0.060 n=20+20)
JSONEncode-4 155MB/s ± 2% 156MB/s ± 3% ~ (p=0.192 n=20+20)
JSONDecode-4 37.6MB/s ± 3% 38.1MB/s ± 3% +1.21% (p=0.007 n=20+19)
GoParse-4 18.6MB/s ± 4% 18.7MB/s ± 2% ~ (p=0.405 n=19+19)
RegexpMatchEasy0_32-4 396MB/s ± 2% 426MB/s ± 8% +7.56% (p=0.000 n=17+20)
RegexpMatchEasy0_1K-4 5.18GB/s ± 2% 5.48GB/s ± 2% +5.79% (p=0.000 n=20+20)
RegexpMatchEasy1_32-4 413MB/s ± 4% 444MB/s ± 6% +7.46% (p=0.000 n=20+19)
RegexpMatchEasy1_1K-4 3.00GB/s ± 3% 3.00GB/s ± 3% ~ (p=0.678 n=20+20)
RegexpMatchMedium_32-4 8.82MB/s ± 2% 8.90MB/s ± 3% +0.99% (p=0.044 n=20+20)
RegexpMatchMedium_1K-4 28.0MB/s ± 2% 28.6MB/s ± 2% +2.32% (p=0.000 n=18+20)
RegexpMatchHard_32-4 18.3MB/s ± 3% 18.4MB/s ± 2% ~ (p=0.482 n=20+19)
RegexpMatchHard_1K-4 19.5MB/s ± 2% 19.7MB/s ± 3% +1.18% (p=0.004 n=20+20)
Revcomp-4 668MB/s ± 4% 674MB/s ± 2% ~ (p=0.066 n=20+18)
Template-4 33.8MB/s ± 2% 33.6MB/s ± 2% ~ (p=0.104 n=20+20)
[Geo mean] 124MB/s 126MB/s +1.54%
Updates #15561
Updates #18270
Change-Id: I980086efe28b36aa27f81577065e2a729ff03d4e
Reviewed-on: https://go-review.googlesource.com/72490
Reviewed-by: Hugues Bruant <hugues.bruant@gmail.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/cmd/compile/internal/gc/inl.go b/src/cmd/compile/internal/gc/inl.go
index ea31da9..e54bb97 100644
--- a/src/cmd/compile/internal/gc/inl.go
+++ b/src/cmd/compile/internal/gc/inl.go
@@ -280,6 +280,26 @@
v.budget -= fn.InlCost
break
}
+ if n.Left.Op == OCLOSURE {
+ if fn := inlinableClosure(n.Left); fn != nil {
+ v.budget -= fn.Func.InlCost
+ break
+ }
+ } else if n.Left.Op == ONAME && n.Left.Name != nil && n.Left.Name.Defn != nil {
+ // NB: this case currently cannot trigger since closure definition
+ // prevents inlining
+ // NB: ideally we would also handle captured variables defined as
+ // closures in the outer scope this brings us back to the idea of
+ // function value propagation, which if available would both avoid
+ // the "reassigned" check and neatly handle multiple use cases in a
+ // single code path
+ if d := n.Left.Name.Defn; d.Op == OAS && d.Right.Op == OCLOSURE {
+ if fn := inlinableClosure(d.Right); fn != nil {
+ v.budget -= fn.Func.InlCost
+ break
+ }
+ }
+ }
if n.Left.isMethodExpression() {
if d := asNode(n.Left.Sym.Def); d != nil && d.Func.Inl.Len() != 0 {
@@ -629,22 +649,16 @@
return n
}
+// inlinableClosure takes an OCLOSURE node and follows linkage to the matching ONAME with
+// the inlinable body. Returns nil if the function is not inlinable.
func inlinableClosure(n *Node) *Node {
c := n.Func.Closure
caninl(c)
f := c.Func.Nname
- if f != nil && f.Func.Inl.Len() != 0 {
- if n.Func.Cvars.Len() != 0 {
- // FIXME: support closure with captured variables
- // they currently result in invariant violation in the SSA phase
- if Debug['m'] > 1 {
- fmt.Printf("%v: cannot inline closure w/ captured vars %v\n", n.Line(), n.Left)
- }
- return nil
- }
- return f
+ if f == nil || f.Func.Inl.Len() == 0 {
+ return nil
}
- return nil
+ return f
}
// reassigned takes an ONAME node, walks the function in which it is defined, and returns a boolean
@@ -792,16 +806,53 @@
ninit := n.Ninit
+ // Make temp names to use instead of the originals.
+ inlvars := make(map[*Node]*Node)
+
// Find declarations corresponding to inlineable body.
var dcl []*Node
if fn.Name.Defn != nil {
dcl = fn.Func.Inldcl.Slice() // local function
+
+ // handle captured variables when inlining closures
+ if c := fn.Name.Defn.Func.Closure; c != nil {
+ for _, v := range c.Func.Cvars.Slice() {
+ if v.Op == OXXX {
+ continue
+ }
+
+ o := v.Name.Param.Outer
+ // make sure the outer param matches the inlining location
+ // NB: if we enabled inlining of functions containing OCLOSURE or refined
+ // the reassigned check via some sort of copy propagation this would most
+ // likely need to be changed to a loop to walk up to the correct Param
+ if o == nil || (o.Name.Curfn != Curfn && o.Name.Curfn.Func.Closure != Curfn) {
+ Fatalf("%v: unresolvable capture %v %v\n", n.Line(), fn, v)
+ }
+
+ if v.Name.Byval() {
+ iv := typecheck(inlvar(v), Erv)
+ ninit.Append(nod(ODCL, iv, nil))
+ ninit.Append(typecheck(nod(OAS, iv, o), Etop))
+ inlvars[v] = iv
+ } else {
+ addr := newname(lookup("&" + v.Sym.Name))
+ addr.Type = types.NewPtr(v.Type)
+ ia := typecheck(inlvar(addr), Erv)
+ ninit.Append(nod(ODCL, ia, nil))
+ ninit.Append(typecheck(nod(OAS, ia, nod(OADDR, o, nil)), Etop))
+ inlvars[addr] = ia
+
+ // When capturing by reference, all occurrence of the captured var
+ // must be substituted with dereference of the temporary address
+ inlvars[v] = typecheck(nod(OIND, ia, nil), Erv)
+ }
+ }
+ }
} else {
dcl = fn.Func.Dcl // imported function
}
- // Make temp names to use instead of the originals.
- inlvars := make(map[*Node]*Node)
for _, ln := range dcl {
if ln.Op != ONAME {
continue