| // Copyright 2025 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package slice |
| |
| // This file implements a stack-allocation optimization |
| // for the backing store of slices. |
| // |
| // Consider the code: |
| // |
//	var s []int
//	for i := range ... {
//		s = append(s, i)
//	}
//	return s
| // |
| // Some of the append operations will need to do an allocation |
| // by calling growslice. This will happen on the 1st, 2nd, 4th, |
| // 8th, etc. append calls. The allocations done by all but the |
| // last growslice call will then immediately be garbage. |
| // |
| // We'd like to avoid doing some of those intermediate |
| // allocations if possible. |
| // |
| // If we can determine that the "return s" statement is the |
| // *only* way that the backing store for s escapes, then we |
| // can rewrite the code to something like: |
| // |
//	var s []int
//	for i := range N {
//		s = append(s, i)
//	}
//	s = move2heap(s)
//	return s
| // |
| // Using the move2heap runtime function, which does: |
| // |
//	move2heap(s):
//		If s is not backed by a stackframe-allocated
//		backing store, return s. Otherwise, copy s
//		to the heap and return the copy.
| // |
| // Now we can treat the backing store of s allocated at the |
| // append site as not escaping. Previous stack allocation |
| // optimizations now apply, which can use a fixed-size |
| // stack-allocated backing store for s when appending. |
| // (See ../ssagen/ssa.go:(*state).append) |
| // |
| // It is tricky to do this optimization safely. To describe |
| // our analysis, we first define what an "exclusive" slice |
| // variable is. |
| // |
| // A slice variable (a variable of slice type) is called |
| // "exclusive" if, when it has a reference to a |
| // stackframe-allocated backing store, it is the only |
| // variable with such a reference. |
| // |
| // In other words, a slice variable is exclusive if |
| // any of the following holds: |
| // 1) It points to a heap-allocated backing store |
// 2) It points to a stack-allocated backing store
//    in a parent (caller's) frame.
| // 3) It is the only variable that references its |
| // backing store. |
| // 4) It is nil. |
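//
// For example, in the following sketch (illustrative only, not code this
// pass needs to see):
//
//	var s []int      // exclusive: s is nil
//	s = append(s, 1) // still exclusive: s is the only variable
//	                 // referencing its backing store
//	t := s           // s and t now share the backing store; if that
//	                 // store is in this frame, s is no longer exclusive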
| // |
| // The nice thing about exclusive slice variables is that |
| // it is always safe to do |
//	s = move2heap(s)
| // whenever s is an exclusive slice variable. Because no |
| // one else has a reference to the backing store, no one |
| // else can tell that we moved the backing store from one |
| // location to another. |
| // |
| // Note that exclusiveness is a dynamic property. A slice |
| // variable may be exclusive during some parts of execution |
| // and not exclusive during others. |
| // |
| // The following operations set or preserve the exclusivity |
| // of a slice variable s: |
//	s = nil
//	s = append(s, ...)
//	s = s[i:j]
//	... = s[i]
//	s[i] = ...
//	f(s) where f does not escape its argument
// Other operations destroy exclusivity. A non-exhaustive list includes:
//	x = s
//	*p = s
//	f(s) where f escapes its argument
//	return s
// To err on the safe side, we whitelist exclusivity-preserving
// operations and assume that any other operation that mentions s
// destroys its exclusivity.
| // |
| // Our strategy is to move the backing store of s to the heap before |
| // any exclusive->nonexclusive transition. That way, s will only ever |
| // have a reference to a stack backing store while it is exclusive. |
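//
// For example, in a hypothetical function like the following (n is an int
// and p a *[]byte passed in from elsewhere), we would insert the move just
// before the assignment through p, which is s's only
// exclusive->nonexclusive transition:
//
//	var s []byte
//	for i := range n {
//		s = append(s, byte(i)) // s stays exclusive here
//	}
//	s = move2heap(s) // inserted by this pass
//	*p = s           // exclusive->nonexclusive transition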
| // |
| // move2heap for a variable s is implemented with: |
//	if s points to within the stack frame {
//		s2 := make([]T, s.len, s.cap)
//		copy(s2[:s.cap], s[:s.cap])
//		s = s2
//	}
// Note that in general we need to copy all of the elements s[:cap(s)] when
// moving to the heap. As an optimization, we keep track of slice variables
// whose capacity, and whose elements in s[len(s):cap(s)], are never accessed.
// For those slice variables, we only need to allocate the next size class
// above the length, which saves memory and copying cost.
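//
// For example (a conceptual sketch, mirroring the pseudocode above), when
// neither the capacity nor s[len(s):cap(s)] is ever observed, the move
// only needs to copy the first len(s) elements:
//
//	if s points to within the stack frame {
//		s2 := make([]T, s.len) // runtime rounds the allocation up to a size class
//		copy(s2, s[:s.len])
//		s = s2
//	}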
| |
| import ( |
| "cmd/compile/internal/base" |
| "cmd/compile/internal/escape" |
| "cmd/compile/internal/ir" |
| "cmd/compile/internal/reflectdata" |
| ) |
| |
| func Funcs(all []*ir.Func) { |
| if base.Flag.N != 0 { |
| return |
| } |
| for _, fn := range all { |
| analyze(fn) |
| } |
| } |
| |
| func analyze(fn *ir.Func) { |
| type sliceInfo struct { |
| // Slice variable. |
| s *ir.Name |
| |
| // Count of uses that this pass understands. |
| okUses int32 |
| // Count of all uses found. |
| allUses int32 |
| |
| // A place where the slice variable transitions from |
| // exclusive to nonexclusive. |
| // We could keep track of more than one, but one is enough for now. |
| // Currently, this can be either a return statement or |
| // an assignment. |
| // TODO: other possible transitions? |
| transition ir.Stmt |
| |
| // Each s = append(s, ...) instance we found. |
| appends []*ir.CallExpr |
| |
		// Weighted count of the s = append(s, ...) instances we found.
		// The optimizations we do are only really useful if the total
		// weight is at least 2. (Note: appends in loops have weight >= 2.)
| appendWeight int |
| |
| // Loop depth at declaration point. |
		// Used for heuristics only; it is not guaranteed to be correct
| // in the presence of gotos. |
| declDepth int |
| |
| // Whether we ever do cap(s), or other operations that use cap(s) |
| // (possibly implicitly), like s[i:j]. |
| capUsed bool |
| } |
| |
| // Every variable (*ir.Name) that we are tracking will have |
| // a non-nil *sliceInfo in its Opt field. |
| haveLocalSlice := false |
| maxStackSize := int64(base.Debug.VariableMakeThreshold) |
| var namedRets []*ir.Name |
| for _, s := range fn.Dcl { |
| if !s.Type().IsSlice() { |
| continue |
| } |
| if s.Type().Elem().Size() > maxStackSize { |
| continue |
| } |
| if !base.VariableMakeHash.MatchPos(s.Pos(), nil) { |
| continue |
| } |
| s.Opt = &sliceInfo{s: s} // start tracking s |
| haveLocalSlice = true |
| if s.Class == ir.PPARAMOUT { |
| namedRets = append(namedRets, s) |
| } |
| } |
| if !haveLocalSlice { |
| return |
| } |
| |
| // Keep track of loop depth while walking. |
| loopDepth := 0 |
| |
| // tracking returns the info for the slice variable if n is a slice |
| // variable that we're still considering, or nil otherwise. |
| tracking := func(n ir.Node) *sliceInfo { |
| if n == nil || n.Op() != ir.ONAME { |
| return nil |
| } |
| s := n.(*ir.Name) |
| if s.Opt == nil { |
| return nil |
| } |
| return s.Opt.(*sliceInfo) |
| } |
| |
	// addTransition(i, loc) records that i.s experiences an exclusive->nonexclusive
| // transition somewhere within loc. |
| addTransition := func(i *sliceInfo, loc ir.Stmt) { |
| if i.transition != nil { |
| // We only keep track of a single exclusive->nonexclusive transition |
| // for a slice variable. If we find more than one, give up. |
| // (More than one transition location would be fine, but we would |
| // start to get worried about introducing too much additional code.) |
| i.s.Opt = nil |
| return |
| } |
| if loopDepth > i.declDepth { |
| // Conservatively, we disable this optimization when the |
| // transition is inside a loop. This can result in adding |
| // overhead unnecessarily in cases like: |
| // func f(n int, p *[]byte) { |
| // var s []byte |
| // for i := range n { |
| // *p = s |
| // s = append(s, 0) |
| // } |
| // } |
| i.s.Opt = nil |
| return |
| } |
| i.transition = loc |
| } |
| |
| // Examine an x = y assignment that occurs somewhere within statement stmt. |
| assign := func(x, y ir.Node, stmt ir.Stmt) { |
| if i := tracking(x); i != nil { |
| // s = y. Check for understood patterns for y. |
| if y == nil || y.Op() == ir.ONIL { |
| // s = nil is ok. |
| i.okUses++ |
| } else if y.Op() == ir.OSLICELIT { |
| // s = []{...} is ok. |
| // Note: this reveals capacity. Should it? |
| i.okUses++ |
| i.capUsed = true |
| } else if y.Op() == ir.OSLICE { |
| y := y.(*ir.SliceExpr) |
| if y.X == i.s { |
| // s = s[...:...] is ok |
| i.okUses += 2 |
| i.capUsed = true |
| } |
| } else if y.Op() == ir.OAPPEND { |
| y := y.(*ir.CallExpr) |
| if y.Args[0] == i.s { |
| // s = append(s, ...) is ok |
| i.okUses += 2 |
| i.appends = append(i.appends, y) |
| i.appendWeight += 1 + (loopDepth - i.declDepth) |
| } |
| // TODO: s = append(nil, ...)? |
| } |
| // Note that technically s = make([]T, ...) preserves exclusivity, but |
| // we don't track that because we assume users who wrote that know |
| // better than the compiler does. |
| |
| // TODO: figure out how to handle s = fn(..., s, ...) |
| // It would be nice to maintain exclusivity of s in this situation. |
			// But unfortunately, fn can return one of its other arguments, which
			// may be a slice other than s with a stack-allocated backing store
			// (one that may already have other references to it).
| // |
| // Maybe we could do it if s is the only argument? |
| } |
| |
| if i := tracking(y); i != nil { |
| // ... = s |
| // Treat this as an exclusive->nonexclusive transition. |
| i.okUses++ |
| addTransition(i, stmt) |
| } |
| } |
| |
| var do func(ir.Node) bool |
| do = func(n ir.Node) bool { |
| if n == nil { |
| return false |
| } |
| switch n.Op() { |
| case ir.ONAME: |
| if i := tracking(n); i != nil { |
| // A use of a slice variable. Count it. |
| i.allUses++ |
| } |
| case ir.ODCL: |
| n := n.(*ir.Decl) |
| if i := tracking(n.X); i != nil { |
| i.okUses++ |
| i.declDepth = loopDepth |
| } |
| case ir.OINDEX: |
| n := n.(*ir.IndexExpr) |
| if i := tracking(n.X); i != nil { |
| // s[i] is ok. |
| i.okUses++ |
| } |
| case ir.OLEN: |
| n := n.(*ir.UnaryExpr) |
| if i := tracking(n.X); i != nil { |
| // len(s) is ok |
| i.okUses++ |
| } |
| case ir.OCAP: |
| n := n.(*ir.UnaryExpr) |
| if i := tracking(n.X); i != nil { |
| // cap(s) is ok |
| i.okUses++ |
| i.capUsed = true |
| } |
| case ir.OADDR: |
| n := n.(*ir.AddrExpr) |
| if n.X.Op() == ir.OINDEX { |
| n := n.X.(*ir.IndexExpr) |
| if i := tracking(n.X); i != nil { |
| // &s[i] is definitely a nonexclusive transition. |
| // (We need this case because s[i] is ok, but &s[i] is not.) |
| i.s.Opt = nil |
| } |
| } |
| case ir.ORETURN: |
| n := n.(*ir.ReturnStmt) |
| for _, x := range n.Results { |
| if i := tracking(x); i != nil { |
| i.okUses++ |
| // We go exclusive->nonexclusive here |
| addTransition(i, n) |
| } |
| } |
| if len(n.Results) == 0 { |
| // Uses of named result variables are implicit here. |
| for _, x := range namedRets { |
| if i := tracking(x); i != nil { |
| addTransition(i, n) |
| } |
| } |
| } |
| case ir.OCALLFUNC: |
| n := n.(*ir.CallExpr) |
| for idx, arg := range n.Args { |
| if i := tracking(arg); i != nil { |
| if !argLeak(n, idx) { |
| // Passing s to a nonescaping arg is ok. |
| i.okUses++ |
| i.capUsed = true |
| } |
| } |
| } |
| case ir.ORANGE: |
| // Range over slice is ok. |
| n := n.(*ir.RangeStmt) |
| if i := tracking(n.X); i != nil { |
| i.okUses++ |
| } |
| case ir.OAS: |
| n := n.(*ir.AssignStmt) |
| assign(n.X, n.Y, n) |
| case ir.OAS2: |
| n := n.(*ir.AssignListStmt) |
| for i := range len(n.Lhs) { |
| assign(n.Lhs[i], n.Rhs[i], n) |
| } |
| case ir.OCLOSURE: |
| n := n.(*ir.ClosureExpr) |
| for _, v := range n.Func.ClosureVars { |
| do(v.Outer) |
| } |
| } |
| if n.Op() == ir.OFOR || n.Op() == ir.ORANGE { |
			// Note: loopDepth isn't really right for the init portion
| // of the for statement, but that's ok. Correctness |
| // does not depend on depth info. |
| loopDepth++ |
| defer func() { loopDepth-- }() |
| } |
| // Check all the children. |
| ir.DoChildren(n, do) |
| return false |
| } |
| |
| // Run the analysis over the whole body. |
| for _, stmt := range fn.Body { |
| do(stmt) |
| } |
| |
| // Process accumulated info to find slice variables |
| // that we can allocate on the stack. |
| for _, s := range fn.Dcl { |
| if s.Opt == nil { |
| continue |
| } |
| i := s.Opt.(*sliceInfo) |
| s.Opt = nil |
| if i.okUses != i.allUses { |
			// Some use of i.s that we don't understand lurks. Give up.
| continue |
| } |
| |
| // At this point, we've decided that we *can* do |
| // the optimization. |
| |
| if i.transition == nil { |
| // Exclusive for its whole lifetime. That means it |
| // didn't escape. We can already handle nonescaping |
| // slices without this pass. |
| continue |
| } |
| if i.appendWeight < 2 { |
| // This optimization only really helps if there is |
| // (dynamically) more than one append. |
| continue |
| } |
| |
| // Commit point - at this point we've decided we *should* |
| // do the optimization. |
| |
| // Insert a move2heap operation before the exclusive->nonexclusive |
| // transition. |
| move := ir.NewMoveToHeapExpr(i.transition.Pos(), i.s) |
| if i.capUsed { |
| move.PreserveCapacity = true |
| } |
| move.RType = reflectdata.AppendElemRType(i.transition.Pos(), i.appends[0]) |
| move.SetType(i.s.Type()) |
| move.SetTypecheck(1) |
| as := ir.NewAssignStmt(i.transition.Pos(), i.s, move) |
| as.SetTypecheck(1) |
| i.transition.PtrInit().Prepend(as) |
| // Note: we prepend because we need to put the move2heap |
| // operation first, before any other init work, as the transition |
| // might occur in the init work. |
| |
| // Now that we've inserted a move2heap operation before every |
		// exclusive -> nonexclusive transition, appends can use
| // stack backing stores. |
| // (This is the whole point of this pass, to enable stack |
| // allocation of append backing stores.) |
| for _, a := range i.appends { |
| a.SetEsc(ir.EscNone) |
| if i.capUsed { |
| a.UseBuf = true |
| } |
| } |
| } |
| } |
| |
// argLeak reports whether the idx'th argument to the call n escapes anywhere
// (to the heap, another argument, a return value, etc.).
// If unknown, it returns true.
| func argLeak(n *ir.CallExpr, idx int) bool { |
| if n.Op() != ir.OCALLFUNC { |
| return true |
| } |
| fn := ir.StaticCalleeName(ir.StaticValue(n.Fun)) |
| if fn == nil { |
| return true |
| } |
| fntype := fn.Type() |
| if recv := fntype.Recv(); recv != nil { |
| if idx == 0 { |
| return escape.ParseLeaks(recv.Note).Any() |
| } |
| idx-- |
| } |
| return escape.ParseLeaks(fntype.Params()[idx].Note).Any() |
| } |