blob: 426d2985ab3f0305955ec1786927dd0c6a5d7133 [file] [log] [blame]
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pkginit
import (
"container/heap"
"fmt"
"strings"
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
)
// Package initialization
//
// Here we implement the algorithm for ordering package-level variable
// initialization. The spec is written in terms of variable
// initialization, but multiple variables initialized by a single
// assignment are handled together, so here we instead focus on
// ordering initialization assignments. Conveniently, this maps well
// to how we represent package-level initializations using the Node
// AST.
//
// Assignments are in one of three phases: NotStarted, Pending, or
// Done. For assignments in the Pending phase, we use the "order" map
// to record the number of variable dependencies whose initialization
// assignment is not yet Done. We also maintain a "blocking" map that
// maps each assignment back to all of the assignments that depend on
// it.
//
// For example, for an initialization like:
//
//	var x = f(a, b, b)
//	var a, b = g()
//
// the "x = f(a, b, b)" assignment depends on two variables (a and b),
// so its order count will be 2. Correspondingly, the "a, b = g()"
// assignment's "blocking" entry will have two entries back to x's
// assignment.
//
// Logically, initialization works by (1) taking all NotStarted
// assignments, calculating their dependencies, and marking them
// Pending; (2) adding all Pending assignments with an order count of
// 0 to a "ready" priority queue (ordered by variable declaration
// position); and (3) iteratively processing the next Pending
// assignment from the queue, decreasing the order count of the
// assignments it's blocking, and adding them to the queue if
// decremented to 0.
//
// As an optimization, we actually apply each of these three steps for
// each assignment. This yields the same order, but keeps queue size
// down and thus also heap operation costs.
// Static initialization phase.
// These values are stored in two bits in Node.flags.
//
// The values are written out explicitly so the numeric encoding of
// each phase is visible at a glance.
const (
	InitNotStarted = 0 // no work has been done for the assignment yet
	InitDone       = 1 // the assignment has been initialized
	InitPending    = 2 // the assignment is waiting on its dependencies
)
// InitOrder holds the bookkeeping used while ordering package-level
// initialization assignments.
type InitOrder struct {
// blocking maps initialization assignments to the assignments
// that depend on it.
blocking map[ir.Node][]ir.Node
// ready is the queue of Pending initialization assignments
// that are ready for initialization.
ready declOrder
// order records the state of each assignment processed so far:
// the number of dependencies it is still waiting on, or
// orderDone once it has been initialized. An assignment with no
// entry has not been processed yet.
order map[ir.Node]int
}
// initOrder takes the package-level declarations in l (given in
// declaration order) and returns the statements that make up the body
// of the init() function, arranged in initialization order.
func initOrder(l []ir.Node) []ir.Node {
	var res ir.Nodes
	o := InitOrder{
		blocking: make(map[ir.Node][]ir.Node),
		order:    make(map[ir.Node]int),
	}

	// emit appends an initialized assignment to the result list.
	emit := func(stmt ir.Node) { res.Append(stmt) }

	// First pass: register every package-level assignment in
	// declaration order, draining the ready queue after each one.
	for _, decl := range l {
		switch decl.Op() {
		case ir.OAS, ir.OAS2DOTTYPE, ir.OAS2FUNC, ir.OAS2MAPR, ir.OAS2RECV:
			o.processAssign(decl)
			o.flushReady(emit)
		case ir.ODCLCONST, ir.ODCLFUNC, ir.ODCLTYPE:
			// Not an assignment; nothing to order.
		default:
			base.Fatalf("unexpected package-level statement: %v", decl)
		}
	}

	// Second pass: any assignment that is not Done by now must be
	// stuck behind a dependency cycle.
	for _, decl := range l {
		switch decl.Op() {
		case ir.OAS, ir.OAS2DOTTYPE, ir.OAS2FUNC, ir.OAS2MAPR, ir.OAS2RECV:
			if o.order[decl] == orderDone {
				continue
			}
			// Errors printed earlier may have confused the
			// analysis, in which case there might be no loop
			// at all. Let the user fix those first.
			base.ExitIfErrors()

			o.findInitLoopAndExit(firstLHS(decl), new([]*ir.Name), new(ir.NameSet))
			base.Fatalf("initialization unfinished, but failed to identify loop")
		}
	}

	// Consistency check on the invariant: leftover blocking
	// entries would mean a cycle that should have been found above.
	if len(o.blocking) > 0 {
		base.Fatalf("expected empty map: %v", o.blocking)
	}
	return res
}
// processAssign registers assignment n. It counts the package-level
// variable dependencies of n that are not yet initialized, adds n to
// the blocking list of each assignment defining one of them, and
// queues n as ready when nothing is outstanding. Processing the same
// assignment twice is a fatal error.
func (o *InitOrder) processAssign(n ir.Node) {
	if state, seen := o.order[n]; seen {
		base.Fatalf("unexpected state: %v, %v", n, state)
	}
	o.order[n] = 0

	// Count the outstanding variable dependencies while building
	// the inverse dependency ("blocking") graph.
	pending := 0
	for dep := range collectDeps(n, true) {
		defn := dep.Defn
		// Only variables (PEXTERN) that are not yet initialized
		// count; functions (PFUNC) and variables whose
		// assignment is already Done are skipped.
		if dep.Class == ir.PEXTERN && o.order[defn] != orderDone {
			pending++
			o.blocking[defn] = append(o.blocking[defn], n)
		}
	}
	o.order[n] = pending

	if pending == 0 {
		heap.Push(&o.ready, n)
	}
}
// orderDone is the sentinel stored in InitOrder.order for an
// assignment that has been initialized. It is negative so that it
// cannot be mistaken for a count of outstanding dependencies.
const orderDone = -1000
// flushReady drains the ready queue. Each time around it pops the
// earliest (in declaration order) ready assignment, hands it to
// initialize, marks it Done, and releases the assignments it was
// blocking, queueing any whose outstanding count drops to zero.
func (o *InitOrder) flushReady(initialize func(ir.Node)) {
	for o.ready.Len() > 0 {
		next := heap.Pop(&o.ready).(ir.Node)

		// Anything on the ready queue must be registered and
		// have no outstanding dependencies.
		count, known := o.order[next]
		if !known || count != 0 {
			base.Fatalf("unexpected state: %v, %v, %v", next, known, count)
		}

		initialize(next)
		o.order[next] = orderDone

		waiters := o.blocking[next]
		delete(o.blocking, next)

		for _, w := range waiters {
			o.order[w]--
			if o.order[w] == 0 {
				heap.Push(&o.ready, w)
			}
		}
	}
}
// findInitLoopAndExit looks for an initialization loop that passes
// through variable or function n. When it finds one, the loop is
// reported as an error and the compiler exits.
//
// path points to the slice holding the chain of variables/functions
// visited so far; passing a pointer lets the slice's capacity grow
// across calls and limits reallocations. ok collects the names
// already shown not to be part of any cycle.
func (o *InitOrder) findInitLoopAndExit(n *ir.Name, path *[]*ir.Name, ok *ir.NameSet) {
	visited := *path
	for i := range visited {
		if visited[i] == n {
			reportInitLoopAndExit(visited[i:])
			return
		}
	}

	// n may sit on several loops; sorting its references by
	// position makes the one that gets reported deterministic.
	byPos := func(a, b *ir.Name) bool { return a.Pos().Before(b.Pos()) }
	refers := collectDeps(n.Defn, false).Sorted(byPos)

	*path = append(visited, n)
	for _, ref := range refers {
		// Variables that were already initialized cannot be on
		// a loop, and names in ok have been cleared before.
		initialized := ref.Class == ir.PEXTERN && o.order[ref.Defn] == orderDone
		if initialized || ok.Has(ref) {
			continue
		}
		o.findInitLoopAndExit(ref, path, ok)
	}

	// n is not on any cycle. Remember that so it is not explored
	// again when reached by another route; otherwise the search
	// takes exponential time walking every path through the part
	// of the package's call graph implicated in the cycle.
	ok.Add(n)
	*path = (*path)[:len(*path)-1]
}
// reportInitLoopAndExit reports an initialization loop as an error
// and exits. However, if l is not actually an initialization loop
// (it contains no package-level variable), it simply returns instead.
//
// l is the sequence of names forming the candidate loop, where each
// entry refers to the next and the last refers back to the first.
func reportInitLoopAndExit(l []*ir.Name) {
	// Rotate loop so that the earliest variable declaration is at
	// the start.
	i := -1
	for j, n := range l {
		if n.Class == ir.PEXTERN && (i == -1 || n.Pos().Before(l[i].Pos())) {
			i = j
		}
	}
	if i == -1 {
		// False positive: loop only involves recursive
		// functions. Return so that findInitLoopAndExit can
		// continue searching.
		return
	}
	l = append(l[i:], l[:i]...)

	// TODO(mdempsky): Method values are printed as "T.m-fm"
	// rather than "T.m". Figure out how to avoid that.

	var msg strings.Builder
	msg.WriteString("initialization loop:\n")
	for _, n := range l {
		fmt.Fprintf(&msg, "\t%v: %v refers to\n", ir.Line(n), n)
	}
	fmt.Fprintf(&msg, "\t%v: %v", ir.Line(l[0]), l[0])

	// msg is finished text, not a format string. Pass it through
	// "%s" so that a '%' in the rendered positions or names is not
	// re-interpreted as a formatting verb.
	base.ErrorfAt(l[0].Pos(), "%s", msg.String())
	base.ErrorExit()
}
// collectDeps gathers every package-level function and variable that
// declaration n depends on. With transitive set, the dependencies of
// any depended-upon functions (but not variables) are included as
// well. Any Op other than the assignment forms and ODCLFUNC handled
// here is a fatal error.
func collectDeps(n ir.Node, transitive bool) ir.NameSet {
	d := initDeps{transitive: transitive}

	switch op := n.Op(); op {
	case ir.OAS:
		d.inspect(n.(*ir.AssignStmt).Y)
	case ir.OAS2DOTTYPE, ir.OAS2FUNC, ir.OAS2MAPR, ir.OAS2RECV:
		// Only the first right-hand-side expression is inspected
		// for these assignment forms.
		d.inspect(n.(*ir.AssignListStmt).Rhs[0])
	case ir.ODCLFUNC:
		d.inspectList(n.(*ir.Func).Body)
	default:
		base.Fatalf("unexpected Op: %v", op)
	}

	return d.seen
}
// initDeps accumulates the dependencies found while walking a
// declaration.
type initDeps struct {
// transitive reports whether the bodies of depended-upon
// functions are walked as well.
transitive bool
// seen is the set of dependencies recorded so far.
seen ir.NameSet
// cvisit caches the d.visit method value; see cachedVisit.
cvisit func(ir.Node)
}
// cachedVisit returns d.visit as a function value, creating the
// closure on first use and handing back the same one afterwards.
func (d *initDeps) cachedVisit() func(ir.Node) {
	if d.cvisit != nil {
		return d.cvisit
	}
	d.cvisit = d.visit // cache closure
	return d.cvisit
}
// inspect walks n with ir.Visit, applying d.visit to the nodes visited.
func (d *initDeps) inspect(n ir.Node) {
	visit := d.cachedVisit()
	ir.Visit(n, visit)
}
// inspectList walks the nodes of l with ir.VisitList, applying d.visit
// to the nodes visited.
func (d *initDeps) inspectList(l ir.Nodes) {
	visit := d.cachedVisit()
	ir.VisitList(l, visit)
}
// visit passes to foundDep any package-level function or variable
// that n refers to. Closure bodies are walked in place, and method
// references are resolved through ir.MethodExprName.
func (d *initDeps) visit(n ir.Node) {
	switch n.Op() {
	case ir.ONAME:
		name := n.(*ir.Name)
		if name.Class == ir.PEXTERN || name.Class == ir.PFUNC {
			d.foundDep(name)
		}

	case ir.OCLOSURE:
		clo := n.(*ir.ClosureExpr)
		d.inspectList(clo.Func.Body)

	case ir.ODOTMETH, ir.OMETHVALUE, ir.OMETHEXPR:
		d.foundDep(ir.MethodExprName(n))
	}
}
// foundDep notes a dependency on n by adding it to seen. In
// transitive mode, the body of a newly recorded function is walked
// too.
func (d *initDeps) foundDep(n *ir.Name) {
	// A nil name can happen with method expressions involving
	// interface types; e.g., fixedbugs/issue4495.go. Names without
	// definitions aren't interesting as far as initialization
	// ordering goes.
	if n == nil || n.Defn == nil {
		return
	}

	// Coverage counter variables are treated as effectively
	// invisible with respect to init order. Otherwise the
	// instrumentation vars can perturb the order of initialization
	// away from the order of the original uninstrumented program.
	// See issue #56293 for more details.
	if n.CoverageCounter() || n.CoverageAuxVar() {
		return
	}

	// Already recorded: nothing more to do.
	if d.seen.Has(n) {
		return
	}
	d.seen.Add(n)

	if !d.transitive || n.Class != ir.PFUNC {
		return
	}
	d.inspectList(n.Defn.(*ir.Func).Body)
}
// declOrder is a heap.Interface over assignment statements, ordered
// by the position of each statement's first LHS expression.
//
// N.B., the first LHS expression's Pos is used because an OAS node's
// own Pos may not be unique. For example, given the declaration
// "var a, b = f(), g()", "a" must be ordered before "b", but both OAS
// nodes use the "=" token's position as their Pos.
type declOrder []ir.Node

// Len reports the number of queued assignments.
func (s declOrder) Len() int {
	return len(s)
}

// Less reports whether assignment i's first LHS is positioned before
// assignment j's.
func (s declOrder) Less(i, j int) bool {
	pi, pj := firstLHS(s[i]).Pos(), firstLHS(s[j]).Pos()
	return pi.Before(pj)
}

// Swap exchanges the assignments at indices i and j.
func (s declOrder) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Push appends x, which must be an ir.Node; it is called by
// container/heap.
func (s *declOrder) Push(x interface{}) {
	*s = append(*s, x.(ir.Node))
}

// Pop removes and returns the last element; it is called by
// container/heap.
func (s *declOrder) Pop() interface{} {
	old := *s
	last := len(old) - 1
	n := old[last]
	*s = old[:last]
	return n
}
// firstLHS returns the name of the first expression on the left-hand
// side of assignment n. Any Op other than the assignment forms handled
// here is a fatal error.
func firstLHS(n ir.Node) *ir.Name {
	switch op := n.Op(); op {
	case ir.OAS:
		return n.(*ir.AssignStmt).X.Name()
	case ir.OAS2DOTTYPE, ir.OAS2FUNC, ir.OAS2RECV, ir.OAS2MAPR:
		return n.(*ir.AssignListStmt).Lhs[0].Name()
	default:
		base.Fatalf("unexpected Op: %v", op)
	}
	return nil
}