blob: cbe49aa65508380f8043fa2cffb4cd78c1b7d79c [file] [log] [blame] [edit]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package liveness
import (
"cmd/compile/internal/base"
"cmd/compile/internal/bitvec"
"cmd/compile/internal/ir"
"cmd/compile/internal/ssa"
"cmd/internal/src"
"fmt"
"os"
"path/filepath"
"slices"
"sort"
"strings"
)
// MergeLocalsState encapsulates information about which AUTO
// (stack-allocated) variables within a function can be safely
// merged/overlapped, e.g. share a stack slot with some other auto).
// An instance of MergeLocalsState is produced by MergeLocals() below
// and then consumed in ssagen.AllocFrame. The map 'partition'
// contains entries of the form <N,SL> where N is an *ir.Name and SL
// is a slice holding the indices (within 'vars') of other variables
// that share the same slot, specifically the slot of the first
// element in the partition, which we'll call the "leader". For
// example, if a function contains five variables where v1/v2/v3 are
// safe to overlap and v4/v5 are safe to overlap, the MergeLocalsState
// content might look like
//
// vars: [v1, v2, v3, v4, v5]
// partition: v1 -> [1, 0, 2], v2 -> [1, 0, 2], v3 -> [1, 0, 2]
// v4 -> [3, 4], v5 -> [3, 4]
//
// A nil MergeLocalsState indicates that no local variables meet the
// necessary criteria for overlap.
type MergeLocalsState struct {
// contains auto vars that participate in overlapping
vars []*ir.Name
// maps auto variable to overlap partition
partition map[*ir.Name][]int
}
// candRegion is a sub-range (start, end) corresponding to an interval
// [st,en] within the list of candidate variables.
type candRegion struct {
st, en int
}
// cstate holds state information we'll need during the analysis
// phase of stack slot merging but can be discarded when the analysis
// is done.
type cstate struct {
fn *ir.Func
f *ssa.Func
lv *liveness
cands []*ir.Name
nameToSlot map[*ir.Name]int32
regions []candRegion
indirectUE map[ssa.ID][]*ir.Name
ivs []Intervals
hashDeselected map[*ir.Name]bool
trace int // debug trace level
}
// MergeLocals analyzes the specified ssa function f to determine which
// of its auto variables can safely share the same stack slot, returning
// a state object that describes how the overlap should be done.
func MergeLocals(fn *ir.Func, f *ssa.Func) *MergeLocalsState {
// Create a container object for useful state info and then
// call collectMergeCandidates to see if there are vars suitable
// for stack slot merging.
cs := &cstate{
fn: fn,
f: f,
trace: base.Debug.MergeLocalsTrace,
}
cs.collectMergeCandidates()
if len(cs.regions) == 0 {
return nil
}
// Kick off liveness analysis.
//
// If we have a local variable such as "r2" below that's written
// but then not read, something like:
//
// vardef r1
// r1.x = ...
// vardef r2
// r2.x = 0
// r2.y = ...
// <call foo>
// // no subsequent use of r2
// ... = r1.x
//
// then for the purpose of calculating stack maps at the call, we
// can ignore "r2" completely during liveness analysis for stack
// maps, however for stack slock merging we most definitely want
// to treat the writes as "uses".
cs.lv = newliveness(fn, f, cs.cands, cs.nameToSlot, 0)
cs.lv.conservativeWrites = true
cs.lv.prologue()
cs.lv.solve()
// Compute intervals for each candidate based on the liveness and
// on block effects.
cs.computeIntervals()
// Perform merging within each region of the candidates list.
rv := cs.performMerging()
if err := rv.check(); err != nil {
base.FatalfAt(fn.Pos(), "invalid mergelocals state: %v", err)
}
return rv
}
// Subsumed returns whether variable n is subsumed, e.g. appears
// in an overlap position but is not the leader in that partition.
func (mls *MergeLocalsState) Subsumed(n *ir.Name) bool {
if sl, ok := mls.partition[n]; ok && mls.vars[sl[0]] != n {
return true
}
return false
}
// IsLeader returns whether a variable n is the leader (first element)
// in a sharing partition.
func (mls *MergeLocalsState) IsLeader(n *ir.Name) bool {
if sl, ok := mls.partition[n]; ok && mls.vars[sl[0]] == n {
return true
}
return false
}
// Leader returns the leader variable for subsumed var n.
func (mls *MergeLocalsState) Leader(n *ir.Name) *ir.Name {
if sl, ok := mls.partition[n]; ok {
if mls.vars[sl[0]] == n {
panic("variable is not subsumed")
}
return mls.vars[sl[0]]
}
panic("not a merge candidate")
}
// Followers writes a list of the followers for leader n into the slice tmp.
func (mls *MergeLocalsState) Followers(n *ir.Name, tmp []*ir.Name) []*ir.Name {
tmp = tmp[:0]
sl, ok := mls.partition[n]
if !ok {
panic("no entry for leader")
}
if mls.vars[sl[0]] != n {
panic("followers invoked on subsumed var")
}
for _, k := range sl[1:] {
tmp = append(tmp, mls.vars[k])
}
slices.SortStableFunc(tmp, func(a, b *ir.Name) int {
return strings.Compare(a.Sym().Name, b.Sym().Name)
})
return tmp
}
// EstSavings returns the estimated reduction in stack size (number of bytes) for
// the given merge locals state via a pair of ints, the first for non-pointer types and the second for pointer types.
func (mls *MergeLocalsState) EstSavings() (int, int) {
totnp := 0
totp := 0
for n := range mls.partition {
if mls.Subsumed(n) {
sz := int(n.Type().Size())
if n.Type().HasPointers() {
totp += sz
} else {
totnp += sz
}
}
}
return totnp, totp
}
// check tests for various inconsistencies and problems in mls,
// returning an error if any problems are found.
func (mls *MergeLocalsState) check() error {
if mls == nil {
return nil
}
used := make(map[int]bool)
seenv := make(map[*ir.Name]int)
for ii, v := range mls.vars {
if prev, ok := seenv[v]; ok {
return fmt.Errorf("duplicate var %q in vslots: %d and %d\n",
v.Sym().Name, ii, prev)
}
seenv[v] = ii
}
for k, sl := range mls.partition {
// length of slice value needs to be more than 1
if len(sl) < 2 {
return fmt.Errorf("k=%q v=%+v slice len %d invalid",
k.Sym().Name, sl, len(sl))
}
// values in the slice need to be var indices
for i, v := range sl {
if v < 0 || v > len(mls.vars)-1 {
return fmt.Errorf("k=%q v=+%v slpos %d vslot %d out of range of m.v", k.Sym().Name, sl, i, v)
}
}
}
for k, sl := range mls.partition {
foundk := false
for i, v := range sl {
vv := mls.vars[v]
if i == 0 {
if !mls.IsLeader(vv) {
return fmt.Errorf("k=%s v=+%v slpos 0 vslot %d IsLeader(%q) is false should be true", k.Sym().Name, sl, v, vv.Sym().Name)
}
} else {
if !mls.Subsumed(vv) {
return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Subsumed(%q) is false should be true", k.Sym().Name, sl, i, v, vv.Sym().Name)
}
if mls.Leader(vv) != mls.vars[sl[0]] {
return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Leader(%q) got %v want %v", k.Sym().Name, sl, i, v, vv.Sym().Name, mls.Leader(vv), mls.vars[sl[0]])
}
}
if vv == k {
foundk = true
if used[v] {
return fmt.Errorf("k=%s v=+%v val slice used violation at slpos %d vslot %d", k.Sym().Name, sl, i, v)
}
used[v] = true
}
}
if !foundk {
return fmt.Errorf("k=%s v=+%v slice value missing k", k.Sym().Name, sl)
}
vl := mls.vars[sl[0]]
for _, v := range sl[1:] {
vv := mls.vars[v]
if vv.Type().Size() > vl.Type().Size() {
return fmt.Errorf("k=%s v=+%v follower %s size %d larger than leader %s size %d", k.Sym().Name, sl, vv.Sym().Name, vv.Type().Size(), vl.Sym().Name, vl.Type().Size())
}
if vv.Type().HasPointers() && !vl.Type().HasPointers() {
return fmt.Errorf("k=%s v=+%v follower %s hasptr=true but leader %s hasptr=false", k.Sym().Name, sl, vv.Sym().Name, vl.Sym().Name)
}
if vv.Type().Alignment() > vl.Type().Alignment() {
return fmt.Errorf("k=%s v=+%v follower %s align %d greater than leader %s align %d", k.Sym().Name, sl, vv.Sym().Name, vv.Type().Alignment(), vl.Sym().Name, vl.Type().Alignment())
}
}
}
for i := range used {
if !used[i] {
return fmt.Errorf("pos %d var %q unused", i, mls.vars[i])
}
}
return nil
}
func (mls *MergeLocalsState) String() string {
var leaders []*ir.Name
for n, sl := range mls.partition {
if n == mls.vars[sl[0]] {
leaders = append(leaders, n)
}
}
slices.SortFunc(leaders, func(a, b *ir.Name) int {
return strings.Compare(a.Sym().Name, b.Sym().Name)
})
var sb strings.Builder
for _, n := range leaders {
sb.WriteString(n.Sym().Name + ":")
sl := mls.partition[n]
for _, k := range sl[1:] {
n := mls.vars[k]
sb.WriteString(" " + n.Sym().Name)
}
sb.WriteString("\n")
}
return sb.String()
}
// collectMergeCandidates visits all of the AUTO vars declared in
// function fn and identifies a list of candidate variables for
// merging / overlapping. On return the "cands" field of cs will be
// filled in with our set of potentially overlappable candidate
// variables, the "regions" field will hold regions/sequence of
// compatible vars within the candidates list, "nameToSlot" field will
// be populated, and the "indirectUE" field will be filled in with
// information about indirect upwards-exposed uses in the func.
func (cs *cstate) collectMergeCandidates() {
var cands []*ir.Name
// Collect up the available set of appropriate AUTOs in the
// function as a first step, and bail if we have fewer than
// two candidates.
for _, n := range cs.fn.Dcl {
if !n.Used() {
continue
}
if !ssa.IsMergeCandidate(n) {
continue
}
cands = append(cands, n)
}
if len(cands) < 2 {
return
}
// Sort by pointerness, size, and then name.
sort.SliceStable(cands, func(i, j int) bool {
return nameLess(cands[i], cands[j])
})
if cs.trace > 1 {
fmt.Fprintf(os.Stderr, "=-= raw cand list for func %v:\n", cs.fn)
for i := range cands {
dumpCand(cands[i], i)
}
}
// Now generate an initial pruned candidate list and regions list.
// This may be empty if we don't have enough compatible candidates.
initial, _ := cs.genRegions(cands)
if len(initial) < 2 {
return
}
// Set up for hash bisection if enabled.
cs.setupHashBisection(initial)
// Create and populate an indirect use table that we'll use
// during interval construction. As part of this process we may
// wind up tossing out additional candidates, so check to make
// sure we still have something to work with.
cs.cands, cs.regions = cs.populateIndirectUseTable(initial)
if len(cs.cands) < 2 {
return
}
// At this point we have a final pruned set of candidates and a
// corresponding set of regions for the candidates. Build a
// name-to-slot map for the candidates.
cs.nameToSlot = make(map[*ir.Name]int32)
for i, n := range cs.cands {
cs.nameToSlot[n] = int32(i)
}
if cs.trace > 1 {
fmt.Fprintf(os.Stderr, "=-= pruned candidate list for fn %v:\n", cs.fn)
for i := range cs.cands {
dumpCand(cs.cands[i], i)
}
}
}
// genRegions generates a set of regions within cands corresponding
// to potentially overlappable/mergeable variables.
func (cs *cstate) genRegions(cands []*ir.Name) ([]*ir.Name, []candRegion) {
var pruned []*ir.Name
var regions []candRegion
st := 0
for {
en := nextRegion(cands, st)
if en == -1 {
break
}
if st == en {
// region has just one element, we can skip it
st++
continue
}
pst := len(pruned)
pen := pst + (en - st)
if cs.trace > 1 {
fmt.Fprintf(os.Stderr, "=-= addregion st=%d en=%d: add part %d -> %d\n", st, en, pst, pen)
}
// non-empty region, add to pruned
pruned = append(pruned, cands[st:en+1]...)
regions = append(regions, candRegion{st: pst, en: pen})
st = en + 1
}
if len(pruned) < 2 {
return nil, nil
}
return pruned, regions
}
func (cs *cstate) dumpFunc() {
fmt.Fprintf(os.Stderr, "=-= mergelocalsdumpfunc %v:\n", cs.fn)
ii := 0
for k, b := range cs.f.Blocks {
fmt.Fprintf(os.Stderr, "b%d:\n", k)
for _, v := range b.Values {
pos := base.Ctxt.PosTable.Pos(v.Pos)
fmt.Fprintf(os.Stderr, "=-= %d L%d|C%d %s\n", ii, pos.RelLine(), pos.RelCol(), v.LongString())
ii++
}
}
}
func (cs *cstate) dumpFuncIfSelected() {
if base.Debug.MergeLocalsDumpFunc == "" {
return
}
if !strings.HasSuffix(fmt.Sprintf("%v", cs.fn),
base.Debug.MergeLocalsDumpFunc) {
return
}
cs.dumpFunc()
}
// setupHashBisection checks to see if any of the candidate
// variables have been de-selected by our hash debug. Here
// we also implement the -d=mergelocalshtrace flag, which turns
// on debug tracing only if we have at least two candidates
// selected by the hash debug for this function.
func (cs *cstate) setupHashBisection(cands []*ir.Name) {
if base.Debug.MergeLocalsHash == "" {
return
}
deselected := make(map[*ir.Name]bool)
selCount := 0
for _, cand := range cands {
if !base.MergeLocalsHash.MatchPosWithInfo(cand.Pos(), "mergelocals", nil) {
deselected[cand] = true
} else {
deselected[cand] = false
selCount++
}
}
if selCount < len(cands) {
cs.hashDeselected = deselected
}
if base.Debug.MergeLocalsHTrace != 0 && selCount >= 2 {
cs.trace = base.Debug.MergeLocalsHTrace
}
}
// populateIndirectUseTable creates and populates the "indirectUE" table
// within cs by doing some additional analysis of how the vars in
// cands are accessed in the function.
//
// It is possible to have situations where a given ir.Name is
// non-address-taken at the source level, but whose address is
// materialized in order to accommodate the needs of
// architecture-dependent operations or one sort or another (examples
// include things like LoweredZero/DuffZero, etc). The issue here is
// that the SymAddr op will show up as touching a variable of
// interest, but the subsequent memory op will not. This is generally
// not an issue for computing whether something is live across a call,
// but it is problematic for collecting the more fine-grained live
// interval info that drives stack slot merging.
//
// To handle this problem, make a forward pass over each basic block
// looking for instructions of the form vK := SymAddr(N) where N is a
// raw candidate. Create an entry in a map at that point from vK to
// its use count. Continue the walk, looking for uses of vK: when we
// see one, record it in a side table as an upwards exposed use of N.
// Each time we see a use, decrement the use count in the map, and if
// we hit zero, remove the map entry. If we hit the end of the basic
// block and we still have map entries, then evict the name in
// question from the candidate set.
func (cs *cstate) populateIndirectUseTable(cands []*ir.Name) ([]*ir.Name, []candRegion) {
// main indirect UE table, this is what we're producing in this func
indirectUE := make(map[ssa.ID][]*ir.Name)
// this map holds the current set of candidates; the set may
// shrink if we have to evict any candidates.
rawcands := make(map[*ir.Name]struct{})
// maps ssa value V to the ir.Name it is taking the addr of,
// plus a count of the uses we've seen of V during a block walk.
pendingUses := make(map[ssa.ID]nameCount)
// A temporary indirect UE tab just for the current block
// being processed; used to help with evictions.
blockIndirectUE := make(map[ssa.ID][]*ir.Name)
// temporary map used to record evictions in a given block.
evicted := make(map[*ir.Name]bool)
for _, n := range cands {
rawcands[n] = struct{}{}
}
for k := 0; k < len(cs.f.Blocks); k++ {
clear(pendingUses)
clear(blockIndirectUE)
b := cs.f.Blocks[k]
for _, v := range b.Values {
if n, e := affectedVar(v); n != nil {
if _, ok := rawcands[n]; ok {
if e&ssa.SymAddr != 0 && v.Uses != 0 {
// we're taking the address of candidate var n
if _, ok := pendingUses[v.ID]; ok {
// should never happen
base.FatalfAt(v.Pos, "internal error: apparent multiple defs for SSA value %d", v.ID)
}
// Stash an entry in pendingUses recording
// that we took the address of "n" via this
// val.
pendingUses[v.ID] = nameCount{n: n, count: v.Uses}
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=-= SymAddr(%s) on %s\n",
n.Sym().Name, v.LongString())
}
}
}
}
for _, arg := range v.Args {
if nc, ok := pendingUses[arg.ID]; ok {
// We found a use of some value that took the
// address of nc.n. Record this inst as a
// potential indirect use.
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=-= add indirectUE(%s) count=%d on %s\n", nc.n.Sym().Name, nc.count, v.LongString())
}
blockIndirectUE[v.ID] = append(blockIndirectUE[v.ID], nc.n)
nc.count--
if nc.count == 0 {
// That was the last use of the value. Clean
// up the entry in pendingUses.
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=-= last use of v%d\n",
arg.ID)
}
delete(pendingUses, arg.ID)
} else {
// Not the last use; record the decremented
// use count and move on.
pendingUses[arg.ID] = nc
}
}
}
}
// We've reached the end of this basic block: if we have any
// leftover entries in pendingUses, then evict the
// corresponding names from the candidate set. The idea here
// is that if we materialized the address of some local and
// that value is flowing out of the block off somewhere else,
// we're going to treat that local as truly address-taken and
// not have it be a merge candidate.
clear(evicted)
if len(pendingUses) != 0 {
for id, nc := range pendingUses {
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=-= evicting %q due to pendingUse %d count %d\n", nc.n.Sym().Name, id, nc.count)
}
delete(rawcands, nc.n)
evicted[nc.n] = true
}
}
// Copy entries from blockIndirectUE into final indirectUE. Skip
// anything that we evicted in the loop above.
for id, sl := range blockIndirectUE {
for _, n := range sl {
if evicted[n] {
continue
}
indirectUE[id] = append(indirectUE[id], n)
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=-= add final indUE v%d name %s\n", id, n.Sym().Name)
}
}
}
}
if len(rawcands) < 2 {
return nil, nil
}
cs.indirectUE = indirectUE
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=-= iuetab:\n")
ids := make([]ssa.ID, 0, len(indirectUE))
for k := range indirectUE {
ids = append(ids, k)
}
slices.Sort(ids)
for _, id := range ids {
fmt.Fprintf(os.Stderr, " v%d:", id)
for _, n := range indirectUE[id] {
fmt.Fprintf(os.Stderr, " %s", n.Sym().Name)
}
fmt.Fprintf(os.Stderr, "\n")
}
}
pruned := cands[:0]
for k := range rawcands {
pruned = append(pruned, k)
}
sort.Slice(pruned, func(i, j int) bool {
return nameLess(pruned[i], pruned[j])
})
var regions []candRegion
pruned, regions = cs.genRegions(pruned)
if len(pruned) < 2 {
return nil, nil
}
return pruned, regions
}
type nameCount struct {
n *ir.Name
count int32
}
// nameLess compares ci with cj to see if ci should be less than cj in
// a relative ordering of candidate variables. This is used to sort
// vars by pointerness (variables with pointers first), then in order
// of decreasing alignment, then by decreasing size. We are assuming a
// merging algorithm that merges later entries in the list into
// earlier entries. An example ordered candidate list produced by
// nameLess:
//
// idx name type align size
// 0: abc [10]*int 8 80
// 1: xyz [9]*int 8 72
// 2: qrs [2]*int 8 16
// 3: tuv [9]int 8 72
// 4: wxy [9]int32 4 36
// 5: jkl [8]int32 4 32
func nameLess(ci, cj *ir.Name) bool {
if ci.Type().HasPointers() != cj.Type().HasPointers() {
return ci.Type().HasPointers()
}
if ci.Type().Alignment() != cj.Type().Alignment() {
return cj.Type().Alignment() < ci.Type().Alignment()
}
if ci.Type().Size() != cj.Type().Size() {
return cj.Type().Size() < ci.Type().Size()
}
if ci.Sym().Name != cj.Sym().Name {
return ci.Sym().Name < cj.Sym().Name
}
return fmt.Sprintf("%v", ci.Pos()) < fmt.Sprintf("%v", cj.Pos())
}
// nextRegion starts at location idx and walks forward in the cands
// slice looking for variables that are "compatible" (potentially
// overlappable, in the sense that they could potentially share the
// stack slot of cands[idx]); it returns the end of the new region
// (range of compatible variables starting at idx).
func nextRegion(cands []*ir.Name, idx int) int {
n := len(cands)
if idx >= n {
return -1
}
c0 := cands[idx]
szprev := c0.Type().Size()
alnprev := c0.Type().Alignment()
for j := idx + 1; j < n; j++ {
cj := cands[j]
szj := cj.Type().Size()
if szj > szprev {
return j - 1
}
alnj := cj.Type().Alignment()
if alnj > alnprev {
return j - 1
}
szprev = szj
alnprev = alnj
}
return n - 1
}
// mergeVisitRegion tries to perform overlapping of variables with a
// given subrange of cands described by st and en (indices into our
// candidate var list), where the variables within this range have
// already been determined to be compatible with respect to type,
// size, etc. Overlapping is done in a greedy fashion: we select the
// first element in the st->en range, then walk the rest of the
// elements adding in vars whose lifetimes don't overlap with the
// first element, then repeat the process until we run out of work.
// Ordering of the candidates within the region [st,en] is important;
// within the list the assumption is that if we overlap two variables
// X and Y where X precedes Y in the list, we need to make X the
// "leader" (keep X's slot and set Y's frame offset to X's) as opposed
// to the other way around, since it's possible that Y is smaller in
// size than X.
func (cs *cstate) mergeVisitRegion(mls *MergeLocalsState, st, en int) {
if cs.trace > 1 {
fmt.Fprintf(os.Stderr, "=-= mergeVisitRegion(st=%d, en=%d)\n", st, en)
}
n := en - st + 1
used := bitvec.New(int32(n))
nxt := func(slot int) int {
for c := slot - st; c < n; c++ {
if used.Get(int32(c)) {
continue
}
return c + st
}
return -1
}
navail := n
cands := cs.cands
ivs := cs.ivs
if cs.trace > 1 {
fmt.Fprintf(os.Stderr, " =-= navail = %d\n", navail)
}
for navail >= 2 {
leader := nxt(st)
used.Set(int32(leader - st))
navail--
if cs.trace > 1 {
fmt.Fprintf(os.Stderr, " =-= begin leader %d used=%s\n", leader,
used.String())
}
elems := []int{leader}
lints := ivs[leader]
for succ := nxt(leader + 1); succ != -1; succ = nxt(succ + 1) {
// Skip if de-selected by merge locals hash.
if cs.hashDeselected != nil && cs.hashDeselected[cands[succ]] {
continue
}
// Skip if already used.
if used.Get(int32(succ - st)) {
continue
}
if cs.trace > 1 {
fmt.Fprintf(os.Stderr, " =-= overlap of %d[%v] {%s} with %d[%v] {%s} is: %v\n", leader, cands[leader], lints.String(), succ, cands[succ], ivs[succ].String(), lints.Overlaps(ivs[succ]))
}
// Can we overlap leader with this var?
if lints.Overlaps(ivs[succ]) {
continue
} else {
// Add to overlap set.
elems = append(elems, succ)
lints = lints.Merge(ivs[succ])
}
}
if len(elems) > 1 {
// We found some things to overlap with leader. Add the
// candidate elements to "vars" and update "partition".
off := len(mls.vars)
sl := make([]int, len(elems))
for i, candslot := range elems {
sl[i] = off + i
mls.vars = append(mls.vars, cands[candslot])
mls.partition[cands[candslot]] = sl
}
navail -= (len(elems) - 1)
for i := range elems {
used.Set(int32(elems[i] - st))
}
if cs.trace > 1 {
fmt.Fprintf(os.Stderr, "=-= overlapping %+v:\n", sl)
for i := range sl {
dumpCand(mls.vars[sl[i]], sl[i])
}
for i, v := range elems {
fmt.Fprintf(os.Stderr, "=-= %d: sl=%d %s\n", i, v, ivs[v])
}
}
}
}
}
// performMerging carries out variable merging within each of the
// candidate ranges in regions, returning a state object
// that describes the variable overlaps.
func (cs *cstate) performMerging() *MergeLocalsState {
cands := cs.cands
mls := &MergeLocalsState{
partition: make(map[*ir.Name][]int),
}
// Dump state before attempting overlap.
if cs.trace > 1 {
fmt.Fprintf(os.Stderr, "=-= cands live before overlap:\n")
for i := range cands {
c := cands[i]
fmt.Fprintf(os.Stderr, "%d: %v sz=%d ivs=%s\n",
i, c.Sym().Name, c.Type().Size(), cs.ivs[i].String())
}
fmt.Fprintf(os.Stderr, "=-= regions (%d): ", len(cs.regions))
for _, cr := range cs.regions {
fmt.Fprintf(os.Stderr, " [%d,%d]", cr.st, cr.en)
}
fmt.Fprintf(os.Stderr, "\n")
}
// Apply a greedy merge/overlap strategy within each region
// of compatible variables.
for _, cr := range cs.regions {
cs.mergeVisitRegion(mls, cr.st, cr.en)
}
if len(mls.vars) == 0 {
return nil
}
return mls
}
// computeIntervals performs a backwards sweep over the instructions
// of the function we're compiling, building up an Intervals object
// for each candidate variable by looking for upwards exposed uses
// and kills.
func (cs *cstate) computeIntervals() {
lv := cs.lv
ibuilders := make([]IntervalsBuilder, len(cs.cands))
nvars := int32(len(lv.vars))
liveout := bitvec.New(nvars)
cs.dumpFuncIfSelected()
// Count instructions.
ninstr := 0
for _, b := range lv.f.Blocks {
ninstr += len(b.Values)
}
// current instruction index during backwards walk
iidx := ninstr - 1
// Make a backwards pass over all blocks
for k := len(lv.f.Blocks) - 1; k >= 0; k-- {
b := lv.f.Blocks[k]
be := lv.blockEffects(b)
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=-= liveout from tail of b%d: ", k)
for j := range lv.vars {
if be.liveout.Get(int32(j)) {
fmt.Fprintf(os.Stderr, " %q", lv.vars[j].Sym().Name)
}
}
fmt.Fprintf(os.Stderr, "\n")
}
// Take into account effects taking place at end of this basic
// block by comparing our current live set with liveout for
// the block. If a given var was not live before and is now
// becoming live we need to mark this transition with a
// builder "Live" call; similarly if a var was live before and
// is now no longer live, we need a "Kill" call.
for j := range lv.vars {
isLive := liveout.Get(int32(j))
blockLiveOut := be.liveout.Get(int32(j))
if isLive {
if !blockLiveOut {
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=+= at instr %d block boundary kill of %v\n", iidx, lv.vars[j])
}
ibuilders[j].Kill(iidx)
}
} else if blockLiveOut {
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=+= at block-end instr %d %v becomes live\n",
iidx, lv.vars[j])
}
ibuilders[j].Live(iidx)
}
}
// Set our working "currently live" set to the previously
// computed live out set for the block.
liveout.Copy(be.liveout)
// Now walk backwards through this block.
for i := len(b.Values) - 1; i >= 0; i-- {
v := b.Values[i]
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=-= b%d instr %d: %s\n", k, iidx, v.LongString())
}
// Update liveness based on what we see happening in this
// instruction.
pos, e := lv.valueEffects(v)
becomeslive := e&uevar != 0
iskilled := e&varkill != 0
if becomeslive && iskilled {
// we do not ever expect to see both a kill and an
// upwards exposed use given our size constraints.
panic("should never happen")
}
if iskilled && liveout.Get(pos) {
ibuilders[pos].Kill(iidx)
liveout.Unset(pos)
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=+= at instr %d kill of %v\n",
iidx, lv.vars[pos])
}
} else if becomeslive && !liveout.Get(pos) {
ibuilders[pos].Live(iidx)
liveout.Set(pos)
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=+= at instr %d upwards-exposed use of %v\n",
iidx, lv.vars[pos])
}
}
if cs.indirectUE != nil {
// Now handle "indirect" upwards-exposed uses.
ues := cs.indirectUE[v.ID]
for _, n := range ues {
if pos, ok := lv.idx[n]; ok {
if !liveout.Get(pos) {
ibuilders[pos].Live(iidx)
liveout.Set(pos)
if cs.trace > 2 {
fmt.Fprintf(os.Stderr, "=+= at instr %d v%d indirect upwards-exposed use of %v\n", iidx, v.ID, lv.vars[pos])
}
}
}
}
}
iidx--
}
// This check disabled for now due to the way scheduling works
// for ops that materialize values of local variables. For
// many architecture we have rewrite rules of this form:
//
// (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVDaddr {sym} (SPanchored base mem))
// (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (MOVDaddr {sym} base)
//
// which are designed to ensure that if you have a pointerful
// variable "abc" sequence
//
// v30 = VarDef <mem> {abc} v21
// v31 = LocalAddr <*SB> {abc} v2 v30
// v32 = Zero <mem> {SB} [2056] v31 v30
//
// this will be lowered into
//
// v30 = VarDef <mem> {sb} v21
// v106 = SPanchored <uintptr> v2 v30
// v31 = MOVDaddr <*SB> {sb} v106
// v3 = DUFFZERO <mem> [2056] v31 v30
//
// Note the SPanchored: this ensures that the scheduler won't
// move the MOVDaddr earlier than the vardef. With a variable
// "xyz" that has no pointers, however, if we start with
//
// v66 = VarDef <mem> {t2} v65
// v67 = LocalAddr <*T> {t2} v2 v66
// v68 = Zero <mem> {T} [2056] v67 v66
//
// we might lower to
//
// v66 = VarDef <mem> {t2} v65
// v29 = MOVDaddr <*T> {t2} [2032] v2
// v43 = LoweredZero <mem> v67 v29 v66
// v68 = Zero [2056] v2 v43
//
// where that MOVDaddr can float around arbitrarily, meaning
// that we may see an upwards-exposed use to it before the
// VarDef.
//
// One avenue to restoring the check below would be to change
// the rewrite rules to something like
//
// (LocalAddr <t> {sym} base mem) && (t.Elem().HasPointers() || isMergeCandidate(t) => (MOVDaddr {sym} (SPanchored base mem))
//
// however that change will have to be carefully evaluated,
// since it would constrain the scheduler for _all_ LocalAddr
// ops for potential merge candidates, even if we don't
// actually succeed in any overlaps. This will be revisitged in
// a later CL if possible.
//
const checkLiveOnEntry = false
if checkLiveOnEntry && b == lv.f.Entry {
for j, v := range lv.vars {
if liveout.Get(int32(j)) {
lv.f.Fatalf("%v %L recorded as live on entry",
lv.fn.Nname, v)
}
}
}
}
if iidx != -1 {
panic("iidx underflow")
}
// Finish intervals construction.
ivs := make([]Intervals, len(cs.cands))
for i := range cs.cands {
var err error
ivs[i], err = ibuilders[i].Finish()
if err != nil {
cs.dumpFunc()
base.FatalfAt(cs.cands[i].Pos(), "interval construct error for var %q in func %q (%d instrs): %v", cs.cands[i].Sym().Name, ir.FuncName(cs.fn), ninstr, err)
}
}
cs.ivs = ivs
}
func fmtFullPos(p src.XPos) string {
var sb strings.Builder
sep := ""
base.Ctxt.AllPos(p, func(pos src.Pos) {
sb.WriteString(sep)
sep = "|"
file := filepath.Base(pos.Filename())
fmt.Fprintf(&sb, "%s:%d:%d", file, pos.Line(), pos.Col())
})
return sb.String()
}
func dumpCand(c *ir.Name, i int) {
fmt.Fprintf(os.Stderr, " %d: %s %q sz=%d hp=%v align=%d t=%v\n",
i, fmtFullPos(c.Pos()), c.Sym().Name, c.Type().Size(),
c.Type().HasPointers(), c.Type().Alignment(), c.Type())
}
// for unit testing only.
func MakeMergeLocalsState(partition map[*ir.Name][]int, vars []*ir.Name) (*MergeLocalsState, error) {
mls := &MergeLocalsState{partition: partition, vars: vars}
if err := mls.check(); err != nil {
return nil, err
}
return mls, nil
}