// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
"cmd/compile/internal/types"
"cmd/internal/src"
"fmt"
"sort"
)

// expandCalls converts LE (Late Expansion) calls, which act as if they receive their arguments as SSA
// values, into a lower-level form oriented to the platform's ABI. The SelectN operations that extract
// results are rewritten into
// more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
// reached (for now, Strings, Slices, Complex, and Interface are not decomposed because they are rewritten in
// a subsequent phase, but that may need to change for a register ABI in case one of those composite values is
// split between registers and memory).
//
// TODO: when it comes time to use registers, might want to include builtin selectors as well, but currently that happens in lower.
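//
// As a rough sketch (names and offsets below are illustrative, not verbatim compiler output), a
// static call with one value argument and one result,
//   c = StaticLECall {AuxCall f} x m0
//   r = SelectN [0] c
//   m1 = SelectN [1] c
// is rewritten so that the argument is stored into its parameter slot, the call consumes only
// memory, and the result is reloaded from its result slot:
//   s = Store {T} (OffPtr <*T> [argOff] SP) x m0
//   c = StaticCall <mem> {AuxCall f} s
//   r = Load <T> (OffPtr <*T> [resOff] SP) c
//   m1 = Copy <mem> c    // copies introduced this way are elided in the final step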
func expandCalls(f *Func) {
if !LateCallExpansionEnabledWithin(f) {
return
}
canSSAType := f.fe.CanSSA
regSize := f.Config.RegSize
sp, _ := f.spSb()
debug := f.pass.debug > 0
// On 32-bit targets, 64-bit integers must be decomposed into 32-bit halves; the offsets of the high
// and low halves within the 8-byte slot depend on endianness.
tUint32 := types.Types[types.TUINT32]
tInt32 := types.Types[types.TINT32]
var hiOffset, lowOffset int64
if f.Config.BigEndian {
lowOffset = 4
} else {
hiOffset = 4
}
// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
// that has no 64-bit integer registers.
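// For example, intPairTypes(types.TINT64) yields (int32, uint32) and intPairTypes(types.TUINT64)
// yields (uint32, uint32); only the high half carries the sign.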
intPairTypes := func(et types.EType) (tHi, tLo *types.Type) {
tHi = tUint32
if et == types.TINT64 {
tHi = tInt32
}
tLo = tUint32
return
}
// isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type
// that was expanded in an earlier phase (small user-defined arrays and structs, lowered in decomposeUser).
// Other aggregate types are expanded in decomposeBuiltin, which comes later.
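// For example, a small SSA-able struct such as struct{ x, y int } qualifies, and on a 32-bit
// target (regSize == 4) so does a plain int64, which occupies two registers.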
isAlreadyExpandedAggregateType := func(t *types.Type) bool {
if !canSSAType(t) {
return false
}
return t.IsStruct() || t.IsArray() || regSize == 4 && t.Size() > 4 && t.IsInteger()
}
// removeTrivialWrapperTypes unwraps layers of
// struct { singleField SomeType } and [1]SomeType
// until a non-wrapper type is reached. This is useful
// for working with assignments to/from interface data
// fields (either second operand to OpIMake or OpIData)
// where the wrapping or type conversion can be elided
// because of type conversions/assertions in source code
// that do not appear in SSA.
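// For example, struct { f [1]struct{ p *byte } } unwraps to *byte.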
removeTrivialWrapperTypes := func(t *types.Type) *types.Type {
for {
if t.IsStruct() && t.NumFields() == 1 {
t = t.Field(0).Type
continue
}
if t.IsArray() && t.NumElem() == 1 {
t = t.Elem()
continue
}
break
}
return t
}
// Calls that need lowering have some number of inputs, including a memory input,
// and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
// With the current ABI those inputs need to be converted into stores to memory: the call's original
// memory input is threaded into the first store, and the rewritten call takes the last store as its
// memory input.
// With the current ABI, the outputs need to be converted to loads, which will all use the call's
// memory output as their input.
//
// rewriteSelect recursively walks from a leaf selector to a root (OpSelectN) through
// a chain of Struct/Array Select operations. If the chain of selectors does not
// end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering).
// It emits the code necessary to implement the leaf select operation that leads to the call.
// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
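// As a rough sketch, for a call c returning struct{ a, b int64 } on a 64-bit target, with
//   s = SelectN [0] c
//   x = StructSelect [1] s
// rewriteSelect(x, x, 0) recurses from x through s to c, accumulating b's field offset (8), and
// rewrites x into Load <int64> (OffPtr [resultOffset+8] SP) c (or into a Copy of such a Load when
// x is not in the call's block).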
var rewriteSelect func(leaf *Value, selector *Value, offset int64)
rewriteSelect = func(leaf *Value, selector *Value, offset int64) {
switch selector.Op {
case OpSelectN:
// TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
call := selector.Args[0]
aux := call.Aux.(*AuxCall)
which := selector.AuxInt
if which == aux.NResults() { // mem is after the results.
// rewrite v as a Copy of call -- the replacement call will produce a mem.
leaf.copyOf(call)
} else {
leafType := removeTrivialWrapperTypes(leaf.Type)
pt := types.NewPtr(leafType)
if canSSAType(leafType) {
off := f.ConstOffPtrSP(pt, offset+aux.OffsetOfResult(which), sp)
// Any selection right out of the arg area/registers has to be in the same block as the call; use the call as the mem input.
if leaf.Block == call.Block {
leaf.reset(OpLoad)
leaf.SetArgs2(off, call)
leaf.Type = leafType
} else {
w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
leaf.copyOf(w)
}
} else {
panic("Should not have non-SSA-able OpSelectN")
}
}
case OpStructSelect:
w := selector.Args[0]
if w.Type.Etype != types.TSTRUCT {
fmt.Printf("Bad type for w:\nv=%v\nsel=%v\nw=%v\n,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
}
rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
case OpInt64Hi:
w := selector.Args[0]
rewriteSelect(leaf, w, offset+hiOffset)
case OpInt64Lo:
w := selector.Args[0]
rewriteSelect(leaf, w, offset+lowOffset)
case OpArraySelect:
w := selector.Args[0]
rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt)
default:
// Ignore dead ends; on 32-bit targets, these can occur when this phase runs before the decompose-builtin pass.
}
}
// storeArg converts stores of SSA-able aggregate arguments (passed to a call) into a series of stores of
// smaller types into individual parameter slots.
// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
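// As a rough sketch, on a 64-bit target, storeArg for a = StructMake2 <struct{ x, y int }> vx vy
// at offset 16 produces
//   m1 = Store {int} (OffPtr <*int> [16] SP) vx mem
//   m2 = Store {int} (OffPtr <*int> [24] SP) vy m1
// and returns m2 as the new memory.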
var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value
storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value {
switch a.Op {
case OpArrayMake0, OpStructMake0:
return mem
case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
for i := 0; i < t.NumFields(); i++ {
fld := t.Field(i)
mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem)
}
return mem
case OpArrayMake1:
return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem)
case OpInt64Make:
tHi, tLo := intPairTypes(t.Etype)
mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem)
return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem)
}
dst := f.ConstOffPtrSP(types.NewPtr(t), offset, sp)
x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
if debug {
fmt.Printf("storeArg(%v) returns %s\n", a, x.LongString())
}
return x
}
// offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
// TODO should also optimize offsets from SB?
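// For example, offsetFrom(OffPtr <*S> [8] SP, 16, T) folds to a single OffPtr <*T> [24] SP
// rather than stacking a second OffPtr on top of the first.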
offsetFrom := func(dst *Value, offset int64, t *types.Type) *Value {
pt := types.NewPtr(t)
if offset == 0 && dst.Type == pt { // this is not actually likely
return dst
}
if dst.Op != OpOffPtr {
return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, dst)
}
// Simplify OpOffPtr
from := dst.Args[0]
offset += dst.AuxInt
if from == sp {
return f.ConstOffPtrSP(pt, offset, sp)
}
return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
}
// splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting
// appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts.
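// As a rough sketch, for a store of r = SelectN [0] c with type struct{ a, b int32 }, splitStore
// produces
//   sa = StructSelect [0] r
//   m1 = Store {int32} (OffPtr [off+0] dst) sa mem
//   sb = StructSelect [1] r
//   m2 = Store {int32} (OffPtr [off+4] dst) sb m1
// and the new selectors are themselves rewritten into loads by the later steps.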
var splitStore func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
splitStore = func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
// TODO might be worth commoning up duplicate selectors, but since they go dead, maybe no point.
pos := v.Pos.WithNotStmt()
switch t.Etype {
case types.TINT64, types.TUINT64:
if t.Width == regSize {
break
}
tHi, tLo := intPairTypes(t.Etype)
sel := src.Block.NewValue1(pos, OpInt64Hi, tHi, src)
mem = splitStore(dst, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
firstStorePos = firstStorePos.WithNotStmt()
sel = src.Block.NewValue1(pos, OpInt64Lo, tLo, src)
return splitStore(dst, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
case types.TARRAY:
elt := t.Elem()
if src.Op == OpIData && t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize {
t = removeTrivialWrapperTypes(t)
if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
}
break // handle the leaf type.
}
for i := int64(0); i < t.NumElem(); i++ {
sel := src.Block.NewValue1I(pos, OpArraySelect, elt, i, src)
mem = splitStore(dst, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
firstStorePos = firstStorePos.WithNotStmt()
}
return mem
case types.TSTRUCT:
if src.Op == OpIData && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
// This peculiar test deals with accesses to immediate interface data.
// It works okay because everything is the same size.
// Example code that triggers this can be found in go/constant/value.go, function ToComplex
// v119 (+881) = IData <intVal> v6
// v121 (+882) = StaticLECall <floatVal,mem> {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1
// This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)"
// Guard against "struct{struct{*foo}}"
t = removeTrivialWrapperTypes(t)
if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
}
break // handle the leaf type.
}
for i := 0; i < t.NumFields(); i++ {
fld := t.Field(i)
sel := src.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), src)
mem = splitStore(dst, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
firstStorePos = firstStorePos.WithNotStmt()
}
return mem
}
// Default, including for aggregates whose single element exactly fills their container
// TODO this will be a problem for cast interfaces containing floats when we move to registers.
x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dst, offset, t), src, mem)
if debug {
fmt.Printf("splitStore(%v, %v, %v, %v) returns %s\n", dst, src, mem, v, x.LongString())
}
return x
}
// rewriteArgs removes all the Args from a call and converts the value args into appropriate
// stores (or later, register movement). The extra leading args of interface and closure calls
// (the code and context pointers) are not converted to stores, but they are removed along with
// the rest; the caller reattaches them.
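// As a rough sketch, an aggregate argument passed by address,
//   d = Dereference <T> ptr m0
//   c = StaticLECall {AuxCall f} d m0
// becomes a Move of the argument's size from ptr into its parameter slot,
//   m1 = Move <mem> {T} [size] (OffPtr <*T> [argOff] SP) ptr m0
// with the call then taking m1 as its only remaining (memory) argument.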
rewriteArgs := func(v *Value, firstArg int) *Value {
// Thread the stores on the memory arg
aux := v.Aux.(*AuxCall)
pos := v.Pos.WithNotStmt()
m0 := v.Args[len(v.Args)-1]
mem := m0
for i, a := range v.Args {
if i < firstArg {
continue
}
if a == m0 { // mem is last.
break
}
auxI := int64(i - firstArg)
if a.Op == OpDereference {
if a.MemoryArg() != m0 {
f.Fatalf("Op...LECall and OpDereference have mismatched mem, %s and %s", v.LongString(), a.LongString())
}
// "Dereference" of addressed (probably not-SSA-eligible) value becomes Move
// TODO this will be more complicated with registers in the picture.
src := a.Args[0]
dst := f.ConstOffPtrSP(src.Type, aux.OffsetOfArg(auxI), sp)
if a.Uses == 1 && a.Block == v.Block {
a.reset(OpMove)
a.Pos = pos
a.Type = types.TypeMem
a.Aux = aux.TypeOfArg(auxI)
a.AuxInt = aux.SizeOfArg(auxI)
a.SetArgs3(dst, src, mem)
mem = a
} else {
mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, src, mem)
mem.AuxInt = aux.SizeOfArg(auxI)
}
} else {
mem = storeArg(pos, v.Block, a, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI), mem)
}
}
v.resetArgs()
return mem
}
// Step 0: rewrite the calls to convert incoming args to stores.
for _, b := range f.Blocks {
for _, v := range b.Values {
switch v.Op {
case OpStaticLECall:
mem := rewriteArgs(v, 0)
v.SetArgs1(mem)
case OpClosureLECall:
code := v.Args[0]
context := v.Args[1]
mem := rewriteArgs(v, 2)
v.SetArgs3(code, context, mem)
case OpInterLECall:
code := v.Args[0]
mem := rewriteArgs(v, 1)
v.SetArgs2(code, mem)
}
}
}
// Step 1: any stores of aggregates remaining are believed to be sourced from call results.
// Decompose those stores into a series of smaller stores, adding selection ops as necessary.
for _, b := range f.Blocks {
for _, v := range b.Values {
if v.Op == OpStore {
t := v.Aux.(*types.Type)
if isAlreadyExpandedAggregateType(t) {
dst, src, mem := v.Args[0], v.Args[1], v.Args[2]
mem = splitStore(dst, src, mem, v, t, 0, v.Pos)
v.copyOf(mem)
}
}
}
}
val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering.
// Step 2: accumulate selection operations for rewrite in topological order.
// Any select-for-addressing applied to call results can be transformed directly.
// TODO this is overkill; with the transformation of aggregate references into series of leaf references, it is only necessary to remember and recurse on the leaves.
for _, b := range f.Blocks {
for _, v := range b.Values {
// Accumulate chains of selectors for processing in topological order
switch v.Op {
case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo:
w := v.Args[0]
switch w.Op {
case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo, OpSelectN:
val2Preds[w] += 1
if debug {
fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
}
}
fallthrough
case OpSelectN:
if _, ok := val2Preds[v]; !ok {
val2Preds[v] = 0
if debug {
fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
}
}
case OpSelectNAddr:
// Do these directly; there are no chains of selectors.
call := v.Args[0]
which := v.AuxInt
aux := call.Aux.(*AuxCall)
pt := v.Type
off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp)
v.copyOf(off)
}
}
}
// Compilation must be deterministic, so process the ready values in increasing ID order rather than map-iteration order.
var ordered []*Value
less := func(i, j int) bool { return ordered[i].ID < ordered[j].ID }
// Step 3: Rewrite in topological order. All chains of selectors end up in the same block as the call.
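// Each pass collects the values with no remaining selector uses (count 0), sorts them by ID, and
// rewrites them; when the value just rewritten was the last remaining use of the value it selects
// from, the walk continues down that chain immediately rather than waiting for the next pass.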
for len(val2Preds) > 0 {
ordered = ordered[:0]
for v, n := range val2Preds {
if n == 0 {
ordered = append(ordered, v)
}
}
sort.Slice(ordered, less)
for _, v := range ordered {
for {
w := v.Args[0]
if debug {
fmt.Printf("About to rewrite %s, args[0]=%s\n", v.LongString(), w.LongString())
}
delete(val2Preds, v)
rewriteSelect(v, v, 0)
v = w
n, ok := val2Preds[v]
if !ok {
break
}
if n != 1 {
val2Preds[v] = n - 1
break
}
// Loop on new v; val2Preds[v] == 1 will be deleted in that iteration, no need to store zero.
}
}
}
// Step 4: rewrite the calls themselves, correcting the type
for _, b := range f.Blocks {
for _, v := range b.Values {
switch v.Op {
case OpStaticLECall:
v.Op = OpStaticCall
v.Type = types.TypeMem
case OpClosureLECall:
v.Op = OpClosureCall
v.Type = types.TypeMem
case OpInterLECall:
v.Op = OpInterCall
v.Type = types.TypeMem
}
}
}
// Step 5: elide any copies introduced.
for _, b := range f.Blocks {
for _, v := range b.Values {
for i, a := range v.Args {
if a.Op != OpCopy {
continue
}
aa := copySource(a)
v.SetArg(i, aa)
for a.Uses == 0 {
b := a.Args[0]
a.reset(OpInvalid)
a = b
}
}
}
}
}