| // Copyright 2020 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package ssa |
| |
| import ( |
| "cmd/compile/internal/types" |
| "cmd/internal/src" |
| "fmt" |
| "sort" |
| ) |
| |
| // expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form |
| // that is more oriented to a platform's ABI. The SelectN operations that extract results are rewritten into |
| // more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are |
| // reached (for now, Strings, Slices, Complex, and Interface are not decomposed because they are rewritten in |
| // a subsequent phase, but that may need to change for a register ABI in case one of those composite values is |
| // split between registers and memory). |
| // |
| // TODO: when it comes time to use registers, might want to include builtin selectors as well, but currently that happens in lower. |
| func expandCalls(f *Func) { |
| if !LateCallExpansionEnabledWithin(f) { |
| return |
| } |
| canSSAType := f.fe.CanSSA |
| regSize := f.Config.RegSize |
| sp, _ := f.spSb() |
| |
| debug := f.pass.debug > 0 |
| |
| // For 32-bit, need to deal with decomposition of 64-bit integers |
| tUint32 := types.Types[types.TUINT32] |
| tInt32 := types.Types[types.TINT32] |
| var hiOffset, lowOffset int64 |
| if f.Config.BigEndian { |
| lowOffset = 4 |
| } else { |
| hiOffset = 4 |
| } |
| |
| // intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target |
| // that has no 64-bit integer registers. |
| intPairTypes := func(et types.EType) (tHi, tLo *types.Type) { |
| tHi = tUint32 |
| if et == types.TINT64 { |
| tHi = tInt32 |
| } |
| tLo = tUint32 |
| return |
| } |
| |
| // isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type |
| // that was expanded in an earlier phase (small user-defined arrays and structs, lowered in decomposeUser). |
| // Other aggregate types are expanded in decomposeBuiltin, which comes later. |
| isAlreadyExpandedAggregateType := func(t *types.Type) bool { |
| if !canSSAType(t) { |
| return false |
| } |
| return t.IsStruct() || t.IsArray() || regSize == 4 && t.Size() > 4 && t.IsInteger() |
| } |
| |
| // removeTrivialWrapperTypes unwraps layers of |
| // struct { singleField SomeType } and [1]SomeType |
| // until a non-wrapper type is reached. This is useful |
| // for working with assignments to/from interface data |
| // fields (either second operand to OpIMake or OpIData) |
| // where the wrapping or type conversion can be elided |
| // because of type conversions/assertions in source code |
| // that do not appear in SSA. |
| removeTrivialWrapperTypes := func(t *types.Type) *types.Type { |
| for { |
| if t.IsStruct() && t.NumFields() == 1 { |
| t = t.Field(0).Type |
| continue |
| } |
| if t.IsArray() && t.NumElem() == 1 { |
| t = t.Elem() |
| continue |
| } |
| break |
| } |
| return t |
| } |
| |
| // Calls that need lowering have some number of inputs, including a memory input, |
| // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able. |
| |
| // With the current ABI those inputs need to be converted into stores to memory, |
| // rethreading the call's memory input to the first, and the new call now receiving the last. |
| |
| // With the current ABI, the outputs need to be converted to loads, which will all use the call's |
| // memory output as their input. |
| |
| // rewriteSelect recursively walks leaf selector to a root (OpSelectN) through |
| // a chain of Struct/Array Select operations. If the chain of selectors does not |
| // end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering). |
| // It emits the code necessary to implement the leaf select operation that leads to the call. |
| // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory. |
| var rewriteSelect func(leaf *Value, selector *Value, offset int64) |
| rewriteSelect = func(leaf *Value, selector *Value, offset int64) { |
| switch selector.Op { |
| case OpSelectN: |
| // TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there. |
| call := selector.Args[0] |
| aux := call.Aux.(*AuxCall) |
| which := selector.AuxInt |
| if which == aux.NResults() { // mem is after the results. |
| // rewrite v as a Copy of call -- the replacement call will produce a mem. |
| leaf.copyOf(call) |
| } else { |
| leafType := removeTrivialWrapperTypes(leaf.Type) |
| pt := types.NewPtr(leafType) |
| if canSSAType(leafType) { |
| off := f.ConstOffPtrSP(pt, offset+aux.OffsetOfResult(which), sp) |
| // Any selection right out of the arg area/registers has to be same Block as call, use call as mem input. |
| if leaf.Block == call.Block { |
| leaf.reset(OpLoad) |
| leaf.SetArgs2(off, call) |
| leaf.Type = leafType |
| } else { |
| w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call) |
| leaf.copyOf(w) |
| } |
| } else { |
| panic("Should not have non-SSA-able OpSelectN") |
| } |
| } |
| case OpStructSelect: |
| w := selector.Args[0] |
| if w.Type.Etype != types.TSTRUCT { |
| fmt.Printf("Bad type for w:\nv=%v\nsel=%v\nw=%v\n,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name) |
| } |
| rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt))) |
| |
| case OpInt64Hi: |
| w := selector.Args[0] |
| rewriteSelect(leaf, w, offset+hiOffset) |
| |
| case OpInt64Lo: |
| w := selector.Args[0] |
| rewriteSelect(leaf, w, offset+lowOffset) |
| |
| case OpArraySelect: |
| w := selector.Args[0] |
| rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt) |
| default: |
| // Ignore dead ends; on 32-bit, these can occur running before decompose builtins. |
| } |
| } |
| |
| // storeArg converts stores of SSA-able aggregate arguments (passed to a call) into a series of stores of |
| // smaller types into individual parameter slots. |
| // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory. |
| var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value |
| storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value { |
| switch a.Op { |
| case OpArrayMake0, OpStructMake0: |
| return mem |
| case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4: |
| for i := 0; i < t.NumFields(); i++ { |
| fld := t.Field(i) |
| mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem) |
| } |
| return mem |
| case OpArrayMake1: |
| return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem) |
| |
| case OpInt64Make: |
| tHi, tLo := intPairTypes(t.Etype) |
| mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem) |
| return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem) |
| } |
| dst := f.ConstOffPtrSP(types.NewPtr(t), offset, sp) |
| x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem) |
| if debug { |
| fmt.Printf("storeArg(%v) returns %s\n", a, x.LongString()) |
| } |
| return x |
| } |
| |
| // offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP |
| // TODO should also optimize offsets from SB? |
| offsetFrom := func(dst *Value, offset int64, t *types.Type) *Value { |
| pt := types.NewPtr(t) |
| if offset == 0 && dst.Type == pt { // this is not actually likely |
| return dst |
| } |
| if dst.Op != OpOffPtr { |
| return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, dst) |
| } |
| // Simplify OpOffPtr |
| from := dst.Args[0] |
| offset += dst.AuxInt |
| if from == sp { |
| return f.ConstOffPtrSP(pt, offset, sp) |
| } |
| return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, from) |
| } |
| |
| // splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting |
| // appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts. |
| var splitStore func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value |
| splitStore = func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value { |
| // TODO might be worth commoning up duplicate selectors, but since they go dead, maybe no point. |
| pos := v.Pos.WithNotStmt() |
| switch t.Etype { |
| case types.TINT64, types.TUINT64: |
| if t.Width == regSize { |
| break |
| } |
| tHi, tLo := intPairTypes(t.Etype) |
| sel := src.Block.NewValue1(pos, OpInt64Hi, tHi, src) |
| mem = splitStore(dst, sel, mem, v, tHi, offset+hiOffset, firstStorePos) |
| firstStorePos = firstStorePos.WithNotStmt() |
| sel = src.Block.NewValue1(pos, OpInt64Lo, tLo, src) |
| return splitStore(dst, sel, mem, v, tLo, offset+lowOffset, firstStorePos) |
| |
| case types.TARRAY: |
| elt := t.Elem() |
| if src.Op == OpIData && t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize { |
| t = removeTrivialWrapperTypes(t) |
| if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY { |
| f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it") |
| } |
| break // handle the leaf type. |
| } |
| for i := int64(0); i < t.NumElem(); i++ { |
| sel := src.Block.NewValue1I(pos, OpArraySelect, elt, i, src) |
| mem = splitStore(dst, sel, mem, v, elt, offset+i*elt.Width, firstStorePos) |
| firstStorePos = firstStorePos.WithNotStmt() |
| } |
| return mem |
| case types.TSTRUCT: |
| if src.Op == OpIData && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize { |
| // This peculiar test deals with accesses to immediate interface data. |
| // It works okay because everything is the same size. |
| // Example code that triggers this can be found in go/constant/value.go, function ToComplex |
| // v119 (+881) = IData <intVal> v6 |
| // v121 (+882) = StaticLECall <floatVal,mem> {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1 |
| // This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)" |
| // Guard against "struct{struct{*foo}}" |
| t = removeTrivialWrapperTypes(t) |
| if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY { |
| f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it") |
| } |
| break // handle the leaf type. |
| } |
| for i := 0; i < t.NumFields(); i++ { |
| fld := t.Field(i) |
| sel := src.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), src) |
| mem = splitStore(dst, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos) |
| firstStorePos = firstStorePos.WithNotStmt() |
| } |
| return mem |
| } |
| // Default, including for aggregates whose single element exactly fills their container |
| // TODO this will be a problem for cast interfaces containing floats when we move to registers. |
| x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dst, offset, t), src, mem) |
| if debug { |
| fmt.Printf("splitStore(%v, %v, %v, %v) returns %s\n", dst, src, mem, v, x.LongString()) |
| } |
| return x |
| } |
| |
| // rewriteArgs removes all the Args from a call and converts the call args into appropriate |
| // stores (or later, register movement). Extra args for interface and closure calls are ignored, |
| // but removed. |
| rewriteArgs := func(v *Value, firstArg int) *Value { |
| // Thread the stores on the memory arg |
| aux := v.Aux.(*AuxCall) |
| pos := v.Pos.WithNotStmt() |
| m0 := v.Args[len(v.Args)-1] |
| mem := m0 |
| for i, a := range v.Args { |
| if i < firstArg { |
| continue |
| } |
| if a == m0 { // mem is last. |
| break |
| } |
| auxI := int64(i - firstArg) |
| if a.Op == OpDereference { |
| if a.MemoryArg() != m0 { |
| f.Fatalf("Op...LECall and OpDereference have mismatched mem, %s and %s", v.LongString(), a.LongString()) |
| } |
| // "Dereference" of addressed (probably not-SSA-eligible) value becomes Move |
| // TODO this will be more complicated with registers in the picture. |
| src := a.Args[0] |
| dst := f.ConstOffPtrSP(src.Type, aux.OffsetOfArg(auxI), sp) |
| if a.Uses == 1 && a.Block == v.Block { |
| a.reset(OpMove) |
| a.Pos = pos |
| a.Type = types.TypeMem |
| a.Aux = aux.TypeOfArg(auxI) |
| a.AuxInt = aux.SizeOfArg(auxI) |
| a.SetArgs3(dst, src, mem) |
| mem = a |
| } else { |
| mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, src, mem) |
| mem.AuxInt = aux.SizeOfArg(auxI) |
| } |
| } else { |
| mem = storeArg(pos, v.Block, a, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI), mem) |
| } |
| } |
| v.resetArgs() |
| return mem |
| } |
| |
| // Step 0: rewrite the calls to convert incoming args to stores. |
| for _, b := range f.Blocks { |
| for _, v := range b.Values { |
| switch v.Op { |
| case OpStaticLECall: |
| mem := rewriteArgs(v, 0) |
| v.SetArgs1(mem) |
| case OpClosureLECall: |
| code := v.Args[0] |
| context := v.Args[1] |
| mem := rewriteArgs(v, 2) |
| v.SetArgs3(code, context, mem) |
| case OpInterLECall: |
| code := v.Args[0] |
| mem := rewriteArgs(v, 1) |
| v.SetArgs2(code, mem) |
| } |
| } |
| } |
| |
| // Step 1: any stores of aggregates remaining are believed to be sourced from call results. |
| // Decompose those stores into a series of smaller stores, adding selection ops as necessary. |
| for _, b := range f.Blocks { |
| for _, v := range b.Values { |
| if v.Op == OpStore { |
| t := v.Aux.(*types.Type) |
| if isAlreadyExpandedAggregateType(t) { |
| dst, src, mem := v.Args[0], v.Args[1], v.Args[2] |
| mem = splitStore(dst, src, mem, v, t, 0, v.Pos) |
| v.copyOf(mem) |
| } |
| } |
| } |
| } |
| |
| val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering. |
| |
| // Step 2: accumulate selection operations for rewrite in topological order. |
| // Any select-for-addressing applied to call results can be transformed directly. |
| // TODO this is overkill; with the transformation of aggregate references into series of leaf references, it is only necessary to remember and recurse on the leaves. |
| for _, b := range f.Blocks { |
| for _, v := range b.Values { |
| // Accumulate chains of selectors for processing in topological order |
| switch v.Op { |
| case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo: |
| w := v.Args[0] |
| switch w.Op { |
| case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo, OpSelectN: |
| val2Preds[w] += 1 |
| if debug { |
| fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w]) |
| } |
| } |
| fallthrough |
| case OpSelectN: |
| if _, ok := val2Preds[v]; !ok { |
| val2Preds[v] = 0 |
| if debug { |
| fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) |
| } |
| } |
| case OpSelectNAddr: |
| // Do these directly, there are no chains of selectors. |
| call := v.Args[0] |
| which := v.AuxInt |
| aux := call.Aux.(*AuxCall) |
| pt := v.Type |
| off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp) |
| v.copyOf(off) |
| } |
| } |
| } |
| |
| // Compilation must be deterministic |
| var ordered []*Value |
| less := func(i, j int) bool { return ordered[i].ID < ordered[j].ID } |
| |
| // Step 3: Rewrite in topological order. All chains of selectors end up in same block as the call. |
| for len(val2Preds) > 0 { |
| ordered = ordered[:0] |
| for v, n := range val2Preds { |
| if n == 0 { |
| ordered = append(ordered, v) |
| } |
| } |
| sort.Slice(ordered, less) |
| for _, v := range ordered { |
| for { |
| w := v.Args[0] |
| if debug { |
| fmt.Printf("About to rewrite %s, args[0]=%s\n", v.LongString(), w.LongString()) |
| } |
| delete(val2Preds, v) |
| rewriteSelect(v, v, 0) |
| v = w |
| n, ok := val2Preds[v] |
| if !ok { |
| break |
| } |
| if n != 1 { |
| val2Preds[v] = n - 1 |
| break |
| } |
| // Loop on new v; val2Preds[v] == 1 will be deleted in that iteration, no need to store zero. |
| } |
| } |
| } |
| |
| // Step 4: rewrite the calls themselves, correcting the type |
| for _, b := range f.Blocks { |
| for _, v := range b.Values { |
| switch v.Op { |
| case OpStaticLECall: |
| v.Op = OpStaticCall |
| v.Type = types.TypeMem |
| case OpClosureLECall: |
| v.Op = OpClosureCall |
| v.Type = types.TypeMem |
| case OpInterLECall: |
| v.Op = OpInterCall |
| v.Type = types.TypeMem |
| } |
| } |
| } |
| |
| // Step 5: elide any copies introduced. |
| for _, b := range f.Blocks { |
| for _, v := range b.Values { |
| for i, a := range v.Args { |
| if a.Op != OpCopy { |
| continue |
| } |
| aa := copySource(a) |
| v.SetArg(i, aa) |
| for a.Uses == 0 { |
| b := a.Args[0] |
| a.reset(OpInvalid) |
| a = b |
| } |
| } |
| } |
| } |
| } |