// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
"cmd/compile/internal/types"
"cmd/internal/src"
"fmt"
"sort"
)

// expandCalls converts LE (Late Expansion) calls, which act as if they receive their arguments as SSA
// values, into a lower-level form oriented to the platform's ABI. The SelectN operations that extract
// results are rewritten into
// more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
// reached (for now, Strings, Slices, Complex, and Interface are not decomposed because they are rewritten in
// a subsequent phase, but that may need to change for a register ABI in case one of those composite values is
// split between registers and memory).
//
// TODO: when it comes time to use registers, might want to include builtin selectors as well, but currently that happens in lower.
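//
// As a rough sketch (names and offsets below are illustrative, not verbatim compiler output), a
// static call with one value argument and one result,
//   c = StaticLECall {AuxCall f} x m0
//   r = SelectN [0] c
//   m1 = SelectN [1] c
// is rewritten so that the argument is stored into its parameter slot, the call consumes only
// memory, and the result is reloaded from its result slot:
//   s = Store {T} (OffPtr <*T> [argOff] SP) x m0
//   c = StaticCall <mem> {AuxCall f} s
//   r = Load <T> (OffPtr <*T> [resOff] SP) c
//   m1 = Copy <mem> c    // copies introduced this way are elided in the final step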
func expandCalls(f *Func) {
if !LateCallExpansionEnabledWithin(f) {
return
}
canSSAType := f.fe.CanSSA
regSize := f.Config.RegSize
sp, _ := f.spSb()
debug := f.pass.debug > 0
// On 32-bit targets, 64-bit integers must be decomposed into 32-bit halves; the offsets of the high
// and low halves within the 8-byte slot depend on endianness.
tUint32 := types.Types[types.TUINT32]
tInt32 := types.Types[types.TINT32]
var hiOffset, lowOffset int64
if f.Config.BigEndian {
lowOffset = 4
} else {
hiOffset = 4
}
// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
// that has no 64-bit integer registers.
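// For example, intPairTypes(types.TINT64) yields (int32, uint32) and intPairTypes(types.TUINT64)
// yields (uint32, uint32); only the high half carries the sign.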
intPairTypes := func(et types.EType) (tHi, tLo *types.Type) {
tHi = tUint32
if et == types.TINT64 {
tHi = tInt32
}
tLo = tUint32
return
}
// isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type
// that was expanded in an earlier phase (small user-defined arrays and structs, lowered in decomposeUser).
// Other aggregate types are expanded in decomposeBuiltin, which comes later.
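// For example, a small SSA-able struct such as struct{ x, y int } qualifies, and on a 32-bit
// target (regSize == 4) so does a plain int64, which occupies two registers.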
isAlreadyExpandedAggregateType := func(t *types.Type) bool {
if !canSSAType(t) {
return false
}
return t.IsStruct() || t.IsArray() || regSize == 4 && t.Size() > 4 && t.IsInteger()
}
// removeTrivialWrapperTypes unwraps layers of
// struct { singleField SomeType } and [1]SomeType
// until a non-wrapper type is reached. This is useful
// for working with assignments to/from interface data
// fields (either second operand to OpIMake or OpIData)
// where the wrapping or type conversion can be elided
// because of type conversions/assertions in source code
// that do not appear in SSA.
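// For example, struct { f [1]struct{ p *byte } } unwraps to *byte.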
removeTrivialWrapperTypes := func(t *types.Type) *types.Type {
for {
if t.IsStruct() && t.NumFields() == 1 {
t = t.Field(0).Type
continue
}
if t.IsArray() && t.NumElem() == 1 {
t = t.Elem()
continue
}
break
}
return t
}
// Calls that need lowering have some number of inputs, including a memory input,
// and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
// With the current ABI those inputs need to be converted into stores to memory: the call's original
// memory input is threaded into the first store, and the rewritten call takes the last store as its
// memory input.
// With the current ABI, the outputs need to be converted to loads, which will all use the call's
// memory output as their input.
//
// rewriteSelect recursively walks from a leaf selector to a root (OpSelectN) through
// a chain of Struct/Array Select operations. If the chain of selectors does not
// end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering).
// It emits the code necessary to implement the leaf select operation that leads to the call.
// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
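// As a rough sketch, for a call c returning struct{ a, b int64 } on a 64-bit target, with
//   s = SelectN [0] c
//   x = StructSelect [1] s
// rewriteSelect(x, x, 0) recurses from x through s to c, accumulating b's field offset (8), and
// rewrites x into Load <int64> (OffPtr [resultOffset+8] SP) c (or into a Copy of such a Load when
// x is not in the call's block).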
var rewriteSelect func(leaf *Value, selector *Value, offset int64)
rewriteSelect = func(leaf *Value, selector *Value, offset int64) {
switch selector.Op {
case OpSelectN:
// TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
call := selector.Args[0]
aux := call.Aux.(*AuxCall)
which := selector.AuxInt
if which == aux.NResults() { // mem is after the results.
// rewrite v as a Copy of call -- the replacement call will produce a mem.
leaf.copyOf(call)
} else {
leafType := removeTrivialWrapperTypes(leaf.Type)
pt := types.NewPtr(leafType)
if canSSAType(leafType) {
off := f.ConstOffPtrSP(pt, offset+aux.OffsetOfResult(which), sp)
// Any selection right out of the arg area/registers has to be in the same block as the call; use the call as the mem input.
if leaf.Block == call.Block {
leaf.reset(OpLoad)
leaf.SetArgs2(off, call)
leaf.Type = leafType
} else {
w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
leaf.copyOf(w)
}
} else {
panic("Should not have non-SSA-able OpSelectN")
}
}
case OpStructSelect:
w := selector.Args[0]
if w.Type.Etype != types.TSTRUCT {
fmt.Printf("Bad type for w:\nv=%v\nsel=%v\nw=%v\n,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
}
rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
case OpInt64Hi:
w := selector.Args[0]
rewriteSelect(leaf, w, offset+hiOffset)
case OpInt64Lo:
w := selector.Args[0]
rewriteSelect(leaf, w, offset+lowOffset)
case OpArraySelect:
w := selector.Args[0]
rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt)
default:
// Ignore dead ends; on 32-bit targets, these can occur when this phase runs before the decompose-builtin pass.
}
}
// storeArg converts stores of SSA-able aggregate arguments (passed to a call) into a series of stores of
// smaller types into individual parameter slots.
// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
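// As a rough sketch, on a 64-bit target, storeArg for a = StructMake2 <struct{ x, y int }> vx vy
// at offset 16 produces
//   m1 = Store {int} (OffPtr <*int> [16] SP) vx mem
//   m2 = Store {int} (OffPtr <*int> [24] SP) vy m1
// and returns m2 as the new memory.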
var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value
storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value {
switch a.Op {
case OpArrayMake0, OpStructMake0:
return mem
case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
for i := 0; i < t.NumFields(); i++ {
fld := t.Field(i)
mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem)
}
return mem
case OpArrayMake1:
return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem)
case OpInt64Make:
tHi, tLo := intPairTypes(t.Etype)
mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem)
return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem)
}
dst := f.ConstOffPtrSP(types.NewPtr(t), offset, sp)
x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
if debug {
fmt.Printf("storeArg(%v) returns %s\n", a, x.LongString())
}
return x
}
// offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
// TODO should also optimize offsets from SB?
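// For example, offsetFrom(OffPtr <*S> [8] SP, 16, T) folds to a single OffPtr <*T> [24] SP
// rather than stacking a second OffPtr on top of the first.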
offsetFrom := func(dst *Value, offset int64, t *types.Type) *Value {
pt := types.NewPtr(t)
if offset == 0 && dst.Type == pt { // this is not actually likely
return dst
}
if dst.Op != OpOffPtr {
return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, dst)
}
// Simplify OpOffPtr
from := dst.Args[0]
offset += dst.AuxInt
if from == sp {
return f.ConstOffPtrSP(pt, offset, sp)
}
return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
}
// splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting
// appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts.
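// As a rough sketch, for a store of r = SelectN [0] c with type struct{ a, b int32 }, splitStore
// produces
//   sa = StructSelect [0] r
//   m1 = Store {int32} (OffPtr [off+0] dst) sa mem
//   sb = StructSelect [1] r
//   m2 = Store {int32} (OffPtr [off+4] dst) sb m1
// and the new selectors are themselves rewritten into loads by the later steps.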
var splitStore func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
splitStore = func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
// TODO might be worth commoning up duplicate selectors, but since they go dead, maybe no point.
pos := v.Pos.WithNotStmt()
switch t.Etype {
case types.TINT64, types.TUINT64:
if t.Width == regSize {
break
}
tHi, tLo := intPairTypes(t.Etype)
sel := src.Block.NewValue1(pos, OpInt64Hi, tHi, src)
mem = splitStore(dst, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
firstStorePos = firstStorePos.WithNotStmt()
sel = src.Block.NewValue1(pos, OpInt64Lo, tLo, src)
return splitStore(dst, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
case types.TARRAY:
elt := t.Elem()
if src.Op == OpIData && t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize {
t = removeTrivialWrapperTypes(t)
if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
}
break // handle the leaf type.
}
for i := int64(0); i < t.NumElem(); i++ {
sel := src.Block.NewValue1I(pos, OpArraySelect, elt, i, src)
mem = splitStore(dst, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
firstStorePos = firstStorePos.WithNotStmt()
}
return mem
case types.TSTRUCT:
if src.Op == OpIData && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
// This peculiar test deals with accesses to immediate interface data.
// It works okay because everything is the same size.
// Example code that triggers this can be found in go/constant/value.go, function ToComplex
// v119 (+881) = IData <intVal> v6
// v121 (+882) = StaticLECall <floatVal,mem> {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1
// This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)"
// Guard against "struct{struct{*foo}}"
t = removeTrivialWrapperTypes(t)
if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
}
break // handle the leaf type.
}
for i := 0; i < t.NumFields(); i++ {
fld := t.Field(i)
sel := src.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), src)
mem = splitStore(dst, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
firstStorePos = firstStorePos.WithNotStmt()
}
return mem
}
// Default, including for aggregates whose single element exactly fills their container
// TODO this will be a problem for cast interfaces containing floats when we move to registers.
x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dst, offset, t), src, mem)
if debug {
fmt.Printf("splitStore(%v, %v, %v, %v) returns %s\n", dst, src, mem, v, x.LongString())
}
return x
}
// rewriteArgs removes all the Args from a call and converts the value args into appropriate
// stores (or later, register movement). The extra leading args of interface and closure calls
// (the code and context pointers) are not converted to stores, but they are removed along with
// the rest; the caller reattaches them.
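// As a rough sketch, an aggregate argument passed by address,
//   d = Dereference <T> ptr m0
//   c = StaticLECall {AuxCall f} d m0
// becomes a Move of the argument's size from ptr into its parameter slot,
//   m1 = Move <mem> {T} [size] (OffPtr <*T> [argOff] SP) ptr m0
// with the call then taking m1 as its only remaining (memory) argument.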
rewriteArgs := func(v *Value, firstArg int) *Value {
// Thread the stores on the memory arg
aux := v.Aux.(*AuxCall)
pos := v.Pos.WithNotStmt()
m0 := v.Args[len(v.Args)-1]
mem := m0
for i, a := range v.Args {
if i < firstArg {
continue
}
if a == m0 { // mem is last.
break
}
auxI := int64(i - firstArg)
if a.Op == OpDereference {
if a.MemoryArg() != m0 {
f.Fatalf("Op...LECall and OpDereference have mismatched mem, %s and %s", v.LongString(), a.LongString())
}
// "Dereference" of addressed (probably not-SSA-eligible) value becomes Move
// TODO this will be more complicated with registers in the picture.
src := a.Args[0]
dst := f.ConstOffPtrSP(src.Type, aux.OffsetOfArg(auxI), sp)
if a.Uses == 1 && a.Block == v.Block {
a.reset(OpMove)
a.Pos = pos
a.Type = types.TypeMem
a.Aux = aux.TypeOfArg(auxI)
a.AuxInt = aux.SizeOfArg(auxI)
a.SetArgs3(dst, src, mem)
mem = a
} else {
mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, src, mem)
mem.AuxInt = aux.SizeOfArg(auxI)
}
} else {
mem = storeArg(pos, v.Block, a, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI), mem)
}
}
v.resetArgs()
return mem
}
// Step 0: rewrite the calls to convert incoming args to stores.
for _, b := range f.Blocks {
for _, v := range b.Values {
switch v.Op {
case OpStaticLECall:
mem := rewriteArgs(v, 0)
v.SetArgs1(mem)
case OpClosureLECall:
code := v.Args[0]
context := v.Args[1]
mem := rewriteArgs(v, 2)
v.SetArgs3(code, context, mem)
case OpInterLECall:
code := v.Args[0]
mem := rewriteArgs(v, 1)
v.SetArgs2(code, mem)
}
}
}
// Step 1: any stores of aggregates remaining are believed to be sourced from call results.
// Decompose those stores into a series of smaller stores, adding selection ops as necessary.
for _, b := range f.Blocks {
for _, v := range b.Values {
if v.Op == OpStore {
t := v.Aux.(*types.Type)
if isAlreadyExpandedAggregateType(t) {
dst, src, mem := v.Args[0], v.Args[1], v.Args[2]
mem = splitStore(dst, src, mem, v, t, 0, v.Pos)
v.copyOf(mem)
}
}
}
}
val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering.
// Step 2: accumulate selection operations for rewrite in topological order.
// Any select-for-addressing applied to call results can be transformed directly.
// TODO this is overkill; with the transformation of aggregate references into series of leaf references, it is only necessary to remember and recurse on the leaves.
for _, b := range f.Blocks {
for _, v := range b.Values {
// Accumulate chains of selectors for processing in topological order
switch v.Op {
case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo:
w := v.Args[0]
switch w.Op {
case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo, OpSelectN:
val2Preds[w] += 1
if debug {
fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
}
}
fallthrough
case OpSelectN:
if _, ok := val2Preds[v]; !ok {
val2Preds[v] = 0
if debug {
fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
}
}
case OpSelectNAddr:
// Do these directly; there are no chains of selectors.
call := v.Args[0]
which := v.AuxInt
aux := call.Aux.(*AuxCall)
pt := v.Type
off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp)
v.copyOf(off)
}
}
}
// Compilation must be deterministic, so process the ready values in increasing ID order rather than map-iteration order.
var ordered []*Value
less := func(i, j int) bool { return ordered[i].ID < ordered[j].ID }
// Step 3: Rewrite in topological order. All chains of selectors end up in the same block as the call.
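// Each pass collects the values with no remaining selector uses (count 0), sorts them by ID, and
// rewrites them; when the value just rewritten was the last remaining use of the value it selects
// from, the walk continues down that chain immediately rather than waiting for the next pass.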
for len(val2Preds) > 0 {
ordered = ordered[:0]
for v, n := range val2Preds {
if n == 0 {
ordered = append(ordered, v)
}
}
sort.Slice(ordered, less)
for _, v := range ordered {
for {
w := v.Args[0]
if debug {
fmt.Printf("About to rewrite %s, args[0]=%s\n", v.LongString(), w.LongString())
}
delete(val2Preds, v)
rewriteSelect(v, v, 0)
v = w
n, ok := val2Preds[v]
if !ok {
break
}
if n != 1 {
val2Preds[v] = n - 1
break
}
// Loop on new v; val2Preds[v] == 1 will be deleted in that iteration, no need to store zero.
}
}
}
// Step 4: rewrite the calls themselves, correcting the type
for _, b := range f.Blocks {
for _, v := range b.Values {
switch v.Op {
case OpStaticLECall:
v.Op = OpStaticCall
v.Type = types.TypeMem
case OpClosureLECall:
v.Op = OpClosureCall
v.Type = types.TypeMem
case OpInterLECall:
v.Op = OpInterCall
v.Type = types.TypeMem
}
}
}
// Step 5: elide any copies introduced.
for _, b := range f.Blocks {
for _, v := range b.Values {
for i, a := range v.Args {
if a.Op != OpCopy {
continue
}
aa := copySource(a)
v.SetArg(i, aa)
for a.Uses == 0 {
b := a.Args[0]
a.reset(OpInvalid)
a = b
}
}
}
}
}