src/cmd/compile/internal/ssa/rewrite.go - go - Git at Google

 // Copyright 2015 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package ssa

 import (
 	"cmd/compile/internal/types"
 	"cmd/internal/obj"
 	"fmt"
 	"io"
 	"math"
 	"os"
 	"path/filepath"
 )

 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) {
 	// repeat rewrites until we find no more rewrites
 	for {
 		change := false
 		for _, b := range f.Blocks {
 			if b.Control != nil && b.Control.Op == OpCopy {
 				for b.Control.Op == OpCopy {
 					b.SetControl(b.Control.Args[0])
 				}
 			}
 			if rb(b) {
 				change = true
 			}
 			for _, v := range b.Values {
 				change = phielimValue(v) || change

 				// Eliminate copy inputs.
 				// If any copy input becomes unused, mark it
 				// as invalid and discard its argument. Repeat
 				// recursively on the discarded argument.
 				// This phase helps remove phantom "dead copy" uses
 				// of a value so that a x.Uses==1 rule condition
 				// fires reliably.
 				for i, a := range v.Args {
 					if a.Op != OpCopy {
 						continue
 					}
 					v.SetArg(i, copySource(a))
 					change = true
 					for a.Uses == 0 {
 						b := a.Args[0]
 						a.reset(OpInvalid)
 						a = b
 					}
 				}

 				// apply rewrite function
 				if rv(v) {
 					change = true
 				}
 			}
 		}
 		if !change {
 			break
 		}
 	}
 	// remove clobbered values
 	for _, b := range f.Blocks {
 		j := 0
 		for i, v := range b.Values {
 			if v.Op == OpInvalid {
 				f.freeValue(v)
 				continue
 			}
 			if i != j {
 				b.Values[j] = v
 			}
 			j++
 		}
 		if j != len(b.Values) {
 			tail := b.Values[j:]
 			for j := range tail {
 				tail[j] = nil
 			}
 			b.Values = b.Values[:j]
 		}
 	}
 }

 // Common functions called from rewriting rules

 func is64BitFloat(t *types.Type) bool {
 	return t.Size() == 8 && t.IsFloat()
 }

 func is32BitFloat(t *types.Type) bool {
 	return t.Size() == 4 && t.IsFloat()
 }

 func is64BitInt(t *types.Type) bool {
 	return t.Size() == 8 && t.IsInteger()
 }

 func is32BitInt(t *types.Type) bool {
 	return t.Size() == 4 && t.IsInteger()
 }

 func is16BitInt(t *types.Type) bool {
 	return t.Size() == 2 && t.IsInteger()
 }

 func is8BitInt(t *types.Type) bool {
 	return t.Size() == 1 && t.IsInteger()
 }

 func isPtr(t *types.Type) bool {
 	return t.IsPtrShaped()
 }

 func isSigned(t *types.Type) bool {
 	return t.IsSigned()
 }

 // mergeSym merges two symbolic offsets. There is no real merging of
 // offsets, we just pick the non-nil one.
 func mergeSym(x, y interface{}) interface{} {
 	if x == nil {
 		return y
 	}
 	if y == nil {
 		return x
 	}
 	panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
 }
 func canMergeSym(x, y interface{}) bool {
 	return x == nil || y == nil
 }

 // canMergeLoad reports whether the load can be merged into target without
 // invalidating the schedule.
 // It also checks that the other non-load argument x is something we
 // are ok with clobbering (all our current load+op instructions clobber
 // their input register).
 func canMergeLoad(target, load, x *Value) bool {
 	if target.Block.ID != load.Block.ID {
 		// If the load is in a different block do not merge it.
 		return false
 	}

 	// We can't merge the load into the target if the load
 	// has more than one use.
 	if load.Uses != 1 {
 		return false
 	}

 	// The register containing x is going to get clobbered.
 	// Don't merge if we still need the value of x.
 	// We don't have liveness information here, but we can
 	// approximate x dying with:
 	//  1) target is x's only use.
 	//  2) target is not in a deeper loop than x.
 	if x.Uses != 1 {
 		return false
 	}
 	loopnest := x.Block.Func.loopnest()
 	loopnest.calculateDepths()
 	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
 		return false
 	}

 	mem := load.MemoryArg()

 	// We need the load's memory arg to still be alive at target. That
 	// can't be the case if one of target's args depends on a memory
 	// state that is a successor of load's memory arg.
 	//
 	// For example, it would be invalid to merge load into target in
 	// the following situation because newmem has killed oldmem
 	// before target is reached:
 	//     load = read ... oldmem
 	//   newmem = write ... oldmem
 	//     arg0 = read ... newmem
 	//   target = add arg0 load
 	//
 	// If the argument comes from a different block then we can exclude
 	// it immediately because it must dominate load (which is in the
 	// same block as target).
 	var args []*Value
 	for _, a := range target.Args {
 		if a != load && a.Block.ID == target.Block.ID {
 			args = append(args, a)
 		}
 	}

 	// memPreds contains memory states known to be predecessors of load's
 	// memory state. It is lazily initialized.
 	var memPreds map[*Value]bool
 search:
 	for i := 0; len(args) > 0; i++ {
 		const limit = 100
 		if i >= limit {
 			// Give up if we have done a lot of iterations.
 			return false
 		}
 		v := args[len(args)-1]
 		args = args[:len(args)-1]
 		if target.Block.ID != v.Block.ID {
 			// Since target and load are in the same block
 			// we can stop searching when we leave the block.
 			continue search
 		}
 		if v.Op == OpPhi {
 			// A Phi implies we have reached the top of the block.
 			// The memory phi, if it exists, is always
 			// the first logical store in the block.
 			continue search
 		}
 		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
 			// We could handle this situation however it is likely
 			// to be very rare.
 			return false
 		}
 		if v.Type.IsMemory() {
 			if memPreds == nil {
 				// Initialise a map containing memory states
 				// known to be predecessors of load's memory
 				// state.
 				memPreds = make(map[*Value]bool)
 				m := mem
 				const limit = 50
 				for i := 0; i < limit; i++ {
 					if m.Op == OpPhi {
 						// The memory phi, if it exists, is always
 						// the first logical store in the block.
 						break
 					}
 					if m.Block.ID != target.Block.ID {
 						break
 					}
 					if !m.Type.IsMemory() {
 						break
 					}
 					memPreds[m] = true
 					if len(m.Args) == 0 {
 						break
 					}
 					m = m.MemoryArg()
 				}
 			}

 			// We can merge if v is a predecessor of mem.
 			//
 			// For example, we can merge load into target in the
 			// following scenario:
 			//      x = read ... v
 			//    mem = write ... v
 			//   load = read ... mem
 			// target = add x load
 			if memPreds[v] {
 				continue search
 			}
 			return false
 		}
 		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
 			// If v takes mem as an input then we know mem
 			// is valid at this point.
 			continue search
 		}
 		for _, a := range v.Args {
 			if target.Block.ID == a.Block.ID {
 				args = append(args, a)
 			}
 		}
 	}

 	return true
 }

 // isSameSym returns whether sym is the same as the given named symbol
 func isSameSym(sym interface{}, name string) bool {
 	s, ok := sym.(fmt.Stringer)
 	return ok && s.String() == name
 }

 // nlz returns the number of leading zeros.
 func nlz(x int64) int64 {
 	// log2(0) == 1, so nlz(0) == 64
 	return 63 - log2(x)
 }

 // ntz returns the number of trailing zeros.
 func ntz(x int64) int64 {
 	return 64 - nlz(^x&(x-1))
 }

 func oneBit(x int64) bool {
 	return nlz(x)+ntz(x) == 63
 }

 // nlo returns the number of leading ones.
 func nlo(x int64) int64 {
 	return nlz(^x)
 }

 // nto returns the number of trailing ones.
 func nto(x int64) int64 {
 	return ntz(^x)
 }

 // log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
 // Rounds down.
 func log2(n int64) (l int64) {
 	l = -1
 	x := uint64(n)
 	for ; x >= 0x8000; x >>= 16 {
 		l += 16
 	}
 	if x >= 0x80 {
 		x >>= 8
 		l += 8
 	}
 	if x >= 0x8 {
 		x >>= 4
 		l += 4
 	}
 	if x >= 0x2 {
 		x >>= 2
 		l += 2
 	}
 	if x >= 0x1 {
 		l++
 	}
 	return
 }

 // isPowerOfTwo reports whether n is a power of 2.
 func isPowerOfTwo(n int64) bool {
 	return n > 0 && n&(n-1) == 0
 }

 // is32Bit reports whether n can be represented as a signed 32 bit integer.
 func is32Bit(n int64) bool {
 	return n == int64(int32(n))
 }

 // is16Bit reports whether n can be represented as a signed 16 bit integer.
 func is16Bit(n int64) bool {
 	return n == int64(int16(n))
 }

 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
 func isU12Bit(n int64) bool {
 	return 0 <= n && n < (1<<12)
 }

 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
 func isU16Bit(n int64) bool {
 	return n == int64(uint16(n))
 }

 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
 func isU32Bit(n int64) bool {
 	return n == int64(uint32(n))
 }

 // is20Bit reports whether n can be represented as a signed 20 bit integer.
 func is20Bit(n int64) bool {
 	return -(1<<19) <= n && n < (1<<19)
 }

 // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
 func b2i(b bool) int64 {
 	if b {
 		return 1
 	}
 	return 0
 }

 // i2f is used in rules for converting from an AuxInt to a float.
 func i2f(i int64) float64 {
 	return math.Float64frombits(uint64(i))
 }

 // i2f32 is used in rules for converting from an AuxInt to a float32.
 func i2f32(i int64) float32 {
 	return float32(math.Float64frombits(uint64(i)))
 }

 // f2i is used in the rules for storing a float in AuxInt.
 func f2i(f float64) int64 {
 	return int64(math.Float64bits(f))
 }

 // uaddOvf returns true if unsigned a+b would overflow.
 func uaddOvf(a, b int64) bool {
 	return uint64(a)+uint64(b) < uint64(a)
 }

 // de-virtualize an InterCall
 // 'sym' is the symbol for the itab
 func devirt(v *Value, sym interface{}, offset int64) *obj.LSym {
 	f := v.Block.Func
 	n, ok := sym.(*obj.LSym)
 	if !ok {
 		return nil
 	}
 	lsym := f.fe.DerefItab(n, offset)
 	if f.pass.debug > 0 {
 		if lsym != nil {
 			f.Warnl(v.Pos, "de-virtualizing call")
 		} else {
 			f.Warnl(v.Pos, "couldn't de-virtualize call")
 		}
 	}
 	return lsym
 }

 // isSamePtr reports whether p1 and p2 point to the same address.
 func isSamePtr(p1, p2 *Value) bool {
 	if p1 == p2 {
 		return true
 	}
 	if p1.Op != p2.Op {
 		return false
 	}
 	switch p1.Op {
 	case OpOffPtr:
 		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
 	case OpAddr:
 		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
 		// Checking for value equality only works after [z]cse has run.
 		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
 	case OpAddPtr:
 		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
 	}
 	return false
 }

 // moveSize returns the number of bytes an aligned MOV instruction moves
 func moveSize(align int64, c *Config) int64 {
 	switch {
 	case align%8 == 0 && c.PtrSize == 8:
 		return 8
 	case align%4 == 0:
 		return 4
 	case align%2 == 0:
 		return 2
 	}
 	return 1
 }

 // mergePoint finds a block among a's blocks which dominates b and is itself
 // dominated by all of a's blocks. Returns nil if it can't find one.
 // Might return nil even if one does exist.
 func mergePoint(b *Block, a ...*Value) *Block {
 	// Walk backward from b looking for one of the a's blocks.

 	// Max distance
 	d := 100

 	for d > 0 {
 		for _, x := range a {
 			if b == x.Block {
 				goto found
 			}
 		}
 		if len(b.Preds) > 1 {
 			// Don't know which way to go back. Abort.
 			return nil
 		}
 		b = b.Preds[0].b
 		d--
 	}
 	return nil // too far away
 found:
 	// At this point, r is the first value in a that we find by walking backwards.
 	// if we return anything, r will be it.
 	r := b

 	// Keep going, counting the other a's that we find. They must all dominate r.
 	na := 0
 	for d > 0 {
 		for _, x := range a {
 			if b == x.Block {
 				na++
 			}
 		}
 		if na == len(a) {
 			// Found all of a in a backwards walk. We can return r.
 			return r
 		}
 		if len(b.Preds) > 1 {
 			return nil
 		}
 		b = b.Preds[0].b
 		d--

 	}
 	return nil // too far away
 }

 // clobber invalidates v.  Returns true.
 // clobber is used by rewrite rules to:
 //   A) make sure v is really dead and never used again.
 //   B) decrement use counts of v's args.
 func clobber(v *Value) bool {
 	v.reset(OpInvalid)
 	// Note: leave v.Block intact.  The Block field is used after clobber.
 	return true
 }

 // clobberIfDead resets v when use count is 1. Returns true.
 // clobberIfDead is used by rewrite rules to decrement
 // use counts of v's args when v is dead and never used.
 func clobberIfDead(v *Value) bool {
 	if v.Uses == 1 {
 		v.reset(OpInvalid)
 	}
 	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
 	return true
 }

 // noteRule is an easy way to track if a rule is matched when writing
 // new ones.  Make the rule of interest also conditional on
 //     noteRule("note to self: rule of interest matched")
 // and that message will print when the rule matches.
 func noteRule(s string) bool {
 	fmt.Println(s)
 	return true
 }

 // warnRule generates a compiler debug output with string s when
 // cond is true and the rule is fired.
 func warnRule(cond bool, v *Value, s string) bool {
 	if cond {
 		v.Block.Func.Warnl(v.Pos, s)
 	}
 	return true
 }

 // for a pseudo-op like (LessThan x), extract x
 func flagArg(v *Value) *Value {
 	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
 		return nil
 	}
 	return v.Args[0]
 }

 // arm64Negate finds the complement to an ARM64 condition code,
 // for example Equal -> NotEqual or LessThan -> GreaterEqual
 //
 // TODO: add floating-point conditions
 func arm64Negate(op Op) Op {
 	switch op {
 	case OpARM64LessThan:
 		return OpARM64GreaterEqual
 	case OpARM64LessThanU:
 		return OpARM64GreaterEqualU
 	case OpARM64GreaterThan:
 		return OpARM64LessEqual
 	case OpARM64GreaterThanU:
 		return OpARM64LessEqualU
 	case OpARM64LessEqual:
 		return OpARM64GreaterThan
 	case OpARM64LessEqualU:
 		return OpARM64GreaterThanU
 	case OpARM64GreaterEqual:
 		return OpARM64LessThan
 	case OpARM64GreaterEqualU:
 		return OpARM64LessThanU
 	case OpARM64Equal:
 		return OpARM64NotEqual
 	case OpARM64NotEqual:
 		return OpARM64Equal
 	default:
 		panic("unreachable")
 	}
 }

 // arm64Invert evaluates (InvertFlags op), which
 // is the same as altering the condition codes such
 // that the same result would be produced if the arguments
 // to the flag-generating instruction were reversed, e.g.
 // (InvertFlags (CMP x y)) -> (CMP y x)
 //
 // TODO: add floating-point conditions
 func arm64Invert(op Op) Op {
 	switch op {
 	case OpARM64LessThan:
 		return OpARM64GreaterThan
 	case OpARM64LessThanU:
 		return OpARM64GreaterThanU
 	case OpARM64GreaterThan:
 		return OpARM64LessThan
 	case OpARM64GreaterThanU:
 		return OpARM64LessThanU
 	case OpARM64LessEqual:
 		return OpARM64GreaterEqual
 	case OpARM64LessEqualU:
 		return OpARM64GreaterEqualU
 	case OpARM64GreaterEqual:
 		return OpARM64LessEqual
 	case OpARM64GreaterEqualU:
 		return OpARM64LessEqualU
 	case OpARM64Equal, OpARM64NotEqual:
 		return op
 	default:
 		panic("unreachable")
 	}
 }

 // evaluate an ARM64 op against a flags value
 // that is potentially constant; return 1 for true,
 // -1 for false, and 0 for not constant.
 func ccARM64Eval(cc interface{}, flags *Value) int {
 	op := cc.(Op)
 	fop := flags.Op
 	switch fop {
 	case OpARM64InvertFlags:
 		return -ccARM64Eval(op, flags.Args[0])
 	case OpARM64FlagEQ:
 		switch op {
 		case OpARM64Equal, OpARM64GreaterEqual, OpARM64LessEqual,
 			OpARM64GreaterEqualU, OpARM64LessEqualU:
 			return 1
 		default:
 			return -1
 		}
 	case OpARM64FlagLT_ULT:
 		switch op {
 		case OpARM64LessThan, OpARM64LessThanU,
 			OpARM64LessEqual, OpARM64LessEqualU:
 			return 1
 		default:
 			return -1
 		}
 	case OpARM64FlagLT_UGT:
 		switch op {
 		case OpARM64LessThan, OpARM64GreaterThanU,
 			OpARM64LessEqual, OpARM64GreaterEqualU:
 			return 1
 		default:
 			return -1
 		}
 	case OpARM64FlagGT_ULT:
 		switch op {
 		case OpARM64GreaterThan, OpARM64LessThanU,
 			OpARM64GreaterEqual, OpARM64LessEqualU:
 			return 1
 		default:
 			return -1
 		}
 	case OpARM64FlagGT_UGT:
 		switch op {
 		case OpARM64GreaterThan, OpARM64GreaterThanU,
 			OpARM64GreaterEqual, OpARM64GreaterEqualU:
 			return 1
 		default:
 			return -1
 		}
 	default:
 		return 0
 	}
 }

 // logRule logs the use of the rule s. This will only be enabled if
 // rewrite rules were generated with the -log option, see gen/rulegen.go.
 func logRule(s string) {
 	if ruleFile == nil {
 		// Open a log file to write log to. We open in append
 		// mode because all.bash runs the compiler lots of times,
 		// and we want the concatenation of all of those logs.
 		// This means, of course, that users need to rm the old log
 		// to get fresh data.
 		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
 		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
 			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
 		if err != nil {
 			panic(err)
 		}
 		ruleFile = w
 	}
 	_, err := fmt.Fprintf(ruleFile, "rewrite %s\n", s)
 	if err != nil {
 		panic(err)
 	}
 }

 var ruleFile io.Writer

 func min(x, y int64) int64 {
 	if x < y {
 		return x
 	}
 	return y
 }

 func isConstZero(v *Value) bool {
 	switch v.Op {
 	case OpConstNil:
 		return true
 	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
 		return v.AuxInt == 0
 	}
 	return false
 }

 // reciprocalExact64 reports whether 1/c is exactly representable.
 func reciprocalExact64(c float64) bool {
 	b := math.Float64bits(c)
 	man := b & (1<<52 - 1)
 	if man != 0 {
 		return false // not a power of 2, denormal, or NaN
 	}
 	exp := b >> 52 & (1<<11 - 1)
 	// exponent bias is 0x3ff.  So taking the reciprocal of a number
 	// changes the exponent to 0x7fe-exp.
 	switch exp {
 	case 0:
 		return false // ±0
 	case 0x7ff:
 		return false // ±inf
 	case 0x7fe:
 		return false // exponent is not representable
 	default:
 		return true
 	}
 }

 // reciprocalExact32 reports whether 1/c is exactly representable.
 func reciprocalExact32(c float32) bool {
 	b := math.Float32bits(c)
 	man := b & (1<<23 - 1)
 	if man != 0 {
 		return false // not a power of 2, denormal, or NaN
 	}
 	exp := b >> 23 & (1<<8 - 1)
 	// exponent bias is 0x7f.  So taking the reciprocal of a number
 	// changes the exponent to 0xfe-exp.
 	switch exp {
 	case 0:
 		return false // ±0
 	case 0xff:
 		return false // ±inf
 	case 0xfe:
 		return false // exponent is not representable
 	default:
 		return true
 	}
 }

 // check if an immediate can be directly encoded into an ARM's instruction
 func isARMImmRot(v uint32) bool {
 	for i := 0; i < 16; i++ {
 		if v&^0xff == 0 {
 			return true
 		}
 		v = v<<2 | v>>30
 	}

 	return false
 }

 // overlap reports whether the ranges given by the given offset and
 // size pairs overlap.
 func overlap(offset1, size1, offset2, size2 int64) bool {
 	if offset1 >= offset2 && offset2+size2 > offset1 {
 		return true
 	}
 	if offset2 >= offset1 && offset1+size1 > offset2 {
 		return true
 	}
 	return false
 }

 // check if value zeroes out upper 32-bit of 64-bit register.
 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
 // because it catches same amount of cases as 4.
 func zeroUpper32Bits(x *Value, depth int) bool {
 	switch x.Op {
 	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
 		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
 		OpAMD64MOVLloadidx4, OpAMD64ADDLmem, OpAMD64SUBLmem, OpAMD64ANDLmem,
 		OpAMD64ORLmem, OpAMD64XORLmem, OpAMD64CVTTSD2SL,
 		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
 		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
 		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL:
 		return true
 	case OpArg:
 		return x.Type.Width == 4
 	case OpSelect0, OpSelect1:
 		// Disabled for now. See issue 23305.
 		// TODO: we could look into the arg of the Select to decide.
 		return false
 	case OpPhi:
 		// Phis can use each-other as an arguments, instead of tracking visited values,
 		// just limit recursion depth.
 		if depth <= 0 {
 			return false
 		}
 		for i := range x.Args {
 			if !zeroUpper32Bits(x.Args[i], depth-1) {
 				return false
 			}
 		}
 		return true

 	}
 	return false
 }

 // inlineablememmovesize reports whether the given arch performs OpMove of the given size
 // faster than memmove and in a safe way when src and dst overlap.
 // This is used as a check for replacing memmove with OpMove.
 func isInlinableMemmoveSize(sz int64, c *Config) bool {
 	switch c.arch {
 	case "amd64", "amd64p32":
 		return sz <= 16
 	case "386", "ppc64", "s390x", "ppc64le":
 		return sz <= 8
 	case "arm", "mips", "mips64", "mipsle", "mips64le":
 		return sz <= 4
 	}
 	return false
 }
	// Copyright 2015 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package ssa

	import (
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
	)

	func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) {
	// repeat rewrites until we find no more rewrites
	for {
	change := false
	for _, b := range f.Blocks {
	if b.Control != nil && b.Control.Op == OpCopy {
	for b.Control.Op == OpCopy {
	b.SetControl(b.Control.Args[0])
	}
	}
	if rb(b) {
	change = true
	}
	for _, v := range b.Values {
	change = phielimValue(v) \|\| change

	// Eliminate copy inputs.
	// If any copy input becomes unused, mark it
	// as invalid and discard its argument. Repeat
	// recursively on the discarded argument.
	// This phase helps remove phantom "dead copy" uses
	// of a value so that a x.Uses==1 rule condition
	// fires reliably.
	for i, a := range v.Args {
	if a.Op != OpCopy {
	continue
	}
	v.SetArg(i, copySource(a))
	change = true
	for a.Uses == 0 {
	b := a.Args[0]
	a.reset(OpInvalid)
	a = b
	}
	}

	// apply rewrite function
	if rv(v) {
	change = true
	}
	}
	}
	if !change {
	break
	}
	}
	// remove clobbered values
	for _, b := range f.Blocks {
	j := 0
	for i, v := range b.Values {
	if v.Op == OpInvalid {
	f.freeValue(v)
	continue
	}
	if i != j {
	b.Values[j] = v
	}
	j++
	}
	if j != len(b.Values) {
	tail := b.Values[j:]
	for j := range tail {
	tail[j] = nil
	}
	b.Values = b.Values[:j]
	}
	}
	}

	// Common functions called from rewriting rules

	func is64BitFloat(t *types.Type) bool {
	return t.Size() == 8 && t.IsFloat()
	}

	func is32BitFloat(t *types.Type) bool {
	return t.Size() == 4 && t.IsFloat()
	}

	func is64BitInt(t *types.Type) bool {
	return t.Size() == 8 && t.IsInteger()
	}

	func is32BitInt(t *types.Type) bool {
	return t.Size() == 4 && t.IsInteger()
	}

	func is16BitInt(t *types.Type) bool {
	return t.Size() == 2 && t.IsInteger()
	}

	func is8BitInt(t *types.Type) bool {
	return t.Size() == 1 && t.IsInteger()
	}

	func isPtr(t *types.Type) bool {
	return t.IsPtrShaped()
	}

	func isSigned(t *types.Type) bool {
	return t.IsSigned()
	}

	// mergeSym merges two symbolic offsets. There is no real merging of
	// offsets, we just pick the non-nil one.
	func mergeSym(x, y interface{}) interface{} {
	if x == nil {
	return y
	}
	if y == nil {
	return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
	}
	func canMergeSym(x, y interface{}) bool {
	return x == nil \|\| y == nil
	}

	// canMergeLoad reports whether the load can be merged into target without
	// invalidating the schedule.
	// It also checks that the other non-load argument x is something we
	// are ok with clobbering (all our current load+op instructions clobber
	// their input register).
	func canMergeLoad(target, load, x *Value) bool {
	if target.Block.ID != load.Block.ID {
	// If the load is in a different block do not merge it.
	return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
	return false
	}

	// The register containing x is going to get clobbered.
	// Don't merge if we still need the value of x.
	// We don't have liveness information here, but we can
	// approximate x dying with:
	// 1) target is x's only use.
	// 2) target is not in a deeper loop than x.
	if x.Uses != 1 {
	return false
	}
	loopnest := x.Block.Func.loopnest()
	loopnest.calculateDepths()
	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
	return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	// load = read ... oldmem
	// newmem = write ... oldmem
	// arg0 = read ... newmem
	// target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	var args []*Value
	for _, a := range target.Args {
	if a != load && a.Block.ID == target.Block.ID {
	args = append(args, a)
	}
	}

	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
	search:
	for i := 0; len(args) > 0; i++ {
	const limit = 100
	if i >= limit {
	// Give up if we have done a lot of iterations.
	return false
	}
	v := args[len(args)-1]
	args = args[:len(args)-1]
	if target.Block.ID != v.Block.ID {
	// Since target and load are in the same block
	// we can stop searching when we leave the block.
	continue search
	}
	if v.Op == OpPhi {
	// A Phi implies we have reached the top of the block.
	// The memory phi, if it exists, is always
	// the first logical store in the block.
	continue search
	}
	if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
	// We could handle this situation however it is likely
	// to be very rare.
	return false
	}
	if v.Type.IsMemory() {
	if memPreds == nil {
	// Initialise a map containing memory states
	// known to be predecessors of load's memory
	// state.
	memPreds = make(map[*Value]bool)
	m := mem
	const limit = 50
	for i := 0; i < limit; i++ {
	if m.Op == OpPhi {
	// The memory phi, if it exists, is always
	// the first logical store in the block.
	break
	}
	if m.Block.ID != target.Block.ID {
	break
	}
	if !m.Type.IsMemory() {
	break
	}
	memPreds[m] = true
	if len(m.Args) == 0 {
	break
	}
	m = m.MemoryArg()
	}
	}

	// We can merge if v is a predecessor of mem.
	//
	// For example, we can merge load into target in the
	// following scenario:
	// x = read ... v
	// mem = write ... v
	// load = read ... mem
	// target = add x load
	if memPreds[v] {
	continue search
	}
	return false
	}
	if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
	// If v takes mem as an input then we know mem
	// is valid at this point.
	continue search
	}
	for _, a := range v.Args {
	if target.Block.ID == a.Block.ID {
	args = append(args, a)
	}
	}
	}

	return true
	}

	// isSameSym returns whether sym is the same as the given named symbol
	func isSameSym(sym interface{}, name string) bool {
	s, ok := sym.(fmt.Stringer)
	return ok && s.String() == name
	}

	// nlz returns the number of leading zeros.
	func nlz(x int64) int64 {
	// log2(0) == 1, so nlz(0) == 64
	return 63 - log2(x)
	}

	// ntz returns the number of trailing zeros.
	func ntz(x int64) int64 {
	return 64 - nlz(^x&(x-1))
	}

	func oneBit(x int64) bool {
	return nlz(x)+ntz(x) == 63
	}

	// nlo returns the number of leading ones.
	func nlo(x int64) int64 {
	return nlz(^x)
	}

	// nto returns the number of trailing ones.
	func nto(x int64) int64 {
	return ntz(^x)
	}

	// log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
	// Rounds down.
	func log2(n int64) (l int64) {
	l = -1
	x := uint64(n)
	for ; x >= 0x8000; x >>= 16 {
	l += 16
	}
	if x >= 0x80 {
	x >>= 8
	l += 8
	}
	if x >= 0x8 {
	x >>= 4
	l += 4
	}
	if x >= 0x2 {
	x >>= 2
	l += 2
	}
	if x >= 0x1 {
	l++
	}
	return
	}

	// isPowerOfTwo reports whether n is a power of 2.
	func isPowerOfTwo(n int64) bool {
	return n > 0 && n&(n-1) == 0
	}

	// is32Bit reports whether n can be represented as a signed 32 bit integer.
	func is32Bit(n int64) bool {
	return n == int64(int32(n))
	}

	// is16Bit reports whether n can be represented as a signed 16 bit integer.
	func is16Bit(n int64) bool {
	return n == int64(int16(n))
	}

	// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
	func isU12Bit(n int64) bool {
	return 0 <= n && n < (1<<12)
	}

	// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
	func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
	}

	// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
	func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
	}

	// is20Bit reports whether n can be represented as a signed 20 bit integer.
	func is20Bit(n int64) bool {
	return -(1<<19) <= n && n < (1<<19)
	}

	// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
	func b2i(b bool) int64 {
	if b {
	return 1
	}
	return 0
	}

	// i2f is used in rules for converting from an AuxInt to a float.
	func i2f(i int64) float64 {
	return math.Float64frombits(uint64(i))
	}

	// i2f32 is used in rules for converting from an AuxInt to a float32.
	func i2f32(i int64) float32 {
	return float32(math.Float64frombits(uint64(i)))
	}

	// f2i is used in the rules for storing a float in AuxInt.
	func f2i(f float64) int64 {
	return int64(math.Float64bits(f))
	}

	// uaddOvf returns true if unsigned a+b would overflow.
	func uaddOvf(a, b int64) bool {
	return uint64(a)+uint64(b) < uint64(a)
	}

	// de-virtualize an InterCall
	// 'sym' is the symbol for the itab
	func devirt(v Value, sym interface{}, offset int64) obj.LSym {
	f := v.Block.Func
	n, ok := sym.(*obj.LSym)
	if !ok {
	return nil
	}
	lsym := f.fe.DerefItab(n, offset)
	if f.pass.debug > 0 {
	if lsym != nil {
	f.Warnl(v.Pos, "de-virtualizing call")
	} else {
	f.Warnl(v.Pos, "couldn't de-virtualize call")
	}
	}
	return lsym
	}

	// isSamePtr reports whether p1 and p2 point to the same address.
	func isSamePtr(p1, p2 *Value) bool {
	if p1 == p2 {
	return true
	}
	if p1.Op != p2.Op {
	return false
	}
	switch p1.Op {
	case OpOffPtr:
	return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
	case OpAddr:
	// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
	// Checking for value equality only works after [z]cse has run.
	return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
	case OpAddPtr:
	return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
	}
	return false
	}

	// moveSize returns the number of bytes an aligned MOV instruction moves
	func moveSize(align int64, c *Config) int64 {
	switch {
	case align%8 == 0 && c.PtrSize == 8:
	return 8
	case align%4 == 0:
	return 4
	case align%2 == 0:
	return 2
	}
	return 1
	}

	// mergePoint finds a block among a's blocks which dominates b and is itself
	// dominated by all of a's blocks. Returns nil if it can't find one.
	// Might return nil even if one does exist.
	func mergePoint(b Block, a ...Value) *Block {
	// Walk backward from b looking for one of the a's blocks.

	// Max distance
	d := 100

	for d > 0 {
	for _, x := range a {
	if b == x.Block {
	goto found
	}
	}
	if len(b.Preds) > 1 {
	// Don't know which way to go back. Abort.
	return nil
	}
	b = b.Preds[0].b
	d--
	}
	return nil // too far away
	found:
	// At this point, r is the first value in a that we find by walking backwards.
	// if we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	na := 0
	for d > 0 {
	for _, x := range a {
	if b == x.Block {
	na++
	}
	}
	if na == len(a) {
	// Found all of a in a backwards walk. We can return r.
	return r
	}
	if len(b.Preds) > 1 {
	return nil
	}
	b = b.Preds[0].b
	d--

	}
	return nil // too far away
	}

	// clobber invalidates v. Returns true.
	// clobber is used by rewrite rules to:
	// A) make sure v is really dead and never used again.
	// B) decrement use counts of v's args.
	func clobber(v *Value) bool {
	v.reset(OpInvalid)
	// Note: leave v.Block intact. The Block field is used after clobber.
	return true
	}

	// clobberIfDead resets v when use count is 1. Returns true.
	// clobberIfDead is used by rewrite rules to decrement
	// use counts of v's args when v is dead and never used.
	func clobberIfDead(v *Value) bool {
	if v.Uses == 1 {
	v.reset(OpInvalid)
	}
	// Note: leave v.Block intact. The Block field is used after clobberIfDead.
	return true
	}

	// noteRule is an easy way to track if a rule is matched when writing
	// new ones. Make the rule of interest also conditional on
	// noteRule("note to self: rule of interest matched")
	// and that message will print when the rule matches.
	func noteRule(s string) bool {
	fmt.Println(s)
	return true
	}

	// warnRule generates a compiler debug output with string s when
	// cond is true and the rule is fired.
	func warnRule(cond bool, v *Value, s string) bool {
	if cond {
	v.Block.Func.Warnl(v.Pos, s)
	}
	return true
	}

	// for a pseudo-op like (LessThan x), extract x
	func flagArg(v Value) Value {
	if len(v.Args) != 1 \|\| !v.Args[0].Type.IsFlags() {
	return nil
	}
	return v.Args[0]
	}

	// arm64Negate finds the complement to an ARM64 condition code,
	// for example Equal -> NotEqual or LessThan -> GreaterEqual
	//
	// TODO: add floating-point conditions
	func arm64Negate(op Op) Op {
	switch op {
	case OpARM64LessThan:
	return OpARM64GreaterEqual
	case OpARM64LessThanU:
	return OpARM64GreaterEqualU
	case OpARM64GreaterThan:
	return OpARM64LessEqual
	case OpARM64GreaterThanU:
	return OpARM64LessEqualU
	case OpARM64LessEqual:
	return OpARM64GreaterThan
	case OpARM64LessEqualU:
	return OpARM64GreaterThanU
	case OpARM64GreaterEqual:
	return OpARM64LessThan
	case OpARM64GreaterEqualU:
	return OpARM64LessThanU
	case OpARM64Equal:
	return OpARM64NotEqual
	case OpARM64NotEqual:
	return OpARM64Equal
	default:
	panic("unreachable")
	}
	}

	// arm64Invert evaluates (InvertFlags op), which
	// is the same as altering the condition codes such
	// that the same result would be produced if the arguments
	// to the flag-generating instruction were reversed, e.g.
	// (InvertFlags (CMP x y)) -> (CMP y x)
	//
	// TODO: add floating-point conditions
	func arm64Invert(op Op) Op {
	switch op {
	case OpARM64LessThan:
	return OpARM64GreaterThan
	case OpARM64LessThanU:
	return OpARM64GreaterThanU
	case OpARM64GreaterThan:
	return OpARM64LessThan
	case OpARM64GreaterThanU:
	return OpARM64LessThanU
	case OpARM64LessEqual:
	return OpARM64GreaterEqual
	case OpARM64LessEqualU:
	return OpARM64GreaterEqualU
	case OpARM64GreaterEqual:
	return OpARM64LessEqual
	case OpARM64GreaterEqualU:
	return OpARM64LessEqualU
	case OpARM64Equal, OpARM64NotEqual:
	return op
	default:
	panic("unreachable")
	}
	}

	// evaluate an ARM64 op against a flags value
	// that is potentially constant; return 1 for true,
	// -1 for false, and 0 for not constant.
	func ccARM64Eval(cc interface{}, flags *Value) int {
	op := cc.(Op)
	fop := flags.Op
	switch fop {
	case OpARM64InvertFlags:
	return -ccARM64Eval(op, flags.Args[0])
	case OpARM64FlagEQ:
	switch op {
	case OpARM64Equal, OpARM64GreaterEqual, OpARM64LessEqual,
	OpARM64GreaterEqualU, OpARM64LessEqualU:
	return 1
	default:
	return -1
	}
	case OpARM64FlagLT_ULT:
	switch op {
	case OpARM64LessThan, OpARM64LessThanU,
	OpARM64LessEqual, OpARM64LessEqualU:
	return 1
	default:
	return -1
	}
	case OpARM64FlagLT_UGT:
	switch op {
	case OpARM64LessThan, OpARM64GreaterThanU,
	OpARM64LessEqual, OpARM64GreaterEqualU:
	return 1
	default:
	return -1
	}
	case OpARM64FlagGT_ULT:
	switch op {
	case OpARM64GreaterThan, OpARM64LessThanU,
	OpARM64GreaterEqual, OpARM64LessEqualU:
	return 1
	default:
	return -1
	}
	case OpARM64FlagGT_UGT:
	switch op {
	case OpARM64GreaterThan, OpARM64GreaterThanU,
	OpARM64GreaterEqual, OpARM64GreaterEqualU:
	return 1
	default:
	return -1
	}
	default:
	return 0
	}
	}

	// logRule logs the use of the rule s. This will only be enabled if
	// rewrite rules were generated with the -log option, see gen/rulegen.go.
	func logRule(s string) {
	if ruleFile == nil {
	// Open a log file to write log to. We open in append
	// mode because all.bash runs the compiler lots of times,
	// and we want the concatenation of all of those logs.
	// This means, of course, that users need to rm the old log
	// to get fresh data.
	// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
	w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
	os.O_CREATE\|os.O_WRONLY\|os.O_APPEND, 0666)
	if err != nil {
	panic(err)
	}
	ruleFile = w
	}
	_, err := fmt.Fprintf(ruleFile, "rewrite %s\n", s)
	if err != nil {
	panic(err)
	}
	}

	var ruleFile io.Writer

	func min(x, y int64) int64 {
	if x < y {
	return x
	}
	return y
	}

	func isConstZero(v *Value) bool {
	switch v.Op {
	case OpConstNil:
	return true
	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
	return v.AuxInt == 0
	}
	return false
	}

	// reciprocalExact64 reports whether 1/c is exactly representable.
	func reciprocalExact64(c float64) bool {
	b := math.Float64bits(c)
	man := b & (1<<52 - 1)
	if man != 0 {
	return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 52 & (1<<11 - 1)
	// exponent bias is 0x3ff. So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	switch exp {
	case 0:
	return false // ±0
	case 0x7ff:
	return false // ±inf
	case 0x7fe:
	return false // exponent is not representable
	default:
	return true
	}
	}

	// reciprocalExact32 reports whether 1/c is exactly representable.
	func reciprocalExact32(c float32) bool {
	b := math.Float32bits(c)
	man := b & (1<<23 - 1)
	if man != 0 {
	return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 23 & (1<<8 - 1)
	// exponent bias is 0x7f. So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	switch exp {
	case 0:
	return false // ±0
	case 0xff:
	return false // ±inf
	case 0xfe:
	return false // exponent is not representable
	default:
	return true
	}
	}

	// check if an immediate can be directly encoded into an ARM's instruction
	func isARMImmRot(v uint32) bool {
	for i := 0; i < 16; i++ {
	if v&^0xff == 0 {
	return true
	}
	v = v<<2 \| v>>30
	}

	return false
	}

	// overlap reports whether the ranges given by the given offset and
	// size pairs overlap.
	func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
	return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
	return true
	}
	return false
	}

	// check if value zeroes out upper 32-bit of 64-bit register.
	// depth limits recursion depth. In AMD64.rules 3 is used as limit,
	// because it catches same amount of cases as 4.
	func zeroUpper32Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
	OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
	OpAMD64MOVLloadidx4, OpAMD64ADDLmem, OpAMD64SUBLmem, OpAMD64ANDLmem,
	OpAMD64ORLmem, OpAMD64XORLmem, OpAMD64CVTTSD2SL,
	OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
	OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
	OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL:
	return true
	case OpArg:
	return x.Type.Width == 4
	case OpSelect0, OpSelect1:
	// Disabled for now. See issue 23305.
	// TODO: we could look into the arg of the Select to decide.
	return false
	case OpPhi:
	// Phis can use each-other as an arguments, instead of tracking visited values,
	// just limit recursion depth.
	if depth <= 0 {
	return false
	}
	for i := range x.Args {
	if !zeroUpper32Bits(x.Args[i], depth-1) {
	return false
	}
	}
	return true

	}
	return false
	}

	// inlineablememmovesize reports whether the given arch performs OpMove of the given size
	// faster than memmove and in a safe way when src and dst overlap.
	// This is used as a check for replacing memmove with OpMove.
	func isInlinableMemmoveSize(sz int64, c *Config) bool {
	switch c.arch {
	case "amd64", "amd64p32":
	return sz <= 16
	case "386", "ppc64", "s390x", "ppc64le":
	return sz <= 8
	case "arm", "mips", "mips64", "mipsle", "mips64le":
	return sz <= 4
	}
	return false
	}