| // Copyright 2015 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package ssa |
| |
| import ( |
| "cmd/compile/internal/types" |
| "cmd/internal/obj" |
| "fmt" |
| "io" |
| "math" |
| "os" |
| "path/filepath" |
| ) |
| |
| func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) { |
| // repeat rewrites until we find no more rewrites |
| for { |
| change := false |
| for _, b := range f.Blocks { |
| if b.Control != nil && b.Control.Op == OpCopy { |
| for b.Control.Op == OpCopy { |
| b.SetControl(b.Control.Args[0]) |
| } |
| } |
| if rb(b) { |
| change = true |
| } |
| for _, v := range b.Values { |
| change = phielimValue(v) || change |
| |
| // Eliminate copy inputs. |
| // If any copy input becomes unused, mark it |
| // as invalid and discard its argument. Repeat |
| // recursively on the discarded argument. |
| // This phase helps remove phantom "dead copy" uses |
| // of a value so that a x.Uses==1 rule condition |
| // fires reliably. |
| for i, a := range v.Args { |
| if a.Op != OpCopy { |
| continue |
| } |
| v.SetArg(i, copySource(a)) |
| change = true |
| for a.Uses == 0 { |
| b := a.Args[0] |
| a.reset(OpInvalid) |
| a = b |
| } |
| } |
| |
| // apply rewrite function |
| if rv(v) { |
| change = true |
| } |
| } |
| } |
| if !change { |
| break |
| } |
| } |
| // remove clobbered values |
| for _, b := range f.Blocks { |
| j := 0 |
| for i, v := range b.Values { |
| if v.Op == OpInvalid { |
| f.freeValue(v) |
| continue |
| } |
| if i != j { |
| b.Values[j] = v |
| } |
| j++ |
| } |
| if j != len(b.Values) { |
| tail := b.Values[j:] |
| for j := range tail { |
| tail[j] = nil |
| } |
| b.Values = b.Values[:j] |
| } |
| } |
| } |
| |
| // Common functions called from rewriting rules |
| |
| func is64BitFloat(t *types.Type) bool { |
| return t.Size() == 8 && t.IsFloat() |
| } |
| |
| func is32BitFloat(t *types.Type) bool { |
| return t.Size() == 4 && t.IsFloat() |
| } |
| |
| func is64BitInt(t *types.Type) bool { |
| return t.Size() == 8 && t.IsInteger() |
| } |
| |
| func is32BitInt(t *types.Type) bool { |
| return t.Size() == 4 && t.IsInteger() |
| } |
| |
| func is16BitInt(t *types.Type) bool { |
| return t.Size() == 2 && t.IsInteger() |
| } |
| |
| func is8BitInt(t *types.Type) bool { |
| return t.Size() == 1 && t.IsInteger() |
| } |
| |
| func isPtr(t *types.Type) bool { |
| return t.IsPtrShaped() |
| } |
| |
| func isSigned(t *types.Type) bool { |
| return t.IsSigned() |
| } |
| |
| func typeSize(t *types.Type) int64 { |
| return t.Size() |
| } |
| |
| // mergeSym merges two symbolic offsets. There is no real merging of |
| // offsets, we just pick the non-nil one. |
| func mergeSym(x, y interface{}) interface{} { |
| if x == nil { |
| return y |
| } |
| if y == nil { |
| return x |
| } |
| panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y)) |
| } |
| func canMergeSym(x, y interface{}) bool { |
| return x == nil || y == nil |
| } |
| |
| // canMergeLoad reports whether the load can be merged into target without |
| // invalidating the schedule. |
| // It also checks that the other non-load argument x is something we |
| // are ok with clobbering (all our current load+op instructions clobber |
| // their input register). |
| func canMergeLoad(target, load, x *Value) bool { |
| if target.Block.ID != load.Block.ID { |
| // If the load is in a different block do not merge it. |
| return false |
| } |
| |
| // We can't merge the load into the target if the load |
| // has more than one use. |
| if load.Uses != 1 { |
| return false |
| } |
| |
| // The register containing x is going to get clobbered. |
| // Don't merge if we still need the value of x. |
| // We don't have liveness information here, but we can |
| // approximate x dying with: |
| // 1) target is x's only use. |
| // 2) target is not in a deeper loop than x. |
| if x.Uses != 1 { |
| return false |
| } |
| loopnest := x.Block.Func.loopnest() |
| loopnest.calculateDepths() |
| if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) { |
| return false |
| } |
| |
| mem := load.MemoryArg() |
| |
| // We need the load's memory arg to still be alive at target. That |
| // can't be the case if one of target's args depends on a memory |
| // state that is a successor of load's memory arg. |
| // |
| // For example, it would be invalid to merge load into target in |
| // the following situation because newmem has killed oldmem |
| // before target is reached: |
| // load = read ... oldmem |
| // newmem = write ... oldmem |
| // arg0 = read ... newmem |
| // target = add arg0 load |
| // |
| // If the argument comes from a different block then we can exclude |
| // it immediately because it must dominate load (which is in the |
| // same block as target). |
| var args []*Value |
| for _, a := range target.Args { |
| if a != load && a.Block.ID == target.Block.ID { |
| args = append(args, a) |
| } |
| } |
| |
| // memPreds contains memory states known to be predecessors of load's |
| // memory state. It is lazily initialized. |
| var memPreds map[*Value]bool |
| search: |
| for i := 0; len(args) > 0; i++ { |
| const limit = 100 |
| if i >= limit { |
| // Give up if we have done a lot of iterations. |
| return false |
| } |
| v := args[len(args)-1] |
| args = args[:len(args)-1] |
| if target.Block.ID != v.Block.ID { |
| // Since target and load are in the same block |
| // we can stop searching when we leave the block. |
| continue search |
| } |
| if v.Op == OpPhi { |
| // A Phi implies we have reached the top of the block. |
| // The memory phi, if it exists, is always |
| // the first logical store in the block. |
| continue search |
| } |
| if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() { |
| // We could handle this situation however it is likely |
| // to be very rare. |
| return false |
| } |
| if v.Type.IsMemory() { |
| if memPreds == nil { |
| // Initialise a map containing memory states |
| // known to be predecessors of load's memory |
| // state. |
| memPreds = make(map[*Value]bool) |
| m := mem |
| const limit = 50 |
| for i := 0; i < limit; i++ { |
| if m.Op == OpPhi { |
| // The memory phi, if it exists, is always |
| // the first logical store in the block. |
| break |
| } |
| if m.Block.ID != target.Block.ID { |
| break |
| } |
| if !m.Type.IsMemory() { |
| break |
| } |
| memPreds[m] = true |
| if len(m.Args) == 0 { |
| break |
| } |
| m = m.MemoryArg() |
| } |
| } |
| |
| // We can merge if v is a predecessor of mem. |
| // |
| // For example, we can merge load into target in the |
| // following scenario: |
| // x = read ... v |
| // mem = write ... v |
| // load = read ... mem |
| // target = add x load |
| if memPreds[v] { |
| continue search |
| } |
| return false |
| } |
| if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem { |
| // If v takes mem as an input then we know mem |
| // is valid at this point. |
| continue search |
| } |
| for _, a := range v.Args { |
| if target.Block.ID == a.Block.ID { |
| args = append(args, a) |
| } |
| } |
| } |
| |
| return true |
| } |
| |
| // isSameSym returns whether sym is the same as the given named symbol |
| func isSameSym(sym interface{}, name string) bool { |
| s, ok := sym.(fmt.Stringer) |
| return ok && s.String() == name |
| } |
| |
| // nlz returns the number of leading zeros. |
| func nlz(x int64) int64 { |
| // log2(0) == 1, so nlz(0) == 64 |
| return 63 - log2(x) |
| } |
| |
| // ntz returns the number of trailing zeros. |
| func ntz(x int64) int64 { |
| return 64 - nlz(^x&(x-1)) |
| } |
| |
| func oneBit(x int64) bool { |
| return nlz(x)+ntz(x) == 63 |
| } |
| |
| // nlo returns the number of leading ones. |
| func nlo(x int64) int64 { |
| return nlz(^x) |
| } |
| |
| // nto returns the number of trailing ones. |
| func nto(x int64) int64 { |
| return ntz(^x) |
| } |
| |
| // log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1. |
| // Rounds down. |
| func log2(n int64) (l int64) { |
| l = -1 |
| x := uint64(n) |
| for ; x >= 0x8000; x >>= 16 { |
| l += 16 |
| } |
| if x >= 0x80 { |
| x >>= 8 |
| l += 8 |
| } |
| if x >= 0x8 { |
| x >>= 4 |
| l += 4 |
| } |
| if x >= 0x2 { |
| x >>= 2 |
| l += 2 |
| } |
| if x >= 0x1 { |
| l++ |
| } |
| return |
| } |
| |
| // isPowerOfTwo reports whether n is a power of 2. |
| func isPowerOfTwo(n int64) bool { |
| return n > 0 && n&(n-1) == 0 |
| } |
| |
| // is32Bit reports whether n can be represented as a signed 32 bit integer. |
| func is32Bit(n int64) bool { |
| return n == int64(int32(n)) |
| } |
| |
| // is16Bit reports whether n can be represented as a signed 16 bit integer. |
| func is16Bit(n int64) bool { |
| return n == int64(int16(n)) |
| } |
| |
| // isU12Bit reports whether n can be represented as an unsigned 12 bit integer. |
| func isU12Bit(n int64) bool { |
| return 0 <= n && n < (1<<12) |
| } |
| |
| // isU16Bit reports whether n can be represented as an unsigned 16 bit integer. |
| func isU16Bit(n int64) bool { |
| return n == int64(uint16(n)) |
| } |
| |
| // isU32Bit reports whether n can be represented as an unsigned 32 bit integer. |
| func isU32Bit(n int64) bool { |
| return n == int64(uint32(n)) |
| } |
| |
| // is20Bit reports whether n can be represented as a signed 20 bit integer. |
| func is20Bit(n int64) bool { |
| return -(1<<19) <= n && n < (1<<19) |
| } |
| |
| // b2i translates a boolean value to 0 or 1 for assigning to auxInt. |
| func b2i(b bool) int64 { |
| if b { |
| return 1 |
| } |
| return 0 |
| } |
| |
| // i2f is used in rules for converting from an AuxInt to a float. |
| func i2f(i int64) float64 { |
| return math.Float64frombits(uint64(i)) |
| } |
| |
| // i2f32 is used in rules for converting from an AuxInt to a float32. |
| func i2f32(i int64) float32 { |
| return float32(math.Float64frombits(uint64(i))) |
| } |
| |
| // f2i is used in the rules for storing a float in AuxInt. |
| func f2i(f float64) int64 { |
| return int64(math.Float64bits(f)) |
| } |
| |
| // uaddOvf returns true if unsigned a+b would overflow. |
| func uaddOvf(a, b int64) bool { |
| return uint64(a)+uint64(b) < uint64(a) |
| } |
| |
| // de-virtualize an InterCall |
| // 'sym' is the symbol for the itab |
| func devirt(v *Value, sym interface{}, offset int64) *obj.LSym { |
| f := v.Block.Func |
| n, ok := sym.(*obj.LSym) |
| if !ok { |
| return nil |
| } |
| lsym := f.fe.DerefItab(n, offset) |
| if f.pass.debug > 0 { |
| if lsym != nil { |
| f.Warnl(v.Pos, "de-virtualizing call") |
| } else { |
| f.Warnl(v.Pos, "couldn't de-virtualize call") |
| } |
| } |
| return lsym |
| } |
| |
| // isSamePtr reports whether p1 and p2 point to the same address. |
| func isSamePtr(p1, p2 *Value) bool { |
| if p1 == p2 { |
| return true |
| } |
| if p1.Op != p2.Op { |
| return false |
| } |
| switch p1.Op { |
| case OpOffPtr: |
| return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0]) |
| case OpAddr: |
| // OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op. |
| // Checking for value equality only works after [z]cse has run. |
| return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op |
| case OpAddPtr: |
| return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0]) |
| } |
| return false |
| } |
| |
| // moveSize returns the number of bytes an aligned MOV instruction moves |
| func moveSize(align int64, c *Config) int64 { |
| switch { |
| case align%8 == 0 && c.PtrSize == 8: |
| return 8 |
| case align%4 == 0: |
| return 4 |
| case align%2 == 0: |
| return 2 |
| } |
| return 1 |
| } |
| |
| // mergePoint finds a block among a's blocks which dominates b and is itself |
| // dominated by all of a's blocks. Returns nil if it can't find one. |
| // Might return nil even if one does exist. |
| func mergePoint(b *Block, a ...*Value) *Block { |
| // Walk backward from b looking for one of the a's blocks. |
| |
| // Max distance |
| d := 100 |
| |
| for d > 0 { |
| for _, x := range a { |
| if b == x.Block { |
| goto found |
| } |
| } |
| if len(b.Preds) > 1 { |
| // Don't know which way to go back. Abort. |
| return nil |
| } |
| b = b.Preds[0].b |
| d-- |
| } |
| return nil // too far away |
| found: |
| // At this point, r is the first value in a that we find by walking backwards. |
| // if we return anything, r will be it. |
| r := b |
| |
| // Keep going, counting the other a's that we find. They must all dominate r. |
| na := 0 |
| for d > 0 { |
| for _, x := range a { |
| if b == x.Block { |
| na++ |
| } |
| } |
| if na == len(a) { |
| // Found all of a in a backwards walk. We can return r. |
| return r |
| } |
| if len(b.Preds) > 1 { |
| return nil |
| } |
| b = b.Preds[0].b |
| d-- |
| |
| } |
| return nil // too far away |
| } |
| |
| // clobber invalidates v. Returns true. |
| // clobber is used by rewrite rules to: |
| // A) make sure v is really dead and never used again. |
| // B) decrement use counts of v's args. |
| func clobber(v *Value) bool { |
| v.reset(OpInvalid) |
| // Note: leave v.Block intact. The Block field is used after clobber. |
| return true |
| } |
| |
| // noteRule is an easy way to track if a rule is matched when writing |
| // new ones. Make the rule of interest also conditional on |
| // noteRule("note to self: rule of interest matched") |
| // and that message will print when the rule matches. |
| func noteRule(s string) bool { |
| fmt.Println(s) |
| return true |
| } |
| |
| // warnRule generates a compiler debug output with string s when |
| // cond is true and the rule is fired. |
| func warnRule(cond bool, v *Value, s string) bool { |
| if cond { |
| v.Block.Func.Warnl(v.Pos, s) |
| } |
| return true |
| } |
| |
| // logRule logs the use of the rule s. This will only be enabled if |
| // rewrite rules were generated with the -log option, see gen/rulegen.go. |
| func logRule(s string) { |
| if ruleFile == nil { |
| // Open a log file to write log to. We open in append |
| // mode because all.bash runs the compiler lots of times, |
| // and we want the concatenation of all of those logs. |
| // This means, of course, that users need to rm the old log |
| // to get fresh data. |
| // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow? |
| w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"), |
| os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) |
| if err != nil { |
| panic(err) |
| } |
| ruleFile = w |
| } |
| _, err := fmt.Fprintf(ruleFile, "rewrite %s\n", s) |
| if err != nil { |
| panic(err) |
| } |
| } |
| |
| var ruleFile io.Writer |
| |
| func min(x, y int64) int64 { |
| if x < y { |
| return x |
| } |
| return y |
| } |
| |
| func isConstZero(v *Value) bool { |
| switch v.Op { |
| case OpConstNil: |
| return true |
| case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F: |
| return v.AuxInt == 0 |
| } |
| return false |
| } |
| |
| // reciprocalExact64 reports whether 1/c is exactly representable. |
| func reciprocalExact64(c float64) bool { |
| b := math.Float64bits(c) |
| man := b & (1<<52 - 1) |
| if man != 0 { |
| return false // not a power of 2, denormal, or NaN |
| } |
| exp := b >> 52 & (1<<11 - 1) |
| // exponent bias is 0x3ff. So taking the reciprocal of a number |
| // changes the exponent to 0x7fe-exp. |
| switch exp { |
| case 0: |
| return false // ±0 |
| case 0x7ff: |
| return false // ±inf |
| case 0x7fe: |
| return false // exponent is not representable |
| default: |
| return true |
| } |
| } |
| |
| // reciprocalExact32 reports whether 1/c is exactly representable. |
| func reciprocalExact32(c float32) bool { |
| b := math.Float32bits(c) |
| man := b & (1<<23 - 1) |
| if man != 0 { |
| return false // not a power of 2, denormal, or NaN |
| } |
| exp := b >> 23 & (1<<8 - 1) |
| // exponent bias is 0x7f. So taking the reciprocal of a number |
| // changes the exponent to 0xfe-exp. |
| switch exp { |
| case 0: |
| return false // ±0 |
| case 0xff: |
| return false // ±inf |
| case 0xfe: |
| return false // exponent is not representable |
| default: |
| return true |
| } |
| } |
| |
| // check if an immediate can be directly encoded into an ARM's instruction |
| func isARMImmRot(v uint32) bool { |
| for i := 0; i < 16; i++ { |
| if v&^0xff == 0 { |
| return true |
| } |
| v = v<<2 | v>>30 |
| } |
| |
| return false |
| } |
| |
| // overlap reports whether the ranges given by the given offset and |
| // size pairs overlap. |
| func overlap(offset1, size1, offset2, size2 int64) bool { |
| if offset1 >= offset2 && offset2+size2 > offset1 { |
| return true |
| } |
| if offset2 >= offset1 && offset1+size1 > offset2 { |
| return true |
| } |
| return false |
| } |
| |
| // check if value zeroes out upper 32-bit of 64-bit register. |
| // depth limits recursion depth. In AMD64.rules 3 is used as limit, |
| // because it catches same amount of cases as 4. |
| func zeroUpper32Bits(x *Value, depth int) bool { |
| switch x.Op { |
| case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1, |
| OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1, |
| OpAMD64MOVLloadidx4, OpAMD64ADDLmem, OpAMD64SUBLmem, OpAMD64ANDLmem, |
| OpAMD64ORLmem, OpAMD64XORLmem, OpAMD64CVTTSD2SL, |
| OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst, |
| OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst, |
| OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL: |
| return true |
| case OpArg, OpSelect0, OpSelect1: |
| return x.Type.Width == 4 |
| case OpPhi: |
| // Phis can use each-other as an arguments, instead of tracking visited values, |
| // just limit recursion depth. |
| if depth <= 0 { |
| return false |
| } |
| for i := range x.Args { |
| if !zeroUpper32Bits(x.Args[i], depth-1) { |
| return false |
| } |
| } |
| return true |
| |
| } |
| return false |
| } |
| |
| // inlineablememmovesize reports whether the given arch performs OpMove of the given size |
| // faster than memmove and in a safe way when src and dst overlap. |
| // This is used as a check for replacing memmove with OpMove. |
| func isInlinableMemmoveSize(sz int64, c *Config) bool { |
| switch c.arch { |
| case "amd64", "amd64p32": |
| return sz <= 16 |
| case "386", "ppc64", "s390x", "ppc64le": |
| return sz <= 8 |
| case "arm", "mips", "mips64", "mipsle", "mips64le": |
| return sz <= 4 |
| } |
| return false |
| } |