| // Derived from Inferno utils/6c/reg.c |
| // http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c |
| // |
| // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. |
| // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) |
| // Portions Copyright © 1997-1999 Vita Nuova Limited |
| // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) |
| // Portions Copyright © 2004,2006 Bruce Ellis |
| // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) |
| // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others |
| // Portions Copyright © 2009 The Go Authors. All rights reserved. |
| // |
| // Permission is hereby granted, free of charge, to any person obtaining a copy |
| // of this software and associated documentation files (the "Software"), to deal |
| // in the Software without restriction, including without limitation the rights |
| // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| // copies of the Software, and to permit persons to whom the Software is |
| // furnished to do so, subject to the following conditions: |
| // |
| // The above copyright notice and this permission notice shall be included in |
| // all copies or substantial portions of the Software. |
| // |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| // THE SOFTWARE. |
| |
| // "Portable" optimizations. |
| // Compiled separately for 5g, 6g, and 8g, so allowed to use gg.h, opt.h. |
| // Must code to the intersection of the three back ends. |
| |
| #include <u.h> |
| #include <libc.h> |
| #include "gg.h" |
| #include "opt.h" |
| |
| // p is a call instruction. Does the call fail to return? |
| int |
| noreturn(Prog *p) |
| { |
| Sym *s; |
| int i; |
| static Sym* symlist[10]; |
| |
| if(symlist[0] == S) { |
| symlist[0] = pkglookup("panicindex", runtimepkg); |
| symlist[1] = pkglookup("panicslice", runtimepkg); |
| symlist[2] = pkglookup("throwinit", runtimepkg); |
| symlist[3] = pkglookup("panic", runtimepkg); |
| symlist[4] = pkglookup("panicwrap", runtimepkg); |
| } |
| |
| s = p->to.sym; |
| if(s == S) |
| return 0; |
| for(i=0; symlist[i]!=S; i++) |
| if(s == symlist[i]) |
| return 1; |
| return 0; |
| } |
| |
| // JMP chasing and removal. |
| // |
| // The code generator depends on being able to write out jump |
| // instructions that it can jump to now but fill in later. |
| // the linker will resolve them nicely, but they make the code |
| // longer and more difficult to follow during debugging. |
| // Remove them. |
| |
| /* what instruction does a JMP to p eventually land on? */ |
| static Prog* |
| chasejmp(Prog *p, int *jmploop) |
| { |
| int n; |
| |
| n = 0; |
| while(p != P && p->as == AJMP && p->to.type == D_BRANCH) { |
| if(++n > 10) { |
| *jmploop = 1; |
| break; |
| } |
| p = p->to.u.branch; |
| } |
| return p; |
| } |
| |
| /* |
| * reuse the Prog.opt pointer for mark/sweep state. |
| * leave opt==nil at end because alive==nil. |
| */ |
| #define alive ((void*)0) |
| #define dead ((void*)1) |
| |
| /* mark all code reachable from firstp as alive */ |
| static void |
| mark(Prog *firstp) |
| { |
| Prog *p; |
| |
| for(p=firstp; p; p=p->link) { |
| if(p->opt != dead) |
| break; |
| p->opt = alive; |
| if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch) |
| mark(p->to.u.branch); |
| if(p->as == AJMP || p->as == ARET || p->as == AUNDEF) |
| break; |
| } |
| } |
| |
| void |
| fixjmp(Prog *firstp) |
| { |
| int jmploop; |
| Prog *p, *last; |
| |
| if(debug['R'] && debug['v']) |
| print("\nfixjmp\n"); |
| |
| // pass 1: resolve jump to jump, mark all code as dead. |
| jmploop = 0; |
| for(p=firstp; p; p=p->link) { |
| if(debug['R'] && debug['v']) |
| print("%P\n", p); |
| if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch && p->to.u.branch->as == AJMP) { |
| p->to.u.branch = chasejmp(p->to.u.branch, &jmploop); |
| if(debug['R'] && debug['v']) |
| print("->%P\n", p); |
| } |
| p->opt = dead; |
| } |
| if(debug['R'] && debug['v']) |
| print("\n"); |
| |
| // pass 2: mark all reachable code alive |
| mark(firstp); |
| |
| // pass 3: delete dead code (mostly JMPs). |
| last = nil; |
| for(p=firstp; p; p=p->link) { |
| if(p->opt == dead) { |
| if(p->link == P && p->as == ARET && last && last->as != ARET) { |
| // This is the final ARET, and the code so far doesn't have one. |
| // Let it stay. |
| } else { |
| if(debug['R'] && debug['v']) |
| print("del %P\n", p); |
| continue; |
| } |
| } |
| if(last) |
| last->link = p; |
| last = p; |
| } |
| last->link = P; |
| |
| // pass 4: elide JMP to next instruction. |
| // only safe if there are no jumps to JMPs anymore. |
| if(!jmploop) { |
| last = nil; |
| for(p=firstp; p; p=p->link) { |
| if(p->as == AJMP && p->to.type == D_BRANCH && p->to.u.branch == p->link) { |
| if(debug['R'] && debug['v']) |
| print("del %P\n", p); |
| continue; |
| } |
| if(last) |
| last->link = p; |
| last = p; |
| } |
| last->link = P; |
| } |
| |
| if(debug['R'] && debug['v']) { |
| print("\n"); |
| for(p=firstp; p; p=p->link) |
| print("%P\n", p); |
| print("\n"); |
| } |
| } |
| |
| #undef alive |
| #undef dead |
| |
| // Control flow analysis. The Flow structures hold predecessor and successor |
| // information as well as basic loop analysis. |
| // |
| // graph = flowstart(firstp, sizeof(Flow)); |
| // ... use flow graph ... |
| // flowend(graph); // free graph |
| // |
| // Typical uses of the flow graph are to iterate over all the flow-relevant instructions: |
| // |
| // for(f = graph->start; f != nil; f = f->link) |
| // |
| // or, given an instruction f, to iterate over all the predecessors, which is |
| // f->p1 and this list: |
| // |
| // for(f2 = f->p2; f2 != nil; f2 = f2->p2link) |
| // |
| // Often the Flow struct is embedded as the first field inside a larger struct S. |
| // In that case casts are needed to convert Flow* to S* in many places but the |
| // idea is the same. Pass sizeof(S) instead of sizeof(Flow) to flowstart. |
| |
| Graph* |
| flowstart(Prog *firstp, int size) |
| { |
| int nf; |
| Flow *f, *f1, *start, *last; |
| Graph *graph; |
| Prog *p; |
| ProgInfo info; |
| |
| // Count and mark instructions to annotate. |
| nf = 0; |
| for(p = firstp; p != P; p = p->link) { |
| p->opt = nil; // should be already, but just in case |
| proginfo(&info, p); |
| if(info.flags & Skip) |
| continue; |
| p->opt = (void*)1; |
| nf++; |
| } |
| |
| if(nf == 0) |
| return nil; |
| |
| if(nf >= 20000) { |
| // fatal("%S is too big (%d instructions)", curfn->nname->sym, nf); |
| return nil; |
| } |
| |
| // Allocate annotations and assign to instructions. |
| graph = calloc(sizeof *graph + size*nf, 1); |
| if(graph == nil) |
| fatal("out of memory"); |
| start = (Flow*)(graph+1); |
| last = nil; |
| f = start; |
| for(p = firstp; p != P; p = p->link) { |
| if(p->opt == nil) |
| continue; |
| p->opt = f; |
| f->prog = p; |
| if(last) |
| last->link = f; |
| last = f; |
| |
| f = (Flow*)((uchar*)f + size); |
| } |
| |
| // Fill in pred/succ information. |
| for(f = start; f != nil; f = f->link) { |
| p = f->prog; |
| proginfo(&info, p); |
| if(!(info.flags & Break)) { |
| f1 = f->link; |
| f->s1 = f1; |
| f1->p1 = f; |
| } |
| if(p->to.type == D_BRANCH) { |
| if(p->to.u.branch == P) |
| fatal("pnil %P", p); |
| f1 = p->to.u.branch->opt; |
| if(f1 == nil) |
| fatal("fnil %P / %P", p, p->to.u.branch); |
| if(f1 == f) { |
| //fatal("self loop %P", p); |
| continue; |
| } |
| f->s2 = f1; |
| f->p2link = f1->p2; |
| f1->p2 = f; |
| } |
| } |
| |
| graph->start = start; |
| graph->num = nf; |
| return graph; |
| } |
| |
| void |
| flowend(Graph *graph) |
| { |
| Flow *f; |
| |
| for(f = graph->start; f != nil; f = f->link) |
| f->prog->opt = nil; |
| free(graph); |
| } |
| |
| /* |
| * find looping structure |
| * |
| * 1) find reverse postordering |
| * 2) find approximate dominators, |
| * the actual dominators if the flow graph is reducible |
| * otherwise, dominators plus some other non-dominators. |
| * See Matthew S. Hecht and Jeffrey D. Ullman, |
| * "Analysis of a Simple Algorithm for Global Data Flow Problems", |
| * Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts, |
| * Oct. 1-3, 1973, pp. 207-217. |
| * 3) find all nodes with a predecessor dominated by the current node. |
| * such a node is a loop head. |
| * recursively, all preds with a greater rpo number are in the loop |
| */ |
| static int32 |
| postorder(Flow *r, Flow **rpo2r, int32 n) |
| { |
| Flow *r1; |
| |
| r->rpo = 1; |
| r1 = r->s1; |
| if(r1 && !r1->rpo) |
| n = postorder(r1, rpo2r, n); |
| r1 = r->s2; |
| if(r1 && !r1->rpo) |
| n = postorder(r1, rpo2r, n); |
| rpo2r[n] = r; |
| n++; |
| return n; |
| } |
| |
| static int32 |
| rpolca(int32 *idom, int32 rpo1, int32 rpo2) |
| { |
| int32 t; |
| |
| if(rpo1 == -1) |
| return rpo2; |
| while(rpo1 != rpo2){ |
| if(rpo1 > rpo2){ |
| t = rpo2; |
| rpo2 = rpo1; |
| rpo1 = t; |
| } |
| while(rpo1 < rpo2){ |
| t = idom[rpo2]; |
| if(t >= rpo2) |
| fatal("bad idom"); |
| rpo2 = t; |
| } |
| } |
| return rpo1; |
| } |
| |
| static int |
| doms(int32 *idom, int32 r, int32 s) |
| { |
| while(s > r) |
| s = idom[s]; |
| return s == r; |
| } |
| |
| static int |
| loophead(int32 *idom, Flow *r) |
| { |
| int32 src; |
| |
| src = r->rpo; |
| if(r->p1 != nil && doms(idom, src, r->p1->rpo)) |
| return 1; |
| for(r = r->p2; r != nil; r = r->p2link) |
| if(doms(idom, src, r->rpo)) |
| return 1; |
| return 0; |
| } |
| |
| static void |
| loopmark(Flow **rpo2r, int32 head, Flow *r) |
| { |
| if(r->rpo < head || r->active == head) |
| return; |
| r->active = head; |
| r->loop += LOOP; |
| if(r->p1 != nil) |
| loopmark(rpo2r, head, r->p1); |
| for(r = r->p2; r != nil; r = r->p2link) |
| loopmark(rpo2r, head, r); |
| } |
| |
| void |
| flowrpo(Graph *g) |
| { |
| Flow *r1; |
| int32 i, d, me, nr, *idom; |
| Flow **rpo2r; |
| |
| free(g->rpo); |
| g->rpo = calloc(g->num*sizeof g->rpo[0], 1); |
| idom = calloc(g->num*sizeof idom[0], 1); |
| if(g->rpo == nil || idom == nil) |
| fatal("out of memory"); |
| |
| for(r1 = g->start; r1 != nil; r1 = r1->link) |
| r1->active = 0; |
| |
| rpo2r = g->rpo; |
| d = postorder(g->start, rpo2r, 0); |
| nr = g->num; |
| if(d > nr) |
| fatal("too many reg nodes %d %d", d, nr); |
| nr = d; |
| for(i = 0; i < nr / 2; i++) { |
| r1 = rpo2r[i]; |
| rpo2r[i] = rpo2r[nr - 1 - i]; |
| rpo2r[nr - 1 - i] = r1; |
| } |
| for(i = 0; i < nr; i++) |
| rpo2r[i]->rpo = i; |
| |
| idom[0] = 0; |
| for(i = 0; i < nr; i++) { |
| r1 = rpo2r[i]; |
| me = r1->rpo; |
| d = -1; |
| // rpo2r[r->rpo] == r protects against considering dead code, |
| // which has r->rpo == 0. |
| if(r1->p1 != nil && rpo2r[r1->p1->rpo] == r1->p1 && r1->p1->rpo < me) |
| d = r1->p1->rpo; |
| for(r1 = r1->p2; r1 != nil; r1 = r1->p2link) |
| if(rpo2r[r1->rpo] == r1 && r1->rpo < me) |
| d = rpolca(idom, d, r1->rpo); |
| idom[i] = d; |
| } |
| |
| for(i = 0; i < nr; i++) { |
| r1 = rpo2r[i]; |
| r1->loop++; |
| if(r1->p2 != nil && loophead(idom, r1)) |
| loopmark(rpo2r, i, r1); |
| } |
| free(idom); |
| |
| for(r1 = g->start; r1 != nil; r1 = r1->link) |
| r1->active = 0; |
| } |
| |
| Flow* |
| uniqp(Flow *r) |
| { |
| Flow *r1; |
| |
| r1 = r->p1; |
| if(r1 == nil) { |
| r1 = r->p2; |
| if(r1 == nil || r1->p2link != nil) |
| return nil; |
| } else |
| if(r->p2 != nil) |
| return nil; |
| return r1; |
| } |
| |
| Flow* |
| uniqs(Flow *r) |
| { |
| Flow *r1; |
| |
| r1 = r->s1; |
| if(r1 == nil) { |
| r1 = r->s2; |
| if(r1 == nil) |
| return nil; |
| } else |
| if(r->s2 != nil) |
| return nil; |
| return r1; |
| } |
| |
| // The compilers assume they can generate temporary variables |
| // as needed to preserve the right semantics or simplify code |
| // generation and the back end will still generate good code. |
| // This results in a large number of ephemeral temporary variables. |
| // Merge temps with non-overlapping lifetimes and equal types using the |
| // greedy algorithm in Poletto and Sarkar, "Linear Scan Register Allocation", |
| // ACM TOPLAS 1999. |
| |
| typedef struct TempVar TempVar; |
| typedef struct TempFlow TempFlow; |
| |
| struct TempVar |
| { |
| Node *node; |
| TempFlow *def; // definition of temp var |
| TempFlow *use; // use list, chained through TempFlow.uselink |
| TempVar *freelink; // next free temp in Type.opt list |
| TempVar *merge; // merge var with this one |
| uint32 start; // smallest Prog.loc in live range |
| uint32 end; // largest Prog.loc in live range |
| uchar addr; // address taken - no accurate end |
| uchar removed; // removed from program |
| }; |
| |
| struct TempFlow |
| { |
| Flow f; |
| TempFlow *uselink; |
| }; |
| |
| static int |
| startcmp(const void *va, const void *vb) |
| { |
| TempVar *a, *b; |
| |
| a = *(TempVar**)va; |
| b = *(TempVar**)vb; |
| |
| if(a->start < b->start) |
| return -1; |
| if(a->start > b->start) |
| return +1; |
| return 0; |
| } |
| |
| // Is n available for merging? |
| static int |
| canmerge(Node *n) |
| { |
| return n->class == PAUTO && !n->addrtaken && strncmp(n->sym->name, "autotmp", 7) == 0; |
| } |
| |
| static void mergewalk(TempVar*, TempFlow*, uint32); |
| |
| void |
| mergetemp(Prog *firstp) |
| { |
| int i, j, nvar, ninuse, nfree, nkill; |
| TempVar *var, *v, *v1, **bystart, **inuse; |
| TempFlow *r; |
| NodeList *l, **lp; |
| Node *n; |
| Prog *p, *p1; |
| Type *t; |
| ProgInfo info, info1; |
| int32 gen; |
| Graph *g; |
| |
| enum { Debug = 0 }; |
| |
| g = flowstart(firstp, sizeof(TempFlow)); |
| if(g == nil) |
| return; |
| |
| // Build list of all mergeable variables. |
| nvar = 0; |
| for(l = curfn->dcl; l != nil; l = l->next) |
| if(canmerge(l->n)) |
| nvar++; |
| |
| var = calloc(nvar*sizeof var[0], 1); |
| nvar = 0; |
| for(l = curfn->dcl; l != nil; l = l->next) { |
| n = l->n; |
| if(canmerge(n)) { |
| v = &var[nvar++]; |
| n->opt = v; |
| v->node = n; |
| } |
| } |
| |
| // Build list of uses. |
| // We assume that the earliest reference to a temporary is its definition. |
| // This is not true of variables in general but our temporaries are all |
| // single-use (that's why we have so many!). |
| for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) { |
| p = r->f.prog; |
| proginfo(&info, p); |
| |
| if(p->from.node != N && p->from.node->opt && p->to.node != N && p->to.node->opt) |
| fatal("double node %P", p); |
| if((n = p->from.node) != N && (v = n->opt) != nil || |
| (n = p->to.node) != N && (v = n->opt) != nil) { |
| if(v->def == nil) |
| v->def = r; |
| r->uselink = v->use; |
| v->use = r; |
| if(n == p->from.node && (info.flags & LeftAddr)) |
| v->addr = 1; |
| } |
| } |
| |
| if(Debug > 1) |
| dumpit("before", g->start, 0); |
| |
| nkill = 0; |
| |
| // Special case. |
| for(v = var; v < var+nvar; v++) { |
| if(v->addr) |
| continue; |
| // Used in only one instruction, which had better be a write. |
| if((r = v->use) != nil && r->uselink == nil) { |
| p = r->f.prog; |
| proginfo(&info, p); |
| if(p->to.node == v->node && (info.flags & RightWrite) && !(info.flags & RightRead)) { |
| p->as = ANOP; |
| p->to = zprog.to; |
| v->removed = 1; |
| if(Debug) |
| print("drop write-only %S\n", v->node->sym); |
| } else |
| fatal("temp used and not set: %P", p); |
| nkill++; |
| continue; |
| } |
| |
| // Written in one instruction, read in the next, otherwise unused, |
| // no jumps to the next instruction. Happens mainly in 386 compiler. |
| if((r = v->use) != nil && r->f.link == &r->uselink->f && r->uselink->uselink == nil && uniqp(r->f.link) == &r->f) { |
| p = r->f.prog; |
| proginfo(&info, p); |
| p1 = r->f.link->prog; |
| proginfo(&info1, p1); |
| enum { |
| SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD, |
| }; |
| if(p->from.node == v->node && p1->to.node == v->node && (info.flags & Move) && |
| !((info.flags|info1.flags) & (LeftAddr|RightAddr)) && |
| (info.flags & SizeAny) == (info1.flags & SizeAny)) { |
| p1->from = p->from; |
| excise(&r->f); |
| v->removed = 1; |
| if(Debug) |
| print("drop immediate-use %S\n", v->node->sym); |
| } |
| nkill++; |
| continue; |
| } |
| } |
| |
| // Traverse live range of each variable to set start, end. |
| // Each flood uses a new value of gen so that we don't have |
| // to clear all the r->f.active words after each variable. |
| gen = 0; |
| for(v = var; v < var+nvar; v++) { |
| gen++; |
| for(r = v->use; r != nil; r = r->uselink) |
| mergewalk(v, r, gen); |
| } |
| |
| // Sort variables by start. |
| bystart = malloc(nvar*sizeof bystart[0]); |
| for(i=0; i<nvar; i++) |
| bystart[i] = &var[i]; |
| qsort(bystart, nvar, sizeof bystart[0], startcmp); |
| |
| // List of in-use variables, sorted by end, so that the ones that |
| // will last the longest are the earliest ones in the array. |
| // The tail inuse[nfree:] holds no-longer-used variables. |
| // In theory we should use a sorted tree so that insertions are |
| // guaranteed O(log n) and then the loop is guaranteed O(n log n). |
| // In practice, it doesn't really matter. |
| inuse = malloc(nvar*sizeof inuse[0]); |
| ninuse = 0; |
| nfree = nvar; |
| for(i=0; i<nvar; i++) { |
| v = bystart[i]; |
| if(v->addr || v->removed) |
| continue; |
| |
| // Expire no longer in use. |
| while(ninuse > 0 && inuse[ninuse-1]->end < v->start) { |
| v1 = inuse[--ninuse]; |
| inuse[--nfree] = v1; |
| } |
| |
| // Find old temp to reuse if possible. |
| t = v->node->type; |
| for(j=nfree; j<nvar; j++) { |
| v1 = inuse[j]; |
| if(eqtype(t, v1->node->type)) { |
| inuse[j] = inuse[nfree++]; |
| if(v1->merge) |
| v->merge = v1->merge; |
| else |
| v->merge = v1; |
| nkill++; |
| break; |
| } |
| } |
| |
| // Sort v into inuse. |
| j = ninuse++; |
| while(j > 0 && inuse[j-1]->end < v->end) { |
| inuse[j] = inuse[j-1]; |
| j--; |
| } |
| inuse[j] = v; |
| } |
| |
| if(Debug) { |
| print("%S [%d - %d]\n", curfn->nname->sym, nvar, nkill); |
| for(v=var; v<var+nvar; v++) { |
| print("var %#N %T %d-%d", v->node, v->node->type, v->start, v->end); |
| if(v->addr) |
| print(" addr=1"); |
| if(v->removed) |
| print(" dead=1"); |
| if(v->merge) |
| print(" merge %#N", v->merge->node); |
| if(v->start == v->end) |
| print(" %P", v->def->f.prog); |
| print("\n"); |
| } |
| |
| if(Debug > 1) |
| dumpit("after", g->start, 0); |
| } |
| |
| // Update node references to use merged temporaries. |
| for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) { |
| p = r->f.prog; |
| if((n = p->from.node) != N && (v = n->opt) != nil && v->merge != nil) |
| p->from.node = v->merge->node; |
| if((n = p->to.node) != N && (v = n->opt) != nil && v->merge != nil) |
| p->to.node = v->merge->node; |
| } |
| |
| // Delete merged nodes from declaration list. |
| for(lp = &curfn->dcl; (l = *lp); ) { |
| curfn->dcl->end = l; |
| n = l->n; |
| v = n->opt; |
| if(v && (v->merge || v->removed)) { |
| *lp = l->next; |
| continue; |
| } |
| lp = &l->next; |
| } |
| |
| // Clear aux structures. |
| for(v=var; v<var+nvar; v++) |
| v->node->opt = nil; |
| free(var); |
| free(bystart); |
| free(inuse); |
| flowend(g); |
| } |
| |
| static void |
| mergewalk(TempVar *v, TempFlow *r0, uint32 gen) |
| { |
| Prog *p; |
| TempFlow *r1, *r, *r2; |
| |
| for(r1 = r0; r1 != nil; r1 = (TempFlow*)r1->f.p1) { |
| if(r1->f.active == gen) |
| break; |
| r1->f.active = gen; |
| p = r1->f.prog; |
| if(v->end < p->loc) |
| v->end = p->loc; |
| if(r1 == v->def) { |
| v->start = p->loc; |
| break; |
| } |
| } |
| |
| for(r = r0; r != r1; r = (TempFlow*)r->f.p1) |
| for(r2 = (TempFlow*)r->f.p2; r2 != nil; r2 = (TempFlow*)r2->f.p2link) |
| mergewalk(v, r2, gen); |
| } |
| |
| // Eliminate redundant nil pointer checks. |
| // |
| // The code generation pass emits a CHECKNIL for every possibly nil pointer. |
| // This pass removes a CHECKNIL if every predecessor path has already |
| // checked this value for nil. |
| // |
| // Simple backwards flood from check to definition. |
| // Run prog loop backward from end of program to beginning to avoid quadratic |
| // behavior removing a run of checks. |
| // |
| // Assume that stack variables with address not taken can be loaded multiple times |
| // from memory without being rechecked. Other variables need to be checked on |
| // each load. |
| |
| typedef struct NilVar NilVar; |
| typedef struct NilFlow NilFlow; |
| |
| struct NilFlow { |
| Flow f; |
| int kill; |
| }; |
| |
| static void nilwalkback(NilFlow *rcheck); |
| static void nilwalkfwd(NilFlow *rcheck); |
| |
| void |
| nilopt(Prog *firstp) |
| { |
| NilFlow *r; |
| Prog *p; |
| Graph *g; |
| int ncheck, nkill; |
| |
| g = flowstart(firstp, sizeof(NilFlow)); |
| if(g == nil) |
| return; |
| |
| if(debug_checknil > 1 /* || strcmp(curfn->nname->sym->name, "f1") == 0 */) |
| dumpit("nilopt", g->start, 0); |
| |
| ncheck = 0; |
| nkill = 0; |
| for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) { |
| p = r->f.prog; |
| if(p->as != ACHECKNIL || !regtyp(&p->from)) |
| continue; |
| ncheck++; |
| if(stackaddr(&p->from)) { |
| if(debug_checknil && p->lineno > 1) |
| warnl(p->lineno, "removed nil check of SP address"); |
| r->kill = 1; |
| continue; |
| } |
| nilwalkfwd(r); |
| if(r->kill) { |
| if(debug_checknil && p->lineno > 1) |
| warnl(p->lineno, "removed nil check before indirect"); |
| continue; |
| } |
| nilwalkback(r); |
| if(r->kill) { |
| if(debug_checknil && p->lineno > 1) |
| warnl(p->lineno, "removed repeated nil check"); |
| continue; |
| } |
| } |
| |
| for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) { |
| if(r->kill) { |
| nkill++; |
| excise(&r->f); |
| } |
| } |
| |
| flowend(g); |
| |
| if(debug_checknil > 1) |
| print("%S: removed %d of %d nil checks\n", curfn->nname->sym, nkill, ncheck); |
| } |
| |
| static void |
| nilwalkback(NilFlow *rcheck) |
| { |
| Prog *p; |
| ProgInfo info; |
| NilFlow *r; |
| |
| for(r = rcheck; r != nil; r = (NilFlow*)uniqp(&r->f)) { |
| p = r->f.prog; |
| proginfo(&info, p); |
| if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) { |
| // Found initialization of value we're checking for nil. |
| // without first finding the check, so this one is unchecked. |
| return; |
| } |
| if(r != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from)) { |
| rcheck->kill = 1; |
| return; |
| } |
| } |
| |
| // Here is a more complex version that scans backward across branches. |
| // It assumes rcheck->kill = 1 has been set on entry, and its job is to find a reason |
| // to keep the check (setting rcheck->kill = 0). |
| // It doesn't handle copying of aggregates as well as I would like, |
| // nor variables with their address taken, |
| // and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3. |
| /* |
| for(r1 = r0; r1 != nil; r1 = (NilFlow*)r1->f.p1) { |
| if(r1->f.active == gen) |
| break; |
| r1->f.active = gen; |
| p = r1->f.prog; |
| |
| // If same check, stop this loop but still check |
| // alternate predecessors up to this point. |
| if(r1 != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from)) |
| break; |
| |
| proginfo(&info, p); |
| if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) { |
| // Found initialization of value we're checking for nil. |
| // without first finding the check, so this one is unchecked. |
| rcheck->kill = 0; |
| return; |
| } |
| |
| if(r1->f.p1 == nil && r1->f.p2 == nil) { |
| print("lost pred for %P\n", rcheck->f.prog); |
| for(r1=r0; r1!=nil; r1=(NilFlow*)r1->f.p1) { |
| proginfo(&info, r1->f.prog); |
| print("\t%P %d %d %D %D\n", r1->f.prog, info.flags&RightWrite, sameaddr(&r1->f.prog->to, &rcheck->f.prog->from), &r1->f.prog->to, &rcheck->f.prog->from); |
| } |
| fatal("lost pred trail"); |
| } |
| } |
| |
| for(r = r0; r != r1; r = (NilFlow*)r->f.p1) |
| for(r2 = (NilFlow*)r->f.p2; r2 != nil; r2 = (NilFlow*)r2->f.p2link) |
| nilwalkback(rcheck, r2, gen); |
| */ |
| } |
| |
| static void |
| nilwalkfwd(NilFlow *rcheck) |
| { |
| NilFlow *r; |
| Prog *p; |
| ProgInfo info; |
| |
| // If the path down from rcheck dereferences the address |
| // (possibly with a small offset) before writing to memory |
| // and before any subsequent checks, it's okay to wait for |
| // that implicit check. Only consider this basic block to |
| // avoid problems like: |
| // _ = *x // should panic |
| // for {} // no writes but infinite loop may be considered visible |
| for(r = (NilFlow*)uniqs(&rcheck->f); r != nil; r = (NilFlow*)uniqs(&r->f)) { |
| p = r->f.prog; |
| proginfo(&info, p); |
| |
| if((info.flags & LeftRead) && smallindir(&p->from, &rcheck->f.prog->from)) { |
| rcheck->kill = 1; |
| return; |
| } |
| if((info.flags & (RightRead|RightWrite)) && smallindir(&p->to, &rcheck->f.prog->from)) { |
| rcheck->kill = 1; |
| return; |
| } |
| |
| // Stop if another nil check happens. |
| if(p->as == ACHECKNIL) |
| return; |
| // Stop if value is lost. |
| if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) |
| return; |
| // Stop if memory write. |
| if((info.flags & RightWrite) && !regtyp(&p->to)) |
| return; |
| } |
| } |