blob: a80d72b9b82796513d6a601ffc286e3ff1892a0d [file] [log] [blame]
// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
package gc
import (
"bytes"
"cmd/internal/obj"
"cmd/internal/sys"
"fmt"
"sort"
"strings"
)
// A Var represents a single variable that may be stored in a register.
// That variable may itself correspond to a hardware register,
// to represent the use of registers in the unoptimized instruction stream.
type Var struct {
offset int64
node *Node
nextinnode *Var
width int
id int // index in vars
name int8
etype EType
addr int8
}
// Bits represents a set of Vars, stored as a bit set of var numbers
// (the index in vars, or equivalently v.id).
type Bits struct {
b [BITS]uint64
}
const (
BITS = 3
NVAR = BITS * 64
)
var (
vars [NVAR]Var // variables under consideration
nvar int // number of vars
regbits uint64 // bits for hardware registers
zbits Bits // zero
externs Bits // global variables
params Bits // function parameters and results
ivar Bits // function parameters (inputs)
ovar Bits // function results (outputs)
consts Bits // constant values
addrs Bits // variables with address taken
)
// A Reg is a wrapper around a single Prog (one instruction) that holds
// register optimization information while the optimizer runs.
// r->prog is the instruction.
type Reg struct {
set Bits // regopt variables written by this instruction.
use1 Bits // regopt variables read by prog->from.
use2 Bits // regopt variables read by prog->to.
// refahead/refbehind are the regopt variables whose current
// value may be used in the following/preceding instructions
// up to a CALL (or the value is clobbered).
refbehind Bits
refahead Bits
// calahead/calbehind are similar, but for variables in
// instructions that are reachable after hitting at least one
// CALL.
calbehind Bits
calahead Bits
regdiff Bits
act Bits
regu uint64 // register used bitmap
}
// A Rgn represents a single regopt variable over a region of code
// where a register could potentially be dedicated to that variable.
// The code encompassed by a Rgn is defined by the flow graph,
// starting at enter, flood-filling forward while varno is refahead
// and backward while varno is refbehind, and following branches.
// A single variable may be represented by multiple disjoint Rgns and
// each Rgn may choose a different register for that variable.
// Registers are allocated to regions greedily in order of descending
// cost.
type Rgn struct {
enter *Flow
cost int16
varno int16
regno int16
}
// The Plan 9 C compilers used a limit of 600 regions,
// but the yacc-generated parser in y.go has 3100 regions.
// We set MaxRgn large enough to handle that.
// There's not a huge cost to having too many regions:
// the main processing traces the live area for each variable,
// which is limited by the number of variables times the area,
// not the raw region count. If there are many regions, they
// are almost certainly small and easy to trace.
// The only operation that scales with region count is the
// sorting by cost, which uses sort.Sort and is therefore
// guaranteed n log n.
const MaxRgn = 6000
var (
region []Rgn
nregion int
)
type rcmp []Rgn
func (x rcmp) Len() int {
return len(x)
}
func (x rcmp) Swap(i, j int) {
x[i], x[j] = x[j], x[i]
}
func (x rcmp) Less(i, j int) bool {
p1 := &x[i]
p2 := &x[j]
if p1.cost != p2.cost {
return int(p2.cost)-int(p1.cost) < 0
}
if p1.varno != p2.varno {
return int(p2.varno)-int(p1.varno) < 0
}
if p1.enter != p2.enter {
return int(p2.enter.Id-p1.enter.Id) < 0
}
return false
}
func setaddrs(bit Bits) {
var i int
var n int
var v *Var
var node *Node
for bany(&bit) {
// convert each bit to a variable
i = bnum(&bit)
node = vars[i].node
n = int(vars[i].name)
biclr(&bit, uint(i))
// disable all pieces of that variable
for i = 0; i < nvar; i++ {
v = &vars[i]
if v.node == node && int(v.name) == n {
v.addr = 2
}
}
}
}
var regnodes [64]*Node
func walkvardef(n *Node, f *Flow, active int) {
var f1 *Flow
var bn int
var v *Var
for f1 = f; f1 != nil; f1 = f1.S1 {
if f1.Active == int32(active) {
break
}
f1.Active = int32(active)
if f1.Prog.As == obj.AVARKILL && f1.Prog.To.Node == n {
break
}
for v, _ = n.Opt().(*Var); v != nil; v = v.nextinnode {
bn = v.id
biset(&(f1.Data.(*Reg)).act, uint(bn))
}
if f1.Prog.As == obj.ACALL {
break
}
}
for f2 := f; f2 != f1; f2 = f2.S1 {
if f2.S2 != nil {
walkvardef(n, f2.S2, active)
}
}
}
// add mov b,rn
// just after r
func addmove(r *Flow, bn int, rn int, f int) {
p1 := Ctxt.NewProg()
Clearp(p1)
p1.Pc = 9999
p := r.Prog
p1.Link = p.Link
p.Link = p1
p1.Lineno = p.Lineno
v := &vars[bn]
a := &p1.To
a.Offset = v.offset
a.Etype = uint8(v.etype)
a.Type = obj.TYPE_MEM
a.Name = v.name
a.Node = v.node
a.Sym = Linksym(v.node.Sym)
/* NOTE(rsc): 9g did
if(a->etype == TARRAY)
a->type = TYPE_ADDR;
else if(a->sym == nil)
a->type = TYPE_CONST;
*/
p1.As = Thearch.Optoas(OAS, Types[uint8(v.etype)])
// TODO(rsc): Remove special case here.
if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM, sys.ARM64, sys.PPC64) && v.etype == TBOOL {
p1.As = Thearch.Optoas(OAS, Types[TUINT8])
}
p1.From.Type = obj.TYPE_REG
p1.From.Reg = int16(rn)
p1.From.Name = obj.NAME_NONE
if f == 0 {
p1.From = *a
*a = obj.Addr{}
a.Type = obj.TYPE_REG
a.Reg = int16(rn)
}
if Debug['R'] != 0 && Debug['v'] != 0 {
fmt.Printf("%v ===add=== %v\n", p, p1)
}
Ostats.Nspill++
}
func overlap_reg(o1 int64, w1 int, o2 int64, w2 int) bool {
t1 := o1 + int64(w1)
t2 := o2 + int64(w2)
if t1 <= o2 || t2 <= o1 {
return false
}
return true
}
func mkvar(f *Flow, a *obj.Addr) Bits {
// mark registers used
if a.Type == obj.TYPE_NONE {
return zbits
}
r := f.Data.(*Reg)
r.use1.b[0] |= Thearch.Doregbits(int(a.Index)) // TODO: Use RtoB
var n int
switch a.Type {
default:
regu := Thearch.Doregbits(int(a.Reg)) | Thearch.RtoB(int(a.Reg)) // TODO: Use RtoB
if regu == 0 {
return zbits
}
bit := zbits
bit.b[0] = regu
return bit
// TODO(rsc): Remove special case here.
case obj.TYPE_ADDR:
var bit Bits
if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM, sys.ARM64, sys.PPC64) {
goto memcase
}
a.Type = obj.TYPE_MEM
bit = mkvar(f, a)
setaddrs(bit)
a.Type = obj.TYPE_ADDR
Ostats.Naddr++
return zbits
memcase:
fallthrough
case obj.TYPE_MEM:
if r != nil {
r.use1.b[0] |= Thearch.RtoB(int(a.Reg))
}
/* NOTE: 5g did
if(r->f.prog->scond & (C_PBIT|C_WBIT))
r->set.b[0] |= RtoB(a->reg);
*/
switch a.Name {
default:
// Note: This case handles NAME_EXTERN and NAME_STATIC.
// We treat these as requiring eager writes to memory, due to
// the possibility of a fault handler looking at them, so there is
// not much point in registerizing the loads.
// If we later choose the set of candidate variables from a
// larger list, these cases could be deprioritized instead of
// removed entirely.
return zbits
case obj.NAME_PARAM,
obj.NAME_AUTO:
n = int(a.Name)
}
}
node, _ := a.Node.(*Node)
if node == nil || node.Op != ONAME || node.Orig == nil {
return zbits
}
node = node.Orig
if node.Orig != node {
Fatalf("%v: bad node", Ctxt.Dconv(a))
}
if node.Sym == nil || node.Sym.Name[0] == '.' {
return zbits
}
et := EType(a.Etype)
o := a.Offset
w := a.Width
if w < 0 {
Fatalf("bad width %d for %v", w, Ctxt.Dconv(a))
}
flag := 0
var v *Var
for i := 0; i < nvar; i++ {
v = &vars[i]
if v.node == node && int(v.name) == n {
if v.offset == o {
if v.etype == et {
if int64(v.width) == w {
// TODO(rsc): Remove special case for arm here.
if flag == 0 || Thearch.LinkArch.Family != sys.ARM {
return blsh(uint(i))
}
}
}
}
// if they overlap, disable both
if overlap_reg(v.offset, v.width, o, int(w)) {
// print("disable overlap %s %d %d %d %d, %E != %E\n", s->name, v->offset, v->width, o, w, v->etype, et);
v.addr = 1
flag = 1
}
}
}
switch et {
case 0, TFUNC:
return zbits
}
if nvar >= NVAR {
if Debug['w'] > 1 && node != nil {
Fatalf("variable not optimized: %v", Nconv(node, FmtSharp))
}
if Debug['v'] > 0 {
Warn("variable not optimized: %v", Nconv(node, FmtSharp))
}
// If we're not tracking a word in a variable, mark the rest as
// having its address taken, so that we keep the whole thing
// live at all calls. otherwise we might optimize away part of
// a variable but not all of it.
var v *Var
for i := 0; i < nvar; i++ {
v = &vars[i]
if v.node == node {
v.addr = 1
}
}
return zbits
}
i := nvar
nvar++
v = &vars[i]
v.id = i
v.offset = o
v.name = int8(n)
v.etype = et
v.width = int(w)
v.addr = int8(flag) // funny punning
v.node = node
// node->opt is the head of a linked list
// of Vars within the given Node, so that
// we can start at a Var and find all the other
// Vars in the same Go variable.
v.nextinnode, _ = node.Opt().(*Var)
node.SetOpt(v)
bit := blsh(uint(i))
if n == obj.NAME_EXTERN || n == obj.NAME_STATIC {
for z := 0; z < BITS; z++ {
externs.b[z] |= bit.b[z]
}
}
if n == obj.NAME_PARAM {
for z := 0; z < BITS; z++ {
params.b[z] |= bit.b[z]
}
}
if node.Class == PPARAM {
for z := 0; z < BITS; z++ {
ivar.b[z] |= bit.b[z]
}
}
if node.Class == PPARAMOUT {
for z := 0; z < BITS; z++ {
ovar.b[z] |= bit.b[z]
}
}
// Treat values with their address taken as live at calls,
// because the garbage collector's liveness analysis in plive.go does.
// These must be consistent or else we will elide stores and the garbage
// collector will see uninitialized data.
// The typical case where our own analysis is out of sync is when the
// node appears to have its address taken but that code doesn't actually
// get generated and therefore doesn't show up as an address being
// taken when we analyze the instruction stream.
// One instance of this case is when a closure uses the same name as
// an outer variable for one of its own variables declared with :=.
// The parser flags the outer variable as possibly shared, and therefore
// sets addrtaken, even though it ends up not being actually shared.
// If we were better about _ elision, _ = &x would suffice too.
// The broader := in a closure problem is mentioned in a comment in
// closure.go:/^typecheckclosure and dcl.go:/^oldname.
if node.Addrtaken {
v.addr = 1
}
// Disable registerization for globals, because:
// (1) we might panic at any time and we want the recovery code
// to see the latest values (issue 1304).
// (2) we don't know what pointers might point at them and we want
// loads via those pointers to see updated values and vice versa (issue 7995).
//
// Disable registerization for results if using defer, because the deferred func
// might recover and return, causing the current values to be used.
if node.Class == PEXTERN || (hasdefer && node.Class == PPARAMOUT) {
v.addr = 1
}
if Debug['R'] != 0 {
fmt.Printf("bit=%2d et=%v w=%d+%d %v %v flag=%d\n", i, et, o, w, Nconv(node, FmtSharp), Ctxt.Dconv(a), v.addr)
}
Ostats.Nvar++
return bit
}
var change int
func prop(f *Flow, ref Bits, cal Bits) {
var f1 *Flow
var r1 *Reg
var z int
var i int
var v *Var
var v1 *Var
for f1 = f; f1 != nil; f1 = f1.P1 {
r1 = f1.Data.(*Reg)
for z = 0; z < BITS; z++ {
ref.b[z] |= r1.refahead.b[z]
if ref.b[z] != r1.refahead.b[z] {
r1.refahead.b[z] = ref.b[z]
change = 1
}
cal.b[z] |= r1.calahead.b[z]
if cal.b[z] != r1.calahead.b[z] {
r1.calahead.b[z] = cal.b[z]
change = 1
}
}
switch f1.Prog.As {
case obj.ACALL:
if Noreturn(f1.Prog) {
break
}
// Mark all input variables (ivar) as used, because that's what the
// liveness bitmaps say. The liveness bitmaps say that so that a
// panic will not show stale values in the parameter dump.
// Mark variables with a recent VARDEF (r1->act) as used,
// so that the optimizer flushes initializations to memory,
// so that if a garbage collection happens during this CALL,
// the collector will see initialized memory. Again this is to
// match what the liveness bitmaps say.
for z = 0; z < BITS; z++ {
cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1.act.b[z]
ref.b[z] = 0
}
// cal.b is the current approximation of what's live across the call.
// Every bit in cal.b is a single stack word. For each such word,
// find all the other tracked stack words in the same Go variable
// (struct/slice/string/interface) and mark them live too.
// This is necessary because the liveness analysis for the garbage
// collector works at variable granularity, not at word granularity.
// It is fundamental for slice/string/interface: the garbage collector
// needs the whole value, not just some of the words, in order to
// interpret the other bits correctly. Specifically, slice needs a consistent
// ptr and cap, string needs a consistent ptr and len, and interface
// needs a consistent type word and data word.
for z = 0; z < BITS; z++ {
if cal.b[z] == 0 {
continue
}
for i = 0; i < 64; i++ {
if z*64+i >= nvar || (cal.b[z]>>uint(i))&1 == 0 {
continue
}
v = &vars[z*64+i]
if v.node.Opt() == nil { // v represents fixed register, not Go variable
continue
}
// v->node->opt is the head of a linked list of Vars
// corresponding to tracked words from the Go variable v->node.
// Walk the list and set all the bits.
// For a large struct this could end up being quadratic:
// after the first setting, the outer loop (for z, i) would see a 1 bit
// for all of the remaining words in the struct, and for each such
// word would go through and turn on all the bits again.
// To avoid the quadratic behavior, we only turn on the bits if
// v is the head of the list or if the head's bit is not yet turned on.
// This will set the bits at most twice, keeping the overall loop linear.
v1, _ = v.node.Opt().(*Var)
if v == v1 || !btest(&cal, uint(v1.id)) {
for ; v1 != nil; v1 = v1.nextinnode {
biset(&cal, uint(v1.id))
}
}
}
}
case obj.ATEXT:
for z = 0; z < BITS; z++ {
cal.b[z] = 0
ref.b[z] = 0
}
case obj.ARET:
for z = 0; z < BITS; z++ {
cal.b[z] = externs.b[z] | ovar.b[z]
ref.b[z] = 0
}
}
for z = 0; z < BITS; z++ {
ref.b[z] = ref.b[z]&^r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z]
cal.b[z] &^= (r1.set.b[z] | r1.use1.b[z] | r1.use2.b[z])
r1.refbehind.b[z] = ref.b[z]
r1.calbehind.b[z] = cal.b[z]
}
if f1.Active != 0 {
break
}
f1.Active = 1
}
var r *Reg
var f2 *Flow
for ; f != f1; f = f.P1 {
r = f.Data.(*Reg)
for f2 = f.P2; f2 != nil; f2 = f2.P2link {
prop(f2, r.refbehind, r.calbehind)
}
}
}
func synch(f *Flow, dif Bits) {
var r1 *Reg
var z int
for f1 := f; f1 != nil; f1 = f1.S1 {
r1 = f1.Data.(*Reg)
for z = 0; z < BITS; z++ {
dif.b[z] = dif.b[z]&^(^r1.refbehind.b[z]&r1.refahead.b[z]) | r1.set.b[z] | r1.regdiff.b[z]
if dif.b[z] != r1.regdiff.b[z] {
r1.regdiff.b[z] = dif.b[z]
change = 1
}
}
if f1.Active != 0 {
break
}
f1.Active = 1
for z = 0; z < BITS; z++ {
dif.b[z] &^= (^r1.calbehind.b[z] & r1.calahead.b[z])
}
if f1.S2 != nil {
synch(f1.S2, dif)
}
}
}
func allreg(b uint64, r *Rgn) uint64 {
v := &vars[r.varno]
r.regno = 0
switch v.etype {
default:
Fatalf("unknown etype %d/%v", Bitno(b), v.etype)
case TINT8,
TUINT8,
TINT16,
TUINT16,
TINT32,
TUINT32,
TINT64,
TUINT64,
TINT,
TUINT,
TUINTPTR,
TBOOL,
TPTR32,
TPTR64:
i := Thearch.BtoR(^b)
if i != 0 && r.cost > 0 {
r.regno = int16(i)
return Thearch.RtoB(i)
}
case TFLOAT32, TFLOAT64:
i := Thearch.BtoF(^b)
if i != 0 && r.cost > 0 {
r.regno = int16(i)
return Thearch.FtoB(i)
}
}
return 0
}
func LOAD(r *Reg, z int) uint64 {
return ^r.refbehind.b[z] & r.refahead.b[z]
}
func STORE(r *Reg, z int) uint64 {
return ^r.calbehind.b[z] & r.calahead.b[z]
}
// Cost parameters
const (
CLOAD = 5 // cost of load
CREF = 5 // cost of reference if not registerized
LOOP = 3 // loop execution count (applied in popt.go)
)
func paint1(f *Flow, bn int) {
z := bn / 64
bb := uint64(1 << uint(bn%64))
r := f.Data.(*Reg)
if r.act.b[z]&bb != 0 {
return
}
var f1 *Flow
var r1 *Reg
for {
if r.refbehind.b[z]&bb == 0 {
break
}
f1 = f.P1
if f1 == nil {
break
}
r1 = f1.Data.(*Reg)
if r1.refahead.b[z]&bb == 0 {
break
}
if r1.act.b[z]&bb != 0 {
break
}
f = f1
r = r1
}
if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
change -= CLOAD * int(f.Loop)
}
for {
r.act.b[z] |= bb
if f.Prog.As != obj.ANOP { // don't give credit for NOPs
if r.use1.b[z]&bb != 0 {
change += CREF * int(f.Loop)
}
if (r.use2.b[z]|r.set.b[z])&bb != 0 {
change += CREF * int(f.Loop)
}
}
if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
change -= CLOAD * int(f.Loop)
}
if r.refbehind.b[z]&bb != 0 {
for f1 = f.P2; f1 != nil; f1 = f1.P2link {
if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
paint1(f1, bn)
}
}
}
if r.refahead.b[z]&bb == 0 {
break
}
f1 = f.S2
if f1 != nil {
if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
paint1(f1, bn)
}
}
f = f.S1
if f == nil {
break
}
r = f.Data.(*Reg)
if r.act.b[z]&bb != 0 {
break
}
if r.refbehind.b[z]&bb == 0 {
break
}
}
}
func paint2(f *Flow, bn int, depth int) uint64 {
z := bn / 64
bb := uint64(1 << uint(bn%64))
vreg := regbits
r := f.Data.(*Reg)
if r.act.b[z]&bb == 0 {
return vreg
}
var r1 *Reg
var f1 *Flow
for {
if r.refbehind.b[z]&bb == 0 {
break
}
f1 = f.P1
if f1 == nil {
break
}
r1 = f1.Data.(*Reg)
if r1.refahead.b[z]&bb == 0 {
break
}
if r1.act.b[z]&bb == 0 {
break
}
f = f1
r = r1
}
for {
if Debug['R'] != 0 && Debug['v'] != 0 {
fmt.Printf(" paint2 %d %v\n", depth, f.Prog)
}
r.act.b[z] &^= bb
vreg |= r.regu
if r.refbehind.b[z]&bb != 0 {
for f1 = f.P2; f1 != nil; f1 = f1.P2link {
if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
vreg |= paint2(f1, bn, depth+1)
}
}
}
if r.refahead.b[z]&bb == 0 {
break
}
f1 = f.S2
if f1 != nil {
if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
vreg |= paint2(f1, bn, depth+1)
}
}
f = f.S1
if f == nil {
break
}
r = f.Data.(*Reg)
if r.act.b[z]&bb == 0 {
break
}
if r.refbehind.b[z]&bb == 0 {
break
}
}
return vreg
}
func paint3(f *Flow, bn int, rb uint64, rn int) {
z := bn / 64
bb := uint64(1 << uint(bn%64))
r := f.Data.(*Reg)
if r.act.b[z]&bb != 0 {
return
}
var r1 *Reg
var f1 *Flow
for {
if r.refbehind.b[z]&bb == 0 {
break
}
f1 = f.P1
if f1 == nil {
break
}
r1 = f1.Data.(*Reg)
if r1.refahead.b[z]&bb == 0 {
break
}
if r1.act.b[z]&bb != 0 {
break
}
f = f1
r = r1
}
if LOAD(r, z)&^(r.set.b[z]&^(r.use1.b[z]|r.use2.b[z]))&bb != 0 {
addmove(f, bn, rn, 0)
}
var p *obj.Prog
for {
r.act.b[z] |= bb
p = f.Prog
if r.use1.b[z]&bb != 0 {
if Debug['R'] != 0 && Debug['v'] != 0 {
fmt.Printf("%v", p)
}
addreg(&p.From, rn)
if Debug['R'] != 0 && Debug['v'] != 0 {
fmt.Printf(" ===change== %v\n", p)
}
}
if (r.use2.b[z]|r.set.b[z])&bb != 0 {
if Debug['R'] != 0 && Debug['v'] != 0 {
fmt.Printf("%v", p)
}
addreg(&p.To, rn)
if Debug['R'] != 0 && Debug['v'] != 0 {
fmt.Printf(" ===change== %v\n", p)
}
}
if STORE(r, z)&r.regdiff.b[z]&bb != 0 {
addmove(f, bn, rn, 1)
}
r.regu |= rb
if r.refbehind.b[z]&bb != 0 {
for f1 = f.P2; f1 != nil; f1 = f1.P2link {
if (f1.Data.(*Reg)).refahead.b[z]&bb != 0 {
paint3(f1, bn, rb, rn)
}
}
}
if r.refahead.b[z]&bb == 0 {
break
}
f1 = f.S2
if f1 != nil {
if (f1.Data.(*Reg)).refbehind.b[z]&bb != 0 {
paint3(f1, bn, rb, rn)
}
}
f = f.S1
if f == nil {
break
}
r = f.Data.(*Reg)
if r.act.b[z]&bb != 0 {
break
}
if r.refbehind.b[z]&bb == 0 {
break
}
}
}
func addreg(a *obj.Addr, rn int) {
a.Sym = nil
a.Node = nil
a.Offset = 0
a.Type = obj.TYPE_REG
a.Reg = int16(rn)
a.Name = 0
Ostats.Ncvtreg++
}
func dumpone(f *Flow, isreg int) {
fmt.Printf("%d:%v", f.Loop, f.Prog)
if isreg != 0 {
r := f.Data.(*Reg)
var bit Bits
for z := 0; z < BITS; z++ {
bit.b[z] = r.set.b[z] | r.use1.b[z] | r.use2.b[z] | r.refbehind.b[z] | r.refahead.b[z] | r.calbehind.b[z] | r.calahead.b[z] | r.regdiff.b[z] | r.act.b[z] | 0
}
if bany(&bit) {
fmt.Printf("\t")
if bany(&r.set) {
fmt.Printf(" s:%v", &r.set)
}
if bany(&r.use1) {
fmt.Printf(" u1:%v", &r.use1)
}
if bany(&r.use2) {
fmt.Printf(" u2:%v", &r.use2)
}
if bany(&r.refbehind) {
fmt.Printf(" rb:%v ", &r.refbehind)
}
if bany(&r.refahead) {
fmt.Printf(" ra:%v ", &r.refahead)
}
if bany(&r.calbehind) {
fmt.Printf(" cb:%v ", &r.calbehind)
}
if bany(&r.calahead) {
fmt.Printf(" ca:%v ", &r.calahead)
}
if bany(&r.regdiff) {
fmt.Printf(" d:%v ", &r.regdiff)
}
if bany(&r.act) {
fmt.Printf(" a:%v ", &r.act)
}
}
}
fmt.Printf("\n")
}
func Dumpit(str string, r0 *Flow, isreg int) {
var r1 *Flow
fmt.Printf("\n%s\n", str)
for r := r0; r != nil; r = r.Link {
dumpone(r, isreg)
r1 = r.P2
if r1 != nil {
fmt.Printf("\tpred:")
for ; r1 != nil; r1 = r1.P2link {
fmt.Printf(" %.4d", uint(int(r1.Prog.Pc)))
}
if r.P1 != nil {
fmt.Printf(" (and %.4d)", uint(int(r.P1.Prog.Pc)))
} else {
fmt.Printf(" (only)")
}
fmt.Printf("\n")
}
// Print successors if it's not just the next one
if r.S1 != r.Link || r.S2 != nil {
fmt.Printf("\tsucc:")
if r.S1 != nil {
fmt.Printf(" %.4d", uint(int(r.S1.Prog.Pc)))
}
if r.S2 != nil {
fmt.Printf(" %.4d", uint(int(r.S2.Prog.Pc)))
}
fmt.Printf("\n")
}
}
}
func regopt(firstp *obj.Prog) {
mergetemp(firstp)
// control flow is more complicated in generated go code
// than in generated c code. define pseudo-variables for
// registers, so we have complete register usage information.
var nreg int
regnames := Thearch.Regnames(&nreg)
nvar = nreg
for i := 0; i < nreg; i++ {
vars[i] = Var{}
}
for i := 0; i < nreg; i++ {
if regnodes[i] == nil {
regnodes[i] = newname(Lookup(regnames[i]))
}
vars[i].node = regnodes[i]
}
regbits = Thearch.Excludedregs()
externs = zbits
params = zbits
consts = zbits
addrs = zbits
ivar = zbits
ovar = zbits
// pass 1
// build aux data structure
// allocate pcs
// find use and set of variables
g := Flowstart(firstp, func() interface{} { return new(Reg) })
if g == nil {
for i := 0; i < nvar; i++ {
vars[i].node.SetOpt(nil)
}
return
}
firstf := g.Start
for f := firstf; f != nil; f = f.Link {
p := f.Prog
// AVARLIVE must be considered a use, do not skip it.
// Otherwise the variable will be optimized away,
// and the whole point of AVARLIVE is to keep it on the stack.
if p.As == obj.AVARDEF || p.As == obj.AVARKILL {
continue
}
// Avoid making variables for direct-called functions.
if p.As == obj.ACALL && p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_EXTERN {
continue
}
// from vs to doesn't matter for registers.
r := f.Data.(*Reg)
r.use1.b[0] |= p.Info.Reguse | p.Info.Regindex
r.set.b[0] |= p.Info.Regset
bit := mkvar(f, &p.From)
if bany(&bit) {
if p.Info.Flags&LeftAddr != 0 {
setaddrs(bit)
}
if p.Info.Flags&LeftRead != 0 {
for z := 0; z < BITS; z++ {
r.use1.b[z] |= bit.b[z]
}
}
if p.Info.Flags&LeftWrite != 0 {
for z := 0; z < BITS; z++ {
r.set.b[z] |= bit.b[z]
}
}
}
// Compute used register for reg
if p.Info.Flags&RegRead != 0 {
r.use1.b[0] |= Thearch.RtoB(int(p.Reg))
}
// Currently we never generate three register forms.
// If we do, this will need to change.
if p.From3Type() != obj.TYPE_NONE && p.From3Type() != obj.TYPE_CONST {
Fatalf("regopt not implemented for from3")
}
bit = mkvar(f, &p.To)
if bany(&bit) {
if p.Info.Flags&RightAddr != 0 {
setaddrs(bit)
}
if p.Info.Flags&RightRead != 0 {
for z := 0; z < BITS; z++ {
r.use2.b[z] |= bit.b[z]
}
}
if p.Info.Flags&RightWrite != 0 {
for z := 0; z < BITS; z++ {
r.set.b[z] |= bit.b[z]
}
}
}
}
for i := 0; i < nvar; i++ {
v := &vars[i]
if v.addr != 0 {
bit := blsh(uint(i))
for z := 0; z < BITS; z++ {
addrs.b[z] |= bit.b[z]
}
}
if Debug['R'] != 0 && Debug['v'] != 0 {
fmt.Printf("bit=%2d addr=%d et=%v w=%-2d s=%v + %d\n", i, v.addr, v.etype, v.width, v.node, v.offset)
}
}
if Debug['R'] != 0 && Debug['v'] != 0 {
Dumpit("pass1", firstf, 1)
}
// pass 2
// find looping structure
flowrpo(g)
if Debug['R'] != 0 && Debug['v'] != 0 {
Dumpit("pass2", firstf, 1)
}
// pass 2.5
// iterate propagating fat vardef covering forward
// r->act records vars with a VARDEF since the last CALL.
// (r->act will be reused in pass 5 for something else,
// but we'll be done with it by then.)
active := 0
for f := firstf; f != nil; f = f.Link {
f.Active = 0
r := f.Data.(*Reg)
r.act = zbits
}
for f := firstf; f != nil; f = f.Link {
p := f.Prog
if p.As == obj.AVARDEF && Isfat(((p.To.Node).(*Node)).Type) && ((p.To.Node).(*Node)).Opt() != nil {
active++
walkvardef(p.To.Node.(*Node), f, active)
}
}
// pass 3
// iterate propagating usage
// back until flow graph is complete
var f1 *Flow
var i int
var f *Flow
loop1:
change = 0
for f = firstf; f != nil; f = f.Link {
f.Active = 0
}
for f = firstf; f != nil; f = f.Link {
if f.Prog.As == obj.ARET {
prop(f, zbits, zbits)
}
}
// pick up unreachable code
loop11:
i = 0
for f = firstf; f != nil; f = f1 {
f1 = f.Link
if f1 != nil && f1.Active != 0 && f.Active == 0 {
prop(f, zbits, zbits)
i = 1
}
}
if i != 0 {
goto loop11
}
if change != 0 {
goto loop1
}
if Debug['R'] != 0 && Debug['v'] != 0 {
Dumpit("pass3", firstf, 1)
}
// pass 4
// iterate propagating register/variable synchrony
// forward until graph is complete
loop2:
change = 0
for f = firstf; f != nil; f = f.Link {
f.Active = 0
}
synch(firstf, zbits)
if change != 0 {
goto loop2
}
if Debug['R'] != 0 && Debug['v'] != 0 {
Dumpit("pass4", firstf, 1)
}
// pass 4.5
// move register pseudo-variables into regu.
mask := uint64((1 << uint(nreg)) - 1)
for f := firstf; f != nil; f = f.Link {
r := f.Data.(*Reg)
r.regu = (r.refbehind.b[0] | r.set.b[0]) & mask
r.set.b[0] &^= mask
r.use1.b[0] &^= mask
r.use2.b[0] &^= mask
r.refbehind.b[0] &^= mask
r.refahead.b[0] &^= mask
r.calbehind.b[0] &^= mask
r.calahead.b[0] &^= mask
r.regdiff.b[0] &^= mask
r.act.b[0] &^= mask
}
if Debug['R'] != 0 && Debug['v'] != 0 {
Dumpit("pass4.5", firstf, 1)
}
// pass 5
// isolate regions
// calculate costs (paint1)
var bit Bits
if f := firstf; f != nil {
r := f.Data.(*Reg)
for z := 0; z < BITS; z++ {
bit.b[z] = (r.refahead.b[z] | r.calahead.b[z]) &^ (externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z])
}
if bany(&bit) && !f.Refset {
// should never happen - all variables are preset
if Debug['w'] != 0 {
fmt.Printf("%v: used and not set: %v\n", f.Prog.Line(), &bit)
}
f.Refset = true
}
}
for f := firstf; f != nil; f = f.Link {
(f.Data.(*Reg)).act = zbits
}
nregion = 0
region = region[:0]
for f := firstf; f != nil; f = f.Link {
r := f.Data.(*Reg)
for z := 0; z < BITS; z++ {
bit.b[z] = r.set.b[z] &^ (r.refahead.b[z] | r.calahead.b[z] | addrs.b[z])
}
if bany(&bit) && !f.Refset {
if Debug['w'] != 0 {
fmt.Printf("%v: set and not used: %v\n", f.Prog.Line(), &bit)
}
f.Refset = true
Thearch.Excise(f)
}
for z := 0; z < BITS; z++ {
bit.b[z] = LOAD(r, z) &^ (r.act.b[z] | addrs.b[z])
}
for bany(&bit) {
i = bnum(&bit)
change = 0
paint1(f, i)
biclr(&bit, uint(i))
if change <= 0 {
continue
}
if nregion >= MaxRgn {
nregion++
continue
}
region = append(region, Rgn{
enter: f,
cost: int16(change),
varno: int16(i),
})
nregion++
}
}
if false && Debug['v'] != 0 && strings.Contains(Curfn.Func.Nname.Sym.Name, "Parse") {
Warn("regions: %d\n", nregion)
}
if nregion >= MaxRgn {
if Debug['v'] != 0 {
Warn("too many regions: %d\n", nregion)
}
nregion = MaxRgn
}
sort.Sort(rcmp(region[:nregion]))
if Debug['R'] != 0 && Debug['v'] != 0 {
Dumpit("pass5", firstf, 1)
}
// pass 6
// determine used registers (paint2)
// replace code (paint3)
if Debug['R'] != 0 && Debug['v'] != 0 {
fmt.Printf("\nregisterizing\n")
}
for i := 0; i < nregion; i++ {
rgp := &region[i]
if Debug['R'] != 0 && Debug['v'] != 0 {
fmt.Printf("region %d: cost %d varno %d enter %d\n", i, rgp.cost, rgp.varno, rgp.enter.Prog.Pc)
}
bit = blsh(uint(rgp.varno))
usedreg := paint2(rgp.enter, int(rgp.varno), 0)
vreg := allreg(usedreg, rgp)
if rgp.regno != 0 {
if Debug['R'] != 0 && Debug['v'] != 0 {
v := &vars[rgp.varno]
fmt.Printf("registerize %v+%d (bit=%2d et=%v) in %v usedreg=%#x vreg=%#x\n", v.node, v.offset, rgp.varno, v.etype, obj.Rconv(int(rgp.regno)), usedreg, vreg)
}
paint3(rgp.enter, int(rgp.varno), vreg, int(rgp.regno))
}
}
// free aux structures. peep allocates new ones.
for i := 0; i < nvar; i++ {
vars[i].node.SetOpt(nil)
}
Flowend(g)
firstf = nil
if Debug['R'] != 0 && Debug['v'] != 0 {
// Rebuild flow graph, since we inserted instructions
g := Flowstart(firstp, nil)
firstf = g.Start
Dumpit("pass6", firstf, 0)
Flowend(g)
firstf = nil
}
// pass 7
// peep-hole on basic block
if Debug['R'] == 0 || Debug['P'] != 0 {
Thearch.Peep(firstp)
}
// eliminate nops
for p := firstp; p != nil; p = p.Link {
for p.Link != nil && p.Link.As == obj.ANOP {
p.Link = p.Link.Link
}
if p.To.Type == obj.TYPE_BRANCH {
for p.To.Val.(*obj.Prog) != nil && p.To.Val.(*obj.Prog).As == obj.ANOP {
p.To.Val = p.To.Val.(*obj.Prog).Link
}
}
}
if Debug['R'] != 0 {
if Ostats.Ncvtreg != 0 || Ostats.Nspill != 0 || Ostats.Nreload != 0 || Ostats.Ndelmov != 0 || Ostats.Nvar != 0 || Ostats.Naddr != 0 || false {
fmt.Printf("\nstats\n")
}
if Ostats.Ncvtreg != 0 {
fmt.Printf("\t%4d cvtreg\n", Ostats.Ncvtreg)
}
if Ostats.Nspill != 0 {
fmt.Printf("\t%4d spill\n", Ostats.Nspill)
}
if Ostats.Nreload != 0 {
fmt.Printf("\t%4d reload\n", Ostats.Nreload)
}
if Ostats.Ndelmov != 0 {
fmt.Printf("\t%4d delmov\n", Ostats.Ndelmov)
}
if Ostats.Nvar != 0 {
fmt.Printf("\t%4d var\n", Ostats.Nvar)
}
if Ostats.Naddr != 0 {
fmt.Printf("\t%4d addr\n", Ostats.Naddr)
}
Ostats = OptStats{}
}
}
// bany reports whether any bits in a are set.
func bany(a *Bits) bool {
for _, x := range &a.b { // & to avoid making a copy of a.b
if x != 0 {
return true
}
}
return false
}
// bnum reports the lowest index of a 1 bit in a.
func bnum(a *Bits) int {
for i, x := range &a.b { // & to avoid making a copy of a.b
if x != 0 {
return 64*i + Bitno(x)
}
}
Fatalf("bad in bnum")
return 0
}
// blsh returns a Bits with 1 at index n, 0 elsewhere (1<<n).
func blsh(n uint) Bits {
c := zbits
c.b[n/64] = 1 << (n % 64)
return c
}
// btest reports whether bit n is 1.
func btest(a *Bits, n uint) bool {
return a.b[n/64]&(1<<(n%64)) != 0
}
// biset sets bit n to 1.
func biset(a *Bits, n uint) {
a.b[n/64] |= 1 << (n % 64)
}
// biclr sets bit n to 0.
func biclr(a *Bits, n uint) {
a.b[n/64] &^= (1 << (n % 64))
}
// Bitno reports the lowest index of a 1 bit in b.
// It calls Fatalf if there is no 1 bit.
func Bitno(b uint64) int {
if b == 0 {
Fatalf("bad in bitno")
}
n := 0
if b&(1<<32-1) == 0 {
n += 32
b >>= 32
}
if b&(1<<16-1) == 0 {
n += 16
b >>= 16
}
if b&(1<<8-1) == 0 {
n += 8
b >>= 8
}
if b&(1<<4-1) == 0 {
n += 4
b >>= 4
}
if b&(1<<2-1) == 0 {
n += 2
b >>= 2
}
if b&1 == 0 {
n++
}
return n
}
// String returns a space-separated list of the variables represented by bits.
func (bits Bits) String() string {
// Note: This method takes a value receiver, both for convenience
// and to make it safe to modify the bits as we process them.
// Even so, most prints above use &bits, because then the value
// being stored in the interface{} is a pointer and does not require
// an allocation and copy to create the interface{}.
var buf bytes.Buffer
sep := ""
for bany(&bits) {
i := bnum(&bits)
buf.WriteString(sep)
sep = " "
v := &vars[i]
if v.node == nil || v.node.Sym == nil {
fmt.Fprintf(&buf, "$%d", i)
} else {
fmt.Fprintf(&buf, "%s(%d)", v.node.Sym.Name, i)
if v.offset != 0 {
fmt.Fprintf(&buf, "%+d", v.offset)
}
}
biclr(&bits, uint(i))
}
return buf.String()
}