cmd/compile: start on ARM port

Start working on the ARM port.  It gets close to correct
code for Fibonacci:
    func fib(n int) int {
        if n < 2 {
            return n
        }
        return fib(n-1) + fib(n-2)
    }
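
With the new lowering rules, the comparison n < 2 becomes,
roughly (an SSA sketch, not verbatim compiler output):

    Less32 n (Const32 [2])
      -> LessThan (CMP n (MOVWconst [2]))
    If (LessThan cc) yes no
      -> LT cc yes no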

Still a lot to do, but this is a good starting point.

Cleaned up some arch-specific dependencies in regalloc.

Change-Id: I4301c6c31a8402168e50dcfee8bcf7aee73ea9d5
Reviewed-on: https://go-review.googlesource.com/21000
Reviewed-by: David Chase <drchase@google.com>
diff --git a/src/cmd/compile/internal/arm/galign.go b/src/cmd/compile/internal/arm/galign.go
index d89b852..e05f4d0 100644
--- a/src/cmd/compile/internal/arm/galign.go
+++ b/src/cmd/compile/internal/arm/galign.go
@@ -6,6 +6,7 @@
 
 import (
 	"cmd/compile/internal/gc"
+	"cmd/compile/internal/ssa"
 	"cmd/internal/obj/arm"
 )
 
@@ -65,6 +66,11 @@
 	gc.Thearch.Doregbits = doregbits
 	gc.Thearch.Regnames = regnames
 
+	gc.Thearch.SSARegToReg = ssaRegToReg
+	gc.Thearch.SSAMarkMoves = func(s *gc.SSAGenState, b *ssa.Block) {}
+	gc.Thearch.SSAGenValue = ssaGenValue
+	gc.Thearch.SSAGenBlock = ssaGenBlock
+
 	gc.Main()
 	gc.Exit(0)
 }
diff --git a/src/cmd/compile/internal/arm/ssa.go b/src/cmd/compile/internal/arm/ssa.go
new file mode 100644
index 0000000..a9baf43
--- /dev/null
+++ b/src/cmd/compile/internal/arm/ssa.go
@@ -0,0 +1,152 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package arm
+
+import (
+	"cmd/compile/internal/gc"
+	"cmd/compile/internal/ssa"
+	"cmd/internal/obj"
+	"cmd/internal/obj/arm"
+)
+
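+// ssaRegToReg maps an SSA-allocated register number (the index in the
+// generated registersARM table) to its cmd/internal/obj/arm register.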
+var ssaRegToReg = []int16{
+	arm.REG_R0,
+	arm.REG_R1,
+	arm.REG_R2,
+	arm.REG_R3,
+	arm.REGSP, // aka R13
+}
+
+func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
+	s.SetLineno(v.Line)
+	switch v.Op {
+	case ssa.OpInitMem:
+		// memory arg needs no code
+	case ssa.OpArg:
+		// input args need no code
+	case ssa.OpSP, ssa.OpSB:
+		// nothing to do
+	case ssa.OpCopy:
+	case ssa.OpLoadReg:
+		// TODO: by type
+		p := gc.Prog(arm.AMOVW)
+		n, off := gc.AutoVar(v.Args[0])
+		p.From.Type = obj.TYPE_MEM
+		p.From.Node = n
+		p.From.Sym = gc.Linksym(n.Sym)
+		p.From.Offset = off
+		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
+			p.From.Name = obj.NAME_PARAM
+			p.From.Offset += n.Xoffset
+		} else {
+			p.From.Name = obj.NAME_AUTO
+		}
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = gc.SSARegNum(v)
+
+	case ssa.OpStoreReg:
+		// TODO: by type
+		p := gc.Prog(arm.AMOVW)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = gc.SSARegNum(v.Args[0])
+		n, off := gc.AutoVar(v)
+		p.To.Type = obj.TYPE_MEM
+		p.To.Node = n
+		p.To.Sym = gc.Linksym(n.Sym)
+		p.To.Offset = off
+		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
+			p.To.Name = obj.NAME_PARAM
+			p.To.Offset += n.Xoffset
+		} else {
+			p.To.Name = obj.NAME_AUTO
+		}
+	case ssa.OpARMADD:
+		r := gc.SSARegNum(v)
+		r1 := gc.SSARegNum(v.Args[0])
+		r2 := gc.SSARegNum(v.Args[1])
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = r1
+		p.Reg = r2
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = r
+	case ssa.OpARMADDconst:
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = v.AuxInt
+		if v.Aux != nil {
+			panic("can't handle symbolic constant yet")
+		}
+		p.Reg = gc.SSARegNum(v.Args[0])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = gc.SSARegNum(v)
+	case ssa.OpARMMOVWconst:
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = v.AuxInt2Int64()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = gc.SSARegNum(v)
+	case ssa.OpARMCMP:
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		// Special layout for CMP: arg0 is the Rn operand (p.Reg),
+		// arg1 is the Op2 operand (p.From); flags are set for arg0-arg1.
+		p.From.Reg = gc.SSARegNum(v.Args[1])
+		p.Reg = gc.SSARegNum(v.Args[0])
+	case ssa.OpARMMOVWload:
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = gc.SSARegNum(v.Args[0])
+		gc.AddAux(&p.From, v)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = gc.SSARegNum(v)
+	case ssa.OpARMMOVWstore:
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = gc.SSARegNum(v.Args[1])
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = gc.SSARegNum(v.Args[0])
+		gc.AddAux(&p.To, v)
+	case ssa.OpARMCALLstatic:
+		// TODO: deferreturn
+		p := gc.Prog(obj.ACALL)
+		p.To.Type = obj.TYPE_MEM
+		p.To.Name = obj.NAME_EXTERN
+		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
+		if gc.Maxarg < v.AuxInt {
+			gc.Maxarg = v.AuxInt
+		}
+	case ssa.OpVarDef:
+		gc.Gvardef(v.Aux.(*gc.Node))
+	case ssa.OpVarKill:
+		gc.Gvarkill(v.Aux.(*gc.Node))
+	case ssa.OpVarLive:
+		gc.Gvarlive(v.Aux.(*gc.Node))
+	case ssa.OpARMLessThan:
+		v.Fatalf("pseudo-op made it to output: %s", v.LongString())
+	default:
+		v.Unimplementedf("genValue not implemented: %s", v.LongString())
+	}
+}
+
+func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
+	s.SetLineno(b.Line)
+
+	switch b.Kind {
+	case ssa.BlockCall:
+		if b.Succs[0] != next {
+			p := gc.Prog(obj.AJMP)
+			p.To.Type = obj.TYPE_BRANCH
+			s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]})
+		}
+	case ssa.BlockRet:
+		gc.Prog(obj.ARET)
+	case ssa.BlockARMLT:
+		// Branch on the inverted condition (BGE) to the false successor,
+		// then jump unconditionally to the true successor.
+		p := gc.Prog(arm.ABGE)
+		p.To.Type = obj.TYPE_BRANCH
+		s.Branches = append(s.Branches, gc.Branch{p, b.Succs[1]})
+		p = gc.Prog(obj.AJMP)
+		p.To.Type = obj.TYPE_BRANCH
+		s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]})
+	}
+}
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 7467acb..93b820b 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -30,8 +30,14 @@
 }
 
 func shouldssa(fn *Node) bool {
-	if Thearch.Thestring != "amd64" {
-		return false
+	switch Thearch.Thestring {
+	default:
+		// Only available for testing.
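+		// (e.g. SSATEST=1 GOARCH=arm go build hello.go)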
+		if os.Getenv("SSATEST") == "" {
+			return false
+		}
+		// Generally available.
+	case "amd64":
 	}
 	if !ssaEnabled {
 		return false
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index e7f4aec..d0de429 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -18,6 +18,7 @@
 	PtrSize      int64                      // 4 or 8
 	lowerBlock   func(*Block) bool          // lowering function
 	lowerValue   func(*Value, *Config) bool // lowering function
+	registers    []Register                 // machine registers
 	fe           Frontend                   // callbacks into compiler frontend
 	HTML         *HTMLWriter                // html writer, for debugging
 	ctxt         *obj.Link                  // Generic arch information
@@ -112,11 +113,18 @@
 		c.PtrSize = 8
 		c.lowerBlock = rewriteBlockAMD64
 		c.lowerValue = rewriteValueAMD64
+		c.registers = registersAMD64[:]
 	case "386":
 		c.IntSize = 4
 		c.PtrSize = 4
 		c.lowerBlock = rewriteBlockAMD64
 		c.lowerValue = rewriteValueAMD64 // TODO(khr): full 32-bit support
+	case "arm":
+		c.IntSize = 4
+		c.PtrSize = 4
+		c.lowerBlock = rewriteBlockARM
+		c.lowerValue = rewriteValueARM
+		c.registers = registersARM[:]
 	default:
 		fe.Unimplementedf(0, "arch %s not implemented", arch)
 	}
diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules
new file mode 100644
index 0000000..273500f
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/gen/ARM.rules
@@ -0,0 +1,32 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
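+// Rules have the form (match) && condition -> (result);
+// [...] matches AuxInt, {...} matches Aux, and <...> gives a type.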
+(Add32 x y) -> (ADD x y)
+
+(Const32 [val]) -> (MOVWconst [val])
+
+(Less32 x y) -> (LessThan (CMP x y))
+
+(OffPtr [off] ptr) -> (ADD (MOVWconst <config.Frontend().TypeInt32()> [off]) ptr)
+
+(Addr {sym} base) -> (ADDconst {sym} base)
+
+(Load <t> ptr mem) && is32BitInt(t) -> (MOVWload ptr mem)
+(Store [4] ptr val mem) -> (MOVWstore ptr val mem)
+
+(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
+
+// Absorb LessThan into blocks.
+(If (LessThan cc) yes no) -> (LT cc yes no)
+
+
+
+// Optimizations
+
+(ADD (MOVWconst [c]) x) -> (ADDconst [c] x)
+(ADD x (MOVWconst [c])) -> (ADDconst [c] x)
+(MOVWload [off1] {sym1} (ADDconst [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+  (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVWstore [off1] {sym1} (ADDconst [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
+  (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
diff --git a/src/cmd/compile/internal/ssa/gen/ARMOps.go b/src/cmd/compile/internal/ssa/gen/ARMOps.go
new file mode 100644
index 0000000..2b2a750
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/gen/ARMOps.go
@@ -0,0 +1,66 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+func init() {
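+	// Registers are numbered in the order of regNames below, so in the
+	// masks here bit 0 is R0 ... bit 4 is SP: 31 = R0-R3+SP, 32 = FLAGS,
+	// and callerSave (15) = R0-R3.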
+	var (
+		gp01       = regInfo{inputs: []regMask{}, outputs: []regMask{31}}
+		gp11       = regInfo{inputs: []regMask{31}, outputs: []regMask{31}}
+		gp21       = regInfo{inputs: []regMask{31, 31}, outputs: []regMask{31}}
+		gp2flags   = regInfo{inputs: []regMask{31, 31}, outputs: []regMask{32}}
+		gpload     = regInfo{inputs: []regMask{31}, outputs: []regMask{31}}
+		gpstore    = regInfo{inputs: []regMask{31, 31}, outputs: []regMask{}}
+		flagsgp    = regInfo{inputs: []regMask{32}, outputs: []regMask{31}}
+		callerSave = regMask(15)
+	)
+	ops := []opData{
+		{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true},  // arg0 + arg1
+		{name: "ADDconst", argLength: 1, reg: gp11, asm: "ADD", aux: "SymOff"}, // arg0 + auxInt + aux.(*gc.Sym)
+
+		{name: "MOVWconst", argLength: 0, reg: gp01, aux: "Int32", asm: "MOVW", rematerializeable: true}, // 32 low bits of auxint
+
+		{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1
+
+		{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVW"},   // load from arg0 + auxInt + aux.  arg1=mem.
+		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
+
+		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff"}, // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
+
+		// pseudo-ops
+		{name: "LessThan", argLength: 1, reg: flagsgp}, // bool: 1 if flags encode x<y, 0 otherwise.
+	}
+
+	blocks := []blockData{
+		{name: "EQ"},
+		{name: "NE"},
+		{name: "LT"},
+		{name: "LE"},
+		{name: "GT"},
+		{name: "GE"},
+		{name: "ULT"},
+		{name: "ULE"},
+		{name: "UGT"},
+		{name: "UGE"},
+	}
+
+	regNames := []string{
+		"R0",
+		"R1",
+		"R2",
+		"R3",
+		"SP",
+		"FLAGS",
+		"SB",
+	}
+
+	archs = append(archs, arch{
+		name:     "ARM",
+		pkg:      "cmd/internal/obj/arm",
+		genfile:  "../../arm/ssa.go",
+		ops:      ops,
+		blocks:   blocks,
+		regnames: regNames,
+	})
+}
diff --git a/src/cmd/compile/internal/ssa/gen/decOps.go b/src/cmd/compile/internal/ssa/gen/decOps.go
index a9045d6..0cc11cb 100644
--- a/src/cmd/compile/internal/ssa/gen/decOps.go
+++ b/src/cmd/compile/internal/ssa/gen/decOps.go
@@ -10,8 +10,9 @@
 
 func init() {
 	archs = append(archs, arch{
-		name:   "dec",
-		ops:    decOps,
-		blocks: decBlocks,
+		name:    "dec",
+		ops:     decOps,
+		blocks:  decBlocks,
+		generic: true,
 	})
 }
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 94180c7..ab5e335 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -414,8 +414,9 @@
 
 func init() {
 	archs = append(archs, arch{
-		name:   "generic",
-		ops:    genericOps,
-		blocks: genericBlocks,
+		name:    "generic",
+		ops:     genericOps,
+		blocks:  genericBlocks,
+		generic: true,
 	})
 }
diff --git a/src/cmd/compile/internal/ssa/gen/main.go b/src/cmd/compile/internal/ssa/gen/main.go
index c707ece..5e681b6 100644
--- a/src/cmd/compile/internal/ssa/gen/main.go
+++ b/src/cmd/compile/internal/ssa/gen/main.go
@@ -26,6 +26,7 @@
 	ops      []opData
 	blocks   []blockData
 	regnames []string
+	generic  bool
 }
 
 type opData struct {
@@ -205,6 +206,18 @@
 	// generate op string method
 	fmt.Fprintln(w, "func (o Op) String() string {return opcodeTable[o].name }")
 
+	// generate registers
+	for _, a := range archs {
+		if a.generic {
+			continue
+		}
+		fmt.Fprintf(w, "var registers%s = [...]Register {\n", a.name)
+		for i, r := range a.regnames {
+			fmt.Fprintf(w, "  {%d, \"%s\"},\n", i, r)
+		}
+		fmt.Fprintln(w, "}")
+	}
+
 	// gofmt result
 	b := w.Bytes()
 	var err error
diff --git a/src/cmd/compile/internal/ssa/op.go b/src/cmd/compile/internal/ssa/op.go
index ecffb5a..d10ea23 100644
--- a/src/cmd/compile/internal/ssa/op.go
+++ b/src/cmd/compile/internal/ssa/op.go
@@ -55,6 +55,8 @@
 	auxSym                  // aux is a symbol
 	auxSymOff               // aux is a symbol, auxInt is an offset
 	auxSymValAndOff         // aux is a symbol, auxInt is a ValAndOff
+
+	auxSymInt32 // aux is a symbol, auxInt is a 32-bit integer
 )
 
 // A ValAndOff is used by several opcodes. It holds
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 9817467..3ff2b5a 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -5,6 +5,7 @@
 
 import (
 	"cmd/internal/obj"
+	"cmd/internal/obj/arm"
 	"cmd/internal/obj/x86"
 )
 
@@ -26,6 +27,17 @@
 	BlockAMD64ORD
 	BlockAMD64NAN
 
+	BlockARMEQ
+	BlockARMNE
+	BlockARMLT
+	BlockARMLE
+	BlockARMGT
+	BlockARMGE
+	BlockARMULT
+	BlockARMULE
+	BlockARMUGT
+	BlockARMUGE
+
 	BlockPlain
 	BlockIf
 	BlockCall
@@ -56,6 +68,17 @@
 	BlockAMD64ORD: "ORD",
 	BlockAMD64NAN: "NAN",
 
+	BlockARMEQ:  "EQ",
+	BlockARMNE:  "NE",
+	BlockARMLT:  "LT",
+	BlockARMLE:  "LE",
+	BlockARMGT:  "GT",
+	BlockARMGE:  "GE",
+	BlockARMULT: "ULT",
+	BlockARMULE: "ULE",
+	BlockARMUGT: "UGT",
+	BlockARMUGE: "UGE",
+
 	BlockPlain:  "Plain",
 	BlockIf:     "If",
 	BlockCall:   "Call",
@@ -309,6 +332,15 @@
 	OpAMD64FlagGT_UGT
 	OpAMD64FlagGT_ULT
 
+	OpARMADD
+	OpARMADDconst
+	OpARMMOVWconst
+	OpARMCMP
+	OpARMMOVWload
+	OpARMMOVWstore
+	OpARMCALLstatic
+	OpARMLessThan
+
 	OpAdd8
 	OpAdd16
 	OpAdd32
@@ -3916,6 +3948,106 @@
 	},
 
 	{
+		name:        "ADD",
+		argLen:      2,
+		commutative: true,
+		asm:         arm.AADD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 31}, // R0 R1 R2 R3 SP
+				{1, 31}, // R0 R1 R2 R3 SP
+			},
+			outputs: []regMask{
+				31, // R0 R1 R2 R3 SP
+			},
+		},
+	},
+	{
+		name:    "ADDconst",
+		auxType: auxSymOff,
+		argLen:  1,
+		asm:     arm.AADD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 31}, // R0 R1 R2 R3 SP
+			},
+			outputs: []regMask{
+				31, // R0 R1 R2 R3 SP
+			},
+		},
+	},
+	{
+		name:              "MOVWconst",
+		auxType:           auxInt32,
+		argLen:            0,
+		rematerializeable: true,
+		asm:               arm.AMOVW,
+		reg: regInfo{
+			outputs: []regMask{
+				31, // R0 R1 R2 R3 SP
+			},
+		},
+	},
+	{
+		name:   "CMP",
+		argLen: 2,
+		asm:    arm.ACMP,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 31}, // R0 R1 R2 R3 SP
+				{1, 31}, // R0 R1 R2 R3 SP
+			},
+			outputs: []regMask{
+				32, // FLAGS
+			},
+		},
+	},
+	{
+		name:   "MOVWload",
+		argLen: 2,
+		asm:    arm.AMOVW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 31}, // R0 R1 R2 R3 SP
+			},
+			outputs: []regMask{
+				31, // R0 R1 R2 R3 SP
+			},
+		},
+	},
+	{
+		name:   "MOVWstore",
+		argLen: 3,
+		asm:    arm.AMOVW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 31}, // R0 R1 R2 R3 SP
+				{1, 31}, // R0 R1 R2 R3 SP
+			},
+		},
+	},
+	{
+		name:    "CALLstatic",
+		auxType: auxSymOff,
+		argLen:  1,
+		reg: regInfo{
+			clobbers: 15, // R0 R1 R2 R3
+		},
+	},
+	{
+		name:   "LessThan",
+		argLen: 1,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 32}, // FLAGS
+			},
+			outputs: []regMask{
+				31, // R0 R1 R2 R3 SP
+			},
+		},
+	},
+
+	{
 		name:        "Add8",
 		argLen:      2,
 		commutative: true,
@@ -5343,3 +5475,49 @@
 
 func (o Op) Asm() obj.As    { return opcodeTable[o].asm }
 func (o Op) String() string { return opcodeTable[o].name }
+
+var registersAMD64 = [...]Register{
+	{0, "AX"},
+	{1, "CX"},
+	{2, "DX"},
+	{3, "BX"},
+	{4, "SP"},
+	{5, "BP"},
+	{6, "SI"},
+	{7, "DI"},
+	{8, "R8"},
+	{9, "R9"},
+	{10, "R10"},
+	{11, "R11"},
+	{12, "R12"},
+	{13, "R13"},
+	{14, "R14"},
+	{15, "R15"},
+	{16, "X0"},
+	{17, "X1"},
+	{18, "X2"},
+	{19, "X3"},
+	{20, "X4"},
+	{21, "X5"},
+	{22, "X6"},
+	{23, "X7"},
+	{24, "X8"},
+	{25, "X9"},
+	{26, "X10"},
+	{27, "X11"},
+	{28, "X12"},
+	{29, "X13"},
+	{30, "X14"},
+	{31, "X15"},
+	{32, "SB"},
+	{33, "FLAGS"},
+}
+var registersARM = [...]Register{
+	{0, "R0"},
+	{1, "R1"},
+	{2, "R2"},
+	{3, "R3"},
+	{4, "SP"},
+	{5, "FLAGS"},
+	{6, "SB"},
+}
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index 4e2ca4e..eb4401c 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -129,10 +129,11 @@
 
 func (m regMask) String() string {
 	s := ""
-	for r := register(0); r < numRegs; r++ {
+	for r := register(0); m != 0; r++ {
 		if m>>r&1 == 0 {
 			continue
 		}
+		m &^= regMask(1) << r
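+		// Clearing the bit above lets the loop stop as soon as m is
+		// empty, with no arch-independent numRegs bound needed.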
 		if s != "" {
 			s += " "
 		}
@@ -141,47 +142,6 @@
 	return s
 }
 
-// TODO: make arch-dependent
-var numRegs register = 64
-
-var registers = [...]Register{
-	Register{0, "AX"},
-	Register{1, "CX"},
-	Register{2, "DX"},
-	Register{3, "BX"},
-	Register{4, "SP"},
-	Register{5, "BP"},
-	Register{6, "SI"},
-	Register{7, "DI"},
-	Register{8, "R8"},
-	Register{9, "R9"},
-	Register{10, "R10"},
-	Register{11, "R11"},
-	Register{12, "R12"},
-	Register{13, "R13"},
-	Register{14, "R14"},
-	Register{15, "R15"},
-	Register{16, "X0"},
-	Register{17, "X1"},
-	Register{18, "X2"},
-	Register{19, "X3"},
-	Register{20, "X4"},
-	Register{21, "X5"},
-	Register{22, "X6"},
-	Register{23, "X7"},
-	Register{24, "X8"},
-	Register{25, "X9"},
-	Register{26, "X10"},
-	Register{27, "X11"},
-	Register{28, "X12"},
-	Register{29, "X13"},
-	Register{30, "X14"},
-	Register{31, "X15"},
-	Register{32, "SB"}, // pseudo-register for global base pointer (aka %rip)
-
-	// TODO: make arch-dependent
-}
-
 // countRegs returns the number of set bits in the register mask.
 func countRegs(r regMask) int {
 	n := 0
@@ -231,6 +191,11 @@
 type regAllocState struct {
 	f *Func
 
+	registers []Register // machine registers for this arch, from Config
+	numRegs   register   // len(registers)
+	SPReg     register   // index of the SP register
+	SBReg     register   // index of the SB register
+
 	// for each block, its primary predecessor.
 	// A predecessor of b is primary if it is the closest
 	// predecessor that appears before b in the layout order.
@@ -298,7 +263,7 @@
 
 	// Mark r as unused.
 	if s.f.pass.debug > regDebug {
-		fmt.Printf("freeReg %s (dump %s/%s)\n", registers[r].Name(), v, s.regs[r].c)
+		fmt.Printf("freeReg %s (dump %s/%s)\n", s.registers[r].Name(), v, s.regs[r].c)
 	}
 	s.regs[r] = regState{}
 	s.values[v.ID].regs &^= regMask(1) << r
@@ -328,7 +293,7 @@
 // r must be unused.
 func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
 	if s.f.pass.debug > regDebug {
-		fmt.Printf("assignReg %s %s/%s\n", registers[r].Name(), v, c)
+		fmt.Printf("assignReg %s %s/%s\n", s.registers[r].Name(), v, c)
 	}
 	if s.regs[r].v != nil {
 		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
@@ -338,7 +303,7 @@
 	s.regs[r] = regState{v, c}
 	s.values[v.ID].regs |= regMask(1) << r
 	s.used |= regMask(1) << r
-	s.f.setHome(c, &registers[r])
+	s.f.setHome(c, &s.registers[r])
 }
 
 // allocReg chooses a register for v from the set of registers in mask.
@@ -377,14 +342,14 @@
 
 	// SP and SB are allocated specially. No regular value should
 	// be allocated to them.
-	mask &^= 1<<4 | 1<<32
+	mask &^= 1<<s.SPReg | 1<<s.SBReg
 
 	// Find a register to spill. We spill the register containing the value
 	// whose next use is as far in the future as possible.
 	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
 	var r register
 	maxuse := int32(-1)
-	for t := register(0); t < numRegs; t++ {
+	for t := register(0); t < s.numRegs; t++ {
 		if mask>>t&1 == 0 {
 			continue
 		}
@@ -425,10 +390,10 @@
 	}
 
 	if v.Op != OpSP {
-		mask &^= 1 << 4 // dont' spill SP
+		mask &^= 1 << s.SPReg // don't spill SP
 	}
 	if v.Op != OpSB {
-		mask &^= 1 << 32 // don't spill SB
+		mask &^= 1 << s.SBReg // don't spill SB
 	}
 	mask &^= s.reserved()
 
@@ -469,12 +434,22 @@
 }
 
 func (s *regAllocState) init(f *Func) {
-	if numRegs > noRegister || numRegs > register(unsafe.Sizeof(regMask(0))*8) {
+	s.registers = f.Config.registers
+	s.numRegs = register(len(s.registers))
+	if s.numRegs > noRegister || s.numRegs > register(unsafe.Sizeof(regMask(0))*8) {
 		panic("too many registers")
 	}
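+	// SP and SB are at arch-dependent indexes; locate them by name.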
+	for r := register(0); r < s.numRegs; r++ {
+		if s.registers[r].Name() == "SP" {
+			s.SPReg = r
+		}
+		if s.registers[r].Name() == "SB" {
+			s.SBReg = r
+		}
+	}
 
 	s.f = f
-	s.regs = make([]regState, numRegs)
+	s.regs = make([]regState, s.numRegs)
 	s.values = make([]valState, f.NumValues())
 	s.orig = make([]*Value, f.NumValues())
 	for _, b := range f.Blocks {
@@ -663,7 +638,7 @@
 			// Drop any values which are no longer live.
 			// This may happen because at the end of p, a value may be
 			// live but only used by some other successor of p.
-			for r := register(0); r < numRegs; r++ {
+			for r := register(0); r < s.numRegs; r++ {
 				v := s.regs[r].v
 				if v != nil && !liveSet.contains(v.ID) {
 					s.freeReg(r)
@@ -687,7 +662,7 @@
 			if s.f.pass.debug > regDebug {
 				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
 				for _, x := range s.endRegs[p.ID] {
-					fmt.Printf("  %s: orig:%s cache:%s\n", registers[x.r].Name(), x.v, x.c)
+					fmt.Printf("  %s: orig:%s cache:%s\n", s.registers[x.r].Name(), x.v, x.c)
 				}
 			}
 
@@ -769,7 +744,7 @@
 
 			// Save the starting state for use by merge edges.
 			var regList []startReg
-			for r := register(0); r < numRegs; r++ {
+			for r := register(0); r < s.numRegs; r++ {
 				v := s.regs[r].v
 				if v == nil {
 					continue
@@ -786,7 +761,7 @@
 			if s.f.pass.debug > regDebug {
 				fmt.Printf("after phis\n")
 				for _, x := range s.startRegs[b.ID] {
-					fmt.Printf("  %s: v%d\n", registers[x.r].Name(), x.vid)
+					fmt.Printf("  %s: v%d\n", s.registers[x.r].Name(), x.vid)
 				}
 			}
 		}
@@ -866,13 +841,13 @@
 				f.Fatalf("phi %s not at start of block", v)
 			}
 			if v.Op == OpSP {
-				s.assignReg(4, v, v) // TODO: arch-dependent
+				s.assignReg(s.SPReg, v, v)
 				b.Values = append(b.Values, v)
 				s.advanceUses(v)
 				continue
 			}
 			if v.Op == OpSB {
-				s.assignReg(32, v, v) // TODO: arch-dependent
+				s.assignReg(s.SBReg, v, v)
 				b.Values = append(b.Values, v)
 				s.advanceUses(v)
 				continue
@@ -1030,7 +1005,7 @@
 		// Save end-of-block register state.
 		// First count how many, this cuts allocations in half.
 		k := 0
-		for r := register(0); r < numRegs; r++ {
+		for r := register(0); r < s.numRegs; r++ {
 			v := s.regs[r].v
 			if v == nil {
 				continue
@@ -1038,7 +1013,7 @@
 			k++
 		}
 		regList := make([]endReg, 0, k)
-		for r := register(0); r < numRegs; r++ {
+		for r := register(0); r < s.numRegs; r++ {
 			v := s.regs[r].v
 			if v == nil {
 				continue
@@ -1053,7 +1028,7 @@
 			for _, x := range s.live[b.ID] {
 				liveSet.add(x.ID)
 			}
-			for r := register(0); r < numRegs; r++ {
+			for r := register(0); r < s.numRegs; r++ {
 				v := s.regs[r].v
 				if v == nil {
 					continue
@@ -1214,7 +1189,7 @@
 
 	// Live registers can be sources.
 	for _, x := range srcReg {
-		e.set(&registers[x.r], x.v.ID, x.c, false)
+		e.set(&e.s.registers[x.r], x.v.ID, x.c, false)
 	}
 	// So can all of the spill locations.
 	for _, spillID := range stacklive {
@@ -1226,7 +1201,7 @@
 	// Figure out all the destinations we need.
 	dsts := e.destinations[:0]
 	for _, x := range dstReg {
-		dsts = append(dsts, dstRecord{&registers[x.r], x.vid, nil})
+		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.vid, nil})
 	}
 	// Phis need their args to end up in a specific location.
 	for _, v := range e.b.Values {
@@ -1519,15 +1494,15 @@
 	// 3) a non-unique register
 	x := m &^ e.usedRegs
 	if x != 0 {
-		return &registers[pickReg(x)]
+		return &e.s.registers[pickReg(x)]
 	}
 	x = m &^ e.uniqueRegs &^ e.finalRegs
 	if x != 0 {
-		return &registers[pickReg(x)]
+		return &e.s.registers[pickReg(x)]
 	}
 	x = m &^ e.uniqueRegs
 	if x != 0 {
-		return &registers[pickReg(x)]
+		return &e.s.registers[pickReg(x)]
 	}
 
 	// No register is available. Allocate a temp location to spill a register to.
diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go
new file mode 100644
index 0000000..67eff50
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/rewriteARM.go
@@ -0,0 +1,294 @@
+// autogenerated from gen/ARM.rules: do not edit!
+// generated with: cd gen; go run *.go
+
+package ssa
+
+import "math"
+
+var _ = math.MinInt8 // in case not otherwise used
+func rewriteValueARM(v *Value, config *Config) bool {
+	switch v.Op {
+	case OpARMADD:
+		return rewriteValueARM_OpARMADD(v, config)
+	case OpAdd32:
+		return rewriteValueARM_OpAdd32(v, config)
+	case OpAddr:
+		return rewriteValueARM_OpAddr(v, config)
+	case OpConst32:
+		return rewriteValueARM_OpConst32(v, config)
+	case OpLess32:
+		return rewriteValueARM_OpLess32(v, config)
+	case OpLoad:
+		return rewriteValueARM_OpLoad(v, config)
+	case OpARMMOVWload:
+		return rewriteValueARM_OpARMMOVWload(v, config)
+	case OpARMMOVWstore:
+		return rewriteValueARM_OpARMMOVWstore(v, config)
+	case OpOffPtr:
+		return rewriteValueARM_OpOffPtr(v, config)
+	case OpStaticCall:
+		return rewriteValueARM_OpStaticCall(v, config)
+	case OpStore:
+		return rewriteValueARM_OpStore(v, config)
+	}
+	return false
+}
+func rewriteValueARM_OpARMADD(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (ADD (MOVWconst [c]) x)
+	// cond:
+	// result: (ADDconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARMMOVWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARMADDconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (ADD x (MOVWconst [c]))
+	// cond:
+	// result: (ADDconst [c] x)
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARMMOVWconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARMADDconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+func rewriteValueARM_OpAdd32(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Add32 x y)
+	// cond:
+	// result: (ADD x y)
+	for {
+		x := v.Args[0]
+		y := v.Args[1]
+		v.reset(OpARMADD)
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	return false
+}
+func rewriteValueARM_OpAddr(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Addr {sym} base)
+	// cond:
+	// result: (ADDconst {sym} base)
+	for {
+		sym := v.Aux
+		base := v.Args[0]
+		v.reset(OpARMADDconst)
+		v.Aux = sym
+		v.AddArg(base)
+		return true
+	}
+	return false
+}
+func rewriteValueARM_OpConst32(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Const32 [val])
+	// cond:
+	// result: (MOVWconst [val])
+	for {
+		val := v.AuxInt
+		v.reset(OpARMMOVWconst)
+		v.AuxInt = val
+		return true
+	}
+	return false
+}
+func rewriteValueARM_OpLess32(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Less32 x y)
+	// cond:
+	// result: (LessThan (CMP x y))
+	for {
+		x := v.Args[0]
+		y := v.Args[1]
+		v.reset(OpARMLessThan)
+		v0 := b.NewValue0(v.Line, OpARMCMP, TypeFlags)
+		v0.AddArg(x)
+		v0.AddArg(y)
+		v.AddArg(v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM_OpLoad(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Load <t> ptr mem)
+	// cond: is32BitInt(t)
+	// result: (MOVWload ptr mem)
+	for {
+		t := v.Type
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		if !(is32BitInt(t)) {
+			break
+		}
+		v.reset(OpARMMOVWload)
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
+func rewriteValueARM_OpARMMOVWload(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (MOVWload [off1] {sym1} (ADDconst [off2] {sym2} ptr) mem)
+	// cond: canMergeSym(sym1,sym2)
+	// result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpARMADDconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		ptr := v_0.Args[0]
+		mem := v.Args[1]
+		if !(canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpARMMOVWload)
+		v.AuxInt = off1 + off2
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
+func rewriteValueARM_OpARMMOVWstore(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (MOVWstore [off1] {sym1} (ADDconst [off2] {sym2} ptr) val mem)
+	// cond: canMergeSym(sym1,sym2)
+	// result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpARMADDconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		ptr := v_0.Args[0]
+		val := v.Args[1]
+		mem := v.Args[2]
+		if !(canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpARMMOVWstore)
+		v.AuxInt = off1 + off2
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(ptr)
+		v.AddArg(val)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
+func rewriteValueARM_OpOffPtr(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (OffPtr [off] ptr)
+	// cond:
+	// result: (ADD (MOVWconst <config.Frontend().TypeInt32()> [off]) ptr)
+	for {
+		off := v.AuxInt
+		ptr := v.Args[0]
+		v.reset(OpARMADD)
+		v0 := b.NewValue0(v.Line, OpARMMOVWconst, config.Frontend().TypeInt32())
+		v0.AuxInt = off
+		v.AddArg(v0)
+		v.AddArg(ptr)
+		return true
+	}
+	return false
+}
+func rewriteValueARM_OpStaticCall(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (StaticCall [argwid] {target} mem)
+	// cond:
+	// result: (CALLstatic [argwid] {target} mem)
+	for {
+		argwid := v.AuxInt
+		target := v.Aux
+		mem := v.Args[0]
+		v.reset(OpARMCALLstatic)
+		v.AuxInt = argwid
+		v.Aux = target
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
+func rewriteValueARM_OpStore(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Store [4] ptr val mem)
+	// cond:
+	// result: (MOVWstore ptr val mem)
+	for {
+		if v.AuxInt != 4 {
+			break
+		}
+		ptr := v.Args[0]
+		val := v.Args[1]
+		mem := v.Args[2]
+		v.reset(OpARMMOVWstore)
+		v.AddArg(ptr)
+		v.AddArg(val)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
+func rewriteBlockARM(b *Block) bool {
+	switch b.Kind {
+	case BlockIf:
+		// match: (If (LessThan cc) yes no)
+		// cond:
+		// result: (LT cc yes no)
+		for {
+			v := b.Control
+			if v.Op != OpARMLessThan {
+				break
+			}
+			cc := v.Args[0]
+			yes := b.Succs[0]
+			no := b.Succs[1]
+			b.Kind = BlockARMLT
+			b.SetControl(cc)
+			b.Succs[0] = yes
+			b.Succs[1] = no
+			return true
+		}
+	}
+	return false
+}