[dev.ssa] Merge remote-tracking branch 'origin/master' into mergebranch

Semi-regular merge of tip to dev.ssa.

Complicated a bit by the move of cmd/internal/* to cmd/compile/internal/*.

Change-Id: I1c66d3c29bb95cce4a53c5a3476373aa5245303d
diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go
index 5fb0776..c170060 100644
--- a/src/cmd/compile/internal/gc/pgen.go
+++ b/src/cmd/compile/internal/gc/pgen.go
@@ -5,6 +5,7 @@
 package gc
 
 import (
+	"cmd/compile/internal/ssa"
 	"cmd/internal/obj"
 	"crypto/md5"
 	"fmt"
@@ -348,6 +349,7 @@
 	var nam *Node
 	var gcargs *Sym
 	var gclocals *Sym
+	var ssafn *ssa.Func
 	if fn.Nbody == nil {
 		if pure_go != 0 || strings.HasPrefix(fn.Nname.Sym.Name, "init.") {
 			Yyerror("missing function body for %q", fn.Nname.Sym.Name)
@@ -399,6 +401,14 @@
 		goto ret
 	}
 
+	// Build an SSA backend function, but only for functions whose name ends in "_ssa".
+	{
+		name := Curfn.Nname.Sym.Name
+		if len(name) > 4 && name[len(name)-4:] == "_ssa" {
+			ssafn = buildssa(Curfn)
+		}
+	}
+
 	continpc = nil
 	breakpc = nil
 
@@ -460,6 +470,10 @@
 	}
 
 	Genlist(Curfn.Func.Enter)
+	if ssafn != nil {
+		genssa(ssafn, ptxt, gcargs, gclocals)
+		return
+	}
 	Genlist(Curfn.Nbody)
 	gclean()
 	checklabels()
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
new file mode 100644
index 0000000..7f78fce
--- /dev/null
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -0,0 +1,909 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gc
+
+import (
+	"log"
+
+	"cmd/compile/internal/ssa"
+	"cmd/internal/obj"
+	"cmd/internal/obj/x86" // TODO: remove
+)
+
+func buildssa(fn *Node) *ssa.Func {
+	dumplist("buildssa", Curfn.Nbody)
+
+	var s state
+
+	// TODO(khr): build config just once at the start of the compiler binary
+	s.config = ssa.NewConfig(Thearch.Thestring)
+	s.f = s.config.NewFunc()
+	s.f.Name = fn.Nname.Sym.Name
+
+	// We construct SSA using an algorithm similar to
+	// Braun, Buchwald, Hack, Leißa, Mallon, and Zwinkau
+	// http://pp.info.uni-karlsruhe.de/uploads/publikationen/braun13cc.pdf
+	// TODO: check this comment
+
+	// Allocate starting block
+	s.f.Entry = s.f.NewBlock(ssa.BlockPlain)
+
+	// Allocate exit block
+	s.exit = s.f.NewBlock(ssa.BlockExit)
+
+	// Allocate starting values
+	s.startmem = s.f.Entry.NewValue(ssa.OpArg, ssa.TypeMem, ".mem")
+	s.fp = s.f.Entry.NewValue(ssa.OpFP, s.config.Uintptr, nil) // TODO: use generic pointer type (unsafe.Pointer?) instead
+	s.sp = s.f.Entry.NewValue(ssa.OpSP, s.config.Uintptr, nil)
+
+	s.vars = map[string]*ssa.Value{}
+	s.labels = map[string]*ssa.Block{}
+	s.argOffsets = map[string]int64{}
+
+	// Convert the AST-based IR to the SSA-based IR
+	s.startBlock(s.f.Entry)
+	s.stmtList(fn.Nbody)
+
+	// fallthrough to exit
+	if b := s.endBlock(); b != nil {
+		addEdge(b, s.exit)
+	}
+
+	// Finish up exit block
+	s.startBlock(s.exit)
+	s.exit.Control = s.mem()
+	s.endBlock()
+
+	// Link up variable uses to variable definitions
+	s.linkForwardReferences()
+
+	// Main call to ssa package to compile function
+	ssa.Compile(s.f)
+
+	return s.f
+}
+
+type state struct {
+	// configuration (arch) information
+	config *ssa.Config
+
+	// function we're building
+	f *ssa.Func
+
+	// exit block that "return" jumps to (and panics jump to)
+	exit *ssa.Block
+
+	// the target block for each label in f
+	labels map[string]*ssa.Block
+
+	// current location where we're interpreting the AST
+	curBlock *ssa.Block
+
+	// variable assignments in the current block (map from variable name to ssa value)
+	vars map[string]*ssa.Value
+
+	// all defined variables at the end of each block.  Indexed by block ID.
+	defvars []map[string]*ssa.Value
+
+	// offsets of argument slots
+	// unnamed and unused args are not listed.
+	argOffsets map[string]int64
+
+	// starting values.  Memory, frame pointer, and stack pointer
+	startmem *ssa.Value
+	fp       *ssa.Value
+	sp       *ssa.Value
+}
+
+// startBlock sets the current block we're generating code in to b.
+func (s *state) startBlock(b *ssa.Block) {
+	if s.curBlock != nil {
+		log.Fatalf("starting block %v when block %v has not ended", b, s.curBlock)
+	}
+	s.curBlock = b
+	s.vars = map[string]*ssa.Value{}
+}
+
+// endBlock marks the end of generating code for the current block.
+// Returns the (former) current block.  Returns nil if there is no current
+// block, i.e. if no code flows to the current execution point.
+func (s *state) endBlock() *ssa.Block {
+	b := s.curBlock
+	if b == nil {
+		return nil
+	}
+	for len(s.defvars) <= int(b.ID) {
+		s.defvars = append(s.defvars, nil)
+	}
+	s.defvars[b.ID] = s.vars
+	s.curBlock = nil
+	s.vars = nil
+	return b
+}
+
+// stmtList converts each statement in the list l to SSA and adds it to s.
+func (s *state) stmtList(l *NodeList) {
+	for ; l != nil; l = l.Next {
+		s.stmt(l.N)
+	}
+}
+
+// stmt converts the statement n to SSA and adds it to s.
+func (s *state) stmt(n *Node) {
+	s.stmtList(n.Ninit)
+	switch n.Op {
+
+	case OBLOCK:
+		s.stmtList(n.List)
+
+	case ODCL:
+		// TODO: ???  Assign 0?
+
+	case OLABEL, OGOTO:
+		// get block at label, or make one
+		t := s.labels[n.Left.Sym.Name]
+		if t == nil {
+			t = s.f.NewBlock(ssa.BlockPlain)
+			s.labels[n.Left.Sym.Name] = t
+		}
+		// go to that label (we pretend "label:" is preceded by "goto label")
+		b := s.endBlock()
+		addEdge(b, t)
+
+		if n.Op == OLABEL {
+			// next we work on the label's target block
+			s.startBlock(t)
+		}
+
+	case OAS:
+		// TODO(khr): colas?
+		val := s.expr(n.Right)
+		if n.Left.Op == ONAME && !n.Left.Addrtaken && n.Left.Class&PHEAP == 0 && n.Left.Class != PEXTERN && n.Left.Class != PPARAMOUT {
+			// ssa-able variable.
+			s.vars[n.Left.Sym.Name] = val
+			return
+		}
+		// not ssa-able.  Treat as a store.
+		addr := s.addr(n.Left)
+		s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, ssa.TypeMem, nil, addr, val, s.mem())
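+		// The memory state is threaded through the function as an ordinary
+		// variable named ".mem" (see s.mem and the startmem value created in
+		// buildssa).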
+		// TODO: try to make more variables registerizeable.
+	case OIF:
+		cond := s.expr(n.Ntest)
+		b := s.endBlock()
+		b.Kind = ssa.BlockIf
+		b.Control = cond
+		// TODO(khr): likely direction
+
+		bThen := s.f.NewBlock(ssa.BlockPlain)
+		bEnd := s.f.NewBlock(ssa.BlockPlain)
+		var bElse *ssa.Block
+
+		if n.Nelse == nil {
+			addEdge(b, bThen)
+			addEdge(b, bEnd)
+		} else {
+			bElse = s.f.NewBlock(ssa.BlockPlain)
+			addEdge(b, bThen)
+			addEdge(b, bElse)
+		}
+
+		s.startBlock(bThen)
+		s.stmtList(n.Nbody)
+		b = s.endBlock()
+		if b != nil {
+			addEdge(b, bEnd)
+		}
+
+		if n.Nelse != nil {
+			s.startBlock(bElse)
+			s.stmtList(n.Nelse)
+			b = s.endBlock()
+			if b != nil {
+				addEdge(b, bEnd)
+			}
+		}
+		s.startBlock(bEnd)
+
+	case ORETURN:
+		s.stmtList(n.List)
+		b := s.endBlock()
+		addEdge(b, s.exit)
+
+	case OFOR:
+		bCond := s.f.NewBlock(ssa.BlockPlain)
+		bBody := s.f.NewBlock(ssa.BlockPlain)
+		bEnd := s.f.NewBlock(ssa.BlockPlain)
+
+		// first, jump to condition test
+		b := s.endBlock()
+		addEdge(b, bCond)
+
+		// generate code to test condition
+		// TODO(khr): Ntest == nil exception
+		s.startBlock(bCond)
+		cond := s.expr(n.Ntest)
+		b = s.endBlock()
+		b.Kind = ssa.BlockIf
+		b.Control = cond
+		// TODO(khr): likely direction
+		addEdge(b, bBody)
+		addEdge(b, bEnd)
+
+		// generate body
+		s.startBlock(bBody)
+		s.stmtList(n.Nbody)
+		s.stmt(n.Nincr)
+		b = s.endBlock()
+		addEdge(b, bCond)
+
+		s.startBlock(bEnd)
+
+	case OVARKILL:
+		// TODO(khr): ??? anything to do here?  Only for addrtaken variables?
+		// Maybe just link it in the store chain?
+	default:
+		log.Fatalf("unhandled stmt %s", opnames[n.Op])
+	}
+}
+
+// expr converts the expression n to SSA, adds it to s, and returns the SSA result.
+func (s *state) expr(n *Node) *ssa.Value {
+	if n == nil {
+		// TODO(khr): is this nil???
+		return s.f.Entry.NewValue(ssa.OpConst, n.Type, nil)
+	}
+	switch n.Op {
+	case ONAME:
+		// TODO: remember offsets for PPARAM names
+		if n.Class == PEXTERN {
+			// global variable
+			addr := s.f.Entry.NewValue(ssa.OpGlobal, Ptrto(n.Type), n.Sym)
+			return s.curBlock.NewValue2(ssa.OpLoad, n.Type, nil, addr, s.mem())
+		}
+		s.argOffsets[n.Sym.Name] = n.Xoffset
+		return s.variable(n.Sym.Name, n.Type)
+	case OLITERAL:
+		switch n.Val.Ctype {
+		case CTINT:
+			return s.f.ConstInt(n.Type, Mpgetfix(n.Val.U.(*Mpint)))
+		default:
+			log.Fatalf("unhandled OLITERAL %v", n.Val.Ctype)
+			return nil
+		}
+
+		// binary ops
+	case OLT:
+		a := s.expr(n.Left)
+		b := s.expr(n.Right)
+		return s.curBlock.NewValue2(ssa.OpLess, ssa.TypeBool, nil, a, b)
+	case OADD:
+		a := s.expr(n.Left)
+		b := s.expr(n.Right)
+		return s.curBlock.NewValue2(ssa.OpAdd, a.Type, nil, a, b)
+	case OSUB:
+		// TODO:(khr) fold code for all binary ops together somehow
+		a := s.expr(n.Left)
+		b := s.expr(n.Right)
+		return s.curBlock.NewValue2(ssa.OpSub, a.Type, nil, a, b)
+	case OLSH:
+		a := s.expr(n.Left)
+		b := s.expr(n.Right)
+		return s.curBlock.NewValue2(ssa.OpLsh, a.Type, nil, a, b)
+	case ORSH:
+		a := s.expr(n.Left)
+		b := s.expr(n.Right)
+		return s.curBlock.NewValue2(ssa.OpRsh, a.Type, nil, a, b)
+
+	case OADDR:
+		return s.addr(n.Left)
+
+	case OIND:
+		p := s.expr(n.Left)
+		s.nilCheck(p)
+		return s.curBlock.NewValue2(ssa.OpLoad, n.Type, nil, p, s.mem())
+
+	case ODOTPTR:
+		p := s.expr(n.Left)
+		s.nilCheck(p)
+		p = s.curBlock.NewValue2(ssa.OpAdd, p.Type, nil, p, s.f.ConstInt(s.config.Uintptr, n.Xoffset))
+		return s.curBlock.NewValue2(ssa.OpLoad, n.Type, nil, p, s.mem())
+
+	case OINDEX:
+		if n.Left.Type.Bound >= 0 { // array
+			a := s.expr(n.Left)
+			i := s.expr(n.Right)
+			s.boundsCheck(i, s.f.ConstInt(s.config.Uintptr, n.Left.Type.Bound))
+			return s.curBlock.NewValue2(ssa.OpArrayIndex, n.Left.Type.Type, nil, a, i)
+		} else { // slice
+			p := s.addr(n)
+			return s.curBlock.NewValue2(ssa.OpLoad, n.Left.Type.Type, nil, p, s.mem())
+		}
+
+	case OCALLFUNC:
+		// run all argument assignments
+		// TODO(khr): do we need to evaluate function first?
+		// Or is it already side-effect-free and does not require a call?
+		s.stmtList(n.List)
+
+		if n.Left.Op != ONAME {
+			// TODO(khr): closure calls?
+			log.Fatalf("can't handle CALLFUNC with non-ONAME fn %s", opnames[n.Left.Op])
+		}
+		bNext := s.f.NewBlock(ssa.BlockPlain)
+		call := s.curBlock.NewValue1(ssa.OpStaticCall, ssa.TypeMem, n.Left.Sym, s.mem())
+		b := s.endBlock()
+		b.Kind = ssa.BlockCall
+		b.Control = call
+		addEdge(b, bNext)
+		addEdge(b, s.exit)
+
+		// read result from stack at the start of the fallthrough block
+		s.startBlock(bNext)
+		var titer Iter
+		fp := Structfirst(&titer, Getoutarg(n.Left.Type))
+		a := s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(fp.Type), fp.Width, s.sp)
+		return s.curBlock.NewValue2(ssa.OpLoad, fp.Type, nil, a, call)
+	default:
+		log.Fatalf("unhandled expr %s", opnames[n.Op])
+		return nil
+	}
+}
+
+// addr converts the address of the expression n to SSA, adds it to s, and returns the SSA result.
+func (s *state) addr(n *Node) *ssa.Value {
+	switch n.Op {
+	case ONAME:
+		if n.Class == PEXTERN {
+			// global variable
+			return s.f.Entry.NewValue(ssa.OpGlobal, Ptrto(n.Type), n.Sym)
+		}
+		if n.Class == PPARAMOUT {
+			// store to parameter slot
+			return s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(n.Type), n.Xoffset, s.fp)
+		}
+		// TODO: address of locals
+		log.Fatalf("variable address of %v not implemented", n)
+		return nil
+	case OINDREG:
+		// indirect off a register (TODO: always SP?)
+		// used for storing/loading arguments/returns to/from callees
+		return s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(n.Type), n.Xoffset, s.sp)
+	case OINDEX:
+		if n.Left.Type.Bound >= 0 { // array
+			a := s.addr(n.Left)
+			i := s.expr(n.Right)
+			len := s.f.ConstInt(s.config.Uintptr, n.Left.Type.Bound)
+			s.boundsCheck(i, len)
+			return s.curBlock.NewValue2(ssa.OpPtrIndex, Ptrto(n.Left.Type.Type), nil, a, i)
+		} else { // slice
+			a := s.expr(n.Left)
+			i := s.expr(n.Right)
+			len := s.curBlock.NewValue1(ssa.OpSliceLen, s.config.Uintptr, nil, a)
+			s.boundsCheck(i, len)
+			p := s.curBlock.NewValue1(ssa.OpSlicePtr, Ptrto(n.Left.Type.Type), nil, a)
+			return s.curBlock.NewValue2(ssa.OpPtrIndex, Ptrto(n.Left.Type.Type), nil, p, i)
+		}
+	default:
+		log.Fatalf("addr: bad op %v", n.Op)
+		return nil
+	}
+}
+
+// nilCheck generates nil pointer checking code.
+// Branches to the exit block if ptr is nil.
+// Starts a new block on return.
+func (s *state) nilCheck(ptr *ssa.Value) {
+	c := s.curBlock.NewValue1(ssa.OpIsNonNil, ssa.TypeBool, nil, ptr)
+	b := s.endBlock()
+	b.Kind = ssa.BlockIf
+	b.Control = c
+	bNext := s.f.NewBlock(ssa.BlockPlain)
+	addEdge(b, bNext)
+	addEdge(b, s.exit)
+	s.startBlock(bNext)
+	// TODO(khr): Don't go directly to exit.  Go to a stub that calls panicmem first.
+	// TODO: implicit nil checks somehow?
+}
+
+// boundsCheck generates bounds checking code.  Checks if 0 <= idx < len, branches to exit if not.
+// Starts a new block on return.
+func (s *state) boundsCheck(idx, len *ssa.Value) {
+	// TODO: convert index to full width?
+	// TODO: if index is 64-bit and we're compiling to 32-bit, check that high 32 bits are zero.
+
+	// bounds check
+	cmp := s.curBlock.NewValue2(ssa.OpIsInBounds, ssa.TypeBool, nil, idx, len)
+	b := s.endBlock()
+	b.Kind = ssa.BlockIf
+	b.Control = cmp
+	bNext := s.f.NewBlock(ssa.BlockPlain)
+	addEdge(b, bNext)
+	addEdge(b, s.exit)
+	// TODO: don't go directly to s.exit.  Go to a stub that calls panicindex first.
+	s.startBlock(bNext)
+}
+
+// variable returns the value of a variable at the current location.
+func (s *state) variable(name string, t ssa.Type) *ssa.Value {
+	if s.curBlock == nil {
+		log.Fatalf("nil curblock!")
+	}
+	v := s.vars[name]
+	if v == nil {
+		// TODO: get type?  Take Sym as arg?
+		v = s.curBlock.NewValue(ssa.OpFwdRef, t, name)
+		s.vars[name] = v
+	}
+	return v
+}
+
+func (s *state) mem() *ssa.Value {
+	return s.variable(".mem", ssa.TypeMem)
+}
+
+func (s *state) linkForwardReferences() {
+	// Build ssa graph.  Each variable on its first use in a basic block
+	// leaves a FwdRef in that block representing the incoming value
+	// of that variable.  This function links that ref up with possible definitions,
+	// inserting Phi values as needed.  This is essentially the algorithm
+	// described by Braun, Buchwald, Hack, Leißa, Mallon, and Zwinkau:
+	// http://pp.info.uni-karlsruhe.de/uploads/publikationen/braun13cc.pdf
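+	// For example (illustrative): a FwdRef for a variable x in a block with
+	// two predecessors becomes a Copy of a Phi when x's outgoing values in
+	// those predecessors differ; if they agree, or there is only a single
+	// predecessor, it becomes a Copy of that one incoming value.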
+	for _, b := range s.f.Blocks {
+		for _, v := range b.Values {
+			if v.Op != ssa.OpFwdRef {
+				continue
+			}
+			name := v.Aux.(string)
+			v.Op = ssa.OpCopy
+			v.Aux = nil
+			v.SetArgs1(s.lookupVarIncoming(b, v.Type, name))
+		}
+	}
+}
+
+// lookupVarIncoming finds the variable's value at the start of block b.
+func (s *state) lookupVarIncoming(b *ssa.Block, t ssa.Type, name string) *ssa.Value {
+	// TODO(khr): have lookupVarIncoming overwrite the FwdRef or Copy that its
+	// result will be used in, instead of having the result wrapped in a copy value.
+	if b == s.f.Entry {
+		if name == ".mem" {
+			return s.startmem
+		}
+		// variable is live at the entry block.  Load it.
+		addr := s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(t.(*Type)), s.argOffsets[name], s.fp)
+		return b.NewValue2(ssa.OpLoad, t, nil, addr, s.startmem)
+	}
+	var vals []*ssa.Value
+	for _, p := range b.Preds {
+		vals = append(vals, s.lookupVarOutgoing(p, t, name))
+	}
+	v0 := vals[0]
+	for i := 1; i < len(vals); i++ {
+		if vals[i] != v0 {
+			// need a phi value
+			v := b.NewValue(ssa.OpPhi, t, nil)
+			v.AddArgs(vals...)
+			return v
+		}
+	}
+	return v0
+}
+
+// lookupVarOutgoing finds the variable's value at the end of block b.
+func (s *state) lookupVarOutgoing(b *ssa.Block, t ssa.Type, name string) *ssa.Value {
+	m := s.defvars[b.ID]
+	if v, ok := m[name]; ok {
+		return v
+	}
+	// The variable is not defined by b and we haven't
+	// looked it up yet.  Generate v, a copy value which
+	// will be the outgoing value of the variable.  Then
+	// look up w, the incoming value of the variable.
+	// Make v = copy(w).  We need the extra copy to
+	// prevent infinite recursion when looking up the
+	// incoming value of the variable.
+	v := b.NewValue(ssa.OpCopy, t, nil)
+	m[name] = v
+	v.AddArg(s.lookupVarIncoming(b, t, name))
+	return v
+}
+
+// TODO: the above mutually recursive functions can lead to very deep stacks.  Fix that.
+
+// addEdge adds an edge from b to c.
+func addEdge(b, c *ssa.Block) {
+	b.Succs = append(b.Succs, c)
+	c.Preds = append(c.Preds, b)
+}
+
+// branch represents an unresolved branch instruction and its target block.
+type branch struct {
+	p *obj.Prog  // branch instruction
+	b *ssa.Block // target
+}
+
+// genssa appends entries to ptxt for each instruction in f.
+// gcargs and gclocals are filled in with pointer maps for the frame.
+func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) {
+	// TODO: line numbers
+
+	if f.FrameSize > 1<<31 {
+		Yyerror("stack frame too large (>2GB)")
+		return
+	}
+
+	ptxt.To.Type = obj.TYPE_TEXTSIZE
+	ptxt.To.Val = int32(Rnd(Curfn.Type.Argwid, int64(Widthptr))) // arg size
+	ptxt.To.Offset = f.FrameSize - 8                             // TODO: arch-dependent
+
+	// Remember where each block starts.
+	bstart := make([]*obj.Prog, f.NumBlocks())
+
+	// Remember all the branch instructions we've seen
+	// and where they would like to go
+	var branches []branch
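+	// Forward branches cannot be resolved while emitting, because the target
+	// block's first Prog is not yet known; they are patched below once all
+	// blocks have been emitted.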
+
+	// Emit basic blocks
+	for i, b := range f.Blocks {
+		bstart[b.ID] = Pc
+		// Emit values in block
+		for _, v := range b.Values {
+			genValue(v)
+		}
+		// Emit control flow instructions for block
+		var next *ssa.Block
+		if i < len(f.Blocks)-1 {
+			next = f.Blocks[i+1]
+		}
+		branches = genBlock(b, next, branches)
+	}
+
+	// Resolve branches
+	for _, br := range branches {
+		br.p.To.Val = bstart[br.b.ID]
+	}
+
+	Pc.As = obj.ARET // overwrite AEND
+
+	// TODO: liveness
+	// TODO: gcargs
+	// TODO: gclocals
+
+	// TODO: dump frame if -f
+
+	// Emit garbage collection symbols.  TODO: put something in them
+	liveness(Curfn, ptxt, gcargs, gclocals)
+}
+
+func genValue(v *ssa.Value) {
+	switch v.Op {
+	case ssa.OpADDQ:
+		// TODO: use addq instead of leaq if target is in the right register.
+		p := Prog(x86.ALEAQ)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = regnum(v.Args[0])
+		p.From.Scale = 1
+		p.From.Index = regnum(v.Args[1])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v)
+	case ssa.OpADDQconst:
+		// TODO: use addq instead of leaq if target is in the right register.
+		p := Prog(x86.ALEAQ)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = regnum(v.Args[0])
+		p.From.Offset = v.Aux.(int64)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v)
+	case ssa.OpMULQconst:
+		// TODO: this isn't right.  doasm fails on it.  I don't think obj
+		// has ever been taught to compile imul $c, r1, r2.
+		p := Prog(x86.AIMULQ)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = v.Aux.(int64)
+		p.From3.Type = obj.TYPE_REG
+		p.From3.Reg = regnum(v.Args[0])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v)
+	case ssa.OpSUBQconst:
+		// This code compensates for the fact that the register allocator
+		// doesn't understand 2-address instructions yet.  TODO: fix that.
+		x := regnum(v.Args[0])
+		r := regnum(v)
+		if x != r {
+			p := Prog(x86.AMOVQ)
+			p.From.Type = obj.TYPE_REG
+			p.From.Reg = x
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = r
+			x = r
+		}
+		p := Prog(x86.ASUBQ)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = v.Aux.(int64)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = r
+	case ssa.OpSHLQconst:
+		x := regnum(v.Args[0])
+		r := regnum(v)
+		if x != r {
+			p := Prog(x86.AMOVQ)
+			p.From.Type = obj.TYPE_REG
+			p.From.Reg = x
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = r
+			x = r
+		}
+		p := Prog(x86.ASHLQ)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = v.Aux.(int64)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = r
+	case ssa.OpLEAQ:
+		p := Prog(x86.ALEAQ)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = regnum(v.Args[0])
+		p.From.Scale = 1
+		p.From.Index = regnum(v.Args[1])
+		p.From.Offset = v.Aux.(int64)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v)
+	case ssa.OpCMPQ:
+		p := Prog(x86.ACMPQ)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = regnum(v.Args[0])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v.Args[1])
+	case ssa.OpCMPQconst:
+		p := Prog(x86.ACMPQ)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = regnum(v.Args[0])
+		p.To.Type = obj.TYPE_CONST
+		p.To.Offset = v.Aux.(int64)
+	case ssa.OpTESTB:
+		p := Prog(x86.ATESTB)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = regnum(v.Args[0])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v.Args[1])
+	case ssa.OpMOVQconst:
+		x := regnum(v)
+		p := Prog(x86.AMOVQ)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = v.Aux.(int64)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = x
+	case ssa.OpMOVQload:
+		p := Prog(x86.AMOVQ)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = regnum(v.Args[0])
+		p.From.Offset = v.Aux.(int64)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v)
+	case ssa.OpMOVBload:
+		p := Prog(x86.AMOVB)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = regnum(v.Args[0])
+		p.From.Offset = v.Aux.(int64)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v)
+	case ssa.OpMOVQloadidx8:
+		p := Prog(x86.AMOVQ)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = regnum(v.Args[0])
+		p.From.Offset = v.Aux.(int64)
+		p.From.Scale = 8
+		p.From.Index = regnum(v.Args[1])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v)
+	case ssa.OpMOVQstore:
+		p := Prog(x86.AMOVQ)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = regnum(v.Args[1])
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = regnum(v.Args[0])
+		p.To.Offset = v.Aux.(int64)
+	case ssa.OpCopy:
+		x := regnum(v.Args[0])
+		y := regnum(v)
+		if x != y {
+			p := Prog(x86.AMOVQ)
+			p.From.Type = obj.TYPE_REG
+			p.From.Reg = x
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = y
+		}
+	case ssa.OpLoadReg8:
+		p := Prog(x86.AMOVQ)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = x86.REG_SP
+		p.From.Offset = localOffset(v.Args[0])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v)
+	case ssa.OpStoreReg8:
+		p := Prog(x86.AMOVQ)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = regnum(v.Args[0])
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = x86.REG_SP
+		p.To.Offset = localOffset(v)
+	case ssa.OpPhi:
+		// just check to make sure regalloc did it right
+		f := v.Block.Func
+		loc := f.RegAlloc[v.ID]
+		for _, a := range v.Args {
+			if f.RegAlloc[a.ID] != loc { // TODO: .Equal() instead?
+				log.Fatalf("phi arg at different location than phi %v %v %v %v", v, loc, a, f.RegAlloc[a.ID])
+			}
+		}
+	case ssa.OpConst:
+		if v.Block.Func.RegAlloc[v.ID] != nil {
+			log.Fatalf("const value %v shouldn't have a location", v)
+		}
+	case ssa.OpArg:
+		// memory arg needs no code
+		// TODO: only mem arg goes here.
+	case ssa.OpLEAQglobal:
+		g := v.Aux.(ssa.GlobalOffset)
+		p := Prog(x86.ALEAQ)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Name = obj.NAME_EXTERN
+		p.From.Sym = Linksym(g.Global.(*Sym))
+		p.From.Offset = g.Offset
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = regnum(v)
+	case ssa.OpStaticCall:
+		p := Prog(obj.ACALL)
+		p.To.Type = obj.TYPE_MEM
+		p.To.Name = obj.NAME_EXTERN
+		p.To.Sym = Linksym(v.Aux.(*Sym))
+	case ssa.OpFP, ssa.OpSP:
+		// nothing to do
+	default:
+		log.Fatalf("value %s not implemented", v.LongString())
+	}
+}
+
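+// genBlock generates the control-flow code at the end of block b, where next
+// is the block that will be emitted immediately after b in layout order.
+// For conditional blocks it avoids an unneeded jump when one successor is
+// next: it emits the inverted conditional jump if Succs[0] is next, the
+// conditional jump if Succs[1] is next, and otherwise a conditional jump
+// followed by an unconditional JMP.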
+func genBlock(b, next *ssa.Block, branches []branch) []branch {
+	switch b.Kind {
+	case ssa.BlockPlain:
+		if b.Succs[0] != next {
+			p := Prog(obj.AJMP)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+		}
+	case ssa.BlockExit:
+		Prog(obj.ARET)
+	case ssa.BlockCall:
+		if b.Succs[0] != next {
+			p := Prog(obj.AJMP)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+		}
+	case ssa.BlockEQ:
+		if b.Succs[0] == next {
+			p := Prog(x86.AJNE)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[1]})
+		} else if b.Succs[1] == next {
+			p := Prog(x86.AJEQ)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+		} else {
+			p := Prog(x86.AJEQ)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+			q := Prog(obj.AJMP)
+			q.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{q, b.Succs[1]})
+		}
+	case ssa.BlockNE:
+		if b.Succs[0] == next {
+			p := Prog(x86.AJEQ)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[1]})
+		} else if b.Succs[1] == next {
+			p := Prog(x86.AJNE)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+		} else {
+			p := Prog(x86.AJNE)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+			q := Prog(obj.AJMP)
+			q.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{q, b.Succs[1]})
+		}
+	case ssa.BlockLT:
+		if b.Succs[0] == next {
+			p := Prog(x86.AJGE)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[1]})
+		} else if b.Succs[1] == next {
+			p := Prog(x86.AJLT)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+		} else {
+			p := Prog(x86.AJLT)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+			q := Prog(obj.AJMP)
+			q.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{q, b.Succs[1]})
+		}
+	case ssa.BlockULT:
+		if b.Succs[0] == next {
+			p := Prog(x86.AJCC)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[1]})
+		} else if b.Succs[1] == next {
+			p := Prog(x86.AJCS)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+		} else {
+			p := Prog(x86.AJCS)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+			q := Prog(obj.AJMP)
+			q.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{q, b.Succs[1]})
+		}
+	case ssa.BlockUGT:
+		if b.Succs[0] == next {
+			p := Prog(x86.AJLS)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[1]})
+		} else if b.Succs[1] == next {
+			p := Prog(x86.AJHI)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+		} else {
+			p := Prog(x86.AJHI)
+			p.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{p, b.Succs[0]})
+			q := Prog(obj.AJMP)
+			q.To.Type = obj.TYPE_BRANCH
+			branches = append(branches, branch{q, b.Succs[1]})
+		}
+
+	default:
+		log.Fatalf("branch %s not implemented", b.LongString())
+	}
+	return branches
+}
+
+// ssaRegToReg maps ssa register numbers to obj register numbers.
+var ssaRegToReg = [...]int16{
+	x86.REG_AX,
+	x86.REG_CX,
+	x86.REG_DX,
+	x86.REG_BX,
+	x86.REG_SP,
+	x86.REG_BP,
+	x86.REG_SI,
+	x86.REG_DI,
+	x86.REG_R8,
+	x86.REG_R9,
+	x86.REG_R10,
+	x86.REG_R11,
+	x86.REG_R12,
+	x86.REG_R13,
+	x86.REG_R14,
+	x86.REG_R15,
+	// TODO: more
+	// TODO: arch-dependent
+}
+
+// regnum returns the register (in cmd/internal/obj numbering) to
+// which v has been allocated.  Panics if v is not assigned to a
+// register.
+func regnum(v *ssa.Value) int16 {
+	return ssaRegToReg[v.Block.Func.RegAlloc[v.ID].(*ssa.Register).Num]
+}
+
+// localOffset returns the offset below the frame pointer where
+// a stack-allocated local has been allocated.  Panics if v
+// is not assigned to a local slot.
+func localOffset(v *ssa.Value) int64 {
+	return v.Block.Func.RegAlloc[v.ID].(*ssa.LocalSlot).Idx
+}
diff --git a/src/cmd/compile/internal/gc/type.go b/src/cmd/compile/internal/gc/type.go
new file mode 100644
index 0000000..cf1589e
--- /dev/null
+++ b/src/cmd/compile/internal/gc/type.go
@@ -0,0 +1,58 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file provides methods that let us export a Type as an ../ssa:Type.
+// We don't export this package's Type directly because it would lead
+// to an import cycle with this package and ../ssa.
+// TODO: move Type to its own package, then we don't need to dance around import cycles.
+
+package gc
+
+import (
+	"cmd/compile/internal/ssa"
+)
+
+func (t *Type) Size() int64 {
+	dowidth(t)
+	return t.Width
+}
+
+func (t *Type) IsBoolean() bool {
+	return t.Etype == TBOOL
+}
+
+func (t *Type) IsInteger() bool {
+	switch t.Etype {
+	case TINT8, TUINT8, TINT16, TUINT16, TINT32, TUINT32, TINT64, TUINT64, TINT, TUINT, TUINTPTR:
+		return true
+	}
+	return false
+}
+
+func (t *Type) IsSigned() bool {
+	switch t.Etype {
+	case TINT8, TINT16, TINT32, TINT64, TINT:
+		return true
+	}
+	return false
+}
+
+func (t *Type) IsFloat() bool {
+	return t.Etype == TFLOAT32 || t.Etype == TFLOAT64
+}
+
+func (t *Type) IsPtr() bool {
+	return t.Etype == TPTR32 || t.Etype == TPTR64 ||
+		t.Etype == TMAP || t.Etype == TCHAN || t.Etype == TFUNC
+}
+
+func (t *Type) Elem() ssa.Type {
+	return t.Type
+}
+func (t *Type) PtrTo() ssa.Type {
+	return Ptrto(t)
+}
+
+func (t *Type) IsMemory() bool { return false }
+func (t *Type) IsFlags() bool  { return false }
diff --git a/src/cmd/compile/internal/ssa/TODO b/src/cmd/compile/internal/ssa/TODO
new file mode 100644
index 0000000..afb723a
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/TODO
@@ -0,0 +1,47 @@
+This is a list of things that need to be worked on.  It is by no means complete.
+
+Allocation
+- Allocation of decls in stackalloc.  Decls survive if they are
+  addrtaken or are too large for registerization.
+
+Scheduling
+ - Make sure loads are scheduled correctly with respect to stores.
+   Same for flag type values.  We can't have more than one value of
+   mem or flag types live at once.
+ - Reduce register pressure.  Schedule instructions which kill
+   variables first.
+
+Values
+ - Add a line number field.  Figure out how to populate it and
+   maintain it during rewrites.
+ - Store *Type instead of Type?  Keep an array of used Types in Func
+   and reference by id?  Unify with the Type in ../gc so we just use a
+   pointer instead of an interface?
+ - Recycle dead values instead of using GC to do that.
+ - A lot of Aux fields are just int64.  Add a separate AuxInt field?
+   If not that, then cache the interfaces that wrap int64s.
+ - OpStore uses 3 args.  Increase the size of argstorage to 3?
+
+Opcodes
+ - Rename ops to prevent cross-arch conflicts.  MOVQ -> MOVQamd64 (or
+   MOVQ6?).  Other option: build opcode table in Config instead of globally.
+ - Remove asm string from opinfo, no longer needed.
+ - It's annoying to list the opcode both in the opcode list and an
+   opInfo map entry.  Specify it one place and use go:generate to
+   produce both?
+
+Regalloc
+ - Make less arch-dependent
+ - Don't spill everything at every basic block boundary.
+ - Allow args and return values to be ssa-able.
+ - Handle 2-address instructions.
+
+Rewrites
+ - Strength reduction (both arch-indep and arch-dependent?)
+ - Code sequence for shifts >= wordsize
+ - Start another architecture (arm?)
+
+Common-Subexpression Elimination
+ - Make a better decision about which value in an equivalence class we should
+   choose to replace other values in that class.
+ - Can we move control values out of their basic block?
diff --git a/src/cmd/compile/internal/ssa/block.go b/src/cmd/compile/internal/ssa/block.go
new file mode 100644
index 0000000..dcf3676
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/block.go
@@ -0,0 +1,94 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Block represents a basic block in the control flow graph of a function.
+type Block struct {
+	// A unique identifier for the block.  The system will attempt to allocate
+	// these IDs densely, but no guarantees.
+	ID ID
+
+	// The kind of block this is.
+	Kind BlockKind
+
+	// Subsequent blocks, if any.  The number and order depend on the block kind.
+	// All successors must be distinct (to make phi values in successors unambiguous).
+	Succs []*Block
+
+	// Inverse of successors.
+	// The order is significant to Phi nodes in the block.
+	Preds []*Block
+	// TODO: predecessors is a pain to maintain.  Can we somehow order phi
+	// arguments by block id and have this field computed explicitly when needed?
+
+	// A value that determines how the block is exited.  Its value depends on the kind
+	// of the block.  For instance, a BlockIf has a boolean control value and BlockExit
+	// has a memory control value.
+	Control *Value
+
+	// The unordered set of Values that define the operation of this block.
+	// The list must include the control value, if any. (TODO: need this last condition?)
+	// After the scheduling pass, this list is ordered.
+	Values []*Value
+
+	// The containing function
+	Func *Func
+}
+
+//     kind           control    successors
+//   ------------------------------------------
+//     Exit        return mem                []
+//    Plain               nil            [next]
+//       If   a boolean Value      [then, else]
+//     Call               mem  [nopanic, panic]  (control opcode should be OpCall or OpStaticCall)
+type BlockKind int8
+
+const (
+	BlockExit  BlockKind = iota // no successors.  There should only be 1 of these.
+	BlockPlain                  // a single successor
+	BlockIf                     // 2 successors, if control goto Succs[0] else goto Succs[1]
+	BlockCall                   // 2 successors, normal return and panic
+	// TODO(khr): BlockPanic for the built-in panic call, has 1 edge to the exit block
+	BlockUnknown
+
+	// 386/amd64 variants of BlockIf that take the flags register as an arg
+	BlockEQ
+	BlockNE
+	BlockLT
+	BlockLE
+	BlockGT
+	BlockGE
+	BlockULT
+	BlockULE
+	BlockUGT
+	BlockUGE
+)
+
+//go:generate stringer -type=BlockKind
+
+// short form print
+func (b *Block) String() string {
+	return fmt.Sprintf("b%d", b.ID)
+}
+
+// long form print
+func (b *Block) LongString() string {
+	s := strings.TrimPrefix(b.Kind.String(), "Block")
+	if b.Control != nil {
+		s += fmt.Sprintf(" %s", b.Control)
+	}
+	if len(b.Succs) > 0 {
+		s += " ->"
+		for _, c := range b.Succs {
+			s += " " + c.String()
+		}
+	}
+	return s
+}
diff --git a/src/cmd/compile/internal/ssa/blockkind_string.go b/src/cmd/compile/internal/ssa/blockkind_string.go
new file mode 100644
index 0000000..6204f19
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/blockkind_string.go
@@ -0,0 +1,16 @@
+// generated by stringer -type=BlockKind; DO NOT EDIT
+
+package ssa
+
+import "fmt"
+
+const _BlockKind_name = "BlockExitBlockPlainBlockIfBlockCallBlockUnknownBlockEQBlockNEBlockLTBlockLEBlockGTBlockGEBlockULTBlockULEBlockUGTBlockUGE"
+
+var _BlockKind_index = [...]uint8{0, 9, 19, 26, 35, 47, 54, 61, 68, 75, 82, 89, 97, 105, 113, 121}
+
+func (i BlockKind) String() string {
+	if i < 0 || i+1 >= BlockKind(len(_BlockKind_index)) {
+		return fmt.Sprintf("BlockKind(%d)", i)
+	}
+	return _BlockKind_name[_BlockKind_index[i]:_BlockKind_index[i+1]]
+}
diff --git a/src/cmd/compile/internal/ssa/cgen.go b/src/cmd/compile/internal/ssa/cgen.go
new file mode 100644
index 0000000..51c72aa
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/cgen.go
@@ -0,0 +1,135 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+)
+
+// cgen selects machine instructions for the function.
+// This pass generates assembly output for now.
+// TODO(khr): generate binary output (via liblink?) instead of text.
+func cgen(f *Func) {
+	fmt.Printf("TEXT %s(SB),0,$0\n", f.Name) // TODO: frame size / arg size
+
+	// TODO: prolog, allocate stack frame
+
+	for idx, b := range f.Blocks {
+		fmt.Printf("%d:\n", b.ID)
+		for _, v := range b.Values {
+			var buf bytes.Buffer
+			asm := opcodeTable[v.Op].asm
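+			// The asm string is a template: a tab pads to the next
+			// multiple-of-8 column, %% emits a literal percent sign, %In
+			// expands to the register (or value number) of input n, %On to
+			// that of output n (only output 0 is supported), and %A to the
+			// value's Aux field.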
+			buf.WriteString("        ")
+			for i := 0; i < len(asm); i++ {
+				switch asm[i] {
+				default:
+					buf.WriteByte(asm[i])
+				case '\t':
+					buf.WriteByte(' ')
+					for buf.Len()%8 != 0 {
+						buf.WriteByte(' ')
+					}
+				case '%':
+					i++
+					switch asm[i] {
+					case '%':
+						buf.WriteByte('%')
+					case 'I':
+						i++
+						n := asm[i] - '0'
+						if f.RegAlloc[v.Args[n].ID] != nil {
+							buf.WriteString(f.RegAlloc[v.Args[n].ID].Name())
+						} else {
+							fmt.Fprintf(&buf, "v%d", v.Args[n].ID)
+						}
+					case 'O':
+						i++
+						n := asm[i] - '0'
+						if n != 0 {
+							panic("can only handle 1 output for now")
+						}
+						if f.RegAlloc[v.ID] != nil {
+							buf.WriteString(f.RegAlloc[v.ID].Name())
+						} else {
+							fmt.Fprintf(&buf, "v%d", v.ID)
+						}
+					case 'A':
+						fmt.Fprint(&buf, v.Aux)
+					}
+				}
+			}
+			for buf.Len() < 40 {
+				buf.WriteByte(' ')
+			}
+			buf.WriteString("; ")
+			buf.WriteString(v.LongString())
+			buf.WriteByte('\n')
+			os.Stdout.Write(buf.Bytes())
+		}
+		// find next block in layout sequence
+		var next *Block
+		if idx < len(f.Blocks)-1 {
+			next = f.Blocks[idx+1]
+		}
+		// emit end of block code
+		// TODO: this is machine specific
+		switch b.Kind {
+		case BlockPlain:
+			if b.Succs[0] != next {
+				fmt.Printf("\tJMP\t%d\n", b.Succs[0].ID)
+			}
+		case BlockExit:
+			// TODO: run defers (if any)
+			// TODO: deallocate frame
+			fmt.Println("\tRET")
+		case BlockCall:
+			// nothing to emit - call instruction already happened
+		case BlockEQ:
+			if b.Succs[0] == next {
+				fmt.Printf("\tJNE\t%d\n", b.Succs[1].ID)
+			} else if b.Succs[1] == next {
+				fmt.Printf("\tJEQ\t%d\n", b.Succs[0].ID)
+			} else {
+				fmt.Printf("\tJEQ\t%d\n", b.Succs[0].ID)
+				fmt.Printf("\tJMP\t%d\n", b.Succs[1].ID)
+			}
+		case BlockNE:
+			if b.Succs[0] == next {
+				fmt.Printf("\tJEQ\t%d\n", b.Succs[1].ID)
+			} else if b.Succs[1] == next {
+				fmt.Printf("\tJNE\t%d\n", b.Succs[0].ID)
+			} else {
+				fmt.Printf("\tJNE\t%d\n", b.Succs[0].ID)
+				fmt.Printf("\tJMP\t%d\n", b.Succs[1].ID)
+			}
+		case BlockLT:
+			if b.Succs[0] == next {
+				fmt.Printf("\tJGE\t%d\n", b.Succs[1].ID)
+			} else if b.Succs[1] == next {
+				fmt.Printf("\tJLT\t%d\n", b.Succs[0].ID)
+			} else {
+				fmt.Printf("\tJLT\t%d\n", b.Succs[0].ID)
+				fmt.Printf("\tJMP\t%d\n", b.Succs[1].ID)
+			}
+		case BlockULT:
+			if b.Succs[0] == next {
+				fmt.Printf("\tJAE\t%d\n", b.Succs[1].ID)
+			} else if b.Succs[1] == next {
+				fmt.Printf("\tJB\t%d\n", b.Succs[0].ID)
+			} else {
+				fmt.Printf("\tJB\t%d\n", b.Succs[0].ID)
+				fmt.Printf("\tJMP\t%d\n", b.Succs[1].ID)
+			}
+		default:
+			fmt.Printf("\t%s ->", b.Kind.String())
+			for _, s := range b.Succs {
+				fmt.Printf(" %d", s.ID)
+			}
+			fmt.Printf("\n")
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/check.go b/src/cmd/compile/internal/ssa/check.go
new file mode 100644
index 0000000..667313a
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/check.go
@@ -0,0 +1,124 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "log"
+
+// checkFunc checks invariants of f.
+func checkFunc(f *Func) {
+	blockMark := make([]bool, f.NumBlocks())
+	valueMark := make([]bool, f.NumValues())
+
+	for _, b := range f.Blocks {
+		if blockMark[b.ID] {
+			log.Panicf("block %s appears twice in %s!", b, f.Name)
+		}
+		blockMark[b.ID] = true
+		if b.Func != f {
+			log.Panicf("%s.Func=%s, want %s", b, b.Func.Name, f.Name)
+		}
+
+		for i, c := range b.Succs {
+			for j, d := range b.Succs {
+				if i != j && c == d {
+					log.Panicf("%s.Succs has duplicate block %s", b, c)
+				}
+			}
+		}
+		// Note: duplicate successors are hard in the following case:
+		//      if(...) goto x else goto x
+		//   x: v = phi(a, b)
+		// If the conditional is true, does v get the value of a or b?
+		// We could solve this other ways, but the easiest is just to
+		// require (by possibly adding empty control-flow blocks) that
+		// all successors are distinct.  They will need to be distinct
+		// anyway for register allocation (duplicate successors implies
+		// the existence of critical edges).
+
+		for _, p := range b.Preds {
+			var found bool
+			for _, c := range p.Succs {
+				if c == b {
+					found = true
+					break
+				}
+			}
+			if !found {
+				log.Panicf("block %s is not a succ of its pred block %s", b, p)
+			}
+		}
+
+		switch b.Kind {
+		case BlockExit:
+			if len(b.Succs) != 0 {
+				log.Panicf("exit block %s has successors", b)
+			}
+			if b.Control == nil {
+				log.Panicf("exit block %s has no control value", b)
+			}
+			if !b.Control.Type.IsMemory() {
+				log.Panicf("exit block %s has non-memory control value %s", b, b.Control.LongString())
+			}
+		case BlockPlain:
+			if len(b.Succs) != 1 {
+				log.Panicf("plain block %s len(Succs)==%d, want 1", b, len(b.Succs))
+			}
+			if b.Control != nil {
+				log.Panicf("plain block %s has non-nil control %s", b, b.Control.LongString())
+			}
+		case BlockIf:
+			if len(b.Succs) != 2 {
+				log.Panicf("if block %s len(Succs)==%d, want 2", b, len(b.Succs))
+			}
+			if b.Control == nil {
+				log.Panicf("if block %s has no control value", b)
+			}
+			if !b.Control.Type.IsBoolean() {
+				log.Panicf("if block %s has non-bool control value %s", b, b.Control.LongString())
+			}
+		case BlockCall:
+			if len(b.Succs) != 2 {
+				log.Panicf("call block %s len(Succs)==%d, want 2", b, len(b.Succs))
+			}
+			if b.Control == nil {
+				log.Panicf("call block %s has no control value", b)
+			}
+			if !b.Control.Type.IsMemory() {
+				log.Panicf("call block %s has non-memory control value %s", b, b.Control.LongString())
+			}
+			if b.Succs[1].Kind != BlockExit {
+				log.Panicf("exception edge from call block %s does not go to exit but %s", b, b.Succs[1])
+			}
+		}
+
+		for _, v := range b.Values {
+			if valueMark[v.ID] {
+				log.Panicf("value %s appears twice!", v.LongString())
+			}
+			valueMark[v.ID] = true
+
+			if v.Block != b {
+				log.Panicf("%s.block != %s", v, b)
+			}
+			if v.Op == OpPhi && len(v.Args) != len(b.Preds) {
+				log.Panicf("phi length %s does not match pred length %d for block %s", v.LongString(), len(b.Preds), b)
+			}
+
+			// TODO: check for cycles in values
+			// TODO: check type
+		}
+	}
+
+	for _, id := range f.bid.free {
+		if blockMark[id] {
+			log.Panicf("used block b%d in free list", id)
+		}
+	}
+	for _, id := range f.vid.free {
+		if valueMark[id] {
+			log.Panicf("used value v%d in free list", id)
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
new file mode 100644
index 0000000..c1f7956
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -0,0 +1,115 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+	"fmt"
+	"log"
+)
+
+// Compile is the main entry point for this package.
+// Compile modifies f so that on return:
+//   · all Values in f map to 0 or 1 assembly instructions of the target architecture
+//   · the order of f.Blocks is the order to emit the Blocks
+//   · the order of b.Values is the order to emit the Values in each Block
+//   · f has a non-nil regAlloc field
+func Compile(f *Func) {
+	// TODO: debugging - set flags to control verbosity of compiler,
+	// which phases to dump IR before/after, etc.
+	fmt.Printf("compiling %s\n", f.Name)
+
+	// hook to print function & phase if panic happens
+	phaseName := "init"
+	defer func() {
+		if phaseName != "" {
+			fmt.Printf("panic during %s while compiling %s\n", phaseName, f.Name)
+		}
+	}()
+
+	// Run all the passes
+	printFunc(f)
+	checkFunc(f)
+	for _, p := range passes {
+		phaseName = p.name
+		fmt.Printf("  pass %s begin\n", p.name)
+		p.fn(f)
+		fmt.Printf("  pass %s end\n", p.name)
+		printFunc(f)
+		checkFunc(f)
+	}
+
+	// Squash error printing defer
+	phaseName = ""
+}
+
+type pass struct {
+	name string
+	fn   func(*Func)
+}
+
+// list of passes for the compiler
+var passes = [...]pass{
+	{"phielim", phielim},
+	{"copyelim", copyelim},
+	{"opt", opt},
+	{"generic cse", cse},
+	{"generic deadcode", deadcode},
+	{"fuse", fuse},
+	{"lower", lower},
+	{"lowered cse", cse},
+	{"lowered deadcode", deadcode},
+	{"critical", critical}, // remove critical edges
+	{"layout", layout},     // schedule blocks
+	{"schedule", schedule}, // schedule values
+	{"regalloc", regalloc},
+	{"stackalloc", stackalloc},
+	{"cgen", cgen},
+}
+
+// Double-check phase ordering constraints.
+// This code is intended to document the ordering requirements
+// between different phases.  It does not override the passes
+// list above.
+type constraint struct {
+	a, b string // a must come before b
+}
+
+var passOrder = [...]constraint{
+	// don't layout blocks until critical edges have been removed
+	{"critical", "layout"},
+	// regalloc requires the removal of all critical edges
+	{"critical", "regalloc"},
+	// regalloc requires all the values in a block to be scheduled
+	{"schedule", "regalloc"},
+	// stack allocation requires register allocation
+	{"regalloc", "stackalloc"},
+	// code generation requires stack allocation
+	{"stackalloc", "cgen"},
+}
+
+func init() {
+	for _, c := range passOrder {
+		a, b := c.a, c.b
+		i := -1
+		j := -1
+		for k, p := range passes {
+			if p.name == a {
+				i = k
+			}
+			if p.name == b {
+				j = k
+			}
+		}
+		if i < 0 {
+			log.Panicf("pass %s not found", a)
+		}
+		if j < 0 {
+			log.Panicf("pass %s not found", b)
+		}
+		if i >= j {
+			log.Panicf("passes %s and %s out of order", a, b)
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
new file mode 100644
index 0000000..9f1d2a8
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -0,0 +1,48 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "log"
+
+type Config struct {
+	arch    string            // "amd64", etc.
+	ptrSize int64             // 4 or 8
+	Uintptr Type              // pointer arithmetic type
+	lower   func(*Value) bool // lowering function
+
+	// TODO: more stuff.  Compiler flags of interest, ...
+}
+
+// NewConfig returns a new configuration object for the given architecture.
+func NewConfig(arch string) *Config {
+	c := &Config{arch: arch}
+	switch arch {
+	case "amd64":
+		c.ptrSize = 8
+		c.lower = lowerAmd64
+	case "386":
+		c.ptrSize = 4
+		c.lower = lowerAmd64 // TODO(khr): full 32-bit support
+	default:
+		log.Fatalf("arch %s not implemented", arch)
+	}
+
+	// cache the uintptr type in the config
+	c.Uintptr = TypeUInt32
+	if c.ptrSize == 8 {
+		c.Uintptr = TypeUInt64
+	}
+
+	return c
+}
+
+// NewFunc returns a new, empty function object
+func (c *Config) NewFunc() *Func {
+	// TODO(khr): should this function take name, type, etc. as arguments?
+	return &Func{Config: c}
+}
+
+// TODO(khr): do we really need a separate Config, or can we just
+// store all its fields inside a Func?
diff --git a/src/cmd/compile/internal/ssa/copyelim.go b/src/cmd/compile/internal/ssa/copyelim.go
new file mode 100644
index 0000000..10c2dcc
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/copyelim.go
@@ -0,0 +1,29 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// copyelim redirects each argument and control value that refers to an OpCopy
+// to the copy's underlying value.  The copies themselves become dead and are
+// removed by a later deadcode pass.
+func copyelim(f *Func) {
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			for i, w := range v.Args {
+				x := w
+				for x.Op == OpCopy {
+					x = x.Args[0]
+				}
+				if x != w {
+					v.Args[i] = x
+				}
+			}
+		}
+		v := b.Control
+		if v != nil {
+			for v.Op == OpCopy {
+				v = v.Args[0]
+			}
+			b.Control = v
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/critical.go b/src/cmd/compile/internal/ssa/critical.go
new file mode 100644
index 0000000..503681f
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/critical.go
@@ -0,0 +1,51 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// critical splits critical edges (those that go from a block with
+// more than one outedge to a block with more than one inedge).
+// Regalloc wants a critical-edge-free CFG so it can implement phi values.
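+// Splitting is done by inserting a new, empty BlockPlain block on the
+// offending edge.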
+func critical(f *Func) {
+	for _, b := range f.Blocks {
+		if len(b.Preds) <= 1 {
+			continue
+		}
+
+		// decide if we need to split edges coming into b.
+		hasphi := false
+		for _, v := range b.Values {
+			if v.Op == OpPhi && v.Type != TypeMem {
+				hasphi = true
+				break
+			}
+		}
+		if !hasphi {
+			// no splitting needed
+			continue
+		}
+
+		// split input edges coming from multi-output blocks.
+		for i, c := range b.Preds {
+			if c.Kind == BlockPlain {
+				continue // only single output block
+			}
+
+			// allocate a new block to place on the edge
+			d := f.NewBlock(BlockPlain)
+
+			// splice it in
+			d.Preds = append(d.Preds, c)
+			d.Succs = append(d.Succs, b)
+			b.Preds[i] = d
+			// replace b with d in c's successor list.
+			for j, b2 := range c.Succs {
+				if b2 == b {
+					c.Succs[j] = d
+					break
+				}
+			}
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/cse.go b/src/cmd/compile/internal/ssa/cse.go
new file mode 100644
index 0000000..aba24ae
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/cse.go
@@ -0,0 +1,163 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "sort"
+
+// cse does common-subexpression elimination on the Function.
+// Values are just relinked, nothing is deleted.  A subsequent deadcode
+// pass is required to actually remove duplicate expressions.
+func cse(f *Func) {
+	// Two values are equivalent if they satisfy the following definition:
+	// equivalent(v, w):
+	//   v.op == w.op
+	//   v.type == w.type
+	//   v.aux == w.aux
+	//   len(v.args) == len(w.args)
+	//   equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1
+
+	// The algorithm searches for a partition of f's values into
+	// equivalence classes using the above definition.
+	// It starts with a coarse partition and iteratively refines it
+	// until it reaches a fixed point.
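+	// For example (illustrative): two Add values of the same type start out
+	// in the same class because they share opcode, type, aux, and argument
+	// count; they remain together only if each pair of corresponding
+	// arguments also ends up in the same class.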
+
+	// Make initial partition based on opcode/type/aux/nargs
+	// TODO(khr): types are not canonical, so we may split unnecessarily.  Fix that.
+	type key struct {
+		op    Op
+		typ   Type
+		aux   interface{}
+		nargs int
+	}
+	m := map[key]eqclass{}
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			k := key{v.Op, v.Type, v.Aux, len(v.Args)}
+			m[k] = append(m[k], v)
+		}
+	}
+
+	// A partition is a set of disjoint eqclasses.
+	var partition []eqclass
+	for _, v := range m {
+		partition = append(partition, v)
+	}
+
+	// map from value id back to eqclass id
+	valueEqClass := make([]int, f.NumValues())
+	for i, e := range partition {
+		for _, v := range e {
+			valueEqClass[v.ID] = i
+		}
+	}
+
+	// Find an equivalence class where some members of the class have
+	// non-equivalent arguments.  Split the equivalence class appropriately.
+	// Repeat until we can't find any more splits.
+	for {
+		changed := false
+
+		for i, e := range partition {
+			v := e[0]
+			// all values in this equiv class that are not equivalent to v get moved
+			// into another equiv class q.
+			var q eqclass
+		eqloop:
+			for j := 1; j < len(e); {
+				w := e[j]
+				for i := 0; i < len(v.Args); i++ {
+					if valueEqClass[v.Args[i].ID] != valueEqClass[w.Args[i].ID] {
+						// w is not equivalent to v.
+						// remove w from e
+						e, e[j] = e[:len(e)-1], e[len(e)-1]
+						// add w to q
+						q = append(q, w)
+						valueEqClass[w.ID] = len(partition)
+						changed = true
+						continue eqloop
+					}
+				}
+				// v and w are equivalent.  Keep w in e.
+				j++
+			}
+			partition[i] = e
+			if q != nil {
+				partition = append(partition, q)
+			}
+		}
+
+		if !changed {
+			break
+		}
+	}
+
+	// Compute dominator tree
+	idom := dominators(f)
+
+	// Compute substitutions we would like to do.  We substitute v for w
+	// if v and w are in the same equivalence class and v dominates w.
+	rewrite := make([]*Value, f.NumValues())
+	for _, e := range partition {
+		sort.Sort(e) // ensure deterministic ordering
+		for len(e) > 1 {
+			// Find a maximal dominant element in e
+			v := e[0]
+			for _, w := range e[1:] {
+				if dom(w.Block, v.Block, idom) {
+					v = w
+				}
+			}
+
+			// Replace all elements of e which v dominates
+			for i := 0; i < len(e); {
+				w := e[i]
+				if w == v {
+					e, e[i] = e[:len(e)-1], e[len(e)-1]
+				} else if dom(v.Block, w.Block, idom) {
+					rewrite[w.ID] = v
+					e, e[i] = e[:len(e)-1], e[len(e)-1]
+				} else {
+					i++
+				}
+			}
+			// TODO(khr): if value is a control value, do we need to keep it block-local?
+		}
+	}
+
+	// Apply substitutions
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			for i, w := range v.Args {
+				if x := rewrite[w.ID]; x != nil {
+					v.SetArg(i, x)
+				}
+			}
+		}
+	}
+}
+
+// dom returns true if b dominates c.
+// TODO(khr): faster
+func dom(b, c *Block, idom []*Block) bool {
+	// Walk up from c in the dominator tree looking for b.
+	for c != nil {
+		if c == b {
+			return true
+		}
+		c = idom[c.ID]
+	}
+	// Reached the entry block, never saw b.
+	return false
+}
+
+// An eqclass approximates an equivalence class.  During the
+// algorithm it may represent the union of several of the
+// final equivalence classes.
+type eqclass []*Value
+
+// Sort an equivalence class by value ID.
+func (e eqclass) Len() int           { return len(e) }
+func (e eqclass) Swap(i, j int)      { e[i], e[j] = e[j], e[i] }
+func (e eqclass) Less(i, j int) bool { return e[i].ID < e[j].ID }
diff --git a/src/cmd/compile/internal/ssa/deadcode.go b/src/cmd/compile/internal/ssa/deadcode.go
new file mode 100644
index 0000000..a805861
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/deadcode.go
@@ -0,0 +1,157 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "log"
+
+// deadcode removes dead code from f.
+func deadcode(f *Func) {
+
+	// Find all reachable basic blocks.
+	reachable := make([]bool, f.NumBlocks())
+	reachable[f.Entry.ID] = true
+	p := []*Block{f.Entry} // stack-like worklist
+	for len(p) > 0 {
+		// pop a reachable block
+		b := p[len(p)-1]
+		p = p[:len(p)-1]
+
+		// constant-fold conditionals
+		// TODO: rewrite rules instead?
+		if b.Kind == BlockIf && b.Control.Op == OpConst {
+			cond := b.Control.Aux.(bool)
+			var c *Block
+			if cond {
+				// then branch is always taken; the else edge (to Succs[1]) is dead
+				c = b.Succs[1]
+			} else {
+				// else branch is always taken; the then edge (to Succs[0]) is dead
+				c = b.Succs[0]
+				b.Succs[0] = b.Succs[1]
+			}
+			b.Succs[1] = nil // aid GC
+			b.Succs = b.Succs[:1]
+			removePredecessor(b, c)
+			b.Kind = BlockPlain
+			b.Control = nil
+		}
+
+		for _, c := range b.Succs {
+			if !reachable[c.ID] {
+				reachable[c.ID] = true
+				p = append(p, c) // push
+			}
+		}
+	}
+
+	// Find all live values
+	live := make([]bool, f.NumValues()) // flag to set for each live value
+	var q []*Value                      // stack-like worklist of unscanned values
+
+	// Starting set: all control values of reachable blocks are live.
+	for _, b := range f.Blocks {
+		if !reachable[b.ID] {
+			continue
+		}
+		if v := b.Control; v != nil && !live[v.ID] {
+			live[v.ID] = true
+			q = append(q, v)
+		}
+	}
+
+	// Compute transitive closure of live values.
+	for len(q) > 0 {
+		// pop a reachable value
+		v := q[len(q)-1]
+		q = q[:len(q)-1]
+		for _, x := range v.Args {
+			if !live[x.ID] {
+				live[x.ID] = true
+				q = append(q, x) // push
+			}
+		}
+	}
+
+	// Remove dead values from blocks' value list.  Return dead
+	// value ids to the allocator.
+	for _, b := range f.Blocks {
+		i := 0
+		for _, v := range b.Values {
+			if live[v.ID] {
+				b.Values[i] = v
+				i++
+			} else {
+				f.vid.put(v.ID)
+			}
+		}
+		// aid GC
+		tail := b.Values[i:]
+		for j := range tail {
+			tail[j] = nil
+		}
+		b.Values = b.Values[:i]
+	}
+
+	// Remove unreachable blocks.  Return dead block ids to allocator.
+	i := 0
+	for _, b := range f.Blocks {
+		if reachable[b.ID] {
+			f.Blocks[i] = b
+			i++
+		} else {
+			if len(b.Values) > 0 {
+				panic("live value in unreachable block")
+			}
+			f.bid.put(b.ID)
+		}
+	}
+	// zero remainder to help GC
+	tail := f.Blocks[i:]
+	for j := range tail {
+		tail[j] = nil
+	}
+	f.Blocks = f.Blocks[:i]
+
+	// TODO: renumber Blocks and Values densely?
+	// TODO: save dead Values and Blocks for reuse?  Or should we just let GC handle it?
+}
+
+// There was an edge b->c.  It has been removed from b's successors.
+// Fix up c to handle that fact.
+func removePredecessor(b, c *Block) {
+	n := len(c.Preds) - 1
+	if n == 0 {
+		// c is now dead - don't bother working on it
+		if c.Preds[0] != b {
+			log.Panicf("%s.Preds[0]==%s, want %s", c, c.Preds[0], b)
+		}
+		return
+	}
+
+	// find index of b in c's predecessor list
+	var i int
+	for j, p := range c.Preds {
+		if p == b {
+			i = j
+			break
+		}
+	}
+
+	c.Preds[i] = c.Preds[n]
+	c.Preds[n] = nil // aid GC
+	c.Preds = c.Preds[:n]
+	// rewrite phi ops to match the new predecessor list
+	for _, v := range c.Values {
+		if v.Op != OpPhi {
+			continue
+		}
+		v.Args[i] = v.Args[n]
+		v.Args[n] = nil // aid GC
+		v.Args = v.Args[:n]
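+		// A phi with a single remaining argument is just a copy of that argument.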
+		if n == 1 {
+			v.Op = OpCopy
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/deadcode_test.go b/src/cmd/compile/internal/ssa/deadcode_test.go
new file mode 100644
index 0000000..ced46e5
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/deadcode_test.go
@@ -0,0 +1,93 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+	"testing"
+)
+
+func TestDeadLoop(t *testing.T) {
+	fun := Fun("entry",
+		Bloc("entry",
+			Valu("mem", OpArg, TypeMem, ".mem"),
+			Goto("exit")),
+		Bloc("exit",
+			Exit("mem")),
+		// dead loop
+		Bloc("deadblock",
+			// dead value in dead block
+			Valu("deadval", OpConst, TypeBool, true),
+			If("deadval", "deadblock", "exit")))
+
+	CheckFunc(fun.f)
+	Deadcode(fun.f)
+	CheckFunc(fun.f)
+
+	for _, b := range fun.f.Blocks {
+		if b == fun.blocks["deadblock"] {
+			t.Errorf("dead block not removed")
+		}
+		for _, v := range b.Values {
+			if v == fun.values["deadval"] {
+				t.Errorf("control value of dead block not removed")
+			}
+		}
+	}
+}
+
+func TestDeadValue(t *testing.T) {
+	fun := Fun("entry",
+		Bloc("entry",
+			Valu("mem", OpArg, TypeMem, ".mem"),
+			Valu("deadval", OpConst, TypeInt64, int64(37)),
+			Goto("exit")),
+		Bloc("exit",
+			Exit("mem")))
+
+	CheckFunc(fun.f)
+	Deadcode(fun.f)
+	CheckFunc(fun.f)
+
+	for _, b := range fun.f.Blocks {
+		for _, v := range b.Values {
+			if v == fun.values["deadval"] {
+				t.Errorf("dead value not removed")
+			}
+		}
+	}
+}
+
+func TestNeverTaken(t *testing.T) {
+	fun := Fun("entry",
+		Bloc("entry",
+			Valu("cond", OpConst, TypeBool, false),
+			Valu("mem", OpArg, TypeMem, ".mem"),
+			If("cond", "then", "else")),
+		Bloc("then",
+			Goto("exit")),
+		Bloc("else",
+			Goto("exit")),
+		Bloc("exit",
+			Exit("mem")))
+
+	CheckFunc(fun.f)
+	Deadcode(fun.f)
+	CheckFunc(fun.f)
+
+	if fun.blocks["entry"].Kind != BlockPlain {
+		t.Errorf("if(false) not simplified")
+	}
+	for _, b := range fun.f.Blocks {
+		if b == fun.blocks["then"] {
+			t.Errorf("then block still present")
+		}
+		for _, v := range b.Values {
+			if v == fun.values["cond"] {
+				t.Errorf("constant condition still present")
+			}
+		}
+	}
+
+}
diff --git a/src/cmd/compile/internal/ssa/dom.go b/src/cmd/compile/internal/ssa/dom.go
new file mode 100644
index 0000000..aaf3ab3
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/dom.go
@@ -0,0 +1,121 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// This file contains code to compute the dominator tree
+// of a control-flow graph.
+
+import "log"
+
+// postorder computes a postorder traversal ordering for the
+// basic blocks in f.  Unreachable blocks will not appear.
+func postorder(f *Func) []*Block {
+	mark := make([]byte, f.NumBlocks())
+	// mark values
+	const (
+		notFound    = 0 // block has not been discovered yet
+		notExplored = 1 // discovered and on the stack, outedges not processed yet
+		explored    = 2 // discovered and on the stack, outedges processed
+		done        = 3 // all done, in output ordering
+	)
+
+	// result ordering
+	var order []*Block
+
+	// stack of blocks
+	var s []*Block
+	s = append(s, f.Entry)
+	mark[f.Entry.ID] = notExplored
+	for len(s) > 0 {
+		b := s[len(s)-1]
+		switch mark[b.ID] {
+		case explored:
+			// Children have all been visited.  Pop & output block.
+			s = s[:len(s)-1]
+			mark[b.ID] = done
+			order = append(order, b)
+		case notExplored:
+			// Children have not been visited yet.  Mark as explored
+			// and queue any children we haven't seen yet.
+			mark[b.ID] = explored
+			for _, c := range b.Succs {
+				if mark[c.ID] == notFound {
+					mark[c.ID] = notExplored
+					s = append(s, c)
+				}
+			}
+		default:
+			log.Fatalf("bad stack state %v %d", b, mark[b.ID])
+		}
+	}
+	return order
+}
+
+// dominators computes the dominator tree for f.  It returns a slice
+// which maps block ID to the immediate dominator of that block.
+// Unreachable blocks map to nil.  The entry block maps to nil.
+func dominators(f *Func) []*Block {
+	// A simple iterative algorithm for now:
+	// Cooper, Harvey, Kennedy, "A Simple, Fast Dominance Algorithm".
+	idom := make([]*Block, f.NumBlocks())
+
+	// Compute postorder walk
+	post := postorder(f)
+
+	// Map block ID to postorder index (for intersect calls)
+	postnum := make([]int, f.NumBlocks())
+	for i, b := range post {
+		postnum[b.ID] = i
+	}
+
+	// Make the entry block a self-loop
+	idom[f.Entry.ID] = f.Entry
+	if postnum[f.Entry.ID] != len(post)-1 {
+		log.Fatalf("entry block %v not last in postorder", f.Entry)
+	}
+
+	// Compute relaxation of idom entries
+	for {
+		changed := false
+
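+		// Process blocks in reverse postorder, skipping the entry
+		// block (it is last in the postorder and its idom is already set).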
+		for i := len(post) - 2; i >= 0; i-- {
+			b := post[i]
+			var d *Block
+			for _, p := range b.Preds {
+				if idom[p.ID] == nil {
+					continue
+				}
+				if d == nil {
+					d = p
+					continue
+				}
+				d = intersect(d, p, postnum, idom)
+			}
+			if d != idom[b.ID] {
+				idom[b.ID] = d
+				changed = true
+			}
+		}
+		if !changed {
+			break
+		}
+	}
+	// Set idom of entry block to nil instead of itself.
+	idom[f.Entry.ID] = nil
+	return idom
+}
+
+// intersect finds the closest common dominator of b and c.
+// It requires a postorder numbering of all the blocks.
+func intersect(b, c *Block, postnum []int, idom []*Block) *Block {
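+	// The common dominator has a postorder number at least as large as
+	// both b and c, so repeatedly move the block with the smaller number
+	// up to its immediate dominator.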
+	for b != c {
+		if postnum[b.ID] < postnum[c.ID] {
+			b = idom[b.ID]
+		} else {
+			c = idom[c.ID]
+		}
+	}
+	return b
+}
diff --git a/src/cmd/compile/internal/ssa/export_test.go b/src/cmd/compile/internal/ssa/export_test.go
new file mode 100644
index 0000000..ab4ab82
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/export_test.go
@@ -0,0 +1,9 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+var CheckFunc = checkFunc
+var PrintFunc = printFunc
+var Deadcode = deadcode
diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go
new file mode 100644
index 0000000..3e41ef3
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/func.go
@@ -0,0 +1,108 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// A Func represents a Go func declaration (or function literal) and
+// its body.  This package compiles each Func independently.
+type Func struct {
+	Config *Config  // architecture information
+	Name   string   // e.g. bytes·Compare
+	Type   Type     // type signature of the function.
+	Blocks []*Block // unordered set of all basic blocks (note: not indexable by ID)
+	Entry  *Block   // the entry basic block
+	bid    idAlloc  // block ID allocator
+	vid    idAlloc  // value ID allocator
+
+	// when register allocation is done, maps value ids to locations
+	RegAlloc []Location
+	// when stackalloc is done, the size of the stack frame
+	FrameSize int64
+}
+
+// NumBlocks returns an integer larger than the id of any Block in the Func.
+func (f *Func) NumBlocks() int {
+	return f.bid.num()
+}
+
+// NumValues returns an integer larger than the id of any Value in the Func.
+func (f *Func) NumValues() int {
+	return f.vid.num()
+}
+
+// NewBlock returns a new block of the given kind and appends it to f.Blocks.
+func (f *Func) NewBlock(kind BlockKind) *Block {
+	b := &Block{
+		ID:   f.bid.get(),
+		Kind: kind,
+		Func: f,
+	}
+	f.Blocks = append(f.Blocks, b)
+	return b
+}
+
+// NewValue returns a new value in the block with no arguments.
+func (b *Block) NewValue(op Op, t Type, aux interface{}) *Value {
+	v := &Value{
+		ID:    b.Func.vid.get(),
+		Op:    op,
+		Type:  t,
+		Aux:   aux,
+		Block: b,
+	}
+	v.Args = v.argstorage[:0]
+	b.Values = append(b.Values, v)
+	return v
+}
+
+// NewValue1 returns a new value in the block with one argument.
+func (b *Block) NewValue1(op Op, t Type, aux interface{}, arg *Value) *Value {
+	v := &Value{
+		ID:    b.Func.vid.get(),
+		Op:    op,
+		Type:  t,
+		Aux:   aux,
+		Block: b,
+	}
+	v.Args = v.argstorage[:1]
+	v.Args[0] = arg
+	b.Values = append(b.Values, v)
+	return v
+}
+
+// NewValue2 returns a new value in the block with two arguments.
+func (b *Block) NewValue2(op Op, t Type, aux interface{}, arg0, arg1 *Value) *Value {
+	v := &Value{
+		ID:    b.Func.vid.get(),
+		Op:    op,
+		Type:  t,
+		Aux:   aux,
+		Block: b,
+	}
+	v.Args = v.argstorage[:2]
+	v.Args[0] = arg0
+	v.Args[1] = arg1
+	b.Values = append(b.Values, v)
+	return v
+}
+
+// NewValue3 returns a new value in the block with three arguments.
+func (b *Block) NewValue3(op Op, t Type, aux interface{}, arg0, arg1, arg2 *Value) *Value {
+	v := &Value{
+		ID:    b.Func.vid.get(),
+		Op:    op,
+		Type:  t,
+		Aux:   aux,
+		Block: b,
+	}
+	v.Args = []*Value{arg0, arg1, arg2}
+	b.Values = append(b.Values, v)
+	return v
+}
+
+// ConstInt returns an int constant representing its argument.
+func (f *Func) ConstInt(t Type, c int64) *Value {
+	// TODO: cache?
+	return f.Entry.NewValue(OpConst, t, c)
+}
diff --git a/src/cmd/compile/internal/ssa/func_test.go b/src/cmd/compile/internal/ssa/func_test.go
new file mode 100644
index 0000000..e7619ca
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/func_test.go
@@ -0,0 +1,401 @@
+// This file contains some utility functions to help define Funcs for testing.
+// As an example, the following func
+//
+//   b1:
+//     v1 = Arg <mem> [.mem]
+//     Plain -> b2
+//   b2:
+//     Exit v1
+//   b3:
+//     v2 = Const <bool> [true]
+//     If v2 -> b3 b2
+//
+// can be defined as
+//
+//   fun := Fun("entry",
+//       Bloc("entry",
+//           Valu("mem", OpArg, TypeMem, ".mem"),
+//           Goto("exit")),
+//       Bloc("exit",
+//           Exit("mem")),
+//       Bloc("deadblock",
+//          Valu("deadval", OpConst, TypeBool, true),
+//          If("deadval", "deadblock", "exit")))
+//
+// and the Blocks or Values used in the Func can be accessed
+// like this:
+//   fun.blocks["entry"] or fun.values["deadval"]
+
+package ssa
+
+// TODO(matloob): Choose better names for Fun, Bloc, Goto, etc.
+// TODO(matloob): Write a parser for the Func disassembly. Maybe
+//                the parser can be used instead of Fun.
+
+import (
+	"log"
+	"reflect"
+	"testing"
+)
+
+// Equiv reports whether f and g are equivalent. Their CFGs must be isomorphic,
+// and their values must correspond.
+// Requires that values and predecessors are in the same order, even
+// though Funcs could be equivalent when they are not.
+// TODO(matloob): Allow values and predecessors to be in different
+// orders if the CFGs are otherwise equivalent.
+func Equiv(f, g *Func) bool {
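+	// valcor records the tentative bidirectional correspondence between
+	// values of f and values of g.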
+	valcor := make(map[*Value]*Value)
+	var checkVal func(fv, gv *Value) bool
+	checkVal = func(fv, gv *Value) bool {
+		if fv == nil && gv == nil {
+			return true
+		}
+		if valcor[fv] == nil && valcor[gv] == nil {
+			valcor[fv] = gv
+			valcor[gv] = fv
+			// Ignore ids. Ops and Types are compared for equality.
+			// TODO(matloob): Make sure types are canonical and can
+			// be compared for equality.
+			if fv.Op != gv.Op || fv.Type != gv.Type {
+				return false
+			}
+			if !reflect.DeepEqual(fv.Aux, gv.Aux) {
+				// This makes the assumption that aux values can be compared
+				// using DeepEqual.
+				// TODO(matloob): Aux values may be *gc.Sym pointers in the near
+				// future. Make sure they are canonical.
+				return false
+			}
+			if len(fv.Args) != len(gv.Args) {
+				return false
+			}
+			for i := range fv.Args {
+				if !checkVal(fv.Args[i], gv.Args[i]) {
+					return false
+				}
+			}
+		}
+		return valcor[fv] == gv && valcor[gv] == fv
+	}
+	blkcor := make(map[*Block]*Block)
+	var checkBlk func(fb, gb *Block) bool
+	checkBlk = func(fb, gb *Block) bool {
+		if blkcor[fb] == nil && blkcor[gb] == nil {
+			blkcor[fb] = gb
+			blkcor[gb] = fb
+			// ignore ids
+			if fb.Kind != gb.Kind {
+				return false
+			}
+			if len(fb.Values) != len(gb.Values) {
+				return false
+			}
+			for i := range fb.Values {
+				if !checkVal(fb.Values[i], gb.Values[i]) {
+					return false
+				}
+			}
+			if len(fb.Succs) != len(gb.Succs) {
+				return false
+			}
+			for i := range fb.Succs {
+				if !checkBlk(fb.Succs[i], gb.Succs[i]) {
+					return false
+				}
+			}
+			if len(fb.Preds) != len(gb.Preds) {
+				return false
+			}
+			for i := range fb.Preds {
+				if !checkBlk(fb.Preds[i], gb.Preds[i]) {
+					return false
+				}
+			}
+			return true
+
+		}
+		return blkcor[fb] == gb && blkcor[gb] == fb
+	}
+
+	return checkBlk(f.Entry, g.Entry)
+}
+
+// fun is the return type of Fun. It contains the created func
+// itself as well as indexes from block and value names into the
+// corresponding Blocks and Values.
+type fun struct {
+	f      *Func
+	blocks map[string]*Block
+	values map[string]*Value
+}
+
+// Fun takes the name of an entry bloc and a series of Bloc calls, and
+// returns a fun containing the composed Func. entry must be a name
+// supplied to one of the Bloc functions. Each of the bloc names and
+// valu names should be unique across the Fun.
+func Fun(entry string, blocs ...bloc) fun {
+	f := new(Func)
+	blocks := make(map[string]*Block)
+	values := make(map[string]*Value)
+	// Create all the blocks and values.
+	for _, bloc := range blocs {
+		b := f.NewBlock(bloc.control.kind)
+		blocks[bloc.name] = b
+		for _, valu := range bloc.valus {
+			// args are filled in the second pass.
+			values[valu.name] = b.NewValue(valu.op, valu.t, valu.aux)
+		}
+	}
+	// Connect the blocks together and specify control values.
+	f.Entry = blocks[entry]
+	for _, bloc := range blocs {
+		b := blocks[bloc.name]
+		c := bloc.control
+		// Specify control values.
+		if c.control != "" {
+			cval, ok := values[c.control]
+			if !ok {
+				log.Panicf("control value for block %s missing", bloc.name)
+			}
+			b.Control = cval
+		}
+		// Fill in args.
+		for _, valu := range bloc.valus {
+			v := values[valu.name]
+			for _, arg := range valu.args {
+				a, ok := values[arg]
+				if !ok {
+					log.Panicf("arg %s missing for value %s in block %s",
+						arg, valu.name, bloc.name)
+				}
+				v.AddArg(a)
+			}
+		}
+		// Connect to successors.
+		for _, succ := range c.succs {
+			addEdge(b, blocks[succ])
+		}
+	}
+	return fun{f, blocks, values}
+}
+
+// Bloc defines a block for Fun. The bloc name should be unique
+// across the containing Fun. entries should consist of calls to Valu,
+// as well as one call to Goto, If, or Exit to specify the block kind.
+func Bloc(name string, entries ...interface{}) bloc {
+	b := bloc{}
+	b.name = name
+	seenCtrl := false
+	for _, e := range entries {
+		switch v := e.(type) {
+		case ctrl:
+			// There should be exactly one control entry (Goto, If, or Exit).
+			if seenCtrl {
+				log.Panicf("already seen control for block %s", name)
+			}
+			b.control = v
+			seenCtrl = true
+		case valu:
+			b.valus = append(b.valus, v)
+		}
+	}
+	if !seenCtrl {
+		log.Panicf("block %s doesn't have control", b.name)
+	}
+	return b
+}
+
+// Valu defines a value in a block.
+func Valu(name string, op Op, t Type, aux interface{}, args ...string) valu {
+	return valu{name, op, t, aux, args}
+}
+
+// Goto specifies that this is a BlockPlain and names the single successor.
+// TODO(matloob): choose a better name.
+func Goto(succ string) ctrl {
+	return ctrl{BlockPlain, "", []string{succ}}
+}
+
+// If specifies a BlockIf with control value cond and successors sub (then) and alt (else).
+func If(cond, sub, alt string) ctrl {
+	return ctrl{BlockIf, cond, []string{sub, alt}}
+}
+
+// Exit specifies a BlockExit.
+func Exit(arg string) ctrl {
+	return ctrl{BlockExit, arg, []string{}}
+}
+
+// bloc, ctrl, and valu are internal structures used by Bloc, Valu, Goto,
+// If, and Exit to help define blocks.
+
+type bloc struct {
+	name    string
+	control ctrl
+	valus   []valu
+}
+
+type ctrl struct {
+	kind    BlockKind
+	control string
+	succs   []string
+}
+
+type valu struct {
+	name string
+	op   Op
+	t    Type
+	aux  interface{}
+	args []string
+}
+
+func addEdge(b, c *Block) {
+	b.Succs = append(b.Succs, c)
+	c.Preds = append(c.Preds, b)
+}
+
+func TestArgs(t *testing.T) {
+	fun := Fun("entry",
+		Bloc("entry",
+			Valu("a", OpConst, TypeInt64, 14),
+			Valu("b", OpConst, TypeInt64, 26),
+			Valu("sum", OpAdd, TypeInt64, nil, "a", "b"),
+			Valu("mem", OpArg, TypeMem, ".mem"),
+			Goto("exit")),
+		Bloc("exit",
+			Exit("mem")))
+	sum := fun.values["sum"]
+	for i, name := range []string{"a", "b"} {
+		if sum.Args[i] != fun.values[name] {
+			t.Errorf("arg %d for sum is incorrect: want %s, got %s",
+				i, fun.values[name], sum.Args[i])
+		}
+	}
+}
+
+func TestEquiv(t *testing.T) {
+	equivalentCases := []struct{ f, g fun }{
+		// simple case
+		{
+			Fun("entry",
+				Bloc("entry",
+					Valu("a", OpConst, TypeInt64, 14),
+					Valu("b", OpConst, TypeInt64, 26),
+					Valu("sum", OpAdd, TypeInt64, nil, "a", "b"),
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Goto("exit")),
+				Bloc("exit",
+					Exit("mem"))),
+			Fun("entry",
+				Bloc("entry",
+					Valu("a", OpConst, TypeInt64, 14),
+					Valu("b", OpConst, TypeInt64, 26),
+					Valu("sum", OpAdd, TypeInt64, nil, "a", "b"),
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Goto("exit")),
+				Bloc("exit",
+					Exit("mem"))),
+		},
+		// block order changed
+		{
+			Fun("entry",
+				Bloc("entry",
+					Valu("a", OpConst, TypeInt64, 14),
+					Valu("b", OpConst, TypeInt64, 26),
+					Valu("sum", OpAdd, TypeInt64, nil, "a", "b"),
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Goto("exit")),
+				Bloc("exit",
+					Exit("mem"))),
+			Fun("entry",
+				Bloc("exit",
+					Exit("mem")),
+				Bloc("entry",
+					Valu("a", OpConst, TypeInt64, 14),
+					Valu("b", OpConst, TypeInt64, 26),
+					Valu("sum", OpAdd, TypeInt64, nil, "a", "b"),
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Goto("exit"))),
+		},
+	}
+	for _, c := range equivalentCases {
+		if !Equiv(c.f.f, c.g.f) {
+			t.Errorf("expected equivalence. Func definitions:")
+			// TODO(matloob): Rewrite PrintFunc to output to a string or writer,
+			// so the functions can be written to the error log.
+			PrintFunc(c.f.f)
+			PrintFunc(c.g.f)
+		}
+	}
+
+	differentCases := []struct{ f, g fun }{
+		// different shape
+		{
+			Fun("entry",
+				Bloc("entry",
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Goto("exit")),
+				Bloc("exit",
+					Exit("mem"))),
+			Fun("entry",
+				Bloc("entry",
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Exit("mem"))),
+		},
+		// value order changed
+		{
+			Fun("entry",
+				Bloc("entry",
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Valu("b", OpConst, TypeInt64, 26),
+					Valu("a", OpConst, TypeInt64, 14),
+					Exit("mem"))),
+			Fun("entry",
+				Bloc("entry",
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Valu("a", OpConst, TypeInt64, 14),
+					Valu("b", OpConst, TypeInt64, 26),
+					Exit("mem"))),
+		},
+		// value aux different
+		{
+			Fun("entry",
+				Bloc("entry",
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Valu("a", OpConst, TypeInt64, 14),
+					Exit("mem"))),
+			Fun("entry",
+				Bloc("entry",
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Valu("a", OpConst, TypeInt64, 26),
+					Exit("mem"))),
+		},
+		// value args different
+		{
+			Fun("entry",
+				Bloc("entry",
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Valu("a", OpConst, TypeInt64, 14),
+					Valu("b", OpConst, TypeInt64, 26),
+					Valu("sum", OpAdd, TypeInt64, nil, "a", "b"),
+					Exit("mem"))),
+			Fun("entry",
+				Bloc("entry",
+					Valu("mem", OpArg, TypeMem, ".mem"),
+					Valu("a", OpConst, TypeInt64, 0),
+					Valu("b", OpConst, TypeInt64, 14),
+					Valu("sum", OpAdd, TypeInt64, nil, "b", "a"),
+					Exit("mem"))),
+		},
+	}
+	for _, c := range differentCases {
+		if Equiv(c.f.f, c.g.f) {
+			t.Errorf("expected difference. Func definitions:")
+			// TODO(matloob): Rewrite PrintFunc to output to a string or writer,
+			// so the functions can be written to the error log.
+			PrintFunc(c.f.f)
+			PrintFunc(c.g.f)
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/fuse.go b/src/cmd/compile/internal/ssa/fuse.go
new file mode 100644
index 0000000..af3e8a8
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/fuse.go
@@ -0,0 +1,43 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// fuse simplifies control flow by joining basic blocks.
+func fuse(f *Func) {
+	for _, b := range f.Blocks {
+		if b.Kind != BlockPlain {
+			continue
+		}
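+		// b is a plain block; merge it into its single successor c,
+		// but only if b is c's sole predecessor.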
+		c := b.Succs[0]
+		if len(c.Preds) != 1 {
+			continue
+		}
+
+		// move all of b's values to c.
+		for _, v := range b.Values {
+			v.Block = c
+			c.Values = append(c.Values, v)
+		}
+
+		// replace b->c edge with preds(b) -> c
+		c.Preds = b.Preds
+		for _, p := range c.Preds {
+			for i, q := range p.Succs {
+				if q == b {
+					p.Succs[i] = c
+				}
+			}
+		}
+		if f.Entry == b {
+			f.Entry = c
+		}
+
+		// trash b, just in case
+		b.Kind = BlockUnknown
+		b.Values = nil
+		b.Preds = nil
+		b.Succs = nil
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/generic.go b/src/cmd/compile/internal/ssa/generic.go
new file mode 100644
index 0000000..91f9c17
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/generic.go
@@ -0,0 +1,236 @@
+// autogenerated from rulegen/generic.rules: do not edit!
+// generated with: go run rulegen/rulegen.go rulegen/generic.rules genericRules generic.go
+package ssa
+
+func genericRules(v *Value) bool {
+	switch v.Op {
+	case OpAdd:
+		// match: (Add <t> (Const [c]) (Const [d]))
+		// cond: is64BitInt(t)
+		// result: (Const [{c.(int64)+d.(int64)}])
+		{
+			t := v.Type
+			if v.Args[0].Op != OpConst {
+				goto end8d047ed0ae9537b840adc79ea82c6e05
+			}
+			c := v.Args[0].Aux
+			if v.Args[1].Op != OpConst {
+				goto end8d047ed0ae9537b840adc79ea82c6e05
+			}
+			d := v.Args[1].Aux
+			if !(is64BitInt(t)) {
+				goto end8d047ed0ae9537b840adc79ea82c6e05
+			}
+			v.Op = OpConst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = c.(int64) + d.(int64)
+			return true
+		}
+		goto end8d047ed0ae9537b840adc79ea82c6e05
+	end8d047ed0ae9537b840adc79ea82c6e05:
+		;
+	case OpArrayIndex:
+		// match: (ArrayIndex (Load ptr mem) idx)
+		// cond:
+		// result: (Load (PtrIndex <ptr.Type.Elem().Elem().PtrTo()> ptr idx) mem)
+		{
+			if v.Args[0].Op != OpLoad {
+				goto end3809f4c52270a76313e4ea26e6f0b753
+			}
+			ptr := v.Args[0].Args[0]
+			mem := v.Args[0].Args[1]
+			idx := v.Args[1]
+			v.Op = OpLoad
+			v.Aux = nil
+			v.resetArgs()
+			v0 := v.Block.NewValue(OpPtrIndex, TypeInvalid, nil)
+			v0.Type = ptr.Type.Elem().Elem().PtrTo()
+			v0.AddArg(ptr)
+			v0.AddArg(idx)
+			v.AddArg(v0)
+			v.AddArg(mem)
+			return true
+		}
+		goto end3809f4c52270a76313e4ea26e6f0b753
+	end3809f4c52270a76313e4ea26e6f0b753:
+		;
+	case OpIsInBounds:
+		// match: (IsInBounds (Const [c]) (Const [d]))
+		// cond:
+		// result: (Const [inBounds(c.(int64),d.(int64))])
+		{
+			if v.Args[0].Op != OpConst {
+				goto enddbd1a394d9b71ee64335361b8384865c
+			}
+			c := v.Args[0].Aux
+			if v.Args[1].Op != OpConst {
+				goto enddbd1a394d9b71ee64335361b8384865c
+			}
+			d := v.Args[1].Aux
+			v.Op = OpConst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = inBounds(c.(int64), d.(int64))
+			return true
+		}
+		goto enddbd1a394d9b71ee64335361b8384865c
+	enddbd1a394d9b71ee64335361b8384865c:
+		;
+	case OpMul:
+		// match: (Mul <t> (Const [c]) (Const [d]))
+		// cond: is64BitInt(t)
+		// result: (Const [{c.(int64)*d.(int64)}])
+		{
+			t := v.Type
+			if v.Args[0].Op != OpConst {
+				goto end776610f88cf04f438242d76ed2b14f1c
+			}
+			c := v.Args[0].Aux
+			if v.Args[1].Op != OpConst {
+				goto end776610f88cf04f438242d76ed2b14f1c
+			}
+			d := v.Args[1].Aux
+			if !(is64BitInt(t)) {
+				goto end776610f88cf04f438242d76ed2b14f1c
+			}
+			v.Op = OpConst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = c.(int64) * d.(int64)
+			return true
+		}
+		goto end776610f88cf04f438242d76ed2b14f1c
+	end776610f88cf04f438242d76ed2b14f1c:
+		;
+	case OpPtrIndex:
+		// match: (PtrIndex <t> ptr idx)
+		// cond:
+		// result: (Add ptr (Mul <v.Block.Func.Config.Uintptr> idx (Const <v.Block.Func.Config.Uintptr> [t.Elem().Size()])))
+		{
+			t := v.Type
+			ptr := v.Args[0]
+			idx := v.Args[1]
+			v.Op = OpAdd
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(ptr)
+			v0 := v.Block.NewValue(OpMul, TypeInvalid, nil)
+			v0.Type = v.Block.Func.Config.Uintptr
+			v0.AddArg(idx)
+			v1 := v.Block.NewValue(OpConst, TypeInvalid, nil)
+			v1.Type = v.Block.Func.Config.Uintptr
+			v1.Aux = t.Elem().Size()
+			v0.AddArg(v1)
+			v.AddArg(v0)
+			return true
+		}
+		goto end383c68c41e72d22ef00c4b7b0fddcbb8
+	end383c68c41e72d22ef00c4b7b0fddcbb8:
+		;
+	case OpSliceCap:
+		// match: (SliceCap (Load ptr mem))
+		// cond:
+		// result: (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.Uintptr> [int64(v.Block.Func.Config.ptrSize*2)])) mem)
+		{
+			if v.Args[0].Op != OpLoad {
+				goto endbf1d4db93c4664ed43be3f73afb4dfa3
+			}
+			ptr := v.Args[0].Args[0]
+			mem := v.Args[0].Args[1]
+			v.Op = OpLoad
+			v.Aux = nil
+			v.resetArgs()
+			v0 := v.Block.NewValue(OpAdd, TypeInvalid, nil)
+			v0.Type = ptr.Type
+			v0.AddArg(ptr)
+			v1 := v.Block.NewValue(OpConst, TypeInvalid, nil)
+			v1.Type = v.Block.Func.Config.Uintptr
+			v1.Aux = int64(v.Block.Func.Config.ptrSize * 2)
+			v0.AddArg(v1)
+			v.AddArg(v0)
+			v.AddArg(mem)
+			return true
+		}
+		goto endbf1d4db93c4664ed43be3f73afb4dfa3
+	endbf1d4db93c4664ed43be3f73afb4dfa3:
+		;
+	case OpSliceLen:
+		// match: (SliceLen (Load ptr mem))
+		// cond:
+		// result: (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.Uintptr> [int64(v.Block.Func.Config.ptrSize)])) mem)
+		{
+			if v.Args[0].Op != OpLoad {
+				goto end9190b1ecbda4c5dd6d3e05d2495fb297
+			}
+			ptr := v.Args[0].Args[0]
+			mem := v.Args[0].Args[1]
+			v.Op = OpLoad
+			v.Aux = nil
+			v.resetArgs()
+			v0 := v.Block.NewValue(OpAdd, TypeInvalid, nil)
+			v0.Type = ptr.Type
+			v0.AddArg(ptr)
+			v1 := v.Block.NewValue(OpConst, TypeInvalid, nil)
+			v1.Type = v.Block.Func.Config.Uintptr
+			v1.Aux = int64(v.Block.Func.Config.ptrSize)
+			v0.AddArg(v1)
+			v.AddArg(v0)
+			v.AddArg(mem)
+			return true
+		}
+		goto end9190b1ecbda4c5dd6d3e05d2495fb297
+	end9190b1ecbda4c5dd6d3e05d2495fb297:
+		;
+	case OpSlicePtr:
+		// match: (SlicePtr (Load ptr mem))
+		// cond:
+		// result: (Load ptr mem)
+		{
+			if v.Args[0].Op != OpLoad {
+				goto end459613b83f95b65729d45c2ed663a153
+			}
+			ptr := v.Args[0].Args[0]
+			mem := v.Args[0].Args[1]
+			v.Op = OpLoad
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(ptr)
+			v.AddArg(mem)
+			return true
+		}
+		goto end459613b83f95b65729d45c2ed663a153
+	end459613b83f95b65729d45c2ed663a153:
+		;
+	case OpStore:
+		// match: (Store dst (Load <t> src mem) mem)
+		// cond: t.Size() > 8
+		// result: (Move [t.Size()] dst src mem)
+		{
+			dst := v.Args[0]
+			if v.Args[1].Op != OpLoad {
+				goto end324ffb6d2771808da4267f62c854e9c8
+			}
+			t := v.Args[1].Type
+			src := v.Args[1].Args[0]
+			mem := v.Args[1].Args[1]
+			if v.Args[2] != v.Args[1].Args[1] {
+				goto end324ffb6d2771808da4267f62c854e9c8
+			}
+			if !(t.Size() > 8) {
+				goto end324ffb6d2771808da4267f62c854e9c8
+			}
+			v.Op = OpMove
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = t.Size()
+			v.AddArg(dst)
+			v.AddArg(src)
+			v.AddArg(mem)
+			return true
+		}
+		goto end324ffb6d2771808da4267f62c854e9c8
+	end324ffb6d2771808da4267f62c854e9c8:
+	}
+	return false
+}
diff --git a/src/cmd/compile/internal/ssa/id.go b/src/cmd/compile/internal/ssa/id.go
new file mode 100644
index 0000000..3f53e1a
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/id.go
@@ -0,0 +1,39 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+type ID int32
+
+// idAlloc provides an allocator for unique integers.
+type idAlloc struct {
+	last ID
+	free []ID
+}
+
+// get allocates an ID and returns it.
+func (a *idAlloc) get() ID {
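+	// Reuse a previously freed ID if one is available; otherwise allocate a new one.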
+	if n := len(a.free); n > 0 {
+		x := a.free[n-1]
+		a.free = a.free[:n-1]
+		return x
+	}
+	x := a.last
+	x++
+	if x == 1<<31-1 {
+		panic("too many ids for this function")
+	}
+	a.last = x
+	return x
+}
+
+// put deallocates an ID.
+func (a *idAlloc) put(x ID) {
+	a.free = append(a.free, x)
+}
+
+// num returns the maximum ID ever returned + 1.
+func (a *idAlloc) num() int {
+	return int(a.last + 1)
+}
diff --git a/src/cmd/compile/internal/ssa/layout.go b/src/cmd/compile/internal/ssa/layout.go
new file mode 100644
index 0000000..7123397
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/layout.go
@@ -0,0 +1,88 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "log"
+
+// layout orders basic blocks in f with the goal of minimizing control flow instructions.
+// After this phase returns, the order of f.Blocks matters and is the order
+// in which those blocks will appear in the assembly output.
+func layout(f *Func) {
+	order := make([]*Block, 0, f.NumBlocks())
+	scheduled := make([]bool, f.NumBlocks())
+	idToBlock := make([]*Block, f.NumBlocks())
+	indegree := make([]int, f.NumBlocks())    // number of not-yet-scheduled predecessors of each block
+	posdegree := newSparseSet(f.NumBlocks())  // blocks with positive remaining degree
+	zerodegree := newSparseSet(f.NumBlocks()) // blocks with zero remaining degree
+
+	// Initialize indegree of each block
+	for _, b := range f.Blocks {
+		idToBlock[b.ID] = b
+		indegree[b.ID] = len(b.Preds)
+		if len(b.Preds) == 0 {
+			zerodegree.add(b.ID)
+		} else {
+			posdegree.add(b.ID)
+		}
+	}
+
+	bid := f.Entry.ID
+blockloop:
+	for {
+		// add block to schedule
+		b := idToBlock[bid]
+		order = append(order, b)
+		scheduled[bid] = true
+		if len(order) == len(f.Blocks) {
+			break
+		}
+
+		for _, c := range b.Succs {
+			indegree[c.ID]--
+			if indegree[c.ID] == 0 {
+				posdegree.remove(c.ID)
+				zerodegree.add(c.ID)
+			}
+		}
+
+		// Pick the next block to schedule
+		// Pick among the successor blocks that have not been scheduled yet.
+		// Just use degree for now.  TODO(khr): use likely direction hints.
+		bid = 0
+		mindegree := f.NumBlocks()
+		for _, c := range order[len(order)-1].Succs {
+			if scheduled[c.ID] {
+				continue
+			}
+			if indegree[c.ID] < mindegree {
+				mindegree = indegree[c.ID]
+				bid = c.ID
+			}
+		}
+		if bid != 0 {
+			continue
+		}
+		// TODO: improve this part
+		// No successor of the previously scheduled block works.
+		// Pick a zero-degree block if we can.
+		for zerodegree.size() > 0 {
+			cid := zerodegree.pop()
+			if !scheduled[cid] {
+				bid = cid
+				continue blockloop
+			}
+		}
+		// Still nothing, pick any block.
+		for {
+			cid := posdegree.pop()
+			if !scheduled[cid] {
+				bid = cid
+				continue blockloop
+			}
+		}
+		log.Panicf("no block available for layout")
+	}
+	f.Blocks = order
+}
diff --git a/src/cmd/compile/internal/ssa/location.go b/src/cmd/compile/internal/ssa/location.go
new file mode 100644
index 0000000..1b6f6d6
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/location.go
@@ -0,0 +1,34 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+	"fmt"
+)
+
+// A Location is a place that an ssa variable can reside.
+type Location interface {
+	Name() string // name to use in assembly templates: %rax, 16(%rsp), ...
+}
+
+// A Register is a machine register, like %rax.
+// They are numbered densely from 0 (for each architecture).
+type Register struct {
+	Num  int32
+	name string
+}
+
+func (r *Register) Name() string {
+	return r.name
+}
+
+// A LocalSlot is a location in the stack frame.
+type LocalSlot struct {
+	Idx int64 // offset in locals area (distance up from SP)
+}
+
+func (s *LocalSlot) Name() string {
+	return fmt.Sprintf("%d(SP)", s.Idx)
+}
diff --git a/src/cmd/compile/internal/ssa/lower.go b/src/cmd/compile/internal/ssa/lower.go
new file mode 100644
index 0000000..44f0b83
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/lower.go
@@ -0,0 +1,112 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "log"
+
+//go:generate go run rulegen/rulegen.go rulegen/lower_amd64.rules lowerAmd64 lowerAmd64.go
+
+// lower converts the generic ops in f to machine-dependent ops.
+func lower(f *Func) {
+	// repeat rewrites until we find no more rewrites
+	applyRewrite(f, f.Config.lower)
+
+	// Check for unlowered opcodes, fail if we find one.
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.Op < OpGenericEnd && v.Op != OpFP && v.Op != OpSP && v.Op != OpArg && v.Op != OpCopy && v.Op != OpPhi {
+				log.Panicf("%s not lowered", v.LongString())
+			}
+		}
+	}
+
+	// additional pass for 386/amd64, link condition codes directly to blocks
+	// TODO: do generically somehow?  Special "block" rewrite rules?
+	for _, b := range f.Blocks {
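+		// Keep rewriting this block's kind until no more simplifications apply.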
+		for {
+			switch b.Kind {
+			case BlockIf:
+				switch b.Control.Op {
+				case OpSETL:
+					b.Kind = BlockLT
+					b.Control = b.Control.Args[0]
+					continue
+				case OpSETNE:
+					b.Kind = BlockNE
+					b.Control = b.Control.Args[0]
+					continue
+				case OpSETB:
+					b.Kind = BlockULT
+					b.Control = b.Control.Args[0]
+					continue
+				case OpMOVBload:
+					b.Kind = BlockNE
+					b.Control = b.NewValue2(OpTESTB, TypeFlags, nil, b.Control, b.Control)
+					continue
+					// TODO: others
+				}
+			case BlockLT:
+				if b.Control.Op == OpInvertFlags {
+					b.Kind = BlockGT
+					b.Control = b.Control.Args[0]
+					continue
+				}
+			case BlockGT:
+				if b.Control.Op == OpInvertFlags {
+					b.Kind = BlockLT
+					b.Control = b.Control.Args[0]
+					continue
+				}
+			case BlockLE:
+				if b.Control.Op == OpInvertFlags {
+					b.Kind = BlockGE
+					b.Control = b.Control.Args[0]
+					continue
+				}
+			case BlockGE:
+				if b.Control.Op == OpInvertFlags {
+					b.Kind = BlockLE
+					b.Control = b.Control.Args[0]
+					continue
+				}
+			case BlockULT:
+				if b.Control.Op == OpInvertFlags {
+					b.Kind = BlockUGT
+					b.Control = b.Control.Args[0]
+					continue
+				}
+			case BlockUGT:
+				if b.Control.Op == OpInvertFlags {
+					b.Kind = BlockULT
+					b.Control = b.Control.Args[0]
+					continue
+				}
+			case BlockULE:
+				if b.Control.Op == OpInvertFlags {
+					b.Kind = BlockUGE
+					b.Control = b.Control.Args[0]
+					continue
+				}
+			case BlockUGE:
+				if b.Control.Op == OpInvertFlags {
+					b.Kind = BlockULE
+					b.Control = b.Control.Args[0]
+					continue
+				}
+			case BlockEQ:
+				if b.Control.Op == OpInvertFlags {
+					b.Control = b.Control.Args[0]
+					continue
+				}
+			case BlockNE:
+				if b.Control.Op == OpInvertFlags {
+					b.Control = b.Control.Args[0]
+					continue
+				}
+			}
+			break
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/lowerAmd64.go b/src/cmd/compile/internal/ssa/lowerAmd64.go
new file mode 100644
index 0000000..51cef97
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/lowerAmd64.go
@@ -0,0 +1,773 @@
+// autogenerated from rulegen/lower_amd64.rules: do not edit!
+// generated with: go run rulegen/rulegen.go rulegen/lower_amd64.rules lowerAmd64 lowerAmd64.go
+package ssa
+
+func lowerAmd64(v *Value) bool {
+	switch v.Op {
+	case OpADDQ:
+		// match: (ADDQ x (MOVQconst [c]))
+		// cond:
+		// result: (ADDQconst [c] x)
+		{
+			x := v.Args[0]
+			if v.Args[1].Op != OpMOVQconst {
+				goto endacffd55e74ee0ff59ad58a18ddfc9973
+			}
+			c := v.Args[1].Aux
+			v.Op = OpADDQconst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = c
+			v.AddArg(x)
+			return true
+		}
+		goto endacffd55e74ee0ff59ad58a18ddfc9973
+	endacffd55e74ee0ff59ad58a18ddfc9973:
+		;
+		// match: (ADDQ (MOVQconst [c]) x)
+		// cond:
+		// result: (ADDQconst [c] x)
+		{
+			if v.Args[0].Op != OpMOVQconst {
+				goto end7166f476d744ab7a51125959d3d3c7e2
+			}
+			c := v.Args[0].Aux
+			x := v.Args[1]
+			v.Op = OpADDQconst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = c
+			v.AddArg(x)
+			return true
+		}
+		goto end7166f476d744ab7a51125959d3d3c7e2
+	end7166f476d744ab7a51125959d3d3c7e2:
+		;
+		// match: (ADDQ x (SHLQconst [shift] y))
+		// cond: shift.(int64) == 3
+		// result: (LEAQ8 [int64(0)] x y)
+		{
+			x := v.Args[0]
+			if v.Args[1].Op != OpSHLQconst {
+				goto endaf4f724e1e17f2b116d336c07da0165d
+			}
+			shift := v.Args[1].Aux
+			y := v.Args[1].Args[0]
+			if !(shift.(int64) == 3) {
+				goto endaf4f724e1e17f2b116d336c07da0165d
+			}
+			v.Op = OpLEAQ8
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = int64(0)
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto endaf4f724e1e17f2b116d336c07da0165d
+	endaf4f724e1e17f2b116d336c07da0165d:
+		;
+	case OpADDQconst:
+		// match: (ADDQconst [c] (LEAQ8 [d] x y))
+		// cond:
+		// result: (LEAQ8 [addOff(c, d)] x y)
+		{
+			c := v.Aux
+			if v.Args[0].Op != OpLEAQ8 {
+				goto ende2cc681c9abf9913288803fb1b39e639
+			}
+			d := v.Args[0].Aux
+			x := v.Args[0].Args[0]
+			y := v.Args[0].Args[1]
+			v.Op = OpLEAQ8
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = addOff(c, d)
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto ende2cc681c9abf9913288803fb1b39e639
+	ende2cc681c9abf9913288803fb1b39e639:
+		;
+		// match: (ADDQconst [off] x)
+		// cond: off.(int64) == 0
+		// result: (Copy x)
+		{
+			off := v.Aux
+			x := v.Args[0]
+			if !(off.(int64) == 0) {
+				goto endfa1c7cc5ac4716697e891376787f86ce
+			}
+			v.Op = OpCopy
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			return true
+		}
+		goto endfa1c7cc5ac4716697e891376787f86ce
+	endfa1c7cc5ac4716697e891376787f86ce:
+		;
+	case OpAdd:
+		// match: (Add <t> x y)
+		// cond: (is64BitInt(t) || isPtr(t))
+		// result: (ADDQ x y)
+		{
+			t := v.Type
+			x := v.Args[0]
+			y := v.Args[1]
+			if !(is64BitInt(t) || isPtr(t)) {
+				goto endf031c523d7dd08e4b8e7010a94cd94c9
+			}
+			v.Op = OpADDQ
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto endf031c523d7dd08e4b8e7010a94cd94c9
+	endf031c523d7dd08e4b8e7010a94cd94c9:
+		;
+		// match: (Add <t> x y)
+		// cond: is32BitInt(t)
+		// result: (ADDL x y)
+		{
+			t := v.Type
+			x := v.Args[0]
+			y := v.Args[1]
+			if !(is32BitInt(t)) {
+				goto end35a02a1587264e40cf1055856ff8445a
+			}
+			v.Op = OpADDL
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto end35a02a1587264e40cf1055856ff8445a
+	end35a02a1587264e40cf1055856ff8445a:
+		;
+	case OpCMPQ:
+		// match: (CMPQ x (MOVQconst [c]))
+		// cond:
+		// result: (CMPQconst x [c])
+		{
+			x := v.Args[0]
+			if v.Args[1].Op != OpMOVQconst {
+				goto end32ef1328af280ac18fa8045a3502dae9
+			}
+			c := v.Args[1].Aux
+			v.Op = OpCMPQconst
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.Aux = c
+			return true
+		}
+		goto end32ef1328af280ac18fa8045a3502dae9
+	end32ef1328af280ac18fa8045a3502dae9:
+		;
+		// match: (CMPQ (MOVQconst [c]) x)
+		// cond:
+		// result: (InvertFlags (CMPQconst <TypeFlags> x [c]))
+		{
+			if v.Args[0].Op != OpMOVQconst {
+				goto endf8ca12fe79290bc82b11cfa463bc9413
+			}
+			c := v.Args[0].Aux
+			x := v.Args[1]
+			v.Op = OpInvertFlags
+			v.Aux = nil
+			v.resetArgs()
+			v0 := v.Block.NewValue(OpCMPQconst, TypeInvalid, nil)
+			v0.Type = TypeFlags
+			v0.AddArg(x)
+			v0.Aux = c
+			v.AddArg(v0)
+			return true
+		}
+		goto endf8ca12fe79290bc82b11cfa463bc9413
+	endf8ca12fe79290bc82b11cfa463bc9413:
+		;
+	case OpConst:
+		// match: (Const <t> [val])
+		// cond: is64BitInt(t)
+		// result: (MOVQconst [val])
+		{
+			t := v.Type
+			val := v.Aux
+			if !(is64BitInt(t)) {
+				goto end7f5c5b34093fbc6860524cb803ee51bf
+			}
+			v.Op = OpMOVQconst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = val
+			return true
+		}
+		goto end7f5c5b34093fbc6860524cb803ee51bf
+	end7f5c5b34093fbc6860524cb803ee51bf:
+		;
+	case OpGlobal:
+		// match: (Global [sym])
+		// cond:
+		// result: (LEAQglobal [GlobalOffset{sym,0}])
+		{
+			sym := v.Aux
+			v.Op = OpLEAQglobal
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = GlobalOffset{sym, 0}
+			return true
+		}
+		goto end3a3c76fac0e2e53c0e1c60b9524e6f1c
+	end3a3c76fac0e2e53c0e1c60b9524e6f1c:
+		;
+	case OpIsInBounds:
+		// match: (IsInBounds idx len)
+		// cond:
+		// result: (SETB (CMPQ <TypeFlags> idx len))
+		{
+			idx := v.Args[0]
+			len := v.Args[1]
+			v.Op = OpSETB
+			v.Aux = nil
+			v.resetArgs()
+			v0 := v.Block.NewValue(OpCMPQ, TypeInvalid, nil)
+			v0.Type = TypeFlags
+			v0.AddArg(idx)
+			v0.AddArg(len)
+			v.AddArg(v0)
+			return true
+		}
+		goto endb51d371171154c0f1613b687757e0576
+	endb51d371171154c0f1613b687757e0576:
+		;
+	case OpIsNonNil:
+		// match: (IsNonNil p)
+		// cond:
+		// result: (SETNE (TESTQ <TypeFlags> p p))
+		{
+			p := v.Args[0]
+			v.Op = OpSETNE
+			v.Aux = nil
+			v.resetArgs()
+			v0 := v.Block.NewValue(OpTESTQ, TypeInvalid, nil)
+			v0.Type = TypeFlags
+			v0.AddArg(p)
+			v0.AddArg(p)
+			v.AddArg(v0)
+			return true
+		}
+		goto endff508c3726edfb573abc6128c177e76c
+	endff508c3726edfb573abc6128c177e76c:
+		;
+	case OpLess:
+		// match: (Less x y)
+		// cond: is64BitInt(v.Args[0].Type) && isSigned(v.Args[0].Type)
+		// result: (SETL (CMPQ <TypeFlags> x y))
+		{
+			x := v.Args[0]
+			y := v.Args[1]
+			if !(is64BitInt(v.Args[0].Type) && isSigned(v.Args[0].Type)) {
+				goto endcecf13a952d4c6c2383561c7d68a3cf9
+			}
+			v.Op = OpSETL
+			v.Aux = nil
+			v.resetArgs()
+			v0 := v.Block.NewValue(OpCMPQ, TypeInvalid, nil)
+			v0.Type = TypeFlags
+			v0.AddArg(x)
+			v0.AddArg(y)
+			v.AddArg(v0)
+			return true
+		}
+		goto endcecf13a952d4c6c2383561c7d68a3cf9
+	endcecf13a952d4c6c2383561c7d68a3cf9:
+		;
+	case OpLoad:
+		// match: (Load <t> ptr mem)
+		// cond: t.IsBoolean()
+		// result: (MOVBload [int64(0)] ptr mem)
+		{
+			t := v.Type
+			ptr := v.Args[0]
+			mem := v.Args[1]
+			if !(t.IsBoolean()) {
+				goto end73f21632e56c3614902d3c29c82dc4ea
+			}
+			v.Op = OpMOVBload
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = int64(0)
+			v.AddArg(ptr)
+			v.AddArg(mem)
+			return true
+		}
+		goto end73f21632e56c3614902d3c29c82dc4ea
+	end73f21632e56c3614902d3c29c82dc4ea:
+		;
+		// match: (Load <t> ptr mem)
+		// cond: (is64BitInt(t) || isPtr(t))
+		// result: (MOVQload [int64(0)] ptr mem)
+		{
+			t := v.Type
+			ptr := v.Args[0]
+			mem := v.Args[1]
+			if !(is64BitInt(t) || isPtr(t)) {
+				goto end581ce5a20901df1b8143448ba031685b
+			}
+			v.Op = OpMOVQload
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = int64(0)
+			v.AddArg(ptr)
+			v.AddArg(mem)
+			return true
+		}
+		goto end581ce5a20901df1b8143448ba031685b
+	end581ce5a20901df1b8143448ba031685b:
+		;
+	case OpLsh:
+		// match: (Lsh <t> x y)
+		// cond: is64BitInt(t)
+		// result: (SHLQ x y)
+		{
+			t := v.Type
+			x := v.Args[0]
+			y := v.Args[1]
+			if !(is64BitInt(t)) {
+				goto end9f05c9539e51db6ad557989e0c822e9b
+			}
+			v.Op = OpSHLQ
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto end9f05c9539e51db6ad557989e0c822e9b
+	end9f05c9539e51db6ad557989e0c822e9b:
+		;
+	case OpMOVQload:
+		// match: (MOVQload [off1] (ADDQconst [off2] ptr) mem)
+		// cond:
+		// result: (MOVQload [addOff(off1, off2)] ptr mem)
+		{
+			off1 := v.Aux
+			if v.Args[0].Op != OpADDQconst {
+				goto end843d29b538c4483b432b632e5666d6e3
+			}
+			off2 := v.Args[0].Aux
+			ptr := v.Args[0].Args[0]
+			mem := v.Args[1]
+			v.Op = OpMOVQload
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = addOff(off1, off2)
+			v.AddArg(ptr)
+			v.AddArg(mem)
+			return true
+		}
+		goto end843d29b538c4483b432b632e5666d6e3
+	end843d29b538c4483b432b632e5666d6e3:
+		;
+		// match: (MOVQload [off1] (LEAQ8 [off2] ptr idx) mem)
+		// cond:
+		// result: (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
+		{
+			off1 := v.Aux
+			if v.Args[0].Op != OpLEAQ8 {
+				goto end02f5ad148292c46463e7c20d3b821735
+			}
+			off2 := v.Args[0].Aux
+			ptr := v.Args[0].Args[0]
+			idx := v.Args[0].Args[1]
+			mem := v.Args[1]
+			v.Op = OpMOVQloadidx8
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = addOff(off1, off2)
+			v.AddArg(ptr)
+			v.AddArg(idx)
+			v.AddArg(mem)
+			return true
+		}
+		goto end02f5ad148292c46463e7c20d3b821735
+	end02f5ad148292c46463e7c20d3b821735:
+		;
+	case OpMOVQloadidx8:
+		// match: (MOVQloadidx8 [off1] (ADDQconst [off2] ptr) idx mem)
+		// cond:
+		// result: (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
+		{
+			off1 := v.Aux
+			if v.Args[0].Op != OpADDQconst {
+				goto ende81e44bcfb11f90916ccb440c590121f
+			}
+			off2 := v.Args[0].Aux
+			ptr := v.Args[0].Args[0]
+			idx := v.Args[1]
+			mem := v.Args[2]
+			v.Op = OpMOVQloadidx8
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = addOff(off1, off2)
+			v.AddArg(ptr)
+			v.AddArg(idx)
+			v.AddArg(mem)
+			return true
+		}
+		goto ende81e44bcfb11f90916ccb440c590121f
+	ende81e44bcfb11f90916ccb440c590121f:
+		;
+	case OpMOVQstore:
+		// match: (MOVQstore [off1] (ADDQconst [off2] ptr) val mem)
+		// cond:
+		// result: (MOVQstore [addOff(off1, off2)] ptr val mem)
+		{
+			off1 := v.Aux
+			if v.Args[0].Op != OpADDQconst {
+				goto end2108c693a43c79aed10b9246c39c80aa
+			}
+			off2 := v.Args[0].Aux
+			ptr := v.Args[0].Args[0]
+			val := v.Args[1]
+			mem := v.Args[2]
+			v.Op = OpMOVQstore
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = addOff(off1, off2)
+			v.AddArg(ptr)
+			v.AddArg(val)
+			v.AddArg(mem)
+			return true
+		}
+		goto end2108c693a43c79aed10b9246c39c80aa
+	end2108c693a43c79aed10b9246c39c80aa:
+		;
+		// match: (MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem)
+		// cond:
+		// result: (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
+		{
+			off1 := v.Aux
+			if v.Args[0].Op != OpLEAQ8 {
+				goto endce1db8c8d37c8397c500a2068a65c215
+			}
+			off2 := v.Args[0].Aux
+			ptr := v.Args[0].Args[0]
+			idx := v.Args[0].Args[1]
+			val := v.Args[1]
+			mem := v.Args[2]
+			v.Op = OpMOVQstoreidx8
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = addOff(off1, off2)
+			v.AddArg(ptr)
+			v.AddArg(idx)
+			v.AddArg(val)
+			v.AddArg(mem)
+			return true
+		}
+		goto endce1db8c8d37c8397c500a2068a65c215
+	endce1db8c8d37c8397c500a2068a65c215:
+		;
+	case OpMOVQstoreidx8:
+		// match: (MOVQstoreidx8 [off1] (ADDQconst [off2] ptr) idx val mem)
+		// cond:
+		// result: (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
+		{
+			off1 := v.Aux
+			if v.Args[0].Op != OpADDQconst {
+				goto end01c970657b0fdefeab82458c15022163
+			}
+			off2 := v.Args[0].Aux
+			ptr := v.Args[0].Args[0]
+			idx := v.Args[1]
+			val := v.Args[2]
+			mem := v.Args[3]
+			v.Op = OpMOVQstoreidx8
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = addOff(off1, off2)
+			v.AddArg(ptr)
+			v.AddArg(idx)
+			v.AddArg(val)
+			v.AddArg(mem)
+			return true
+		}
+		goto end01c970657b0fdefeab82458c15022163
+	end01c970657b0fdefeab82458c15022163:
+		;
+	case OpMULQ:
+		// match: (MULQ x (MOVQconst [c]))
+		// cond: c.(int64) == int64(int32(c.(int64)))
+		// result: (MULQconst [c] x)
+		{
+			x := v.Args[0]
+			if v.Args[1].Op != OpMOVQconst {
+				goto ende8c09b194fcde7d9cdc69f2deff86304
+			}
+			c := v.Args[1].Aux
+			if !(c.(int64) == int64(int32(c.(int64)))) {
+				goto ende8c09b194fcde7d9cdc69f2deff86304
+			}
+			v.Op = OpMULQconst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = c
+			v.AddArg(x)
+			return true
+		}
+		goto ende8c09b194fcde7d9cdc69f2deff86304
+	ende8c09b194fcde7d9cdc69f2deff86304:
+		;
+		// match: (MULQ (MOVQconst [c]) x)
+		// cond:
+		// result: (MULQconst [c] x)
+		{
+			if v.Args[0].Op != OpMOVQconst {
+				goto endc6e18d6968175d6e58eafa6dcf40c1b8
+			}
+			c := v.Args[0].Aux
+			x := v.Args[1]
+			v.Op = OpMULQconst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = c
+			v.AddArg(x)
+			return true
+		}
+		goto endc6e18d6968175d6e58eafa6dcf40c1b8
+	endc6e18d6968175d6e58eafa6dcf40c1b8:
+		;
+	case OpMULQconst:
+		// match: (MULQconst [c] x)
+		// cond: c.(int64) == 8
+		// result: (SHLQconst [int64(3)] x)
+		{
+			c := v.Aux
+			x := v.Args[0]
+			if !(c.(int64) == 8) {
+				goto end7e16978c56138324ff2abf91fd6d94d4
+			}
+			v.Op = OpSHLQconst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = int64(3)
+			v.AddArg(x)
+			return true
+		}
+		goto end7e16978c56138324ff2abf91fd6d94d4
+	end7e16978c56138324ff2abf91fd6d94d4:
+		;
+		// match: (MULQconst [c] x)
+		// cond: c.(int64) == 64
+		// result: (SHLQconst [int64(6)] x)
+		{
+			c := v.Aux
+			x := v.Args[0]
+			if !(c.(int64) == 64) {
+				goto end2c7a02f230e4b311ac3a4e22f70a4f08
+			}
+			v.Op = OpSHLQconst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = int64(6)
+			v.AddArg(x)
+			return true
+		}
+		goto end2c7a02f230e4b311ac3a4e22f70a4f08
+	end2c7a02f230e4b311ac3a4e22f70a4f08:
+		;
+	case OpMove:
+		// match: (Move [size] dst src mem)
+		// cond:
+		// result: (REPMOVSB dst src (Const <TypeUInt64> [size.(int64)]) mem)
+		{
+			size := v.Aux
+			dst := v.Args[0]
+			src := v.Args[1]
+			mem := v.Args[2]
+			v.Op = OpREPMOVSB
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(dst)
+			v.AddArg(src)
+			v0 := v.Block.NewValue(OpConst, TypeInvalid, nil)
+			v0.Type = TypeUInt64
+			v0.Aux = size.(int64)
+			v.AddArg(v0)
+			v.AddArg(mem)
+			return true
+		}
+		goto end48909259b265a6bb2a076bc2c2dc7d1f
+	end48909259b265a6bb2a076bc2c2dc7d1f:
+		;
+	case OpMul:
+		// match: (Mul <t> x y)
+		// cond: is64BitInt(t)
+		// result: (MULQ x y)
+		{
+			t := v.Type
+			x := v.Args[0]
+			y := v.Args[1]
+			if !(is64BitInt(t)) {
+				goto endfab0d598f376ecba45a22587d50f7aff
+			}
+			v.Op = OpMULQ
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto endfab0d598f376ecba45a22587d50f7aff
+	endfab0d598f376ecba45a22587d50f7aff:
+		;
+	case OpOffPtr:
+		// match: (OffPtr [off] ptr)
+		// cond:
+		// result: (ADDQconst [off] ptr)
+		{
+			off := v.Aux
+			ptr := v.Args[0]
+			v.Op = OpADDQconst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = off
+			v.AddArg(ptr)
+			return true
+		}
+		goto end0429f947ee7ac49ff45a243e461a5290
+	end0429f947ee7ac49ff45a243e461a5290:
+		;
+	case OpSETL:
+		// match: (SETL (InvertFlags x))
+		// cond:
+		// result: (SETGE x)
+		{
+			if v.Args[0].Op != OpInvertFlags {
+				goto end456c7681d48305698c1ef462d244bdc6
+			}
+			x := v.Args[0].Args[0]
+			v.Op = OpSETGE
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			return true
+		}
+		goto end456c7681d48305698c1ef462d244bdc6
+	end456c7681d48305698c1ef462d244bdc6:
+		;
+	case OpSHLQ:
+		// match: (SHLQ x (MOVQconst [c]))
+		// cond:
+		// result: (SHLQconst [c] x)
+		{
+			x := v.Args[0]
+			if v.Args[1].Op != OpMOVQconst {
+				goto endcca412bead06dc3d56ef034a82d184d6
+			}
+			c := v.Args[1].Aux
+			v.Op = OpSHLQconst
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = c
+			v.AddArg(x)
+			return true
+		}
+		goto endcca412bead06dc3d56ef034a82d184d6
+	endcca412bead06dc3d56ef034a82d184d6:
+		;
+	case OpSUBQ:
+		// match: (SUBQ x (MOVQconst [c]))
+		// cond:
+		// result: (SUBQconst x [c])
+		{
+			x := v.Args[0]
+			if v.Args[1].Op != OpMOVQconst {
+				goto end5a74a63bd9ad15437717c6df3b25eebb
+			}
+			c := v.Args[1].Aux
+			v.Op = OpSUBQconst
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.Aux = c
+			return true
+		}
+		goto end5a74a63bd9ad15437717c6df3b25eebb
+	end5a74a63bd9ad15437717c6df3b25eebb:
+		;
+		// match: (SUBQ <t> (MOVQconst [c]) x)
+		// cond:
+		// result: (NEGQ (SUBQconst <t> x [c]))
+		{
+			t := v.Type
+			if v.Args[0].Op != OpMOVQconst {
+				goto end78e66b6fc298684ff4ac8aec5ce873c9
+			}
+			c := v.Args[0].Aux
+			x := v.Args[1]
+			v.Op = OpNEGQ
+			v.Aux = nil
+			v.resetArgs()
+			v0 := v.Block.NewValue(OpSUBQconst, TypeInvalid, nil)
+			v0.Type = t
+			v0.AddArg(x)
+			v0.Aux = c
+			v.AddArg(v0)
+			return true
+		}
+		goto end78e66b6fc298684ff4ac8aec5ce873c9
+	end78e66b6fc298684ff4ac8aec5ce873c9:
+		;
+	case OpStore:
+		// match: (Store ptr val mem)
+		// cond: (is64BitInt(val.Type) || isPtr(val.Type))
+		// result: (MOVQstore [int64(0)] ptr val mem)
+		{
+			ptr := v.Args[0]
+			val := v.Args[1]
+			mem := v.Args[2]
+			if !(is64BitInt(val.Type) || isPtr(val.Type)) {
+				goto end9680b43f504bc06f9fab000823ce471a
+			}
+			v.Op = OpMOVQstore
+			v.Aux = nil
+			v.resetArgs()
+			v.Aux = int64(0)
+			v.AddArg(ptr)
+			v.AddArg(val)
+			v.AddArg(mem)
+			return true
+		}
+		goto end9680b43f504bc06f9fab000823ce471a
+	end9680b43f504bc06f9fab000823ce471a:
+		;
+	case OpSub:
+		// match: (Sub <t> x y)
+		// cond: is64BitInt(t)
+		// result: (SUBQ x y)
+		{
+			t := v.Type
+			x := v.Args[0]
+			y := v.Args[1]
+			if !(is64BitInt(t)) {
+				goto ende6ef29f885a8ecf3058212bb95917323
+			}
+			v.Op = OpSUBQ
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(x)
+			v.AddArg(y)
+			return true
+		}
+		goto ende6ef29f885a8ecf3058212bb95917323
+	ende6ef29f885a8ecf3058212bb95917323:
+	}
+	return false
+}
diff --git a/src/cmd/compile/internal/ssa/op.go b/src/cmd/compile/internal/ssa/op.go
new file mode 100644
index 0000000..f02c1ae
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/op.go
@@ -0,0 +1,204 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "fmt"
+
+// An Op encodes the specific operation that a Value performs.
+// Opcodes' semantics can be modified by the type and aux fields of the Value.
+// For instance, OpAdd can be 32 or 64 bit, signed or unsigned, float or complex, depending on Value.Type.
+// Semantics of each op are described below.
+//
+// Ops come in two flavors, architecture-independent and architecture-dependent.
+// Architecture-independent opcodes appear in this file.
+// Architecture-dependent opcodes appear in op{arch}.go files.
+type Op int32
+
+// Opcode ranges, a generic one and one for each architecture.
+const (
+	opInvalid     Op = 0
+	opGenericBase Op = 1 + 1000*iota
+	opAMD64Base
+	op386Base
+
+	opMax // sentinel
+)
+
+// Generic opcodes
+const (
+	opGenericStart Op = opGenericBase + iota
+
+	// 2-input arithmetic
+	OpAdd // arg0 + arg1
+	OpSub // arg0 - arg1
+	OpMul // arg0 * arg1
+	OpLsh // arg0 << arg1
+	OpRsh // arg0 >> arg1 (signed/unsigned depending on signedness of type)
+
+	// 2-input comparisons
+	OpLess // arg0 < arg1
+
+	// constants.  Constant values are stored in the aux field.
+	// booleans have a bool aux field, strings have a string aux
+	// field, and so on.  All integer types store their value
+	// in the aux field as an int64 (including int, uint64, etc.).
+	// We could store int8 as an int8, but that won't work for int,
+	// as it may be different widths on the host and target.
+	OpConst
+
+	OpArg    // address of a function parameter/result.  Memory input is an arg called ".mem".  aux is a string (TODO: make it something other than a string?)
+	OpGlobal // the address of a global variable aux.(*gc.Sym)
+	OpFunc   // entry address of a function
+	OpFP     // frame pointer
+	OpSP     // stack pointer
+
+	OpCopy // output = arg0
+	OpMove // arg0=destptr, arg1=srcptr, arg2=mem, aux.(int64)=size.  Returns memory.
+	OpPhi  // select an argument based on which predecessor block we came from
+
+	OpSliceMake // arg0=ptr, arg1=len, arg2=cap
+	OpSlicePtr  // ptr(arg0)
+	OpSliceLen  // len(arg0)
+	OpSliceCap  // cap(arg0)
+
+	OpStringMake // arg0=ptr, arg1=len
+	OpStringPtr  // ptr(arg0)
+	OpStringLen  // len(arg0)
+
+	OpLoad       // Load from arg0+aux.(int64).  arg1=memory
+	OpStore      // Store arg1 to arg0+aux.(int64).  arg2=memory.  Returns memory.
+	OpArrayIndex // arg0=array, arg1=index.  Returns a[i]
+	OpPtrIndex   // arg0=ptr, arg1=index. Computes ptr+sizeof(*v.type)*index, where index is extended to ptrwidth type
+	OpIsNonNil   // arg0 != nil
+	OpIsInBounds // 0 <= arg0 < arg1
+
+	// function calls.  Arguments to the call have already been written to the stack.
+	// Return values appear on the stack.  The method receiver, if any, is treated
+	// as a phantom first argument.
+	OpCall       // arg0=code pointer, arg1=context ptr, arg2=memory.  Returns memory.
+	OpStaticCall // call function aux.(*gc.Sym), arg0=memory.  Returns memory.
+
+	OpConvert // convert arg0 to another type
+	OpConvNop // interpret arg0 as another type
+
+	OpOffPtr // arg0 + aux.(int64) (arg0 and result are pointers)
+
+	// spill&restore ops for the register allocator.  These are
+	// semantically identical to OpCopy; they do not take/return
+	// stores like regular memory ops do.  We can get away without memory
+	// args because we know there is no aliasing of spill slots on the stack.
+	OpStoreReg8
+	OpLoadReg8
+
+	// used during ssa construction.  Like OpCopy, but the arg has not been specified yet.
+	OpFwdRef
+
+	OpGenericEnd
+)
+
+// GlobalOffset represents a fixed offset within a global variable
+type GlobalOffset struct {
+	Global interface{} // holds a *gc.Sym
+	Offset int64
+}
+
+// offset adds x to the location specified by g and returns it.
+func (g GlobalOffset) offset(x int64) GlobalOffset {
+	return GlobalOffset{g.Global, g.Offset + x}
+}
+
+func (g GlobalOffset) String() string {
+	return fmt.Sprintf("%v+%d", g.Global, g.Offset)
+}
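A quick illustration of the two methods above (editorial sketch; the sym parameter stands in for whatever *gc.Sym the caller holds):

func exampleGlobalOffset(sym interface{}) string {
	g := GlobalOffset{Global: sym, Offset: 8}
	g = g.offset(16)  // Offset grows to 24, Global is unchanged
	return g.String() // formats as "<sym>+24"
}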
+
+//go:generate stringer -type=Op
+
+type opInfo struct {
+	flags int32
+
+	// assembly template
+	// %In: location of input n
+	// %On: location of output n
+	// %A: print aux with fmt.Print
+	asm string
+
+	// returns a reg constraint for the instruction. [0] gives a reg constraint
+	// for each input, [1] gives a reg constraint for each output. (Values have
+	// exactly one output for now)
+	reg [2][]regMask
+}
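The asm template is consumed by the code generator in the gc package; a hypothetical expansion, assuming register locations have already been chosen and that "fmt" and "strings" are imported, could look like the sketch below (the real implementation may differ):

func expandAsm(tmpl string, in, out []string, aux interface{}) string {
	for i, loc := range in {
		tmpl = strings.Replace(tmpl, fmt.Sprintf("%%I%d", i), loc, -1)
	}
	for i, loc := range out {
		tmpl = strings.Replace(tmpl, fmt.Sprintf("%%O%d", i), loc, -1)
	}
	return strings.Replace(tmpl, "%A", fmt.Sprint(aux), -1)
}

// expandAsm("ADDQ\t$%A,%I0,%O0", []string{"AX"}, []string{"BX"}, int64(8))
// yields "ADDQ\t$8,AX,BX".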
+
+const (
+	// possible properties of opcodes
+	OpFlagCommutative int32 = 1 << iota
+)
+
+// Opcodes that represent the input Go program
+var genericTable = map[Op]opInfo{
+	// the unknown op is used only during building and should not appear in a
+	// fully formed ssa representation.
+
+	OpAdd:  {flags: OpFlagCommutative},
+	OpSub:  {},
+	OpMul:  {flags: OpFlagCommutative},
+	OpLess: {},
+
+	OpConst:  {}, // aux matches the type (e.g. bool, int64, float64)
+	OpArg:    {}, // aux is the name of the input variable.  Currently only ".mem" is used
+	OpGlobal: {}, // address of a global variable
+	OpFunc:   {},
+	OpCopy:   {},
+	OpPhi:    {},
+
+	OpConvNop: {}, // aux is the type to convert to
+
+	/*
+		// build and take apart slices
+		{name: "slicemake"}, // (ptr,len,cap) -> slice
+		{name: "sliceptr"},  // pointer part of slice
+		{name: "slicelen"},  // length part of slice
+		{name: "slicecap"},  // capacity part of slice
+
+		// build and take apart strings
+		{name: "stringmake"}, // (ptr,len) -> string
+		{name: "stringptr"},  // pointer part of string
+		{name: "stringlen"},  // length part of string
+
+		// operations on arrays/slices/strings
+		{name: "slice"},     // (s, i, j) -> s[i:j]
+		{name: "index"},     // (mem, ptr, idx) -> val
+		{name: "indexaddr"}, // (ptr, idx) -> ptr
+
+		// loads & stores
+		{name: "load"},  // (mem, check, ptr) -> val
+		{name: "store"}, // (mem, check, ptr, val) -> mem
+
+		// checks
+		{name: "checknil"},   // (mem, ptr) -> check
+		{name: "checkbound"}, // (mem, idx, len) -> check
+
+		// functions
+		{name: "call"},
+
+		// builtins
+		{name: "len"},
+		{name: "convert"},
+
+		// tuples
+		{name: "tuple"},         // build a tuple out of its arguments
+		{name: "extract"},       // aux is an int64.  Extract that index out of a tuple
+		{name: "extractsuffix"}, // aux is an int64.  Slice a tuple with [aux:]
+
+	*/
+}
+
+// table of opcodes, indexed by opcode ID
+var opcodeTable [opMax]opInfo
+
+func init() {
+	for op, info := range genericTable {
+		opcodeTable[op] = info
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/op_string.go b/src/cmd/compile/internal/ssa/op_string.go
new file mode 100644
index 0000000..c8f27bb
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/op_string.go
@@ -0,0 +1,40 @@
+// generated by stringer -type=Op; DO NOT EDIT
+
+package ssa
+
+import "fmt"
+
+const (
+	_Op_name_0 = "opInvalid"
+	_Op_name_1 = "opGenericBaseOpAddOpSubOpMulOpLshOpRshOpLessOpConstOpArgOpGlobalOpFuncOpFPOpSPOpCopyOpMoveOpPhiOpSliceMakeOpSlicePtrOpSliceLenOpSliceCapOpStringMakeOpStringPtrOpStringLenOpLoadOpStoreOpArrayIndexOpPtrIndexOpIsNonNilOpIsInBoundsOpCallOpStaticCallOpConvertOpConvNopOpOffPtrOpStoreReg8OpLoadReg8OpFwdRefOpGenericEnd"
+	_Op_name_2 = "opAMD64BaseOpADDQOpADDQconstOpSUBQOpSUBQconstOpMULQOpMULQconstOpSHLQOpSHLQconstOpNEGQOpADDLOpCMPQOpCMPQconstOpTESTQOpTESTBOpSETEQOpSETNEOpSETLOpSETGEOpSETBOpInvertFlagsOpLEAQOpLEAQ2OpLEAQ4OpLEAQ8OpLEAQglobalOpMOVBloadOpMOVBQZXloadOpMOVBQSXloadOpMOVQloadOpMOVQstoreOpMOVQloadidx8OpMOVQstoreidx8OpMOVQloadglobalOpMOVQstoreglobalOpMOVQconstOpREPMOVSB"
+	_Op_name_3 = "op386Base"
+	_Op_name_4 = "opMax"
+)
+
+var (
+	_Op_index_0 = [...]uint8{0, 9}
+	_Op_index_1 = [...]uint16{0, 13, 18, 23, 28, 33, 38, 44, 51, 56, 64, 70, 74, 78, 84, 90, 95, 106, 116, 126, 136, 148, 159, 170, 176, 183, 195, 205, 215, 227, 233, 245, 254, 263, 271, 282, 292, 300, 312}
+	_Op_index_2 = [...]uint16{0, 11, 17, 28, 34, 45, 51, 62, 68, 79, 85, 91, 97, 108, 115, 122, 129, 136, 142, 149, 155, 168, 174, 181, 188, 195, 207, 217, 230, 243, 253, 264, 278, 293, 309, 326, 337, 347}
+	_Op_index_3 = [...]uint8{0, 9}
+	_Op_index_4 = [...]uint8{0, 5}
+)
+
+func (i Op) String() string {
+	switch {
+	case i == 0:
+		return _Op_name_0
+	case 1001 <= i && i <= 1038:
+		i -= 1001
+		return _Op_name_1[_Op_index_1[i]:_Op_index_1[i+1]]
+	case 2001 <= i && i <= 2037:
+		i -= 2001
+		return _Op_name_2[_Op_index_2[i]:_Op_index_2[i+1]]
+	case i == 3001:
+		return _Op_name_3
+	case i == 4001:
+		return _Op_name_4
+	default:
+		return fmt.Sprintf("Op(%d)", i)
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/opamd64.go b/src/cmd/compile/internal/ssa/opamd64.go
new file mode 100644
index 0000000..46a0069
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/opamd64.go
@@ -0,0 +1,177 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// amd64-specific opcodes
+
+const (
+	opAMD64start Op = opAMD64Base + iota
+
+	// Suffixes encode the bit width of various instructions.
+	// Q = 64 bit, L = 32 bit, W = 16 bit, B = 8 bit
+
+	// arithmetic
+	OpADDQ      // arg0 + arg1
+	OpADDQconst // arg + aux.(int64)
+	OpSUBQ      // arg0 - arg1
+	OpSUBQconst // arg - aux.(int64)
+	OpMULQ      // arg0 * arg1
+	OpMULQconst // arg * aux.(int64)
+	OpSHLQ      // arg0 << arg1
+	OpSHLQconst // arg << aux.(int64)
+	OpNEGQ      // -arg
+	OpADDL      // arg0 + arg1
+
+	// Flags value generation.
+	// We pretend the flags type is an opaque thing that comparisons generate
+	// and from which we can extract boolean conditions like <, ==, etc.
+	OpCMPQ      // arg0 compare to arg1
+	OpCMPQconst // arg0 compare to aux.(int64)
+	OpTESTQ     // (arg0 & arg1) compare to 0
+	OpTESTB     // (arg0 & arg1) compare to 0
+
+	// These opcodes extract a particular boolean condition from a flags value.
+	OpSETEQ // extract == condition from arg0
+	OpSETNE // extract != condition from arg0
+	OpSETL  // extract signed < condition from arg0
+	OpSETGE // extract signed >= condition from arg0
+	OpSETB  // extract unsigned < condition from arg0
+
+	// InvertFlags reverses the direction of a flags type interpretation:
+	// (InvertFlags (OpCMPQ a b)) == (OpCMPQ b a)
+	// This is a pseudo-op which can't appear in assembly output.
+	OpInvertFlags // reverse direction of arg0
+
+	OpLEAQ       // arg0 + arg1 + aux.(int64)
+	OpLEAQ2      // arg0 + 2*arg1 + aux.(int64)
+	OpLEAQ4      // arg0 + 4*arg1 + aux.(int64)
+	OpLEAQ8      // arg0 + 8*arg1 + aux.(int64)
+	OpLEAQglobal // no args.  address of aux.(GlobalOffset)
+
+	// Load/store from general address
+	OpMOVBload // Load from arg0+aux.(int64).  arg1=memory
+	OpMOVBQZXload
+	OpMOVBQSXload
+	OpMOVQload
+	OpMOVQstore     // Store arg1 to arg0+aux.(int64).  arg2=memory, returns memory.
+	OpMOVQloadidx8  // Load from arg0+arg1*8+aux.(int64).  arg2=memory
+	OpMOVQstoreidx8 // Store arg2 to arg0+arg1*8+aux.(int64).  arg3=memory, returns memory.
+
+	// Load/store from global.  Same as the above loads, but arg0 is missing and aux is a GlobalOffset instead of an int64.
+	OpMOVQloadglobal  // arg0 = memory
+	OpMOVQstoreglobal // store arg0.  arg1=memory, returns memory.
+
+	// materialize a constant into a register
+	OpMOVQconst // (takes no arguments)
+
+	// move memory
+	OpREPMOVSB // arg0=destptr, arg1=srcptr, arg2=len, arg3=mem
+)
+
+type regMask uint64
+
+var regsAMD64 = [...]string{
+	"AX",
+	"CX",
+	"DX",
+	"BX",
+	"SP",
+	"BP",
+	"SI",
+	"DI",
+	"R8",
+	"R9",
+	"R10",
+	"R11",
+	"R12",
+	"R13",
+	"R14",
+	"R15",
+
+	// pseudo registers
+	"FP",
+	"FLAGS",
+	"OVERWRITE0", // the same register as the first input
+}
+
+var gp regMask = 0x1ffff   // all integer registers including SP&FP
+var gpout regMask = 0xffef // integer registers not including SP&FP
+var cx regMask = 1 << 1
+var si regMask = 1 << 6
+var di regMask = 1 << 7
+var flags regMask = 1 << 17
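A bit index in a regMask selects the register with the same index in regsAMD64, so cx (1<<1) names CX and flags (1<<17) names the FLAGS pseudo-register. An illustrative helper (not defined in this package) that decodes a mask:

func maskNames(m regMask) []string {
	var names []string
	for i := range regsAMD64 {
		if m&(regMask(1)<<uint(i)) != 0 {
			names = append(names, regsAMD64[i])
		}
	}
	return names // e.g. maskNames(cx|di) == []string{"CX", "DI"}
}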
+
+var (
+	// gp = general purpose (integer) registers
+	gp21      = [2][]regMask{{gp, gp}, {gpout}} // 2 input, 1 output
+	gp11      = [2][]regMask{{gp}, {gpout}}     // 1 input, 1 output
+	gp01      = [2][]regMask{{}, {gpout}}       // 0 input, 1 output
+	shift     = [2][]regMask{{gp, cx}, {gpout}} // shift operations
+	gp2_flags = [2][]regMask{{gp, gp}, {flags}} // generate flags from 2 gp regs
+	gp1_flags = [2][]regMask{{gp}, {flags}}     // generate flags from 1 gp reg
+
+	gpload     = [2][]regMask{{gp, 0}, {gpout}}
+	gploadidx  = [2][]regMask{{gp, gp, 0}, {gpout}}
+	gpstore    = [2][]regMask{{gp, gp, 0}, {0}}
+	gpstoreidx = [2][]regMask{{gp, gp, gp, 0}, {0}}
+
+	gpload_stack  = [2][]regMask{{0}, {gpout}}
+	gpstore_stack = [2][]regMask{{gp, 0}, {0}}
+)
+
+// Opcodes that appear in an output amd64 program
+var amd64Table = map[Op]opInfo{
+	OpADDQ:      {flags: OpFlagCommutative, asm: "ADDQ\t%I0,%I1,%O0", reg: gp21}, // TODO: overwrite
+	OpADDQconst: {asm: "ADDQ\t$%A,%I0,%O0", reg: gp11},                           // aux = int64 constant to add
+	OpSUBQ:      {asm: "SUBQ\t%I0,%I1,%O0", reg: gp21},
+	OpSUBQconst: {asm: "SUBQ\t$%A,%I0,%O0", reg: gp11},
+	OpMULQ:      {asm: "MULQ\t%I0,%I1,%O0", reg: gp21},
+	OpMULQconst: {asm: "IMULQ\t$%A,%I0,%O0", reg: gp11},
+	OpSHLQ:      {asm: "SHLQ\t%I0,%I1,%O0", reg: gp21},
+	OpSHLQconst: {asm: "SHLQ\t$%A,%I0,%O0", reg: gp11},
+
+	OpCMPQ:      {asm: "CMPQ\t%I0,%I1", reg: gp2_flags}, // compute arg[0]-arg[1] and produce flags
+	OpCMPQconst: {asm: "CMPQ\t$%A,%I0", reg: gp1_flags},
+	OpTESTQ:     {asm: "TESTQ\t%I0,%I1", reg: gp2_flags},
+	OpTESTB:     {asm: "TESTB\t%I0,%I1", reg: gp2_flags},
+
+	OpLEAQ:       {flags: OpFlagCommutative, asm: "LEAQ\t%A(%I0)(%I1*1),%O0", reg: gp21}, // aux = int64 constant to add
+	OpLEAQ2:      {asm: "LEAQ\t%A(%I0)(%I1*2),%O0"},
+	OpLEAQ4:      {asm: "LEAQ\t%A(%I0)(%I1*4),%O0"},
+	OpLEAQ8:      {asm: "LEAQ\t%A(%I0)(%I1*8),%O0"},
+	OpLEAQglobal: {asm: "LEAQ\t%A(SB),%O0", reg: gp01},
+
+	// loads and stores
+	OpMOVBload:      {asm: "MOVB\t%A(%I0),%O0", reg: gpload},
+	OpMOVQload:      {asm: "MOVQ\t%A(%I0),%O0", reg: gpload},
+	OpMOVQstore:     {asm: "MOVQ\t%I1,%A(%I0)", reg: gpstore},
+	OpMOVQloadidx8:  {asm: "MOVQ\t%A(%I0)(%I1*8),%O0", reg: gploadidx},
+	OpMOVQstoreidx8: {asm: "MOVQ\t%I2,%A(%I0)(%I1*8)", reg: gpstoreidx},
+
+	OpMOVQconst: {asm: "MOVQ\t$%A,%O0", reg: gp01},
+
+	OpStaticCall: {asm: "CALL\t%A(SB)"},
+
+	OpCopy:    {asm: "MOVQ\t%I0,%O0", reg: gp11}, // TODO: make arch-specific
+	OpConvNop: {asm: "MOVQ\t%I0,%O0", reg: gp11}, // TODO: make arch-specific.  Or get rid of this altogether.
+
+	// convert from flags back to boolean
+	OpSETL: {},
+
+	// ops for spilling of registers
+	// unlike regular loads & stores, these take no memory argument.
+	// They are just like OpCopy but we use them during register allocation.
+	// TODO: different widths, float
+	OpLoadReg8:  {asm: "MOVQ\t%I0,%O0"},
+	OpStoreReg8: {asm: "MOVQ\t%I0,%O0"},
+
+	OpREPMOVSB: {asm: "REP MOVSB", reg: [2][]regMask{{di, si, cx, 0}, {0}}}, // TODO: record that si/di/cx are clobbered
+}
+
+func init() {
+	for op, info := range amd64Table {
+		opcodeTable[op] = info
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/opt.go b/src/cmd/compile/internal/ssa/opt.go
new file mode 100644
index 0000000..ea2bcf0
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/opt.go
@@ -0,0 +1,13 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// machine-independent optimization
+
+//go:generate go run rulegen/rulegen.go rulegen/generic.rules genericRules generic.go
+
+func opt(f *Func) {
+	applyRewrite(f, genericRules)
+}
diff --git a/src/cmd/compile/internal/ssa/phielim.go b/src/cmd/compile/internal/ssa/phielim.go
new file mode 100644
index 0000000..19c0d07
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/phielim.go
@@ -0,0 +1,44 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// phielim eliminates redundant phi values from f.
+// A phi is redundant if its arguments are all equal.  For
+// purposes of counting, ignore the phi itself.  Both of
+// these phis are redundant:
+//   v = phi(x,x,x)
+//   v = phi(x,v,x,v)
+func phielim(f *Func) {
+	args := newSparseSet(f.NumValues())
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.Op != OpPhi {
+				continue
+			}
+			args.clear()
+			for _, x := range v.Args {
+				for x.Op == OpCopy {
+					x = x.Args[0]
+				}
+				args.add(x.ID)
+			}
+			switch {
+			case args.size() == 1:
+				v.Op = OpCopy
+				v.SetArgs1(v.Args[0])
+			case args.size() == 2 && args.contains(v.ID):
+				var w *Value
+				for _, x := range v.Args {
+					if x.ID != v.ID {
+						w = x
+						break
+					}
+				}
+				v.Op = OpCopy
+				v.SetArgs1(w)
+			}
+		}
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/print.go b/src/cmd/compile/internal/ssa/print.go
new file mode 100644
index 0000000..eeea30d
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/print.go
@@ -0,0 +1,63 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "fmt"
+
+func printFunc(f *Func) {
+	fmt.Print(f.Name)
+	fmt.Print(" ")
+	fmt.Println(f.Type)
+	printed := make([]bool, f.NumValues())
+	for _, b := range f.Blocks {
+		fmt.Printf("  b%d:\n", b.ID)
+		n := 0
+
+		// print phis first since all value cycles contain a phi
+		for _, v := range b.Values {
+			if v.Op != OpPhi {
+				continue
+			}
+			fmt.Print("    ")
+			fmt.Println(v.LongString())
+			printed[v.ID] = true
+			n++
+		}
+
+		// print rest of values in dependency order
+		for n < len(b.Values) {
+			m := n
+		outer:
+			for _, v := range b.Values {
+				if printed[v.ID] {
+					continue
+				}
+				for _, w := range v.Args {
+					if w.Block == b && !printed[w.ID] {
+						continue outer
+					}
+				}
+				fmt.Print("    ")
+				fmt.Println(v.LongString())
+				printed[v.ID] = true
+				n++
+			}
+			if m == n {
+				fmt.Println("dependency cycle!")
+				for _, v := range b.Values {
+					if printed[v.ID] {
+						continue
+					}
+					fmt.Print("    ")
+					fmt.Println(v.LongString())
+					printed[v.ID] = true
+					n++
+				}
+			}
+		}
+
+		fmt.Println("    " + b.LongString())
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
new file mode 100644
index 0000000..c798d2e
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -0,0 +1,456 @@
+package ssa
+
+import (
+	"fmt"
+	"log"
+	"sort"
+)
+
+func setloc(home []Location, v *Value, loc Location) []Location {
+	for v.ID >= ID(len(home)) {
+		home = append(home, nil)
+	}
+	home[v.ID] = loc
+	return home
+}
+
+type register uint
+
+// TODO: make arch-dependent
+var numRegs register = 32
+
+var registers = [...]Register{
+	Register{0, "AX"},
+	Register{1, "CX"},
+	Register{2, "DX"},
+	Register{3, "BX"},
+	Register{4, "SP"},
+	Register{5, "BP"},
+	Register{6, "SI"},
+	Register{7, "DI"},
+	Register{8, "R8"},
+	Register{9, "R9"},
+	Register{10, "R10"},
+	Register{11, "R11"},
+	Register{12, "R12"},
+	Register{13, "R13"},
+	Register{14, "R14"},
+	Register{15, "R15"},
+
+	// TODO X0, ...
+	// TODO: make arch-dependent
+	Register{16, "FP"}, // pseudo-register, actually a constant offset from SP
+	Register{17, "FLAGS"},
+	Register{18, "OVERWRITE"},
+}
+
+// countRegs returns the number of set bits in the register mask.
+func countRegs(r regMask) int {
+	n := 0
+	for r != 0 {
+		n += int(r & 1)
+		r >>= 1
+	}
+	return n
+}
+
+// pickReg picks an arbitrary register from the register mask.
+func pickReg(r regMask) register {
+	// pick the lowest one
+	if r == 0 {
+		panic("can't pick a register from an empty set")
+	}
+	for i := register(0); ; i++ {
+		if r&1 != 0 {
+			return i
+		}
+		r >>= 1
+	}
+}
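For instance, a mask of 0x5 has bits 0 and 2 set, so it describes two registers and pickReg chooses the lowest one:

func exampleRegMask() {
	m := regMask(0x5) // bits 0 and 2: AX and DX
	_ = countRegs(m)  // 2
	_ = pickReg(m)    // register(0), i.e. AX
}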
+
+// regalloc performs register allocation on f.  It sets f.RegAlloc
+// to the resulting allocation.
+func regalloc(f *Func) {
+	// For now, a very simple allocator.  Everything has a home
+	// location on the stack (TBD as a subsequent stackalloc pass).
+	// Values live in the home locations at basic block boundaries.
+	// We use a simple greedy allocator within a basic block.
+	home := make([]Location, f.NumValues())
+
+	addPhiCopies(f) // add copies of phi inputs in preceding blocks
+
+	// Compute live values at the end of each block.
+	live := live(f)
+	lastUse := make([]int, f.NumValues())
+
+	var oldSched []*Value
+
+	// Hack to find fp, sp Values and assign them a register. (TODO: make not so hacky)
+	var fp, sp *Value
+	for _, v := range f.Entry.Values {
+		switch v.Op {
+		case OpSP:
+			sp = v
+			home = setloc(home, v, &registers[4]) // TODO: arch-dependent
+		case OpFP:
+			fp = v
+			home = setloc(home, v, &registers[16]) // TODO: arch-dependent
+		}
+	}
+
+	// Register allocate each block separately.  All live values will live
+	// in home locations (stack slots) between blocks.
+	for _, b := range f.Blocks {
+
+		// Compute the index of the last use of each Value in the Block.
+		// Scheduling has already happened, so Values are totally ordered.
+		// lastUse[x] = max(i) where b.Value[i] uses Value x.
+		for i, v := range b.Values {
+			lastUse[v.ID] = -1
+			for _, w := range v.Args {
+				// could condition this store on w.Block == b, but no need
+				lastUse[w.ID] = i
+			}
+		}
+		// Values which are live at block exit have a lastUse of len(b.Values).
+		if b.Control != nil {
+			lastUse[b.Control.ID] = len(b.Values)
+		}
+		// Values live after block exit have a lastUse of len(b.Values)+1.
+		for _, vid := range live[b.ID] {
+			lastUse[vid] = len(b.Values) + 1
+		}
+
+		// For each register, store which value it contains
+		type regInfo struct {
+			v     *Value // stack-homed original value (or nil if empty)
+			c     *Value // the register copy of v
+			dirty bool   // if the stack-homed copy is out of date
+		}
+		regs := make([]regInfo, numRegs)
+
+		// TODO: hack: initialize fixed registers
+		regs[4] = regInfo{sp, sp, false}
+		regs[16] = regInfo{fp, fp, false}
+
+		var used regMask  // has a 1 for each non-nil entry in regs
+		var dirty regMask // has a 1 for each dirty entry in regs
+
+		oldSched = append(oldSched[:0], b.Values...)
+		b.Values = b.Values[:0]
+
+		for idx, v := range oldSched {
+			// For each instruction, do:
+			//   set up inputs to v in registers
+			//   pick output register
+			//   run insn
+			//   mark output register as dirty
+			// Note that v represents the Value at "home" (on the stack), and c
+			// is its register equivalent.  There are two ways to establish c:
+			//   - use of v.  c will be a load from v's home.
+			//   - definition of v.  c will be identical to v but will live in
+			//     a register.  v will be modified into a spill of c.
+			regspec := opcodeTable[v.Op].reg
+			inputs := regspec[0]
+			outputs := regspec[1]
+			if len(inputs) == 0 && len(outputs) == 0 {
+				// No register allocation required (or none specified yet)
+				b.Values = append(b.Values, v)
+				continue
+			}
+
+			// Compute a good input ordering.  Start with the most constrained input.
+			order := make([]intPair, len(inputs))
+			for i, input := range inputs {
+				order[i] = intPair{countRegs(input), i}
+			}
+			sort.Sort(byKey(order))
+
+			// nospill contains registers that we can't spill because
+			// we already set them up for use by the current instruction.
+			var nospill regMask
+			nospill |= 0x10010 // SP and FP can't be spilled (TODO: arch-specific)
+
+			// Move inputs into registers
+			for _, o := range order {
+				w := v.Args[o.val]
+				mask := inputs[o.val]
+				if mask == 0 {
+					// Input doesn't need a register
+					continue
+				}
+				// TODO: 2-address overwrite instructions
+
+				// Find registers that w is already in
+				var wreg regMask
+				for r := register(0); r < numRegs; r++ {
+					if regs[r].v == w {
+						wreg |= regMask(1) << r
+					}
+				}
+
+				var r register
+				if mask&wreg != 0 {
+					// w is already in an allowed register.  We're done.
+					r = pickReg(mask & wreg)
+				} else {
+					// Pick a register for w
+					// Priorities (in order)
+					//  - an unused register
+					//  - a clean register
+					//  - a dirty register
+					// TODO: for used registers, pick the one whose next use is the
+					// farthest in the future.
+					mask &^= nospill
+					if mask & ^dirty != 0 {
+						mask &^= dirty
+					}
+					if mask & ^used != 0 {
+						mask &^= used
+					}
+					r = pickReg(mask)
+
+					// Kick out whoever is using this register.
+					if regs[r].v != nil {
+						x := regs[r].v
+						c := regs[r].c
+						if regs[r].dirty && lastUse[x.ID] > idx {
+							// Write x back to home.  Its value is currently held in c.
+							x.Op = OpStoreReg8
+							x.Aux = nil
+							x.resetArgs()
+							x.AddArg(c)
+							b.Values = append(b.Values, x)
+							regs[r].dirty = false
+							dirty &^= regMask(1) << r
+						}
+						regs[r].v = nil
+						regs[r].c = nil
+						used &^= regMask(1) << r
+					}
+
+					// Load w into this register
+					var c *Value
+					if len(w.Args) == 0 {
+						// Materialize w
+						if w.Op == OpFP || w.Op == OpSP || w.Op == OpGlobal {
+							c = b.NewValue1(OpCopy, w.Type, nil, w)
+						} else {
+							c = b.NewValue(w.Op, w.Type, w.Aux)
+						}
+					} else if len(w.Args) == 1 && (w.Args[0].Op == OpFP || w.Args[0].Op == OpSP || w.Args[0].Op == OpGlobal) {
+						// Materialize offsets from SP/FP/Global
+						c = b.NewValue1(w.Op, w.Type, w.Aux, w.Args[0])
+					} else if wreg != 0 {
+						// Copy from another register.
+						// Typically just an optimization, but this is
+						// required if w is dirty.
+						s := pickReg(wreg)
+						// inv: s != r
+						c = b.NewValue(OpCopy, w.Type, nil)
+						c.AddArg(regs[s].c)
+					} else {
+						// Load from home location
+						c = b.NewValue(OpLoadReg8, w.Type, nil)
+						c.AddArg(w)
+					}
+					home = setloc(home, c, &registers[r])
+					// Remember what we did
+					regs[r].v = w
+					regs[r].c = c
+					regs[r].dirty = false
+					used |= regMask(1) << r
+				}
+
+				// Replace w with its in-register copy.
+				v.SetArg(o.val, regs[r].c)
+
+				// Remember not to undo this register assignment until after
+				// the instruction is issued.
+				nospill |= regMask(1) << r
+			}
+
+			// pick a register for v itself.
+			if len(outputs) > 1 {
+				panic("can't do multi-output yet")
+			}
+			if len(outputs) == 0 || outputs[0] == 0 {
+				// output doesn't need a register
+				b.Values = append(b.Values, v)
+			} else {
+				mask := outputs[0]
+				if mask & ^dirty != 0 {
+					mask &^= dirty
+				}
+				if mask & ^used != 0 {
+					mask &^= used
+				}
+				r := pickReg(mask)
+
+				// Kick out whoever is using this register.
+				if regs[r].v != nil {
+					x := regs[r].v
+					c := regs[r].c
+					if regs[r].dirty && lastUse[x.ID] > idx {
+						// Write x back to home.  Its value is currently held in c.
+						x.Op = OpStoreReg8
+						x.Aux = nil
+						x.resetArgs()
+						x.AddArg(c)
+						b.Values = append(b.Values, x)
+						regs[r].dirty = false
+						dirty &^= regMask(1) << r
+					}
+					regs[r].v = nil
+					regs[r].c = nil
+					used &^= regMask(1) << r
+				}
+
+				// Reissue v with new op, with r as its home.
+				c := b.NewValue(v.Op, v.Type, v.Aux)
+				c.AddArgs(v.Args...)
+				home = setloc(home, c, &registers[r])
+
+				// Remember what we did
+				regs[r].v = v
+				regs[r].c = c
+				regs[r].dirty = true
+				used |= regMask(1) << r
+				dirty |= regMask(1) << r
+			}
+		}
+
+		// If the block ends in a call, we must put the call after the spill code.
+		var call *Value
+		if b.Kind == BlockCall {
+			call = b.Control
+			if call != b.Values[len(b.Values)-1] {
+				log.Fatalf("call not at end of block %s %v", b, call)
+			}
+			b.Values = b.Values[:len(b.Values)-1]
+			// TODO: do this for all control types?
+		}
+
+		// at the end of the block, spill any remaining dirty, live values
+		for r := register(0); r < numRegs; r++ {
+			if !regs[r].dirty {
+				continue
+			}
+			v := regs[r].v
+			c := regs[r].c
+			if lastUse[v.ID] <= len(oldSched) {
+				if v == v.Block.Control {
+					// link control value to register version
+					v.Block.Control = c
+				}
+				continue // not live after block
+			}
+
+			// change v to be a copy of c
+			v.Op = OpStoreReg8
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(c)
+			b.Values = append(b.Values, v)
+		}
+
+		// add call back after spills
+		if b.Kind == BlockCall {
+			b.Values = append(b.Values, call)
+		}
+	}
+	f.RegAlloc = home
+	deadcode(f) // remove values that had all of their uses rematerialized.  TODO: separate pass?
+}
+
+// addPhiCopies adds copies of phi inputs in the blocks
+// immediately preceding the phi's block.
+func addPhiCopies(f *Func) {
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.Op != OpPhi {
+				break // all phis should appear first
+			}
+			if v.Type.IsMemory() { // TODO: only "regallocable" types
+				continue
+			}
+			for i, w := range v.Args {
+				c := b.Preds[i]
+				cpy := c.NewValue1(OpCopy, v.Type, nil, w)
+				v.Args[i] = cpy
+			}
+		}
+	}
+}
+
+// live returns a map from block ID to a list of value IDs live at the end of that block
+// TODO: this could be quadratic if lots of variables are live across lots of
+// basic blocks.  Figure out a way to make this function (or, more precisely, the user
+// of this function) require only linear size & time.
+func live(f *Func) [][]ID {
+	live := make([][]ID, f.NumBlocks())
+	var phis []*Value
+
+	s := newSparseSet(f.NumValues())
+	t := newSparseSet(f.NumValues())
+	for {
+		for _, b := range f.Blocks {
+			fmt.Printf("live %s %v\n", b, live[b.ID])
+		}
+		changed := false
+
+		for _, b := range f.Blocks {
+			// Start with known live values at the end of the block
+			s.clear()
+			s.addAll(live[b.ID])
+
+			// Propagate backwards to the start of the block
+			// Assumes Values have been scheduled.
+			phis := phis[:0]
+			for i := len(b.Values) - 1; i >= 0; i-- {
+				v := b.Values[i]
+				s.remove(v.ID)
+				if v.Op == OpPhi {
+					// save phi ops for later
+					phis = append(phis, v)
+					continue
+				}
+				s.addAllValues(v.Args)
+			}
+
+			// for each predecessor of b, expand its list of live-at-end values
+			// inv: s contains the values live at the start of b (excluding phi inputs)
+			for i, p := range b.Preds {
+				t.clear()
+				t.addAll(live[p.ID])
+				t.addAll(s.contents())
+				for _, v := range phis {
+					t.add(v.Args[i].ID)
+				}
+				if t.size() == len(live[p.ID]) {
+					continue
+				}
+				// grow p's live set
+				c := make([]ID, t.size())
+				copy(c, t.contents())
+				live[p.ID] = c
+				changed = true
+			}
+		}
+
+		if !changed {
+			break
+		}
+	}
+	return live
+}
+
+// for sorting a pair of integers by key
+type intPair struct {
+	key, val int
+}
+type byKey []intPair
+
+func (a byKey) Len() int           { return len(a) }
+func (a byKey) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
+func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key }
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
new file mode 100644
index 0000000..671270d
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -0,0 +1,84 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "log"
+
+func applyRewrite(f *Func, r func(*Value) bool) {
+	// repeat rewrites until we find no more rewrites
+	var curv *Value
+	defer func() {
+		if curv != nil {
+			log.Printf("panic during rewrite of %s\n", curv.LongString())
+			// TODO(khr): print source location also
+		}
+	}()
+	for {
+		change := false
+		for _, b := range f.Blocks {
+			for _, v := range b.Values {
+				// elide any copies generated during rewriting
+				for i, a := range v.Args {
+					if a.Op != OpCopy {
+						continue
+					}
+					for a.Op == OpCopy {
+						a = a.Args[0]
+					}
+					v.Args[i] = a
+				}
+
+				// apply rewrite function
+				curv = v
+				if r(v) {
+					change = true
+				}
+			}
+		}
+		if !change {
+			curv = nil
+			return
+		}
+	}
+}
+
+// Common functions called from rewriting rules
+
+func is64BitInt(t Type) bool {
+	return t.Size() == 8 && t.IsInteger()
+}
+
+func is32BitInt(t Type) bool {
+	return t.Size() == 4 && t.IsInteger()
+}
+
+func isPtr(t Type) bool {
+	return t.IsPtr()
+}
+
+func isSigned(t Type) bool {
+	return t.IsSigned()
+}
+
+func typeSize(t Type) int64 {
+	return t.Size()
+}
+
+// addOff adds two offset aux values.  Each should be an int64.  Fails if wraparound happens.
+func addOff(a, b interface{}) interface{} {
+	return addOffset(a.(int64), b.(int64))
+}
+func addOffset(x, y int64) int64 {
+	z := x + y
+	// x and y have same sign and z has a different sign => overflow
+	if x^y >= 0 && x^z < 0 {
+		log.Panicf("offset overflow %d %d\n", x, y)
+	}
+	return z
+}
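Small sanity examples for the overflow check (illustrative only):

func exampleAddOff() {
	_ = addOffset(8, 16) // 24
	// addOffset(math.MaxInt64, 1) would wrap to a negative sum,
	// fail the sign test above, and panic rather than fold a bad offset.
}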
+
+func inBounds(idx, len int64) bool {
+	return idx >= 0 && idx < len
+}
diff --git a/src/cmd/compile/internal/ssa/rulegen/generic.rules b/src/cmd/compile/internal/ssa/rulegen/generic.rules
new file mode 100644
index 0000000..c49d9d9
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/rulegen/generic.rules
@@ -0,0 +1,24 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// constant folding
+(Add <t> (Const [c]) (Const [d])) && is64BitInt(t) -> (Const [{c.(int64)+d.(int64)}])
+(Mul <t> (Const [c]) (Const [d])) && is64BitInt(t) -> (Const [{c.(int64)*d.(int64)}])
+(IsInBounds (Const [c]) (Const [d])) -> (Const [inBounds(c.(int64),d.(int64))])
+
+// tear apart slices
+// TODO: anything that generates a slice needs to go in here.
+(SlicePtr (Load ptr mem)) -> (Load ptr mem)
+(SliceLen (Load ptr mem)) -> (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.Uintptr> [int64(v.Block.Func.Config.ptrSize)])) mem)
+(SliceCap (Load ptr mem)) -> (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.Uintptr> [int64(v.Block.Func.Config.ptrSize*2)])) mem)
+
+// indexing operations
+// Note: bounds check has already been done
+(ArrayIndex (Load ptr mem) idx) -> (Load (PtrIndex <ptr.Type.Elem().Elem().PtrTo()> ptr idx) mem)
+(PtrIndex <t> ptr idx) -> (Add ptr (Mul <v.Block.Func.Config.Uintptr> idx (Const <v.Block.Func.Config.Uintptr> [t.Elem().Size()])))
+// TODO: hopefully this will get rid of all full-width array copies.
+
+// big-object moves
+// TODO: fix size
+(Store dst (Load <t> src mem) mem) && t.Size() > 8 -> (Move [t.Size()] dst src mem)
diff --git a/src/cmd/compile/internal/ssa/rulegen/lower_amd64.rules b/src/cmd/compile/internal/ssa/rulegen/lower_amd64.rules
new file mode 100644
index 0000000..dc910b7
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/rulegen/lower_amd64.rules
@@ -0,0 +1,91 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// values are specified using the following format:
+// (op <type> [aux] arg0 arg1 ...)
+// the type and aux fields are optional
+// on the matching side
+//  - the types and aux fields must match if they are specified.
+// on the generated side
+//  - the type of the top-level expression is the same as the one on the left-hand side.
+//  - the type of any subexpressions must be specified explicitly.
+//  - aux will be nil if not specified.
+
+// x86 register conventions:
+//  - Integer types live in the low portion of registers.
+//    Upper portions are correctly extended.
+//  - Boolean types use the low-order byte of a register.  Upper bytes are junk.
+//  - We do not use AH,BH,CH,DH registers.
+//  - Floating-point types will live in the low natural slot of an sse2 register.
+//    Unused portions are junk.
+
+// These are the lowerings themselves
+(Add <t> x y) && (is64BitInt(t) || isPtr(t)) -> (ADDQ x y)
+(Add <t> x y) && is32BitInt(t) -> (ADDL x y)
+
+(Sub <t> x y) && is64BitInt(t) -> (SUBQ x y)
+
+(Mul <t> x y) && is64BitInt(t) -> (MULQ x y)
+(Lsh <t> x y) && is64BitInt(t) -> (SHLQ x y) // TODO: check y>63
+(Less x y) && is64BitInt(v.Args[0].Type) && isSigned(v.Args[0].Type) -> (SETL (CMPQ <TypeFlags> x y))
+
+(Load <t> ptr mem) && t.IsBoolean() -> (MOVBload [int64(0)] ptr mem)
+(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload [int64(0)] ptr mem)
+(Store ptr val mem) && (is64BitInt(val.Type) || isPtr(val.Type)) -> (MOVQstore [int64(0)] ptr val mem)
+
+// checks
+(IsNonNil p) -> (SETNE (TESTQ <TypeFlags> p p))
+(IsInBounds idx len) -> (SETB (CMPQ <TypeFlags> idx len))
+
+(Move [size] dst src mem) -> (REPMOVSB dst src (Const <TypeUInt64> [size.(int64)]) mem)
+
+(OffPtr [off] ptr) -> (ADDQconst [off] ptr)
+
+(Const <t> [val]) && is64BitInt(t) -> (MOVQconst [val])
+
+// Rules below here apply some simple optimizations after lowering.
+// TODO: Should this be a separate pass?
+
+// global loads/stores
+(Global [sym]) -> (LEAQglobal [GlobalOffset{sym,0}])
+
+// fold constants into instructions
+(ADDQ x (MOVQconst [c])) -> (ADDQconst [c] x) // TODO: restrict c to int32 range?
+(ADDQ (MOVQconst [c]) x) -> (ADDQconst [c] x)
+(SUBQ x (MOVQconst [c])) -> (SUBQconst x [c])
+(SUBQ <t> (MOVQconst [c]) x) -> (NEGQ (SUBQconst <t> x [c]))
+(MULQ x (MOVQconst [c])) && c.(int64) == int64(int32(c.(int64))) -> (MULQconst [c] x)
+(MULQ (MOVQconst [c]) x) -> (MULQconst [c] x)
+(SHLQ x (MOVQconst [c])) -> (SHLQconst [c] x)
+(CMPQ x (MOVQconst [c])) -> (CMPQconst x [c])
+(CMPQ (MOVQconst [c]) x) -> (InvertFlags (CMPQconst <TypeFlags> x [c]))
+
+// strength reduction
+// TODO: do this a lot more generically
+(MULQconst [c] x) && c.(int64) == 8 -> (SHLQconst [int64(3)] x)
+(MULQconst [c] x) && c.(int64) == 64 -> (SHLQconst [int64(6)] x)
+
+// fold add/shift into leaq
+(ADDQ x (SHLQconst [shift] y)) && shift.(int64) == 3 -> (LEAQ8 [int64(0)] x y)
+(ADDQconst [c] (LEAQ8 [d] x y)) -> (LEAQ8 [addOff(c, d)] x y)
+
+// reverse ordering of compare instruction
+(SETL (InvertFlags x)) -> (SETGE x)
+
+// fold constants into memory operations
+// Note that this is not always a good idea because if not all the uses of
+// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
+// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
+// Nevertheless, let's do it!
+(MOVQload [off1] (ADDQconst [off2] ptr) mem) -> (MOVQload [addOff(off1, off2)] ptr mem)
+(MOVQstore [off1] (ADDQconst [off2] ptr) val mem) -> (MOVQstore [addOff(off1, off2)] ptr val mem)
+
+// indexed loads and stores
+(MOVQload [off1] (LEAQ8 [off2] ptr idx) mem) -> (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
+(MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem) -> (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
+
+(MOVQloadidx8 [off1] (ADDQconst [off2] ptr) idx mem) -> (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
+(MOVQstoreidx8 [off1] (ADDQconst [off2] ptr) idx val mem) -> (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
+
+(ADDQconst [off] x) && off.(int64) == 0 -> (Copy x)
diff --git a/src/cmd/compile/internal/ssa/rulegen/rulegen.go b/src/cmd/compile/internal/ssa/rulegen/rulegen.go
new file mode 100644
index 0000000..4ac9302
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/rulegen/rulegen.go
@@ -0,0 +1,346 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This program generates Go code that applies rewrite rules to a Value.
+// The generated code implements a function of type func (v *Value) bool
+// which returns true iff it did something.
+// Ideas stolen from Swift: http://www.hpl.hp.com/techreports/Compaq-DEC/WRL-2000-2.html
+
+// Run with something like "go run rulegen.go lower_amd64.rules lowerAmd64 lowerAmd64.go"
+
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"crypto/md5"
+	"fmt"
+	"go/format"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"sort"
+	"strings"
+)
+
+// rule syntax:
+//  sexpr [&& extra conditions] -> sexpr
+//
+// sexpr are s-expressions (lisp-like parenthesized groupings)
+// sexpr ::= (opcode sexpr*)
+//         | variable
+//         | [aux]
+//         | <type>
+//         | {code}
+//
+// aux      ::= variable | {code}
+// type     ::= variable | {code}
+// variable ::= some token
+// opcode   ::= one of the opcodes from ../op.go (without the Op prefix)
+
+// The extra condition is just a chunk of Go code that evaluates to a boolean.  It may use
+// variables declared in the matching sexpr.  The variable "v" is predefined to be
+// the value matched by the entire rule.
+
+// If multiple rules match, the first one in file order is selected.
+
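As an illustration, the lowering rule (Sub <t> x y) && is64BitInt(t) -> (SUBQ x y) from lower_amd64.rules comes out as a match/cond/result comment triple followed by a guarded rewrite body, roughly the shape shown below (abridged; the full generated form appears in the lowerAmd64 fragment earlier in this CL, and endHASH stands for the md5-derived label):

// match: (Sub <t> x y)
// cond: is64BitInt(t)
// result: (SUBQ x y)
{
	t := v.Type
	x := v.Args[0]
	y := v.Args[1]
	if !(is64BitInt(t)) {
		goto endHASH
	}
	v.Op = OpSUBQ
	v.Aux = nil
	v.resetArgs()
	v.AddArg(x)
	v.AddArg(y)
	return true
}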
+func main() {
+	if len(os.Args) < 3 || len(os.Args) > 4 {
+		fmt.Printf("usage: go run rulegen.go <rule file> <function name> [<output file>]\n")
+		os.Exit(1)
+	}
+	rulefile := os.Args[1]
+	rulefn := os.Args[2]
+
+	// Open input file.
+	text, err := os.Open(rulefile)
+	if err != nil {
+		log.Fatalf("can't read rule file: %v", err)
+	}
+
+	// oprules contains a list of rules for each opcode
+	oprules := map[string][]string{}
+
+	// read rule file
+	scanner := bufio.NewScanner(text)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if i := strings.Index(line, "//"); i >= 0 {
+			// Remove comments.  Note that this isn't string safe, so
+			// it will truncate lines with // inside strings.  Oh well.
+			line = line[:i]
+		}
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+		op := strings.Split(line, " ")[0][1:]
+		oprules[op] = append(oprules[op], line)
+	}
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("scanner failed: %v\n", err)
+	}
+
+	// Start output buffer, write header.
+	w := new(bytes.Buffer)
+	fmt.Fprintf(w, "// autogenerated from %s: do not edit!\n", rulefile)
+	fmt.Fprintf(w, "// generated with: go run rulegen/rulegen.go %s\n", strings.Join(os.Args[1:], " "))
+	fmt.Fprintln(w, "package ssa")
+	fmt.Fprintf(w, "func %s(v *Value) bool {\n", rulefn)
+
+	// generate code for each rule
+	fmt.Fprintf(w, "switch v.Op {\n")
+	var ops []string
+	for op := range oprules {
+		ops = append(ops, op)
+	}
+	sort.Strings(ops)
+	for _, op := range ops {
+		fmt.Fprintf(w, "case Op%s:\n", op)
+		for _, rule := range oprules[op] {
+			// Note: we use a hash to identify the rule so that its
+			// identity is invariant to adding/removing rules elsewhere
+			// in the rules file.  This is useful to squash spurious
+			// diffs that would occur if we used rule index.
+			rulehash := fmt.Sprintf("%02x", md5.Sum([]byte(rule)))
+
+			// split at ->
+			s := strings.Split(rule, "->")
+			if len(s) != 2 {
+				log.Fatalf("no arrow in rule %s", rule)
+			}
+			lhs := strings.Trim(s[0], " \t")
+			result := strings.Trim(s[1], " \t\n")
+
+			// split match into matching part and additional condition
+			match := lhs
+			cond := ""
+			if i := strings.Index(match, "&&"); i >= 0 {
+				cond = strings.Trim(match[i+2:], " \t")
+				match = strings.Trim(match[:i], " \t")
+			}
+
+			fmt.Fprintf(w, "// match: %s\n", match)
+			fmt.Fprintf(w, "// cond: %s\n", cond)
+			fmt.Fprintf(w, "// result: %s\n", result)
+
+			fail := fmt.Sprintf("{\ngoto end%s\n}\n", rulehash)
+
+			fmt.Fprintf(w, "{\n")
+			genMatch(w, match, fail)
+
+			if cond != "" {
+				fmt.Fprintf(w, "if !(%s) %s", cond, fail)
+			}
+
+			genResult(w, result)
+			fmt.Fprintf(w, "return true\n")
+
+			fmt.Fprintf(w, "}\n")
+			fmt.Fprintf(w, "goto end%s\n", rulehash) // use label
+			fmt.Fprintf(w, "end%s:;\n", rulehash)
+		}
+	}
+	fmt.Fprintf(w, "}\n")
+	fmt.Fprintf(w, "return false\n")
+	fmt.Fprintf(w, "}\n")
+
+	// gofmt result
+	b := w.Bytes()
+	b, err = format.Source(b)
+	if err != nil {
+		panic(err)
+	}
+
+	// Write to a file if given, otherwise stdout.
+	if len(os.Args) >= 4 {
+		err = ioutil.WriteFile(os.Args[3], b, 0666)
+	} else {
+		_, err = os.Stdout.Write(b)
+	}
+	if err != nil {
+		log.Fatalf("can't write output: %v\n", err)
+	}
+}
+
+func genMatch(w io.Writer, match, fail string) {
+	genMatch0(w, match, "v", fail, map[string]string{}, true)
+}
+
+func genMatch0(w io.Writer, match, v, fail string, m map[string]string, top bool) {
+	if match[0] != '(' {
+		if x, ok := m[match]; ok {
+			// variable already has a definition.  Check whether
+			// the old definition and the new definition match.
+			// For example, (add x x).  Equality is just pointer equality
+			// on Values (so cse is important to do before lowering).
+			fmt.Fprintf(w, "if %s != %s %s", v, x, fail)
+			return
+		}
+		// remember that this variable references the given value
+		m[match] = v
+		fmt.Fprintf(w, "%s := %s\n", match, v)
+		return
+	}
+
+	// split body up into regions.  Split by spaces/tabs, except those
+	// contained in () or {}.
+	s := split(match[1 : len(match)-1])
+
+	// check op
+	if !top {
+		fmt.Fprintf(w, "if %s.Op != Op%s %s", v, s[0], fail)
+	}
+
+	// check type/aux/args
+	argnum := 0
+	for _, a := range s[1:] {
+		if a[0] == '<' {
+			// type restriction
+			t := a[1 : len(a)-1]
+			if t[0] == '{' {
+				// code.  We must match the results of this code.
+				fmt.Fprintf(w, "if %s.Type != %s %s", v, t[1:len(t)-1], fail)
+			} else {
+				// variable
+				if u, ok := m[t]; ok {
+					// must match previous variable
+					fmt.Fprintf(w, "if %s.Type != %s %s", v, u, fail)
+				} else {
+					m[t] = v + ".Type"
+					fmt.Fprintf(w, "%s := %s.Type\n", t, v)
+				}
+			}
+		} else if a[0] == '[' {
+			// aux restriction
+			x := a[1 : len(a)-1]
+			if x[0] == '{' {
+				// code
+				fmt.Fprintf(w, "if %s.Aux != %s %s", v, x[1:len(x)-1], fail)
+			} else {
+				// variable
+				if y, ok := m[x]; ok {
+					fmt.Fprintf(w, "if %s.Aux != %s %s", v, y, fail)
+				} else {
+					m[x] = v + ".Aux"
+					fmt.Fprintf(w, "%s := %s.Aux\n", x, v)
+				}
+			}
+		} else if a[0] == '{' {
+			fmt.Fprintf(w, "if %s.Args[%d] != %s %s", v, argnum, a[1:len(a)-1], fail)
+			argnum++
+		} else {
+			// variable or sexpr
+			genMatch0(w, a, fmt.Sprintf("%s.Args[%d]", v, argnum), fail, m, false)
+			argnum++
+		}
+	}
+}
+
+func genResult(w io.Writer, result string) {
+	genResult0(w, result, new(int), true)
+}
+func genResult0(w io.Writer, result string, alloc *int, top bool) string {
+	if result[0] != '(' {
+		// variable
+		if top {
+			fmt.Fprintf(w, "v.Op = %s.Op\n", result)
+			fmt.Fprintf(w, "v.Aux = %s.Aux\n", result)
+			fmt.Fprintf(w, "v.resetArgs()\n")
+			fmt.Fprintf(w, "v.AddArgs(%s.Args...)\n", result)
+		}
+		return result
+	}
+
+	s := split(result[1 : len(result)-1])
+	var v string
+	var hasType bool
+	if top {
+		v = "v"
+		fmt.Fprintf(w, "v.Op = Op%s\n", s[0])
+		fmt.Fprintf(w, "v.Aux = nil\n")
+		fmt.Fprintf(w, "v.resetArgs()\n")
+		hasType = true
+	} else {
+		v = fmt.Sprintf("v%d", *alloc)
+		*alloc++
+		fmt.Fprintf(w, "%s := v.Block.NewValue(Op%s, TypeInvalid, nil)\n", v, s[0])
+	}
+	for _, a := range s[1:] {
+		if a[0] == '<' {
+			// type restriction
+			t := a[1 : len(a)-1]
+			if t[0] == '{' {
+				t = t[1 : len(t)-1]
+			}
+			fmt.Fprintf(w, "%s.Type = %s\n", v, t)
+			hasType = true
+		} else if a[0] == '[' {
+			// aux restriction
+			x := a[1 : len(a)-1]
+			if x[0] == '{' {
+				x = x[1 : len(x)-1]
+			}
+			fmt.Fprintf(w, "%s.Aux = %s\n", v, x)
+		} else if a[0] == '{' {
+			fmt.Fprintf(w, "%s.AddArg(%s)\n", v, a[1:len(a)-1])
+		} else {
+			// regular argument (sexpr or variable)
+			x := genResult0(w, a, alloc, false)
+			fmt.Fprintf(w, "%s.AddArg(%s)\n", v, x)
+		}
+	}
+	if !hasType {
+		log.Fatalf("sub-expression %s must have a type", result)
+	}
+	return v
+}
+
+func split(s string) []string {
+	var r []string
+
+outer:
+	for s != "" {
+		d := 0               // depth of ({[<
+		var open, close byte // opening and closing markers ({[< or )}]>
+		nonsp := false       // found a non-space char so far
+		for i := 0; i < len(s); i++ {
+			switch {
+			case d == 0 && s[i] == '(':
+				open, close = '(', ')'
+				d++
+			case d == 0 && s[i] == '<':
+				open, close = '<', '>'
+				d++
+			case d == 0 && s[i] == '[':
+				open, close = '[', ']'
+				d++
+			case d == 0 && s[i] == '{':
+				open, close = '{', '}'
+				d++
+			case d == 0 && (s[i] == ' ' || s[i] == '\t'):
+				if nonsp {
+					r = append(r, strings.TrimSpace(s[:i]))
+					s = s[i:]
+					continue outer
+				}
+			case d > 0 && s[i] == open:
+				d++
+			case d > 0 && s[i] == close:
+				d--
+			default:
+				nonsp = true
+			}
+		}
+		if d != 0 {
+			panic("imbalanced expression: " + s)
+		}
+		if nonsp {
+			r = append(r, strings.TrimSpace(s))
+		}
+		break
+	}
+	return r
+}
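split cuts only on top-level whitespace, keeping bracketed groups intact. For example:

// split("(Const [c]) (Const [d])") returns ["(Const [c])", "(Const [d])"]
// split("<t> x y")                 returns ["<t>", "x", "y"]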
diff --git a/src/cmd/compile/internal/ssa/schedule.go b/src/cmd/compile/internal/ssa/schedule.go
new file mode 100644
index 0000000..0a89ac3
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/schedule.go
@@ -0,0 +1,69 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// Schedule the Values in each Block.  After this phase returns, the
+// order of b.Values matters and is the order in which those values
+// will appear in the assembly output.  For now it generates an
+// arbitrary valid schedule using a topological sort.  TODO(khr):
+// schedule smarter.
+func schedule(f *Func) {
+	const (
+		unmarked = 0
+		found    = 1
+		expanded = 2
+		done     = 3
+	)
+	state := make([]byte, f.NumValues())
+	var queue []*Value // stack-like worklist.  Contains found and expanded nodes.
+	var order []*Value
+
+	for _, b := range f.Blocks {
+		// Topologically sort the values in b.
+		order = order[:0]
+		for _, v := range b.Values {
+			if v.Op == OpPhi {
+				// Phis all go first.  We handle phis specially
+				// because they may have self edges "a = phi(a, b, c)"
+				order = append(order, v)
+				continue
+			}
+			if state[v.ID] != unmarked {
+				if state[v.ID] != done {
+					panic("bad state")
+				}
+				continue
+			}
+			state[v.ID] = found
+			queue = append(queue, v)
+			for len(queue) > 0 {
+				v = queue[len(queue)-1]
+				switch state[v.ID] {
+				case found:
+					state[v.ID] = expanded
+					// Note that v is not popped.  We leave it in place
+					// until all its children have been explored.
+					for _, w := range v.Args {
+						if w.Block == b && w.Op != OpPhi && state[w.ID] == unmarked {
+							state[w.ID] = found
+							queue = append(queue, w)
+						}
+					}
+				case expanded:
+					queue = queue[:len(queue)-1]
+					state[v.ID] = done
+					order = append(order, v)
+				default:
+					panic("bad state")
+				}
+			}
+		}
+		copy(b.Values, order)
+	}
+	// TODO: only allow one live mem type and one live flags type (x86)
+	// This restriction will force any loads (and any flag uses) to appear
+	// before the next store (flag update).  This "anti-dependence" is not
+	// recorded explicitly in ssa form.
+}
diff --git a/src/cmd/compile/internal/ssa/sparseset.go b/src/cmd/compile/internal/ssa/sparseset.go
new file mode 100644
index 0000000..b79aee8
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/sparseset.go
@@ -0,0 +1,75 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// from http://research.swtch.com/sparse
+// in turn, from Briggs and Torczon
+
+type sparseSet struct {
+	dense  []ID
+	sparse []int
+}
+
+// newSparseSet returns a sparseSet that can represent
+// integers between 0 and n-1
+func newSparseSet(n int) *sparseSet {
+	return &sparseSet{nil, make([]int, n)}
+}
+
+func (s *sparseSet) size() int {
+	return len(s.dense)
+}
+
+func (s *sparseSet) contains(x ID) bool {
+	i := s.sparse[x]
+	return i < len(s.dense) && s.dense[i] == x
+}
+
+func (s *sparseSet) add(x ID) {
+	i := s.sparse[x]
+	if i < len(s.dense) && s.dense[i] == x {
+		return
+	}
+	s.dense = append(s.dense, x)
+	s.sparse[x] = len(s.dense) - 1
+}
+
+func (s *sparseSet) addAll(a []ID) {
+	for _, x := range a {
+		s.add(x)
+	}
+}
+
+func (s *sparseSet) addAllValues(a []*Value) {
+	for _, v := range a {
+		s.add(v.ID)
+	}
+}
+
+func (s *sparseSet) remove(x ID) {
+	i := s.sparse[x]
+	if i < len(s.dense) && s.dense[i] == x {
+		y := s.dense[len(s.dense)-1]
+		s.dense[i] = y
+		s.sparse[y] = i
+		s.dense = s.dense[:len(s.dense)-1]
+	}
+}
+
+// pop removes an arbitrary element from the set.
+// The set must be nonempty.
+func (s *sparseSet) pop() ID {
+	x := s.dense[len(s.dense)-1]
+	s.dense = s.dense[:len(s.dense)-1]
+	return x
+}
+
+func (s *sparseSet) clear() {
+	s.dense = s.dense[:0]
+}
+
+func (s *sparseSet) contents() []ID {
+	return s.dense
+}
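A minimal usage sketch, assuming the IDs stay below the capacity handed to newSparseSet:

func exampleSparseSet() {
	s := newSparseSet(10)
	s.add(3)
	s.add(7)
	s.add(3)          // duplicate adds are no-ops
	_ = s.contains(7) // true
	s.remove(3)
	_ = s.size()      // 1
	_ = s.contents()  // []ID{7}
}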
diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go
new file mode 100644
index 0000000..ab68647
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/stackalloc.go
@@ -0,0 +1,111 @@
+package ssa
+
+import "log"
+
+// stackalloc allocates storage in the stack frame for
+// all Values that did not get a register.
+func stackalloc(f *Func) {
+	home := f.RegAlloc
+
+	// First compute the size of the outargs section.
+	n := int64(16) //TODO: compute max of all callsites
+
+	// Include one slot for deferreturn.
+	if false && n < f.Config.ptrSize { //TODO: check for deferreturn
+		n = f.Config.ptrSize
+	}
+
+	// TODO: group variables by ptr/nonptr, size, etc.  Emit ptr vars last
+	// so stackmap is smaller.
+
+	// Assign stack locations to phis first, because we
+	// must also assign the same locations to the phi copies
+	// introduced during regalloc.
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.Op != OpPhi {
+				continue
+			}
+			if v.Type.IsMemory() { // TODO: only "regallocable" types
+				continue
+			}
+			n += v.Type.Size()
+			// a := v.Type.Align()
+			// n = (n + a - 1) / a * a  TODO
+			loc := &LocalSlot{n}
+			home = setloc(home, v, loc)
+			for _, w := range v.Args {
+				home = setloc(home, w, loc)
+			}
+		}
+	}
+
+	// Now do all other unassigned values.
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.ID < ID(len(home)) && home[v.ID] != nil {
+				continue
+			}
+			if v.Type.IsMemory() { // TODO: only "regallocable" types
+				continue
+			}
+			if len(v.Args) == 0 {
+				// v will have been materialized wherever it is needed.
+				continue
+			}
+			if len(v.Args) == 1 && (v.Args[0].Op == OpFP || v.Args[0].Op == OpSP || v.Args[0].Op == OpGlobal) {
+				continue
+			}
+			// a := v.Type.Align()
+			// n = (n + a - 1) / a * a  TODO
+			n += v.Type.Size()
+			loc := &LocalSlot{n}
+			home = setloc(home, v, loc)
+		}
+	}
+
+	// TODO: align n
+	n += f.Config.ptrSize // space for return address.  TODO: arch-dependent
+	f.RegAlloc = home
+	f.FrameSize = n
+
+	// TODO: share stack slots among noninterfering (& gc type compatible) values
+
+	// adjust all uses of FP to SP now that we have the frame size.
+	var fp *Value
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.Op == OpFP {
+				if fp != nil {
+					log.Panicf("multiple FP ops: %s %s", fp, v)
+				}
+				fp = v
+			}
+			for i, a := range v.Args {
+				if a.Op != OpFP {
+					continue
+				}
+				// TODO: do this with arch-specific rewrite rules somehow?
+				switch v.Op {
+				case OpADDQ:
+					// (ADDQ (FP) x) -> (LEAQ [n] (SP) x)
+					v.Op = OpLEAQ
+					v.Aux = n
+				case OpLEAQ, OpMOVQload, OpMOVQstore, OpMOVBload, OpMOVQloadidx8:
+					if v.Op == OpMOVQloadidx8 && i == 1 {
+						// Note: we could do it, but it is probably an error
+						log.Panicf("can't do FP->SP adjust on index slot of load %s", v.Op)
+					}
+					// eg: (MOVQload [c] (FP) mem) -> (MOVQload [c+n] (SP) mem)
+					v.Aux = addOffset(v.Aux.(int64), n)
+				default:
+					log.Panicf("can't do FP->SP adjust on %s", v.Op)
+				}
+			}
+		}
+	}
+	if fp != nil {
+		fp.Op = OpSP
+		home[fp.ID] = &registers[4] // TODO: arch-dependent
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/type.go b/src/cmd/compile/internal/ssa/type.go
new file mode 100644
index 0000000..611c858
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/type.go
@@ -0,0 +1,74 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// TODO: use go/types instead?
+
+// A type interface used to import cmd/compile/internal/gc:Type
+// Type instances are not guaranteed to be canonical.
+type Type interface {
+	Size() int64 // return the size in bytes
+
+	IsBoolean() bool // is a named or unnamed boolean type
+	IsInteger() bool //  ... ditto for the others
+	IsSigned() bool
+	IsFloat() bool
+	IsPtr() bool
+
+	IsMemory() bool // special ssa-package-only types
+	IsFlags() bool
+
+	Elem() Type  // given []T or *T, return T
+	PtrTo() Type // given T, return *T
+
+	String() string
+}
+
+// Stub implementation for now, until we are completely using ../gc:Type
+type TypeImpl struct {
+	Size_   int64
+	Boolean bool
+	Integer bool
+	Signed  bool
+	Float   bool
+	Ptr     bool
+
+	Memory bool
+	Flags  bool
+
+	Name string
+}
+
+func (t *TypeImpl) Size() int64     { return t.Size_ }
+func (t *TypeImpl) IsBoolean() bool { return t.Boolean }
+func (t *TypeImpl) IsInteger() bool { return t.Integer }
+func (t *TypeImpl) IsSigned() bool  { return t.Signed }
+func (t *TypeImpl) IsFloat() bool   { return t.Float }
+func (t *TypeImpl) IsPtr() bool     { return t.Ptr }
+func (t *TypeImpl) IsMemory() bool  { return t.Memory }
+func (t *TypeImpl) IsFlags() bool   { return t.Flags }
+func (t *TypeImpl) String() string  { return t.Name }
+func (t *TypeImpl) Elem() Type      { panic("not implemented") }
+func (t *TypeImpl) PtrTo() Type     { panic("not implemented") }
+
+var (
+	// shortcuts for commonly used basic types
+	TypeInt8   = &TypeImpl{Size_: 1, Integer: true, Signed: true, Name: "int8"}
+	TypeInt16  = &TypeImpl{Size_: 2, Integer: true, Signed: true, Name: "int16"}
+	TypeInt32  = &TypeImpl{Size_: 4, Integer: true, Signed: true, Name: "int32"}
+	TypeInt64  = &TypeImpl{Size_: 8, Integer: true, Signed: true, Name: "int64"}
+	TypeUInt8  = &TypeImpl{Size_: 1, Integer: true, Name: "uint8"}
+	TypeUInt16 = &TypeImpl{Size_: 2, Integer: true, Name: "uint16"}
+	TypeUInt32 = &TypeImpl{Size_: 4, Integer: true, Name: "uint32"}
+	TypeUInt64 = &TypeImpl{Size_: 8, Integer: true, Name: "uint64"}
+	TypeBool   = &TypeImpl{Size_: 1, Boolean: true, Name: "bool"}
+	//TypeString = types.Typ[types.String]
+
+	TypeInvalid = &TypeImpl{Name: "invalid"}
+
+	// Additional compiler-only types go here.
+	TypeMem   = &TypeImpl{Memory: true, Name: "mem"}
+	TypeFlags = &TypeImpl{Flags: true, Name: "flags"}
+)
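Tests or experiments can build more stubs the same way; for instance a hypothetical 8-byte pointer type (not defined in this change) would be:

var TypeBytePtr = &TypeImpl{Size_: 8, Ptr: true, Name: "*byte"}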
diff --git a/src/cmd/compile/internal/ssa/value.go b/src/cmd/compile/internal/ssa/value.go
new file mode 100644
index 0000000..dab6239
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/value.go
@@ -0,0 +1,103 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+	"fmt"
+	"strings"
+)
+
+// A Value represents a value in the SSA representation of the program.
+// The ID and Type fields must not be modified.  The remainder may be modified
+// if they preserve the value of the Value (e.g. changing a (mul 2 x) to an (add x x)).
+type Value struct {
+	// A unique identifier for the value.  For performance we allocate these IDs
+	// densely starting at 0.  There is no guarantee that there won't be occasional holes, though.
+	ID ID
+
+	// The operation that computes this value.  See op.go.
+	Op Op
+
+	// The type of this value.  Normally this will be a Go type, but there
+	// are a few other pseudo-types, see type.go.
+	Type Type
+
+	// Auxiliary info for this value.  The type of this information depends on the opcode and type.
+	Aux interface{}
+
+	// Arguments of this value
+	Args []*Value
+
+	// Containing basic block
+	Block *Block
+
+	// Storage for the first two args
+	argstorage [2]*Value
+}
+
+// Examples:
+// Opcode          aux   args
+//  OpAdd          nil      2
+//  OpConst     string      0    string constant
+//  OpConst      int64      0    int64 constant
+//  OpADDQconst  int64      1    amd64 op: v = arg[0] + constant
+
+// short form print.  Just v#.
+func (v *Value) String() string {
+	return fmt.Sprintf("v%d", v.ID)
+}
+
+// long form print.  v# = opcode <type> [aux] args [: reg]
+func (v *Value) LongString() string {
+	s := fmt.Sprintf("v%d = %s", v.ID, strings.TrimPrefix(v.Op.String(), "Op"))
+	s += " <" + v.Type.String() + ">"
+	if v.Aux != nil {
+		s += fmt.Sprintf(" [%v]", v.Aux)
+	}
+	for _, a := range v.Args {
+		s += fmt.Sprintf(" %v", a)
+	}
+	r := v.Block.Func.RegAlloc
+	if r != nil && r[v.ID] != nil {
+		s += " : " + r[v.ID].Name()
+	}
+	return s
+}
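For a hypothetical v11 that adds v3 and v7 and was assigned AX by regalloc, String prints just "v11", while LongString prints something like:

v11 = ADDQ <int64> v3 v7 : AX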
+
+func (v *Value) AddArg(w *Value) {
+	if v.Args == nil {
+		v.resetArgs() // use argstorage
+	}
+	v.Args = append(v.Args, w)
+}
+func (v *Value) AddArgs(a ...*Value) {
+	if v.Args == nil {
+		v.resetArgs() // use argstorage
+	}
+	v.Args = append(v.Args, a...)
+}
+func (v *Value) SetArg(i int, w *Value) {
+	v.Args[i] = w
+}
+func (v *Value) RemoveArg(i int) {
+	copy(v.Args[i:], v.Args[i+1:])
+	v.Args[len(v.Args)-1] = nil // aid GC
+	v.Args = v.Args[:len(v.Args)-1]
+}
+func (v *Value) SetArgs1(a *Value) {
+	v.resetArgs()
+	v.AddArg(a)
+}
+func (v *Value) SetArgs2(a *Value, b *Value) {
+	v.resetArgs()
+	v.AddArg(a)
+	v.AddArg(b)
+}
+
+func (v *Value) resetArgs() {
+	v.argstorage[0] = nil
+	v.argstorage[1] = nil
+	v.Args = v.argstorage[:0]
+}
diff --git a/src/cmd/dist/buildtool.go b/src/cmd/dist/buildtool.go
index 2840f71..7988129 100644
--- a/src/cmd/dist/buildtool.go
+++ b/src/cmd/dist/buildtool.go
@@ -35,6 +35,7 @@
 	"compile/internal/big",
 	"compile/internal/gc",
 	"compile/internal/ppc64",
+	"compile/internal/ssa",
 	"compile/internal/x86",
 	"internal/asm",
 	"internal/gcprog",
diff --git a/src/cmd/internal/obj/x86/6.out.go b/src/cmd/internal/obj/x86/6.out.go
index c7f46e1..e36cb9e 100644
--- a/src/cmd/internal/obj/x86/6.out.go
+++ b/src/cmd/internal/obj/x86/6.out.go
@@ -110,23 +110,23 @@
 	AINTO
 	AIRETL
 	AIRETW
-	AJCC
-	AJCS
+	AJCC // >= unsigned
+	AJCS // < unsigned
 	AJCXZL
-	AJEQ
-	AJGE
-	AJGT
-	AJHI
-	AJLE
-	AJLS
-	AJLT
-	AJMI
-	AJNE
-	AJOC
-	AJOS
-	AJPC
-	AJPL
-	AJPS
+	AJEQ // == (zero)
+	AJGE // >= signed
+	AJGT // > signed
+	AJHI // > unsigned
+	AJLE // <= signed
+	AJLS // <= unsigned
+	AJLT // < signed
+	AJMI // sign bit set (negative)
+	AJNE // != (nonzero)
+	AJOC // overflow clear
+	AJOS // overflow set
+	AJPC // parity clear
+	AJPL // sign bit clear (positive)
+	AJPS // parity set
 	ALAHF
 	ALARL
 	ALARW