// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// x86 register conventions:
// - Integer types live in the low portion of registers.
// Upper portions are correctly extended.
// TODO: reconsider? The current choice means we need no extension for indexing,
// but we do need extension for e.g. 32-bit signed adds.
// - Boolean types use the low-order byte of a register. Upper bytes are junk.
// - We do not use AH,BH,CH,DH registers.
// - Floating-point types will live in the low natural slot of an SSE2 register.
// Unused portions are junk.
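// For example (per the lowerings below): a signed 32-bit result is re-extended
// with MOVLQSX, so an int32 holding -1 occupies the full register as
// 0xffffffffffffffff, while a 32-bit ADDL already zero-extends into the upper
// half, so the unsigned 32-bit case needs no extra op.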
// Lowering arithmetic
(Add64 x y) -> (ADDQ x y)
(Add64U x y) -> (ADDQ x y)
(AddPtr x y) -> (ADDQ x y)
(Add32U x y) -> (ADDL x y)
(Add32 x y) -> (MOVLQSX (ADDL <v.Type> x y))
(Add16U x y) -> (ADDW x y)
(Add16 x y) -> (MOVWQSX (ADDW <v.Type> x y))
(Add8U x y) -> (ADDB x y)
(Add8 x y) -> (MOVBQSX (ADDB <v.Type> x y))
(Sub64 x y) -> (SUBQ x y)
(Sub64U x y) -> (SUBQ x y)
(Sub32U x y) -> (SUBL x y)
(Sub32 x y) -> (MOVLQSX (SUBL <v.Type> x y))
(Sub16U x y) -> (SUBW x y)
(Sub16 x y) -> (MOVWQSX (SUBW <v.Type> x y))
(Sub8U x y) -> (SUBB x y)
(Sub8 x y) -> (MOVBQSX (SUBB <v.Type> x y))
(Neg64 x) -> (NEGQ x)
(Neg64U x) -> (NEGQ x)
(Neg32U x) -> (NEGL x)
(Neg32 x) -> (MOVLQSX (NEGL <v.Type> x))
(Neg16U x) -> (NEGW x)
(Neg16 x) -> (MOVWQSX (NEGW <v.Type> x))
(Neg8U x) -> (NEGB x)
(Neg8 x) -> (MOVBQSX (NEGB <v.Type> x))
(Mul64 x y) -> (MULQ x y)
(Mul64U x y) -> (MULQ x y)
(MulPtr x y) -> (MULQ x y)
(Mul32 x y) -> (MOVLQSX (MULL <v.Type> x y))
(Mul32U x y) -> (MULL x y)
(Mul16 x y) -> (MOVWQSX (MULW <v.Type> x y))
(Mul16U x y) -> (MULW x y)
// Note: we use 16-bit multiply instructions for 8-bit multiplies because
// the 16-bit multiply instructions are more forgiving (they operate on
// any register instead of just AX/DX).
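// The low byte of the 16-bit product equals the 8-bit product, so re-extending
// from the low byte with MOVBQSX/MOVBQZX below yields the correct 8-bit result.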
(Mul8 x y) -> (MOVBQSX (MULW <TypeInt16> x y))
(Mul8U x y) -> (MOVBQZX (MULW <TypeUInt16> x y))
(MOVLstore ptr (MOVLQSX x) mem) -> (MOVLstore ptr x mem)
(MOVWstore ptr (MOVWQSX x) mem) -> (MOVWstore ptr x mem)
(MOVBstore ptr (MOVBQSX x) mem) -> (MOVBstore ptr x mem)
(MOVLstore ptr (MOVLQZX x) mem) -> (MOVLstore ptr x mem)
(MOVWstore ptr (MOVWQZX x) mem) -> (MOVWstore ptr x mem)
(MOVBstore ptr (MOVBQZX x) mem) -> (MOVBstore ptr x mem)
(Convert <t> x) && t.IsInteger() && x.Type.IsInteger() -> (Copy x)
(ConvNop <t> x) && t == x.Type -> (Copy x)
// Lowering shifts
// Note: unsigned shifts need to return 0 if shift amount is >= 64.
// mask = shift >= 64 ? 0 : 0xffffffffffffffff
// result = mask & arg << shift
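// In the lowerings below, (CMPQconst [64] y) compares y with 64 (cf. the CMPQ
// folding rule further down), setting the carry/borrow flag exactly when
// y < 64 (unsigned); SBBQcarrymask turns that flag into all ones (carry set)
// or all zeros (carry clear), which is the mask described above.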
(Lsh64 <t> x y) ->
(ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPQconst <TypeFlags> [64] y)))
(Rsh64U <t> x y) ->
(ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPQconst <TypeFlags> [64] y)))
// Note: signed right shift needs to return 0/-1 if shift amount is >= 64.
// if shift > 63 { shift = 63 }
// result = arg >> shift
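// The CMOVQCC below performs the clamp: with the flags from comparing y with
// 64, carry clear means y >= 64 (unsigned), so the constant 63 is selected;
// otherwise y is kept. Shifting right by 63 then produces the required 0/-1.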
(Rsh64 <t> x y) ->
(SARQ <t> x (CMOVQCC <t>
(CMPQconst <TypeFlags> [64] y)
(Const <t> [63])
y))
(Less64 x y) -> (SETL (CMPQ <TypeFlags> x y))
(Leq64 x y) -> (SETLE (CMPQ <TypeFlags> x y))
(Greater64 x y) -> (SETG (CMPQ <TypeFlags> x y))
(Geq64 x y) -> (SETGE (CMPQ <TypeFlags> x y))
(Eq64 x y) -> (SETEQ (CMPQ <TypeFlags> x y))
(Eq32 x y) -> (SETEQ (CMPL <TypeFlags> x y))
(Eq16 x y) -> (SETEQ (CMPW <TypeFlags> x y))
(Eq8 x y) -> (SETEQ (CMPB <TypeFlags> x y))
(Neq64 x y) -> (SETNE (CMPQ <TypeFlags> x y))
(Neq32 x y) -> (SETNE (CMPL <TypeFlags> x y))
(Neq16 x y) -> (SETNE (CMPW <TypeFlags> x y))
(Neq8 x y) -> (SETNE (CMPB <TypeFlags> x y))
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload ptr mem)
(Load <t> ptr mem) && is32BitInt(t) -> (MOVLload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) -> (MOVWload ptr mem)
(Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBload ptr mem)
(Store ptr val mem) && (is64BitInt(val.Type) || isPtr(val.Type)) -> (MOVQstore ptr val mem)
(Store ptr val mem) && is32BitInt(val.Type) -> (MOVLstore ptr val mem)
(Store ptr val mem) && is16BitInt(val.Type) -> (MOVWstore ptr val mem)
(Store ptr val mem) && is8BitInt(val.Type) -> (MOVBstore ptr val mem)
(Store ptr val mem) && val.Type.IsBoolean() -> (MOVBstore ptr val mem)
// nil and bounds checks
(IsNonNil p) -> (SETNE (TESTQ <TypeFlags> p p))
(IsInBounds idx len) -> (SETB (CMPQ <TypeFlags> idx len))
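// TESTQ p p sets the zero flag exactly when p == 0, so SETNE yields the
// non-nil result; CMPQ idx len with the unsigned SETB gives idx < len, which
// (treating idx as unsigned) also rejects negative indexes in a single check.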
(Move [size] dst src mem) -> (REPMOVSB dst src (Const <TypeUInt64> [size]) mem)
(Not x) -> (XORQconst [1] x)
(OffPtr [off] ptr) -> (ADDQconst [off] ptr)
(Const <t> [val]) && t.IsInteger() -> (MOVQconst [val])
(Const <t>) && t.IsPtr() -> (MOVQconst [0]) // nil is the only const pointer
(Const <t>) && t.IsBoolean() && !v.Aux.(bool) -> (MOVQconst [0])
(Const <t>) && t.IsBoolean() && v.Aux.(bool) -> (MOVQconst [1])
(Addr {sym} base) -> (LEAQ {sym} base)
// block rewrites
(If (SETL cmp) yes no) -> (LT cmp yes no)
(If (SETLE cmp) yes no) -> (LE cmp yes no)
(If (SETG cmp) yes no) -> (GT cmp yes no)
(If (SETGE cmp) yes no) -> (GE cmp yes no)
(If (SETEQ cmp) yes no) -> (EQ cmp yes no)
(If (SETNE cmp) yes no) -> (NE cmp yes no)
(If (SETB cmp) yes no) -> (ULT cmp yes no)
(If (SETBE cmp) yes no) -> (ULE cmp yes no)
(If (SETA cmp) yes no) -> (UGT cmp yes no)
(If (SETAE cmp) yes no) -> (UGE cmp yes no)
(If cond yes no) && cond.Op == OpAMD64MOVBload -> (NE (TESTB <TypeFlags> cond cond) yes no)
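// A boolean loaded from memory is only meaningful in its low byte (see the
// conventions above), so TESTB cond cond sets the zero flag exactly when the
// condition is false.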
(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
// Rules below here apply some simple optimizations after lowering.
// TODO: Should this be a separate pass?
// fold constants into instructions
// TODO: restrict c to int32 range for all?
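// The is32Bit guards are needed because x86-64 arithmetic immediates are at
// most 32 bits and are sign-extended to 64 bits, so only constants that fit
// in an int32 can be folded into a Q-sized instruction.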
(ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
(ADDQ (MOVQconst [c]) x) && is32Bit(c) -> (ADDQconst [c] x)
(SUBQ x (MOVQconst [c])) -> (SUBQconst x [c])
(SUBQ <t> (MOVQconst [c]) x) -> (NEGQ (SUBQconst <t> x [c]))
(MULQ x (MOVQconst [c])) && is32Bit(c) -> (MULQconst [c] x)
(MULQ (MOVQconst [c]) x) && is32Bit(c) -> (MULQconst [c] x)
(ANDQ x (MOVQconst [c])) -> (ANDQconst [c] x)
(ANDQ (MOVQconst [c]) x) -> (ANDQconst [c] x)
(SHLQ x (MOVQconst [c])) -> (SHLQconst [c] x)
(SHRQ x (MOVQconst [c])) -> (SHRQconst [c] x)
(SARQ x (MOVQconst [c])) -> (SARQconst [c] x)
(CMPQ x (MOVQconst [c])) -> (CMPQconst x [c])
(CMPQ (MOVQconst [c]) x) -> (InvertFlags (CMPQconst <TypeFlags> x [c]))
// strength reduction
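// Assuming LEAQ2/LEAQ4/LEAQ8 x y compute x + 2*y, x + 4*y, and x + 8*y,
// (LEAQk x x) yields (k+1)*x, covering the 3, 5, and 9 cases below; any other
// power-of-two multiplier becomes a single shift.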
(MULQconst [-1] x) -> (NEGQ x)
(MULQconst [0] _) -> (MOVQconst [0])
(MULQconst [1] x) -> (Copy x)
(MULQconst [3] x) -> (LEAQ2 x x)
(MULQconst [5] x) -> (LEAQ4 x x)
(MULQconst [9] x) -> (LEAQ8 x x)
(MULQconst [c] x) && isPowerOfTwo(c) -> (SHLQconst [log2(c)] x)
// fold add/shift into leaq
(ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y)
(ADDQconst [c] (LEAQ8 [d] x y)) -> (LEAQ8 [addOff(c, d)] x y)
// Absorb InvertFlags into SET ops (reversing the sense of the comparison).
(SETL (InvertFlags x)) -> (SETG x)
(SETG (InvertFlags x)) -> (SETL x)
// fold constants into memory operations
// Note that this is not always a good idea: if not all uses of the ADDQconst
// are eliminated, we still have to compute the ADDQconst, and we then have
// potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
// Nevertheless, let's do it!
(MOVQload [off1] (ADDQconst [off2] ptr) mem) -> (MOVQload [addOff(off1, off2)] ptr mem)
(MOVQstore [off1] (ADDQconst [off2] ptr) val mem) -> (MOVQstore [addOff(off1, off2)] ptr val mem)
(MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) && (sym1 == nil || sym2 == nil) ->
(MOVQload [addOff(off1,off2)] {mergeSym(sym1,sym2)} base mem)
(MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && (sym1 == nil || sym2 == nil) ->
(MOVQstore [addOff(off1,off2)] {mergeSym(sym1,sym2)} base val mem)
// indexed loads and stores
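// MOVQloadidx8/MOVQstoreidx8 are assumed to address off + ptr + 8*idx,
// matching the scaled LEAQ8 addresses they absorb here.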
(MOVQload [off1] (LEAQ8 [off2] ptr idx) mem) -> (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
(MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem) -> (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
(MOVQloadidx8 [off1] (ADDQconst [off2] ptr) idx mem) -> (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
(MOVQstoreidx8 [off1] (ADDQconst [off2] ptr) idx val mem) -> (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
(ADDQconst [0] x) -> (Copy x)
// lower Zero of word-sized amounts (0, 1, 2, 4, or 8 bytes) to at most a single store
(Zero [0] _ mem) -> (Copy mem)
(Zero [1] destptr mem) -> (MOVBstore destptr (Const <TypeInt8> [0]) mem)
(Zero [2] destptr mem) -> (MOVWstore destptr (Const <TypeInt16> [0]) mem)
(Zero [4] destptr mem) -> (MOVLstore destptr (Const <TypeInt32> [0]) mem)
(Zero [8] destptr mem) -> (MOVQstore destptr (Const <TypeInt64> [0]) mem)
// rewrite anything less than 4 words (32 bytes) into a series of MOV[BWLQ] $0, off(ptr) instructions
(Zero [size] destptr mem) && size < 4*8 -> (MOVXzero [size] destptr mem)
// Use REPSTOSQ to zero larger blocks: store the whole quadwords with REPSTOSQ, then fill in the remaining tail bytes with the smaller stores above.
(Zero [size] destptr mem) && size >= 4*8 -> (Zero [size%8] (OffPtr <TypeUInt64> [size-(size%8)] destptr) (REPSTOSQ <TypeMem> destptr (Const <TypeUInt64> [size/8]) mem))
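// Illustrative example: Zero [41] becomes a REPSTOSQ of 41/8 = 5 quadwords
// followed by (Zero [1] (OffPtr [40] destptr) ...), and that 1-byte tail is
// then handled by the MOVBstore rule above.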
// Absorb InvertFlags into branches.
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
(LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
(GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
(ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
(UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
(ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
(UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
(EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
(NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
// get rid of >=64 code for constant shifts
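// Assuming inBounds(d, c) means 0 <= d < c: with a constant shift amount d,
// the guard collapses. d < 64 gives the all-ones mask (or keeps the original
// shift amount), while d >= 64 gives a zero mask (or clamps the SARQ amount
// to 63).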
(SBBQcarrymask (CMPQconst [c] (MOVQconst [d]))) && inBounds(d, c) -> (Const [-1])
(SBBQcarrymask (CMPQconst [c] (MOVQconst [d]))) && !inBounds(d, c) -> (Const [0])
(ANDQconst [0] _) -> (MOVQconst [0])
(ANDQconst [-1] x) -> (Copy x)
(CMOVQCC (CMPQconst [c] (MOVQconst [d])) _ x) && inBounds(d, c) -> (Copy x)
(CMOVQCC (CMPQconst [c] (MOVQconst [d])) x _) && !inBounds(d, c) -> (Copy x)