[dev.ssa] cmd/compile: better copying
Replace REP MOVSB with all the copying techniques used by the
old compiler. Copy in chunks, DUFFCOPY, etc.
Introduce MOVO opcodes and an Int128 type so that copies can move
16 bytes at a time.
Change-Id: I1e73e68ca1d8b3dd58bb4af2f4c9e5d9bf13a502
Reviewed-on: https://go-review.googlesource.com/16174
Reviewed-by: Todd Neal <todd@tneal.org>
Run-TryBot: Keith Randall <khr@golang.org>
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 50fc935..96c9a5f 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3697,7 +3697,7 @@
p.From.Val = math.Float64frombits(uint64(v.AuxInt))
p.To.Type = obj.TYPE_REG
p.To.Reg = x
- case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVBQZXload:
+ case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVBQZXload, ssa.OpAMD64MOVOload:
p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = regnum(v.Args[0])
@@ -3722,7 +3722,7 @@
p.From.Index = regnum(v.Args[1])
p.To.Type = obj.TYPE_REG
p.To.Reg = regnum(v)
- case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore:
+ case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = regnum(v.Args[1])
@@ -3763,6 +3763,11 @@
}
r := regnum(v)
opregreg(x86.AXORPS, r, r)
+ case ssa.OpAMD64DUFFCOPY:
+ p := Prog(obj.ADUFFCOPY)
+ p.To.Type = obj.TYPE_ADDR
+ p.To.Sym = Linksym(Pkglookup("duffcopy", Runtimepkg))
+ p.To.Offset = v.AuxInt
case ssa.OpCopy: // TODO: lower to MOVQ earlier?
if v.Type.IsMemory() {
@@ -3961,9 +3966,9 @@
case ssa.OpAMD64REPSTOSQ:
Prog(x86.AREP)
Prog(x86.ASTOSQ)
- case ssa.OpAMD64REPMOVSB:
+ case ssa.OpAMD64REPMOVSQ:
Prog(x86.AREP)
- Prog(x86.AMOVSB)
+ Prog(x86.AMOVSQ)
case ssa.OpVarDef:
Gvardef(v.Aux.(*Node))
case ssa.OpVarKill: