[dev.ssa] cmd/compile: clean up zeroing.  Use duffzero when appropriate.

Change-Id: I4deb03340e87f43179d5e22bf81843c17b5581fc
Reviewed-on: https://go-review.googlesource.com/14756
Reviewed-by: David Chase <drchase@google.com>
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index b9da5ed..51cf01a 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3613,22 +3613,12 @@
 		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
 		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
 		opregreg(v.Op.Asm(), regnum(v), regnum(v.Args[0]))
-	case ssa.OpAMD64MOVXzero:
-		nb := v.AuxInt
-		offset := int64(0)
-		reg := regnum(v.Args[0])
-		for nb >= 8 {
-			nb, offset = movZero(x86.AMOVQ, 8, nb, offset, reg)
-		}
-		for nb >= 4 {
-			nb, offset = movZero(x86.AMOVL, 4, nb, offset, reg)
-		}
-		for nb >= 2 {
-			nb, offset = movZero(x86.AMOVW, 2, nb, offset, reg)
-		}
-		for nb >= 1 {
-			nb, offset = movZero(x86.AMOVB, 1, nb, offset, reg)
-		}
+	case ssa.OpAMD64DUFFZERO:
+		p := Prog(obj.ADUFFZERO)
+		p.To.Type = obj.TYPE_ADDR
+		p.To.Sym = Linksym(Pkglookup("duffzero", Runtimepkg))
+		p.To.Offset = v.AuxInt
+
 	case ssa.OpCopy: // TODO: lower to MOVQ earlier?
 		if v.Type.IsMemory() {
 			return
@@ -3830,11 +3820,6 @@
 	case ssa.OpAMD64InvertFlags:
 		v.Fatalf("InvertFlags should never make it to codegen %v", v)
 	case ssa.OpAMD64REPSTOSQ:
-		p := Prog(x86.AXORL) // TODO: lift out zeroing into its own instruction?
-		p.From.Type = obj.TYPE_REG
-		p.From.Reg = x86.REG_AX
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x86.REG_AX
 		Prog(x86.AREP)
 		Prog(x86.ASTOSQ)
 	case ssa.OpAMD64REPMOVSB:
diff --git a/src/cmd/compile/internal/gc/ssa_test.go b/src/cmd/compile/internal/gc/ssa_test.go
index b63749f..dafbcf2 100644
--- a/src/cmd/compile/internal/gc/ssa_test.go
+++ b/src/cmd/compile/internal/gc/ssa_test.go
@@ -87,3 +87,5 @@
 func TestArray(t *testing.T) { runTest(t, "array_ssa.go") }
 
 func TestAppend(t *testing.T) { runTest(t, "append_ssa.go") }
+
+func TestZero(t *testing.T) { runTest(t, "zero_ssa.go") }
diff --git a/src/cmd/compile/internal/gc/testdata/gen/zeroGen.go b/src/cmd/compile/internal/gc/testdata/gen/zeroGen.go
new file mode 100644
index 0000000..90e8029
--- /dev/null
+++ b/src/cmd/compile/internal/gc/testdata/gen/zeroGen.go
@@ -0,0 +1,88 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"go/format"
+	"io/ioutil"
+	"log"
+)
+
+// This program generates tests to verify that zeroing operations
+// zero the data they are supposed to and clobber no adjacent values.
+
+// run as `go run zeroGen.go`.  A file called zero_ssa.go
+// will be written into the parent directory containing the tests.
+
+var sizes = [...]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 23, 24, 25, 31, 32, 33, 63, 64, 65, 1023, 1024, 1025}
+
+func main() {
+	w := new(bytes.Buffer)
+	fmt.Fprintf(w, "// run\n")
+	fmt.Fprintf(w, "// autogenerated from gen/zeroGen.go - do not edit!\n")
+	fmt.Fprintf(w, "package main\n")
+	fmt.Fprintf(w, "import \"fmt\"\n")
+
+	for _, s := range sizes {
+		// type for test
+		fmt.Fprintf(w, "type T%d struct {\n", s)
+		fmt.Fprintf(w, "  pre [8]byte\n")
+		fmt.Fprintf(w, "  mid [%d]byte\n", s)
+		fmt.Fprintf(w, "  post [8]byte\n")
+		fmt.Fprintf(w, "}\n")
+
+		// function being tested
+		fmt.Fprintf(w, "func zero%d_ssa(x *[%d]byte) {\n", s, s)
+		fmt.Fprintf(w, "  switch{}\n")
+		fmt.Fprintf(w, "  *x = [%d]byte{}\n", s)
+		fmt.Fprintf(w, "}\n")
+
+		// testing harness
+		fmt.Fprintf(w, "func testZero%d() {\n", s)
+		fmt.Fprintf(w, "  a := T%d{[8]byte{255,255,255,255,255,255,255,255},[%d]byte{", s, s)
+		for i := 0; i < s; i++ {
+			fmt.Fprintf(w, "255,")
+		}
+		fmt.Fprintf(w, "},[8]byte{255,255,255,255,255,255,255,255}}\n")
+		fmt.Fprintf(w, "  zero%d_ssa(&a.mid)\n", s)
+		fmt.Fprintf(w, "  want := T%d{[8]byte{255,255,255,255,255,255,255,255},[%d]byte{", s, s)
+		for i := 0; i < s; i++ {
+			fmt.Fprintf(w, "0,")
+		}
+		fmt.Fprintf(w, "},[8]byte{255,255,255,255,255,255,255,255}}\n")
+		fmt.Fprintf(w, "  if a != want {\n")
+		fmt.Fprintf(w, "    fmt.Printf(\"zero%d got=%%v, want %%v\\n\", a, want)\n", s)
+		fmt.Fprintf(w, "    failed=true\n")
+		fmt.Fprintf(w, "  }\n")
+		fmt.Fprintf(w, "}\n")
+	}
+
+	// boilerplate at end
+	fmt.Fprintf(w, "var failed bool\n")
+	fmt.Fprintf(w, "func main() {\n")
+	for _, s := range sizes {
+		fmt.Fprintf(w, "  testZero%d()\n", s)
+	}
+	fmt.Fprintf(w, "  if failed {\n")
+	fmt.Fprintf(w, "    panic(\"failed\")\n")
+	fmt.Fprintf(w, "  }\n")
+	fmt.Fprintf(w, "}\n")
+
+	// gofmt result
+	b := w.Bytes()
+	src, err := format.Source(b)
+	if err != nil {
+		fmt.Printf("%s\n", b)
+		panic(err)
+	}
+
+	// write to file
+	err = ioutil.WriteFile("../zero_ssa.go", src, 0666)
+	if err != nil {
+		log.Fatalf("can't write output: %v\n", err)
+	}
+}
diff --git a/src/cmd/compile/internal/gc/testdata/zero_ssa.go b/src/cmd/compile/internal/gc/testdata/zero_ssa.go
new file mode 100644
index 0000000..0ec883b
--- /dev/null
+++ b/src/cmd/compile/internal/gc/testdata/zero_ssa.go
@@ -0,0 +1,563 @@
+// run
+// autogenerated from gen/zeroGen.go - do not edit!
+package main
+
+import "fmt"
+
+type T1 struct {
+	pre  [8]byte
+	mid  [1]byte
+	post [8]byte
+}
+
+func zero1_ssa(x *[1]byte) {
+	switch {
+	}
+	*x = [1]byte{}
+}
+func testZero1() {
+	a := T1{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [1]byte{255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero1_ssa(&a.mid)
+	want := T1{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [1]byte{0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero1 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T2 struct {
+	pre  [8]byte
+	mid  [2]byte
+	post [8]byte
+}
+
+func zero2_ssa(x *[2]byte) {
+	switch {
+	}
+	*x = [2]byte{}
+}
+func testZero2() {
+	a := T2{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [2]byte{255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero2_ssa(&a.mid)
+	want := T2{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [2]byte{0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero2 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T3 struct {
+	pre  [8]byte
+	mid  [3]byte
+	post [8]byte
+}
+
+func zero3_ssa(x *[3]byte) {
+	switch {
+	}
+	*x = [3]byte{}
+}
+func testZero3() {
+	a := T3{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [3]byte{255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero3_ssa(&a.mid)
+	want := T3{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [3]byte{0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero3 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T4 struct {
+	pre  [8]byte
+	mid  [4]byte
+	post [8]byte
+}
+
+func zero4_ssa(x *[4]byte) {
+	switch {
+	}
+	*x = [4]byte{}
+}
+func testZero4() {
+	a := T4{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [4]byte{255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero4_ssa(&a.mid)
+	want := T4{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [4]byte{0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero4 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T5 struct {
+	pre  [8]byte
+	mid  [5]byte
+	post [8]byte
+}
+
+func zero5_ssa(x *[5]byte) {
+	switch {
+	}
+	*x = [5]byte{}
+}
+func testZero5() {
+	a := T5{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [5]byte{255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero5_ssa(&a.mid)
+	want := T5{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [5]byte{0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero5 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T6 struct {
+	pre  [8]byte
+	mid  [6]byte
+	post [8]byte
+}
+
+func zero6_ssa(x *[6]byte) {
+	switch {
+	}
+	*x = [6]byte{}
+}
+func testZero6() {
+	a := T6{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [6]byte{255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero6_ssa(&a.mid)
+	want := T6{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [6]byte{0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero6 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T7 struct {
+	pre  [8]byte
+	mid  [7]byte
+	post [8]byte
+}
+
+func zero7_ssa(x *[7]byte) {
+	switch {
+	}
+	*x = [7]byte{}
+}
+func testZero7() {
+	a := T7{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [7]byte{255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero7_ssa(&a.mid)
+	want := T7{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [7]byte{0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero7 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T8 struct {
+	pre  [8]byte
+	mid  [8]byte
+	post [8]byte
+}
+
+func zero8_ssa(x *[8]byte) {
+	switch {
+	}
+	*x = [8]byte{}
+}
+func testZero8() {
+	a := T8{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero8_ssa(&a.mid)
+	want := T8{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero8 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T9 struct {
+	pre  [8]byte
+	mid  [9]byte
+	post [8]byte
+}
+
+func zero9_ssa(x *[9]byte) {
+	switch {
+	}
+	*x = [9]byte{}
+}
+func testZero9() {
+	a := T9{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [9]byte{255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero9_ssa(&a.mid)
+	want := T9{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [9]byte{0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero9 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T10 struct {
+	pre  [8]byte
+	mid  [10]byte
+	post [8]byte
+}
+
+func zero10_ssa(x *[10]byte) {
+	switch {
+	}
+	*x = [10]byte{}
+}
+func testZero10() {
+	a := T10{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [10]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero10_ssa(&a.mid)
+	want := T10{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [10]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero10 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T15 struct {
+	pre  [8]byte
+	mid  [15]byte
+	post [8]byte
+}
+
+func zero15_ssa(x *[15]byte) {
+	switch {
+	}
+	*x = [15]byte{}
+}
+func testZero15() {
+	a := T15{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [15]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero15_ssa(&a.mid)
+	want := T15{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [15]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero15 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T16 struct {
+	pre  [8]byte
+	mid  [16]byte
+	post [8]byte
+}
+
+func zero16_ssa(x *[16]byte) {
+	switch {
+	}
+	*x = [16]byte{}
+}
+func testZero16() {
+	a := T16{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [16]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero16_ssa(&a.mid)
+	want := T16{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero16 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T17 struct {
+	pre  [8]byte
+	mid  [17]byte
+	post [8]byte
+}
+
+func zero17_ssa(x *[17]byte) {
+	switch {
+	}
+	*x = [17]byte{}
+}
+func testZero17() {
+	a := T17{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [17]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero17_ssa(&a.mid)
+	want := T17{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [17]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero17 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T23 struct {
+	pre  [8]byte
+	mid  [23]byte
+	post [8]byte
+}
+
+func zero23_ssa(x *[23]byte) {
+	switch {
+	}
+	*x = [23]byte{}
+}
+func testZero23() {
+	a := T23{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [23]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero23_ssa(&a.mid)
+	want := T23{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [23]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero23 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T24 struct {
+	pre  [8]byte
+	mid  [24]byte
+	post [8]byte
+}
+
+func zero24_ssa(x *[24]byte) {
+	switch {
+	}
+	*x = [24]byte{}
+}
+func testZero24() {
+	a := T24{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [24]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero24_ssa(&a.mid)
+	want := T24{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [24]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero24 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T25 struct {
+	pre  [8]byte
+	mid  [25]byte
+	post [8]byte
+}
+
+func zero25_ssa(x *[25]byte) {
+	switch {
+	}
+	*x = [25]byte{}
+}
+func testZero25() {
+	a := T25{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [25]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero25_ssa(&a.mid)
+	want := T25{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [25]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero25 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T31 struct {
+	pre  [8]byte
+	mid  [31]byte
+	post [8]byte
+}
+
+func zero31_ssa(x *[31]byte) {
+	switch {
+	}
+	*x = [31]byte{}
+}
+func testZero31() {
+	a := T31{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [31]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero31_ssa(&a.mid)
+	want := T31{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [31]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero31 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T32 struct {
+	pre  [8]byte
+	mid  [32]byte
+	post [8]byte
+}
+
+func zero32_ssa(x *[32]byte) {
+	switch {
+	}
+	*x = [32]byte{}
+}
+func testZero32() {
+	a := T32{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [32]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero32_ssa(&a.mid)
+	want := T32{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [32]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero32 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T33 struct {
+	pre  [8]byte
+	mid  [33]byte
+	post [8]byte
+}
+
+func zero33_ssa(x *[33]byte) {
+	switch {
+	}
+	*x = [33]byte{}
+}
+func testZero33() {
+	a := T33{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [33]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero33_ssa(&a.mid)
+	want := T33{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [33]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero33 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T63 struct {
+	pre  [8]byte
+	mid  [63]byte
+	post [8]byte
+}
+
+func zero63_ssa(x *[63]byte) {
+	switch {
+	}
+	*x = [63]byte{}
+}
+func testZero63() {
+	a := T63{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [63]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero63_ssa(&a.mid)
+	want := T63{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [63]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero63 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T64 struct {
+	pre  [8]byte
+	mid  [64]byte
+	post [8]byte
+}
+
+func zero64_ssa(x *[64]byte) {
+	switch {
+	}
+	*x = [64]byte{}
+}
+func testZero64() {
+	a := T64{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [64]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero64_ssa(&a.mid)
+	want := T64{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [64]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero64 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T65 struct {
+	pre  [8]byte
+	mid  [65]byte
+	post [8]byte
+}
+
+func zero65_ssa(x *[65]byte) {
+	switch {
+	}
+	*x = [65]byte{}
+}
+func testZero65() {
+	a := T65{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [65]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero65_ssa(&a.mid)
+	want := T65{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [65]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero65 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T1023 struct {
+	pre  [8]byte
+	mid  [1023]byte
+	post [8]byte
+}
+
+func zero1023_ssa(x *[1023]byte) {
+	switch {
+	}
+	*x = [1023]byte{}
+}
+func testZero1023() {
+	a := T1023{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [1023]byte{}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero1023_ssa(&a.mid)
+	want := T1023{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [1023]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero1023 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T1024 struct {
+	pre  [8]byte
+	mid  [1024]byte
+	post [8]byte
+}
+
+func zero1024_ssa(x *[1024]byte) {
+	switch {
+	}
+	*x = [1024]byte{}
+}
+func testZero1024() {
+	a := T1024{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [1024]byte{}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero1024_ssa(&a.mid)
+	want := T1024{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [1024]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero1024 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+type T1025 struct {
+	pre  [8]byte
+	mid  [1025]byte
+	post [8]byte
+}
+
+func zero1025_ssa(x *[1025]byte) {
+	switch {
+	}
+	*x = [1025]byte{}
+}
+func testZero1025() {
+	a := T1025{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [1025]byte{}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	zero1025_ssa(&a.mid)
+	want := T1025{[8]byte{255, 255, 255, 255, 255, 255, 255, 255}, [1025]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, [8]byte{255, 255, 255, 255, 255, 255, 255, 255}}
+	if a != want {
+		fmt.Printf("zero1025 got=%v, want %v\n", a, want)
+		failed = true
+	}
+}
+
+var failed bool
+
+func main() {
+	testZero1()
+	testZero2()
+	testZero3()
+	testZero4()
+	testZero5()
+	testZero6()
+	testZero7()
+	testZero8()
+	testZero9()
+	testZero10()
+	testZero15()
+	testZero16()
+	testZero17()
+	testZero23()
+	testZero24()
+	testZero25()
+	testZero31()
+	testZero32()
+	testZero33()
+	testZero63()
+	testZero64()
+	testZero65()
+	testZero1023()
+	testZero1024()
+	testZero1025()
+	if failed {
+		panic("failed")
+	}
+}
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 5b83c97..3d308d7 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -565,15 +565,50 @@
 
 // lower Zero instructions with word sizes
 (Zero [0] _ mem) -> mem
-(Zero [1] destptr mem) -> (MOVBstore destptr (MOVBconst <config.Frontend().TypeInt8()> [0]) mem)
-(Zero [2] destptr mem) -> (MOVWstore destptr (MOVWconst <config.Frontend().TypeInt16()> [0]) mem)
-(Zero [4] destptr mem) -> (MOVLstore destptr (MOVLconst <config.Frontend().TypeInt32()> [0]) mem)
-(Zero [8] destptr mem) -> (MOVQstore destptr (MOVQconst <config.Frontend().TypeInt64()> [0]) mem)
+(Zero [1] destptr mem) -> (MOVBstore destptr (MOVBconst [0]) mem)
+(Zero [2] destptr mem) -> (MOVWstore destptr (MOVWconst [0]) mem)
+(Zero [4] destptr mem) -> (MOVLstore destptr (MOVLconst [0]) mem)
+(Zero [8] destptr mem) -> (MOVQstore destptr (MOVQconst [0]) mem)
 
-// rewrite anything less than 4 words into a series of MOV[BWLQ] $0, ptr(off) instructions
-(Zero [size] destptr mem) && size < 4*8 -> (MOVXzero [size] destptr mem)
-// Use STOSQ to zero memory. Rewrite this into storing the words with REPSTOSQ and then filling in the remainder with linear moves
-(Zero [size] destptr mem) && size >= 4*8 -> (Zero [size%8] (OffPtr <config.Frontend().TypeUInt64()> [size-(size%8)] destptr) (REPSTOSQ  <TypeMem> destptr (MOVQconst <config.Frontend().TypeUInt64()> [size/8]) mem))
+(Zero [3] destptr mem) ->
+	(MOVBstore (ADDQconst [2] destptr) (MOVBconst [0])
+		(MOVWstore destptr (MOVWconst [0]) mem))
+(Zero [5] destptr mem) ->
+	(MOVBstore (ADDQconst [4] destptr) (MOVBconst [0])
+		(MOVLstore destptr (MOVLconst [0]) mem))
+(Zero [6] destptr mem) ->
+	(MOVWstore (ADDQconst [4] destptr) (MOVWconst [0])
+		(MOVLstore destptr (MOVLconst [0]) mem))
+(Zero [7] destptr mem) ->
+	(MOVLstore (ADDQconst [3] destptr) (MOVLconst [0])
+		(MOVLstore destptr (MOVLconst [0]) mem))
+
+// Strip off any fractional word zeroing.
+(Zero [size] destptr mem) && size%8 != 0 && size > 8 ->
+	(Zero [size-size%8] (ADDQconst destptr [size%8])
+		(MOVQstore destptr (MOVQconst [0]) mem))
+
+// Zero small numbers of words directly.
+(Zero [16] destptr mem) ->
+	(MOVQstore (ADDQconst [8] destptr) (MOVQconst [0])
+		(MOVQstore destptr (MOVQconst [0]) mem))
+(Zero [24] destptr mem) ->
+	(MOVQstore (ADDQconst [16] destptr) (MOVQconst [0])
+		(MOVQstore (ADDQconst [8] destptr) (MOVQconst [0])
+			(MOVQstore destptr (MOVQconst [0]) mem)))
+(Zero [32] destptr mem) ->
+	(MOVQstore (ADDQconst [24] destptr) (MOVQconst [0])
+		(MOVQstore (ADDQconst [16] destptr) (MOVQconst [0])
+			(MOVQstore (ADDQconst [8] destptr) (MOVQconst [0])
+				(MOVQstore destptr (MOVQconst [0]) mem))))
+
+// Medium zeroing uses a duff device.
+(Zero [size] destptr mem) && size <= 1024 && size%8 == 0 ->
+	(DUFFZERO [duffStart(size)] (ADDQconst [duffAdj(size)] destptr) (MOVQconst [0]) mem)
+
+// Large zeroing uses REP STOSQ.
+(Zero [size] destptr mem) && size > 1024 && size%8 == 0 ->
+	(REPSTOSQ destptr (MOVQconst [size/8]) (MOVQconst [0]) mem)
 
 // Absorb InvertFlags into branches.
 (LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 79d45e9..6f318d3 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -117,9 +117,8 @@
 		gpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
 		gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
 
-		gpstore      = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
-		gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
-		gpstoreidx   = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
+		gpstore    = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
+		gpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
 
 		fp01    = regInfo{inputs: []regMask{}, outputs: fponly}
 		fp21    = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
@@ -167,14 +166,14 @@
 		{name: "MOVSDstoreidx8", reg: fpstoreidx, asm: "MOVSD"}, // fp64 indexed by 8i store
 
 		// binary ops
-		{name: "ADDQ", reg: gp21, asm: "ADDQ"},      // arg0 + arg1
-		{name: "ADDL", reg: gp21, asm: "ADDL"},      // arg0 + arg1
-		{name: "ADDW", reg: gp21, asm: "ADDW"},      // arg0 + arg1
-		{name: "ADDB", reg: gp21, asm: "ADDB"},      // arg0 + arg1
-		{name: "ADDQconst", reg: gp11, asm: "ADDQ"}, // arg0 + auxint
-		{name: "ADDLconst", reg: gp11, asm: "ADDL"}, // arg0 + auxint
-		{name: "ADDWconst", reg: gp11, asm: "ADDW"}, // arg0 + auxint
-		{name: "ADDBconst", reg: gp11, asm: "ADDB"}, // arg0 + auxint
+		{name: "ADDQ", reg: gp21, asm: "ADDQ"},                     // arg0 + arg1
+		{name: "ADDL", reg: gp21, asm: "ADDL"},                     // arg0 + arg1
+		{name: "ADDW", reg: gp21, asm: "ADDW"},                     // arg0 + arg1
+		{name: "ADDB", reg: gp21, asm: "ADDB"},                     // arg0 + arg1
+		{name: "ADDQconst", reg: gp11, asm: "ADDQ", typ: "UInt64"}, // arg0 + auxint
+		{name: "ADDLconst", reg: gp11, asm: "ADDL"},                // arg0 + auxint
+		{name: "ADDWconst", reg: gp11, asm: "ADDW"},                // arg0 + auxint
+		{name: "ADDBconst", reg: gp11, asm: "ADDB"},                // arg0 + auxint
 
 		{name: "SUBQ", reg: gp21, asm: "SUBQ"},      // arg0 - arg1
 		{name: "SUBL", reg: gp21, asm: "SUBL"},      // arg0 - arg1
@@ -343,10 +342,10 @@
 
 		// clobbers flags as liblink will rewrite these to XOR reg, reg if the constant is zero
 		// TODO: revisit when issue 12405 is fixed
-		{name: "MOVBconst", reg: gp01flags, asm: "MOVB"}, // 8 low bits of auxint
-		{name: "MOVWconst", reg: gp01flags, asm: "MOVW"}, // 16 low bits of auxint
-		{name: "MOVLconst", reg: gp01flags, asm: "MOVL"}, // 32 low bits of auxint
-		{name: "MOVQconst", reg: gp01flags, asm: "MOVQ"}, // auxint
+		{name: "MOVBconst", reg: gp01flags, asm: "MOVB", typ: "UInt8"},  // 8 low bits of auxint
+		{name: "MOVWconst", reg: gp01flags, asm: "MOVW", typ: "UInt16"}, // 16 low bits of auxint
+		{name: "MOVLconst", reg: gp01flags, asm: "MOVL", typ: "UInt32"}, // 32 low bits of auxint
+		{name: "MOVQconst", reg: gp01flags, asm: "MOVQ", typ: "UInt64"}, // auxint
 
 		{name: "CVTTSD2SL", reg: fpgp, asm: "CVTTSD2SL"}, // convert float64 to int32
 		{name: "CVTTSD2SQ", reg: fpgp, asm: "CVTTSD2SQ"}, // convert float64 to int64
@@ -368,24 +367,45 @@
 		{name: "LEAQ8", reg: gp21sb}, // arg0 + 8*arg1 + auxint
 
 		// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
-		{name: "MOVBload", reg: gpload, asm: "MOVB"},          // load byte from arg0+auxint+aux. arg1=mem
-		{name: "MOVBQSXload", reg: gpload, asm: "MOVBQSX"},    // ditto, extend to int64
-		{name: "MOVBQZXload", reg: gpload, asm: "MOVBQZX"},    // ditto, extend to uint64
-		{name: "MOVWload", reg: gpload, asm: "MOVW"},          // load 2 bytes from arg0+auxint+aux. arg1=mem
-		{name: "MOVLload", reg: gpload, asm: "MOVL"},          // load 4 bytes from arg0+auxint+aux. arg1=mem
-		{name: "MOVQload", reg: gpload, asm: "MOVQ"},          // load 8 bytes from arg0+auxint+aux. arg1=mem
-		{name: "MOVQloadidx8", reg: gploadidx, asm: "MOVQ"},   // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
-		{name: "MOVBstore", reg: gpstore, asm: "MOVB"},        // store byte in arg1 to arg0+auxint+aux. arg2=mem
-		{name: "MOVWstore", reg: gpstore, asm: "MOVW"},        // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
-		{name: "MOVLstore", reg: gpstore, asm: "MOVL"},        // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
-		{name: "MOVQstore", reg: gpstore, asm: "MOVQ"},        // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
-		{name: "MOVQstoreidx8", reg: gpstoreidx, asm: "MOVQ"}, // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
+		{name: "MOVBload", reg: gpload, asm: "MOVB"},               // load byte from arg0+auxint+aux. arg1=mem
+		{name: "MOVBQSXload", reg: gpload, asm: "MOVBQSX"},         // ditto, extend to int64
+		{name: "MOVBQZXload", reg: gpload, asm: "MOVBQZX"},         // ditto, extend to uint64
+		{name: "MOVWload", reg: gpload, asm: "MOVW"},               // load 2 bytes from arg0+auxint+aux. arg1=mem
+		{name: "MOVLload", reg: gpload, asm: "MOVL"},               // load 4 bytes from arg0+auxint+aux. arg1=mem
+		{name: "MOVQload", reg: gpload, asm: "MOVQ"},               // load 8 bytes from arg0+auxint+aux. arg1=mem
+		{name: "MOVQloadidx8", reg: gploadidx, asm: "MOVQ"},        // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
+		{name: "MOVBstore", reg: gpstore, asm: "MOVB", typ: "Mem"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
+		{name: "MOVWstore", reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
+		{name: "MOVLstore", reg: gpstore, asm: "MOVL", typ: "Mem"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
+		{name: "MOVQstore", reg: gpstore, asm: "MOVQ", typ: "Mem"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
+		{name: "MOVQstoreidx8", reg: gpstoreidx, asm: "MOVQ"},      // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
 
-		{name: "MOVXzero", reg: gpstoreconst}, // store auxint 0 bytes into arg0 using a series of MOV instructions. arg1=mem.
+		// arg0 = (duff-adjusted) pointer to start of memory to zero
+		// arg1 = value to store (will always be zero)
+		// arg2 = mem
+		// auxint = offset into duffzero code to start executing
+		// returns mem
+		{
+			name: "DUFFZERO",
+			reg: regInfo{
+				inputs:   []regMask{buildReg("DI"), buildReg("AX")},
+				clobbers: buildReg("DI FLAGS"),
+			},
+		},
 
-		{name: "REPSTOSQ", reg: regInfo{[]regMask{buildReg("DI"), buildReg("CX")}, buildReg("DI AX CX FLAGS"), nil}}, // store arg1 8-byte words containing zero into arg0 using STOSQ. arg2=mem.
+		// arg0 = address of memory to zero
+		// arg1 = # of 8-byte words to zero
+		// arg2 = value to store (will always be zero)
+		// arg3 = mem
+		// returns mem
+		{
+			name: "REPSTOSQ",
+			reg: regInfo{
+				inputs:   []regMask{buildReg("DI"), buildReg("CX"), buildReg("AX")},
+				clobbers: buildReg("DI CX FLAGS"),
+			},
+		},
 
-		//TODO: set register clobber to everything?
 		{name: "CALLstatic", reg: regInfo{clobbers: callerSave}},                                 // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
 		{name: "CALLclosure", reg: regInfo{[]regMask{gpsp, buildReg("DX"), 0}, callerSave, nil}}, // call function via closure.  arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
 		{name: "CALLdefer", reg: regInfo{clobbers: callerSave}},                                  // call deferproc.  arg0=mem, auxint=argsize, returns mem
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 8617cf3..0da7946 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -260,7 +260,7 @@
 	OpAMD64MOVLstore
 	OpAMD64MOVQstore
 	OpAMD64MOVQstoreidx8
-	OpAMD64MOVXzero
+	OpAMD64DUFFZERO
 	OpAMD64REPSTOSQ
 	OpAMD64CALLstatic
 	OpAMD64CALLclosure
@@ -3034,11 +3034,13 @@
 		},
 	},
 	{
-		name: "MOVXzero",
+		name: "DUFFZERO",
 		reg: regInfo{
 			inputs: []inputInfo{
-				{0, 4295032831}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 .SB
+				{0, 128}, // .DI
+				{1, 1},   // .AX
 			},
+			clobbers: 8589934720, // .DI .FLAGS
 		},
 	},
 	{
@@ -3047,8 +3049,9 @@
 			inputs: []inputInfo{
 				{0, 128}, // .DI
 				{1, 2},   // .CX
+				{2, 1},   // .AX
 			},
-			clobbers: 8589934723, // .AX .CX .DI .FLAGS
+			clobbers: 8589934722, // .CX .DI .FLAGS
 		},
 	},
 	{
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 5c47ec6..4e78306 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -178,3 +178,52 @@
 func f2i(f float64) int64 {
 	return int64(math.Float64bits(f))
 }
+
+// DUFFZERO consists of repeated blocks of 4 MOVs + ADD,
+// with 4 STOSQs at the very end.
+// The trailing STOSQs prevent the need for a DI preadjustment
+// for small numbers of words to clear.
+// See runtime/mkduff.go.
+const (
+	dzBlocks    = 31 // number of MOV/ADD blocks
+	dzBlockLen  = 4  // number of clears per block
+	dzBlockSize = 19 // size of instructions in a single block
+	dzMovSize   = 4  // size of single MOV instruction w/ offset
+	dzAddSize   = 4  // size of single ADD instruction
+	dzDIStep    = 8  // number of bytes cleared by each MOV instruction
+
+	dzTailLen  = 4 // number of final STOSQ instructions
+	dzTailSize = 2 // size of single STOSQ instruction
+
+	dzSize = dzBlocks*dzBlockSize + dzTailLen*dzTailSize // total size of DUFFZERO routine
+)
+
+func duffStart(size int64) int64 {
+	x, _ := duff(size)
+	return x
+}
+func duffAdj(size int64) int64 {
+	_, x := duff(size)
+	return x
+}
+
+// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
+// required to use the duffzero mechanism for a block of the given size.
+func duff(size int64) (int64, int64) {
+	if size < 32 || size > 1024 || size%8 != 0 {
+		panic("bad duffzero size")
+	}
+	// TODO: arch-dependent
+	off := int64(dzSize)
+	off -= dzTailLen * dzTailSize
+	size -= dzTailLen * dzDIStep
+	q := size / dzDIStep
+	blocks, singles := q/dzBlockLen, q%dzBlockLen
+	off -= dzBlockSize * blocks
+	var adj int64
+	if singles > 0 {
+		off -= dzAddSize + dzMovSize*singles
+		adj -= dzDIStep * (dzBlockLen - singles)
+	}
+	return off, adj
+}
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 50510d2..590efdb 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -10204,10 +10204,10 @@
 		;
 		// match: (Zero [1] destptr mem)
 		// cond:
-		// result: (MOVBstore destptr (MOVBconst <config.Frontend().TypeInt8()> [0]) mem)
+		// result: (MOVBstore destptr (MOVBconst [0]) mem)
 		{
 			if v.AuxInt != 1 {
-				goto end56bcaef03cce4d15c03efff669bb5585
+				goto endf7c8ca6a444f19e1142977e2ac42ab24
 			}
 			destptr := v.Args[0]
 			mem := v.Args[1]
@@ -10217,21 +10217,21 @@
 			v.resetArgs()
 			v.AddArg(destptr)
 			v0 := b.NewValue0(v.Line, OpAMD64MOVBconst, TypeInvalid)
-			v0.Type = config.Frontend().TypeInt8()
 			v0.AuxInt = 0
+			v0.Type = config.fe.TypeUInt8()
 			v.AddArg(v0)
 			v.AddArg(mem)
 			return true
 		}
-		goto end56bcaef03cce4d15c03efff669bb5585
-	end56bcaef03cce4d15c03efff669bb5585:
+		goto endf7c8ca6a444f19e1142977e2ac42ab24
+	endf7c8ca6a444f19e1142977e2ac42ab24:
 		;
 		// match: (Zero [2] destptr mem)
 		// cond:
-		// result: (MOVWstore destptr (MOVWconst <config.Frontend().TypeInt16()> [0]) mem)
+		// result: (MOVWstore destptr (MOVWconst [0]) mem)
 		{
 			if v.AuxInt != 2 {
-				goto endf52f08f1f7b0ae220c4cfca6586a8586
+				goto end7609a67450ab21eba86f456886fc8496
 			}
 			destptr := v.Args[0]
 			mem := v.Args[1]
@@ -10241,21 +10241,21 @@
 			v.resetArgs()
 			v.AddArg(destptr)
 			v0 := b.NewValue0(v.Line, OpAMD64MOVWconst, TypeInvalid)
-			v0.Type = config.Frontend().TypeInt16()
 			v0.AuxInt = 0
+			v0.Type = config.fe.TypeUInt16()
 			v.AddArg(v0)
 			v.AddArg(mem)
 			return true
 		}
-		goto endf52f08f1f7b0ae220c4cfca6586a8586
-	endf52f08f1f7b0ae220c4cfca6586a8586:
+		goto end7609a67450ab21eba86f456886fc8496
+	end7609a67450ab21eba86f456886fc8496:
 		;
 		// match: (Zero [4] destptr mem)
 		// cond:
-		// result: (MOVLstore destptr (MOVLconst <config.Frontend().TypeInt32()> [0]) mem)
+		// result: (MOVLstore destptr (MOVLconst [0]) mem)
 		{
 			if v.AuxInt != 4 {
-				goto end41c91e0c7a23e233de77812b5264fd10
+				goto enda8e1cf1298794cc3cb79cab108e33007
 			}
 			destptr := v.Args[0]
 			mem := v.Args[1]
@@ -10265,21 +10265,21 @@
 			v.resetArgs()
 			v.AddArg(destptr)
 			v0 := b.NewValue0(v.Line, OpAMD64MOVLconst, TypeInvalid)
-			v0.Type = config.Frontend().TypeInt32()
 			v0.AuxInt = 0
+			v0.Type = config.fe.TypeUInt32()
 			v.AddArg(v0)
 			v.AddArg(mem)
 			return true
 		}
-		goto end41c91e0c7a23e233de77812b5264fd10
-	end41c91e0c7a23e233de77812b5264fd10:
+		goto enda8e1cf1298794cc3cb79cab108e33007
+	enda8e1cf1298794cc3cb79cab108e33007:
 		;
 		// match: (Zero [8] destptr mem)
 		// cond:
-		// result: (MOVQstore destptr (MOVQconst <config.Frontend().TypeInt64()> [0]) mem)
+		// result: (MOVQstore destptr (MOVQconst [0]) mem)
 		{
 			if v.AuxInt != 8 {
-				goto end157ad586af643d8dac6cc84a776000ca
+				goto end1791556f0b03ea065d38a3267fbe01c6
 			}
 			destptr := v.Args[0]
 			mem := v.Args[1]
@@ -10289,70 +10289,395 @@
 			v.resetArgs()
 			v.AddArg(destptr)
 			v0 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
-			v0.Type = config.Frontend().TypeInt64()
 			v0.AuxInt = 0
+			v0.Type = config.fe.TypeUInt64()
 			v.AddArg(v0)
 			v.AddArg(mem)
 			return true
 		}
-		goto end157ad586af643d8dac6cc84a776000ca
-	end157ad586af643d8dac6cc84a776000ca:
+		goto end1791556f0b03ea065d38a3267fbe01c6
+	end1791556f0b03ea065d38a3267fbe01c6:
 		;
-		// match: (Zero [size] destptr mem)
-		// cond: size < 4*8
-		// result: (MOVXzero [size] destptr mem)
+		// match: (Zero [3] destptr mem)
+		// cond:
+		// result: (MOVBstore (ADDQconst [2] destptr) (MOVBconst [0]) 		(MOVWstore destptr (MOVWconst [0]) mem))
 		{
-			size := v.AuxInt
+			if v.AuxInt != 3 {
+				goto end7f8f5c8214f8b81a73fdde78b03ce53c
+			}
 			destptr := v.Args[0]
 			mem := v.Args[1]
-			if !(size < 4*8) {
-				goto endf0a22f1506977610ac0a310eee152075
-			}
-			v.Op = OpAMD64MOVXzero
+			v.Op = OpAMD64MOVBstore
 			v.AuxInt = 0
 			v.Aux = nil
 			v.resetArgs()
-			v.AuxInt = size
-			v.AddArg(destptr)
-			v.AddArg(mem)
+			v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v0.AuxInt = 2
+			v0.AddArg(destptr)
+			v0.Type = config.fe.TypeUInt64()
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpAMD64MOVBconst, TypeInvalid)
+			v1.AuxInt = 0
+			v1.Type = config.fe.TypeUInt8()
+			v.AddArg(v1)
+			v2 := b.NewValue0(v.Line, OpAMD64MOVWstore, TypeInvalid)
+			v2.AddArg(destptr)
+			v3 := b.NewValue0(v.Line, OpAMD64MOVWconst, TypeInvalid)
+			v3.AuxInt = 0
+			v3.Type = config.fe.TypeUInt16()
+			v2.AddArg(v3)
+			v2.AddArg(mem)
+			v2.Type = TypeMem
+			v.AddArg(v2)
 			return true
 		}
-		goto endf0a22f1506977610ac0a310eee152075
-	endf0a22f1506977610ac0a310eee152075:
+		goto end7f8f5c8214f8b81a73fdde78b03ce53c
+	end7f8f5c8214f8b81a73fdde78b03ce53c:
+		;
+		// match: (Zero [5] destptr mem)
+		// cond:
+		// result: (MOVBstore (ADDQconst [4] destptr) (MOVBconst [0]) 		(MOVLstore destptr (MOVLconst [0]) mem))
+		{
+			if v.AuxInt != 5 {
+				goto end54466baa4eac09020bee720efbb82d0f
+			}
+			destptr := v.Args[0]
+			mem := v.Args[1]
+			v.Op = OpAMD64MOVBstore
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v0.AuxInt = 4
+			v0.AddArg(destptr)
+			v0.Type = config.fe.TypeUInt64()
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpAMD64MOVBconst, TypeInvalid)
+			v1.AuxInt = 0
+			v1.Type = config.fe.TypeUInt8()
+			v.AddArg(v1)
+			v2 := b.NewValue0(v.Line, OpAMD64MOVLstore, TypeInvalid)
+			v2.AddArg(destptr)
+			v3 := b.NewValue0(v.Line, OpAMD64MOVLconst, TypeInvalid)
+			v3.AuxInt = 0
+			v3.Type = config.fe.TypeUInt32()
+			v2.AddArg(v3)
+			v2.AddArg(mem)
+			v2.Type = TypeMem
+			v.AddArg(v2)
+			return true
+		}
+		goto end54466baa4eac09020bee720efbb82d0f
+	end54466baa4eac09020bee720efbb82d0f:
+		;
+		// match: (Zero [6] destptr mem)
+		// cond:
+		// result: (MOVWstore (ADDQconst [4] destptr) (MOVWconst [0]) 		(MOVLstore destptr (MOVLconst [0]) mem))
+		{
+			if v.AuxInt != 6 {
+				goto end3a37ae6095ddc37646d6ad6eeda986e2
+			}
+			destptr := v.Args[0]
+			mem := v.Args[1]
+			v.Op = OpAMD64MOVWstore
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v0.AuxInt = 4
+			v0.AddArg(destptr)
+			v0.Type = config.fe.TypeUInt64()
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpAMD64MOVWconst, TypeInvalid)
+			v1.AuxInt = 0
+			v1.Type = config.fe.TypeUInt16()
+			v.AddArg(v1)
+			v2 := b.NewValue0(v.Line, OpAMD64MOVLstore, TypeInvalid)
+			v2.AddArg(destptr)
+			v3 := b.NewValue0(v.Line, OpAMD64MOVLconst, TypeInvalid)
+			v3.AuxInt = 0
+			v3.Type = config.fe.TypeUInt32()
+			v2.AddArg(v3)
+			v2.AddArg(mem)
+			v2.Type = TypeMem
+			v.AddArg(v2)
+			return true
+		}
+		goto end3a37ae6095ddc37646d6ad6eeda986e2
+	end3a37ae6095ddc37646d6ad6eeda986e2:
+		;
+		// match: (Zero [7] destptr mem)
+		// cond:
+		// result: (MOVLstore (ADDQconst [3] destptr) (MOVLconst [0]) 		(MOVLstore destptr (MOVLconst [0]) mem))
+		{
+			if v.AuxInt != 7 {
+				goto endd53a750fa01c5a5a238ba8fcabb416b2
+			}
+			destptr := v.Args[0]
+			mem := v.Args[1]
+			v.Op = OpAMD64MOVLstore
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v0.AuxInt = 3
+			v0.AddArg(destptr)
+			v0.Type = config.fe.TypeUInt64()
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpAMD64MOVLconst, TypeInvalid)
+			v1.AuxInt = 0
+			v1.Type = config.fe.TypeUInt32()
+			v.AddArg(v1)
+			v2 := b.NewValue0(v.Line, OpAMD64MOVLstore, TypeInvalid)
+			v2.AddArg(destptr)
+			v3 := b.NewValue0(v.Line, OpAMD64MOVLconst, TypeInvalid)
+			v3.AuxInt = 0
+			v3.Type = config.fe.TypeUInt32()
+			v2.AddArg(v3)
+			v2.AddArg(mem)
+			v2.Type = TypeMem
+			v.AddArg(v2)
+			return true
+		}
+		goto endd53a750fa01c5a5a238ba8fcabb416b2
+	endd53a750fa01c5a5a238ba8fcabb416b2:
 		;
 		// match: (Zero [size] destptr mem)
-		// cond: size >= 4*8
-		// result: (Zero [size%8] (OffPtr <config.Frontend().TypeUInt64()> [size-(size%8)] destptr) (REPSTOSQ  <TypeMem> destptr (MOVQconst <config.Frontend().TypeUInt64()> [size/8]) mem))
+		// cond: size%8 != 0 && size > 8
+		// result: (Zero [size-size%8] (ADDQconst destptr [size%8]) 		(MOVQstore destptr (MOVQconst [0]) mem))
 		{
 			size := v.AuxInt
 			destptr := v.Args[0]
 			mem := v.Args[1]
-			if !(size >= 4*8) {
-				goto end84c39fe2e8d40e0042a10741a0ef16bd
+			if !(size%8 != 0 && size > 8) {
+				goto end5efefe1d9cca07e7ad6f4832f774b938
 			}
 			v.Op = OpZero
 			v.AuxInt = 0
 			v.Aux = nil
 			v.resetArgs()
-			v.AuxInt = size % 8
-			v0 := b.NewValue0(v.Line, OpOffPtr, TypeInvalid)
-			v0.Type = config.Frontend().TypeUInt64()
-			v0.AuxInt = size - (size % 8)
+			v.AuxInt = size - size%8
+			v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
 			v0.AddArg(destptr)
+			v0.AuxInt = size % 8
+			v0.Type = config.fe.TypeUInt64()
 			v.AddArg(v0)
-			v1 := b.NewValue0(v.Line, OpAMD64REPSTOSQ, TypeInvalid)
-			v1.Type = TypeMem
+			v1 := b.NewValue0(v.Line, OpAMD64MOVQstore, TypeInvalid)
 			v1.AddArg(destptr)
 			v2 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
-			v2.Type = config.Frontend().TypeUInt64()
-			v2.AuxInt = size / 8
+			v2.AuxInt = 0
+			v2.Type = config.fe.TypeUInt64()
 			v1.AddArg(v2)
 			v1.AddArg(mem)
+			v1.Type = TypeMem
 			v.AddArg(v1)
 			return true
 		}
-		goto end84c39fe2e8d40e0042a10741a0ef16bd
-	end84c39fe2e8d40e0042a10741a0ef16bd:
+		goto end5efefe1d9cca07e7ad6f4832f774b938
+	end5efefe1d9cca07e7ad6f4832f774b938:
+		;
+		// match: (Zero [16] destptr mem)
+		// cond:
+		// result: (MOVQstore (ADDQconst [8] destptr) (MOVQconst [0]) 		(MOVQstore destptr (MOVQconst [0]) mem))
+		{
+			if v.AuxInt != 16 {
+				goto endad489c16378959a764292e8b1cb72ba2
+			}
+			destptr := v.Args[0]
+			mem := v.Args[1]
+			v.Op = OpAMD64MOVQstore
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v0.AuxInt = 8
+			v0.AddArg(destptr)
+			v0.Type = config.fe.TypeUInt64()
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v1.AuxInt = 0
+			v1.Type = config.fe.TypeUInt64()
+			v.AddArg(v1)
+			v2 := b.NewValue0(v.Line, OpAMD64MOVQstore, TypeInvalid)
+			v2.AddArg(destptr)
+			v3 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v3.AuxInt = 0
+			v3.Type = config.fe.TypeUInt64()
+			v2.AddArg(v3)
+			v2.AddArg(mem)
+			v2.Type = TypeMem
+			v.AddArg(v2)
+			return true
+		}
+		goto endad489c16378959a764292e8b1cb72ba2
+	endad489c16378959a764292e8b1cb72ba2:
+		;
+		// match: (Zero [24] destptr mem)
+		// cond:
+		// result: (MOVQstore (ADDQconst [16] destptr) (MOVQconst [0]) 		(MOVQstore (ADDQconst [8] destptr) (MOVQconst [0]) 			(MOVQstore destptr (MOVQconst [0]) mem)))
+		{
+			if v.AuxInt != 24 {
+				goto enddc443320a1be0b3c2e213bd6778197dd
+			}
+			destptr := v.Args[0]
+			mem := v.Args[1]
+			v.Op = OpAMD64MOVQstore
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v0.AuxInt = 16
+			v0.AddArg(destptr)
+			v0.Type = config.fe.TypeUInt64()
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v1.AuxInt = 0
+			v1.Type = config.fe.TypeUInt64()
+			v.AddArg(v1)
+			v2 := b.NewValue0(v.Line, OpAMD64MOVQstore, TypeInvalid)
+			v3 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v3.AuxInt = 8
+			v3.AddArg(destptr)
+			v3.Type = config.fe.TypeUInt64()
+			v2.AddArg(v3)
+			v4 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v4.AuxInt = 0
+			v4.Type = config.fe.TypeUInt64()
+			v2.AddArg(v4)
+			v5 := b.NewValue0(v.Line, OpAMD64MOVQstore, TypeInvalid)
+			v5.AddArg(destptr)
+			v6 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v6.AuxInt = 0
+			v6.Type = config.fe.TypeUInt64()
+			v5.AddArg(v6)
+			v5.AddArg(mem)
+			v5.Type = TypeMem
+			v2.AddArg(v5)
+			v2.Type = TypeMem
+			v.AddArg(v2)
+			return true
+		}
+		goto enddc443320a1be0b3c2e213bd6778197dd
+	enddc443320a1be0b3c2e213bd6778197dd:
+		;
+		// match: (Zero [32] destptr mem)
+		// cond:
+		// result: (MOVQstore (ADDQconst [24] destptr) (MOVQconst [0]) 		(MOVQstore (ADDQconst [16] destptr) (MOVQconst [0]) 			(MOVQstore (ADDQconst [8] destptr) (MOVQconst [0]) 				(MOVQstore destptr (MOVQconst [0]) mem))))
+		{
+			if v.AuxInt != 32 {
+				goto end282b5e36693f06e2cd1ac563e0d419b5
+			}
+			destptr := v.Args[0]
+			mem := v.Args[1]
+			v.Op = OpAMD64MOVQstore
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v0.AuxInt = 24
+			v0.AddArg(destptr)
+			v0.Type = config.fe.TypeUInt64()
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v1.AuxInt = 0
+			v1.Type = config.fe.TypeUInt64()
+			v.AddArg(v1)
+			v2 := b.NewValue0(v.Line, OpAMD64MOVQstore, TypeInvalid)
+			v3 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v3.AuxInt = 16
+			v3.AddArg(destptr)
+			v3.Type = config.fe.TypeUInt64()
+			v2.AddArg(v3)
+			v4 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v4.AuxInt = 0
+			v4.Type = config.fe.TypeUInt64()
+			v2.AddArg(v4)
+			v5 := b.NewValue0(v.Line, OpAMD64MOVQstore, TypeInvalid)
+			v6 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v6.AuxInt = 8
+			v6.AddArg(destptr)
+			v6.Type = config.fe.TypeUInt64()
+			v5.AddArg(v6)
+			v7 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v7.AuxInt = 0
+			v7.Type = config.fe.TypeUInt64()
+			v5.AddArg(v7)
+			v8 := b.NewValue0(v.Line, OpAMD64MOVQstore, TypeInvalid)
+			v8.AddArg(destptr)
+			v9 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v9.AuxInt = 0
+			v9.Type = config.fe.TypeUInt64()
+			v8.AddArg(v9)
+			v8.AddArg(mem)
+			v8.Type = TypeMem
+			v5.AddArg(v8)
+			v5.Type = TypeMem
+			v2.AddArg(v5)
+			v2.Type = TypeMem
+			v.AddArg(v2)
+			return true
+		}
+		goto end282b5e36693f06e2cd1ac563e0d419b5
+	end282b5e36693f06e2cd1ac563e0d419b5:
+		;
+		// match: (Zero [size] destptr mem)
+		// cond: size <= 1024 && size%8 == 0
+		// result: (DUFFZERO [duffStart(size)] (ADDQconst [duffAdj(size)] destptr) (MOVQconst [0]) mem)
+		{
+			size := v.AuxInt
+			destptr := v.Args[0]
+			mem := v.Args[1]
+			if !(size <= 1024 && size%8 == 0) {
+				goto endfae59ebc96f670276efea844c3b302ac
+			}
+			v.Op = OpAMD64DUFFZERO
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v.AuxInt = duffStart(size)
+			v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, TypeInvalid)
+			v0.AuxInt = duffAdj(size)
+			v0.AddArg(destptr)
+			v0.Type = config.fe.TypeUInt64()
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v1.AuxInt = 0
+			v1.Type = config.fe.TypeUInt64()
+			v.AddArg(v1)
+			v.AddArg(mem)
+			return true
+		}
+		goto endfae59ebc96f670276efea844c3b302ac
+	endfae59ebc96f670276efea844c3b302ac:
+		;
+		// match: (Zero [size] destptr mem)
+		// cond: size > 1024 && size%8 == 0
+		// result: (REPSTOSQ destptr (MOVQconst [size/8]) (MOVQconst [0]) mem)
+		{
+			size := v.AuxInt
+			destptr := v.Args[0]
+			mem := v.Args[1]
+			if !(size > 1024 && size%8 == 0) {
+				goto endb9d55d4ba0e70ed918e3ac757727441b
+			}
+			v.Op = OpAMD64REPSTOSQ
+			v.AuxInt = 0
+			v.Aux = nil
+			v.resetArgs()
+			v.AddArg(destptr)
+			v0 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v0.AuxInt = size / 8
+			v0.Type = config.fe.TypeUInt64()
+			v.AddArg(v0)
+			v1 := b.NewValue0(v.Line, OpAMD64MOVQconst, TypeInvalid)
+			v1.AuxInt = 0
+			v1.Type = config.fe.TypeUInt64()
+			v.AddArg(v1)
+			v.AddArg(mem)
+			return true
+		}
+		goto endb9d55d4ba0e70ed918e3ac757727441b
+	endb9d55d4ba0e70ed918e3ac757727441b:
 		;
 	case OpZeroExt16to32:
 		// match: (ZeroExt16to32 x)