blob: 918766650fd3debceec25f420d3850530cb23e73 [file] [log] [blame]
Josh Bleecher Snyderad360092015-03-31 09:19:10 -07001// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ignore
6
7// runtime·duffzero is a Duff's device for zeroing memory.
8// The compiler jumps to computed addresses within
9// the routine to zero chunks of memory.
10// Do not change duffzero without also
11// changing clearfat in cmd/?g/ggen.go.
12
13// runtime·duffcopy is a Duff's device for copying memory.
14// The compiler jumps to computed addresses within
15// the routine to copy chunks of memory.
16// Source and destination must not overlap.
17// Do not change duffcopy without also
Shenghou Mae7dd2882015-04-08 13:34:42 -040018// changing blockcopy in cmd/?g/cgen.go.
Josh Bleecher Snyderad360092015-03-31 09:19:10 -070019
20// See the zero* and copy* generators below
21// for architecture-specific comments.
22
23// mkduff generates duff_*.s.
24package main
25
26import (
27 "bytes"
28 "fmt"
29 "io"
30 "io/ioutil"
31 "log"
32)
33
34func main() {
35 gen("amd64", notags, zeroAMD64, copyAMD64)
36 gen("386", notags, zero386, copy386)
37 gen("arm", notags, zeroARM, copyARM)
38 gen("arm64", notags, zeroARM64, copyARM64)
39 gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x)
40}
41
// gen builds the contents of duff_<arch>.s in memory and writes the file
// into the current directory with mode 0644.
//
// The output is, in order: a three-line auto-generated banner, whatever
// tags writes, the textflag.h include followed by a blank line, the output
// of zero, a blank line, and the output of copy. If the file cannot be
// written the program terminates via log.Fatalln.
func gen(arch string, tags, zero, copy func(io.Writer)) {
	out := new(bytes.Buffer)

	banner := []string{
		"// AUTO-GENERATED by mkduff.go",
		"// Run go generate from src/runtime to update.",
		"// See mkduff.go for comments.",
	}
	for _, line := range banner {
		fmt.Fprintln(out, line)
	}

	tags(out)
	out.WriteString("#include \"textflag.h\"\n\n")
	zero(out)
	out.WriteString("\n")
	copy(out)

	name := "duff_" + arch + ".s"
	err := ioutil.WriteFile(name, out.Bytes(), 0644)
	if err != nil {
		log.Fatalln(err)
	}
}
59
// notags is the tags callback for targets that need no build constraint:
// it writes a single empty line to w.
func notags(w io.Writer) {
	fmt.Fprint(w, "\n")
}
61
// zeroAMD64 writes the amd64 runtime·duffzero routine to w.
//
// Register usage in the generated code:
//
//	X0: zero
//	DI: ptr to memory to be zeroed
//
// DI is updated as a side effect. The routine consists of 16 identical
// groups; each group issues four MOVUPS stores of X0 at offsets 0, 16, 32
// and 48 from DI and then adds 64 to DI. A single RET ends the routine.
func zeroAMD64(w io.Writer) {
	// One group of the unrolled body, including its trailing blank line.
	const group = "\tMOVUPS\tX0,(DI)\n" +
		"\tMOVUPS\tX0,16(DI)\n" +
		"\tMOVUPS\tX0,32(DI)\n" +
		"\tMOVUPS\tX0,48(DI)\n" +
		"\tADDQ\t$64,DI\n" +
		"\n"

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for left := 16; left > 0; left-- {
		fmt.Fprint(w, group)
	}
	fmt.Fprintln(w, "\tRET")
}
77
// copyAMD64 writes the amd64 runtime·duffcopy routine to w.
//
// Register usage in the generated code:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//
// SI and DI are updated as a side effect; X0 is used as the transfer
// register. The routine consists of 64 identical groups, each loading 16
// bytes from (SI) into X0, advancing SI, storing X0 to (DI) and advancing
// DI. A single RET ends the routine.
//
// The original authors note that this is equivalent to a sequence of MOVSQ
// but that, for some reason, MOVSQ is 3.5x slower than this code. (An older
// remark here said the STOSQ in duffzero seemed fine; zeroAMD64 in this
// file emits MOVUPS stores rather than STOSQ.)
func copyAMD64(w io.Writer) {
	// One group of the unrolled body, including its trailing blank line.
	const group = "\tMOVUPS\t(SI), X0\n" +
		"\tADDQ\t$16, SI\n" +
		"\tMOVUPS\tX0, (DI)\n" +
		"\tADDQ\t$16, DI\n" +
		"\n"

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for left := 64; left > 0; left-- {
		fmt.Fprint(w, group)
	}
	fmt.Fprintln(w, "\tRET")
}
96
// zero386 writes the 386 runtime·duffzero routine to w.
//
// Register usage in the generated code:
//
//	AX: zero
//	DI: ptr to memory to be zeroed
//
// DI is updated as a side effect. The routine is 128 consecutive STOSL
// instructions followed by RET.
func zero386(w io.Writer) {
	const store = "\tSTOSL\n"

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for left := 128; left > 0; left-- {
		fmt.Fprint(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
107
// copy386 writes the 386 runtime·duffcopy routine to w.
//
// Register usage in the generated code:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//
// SI and DI are updated as a side effect; CX is used as the transfer
// register. The routine consists of 128 identical groups, each moving one
// 4-byte word from (SI) to (DI) through CX and advancing both pointers by
// 4. A single RET ends the routine.
//
// The original authors note that this is equivalent to a sequence of MOVSL
// but that, for some reason, MOVSL is really slow.
func copy386(w io.Writer) {
	// One group of the unrolled body, including its trailing blank line.
	const group = "\tMOVL\t(SI), CX\n" +
		"\tADDL\t$4, SI\n" +
		"\tMOVL\tCX, (DI)\n" +
		"\tADDL\t$4, DI\n" +
		"\n"

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for left := 128; left > 0; left-- {
		fmt.Fprint(w, group)
	}
	fmt.Fprintln(w, "\tRET")
}
125
// zeroARM writes the arm runtime·duffzero routine to w.
//
// Register usage in the generated code:
//
//	R0: zero
//	R1: ptr to memory to be zeroed
//
// R1 is updated as a side effect. The routine is 128 consecutive
// "MOVW.P R0, 4(R1)" instructions followed by RET.
func zeroARM(w io.Writer) {
	const store = "\tMOVW.P\tR0, 4(R1)\n"

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for left := 128; left > 0; left-- {
		fmt.Fprint(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
136
// copyARM writes the arm runtime·duffcopy routine to w.
//
// Register usage in the generated code:
//
//	R0: scratch space
//	R1: ptr to source memory
//	R2: ptr to destination memory
//
// R1 and R2 are updated as a side effect. The routine consists of 128
// identical groups, each loading through R1 into R0 and storing R0 through
// R2 with MOVW.P. A single RET ends the routine.
func copyARM(w io.Writer) {
	// One group of the unrolled body, including its trailing blank line.
	const group = "\tMOVW.P\t4(R1), R0\n" +
		"\tMOVW.P\tR0, 4(R2)\n" +
		"\n"

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for left := 128; left > 0; left-- {
		fmt.Fprint(w, group)
	}
	fmt.Fprintln(w, "\tRET")
}
150
// zeroARM64 writes the arm64 runtime·duffzero routine to w.
//
// Register usage in the generated code:
//
//	ZR: always zero
//	R16 (aka REGRT1): ptr to memory to be zeroed - 8
//
// On return, R16 points to the last zeroed dword. The routine is declared
// with frame size $-8-0 and is 128 consecutive "MOVD.W ZR, 8(R16)"
// instructions followed by RET.
func zeroARM64(w io.Writer) {
	const store = "\tMOVD.W\tZR, 8(R16)\n"

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $-8-0")
	for left := 128; left > 0; left-- {
		fmt.Fprint(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
161
// copyARM64 is a placeholder: no arm64 duffcopy is generated yet, so it
// only writes a TODO comment line to w.
func copyARM64(w io.Writer) {
	const placeholder = "// TODO: Implement runtime·duffcopy."
	fmt.Fprint(w, placeholder+"\n")
}
165
// tagsPPC64x is the tags callback for the ppc64x target: it writes a
// "+build ppc64 ppc64le" constraint line to w, with one blank line before
// it and one after.
func tagsPPC64x(w io.Writer) {
	const constraint = "\n" +
		"// +build ppc64 ppc64le\n" +
		"\n"
	fmt.Fprint(w, constraint)
}
171
// zeroPPC64x writes the ppc64/ppc64le runtime·duffzero routine to w.
//
// Register usage in the generated code:
//
//	R0: always zero
//	R3 (aka REGRT1): ptr to memory to be zeroed - 8
//
// On return, R3 points to the last zeroed dword. The routine is declared
// with frame size $-8-0 and is 128 consecutive "MOVDU R0, 8(R3)"
// instructions followed by RETURN.
func zeroPPC64x(w io.Writer) {
	const store = "\tMOVDU\tR0, 8(R3)\n"

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $-8-0")
	for left := 128; left > 0; left-- {
		fmt.Fprint(w, store)
	}
	fmt.Fprintln(w, "\tRETURN")
}
182
// copyPPC64x is a placeholder: no ppc64x duffcopy is generated yet, so it
// only writes a TODO comment line to w.
func copyPPC64x(w io.Writer) {
	const placeholder = "// TODO: Implement runtime·duffcopy."
	fmt.Fprint(w, placeholder+"\n")
}