// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
// +build ignore
6
// runtime·duffzero is a Duff's device for zeroing memory.
// The compiler jumps to computed addresses within
// the routine to zero chunks of memory.
// Do not change duffzero without also
// changing the uses in cmd/compile/internal/*/*.go.
Josh Bleecher Snyderad360092015-03-31 09:19:10 -070012
// runtime·duffcopy is a Duff's device for copying memory.
// The compiler jumps to computed addresses within
// the routine to copy chunks of memory.
// Source and destination must not overlap.
// Do not change duffcopy without also
// changing the uses in cmd/compile/internal/*/*.go.
Josh Bleecher Snyderad360092015-03-31 09:19:10 -070019
// See the zero* and copy* generators below
// for architecture-specific comments.
22
// mkduff generates duff_*.s.
24package main
25
26import (
27 "bytes"
28 "fmt"
29 "io"
30 "io/ioutil"
31 "log"
32)
33
34func main() {
35 gen("amd64", notags, zeroAMD64, copyAMD64)
36 gen("386", notags, zero386, copy386)
37 gen("arm", notags, zeroARM, copyARM)
38 gen("arm64", notags, zeroARM64, copyARM64)
39 gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x)
Yao Zhang424738e2015-09-10 11:25:58 -040040 gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x)
Josh Bleecher Snyderad360092015-03-31 09:19:10 -070041}
42
// gen writes the assembly file duff_<arch>.s into the current directory.
//
// The file starts with a fixed "Code generated" banner, followed by the
// output of tags, the textflag.h include, a blank line, the output of
// zero, another blank line, and the output of copy. Everything is
// assembled in memory and written with a single WriteFile call; a write
// failure terminates the program through log.Fatalln.
func gen(arch string, tags, zero, copy func(io.Writer)) {
	var out bytes.Buffer

	banner := []string{
		"// Code generated by mkduff.go; DO NOT EDIT.",
		"// Run go generate from src/runtime to update.",
		"// See mkduff.go for comments.",
	}
	for _, line := range banner {
		fmt.Fprintln(&out, line)
	}
	tags(&out)
	fmt.Fprintln(&out, "#include \"textflag.h\"")
	fmt.Fprintln(&out)
	zero(&out)
	fmt.Fprintln(&out)
	copy(&out)

	name := "duff_" + arch + ".s"
	if err := ioutil.WriteFile(name, out.Bytes(), 0644); err != nil {
		log.Fatalln(err)
	}
}
60
// notags is the tags generator used for architectures that are passed
// no build-constraint generator in main; it emits a single blank line.
func notags(w io.Writer) {
	fmt.Fprintln(w)
}
62
// zeroAMD64 writes the amd64 implementation of runtime·duffzero to w.
//
// Registers used by the generated routine:
//
//	X0: zero
//	DI: ptr to memory to be zeroed
//
// DI is updated as a side effect.
//
// The routine is 16 unrolled units followed by RET. Each unit issues
// four MOVUPS stores at offsets 0, 16, 32 and 48 from DI and then
// advances DI by 64.
func zeroAMD64(w io.Writer) {
	unit := []string{
		"\tMOVUPS\tX0,(DI)",
		"\tMOVUPS\tX0,16(DI)",
		"\tMOVUPS\tX0,32(DI)",
		"\tMOVUPS\tX0,48(DI)",
		// We use lea instead of add, to avoid clobbering flags.
		"\tLEAQ\t64(DI),DI",
		// Blank line separating consecutive units.
		"",
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for i := 0; i < 16; i++ {
		for _, line := range unit {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
78
// copyAMD64 writes the amd64 implementation of runtime·duffcopy to w.
//
// Registers used by the generated routine:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//	X0: scratch space (overwritten by every unit)
//
// SI and DI are updated as a side effect.
//
// The routine is 64 unrolled units followed by RET. Each unit loads
// from (SI) into X0, stores X0 to (DI), and advances both pointers by 16.
//
// This is equivalent to a sequence of MOVSQ but
// for some reason that is 3.5x slower than this code.
func copyAMD64(w io.Writer) {
	unit := []string{
		"\tMOVUPS\t(SI), X0",
		"\tADDQ\t$16, SI",
		"\tMOVUPS\tX0, (DI)",
		"\tADDQ\t$16, DI",
		// Blank line separating consecutive units.
		"",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for i := 0; i < 64; i++ {
		for _, line := range unit {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
97
// zero386 writes the 386 implementation of runtime·duffzero to w.
//
// Registers used by the generated routine:
//
//	AX: zero
//	DI: ptr to memory to be zeroed
//
// DI is updated as a side effect.
//
// The routine is 128 STOSL instructions followed by RET.
func zero386(w io.Writer) {
	const stores = 128

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for n := 0; n < stores; n++ {
		fmt.Fprintln(w, "\tSTOSL")
	}
	fmt.Fprintln(w, "\tRET")
}
108
// copy386 writes the 386 implementation of runtime·duffcopy to w.
//
// Registers used by the generated routine:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//	CX: scratch space (overwritten by every unit)
//
// SI and DI are updated as a side effect.
//
// The routine is 128 unrolled units followed by RET. Each unit moves
// one 4-byte word from (SI) to (DI) through CX and advances both
// pointers by 4.
//
// This is equivalent to a sequence of MOVSL but
// for some reason MOVSL is really slow.
func copy386(w io.Writer) {
	unit := []string{
		"\tMOVL\t(SI), CX",
		"\tADDL\t$4, SI",
		"\tMOVL\tCX, (DI)",
		"\tADDL\t$4, DI",
		// Blank line separating consecutive units.
		"",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for i := 0; i < 128; i++ {
		for _, line := range unit {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
126
// zeroARM writes the arm implementation of runtime·duffzero to w.
//
// Registers used by the generated routine:
//
//	R0: zero
//	R1: ptr to memory to be zeroed
//
// R1 is updated as a side effect.
//
// The routine is 128 MOVW.P stores of R0 followed by RET.
func zeroARM(w io.Writer) {
	const stores = 128

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for n := 0; n < stores; n++ {
		fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R1)")
	}
	fmt.Fprintln(w, "\tRET")
}
137
// copyARM writes the arm implementation of runtime·duffcopy to w.
//
// Registers used by the generated routine:
//
//	R0: scratch space
//	R1: ptr to source memory
//	R2: ptr to destination memory
//
// R1 and R2 are updated as a side effect.
//
// The routine is 128 unrolled units followed by RET. Each unit is a
// MOVW.P load from R1 into R0 and a MOVW.P store of R0 through R2.
func copyARM(w io.Writer) {
	unit := []string{
		"\tMOVW.P\t4(R1), R0",
		"\tMOVW.P\tR0, 4(R2)",
		// Blank line separating consecutive units.
		"",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for i := 0; i < 128; i++ {
		for _, line := range unit {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
151
// zeroARM64 writes the arm64 implementation of runtime·duffzero to w.
//
// Registers used by the generated routine:
//
//	ZR: always zero
//	R16 (aka REGRT1): ptr to memory to be zeroed
//
// The routine is 63 post-indexed STP.P stores, each writing a pair of
// zero registers and stepping R16 by 16, followed by one plain STP and
// RET. Because the final STP does not update R16, on return R16 points
// to the start of the last zeroed 16-byte pair.
func zeroARM64(w io.Writer) {
	const postIndexedStores = 63

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for n := 0; n < postIndexedStores; n++ {
		fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R16)")
	}
	// The last pair is stored without writeback.
	fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R16)")
	fmt.Fprintln(w, "\tRET")
}
163
// copyARM64 writes the arm64 implementation of runtime·duffcopy to w.
//
// Registers used by the generated routine:
//
//	R16 (aka REGRT1): ptr to source memory
//	R17 (aka REGRT2): ptr to destination memory
//	R26, R27 (aka REGTMP): scratch space
//
// R16 and R17 are updated as a side effect.
//
// The routine is 64 unrolled units followed by RET. Each unit is an
// LDP.P load of a register pair from R16 and an STP.P store of that
// pair through R17, both with a step of 16.
func copyARM64(w io.Writer) {
	unit := []string{
		"\tLDP.P\t16(R16), (R26, R27)",
		"\tSTP.P\t(R26, R27), 16(R17)",
		// Blank line separating consecutive units.
		"",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 64; i++ {
		for _, line := range unit {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
178
// tagsPPC64x writes the build constraint for the ppc64x file to w:
// a "+build ppc64 ppc64le" comment line with a blank line on either
// side.
func tagsPPC64x(w io.Writer) {
	for _, line := range []string{"", "// +build ppc64 ppc64le", ""} {
		fmt.Fprintln(w, line)
	}
}
184
// zeroPPC64x writes the ppc64/ppc64le implementation of
// runtime·duffzero to w.
//
// Registers used by the generated routine:
//
//	R0: always zero
//	R3 (aka REGRT1): ptr to memory to be zeroed - 8
//
// On return, R3 points to the last zeroed dword.
//
// The routine is 128 MOVDU stores of R0 to 8(R3) followed by RET.
func zeroPPC64x(w io.Writer) {
	const stores = 128

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for n := 0; n < stores; n++ {
		fmt.Fprintln(w, "\tMOVDU\tR0, 8(R3)")
	}
	fmt.Fprintln(w, "\tRET")
}
195
// copyPPC64x is a placeholder: duffcopy is not implemented for ppc64x,
// so it only writes a TODO comment line into the generated file.
func copyPPC64x(w io.Writer) {
	const placeholder = "// TODO: Implement runtime·duffcopy."
	fmt.Fprintln(w, placeholder)
}
Yao Zhang424738e2015-09-10 11:25:58 -0400199
// tagsMIPS64x writes the build constraint for the mips64x file to w:
// a "+build mips64 mips64le" comment line with a blank line on either
// side.
func tagsMIPS64x(w io.Writer) {
	for _, line := range []string{"", "// +build mips64 mips64le", ""} {
		fmt.Fprintln(w, line)
	}
}
205
// zeroMIPS64x writes the mips64/mips64le implementation of
// runtime·duffzero to w.
//
// Registers used by the generated routine:
//
//	R0: always zero
//	R1 (aka REGRT1): ptr to memory to be zeroed - 8
//
// On return, R1 points to the last zeroed dword.
//
// The routine is 128 unrolled units followed by RET. Each unit stores
// R0 to 8(R1) and then adds 8 to R1.
func zeroMIPS64x(w io.Writer) {
	unit := []string{
		"\tMOVV\tR0, 8(R1)",
		"\tADDV\t$8, R1",
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		for _, line := range unit {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
217
// copyMIPS64x is a placeholder: duffcopy is not implemented for
// mips64x, so it only writes a TODO comment line into the generated
// file.
func copyMIPS64x(w io.Writer) {
	const placeholder = "// TODO: Implement runtime·duffcopy."
	fmt.Fprintln(w, placeholder)
}