// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ignore
// +build ignore

// runtime·duffzero is a Duff's device for zeroing memory.
// The compiler jumps to computed addresses within
// the routine to zero chunks of memory.
// Do not change duffzero without also
// changing the uses in cmd/compile/internal/*/*.go.

// runtime·duffcopy is a Duff's device for copying memory.
// The compiler jumps to computed addresses within
// the routine to copy chunks of memory.
// Source and destination must not overlap.
// Do not change duffcopy without also
// changing the uses in cmd/compile/internal/*/*.go.

// See the zero* and copy* generators below
// for architecture-specific comments.

// mkduff generates duff_*.s.
package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"os"
)

35func main() {
36 gen("amd64", notags, zeroAMD64, copyAMD64)
37 gen("386", notags, zero386, copy386)
38 gen("arm", notags, zeroARM, copyARM)
39 gen("arm64", notags, zeroARM64, copyARM64)
40 gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x)
Yao Zhang424738e2015-09-10 11:25:58 -040041 gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x)
Michał Derkacz150d2442020-06-14 00:06:24 +020042 gen("riscv64", notags, zeroRISCV64, copyRISCV64)
Josh Bleecher Snyderad360092015-03-31 09:19:10 -070043}
44
// gen assembles the contents of duff_<arch>.s in memory and writes the
// result to the current directory with mode 0644.
//
// The file consists of, in order: the generated-code header, whatever
// tags emits (build constraints or a blank line), the textflag.h include,
// the output of zero, a blank line, and the output of copy.
// Any error writing the file terminates the program via log.Fatalln.
func gen(arch string, tags, zero, copy func(io.Writer)) {
	var buf bytes.Buffer

	fmt.Fprintln(&buf, "// Code generated by mkduff.go; DO NOT EDIT.")
	fmt.Fprintln(&buf, "// Run go generate from src/runtime to update.")
	fmt.Fprintln(&buf, "// See mkduff.go for comments.")
	tags(&buf)
	fmt.Fprintln(&buf, "#include \"textflag.h\"")
	fmt.Fprintln(&buf)
	zero(&buf)
	fmt.Fprintln(&buf)
	copy(&buf)

	if err := os.WriteFile("duff_"+arch+".s", buf.Bytes(), 0644); err != nil {
		log.Fatalln(err)
	}
}

// notags is the tag writer for architectures that need no build
// constraints; it emits a single blank line in their place.
func notags(w io.Writer) { fmt.Fprintln(w) }

// zeroAMD64 writes the amd64 runtime·duffzero routine to w: 16 unrolled
// units, each storing X15 to four consecutive 16-byte slots and then
// advancing DI by 64, followed by RET.
//
// Register use in the emitted code:
//
//	X15: zero
//	DI: ptr to memory to be zeroed
//
// DI is updated as a side effect.
func zeroAMD64(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT, $0-0")
	for i := 0; i < 16; i++ {
		fmt.Fprintln(w, "\tMOVUPS\tX15,(DI)")
		fmt.Fprintln(w, "\tMOVUPS\tX15,16(DI)")
		fmt.Fprintln(w, "\tMOVUPS\tX15,32(DI)")
		fmt.Fprintln(w, "\tMOVUPS\tX15,48(DI)")
		fmt.Fprintln(w, "\tLEAQ\t64(DI),DI") // We use lea instead of add, to avoid clobbering flags
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// copyAMD64 writes the amd64 runtime·duffcopy routine to w: 64 unrolled
// units, each moving 16 bytes through X0 and advancing both pointers by
// 16, followed by RET.
//
// Register use in the emitted code:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//
// SI and DI are updated as a side effect.
//
// This is equivalent to a sequence of MOVSQ but
// for some reason that is 3.5x slower than this code.
func copyAMD64(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT, $0-0")
	for i := 0; i < 64; i++ {
		fmt.Fprintln(w, "\tMOVUPS\t(SI), X0")
		fmt.Fprintln(w, "\tADDQ\t$16, SI")
		fmt.Fprintln(w, "\tMOVUPS\tX0, (DI)")
		fmt.Fprintln(w, "\tADDQ\t$16, DI")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// zero386 writes the 386 runtime·duffzero routine to w: 128 STOSL
// instructions followed by RET.
//
// Register use in the emitted code:
//
//	AX: zero
//	DI: ptr to memory to be zeroed
//
// DI is updated as a side effect.
func zero386(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tSTOSL")
	}
	fmt.Fprintln(w, "\tRET")
}

// copy386 writes the 386 runtime·duffcopy routine to w: 128 unrolled
// units, each moving 4 bytes through CX and advancing both pointers by
// 4, followed by RET.
//
// Register use in the emitted code:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//
// SI and DI are updated as a side effect.
//
// This is equivalent to a sequence of MOVSL but
// for some reason MOVSL is really slow.
func copy386(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVL\t(SI), CX")
		fmt.Fprintln(w, "\tADDL\t$4, SI")
		fmt.Fprintln(w, "\tMOVL\tCX, (DI)")
		fmt.Fprintln(w, "\tADDL\t$4, DI")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// zeroARM writes the arm runtime·duffzero routine to w: 128 MOVW.P
// stores of R0 followed by RET.
//
// Register use in the emitted code:
//
//	R0: zero
//	R1: ptr to memory to be zeroed
//
// R1 is updated as a side effect.
func zeroARM(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R1)")
	}
	fmt.Fprintln(w, "\tRET")
}

// copyARM writes the arm runtime·duffcopy routine to w: 128 unrolled
// load/store pairs followed by RET.
//
// Register use in the emitted code:
//
//	R0: scratch space
//	R1: ptr to source memory
//	R2: ptr to destination memory
//
// R1 and R2 are updated as a side effect.
func copyARM(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVW.P\t4(R1), R0")
		fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R2)")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// zeroARM64 writes the arm64 runtime·duffzero routine to w: 63
// post-incrementing STP.P stores of (ZR, ZR), one final plain STP that
// leaves R20 unchanged, then RET.
//
// Register use in the emitted code:
//
//	ZR: always zero
//	R20: ptr to memory to be zeroed
//
// On return, R20 points to the last zeroed dword.
func zeroARM64(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 63; i++ {
		fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R20)")
	}
	fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R20)")
	fmt.Fprintln(w, "\tRET")
}

// copyARM64 writes the arm64 runtime·duffcopy routine to w: 64 unrolled
// LDP.P/STP.P pairs followed by RET.
//
// Register use in the emitted code:
//
//	R20: ptr to source memory
//	R21: ptr to destination memory
//	R26, R27 (aka REGTMP): scratch space
//
// R20 and R21 are updated as a side effect.
func copyARM64(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")

	for i := 0; i < 64; i++ {
		fmt.Fprintln(w, "\tLDP.P\t16(R20), (R26, R27)")
		fmt.Fprintln(w, "\tSTP.P\t(R26, R27), 16(R21)")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// tagsPPC64x writes the build constraints for the shared ppc64/ppc64le
// file to w, in both //go:build and // +build form, with a blank line
// before and after.
func tagsPPC64x(w io.Writer) {
	fmt.Fprintln(w)
	fmt.Fprintln(w, "//go:build ppc64 || ppc64le")
	fmt.Fprintln(w, "// +build ppc64 ppc64le")
	fmt.Fprintln(w)
}

// zeroPPC64x writes the ppc64x runtime·duffzero routine to w: 128 MOVDU
// stores of R0 followed by RET.
//
// Register use in the emitted code:
//
//	R0: always zero
//	R3 (aka REGRT1): ptr to memory to be zeroed - 8
//
// On return, R3 points to the last zeroed dword.
func zeroPPC64x(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVDU\tR0, 8(R3)")
	}
	fmt.Fprintln(w, "\tRET")
}

// copyPPC64x writes a placeholder runtime·duffcopy symbol to w whose
// body is a single UNDEF instruction.
func copyPPC64x(w io.Writer) {
	// duffcopy is not used on PPC64.
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	fmt.Fprintln(w, "\tUNDEF")
}

// tagsMIPS64x writes the build constraints for the shared
// mips64/mips64le file to w, in both //go:build and // +build form,
// with a blank line before and after.
func tagsMIPS64x(w io.Writer) {
	fmt.Fprintln(w)
	fmt.Fprintln(w, "//go:build mips64 || mips64le")
	fmt.Fprintln(w, "// +build mips64 mips64le")
	fmt.Fprintln(w)
}

// zeroMIPS64x writes the mips64x runtime·duffzero routine to w: 128
// unrolled units, each storing R0 at 8(R1) and then adding 8 to R1,
// followed by RET.
//
// Register use in the emitted code:
//
//	R0: always zero
//	R1 (aka REGRT1): ptr to memory to be zeroed - 8
//
// On return, R1 points to the last zeroed dword.
func zeroMIPS64x(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVV\tR0, 8(R1)")
		fmt.Fprintln(w, "\tADDV\t$8, R1")
	}
	fmt.Fprintln(w, "\tRET")
}

// copyMIPS64x writes the mips64x runtime·duffcopy routine to w: 128
// unrolled units followed by RET.
//
// Register use in the emitted code, as read from the instructions below:
//
//	R1: ptr to source memory
//	R2: ptr to destination memory
//	R23: scratch space
//
// Each unit adds 8 to both R1 and R2.
func copyMIPS64x(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOVV\t(R1), R23")
		fmt.Fprintln(w, "\tADDV\t$8, R1")
		fmt.Fprintln(w, "\tMOVV\tR23, (R2)")
		fmt.Fprintln(w, "\tADDV\t$8, R2")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

// zeroRISCV64 writes the riscv64 runtime·duffzero routine to w: 128
// unrolled units, each storing ZERO at (X10) and then adding 8 to X10,
// followed by RET.
//
// Register use in the emitted code:
//
//	ZERO: always zero
//	X10: ptr to memory to be zeroed
//
// X10 is updated as a side effect.
func zeroRISCV64(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOV\tZERO, (X10)")
		fmt.Fprintln(w, "\tADD\t$8, X10")
	}
	fmt.Fprintln(w, "\tRET")
}

// copyRISCV64 writes the riscv64 runtime·duffcopy routine to w: 128
// unrolled units, each moving one value through X31 and adding 8 to both
// pointers, followed by RET.
//
// Register use in the emitted code:
//
//	X10: ptr to source memory
//	X11: ptr to destination memory
//
// X10 and X11 are updated as a side effect.
func copyRISCV64(w io.Writer) {
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < 128; i++ {
		fmt.Fprintln(w, "\tMOV\t(X10), X31")
		fmt.Fprintln(w, "\tADD\t$8, X10")
		fmt.Fprintln(w, "\tMOV\tX31, (X11)")
		fmt.Fprintln(w, "\tADD\t$8, X11")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}