Josh Bleecher Snyder | ad36009 | 2015-03-31 09:19:10 -0700 | [diff] [blame] | 1 | // Copyright 2015 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Russ Cox | d4b2638 | 2021-02-19 18:35:10 -0500 | [diff] [blame] | 5 | //go:build ignore |
Josh Bleecher Snyder | ad36009 | 2015-03-31 09:19:10 -0700 | [diff] [blame] | 6 | // +build ignore |
| 7 | |
| 8 | // runtime·duffzero is a Duff's device for zeroing memory. |
| 9 | // The compiler jumps to computed addresses within |
| 10 | // the routine to zero chunks of memory. |
| 11 | // Do not change duffzero without also |
Emmanuel Odeke | 898ca6b | 2016-09-11 19:05:29 -0700 | [diff] [blame] | 12 | // changing the uses in cmd/compile/internal/*/*.go. |
Josh Bleecher Snyder | ad36009 | 2015-03-31 09:19:10 -0700 | [diff] [blame] | 13 | |
| 14 | // runtime·duffcopy is a Duff's device for copying memory. |
| 15 | // The compiler jumps to computed addresses within |
| 16 | // the routine to copy chunks of memory. |
| 17 | // Source and destination must not overlap. |
| 18 | // Do not change duffcopy without also |
Emmanuel Odeke | 898ca6b | 2016-09-11 19:05:29 -0700 | [diff] [blame] | 19 | // changing the uses in cmd/compile/internal/*/*.go. |
Josh Bleecher Snyder | ad36009 | 2015-03-31 09:19:10 -0700 | [diff] [blame] | 20 | |
| 21 | // See the zero* and copy* generators below |
| 22 | // for architecture-specific comments. |
| 23 | |
| 24 | // mkduff generates duff_*.s. |
| 25 | package main |
| 26 | |
| 27 | import ( |
| 28 | "bytes" |
| 29 | "fmt" |
| 30 | "io" |
Josh Bleecher Snyder | ad36009 | 2015-03-31 09:19:10 -0700 | [diff] [blame] | 31 | "log" |
Russ Cox | 4f1b0a4 | 2020-10-29 14:17:47 -0400 | [diff] [blame] | 32 | "os" |
Josh Bleecher Snyder | ad36009 | 2015-03-31 09:19:10 -0700 | [diff] [blame] | 33 | ) |
| 34 | |
| 35 | func main() { |
| 36 | gen("amd64", notags, zeroAMD64, copyAMD64) |
| 37 | gen("386", notags, zero386, copy386) |
| 38 | gen("arm", notags, zeroARM, copyARM) |
| 39 | gen("arm64", notags, zeroARM64, copyARM64) |
| 40 | gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x) |
Yao Zhang | 424738e | 2015-09-10 11:25:58 -0400 | [diff] [blame] | 41 | gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x) |
Michał Derkacz | 150d244 | 2020-06-14 00:06:24 +0200 | [diff] [blame] | 42 | gen("riscv64", notags, zeroRISCV64, copyRISCV64) |
Josh Bleecher Snyder | ad36009 | 2015-03-31 09:19:10 -0700 | [diff] [blame] | 43 | } |
| 44 | |
// gen assembles the contents of duff_<arch>.s in memory and writes the
// result to the current directory.
//
// The file consists of, in order: a fixed three-line generated-code header,
// whatever tags emits, the textflag.h include followed by a blank line, the
// output of zero, a blank line, and the output of copy.
//
// If the file cannot be written, the error is logged and the program exits.
func gen(arch string, tags, zero, copy func(io.Writer)) {
	out := new(bytes.Buffer)

	header := []string{
		"// Code generated by mkduff.go; DO NOT EDIT.",
		"// Run go generate from src/runtime to update.",
		"// See mkduff.go for comments.",
	}
	for _, line := range header {
		fmt.Fprintln(out, line)
	}

	tags(out)
	fmt.Fprintln(out, `#include "textflag.h"`)
	fmt.Fprintln(out)

	zero(out)
	fmt.Fprintln(out)
	copy(out)

	name := "duff_" + arch + ".s"
	err := os.WriteFile(name, out.Bytes(), 0644)
	if err != nil {
		log.Fatalln(err)
	}
}
| 62 | |
// notags is the tag emitter for architectures whose generated file carries
// no build constraints: it writes a single empty line.
func notags(w io.Writer) {
	fmt.Fprint(w, "\n")
}
| 64 | |
// zeroAMD64 writes the amd64 duffzero routine to w.
//
// Register contract of the generated code:
//
//	X15: zero
//	DI:  ptr to memory to be zeroed; updated as a side effect
//
// The routine is 16 repetitions of the same group: four MOVUPS stores at
// offsets 0, 16, 32 and 48 from DI, then DI advanced by 64.
func zeroAMD64(w io.Writer) {
	group := []string{
		"\tMOVUPS\tX15,(DI)",
		"\tMOVUPS\tX15,16(DI)",
		"\tMOVUPS\tX15,32(DI)",
		"\tMOVUPS\tX15,48(DI)",
		// LEA is used rather than ADD so that flags are not clobbered.
		"\tLEAQ\t64(DI),DI",
		"", // blank separator between groups
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT, $0-0")
	for left := 16; left > 0; left-- {
		for _, line := range group {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 80 | |
// copyAMD64 writes the amd64 duffcopy routine to w.
//
// Register contract of the generated code:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//	X0: used as the intermediate for each 16-byte move
//
// SI and DI are updated as a side effect.
//
// The emitted sequence is equivalent to a run of MOVSQ instructions, but
// MOVSQ was found to be about 3.5x slower than this code.
func copyAMD64(w io.Writer) {
	step := []string{
		"\tMOVUPS\t(SI), X0",
		"\tADDQ\t$16, SI",
		"\tMOVUPS\tX0, (DI)",
		"\tADDQ\t$16, DI",
		"", // blank separator between steps
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT, $0-0")
	for left := 64; left > 0; left-- {
		for _, line := range step {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 98 | |
// zero386 writes the 386 duffzero routine to w: 128 STOSL instructions
// followed by RET.
//
// Register contract of the generated code:
//
//	AX: zero
//	DI: ptr to memory to be zeroed; updated as a side effect
func zero386(w io.Writer) {
	const (
		entry = "TEXT runtime·duffzero(SB), NOSPLIT, $0-0"
		store = "\tSTOSL"
	)

	fmt.Fprintln(w, entry)
	for left := 128; left > 0; left-- {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
| 109 | |
// copy386 writes the 386 duffcopy routine to w.
//
// Register contract of the generated code:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//	CX: used as the intermediate for each 4-byte move
//
// SI and DI are updated as a side effect.
//
// The emitted sequence is equivalent to a run of MOVSL instructions, but
// MOVSL was found to be really slow.
func copy386(w io.Writer) {
	step := []string{
		"\tMOVL\t(SI), CX",
		"\tADDL\t$4, SI",
		"\tMOVL\tCX, (DI)",
		"\tADDL\t$4, DI",
		"", // blank separator between steps
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for left := 128; left > 0; left-- {
		for _, line := range step {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 127 | |
// zeroARM writes the arm duffzero routine to w: 128 MOVW.P stores of R0
// through R1, followed by RET.
//
// Register contract of the generated code:
//
//	R0: zero
//	R1: ptr to memory to be zeroed; updated as a side effect
func zeroARM(w io.Writer) {
	const (
		entry = "TEXT runtime·duffzero(SB), NOSPLIT, $0-0"
		store = "\tMOVW.P\tR0, 4(R1)"
	)

	fmt.Fprintln(w, entry)
	for left := 128; left > 0; left-- {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
| 138 | |
// copyARM writes the arm duffcopy routine to w: 128 load/store pairs
// followed by RET.
//
// Register contract of the generated code:
//
//	R0: scratch space
//	R1: ptr to source memory
//	R2: ptr to destination memory
//
// R1 and R2 are updated as a side effect.
func copyARM(w io.Writer) {
	step := []string{
		"\tMOVW.P\t4(R1), R0",
		"\tMOVW.P\tR0, 4(R2)",
		"", // blank separator between steps
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for left := 128; left > 0; left-- {
		for _, line := range step {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 152 | |
// zeroARM64 writes the arm64 duffzero routine to w: 63 STP.P pair stores,
// one final plain STP pair store, then RET.
//
// Register contract of the generated code:
//
//	ZR:  always zero
//	R20: ptr to memory to be zeroed
//
// On return, R20 points to the last zeroed dword.
// NOTE(review): the final store is emitted without the .P suffix used by the
// 63 stores before it, so R20 presumably ends at the start of the final
// 16-byte pair rather than at its second dword — confirm against callers.
func zeroARM64(w io.Writer) {
	const (
		entry    = "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0"
		stepping = "\tSTP.P\t(ZR, ZR), 16(R20)"
		final    = "\tSTP\t(ZR, ZR), (R20)"
	)

	fmt.Fprintln(w, entry)
	for left := 63; left > 0; left-- {
		fmt.Fprintln(w, stepping)
	}
	fmt.Fprintln(w, final)
	fmt.Fprintln(w, "\tRET")
}
| 164 | |
// copyARM64 writes the arm64 duffcopy routine to w: 64 load-pair/store-pair
// steps followed by RET.
//
// Register contract of the generated code:
//
//	R20:      ptr to source memory
//	R21:      ptr to destination memory
//	R26, R27: scratch space (R27 aka REGTMP)
//
// R20 and R21 are updated as a side effect.
func copyARM64(w io.Writer) {
	step := []string{
		"\tLDP.P\t16(R20), (R26, R27)",
		"\tSTP.P\t(R26, R27), 16(R21)",
		"", // blank separator between steps
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 64; left > 0; left-- {
		for _, line := range step {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 179 | |
// tagsPPC64x writes the build constraints for the ppc64x output file to w:
// a blank line, the go:build form, the older +build form, and a trailing
// blank line. Both forms name ppc64 and ppc64le.
func tagsPPC64x(w io.Writer) {
	lines := [...]string{
		"",
		"//go:build ppc64 || ppc64le",
		"// +build ppc64 ppc64le",
		"",
	}
	for _, line := range lines {
		fmt.Fprintln(w, line)
	}
}
| 186 | |
// zeroPPC64x writes the ppc64/ppc64le duffzero routine to w: 128 MOVDU
// stores of R0 through R3, followed by RET.
//
// Register contract of the generated code:
//
//	R0:              always zero
//	R3 (aka REGRT1): ptr to memory to be zeroed - 8
//
// On return, R3 points to the last zeroed dword.
func zeroPPC64x(w io.Writer) {
	const (
		entry = "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0"
		store = "\tMOVDU\tR0, 8(R3)"
	)

	fmt.Fprintln(w, entry)
	for left := 128; left > 0; left-- {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
| 197 | |
// copyPPC64x writes a placeholder duffcopy symbol to w. duffcopy is not
// used on PPC64, so the routine body is a single UNDEF.
func copyPPC64x(w io.Writer) {
	stub := [...]string{
		"TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0",
		"\tUNDEF",
	}
	for _, line := range stub {
		fmt.Fprintln(w, line)
	}
}
Yao Zhang | 424738e | 2015-09-10 11:25:58 -0400 | [diff] [blame] | 203 | |
// tagsMIPS64x writes the build constraints for the mips64x output file to w:
// a blank line, the go:build form, the older +build form, and a trailing
// blank line. Both forms name mips64 and mips64le.
func tagsMIPS64x(w io.Writer) {
	lines := [...]string{
		"",
		"//go:build mips64 || mips64le",
		"// +build mips64 mips64le",
		"",
	}
	for _, line := range lines {
		fmt.Fprintln(w, line)
	}
}
| 210 | |
// zeroMIPS64x writes the mips64/mips64le duffzero routine to w: 128
// repetitions of a store at offset 8 from R1 followed by R1 += 8, then RET.
//
// Register contract of the generated code:
//
//	R0:              always zero
//	R1 (aka REGRT1): ptr to memory to be zeroed - 8
//
// On return, R1 points to the last zeroed dword.
func zeroMIPS64x(w io.Writer) {
	step := []string{
		"\tMOVV\tR0, 8(R1)",
		"\tADDV\t$8, R1",
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 128; left > 0; left-- {
		for _, line := range step {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 222 | |
// copyMIPS64x writes the mips64/mips64le duffcopy routine to w: 128
// load/advance/store/advance steps followed by RET.
//
// Register usage, as seen in the emitted instructions:
//
//	R1:  address loaded from; advanced by 8 each step
//	R2:  address stored to; advanced by 8 each step
//	R23: holds the value between the load and the store
func copyMIPS64x(w io.Writer) {
	step := []string{
		"\tMOVV\t(R1), R23",
		"\tADDV\t$8, R1",
		"\tMOVV\tR23, (R2)",
		"\tADDV\t$8, R2",
		"", // blank separator between steps
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 128; left > 0; left-- {
		for _, line := range step {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
Michał Derkacz | 150d244 | 2020-06-14 00:06:24 +0200 | [diff] [blame] | 234 | |
// zeroRISCV64 writes the riscv64 duffzero routine to w: 128 repetitions of
// a store through X10 followed by X10 += 8, then RET.
//
// Register contract of the generated code:
//
//	ZERO: always zero
//	X10:  ptr to memory to be zeroed; updated as a side effect
func zeroRISCV64(w io.Writer) {
	step := []string{
		"\tMOV\tZERO, (X10)",
		"\tADD\t$8, X10",
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 128; left > 0; left-- {
		for _, line := range step {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 246 | |
// copyRISCV64 writes the riscv64 duffcopy routine to w: 128
// load/advance/store/advance steps followed by RET.
//
// Register contract of the generated code:
//
//	X10: ptr to source memory
//	X11: ptr to destination memory
//	X31: holds the value between the load and the store
//
// X10 and X11 are updated as a side effect.
func copyRISCV64(w io.Writer) {
	step := []string{
		"\tMOV\t(X10), X31",
		"\tADD\t$8, X10",
		"\tMOV\tX31, (X11)",
		"\tADD\t$8, X11",
		"", // blank separator between steps
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 128; left > 0; left-- {
		for _, line := range step {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}