Josh Bleecher Snyder | ad36009 | 2015-03-31 09:19:10 -0700 | [diff] [blame] | 1 | // Copyright 2015 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // +build ignore |
| 6 | |
| 7 | // runtime·duffzero is a Duff's device for zeroing memory. |
| 8 | // The compiler jumps to computed addresses within |
| 9 | // the routine to zero chunks of memory. |
| 10 | // Do not change duffzero without also |
Emmanuel Odeke | 898ca6b | 2016-09-11 19:05:29 -0700 | [diff] [blame] | 11 | // changing the uses in cmd/compile/internal/*/*.go. |
Josh Bleecher Snyder | ad36009 | 2015-03-31 09:19:10 -0700 | [diff] [blame] | 12 | |
| 13 | // runtime·duffcopy is a Duff's device for copying memory. |
| 14 | // The compiler jumps to computed addresses within |
| 15 | // the routine to copy chunks of memory. |
| 16 | // Source and destination must not overlap. |
| 17 | // Do not change duffcopy without also |
Emmanuel Odeke | 898ca6b | 2016-09-11 19:05:29 -0700 | [diff] [blame] | 18 | // changing the uses in cmd/compile/internal/*/*.go. |
Josh Bleecher Snyder | ad36009 | 2015-03-31 09:19:10 -0700 | [diff] [blame] | 19 | |
| 20 | // See the zero* and copy* generators below |
| 21 | // for architecture-specific comments. |
| 22 | |
| 23 | // mkduff generates duff_*.s. |
| 24 | package main |
| 25 | |
| 26 | import ( |
| 27 | "bytes" |
| 28 | "fmt" |
| 29 | "io" |
| 30 | "io/ioutil" |
| 31 | "log" |
| 32 | ) |
| 33 | |
| 34 | func main() { |
| 35 | gen("amd64", notags, zeroAMD64, copyAMD64) |
| 36 | gen("386", notags, zero386, copy386) |
| 37 | gen("arm", notags, zeroARM, copyARM) |
| 38 | gen("arm64", notags, zeroARM64, copyARM64) |
| 39 | gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x) |
Yao Zhang | 424738e | 2015-09-10 11:25:58 -0400 | [diff] [blame] | 40 | gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x) |
Josh Bleecher Snyder | ad36009 | 2015-03-31 09:19:10 -0700 | [diff] [blame] | 41 | } |
| 42 | |
// gen assembles the complete text of duff_<arch>.s in memory and writes it
// to the current directory with mode 0644.
//
// The file is laid out as: the fixed "generated code" header comments,
// whatever tags emits (build constraints or just a blank line), the
// textflag.h include, a blank line, the output of zero, another blank line,
// and finally the output of copy.
//
// A failure to write the file is fatal: it is logged and the program exits.
func gen(arch string, tags, zero, copy func(io.Writer)) {
	out := new(bytes.Buffer)

	header := []string{
		"// Code generated by mkduff.go; DO NOT EDIT.",
		"// Run go generate from src/runtime to update.",
		"// See mkduff.go for comments.",
	}
	for _, line := range header {
		fmt.Fprintln(out, line)
	}

	tags(out)
	fmt.Fprintln(out, "#include \"textflag.h\"")
	fmt.Fprintln(out)
	zero(out)
	fmt.Fprintln(out)
	copy(out)

	name := "duff_" + arch + ".s"
	err := ioutil.WriteFile(name, out.Bytes(), 0644)
	if err != nil {
		log.Fatalln(err)
	}
}
| 60 | |
// notags is the tags callback for architectures that need no build
// constraint line: it writes a single empty line to w.
func notags(w io.Writer) {
	fmt.Fprintln(w)
}
| 62 | |
// zeroAMD64 writes the amd64 runtime·duffzero routine to w.
//
// Register contract of the generated code:
//
//	X0: zero
//	DI: pointer to the memory to be zeroed; updated as a side effect
//
// The routine consists of 16 identical units followed by RET. Each unit
// stores X0 at offsets 0, 16, 32 and 48 from DI and then moves DI forward
// by 64. Units are separated by a blank line in the output.
func zeroAMD64(w io.Writer) {
	const units = 16
	unit := []string{
		"\tMOVUPS\tX0,(DI)",
		"\tMOVUPS\tX0,16(DI)",
		"\tMOVUPS\tX0,32(DI)",
		"\tMOVUPS\tX0,48(DI)",
		// LEA is used instead of ADD to avoid clobbering flags.
		"\tLEAQ\t64(DI),DI",
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for n := units; n > 0; n-- {
		for _, insn := range unit {
			fmt.Fprintln(w, insn)
		}
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
| 78 | |
// copyAMD64 writes the amd64 runtime·duffcopy routine to w.
//
// Register contract of the generated code:
//
//	SI: pointer to source memory
//	DI: pointer to destination memory
//	X0: used as the transfer register
//
// SI and DI are updated as a side effect.
//
// The routine consists of 64 identical units followed by RET. Each unit
// loads from (SI) into X0, advances SI by 16, stores X0 to (DI) and
// advances DI by 16. The original author's note says a plain sequence of
// MOVSQ instructions was observed to be about 3.5x slower than this code.
func copyAMD64(w io.Writer) {
	const units = 64
	unit := []string{
		"\tMOVUPS\t(SI), X0",
		"\tADDQ\t$16, SI",
		"\tMOVUPS\tX0, (DI)",
		"\tADDQ\t$16, DI",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for n := units; n > 0; n-- {
		for _, insn := range unit {
			fmt.Fprintln(w, insn)
		}
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
| 97 | |
// zero386 writes the 386 runtime·duffzero routine to w.
//
// Register contract of the generated code:
//
//	AX: zero
//	DI: pointer to the memory to be zeroed; updated as a side effect
//
// The routine is 128 consecutive STOSL instructions followed by RET.
func zero386(w io.Writer) {
	const stores = 128

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for n := stores; n > 0; n-- {
		fmt.Fprintln(w, "\tSTOSL")
	}
	fmt.Fprintln(w, "\tRET")
}
| 108 | |
// copy386 writes the 386 runtime·duffcopy routine to w.
//
// Register contract of the generated code:
//
//	SI: pointer to source memory
//	DI: pointer to destination memory
//	CX: used as the transfer register
//
// SI and DI are updated as a side effect.
//
// The routine consists of 128 identical units followed by RET. Each unit
// loads from (SI) into CX, advances SI by 4, stores CX to (DI) and advances
// DI by 4. According to the original author's note this is equivalent to a
// sequence of MOVSL, which was found to be really slow.
func copy386(w io.Writer) {
	const units = 128
	unit := []string{
		"\tMOVL\t(SI), CX",
		"\tADDL\t$4, SI",
		"\tMOVL\tCX, (DI)",
		"\tADDL\t$4, DI",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for n := units; n > 0; n-- {
		for _, insn := range unit {
			fmt.Fprintln(w, insn)
		}
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
| 126 | |
// zeroARM writes the arm runtime·duffzero routine to w.
//
// Register contract of the generated code:
//
//	R0: zero
//	R1: pointer to the memory to be zeroed; updated as a side effect
//
// The routine is 128 consecutive "MOVW.P R0, 4(R1)" stores followed by RET.
func zeroARM(w io.Writer) {
	const stores = 128

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for n := stores; n > 0; n-- {
		fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R1)")
	}
	fmt.Fprintln(w, "\tRET")
}
| 137 | |
// copyARM writes the arm runtime·duffcopy routine to w.
//
// Register contract of the generated code:
//
//	R0: scratch space
//	R1: pointer to source memory
//	R2: pointer to destination memory
//
// R1 and R2 are updated as a side effect.
//
// The routine consists of 128 identical load/store pairs, each followed by
// a blank line in the output, and a final RET.
func copyARM(w io.Writer) {
	const units = 128
	unit := []string{
		"\tMOVW.P\t4(R1), R0",
		"\tMOVW.P\tR0, 4(R2)",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for n := units; n > 0; n-- {
		for _, insn := range unit {
			fmt.Fprintln(w, insn)
		}
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
| 151 | |
// zeroARM64 writes the arm64 runtime·duffzero routine to w.
//
// Register contract of the generated code:
//
//	ZR:                always zero
//	R16 (aka REGRT1):  pointer to the memory to be zeroed
//
// The routine is 63 "STP.P (ZR, ZR), 16(R16)" stores, then one final
// "STP (ZR, ZR), (R16)" store without the .P suffix, then RET. The original
// comment states that on return R16 points to the last zeroed dword.
func zeroARM64(w io.Writer) {
	const advancingStores = 63

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for n := advancingStores; n > 0; n-- {
		fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R16)")
	}
	// The 64th and last store is emitted in its plain (non-.P) form.
	fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R16)")
	fmt.Fprintln(w, "\tRET")
}
| 163 | |
// copyARM64 writes the arm64 runtime·duffcopy routine to w.
//
// Register contract of the generated code:
//
//	R16 (aka REGRT1):       pointer to source memory
//	R17 (aka REGRT2):       pointer to destination memory
//	R26, R27 (aka REGTMP):  scratch space
//
// R16 and R17 are updated as a side effect.
//
// The routine consists of 64 identical LDP.P/STP.P pairs, each followed by
// a blank line in the output, and a final RET.
func copyARM64(w io.Writer) {
	const units = 64
	unit := []string{
		"\tLDP.P\t16(R16), (R26, R27)",
		"\tSTP.P\t(R26, R27), 16(R17)",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for n := units; n > 0; n-- {
		for _, insn := range unit {
			fmt.Fprintln(w, insn)
		}
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
| 178 | |
// tagsPPC64x is the tags callback for the ppc64x output file. It writes the
// "+build ppc64 ppc64le" constraint line to w, with one blank line before
// it and one after.
func tagsPPC64x(w io.Writer) {
	lines := []string{
		"",
		"// +build ppc64 ppc64le",
		"",
	}
	for _, line := range lines {
		fmt.Fprintln(w, line)
	}
}
| 184 | |
// zeroPPC64x writes the ppc64/ppc64le runtime·duffzero routine to w.
//
// Register contract of the generated code:
//
//	R0:               always zero
//	R3 (aka REGRT1):  pointer to the memory to be zeroed, minus 8
//
// On return, R3 points to the last zeroed dword.
//
// The routine is 128 consecutive "MOVDU R0, 8(R3)" stores followed by RET.
func zeroPPC64x(w io.Writer) {
	const stores = 128

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for n := stores; n > 0; n-- {
		fmt.Fprintln(w, "\tMOVDU\tR0, 8(R3)")
	}
	fmt.Fprintln(w, "\tRET")
}
| 195 | |
// copyPPC64x is the copy callback for the ppc64x output file. No duffcopy
// routine is generated for this target: the only thing written to w is a
// TODO comment line.
func copyPPC64x(w io.Writer) {
	const placeholder = "// TODO: Implement runtime·duffcopy."
	fmt.Fprintln(w, placeholder)
}
Yao Zhang | 424738e | 2015-09-10 11:25:58 -0400 | [diff] [blame] | 199 | |
// tagsMIPS64x is the tags callback for the mips64x output file. It writes
// the "+build mips64 mips64le" constraint line to w, with one blank line
// before it and one after.
func tagsMIPS64x(w io.Writer) {
	lines := []string{
		"",
		"// +build mips64 mips64le",
		"",
	}
	for _, line := range lines {
		fmt.Fprintln(w, line)
	}
}
| 205 | |
// zeroMIPS64x writes the mips64/mips64le runtime·duffzero routine to w.
//
// Register contract of the generated code:
//
//	R0:               always zero
//	R1 (aka REGRT1):  pointer to the memory to be zeroed, minus 8
//
// On return, R1 points to the last zeroed dword.
//
// The routine consists of 128 identical units followed by RET. Each unit
// stores R0 at 8(R1) and then adds 8 to R1.
func zeroMIPS64x(w io.Writer) {
	const units = 128
	unit := []string{
		"\tMOVV\tR0, 8(R1)",
		"\tADDV\t$8, R1",
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for n := units; n > 0; n-- {
		for _, insn := range unit {
			fmt.Fprintln(w, insn)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 217 | |
// copyMIPS64x is the copy callback for the mips64x output file. No duffcopy
// routine is generated for this target: the only thing written to w is a
// TODO comment line.
func copyMIPS64x(w io.Writer) {
	const placeholder = "// TODO: Implement runtime·duffcopy."
	fmt.Fprintln(w, placeholder)
}