// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

// runtime·duffzero is a Duff's device for zeroing memory.
// The compiler jumps to computed addresses within
// the routine to zero chunks of memory.
// Do not change duffzero without also
// changing clearfat in cmd/?g/ggen.go.

// runtime·duffcopy is a Duff's device for copying memory.
// The compiler jumps to computed addresses within
// the routine to copy chunks of memory.
// Source and destination must not overlap.
// Do not change duffcopy without also
// changing blockcopy in cmd/?g/cgen.go.

// See the zero* and copy* generators below
// for architecture-specific comments.
| 22 | |
| 23 | // mkduff generates duff_*.s. |
| 24 | package main |
| 25 | |
| 26 | import ( |
| 27 | "bytes" |
| 28 | "fmt" |
| 29 | "io" |
| 30 | "io/ioutil" |
| 31 | "log" |
| 32 | ) |
| 33 | |
| 34 | func main() { |
| 35 | gen("amd64", notags, zeroAMD64, copyAMD64) |
| 36 | gen("386", notags, zero386, copy386) |
| 37 | gen("arm", notags, zeroARM, copyARM) |
| 38 | gen("arm64", notags, zeroARM64, copyARM64) |
| 39 | gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x) |
| 40 | } |
| 41 | |
// gen assembles the contents of duff_<arch>.s in memory and writes the
// result to that file in the current directory with mode 0644.
//
// The output consists of, in order: a three-line "auto-generated"
// banner, whatever tags emits, the textflag.h include followed by a
// blank line, whatever zero emits, a blank line, and whatever copy
// emits. If the file cannot be written the program exits via
// log.Fatalln.
func gen(arch string, tags, zero, copy func(io.Writer)) {
	out := new(bytes.Buffer)

	banner := []string{
		"// AUTO-GENERATED by mkduff.go",
		"// Run go generate from src/runtime to update.",
		"// See mkduff.go for comments.",
	}
	for _, line := range banner {
		fmt.Fprintln(out, line)
	}

	tags(out)
	fmt.Fprintln(out, `#include "textflag.h"`)
	fmt.Fprintln(out)

	zero(out)
	fmt.Fprintln(out)
	copy(out)

	name := "duff_" + arch + ".s"
	err := ioutil.WriteFile(name, out.Bytes(), 0644)
	if err != nil {
		log.Fatalln(err)
	}
}
| 59 | |
// notags is the build-tag generator for files that need no build
// constraint: it emits a single empty line.
func notags(w io.Writer) {
	fmt.Fprint(w, "\n")
}
| 61 | |
// zeroAMD64 emits the amd64 runtime·duffzero routine.
//
// Register contract of the generated code:
//	X0: zero
//	DI: ptr to memory to be zeroed
// DI is updated as a side effect.
//
// The routine is 16 identical groups; each group is four MOVUPS stores
// of X0 at offsets 0, 16, 32 and 48 from DI, then an ADDQ $64 on DI,
// then a blank separator line. A single RET follows the last group.
func zeroAMD64(w io.Writer) {
	const groups = 16
	group := []string{
		"\tMOVUPS\tX0,(DI)",
		"\tMOVUPS\tX0,16(DI)",
		"\tMOVUPS\tX0,32(DI)",
		"\tMOVUPS\tX0,48(DI)",
		"\tADDQ\t$64,DI",
		"", // blank line between groups
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for n := groups; n > 0; n-- {
		for _, line := range group {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 77 | |
// copyAMD64 emits the amd64 runtime·duffcopy routine.
//
// Register contract of the generated code:
//	SI: ptr to source memory
//	DI: ptr to destination memory
// SI and DI are updated as a side effect; X0 is used as the transfer
// register.
//
// The routine is 64 identical groups, each loading through SI into X0,
// advancing SI by 16, storing X0 through DI and advancing DI by 16,
// followed by a blank separator line. A single RET follows the last
// group.
//
// This is equivalent to a sequence of MOVSQ, but for some reason
// MOVSQ was measured to be 3.5x slower than this code.
func copyAMD64(w io.Writer) {
	const groups = 64
	group := []string{
		"\tMOVUPS\t(SI), X0",
		"\tADDQ\t$16, SI",
		"\tMOVUPS\tX0, (DI)",
		"\tADDQ\t$16, DI",
		"", // blank line between groups
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for n := 0; n != groups; n++ {
		for _, line := range group {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 96 | |
// zero386 emits the 386 runtime·duffzero routine: a TEXT header,
// 128 consecutive STOSL instructions, and a RET.
//
// Register contract of the generated code:
//	AX: zero
//	DI: ptr to memory to be zeroed
// DI is updated as a side effect.
func zero386(w io.Writer) {
	const stores = 128

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for left := stores; left > 0; left-- {
		fmt.Fprintln(w, "\tSTOSL")
	}
	fmt.Fprintln(w, "\tRET")
}
| 107 | |
// copy386 emits the 386 runtime·duffcopy routine.
//
// Register contract of the generated code:
//	SI: ptr to source memory
//	DI: ptr to destination memory
// SI and DI are updated as a side effect; CX is used as the transfer
// register.
//
// The routine is 128 identical groups, each loading through SI into
// CX, advancing SI by 4, storing CX through DI and advancing DI by 4,
// followed by a blank separator line. A single RET follows the last
// group.
//
// This is equivalent to a sequence of MOVSL but
// for some reason MOVSL is really slow.
func copy386(w io.Writer) {
	const groups = 128
	group := []string{
		"\tMOVL\t(SI), CX",
		"\tADDL\t$4, SI",
		"\tMOVL\tCX, (DI)",
		"\tADDL\t$4, DI",
		"", // blank line between groups
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for n := 0; n != groups; n++ {
		for _, line := range group {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 125 | |
// zeroARM emits the arm runtime·duffzero routine: a TEXT header,
// 128 consecutive "MOVW.P R0, 4(R1)" instructions, and a RET.
//
// Register contract of the generated code:
//	R0: zero
//	R1: ptr to memory to be zeroed
// R1 is updated as a side effect.
func zeroARM(w io.Writer) {
	const (
		stores = 128
		store  = "\tMOVW.P\tR0, 4(R1)"
	)

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for left := stores; left > 0; left-- {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
| 136 | |
// copyARM emits the arm runtime·duffcopy routine.
//
// Register contract of the generated code:
//	R0: scratch space
//	R1: ptr to source memory
//	R2: ptr to destination memory
// R1 and R2 are updated as a side effect.
//
// The routine is 128 identical groups, each a MOVW.P load through R1
// into R0 and a MOVW.P store of R0 through R2, followed by a blank
// separator line. A single RET follows the last group.
func copyARM(w io.Writer) {
	const groups = 128
	group := [...]string{
		"\tMOVW.P\t4(R1), R0",
		"\tMOVW.P\tR0, 4(R2)",
		"", // blank line between groups
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for n := 0; n != groups; n++ {
		for _, line := range group {
			fmt.Fprintln(w, line)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
| 150 | |
// zeroARM64 emits the arm64 runtime·duffzero routine: a TEXT header
// (frame spec $-8-0), 128 consecutive "MOVD.W ZR, 8(R16)"
// instructions, and a RET.
//
// Register contract of the generated code:
//	ZR: always zero
//	R16 (aka REGRT1): ptr to memory to be zeroed - 8
// On return, R16 points to the last zeroed dword.
func zeroARM64(w io.Writer) {
	const (
		stores = 128
		store  = "\tMOVD.W\tZR, 8(R16)"
	)

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $-8-0")
	for left := stores; left > 0; left-- {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
| 161 | |
// copyARM64 is a placeholder: no arm64 duffcopy routine is generated
// yet. It emits only a TODO comment line into the assembly file.
func copyARM64(w io.Writer) {
	const placeholder = "// TODO: Implement runtime·duffcopy."
	fmt.Fprintln(w, placeholder)
}
| 165 | |
// tagsPPC64x emits the build-constraint preamble for the shared
// ppc64/ppc64le file: the "+build ppc64 ppc64le" comment line with a
// blank line before and after it.
func tagsPPC64x(w io.Writer) {
	preamble := [...]string{
		"",
		"// +build ppc64 ppc64le",
		"",
	}
	for _, line := range preamble {
		fmt.Fprintln(w, line)
	}
}
| 171 | |
// zeroPPC64x emits the ppc64/ppc64le runtime·duffzero routine: a TEXT
// header (frame spec $-8-0), 128 consecutive "MOVDU R0, 8(R3)"
// instructions, and a RETURN.
//
// Register contract of the generated code:
//	R0: always zero
//	R3 (aka REGRT1): ptr to memory to be zeroed - 8
// On return, R3 points to the last zeroed dword.
func zeroPPC64x(w io.Writer) {
	const (
		stores = 128
		store  = "\tMOVDU\tR0, 8(R3)"
	)

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $-8-0")
	for left := stores; left > 0; left-- {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRETURN")
}
| 182 | |
// copyPPC64x is a placeholder: no ppc64/ppc64le duffcopy routine is
// generated yet. It emits only a TODO comment line into the assembly
// file.
func copyPPC64x(w io.Writer) {
	const placeholder = "// TODO: Implement runtime·duffcopy."
	fmt.Fprintln(w, placeholder)
}