|  | // Copyright 2015 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | //go:build ignore | 
|  |  | 
|  | // runtime·duffzero is a Duff's device for zeroing memory. | 
|  | // The compiler jumps to computed addresses within | 
|  | // the routine to zero chunks of memory. | 
|  | // Do not change duffzero without also | 
|  | // changing the uses in cmd/compile/internal/*/*.go. | 
|  |  | 
|  | // runtime·duffcopy is a Duff's device for copying memory. | 
|  | // The compiler jumps to computed addresses within | 
|  | // the routine to copy chunks of memory. | 
|  | // Source and destination must not overlap. | 
|  | // Do not change duffcopy without also | 
|  | // changing the uses in cmd/compile/internal/*/*.go. | 
|  |  | 
|  | // See the zero* and copy* generators below | 
|  | // for architecture-specific comments. | 
|  |  | 
|  | // mkduff generates duff_*.s. | 
|  | package main | 
|  |  | 
|  | import ( | 
|  | "bytes" | 
|  | "fmt" | 
|  | "io" | 
|  | "log" | 
|  | "os" | 
|  | ) | 
|  |  | 
|  | func main() { | 
|  | gen("amd64", notags, zeroAMD64, copyAMD64) | 
|  | gen("386", notags, zero386, copy386) | 
|  | gen("arm", notags, zeroARM, copyARM) | 
|  | gen("arm64", notags, zeroARM64, copyARM64) | 
|  | gen("loong64", notags, zeroLOONG64, copyLOONG64) | 
|  | gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x) | 
|  | gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x) | 
|  | gen("riscv64", notags, zeroRISCV64, copyRISCV64) | 
|  | } | 
|  |  | 
// gen builds the contents of duff_<arch>.s in memory and writes the result
// to that file name (relative to the current directory) with mode 0644.
//
// The file is laid out as: a three-line "generated code" banner, whatever
// tags emits, the textflag.h include, a blank line, the output of zero,
// a blank line, and the output of copy. A failed write terminates the
// program via log.Fatalln.
func gen(arch string, tags, zero, copy func(io.Writer)) {
	out := new(bytes.Buffer)

	banner := [...]string{
		"// Code generated by mkduff.go; DO NOT EDIT.",
		"// Run go generate from src/runtime to update.",
		"// See mkduff.go for comments.",
	}
	for _, line := range banner {
		fmt.Fprintln(out, line)
	}

	tags(out)
	fmt.Fprintln(out, "#include \"textflag.h\"")
	fmt.Fprintln(out)
	zero(out)
	fmt.Fprintln(out)
	copy(out)

	name := "duff_" + arch + ".s"
	err := os.WriteFile(name, out.Bytes(), 0644)
	if err != nil {
		log.Fatalln(err)
	}
}
|  |  | 
// notags is the tags callback for targets that need no build constraint:
// it writes a single empty line to w.
func notags(w io.Writer) {
	fmt.Fprint(w, "\n")
}
|  |  | 
// zeroAMD64 writes the amd64 runtime·duffzero routine to w.
//
// Register use in the generated code:
//
//	X15: zero
//	DI:  ptr to memory to be zeroed; updated as a side effect
//
// The routine is 16 repetitions of four MOVUPS stores (offsets 0, 16, 32
// and 48 from DI) followed by a 64-byte advance of DI, then one RET.
func zeroAMD64(w io.Writer) {
	const blocks = 16
	stores := [...]string{
		"\tMOVUPS\tX15,(DI)",
		"\tMOVUPS\tX15,16(DI)",
		"\tMOVUPS\tX15,32(DI)",
		"\tMOVUPS\tX15,48(DI)",
	}

	fmt.Fprintln(w, "TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for left := blocks; left > 0; left-- {
		for _, ins := range stores {
			fmt.Fprintln(w, ins)
		}
		// LEA is used instead of ADD so the flags are not clobbered.
		fmt.Fprintln(w, "\tLEAQ\t64(DI),DI")
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
|  |  | 
// copyAMD64 writes the amd64 runtime·duffcopy routine to w.
//
// Register use in the generated code:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//	X0: holds each 16-byte chunk between the load and the store
//
// SI and DI are updated as a side effect.
//
// The sequence is equivalent to a run of MOVSQ instructions, but MOVSQ
// was for some reason 3.5x slower than this code.
func copyAMD64(w io.Writer) {
	emit := func(lines ...string) {
		for _, l := range lines {
			fmt.Fprintln(w, l)
		}
	}

	emit("TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for chunk := 1; chunk <= 64; chunk++ {
		emit(
			"\tMOVUPS\t(SI), X0",
			"\tADDQ\t$16, SI",
			"\tMOVUPS\tX0, (DI)",
			"\tADDQ\t$16, DI",
		)
		fmt.Fprintln(w)
	}
	emit("\tRET")
}
|  |  | 
// zero386 writes the 386 runtime·duffzero routine to w: a TEXT header,
// 128 STOSL instructions and a RET.
//
// Register use in the generated code:
//
//	AX: zero
//	DI: ptr to memory to be zeroed; updated as a side effect
func zero386(w io.Writer) {
	const (
		header = "TEXT runtime·duffzero(SB), NOSPLIT, $0-0"
		store  = "\tSTOSL"
		count  = 128
	)

	fmt.Fprintln(w, header)
	for left := count; left > 0; left-- {
		fmt.Fprintln(w, store)
	}
	fmt.Fprintln(w, "\tRET")
}
|  |  | 
// copy386 writes the 386 runtime·duffcopy routine to w.
//
// Register use in the generated code:
//
//	SI: ptr to source memory
//	DI: ptr to destination memory
//	CX: holds each 4-byte word between the load and the store
//
// SI and DI are updated as a side effect.
//
// The sequence is equivalent to a run of MOVSL instructions, but for
// some reason MOVSL is really slow.
func copy386(w io.Writer) {
	step := [...]string{
		"\tMOVL\t(SI), CX",
		"\tADDL\t$4, SI",
		"\tMOVL\tCX, (DI)",
		"\tADDL\t$4, DI",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for word := 0; word != 128; word++ {
		for _, ins := range step {
			fmt.Fprintln(w, ins)
		}
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
|  |  | 
// zeroARM writes the arm runtime·duffzero routine to w: a TEXT header,
// 128 MOVW.P stores and a RET.
//
// Register use in the generated code:
//
//	R0: zero
//	R1: ptr to memory to be zeroed; updated as a side effect
func zeroARM(w io.Writer) {
	const words = 128

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $0-0")
	for n := words; n > 0; n-- {
		fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R1)")
	}
	fmt.Fprintln(w, "\tRET")
}
|  |  | 
// copyARM writes the arm runtime·duffcopy routine to w: a TEXT header,
// 128 load/store pairs (each followed by a blank line) and a RET.
//
// Register use in the generated code:
//
//	R0: scratch space
//	R1: ptr to source memory
//	R2: ptr to destination memory
//
// R1 and R2 are updated as a side effect.
func copyARM(w io.Writer) {
	const (
		load  = "\tMOVW.P\t4(R1), R0"
		store = "\tMOVW.P\tR0, 4(R2)"
	)

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
	for word := 1; word <= 128; word++ {
		fmt.Fprintln(w, load)
		fmt.Fprintln(w, store)
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
|  |  | 
// zeroARM64 writes the arm64 runtime·duffzero routine to w.
//
// Register use in the generated code:
//
//	ZR:  always zero
//	R20: ptr to memory to be zeroed
//
// The routine is 63 STP.P stores of the pair (ZR, ZR) with a 16-byte
// step, followed by one plain STP at (R20) and a RET. Because the final
// store is not the .P form, R20 is left pointing at the last pair
// written rather than past it.
func zeroARM64(w io.Writer) {
	const steppedStores = 63

	fmt.Fprintln(w, "TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for n := steppedStores; n > 0; n-- {
		fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R20)")
	}
	for _, tail := range [...]string{"\tSTP\t(ZR, ZR), (R20)", "\tRET"} {
		fmt.Fprintln(w, tail)
	}
}
|  |  | 
// copyARM64 writes the arm64 runtime·duffcopy routine to w: a TEXT
// header, 64 LDP.P/STP.P pairs (each followed by a blank line) and a RET.
//
// Register use in the generated code:
//
//	R20:      ptr to source memory
//	R21:      ptr to destination memory
//	R26, R27: scratch space (R27 aka REGTMP)
//
// R20 and R21 are updated as a side effect.
func copyARM64(w io.Writer) {
	pair := [...]string{
		"\tLDP.P\t16(R20), (R26, R27)",
		"\tSTP.P\t(R26, R27), 16(R21)",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 64; left > 0; left-- {
		for _, ins := range pair {
			fmt.Fprintln(w, ins)
		}
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
|  |  | 
// zeroLOONG64 writes the loong64 runtime·duffzero routine to w: a TEXT
// header, 128 store/advance pairs and a RET.
//
// Register use in the generated code:
//
//	R0:  always zero
//	R19: (aka REGRT1) ptr to memory to be zeroed - 8
//
// Each step stores at 8(R19) and then adds 8 to R19, so on return R19
// points to the last zeroed dword.
func zeroLOONG64(w io.Writer) {
	const (
		store   = "\tMOVV\tR0, 8(R19)"
		advance = "\tADDV\t$8, R19"
	)

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for dword := 1; dword <= 128; dword++ {
		fmt.Fprintln(w, store)
		fmt.Fprintln(w, advance)
	}
	fmt.Fprintln(w, "\tRET")
}
|  |  | 
// copyLOONG64 writes the loong64 runtime·duffcopy routine to w: a TEXT
// header, 128 four-instruction copy steps (each followed by a blank
// line) and a RET.
//
// Register use in the generated code, as read from the instructions:
//
//	R19: address loaded from; advanced by 8 per step
//	R20: address stored to; advanced by 8 per step
//	R30: holds each value between the load and the store
func copyLOONG64(w io.Writer) {
	emit := func(lines ...string) {
		for _, l := range lines {
			fmt.Fprintln(w, l)
		}
	}

	emit("TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 128; left > 0; left-- {
		emit(
			"\tMOVV\t(R19), R30",
			"\tADDV\t$8, R19",
			"\tMOVV\tR30, (R20)",
			"\tADDV\t$8, R20",
		)
		fmt.Fprintln(w)
	}
	emit("\tRET")
}
|  |  | 
// tagsPPC64x writes the build constraint for the shared ppc64/ppc64le
// file to w, with an empty line before and after it.
func tagsPPC64x(w io.Writer) {
	lines := [...]string{
		"",
		"//go:build ppc64 || ppc64le",
		"",
	}
	for _, l := range lines {
		fmt.Fprintln(w, l)
	}
}
|  |  | 
// zeroPPC64x writes the ppc64/ppc64le runtime·duffzero routine to w: a
// TEXT header, 128 MOVDU stores and a RET.
//
// Register use in the generated code:
//
//	R0:  always zero
//	R20: ptr to memory to be zeroed - 8
//
// Every store targets 8(R20) using the update form of the move, so on
// return R20 points to the last zeroed dword.
//
// Note: the pointer register is R20, matching the emitted instructions;
// earlier revisions of this comment named R3, which the code does not use.
func zeroPPC64x(w io.Writer) {
	const dwords = 128

	fmt.Fprintln(w, "TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for n := dwords; n > 0; n-- {
		fmt.Fprintln(w, "\tMOVDU\tR0, 8(R20)")
	}
	fmt.Fprintln(w, "\tRET")
}
|  |  | 
// copyPPC64x writes the ppc64/ppc64le runtime·duffcopy routine to w: a
// TEXT header, 128 MOVDU load/store pairs and a RET.
//
// duffcopy is not used on PPC64.
//
// As emitted, each pair loads from 8(R20) into R5 and stores R5 to
// 8(R21).
func copyPPC64x(w io.Writer) {
	pair := [...]string{
		"\tMOVDU\t8(R20), R5",
		"\tMOVDU\tR5, 8(R21)",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 128; left > 0; left-- {
		for _, ins := range pair {
			fmt.Fprintln(w, ins)
		}
	}
	fmt.Fprintln(w, "\tRET")
}
|  |  | 
// tagsMIPS64x writes the build constraint for the shared mips64/mips64le
// file to w, with an empty line before and after it.
func tagsMIPS64x(w io.Writer) {
	lines := [...]string{
		"",
		"//go:build mips64 || mips64le",
		"",
	}
	for _, l := range lines {
		fmt.Fprintln(w, l)
	}
}
|  |  | 
// zeroMIPS64x writes the mips64/mips64le runtime·duffzero routine to w:
// a TEXT header, 128 store/advance pairs and a RET.
//
// Register use in the generated code:
//
//	R0: always zero
//	R1: (aka REGRT1) ptr to memory to be zeroed - 8
//
// Each step stores at 8(R1) and then adds 8 to R1, so on return R1
// points to the last zeroed dword.
func zeroMIPS64x(w io.Writer) {
	const (
		store   = "\tMOVV\tR0, 8(R1)"
		advance = "\tADDV\t$8, R1"
	)

	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
	for dword := 1; dword <= 128; dword++ {
		fmt.Fprintln(w, store)
		fmt.Fprintln(w, advance)
	}
	fmt.Fprintln(w, "\tRET")
}
|  |  | 
// copyMIPS64x writes the mips64/mips64le runtime·duffcopy routine to w:
// a TEXT header, 128 four-instruction copy steps (each followed by a
// blank line) and a RET.
//
// Register use in the generated code, as read from the instructions:
//
//	R1:  address loaded from; advanced by 8 per step
//	R2:  address stored to; advanced by 8 per step
//	R23: holds each value between the load and the store
func copyMIPS64x(w io.Writer) {
	step := [...]string{
		"\tMOVV\t(R1), R23",
		"\tADDV\t$8, R1",
		"\tMOVV\tR23, (R2)",
		"\tADDV\t$8, R2",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for dword := 0; dword != 128; dword++ {
		for _, ins := range step {
			fmt.Fprintln(w, ins)
		}
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}
|  |  | 
// zeroRISCV64 writes the riscv64 runtime·duffzero routine to w: a TEXT
// header, 128 store/advance pairs and a RET.
//
// Register use in the generated code:
//
//	ZERO: always zero
//	X25:  ptr to memory to be zeroed; updated as a side effect
func zeroRISCV64(w io.Writer) {
	emit := func(lines ...string) {
		for _, l := range lines {
			fmt.Fprintln(w, l)
		}
	}

	emit("TEXT runtime·duffzero<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for left := 128; left > 0; left-- {
		emit("\tMOV\tZERO, (X25)", "\tADD\t$8, X25")
	}
	emit("\tRET")
}
|  |  | 
// copyRISCV64 writes the riscv64 runtime·duffcopy routine to w: a TEXT
// header, 128 four-instruction copy steps (each followed by a blank
// line) and a RET.
//
// Register use in the generated code:
//
//	X24: ptr to source memory
//	X25: ptr to destination memory
//	X31: holds each value between the load and the store
//
// X24 and X25 are updated as a side effect.
func copyRISCV64(w io.Writer) {
	step := [...]string{
		"\tMOV\t(X24), X31",
		"\tADD\t$8, X24",
		"\tMOV\tX31, (X25)",
		"\tADD\t$8, X25",
	}

	fmt.Fprintln(w, "TEXT runtime·duffcopy<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-0")
	for dword := 1; dword <= 128; dword++ {
		for _, ins := range step {
			fmt.Fprintln(w, ins)
		}
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}