| // Copyright 2024 The Go Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style | 
 | // license that can be found in the LICENSE file. | 
 |  | 
 | // This code was translated into a form compatible with 6a from the public | 
 | // domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html | 
 |  | 
 | package main | 
 |  | 
 | import ( | 
 | 	. "github.com/mmcloughlin/avo/build" | 
 | 	"github.com/mmcloughlin/avo/ir" | 
 | 	. "github.com/mmcloughlin/avo/operand" | 
 | 	. "github.com/mmcloughlin/avo/reg" | 
 | 	_ "golang.org/x/crypto/salsa20/salsa" | 
 | ) | 
 |  | 
 | //go:generate go run . -out ../salsa20_amd64.s -pkg salsa | 
 |  | 
 | func main() { | 
 | 	Package("golang.org/x/crypto/salsa20/salsa") | 
 | 	ConstraintExpr("amd64,!purego,gc") | 
 | 	salsa2020XORKeyStream() | 
 | 	Generate() | 
 | } | 
 |  | 
 | func salsa2020XORKeyStream() { | 
 | 	Implement("salsa2020XORKeyStream") | 
 | 	Attributes(0) | 
 | 	AllocLocal(456) // frame = 424 + 32 byte alignment | 
 | 	Comment("This needs up to 64 bytes at 360(R12); hence the non-obvious frame size.") | 
 |  | 
 | 	Load(Param("out"), RDI) | 
 | 	Load(Param("in"), RSI) | 
 | 	Load(Param("n"), RDX) | 
 | 	Load(Param("nonce"), RCX) | 
 | 	Load(Param("key"), R8) | 
 |  | 
 | 	MOVQ(RSP, R12) | 
 | 	ADDQ(Imm(31), R12) | 
 | 	ANDQ(I32(^31), R12) | 
 |  | 
 | 	MOVQ(RDX, R9) | 
 | 	MOVQ(RCX, RDX) | 
 | 	MOVQ(R8, R10) | 
 | 	CMPQ(R9, Imm(0)) | 
 | 	JBE(LabelRef("DONE")) | 
 |  | 
 | 	START() | 
 | 	BYTESATLEAST256() | 
 | 	MAINLOOP1() | 
 | 	BYTESBETWEEN1AND255() | 
 | 	NOCOPY() | 
 | 	MAINLOOP2() | 
 |  | 
 | 	Label("BYTESATLEAST64") | 
 | 	Label("DONE") | 
 | 	RET() | 
 | 	Label("BYTESATLEAST65") | 
 | 	SUBQ(Imm(64), R9) | 
 | 	ADDQ(Imm(64), RDI) | 
 | 	ADDQ(Imm(64), RSI) | 
 | 	JMP(LabelRef("BYTESBETWEEN1AND255")) | 
 | } | 
 |  | 
 | func START() { | 
 | 	Label("START") | 
 | 	MOVL(Mem{Base: R10}.Offset(20), ECX) | 
 | 	MOVL(Mem{Base: R10}.Offset(0), R8L) | 
 | 	MOVL(Mem{Base: EDX}.Offset(0), EAX) | 
 | 	MOVL(Mem{Base: R10}.Offset(16), R11L) | 
 | 	MOVL(ECX, Mem{Base: R12}.Offset(0)) | 
 | 	MOVL(R8L, Mem{Base: R12}.Offset(4)) | 
 | 	MOVL(EAX, Mem{Base: R12}.Offset(8)) | 
 | 	MOVL(R11L, Mem{Base: R12}.Offset(12)) | 
 | 	MOVL(Mem{Base: EDX}.Offset(8), ECX) | 
 | 	MOVL(Mem{Base: R10}.Offset(24), R8L) | 
 | 	MOVL(Mem{Base: R10}.Offset(4), EAX) | 
 | 	MOVL(Mem{Base: EDX}.Offset(4), R11L) | 
 | 	MOVL(ECX, Mem{Base: R12}.Offset(16)) | 
 | 	MOVL(R8L, Mem{Base: R12}.Offset(20)) | 
 | 	MOVL(EAX, Mem{Base: R12}.Offset(24)) | 
 | 	MOVL(R11L, Mem{Base: R12}.Offset(28)) | 
 | 	MOVL(Mem{Base: EDX}.Offset(12), ECX) | 
 | 	MOVL(Mem{Base: R10}.Offset(12), EDX) | 
 | 	MOVL(Mem{Base: R10}.Offset(28), R8L) | 
 | 	MOVL(Mem{Base: R10}.Offset(8), EAX) | 
 | 	MOVL(EDX, Mem{Base: R12}.Offset(32)) | 
 | 	MOVL(ECX, Mem{Base: R12}.Offset(36)) | 
 | 	MOVL(R8L, Mem{Base: R12}.Offset(40)) | 
 | 	MOVL(EAX, Mem{Base: R12}.Offset(44)) | 
 | 	MOVQ(Imm(1634760805), RDX) | 
 | 	MOVQ(Imm(857760878), RCX) | 
 | 	MOVQ(Imm(2036477234), R8) | 
 | 	MOVQ(Imm(1797285236), RAX) | 
 | 	MOVL(EDX, Mem{Base: R12}.Offset(48)) | 
 | 	MOVL(ECX, Mem{Base: R12}.Offset(52)) | 
 | 	MOVL(R8L, Mem{Base: R12}.Offset(56)) | 
 | 	MOVL(EAX, Mem{Base: R12}.Offset(60)) | 
 | 	CMPQ(R9, U32(256)) | 
 | 	JB(LabelRef("BYTESBETWEEN1AND255")) | 
 | 	MOVOA(Mem{Base: R12}.Offset(48), X0) | 
 | 	PSHUFL(Imm(0x55), X0, X1) | 
 | 	PSHUFL(Imm(0xAA), X0, X2) | 
 | 	PSHUFL(Imm(0xFF), X0, X3) | 
 | 	PSHUFL(Imm(0x00), X0, X0) | 
 | 	MOVOA(X1, Mem{Base: R12}.Offset(64)) | 
 | 	MOVOA(X2, Mem{Base: R12}.Offset(80)) | 
 | 	MOVOA(X3, Mem{Base: R12}.Offset(96)) | 
 | 	MOVOA(X0, Mem{Base: R12}.Offset(112)) | 
 | 	MOVOA(Mem{Base: R12}.Offset(0), X0) | 
 | 	PSHUFL(Imm(0xAA), X0, X1) | 
 | 	PSHUFL(Imm(0xFF), X0, X2) | 
 | 	PSHUFL(Imm(0x00), X0, X3) | 
 | 	PSHUFL(Imm(0x55), X0, X0) | 
 | 	MOVOA(X1, Mem{Base: R12}.Offset(128)) | 
 | 	MOVOA(X2, Mem{Base: R12}.Offset(144)) | 
 | 	MOVOA(X3, Mem{Base: R12}.Offset(160)) | 
 | 	MOVOA(X0, Mem{Base: R12}.Offset(176)) | 
 | 	MOVOA(Mem{Base: R12}.Offset(16), X0) | 
 | 	PSHUFL(Imm(0xFF), X0, X1) | 
 | 	PSHUFL(Imm(0x55), X0, X2) | 
 | 	PSHUFL(Imm(0xAA), X0, X0) | 
 | 	MOVOA(X1, Mem{Base: R12}.Offset(192)) | 
 | 	MOVOA(X2, Mem{Base: R12}.Offset(208)) | 
 | 	MOVOA(X0, Mem{Base: R12}.Offset(224)) | 
 | 	MOVOA(Mem{Base: R12}.Offset(32), X0) | 
 | 	PSHUFL(Imm(0x00), X0, X1) | 
 | 	PSHUFL(Imm(0xAA), X0, X2) | 
 | 	PSHUFL(Imm(0xFF), X0, X0) | 
 | 	MOVOA(X1, Mem{Base: R12}.Offset(240)) | 
 | 	MOVOA(X2, Mem{Base: R12}.Offset(256)) | 
 | 	MOVOA(X0, Mem{Base: R12}.Offset(272)) | 
 |  | 
 | } | 
 |  | 
 | func BYTESATLEAST256() { | 
 | 	Label("BYTESATLEAST256") | 
 | 	MOVL(Mem{Base: R12}.Offset(16), EDX) | 
 | 	MOVL(Mem{Base: R12}.Offset(36), ECX) | 
 | 	MOVL(EDX, Mem{Base: R12}.Offset(288)) | 
 | 	MOVL(ECX, Mem{Base: R12}.Offset(304)) | 
 | 	SHLQ(Imm(32), RCX) | 
 | 	ADDQ(RCX, RDX) | 
 | 	ADDQ(Imm(1), RDX) | 
 | 	MOVQ(RDX, RCX) | 
 | 	SHRQ(Imm(32), RCX) | 
 | 	MOVL(EDX, Mem{Base: R12}.Offset(292)) | 
 | 	MOVL(ECX, Mem{Base: R12}.Offset(308)) | 
 | 	ADDQ(Imm(1), RDX) | 
 | 	MOVQ(RDX, RCX) | 
 | 	SHRQ(Imm(32), RCX) | 
 | 	MOVL(EDX, Mem{Base: R12}.Offset(296)) | 
 | 	MOVL(ECX, Mem{Base: R12}.Offset(312)) | 
 | 	ADDQ(Imm(1), RDX) | 
 | 	MOVQ(RDX, RCX) | 
 | 	SHRQ(Imm(32), RCX) | 
 | 	MOVL(EDX, Mem{Base: R12}.Offset(300)) | 
 | 	MOVL(ECX, Mem{Base: R12}.Offset(316)) | 
 | 	ADDQ(Imm(1), RDX) | 
 | 	MOVQ(RDX, RCX) | 
 | 	SHRQ(Imm(32), RCX) | 
 | 	MOVL(EDX, Mem{Base: R12}.Offset(16)) | 
 | 	MOVL(ECX, Mem{Base: R12}.Offset(36)) | 
 | 	MOVQ(R9, Mem{Base: R12}.Offset(352)) | 
 | 	MOVQ(U32(20), RDX) | 
 | 	MOVOA(Mem{Base: R12}.Offset(64), X0) | 
 | 	MOVOA(Mem{Base: R12}.Offset(80), X1) | 
 | 	MOVOA(Mem{Base: R12}.Offset(96), X2) | 
 | 	MOVOA(Mem{Base: R12}.Offset(256), X3) | 
 | 	MOVOA(Mem{Base: R12}.Offset(272), X4) | 
 | 	MOVOA(Mem{Base: R12}.Offset(128), X5) | 
 | 	MOVOA(Mem{Base: R12}.Offset(144), X6) | 
 | 	MOVOA(Mem{Base: R12}.Offset(176), X7) | 
 | 	MOVOA(Mem{Base: R12}.Offset(192), X8) | 
 | 	MOVOA(Mem{Base: R12}.Offset(208), X9) | 
 | 	MOVOA(Mem{Base: R12}.Offset(224), X10) | 
 | 	MOVOA(Mem{Base: R12}.Offset(304), X11) | 
 | 	MOVOA(Mem{Base: R12}.Offset(112), X12) | 
 | 	MOVOA(Mem{Base: R12}.Offset(160), X13) | 
 | 	MOVOA(Mem{Base: R12}.Offset(240), X14) | 
 | 	MOVOA(Mem{Base: R12}.Offset(288), X15) | 
 | } | 
 |  | 
 | func MAINLOOP1() { | 
 | 	Label("MAINLOOP1") | 
 | 	MOVOA(X1, Mem{Base: R12}.Offset(320)) | 
 | 	MOVOA(X2, Mem{Base: R12}.Offset(336)) | 
 | 	MOVOA(X13, X1) | 
 | 	PADDL(X12, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(7), X1) | 
 | 	PXOR(X1, X14) | 
 | 	PSRLL(Imm(25), X2) | 
 | 	PXOR(X2, X14) | 
 | 	MOVOA(X7, X1) | 
 | 	PADDL(X0, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(7), X1) | 
 | 	PXOR(X1, X11) | 
 | 	PSRLL(Imm(25), X2) | 
 | 	PXOR(X2, X11) | 
 | 	MOVOA(X12, X1) | 
 | 	PADDL(X14, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(9), X1) | 
 | 	PXOR(X1, X15) | 
 | 	PSRLL(Imm(23), X2) | 
 | 	PXOR(X2, X15) | 
 | 	MOVOA(X0, X1) | 
 | 	PADDL(X11, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(9), X1) | 
 | 	PXOR(X1, X9) | 
 | 	PSRLL(Imm(23), X2) | 
 | 	PXOR(X2, X9) | 
 | 	MOVOA(X14, X1) | 
 | 	PADDL(X15, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(13), X1) | 
 | 	PXOR(X1, X13) | 
 | 	PSRLL(Imm(19), X2) | 
 | 	PXOR(X2, X13) | 
 | 	MOVOA(X11, X1) | 
 | 	PADDL(X9, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(13), X1) | 
 | 	PXOR(X1, X7) | 
 | 	PSRLL(Imm(19), X2) | 
 | 	PXOR(X2, X7) | 
 | 	MOVOA(X15, X1) | 
 | 	PADDL(X13, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(18), X1) | 
 | 	PXOR(X1, X12) | 
 | 	PSRLL(Imm(14), X2) | 
 | 	PXOR(X2, X12) | 
 | 	MOVOA(Mem{Base: R12}.Offset(320), X1) | 
 | 	MOVOA(X12, Mem{Base: R12}.Offset(320)) | 
 | 	MOVOA(X9, X2) | 
 | 	PADDL(X7, X2) | 
 | 	MOVOA(X2, X12) | 
 | 	PSLLL(Imm(18), X2) | 
 | 	PXOR(X2, X0) | 
 | 	PSRLL(Imm(14), X12) | 
 | 	PXOR(X12, X0) | 
 | 	MOVOA(X5, X2) | 
 | 	PADDL(X1, X2) | 
 | 	MOVOA(X2, X12) | 
 | 	PSLLL(Imm(7), X2) | 
 | 	PXOR(X2, X3) | 
 | 	PSRLL(Imm(25), X12) | 
 | 	PXOR(X12, X3) | 
 | 	MOVOA(Mem{Base: R12}.Offset(336), X2) | 
 | 	MOVOA(X0, Mem{Base: R12}.Offset(336)) | 
 | 	MOVOA(X6, X0) | 
 | 	PADDL(X2, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(7), X0) | 
 | 	PXOR(X0, X4) | 
 | 	PSRLL(Imm(25), X12) | 
 | 	PXOR(X12, X4) | 
 | 	MOVOA(X1, X0) | 
 | 	PADDL(X3, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(9), X0) | 
 | 	PXOR(X0, X10) | 
 | 	PSRLL(Imm(23), X12) | 
 | 	PXOR(X12, X10) | 
 | 	MOVOA(X2, X0) | 
 | 	PADDL(X4, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(9), X0) | 
 | 	PXOR(X0, X8) | 
 | 	PSRLL(Imm(23), X12) | 
 | 	PXOR(X12, X8) | 
 | 	MOVOA(X3, X0) | 
 | 	PADDL(X10, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(13), X0) | 
 | 	PXOR(X0, X5) | 
 | 	PSRLL(Imm(19), X12) | 
 | 	PXOR(X12, X5) | 
 | 	MOVOA(X4, X0) | 
 | 	PADDL(X8, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(13), X0) | 
 | 	PXOR(X0, X6) | 
 | 	PSRLL(Imm(19), X12) | 
 | 	PXOR(X12, X6) | 
 | 	MOVOA(X10, X0) | 
 | 	PADDL(X5, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(18), X0) | 
 | 	PXOR(X0, X1) | 
 | 	PSRLL(Imm(14), X12) | 
 | 	PXOR(X12, X1) | 
 | 	MOVOA(Mem{Base: R12}.Offset(320), X0) | 
 | 	MOVOA(X1, Mem{Base: R12}.Offset(320)) | 
 | 	MOVOA(X4, X1) | 
 | 	PADDL(X0, X1) | 
 | 	MOVOA(X1, X12) | 
 | 	PSLLL(Imm(7), X1) | 
 | 	PXOR(X1, X7) | 
 | 	PSRLL(Imm(25), X12) | 
 | 	PXOR(X12, X7) | 
 | 	MOVOA(X8, X1) | 
 | 	PADDL(X6, X1) | 
 | 	MOVOA(X1, X12) | 
 | 	PSLLL(Imm(18), X1) | 
 | 	PXOR(X1, X2) | 
 | 	PSRLL(Imm(14), X12) | 
 | 	PXOR(X12, X2) | 
 | 	MOVOA(Mem{Base: R12}.Offset(336), X12) | 
 | 	MOVOA(X2, Mem{Base: R12}.Offset(336)) | 
 | 	MOVOA(X14, X1) | 
 | 	PADDL(X12, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(7), X1) | 
 | 	PXOR(X1, X5) | 
 | 	PSRLL(Imm(25), X2) | 
 | 	PXOR(X2, X5) | 
 | 	MOVOA(X0, X1) | 
 | 	PADDL(X7, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(9), X1) | 
 | 	PXOR(X1, X10) | 
 | 	PSRLL(Imm(23), X2) | 
 | 	PXOR(X2, X10) | 
 | 	MOVOA(X12, X1) | 
 | 	PADDL(X5, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(9), X1) | 
 | 	PXOR(X1, X8) | 
 | 	PSRLL(Imm(23), X2) | 
 | 	PXOR(X2, X8) | 
 | 	MOVOA(X7, X1) | 
 | 	PADDL(X10, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(13), X1) | 
 | 	PXOR(X1, X4) | 
 | 	PSRLL(Imm(19), X2) | 
 | 	PXOR(X2, X4) | 
 | 	MOVOA(X5, X1) | 
 | 	PADDL(X8, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(13), X1) | 
 | 	PXOR(X1, X14) | 
 | 	PSRLL(Imm(19), X2) | 
 | 	PXOR(X2, X14) | 
 | 	MOVOA(X10, X1) | 
 | 	PADDL(X4, X1) | 
 | 	MOVOA(X1, X2) | 
 | 	PSLLL(Imm(18), X1) | 
 | 	PXOR(X1, X0) | 
 | 	PSRLL(Imm(14), X2) | 
 | 	PXOR(X2, X0) | 
 | 	MOVOA(Mem{Base: R12}.Offset(320), X1) | 
 | 	MOVOA(X0, Mem{Base: R12}.Offset(320)) | 
 | 	MOVOA(X8, X0) | 
 | 	PADDL(X14, X0) | 
 | 	MOVOA(X0, X2) | 
 | 	PSLLL(Imm(18), X0) | 
 | 	PXOR(X0, X12) | 
 | 	PSRLL(Imm(14), X2) | 
 | 	PXOR(X2, X12) | 
 | 	MOVOA(X11, X0) | 
 | 	PADDL(X1, X0) | 
 | 	MOVOA(X0, X2) | 
 | 	PSLLL(Imm(7), X0) | 
 | 	PXOR(X0, X6) | 
 | 	PSRLL(Imm(25), X2) | 
 | 	PXOR(X2, X6) | 
 | 	MOVOA(Mem{Base: R12}.Offset(336), X2) | 
 | 	MOVOA(X12, Mem{Base: R12}.Offset(336)) | 
 | 	MOVOA(X3, X0) | 
 | 	PADDL(X2, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(7), X0) | 
 | 	PXOR(X0, X13) | 
 | 	PSRLL(Imm(25), X12) | 
 | 	PXOR(X12, X13) | 
 | 	MOVOA(X1, X0) | 
 | 	PADDL(X6, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(9), X0) | 
 | 	PXOR(X0, X15) | 
 | 	PSRLL(Imm(23), X12) | 
 | 	PXOR(X12, X15) | 
 | 	MOVOA(X2, X0) | 
 | 	PADDL(X13, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(9), X0) | 
 | 	PXOR(X0, X9) | 
 | 	PSRLL(Imm(23), X12) | 
 | 	PXOR(X12, X9) | 
 | 	MOVOA(X6, X0) | 
 | 	PADDL(X15, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(13), X0) | 
 | 	PXOR(X0, X11) | 
 | 	PSRLL(Imm(19), X12) | 
 | 	PXOR(X12, X11) | 
 | 	MOVOA(X13, X0) | 
 | 	PADDL(X9, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(13), X0) | 
 | 	PXOR(X0, X3) | 
 | 	PSRLL(Imm(19), X12) | 
 | 	PXOR(X12, X3) | 
 | 	MOVOA(X15, X0) | 
 | 	PADDL(X11, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(18), X0) | 
 | 	PXOR(X0, X1) | 
 | 	PSRLL(Imm(14), X12) | 
 | 	PXOR(X12, X1) | 
 | 	MOVOA(X9, X0) | 
 | 	PADDL(X3, X0) | 
 | 	MOVOA(X0, X12) | 
 | 	PSLLL(Imm(18), X0) | 
 | 	PXOR(X0, X2) | 
 | 	PSRLL(Imm(14), X12) | 
 | 	PXOR(X12, X2) | 
 | 	MOVOA(Mem{Base: R12}.Offset(320), X12) | 
 | 	MOVOA(Mem{Base: R12}.Offset(336), X0) | 
 | 	SUBQ(Imm(2), RDX) | 
 | 	JA(LabelRef("MAINLOOP1")) | 
 | 	PADDL(Mem{Base: R12}.Offset(112), X12) | 
 | 	PADDL(Mem{Base: R12}.Offset(176), X7) | 
 | 	PADDL(Mem{Base: R12}.Offset(224), X10) | 
 | 	PADDL(Mem{Base: R12}.Offset(272), X4) | 
 | 	MOVD(X12, EDX) | 
 | 	MOVD(X7, ECX) | 
 | 	MOVD(X10, R8) | 
 | 	MOVD(X4, R9) | 
 | 	PSHUFL(Imm(0x39), X12, X12) | 
 | 	PSHUFL(Imm(0x39), X7, X7) | 
 | 	PSHUFL(Imm(0x39), X10, X10) | 
 | 	PSHUFL(Imm(0x39), X4, X4) | 
 | 	XORL(Mem{Base: SI}.Offset(0), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(4), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(8), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(12), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(0)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(4)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(8)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(12)) | 
 | 	MOVD(X12, EDX) | 
 | 	MOVD(X7, ECX) | 
 | 	MOVD(X10, R8) | 
 | 	MOVD(X4, R9) | 
 | 	PSHUFL(Imm(0x39), X12, X12) | 
 | 	PSHUFL(Imm(0x39), X7, X7) | 
 | 	PSHUFL(Imm(0x39), X10, X10) | 
 | 	PSHUFL(Imm(0x39), X4, X4) | 
 | 	XORL(Mem{Base: SI}.Offset(64), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(68), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(72), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(76), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(64)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(68)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(72)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(76)) | 
 | 	MOVD(X12, EDX) | 
 | 	MOVD(X7, ECX) | 
 | 	MOVD(X10, R8) | 
 | 	MOVD(X4, R9) | 
 | 	PSHUFL(Imm(0x39), X12, X12) | 
 | 	PSHUFL(Imm(0x39), X7, X7) | 
 | 	PSHUFL(Imm(0x39), X10, X10) | 
 | 	PSHUFL(Imm(0x39), X4, X4) | 
 | 	XORL(Mem{Base: SI}.Offset(128), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(132), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(136), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(140), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(128)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(132)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(136)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(140)) | 
 | 	MOVD(X12, EDX) | 
 | 	MOVD(X7, ECX) | 
 | 	MOVD(X10, R8) | 
 | 	MOVD(X4, R9) | 
 | 	XORL(Mem{Base: SI}.Offset(192), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(196), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(200), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(204), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(192)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(196)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(200)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(204)) | 
 | 	PADDL(Mem{Base: R12}.Offset(240), X14) | 
 | 	PADDL(Mem{Base: R12}.Offset(64), X0) | 
 | 	PADDL(Mem{Base: R12}.Offset(128), X5) | 
 | 	PADDL(Mem{Base: R12}.Offset(192), X8) | 
 | 	MOVD(X14, EDX) | 
 | 	MOVD(X0, ECX) | 
 | 	MOVD(X5, R8) | 
 | 	MOVD(X8, R9) | 
 | 	PSHUFL(Imm(0x39), X14, X14) | 
 | 	PSHUFL(Imm(0x39), X0, X0) | 
 | 	PSHUFL(Imm(0x39), X5, X5) | 
 | 	PSHUFL(Imm(0x39), X8, X8) | 
 | 	XORL(Mem{Base: SI}.Offset(16), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(20), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(24), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(28), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(16)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(20)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(24)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(28)) | 
 | 	MOVD(X14, EDX) | 
 | 	MOVD(X0, ECX) | 
 | 	MOVD(X5, R8) | 
 | 	MOVD(X8, R9) | 
 | 	PSHUFL(Imm(0x39), X14, X14) | 
 | 	PSHUFL(Imm(0x39), X0, X0) | 
 | 	PSHUFL(Imm(0x39), X5, X5) | 
 | 	PSHUFL(Imm(0x39), X8, X8) | 
 | 	XORL(Mem{Base: SI}.Offset(80), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(84), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(88), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(92), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(80)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(84)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(88)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(92)) | 
 | 	MOVD(X14, EDX) | 
 | 	MOVD(X0, ECX) | 
 | 	MOVD(X5, R8) | 
 | 	MOVD(X8, R9) | 
 | 	PSHUFL(Imm(0x39), X14, X14) | 
 | 	PSHUFL(Imm(0x39), X0, X0) | 
 | 	PSHUFL(Imm(0x39), X5, X5) | 
 | 	PSHUFL(Imm(0x39), X8, X8) | 
 | 	XORL(Mem{Base: SI}.Offset(144), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(148), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(152), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(156), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(144)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(148)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(152)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(156)) | 
 | 	MOVD(X14, EDX) | 
 | 	MOVD(X0, ECX) | 
 | 	MOVD(X5, R8) | 
 | 	MOVD(X8, R9) | 
 | 	XORL(Mem{Base: SI}.Offset(208), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(212), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(216), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(220), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(208)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(212)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(216)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(220)) | 
 | 	PADDL(Mem{Base: R12}.Offset(288), X15) | 
 | 	PADDL(Mem{Base: R12}.Offset(304), X11) | 
 | 	PADDL(Mem{Base: R12}.Offset(80), X1) | 
 | 	PADDL(Mem{Base: R12}.Offset(144), X6) | 
 | 	MOVD(X15, EDX) | 
 | 	MOVD(X11, ECX) | 
 | 	MOVD(X1, R8) | 
 | 	MOVD(X6, R9) | 
 | 	PSHUFL(Imm(0x39), X15, X15) | 
 | 	PSHUFL(Imm(0x39), X11, X11) | 
 | 	PSHUFL(Imm(0x39), X1, X1) | 
 | 	PSHUFL(Imm(0x39), X6, X6) | 
 | 	XORL(Mem{Base: SI}.Offset(32), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(36), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(40), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(44), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(32)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(36)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(40)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(44)) | 
 | 	MOVD(X15, EDX) | 
 | 	MOVD(X11, ECX) | 
 | 	MOVD(X1, R8) | 
 | 	MOVD(X6, R9) | 
 | 	PSHUFL(Imm(0x39), X15, X15) | 
 | 	PSHUFL(Imm(0x39), X11, X11) | 
 | 	PSHUFL(Imm(0x39), X1, X1) | 
 | 	PSHUFL(Imm(0x39), X6, X6) | 
 | 	XORL(Mem{Base: SI}.Offset(96), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(100), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(104), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(108), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(96)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(100)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(104)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(108)) | 
 | 	MOVD(X15, EDX) | 
 | 	MOVD(X11, ECX) | 
 | 	MOVD(X1, R8) | 
 | 	MOVD(X6, R9) | 
 | 	PSHUFL(Imm(0x39), X15, X15) | 
 | 	PSHUFL(Imm(0x39), X11, X11) | 
 | 	PSHUFL(Imm(0x39), X1, X1) | 
 | 	PSHUFL(Imm(0x39), X6, X6) | 
 | 	XORL(Mem{Base: SI}.Offset(160), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(164), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(168), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(172), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(160)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(164)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(168)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(172)) | 
 | 	MOVD(X15, EDX) | 
 | 	MOVD(X11, ECX) | 
 | 	MOVD(X1, R8) | 
 | 	MOVD(X6, R9) | 
 | 	XORL(Mem{Base: SI}.Offset(224), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(228), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(232), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(236), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(224)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(228)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(232)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(236)) | 
 | 	PADDL(Mem{Base: R12}.Offset(160), X13) | 
 | 	PADDL(Mem{Base: R12}.Offset(208), X9) | 
 | 	PADDL(Mem{Base: R12}.Offset(256), X3) | 
 | 	PADDL(Mem{Base: R12}.Offset(96), X2) | 
 | 	MOVD(X13, EDX) | 
 | 	MOVD(X9, ECX) | 
 | 	MOVD(X3, R8) | 
 | 	MOVD(X2, R9) | 
 | 	PSHUFL(Imm(0x39), X13, X13) | 
 | 	PSHUFL(Imm(0x39), X9, X9) | 
 | 	PSHUFL(Imm(0x39), X3, X3) | 
 | 	PSHUFL(Imm(0x39), X2, X2) | 
 | 	XORL(Mem{Base: SI}.Offset(48), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(52), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(56), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(60), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(48)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(52)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(56)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(60)) | 
 | 	MOVD(X13, EDX) | 
 | 	MOVD(X9, ECX) | 
 | 	MOVD(X3, R8) | 
 | 	MOVD(X2, R9) | 
 | 	PSHUFL(Imm(0x39), X13, X13) | 
 | 	PSHUFL(Imm(0x39), X9, X9) | 
 | 	PSHUFL(Imm(0x39), X3, X3) | 
 | 	PSHUFL(Imm(0x39), X2, X2) | 
 | 	XORL(Mem{Base: SI}.Offset(112), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(116), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(120), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(124), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(112)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(116)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(120)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(124)) | 
 | 	MOVD(X13, EDX) | 
 | 	MOVD(X9, ECX) | 
 | 	MOVD(X3, R8) | 
 | 	MOVD(X2, R9) | 
 | 	PSHUFL(Imm(0x39), X13, X13) | 
 | 	PSHUFL(Imm(0x39), X9, X9) | 
 | 	PSHUFL(Imm(0x39), X3, X3) | 
 | 	PSHUFL(Imm(0x39), X2, X2) | 
 | 	XORL(Mem{Base: SI}.Offset(176), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(180), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(184), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(188), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(176)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(180)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(184)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(188)) | 
 | 	MOVD(X13, EDX) | 
 | 	MOVD(X9, ECX) | 
 | 	MOVD(X3, R8) | 
 | 	MOVD(X2, R9) | 
 | 	XORL(Mem{Base: SI}.Offset(240), EDX) | 
 | 	XORL(Mem{Base: SI}.Offset(244), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(248), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(252), R9L) | 
 | 	MOVL(EDX, Mem{Base: DI}.Offset(240)) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(244)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(248)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(252)) | 
 | 	MOVQ(Mem{Base: R12}.Offset(352), R9) | 
 | 	SUBQ(U32(256), R9) | 
 | 	ADDQ(U32(256), RSI) | 
 | 	ADDQ(U32(256), RDI) | 
 | 	CMPQ(R9, U32(256)) | 
 | 	JAE(LabelRef("BYTESATLEAST256")) | 
 | 	CMPQ(R9, Imm(0)) | 
 | 	JBE(LabelRef("DONE")) | 
 | } | 
 |  | 
 | func BYTESBETWEEN1AND255() { | 
 | 	Label("BYTESBETWEEN1AND255") | 
 | 	CMPQ(R9, Imm(64)) | 
 | 	JAE(LabelRef("NOCOPY")) | 
 | 	MOVQ(RDI, RDX) | 
 | 	LEAQ(Mem{Base: R12}.Offset(360), RDI) | 
 | 	MOVQ(R9, RCX) | 
 | 	// Hack to get Avo to emit: | 
 | 	// 	REP; MOVSB | 
 | 	Instruction(&ir.Instruction{Opcode: "REP; MOVSB"}) | 
 | 	LEAQ(Mem{Base: R12}.Offset(360), RDI) | 
 | 	LEAQ(Mem{Base: R12}.Offset(360), RSI) | 
 | } | 
 |  | 
 | func NOCOPY() { | 
 | 	Label("NOCOPY") | 
 | 	MOVQ(R9, Mem{Base: R12}.Offset(352)) | 
 | 	MOVOA(Mem{Base: R12}.Offset(48), X0) | 
 | 	MOVOA(Mem{Base: R12}.Offset(0), X1) | 
 | 	MOVOA(Mem{Base: R12}.Offset(16), X2) | 
 | 	MOVOA(Mem{Base: R12}.Offset(32), X3) | 
 | 	MOVOA(X1, X4) | 
 | 	MOVQ(U32(20), RCX) | 
 | } | 
 |  | 
 | func MAINLOOP2() { | 
 | 	Label("MAINLOOP2") | 
 | 	PADDL(X0, X4) | 
 | 	MOVOA(X0, X5) | 
 | 	MOVOA(X4, X6) | 
 | 	PSLLL(Imm(7), X4) | 
 | 	PSRLL(Imm(25), X6) | 
 | 	PXOR(X4, X3) | 
 | 	PXOR(X6, X3) | 
 | 	PADDL(X3, X5) | 
 | 	MOVOA(X3, X4) | 
 | 	MOVOA(X5, X6) | 
 | 	PSLLL(Imm(9), X5) | 
 | 	PSRLL(Imm(23), X6) | 
 | 	PXOR(X5, X2) | 
 | 	PSHUFL(Imm(0x93), X3, X3) | 
 | 	PXOR(X6, X2) | 
 | 	PADDL(X2, X4) | 
 | 	MOVOA(X2, X5) | 
 | 	MOVOA(X4, X6) | 
 | 	PSLLL(Imm(13), X4) | 
 | 	PSRLL(Imm(19), X6) | 
 | 	PXOR(X4, X1) | 
 | 	PSHUFL(Imm(0x4E), X2, X2) | 
 | 	PXOR(X6, X1) | 
 | 	PADDL(X1, X5) | 
 | 	MOVOA(X3, X4) | 
 | 	MOVOA(X5, X6) | 
 | 	PSLLL(Imm(18), X5) | 
 | 	PSRLL(Imm(14), X6) | 
 | 	PXOR(X5, X0) | 
 | 	PSHUFL(Imm(0x39), X1, X1) | 
 | 	PXOR(X6, X0) | 
 | 	PADDL(X0, X4) | 
 | 	MOVOA(X0, X5) | 
 | 	MOVOA(X4, X6) | 
 | 	PSLLL(Imm(7), X4) | 
 | 	PSRLL(Imm(25), X6) | 
 | 	PXOR(X4, X1) | 
 | 	PXOR(X6, X1) | 
 | 	PADDL(X1, X5) | 
 | 	MOVOA(X1, X4) | 
 | 	MOVOA(X5, X6) | 
 | 	PSLLL(Imm(9), X5) | 
 | 	PSRLL(Imm(23), X6) | 
 | 	PXOR(X5, X2) | 
 | 	PSHUFL(Imm(0x93), X1, X1) | 
 | 	PXOR(X6, X2) | 
 | 	PADDL(X2, X4) | 
 | 	MOVOA(X2, X5) | 
 | 	MOVOA(X4, X6) | 
 | 	PSLLL(Imm(13), X4) | 
 | 	PSRLL(Imm(19), X6) | 
 | 	PXOR(X4, X3) | 
 | 	PSHUFL(Imm(0x4E), X2, X2) | 
 | 	PXOR(X6, X3) | 
 | 	PADDL(X3, X5) | 
 | 	MOVOA(X1, X4) | 
 | 	MOVOA(X5, X6) | 
 | 	PSLLL(Imm(18), X5) | 
 | 	PSRLL(Imm(14), X6) | 
 | 	PXOR(X5, X0) | 
 | 	PSHUFL(Imm(0x39), X3, X3) | 
 | 	PXOR(X6, X0) | 
 | 	PADDL(X0, X4) | 
 | 	MOVOA(X0, X5) | 
 | 	MOVOA(X4, X6) | 
 | 	PSLLL(Imm(7), X4) | 
 | 	PSRLL(Imm(25), X6) | 
 | 	PXOR(X4, X3) | 
 | 	PXOR(X6, X3) | 
 | 	PADDL(X3, X5) | 
 | 	MOVOA(X3, X4) | 
 | 	MOVOA(X5, X6) | 
 | 	PSLLL(Imm(9), X5) | 
 | 	PSRLL(Imm(23), X6) | 
 | 	PXOR(X5, X2) | 
 | 	PSHUFL(Imm(0x93), X3, X3) | 
 | 	PXOR(X6, X2) | 
 | 	PADDL(X2, X4) | 
 | 	MOVOA(X2, X5) | 
 | 	MOVOA(X4, X6) | 
 | 	PSLLL(Imm(13), X4) | 
 | 	PSRLL(Imm(19), X6) | 
 | 	PXOR(X4, X1) | 
 | 	PSHUFL(Imm(0x4E), X2, X2) | 
 | 	PXOR(X6, X1) | 
 | 	PADDL(X1, X5) | 
 | 	MOVOA(X3, X4) | 
 | 	MOVOA(X5, X6) | 
 | 	PSLLL(Imm(18), X5) | 
 | 	PSRLL(Imm(14), X6) | 
 | 	PXOR(X5, X0) | 
 | 	PSHUFL(Imm(0x39), X1, X1) | 
 | 	PXOR(X6, X0) | 
 | 	PADDL(X0, X4) | 
 | 	MOVOA(X0, X5) | 
 | 	MOVOA(X4, X6) | 
 | 	PSLLL(Imm(7), X4) | 
 | 	PSRLL(Imm(25), X6) | 
 | 	PXOR(X4, X1) | 
 | 	PXOR(X6, X1) | 
 | 	PADDL(X1, X5) | 
 | 	MOVOA(X1, X4) | 
 | 	MOVOA(X5, X6) | 
 | 	PSLLL(Imm(9), X5) | 
 | 	PSRLL(Imm(23), X6) | 
 | 	PXOR(X5, X2) | 
 | 	PSHUFL(Imm(0x93), X1, X1) | 
 | 	PXOR(X6, X2) | 
 | 	PADDL(X2, X4) | 
 | 	MOVOA(X2, X5) | 
 | 	MOVOA(X4, X6) | 
 | 	PSLLL(Imm(13), X4) | 
 | 	PSRLL(Imm(19), X6) | 
 | 	PXOR(X4, X3) | 
 | 	PSHUFL(Imm(0x4E), X2, X2) | 
 | 	PXOR(X6, X3) | 
 | 	SUBQ(Imm(4), RCX) | 
 | 	PADDL(X3, X5) | 
 | 	MOVOA(X1, X4) | 
 | 	MOVOA(X5, X6) | 
 | 	PSLLL(Imm(18), X5) | 
 | 	PXOR(X7, X7) | 
 | 	PSRLL(Imm(14), X6) | 
 | 	PXOR(X5, X0) | 
 | 	PSHUFL(Imm(0x39), X3, X3) | 
 | 	PXOR(X6, X0) | 
 | 	JA(LabelRef("MAINLOOP2")) | 
 | 	PADDL(Mem{Base: R12}.Offset(48), X0) | 
 | 	PADDL(Mem{Base: R12}.Offset(0), X1) | 
 | 	PADDL(Mem{Base: R12}.Offset(16), X2) | 
 | 	PADDL(Mem{Base: R12}.Offset(32), X3) | 
 | 	MOVD(X0, ECX) | 
 | 	MOVD(X1, R8) | 
 | 	MOVD(X2, R9) | 
 | 	MOVD(X3, EAX) | 
 | 	PSHUFL(Imm(0x39), X0, X0) | 
 | 	PSHUFL(Imm(0x39), X1, X1) | 
 | 	PSHUFL(Imm(0x39), X2, X2) | 
 | 	PSHUFL(Imm(0x39), X3, X3) | 
 | 	XORL(Mem{Base: SI}.Offset(0), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(48), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(32), R9L) | 
 | 	XORL(Mem{Base: SI}.Offset(16), EAX) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(0)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(48)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(32)) | 
 | 	MOVL(EAX, Mem{Base: DI}.Offset(16)) | 
 | 	MOVD(X0, ECX) | 
 | 	MOVD(X1, R8) | 
 | 	MOVD(X2, R9) | 
 | 	MOVD(X3, EAX) | 
 | 	PSHUFL(Imm(0x39), X0, X0) | 
 | 	PSHUFL(Imm(0x39), X1, X1) | 
 | 	PSHUFL(Imm(0x39), X2, X2) | 
 | 	PSHUFL(Imm(0x39), X3, X3) | 
 | 	XORL(Mem{Base: SI}.Offset(20), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(4), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(52), R9L) | 
 | 	XORL(Mem{Base: SI}.Offset(36), EAX) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(20)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(4)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(52)) | 
 | 	MOVL(EAX, Mem{Base: DI}.Offset(36)) | 
 | 	MOVD(X0, ECX) | 
 | 	MOVD(X1, R8) | 
 | 	MOVD(X2, R9) | 
 | 	MOVD(X3, EAX) | 
 | 	PSHUFL(Imm(0x39), X0, X0) | 
 | 	PSHUFL(Imm(0x39), X1, X1) | 
 | 	PSHUFL(Imm(0x39), X2, X2) | 
 | 	PSHUFL(Imm(0x39), X3, X3) | 
 | 	XORL(Mem{Base: SI}.Offset(40), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(24), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(8), R9L) | 
 | 	XORL(Mem{Base: SI}.Offset(56), EAX) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(40)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(24)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(8)) | 
 | 	MOVL(EAX, Mem{Base: DI}.Offset(56)) | 
 | 	MOVD(X0, ECX) | 
 | 	MOVD(X1, R8) | 
 | 	MOVD(X2, R9) | 
 | 	MOVD(X3, EAX) | 
 | 	XORL(Mem{Base: SI}.Offset(60), ECX) | 
 | 	XORL(Mem{Base: SI}.Offset(44), R8L) | 
 | 	XORL(Mem{Base: SI}.Offset(28), R9L) | 
 | 	XORL(Mem{Base: SI}.Offset(12), EAX) | 
 | 	MOVL(ECX, Mem{Base: DI}.Offset(60)) | 
 | 	MOVL(R8L, Mem{Base: DI}.Offset(44)) | 
 | 	MOVL(R9L, Mem{Base: DI}.Offset(28)) | 
 | 	MOVL(EAX, Mem{Base: DI}.Offset(12)) | 
 | 	MOVQ(Mem{Base: R12}.Offset(352), R9) | 
 | 	MOVL(Mem{Base: R12}.Offset(16), ECX) | 
 | 	MOVL(Mem{Base: R12}.Offset(36), R8L) | 
 | 	ADDQ(Imm(1), RCX) | 
 | 	SHLQ(Imm(32), R8) | 
 | 	ADDQ(R8, RCX) | 
 | 	MOVQ(RCX, R8) | 
 | 	SHRQ(Imm(32), R8) | 
 | 	MOVL(ECX, Mem{Base: R12}.Offset(16)) | 
 | 	MOVL(R8L, Mem{Base: R12}.Offset(36)) | 
 | 	CMPQ(R9, Imm(64)) | 
 | 	JA(LabelRef("BYTESATLEAST65")) | 
 | 	JAE(LabelRef("BYTESATLEAST64")) | 
 | 	MOVQ(RDI, RSI) | 
 | 	MOVQ(RDX, RDI) | 
 | 	MOVQ(R9, RCX) | 
 | 	// Hack to get Avo to emit: | 
 | 	// 	REP; MOVSB | 
 | 	Instruction(&ir.Instruction{Opcode: "REP; MOVSB"}) | 
 | } |