| // Code generated by command: go run blamka_amd64.go -out ../blamka_amd64.s -pkg argon2. DO NOT EDIT. |
| |
| //go:build amd64 && gc && !purego |
| |
| #include "textflag.h" |
| |
| // func blamkaSSE4(b *block) |
| // Requires: SSE2, SSSE3 |
| TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8 |
| MOVQ b+0(FP), AX |
| MOVOU ·c40<>+0(SB), X10 |
| MOVOU ·c48<>+0(SB), X11 |
| MOVOU (AX), X0 |
| MOVOU 16(AX), X1 |
| MOVOU 32(AX), X2 |
| MOVOU 48(AX), X3 |
| MOVOU 64(AX), X4 |
| MOVOU 80(AX), X5 |
| MOVOU 96(AX), X6 |
| MOVOU 112(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, (AX) |
| MOVOU X1, 16(AX) |
| MOVOU X2, 32(AX) |
| MOVOU X3, 48(AX) |
| MOVOU X4, 64(AX) |
| MOVOU X5, 80(AX) |
| MOVOU X6, 96(AX) |
| MOVOU X7, 112(AX) |
| MOVOU 128(AX), X0 |
| MOVOU 144(AX), X1 |
| MOVOU 160(AX), X2 |
| MOVOU 176(AX), X3 |
| MOVOU 192(AX), X4 |
| MOVOU 208(AX), X5 |
| MOVOU 224(AX), X6 |
| MOVOU 240(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 128(AX) |
| MOVOU X1, 144(AX) |
| MOVOU X2, 160(AX) |
| MOVOU X3, 176(AX) |
| MOVOU X4, 192(AX) |
| MOVOU X5, 208(AX) |
| MOVOU X6, 224(AX) |
| MOVOU X7, 240(AX) |
| MOVOU 256(AX), X0 |
| MOVOU 272(AX), X1 |
| MOVOU 288(AX), X2 |
| MOVOU 304(AX), X3 |
| MOVOU 320(AX), X4 |
| MOVOU 336(AX), X5 |
| MOVOU 352(AX), X6 |
| MOVOU 368(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 256(AX) |
| MOVOU X1, 272(AX) |
| MOVOU X2, 288(AX) |
| MOVOU X3, 304(AX) |
| MOVOU X4, 320(AX) |
| MOVOU X5, 336(AX) |
| MOVOU X6, 352(AX) |
| MOVOU X7, 368(AX) |
| MOVOU 384(AX), X0 |
| MOVOU 400(AX), X1 |
| MOVOU 416(AX), X2 |
| MOVOU 432(AX), X3 |
| MOVOU 448(AX), X4 |
| MOVOU 464(AX), X5 |
| MOVOU 480(AX), X6 |
| MOVOU 496(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 384(AX) |
| MOVOU X1, 400(AX) |
| MOVOU X2, 416(AX) |
| MOVOU X3, 432(AX) |
| MOVOU X4, 448(AX) |
| MOVOU X5, 464(AX) |
| MOVOU X6, 480(AX) |
| MOVOU X7, 496(AX) |
| MOVOU 512(AX), X0 |
| MOVOU 528(AX), X1 |
| MOVOU 544(AX), X2 |
| MOVOU 560(AX), X3 |
| MOVOU 576(AX), X4 |
| MOVOU 592(AX), X5 |
| MOVOU 608(AX), X6 |
| MOVOU 624(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 512(AX) |
| MOVOU X1, 528(AX) |
| MOVOU X2, 544(AX) |
| MOVOU X3, 560(AX) |
| MOVOU X4, 576(AX) |
| MOVOU X5, 592(AX) |
| MOVOU X6, 608(AX) |
| MOVOU X7, 624(AX) |
| MOVOU 640(AX), X0 |
| MOVOU 656(AX), X1 |
| MOVOU 672(AX), X2 |
| MOVOU 688(AX), X3 |
| MOVOU 704(AX), X4 |
| MOVOU 720(AX), X5 |
| MOVOU 736(AX), X6 |
| MOVOU 752(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 640(AX) |
| MOVOU X1, 656(AX) |
| MOVOU X2, 672(AX) |
| MOVOU X3, 688(AX) |
| MOVOU X4, 704(AX) |
| MOVOU X5, 720(AX) |
| MOVOU X6, 736(AX) |
| MOVOU X7, 752(AX) |
| MOVOU 768(AX), X0 |
| MOVOU 784(AX), X1 |
| MOVOU 800(AX), X2 |
| MOVOU 816(AX), X3 |
| MOVOU 832(AX), X4 |
| MOVOU 848(AX), X5 |
| MOVOU 864(AX), X6 |
| MOVOU 880(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 768(AX) |
| MOVOU X1, 784(AX) |
| MOVOU X2, 800(AX) |
| MOVOU X3, 816(AX) |
| MOVOU X4, 832(AX) |
| MOVOU X5, 848(AX) |
| MOVOU X6, 864(AX) |
| MOVOU X7, 880(AX) |
| MOVOU 896(AX), X0 |
| MOVOU 912(AX), X1 |
| MOVOU 928(AX), X2 |
| MOVOU 944(AX), X3 |
| MOVOU 960(AX), X4 |
| MOVOU 976(AX), X5 |
| MOVOU 992(AX), X6 |
| MOVOU 1008(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 896(AX) |
| MOVOU X1, 912(AX) |
| MOVOU X2, 928(AX) |
| MOVOU X3, 944(AX) |
| MOVOU X4, 960(AX) |
| MOVOU X5, 976(AX) |
| MOVOU X6, 992(AX) |
| MOVOU X7, 1008(AX) |
| MOVOU (AX), X0 |
| MOVOU 128(AX), X1 |
| MOVOU 256(AX), X2 |
| MOVOU 384(AX), X3 |
| MOVOU 512(AX), X4 |
| MOVOU 640(AX), X5 |
| MOVOU 768(AX), X6 |
| MOVOU 896(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, (AX) |
| MOVOU X1, 128(AX) |
| MOVOU X2, 256(AX) |
| MOVOU X3, 384(AX) |
| MOVOU X4, 512(AX) |
| MOVOU X5, 640(AX) |
| MOVOU X6, 768(AX) |
| MOVOU X7, 896(AX) |
| MOVOU 16(AX), X0 |
| MOVOU 144(AX), X1 |
| MOVOU 272(AX), X2 |
| MOVOU 400(AX), X3 |
| MOVOU 528(AX), X4 |
| MOVOU 656(AX), X5 |
| MOVOU 784(AX), X6 |
| MOVOU 912(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 16(AX) |
| MOVOU X1, 144(AX) |
| MOVOU X2, 272(AX) |
| MOVOU X3, 400(AX) |
| MOVOU X4, 528(AX) |
| MOVOU X5, 656(AX) |
| MOVOU X6, 784(AX) |
| MOVOU X7, 912(AX) |
| MOVOU 32(AX), X0 |
| MOVOU 160(AX), X1 |
| MOVOU 288(AX), X2 |
| MOVOU 416(AX), X3 |
| MOVOU 544(AX), X4 |
| MOVOU 672(AX), X5 |
| MOVOU 800(AX), X6 |
| MOVOU 928(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 32(AX) |
| MOVOU X1, 160(AX) |
| MOVOU X2, 288(AX) |
| MOVOU X3, 416(AX) |
| MOVOU X4, 544(AX) |
| MOVOU X5, 672(AX) |
| MOVOU X6, 800(AX) |
| MOVOU X7, 928(AX) |
| MOVOU 48(AX), X0 |
| MOVOU 176(AX), X1 |
| MOVOU 304(AX), X2 |
| MOVOU 432(AX), X3 |
| MOVOU 560(AX), X4 |
| MOVOU 688(AX), X5 |
| MOVOU 816(AX), X6 |
| MOVOU 944(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 48(AX) |
| MOVOU X1, 176(AX) |
| MOVOU X2, 304(AX) |
| MOVOU X3, 432(AX) |
| MOVOU X4, 560(AX) |
| MOVOU X5, 688(AX) |
| MOVOU X6, 816(AX) |
| MOVOU X7, 944(AX) |
| MOVOU 64(AX), X0 |
| MOVOU 192(AX), X1 |
| MOVOU 320(AX), X2 |
| MOVOU 448(AX), X3 |
| MOVOU 576(AX), X4 |
| MOVOU 704(AX), X5 |
| MOVOU 832(AX), X6 |
| MOVOU 960(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 64(AX) |
| MOVOU X1, 192(AX) |
| MOVOU X2, 320(AX) |
| MOVOU X3, 448(AX) |
| MOVOU X4, 576(AX) |
| MOVOU X5, 704(AX) |
| MOVOU X6, 832(AX) |
| MOVOU X7, 960(AX) |
| MOVOU 80(AX), X0 |
| MOVOU 208(AX), X1 |
| MOVOU 336(AX), X2 |
| MOVOU 464(AX), X3 |
| MOVOU 592(AX), X4 |
| MOVOU 720(AX), X5 |
| MOVOU 848(AX), X6 |
| MOVOU 976(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 80(AX) |
| MOVOU X1, 208(AX) |
| MOVOU X2, 336(AX) |
| MOVOU X3, 464(AX) |
| MOVOU X4, 592(AX) |
| MOVOU X5, 720(AX) |
| MOVOU X6, 848(AX) |
| MOVOU X7, 976(AX) |
| MOVOU 96(AX), X0 |
| MOVOU 224(AX), X1 |
| MOVOU 352(AX), X2 |
| MOVOU 480(AX), X3 |
| MOVOU 608(AX), X4 |
| MOVOU 736(AX), X5 |
| MOVOU 864(AX), X6 |
| MOVOU 992(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 96(AX) |
| MOVOU X1, 224(AX) |
| MOVOU X2, 352(AX) |
| MOVOU X3, 480(AX) |
| MOVOU X4, 608(AX) |
| MOVOU X5, 736(AX) |
| MOVOU X6, 864(AX) |
| MOVOU X7, 992(AX) |
| MOVOU 112(AX), X0 |
| MOVOU 240(AX), X1 |
| MOVOU 368(AX), X2 |
| MOVOU 496(AX), X3 |
| MOVOU 624(AX), X4 |
| MOVOU 752(AX), X5 |
| MOVOU 880(AX), X6 |
| MOVOU 1008(AX), X7 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X6, X8 |
| PUNPCKLQDQ X6, X9 |
| PUNPCKHQDQ X7, X6 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X7, X9 |
| MOVO X8, X7 |
| MOVO X2, X8 |
| PUNPCKHQDQ X9, X7 |
| PUNPCKLQDQ X3, X9 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X3 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFD $0xb1, X6, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| PSHUFB X10, X2 |
| MOVO X0, X8 |
| PMULULQ X2, X8 |
| PADDQ X2, X0 |
| PADDQ X8, X0 |
| PADDQ X8, X0 |
| PXOR X0, X6 |
| PSHUFB X11, X6 |
| MOVO X4, X8 |
| PMULULQ X6, X8 |
| PADDQ X6, X4 |
| PADDQ X8, X4 |
| PADDQ X8, X4 |
| PXOR X4, X2 |
| MOVO X2, X8 |
| PADDQ X2, X8 |
| PSRLQ $0x3f, X2 |
| PXOR X8, X2 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFD $0xb1, X7, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| PSHUFB X10, X3 |
| MOVO X1, X8 |
| PMULULQ X3, X8 |
| PADDQ X3, X1 |
| PADDQ X8, X1 |
| PADDQ X8, X1 |
| PXOR X1, X7 |
| PSHUFB X11, X7 |
| MOVO X5, X8 |
| PMULULQ X7, X8 |
| PADDQ X7, X5 |
| PADDQ X8, X5 |
| PADDQ X8, X5 |
| PXOR X5, X3 |
| MOVO X3, X8 |
| PADDQ X3, X8 |
| PSRLQ $0x3f, X3 |
| PXOR X8, X3 |
| MOVO X4, X8 |
| MOVO X5, X4 |
| MOVO X8, X5 |
| MOVO X2, X8 |
| PUNPCKLQDQ X2, X9 |
| PUNPCKHQDQ X3, X2 |
| PUNPCKHQDQ X9, X2 |
| PUNPCKLQDQ X3, X9 |
| MOVO X8, X3 |
| MOVO X6, X8 |
| PUNPCKHQDQ X9, X3 |
| PUNPCKLQDQ X7, X9 |
| PUNPCKHQDQ X9, X6 |
| PUNPCKLQDQ X8, X9 |
| PUNPCKHQDQ X9, X7 |
| MOVOU X0, 112(AX) |
| MOVOU X1, 240(AX) |
| MOVOU X2, 368(AX) |
| MOVOU X3, 496(AX) |
| MOVOU X4, 624(AX) |
| MOVOU X5, 752(AX) |
| MOVOU X6, 880(AX) |
| MOVOU X7, 1008(AX) |
| RET |
| |
| // c40 is a 16-byte PSHUFB control mask; blamkaSSE4 loads it into X10. |
| // Read as little-endian bytes, the low half selects source bytes |
| // 3,4,5,6,7,0,1,2 and the high half selects 11,12,13,14,15,8,9,10, so the |
| // shuffle rotates each 64-bit lane right by 24 bits (left by 40 bits). |
| // The symbol is file-local (<>), read-only, and 16 bytes long. |
| DATA ·c40<>+0(SB)/8, $0x0201000706050403 |
| DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b |
| GLOBL ·c40<>(SB), RODATA|NOPTR, $16 |
| |
| // c48 is a 16-byte PSHUFB control mask; blamkaSSE4 loads it into X11. |
| // Read as little-endian bytes, the low half selects source bytes |
| // 2,3,4,5,6,7,0,1 and the high half selects 10,11,12,13,14,15,8,9, so the |
| // shuffle rotates each 64-bit lane right by 16 bits (left by 48 bits). |
| // The symbol is file-local (<>), read-only, and 16 bytes long. |
| DATA ·c48<>+0(SB)/8, $0x0100070605040302 |
| DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a |
| GLOBL ·c48<>(SB), RODATA|NOPTR, $16 |
| |
| // func mixBlocksSSE2(out *block, a *block, b *block, c *block) |
| // Requires: SSE2 |
| // |
| // mixBlocksSSE2 stores a XOR b XOR c through out, 16 bytes per loop pass. |
| // The loop runs 64 passes (DI starts at 0x80 and drops by 2 each pass), so |
| // 1024 bytes are read through each of a, b and c and 1024 bytes are written |
| // through out. The previous contents of out are never read. |
| // Every memory access uses MOVOU, so the pointers need no 16-byte alignment. |
| // Frame: no locals, 32 bytes of arguments (four 8-byte pointers), no result. |
| TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32 |
| // Load the four pointer arguments. |
| MOVQ out+0(FP), DX |
| MOVQ a+8(FP), AX |
| MOVQ b+16(FP), BX |
| MOVQ c+24(FP), CX |
| // DI = 128 is the loop counter; each 16-byte pass subtracts 2 from it. |
| // NOTE(review): presumably the number of 64-bit words in a block; the |
| // block type is declared outside this file, confirm there. |
| MOVQ $0x00000080, DI |
| |
| loop: |
| // Load the current 16-byte chunk of each input. |
| MOVOU (AX), X0 |
| MOVOU (BX), X1 |
| MOVOU (CX), X2 |
| // X0 = a chunk ^ b chunk ^ c chunk. |
| PXOR X1, X0 |
| PXOR X2, X0 |
| // Write the result chunk; out is only written, not read. |
| MOVOU X0, (DX) |
| // Advance all four pointers to the next 16-byte chunk. |
| ADDQ $0x10, AX |
| ADDQ $0x10, BX |
| ADDQ $0x10, CX |
| ADDQ $0x10, DX |
| // SUBQ must remain the last flag-writing instruction before JA: the ADDQs |
| // above also write flags, and JA loops only while the SUBQ result is |
| // nonzero and did not borrow. |
| SUBQ $0x02, DI |
| JA loop |
| RET |
| |
| // func xorBlocksSSE2(out *block, a *block, b *block, c *block) |
| // Requires: SSE2 |
| // |
| // xorBlocksSSE2 XORs a XOR b XOR c into the existing contents of out, |
| // 16 bytes per loop pass: out = out ^ a ^ b ^ c. |
| // The loop runs 64 passes (DI starts at 0x80 and drops by 2 each pass), so |
| // 1024 bytes are read through each of a, b, c and out, and 1024 bytes are |
| // written back through out. |
| // Every memory access uses MOVOU, so the pointers need no 16-byte alignment. |
| // Frame: no locals, 32 bytes of arguments (four 8-byte pointers), no result. |
| TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32 |
| // Load the four pointer arguments. |
| MOVQ out+0(FP), DX |
| MOVQ a+8(FP), AX |
| MOVQ b+16(FP), BX |
| MOVQ c+24(FP), CX |
| // DI = 128 is the loop counter; each 16-byte pass subtracts 2 from it. |
| // NOTE(review): presumably the number of 64-bit words in a block; the |
| // block type is declared outside this file, confirm there. |
| MOVQ $0x00000080, DI |
| |
| loop: |
| // Load the current 16-byte chunk of each input and of out itself. |
| MOVOU (AX), X0 |
| MOVOU (BX), X1 |
| MOVOU (CX), X2 |
| MOVOU (DX), X3 |
| // X0 = a chunk ^ b chunk ^ c chunk ^ previous out chunk. |
| PXOR X1, X0 |
| PXOR X2, X0 |
| PXOR X3, X0 |
| // Store the accumulated chunk back over the out chunk just read. |
| MOVOU X0, (DX) |
| // Advance all four pointers to the next 16-byte chunk. |
| ADDQ $0x10, AX |
| ADDQ $0x10, BX |
| ADDQ $0x10, CX |
| ADDQ $0x10, DX |
| // SUBQ must remain the last flag-writing instruction before JA: the ADDQs |
| // above also write flags, and JA loops only while the SUBQ result is |
| // nonzero and did not borrow. |
| SUBQ $0x02, DI |
| JA loop |
| RET |