// blob: 57d510fc08de53182b0ecb400dc1f2f68e977609
// Code generated by command: go run blake2s_amd64_asm.go -out ../blake2s_amd64.s -pkg blake2s. DO NOT EDIT.
//go:build amd64 && gc && !purego
#include "textflag.h"
// SCATTER_SSE2 stores the low 32 bits of register w into ten slots of the
// message schedule, one slot per round, at byte offsets o0..o9 from BP.
#define SCATTER_SSE2(w, o0, o1, o2, o3, o4, o5, o6, o7, o8, o9) \
	MOVL w, o0(BP); \
	MOVL w, o1(BP); \
	MOVL w, o2(BP); \
	MOVL w, o3(BP); \
	MOVL w, o4(BP); \
	MOVL w, o5(BP); \
	MOVL w, o6(BP); \
	MOVL w, o7(BP); \
	MOVL w, o8(BP); \
	MOVL w, o9(BP)

// ROTATE_SSE2 rotates every 32-bit lane of v right by rsh bits using a
// shift pair; lsh must equal 32-rsh. tmp is clobbered.
#define ROTATE_SSE2(lsh, rsh, v, tmp) \
	MOVO  v, tmp; \
	PSLLL $lsh, tmp; \
	PSRLL $rsh, v; \
	PXOR  tmp, v

// HALF_G_SSE2 applies the mixing step to the four lanes of rows X4..X7 in
// parallel, adding message vector ma in the first half and mb in the second.
// Rotation amounts are 16, 12, 8 and 7 bits to the right. X8 is scratch.
#define HALF_G_SSE2(ma, mb) \
	PADDL ma, X4; \
	PADDL X5, X4; \
	PXOR  X4, X7; \
	ROTATE_SSE2(16, 16, X7, X8); \
	PADDL X7, X6; \
	PXOR  X6, X5; \
	ROTATE_SSE2(20, 12, X5, X8); \
	PADDL mb, X4; \
	PADDL X5, X4; \
	PXOR  X4, X7; \
	ROTATE_SSE2(24, 8, X7, X8); \
	PADDL X7, X6; \
	PXOR  X6, X5; \
	ROTATE_SSE2(25, 7, X5, X8)

// ROUND_SSE2 is one full round: a half over the rows as they are, a lane
// rotation of X5/X6/X7 by one/two/three positions, a second half, and the
// mirrored lane rotation that puts the rows back in their original order.
#define ROUND_SSE2(m0, m1, m2, m3) \
	HALF_G_SSE2(m0, m1); \
	PSHUFL $0x39, X5, X5; \
	PSHUFL $0x4e, X6, X6; \
	PSHUFL $0x93, X7, X7; \
	HALF_G_SSE2(m2, m3); \
	PSHUFL $0x39, X7, X7; \
	PSHUFL $0x4e, X6, X6; \
	PSHUFL $0x93, X5, X5

// func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
// Requires: SSE2
//
// hashBlocksSSE2 runs the ten-round compression over each 64-byte block of
// blocks, updating the state at h and the 64-bit counter at c in place.
//
// blocks_len has to be a non-zero multiple of 64: the loop body runs once
// before the length is examined and only exits when the remaining length
// reaches exactly zero.
//
// Register roles:
//   AX      h                      BX  c
//   SI      current block          DX  bytes still to process
//   BP      16-byte aligned scratch area inside the frame
//   X0, X1  h[0..3], h[4..7], carried from block to block
//   X2, X3  iv0<>, iv1<>
//   X4-X7   working rows, reloaded from X0..X3 for every block
//   X8      scratch for the shift-based rotations
//   X12     counter<> (added to the counter once per block)
//   X15     low quadword = counter, high quadword = flag
//
// Scratch layout: 0(BP)..15(BP) stages counter and flag on their way into and
// out of X15; 16(BP)..655(BP) is the message schedule, four 16-byte vectors
// per round for ten rounds.
//
// The rol16<> and rol8<> PSHUFB masks are deliberately not loaded here: every
// rotation in this routine is built from shifts, so X13 and X14 would never
// be read.
TEXT ·hashBlocksSSE2(SB), $672-48
	MOVQ h+0(FP), AX
	MOVQ c+8(FP), BX
	MOVL flag+16(FP), CX
	MOVQ blocks_base+24(FP), SI
	MOVQ blocks_len+32(FP), DX

	// Round SP up to a 16-byte boundary so MOVO/PADDL may address the scratch.
	MOVQ SP, BP
	ADDQ $0x0f, BP
	ANDQ $-16, BP

	// Stage counter (low quadword) and flag (high quadword) for X15.
	MOVQ (BX), R9
	MOVQ R9, (BP)
	MOVQ CX, 8(BP)

	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU iv0<>+0(SB), X2
	MOVOU iv1<>+0(SB), X3
	MOVOU counter<>+0(SB), X12
	MOVO  (BP), X15

loop:
	// Fresh working rows; the last row also absorbs counter and flag.
	MOVO  X0, X4
	MOVO  X1, X5
	MOVO  X2, X6
	MOVO  X3, X7
	PADDQ X12, X15
	PXOR  X15, X7

	// Each quadword carries two consecutive 32-bit message words.
	MOVQ (SI), R8
	MOVQ 8(SI), R9
	MOVQ 16(SI), R10
	MOVQ 24(SI), R11
	MOVQ 32(SI), R12
	MOVQ 40(SI), R13
	MOVQ 48(SI), R14
	MOVQ 56(SI), R15

	// Build the per-round schedule: low word first, then SHRQ exposes the
	// high word of the same register.
	SCATTER_SSE2(R8, 16, 116, 164, 264, 288, 344, 432, 512, 540, 652)
	SHRQ $0x20, R8
	SCATTER_SSE2(R8, 32, 112, 200, 228, 320, 380, 404, 488, 568, 604)
	SCATTER_SSE2(R9, 20, 132, 168, 240, 280, 336, 456, 508, 576, 608)
	SHRQ $0x20, R9
	SCATTER_SSE2(R9, 36, 140, 180, 212, 316, 364, 452, 476, 552, 632)
	SCATTER_SSE2(R10, 24, 84, 204, 248, 296, 368, 412, 516, 584, 612)
	SHRQ $0x20, R10
	SCATTER_SSE2(R10, 40, 124, 152, 244, 276, 388, 416, 496, 588, 620)
	SCATTER_SSE2(R11, 28, 108, 196, 256, 312, 340, 436, 520, 528, 616)
	SHRQ $0x20, R11
	SCATTER_SSE2(R11, 44, 136, 184, 208, 292, 372, 448, 468, 580, 600)
	SCATTER_SSE2(R12, 48, 100, 160, 268, 328, 348, 444, 504, 556, 596)
	SHRQ $0x20, R12
	SCATTER_SSE2(R12, 64, 88, 188, 224, 272, 396, 440, 492, 548, 628)
	SCATTER_SSE2(R13, 52, 96, 176, 260, 284, 356, 428, 524, 572, 592)
	SHRQ $0x20, R13
	SCATTER_SSE2(R13, 68, 120, 144, 220, 308, 360, 460, 480, 536, 640)
	SCATTER_SSE2(R14, 56, 128, 148, 232, 324, 352, 400, 472, 560, 648)
	SHRQ $0x20, R14
	SCATTER_SSE2(R14, 72, 92, 172, 216, 332, 384, 424, 464, 564, 636)
	SCATTER_SSE2(R15, 60, 80, 192, 236, 304, 392, 408, 484, 532, 644)
	SHRQ $0x20, R15
	SCATTER_SSE2(R15, 76, 104, 156, 252, 300, 376, 420, 500, 544, 624)

	// Ten rounds, each consuming its own 64-byte slice of the schedule.
	ROUND_SSE2(16(BP), 32(BP), 48(BP), 64(BP))
	ROUND_SSE2(80(BP), 96(BP), 112(BP), 128(BP))
	ROUND_SSE2(144(BP), 160(BP), 176(BP), 192(BP))
	ROUND_SSE2(208(BP), 224(BP), 240(BP), 256(BP))
	ROUND_SSE2(272(BP), 288(BP), 304(BP), 320(BP))
	ROUND_SSE2(336(BP), 352(BP), 368(BP), 384(BP))
	ROUND_SSE2(400(BP), 416(BP), 432(BP), 448(BP))
	ROUND_SSE2(464(BP), 480(BP), 496(BP), 512(BP))
	ROUND_SSE2(528(BP), 544(BP), 560(BP), 576(BP))
	ROUND_SSE2(592(BP), 608(BP), 624(BP), 640(BP))

	// Fold the working rows back into the chaining value.
	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X0
	PXOR X7, X1

	LEAQ 64(SI), SI
	SUBQ $0x40, DX
	JNE  loop

	// Write back the advanced counter (low quadword of X15) and the state.
	MOVO  X15, (BP)
	MOVQ  (BP), R9
	MOVQ  R9, (BX)
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	RET
// iv0 holds the first four 32-bit words of the BLAKE2s initialization vector.
// The hashBlocks routines load it into X2 and copy it into the third working
// row (X6) at the start of every block.
DATA iv0<>+0(SB)/4, $0x6a09e667
DATA iv0<>+4(SB)/4, $0xbb67ae85
DATA iv0<>+8(SB)/4, $0x3c6ef372
DATA iv0<>+12(SB)/4, $0xa54ff53a
GLOBL iv0<>(SB), RODATA|NOPTR, $16
// iv1 holds the last four 32-bit words of the BLAKE2s initialization vector.
// The hashBlocks routines load it into X3 and copy it into the fourth working
// row (X7), which is then XORed with the counter/flag register X15.
DATA iv1<>+0(SB)/4, $0x510e527f
DATA iv1<>+4(SB)/4, $0x9b05688c
DATA iv1<>+8(SB)/4, $0x1f83d9ab
DATA iv1<>+12(SB)/4, $0x5be0cd19
GLOBL iv1<>(SB), RODATA|NOPTR, $16
// counter is the per-block increment: 0x40 (64, the number of bytes consumed
// per loop iteration) in the low quadword and zero in the high quadword. It is
// added with PADDQ to X15, so only the counter half of X15 advances and the
// flag half is left untouched.
DATA counter<>+0(SB)/8, $0x0000000000000040
DATA counter<>+8(SB)/8, $0x0000000000000000
GLOBL counter<>(SB), RODATA|NOPTR, $16
// rol16 is a PSHUFB byte-selection mask. Within every 32-bit lane, result
// bytes 0,1,2,3 are taken from source bytes 2,3,0,1, i.e. each lane is rotated
// by 16 bits. hashBlocksSSSE3 and hashBlocksSSE4 apply it through X13.
DATA rol16<>+0(SB)/8, $0x0504070601000302
DATA rol16<>+8(SB)/8, $0x0d0c0f0e09080b0a
GLOBL rol16<>(SB), RODATA|NOPTR, $16
// rol8 is a PSHUFB byte-selection mask. Within every 32-bit lane, result
// bytes 0,1,2,3 are taken from source bytes 1,2,3,0, i.e. each lane is rotated
// right by 8 bits (equivalently left by 24) - the same rotation that
// hashBlocksSSE2 builds from a left shift by 24 and a right shift by 8.
// hashBlocksSSSE3 and hashBlocksSSE4 apply it through X14.
DATA rol8<>+0(SB)/8, $0x0407060500030201
DATA rol8<>+8(SB)/8, $0x0c0f0e0d080b0a09
GLOBL rol8<>(SB), RODATA|NOPTR, $16
// SCATTER_SSSE3 copies the low 32 bits of register w into ten message
// schedule slots, one per round, located o0..o9 bytes above BP.
#define SCATTER_SSSE3(w, o0, o1, o2, o3, o4, o5, o6, o7, o8, o9) \
	MOVL w, o0(BP); \
	MOVL w, o1(BP); \
	MOVL w, o2(BP); \
	MOVL w, o3(BP); \
	MOVL w, o4(BP); \
	MOVL w, o5(BP); \
	MOVL w, o6(BP); \
	MOVL w, o7(BP); \
	MOVL w, o8(BP); \
	MOVL w, o9(BP)

// HALF_G_SSSE3 mixes rows X4..X7 lane-wise with message vectors ma and mb.
// The 16-bit and 8-bit rotations are byte shuffles through the masks held in
// X13 and X14; the 12-bit and 7-bit right rotations are shift pairs that use
// X8 as scratch.
#define HALF_G_SSSE3(ma, mb) \
	PADDL  ma, X4; \
	PADDL  X5, X4; \
	PXOR   X4, X7; \
	PSHUFB X13, X7; \
	PADDL  X7, X6; \
	PXOR   X6, X5; \
	MOVO   X5, X8; \
	PSLLL  $20, X8; \
	PSRLL  $12, X5; \
	PXOR   X8, X5; \
	PADDL  mb, X4; \
	PADDL  X5, X4; \
	PXOR   X4, X7; \
	PSHUFB X14, X7; \
	PADDL  X7, X6; \
	PXOR   X6, X5; \
	MOVO   X5, X8; \
	PSLLL  $25, X8; \
	PSRLL  $7, X5; \
	PXOR   X8, X5

// ROUND_SSSE3 performs one round: first half, lane rotation of X5/X6/X7 by
// one/two/three positions, second half, then the mirrored lane rotation that
// restores the original row order.
#define ROUND_SSSE3(m0, m1, m2, m3) \
	HALF_G_SSSE3(m0, m1); \
	PSHUFL $0x39, X5, X5; \
	PSHUFL $0x4e, X6, X6; \
	PSHUFL $0x93, X7, X7; \
	HALF_G_SSSE3(m2, m3); \
	PSHUFL $0x39, X7, X7; \
	PSHUFL $0x4e, X6, X6; \
	PSHUFL $0x93, X5, X5

// func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
// Requires: SSE2, SSSE3
//
// hashBlocksSSSE3 compresses blocks, 64 bytes at a time, into the state at h
// and advances the 64-bit counter at c by 64 per block. Both are updated in
// place.
//
// The loop tests the remaining length only after a block has been processed
// and leaves only when it hits exactly zero, so blocks_len must be a non-zero
// multiple of 64.
//
// Registers:
//   AX = h, BX = c, SI = read cursor, DX = bytes left
//   BP = SP rounded up to 16 bytes; 0(BP) stages counter/flag,
//        16(BP)..655(BP) is the ten-round message schedule
//   X0/X1 = chaining value, X2/X3 = iv0<>/iv1<>
//   X4..X7 = working rows, X8 = shift scratch
//   X12 = counter<>, X13 = rol16<>, X14 = rol8<>
//   X15 = counter (low quadword) and flag (high quadword)
TEXT ·hashBlocksSSSE3(SB), $672-48
	MOVQ h+0(FP), AX
	MOVQ c+8(FP), BX
	MOVL flag+16(FP), CX
	MOVQ blocks_base+24(FP), SI
	MOVQ blocks_len+32(FP), DX

	// Aligned scratch base.
	MOVQ SP, BP
	ADDQ $0x0f, BP
	ANDQ $-16, BP

	// Counter and flag go through memory to reach X15 in one aligned load.
	MOVQ (BX), R9
	MOVQ R9, (BP)
	MOVQ CX, 8(BP)

	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU iv0<>+0(SB), X2
	MOVOU iv1<>+0(SB), X3
	MOVOU counter<>+0(SB), X12
	MOVOU rol16<>+0(SB), X13
	MOVOU rol8<>+0(SB), X14
	MOVO  (BP), X15

loop:
	// Working rows start from the chaining value and the IV; the counter is
	// bumped and mixed into the last row together with the flag.
	MOVO  X0, X4
	MOVO  X1, X5
	MOVO  X2, X6
	MOVO  X3, X7
	PADDQ X12, X15
	PXOR  X15, X7

	// Two 32-bit message words per general-purpose register.
	MOVQ (SI), R8
	MOVQ 8(SI), R9
	MOVQ 16(SI), R10
	MOVQ 24(SI), R11
	MOVQ 32(SI), R12
	MOVQ 40(SI), R13
	MOVQ 48(SI), R14
	MOVQ 56(SI), R15

	// Lay the words out in the order each round will consume them. The low
	// word of a register is placed first; SHRQ then brings down the high word.
	SCATTER_SSSE3(R8, 16, 116, 164, 264, 288, 344, 432, 512, 540, 652)
	SHRQ $0x20, R8
	SCATTER_SSSE3(R8, 32, 112, 200, 228, 320, 380, 404, 488, 568, 604)
	SCATTER_SSSE3(R9, 20, 132, 168, 240, 280, 336, 456, 508, 576, 608)
	SHRQ $0x20, R9
	SCATTER_SSSE3(R9, 36, 140, 180, 212, 316, 364, 452, 476, 552, 632)
	SCATTER_SSSE3(R10, 24, 84, 204, 248, 296, 368, 412, 516, 584, 612)
	SHRQ $0x20, R10
	SCATTER_SSSE3(R10, 40, 124, 152, 244, 276, 388, 416, 496, 588, 620)
	SCATTER_SSSE3(R11, 28, 108, 196, 256, 312, 340, 436, 520, 528, 616)
	SHRQ $0x20, R11
	SCATTER_SSSE3(R11, 44, 136, 184, 208, 292, 372, 448, 468, 580, 600)
	SCATTER_SSSE3(R12, 48, 100, 160, 268, 328, 348, 444, 504, 556, 596)
	SHRQ $0x20, R12
	SCATTER_SSSE3(R12, 64, 88, 188, 224, 272, 396, 440, 492, 548, 628)
	SCATTER_SSSE3(R13, 52, 96, 176, 260, 284, 356, 428, 524, 572, 592)
	SHRQ $0x20, R13
	SCATTER_SSSE3(R13, 68, 120, 144, 220, 308, 360, 460, 480, 536, 640)
	SCATTER_SSSE3(R14, 56, 128, 148, 232, 324, 352, 400, 472, 560, 648)
	SHRQ $0x20, R14
	SCATTER_SSSE3(R14, 72, 92, 172, 216, 332, 384, 424, 464, 564, 636)
	SCATTER_SSSE3(R15, 60, 80, 192, 236, 304, 392, 408, 484, 532, 644)
	SHRQ $0x20, R15
	SCATTER_SSSE3(R15, 76, 104, 156, 252, 300, 376, 420, 500, 544, 624)

	// Rounds 0 through 9.
	ROUND_SSSE3(16(BP), 32(BP), 48(BP), 64(BP))
	ROUND_SSSE3(80(BP), 96(BP), 112(BP), 128(BP))
	ROUND_SSSE3(144(BP), 160(BP), 176(BP), 192(BP))
	ROUND_SSSE3(208(BP), 224(BP), 240(BP), 256(BP))
	ROUND_SSSE3(272(BP), 288(BP), 304(BP), 320(BP))
	ROUND_SSSE3(336(BP), 352(BP), 368(BP), 384(BP))
	ROUND_SSSE3(400(BP), 416(BP), 432(BP), 448(BP))
	ROUND_SSSE3(464(BP), 480(BP), 496(BP), 512(BP))
	ROUND_SSSE3(528(BP), 544(BP), 560(BP), 576(BP))
	ROUND_SSSE3(592(BP), 608(BP), 624(BP), 640(BP))

	// Feed-forward into the chaining value.
	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X0
	PXOR X7, X1

	LEAQ 64(SI), SI
	SUBQ $0x40, DX
	JNE  loop

	// Store the counter half of X15 and the new state.
	MOVO  X15, (BP)
	MOVQ  (BP), R9
	MOVQ  R9, (BX)
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	RET
// LOAD_MSG_SSE4 gathers sixteen 32-bit message words straight from the block
// at SI into X8..X11. Arguments are word indices (byte offset / 4), listed in
// lane order: a0..a3 fill X8, b0..b3 fill X9, c0..c3 fill X10, d0..d3 fill X11.
#define LOAD_MSG_SSE4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \
	MOVL   a0*4(SI), X8; \
	PINSRD $1, a1*4(SI), X8; \
	PINSRD $2, a2*4(SI), X8; \
	PINSRD $3, a3*4(SI), X8; \
	MOVL   b0*4(SI), X9; \
	PINSRD $1, b1*4(SI), X9; \
	PINSRD $2, b2*4(SI), X9; \
	PINSRD $3, b3*4(SI), X9; \
	MOVL   c0*4(SI), X10; \
	PINSRD $1, c1*4(SI), X10; \
	PINSRD $2, c2*4(SI), X10; \
	PINSRD $3, c3*4(SI), X10; \
	MOVL   d0*4(SI), X11; \
	PINSRD $1, d1*4(SI), X11; \
	PINSRD $2, d2*4(SI), X11; \
	PINSRD $3, d3*4(SI), X11

// HALF_G_SSE4 mixes rows X4..X7 lane-wise with message registers ma and mb.
// Rotations by 16 and 8 bits are PSHUFB through X13/X14; the right rotations
// by 12 and 7 bits are shift pairs. X8 is the shift scratch: when ma is X8 it
// has already been added into X4 by the time X8 is first overwritten.
#define HALF_G_SSE4(ma, mb) \
	PADDL  ma, X4; \
	PADDL  X5, X4; \
	PXOR   X4, X7; \
	PSHUFB X13, X7; \
	PADDL  X7, X6; \
	PXOR   X6, X5; \
	MOVO   X5, X8; \
	PSLLL  $20, X8; \
	PSRLL  $12, X5; \
	PXOR   X8, X5; \
	PADDL  mb, X4; \
	PADDL  X5, X4; \
	PXOR   X4, X7; \
	PSHUFB X14, X7; \
	PADDL  X7, X6; \
	PXOR   X6, X5; \
	MOVO   X5, X8; \
	PSLLL  $25, X8; \
	PSRLL  $7, X5; \
	PXOR   X8, X5

// ROUND_SSE4 consumes the four message registers filled by LOAD_MSG_SSE4:
// first half, lane rotation of X5/X6/X7 by one/two/three positions, second
// half, and the mirrored lane rotation that restores the row order.
#define ROUND_SSE4() \
	HALF_G_SSE4(X8, X9); \
	PSHUFL $0x39, X5, X5; \
	PSHUFL $0x4e, X6, X6; \
	PSHUFL $0x93, X7, X7; \
	HALF_G_SSE4(X10, X11); \
	PSHUFL $0x39, X7, X7; \
	PSHUFL $0x4e, X6, X6; \
	PSHUFL $0x93, X5, X5

// func hashBlocksSSE4(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
// Requires: SSE2, SSE4.1, SSSE3
//
// hashBlocksSSE4 compresses blocks, 64 bytes per iteration, into the state at
// h and advances the 64-bit counter at c by 64 per block; both are updated in
// place. Message words are inserted directly into vector registers with
// PINSRD, so the only stack scratch needed is the 16 bytes at BP used to move
// counter and flag in and out of X15.
//
// The remaining length is checked only after a block has been processed and
// the loop exits only on exactly zero, so blocks_len must be a non-zero
// multiple of 64.
//
// Registers:
//   AX = h, BX = c, SI = read cursor, DX = bytes left, BP = aligned scratch
//   X0/X1 = chaining value, X2/X3 = iv0<>/iv1<>
//   X4..X7 = working rows, X8..X11 = message vectors (X8 also shift scratch)
//   X12 = counter<>, X13 = rol16<>, X14 = rol8<>
//   X15 = counter (low quadword) and flag (high quadword)
TEXT ·hashBlocksSSE4(SB), $32-48
	MOVQ h+0(FP), AX
	MOVQ c+8(FP), BX
	MOVL flag+16(FP), CX
	MOVQ blocks_base+24(FP), SI
	MOVQ blocks_len+32(FP), DX

	// Aligned scratch base.
	MOVQ SP, BP
	ADDQ $0x0f, BP
	ANDQ $-16, BP

	// Counter in the low quadword, flag in the high quadword.
	MOVQ (BX), R9
	MOVQ R9, (BP)
	MOVQ CX, 8(BP)

	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU iv0<>+0(SB), X2
	MOVOU iv1<>+0(SB), X3
	MOVOU counter<>+0(SB), X12
	MOVOU rol16<>+0(SB), X13
	MOVOU rol8<>+0(SB), X14
	MOVO  (BP), X15

loop:
	// Working rows start from the chaining value and the IV; the counter is
	// bumped and mixed into the last row together with the flag.
	MOVO  X0, X4
	MOVO  X1, X5
	MOVO  X2, X6
	MOVO  X3, X7
	PADDQ X12, X15
	PXOR  X15, X7

	// Ten rounds; each LOAD_MSG_SSE4 line is that round's word permutation.
	LOAD_MSG_SSE4(0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15)
	ROUND_SSE4()
	LOAD_MSG_SSE4(14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3)
	ROUND_SSE4()
	LOAD_MSG_SSE4(11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4)
	ROUND_SSE4()
	LOAD_MSG_SSE4(7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8)
	ROUND_SSE4()
	LOAD_MSG_SSE4(9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13)
	ROUND_SSE4()
	LOAD_MSG_SSE4(2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9)
	ROUND_SSE4()
	LOAD_MSG_SSE4(12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11)
	ROUND_SSE4()
	LOAD_MSG_SSE4(13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10)
	ROUND_SSE4()
	LOAD_MSG_SSE4(6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5)
	ROUND_SSE4()
	LOAD_MSG_SSE4(10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0)
	ROUND_SSE4()

	// Feed-forward into the chaining value.
	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X0
	PXOR X7, X1

	LEAQ 64(SI), SI
	SUBQ $0x40, DX
	JNE  loop

	// Store the counter half of X15 and the new state.
	MOVO  X15, (BP)
	MOVQ  (BP), R9
	MOVQ  R9, (BX)
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	RET