| // Code generated by command: go run sha1block_amd64_asm.go -out ../sha1block_amd64.s -pkg sha1. DO NOT EDIT. |
| |
| //go:build !purego |
| |
| #include "textflag.h" |
| |
| // func blockAVX2(dig *digest, p []byte) |
| // Requires: AVX, AVX2, BMI, BMI2, CMOV |
| TEXT ·blockAVX2(SB), $1408-32 |
| MOVQ dig+0(FP), DI |
| MOVQ p_base+8(FP), SI |
| MOVQ p_len+16(FP), DX |
| SHRQ $0x06, DX |
| SHLQ $0x06, DX |
| LEAQ K_XMM_AR<>+0(SB), R8 |
| MOVQ DI, R9 |
| MOVQ SI, R10 |
| LEAQ 64(SI), R13 |
| ADDQ SI, DX |
| ADDQ $0x40, DX |
| MOVQ DX, R11 |
| CMPQ R13, R11 |
| CMOVQCC R8, R13 |
| VMOVDQU BSWAP_SHUFB_CTL<>+0(SB), Y10 |
| MOVL (R9), CX |
| MOVL 4(R9), SI |
| MOVL 8(R9), DI |
| MOVL 12(R9), AX |
| MOVL 16(R9), DX |
| MOVQ SP, R14 |
| LEAQ 672(SP), R15 |
| VMOVDQU (R10), X0 |
| VINSERTI128 $0x01, (R13), Y0, Y0 |
| VPSHUFB Y10, Y0, Y15 |
| VPADDD (R8), Y15, Y0 |
| VMOVDQU Y0, (R14) |
| VMOVDQU 16(R10), X0 |
| VINSERTI128 $0x01, 16(R13), Y0, Y0 |
| VPSHUFB Y10, Y0, Y14 |
| VPADDD (R8), Y14, Y0 |
| VMOVDQU Y0, 32(R14) |
| VMOVDQU 32(R10), X0 |
| VINSERTI128 $0x01, 32(R13), Y0, Y0 |
| VPSHUFB Y10, Y0, Y13 |
| VPADDD (R8), Y13, Y0 |
| VMOVDQU Y0, 64(R14) |
| VMOVDQU 48(R10), X0 |
| VINSERTI128 $0x01, 48(R13), Y0, Y0 |
| VPSHUFB Y10, Y0, Y12 |
| VPADDD (R8), Y12, Y0 |
| VMOVDQU Y0, 96(R14) |
| VPALIGNR $0x08, Y15, Y14, Y8 |
| VPSRLDQ $0x04, Y12, Y0 |
| VPXOR Y13, Y8, Y8 |
| VPXOR Y15, Y0, Y0 |
| VPXOR Y0, Y8, Y8 |
| VPSLLDQ $0x0c, Y8, Y9 |
| VPSLLD $0x01, Y8, Y0 |
| VPSRLD $0x1f, Y8, Y8 |
| VPOR Y8, Y0, Y0 |
| VPSLLD $0x02, Y9, Y8 |
| VPSRLD $0x1e, Y9, Y9 |
| VPXOR Y8, Y0, Y0 |
| VPXOR Y9, Y0, Y8 |
| VPADDD (R8), Y8, Y0 |
| VMOVDQU Y0, 128(R14) |
| VPALIGNR $0x08, Y14, Y13, Y7 |
| VPSRLDQ $0x04, Y8, Y0 |
| VPXOR Y12, Y7, Y7 |
| VPXOR Y14, Y0, Y0 |
| VPXOR Y0, Y7, Y7 |
| VPSLLDQ $0x0c, Y7, Y9 |
| VPSLLD $0x01, Y7, Y0 |
| VPSRLD $0x1f, Y7, Y7 |
| VPOR Y7, Y0, Y0 |
| VPSLLD $0x02, Y9, Y7 |
| VPSRLD $0x1e, Y9, Y9 |
| VPXOR Y7, Y0, Y0 |
| VPXOR Y9, Y0, Y7 |
| VPADDD 32(R8), Y7, Y0 |
| VMOVDQU Y0, 160(R14) |
| VPALIGNR $0x08, Y13, Y12, Y5 |
| VPSRLDQ $0x04, Y7, Y0 |
| VPXOR Y8, Y5, Y5 |
| VPXOR Y13, Y0, Y0 |
| VPXOR Y0, Y5, Y5 |
| VPSLLDQ $0x0c, Y5, Y9 |
| VPSLLD $0x01, Y5, Y0 |
| VPSRLD $0x1f, Y5, Y5 |
| VPOR Y5, Y0, Y0 |
| VPSLLD $0x02, Y9, Y5 |
| VPSRLD $0x1e, Y9, Y9 |
| VPXOR Y5, Y0, Y0 |
| VPXOR Y9, Y0, Y5 |
| VPADDD 32(R8), Y5, Y0 |
| VMOVDQU Y0, 192(R14) |
| VPALIGNR $0x08, Y12, Y8, Y3 |
| VPSRLDQ $0x04, Y5, Y0 |
| VPXOR Y7, Y3, Y3 |
| VPXOR Y12, Y0, Y0 |
| VPXOR Y0, Y3, Y3 |
| VPSLLDQ $0x0c, Y3, Y9 |
| VPSLLD $0x01, Y3, Y0 |
| VPSRLD $0x1f, Y3, Y3 |
| VPOR Y3, Y0, Y0 |
| VPSLLD $0x02, Y9, Y3 |
| VPSRLD $0x1e, Y9, Y9 |
| VPXOR Y3, Y0, Y0 |
| VPXOR Y9, Y0, Y3 |
| VPADDD 32(R8), Y3, Y0 |
| VMOVDQU Y0, 224(R14) |
| VPALIGNR $0x08, Y5, Y3, Y0 |
| VPXOR Y14, Y15, Y15 |
| VPXOR Y8, Y0, Y0 |
| VPXOR Y0, Y15, Y15 |
| VPSLLD $0x02, Y15, Y0 |
| VPSRLD $0x1e, Y15, Y15 |
| VPOR Y15, Y0, Y15 |
| VPADDD 32(R8), Y15, Y0 |
| VMOVDQU Y0, 256(R14) |
| VPALIGNR $0x08, Y3, Y15, Y0 |
| VPXOR Y13, Y14, Y14 |
| VPXOR Y7, Y0, Y0 |
| VPXOR Y0, Y14, Y14 |
| VPSLLD $0x02, Y14, Y0 |
| VPSRLD $0x1e, Y14, Y14 |
| VPOR Y14, Y0, Y14 |
| VPADDD 32(R8), Y14, Y0 |
| VMOVDQU Y0, 288(R14) |
| VPALIGNR $0x08, Y15, Y14, Y0 |
| VPXOR Y12, Y13, Y13 |
| VPXOR Y5, Y0, Y0 |
| VPXOR Y0, Y13, Y13 |
| VPSLLD $0x02, Y13, Y0 |
| VPSRLD $0x1e, Y13, Y13 |
| VPOR Y13, Y0, Y13 |
| VPADDD 64(R8), Y13, Y0 |
| VMOVDQU Y0, 320(R14) |
| VPALIGNR $0x08, Y14, Y13, Y0 |
| VPXOR Y8, Y12, Y12 |
| VPXOR Y3, Y0, Y0 |
| VPXOR Y0, Y12, Y12 |
| VPSLLD $0x02, Y12, Y0 |
| VPSRLD $0x1e, Y12, Y12 |
| VPOR Y12, Y0, Y12 |
| VPADDD 64(R8), Y12, Y0 |
| VMOVDQU Y0, 352(R14) |
| VPALIGNR $0x08, Y13, Y12, Y0 |
| VPXOR Y7, Y8, Y8 |
| VPXOR Y15, Y0, Y0 |
| VPXOR Y0, Y8, Y8 |
| VPSLLD $0x02, Y8, Y0 |
| VPSRLD $0x1e, Y8, Y8 |
| VPOR Y8, Y0, Y8 |
| VPADDD 64(R8), Y8, Y0 |
| VMOVDQU Y0, 384(R14) |
| VPALIGNR $0x08, Y12, Y8, Y0 |
| VPXOR Y5, Y7, Y7 |
| VPXOR Y14, Y0, Y0 |
| VPXOR Y0, Y7, Y7 |
| VPSLLD $0x02, Y7, Y0 |
| VPSRLD $0x1e, Y7, Y7 |
| VPOR Y7, Y0, Y7 |
| VPADDD 64(R8), Y7, Y0 |
| VMOVDQU Y0, 416(R14) |
| VPALIGNR $0x08, Y8, Y7, Y0 |
| VPXOR Y3, Y5, Y5 |
| VPXOR Y13, Y0, Y0 |
| VPXOR Y0, Y5, Y5 |
| VPSLLD $0x02, Y5, Y0 |
| VPSRLD $0x1e, Y5, Y5 |
| VPOR Y5, Y0, Y5 |
| VPADDD 64(R8), Y5, Y0 |
| VMOVDQU Y0, 448(R14) |
| VPALIGNR $0x08, Y7, Y5, Y0 |
| VPXOR Y15, Y3, Y3 |
| VPXOR Y12, Y0, Y0 |
| VPXOR Y0, Y3, Y3 |
| VPSLLD $0x02, Y3, Y0 |
| VPSRLD $0x1e, Y3, Y3 |
| VPOR Y3, Y0, Y3 |
| VPADDD 96(R8), Y3, Y0 |
| VMOVDQU Y0, 480(R14) |
| VPALIGNR $0x08, Y5, Y3, Y0 |
| VPXOR Y14, Y15, Y15 |
| VPXOR Y8, Y0, Y0 |
| VPXOR Y0, Y15, Y15 |
| VPSLLD $0x02, Y15, Y0 |
| VPSRLD $0x1e, Y15, Y15 |
| VPOR Y15, Y0, Y15 |
| VPADDD 96(R8), Y15, Y0 |
| VMOVDQU Y0, 512(R14) |
| VPALIGNR $0x08, Y3, Y15, Y0 |
| VPXOR Y13, Y14, Y14 |
| VPXOR Y7, Y0, Y0 |
| VPXOR Y0, Y14, Y14 |
| VPSLLD $0x02, Y14, Y0 |
| VPSRLD $0x1e, Y14, Y14 |
| VPOR Y14, Y0, Y14 |
| VPADDD 96(R8), Y14, Y0 |
| VMOVDQU Y0, 544(R14) |
| VPALIGNR $0x08, Y15, Y14, Y0 |
| VPXOR Y12, Y13, Y13 |
| VPXOR Y5, Y0, Y0 |
| VPXOR Y0, Y13, Y13 |
| VPSLLD $0x02, Y13, Y0 |
| VPSRLD $0x1e, Y13, Y13 |
| VPOR Y13, Y0, Y13 |
| VPADDD 96(R8), Y13, Y0 |
| VMOVDQU Y0, 576(R14) |
| VPALIGNR $0x08, Y14, Y13, Y0 |
| VPXOR Y8, Y12, Y12 |
| VPXOR Y3, Y0, Y0 |
| VPXOR Y0, Y12, Y12 |
| VPSLLD $0x02, Y12, Y0 |
| VPSRLD $0x1e, Y12, Y12 |
| VPOR Y12, Y0, Y12 |
| VPADDD 96(R8), Y12, Y0 |
| VMOVDQU Y0, 608(R14) |
| XCHGQ R15, R14 |
| |
| loop: |
| CMPQ R10, R8 |
| JNE begin |
| VZEROUPPER |
| RET |
| |
| begin: |
| MOVL SI, BX |
| RORXL $0x02, SI, SI |
| ANDNL AX, BX, BP |
| ANDL DI, BX |
| XORL BP, BX |
| ADDL (R15), DX |
| ANDNL DI, CX, BP |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VMOVDQU 128(R10), X0 |
| ANDL SI, CX |
| XORL BP, CX |
| LEAL (DX)(R12*1), DX |
| ADDL 4(R15), AX |
| ANDNL SI, DX, BP |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VINSERTI128 $0x01, 128(R13), Y0, Y0 |
| ANDL BX, DX |
| XORL BP, DX |
| LEAL (AX)(R12*1), AX |
| ADDL 8(R15), DI |
| ANDNL BX, AX, BP |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPSHUFB Y10, Y0, Y15 |
| ANDL CX, AX |
| XORL BP, AX |
| LEAL (DI)(R12*1), DI |
| ADDL 12(R15), SI |
| ANDNL CX, DI, BP |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| ANDL DX, DI |
| XORL BP, DI |
| LEAL (SI)(R12*1), SI |
| ADDL 32(R15), BX |
| ANDNL DX, SI, BP |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPADDD (R8), Y15, Y0 |
| ANDL AX, SI |
| XORL BP, SI |
| LEAL (BX)(R12*1), BX |
| ADDL 36(R15), CX |
| ANDNL AX, BX, BP |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| ANDL DI, BX |
| XORL BP, BX |
| LEAL (CX)(R12*1), CX |
| ADDL 40(R15), DX |
| ANDNL DI, CX, BP |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| ANDL SI, CX |
| XORL BP, CX |
| LEAL (DX)(R12*1), DX |
| ADDL 44(R15), AX |
| ANDNL SI, DX, BP |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VMOVDQU Y0, (R14) |
| ANDL BX, DX |
| XORL BP, DX |
| LEAL (AX)(R12*1), AX |
| ADDL 64(R15), DI |
| ANDNL BX, AX, BP |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VMOVDQU 144(R10), X0 |
| ANDL CX, AX |
| XORL BP, AX |
| LEAL (DI)(R12*1), DI |
| ADDL 68(R15), SI |
| ANDNL CX, DI, BP |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VINSERTI128 $0x01, 144(R13), Y0, Y0 |
| ANDL DX, DI |
| XORL BP, DI |
| LEAL (SI)(R12*1), SI |
| ADDL 72(R15), BX |
| ANDNL DX, SI, BP |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPSHUFB Y10, Y0, Y14 |
| ANDL AX, SI |
| XORL BP, SI |
| LEAL (BX)(R12*1), BX |
| ADDL 76(R15), CX |
| ANDNL AX, BX, BP |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| ANDL DI, BX |
| XORL BP, BX |
| LEAL (CX)(R12*1), CX |
| ADDL 96(R15), DX |
| ANDNL DI, CX, BP |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPADDD (R8), Y14, Y0 |
| ANDL SI, CX |
| XORL BP, CX |
| LEAL (DX)(R12*1), DX |
| ADDL 100(R15), AX |
| ANDNL SI, DX, BP |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| ANDL BX, DX |
| XORL BP, DX |
| LEAL (AX)(R12*1), AX |
| ADDL 104(R15), DI |
| ANDNL BX, AX, BP |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| ANDL CX, AX |
| XORL BP, AX |
| LEAL (DI)(R12*1), DI |
| ADDL 108(R15), SI |
| ANDNL CX, DI, BP |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VMOVDQU Y0, 32(R14) |
| ANDL DX, DI |
| XORL BP, DI |
| LEAL (SI)(R12*1), SI |
| ADDL 128(R15), BX |
| ANDNL DX, SI, BP |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VMOVDQU 160(R10), X0 |
| ANDL AX, SI |
| XORL BP, SI |
| LEAL (BX)(R12*1), BX |
| ADDL 132(R15), CX |
| ANDNL AX, BX, BP |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VINSERTI128 $0x01, 160(R13), Y0, Y0 |
| ANDL DI, BX |
| XORL BP, BX |
| LEAL (CX)(R12*1), CX |
| ADDL 136(R15), DX |
| ANDNL DI, CX, BP |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPSHUFB Y10, Y0, Y13 |
| ANDL SI, CX |
| XORL BP, CX |
| LEAL (DX)(R12*1), DX |
| ADDL 140(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 160(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPADDD (R8), Y13, Y0 |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 164(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 168(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 172(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VMOVDQU Y0, 64(R14) |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 192(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VMOVDQU 176(R10), X0 |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 196(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VINSERTI128 $0x01, 176(R13), Y0, Y0 |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 200(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPSHUFB Y10, Y0, Y12 |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 204(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 224(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPADDD (R8), Y12, Y0 |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 228(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 232(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 236(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VMOVDQU Y0, 96(R14) |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 256(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPALIGNR $0x08, Y15, Y14, Y8 |
| VPSRLDQ $0x04, Y12, Y0 |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 260(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPXOR Y13, Y8, Y8 |
| VPXOR Y15, Y0, Y0 |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 264(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPXOR Y0, Y8, Y8 |
| VPSLLDQ $0x0c, Y8, Y9 |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 268(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPSLLD $0x01, Y8, Y0 |
| VPSRLD $0x1f, Y8, Y8 |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 288(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPOR Y8, Y0, Y0 |
| VPSLLD $0x02, Y9, Y8 |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 292(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPSRLD $0x1e, Y9, Y9 |
| VPXOR Y8, Y0, Y0 |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 296(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 300(R15), SI |
| VPXOR Y9, Y0, Y8 |
| VPADDD (R8), Y8, Y0 |
| VMOVDQU Y0, 128(R14) |
| LEAL (SI)(AX*1), SI |
| MOVL DX, BP |
| ORL DI, BP |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| ANDL CX, BP |
| ANDL DX, DI |
| ORL BP, DI |
| ADDL R12, SI |
| ADDL 320(R15), BX |
| VPALIGNR $0x08, Y14, Y13, Y7 |
| VPSRLDQ $0x04, Y8, Y0 |
| LEAL (BX)(DI*1), BX |
| MOVL AX, BP |
| ORL SI, BP |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| ANDL DX, BP |
| ANDL AX, SI |
| ORL BP, SI |
| ADDL R12, BX |
| ADDL 324(R15), CX |
| VPXOR Y12, Y7, Y7 |
| VPXOR Y14, Y0, Y0 |
| LEAL (CX)(SI*1), CX |
| MOVL DI, BP |
| ORL BX, BP |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| ANDL AX, BP |
| ANDL DI, BX |
| ORL BP, BX |
| ADDL R12, CX |
| ADDL 328(R15), DX |
| VPXOR Y0, Y7, Y7 |
| VPSLLDQ $0x0c, Y7, Y9 |
| LEAL (DX)(BX*1), DX |
| MOVL SI, BP |
| ORL CX, BP |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| ANDL DI, BP |
| ANDL SI, CX |
| ORL BP, CX |
| ADDL R12, DX |
| ADDL 332(R15), AX |
| VPSLLD $0x01, Y7, Y0 |
| VPSRLD $0x1f, Y7, Y7 |
| LEAL (AX)(CX*1), AX |
| MOVL BX, BP |
| ORL DX, BP |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| ANDL SI, BP |
| ANDL BX, DX |
| ORL BP, DX |
| ADDL R12, AX |
| ADDL 352(R15), DI |
| VPOR Y7, Y0, Y0 |
| VPSLLD $0x02, Y9, Y7 |
| LEAL (DI)(DX*1), DI |
| MOVL CX, BP |
| ORL AX, BP |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| ANDL BX, BP |
| ANDL CX, AX |
| ORL BP, AX |
| ADDL R12, DI |
| ADDL 356(R15), SI |
| VPSRLD $0x1e, Y9, Y9 |
| VPXOR Y7, Y0, Y0 |
| LEAL (SI)(AX*1), SI |
| MOVL DX, BP |
| ORL DI, BP |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| ANDL CX, BP |
| ANDL DX, DI |
| ORL BP, DI |
| ADDL R12, SI |
| ADDL 360(R15), BX |
| LEAL (BX)(DI*1), BX |
| MOVL AX, BP |
| ORL SI, BP |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| ANDL DX, BP |
| ANDL AX, SI |
| ORL BP, SI |
| ADDL R12, BX |
| ADDL 364(R15), CX |
| VPXOR Y9, Y0, Y7 |
| VPADDD 32(R8), Y7, Y0 |
| VMOVDQU Y0, 160(R14) |
| LEAL (CX)(SI*1), CX |
| MOVL DI, BP |
| ORL BX, BP |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| ANDL AX, BP |
| ANDL DI, BX |
| ORL BP, BX |
| ADDL R12, CX |
| ADDL 384(R15), DX |
| VPALIGNR $0x08, Y13, Y12, Y5 |
| VPSRLDQ $0x04, Y7, Y0 |
| LEAL (DX)(BX*1), DX |
| MOVL SI, BP |
| ORL CX, BP |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| ANDL DI, BP |
| ANDL SI, CX |
| ORL BP, CX |
| ADDL R12, DX |
| ADDL 388(R15), AX |
| VPXOR Y8, Y5, Y5 |
| VPXOR Y13, Y0, Y0 |
| LEAL (AX)(CX*1), AX |
| MOVL BX, BP |
| ORL DX, BP |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| ANDL SI, BP |
| ANDL BX, DX |
| ORL BP, DX |
| ADDL R12, AX |
| ADDL 392(R15), DI |
| VPXOR Y0, Y5, Y5 |
| VPSLLDQ $0x0c, Y5, Y9 |
| LEAL (DI)(DX*1), DI |
| MOVL CX, BP |
| ORL AX, BP |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| ANDL BX, BP |
| ANDL CX, AX |
| ORL BP, AX |
| ADDL R12, DI |
| ADDL 396(R15), SI |
| VPSLLD $0x01, Y5, Y0 |
| VPSRLD $0x1f, Y5, Y5 |
| LEAL (SI)(AX*1), SI |
| MOVL DX, BP |
| ORL DI, BP |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| ANDL CX, BP |
| ANDL DX, DI |
| ORL BP, DI |
| ADDL R12, SI |
| ADDL 416(R15), BX |
| VPOR Y5, Y0, Y0 |
| VPSLLD $0x02, Y9, Y5 |
| LEAL (BX)(DI*1), BX |
| MOVL AX, BP |
| ORL SI, BP |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| ANDL DX, BP |
| ANDL AX, SI |
| ORL BP, SI |
| ADDL R12, BX |
| ADDL 420(R15), CX |
| VPSRLD $0x1e, Y9, Y9 |
| VPXOR Y5, Y0, Y0 |
| LEAL (CX)(SI*1), CX |
| MOVL DI, BP |
| ORL BX, BP |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| ANDL AX, BP |
| ANDL DI, BX |
| ORL BP, BX |
| ADDL R12, CX |
| ADDL 424(R15), DX |
| LEAL (DX)(BX*1), DX |
| MOVL SI, BP |
| ORL CX, BP |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| ANDL DI, BP |
| ANDL SI, CX |
| ORL BP, CX |
| ADDL R12, DX |
| ADDL 428(R15), AX |
| VPXOR Y9, Y0, Y5 |
| VPADDD 32(R8), Y5, Y0 |
| VMOVDQU Y0, 192(R14) |
| LEAL (AX)(CX*1), AX |
| MOVL BX, BP |
| ORL DX, BP |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| ANDL SI, BP |
| ANDL BX, DX |
| ORL BP, DX |
| ADDL R12, AX |
| ADDL 448(R15), DI |
| VPALIGNR $0x08, Y12, Y8, Y3 |
| VPSRLDQ $0x04, Y5, Y0 |
| LEAL (DI)(DX*1), DI |
| MOVL CX, BP |
| ORL AX, BP |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| ANDL BX, BP |
| ANDL CX, AX |
| ORL BP, AX |
| ADDL R12, DI |
| ADDL 452(R15), SI |
| VPXOR Y7, Y3, Y3 |
| VPXOR Y12, Y0, Y0 |
| LEAL (SI)(AX*1), SI |
| MOVL DX, BP |
| ORL DI, BP |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| ANDL CX, BP |
| ANDL DX, DI |
| ORL BP, DI |
| ADDL R12, SI |
| ADDL 456(R15), BX |
| VPXOR Y0, Y3, Y3 |
| VPSLLDQ $0x0c, Y3, Y9 |
| LEAL (BX)(DI*1), BX |
| MOVL AX, BP |
| ORL SI, BP |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| ANDL DX, BP |
| ANDL AX, SI |
| ORL BP, SI |
| ADDL R12, BX |
| ADDL 460(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPSLLD $0x01, Y3, Y0 |
| VPSRLD $0x1f, Y3, Y3 |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDQ $0x80, R10 |
| CMPQ R10, R11 |
| CMOVQCC R8, R10 |
| ADDL 480(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPOR Y3, Y0, Y0 |
| VPSLLD $0x02, Y9, Y3 |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 484(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPSRLD $0x1e, Y9, Y9 |
| VPXOR Y3, Y0, Y0 |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 488(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 492(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPXOR Y9, Y0, Y3 |
| VPADDD 32(R8), Y3, Y0 |
| VMOVDQU Y0, 224(R14) |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 512(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPALIGNR $0x08, Y5, Y3, Y0 |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 516(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPXOR Y14, Y15, Y15 |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 520(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPXOR Y8, Y0, Y0 |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 524(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPXOR Y0, Y15, Y15 |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 544(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPSLLD $0x02, Y15, Y0 |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 548(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPSRLD $0x1e, Y15, Y15 |
| VPOR Y15, Y0, Y15 |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 552(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 556(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPADDD 32(R8), Y15, Y0 |
| VMOVDQU Y0, 256(R14) |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 576(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPALIGNR $0x08, Y3, Y15, Y0 |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 580(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPXOR Y13, Y14, Y14 |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 584(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPXOR Y7, Y0, Y0 |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 588(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPXOR Y0, Y14, Y14 |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 608(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPSLLD $0x02, Y14, Y0 |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 612(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPSRLD $0x1e, Y14, Y14 |
| VPOR Y14, Y0, Y14 |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 616(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 620(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| VPADDD 32(R8), Y14, Y0 |
| VMOVDQU Y0, 288(R14) |
| ADDL R12, AX |
| ADDL (R9), AX |
| MOVL AX, (R9) |
| ADDL 4(R9), DX |
| MOVL DX, 4(R9) |
| ADDL 8(R9), BX |
| MOVL BX, 8(R9) |
| ADDL 12(R9), SI |
| MOVL SI, 12(R9) |
| ADDL 16(R9), DI |
| MOVL DI, 16(R9) |
| CMPQ R10, R8 |
| JE loop |
| MOVL DX, CX |
| MOVL CX, DX |
| RORXL $0x02, CX, CX |
| ANDNL SI, DX, BP |
| ANDL BX, DX |
| XORL BP, DX |
| ADDL 16(R15), DI |
| ANDNL BX, AX, BP |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPALIGNR $0x08, Y15, Y14, Y0 |
| ANDL CX, AX |
| XORL BP, AX |
| LEAL (DI)(R12*1), DI |
| ADDL 20(R15), SI |
| ANDNL CX, DI, BP |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPXOR Y12, Y13, Y13 |
| ANDL DX, DI |
| XORL BP, DI |
| LEAL (SI)(R12*1), SI |
| ADDL 24(R15), BX |
| ANDNL DX, SI, BP |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPXOR Y5, Y0, Y0 |
| ANDL AX, SI |
| XORL BP, SI |
| LEAL (BX)(R12*1), BX |
| ADDL 28(R15), CX |
| ANDNL AX, BX, BP |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPXOR Y0, Y13, Y13 |
| ANDL DI, BX |
| XORL BP, BX |
| LEAL (CX)(R12*1), CX |
| ADDL 48(R15), DX |
| ANDNL DI, CX, BP |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPSLLD $0x02, Y13, Y0 |
| ANDL SI, CX |
| XORL BP, CX |
| LEAL (DX)(R12*1), DX |
| ADDL 52(R15), AX |
| ANDNL SI, DX, BP |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPSRLD $0x1e, Y13, Y13 |
| VPOR Y13, Y0, Y13 |
| ANDL BX, DX |
| XORL BP, DX |
| LEAL (AX)(R12*1), AX |
| ADDL 56(R15), DI |
| ANDNL BX, AX, BP |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| ANDL CX, AX |
| XORL BP, AX |
| LEAL (DI)(R12*1), DI |
| ADDL 60(R15), SI |
| ANDNL CX, DI, BP |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPADDD 64(R8), Y13, Y0 |
| VMOVDQU Y0, 320(R14) |
| ANDL DX, DI |
| XORL BP, DI |
| LEAL (SI)(R12*1), SI |
| ADDL 80(R15), BX |
| ANDNL DX, SI, BP |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPALIGNR $0x08, Y14, Y13, Y0 |
| ANDL AX, SI |
| XORL BP, SI |
| LEAL (BX)(R12*1), BX |
| ADDL 84(R15), CX |
| ANDNL AX, BX, BP |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPXOR Y8, Y12, Y12 |
| ANDL DI, BX |
| XORL BP, BX |
| LEAL (CX)(R12*1), CX |
| ADDL 88(R15), DX |
| ANDNL DI, CX, BP |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPXOR Y3, Y0, Y0 |
| ANDL SI, CX |
| XORL BP, CX |
| LEAL (DX)(R12*1), DX |
| ADDL 92(R15), AX |
| ANDNL SI, DX, BP |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPXOR Y0, Y12, Y12 |
| ANDL BX, DX |
| XORL BP, DX |
| LEAL (AX)(R12*1), AX |
| ADDL 112(R15), DI |
| ANDNL BX, AX, BP |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPSLLD $0x02, Y12, Y0 |
| ANDL CX, AX |
| XORL BP, AX |
| LEAL (DI)(R12*1), DI |
| ADDL 116(R15), SI |
| ANDNL CX, DI, BP |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPSRLD $0x1e, Y12, Y12 |
| VPOR Y12, Y0, Y12 |
| ANDL DX, DI |
| XORL BP, DI |
| LEAL (SI)(R12*1), SI |
| ADDL 120(R15), BX |
| ANDNL DX, SI, BP |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| ANDL AX, SI |
| XORL BP, SI |
| LEAL (BX)(R12*1), BX |
| ADDL 124(R15), CX |
| ANDNL AX, BX, BP |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPADDD 64(R8), Y12, Y0 |
| VMOVDQU Y0, 352(R14) |
| ANDL DI, BX |
| XORL BP, BX |
| LEAL (CX)(R12*1), CX |
| ADDL 144(R15), DX |
| ANDNL DI, CX, BP |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPALIGNR $0x08, Y13, Y12, Y0 |
| ANDL SI, CX |
| XORL BP, CX |
| LEAL (DX)(R12*1), DX |
| ADDL 148(R15), AX |
| ANDNL SI, DX, BP |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPXOR Y7, Y8, Y8 |
| ANDL BX, DX |
| XORL BP, DX |
| LEAL (AX)(R12*1), AX |
| ADDL 152(R15), DI |
| ANDNL BX, AX, BP |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPXOR Y15, Y0, Y0 |
| ANDL CX, AX |
| XORL BP, AX |
| LEAL (DI)(R12*1), DI |
| ADDL 156(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPXOR Y0, Y8, Y8 |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 176(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPSLLD $0x02, Y8, Y0 |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 180(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPSRLD $0x1e, Y8, Y8 |
| VPOR Y8, Y0, Y8 |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 184(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 188(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPADDD 64(R8), Y8, Y0 |
| VMOVDQU Y0, 384(R14) |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 208(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPALIGNR $0x08, Y12, Y8, Y0 |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 212(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPXOR Y5, Y7, Y7 |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 216(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPXOR Y14, Y0, Y0 |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 220(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPXOR Y0, Y7, Y7 |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 240(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPSLLD $0x02, Y7, Y0 |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 244(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPSRLD $0x1e, Y7, Y7 |
| VPOR Y7, Y0, Y7 |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 248(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 252(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPADDD 64(R8), Y7, Y0 |
| VMOVDQU Y0, 416(R14) |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 272(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPALIGNR $0x08, Y8, Y7, Y0 |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 276(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPXOR Y3, Y5, Y5 |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 280(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPXOR Y13, Y0, Y0 |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 284(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPXOR Y0, Y5, Y5 |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 304(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPSLLD $0x02, Y5, Y0 |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 308(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPSRLD $0x1e, Y5, Y5 |
| VPOR Y5, Y0, Y5 |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 312(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 316(R15), CX |
| VPADDD 64(R8), Y5, Y0 |
| VMOVDQU Y0, 448(R14) |
| LEAL (CX)(SI*1), CX |
| MOVL DI, BP |
| ORL BX, BP |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| ANDL AX, BP |
| ANDL DI, BX |
| ORL BP, BX |
| ADDL R12, CX |
| ADDL 336(R15), DX |
| VPALIGNR $0x08, Y7, Y5, Y0 |
| LEAL (DX)(BX*1), DX |
| MOVL SI, BP |
| ORL CX, BP |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| ANDL DI, BP |
| ANDL SI, CX |
| ORL BP, CX |
| ADDL R12, DX |
| ADDL 340(R15), AX |
| VPXOR Y15, Y3, Y3 |
| LEAL (AX)(CX*1), AX |
| MOVL BX, BP |
| ORL DX, BP |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| ANDL SI, BP |
| ANDL BX, DX |
| ORL BP, DX |
| ADDL R12, AX |
| ADDL 344(R15), DI |
| VPXOR Y12, Y0, Y0 |
| LEAL (DI)(DX*1), DI |
| MOVL CX, BP |
| ORL AX, BP |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| ANDL BX, BP |
| ANDL CX, AX |
| ORL BP, AX |
| ADDL R12, DI |
| ADDL 348(R15), SI |
| VPXOR Y0, Y3, Y3 |
| LEAL (SI)(AX*1), SI |
| MOVL DX, BP |
| ORL DI, BP |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| ANDL CX, BP |
| ANDL DX, DI |
| ORL BP, DI |
| ADDL R12, SI |
| ADDL 368(R15), BX |
| VPSLLD $0x02, Y3, Y0 |
| LEAL (BX)(DI*1), BX |
| MOVL AX, BP |
| ORL SI, BP |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| ANDL DX, BP |
| ANDL AX, SI |
| ORL BP, SI |
| ADDL R12, BX |
| ADDL 372(R15), CX |
| VPSRLD $0x1e, Y3, Y3 |
| VPOR Y3, Y0, Y3 |
| LEAL (CX)(SI*1), CX |
| MOVL DI, BP |
| ORL BX, BP |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| ANDL AX, BP |
| ANDL DI, BX |
| ORL BP, BX |
| ADDL R12, CX |
| ADDL 376(R15), DX |
| LEAL (DX)(BX*1), DX |
| MOVL SI, BP |
| ORL CX, BP |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| ANDL DI, BP |
| ANDL SI, CX |
| ORL BP, CX |
| ADDL R12, DX |
| ADDL 380(R15), AX |
| VPADDD 96(R8), Y3, Y0 |
| VMOVDQU Y0, 480(R14) |
| LEAL (AX)(CX*1), AX |
| MOVL BX, BP |
| ORL DX, BP |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| ANDL SI, BP |
| ANDL BX, DX |
| ORL BP, DX |
| ADDL R12, AX |
| ADDL 400(R15), DI |
| VPALIGNR $0x08, Y5, Y3, Y0 |
| LEAL (DI)(DX*1), DI |
| MOVL CX, BP |
| ORL AX, BP |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| ANDL BX, BP |
| ANDL CX, AX |
| ORL BP, AX |
| ADDL R12, DI |
| ADDL 404(R15), SI |
| VPXOR Y14, Y15, Y15 |
| LEAL (SI)(AX*1), SI |
| MOVL DX, BP |
| ORL DI, BP |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| ANDL CX, BP |
| ANDL DX, DI |
| ORL BP, DI |
| ADDL R12, SI |
| ADDL 408(R15), BX |
| VPXOR Y8, Y0, Y0 |
| LEAL (BX)(DI*1), BX |
| MOVL AX, BP |
| ORL SI, BP |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| ANDL DX, BP |
| ANDL AX, SI |
| ORL BP, SI |
| ADDL R12, BX |
| ADDL 412(R15), CX |
| VPXOR Y0, Y15, Y15 |
| LEAL (CX)(SI*1), CX |
| MOVL DI, BP |
| ORL BX, BP |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| ANDL AX, BP |
| ANDL DI, BX |
| ORL BP, BX |
| ADDL R12, CX |
| ADDL 432(R15), DX |
| VPSLLD $0x02, Y15, Y0 |
| LEAL (DX)(BX*1), DX |
| MOVL SI, BP |
| ORL CX, BP |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| ANDL DI, BP |
| ANDL SI, CX |
| ORL BP, CX |
| ADDL R12, DX |
| ADDL 436(R15), AX |
| VPSRLD $0x1e, Y15, Y15 |
| VPOR Y15, Y0, Y15 |
| LEAL (AX)(CX*1), AX |
| MOVL BX, BP |
| ORL DX, BP |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| ANDL SI, BP |
| ANDL BX, DX |
| ORL BP, DX |
| ADDL R12, AX |
| ADDL 440(R15), DI |
| LEAL (DI)(DX*1), DI |
| MOVL CX, BP |
| ORL AX, BP |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| ANDL BX, BP |
| ANDL CX, AX |
| ORL BP, AX |
| ADDL R12, DI |
| ADDL 444(R15), SI |
| VPADDD 96(R8), Y15, Y0 |
| VMOVDQU Y0, 512(R14) |
| LEAL (SI)(AX*1), SI |
| MOVL DX, BP |
| ORL DI, BP |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| ANDL CX, BP |
| ANDL DX, DI |
| ORL BP, DI |
| ADDL R12, SI |
| ADDL 464(R15), BX |
| VPALIGNR $0x08, Y3, Y15, Y0 |
| LEAL (BX)(DI*1), BX |
| MOVL AX, BP |
| ORL SI, BP |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| ANDL DX, BP |
| ANDL AX, SI |
| ORL BP, SI |
| ADDL R12, BX |
| ADDL 468(R15), CX |
| VPXOR Y13, Y14, Y14 |
| LEAL (CX)(SI*1), CX |
| MOVL DI, BP |
| ORL BX, BP |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| ANDL AX, BP |
| ANDL DI, BX |
| ORL BP, BX |
| ADDL R12, CX |
| ADDL 472(R15), DX |
| VPXOR Y7, Y0, Y0 |
| LEAL (DX)(BX*1), DX |
| MOVL SI, BP |
| ORL CX, BP |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| ANDL DI, BP |
| ANDL SI, CX |
| ORL BP, CX |
| ADDL R12, DX |
| ADDL 476(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPXOR Y0, Y14, Y14 |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDQ $0x80, R13 |
| CMPQ R13, R11 |
| CMOVQCC R8, R10 |
| ADDL 496(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPSLLD $0x02, Y14, Y0 |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 500(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPSRLD $0x1e, Y14, Y14 |
| VPOR Y14, Y0, Y14 |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 504(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 508(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPADDD 96(R8), Y14, Y0 |
| VMOVDQU Y0, 544(R14) |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 528(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPALIGNR $0x08, Y15, Y14, Y0 |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 532(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPXOR Y12, Y13, Y13 |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 536(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPXOR Y5, Y0, Y0 |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 540(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPXOR Y0, Y13, Y13 |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 560(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPSLLD $0x02, Y13, Y0 |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 564(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPSRLD $0x1e, Y13, Y13 |
| VPOR Y13, Y0, Y13 |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 568(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 572(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPADDD 96(R8), Y13, Y0 |
| VMOVDQU Y0, 576(R14) |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 592(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| VPALIGNR $0x08, Y14, Y13, Y0 |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 596(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| RORXL $0x02, DI, AX |
| VPXOR Y8, Y12, Y12 |
| XORL DX, DI |
| ADDL R12, SI |
| XORL CX, DI |
| ADDL 600(R15), BX |
| LEAL (BX)(DI*1), BX |
| RORXL $0x1b, SI, R12 |
| RORXL $0x02, SI, DI |
| VPXOR Y3, Y0, Y0 |
| XORL AX, SI |
| ADDL R12, BX |
| XORL DX, SI |
| ADDL 604(R15), CX |
| LEAL (CX)(SI*1), CX |
| RORXL $0x1b, BX, R12 |
| RORXL $0x02, BX, SI |
| VPXOR Y0, Y12, Y12 |
| XORL DI, BX |
| ADDL R12, CX |
| XORL AX, BX |
| ADDL 624(R15), DX |
| LEAL (DX)(BX*1), DX |
| RORXL $0x1b, CX, R12 |
| RORXL $0x02, CX, BX |
| VPSLLD $0x02, Y12, Y0 |
| XORL SI, CX |
| ADDL R12, DX |
| XORL DI, CX |
| ADDL 628(R15), AX |
| LEAL (AX)(CX*1), AX |
| RORXL $0x1b, DX, R12 |
| RORXL $0x02, DX, CX |
| VPSRLD $0x1e, Y12, Y12 |
| VPOR Y12, Y0, Y12 |
| XORL BX, DX |
| ADDL R12, AX |
| XORL SI, DX |
| ADDL 632(R15), DI |
| LEAL (DI)(DX*1), DI |
| RORXL $0x1b, AX, R12 |
| RORXL $0x02, AX, DX |
| XORL CX, AX |
| ADDL R12, DI |
| XORL BX, AX |
| ADDL 636(R15), SI |
| LEAL (SI)(AX*1), SI |
| RORXL $0x1b, DI, R12 |
| VPADDD 96(R8), Y12, Y0 |
| VMOVDQU Y0, 608(R14) |
| ADDL R12, SI |
| ADDL (R9), SI |
| MOVL SI, (R9) |
| ADDL 4(R9), DI |
| MOVL DI, 4(R9) |
| ADDL 8(R9), DX |
| MOVL DX, 8(R9) |
| ADDL 12(R9), CX |
| MOVL CX, 12(R9) |
| ADDL 16(R9), BX |
| MOVL BX, 16(R9) |
| MOVL SI, R12 |
| MOVL DI, SI |
| MOVL DX, DI |
| MOVL BX, DX |
| MOVL CX, AX |
| MOVL R12, CX |
| XCHGQ R15, R14 |
| JMP loop |
| |
// K_XMM_AR is a 128-byte read-only table of four 32-bit constants
// (0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 - the SHA-1 round constants),
// each repeated eight times. One 32-byte group therefore fills every dword
// lane of a YMM register. blockAVX2 keeps the table address in R8 and uses the
// groups as VPADDD memory operands (for example `VPADDD 96(R8), Y3, Y0`).
| DATA K_XMM_AR<>+0(SB)/4, $0x5a827999 |
| DATA K_XMM_AR<>+4(SB)/4, $0x5a827999 |
| DATA K_XMM_AR<>+8(SB)/4, $0x5a827999 |
| DATA K_XMM_AR<>+12(SB)/4, $0x5a827999 |
| DATA K_XMM_AR<>+16(SB)/4, $0x5a827999 |
| DATA K_XMM_AR<>+20(SB)/4, $0x5a827999 |
| DATA K_XMM_AR<>+24(SB)/4, $0x5a827999 |
| DATA K_XMM_AR<>+28(SB)/4, $0x5a827999 |
| DATA K_XMM_AR<>+32(SB)/4, $0x6ed9eba1 |
| DATA K_XMM_AR<>+36(SB)/4, $0x6ed9eba1 |
| DATA K_XMM_AR<>+40(SB)/4, $0x6ed9eba1 |
| DATA K_XMM_AR<>+44(SB)/4, $0x6ed9eba1 |
| DATA K_XMM_AR<>+48(SB)/4, $0x6ed9eba1 |
| DATA K_XMM_AR<>+52(SB)/4, $0x6ed9eba1 |
| DATA K_XMM_AR<>+56(SB)/4, $0x6ed9eba1 |
| DATA K_XMM_AR<>+60(SB)/4, $0x6ed9eba1 |
| DATA K_XMM_AR<>+64(SB)/4, $0x8f1bbcdc |
| DATA K_XMM_AR<>+68(SB)/4, $0x8f1bbcdc |
| DATA K_XMM_AR<>+72(SB)/4, $0x8f1bbcdc |
| DATA K_XMM_AR<>+76(SB)/4, $0x8f1bbcdc |
| DATA K_XMM_AR<>+80(SB)/4, $0x8f1bbcdc |
| DATA K_XMM_AR<>+84(SB)/4, $0x8f1bbcdc |
| DATA K_XMM_AR<>+88(SB)/4, $0x8f1bbcdc |
| DATA K_XMM_AR<>+92(SB)/4, $0x8f1bbcdc |
| DATA K_XMM_AR<>+96(SB)/4, $0xca62c1d6 |
| DATA K_XMM_AR<>+100(SB)/4, $0xca62c1d6 |
| DATA K_XMM_AR<>+104(SB)/4, $0xca62c1d6 |
| DATA K_XMM_AR<>+108(SB)/4, $0xca62c1d6 |
| DATA K_XMM_AR<>+112(SB)/4, $0xca62c1d6 |
| DATA K_XMM_AR<>+116(SB)/4, $0xca62c1d6 |
| DATA K_XMM_AR<>+120(SB)/4, $0xca62c1d6 |
| DATA K_XMM_AR<>+124(SB)/4, $0xca62c1d6 |
| GLOBL K_XMM_AR<>(SB), RODATA, $128 |
| |
// BSWAP_SHUFB_CTL is a 32-byte read-only VPSHUFB control vector. Each dword is
// stored little-endian, so the control bytes in memory are 3,2,1,0, 7,6,5,4,
// 11,10,9,8, 15,14,13,12: the shuffle reverses the bytes inside every 32-bit
// word. The 16-byte pattern appears twice, once for each 128-bit half of the
// YMM register; blockAVX2 loads it into Y10.
| DATA BSWAP_SHUFB_CTL<>+0(SB)/4, $0x00010203 |
| DATA BSWAP_SHUFB_CTL<>+4(SB)/4, $0x04050607 |
| DATA BSWAP_SHUFB_CTL<>+8(SB)/4, $0x08090a0b |
| DATA BSWAP_SHUFB_CTL<>+12(SB)/4, $0x0c0d0e0f |
| DATA BSWAP_SHUFB_CTL<>+16(SB)/4, $0x00010203 |
| DATA BSWAP_SHUFB_CTL<>+20(SB)/4, $0x04050607 |
| DATA BSWAP_SHUFB_CTL<>+24(SB)/4, $0x08090a0b |
| DATA BSWAP_SHUFB_CTL<>+28(SB)/4, $0x0c0d0e0f |
| GLOBL BSWAP_SHUFB_CTL<>(SB), RODATA, $32 |
| |
| // func blockSHANI(dig *digest, p []byte) |
| // Requires: AVX, SHA, SSE2, SSE4.1, SSSE3 |
//
// blockSHANI folds the 64-byte blocks of p into the 20 bytes of hash state at
// offsets 0-19 of dig, using the SHA1RNDS4 / SHA1NEXTE / SHA1MSG1 / SHA1MSG2
// instructions. An empty p returns immediately without touching dig.
//
// NOTE: the loop advances by 64 bytes and exits only when the read pointer
// equals p_base+p_len, so len(p) must be a multiple of 64.
//
// Register use:
//   DI      dig pointer
//   SI, DX  current block pointer, end pointer (p_base+p_len)
//   AX      16-byte-aligned scratch inside the frame: E at 0(AX), ABCD at 16(AX)
//   X0      ABCD working state (dword order reversed relative to memory)
//   X5, X6  alternate between "E operand for SHA1RNDS4" and "copy of ABCD taken
//           before the rounds", which SHA1NEXTE turns into the next E operand
//   X1-X4   rolling window of message-schedule words, four words per register
//   X7      byte-shuffle mask applied to every 16 bytes of input
| TEXT ·blockSHANI(SB), $48-32 |
| MOVQ dig+0(FP), DI |
| MOVQ p_base+8(FP), SI |
| MOVQ p_len+16(FP), DX |
// Nothing to hash: leave dig untouched.
| CMPQ DX, $0x00 |
| JEQ done |
// DX becomes the end-of-input pointer used by the loop exit test.
| ADDQ SI, DX |
| |
| // Allocate space on the stack for saving ABCD and E0, and align it to 16 bytes |
// AX = (SP + 15) &^ 15. The 32 bytes of scratch plus at most 15 bytes of
// padding fit in the 48-byte frame. Alignment is needed by the VMOVDQA stores
// and by the memory operands of SHA1NEXTE and PADDD at the end of the loop.
| LEAQ 15(SP), AX |
| MOVQ $0x000000000000000f, CX |
| NOTQ CX |
| ANDQ CX, AX |
| |
| // Load initial hash state |
// The dword at dig+16 goes into the top lane of X5; X5 was never initialized,
// so PAND with upper_mask zeroes the other three lanes. The 16 bytes at dig+0
// go into X0 and PSHUFD $0x1b reverses the order of its four dwords.
| PINSRD $0x03, 16(DI), X5 |
| VMOVDQU (DI), X0 |
| PAND upper_mask<>+0(SB), X5 |
| PSHUFD $0x1b, X0, X0 |
| VMOVDQA shuffle_mask<>+0(SB), X7 |
| |
| loop: |
| // Save ABCD and E working values |
| VMOVDQA X5, (AX) |
| VMOVDQA X0, 16(AX) |
| |
| // Rounds 0-3 |
// Load 16 input bytes, reorder them with the shuffle mask, add them to E, and
// keep a copy of ABCD in X6 before SHA1RNDS4 overwrites X0.
| VMOVDQU (SI), X1 |
| PSHUFB X7, X1 |
| PADDD X1, X5 |
| VMOVDQA X0, X6 |
| SHA1RNDS4 $0x00, X5, X0 |
| |
| // Rounds 4-7 |
// SHA1NEXTE builds the E operand from the ABCD copy taken before the previous
// four rounds plus the new message words. SHA1MSG1 starts the schedule
// computation for later rounds.
| VMOVDQU 16(SI), X2 |
| PSHUFB X7, X2 |
| SHA1NEXTE X2, X6 |
| VMOVDQA X0, X5 |
| SHA1RNDS4 $0x00, X6, X0 |
| SHA1MSG1 X2, X1 |
| |
| // Rounds 8-11 |
| VMOVDQU 32(SI), X3 |
| PSHUFB X7, X3 |
| SHA1NEXTE X3, X5 |
| VMOVDQA X0, X6 |
| SHA1RNDS4 $0x00, X5, X0 |
| SHA1MSG1 X3, X2 |
| PXOR X3, X1 |
| |
| // Rounds 12-15 |
// Last load from p for this block. SHA1MSG2 completes the first group of
// derived schedule words in X1.
| VMOVDQU 48(SI), X4 |
| PSHUFB X7, X4 |
| SHA1NEXTE X4, X6 |
| VMOVDQA X0, X5 |
| SHA1MSG2 X4, X1 |
| SHA1RNDS4 $0x00, X6, X0 |
| SHA1MSG1 X4, X3 |
| PXOR X4, X2 |
| |
| // Rounds 16-19 |
// From here on the message words come from the schedule registers X1-X4,
// which are consumed and refilled in rotation.
| SHA1NEXTE X1, X5 |
| VMOVDQA X0, X6 |
| SHA1MSG2 X1, X2 |
| SHA1RNDS4 $0x00, X5, X0 |
| SHA1MSG1 X1, X4 |
| PXOR X1, X3 |
| |
| // Rounds 20-23 |
// The SHA1RNDS4 immediate changes to 1 for rounds 20-39.
| SHA1NEXTE X2, X6 |
| VMOVDQA X0, X5 |
| SHA1MSG2 X2, X3 |
| SHA1RNDS4 $0x01, X6, X0 |
| SHA1MSG1 X2, X1 |
| PXOR X2, X4 |
| |
| // Rounds 24-27 |
| SHA1NEXTE X3, X5 |
| VMOVDQA X0, X6 |
| SHA1MSG2 X3, X4 |
| SHA1RNDS4 $0x01, X5, X0 |
| SHA1MSG1 X3, X2 |
| PXOR X3, X1 |
| |
| // Rounds 28-31 |
| SHA1NEXTE X4, X6 |
| VMOVDQA X0, X5 |
| SHA1MSG2 X4, X1 |
| SHA1RNDS4 $0x01, X6, X0 |
| SHA1MSG1 X4, X3 |
| PXOR X4, X2 |
| |
| // Rounds 32-35 |
| SHA1NEXTE X1, X5 |
| VMOVDQA X0, X6 |
| SHA1MSG2 X1, X2 |
| SHA1RNDS4 $0x01, X5, X0 |
| SHA1MSG1 X1, X4 |
| PXOR X1, X3 |
| |
| // Rounds 36-39 |
| SHA1NEXTE X2, X6 |
| VMOVDQA X0, X5 |
| SHA1MSG2 X2, X3 |
| SHA1RNDS4 $0x01, X6, X0 |
| SHA1MSG1 X2, X1 |
| PXOR X2, X4 |
| |
| // Rounds 40-43 |
// The SHA1RNDS4 immediate changes to 2 for rounds 40-59.
| SHA1NEXTE X3, X5 |
| VMOVDQA X0, X6 |
| SHA1MSG2 X3, X4 |
| SHA1RNDS4 $0x02, X5, X0 |
| SHA1MSG1 X3, X2 |
| PXOR X3, X1 |
| |
| // Rounds 44-47 |
| SHA1NEXTE X4, X6 |
| VMOVDQA X0, X5 |
| SHA1MSG2 X4, X1 |
| SHA1RNDS4 $0x02, X6, X0 |
| SHA1MSG1 X4, X3 |
| PXOR X4, X2 |
| |
| // Rounds 48-51 |
| SHA1NEXTE X1, X5 |
| VMOVDQA X0, X6 |
| SHA1MSG2 X1, X2 |
| SHA1RNDS4 $0x02, X5, X0 |
| SHA1MSG1 X1, X4 |
| PXOR X1, X3 |
| |
| // Rounds 52-55 |
| SHA1NEXTE X2, X6 |
| VMOVDQA X0, X5 |
| SHA1MSG2 X2, X3 |
| SHA1RNDS4 $0x02, X6, X0 |
| SHA1MSG1 X2, X1 |
| PXOR X2, X4 |
| |
| // Rounds 56-59 |
| SHA1NEXTE X3, X5 |
| VMOVDQA X0, X6 |
| SHA1MSG2 X3, X4 |
| SHA1RNDS4 $0x02, X5, X0 |
| SHA1MSG1 X3, X2 |
| PXOR X3, X1 |
| |
| // Rounds 60-63 |
// The SHA1RNDS4 immediate changes to 3 for rounds 60-79.
| SHA1NEXTE X4, X6 |
| VMOVDQA X0, X5 |
| SHA1MSG2 X4, X1 |
| SHA1RNDS4 $0x03, X6, X0 |
| SHA1MSG1 X4, X3 |
| PXOR X4, X2 |
| |
| // Rounds 64-67 |
| SHA1NEXTE X1, X5 |
| VMOVDQA X0, X6 |
| SHA1MSG2 X1, X2 |
| SHA1RNDS4 $0x03, X5, X0 |
| SHA1MSG1 X1, X4 |
| PXOR X1, X3 |
| |
| // Rounds 68-71 |
// The schedule winds down from here: each remaining group issues only the
// steps still needed to finish words for rounds up to 79.
| SHA1NEXTE X2, X6 |
| VMOVDQA X0, X5 |
| SHA1MSG2 X2, X3 |
| SHA1RNDS4 $0x03, X6, X0 |
| PXOR X2, X4 |
| |
| // Rounds 72-75 |
| SHA1NEXTE X3, X5 |
| VMOVDQA X0, X6 |
| SHA1MSG2 X3, X4 |
| SHA1RNDS4 $0x03, X5, X0 |
| |
| // Rounds 76-79 |
| SHA1NEXTE X4, X6 |
| VMOVDQA X0, X5 |
| SHA1RNDS4 $0x03, X6, X0 |
| |
| // Add saved E and ABCD |
// X5 holds the ABCD copy taken before the last four rounds. SHA1NEXTE with the
// saved E at (AX) as its source produces the new E in X5; PADDD adds the saved
// ABCD at 16(AX) to X0.
| SHA1NEXTE (AX), X5 |
| PADDD 16(AX), X0 |
| |
| // Check if we are done, if not return to the loop |
| ADDQ $0x40, SI |
| CMPQ SI, DX |
| JNE loop |
| |
| // Write the hash state back to digest |
// Undo the dword reversal applied on load, then store ABCD at dig+0 and the
// top dword of X5 (E) at dig+16.
| PSHUFD $0x1b, X0, X0 |
| VMOVDQU X0, (DI) |
| PEXTRD $0x03, X5, 16(DI) |
| |
| done: |
| RET |
| |
// upper_mask is a 16-byte read-only constant with only bits 96-127 set.
// blockSHANI ANDs it into X5 to keep the top dword and zero the lower three.
| DATA upper_mask<>+0(SB)/8, $0x0000000000000000 |
| DATA upper_mask<>+8(SB)/8, $0xffffffff00000000 |
| GLOBL upper_mask<>(SB), RODATA, $16 |
| |
// shuffle_mask is a 16-byte read-only PSHUFB control vector. Its bytes in
// memory are 0x0f, 0x0e, ..., 0x01, 0x00, so the shuffle reverses all 16 bytes
// of a register. blockSHANI applies it to each 16-byte chunk of input, which
// byte-swaps the four 32-bit words and also reverses their order.
| DATA shuffle_mask<>+0(SB)/8, $0x08090a0b0c0d0e0f |
| DATA shuffle_mask<>+8(SB)/8, $0x0001020304050607 |
| GLOBL shuffle_mask<>(SB), RODATA, $16 |